Switch to DFA for strings

This commit is contained in:
2025-06-21 17:12:48 -04:00
parent 6c48c40d67
commit 229a68bfdd
2 changed files with 18 additions and 167 deletions

View File

@@ -1,50 +1,12 @@
#pragma once
// Byte-indexed lookup tables (256 entries each), filled in by a constexpr
// constructor and exposed through the single `tables` instance declared at
// the end of the struct.
constexpr inline struct Tables {
// Classification assigned to each possible byte value in stringByteMeaning.
// INVALID is the first enumerator (value 0), so it is what a value-initialized
// table entry holds until the constructor overwrites it.
enum StringByteMeaning {
INVALID,
NORMAL,
DUBQUOTE,
BACKSLASH,
TWO_BYTE_UTF8,
THREE_BYTE_UTF8,
FOUR_BYTE_UTF8,
CONTINUATION_BYTE,
};
// Populates the tables. Every array starts zeroed through its `{}` member
// initializer, so only the non-default entries are assigned here.
constexpr Tables() {
// Space, line feed, carriage return and tab are the bytes flagged as whitespace.
whitespace[' '] = true;
whitespace['\n'] = true;
whitespace['\r'] = true;
whitespace['\t'] = true;
// Classify bytes by their high-bit pattern. The four masks are mutually
// exclusive, so at most one branch assigns for any given i.
for (int i = 0; i < 256; ++i) {
// 10xxxxxx
if ((i & 0b11000000) == 0b10000000) {
stringByteMeaning[i] = CONTINUATION_BYTE;
}
// 110xxxxx
if ((i & 0b11100000) == 0b11000000) {
stringByteMeaning[i] = TWO_BYTE_UTF8;
}
// 1110xxxx
if ((i & 0b11110000) == 0b11100000) {
stringByteMeaning[i] = THREE_BYTE_UTF8;
}
// 11110xxx
if ((i & 0b11111000) == 0b11110000) {
stringByteMeaning[i] = FOUR_BYTE_UTF8;
}
}
// 0x20..0x7F are NORMAL; bytes below 0x20 are never assigned and therefore
// keep the default INVALID.
for (int i = 0x20; i < 128; ++i) {
stringByteMeaning[i] = NORMAL;
}
// These two overrides must come after the NORMAL range fill above, which
// would otherwise overwrite them.
stringByteMeaning['"'] = DUBQUOTE;
stringByteMeaning['\\'] = BACKSLASH;
// 0xC0 and 0xC1 were tagged TWO_BYTE_UTF8 by the pattern loop; reset them.
// NOTE(review): presumably because they can only begin overlong encodings
// of code points below U+0080 — confirm against the UTF-8 spec.
stringByteMeaning[0xc0] = INVALID;
stringByteMeaning[0xc1] = INVALID;
// 0xF5..0xF7 were tagged FOUR_BYTE_UTF8 by the pattern loop and are reset
// here; 0xF8..0xFF matched no pattern and were already INVALID.
// NOTE(review): presumably because these lead bytes would encode values
// above U+10FFFF — confirm against the UTF-8 spec.
for (int i = 0xF5; i < 0x100; ++i) {
stringByteMeaning[i] = INVALID;
}
// unescape is indexed by a character and holds the byte assigned for it
// below; entries that are never assigned stay '\0'.
unescape['n'] = '\n';
unescape['r'] = '\r';
unescape['t'] = '\t';
@@ -55,6 +17,5 @@ constexpr inline struct Tables {
unescape['/'] = '/';
}
// True for the byte values flagged as whitespace in the constructor.
bool whitespace[256]{};
// Per-byte classification; INVALID unless assigned in the constructor.
StringByteMeaning stringByteMeaning[256]{};
// Per-character replacement byte; '\0' unless assigned in the constructor.
char unescape[256]{};
} tables;