Move dfa's to tables.h

This commit is contained in:
2025-06-23 14:15:05 -04:00
parent 5df9d958ab
commit 451c07747e
4 changed files with 631 additions and 629 deletions

View File

@@ -16,632 +16,6 @@
namespace parser3 {
// See https://gist.github.com/pervognsen/218ea17743e1442e59bb60d29b1aa725 for
// an explanation of this cycle/byte dfa implementation.
//
// Recognizes json number syntax. As a regex:
// -?([0-9]|[1-9][0-9]*)(\.[0-9]+)?((e|E)(-|\+)?[0-9]+)?
struct NumDfa {
  // Transition table, indexed by input byte. Every entry packs one 6-bit
  // "next state" field per dfa state; the state value itself is the bit offset
  // of its field, so `(table[byte] >> state) & 63` is the next state. A next
  // state of 0 means the byte cannot extend the match.
  //
  // States, as encoded by the entries below:
  //    6  start                      24  after leading '-'
  //   48  integer part is "0"        42  integer part, nonzero leading digit
  //   18  after '.'                  36  fraction digits
  //   12  after 'e' / 'E'            54  after exponent sign
  //   30  exponent digits
  constexpr static uint64_t table[256] = {
      // 0x00 - 0x27: no transitions
      0, 0, 0, 0, 0, 0, 0, 0, // 0x00
      0, 0, 0, 0, 0, 0, 0, 0, // 0x08
      0, 0, 0, 0, 0, 0, 0, 0, // 0x10
      0, 0, 0, 0, 0, 0, 0, 0, // 0x18
      0, 0, 0, 0, 0, 0, 0, 0, // 0x20
      // 0x28 - 0x2f: '+' (0x2b), '-' (0x2d) and '.' (0x2e)
      0, 0, 0, 0x36000ull, 0, 0x36600ull, 0x12480000000000ull, 0,
      // 0x30 - 0x39: '0' has its own entry, '1' through '9' share one
      0x780aa47b091ec00ull, 0x780aa47aa91ea80ull, 0x780aa47aa91ea80ull,
      0x780aa47aa91ea80ull, 0x780aa47aa91ea80ull, 0x780aa47aa91ea80ull,
      0x780aa47aa91ea80ull, 0x780aa47aa91ea80ull, 0x780aa47aa91ea80ull,
      0x780aa47aa91ea80ull,
      // 0x3a - 0x3f
      0, 0, 0, 0, 0, 0,
      // 0x40 - 0x47: 'E' (0x45)
      0, 0, 0, 0, 0, 0xc30c000000000ull, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, // 0x48
      0, 0, 0, 0, 0, 0, 0, 0, // 0x50
      0, 0, 0, 0, 0, 0, 0, 0, // 0x58
      // 0x60 - 0x67: 'e' (0x65)
      0, 0, 0, 0, 0, 0xc30c000000000ull, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, // 0x68
      0, 0, 0, 0, 0, 0, 0, 0, // 0x70
      0, 0, 0, 0, 0, 0, 0, 0, // 0x78
      // 0x80 - 0xff: no transitions
      0, 0, 0, 0, 0, 0, 0, 0, // 0x80
      0, 0, 0, 0, 0, 0, 0, 0, // 0x88
      0, 0, 0, 0, 0, 0, 0, 0, // 0x90
      0, 0, 0, 0, 0, 0, 0, 0, // 0x98
      0, 0, 0, 0, 0, 0, 0, 0, // 0xa0
      0, 0, 0, 0, 0, 0, 0, 0, // 0xa8
      0, 0, 0, 0, 0, 0, 0, 0, // 0xb0
      0, 0, 0, 0, 0, 0, 0, 0, // 0xb8
      0, 0, 0, 0, 0, 0, 0, 0, // 0xc0
      0, 0, 0, 0, 0, 0, 0, 0, // 0xc8
      0, 0, 0, 0, 0, 0, 0, 0, // 0xd0
      0, 0, 0, 0, 0, 0, 0, 0, // 0xd8
      0, 0, 0, 0, 0, 0, 0, 0, // 0xe0
      0, 0, 0, 0, 0, 0, 0, 0, // 0xe8
      0, 0, 0, 0, 0, 0, 0, 0, // 0xf0
      0, 0, 0, 0, 0, 0, 0, 0, // 0xf8
  };

  // Puts the dfa back into its start state.
  void reset() { state = 6; }

  // True when the bytes consumed so far form a complete number. Usually only
  // meaningful once scan has run up to the end of the match.
  bool accept() const {
    switch (state & 63) {
    case 30: // exponent digits
    case 36: // fraction digits
    case 42: // integer digits
    case 48: // lone "0" integer part
      return true;
    default:
      return false;
    }
  }

  // Consumes bytes from [buf, bufEnd) for as long as they extend the match.
  // Returns a pointer to the first byte that does not match, or bufEnd if
  // every byte matched. The dfa is left in the last state of the match, so a
  // later call continues where this one stopped.
  // clang-format off
#ifdef __x86_64__
  __attribute__((target_clones("default", "bmi2")))
#endif
  const char *scan(const char *buf, const char *bufEnd) {
    // clang-format on
    constexpr int kStride = 16;
    auto current = state;

    // Bulk phase: handle kStride bytes per iteration while that many remain.
    for (;;) {
      if (bufEnd - buf < kStride) [[unlikely]] {
        break;
      }
      // Only the low 6 bits of these bytes are the state; the upper two bits
      // are leftovers from the shift and get masked off wherever it matters.
      uint8_t before = current;
      for (int i = 0; i < kStride; ++i, ++buf) {
        const uint8_t after = table[uint8_t(*buf)] >> (before & 63);
        if ((after & 63) == 0) {
          // Dead transition: keep the state from before this byte.
          state = before;
          return buf;
        }
        before = after;
      }
      current = before;
    }

    // Tail phase: fewer than kStride bytes left, step one byte at a time.
    for (; buf != bufEnd; ++buf) {
      const uint64_t next = (table[uint8_t(*buf)] >> (current & 63)) & 63;
      if (next == 0) {
        break;
      }
      current = next;
    }
    state = current;
    return buf;
  }

private:
  // Current dfa state. Only the low 6 bits are significant.
  uint64_t state = 6;
};
// Recognizes sequences of valid utf8 characters except control characters
// (0x00-0x1f), double quote, and backslash. Space (0x20) is accepted.
struct Utf8Dfa {
  // Transition table, indexed by input byte. Every entry packs one 6-bit
  // "next state" field per dfa state; the state value itself is the bit offset
  // of its field, so `(table[byte] >> state) & 63` is the next state. A next
  // state of 0 means the byte cannot extend the match.
  //
  // States, as encoded by the entries below:
  //   48  between characters (start, and the only accept state)
  //   24  one continuation byte still expected
  //   30  two continuation bytes still expected
  //    6  after 0xf1-0xf3: three continuation bytes expected
  //   12  after 0xf0: next byte must be 0x90-0xbf
  //   18  after 0xf4: next byte must be 0x80-0x8f
  //   36  after 0xed: next byte must be 0x80-0x9f
  //   42  after 0xe0: next byte must be 0xa0-0xbf
  constexpr static uint64_t table[256] = {
      // 0x00 - 0x1f: rejected
      0, 0, 0, 0, 0, 0, 0, 0, // 0x00
      0, 0, 0, 0, 0, 0, 0, 0, // 0x08
      0, 0, 0, 0, 0, 0, 0, 0, // 0x10
      0, 0, 0, 0, 0, 0, 0, 0, // 0x18
      // 0x20 - 0x7f: accepted, except double quote (0x22) and backslash (0x5c)
      0x30000000000000ull, 0x30000000000000ull, 0, 0x30000000000000ull, // 0x20
      0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, // 0x24
      0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, // 0x28
      0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, // 0x2c
      0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, // 0x30
      0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, // 0x34
      0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, // 0x38
      0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, // 0x3c
      0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, // 0x40
      0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, // 0x44
      0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, // 0x48
      0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, // 0x4c
      0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, // 0x50
      0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, // 0x54
      0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, // 0x58
      0, 0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, // 0x5c
      0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, // 0x60
      0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, // 0x64
      0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, // 0x68
      0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, // 0x6c
      0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, // 0x70
      0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, // 0x74
      0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, // 0x78
      0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, 0x30000000000000ull, // 0x7c
      // 0x80 - 0x8f: continuation bytes
      0x18630780780ull, 0x18630780780ull, 0x18630780780ull, 0x18630780780ull, // 0x80
      0x18630780780ull, 0x18630780780ull, 0x18630780780ull, 0x18630780780ull, // 0x84
      0x18630780780ull, 0x18630780780ull, 0x18630780780ull, 0x18630780780ull, // 0x88
      0x18630780780ull, 0x18630780780ull, 0x18630780780ull, 0x18630780780ull, // 0x8c
      // 0x90 - 0x9f: continuation bytes
      0x1863001e780ull, 0x1863001e780ull, 0x1863001e780ull, 0x1863001e780ull, // 0x90
      0x1863001e780ull, 0x1863001e780ull, 0x1863001e780ull, 0x1863001e780ull, // 0x94
      0x1863001e780ull, 0x1863001e780ull, 0x1863001e780ull, 0x1863001e780ull, // 0x98
      0x1863001e780ull, 0x1863001e780ull, 0x1863001e780ull, 0x1863001e780ull, // 0x9c
      // 0xa0 - 0xbf: continuation bytes
      0x60063001e780ull, 0x60063001e780ull, 0x60063001e780ull, 0x60063001e780ull, // 0xa0
      0x60063001e780ull, 0x60063001e780ull, 0x60063001e780ull, 0x60063001e780ull, // 0xa4
      0x60063001e780ull, 0x60063001e780ull, 0x60063001e780ull, 0x60063001e780ull, // 0xa8
      0x60063001e780ull, 0x60063001e780ull, 0x60063001e780ull, 0x60063001e780ull, // 0xac
      0x60063001e780ull, 0x60063001e780ull, 0x60063001e780ull, 0x60063001e780ull, // 0xb0
      0x60063001e780ull, 0x60063001e780ull, 0x60063001e780ull, 0x60063001e780ull, // 0xb4
      0x60063001e780ull, 0x60063001e780ull, 0x60063001e780ull, 0x60063001e780ull, // 0xb8
      0x60063001e780ull, 0x60063001e780ull, 0x60063001e780ull, 0x60063001e780ull, // 0xbc
      // 0xc0 - 0xdf: 0xc0 and 0xc1 are rejected, 0xc2 - 0xdf lead 2-byte forms
      0, 0, 0x18000000000000ull, 0x18000000000000ull, // 0xc0
      0x18000000000000ull, 0x18000000000000ull, 0x18000000000000ull, 0x18000000000000ull, // 0xc4
      0x18000000000000ull, 0x18000000000000ull, 0x18000000000000ull, 0x18000000000000ull, // 0xc8
      0x18000000000000ull, 0x18000000000000ull, 0x18000000000000ull, 0x18000000000000ull, // 0xcc
      0x18000000000000ull, 0x18000000000000ull, 0x18000000000000ull, 0x18000000000000ull, // 0xd0
      0x18000000000000ull, 0x18000000000000ull, 0x18000000000000ull, 0x18000000000000ull, // 0xd4
      0x18000000000000ull, 0x18000000000000ull, 0x18000000000000ull, 0x18000000000000ull, // 0xd8
      0x18000000000000ull, 0x18000000000000ull, 0x18000000000000ull, 0x18000000000000ull, // 0xdc
      // 0xe0 - 0xef: lead 3-byte forms; 0xe0 and 0xed get dedicated states
      0x2a000000000000ull, 0x1e000000000000ull, 0x1e000000000000ull, 0x1e000000000000ull, // 0xe0
      0x1e000000000000ull, 0x1e000000000000ull, 0x1e000000000000ull, 0x1e000000000000ull, // 0xe4
      0x1e000000000000ull, 0x1e000000000000ull, 0x1e000000000000ull, 0x1e000000000000ull, // 0xe8
      0x1e000000000000ull, 0x24000000000000ull, 0x1e000000000000ull, 0x1e000000000000ull, // 0xec
      // 0xf0 - 0xf4: lead 4-byte forms; 0xf5 - 0xff are rejected
      0xc000000000000ull, 0x6000000000000ull, 0x6000000000000ull, 0x6000000000000ull, // 0xf0
      0x12000000000000ull, 0, 0, 0, // 0xf4
      0, 0, 0, 0, 0, 0, 0, 0, // 0xf8
  };

  // Puts the dfa back into its start state.
  void reset() { state = 48; }

  // True when the bytes consumed so far end on a character boundary. Usually
  // only meaningful once scan has run up to the end of the match.
  bool accept() const {
    const auto current = state & 63;
    return current == 48;
  }

  // Consumes bytes from [buf, bufEnd) for as long as they extend the match.
  // Returns a pointer to the first byte that does not match, or bufEnd if
  // every byte matched. The dfa is left in the last state of the match, so a
  // later call continues where this one stopped.
  // clang-format off
#ifdef __x86_64__
  __attribute__((target_clones("default", "bmi2")))
#endif
  const char *scan(const char *buf, const char *bufEnd) {
    // clang-format on
    constexpr int kStride = 16;
    auto current = state;

    // Bulk phase: handle kStride bytes per iteration while that many remain.
    for (;;) {
      if (bufEnd - buf < kStride) [[unlikely]] {
        break;
      }
      // Only the low 6 bits of these bytes are the state; the upper two bits
      // are leftovers from the shift and get masked off wherever it matters.
      uint8_t before = current;
      for (int i = 0; i < kStride; ++i, ++buf) {
        const uint8_t after = table[uint8_t(*buf)] >> (before & 63);
        if ((after & 63) == 0) {
          // Dead transition: keep the state from before this byte.
          state = before;
          return buf;
        }
        before = after;
      }
      current = before;
    }

    // Tail phase: fewer than kStride bytes left, step one byte at a time.
    for (; buf != bufEnd; ++buf) {
      const uint64_t next = (table[uint8_t(*buf)] >> (current & 63)) & 63;
      if (next == 0) {
        break;
      }
      current = next;
    }
    state = current;
    return buf;
  }

private:
  // Current dfa state. Only the low 6 bits are significant.
  uint64_t state = 48;
};
// Continuation is a pointer to a function that takes a Parser3 pointer plus
// two char pointers (buf, bufEnd) and returns a WeaselJsonStatus.
// NOTE(review): buf/bufEnd presumably delimit the unparsed input as
// [buf, bufEnd) — confirm against the callers.
// NOTE(review): PRESERVE_NONE is a macro defined outside this view; it looks
// like a calling-convention attribute — confirm at its definition.
typedef PRESERVE_NONE WeaselJsonStatus (*Continuation)(struct Parser3 *,
char *buf, char *bufEnd);