Try out dfa for numbers

This commit is contained in:
2025-06-19 16:13:46 -04:00
parent e1ba8e9fa8
commit 976f64b7d3
2 changed files with 159 additions and 225 deletions

View File

@@ -16,6 +16,122 @@
namespace parser3 { namespace parser3 {
// Table-driven DFA that recognizes a JSON number one byte at a time.
//
// Each table row (indexed by input byte) packs ten 6-bit fields into the low
// 60 bits of a uint64_t. A state is represented by the bit offset of its field
// (0, 6, 12, ..., 54), and the field's value is the offset of the next state,
// so a transition is a single shift: next = row >> state.
//
// States, as encoded by the table below:
//   36  start                      6  after a leading '-'
//   30  integer part is "0"       48  integer part starting with 1-9
//   12  after '.'                 18  fraction digits
//   24  after 'e' / 'E'            0  after exponent sign
//   42  exponent digits           54  dead (no match)
class NumDfa {
  constexpr static uint64_t num_dfa_table[256] = {
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      // '*', '+', ','
      0xdb6db6db6db6db6ull, 0xdb6db6d80db6db6ull, 0xdb6db6db6db6db6ull,
      // '-', '.', '/'
      0xdb6d86d80db6db6ull, 0xd8cdb6336db6db6ull, 0xdb6db6db6db6db6ull,
      // '0', '1', '2'
      0xdb0a9edaa4927aaull, 0xdb0ab0daa492c2aull, 0xdb0ab0daa492c2aull,
      // '3', '4', '5'
      0xdb0ab0daa492c2aull, 0xdb0ab0daa492c2aull, 0xdb0ab0daa492c2aull,
      // '6', '7', '8'
      0xdb0ab0daa492c2aull, 0xdb0ab0daa492c2aull, 0xdb0ab0daa492c2aull,
      // '9', ':', ';'
      0xdb0ab0daa492c2aull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      // 'E', 'F', 'G'
      0xd98db6636636db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      // 'c', 'd', 'e'
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xd98db6636636db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull, 0xdb6db6db6db6db6ull,
      0xdb6db6db6db6db6ull,
  };

  // Bit offset of the start state's field.
  constexpr static uint8_t kStartState = 36;
  // Bit offset of the dead state's field; every byte maps it to itself.
  constexpr static uint8_t kDeadState = 54;
  // Only the low six bits of `state` (and of a shifted row) name a state.
  constexpr static uint8_t kStateMask = 63;

  // Low six bits hold the current state; the two high bits are leftovers from
  // the shifted row and are always masked off before use.
  uint8_t state = kStartState;

public:
  // Put the automaton back into its start state.
  void reset() { state = kStartState; }

  // True when the bytes consumed so far form a complete number. Call scan
  // until the match ends before asking.
  bool accept() const {
    switch (state & kStateMask) {
    case 18: // fraction digits
    case 30: // lone zero
    case 42: // exponent digits
    case 48: // integer digits
      return true;
    default:
      return false;
    }
  }

  // Consume bytes from [buf, bufEnd) for as long as they extend the match.
  // Returns a pointer to the first byte that does not match, or bufEnd if all
  // bytes matched. The dfa is left in the last state of the match, so a later
  // call can continue with the following chunk of input.
  const char *scan(const char *buf, const char *bufEnd) {
    for (; buf != bufEnd; ++buf) {
      const uint64_t shifted =
          num_dfa_table[static_cast<uint8_t>(*buf)] >> (state & kStateMask);
      if ((shifted & kStateMask) == kDeadState) {
        break;
      }
      state = static_cast<uint8_t>(shifted);
    }
    return buf;
  }
};
typedef PRESERVE_NONE WeaselJsonStatus (*Continuation)(struct Parser3 *, typedef PRESERVE_NONE WeaselJsonStatus (*Continuation)(struct Parser3 *,
char *buf, char *bufEnd); char *buf, char *bufEnd);
@@ -30,13 +146,8 @@ enum Symbol : uint8_t {
N_STRING, N_STRING,
N_STRING2, N_STRING2,
N_STRING_FOLLOWING_ESCAPE, N_STRING_FOLLOWING_ESCAPE,
N_INTEGER2,
N_DIGITS,
N_DIGITS2,
N_FRACTION,
N_EXPONENT,
N_SIGN,
N_WHITESPACE, N_WHITESPACE,
N_NUMBER,
N_TRUE, N_TRUE,
N_FALSE, N_FALSE,
N_NULL, N_NULL,
@@ -53,10 +164,7 @@ enum Symbol : uint8_t {
T_HEX, T_HEX,
T_HEX2, T_HEX2,
T_HEX3, T_HEX3,
T_DIGIT,
T_ONENINE,
T_EOF, T_EOF,
T_END_NUMBER,
T_BACKSLASH, T_BACKSLASH,
N_SYMBOL_COUNT, // Must be last N_SYMBOL_COUNT, // Must be last
}; };
@@ -131,6 +239,7 @@ struct Parser3 {
uint32_t minCodepoint; uint32_t minCodepoint;
int const stackSize; int const stackSize;
bool complete; bool complete;
NumDfa numDfa;
}; };
inline PRESERVE_NONE WeaselJsonStatus n_whitespace(Parser3 *self, char *buf, inline PRESERVE_NONE WeaselJsonStatus n_whitespace(Parser3 *self, char *buf,
@@ -149,6 +258,21 @@ inline PRESERVE_NONE WeaselJsonStatus n_whitespace(Parser3 *self, char *buf,
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
} }
// Continuation for N_NUMBER: feeds the available bytes through self->numDfa
// and decides whether the number is finished, needs more input, or is
// malformed.
inline PRESERVE_NONE WeaselJsonStatus n_number(Parser3 *self, char *buf,
                                               char *bufEnd) {
  // scan works on const pointers; the underlying buffer is the mutable one we
  // were handed, so dropping const again is safe.
  const char *const stop = self->numDfa.scan(buf, bufEnd);
  buf = const_cast<char *>(stop);

  const bool ranOutOfInput = (buf == bufEnd);
  if (ranOutOfInput && !self->complete) {
    // Every byte matched and more input may follow: the number may continue
    // in the next chunk. The dfa keeps its state for the next call.
    self->flushNumber(false, buf);
    return WeaselJson_AGAIN;
  }

  // The match has ended, either at a non-matching byte or at the end of
  // complete input.
  if (!self->numDfa.accept()) [[unlikely]] {
    return WeaselJson_REJECT;
  }

  self->flushNumber(true, buf);
  self->pop();
  MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
}
inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self, char *buf, inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self, char *buf,
char *bufEnd) { char *bufEnd) {
assert(bufEnd - buf != 0); assert(bufEnd - buf != 0);
@@ -184,13 +308,6 @@ inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self, char *buf,
} }
break; break;
case '0': case '0':
self->pop();
if (auto s = self->push({N_FRACTION, N_EXPONENT})) {
return s;
}
self->dataBegin = buf;
++buf;
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
case '1': case '1':
case '2': case '2':
case '3': case '3':
@@ -200,18 +317,11 @@ inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self, char *buf,
case '7': case '7':
case '8': case '8':
case '9': case '9':
self->pop();
self->dataBegin = buf;
++buf;
if (auto s = self->push({N_DIGITS2, N_FRACTION, N_EXPONENT})) {
return s;
}
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
case '-': case '-':
self->pop();
self->dataBegin = buf; self->dataBegin = buf;
++buf; self->pop();
if (auto s = self->push({N_INTEGER2, N_FRACTION, N_EXPONENT})) { self->numDfa.reset();
if (auto s = self->push({N_NUMBER})) {
return s; return s;
} }
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
@@ -733,171 +843,6 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex3(Parser3 *self, char *buf,
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
} }
// Continuation for the start of an integer: records where the number begins,
// consumes one leading byte and schedules whatever must follow it.
inline PRESERVE_NONE WeaselJsonStatus n_integer(Parser3 *self, char *buf,
                                                char *bufEnd) {
  self->dataBegin = buf;
  const char lead = *buf;

  if (lead == '0') {
    // A leading zero is the entire integer part.
    self->pop();
    ++buf;
    MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
  }

  if ('1' <= lead && lead <= '9') {
    // Non-zero first digit: more digits may follow.
    self->pop();
    ++buf;
    if (auto s = self->push({N_DIGITS2})) {
      return s;
    }
    MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
  }

  if (lead == '-') {
    // Sign consumed: the unsigned integer must follow.
    self->pop();
    ++buf;
    if (auto s = self->push({N_INTEGER2})) {
      return s;
    }
    MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
  }

  [[unlikely]] return WeaselJson_REJECT;
}
// Continuation for the unsigned integer that follows a '-': either a single
// '0', or a digit 1-9 followed by optional further digits.
inline PRESERVE_NONE WeaselJsonStatus n_integer2(Parser3 *self, char *buf,
                                                 char *bufEnd) {
  const char lead = *buf;

  if (lead == '0') {
    self->pop();
    ++buf;
    MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
  }

  if ('1' <= lead && lead <= '9') {
    self->pop();
    ++buf;
    if (auto s = self->push({N_DIGITS2})) {
      return s;
    }
    MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
  }

  [[unlikely]] return WeaselJson_REJECT;
}
// Continuation requiring at least one decimal digit: consumes it and
// schedules N_DIGITS2 for any digits that follow. Anything else is rejected.
inline PRESERVE_NONE WeaselJsonStatus n_digits(Parser3 *self, char *buf,
                                               char *bufEnd) {
  const char c = *buf;
  const bool isDecimalDigit = ('0' <= c && c <= '9');
  if (!isDecimalDigit) {
    [[unlikely]] return WeaselJson_REJECT;
  }

  self->pop();
  ++buf;
  if (auto s = self->push({N_DIGITS2})) {
    return s;
  }
  MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
}
// Continuation for zero or more decimal digits.
inline PRESERVE_NONE WeaselJsonStatus n_digits2(Parser3 *self, char *buf,
                                                char *bufEnd) {
  // Called with no input at all: nothing to consume, just finish the symbol.
  if (buf == bufEnd) {
    self->pop();
    MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
  }

  // Skip ahead to the first byte that is not a decimal digit.
  for (; buf != bufEnd; ++buf) {
    const char c = *buf;
    if (c < '0' || c > '9') {
      break;
    }
  }

  // The digits ran to the end of the buffer, so more may arrive later.
  if (buf == bufEnd) {
    self->flushNumber(false, buf);
    return WeaselJson_AGAIN;
  }

  self->pop();
  MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
}
// Continuation for an optional fraction: if the next byte is '.', consume it
// and require digits afterwards; otherwise (including on empty input) the
// symbol simply completes without consuming anything.
inline PRESERVE_NONE WeaselJsonStatus n_fraction(Parser3 *self, char *buf,
                                                 char *bufEnd) {
  const bool startsFraction = (buf != bufEnd) && (*buf == '.');

  self->pop();
  if (startsFraction) {
    ++buf;
    if (auto s = self->push({N_DIGITS})) {
      return s;
    }
  }
  MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
}
// Continuation for an optional exponent. On 'e'/'E' it schedules an optional
// sign, mandatory digits and the end-of-number marker; on any other byte it
// calls flushNumber(true, buf) at the current position.
inline PRESERVE_NONE WeaselJsonStatus n_exponent(Parser3 *self, char *buf,
                                                 char *bufEnd) {
  // No input: complete the symbol without flushing.
  if (buf == bufEnd) {
    self->pop();
    MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
  }

  const char c = *buf;
  self->pop();

  if (c == 'e' || c == 'E') {
    ++buf;
    if (auto s = self->push({N_SIGN, N_DIGITS, T_END_NUMBER})) {
      return s;
    }
    MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
  }

  // No exponent follows.
  self->flushNumber(true, buf);
  MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
}
// Continuation for an optional exponent sign: consumes a single '+' or '-'
// when one is present, then completes. Empty input and any other byte leave
// the position untouched.
inline PRESERVE_NONE WeaselJsonStatus n_sign(Parser3 *self, char *buf,
                                             char *bufEnd) {
  if (buf != bufEnd && (*buf == '+' || *buf == '-')) {
    ++buf;
  }
  self->pop();
  MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
}
inline PRESERVE_NONE WeaselJsonStatus n_true(Parser3 *self, char *buf, inline PRESERVE_NONE WeaselJsonStatus n_true(Parser3 *self, char *buf,
char *bufEnd) { char *bufEnd) {
if (*buf == 'e') { if (*buf == 'e') {
@@ -987,13 +932,8 @@ constexpr inline struct ContinuationTable {
continuations[N_STRING] = n_string; continuations[N_STRING] = n_string;
continuations[N_STRING2] = n_string2; continuations[N_STRING2] = n_string2;
continuations[N_STRING_FOLLOWING_ESCAPE] = n_string_following_escape; continuations[N_STRING_FOLLOWING_ESCAPE] = n_string_following_escape;
continuations[N_INTEGER2] = n_integer2;
continuations[N_DIGITS] = n_digits;
continuations[N_DIGITS2] = n_digits2;
continuations[N_FRACTION] = n_fraction;
continuations[N_EXPONENT] = n_exponent;
continuations[N_SIGN] = n_sign;
continuations[N_WHITESPACE] = n_whitespace; continuations[N_WHITESPACE] = n_whitespace;
continuations[N_NUMBER] = n_number;
continuations[N_TRUE] = n_true; continuations[N_TRUE] = n_true;
continuations[N_FALSE] = n_false; continuations[N_FALSE] = n_false;
continuations[N_NULL] = n_null; continuations[N_NULL] = n_null;
@@ -1010,10 +950,7 @@ constexpr inline struct ContinuationTable {
continuations[T_HEX] = t_hex; continuations[T_HEX] = t_hex;
continuations[T_HEX2] = t_hex2; continuations[T_HEX2] = t_hex2;
continuations[T_HEX3] = t_hex3; continuations[T_HEX3] = t_hex3;
continuations[T_DIGIT] = t_digit;
continuations[T_ONENINE] = t_onenine;
continuations[T_EOF] = t_eof; continuations[T_EOF] = t_eof;
continuations[T_END_NUMBER] = t_end_number;
continuations[T_BACKSLASH] = singleChar<'\\'>; continuations[T_BACKSLASH] = singleChar<'\\'>;
symbolNames[N_VALUE] = "n_value"; symbolNames[N_VALUE] = "n_value";
@@ -1024,13 +961,8 @@ constexpr inline struct ContinuationTable {
symbolNames[N_STRING] = "n_string"; symbolNames[N_STRING] = "n_string";
symbolNames[N_STRING2] = "n_string2"; symbolNames[N_STRING2] = "n_string2";
symbolNames[N_STRING_FOLLOWING_ESCAPE] = "n_string_following_escape"; symbolNames[N_STRING_FOLLOWING_ESCAPE] = "n_string_following_escape";
symbolNames[N_INTEGER2] = "n_integer2";
symbolNames[N_DIGITS] = "n_digits";
symbolNames[N_DIGITS2] = "n_digits2";
symbolNames[N_FRACTION] = "n_fraction";
symbolNames[N_EXPONENT] = "n_exponent";
symbolNames[N_SIGN] = "n_sign";
symbolNames[N_WHITESPACE] = "n_whitespace"; symbolNames[N_WHITESPACE] = "n_whitespace";
symbolNames[N_NUMBER] = "n_number";
symbolNames[N_TRUE] = "n_true"; symbolNames[N_TRUE] = "n_true";
symbolNames[N_FALSE] = "n_false"; symbolNames[N_FALSE] = "n_false";
symbolNames[N_NULL] = "n_null"; symbolNames[N_NULL] = "n_null";
@@ -1045,20 +977,13 @@ constexpr inline struct ContinuationTable {
symbolNames[T_HEX] = "t_hex"; symbolNames[T_HEX] = "t_hex";
symbolNames[T_HEX2] = "t_hex2"; symbolNames[T_HEX2] = "t_hex2";
symbolNames[T_HEX3] = "t_hex3"; symbolNames[T_HEX3] = "t_hex3";
symbolNames[T_DIGIT] = "t_digit";
symbolNames[T_ONENINE] = "t_onenine";
symbolNames[T_EOF] = "t_eof"; symbolNames[T_EOF] = "t_eof";
symbolNames[T_BACKSLASH] = "singleChar<'\\'>"; symbolNames[T_BACKSLASH] = "singleChar<'\\'>";
symbolNames[T_END_NUMBER] = "t_end_number";
// All others can assume that there's at least one byte when they're called // All others can assume that there's at least one byte when they're called
acceptsEmptyString[N_DIGITS2] = true; acceptsEmptyString[N_NUMBER] = true;
acceptsEmptyString[N_FRACTION] = true;
acceptsEmptyString[N_EXPONENT] = true;
acceptsEmptyString[N_SIGN] = true;
acceptsEmptyString[N_WHITESPACE] = true; acceptsEmptyString[N_WHITESPACE] = true;
acceptsEmptyString[T_EOF] = true; acceptsEmptyString[T_EOF] = true;
acceptsEmptyString[T_END_NUMBER] = true;
} }
Continuation continuations[N_SYMBOL_COUNT]{}; Continuation continuations[N_SYMBOL_COUNT]{};
const char *symbolNames[N_SYMBOL_COUNT]{}; const char *symbolNames[N_SYMBOL_COUNT]{};
@@ -1071,17 +996,6 @@ inline PRESERVE_NONE WeaselJsonStatus Parser3::keepGoing(Parser3 *self,
if (bufEnd - buf == 0) { if (bufEnd - buf == 0) {
if (!self->complete) { if (!self->complete) {
switch (self->top()) { switch (self->top()) {
case N_INTEGER2:
case N_DIGITS:
case N_DIGITS2:
case N_FRACTION:
case N_EXPONENT:
case N_SIGN:
case T_DIGIT:
case T_ONENINE:
case T_END_NUMBER:
self->flushNumber(false, buf);
break;
case N_STRING2: case N_STRING2:
case N_STRING_FOLLOWING_ESCAPE: case N_STRING_FOLLOWING_ESCAPE:
case T_UTF8_CONTINUATION_BYTE: case T_UTF8_CONTINUATION_BYTE:
@@ -1101,6 +1015,7 @@ inline PRESERVE_NONE WeaselJsonStatus Parser3::keepGoing(Parser3 *self,
case N_ARRAY2: case N_ARRAY2:
case N_ARRAY3: case N_ARRAY3:
case N_WHITESPACE: case N_WHITESPACE:
case N_NUMBER:
case N_TRUE: case N_TRUE:
case N_FALSE: case N_FALSE:
case N_NULL: case N_NULL:

View File

@@ -12,6 +12,7 @@
#include <simdjson.h> #include <simdjson.h>
#include "callbacks.h" #include "callbacks.h"
#include "parser3.h"
#include "weaseljson.h" #include "weaseljson.h"
// This is the JSON grammar in McKeeman Form. // This is the JSON grammar in McKeeman Form.
@@ -306,3 +307,21 @@ TEST_CASE("bench5") {
bench.doNotOptimizeAway(doc); bench.doNotOptimizeAway(doc);
}); });
} }
// Checks that NumDfa consumes and accepts one long number, then benchmarks
// the scan per input byte.
TEST_CASE("num dfa") {
  std::string match = "-1231279127389127389127398127389712893791287389217327482"
                      "374.0e69010101010101010101010101010101";
  const char *const first = match.data();
  const char *const last = first + match.size();

  parser3::NumDfa dfa;

  // The whole input is a valid number, so the scan must reach the end and
  // finish in an accept state.
  const char *stop = dfa.scan(first, last);
  CHECK(stop == last);
  CHECK(dfa.accept());

  ankerl::nanobench::Bench bench;
  bench.batch(match.size());
  bench.unit("byte");
  bench.run("number dfa", [&]() {
    // Start every iteration from the start state so each run scans the
    // full input.
    dfa.reset();
    bench.doNotOptimizeAway(dfa.scan(first, last));
  });
}