/// Resumable recognizer for JSON number text: an optional leading '-', an
/// integer part that is either a single '0' or starts with 1-9, an optional
/// '.' followed by at least one digit, and an optional 'e'/'E' exponent with
/// an optional sign and at least one digit.
///
/// The automaton has ten states. For each input byte the transition table
/// holds one 64-bit row made of ten 6-bit fields, and a state is identified by
/// the bit offset of its own field inside a row. A step is therefore just
/// `(row >> state) & 63`, with no second lookup.
class NumDfa {
  // Values are bit offsets into a table row, so they must stay multiples of
  // kStateBits.
  enum State : uint8_t {
    kExpSign = 0,     // just consumed the exponent's '+' or '-'
    kMinus = 6,       // just consumed the leading '-'
    kDot = 12,        // just consumed '.'
    kFracDigits = 18, // accepting: one or more fraction digits
    kExpMark = 24,    // just consumed 'e' or 'E'
    kZero = 30,       // accepting: integer part is exactly "0"
    kStart = 36,      // nothing consumed yet
    kExpDigits = 42,  // accepting: one or more exponent digits
    kIntDigits = 48,  // accepting: integer part that started with 1-9
    kReject = 54,     // no valid continuation; scan() never stores this
  };

  static constexpr unsigned kStateBits = 6;
  static constexpr unsigned kStateCount = 10;
  static constexpr uint64_t kStateMask = (uint64_t{1} << kStateBits) - 1;

  struct Table {
    uint64_t rows[256];
  };

  // Row for input byte `c`; the cast keeps bytes >= 0x80 from indexing
  // negatively where `char` is signed.
  static constexpr uint64_t rowFor(const Table &table, char c) {
    return table.rows[static_cast<unsigned char>(c)];
  }

  // Make reading byte `c` in state `from` lead to state `to`.
  static constexpr void link(Table &table, char c, State from, State to) {
    uint64_t &row = table.rows[static_cast<unsigned char>(c)];
    row = (row & ~(kStateMask << from)) | (static_cast<uint64_t>(to) << from);
  }

  // Build the full transition table: every (byte, state) pair rejects unless
  // one of the links below says otherwise.
  static constexpr Table makeTable() {
    Table table{};
    uint64_t rejectEverywhere = 0;
    for (unsigned i = 0; i < kStateCount; ++i) {
      rejectEverywhere |= static_cast<uint64_t>(kReject) << (i * kStateBits);
    }
    for (uint64_t &row : table.rows) {
      row = rejectEverywhere;
    }

    link(table, '-', kStart, kMinus);
    link(table, '-', kExpMark, kExpSign);
    link(table, '+', kExpMark, kExpSign);

    link(table, '.', kZero, kDot);
    link(table, '.', kIntDigits, kDot);

    const char expMarks[] = {'e', 'E'};
    for (char mark : expMarks) {
      link(table, mark, kZero, kExpMark);
      link(table, mark, kIntDigits, kExpMark);
      link(table, mark, kFracDigits, kExpMark);
    }

    for (char digit = '0'; digit <= '9'; ++digit) {
      // A leading '0' ends the integer part: kZero has no digit transitions.
      const State afterFirstDigit = digit == '0' ? kZero : kIntDigits;
      link(table, digit, kStart, afterFirstDigit);
      link(table, digit, kMinus, afterFirstDigit);
      link(table, digit, kIntDigits, kIntDigits);
      link(table, digit, kDot, kFracDigits);
      link(table, digit, kFracDigits, kFracDigits);
      link(table, digit, kExpMark, kExpDigits);
      link(table, digit, kExpSign, kExpDigits);
      link(table, digit, kExpDigits, kExpDigits);
    }
    return table;
  }

  uint8_t state = kStart;

public:
  // Restore this dfa to its start state
  void reset() { state = kStart; }

  // Return true if this dfa is in an accept state. You probably want to call
  // scan until the match ends first.
  [[nodiscard]] bool accept() const {
    switch (state) {
    case kFracDigits:
    case kZero:
    case kExpDigits:
    case kIntDigits:
      return true;
    default:
      return false;
    }
  }

  // Return value either points to the first byte which does not match, or
  // bufEnd. Leaves the dfa in the last state of the match, so a later call
  // continues the same number with the next chunk of input.
  const char *scan(const char *buf, const char *bufEnd) {
    static constexpr Table table = makeTable();
    // The rows built above must be bit-for-bit the packed constants this
    // class previously spelled out as literals.
    static_assert(rowFor(table, '\0') == 0xdb6db6db6db6db6ull, "default row");
    static_assert(rowFor(table, '+') == 0xdb6db6d80db6db6ull, "'+' row");
    static_assert(rowFor(table, '-') == 0xdb6d86d80db6db6ull, "'-' row");
    static_assert(rowFor(table, '.') == 0xd8cdb6336db6db6ull, "'.' row");
    static_assert(rowFor(table, '0') == 0xdb0a9edaa4927aaull, "'0' row");
    static_assert(rowFor(table, '1') == 0xdb0ab0daa492c2aull, "'1' row");
    static_assert(rowFor(table, '9') == 0xdb0ab0daa492c2aull, "'9' row");
    static_assert(rowFor(table, 'e') == 0xd98db6636636db6ull, "'e' row");
    static_assert(rowFor(table, 'E') == 0xd98db6636636db6ull, "'E' row");

    // Work on a local copy so the loop carries the state in a register.
    uint8_t current = state;
    for (; buf != bufEnd; ++buf) {
      const auto next = static_cast<uint8_t>(
          (rowFor(table, *buf) >> current) & kStateMask);
      if (next == kReject) {
        break;
      }
      current = next;
    }
    state = current;
    return buf;
  }
};
char *buf, + char *bufEnd) { + buf = (char *)self->numDfa.scan(buf, bufEnd); + if (buf == bufEnd && !self->complete) { + self->flushNumber(false, buf); + return WeaselJson_AGAIN; + } + if (!self->numDfa.accept()) [[unlikely]] { + return WeaselJson_REJECT; + } + self->flushNumber(true, buf); + self->pop(); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); +} + inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self, char *buf, char *bufEnd) { assert(bufEnd - buf != 0); @@ -184,13 +308,6 @@ inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self, char *buf, } break; case '0': - self->pop(); - if (auto s = self->push({N_FRACTION, N_EXPONENT})) { - return s; - } - self->dataBegin = buf; - ++buf; - MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); case '1': case '2': case '3': @@ -200,18 +317,11 @@ inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self, char *buf, case '7': case '8': case '9': - self->pop(); - self->dataBegin = buf; - ++buf; - if (auto s = self->push({N_DIGITS2, N_FRACTION, N_EXPONENT})) { - return s; - } - MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); case '-': - self->pop(); self->dataBegin = buf; - ++buf; - if (auto s = self->push({N_INTEGER2, N_FRACTION, N_EXPONENT})) { + self->pop(); + self->numDfa.reset(); + if (auto s = self->push({N_NUMBER})) { return s; } MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); @@ -733,171 +843,6 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex3(Parser3 *self, char *buf, MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); } -inline PRESERVE_NONE WeaselJsonStatus n_integer(Parser3 *self, char *buf, - char *bufEnd) { - self->dataBegin = buf; - switch (*buf) { - case '0': - ++buf; - self->pop(); - MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - ++buf; - self->pop(); - if (auto s = self->push({N_DIGITS2})) { - return s; - } - MUSTTAIL return Parser3::keepGoing(self, 
buf, bufEnd); - case '-': - ++buf; - self->pop(); - if (auto s = self->push({N_INTEGER2})) { - return s; - } - MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); - default: - [[unlikely]] return WeaselJson_REJECT; - } -} - -inline PRESERVE_NONE WeaselJsonStatus n_integer2(Parser3 *self, char *buf, - char *bufEnd) { - switch (*buf) { - case '0': - ++buf; - self->pop(); - MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - ++buf; - self->pop(); - if (auto s = self->push({N_DIGITS2})) { - return s; - } - MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); - default: - [[unlikely]] return WeaselJson_REJECT; - } -} - -inline PRESERVE_NONE WeaselJsonStatus n_digits(Parser3 *self, char *buf, - char *bufEnd) { - switch (*buf) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - ++buf; - self->pop(); - if (auto s = self->push({N_DIGITS2})) { - return s; - } - MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); - default: - [[unlikely]] return WeaselJson_REJECT; - } -} - -inline PRESERVE_NONE WeaselJsonStatus n_digits2(Parser3 *self, char *buf, - char *bufEnd) { - if (bufEnd - buf == 0) { - self->pop(); - MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); - } - - // Advance buf to the first non-decimal character - while (buf != bufEnd && '0' <= *buf && *buf <= '9') { - ++buf; - } - if (buf == bufEnd) { - self->flushNumber(false, buf); - return WeaselJson_AGAIN; - } - self->pop(); - MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); -} - -inline PRESERVE_NONE WeaselJsonStatus n_fraction(Parser3 *self, char *buf, - char *bufEnd) { - if (bufEnd - buf == 0) { - self->pop(); - MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); - } - switch (*buf) { - case '.': - ++buf; - self->pop(); - if (auto s = self->push({N_DIGITS})) { - return s; - } - MUSTTAIL return 
Parser3::keepGoing(self, buf, bufEnd); - default: - self->pop(); - MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); - } -} - -inline PRESERVE_NONE WeaselJsonStatus n_exponent(Parser3 *self, char *buf, - char *bufEnd) { - if (bufEnd - buf == 0) { - self->pop(); - MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); - } - switch (*buf) { - case 'e': - case 'E': - ++buf; - self->pop(); - if (auto s = self->push({N_SIGN, N_DIGITS, T_END_NUMBER})) { - return s; - } - MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); - default: - self->pop(); - self->flushNumber(true, buf); - MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); - } -} - -inline PRESERVE_NONE WeaselJsonStatus n_sign(Parser3 *self, char *buf, - char *bufEnd) { - if (bufEnd - buf == 0) { - self->pop(); - MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); - } - switch (*buf) { - case '+': - case '-': - ++buf; - self->pop(); - MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); - default: - self->pop(); - MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); - } -} - inline PRESERVE_NONE WeaselJsonStatus n_true(Parser3 *self, char *buf, char *bufEnd) { if (*buf == 'e') { @@ -987,13 +932,8 @@ constexpr inline struct ContinuationTable { continuations[N_STRING] = n_string; continuations[N_STRING2] = n_string2; continuations[N_STRING_FOLLOWING_ESCAPE] = n_string_following_escape; - continuations[N_INTEGER2] = n_integer2; - continuations[N_DIGITS] = n_digits; - continuations[N_DIGITS2] = n_digits2; - continuations[N_FRACTION] = n_fraction; - continuations[N_EXPONENT] = n_exponent; - continuations[N_SIGN] = n_sign; continuations[N_WHITESPACE] = n_whitespace; + continuations[N_NUMBER] = n_number; continuations[N_TRUE] = n_true; continuations[N_FALSE] = n_false; continuations[N_NULL] = n_null; @@ -1010,10 +950,7 @@ constexpr inline struct ContinuationTable { continuations[T_HEX] = t_hex; continuations[T_HEX2] = t_hex2; continuations[T_HEX3] = t_hex3; - continuations[T_DIGIT] = t_digit; - 
continuations[T_ONENINE] = t_onenine; continuations[T_EOF] = t_eof; - continuations[T_END_NUMBER] = t_end_number; continuations[T_BACKSLASH] = singleChar<'\\'>; symbolNames[N_VALUE] = "n_value"; @@ -1024,13 +961,8 @@ constexpr inline struct ContinuationTable { symbolNames[N_STRING] = "n_string"; symbolNames[N_STRING2] = "n_string2"; symbolNames[N_STRING_FOLLOWING_ESCAPE] = "n_string_following_escape"; - symbolNames[N_INTEGER2] = "n_integer2"; - symbolNames[N_DIGITS] = "n_digits"; - symbolNames[N_DIGITS2] = "n_digits2"; - symbolNames[N_FRACTION] = "n_fraction"; - symbolNames[N_EXPONENT] = "n_exponent"; - symbolNames[N_SIGN] = "n_sign"; symbolNames[N_WHITESPACE] = "n_whitespace"; + symbolNames[N_NUMBER] = "n_number"; symbolNames[N_TRUE] = "n_true"; symbolNames[N_FALSE] = "n_false"; symbolNames[N_NULL] = "n_null"; @@ -1045,20 +977,13 @@ constexpr inline struct ContinuationTable { symbolNames[T_HEX] = "t_hex"; symbolNames[T_HEX2] = "t_hex2"; symbolNames[T_HEX3] = "t_hex3"; - symbolNames[T_DIGIT] = "t_digit"; - symbolNames[T_ONENINE] = "t_onenine"; symbolNames[T_EOF] = "t_eof"; symbolNames[T_BACKSLASH] = "singleChar<'\\'>"; - symbolNames[T_END_NUMBER] = "t_end_number"; // All others can assume that there's at least one byte when they're called - acceptsEmptyString[N_DIGITS2] = true; - acceptsEmptyString[N_FRACTION] = true; - acceptsEmptyString[N_EXPONENT] = true; - acceptsEmptyString[N_SIGN] = true; + acceptsEmptyString[N_NUMBER] = true; acceptsEmptyString[N_WHITESPACE] = true; acceptsEmptyString[T_EOF] = true; - acceptsEmptyString[T_END_NUMBER] = true; } Continuation continuations[N_SYMBOL_COUNT]{}; const char *symbolNames[N_SYMBOL_COUNT]{}; @@ -1071,17 +996,6 @@ inline PRESERVE_NONE WeaselJsonStatus Parser3::keepGoing(Parser3 *self, if (bufEnd - buf == 0) { if (!self->complete) { switch (self->top()) { - case N_INTEGER2: - case N_DIGITS: - case N_DIGITS2: - case N_FRACTION: - case N_EXPONENT: - case N_SIGN: - case T_DIGIT: - case T_ONENINE: - case T_END_NUMBER: - 
self->flushNumber(false, buf); - break; case N_STRING2: case N_STRING_FOLLOWING_ESCAPE: case T_UTF8_CONTINUATION_BYTE: @@ -1101,6 +1015,7 @@ inline PRESERVE_NONE WeaselJsonStatus Parser3::keepGoing(Parser3 *self, case N_ARRAY2: case N_ARRAY3: case N_WHITESPACE: + case N_NUMBER: case N_TRUE: case N_FALSE: case N_NULL: diff --git a/src/test.cpp b/src/test.cpp index 30550cc..d24f1fa 100644 --- a/src/test.cpp +++ b/src/test.cpp @@ -12,6 +12,7 @@ #include #include "callbacks.h" +#include "parser3.h" #include "weaseljson.h" // This is the JSON grammar in McKeeman Form. @@ -306,3 +307,21 @@ TEST_CASE("bench5") { bench.doNotOptimizeAway(doc); }); } + +TEST_CASE("num dfa") { + parser3::NumDfa dfa; + std::string match = "-1231279127389127389127398127389712893791287389217327482" + "374.0e69010101010101010101010101010101"; + auto *buf = dfa.scan(match.data(), match.data() + match.size()); + CHECK(buf == match.data() + match.size()); + CHECK(dfa.accept()); + + ankerl::nanobench::Bench bench; + bench.batch(match.size()); + bench.unit("byte"); + bench.run("number dfa", [&]() { + dfa.reset(); + bench.doNotOptimizeAway( + dfa.scan(match.data(), match.data() + match.size())); + }); +}