# Patch summary (commentary only; everything before the "diff --git" header
# is not part of the patch payload).
#
# What the hunks below do:
#   * Parser3::keepGoing no longer switches on self->top() to decide whether
#     to call self->flushString(false) when the buffer is exhausted and the
#     input is not complete; it now just returns WeaselJson_AGAIN.
#   * The string-related continuations (n_string2 on the backslash path,
#     n_string_following_escape, t_hex, t_hex2, t_hex3) instead check
#     buf == bufEnd themselves, call self->flushString(false) and return
#     WeaselJson_AGAIN before tail-calling keepGoing.
#   * n_string2's closing-quote path returns WeaselJson_AGAIN on
#     buf == bufEnd without another flush; it has already called
#     self->flushString(true) at that point.
#   * n_string_following_escape's simple-escape and 'u' cases now `break` to
#     one shared tail instead of each tail-calling keepGoing.
#   * A new template singleCharInString<kChar> is added and used for T_U2 and
#     T_BACKSLASH in the continuation table: it consumes one expected
#     character, pops, and flushes/returns WeaselJson_AGAIN when that
#     character was the last one in the buffer.
#
# NOTE(review): the removed switch ran only under `!self->complete`; the new
# per-continuation checks do not look at self->complete. Confirm that callers
# handle WeaselJson_AGAIN correctly when the final chunk ends inside a string.
#
# NOTE(review): the leading whitespace of context lines was reconstructed in
# the file's apparent 2-space clang-format style; if the patch does not apply
# cleanly, retry with whitespace-insensitive matching.
diff --git a/src/parser3.h b/src/parser3.h
index 18ec8de..659b5c4 100644
--- a/src/parser3.h
+++ b/src/parser3.h
@@ -491,6 +491,9 @@ inline PRESERVE_NONE WeaselJsonStatus n_string2(Parser3 *self, char *buf,
     self->flushString(true);
     ++buf;
     self->pop();
+    if (buf == bufEnd) {
+      return WeaselJson_AGAIN;
+    }
     MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
   case '\\':
     ++buf;
@@ -498,6 +501,10 @@ inline PRESERVE_NONE WeaselJsonStatus n_string2(Parser3 *self, char *buf,
       if (auto s = self->push({N_STRING_FOLLOWING_ESCAPE})) {
         return s;
       }
+      if (buf == bufEnd) {
+        self->flushString(false);
+        return WeaselJson_AGAIN;
+      }
       MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
     } else {
       if (*buf == 'u') {
@@ -584,7 +591,7 @@ inline PRESERVE_NONE WeaselJsonStatus n_string_following_escape(Parser3 *self,
   case 't':
     *self->writeBuf++ = tables.unescape[uint8_t(*buf++)];
     self->pop();
-    MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
+    break;
   case 'u':
     ++buf;
     self->utf8Codepoint = 0;
@@ -592,10 +599,15 @@ inline PRESERVE_NONE WeaselJsonStatus n_string_following_escape(Parser3 *self,
     if (auto s = self->push({T_HEX, T_HEX, T_HEX, T_HEX2})) {
       return s;
     }
-    MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
+    break;
   default:
     [[unlikely]] return WeaselJson_REJECT;
   }
+  if (buf == bufEnd) {
+    self->flushString(false);
+    return WeaselJson_AGAIN;
+  }
+  MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
 }
 
 inline PRESERVE_NONE WeaselJsonStatus t_hex(Parser3 *self, char *buf,
@@ -611,6 +623,10 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex(Parser3 *self, char *buf,
   self->utf8Codepoint |= hexVal;
   ++buf;
   self->pop();
+  if (buf == bufEnd) {
+    self->flushString(false);
+    return WeaselJson_AGAIN;
+  }
   MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
 }
 
@@ -658,6 +674,10 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self, char *buf,
             self->push({T_BACKSLASH, T_U2, T_HEX, T_HEX, T_HEX, T_HEX3})) {
       return s;
     }
+    if (buf == bufEnd) {
+      self->flushString(false);
+      return WeaselJson_AGAIN;
+    }
     MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
   }
   bool useTmp = buf - self->writeBuf < 3;
@@ -678,6 +698,10 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self, char *buf,
   }
 
   self->pop();
+  if (buf == bufEnd) {
+    self->flushString(false);
+    return WeaselJson_AGAIN;
+  }
   MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
 }
 
@@ -729,9 +753,33 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex3(Parser3 *self, char *buf,
   }
 
   self->pop();
+  if (buf == bufEnd) {
+    self->flushString(false);
+    return WeaselJson_AGAIN;
+  }
   MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
 }
 
+template <char kChar>
+inline PRESERVE_NONE WeaselJsonStatus singleCharInString(Parser3 *self,
+                                                         char *buf,
+                                                         char *bufEnd) {
+  if (buf == bufEnd) [[unlikely]] {
+    return WeaselJson_REJECT;
+  }
+  if (*buf == kChar) {
+    ++buf;
+    self->pop();
+    if (buf == bufEnd) {
+      self->flushString(false);
+      return WeaselJson_AGAIN;
+    }
+    MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
+  } else [[unlikely]] {
+    return WeaselJson_REJECT;
+  }
+}
+
 inline PRESERVE_NONE WeaselJsonStatus n_true(Parser3 *self, char *buf,
                                              char *bufEnd) {
   if (buf == bufEnd) [[unlikely]] {
@@ -829,7 +877,7 @@ constexpr inline struct ContinuationTable {
     continuations[N_NULL] = n_null;
     continuations[T_R] = singleChar<'r'>;
     continuations[T_U] = singleChar<'u'>;
-    continuations[T_U2] = singleChar<'u'>;
+    continuations[T_U2] = singleCharInString<'u'>;
     continuations[T_A] = singleChar<'a'>;
     continuations[T_L] = singleChar<'l'>;
     continuations[T_S] = singleChar<'s'>;
@@ -838,7 +886,7 @@ constexpr inline struct ContinuationTable {
     continuations[T_HEX2] = t_hex2;
     continuations[T_HEX3] = t_hex3;
     continuations[T_EOF] = t_eof;
-    continuations[T_BACKSLASH] = singleChar<'\\'>;
+    continuations[T_BACKSLASH] = singleCharInString<'\\'>;
 
     symbolNames[N_VALUE] = "n_value";
     symbolNames[N_OBJECT2] = "n_object2";
@@ -875,40 +923,6 @@ inline PRESERVE_NONE WeaselJsonStatus Parser3::keepGoing(Parser3 *self,
                                                          char *bufEnd) {
   if (bufEnd - buf == 0) {
     if (!self->complete) {
-      switch (self->top()) {
-      case N_STRING2:
-      case N_STRING_FOLLOWING_ESCAPE:
-      case T_HEX:
-      case T_HEX2:
-      case T_HEX3:
-      case T_BACKSLASH:
-      case T_U2:
-        self->flushString(false);
-        break;
-      case N_STRING: // The beginning of the string is in the future in this
-                     // state. There's no data to flush yet
-      case N_VALUE:
-      case N_OBJECT2:
-      case N_OBJECT3:
-      case N_ARRAY2:
-      case N_ARRAY3:
-      case N_WHITESPACE:
-      case N_NUMBER:
-      case N_TRUE:
-      case N_FALSE:
-      case N_NULL:
-      case T_R:
-      case T_U:
-      case T_A:
-      case T_L:
-      case T_S:
-      case T_COLON:
-      case T_EOF:
-      case N_SYMBOL_COUNT:
-        break;
-      default:
-        __builtin_unreachable();
-      }
       return WeaselJson_AGAIN;
     }
   }