#pragma once #include #include #include #include #include #include #include #include #include #include "musttail.h" #include "tables.h" #include "weaseljson.h" namespace parser3 { typedef WeaselJsonStatus (*Continuation)(struct Parser3 *); // These appear in the stack of the pushdown // automata enum Symbol : uint8_t { N_VALUE, N_OBJECT2, N_OBJECT3, N_ARRAY2, N_ARRAY3, N_STRING, N_STRING2, N_STRING_FOLLOWING_ESCAPE, N_INTEGER2, N_DIGITS, N_DIGITS2, N_FRACTION, N_EXPONENT, N_SIGN, N_WHITESPACE, N_TRUE, N_FALSE, N_NULL, T_R, T_U, // u inside of a string T_U2, T_A, T_L, T_S, T_COLON, T_UTF8_CONTINUATION_BYTE, T_UTF8_LAST_CONTINUATION_BYTE, T_HEX, T_HEX2, T_HEX3, T_DIGIT, T_ONENINE, T_EOF, T_END_NUMBER, T_BACKSLASH, N_SYMBOL_COUNT, // Must be last }; struct Parser3 { Parser3(const WeaselJsonCallbacks *callbacks, void *data) : callbacks(callbacks), data(data) { std::ignore = push({N_VALUE, N_WHITESPACE, T_EOF}); } [[nodiscard]] WeaselJsonStatus parse(char *buf, int len) { complete = len == 0; this->buf = this->dataBegin = this->writeBuf = buf; this->bufEnd = buf + len; return keepGoing(this); } void flushNumber() { int len = buf - dataBegin; if (len > 0) { callbacks->on_number_data(data, dataBegin, len); } } void flushString() { int len = writeBuf - dataBegin; if (len > 0) { callbacks->on_string_data(data, dataBegin, len); } dataBegin = writeBuf; } [[nodiscard]] bool empty() const { return stackPtr == stack; } void pop() { assert(!empty()); --stackPtr; } [[nodiscard]] WeaselJsonStatus push(std::initializer_list symbols) { if (stackPtr >= std::end(stack) - symbols.size()) [[unlikely]] { return WeaselJson_OVERFLOW; } for (int i = symbols.size() - 1; i >= 0; --i) { *stackPtr++ = *(symbols.begin() + i); } return WeaselJson_OK; } [[nodiscard]] int len() const { auto result = bufEnd - buf; assert(result >= 0); return result; } Symbol top() const { assert(!empty()); return *(stackPtr - 1); } static WeaselJsonStatus keepGoing(Parser3 *self); constexpr static int kMaxStackSize = 1024; [[maybe_unused]] void debugPrint(); // Pointer to the next byte in the input to consume char *buf = nullptr; // Pointer past the end of the last byte available to consume char *bufEnd = nullptr; // Used for flushing pending data with on_*_data callbacks char *dataBegin; // Used for unescaping string data in place char *writeBuf; const WeaselJsonCallbacks *const callbacks; void *const data; Symbol stack[kMaxStackSize]; Symbol *stackPtr = stack; bool complete = false; uint32_t utf8Codepoint; uint32_t utf16Surrogate; uint32_t minCodepoint; }; inline WeaselJsonStatus n_whitespace(Parser3 *self) { if (self->len() == 0) { self->pop(); MUSTTAIL return Parser3::keepGoing(self); } while (tables.whitespace[uint8_t(*self->buf)]) { ++self->buf; if (self->buf == self->bufEnd) { return WeaselJson_AGAIN; } } self->pop(); MUSTTAIL return Parser3::keepGoing(self); } inline WeaselJsonStatus n_value(Parser3 *self) { assert(self->len() != 0); while (tables.whitespace[uint8_t(*self->buf)]) { ++self->buf; if (self->buf == self->bufEnd) { return WeaselJson_AGAIN; } } switch (*self->buf) { case '{': self->callbacks->on_begin_object(self->data); ++self->buf; self->pop(); if (auto s = self->push({N_OBJECT2})) { return s; } break; case '[': self->callbacks->on_begin_array(self->data); ++self->buf; self->pop(); if (auto s = self->push({N_ARRAY2})) { return s; } break; case '"': self->callbacks->on_begin_string(self->data); ++self->buf; self->dataBegin = self->writeBuf = self->buf; self->pop(); if (auto s = self->push({N_STRING2})) { return s; } break; case '0': self->pop(); if (auto s = self->push({N_FRACTION, N_EXPONENT})) { return s; } self->callbacks->on_begin_number(self->data); self->dataBegin = self->buf; ++self->buf; MUSTTAIL return Parser3::keepGoing(self); case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': self->pop(); self->callbacks->on_begin_number(self->data); self->dataBegin = self->buf; ++self->buf; if (auto s = self->push({N_DIGITS2, N_FRACTION, N_EXPONENT})) { return s; } MUSTTAIL return Parser3::keepGoing(self); case '-': self->pop(); self->callbacks->on_begin_number(self->data); self->dataBegin = self->buf; ++self->buf; if (auto s = self->push({N_INTEGER2, N_FRACTION, N_EXPONENT})) { return s; } MUSTTAIL return Parser3::keepGoing(self); case 't': ++self->buf; self->pop(); if (self->len() >= 3) { if (memcmp(self->buf, "rue", 3) == 0) { self->callbacks->on_true_literal(self->data); self->buf += 3; } else [[unlikely]] { return WeaselJson_REJECT; } } else { if (auto s = self->push({T_R, T_U, N_TRUE})) { return s; } } break; case 'f': ++self->buf; self->pop(); if (self->len() >= 4) { if (memcmp(self->buf, "alse", 4) == 0) { self->callbacks->on_false_literal(self->data); self->buf += 4; } else [[unlikely]] { return WeaselJson_REJECT; } } else { if (auto s = self->push({T_A, T_L, T_S, N_FALSE})) { return s; } } break; case 'n': ++self->buf; self->pop(); if (self->len() >= 3) { if (memcmp(self->buf, "ull", 3) == 0) { self->callbacks->on_null_literal(self->data); self->buf += 3; } else [[unlikely]] { return WeaselJson_REJECT; } } else { if (auto s = self->push({T_U, T_L, N_NULL})) { return s; } } break; default: [[unlikely]] return WeaselJson_REJECT; } MUSTTAIL return Parser3::keepGoing(self); } inline WeaselJsonStatus n_object2(Parser3 *self) { assert(self->len() != 0); while (tables.whitespace[uint8_t(*self->buf)]) { ++self->buf; if (self->buf == self->bufEnd) { return WeaselJson_AGAIN; } } switch (*self->buf) { case '}': ++self->buf; self->pop(); self->callbacks->on_end_object(self->data); MUSTTAIL return Parser3::keepGoing(self); case '"': self->callbacks->on_begin_string(self->data); ++self->buf; self->dataBegin = self->writeBuf = self->buf; self->pop(); if (auto s = self->push({N_STRING2, T_COLON, N_VALUE, N_OBJECT3})) { return s; } MUSTTAIL return Parser3::keepGoing(self); default: [[unlikely]] return WeaselJson_REJECT; } } inline WeaselJsonStatus n_object3(Parser3 *self) { assert(self->len() != 0); while (tables.whitespace[uint8_t(*self->buf)]) { ++self->buf; if (self->buf == self->bufEnd) { return WeaselJson_AGAIN; } } switch (*self->buf) { case '}': ++self->buf; self->pop(); self->callbacks->on_end_object(self->data); MUSTTAIL return Parser3::keepGoing(self); case ',': ++self->buf; self->pop(); if (auto s = self->push({N_STRING, T_COLON, N_VALUE, N_OBJECT3})) { return s; } MUSTTAIL return Parser3::keepGoing(self); default: [[unlikely]] return WeaselJson_REJECT; } } inline WeaselJsonStatus n_array2(Parser3 *self) { assert(self->len() != 0); while (tables.whitespace[uint8_t(*self->buf)]) { ++self->buf; if (self->buf == self->bufEnd) { return WeaselJson_AGAIN; } } switch (*self->buf) { case ']': ++self->buf; self->pop(); self->callbacks->on_end_array(self->data); MUSTTAIL return Parser3::keepGoing(self); default: self->pop(); if (auto s = self->push({N_VALUE, N_ARRAY3})) { return s; } MUSTTAIL return Parser3::keepGoing(self); } } inline WeaselJsonStatus n_array3(Parser3 *self) { assert(self->len() != 0); while (tables.whitespace[uint8_t(*self->buf)]) { ++self->buf; if (self->buf == self->bufEnd) { return WeaselJson_AGAIN; } } switch (*self->buf) { case ']': ++self->buf; self->pop(); self->callbacks->on_end_array(self->data); MUSTTAIL return Parser3::keepGoing(self); case ',': ++self->buf; self->pop(); if (auto s = self->push({N_VALUE, N_ARRAY3})) { return s; } MUSTTAIL return Parser3::keepGoing(self); default: [[unlikely]] return WeaselJson_REJECT; } } inline WeaselJsonStatus n_string(Parser3 *self) { assert(self->len() != 0); while (tables.whitespace[uint8_t(*self->buf)]) { ++self->buf; if (self->buf == self->bufEnd) { return WeaselJson_AGAIN; } } if (*self->buf != '"') [[unlikely]] { return WeaselJson_REJECT; } self->callbacks->on_begin_string(self->data); ++self->buf; self->dataBegin = self->writeBuf = self->buf; self->pop(); if (auto s = self->push({N_STRING2})) { return s; } MUSTTAIL return Parser3::keepGoing(self); } inline WeaselJsonStatus n_string2(Parser3 *self) { auto commit = [self, before = self->buf]() { int len = self->buf - before; if (self->writeBuf != before) { memmove(self->writeBuf, before, len); } self->writeBuf += len; }; begin: auto meaning = tables.stringByteMeaning[uint8_t(*self->buf)]; if (meaning == Tables::NORMAL) { ++self->buf; if (self->buf == self->bufEnd) { commit(); MUSTTAIL return Parser3::keepGoing(self); } goto begin; } commit(); switch (meaning) { case Tables::NORMAL: __builtin_unreachable(); case Tables::DUBQUOTE: self->flushString(); self->callbacks->on_end_string(self->data); ++self->buf; self->pop(); MUSTTAIL return Parser3::keepGoing(self); case Tables::BACKSLASH: ++self->buf; self->pop(); if (auto s = self->push({N_STRING_FOLLOWING_ESCAPE})) { return s; } MUSTTAIL return Parser3::keepGoing(self); case Tables::TWO_BYTE_UTF8: // two byte utf-8 encoding self->utf8Codepoint = *self->buf & 0b00011111; self->minCodepoint = 0x80; *self->writeBuf++ = *self->buf++; self->pop(); if (auto s = self->push({T_UTF8_LAST_CONTINUATION_BYTE, N_STRING2})) { return s; } MUSTTAIL return Parser3::keepGoing(self); case Tables::THREE_BYTE_UTF8: // three byte utf-8 encoding self->utf8Codepoint = *self->buf & 0b00001111; self->minCodepoint = 0x800; *self->writeBuf++ = *self->buf++; self->pop(); if (auto s = self->push({T_UTF8_CONTINUATION_BYTE, T_UTF8_LAST_CONTINUATION_BYTE, N_STRING2})) { return s; } MUSTTAIL return Parser3::keepGoing(self); case Tables::FOUR_BYTE_UTF8: // four byte utf-8 encoding self->utf8Codepoint = *self->buf & 0b00000111; self->minCodepoint = 0x10000; *self->writeBuf++ = *self->buf++; self->pop(); if (auto s = self->push({T_UTF8_CONTINUATION_BYTE, T_UTF8_CONTINUATION_BYTE, T_UTF8_LAST_CONTINUATION_BYTE, N_STRING2})) { return s; } MUSTTAIL return Parser3::keepGoing(self); case Tables::CONTINUATION_BYTE: case Tables::INVALID: [[unlikely]] return WeaselJson_REJECT; } } inline WeaselJsonStatus n_string_following_escape(Parser3 *self) { switch (*self->buf) { case '"': case '\\': case '/': case 'b': case 'f': case 'n': case 'r': case 't': *self->writeBuf++ = tables.unescape[*self->buf++]; self->pop(); if (auto s = self->push({N_STRING2})) { return s; } MUSTTAIL return Parser3::keepGoing(self); case 'u': ++self->buf; self->utf8Codepoint = 0; self->pop(); if (auto s = self->push({T_HEX, T_HEX, T_HEX, T_HEX2, N_STRING2})) { return s; } MUSTTAIL return Parser3::keepGoing(self); default: [[unlikely]] return WeaselJson_REJECT; } } inline WeaselJsonStatus t_utf8_continuation_byte(Parser3 *self) { if (tables.stringByteMeaning[uint8_t(*self->buf)] != Tables::CONTINUATION_BYTE) [[unlikely]] { return WeaselJson_REJECT; } self->utf8Codepoint <<= 6; self->utf8Codepoint |= *self->buf & 0b00111111; *self->writeBuf++ = *self->buf++; self->pop(); MUSTTAIL return Parser3::keepGoing(self); } inline WeaselJsonStatus t_utf8_last_continuation_byte(Parser3 *self) { if (tables.stringByteMeaning[uint8_t(*self->buf)] != Tables::CONTINUATION_BYTE) [[unlikely]] { return WeaselJson_REJECT; } self->utf8Codepoint <<= 6; self->utf8Codepoint |= *self->buf & 0b00111111; if (self->utf8Codepoint < self->minCodepoint || self->utf8Codepoint > 0x10ffff || (0xd800 <= self->utf8Codepoint && self->utf8Codepoint <= 0xdfff)) [[unlikely]] { return WeaselJson_REJECT; } // TODO tell valgrind utf8Codepoint and minCodepoint are uninitialized *self->writeBuf++ = *self->buf++; self->pop(); MUSTTAIL return Parser3::keepGoing(self); } inline WeaselJsonStatus t_digit(Parser3 *self) { if ('0' <= *self->buf && *self->buf <= '9') { ++self->buf; self->pop(); MUSTTAIL return Parser3::keepGoing(self); } else [[unlikely]] { return WeaselJson_REJECT; } } inline WeaselJsonStatus t_onenine(Parser3 *self) { if ('1' <= *self->buf && *self->buf <= '9') { ++self->buf; self->pop(); MUSTTAIL return Parser3::keepGoing(self); } else [[unlikely]] { return WeaselJson_REJECT; } } inline WeaselJsonStatus t_hex(Parser3 *self) { self->utf8Codepoint <<= 4; if (('0' <= *self->buf && *self->buf <= '9')) { self->utf8Codepoint |= *self->buf - '0'; } else if ('a' <= *self->buf && *self->buf <= 'f') { self->utf8Codepoint |= 10 + *self->buf - 'a'; } else if ('A' <= *self->buf && *self->buf <= 'F') { self->utf8Codepoint |= 10 + *self->buf - 'A'; } else [[unlikely]] { return WeaselJson_REJECT; } ++self->buf; self->pop(); MUSTTAIL return Parser3::keepGoing(self); } inline WeaselJsonStatus t_hex2(Parser3 *self) { self->utf8Codepoint <<= 4; if (('0' <= *self->buf && *self->buf <= '9')) { self->utf8Codepoint |= *self->buf - '0'; } else if ('a' <= *self->buf && *self->buf <= 'f') { self->utf8Codepoint |= 10 + *self->buf - 'a'; } else if ('A' <= *self->buf && *self->buf <= 'F') { self->utf8Codepoint |= 10 + *self->buf - 'A'; } else [[unlikely]] { return WeaselJson_REJECT; } ++self->buf; // Write codepoint in utf-8 if there's room in the user provided buffer. If // there's not room, flush, write into a temp buffer, and flush again. char tmp[3]; if (self->utf8Codepoint < 0x80) { assert(self->buf - self->writeBuf >= 1); *self->writeBuf++ = self->utf8Codepoint; } else if (self->utf8Codepoint < 0x800) { bool useTmp = self->buf - self->writeBuf < 2; char *p = tmp; if (useTmp) { self->flushString(); } auto &w = useTmp ? p : self->writeBuf; w[1] = (0b00111111 & self->utf8Codepoint) | 0b10000000; self->utf8Codepoint >>= 6; w[0] = (0b00011111 & self->utf8Codepoint) | 0b11000000; w += 2; if (useTmp) { self->callbacks->on_string_data(self->data, tmp, 2); } } else { assert(self->utf8Codepoint < 0x10000); if (0xd800 <= self->utf8Codepoint && self->utf8Codepoint <= 0xdfff) { // utf-16 surrogate self->utf16Surrogate = self->utf8Codepoint; self->utf8Codepoint = 0; self->pop(); if (auto s = self->push({T_BACKSLASH, T_U2, T_HEX, T_HEX, T_HEX, T_HEX3})) { return s; } MUSTTAIL return Parser3::keepGoing(self); } bool useTmp = self->buf - self->writeBuf < 3; char *p = tmp; if (useTmp) { self->flushString(); } auto &w = useTmp ? p : self->writeBuf; w[2] = (0b00111111 & self->utf8Codepoint) | 0b10000000; self->utf8Codepoint >>= 6; w[1] = (0b00111111 & self->utf8Codepoint) | 0b10000000; self->utf8Codepoint >>= 6; w[0] = (0b00001111 & self->utf8Codepoint) | 0b11100000; w += 3; if (useTmp) { self->callbacks->on_string_data(self->data, tmp, 3); } } self->pop(); MUSTTAIL return Parser3::keepGoing(self); } inline WeaselJsonStatus t_hex3(Parser3 *self) { self->utf8Codepoint <<= 4; if (('0' <= *self->buf && *self->buf <= '9')) { self->utf8Codepoint |= *self->buf - '0'; } else if ('a' <= *self->buf && *self->buf <= 'f') { self->utf8Codepoint |= 10 + *self->buf - 'a'; } else if ('A' <= *self->buf && *self->buf <= 'F') { self->utf8Codepoint |= 10 + *self->buf - 'A'; } else [[unlikely]] { return WeaselJson_REJECT; } ++self->buf; if (!(0xdc00 <= self->utf8Codepoint && self->utf8Codepoint <= 0xdfff)) [[unlikely]] { return WeaselJson_REJECT; } // Decode utf16 surrogate pair self->utf8Codepoint = 0x10000 + (self->utf16Surrogate - 0xd800) * 0x400 + (self->utf8Codepoint - 0xdc00); // Write codepoint in utf-8 if there's room in the user provided buffer. If // there's not room, flush, write into a temp buffer, and flush again. char tmp[4]; assert(self->utf8Codepoint >= 0x10000); if (self->utf8Codepoint > 0x10FFFF) [[unlikely]] { return WeaselJson_REJECT; } bool useTmp = self->buf - self->writeBuf < 4; char *p = tmp; if (useTmp) { self->flushString(); } auto &w = useTmp ? p : self->writeBuf; w[3] = (0b00111111 & self->utf8Codepoint) | 0b10000000; self->utf8Codepoint >>= 6; w[2] = (0b00111111 & self->utf8Codepoint) | 0b10000000; self->utf8Codepoint >>= 6; w[1] = (0b00111111 & self->utf8Codepoint) | 0b10000000; self->utf8Codepoint >>= 6; w[0] = (0b00000111 & self->utf8Codepoint) | 0b11110000; w += 4; if (useTmp) { self->callbacks->on_string_data(self->data, tmp, 4); } self->pop(); MUSTTAIL return Parser3::keepGoing(self); } inline WeaselJsonStatus n_integer(Parser3 *self) { self->callbacks->on_begin_number(self->data); self->dataBegin = self->buf; switch (*self->buf) { case '0': ++self->buf; self->pop(); MUSTTAIL return Parser3::keepGoing(self); case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': ++self->buf; self->pop(); if (auto s = self->push({N_DIGITS2})) { return s; } MUSTTAIL return Parser3::keepGoing(self); case '-': ++self->buf; self->pop(); if (auto s = self->push({N_INTEGER2})) { return s; } MUSTTAIL return Parser3::keepGoing(self); default: [[unlikely]] return WeaselJson_REJECT; } } inline WeaselJsonStatus n_integer2(Parser3 *self) { switch (*self->buf) { case '0': ++self->buf; self->pop(); MUSTTAIL return Parser3::keepGoing(self); case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': ++self->buf; self->pop(); if (auto s = self->push({N_DIGITS2})) { return s; } MUSTTAIL return Parser3::keepGoing(self); default: [[unlikely]] return WeaselJson_REJECT; } } inline WeaselJsonStatus n_digits(Parser3 *self) { switch (*self->buf) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': ++self->buf; self->pop(); if (auto s = self->push({N_DIGITS2})) { return s; } MUSTTAIL return Parser3::keepGoing(self); default: [[unlikely]] return WeaselJson_REJECT; } } inline WeaselJsonStatus n_digits2(Parser3 *self) { if (self->len() == 0) { self->pop(); MUSTTAIL return Parser3::keepGoing(self); } switch (*self->buf) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': ++self->buf; MUSTTAIL return Parser3::keepGoing(self); default: self->pop(); MUSTTAIL return Parser3::keepGoing(self); } } inline WeaselJsonStatus n_fraction(Parser3 *self) { if (self->len() == 0) { self->pop(); MUSTTAIL return Parser3::keepGoing(self); } switch (*self->buf) { case '.': ++self->buf; self->pop(); if (auto s = self->push({N_DIGITS})) { return s; } MUSTTAIL return Parser3::keepGoing(self); default: self->pop(); MUSTTAIL return Parser3::keepGoing(self); } } // Responsible for ensuring that on_end_number gets called inline WeaselJsonStatus n_exponent(Parser3 *self) { if (self->len() == 0) { self->pop(); MUSTTAIL return Parser3::keepGoing(self); } switch (*self->buf) { case 'e': case 'E': ++self->buf; self->pop(); if (auto s = self->push({N_SIGN, N_DIGITS, T_END_NUMBER})) { return s; } MUSTTAIL return Parser3::keepGoing(self); default: self->pop(); self->flushNumber(); self->callbacks->on_end_number(self->data); MUSTTAIL return Parser3::keepGoing(self); } } inline WeaselJsonStatus n_sign(Parser3 *self) { if (self->len() == 0) { self->pop(); MUSTTAIL return Parser3::keepGoing(self); } switch (*self->buf) { case '+': case '-': ++self->buf; self->pop(); MUSTTAIL return Parser3::keepGoing(self); default: self->pop(); MUSTTAIL return Parser3::keepGoing(self); } } inline WeaselJsonStatus n_true(Parser3 *self) { if (*self->buf == 'e') { ++self->buf; self->pop(); self->callbacks->on_true_literal(self->data); MUSTTAIL return Parser3::keepGoing(self); } else [[unlikely]] { return WeaselJson_REJECT; } } inline WeaselJsonStatus n_false(Parser3 *self) { if (*self->buf == 'e') { ++self->buf; self->pop(); self->callbacks->on_false_literal(self->data); MUSTTAIL return Parser3::keepGoing(self); } else [[unlikely]] { return WeaselJson_REJECT; } } inline WeaselJsonStatus n_null(Parser3 *self) { if (*self->buf == 'l') { ++self->buf; self->pop(); self->callbacks->on_null_literal(self->data); MUSTTAIL return Parser3::keepGoing(self); } else [[unlikely]] { return WeaselJson_REJECT; } } template inline WeaselJsonStatus singleChar(Parser3 *self) { if constexpr (kSkipWhitespace) { assert(self->len() != 0); while (tables.whitespace[uint8_t(*self->buf)]) { ++self->buf; if (self->buf == self->bufEnd) { return WeaselJson_AGAIN; } } } if (*self->buf == kChar) { ++self->buf; self->pop(); MUSTTAIL return Parser3::keepGoing(self); } else [[unlikely]] { return WeaselJson_REJECT; } } inline WeaselJsonStatus t_eof(Parser3 *self) { if (self->len() > 0) [[unlikely]] { return WeaselJson_REJECT; } return self->complete ? WeaselJson_OK : WeaselJson_AGAIN; } inline WeaselJsonStatus t_end_number(Parser3 *self) { self->pop(); self->flushNumber(); self->callbacks->on_end_number(self->data); MUSTTAIL return Parser3::keepGoing(self); } constexpr inline struct ContinuationTable { constexpr ContinuationTable() { // Defaults for (int i = 0; i < N_SYMBOL_COUNT; ++i) { continuations[i] = +[](struct Parser3 *) { printf("unimplemented\n"); return WeaselJson_REJECT; }; } continuations[N_VALUE] = n_value; continuations[N_OBJECT2] = n_object2; continuations[N_OBJECT3] = n_object3; continuations[N_ARRAY2] = n_array2; continuations[N_ARRAY3] = n_array3; continuations[N_STRING] = n_string; continuations[N_STRING2] = n_string2; continuations[N_STRING_FOLLOWING_ESCAPE] = n_string_following_escape; continuations[N_INTEGER2] = n_integer2; continuations[N_DIGITS] = n_digits; continuations[N_DIGITS2] = n_digits2; continuations[N_FRACTION] = n_fraction; continuations[N_EXPONENT] = n_exponent; continuations[N_SIGN] = n_sign; continuations[N_WHITESPACE] = n_whitespace; continuations[N_TRUE] = n_true; continuations[N_FALSE] = n_false; continuations[N_NULL] = n_null; continuations[T_R] = singleChar<'r'>; continuations[T_U] = singleChar<'u'>; continuations[T_U2] = singleChar<'u'>; continuations[T_A] = singleChar<'a'>; continuations[T_L] = singleChar<'l'>; continuations[T_S] = singleChar<'s'>; continuations[T_COLON] = singleChar<':', true>; continuations[T_UTF8_CONTINUATION_BYTE] = t_utf8_continuation_byte; continuations[T_UTF8_LAST_CONTINUATION_BYTE] = t_utf8_last_continuation_byte; continuations[T_HEX] = t_hex; continuations[T_HEX2] = t_hex2; continuations[T_HEX3] = t_hex3; continuations[T_DIGIT] = t_digit; continuations[T_ONENINE] = t_onenine; continuations[T_EOF] = t_eof; continuations[T_END_NUMBER] = t_end_number; continuations[T_BACKSLASH] = singleChar<'\\'>; symbolNames[N_VALUE] = "n_value"; symbolNames[N_OBJECT2] = "n_object2"; symbolNames[N_OBJECT3] = "n_object3"; symbolNames[N_ARRAY2] = "n_array2"; symbolNames[N_ARRAY3] = "n_array3"; symbolNames[N_STRING] = "n_string"; symbolNames[N_STRING2] = "n_string2"; symbolNames[N_STRING_FOLLOWING_ESCAPE] = "n_string_following_escape"; symbolNames[N_INTEGER2] = "n_integer2"; symbolNames[N_DIGITS] = "n_digits"; symbolNames[N_DIGITS2] = "n_digits2"; symbolNames[N_FRACTION] = "n_fraction"; symbolNames[N_EXPONENT] = "n_exponent"; symbolNames[N_SIGN] = "n_sign"; symbolNames[N_WHITESPACE] = "n_whitespace"; symbolNames[N_TRUE] = "n_true"; symbolNames[N_FALSE] = "n_false"; symbolNames[N_NULL] = "n_null"; symbolNames[T_R] = "singleChar<'r'>"; symbolNames[T_U] = "singleChar<'u'>"; symbolNames[T_U2] = "singleChar<'u'> (in string)"; symbolNames[T_A] = "singleChar<'a'>"; symbolNames[T_L] = "singleChar<'l'>"; symbolNames[T_S] = "singleChar<'s'>"; symbolNames[T_COLON] = "singleChar<':'>"; symbolNames[T_UTF8_CONTINUATION_BYTE] = "t_utf8_continuation_byte"; symbolNames[T_HEX] = "t_hex"; symbolNames[T_HEX2] = "t_hex2"; symbolNames[T_HEX3] = "t_hex3"; symbolNames[T_DIGIT] = "t_digit"; symbolNames[T_ONENINE] = "t_onenine"; symbolNames[T_EOF] = "t_eof"; symbolNames[T_BACKSLASH] = "singleChar<'\\'>"; symbolNames[T_END_NUMBER] = "t_end_number"; // All others can assume that there's at least one byte when they're called acceptsEmptyString[N_DIGITS2] = true; acceptsEmptyString[N_FRACTION] = true; acceptsEmptyString[N_EXPONENT] = true; acceptsEmptyString[N_SIGN] = true; acceptsEmptyString[N_WHITESPACE] = true; acceptsEmptyString[T_EOF] = true; acceptsEmptyString[T_END_NUMBER] = true; } Continuation continuations[N_SYMBOL_COUNT]{}; const char *symbolNames[N_SYMBOL_COUNT]{}; bool acceptsEmptyString[N_SYMBOL_COUNT]{}; } symbolTables; inline WeaselJsonStatus Parser3::keepGoing(Parser3 *self) { // self->debugPrint(); if (self->len() == 0) { if (!self->complete) { switch (self->top()) { case N_INTEGER2: case N_DIGITS: case N_DIGITS2: case N_FRACTION: case N_EXPONENT: case N_SIGN: case T_DIGIT: case T_ONENINE: case T_END_NUMBER: self->flushNumber(); break; case N_STRING: case N_STRING2: case N_STRING_FOLLOWING_ESCAPE: case T_UTF8_CONTINUATION_BYTE: case T_UTF8_LAST_CONTINUATION_BYTE: case T_HEX: case T_HEX2: case T_HEX3: case T_BACKSLASH: case T_U2: self->flushString(); break; case N_VALUE: case N_OBJECT2: case N_OBJECT3: case N_ARRAY2: case N_ARRAY3: case N_WHITESPACE: case N_TRUE: case N_FALSE: case N_NULL: case T_R: case T_U: case T_A: case T_L: case T_S: case T_COLON: case T_EOF: case N_SYMBOL_COUNT: break; } return WeaselJson_AGAIN; } if (!symbolTables.acceptsEmptyString[self->top()]) [[unlikely]] { return WeaselJson_REJECT; } } MUSTTAIL return symbolTables.continuations[self->top()](self); } inline void Parser3::debugPrint() { for (int i = 0; i < stackPtr - stack; ++i) { printf("%s ", symbolTables.symbolNames[stack[i]]); } printf("\n"); for (int i = 0; i < len(); ++i) { if (isprint(buf[i])) { printf("%c", buf[i]); } else { printf("\\x%02x", uint8_t(buf[i])); } } printf("\n"); } } // namespace parser3