diff --git a/src/parser3.h b/src/parser3.h index 7410392..53bfb69 100644 --- a/src/parser3.h +++ b/src/parser3.h @@ -16,7 +16,8 @@ namespace parser3 { -typedef PRESERVE_NONE WeaselJsonStatus (*Continuation)(struct Parser3 *); +typedef PRESERVE_NONE WeaselJsonStatus (*Continuation)(struct Parser3 *, + char *buf, char *bufEnd); // These appear in the stack of the pushdown // automata @@ -67,12 +68,11 @@ struct Parser3 { [[nodiscard]] WeaselJsonStatus parse(char *buf, int len) { complete = len == 0; - this->buf = this->dataBegin = this->writeBuf = buf; - this->bufEnd = buf + len; - return keepGoing(this); + this->dataBegin = this->writeBuf = buf; + return keepGoing(this, buf, buf + len); } - void flushNumber(bool done) { + void flushNumber(bool done, char *buf) { int len = buf - dataBegin; assert(len >= 0); if (done || len > 0) { @@ -103,19 +103,13 @@ struct Parser3 { } return WeaselJson_OK; } - [[nodiscard]] int len() const { - auto result = bufEnd - buf; - assert(result >= 0); - return result; - } [[nodiscard]] Symbol top() const { assert(!empty()); return *(stackPtr - 1); } - static PRESERVE_NONE WeaselJsonStatus keepGoing(Parser3 *self); - - [[maybe_unused]] void debugPrint() const; + static PRESERVE_NONE WeaselJsonStatus keepGoing(Parser3 *self, char *buf, + char *bufEnd); Symbol *stack() const { return (Symbol *)(this + 1); } @@ -125,10 +119,6 @@ struct Parser3 { std::ignore = push({N_VALUE, N_WHITESPACE, T_EOF}); } - // Pointer to the next byte in the input to consume - char *buf; - // Pointer past the end of the last byte available to consume - char *bufEnd; // Used for flushing pending data with on_*_data callbacks char *dataBegin; // Used for unescaping string data in place @@ -143,33 +133,35 @@ struct Parser3 { int stackSize; }; -inline PRESERVE_NONE WeaselJsonStatus n_whitespace(Parser3 *self) { - if (self->len() == 0) { +inline PRESERVE_NONE WeaselJsonStatus n_whitespace(Parser3 *self, char *buf, + char *bufEnd) { + if (bufEnd - buf == 0) { self->pop(); - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); } - while (tables.whitespace[uint8_t(*self->buf)]) { - ++self->buf; - if (self->buf == self->bufEnd) { + while (tables.whitespace[uint8_t(*buf)]) { + ++buf; + if (buf == bufEnd) { return WeaselJson_AGAIN; } } self->pop(); - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); } -inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self) { - assert(self->len() != 0); - while (tables.whitespace[uint8_t(*self->buf)]) { - ++self->buf; - if (self->buf == self->bufEnd) { +inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self, char *buf, + char *bufEnd) { + assert(bufEnd - buf != 0); + while (tables.whitespace[uint8_t(*buf)]) { + ++buf; + if (buf == bufEnd) { return WeaselJson_AGAIN; } } - switch (*self->buf) { + switch (*buf) { case '{': self->callbacks->on_begin_object(self->userdata); - ++self->buf; + ++buf; self->pop(); if (auto s = self->push({N_OBJECT2})) { return s; @@ -177,15 +169,15 @@ inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self) { break; case '[': self->callbacks->on_begin_array(self->userdata); - ++self->buf; + ++buf; self->pop(); if (auto s = self->push({N_ARRAY2})) { return s; } break; case '"': - ++self->buf; - self->dataBegin = self->writeBuf = self->buf; + ++buf; + self->dataBegin = self->writeBuf = buf; self->pop(); if (auto s = self->push({N_STRING2})) { return s; @@ -196,9 +188,9 @@ inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self) { if (auto s = self->push({N_FRACTION, N_EXPONENT})) { return s; } - self->dataBegin = self->buf; - ++self->buf; - MUSTTAIL return Parser3::keepGoing(self); + self->dataBegin = buf; + ++buf; + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); case '1': case '2': case '3': @@ -209,27 +201,27 @@ inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self) { case '8': case '9': self->pop(); - self->dataBegin = self->buf; - ++self->buf; + self->dataBegin = buf; + ++buf; if (auto s = self->push({N_DIGITS2, N_FRACTION, N_EXPONENT})) { return s; } - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); case '-': self->pop(); - self->dataBegin = self->buf; - ++self->buf; + self->dataBegin = buf; + ++buf; if (auto s = self->push({N_INTEGER2, N_FRACTION, N_EXPONENT})) { return s; } - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); case 't': - ++self->buf; + ++buf; self->pop(); - if (self->len() >= 3) { - if (memcmp(self->buf, "rue", 3) == 0) { + if (bufEnd - buf >= 3) { + if (memcmp(buf, "rue", 3) == 0) { self->callbacks->on_true_literal(self->userdata); - self->buf += 3; + buf += 3; } else [[unlikely]] { return WeaselJson_REJECT; } @@ -240,12 +232,12 @@ inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self) { } break; case 'f': - ++self->buf; + ++buf; self->pop(); - if (self->len() >= 4) { - if (memcmp(self->buf, "alse", 4) == 0) { + if (bufEnd - buf >= 4) { + if (memcmp(buf, "alse", 4) == 0) { self->callbacks->on_false_literal(self->userdata); - self->buf += 4; + buf += 4; } else [[unlikely]] { return WeaselJson_REJECT; } @@ -256,12 +248,12 @@ inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self) { } break; case 'n': - ++self->buf; + ++buf; self->pop(); - if (self->len() >= 3) { - if (memcmp(self->buf, "ull", 3) == 0) { + if (bufEnd - buf >= 3) { + if (memcmp(buf, "ull", 3) == 0) { self->callbacks->on_null_literal(self->userdata); - self->buf += 3; + buf += 3; } else [[unlikely]] { return WeaselJson_REJECT; } @@ -274,211 +266,217 @@ inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self) { default: [[unlikely]] return WeaselJson_REJECT; } - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); } -inline PRESERVE_NONE WeaselJsonStatus n_object2(Parser3 *self) { - assert(self->len() != 0); - while (tables.whitespace[uint8_t(*self->buf)]) { - ++self->buf; - if (self->buf == self->bufEnd) { +inline PRESERVE_NONE WeaselJsonStatus n_object2(Parser3 *self, char *buf, + char *bufEnd) { + assert(bufEnd - buf != 0); + while (tables.whitespace[uint8_t(*buf)]) { + ++buf; + if (buf == bufEnd) { return WeaselJson_AGAIN; } } - switch (*self->buf) { + switch (*buf) { case '}': - ++self->buf; + ++buf; self->pop(); self->callbacks->on_end_object(self->userdata); - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); case '"': - ++self->buf; - self->dataBegin = self->writeBuf = self->buf; + ++buf; + self->dataBegin = self->writeBuf = buf; self->pop(); if (auto s = self->push({N_STRING2, T_COLON, N_VALUE, N_OBJECT3})) { return s; } - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); default: [[unlikely]] return WeaselJson_REJECT; } } -inline PRESERVE_NONE WeaselJsonStatus n_object3(Parser3 *self) { - assert(self->len() != 0); - while (tables.whitespace[uint8_t(*self->buf)]) { - ++self->buf; - if (self->buf == self->bufEnd) { +inline PRESERVE_NONE WeaselJsonStatus n_object3(Parser3 *self, char *buf, + char *bufEnd) { + assert(bufEnd - buf != 0); + while (tables.whitespace[uint8_t(*buf)]) { + ++buf; + if (buf == bufEnd) { return WeaselJson_AGAIN; } } - switch (*self->buf) { + switch (*buf) { case '}': - ++self->buf; + ++buf; self->pop(); self->callbacks->on_end_object(self->userdata); - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); case ',': - ++self->buf; + ++buf; self->pop(); if (auto s = self->push({N_STRING, T_COLON, N_VALUE, N_OBJECT3})) { return s; } - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); default: [[unlikely]] return WeaselJson_REJECT; } } -inline PRESERVE_NONE WeaselJsonStatus n_array2(Parser3 *self) { - assert(self->len() != 0); - while (tables.whitespace[uint8_t(*self->buf)]) { - ++self->buf; - if (self->buf == self->bufEnd) { +inline PRESERVE_NONE WeaselJsonStatus n_array2(Parser3 *self, char *buf, + char *bufEnd) { + assert(bufEnd - buf != 0); + while (tables.whitespace[uint8_t(*buf)]) { + ++buf; + if (buf == bufEnd) { return WeaselJson_AGAIN; } } - switch (*self->buf) { + switch (*buf) { case ']': - ++self->buf; + ++buf; self->pop(); self->callbacks->on_end_array(self->userdata); - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); default: self->pop(); if (auto s = self->push({N_VALUE, N_ARRAY3})) { return s; } - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); } } -inline PRESERVE_NONE WeaselJsonStatus n_array3(Parser3 *self) { - assert(self->len() != 0); - while (tables.whitespace[uint8_t(*self->buf)]) { - ++self->buf; - if (self->buf == self->bufEnd) { +inline PRESERVE_NONE WeaselJsonStatus n_array3(Parser3 *self, char *buf, + char *bufEnd) { + assert(bufEnd - buf != 0); + while (tables.whitespace[uint8_t(*buf)]) { + ++buf; + if (buf == bufEnd) { return WeaselJson_AGAIN; } } - switch (*self->buf) { + switch (*buf) { case ']': - ++self->buf; + ++buf; self->pop(); self->callbacks->on_end_array(self->userdata); - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); case ',': - ++self->buf; + ++buf; self->pop(); if (auto s = self->push({N_VALUE, N_ARRAY3})) { return s; } - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); default: [[unlikely]] return WeaselJson_REJECT; } } -inline PRESERVE_NONE WeaselJsonStatus n_string(Parser3 *self) { - assert(self->len() != 0); - while (tables.whitespace[uint8_t(*self->buf)]) { - ++self->buf; - if (self->buf == self->bufEnd) { +inline PRESERVE_NONE WeaselJsonStatus n_string(Parser3 *self, char *buf, + char *bufEnd) { + assert(bufEnd - buf != 0); + while (tables.whitespace[uint8_t(*buf)]) { + ++buf; + if (buf == bufEnd) { return WeaselJson_AGAIN; } } - if (*self->buf != '"') [[unlikely]] { + if (*buf != '"') [[unlikely]] { return WeaselJson_REJECT; } - ++self->buf; - self->dataBegin = self->writeBuf = self->buf; + ++buf; + self->dataBegin = self->writeBuf = buf; self->pop(); if (auto s = self->push({N_STRING2})) { return s; } - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); } -inline PRESERVE_NONE WeaselJsonStatus n_string2(Parser3 *self) { - const auto before = self->buf; +inline PRESERVE_NONE WeaselJsonStatus n_string2(Parser3 *self, char *buf, + char *bufEnd) { + const auto before = buf; - // Advance self->buf to the first "non-normal" character + // Advance buf to the first "non-normal" character for (;;) { constexpr int kStride = 64; - if (self->bufEnd - self->buf < kStride) [[unlikely]] { - while (self->buf != self->bufEnd && - tables.stringByteMeaning[uint8_t(*self->buf)] == Tables::NORMAL) { - ++self->buf; + if (bufEnd - buf < kStride) [[unlikely]] { + while (buf != bufEnd && + tables.stringByteMeaning[uint8_t(*buf)] == Tables::NORMAL) { + ++buf; } break; } using V = simd; - auto v = V{(int8_t *)self->buf}; + auto v = V{(int8_t *)buf}; int normal = (v != V::splat('"') & v != V::splat('\\') & v >= V::splat(0x20)) .count_leading_nonzero_lanes(); - self->buf += normal; + buf += normal; if (normal < kStride) { break; } } - int len = self->buf - before; + int len = buf - before; memmove(self->writeBuf, before, len); self->writeBuf += len; - if (self->buf == self->bufEnd) { + if (buf == bufEnd) { self->flushString(false); return WeaselJson_AGAIN; } - switch (tables.stringByteMeaning[uint8_t(*self->buf)]) { + switch (tables.stringByteMeaning[uint8_t(*buf)]) { case Tables::NORMAL: __builtin_unreachable(); case Tables::DUBQUOTE: self->flushString(true); - ++self->buf; + ++buf; self->pop(); - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); case Tables::BACKSLASH: - ++self->buf; + ++buf; self->pop(); if (auto s = self->push({N_STRING_FOLLOWING_ESCAPE})) { return s; } - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); case Tables::TWO_BYTE_UTF8: // two byte utf-8 encoding - self->utf8Codepoint = *self->buf & 0b00011111; + self->utf8Codepoint = *buf & 0b00011111; self->minCodepoint = 0x80; - *self->writeBuf++ = *self->buf++; + *self->writeBuf++ = *buf++; self->pop(); if (auto s = self->push({T_UTF8_LAST_CONTINUATION_BYTE, N_STRING2})) { return s; } - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); case Tables::THREE_BYTE_UTF8: // three byte utf-8 encoding - self->utf8Codepoint = *self->buf & 0b00001111; + self->utf8Codepoint = *buf & 0b00001111; self->minCodepoint = 0x800; - *self->writeBuf++ = *self->buf++; + *self->writeBuf++ = *buf++; self->pop(); if (auto s = self->push({T_UTF8_CONTINUATION_BYTE, T_UTF8_LAST_CONTINUATION_BYTE, N_STRING2})) { return s; } - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); case Tables::FOUR_BYTE_UTF8: // four byte utf-8 encoding - self->utf8Codepoint = *self->buf & 0b00000111; + self->utf8Codepoint = *buf & 0b00000111; self->minCodepoint = 0x10000; - *self->writeBuf++ = *self->buf++; + *self->writeBuf++ = *buf++; self->pop(); if (auto s = self->push({T_UTF8_CONTINUATION_BYTE, T_UTF8_CONTINUATION_BYTE, T_UTF8_LAST_CONTINUATION_BYTE, N_STRING2})) { return s; } - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); case Tables::CONTINUATION_BYTE: case Tables::INVALID: [[unlikely]] return WeaselJson_REJECT; @@ -487,8 +485,10 @@ inline PRESERVE_NONE WeaselJsonStatus n_string2(Parser3 *self) { } } -inline PRESERVE_NONE WeaselJsonStatus n_string_following_escape(Parser3 *self) { - switch (*self->buf) { +inline PRESERVE_NONE WeaselJsonStatus n_string_following_escape(Parser3 *self, + char *buf, + char *bufEnd) { + switch (*buf) { case '"': case '\\': case '/': @@ -497,45 +497,47 @@ inline PRESERVE_NONE WeaselJsonStatus n_string_following_escape(Parser3 *self) { case 'n': case 'r': case 't': - *self->writeBuf++ = tables.unescape[uint8_t(*self->buf++)]; + *self->writeBuf++ = tables.unescape[uint8_t(*buf++)]; self->pop(); if (auto s = self->push({N_STRING2})) { return s; } - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); case 'u': - ++self->buf; + ++buf; self->utf8Codepoint = 0; self->pop(); if (auto s = self->push({T_HEX, T_HEX, T_HEX, T_HEX2, N_STRING2})) { return s; } - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); default: [[unlikely]] return WeaselJson_REJECT; } } -inline PRESERVE_NONE WeaselJsonStatus t_utf8_continuation_byte(Parser3 *self) { - if (tables.stringByteMeaning[uint8_t(*self->buf)] != - Tables::CONTINUATION_BYTE) [[unlikely]] { +inline PRESERVE_NONE WeaselJsonStatus t_utf8_continuation_byte(Parser3 *self, + char *buf, + char *bufEnd) { + if (tables.stringByteMeaning[uint8_t(*buf)] != Tables::CONTINUATION_BYTE) + [[unlikely]] { return WeaselJson_REJECT; } self->utf8Codepoint <<= 6; - self->utf8Codepoint |= *self->buf & 0b00111111; - *self->writeBuf++ = *self->buf++; + self->utf8Codepoint |= *buf & 0b00111111; + *self->writeBuf++ = *buf++; self->pop(); - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); } inline PRESERVE_NONE WeaselJsonStatus -t_utf8_last_continuation_byte(Parser3 *self) { - if (tables.stringByteMeaning[uint8_t(*self->buf)] != - Tables::CONTINUATION_BYTE) [[unlikely]] { +t_utf8_last_continuation_byte(Parser3 *self, char *buf, char *bufEnd) { + if (tables.stringByteMeaning[uint8_t(*buf)] != Tables::CONTINUATION_BYTE) + [[unlikely]] { return WeaselJson_REJECT; } self->utf8Codepoint <<= 6; - self->utf8Codepoint |= *self->buf & 0b00111111; + self->utf8Codepoint |= *buf & 0b00111111; if (self->utf8Codepoint < self->minCodepoint || self->utf8Codepoint > 0x10ffff || (0xd800 <= self->utf8Codepoint && self->utf8Codepoint <= 0xdfff)) @@ -543,68 +545,72 @@ t_utf8_last_continuation_byte(Parser3 *self) { return WeaselJson_REJECT; } // TODO tell valgrind utf8Codepoint and minCodepoint are uninitialized - *self->writeBuf++ = *self->buf++; + *self->writeBuf++ = *buf++; self->pop(); - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); } -inline PRESERVE_NONE WeaselJsonStatus t_digit(Parser3 *self) { - if ('0' <= *self->buf && *self->buf <= '9') { - ++self->buf; +inline PRESERVE_NONE WeaselJsonStatus t_digit(Parser3 *self, char *buf, + char *bufEnd) { + if ('0' <= *buf && *buf <= '9') { + ++buf; self->pop(); - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); } else [[unlikely]] { return WeaselJson_REJECT; } } -inline PRESERVE_NONE WeaselJsonStatus t_onenine(Parser3 *self) { - if ('1' <= *self->buf && *self->buf <= '9') { - ++self->buf; +inline PRESERVE_NONE WeaselJsonStatus t_onenine(Parser3 *self, char *buf, + char *bufEnd) { + if ('1' <= *buf && *buf <= '9') { + ++buf; self->pop(); - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); } else [[unlikely]] { return WeaselJson_REJECT; } } -inline PRESERVE_NONE WeaselJsonStatus t_hex(Parser3 *self) { +inline PRESERVE_NONE WeaselJsonStatus t_hex(Parser3 *self, char *buf, + char *bufEnd) { self->utf8Codepoint <<= 4; - if (('0' <= *self->buf && *self->buf <= '9')) { - self->utf8Codepoint |= *self->buf - '0'; - } else if ('a' <= *self->buf && *self->buf <= 'f') { - self->utf8Codepoint |= 10 + *self->buf - 'a'; - } else if ('A' <= *self->buf && *self->buf <= 'F') { - self->utf8Codepoint |= 10 + *self->buf - 'A'; + if (('0' <= *buf && *buf <= '9')) { + self->utf8Codepoint |= *buf - '0'; + } else if ('a' <= *buf && *buf <= 'f') { + self->utf8Codepoint |= 10 + *buf - 'a'; + } else if ('A' <= *buf && *buf <= 'F') { + self->utf8Codepoint |= 10 + *buf - 'A'; } else [[unlikely]] { return WeaselJson_REJECT; } - ++self->buf; + ++buf; self->pop(); - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); } -inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self) { +inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self, char *buf, + char *bufEnd) { self->utf8Codepoint <<= 4; - if (('0' <= *self->buf && *self->buf <= '9')) { - self->utf8Codepoint |= *self->buf - '0'; - } else if ('a' <= *self->buf && *self->buf <= 'f') { - self->utf8Codepoint |= 10 + *self->buf - 'a'; - } else if ('A' <= *self->buf && *self->buf <= 'F') { - self->utf8Codepoint |= 10 + *self->buf - 'A'; + if (('0' <= *buf && *buf <= '9')) { + self->utf8Codepoint |= *buf - '0'; + } else if ('a' <= *buf && *buf <= 'f') { + self->utf8Codepoint |= 10 + *buf - 'a'; + } else if ('A' <= *buf && *buf <= 'F') { + self->utf8Codepoint |= 10 + *buf - 'A'; } else [[unlikely]] { return WeaselJson_REJECT; } - ++self->buf; + ++buf; // Write codepoint in utf-8 if there's room in the user provided buffer. If // there's not room, flush, write into a temp buffer, and flush again. char tmp[3]; if (self->utf8Codepoint < 0x80) { - assert(self->buf - self->writeBuf >= 1); + assert(buf - self->writeBuf >= 1); *self->writeBuf++ = self->utf8Codepoint; } else if (self->utf8Codepoint < 0x800) { - bool useTmp = self->buf - self->writeBuf < 2; + bool useTmp = buf - self->writeBuf < 2; char *p = tmp; if (useTmp) [[unlikely]] { self->flushString(false); @@ -628,9 +634,9 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self) { self->push({T_BACKSLASH, T_U2, T_HEX, T_HEX, T_HEX, T_HEX3})) { return s; } - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); } - bool useTmp = self->buf - self->writeBuf < 3; + bool useTmp = buf - self->writeBuf < 3; char *p = tmp; if (useTmp) [[unlikely]] { self->flushString(false); @@ -648,21 +654,22 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self) { } self->pop(); - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); } -inline PRESERVE_NONE WeaselJsonStatus t_hex3(Parser3 *self) { +inline PRESERVE_NONE WeaselJsonStatus t_hex3(Parser3 *self, char *buf, + char *bufEnd) { self->utf8Codepoint <<= 4; - if (('0' <= *self->buf && *self->buf <= '9')) { - self->utf8Codepoint |= *self->buf - '0'; - } else if ('a' <= *self->buf && *self->buf <= 'f') { - self->utf8Codepoint |= 10 + *self->buf - 'a'; - } else if ('A' <= *self->buf && *self->buf <= 'F') { - self->utf8Codepoint |= 10 + *self->buf - 'A'; + if (('0' <= *buf && *buf <= '9')) { + self->utf8Codepoint |= *buf - '0'; + } else if ('a' <= *buf && *buf <= 'f') { + self->utf8Codepoint |= 10 + *buf - 'a'; + } else if ('A' <= *buf && *buf <= 'F') { + self->utf8Codepoint |= 10 + *buf - 'A'; } else [[unlikely]] { return WeaselJson_REJECT; } - ++self->buf; + ++buf; if (!(0xdc00 <= self->utf8Codepoint && self->utf8Codepoint <= 0xdfff)) [[unlikely]] { @@ -680,7 +687,7 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex3(Parser3 *self) { if (self->utf8Codepoint > 0x10FFFF) [[unlikely]] { return WeaselJson_REJECT; } - bool useTmp = self->buf - self->writeBuf < 4; + bool useTmp = buf - self->writeBuf < 4; char *p = tmp; if (useTmp) [[unlikely]] { self->flushString(false); @@ -699,16 +706,17 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex3(Parser3 *self) { } self->pop(); - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); } -inline PRESERVE_NONE WeaselJsonStatus n_integer(Parser3 *self) { - self->dataBegin = self->buf; - switch (*self->buf) { +inline PRESERVE_NONE WeaselJsonStatus n_integer(Parser3 *self, char *buf, + char *bufEnd) { + self->dataBegin = buf; + switch (*buf) { case '0': - ++self->buf; + ++buf; self->pop(); - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); case '1': case '2': case '3': @@ -718,30 +726,31 @@ inline PRESERVE_NONE WeaselJsonStatus n_integer(Parser3 *self) { case '7': case '8': case '9': - ++self->buf; + ++buf; self->pop(); if (auto s = self->push({N_DIGITS2})) { return s; } - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); case '-': - ++self->buf; + ++buf; self->pop(); if (auto s = self->push({N_INTEGER2})) { return s; } - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); default: [[unlikely]] return WeaselJson_REJECT; } } -inline PRESERVE_NONE WeaselJsonStatus n_integer2(Parser3 *self) { - switch (*self->buf) { +inline PRESERVE_NONE WeaselJsonStatus n_integer2(Parser3 *self, char *buf, + char *bufEnd) { + switch (*buf) { case '0': - ++self->buf; + ++buf; self->pop(); - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); case '1': case '2': case '3': @@ -751,19 +760,20 @@ inline PRESERVE_NONE WeaselJsonStatus n_integer2(Parser3 *self) { case '7': case '8': case '9': - ++self->buf; + ++buf; self->pop(); if (auto s = self->push({N_DIGITS2})) { return s; } - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); default: [[unlikely]] return WeaselJson_REJECT; } } -inline PRESERVE_NONE WeaselJsonStatus n_digits(Parser3 *self) { - switch (*self->buf) { +inline PRESERVE_NONE WeaselJsonStatus n_digits(Parser3 *self, char *buf, + char *bufEnd) { + switch (*buf) { case '0': case '1': case '2': @@ -774,163 +784,173 @@ inline PRESERVE_NONE WeaselJsonStatus n_digits(Parser3 *self) { case '7': case '8': case '9': - ++self->buf; + ++buf; self->pop(); if (auto s = self->push({N_DIGITS2})) { return s; } - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); default: [[unlikely]] return WeaselJson_REJECT; } } -inline PRESERVE_NONE WeaselJsonStatus n_digits2(Parser3 *self) { - if (self->len() == 0) { +inline PRESERVE_NONE WeaselJsonStatus n_digits2(Parser3 *self, char *buf, + char *bufEnd) { + if (bufEnd - buf == 0) { self->pop(); - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); } - // Advance self->buf to the first non-decimal character - while (self->buf != self->bufEnd && '0' <= *self->buf && *self->buf <= '9') { - ++self->buf; + // Advance buf to the first non-decimal character + while (buf != bufEnd && '0' <= *buf && *buf <= '9') { + ++buf; } - if (self->buf == self->bufEnd) { - self->flushNumber(false); + if (buf == bufEnd) { + self->flushNumber(false, buf); return WeaselJson_AGAIN; } self->pop(); - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); } -inline PRESERVE_NONE WeaselJsonStatus n_fraction(Parser3 *self) { - if (self->len() == 0) { +inline PRESERVE_NONE WeaselJsonStatus n_fraction(Parser3 *self, char *buf, + char *bufEnd) { + if (bufEnd - buf == 0) { self->pop(); - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); } - switch (*self->buf) { + switch (*buf) { case '.': - ++self->buf; + ++buf; self->pop(); if (auto s = self->push({N_DIGITS})) { return s; } - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); default: self->pop(); - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); } } -inline PRESERVE_NONE WeaselJsonStatus n_exponent(Parser3 *self) { - if (self->len() == 0) { +inline PRESERVE_NONE WeaselJsonStatus n_exponent(Parser3 *self, char *buf, + char *bufEnd) { + if (bufEnd - buf == 0) { self->pop(); - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); } - switch (*self->buf) { + switch (*buf) { case 'e': case 'E': - ++self->buf; + ++buf; self->pop(); if (auto s = self->push({N_SIGN, N_DIGITS, T_END_NUMBER})) { return s; } - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); default: self->pop(); - self->flushNumber(true); - MUSTTAIL return Parser3::keepGoing(self); + self->flushNumber(true, buf); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); } } -inline PRESERVE_NONE WeaselJsonStatus n_sign(Parser3 *self) { - if (self->len() == 0) { +inline PRESERVE_NONE WeaselJsonStatus n_sign(Parser3 *self, char *buf, + char *bufEnd) { + if (bufEnd - buf == 0) { self->pop(); - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); } - switch (*self->buf) { + switch (*buf) { case '+': case '-': - ++self->buf; + ++buf; self->pop(); - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); default: self->pop(); - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); } } -inline PRESERVE_NONE WeaselJsonStatus n_true(Parser3 *self) { - if (*self->buf == 'e') { - ++self->buf; +inline PRESERVE_NONE WeaselJsonStatus n_true(Parser3 *self, char *buf, + char *bufEnd) { + if (*buf == 'e') { + ++buf; self->pop(); self->callbacks->on_true_literal(self->userdata); - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); } else [[unlikely]] { return WeaselJson_REJECT; } } -inline PRESERVE_NONE WeaselJsonStatus n_false(Parser3 *self) { - if (*self->buf == 'e') { - ++self->buf; +inline PRESERVE_NONE WeaselJsonStatus n_false(Parser3 *self, char *buf, + char *bufEnd) { + if (*buf == 'e') { + ++buf; self->pop(); self->callbacks->on_false_literal(self->userdata); - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); } else [[unlikely]] { return WeaselJson_REJECT; } } -inline PRESERVE_NONE WeaselJsonStatus n_null(Parser3 *self) { - if (*self->buf == 'l') { - ++self->buf; +inline PRESERVE_NONE WeaselJsonStatus n_null(Parser3 *self, char *buf, + char *bufEnd) { + if (*buf == 'l') { + ++buf; self->pop(); self->callbacks->on_null_literal(self->userdata); - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); } else [[unlikely]] { return WeaselJson_REJECT; } } template -inline PRESERVE_NONE WeaselJsonStatus singleChar(Parser3 *self) { +inline PRESERVE_NONE WeaselJsonStatus singleChar(Parser3 *self, char *buf, + char *bufEnd) { if constexpr (kSkipWhitespace) { - assert(self->len() != 0); - while (tables.whitespace[uint8_t(*self->buf)]) { - ++self->buf; - if (self->buf == self->bufEnd) { + assert(bufEnd - buf != 0); + while (tables.whitespace[uint8_t(*buf)]) { + ++buf; + if (buf == bufEnd) { return WeaselJson_AGAIN; } } } - if (*self->buf == kChar) { - ++self->buf; + if (*buf == kChar) { + ++buf; self->pop(); - MUSTTAIL return Parser3::keepGoing(self); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); } else [[unlikely]] { return WeaselJson_REJECT; } } -inline PRESERVE_NONE WeaselJsonStatus t_eof(Parser3 *self) { - if (self->len() > 0) [[unlikely]] { +inline PRESERVE_NONE WeaselJsonStatus t_eof(Parser3 *self, char *buf, + char *bufEnd) { + if (bufEnd - buf > 0) [[unlikely]] { return WeaselJson_REJECT; } return self->complete ? WeaselJson_OK : WeaselJson_AGAIN; } -inline PRESERVE_NONE WeaselJsonStatus t_end_number(Parser3 *self) { +inline PRESERVE_NONE WeaselJsonStatus t_end_number(Parser3 *self, char *buf, + char *bufEnd) { self->pop(); - self->flushNumber(true); - MUSTTAIL return Parser3::keepGoing(self); + self->flushNumber(true, buf); + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); } constexpr inline struct ContinuationTable { constexpr ContinuationTable() { // Defaults for (int i = 0; i < N_SYMBOL_COUNT; ++i) { - continuations[i] = +[](struct Parser3 *) PRESERVE_NONE { + continuations[i] = +[](struct Parser3 *, char *, char *) PRESERVE_NONE { printf("unimplemented\n"); return WeaselJson_REJECT; }; @@ -1021,9 +1041,10 @@ constexpr inline struct ContinuationTable { bool acceptsEmptyString[N_SYMBOL_COUNT]{}; } symbolTables; -inline PRESERVE_NONE WeaselJsonStatus Parser3::keepGoing(Parser3 *self) { - // self->debugPrint(); - if (self->len() == 0) { +inline PRESERVE_NONE WeaselJsonStatus Parser3::keepGoing(Parser3 *self, + char *buf, + char *bufEnd) { + if (bufEnd - buf == 0) { if (!self->complete) { switch (self->top()) { case N_INTEGER2: @@ -1035,7 +1056,7 @@ inline PRESERVE_NONE WeaselJsonStatus Parser3::keepGoing(Parser3 *self) { case T_DIGIT: case T_ONENINE: case T_END_NUMBER: - self->flushNumber(false); + self->flushNumber(false, buf); break; case N_STRING2: case N_STRING_FOLLOWING_ESCAPE: @@ -1077,22 +1098,7 @@ inline PRESERVE_NONE WeaselJsonStatus Parser3::keepGoing(Parser3 *self) { return WeaselJson_REJECT; } } - MUSTTAIL return symbolTables.continuations[self->top()](self); -} - -inline void Parser3::debugPrint() const { - for (int i = 0; i < stackPtr - stack(); ++i) { - printf("%s ", symbolTables.symbolNames[stack()[i]]); - } - printf("\n"); - for (int i = 0; i < len(); ++i) { - if (isprint(buf[i])) { - printf("%c", buf[i]); - } else { - printf("\\x%02x", uint8_t(buf[i])); - } - } - printf("\n"); + MUSTTAIL return symbolTables.continuations[self->top()](self, buf, bufEnd); } } // namespace parser3