diff --git a/src/parser3.h b/src/parser3.h index a7a9b55..3d92ec0 100644 --- a/src/parser3.h +++ b/src/parser3.h @@ -79,7 +79,7 @@ struct Parser3 { [[nodiscard]] Status parse(char *buf, int len) { complete = len == 0; - this->buf = this->dataBegin = buf; + this->buf = this->dataBegin = this->writeBuf = buf; this->bufEnd = buf + len; return keepGoing(this); } @@ -92,7 +92,7 @@ struct Parser3 { } void flushString() { - int len = buf - dataBegin; + int len = writeBuf - dataBegin; if (len > 0) { callbacks->on_string_data(data, dataBegin, len); } @@ -127,10 +127,14 @@ struct Parser3 { constexpr static int kMaxStackSize = 1024; [[maybe_unused]] void debugPrint(); + // Pointer to the next byte in the input to consume char *buf = nullptr; + // Pointer past the end of the last byte available to consume char *bufEnd = nullptr; // Used for flushing pending data with on_*_data callbacks char *dataBegin; + // Used for unescaping string data in place + char *writeBuf; const Callbacks *const callbacks; void *const data; Symbol stack[kMaxStackSize]; @@ -368,9 +372,9 @@ inline Status n_string2(Parser3 *self) { // one byte utf-8 encoding switch (*self->buf) { case '"': - self->flushString(); ++self->buf; self->pop(); + self->flushString(); self->callbacks->on_end_string(self->data); MUSTTAIL return Parser3::keepGoing(self); case '\\': @@ -389,6 +393,7 @@ inline Status n_string2(Parser3 *self) { self->utf8Codepoint = *self->buf & 0b00011111; self->minCodepoint = 0x80; ++self->buf; + ++self->writeBuf; self->pop(); if (auto s = self->push({T_UTF8_LAST_CONTINUATION_BYTE, N_STRING2})) { return s; @@ -400,6 +405,7 @@ inline Status n_string2(Parser3 *self) { self->utf8Codepoint = *self->buf & 0b00001111; self->minCodepoint = 0x800; ++self->buf; + ++self->writeBuf; self->pop(); if (auto s = self->push({T_UTF8_CONTINUATION_BYTE, T_UTF8_LAST_CONTINUATION_BYTE, N_STRING2})) { @@ -411,6 +417,7 @@ inline Status n_string2(Parser3 *self) { self->utf8Codepoint = *self->buf & 0b00000111; self->minCodepoint = 0x10000; ++self->buf; + ++self->writeBuf; self->pop(); if (auto s = self->push({T_UTF8_CONTINUATION_BYTE, T_UTF8_CONTINUATION_BYTE, T_UTF8_LAST_CONTINUATION_BYTE, N_STRING2})) { @@ -434,7 +441,7 @@ inline Status n_string_following_escape(Parser3 *self) { case 'n': case 'r': case 't': - ++self->buf; + *self->writeBuf++ = tables.unescape[*self->buf++]; self->pop(); if (auto s = self->push({N_STRING2})) { return s; @@ -442,6 +449,7 @@ inline Status n_string_following_escape(Parser3 *self) { MUSTTAIL return Parser3::keepGoing(self); case 'u': ++self->buf; + // TODO unescape self->pop(); if (auto s = self->push({T_HEX, T_HEX, T_HEX, T_HEX, N_STRING2})) { return s; @@ -463,6 +471,7 @@ inline Status t_utf8_continuation_byte(Parser3 *self) { self->utf8Codepoint <<= 6; self->utf8Codepoint |= *self->buf & 0b00111111; ++self->buf; + ++self->writeBuf; self->pop(); MUSTTAIL return Parser3::keepGoing(self); } @@ -486,6 +495,7 @@ inline Status t_utf8_last_continuation_byte(Parser3 *self) { } // TODO tell valgrind utf8Codepoint and minCodepoint are uninitialized ++self->buf; + ++self->writeBuf; self->pop(); MUSTTAIL return Parser3::keepGoing(self); } @@ -524,6 +534,7 @@ inline Status t_hex(Parser3 *self) { ('a' <= *self->buf && *self->buf <= 'f') || ('A' <= *self->buf && *self->buf <= 'F')) { ++self->buf; + ++self->writeBuf; self->pop(); MUSTTAIL return Parser3::keepGoing(self); }