From 297d6b48f71069a2f4eaeda3c385974dfc66e46a Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Thu, 22 May 2025 11:38:42 -0400 Subject: [PATCH] Fuse whitespace into object2, object3, array2, array3, colon --- src/parser3.h | 57 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 10 deletions(-) diff --git a/src/parser3.h b/src/parser3.h index 1685c88..e1cbe5a 100644 --- a/src/parser3.h +++ b/src/parser3.h @@ -230,13 +230,20 @@ inline WeaselJsonStatus n_object(Parser3 *self) { self->callbacks->on_begin_object(self->data); ++self->buf; self->pop(); - if (auto s = self->push({N_WHITESPACE, N_OBJECT2})) { + if (auto s = self->push({N_OBJECT2})) { return s; } MUSTTAIL return Parser3::keepGoing(self); } inline WeaselJsonStatus n_object2(Parser3 *self) { + assert(self->len() != 0); + while (tables.whitespace[uint8_t(*self->buf)]) { + ++self->buf; + if (self->buf == self->bufEnd) { + return WeaselJson_AGAIN; + } + } switch (*self->buf) { case '}': ++self->buf; @@ -245,8 +252,7 @@ inline WeaselJsonStatus n_object2(Parser3 *self) { MUSTTAIL return Parser3::keepGoing(self); case '"': self->pop(); - if (auto s = self->push({N_STRING, N_WHITESPACE, T_COLON, N_VALUE, - N_WHITESPACE, N_OBJECT3})) { + if (auto s = self->push({N_STRING, T_COLON, N_VALUE, N_OBJECT3})) { return s; } MUSTTAIL return Parser3::keepGoing(self); @@ -256,6 +262,13 @@ inline WeaselJsonStatus n_object2(Parser3 *self) { } inline WeaselJsonStatus n_object3(Parser3 *self) { + assert(self->len() != 0); + while (tables.whitespace[uint8_t(*self->buf)]) { + ++self->buf; + if (self->buf == self->bufEnd) { + return WeaselJson_AGAIN; + } + } switch (*self->buf) { case '}': ++self->buf; @@ -265,8 +278,8 @@ inline WeaselJsonStatus n_object3(Parser3 *self) { case ',': ++self->buf; self->pop(); - if (auto s = self->push({N_WHITESPACE, N_STRING, N_WHITESPACE, T_COLON, - N_VALUE, N_WHITESPACE, N_OBJECT3})) { + if (auto s = + self->push({N_WHITESPACE, N_STRING, T_COLON, N_VALUE, 
N_OBJECT3})) { return s; } MUSTTAIL return Parser3::keepGoing(self); @@ -282,13 +295,20 @@ inline WeaselJsonStatus n_array(Parser3 *self) { self->callbacks->on_begin_array(self->data); ++self->buf; self->pop(); - if (auto s = self->push({N_WHITESPACE, N_ARRAY2})) { + if (auto s = self->push({N_ARRAY2})) { return s; } MUSTTAIL return Parser3::keepGoing(self); } inline WeaselJsonStatus n_array2(Parser3 *self) { + assert(self->len() != 0); + while (tables.whitespace[uint8_t(*self->buf)]) { + ++self->buf; + if (self->buf == self->bufEnd) { + return WeaselJson_AGAIN; + } + } switch (*self->buf) { case ']': ++self->buf; @@ -297,7 +317,7 @@ inline WeaselJsonStatus n_array2(Parser3 *self) { MUSTTAIL return Parser3::keepGoing(self); default: self->pop(); - if (auto s = self->push({N_VALUE, N_WHITESPACE, N_ARRAY3})) { + if (auto s = self->push({N_VALUE, N_ARRAY3})) { return s; } MUSTTAIL return Parser3::keepGoing(self); @@ -305,6 +325,13 @@ inline WeaselJsonStatus n_array2(Parser3 *self) { } inline WeaselJsonStatus n_array3(Parser3 *self) { + assert(self->len() != 0); + while (tables.whitespace[uint8_t(*self->buf)]) { + ++self->buf; + if (self->buf == self->bufEnd) { + return WeaselJson_AGAIN; + } + } switch (*self->buf) { case ']': ++self->buf; @@ -314,7 +341,7 @@ inline WeaselJsonStatus n_array3(Parser3 *self) { case ',': ++self->buf; self->pop(); - if (auto s = self->push({N_VALUE, N_WHITESPACE, N_ARRAY3})) { + if (auto s = self->push({N_VALUE, N_ARRAY3})) { return s; } MUSTTAIL return Parser3::keepGoing(self); @@ -814,7 +841,17 @@ inline WeaselJsonStatus n_null(Parser3 *self) { return WeaselJson_REJECT; } -template <char kChar> inline WeaselJsonStatus singleChar(Parser3 *self) { +template <char kChar, bool kSkipWhitespace = false> +inline WeaselJsonStatus singleChar(Parser3 *self) { + if constexpr (kSkipWhitespace) { + assert(self->len() != 0); + while (tables.whitespace[uint8_t(*self->buf)]) { + ++self->buf; + if (self->buf == self->bufEnd) { + return WeaselJson_AGAIN; + } + } + } if (*self->buf == kChar) { ++self->buf; 
self->pop(); @@ -873,7 +910,7 @@ constexpr inline struct ContinuationTable { continuations[T_A] = singleChar<'a'>; continuations[T_L] = singleChar<'l'>; continuations[T_S] = singleChar<'s'>; - continuations[T_COLON] = singleChar<':'>; + continuations[T_COLON] = singleChar<':', true>; continuations[T_UTF8_CONTINUATION_BYTE] = t_utf8_continuation_byte; continuations[T_UTF8_LAST_CONTINUATION_BYTE] = t_utf8_last_continuation_byte;