From a9ebff72b059d5d981b34c87946afb9aa87c681c Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Thu, 15 May 2025 17:24:37 -0400 Subject: [PATCH] Close to streaming parser --- src/test.cpp | 179 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 131 insertions(+), 48 deletions(-) diff --git a/src/test.cpp b/src/test.cpp index edc4124..43ffa20 100644 --- a/src/test.cpp +++ b/src/test.cpp @@ -139,9 +139,19 @@ private: // Terminals and Nonterminals. These appear in the stack of the pushdown // automata enum Symbol : int8_t { - T_STRING, // Multibyte! T_COLON, + T_TRUE, + T_FALSE, + T_NULL, + T_R, + T_U, + T_A, + T_L, + T_S, + T_DUBQUOTE, // Nonterminals + N_STRING, // Not including leading double quote, but including trailing quote + N_NUMBER, N_VALUE, N_ARRAY_VALUE_OR_END, N_OBJECT_VALUE_OR_END, @@ -150,14 +160,24 @@ enum Symbol : int8_t { N_PAST_END, // Must be last nonterminal }; -const char *symbolNames[] = { - "T_STRING", - "T_COLON", - "N_VALUE", - "N_ARRAY_VALUE_OR_END", - "N_OBJECT_VALUE_OR_END", - "N_ARRAY_MAYBE_CONTINUE", - "N_OBJECT_MAYBE_CONTINUE", +static const char *symbolNames[N_PAST_END] = { + "COLON", + "TRUE", + "FALSE", + "NULL", + "R", + "U", + "A", + "L", + "S", + "DUBQUOTE", + "STRING", + "NUMBER", + "VALUE", + "ARRAY_VALUE_OR_END", + "OBJECT_VALUE_OR_END", + "ARRAY_MAYBE_CONTINUE", + "OBJECT_MAYBE_CONTINUE", }; constexpr static struct Tables { @@ -416,7 +436,9 @@ private: // [0-9.]+. Could be adapted to have a streaming interface. Uses O(1) memory. struct Parser2 { Parser2(const Callbacks *callbacks, void *data) - : callbacks(callbacks), data(data) {} + : callbacks(callbacks), data(data) { + std::ignore = push({N_VALUE}); + } void prime(char *buf, int len) { this->buf = buf; @@ -434,12 +456,7 @@ struct Parser2 { S_OVERFLOW, }; - [[nodiscard]] Status parse() { - if (Status s = push({N_VALUE})) { - return s; - } - return keepGoing(this); - } + [[nodiscard]] Status parse() { return keepGoing(this); } Parser2(Parser2 const &) = delete; Parser2 &operator=(Parser2 const &) = delete; @@ -485,9 +502,6 @@ private: return S_OK; } Status parse_string() { - if (Status s = parseLiteral("\"")) { - return s; - } callbacks->on_begin_string(data); auto *result = (char *)memchr(buf, '"', len()); if (result == nullptr) { @@ -518,6 +532,9 @@ private: return S_OK; } auto top = *(self->stackPtr - 1); + if (self->len() == 0) { + return S_AGAIN; + } self->maybeSkipWs(); MUSTTAIL return table[top](self); } @@ -529,48 +546,65 @@ private: self->pop(); MUSTTAIL return keepGoing(self); } - static Status colon(Parser2 *self) { - if (Status s = self->parseLiteral(":")) { + static Status number(Parser2 *self) { + if (Status s = self->parse_number()) { return s; } self->pop(); MUSTTAIL return keepGoing(self); } static Status value(Parser2 *self) { - if (self->parse_string() == S_OK) { + switch (*self->buf) { + case '{': + ++self->buf; self->pop(); - MUSTTAIL return keepGoing(self); - } else if (self->parse_number() == S_OK) { - self->pop(); - MUSTTAIL return keepGoing(self); - } else if (self->parseLiteral("{") == S_OK) { - self->pop(); - self->callbacks->on_begin_object(self->data); if (Status s = self->push({N_OBJECT_VALUE_OR_END})) { return s; } - MUSTTAIL return keepGoing(self); - } else if (self->parseLiteral("[") == S_OK) { + break; + case '[': + ++self->buf; self->pop(); - self->callbacks->on_begin_array(self->data); if (Status s = self->push({N_ARRAY_VALUE_OR_END})) { return s; } - MUSTTAIL return keepGoing(self); - } else if (self->parseLiteral("true") == S_OK) { + break; + case '"': + ++self->buf; self->pop(); - self->callbacks->on_true_literal(self->data); - MUSTTAIL return keepGoing(self); - } else if (self->parseLiteral("false") == S_OK) { + if (Status s = self->push({N_STRING})) { + return s; + } + break; + case 't': + ++self->buf; self->pop(); - self->callbacks->on_false_literal(self->data); - MUSTTAIL return keepGoing(self); - } else if (self->parseLiteral("null") == S_OK) { + if (Status s = self->push({T_R, T_U, T_TRUE})) { + return s; + } + break; + case 'f': + ++self->buf; self->pop(); - self->callbacks->on_null_literal(self->data); - MUSTTAIL return keepGoing(self); + if (Status s = self->push({T_A, T_L, T_S, T_FALSE})) { + return s; + } + break; + case 'n': + ++self->buf; + self->pop(); + if (Status s = self->push({T_U, T_L, T_NULL})) { + return s; + } + break; + default: + self->pop(); + if (Status s = self->push({N_NUMBER})) { + return s; + } + break; } - return S_REJECT; + MUSTTAIL return keepGoing(self); } static Status arrayOrEnd(Parser2 *self) { if (self->parseLiteral("]") == S_OK) { @@ -590,10 +624,10 @@ private: self->pop(); self->callbacks->on_end_object(self->data); MUSTTAIL return keepGoing(self); - } else { + } else if (self->parseLiteral("\"") == S_OK) { self->pop(); if (Status s = self->push( - {T_STRING, T_COLON, N_VALUE, N_OBJECT_MAYBE_CONTINUE})) { + {N_STRING, T_COLON, N_VALUE, N_OBJECT_MAYBE_CONTINUE})) { return s; } MUSTTAIL return keepGoing(self); @@ -617,8 +651,8 @@ private: static Status objectContinue(Parser2 *self) { if (self->parseLiteral(",") == S_OK) { self->pop(); - if (Status s = self->push( - {T_STRING, T_COLON, N_VALUE, N_OBJECT_MAYBE_CONTINUE})) { + if (Status s = self->push({T_DUBQUOTE, N_STRING, T_COLON, N_VALUE, + N_OBJECT_MAYBE_CONTINUE})) { return s; } MUSTTAIL return keepGoing(self); @@ -629,10 +663,59 @@ private: } return S_REJECT; } + static Status colon(Parser2 *self) { + if (*self->buf++ == ':') { + self->pop(); + MUSTTAIL return keepGoing(self); + } + return S_REJECT; + } + static Status finishTrue(Parser2 *self) { + if (*self->buf++ == 'e') { + self->pop(); + self->callbacks->on_true_literal(self->data); + MUSTTAIL return keepGoing(self); + } + return S_REJECT; + } + static Status finishFalse(Parser2 *self) { + if (*self->buf++ == 'e') { + self->pop(); + self->callbacks->on_false_literal(self->data); + MUSTTAIL return keepGoing(self); + } + return S_REJECT; + } + static Status finishNull(Parser2 *self) { + if (*self->buf++ == 'l') { + self->pop(); + self->callbacks->on_null_literal(self->data); + MUSTTAIL return keepGoing(self); + } + return S_REJECT; + } + template static Status singleChar(Parser2 *self) { + if (*self->buf == kChar) { + ++self->buf; + self->pop(); + MUSTTAIL return keepGoing(self); + } + return S_REJECT; + } static constexpr continuation table[N_PAST_END] = { - /*T_STRING*/ string, /*T_COLON*/ colon, + /*T_TRUE*/ finishTrue, + /*T_FALSE*/ finishFalse, + /*T_NULL*/ finishNull, + /*T_R*/ singleChar<'r'>, + /*T_U*/ singleChar<'u'>, + /*T_A*/ singleChar<'a'>, + /*T_L*/ singleChar<'l'>, + /*T_S*/ singleChar<'s'>, + /*T_DUBQUOTE*/ singleChar<'"'>, + /*N_STRING*/ string, + /*N_NUMBER*/ number, /*N_VALUE*/ value, /*N_ARRAY_VALUE_OR_END*/ arrayOrEnd, /*N_OBJECT_VALUE_OR_END*/ objectOrEnd,