From c822d0ffaa244f08f0df8d6d17fe3c0c7e3a1844 Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Tue, 13 May 2025 17:49:12 -0400 Subject: [PATCH] Simplify table Now we only have symbols that actually go on the stack --- src/test.cpp | 382 +++++++++++---------------------------------------- 1 file changed, 81 insertions(+), 301 deletions(-) diff --git a/src/test.cpp b/src/test.cpp index 45f9a7c..542d60d 100644 --- a/src/test.cpp +++ b/src/test.cpp @@ -138,35 +138,21 @@ private: // Terminals and Nonterminals. These appear in the stack of the pushdown // automata enum Symbol : int8_t { - // Terminals - T_INVALID, - T_EOF, - T_LBRACE, - T_RBRACE, - T_COMMA, - T_ATOM, // Multibyte! T_STRING, // Multibyte! - T_LBRACKET, - T_RBRACKET, T_COLON, - T_PAST_END, // Must be last terminal // Nonterminals - N_VALUE = T_PAST_END, + N_VALUE, N_ARRAY_MAYBE_CONTINUE, - N_OBJECT, N_OBJECT_MAYBE_CONTINUE, N_PAST_END, // Must be last nonterminal }; const char *symbolNames[] = { - "T_INVALID", "T_EOF", - "T_LBRACE", "T_RBRACE", - "T_COMMA", "T_ATOM", - "T_STRING", "T_LBRACKET", - "T_RBRACKET", "T_COLON", - "N_VALUE", "N_ARRAY_MAYBE_CONTINUE", - "N_OBJECT", "N_OBJECT_MAYBE_CONTINUE", - "N_PAST_END", + "T_STRING", + "T_COLON", + "N_VALUE", + "N_ARRAY_MAYBE_CONTINUE", + "N_OBJECT_MAYBE_CONTINUE", }; namespace { @@ -503,316 +489,110 @@ private: typedef bool (*continuation)(Parser2 *); - [[maybe_unused]] void debugPrint(Symbol token) { - printf("token: %s\n", symbolNames[token]); + [[maybe_unused]] void debugPrint() { for (int i = 0; i < stackPtr - stack; ++i) { printf("%s ", symbolNames[stack[i]]); } printf("\n"); } - static bool tokenMatch(Parser2 *self) { - self->pop(); - MUSTTAIL return keepGoing(self); - } - static bool keepGoing(Parser2 *self) { + // self->debugPrint(); if (self->empty()) { return true; } auto top = *(self->stackPtr - 1); - auto token = self->nextToken(top); - // self->debugPrint(token); - MUSTTAIL return table[top][token](self); + self->maybeSkipWs(); + MUSTTAIL return table[top](self); } - static bool reject(Parser2 *) { return false; } - static bool object(Parser2 *self) { - self->pop(); - self->callbacks->on_begin_object(self->data); - if (!self->push({T_STRING, T_COLON, N_VALUE, N_OBJECT_MAYBE_CONTINUE})) { + static bool string(Parser2 *self) { + if (!self->parse_string()) { return false; } + self->pop(); MUSTTAIL return keepGoing(self); } - static bool array(Parser2 *self) { - self->pop(); - self->callbacks->on_begin_array(self->data); - if (!self->push({N_VALUE, N_ARRAY_MAYBE_CONTINUE})) { + static bool colon(Parser2 *self) { + if (!self->parseLiteral(":")) { return false; } + self->pop(); MUSTTAIL return keepGoing(self); } - static bool continueArray(Parser2 *self) { - self->pop(); - if (!self->push({N_VALUE, N_ARRAY_MAYBE_CONTINUE})) { - return false; + static bool value(Parser2 *self) { + if (self->parse_string()) { + self->pop(); + MUSTTAIL return keepGoing(self); + } else if (self->parse_number()) { + self->pop(); + MUSTTAIL return keepGoing(self); + } else if (self->parseLiteral("{")) { + self->pop(); + self->callbacks->on_begin_object(self->data); + if (!self->push({T_STRING, T_COLON, N_VALUE, N_OBJECT_MAYBE_CONTINUE})) { + return false; + } + MUSTTAIL return keepGoing(self); + } else if (self->parseLiteral("[")) { + self->pop(); + self->callbacks->on_begin_array(self->data); + if (!self->push({N_VALUE, N_ARRAY_MAYBE_CONTINUE})) { + return false; + } + MUSTTAIL return keepGoing(self); + } else if (self->parseLiteral("true")) { + self->pop(); + self->callbacks->on_true_literal(self->data); + MUSTTAIL return keepGoing(self); + } else if (self->parseLiteral("false")) { + self->pop(); + self->callbacks->on_false_literal(self->data); + MUSTTAIL return keepGoing(self); + } else if (self->parseLiteral("null")) { + self->pop(); + self->callbacks->on_null_literal(self->data); + MUSTTAIL return keepGoing(self); } - MUSTTAIL return keepGoing(self); + return false; } - static bool continueObject(Parser2 *self) { - self->pop(); - if (!self->push({T_STRING, T_COLON, N_VALUE, N_OBJECT_MAYBE_CONTINUE})) { - return false; + static bool arrayContinue(Parser2 *self) { + if (self->parseLiteral(",")) { + self->pop(); + if (!self->push({N_VALUE, N_ARRAY_MAYBE_CONTINUE})) { + return false; + } + MUSTTAIL return keepGoing(self); + } else if (self->parseLiteral("]")) { + self->pop(); + self->callbacks->on_end_array(self->data); + MUSTTAIL return keepGoing(self); } - MUSTTAIL return keepGoing(self); + return false; } - static bool finishArray(Parser2 *self) { - self->pop(); - self->callbacks->on_end_array(self->data); - MUSTTAIL return keepGoing(self); - } - static bool finishObject(Parser2 *self) { - self->pop(); - self->callbacks->on_end_object(self->data); - MUSTTAIL return keepGoing(self); + static bool objectContinue(Parser2 *self) { + if (self->parseLiteral(",")) { + self->pop(); + if (!self->push({T_STRING, T_COLON, N_VALUE, N_OBJECT_MAYBE_CONTINUE})) { + return false; + } + MUSTTAIL return keepGoing(self); + } else if (self->parseLiteral("}")) { + self->pop(); + self->callbacks->on_end_object(self->data); + MUSTTAIL return keepGoing(self); + } + return false; } - // table[nonterminal][terminal] - static constexpr continuation table[N_PAST_END][T_PAST_END] = { - /*T_INVALID*/ - { - /*T_INVALID*/ reject, - /*T_EOF*/ reject, - /*T_LBRACE*/ reject, - /*T_RBRACE*/ reject, - /*T_COMMA*/ reject, - /*T_ATOM*/ reject, - /*T_STRING*/ reject, - /*T_LBRACKET*/ reject, - /*T_RBRACKET*/ reject, - /*T_COLON*/ reject, - }, - /*T_EOF*/ - { - /*T_INVALID*/ reject, - /*T_EOF*/ tokenMatch, - /*T_LBRACE*/ reject, - /*T_RBRACE*/ reject, - /*T_COMMA*/ reject, - /*T_ATOM*/ reject, - /*T_STRING*/ reject, - /*T_LBRACKET*/ reject, - /*T_RBRACKET*/ reject, - /*T_COLON*/ reject, - }, - /*T_LBRACE*/ - { - /*T_INVALID*/ reject, - /*T_EOF*/ reject, - /*T_LBRACE*/ tokenMatch, - /*T_RBRACE*/ reject, - /*T_COMMA*/ reject, - /*T_ATOM*/ reject, - /*T_STRING*/ reject, - /*T_LBRACKET*/ reject, - /*T_RBRACKET*/ reject, - /*T_COLON*/ reject, - }, - /*T_RBRACE*/ - { - /*T_INVALID*/ reject, - /*T_EOF*/ reject, - /*T_LBRACE*/ reject, - /*T_RBRACE*/ tokenMatch, - /*T_COMMA*/ reject, - /*T_ATOM*/ reject, - /*T_STRING*/ reject, - /*T_LBRACKET*/ reject, - /*T_RBRACKET*/ reject, - /*T_COLON*/ reject, - }, - /*T_COMMA*/ - { - /*T_INVALID*/ reject, - /*T_EOF*/ reject, - /*T_LBRACE*/ reject, - /*T_RBRACE*/ reject, - /*T_COMMA*/ tokenMatch, - /*T_ATOM*/ reject, - /*T_STRING*/ reject, - /*T_LBRACKET*/ reject, - /*T_RBRACKET*/ reject, - /*T_COLON*/ reject, - }, - /*T_ATOM*/ - { - /*T_INVALID*/ reject, - /*T_EOF*/ reject, - /*T_LBRACE*/ reject, - /*T_RBRACE*/ reject, - /*T_COMMA*/ reject, - /*T_ATOM*/ tokenMatch, - /*T_STRING*/ reject, - /*T_LBRACKET*/ reject, - /*T_RBRACKET*/ reject, - /*T_COLON*/ reject, - }, - /*T_STRING*/ - { - /*T_INVALID*/ reject, - /*T_EOF*/ reject, - /*T_LBRACE*/ reject, - /*T_RBRACE*/ reject, - /*T_COMMA*/ reject, - /*T_ATOM*/ reject, - /*T_STRING*/ tokenMatch, - /*T_LBRACKET*/ reject, - /*T_RBRACKET*/ reject, - /*T_COLON*/ reject, - }, - /*T_LBRACKET*/ - { - /*T_INVALID*/ reject, - /*T_EOF*/ reject, - /*T_LBRACE*/ reject, - /*T_RBRACE*/ reject, - /*T_COMMA*/ reject, - /*T_ATOM*/ reject, - /*T_STRING*/ reject, - /*T_LBRACKET*/ tokenMatch, - /*T_RBRACKET*/ reject, - /*T_COLON*/ reject, - }, - /*T_RBRACKET*/ - { - /*T_INVALID*/ reject, - /*T_EOF*/ reject, - /*T_LBRACE*/ reject, - /*T_RBRACE*/ reject, - /*T_COMMA*/ reject, - /*T_ATOM*/ reject, - /*T_STRING*/ reject, - /*T_LBRACKET*/ reject, - /*T_RBRACKET*/ tokenMatch, - /*T_COLON*/ reject, - }, - /*T_COLON*/ - { - /*T_INVALID*/ reject, - /*T_EOF*/ reject, - /*T_LBRACE*/ reject, - /*T_RBRACE*/ reject, - /*T_COMMA*/ reject, - /*T_ATOM*/ reject, - /*T_STRING*/ reject, - /*T_LBRACKET*/ reject, - /*T_RBRACKET*/ reject, - /*T_COLON*/ tokenMatch, - }, - /*N_VALUE*/ - { - /*T_INVALID*/ reject, - /*T_EOF*/ reject, - /*T_LBRACE*/ object, - /*T_RBRACE*/ reject, - /*T_COMMA*/ reject, - /*T_ATOM*/ tokenMatch, - /*T_STRING*/ tokenMatch, - /*T_LBRACKET*/ array, - /*T_RBRACKET*/ reject, - /*T_COLON*/ reject, - }, - /*N_ARRAY_MAYBE_CONTINUE*/ - { - /*T_INVALID*/ reject, - /*T_EOF*/ reject, - /*T_LBRACE*/ reject, - /*T_RBRACE*/ reject, - /*T_COMMA*/ continueArray, - /*T_ATOM*/ reject, - /*T_STRING*/ reject, - /*T_LBRACKET*/ reject, - /*T_RBRACKET*/ finishArray, - /*T_COLON*/ reject, - }, - /*N_OBJECT*/ - { - /*T_INVALID*/ reject, - /*T_EOF*/ reject, - /*T_LBRACE*/ object, - /*T_RBRACE*/ reject, - /*T_COMMA*/ reject, - /*T_ATOM*/ reject, - /*T_STRING*/ reject, - /*T_LBRACKET*/ reject, - /*T_RBRACKET*/ reject, - /*T_COLON*/ reject, - }, - /*N_OBJECT_MAYBE_CONTINUE*/ - { - /*T_INVALID*/ reject, - /*T_EOF*/ reject, - /*T_LBRACE*/ reject, - /*T_RBRACE*/ finishObject, - /*T_COMMA*/ continueObject, - /*T_ATOM*/ reject, - /*T_STRING*/ reject, - /*T_LBRACKET*/ reject, - /*T_RBRACKET*/ reject, - /*T_COLON*/ reject, - }, + static constexpr continuation table[N_PAST_END] = { + /*T_STRING*/ string, + /*T_COLON*/ colon, + /*N_VALUE*/ value, + /*N_ARRAY_MAYBE_CONTINUE*/ arrayContinue, + /*N_OBJECT_MAYBE_CONTINUE*/ objectContinue, }; - Symbol nextToken(Symbol expected) { - if (len == 0) { - return expected == T_EOF ? T_EOF : T_INVALID; - } - maybeSkipWs(); - switch (expected) { - case N_OBJECT: - case T_LBRACE: - return parseLiteral("{") ? T_LBRACE : T_INVALID; - case T_RBRACE: - return parseLiteral("}") ? T_RBRACE : T_INVALID; - case T_COMMA: - return parseLiteral(",") ? T_COMMA : T_INVALID; - case T_STRING: - return parse_string() ? T_STRING : T_INVALID; - case T_LBRACKET: - return parseLiteral("[") ? T_LBRACKET : T_INVALID; - case T_RBRACKET: - return parseLiteral("]") ? T_RBRACKET : T_INVALID; - case T_COLON: - return parseLiteral(":") ? T_COLON : T_INVALID; - case T_ATOM: - case N_VALUE: - break; - case N_ARRAY_MAYBE_CONTINUE: - return parseLiteral(",") ? T_COMMA - : parseLiteral("]") ? T_RBRACKET - : T_INVALID; - case N_OBJECT_MAYBE_CONTINUE: - return parseLiteral(",") ? T_COMMA - : parseLiteral("}") ? T_RBRACE - : T_INVALID; - case T_INVALID: - case T_EOF: - case N_PAST_END: - default: - __builtin_unreachable(); - } - if (parseLiteral("{")) { - return T_LBRACE; - } else if (parseLiteral("[")) { - return T_LBRACKET; - } else if (parse_string()) { - return T_STRING; - } else if (parse_number()) { - return T_ATOM; - } else if (parseLiteral("true")) { - callbacks->on_true_literal(data); - return T_ATOM; - } else if (parseLiteral("false")) { - callbacks->on_false_literal(data); - return T_ATOM; - } else if (parseLiteral("null")) { - callbacks->on_null_literal(data); - return T_ATOM; - } - return T_INVALID; - } - char *buf; int len; const Callbacks *const callbacks;