Simplify table

Now we only have symbols that actually go on the stack
This commit is contained in:
2025-05-13 17:49:12 -04:00
parent cf08854664
commit c822d0ffaa

View File

@@ -138,35 +138,21 @@ private:
// Terminals and Nonterminals. These appear in the stack of the pushdown // Terminals and Nonterminals. These appear in the stack of the pushdown
// automata // automata
enum Symbol : int8_t { enum Symbol : int8_t {
// Terminals
T_INVALID,
T_EOF,
T_LBRACE,
T_RBRACE,
T_COMMA,
T_ATOM, // Multibyte!
T_STRING, // Multibyte! T_STRING, // Multibyte!
T_LBRACKET,
T_RBRACKET,
T_COLON, T_COLON,
T_PAST_END, // Must be last terminal
// Nonterminals // Nonterminals
N_VALUE = T_PAST_END, N_VALUE,
N_ARRAY_MAYBE_CONTINUE, N_ARRAY_MAYBE_CONTINUE,
N_OBJECT,
N_OBJECT_MAYBE_CONTINUE, N_OBJECT_MAYBE_CONTINUE,
N_PAST_END, // Must be last nonterminal N_PAST_END, // Must be last nonterminal
}; };
const char *symbolNames[] = { const char *symbolNames[] = {
"T_INVALID", "T_EOF", "T_STRING",
"T_LBRACE", "T_RBRACE", "T_COLON",
"T_COMMA", "T_ATOM", "N_VALUE",
"T_STRING", "T_LBRACKET", "N_ARRAY_MAYBE_CONTINUE",
"T_RBRACKET", "T_COLON", "N_OBJECT_MAYBE_CONTINUE",
"N_VALUE", "N_ARRAY_MAYBE_CONTINUE",
"N_OBJECT", "N_OBJECT_MAYBE_CONTINUE",
"N_PAST_END",
}; };
namespace { namespace {
@@ -503,316 +489,110 @@ private:
typedef bool (*continuation)(Parser2 *); typedef bool (*continuation)(Parser2 *);
[[maybe_unused]] void debugPrint(Symbol token) { [[maybe_unused]] void debugPrint() {
printf("token: %s\n", symbolNames[token]);
for (int i = 0; i < stackPtr - stack; ++i) { for (int i = 0; i < stackPtr - stack; ++i) {
printf("%s ", symbolNames[stack[i]]); printf("%s ", symbolNames[stack[i]]);
} }
printf("\n"); printf("\n");
} }
static bool tokenMatch(Parser2 *self) {
self->pop();
MUSTTAIL return keepGoing(self);
}
static bool keepGoing(Parser2 *self) { static bool keepGoing(Parser2 *self) {
// self->debugPrint();
if (self->empty()) { if (self->empty()) {
return true; return true;
} }
auto top = *(self->stackPtr - 1); auto top = *(self->stackPtr - 1);
auto token = self->nextToken(top); self->maybeSkipWs();
// self->debugPrint(token); MUSTTAIL return table[top](self);
MUSTTAIL return table[top][token](self);
} }
static bool reject(Parser2 *) { return false; } static bool string(Parser2 *self) {
static bool object(Parser2 *self) { if (!self->parse_string()) {
return false;
}
self->pop();
MUSTTAIL return keepGoing(self);
}
static bool colon(Parser2 *self) {
if (!self->parseLiteral(":")) {
return false;
}
self->pop();
MUSTTAIL return keepGoing(self);
}
static bool value(Parser2 *self) {
if (self->parse_string()) {
self->pop();
MUSTTAIL return keepGoing(self);
} else if (self->parse_number()) {
self->pop();
MUSTTAIL return keepGoing(self);
} else if (self->parseLiteral("{")) {
self->pop(); self->pop();
self->callbacks->on_begin_object(self->data); self->callbacks->on_begin_object(self->data);
if (!self->push({T_STRING, T_COLON, N_VALUE, N_OBJECT_MAYBE_CONTINUE})) { if (!self->push({T_STRING, T_COLON, N_VALUE, N_OBJECT_MAYBE_CONTINUE})) {
return false; return false;
} }
MUSTTAIL return keepGoing(self); MUSTTAIL return keepGoing(self);
} } else if (self->parseLiteral("[")) {
static bool array(Parser2 *self) {
self->pop(); self->pop();
self->callbacks->on_begin_array(self->data); self->callbacks->on_begin_array(self->data);
if (!self->push({N_VALUE, N_ARRAY_MAYBE_CONTINUE})) { if (!self->push({N_VALUE, N_ARRAY_MAYBE_CONTINUE})) {
return false; return false;
} }
MUSTTAIL return keepGoing(self); MUSTTAIL return keepGoing(self);
} else if (self->parseLiteral("true")) {
self->pop();
self->callbacks->on_true_literal(self->data);
MUSTTAIL return keepGoing(self);
} else if (self->parseLiteral("false")) {
self->pop();
self->callbacks->on_false_literal(self->data);
MUSTTAIL return keepGoing(self);
} else if (self->parseLiteral("null")) {
self->pop();
self->callbacks->on_null_literal(self->data);
MUSTTAIL return keepGoing(self);
} }
static bool continueArray(Parser2 *self) { return false;
}
static bool arrayContinue(Parser2 *self) {
if (self->parseLiteral(",")) {
self->pop(); self->pop();
if (!self->push({N_VALUE, N_ARRAY_MAYBE_CONTINUE})) { if (!self->push({N_VALUE, N_ARRAY_MAYBE_CONTINUE})) {
return false; return false;
} }
MUSTTAIL return keepGoing(self); MUSTTAIL return keepGoing(self);
} else if (self->parseLiteral("]")) {
self->pop();
self->callbacks->on_end_array(self->data);
MUSTTAIL return keepGoing(self);
} }
static bool continueObject(Parser2 *self) { return false;
}
static bool objectContinue(Parser2 *self) {
if (self->parseLiteral(",")) {
self->pop(); self->pop();
if (!self->push({T_STRING, T_COLON, N_VALUE, N_OBJECT_MAYBE_CONTINUE})) { if (!self->push({T_STRING, T_COLON, N_VALUE, N_OBJECT_MAYBE_CONTINUE})) {
return false; return false;
} }
MUSTTAIL return keepGoing(self); MUSTTAIL return keepGoing(self);
} } else if (self->parseLiteral("}")) {
static bool finishArray(Parser2 *self) {
self->pop();
self->callbacks->on_end_array(self->data);
MUSTTAIL return keepGoing(self);
}
static bool finishObject(Parser2 *self) {
self->pop(); self->pop();
self->callbacks->on_end_object(self->data); self->callbacks->on_end_object(self->data);
MUSTTAIL return keepGoing(self); MUSTTAIL return keepGoing(self);
} }
return false;
}
// table[nonterminal][terminal] static constexpr continuation table[N_PAST_END] = {
static constexpr continuation table[N_PAST_END][T_PAST_END] = { /*T_STRING*/ string,
/*T_INVALID*/ /*T_COLON*/ colon,
{ /*N_VALUE*/ value,
/*T_INVALID*/ reject, /*N_ARRAY_MAYBE_CONTINUE*/ arrayContinue,
/*T_EOF*/ reject, /*N_OBJECT_MAYBE_CONTINUE*/ objectContinue,
/*T_LBRACE*/ reject,
/*T_RBRACE*/ reject,
/*T_COMMA*/ reject,
/*T_ATOM*/ reject,
/*T_STRING*/ reject,
/*T_LBRACKET*/ reject,
/*T_RBRACKET*/ reject,
/*T_COLON*/ reject,
},
/*T_EOF*/
{
/*T_INVALID*/ reject,
/*T_EOF*/ tokenMatch,
/*T_LBRACE*/ reject,
/*T_RBRACE*/ reject,
/*T_COMMA*/ reject,
/*T_ATOM*/ reject,
/*T_STRING*/ reject,
/*T_LBRACKET*/ reject,
/*T_RBRACKET*/ reject,
/*T_COLON*/ reject,
},
/*T_LBRACE*/
{
/*T_INVALID*/ reject,
/*T_EOF*/ reject,
/*T_LBRACE*/ tokenMatch,
/*T_RBRACE*/ reject,
/*T_COMMA*/ reject,
/*T_ATOM*/ reject,
/*T_STRING*/ reject,
/*T_LBRACKET*/ reject,
/*T_RBRACKET*/ reject,
/*T_COLON*/ reject,
},
/*T_RBRACE*/
{
/*T_INVALID*/ reject,
/*T_EOF*/ reject,
/*T_LBRACE*/ reject,
/*T_RBRACE*/ tokenMatch,
/*T_COMMA*/ reject,
/*T_ATOM*/ reject,
/*T_STRING*/ reject,
/*T_LBRACKET*/ reject,
/*T_RBRACKET*/ reject,
/*T_COLON*/ reject,
},
/*T_COMMA*/
{
/*T_INVALID*/ reject,
/*T_EOF*/ reject,
/*T_LBRACE*/ reject,
/*T_RBRACE*/ reject,
/*T_COMMA*/ tokenMatch,
/*T_ATOM*/ reject,
/*T_STRING*/ reject,
/*T_LBRACKET*/ reject,
/*T_RBRACKET*/ reject,
/*T_COLON*/ reject,
},
/*T_ATOM*/
{
/*T_INVALID*/ reject,
/*T_EOF*/ reject,
/*T_LBRACE*/ reject,
/*T_RBRACE*/ reject,
/*T_COMMA*/ reject,
/*T_ATOM*/ tokenMatch,
/*T_STRING*/ reject,
/*T_LBRACKET*/ reject,
/*T_RBRACKET*/ reject,
/*T_COLON*/ reject,
},
/*T_STRING*/
{
/*T_INVALID*/ reject,
/*T_EOF*/ reject,
/*T_LBRACE*/ reject,
/*T_RBRACE*/ reject,
/*T_COMMA*/ reject,
/*T_ATOM*/ reject,
/*T_STRING*/ tokenMatch,
/*T_LBRACKET*/ reject,
/*T_RBRACKET*/ reject,
/*T_COLON*/ reject,
},
/*T_LBRACKET*/
{
/*T_INVALID*/ reject,
/*T_EOF*/ reject,
/*T_LBRACE*/ reject,
/*T_RBRACE*/ reject,
/*T_COMMA*/ reject,
/*T_ATOM*/ reject,
/*T_STRING*/ reject,
/*T_LBRACKET*/ tokenMatch,
/*T_RBRACKET*/ reject,
/*T_COLON*/ reject,
},
/*T_RBRACKET*/
{
/*T_INVALID*/ reject,
/*T_EOF*/ reject,
/*T_LBRACE*/ reject,
/*T_RBRACE*/ reject,
/*T_COMMA*/ reject,
/*T_ATOM*/ reject,
/*T_STRING*/ reject,
/*T_LBRACKET*/ reject,
/*T_RBRACKET*/ tokenMatch,
/*T_COLON*/ reject,
},
/*T_COLON*/
{
/*T_INVALID*/ reject,
/*T_EOF*/ reject,
/*T_LBRACE*/ reject,
/*T_RBRACE*/ reject,
/*T_COMMA*/ reject,
/*T_ATOM*/ reject,
/*T_STRING*/ reject,
/*T_LBRACKET*/ reject,
/*T_RBRACKET*/ reject,
/*T_COLON*/ tokenMatch,
},
/*N_VALUE*/
{
/*T_INVALID*/ reject,
/*T_EOF*/ reject,
/*T_LBRACE*/ object,
/*T_RBRACE*/ reject,
/*T_COMMA*/ reject,
/*T_ATOM*/ tokenMatch,
/*T_STRING*/ tokenMatch,
/*T_LBRACKET*/ array,
/*T_RBRACKET*/ reject,
/*T_COLON*/ reject,
},
/*N_ARRAY_MAYBE_CONTINUE*/
{
/*T_INVALID*/ reject,
/*T_EOF*/ reject,
/*T_LBRACE*/ reject,
/*T_RBRACE*/ reject,
/*T_COMMA*/ continueArray,
/*T_ATOM*/ reject,
/*T_STRING*/ reject,
/*T_LBRACKET*/ reject,
/*T_RBRACKET*/ finishArray,
/*T_COLON*/ reject,
},
/*N_OBJECT*/
{
/*T_INVALID*/ reject,
/*T_EOF*/ reject,
/*T_LBRACE*/ object,
/*T_RBRACE*/ reject,
/*T_COMMA*/ reject,
/*T_ATOM*/ reject,
/*T_STRING*/ reject,
/*T_LBRACKET*/ reject,
/*T_RBRACKET*/ reject,
/*T_COLON*/ reject,
},
/*N_OBJECT_MAYBE_CONTINUE*/
{
/*T_INVALID*/ reject,
/*T_EOF*/ reject,
/*T_LBRACE*/ reject,
/*T_RBRACE*/ finishObject,
/*T_COMMA*/ continueObject,
/*T_ATOM*/ reject,
/*T_STRING*/ reject,
/*T_LBRACKET*/ reject,
/*T_RBRACKET*/ reject,
/*T_COLON*/ reject,
},
}; };
Symbol nextToken(Symbol expected) {
if (len == 0) {
return expected == T_EOF ? T_EOF : T_INVALID;
}
maybeSkipWs();
switch (expected) {
case N_OBJECT:
case T_LBRACE:
return parseLiteral("{") ? T_LBRACE : T_INVALID;
case T_RBRACE:
return parseLiteral("}") ? T_RBRACE : T_INVALID;
case T_COMMA:
return parseLiteral(",") ? T_COMMA : T_INVALID;
case T_STRING:
return parse_string() ? T_STRING : T_INVALID;
case T_LBRACKET:
return parseLiteral("[") ? T_LBRACKET : T_INVALID;
case T_RBRACKET:
return parseLiteral("]") ? T_RBRACKET : T_INVALID;
case T_COLON:
return parseLiteral(":") ? T_COLON : T_INVALID;
case T_ATOM:
case N_VALUE:
break;
case N_ARRAY_MAYBE_CONTINUE:
return parseLiteral(",") ? T_COMMA
: parseLiteral("]") ? T_RBRACKET
: T_INVALID;
case N_OBJECT_MAYBE_CONTINUE:
return parseLiteral(",") ? T_COMMA
: parseLiteral("}") ? T_RBRACE
: T_INVALID;
case T_INVALID:
case T_EOF:
case N_PAST_END:
default:
__builtin_unreachable();
}
if (parseLiteral("{")) {
return T_LBRACE;
} else if (parseLiteral("[")) {
return T_LBRACKET;
} else if (parse_string()) {
return T_STRING;
} else if (parse_number()) {
return T_ATOM;
} else if (parseLiteral("true")) {
callbacks->on_true_literal(data);
return T_ATOM;
} else if (parseLiteral("false")) {
callbacks->on_false_literal(data);
return T_ATOM;
} else if (parseLiteral("null")) {
callbacks->on_null_literal(data);
return T_ATOM;
}
return T_INVALID;
}
char *buf; char *buf;
int len; int len;
const Callbacks *const callbacks; const Callbacks *const callbacks;