Make use of "expected symbol" info

This commit is contained in:
2025-05-13 13:24:45 -04:00
parent 052e452669
commit 1fab01516e

View File

@@ -448,16 +448,20 @@ private:
if (len < litLen) {
return false;
}
len -= litLen;
return memcmp(std::exchange(buf, buf + litLen), literal, litLen) == 0;
if (memcmp(buf, literal, litLen) == 0) {
len -= litLen;
buf += litLen;
return true;
}
return false;
}
bool parse_number() {
callbacks->on_begin_number(data);
char *const bufBefore = buf;
if (len == 0 || !('0' <= *buf && *buf <= '9')) {
return false;
}
callbacks->on_begin_number(data);
for (;;) {
if (len == 0) {
return false;
}
if ('0' <= *buf && *buf <= '9') {
++buf;
--len;
@@ -473,10 +477,10 @@ private:
return true;
}
bool parse_string() {
callbacks->on_begin_string(data);
if (!parseLiteral("\"")) {
return false;
}
callbacks->on_begin_string(data);
auto *result = (char *)memchr(buf, '"', len);
if (result == nullptr) {
return false;
@@ -526,26 +530,6 @@ private:
}
MUSTTAIL return keepGoing(self);
}
// Handler for an atom token: after popping, reports the token through the
// callback selected by its first byte, then continues via keepGoing().
//   't' -> on_true_literal, 'f' -> on_false_literal, 'n' -> on_null_literal,
//   anything else -> on_begin_number / on_number_data / on_end_number.
static bool atom(Parser2 *self) {
  self->pop();
  // self->bufBefore points at the first byte of the token being reported.
  switch (*self->bufBefore) {
  case 't':
    self->callbacks->on_true_literal(self->data);
    break;
  case 'f':
    self->callbacks->on_false_literal(self->data);
    break;
  case 'n':
    self->callbacks->on_null_literal(self->data);
    break;
  default:
    // Any other leading byte is reported as a number.
    self->callbacks->on_begin_number(self->data);
    // NOTE(review): the reported span starts one byte after bufBefore and is
    // two bytes shorter than [bufBefore, buf), i.e. the first and last bytes
    // are dropped. That looks like quote-stripping for strings -- confirm it
    // is intended for numbers.
    self->callbacks->on_number_data(self->data, self->bufBefore + 1,
                                    self->buf - self->bufBefore - 2);
    self->callbacks->on_end_number(self->data);
    break;
  }
  MUSTTAIL return keepGoing(self);
}
// Handler for a string token: calls self->pop() and then continues via
// keepGoing(self). No callback is invoked from this handler.
// NOTE(review): pop() presumably removes the matched symbol from the parser's
// stack -- confirm against its definition.
static bool string(Parser2 *self) {
self->pop();
// MUSTTAIL is a macro defined outside this view; presumably it requests a
// guaranteed tail call so handler chains do not grow the stack -- confirm.
MUSTTAIL return keepGoing(self);
}
static bool array(Parser2 *self) {
self->pop();
self->callbacks->on_begin_array(self->data);
@@ -718,8 +702,8 @@ private:
/*T_LBRACE*/ object,
/*T_RBRACE*/ reject,
/*T_COMMA*/ reject,
/*T_ATOM*/ atom,
/*T_STRING*/ string,
/*T_ATOM*/ tokenMatch,
/*T_STRING*/ tokenMatch,
/*T_LBRACKET*/ array,
/*T_RBRACKET*/ reject,
/*T_COLON*/ reject,
@@ -765,55 +749,71 @@ private:
},
};
const char *bufBefore;
Symbol nextToken(Symbol expected) {
maybeSkipWs();
bufBefore = buf;
if (len == 0) {
return T_EOF;
return expected == T_EOF ? T_EOF : T_INVALID;
}
if (*buf == '{') {
parseLiteral("{");
maybeSkipWs();
switch (expected) {
case N_OBJECT:
case T_LBRACE:
return parseLiteral("{") ? T_LBRACE : T_INVALID;
case T_RBRACE:
return parseLiteral("}") ? T_RBRACE : T_INVALID;
case T_COMMA:
return parseLiteral(",") ? T_COMMA : T_INVALID;
case T_STRING:
return parse_string() ? T_STRING : T_INVALID;
case T_LBRACKET:
return parseLiteral("[") ? T_LBRACKET : T_INVALID;
case T_RBRACKET:
return parseLiteral("]") ? T_RBRACKET : T_INVALID;
case T_COLON:
return parseLiteral(":") ? T_COLON : T_INVALID;
case T_ATOM:
case N_VALUE:
break;
case N_ARRAY_MAYBE_CONTINUE:
return parseLiteral(",") ? T_COMMA
: parseLiteral("]") ? T_RBRACKET
: T_INVALID;
case N_OBJECT_MAYBE_CONTINUE:
return parseLiteral(",") ? T_COMMA
: parseLiteral("}") ? T_RBRACE
: T_INVALID;
case T_INVALID:
case T_EOF:
case N_PAST_END:
default:
__builtin_unreachable();
}
if (parseLiteral("{")) {
return T_LBRACE;
} else if (*buf == '[') {
parseLiteral("[");
} else if (parseLiteral("[")) {
return T_LBRACKET;
} else if (*buf == '}') {
parseLiteral("}");
return T_RBRACE;
} else if (*buf == ']') {
parseLiteral("]");
return T_RBRACKET;
} else if (*buf == ':') {
parseLiteral(":");
return T_COLON;
} else if (*buf == ',') {
parseLiteral(",");
return T_COMMA;
} else if (*buf == '"') {
if (!parse_string()) {
return T_INVALID;
}
} else if (parse_string()) {
return T_STRING;
} else if (*buf == 't') {
if (!parseLiteral("true")) {
return T_INVALID;
}
} else if (parse_number()) {
return T_ATOM;
} else if (*buf == 'f') {
if (!parseLiteral("false")) {
return T_INVALID;
}
} else if (*buf == 'n') {
if (!parseLiteral("null")) {
return T_INVALID;
}
} else {
if (!parse_number()) {
return T_INVALID;
}
} else if (parseLiteral("true")) {
callbacks->on_true_literal(data);
return T_ATOM;
} else if (parseLiteral("false")) {
callbacks->on_false_literal(data);
return T_ATOM;
} else if (parseLiteral("null")) {
callbacks->on_null_literal(data);
return T_ATOM;
} else if (parseLiteral("}")) {
return T_RBRACE;
} else if (parseLiteral("]")) {
return T_RBRACKET;
} else if (parseLiteral(":")) {
return T_COLON;
} else if (parseLiteral(",")) {
return T_COMMA;
}
return T_ATOM;
return T_INVALID;
}
char *buf;