Close to streaming parser

This commit is contained in:
2025-05-15 17:24:37 -04:00
parent 2507e34883
commit a9ebff72b0

View File

@@ -139,9 +139,19 @@ private:
// Terminals and nonterminals. These appear on the stack of the pushdown
// automaton.
enum Symbol : int8_t {
T_STRING, // Multibyte!
T_COLON,
T_TRUE,
T_FALSE,
T_NULL,
T_R,
T_U,
T_A,
T_L,
T_S,
T_DUBQUOTE,
// Nonterminals
N_STRING, // Not including leading double quote, but including trailing quote
N_NUMBER,
N_VALUE,
N_ARRAY_VALUE_OR_END,
N_OBJECT_VALUE_OR_END,
@@ -150,14 +160,24 @@ enum Symbol : int8_t {
N_PAST_END, // Must be last nonterminal
};
const char *symbolNames[] = {
"T_STRING",
"T_COLON",
"N_VALUE",
"N_ARRAY_VALUE_OR_END",
"N_OBJECT_VALUE_OR_END",
"N_ARRAY_MAYBE_CONTINUE",
"N_OBJECT_MAYBE_CONTINUE",
static const char *symbolNames[N_PAST_END] = {
"COLON",
"TRUE",
"FALSE",
"NULL",
"R",
"U",
"A",
"L",
"S",
"DUBQUOTE",
"STRING",
"NUMBER",
"VALUE",
"ARRAY_VALUE_OR_END",
"OBJECT_VALUE_OR_END",
"ARRAY_MAYBE_CONTINUE",
"OBJECT_MAYBE_CONTINUE",
};
constexpr static struct Tables {
@@ -416,7 +436,9 @@ private:
// [0-9.]+. Could be adapted to have a streaming interface. Uses O(1) memory.
struct Parser2 {
Parser2(const Callbacks *callbacks, void *data)
: callbacks(callbacks), data(data) {}
: callbacks(callbacks), data(data) {
std::ignore = push({N_VALUE});
}
void prime(char *buf, int len) {
this->buf = buf;
@@ -434,12 +456,7 @@ struct Parser2 {
S_OVERFLOW,
};
[[nodiscard]] Status parse() {
if (Status s = push({N_VALUE})) {
return s;
}
return keepGoing(this);
}
[[nodiscard]] Status parse() { return keepGoing(this); }
Parser2(Parser2 const &) = delete;
Parser2 &operator=(Parser2 const &) = delete;
@@ -485,9 +502,6 @@ private:
return S_OK;
}
Status parse_string() {
if (Status s = parseLiteral("\"")) {
return s;
}
callbacks->on_begin_string(data);
auto *result = (char *)memchr(buf, '"', len());
if (result == nullptr) {
@@ -518,6 +532,9 @@ private:
return S_OK;
}
auto top = *(self->stackPtr - 1);
if (self->len() == 0) {
return S_AGAIN;
}
self->maybeSkipWs();
MUSTTAIL return table[top](self);
}
@@ -529,48 +546,65 @@ private:
self->pop();
MUSTTAIL return keepGoing(self);
}
static Status colon(Parser2 *self) {
if (Status s = self->parseLiteral(":")) {
static Status number(Parser2 *self) {
if (Status s = self->parse_number()) {
return s;
}
self->pop();
MUSTTAIL return keepGoing(self);
}
static Status value(Parser2 *self) {
if (self->parse_string() == S_OK) {
switch (*self->buf) {
case '{':
++self->buf;
self->pop();
MUSTTAIL return keepGoing(self);
} else if (self->parse_number() == S_OK) {
self->pop();
MUSTTAIL return keepGoing(self);
} else if (self->parseLiteral("{") == S_OK) {
self->pop();
self->callbacks->on_begin_object(self->data);
if (Status s = self->push({N_OBJECT_VALUE_OR_END})) {
return s;
}
MUSTTAIL return keepGoing(self);
} else if (self->parseLiteral("[") == S_OK) {
break;
case '[':
++self->buf;
self->pop();
self->callbacks->on_begin_array(self->data);
if (Status s = self->push({N_ARRAY_VALUE_OR_END})) {
return s;
}
MUSTTAIL return keepGoing(self);
} else if (self->parseLiteral("true") == S_OK) {
break;
case '"':
++self->buf;
self->pop();
self->callbacks->on_true_literal(self->data);
MUSTTAIL return keepGoing(self);
} else if (self->parseLiteral("false") == S_OK) {
if (Status s = self->push({N_STRING})) {
return s;
}
break;
case 't':
++self->buf;
self->pop();
self->callbacks->on_false_literal(self->data);
MUSTTAIL return keepGoing(self);
} else if (self->parseLiteral("null") == S_OK) {
if (Status s = self->push({T_R, T_U, T_TRUE})) {
return s;
}
break;
case 'f':
++self->buf;
self->pop();
self->callbacks->on_null_literal(self->data);
MUSTTAIL return keepGoing(self);
if (Status s = self->push({T_A, T_L, T_S, T_FALSE})) {
return s;
}
break;
case 'n':
++self->buf;
self->pop();
if (Status s = self->push({T_U, T_L, T_NULL})) {
return s;
}
break;
default:
self->pop();
if (Status s = self->push({N_NUMBER})) {
return s;
}
break;
}
return S_REJECT;
MUSTTAIL return keepGoing(self);
}
static Status arrayOrEnd(Parser2 *self) {
if (self->parseLiteral("]") == S_OK) {
@@ -590,10 +624,10 @@ private:
self->pop();
self->callbacks->on_end_object(self->data);
MUSTTAIL return keepGoing(self);
} else {
} else if (self->parseLiteral("\"") == S_OK) {
self->pop();
if (Status s = self->push(
{T_STRING, T_COLON, N_VALUE, N_OBJECT_MAYBE_CONTINUE})) {
{N_STRING, T_COLON, N_VALUE, N_OBJECT_MAYBE_CONTINUE})) {
return s;
}
MUSTTAIL return keepGoing(self);
@@ -617,8 +651,8 @@ private:
static Status objectContinue(Parser2 *self) {
if (self->parseLiteral(",") == S_OK) {
self->pop();
if (Status s = self->push(
{T_STRING, T_COLON, N_VALUE, N_OBJECT_MAYBE_CONTINUE})) {
if (Status s = self->push({T_DUBQUOTE, N_STRING, T_COLON, N_VALUE,
N_OBJECT_MAYBE_CONTINUE})) {
return s;
}
MUSTTAIL return keepGoing(self);
@@ -629,10 +663,59 @@ private:
}
return S_REJECT;
}
// Continuation registered for T_COLON in `table`. Reads one byte at the
// input cursor and requires it to be ':'. On a match it calls pop() and
// tail-calls keepGoing(); otherwise it returns S_REJECT.
// Note: the cursor is advanced past the inspected byte in both cases.
static Status colon(Parser2 *self) {
  const auto consumed = *self->buf++;
  if (consumed != ':') {
    return S_REJECT;
  }
  self->pop();
  MUSTTAIL return keepGoing(self);
}
// Continuation registered for T_TRUE in `table`. Reads one byte at the
// input cursor and requires it to be 'e'. On a match it calls pop(), fires
// the on_true_literal callback, and tail-calls keepGoing(); otherwise it
// returns S_REJECT.
// Note: the cursor is advanced past the inspected byte in both cases.
static Status finishTrue(Parser2 *self) {
  const auto consumed = *self->buf++;
  if (consumed != 'e') {
    return S_REJECT;
  }
  self->pop();
  self->callbacks->on_true_literal(self->data);
  MUSTTAIL return keepGoing(self);
}
// Continuation registered for T_FALSE in `table`. Reads one byte at the
// input cursor and requires it to be 'e'. On a match it calls pop(), fires
// the on_false_literal callback, and tail-calls keepGoing(); otherwise it
// returns S_REJECT.
// Note: the cursor is advanced past the inspected byte in both cases.
static Status finishFalse(Parser2 *self) {
  const auto consumed = *self->buf++;
  if (consumed != 'e') {
    return S_REJECT;
  }
  self->pop();
  self->callbacks->on_false_literal(self->data);
  MUSTTAIL return keepGoing(self);
}
// Continuation registered for T_NULL in `table`. Reads one byte at the
// input cursor and requires it to be 'l'. On a match it calls pop(), fires
// the on_null_literal callback, and tail-calls keepGoing(); otherwise it
// returns S_REJECT.
// Note: the cursor is advanced past the inspected byte in both cases.
static Status finishNull(Parser2 *self) {
  const auto consumed = *self->buf++;
  if (consumed != 'l') {
    return S_REJECT;
  }
  self->pop();
  self->callbacks->on_null_literal(self->data);
  MUSTTAIL return keepGoing(self);
}
// Generic continuation that matches exactly one expected byte, kChar, at the
// input cursor. If the byte differs, returns S_REJECT and leaves the cursor
// untouched. If it matches, advances the cursor by one, calls pop(), and
// tail-calls keepGoing().
template <char kChar> static Status singleChar(Parser2 *self) {
  if (*self->buf != kChar) {
    return S_REJECT;
  }
  ++self->buf;
  self->pop();
  MUSTTAIL return keepGoing(self);
}
static constexpr continuation table[N_PAST_END] = {
/*T_STRING*/ string,
/*T_COLON*/ colon,
/*T_TRUE*/ finishTrue,
/*T_FALSE*/ finishFalse,
/*T_NULL*/ finishNull,
/*T_R*/ singleChar<'r'>,
/*T_U*/ singleChar<'u'>,
/*T_A*/ singleChar<'a'>,
/*T_L*/ singleChar<'l'>,
/*T_S*/ singleChar<'s'>,
/*T_DUBQUOTE*/ singleChar<'"'>,
/*N_STRING*/ string,
/*N_NUMBER*/ number,
/*N_VALUE*/ value,
/*N_ARRAY_VALUE_OR_END*/ arrayOrEnd,
/*N_OBJECT_VALUE_OR_END*/ objectOrEnd,