Close to streaming parser

This commit is contained in:
2025-05-15 17:24:37 -04:00
parent 2507e34883
commit a9ebff72b0

View File

@@ -139,9 +139,19 @@ private:
// Terminals and Nonterminals. These appear in the stack of the pushdown // Terminals and Nonterminals. These appear in the stack of the pushdown
// automata // automata
enum Symbol : int8_t { enum Symbol : int8_t {
T_STRING, // Multibyte!
T_COLON, T_COLON,
T_TRUE,
T_FALSE,
T_NULL,
T_R,
T_U,
T_A,
T_L,
T_S,
T_DUBQUOTE,
// Nonterminals // Nonterminals
N_STRING, // Not including leading double quote, but including trailing quote
N_NUMBER,
N_VALUE, N_VALUE,
N_ARRAY_VALUE_OR_END, N_ARRAY_VALUE_OR_END,
N_OBJECT_VALUE_OR_END, N_OBJECT_VALUE_OR_END,
@@ -150,14 +160,24 @@ enum Symbol : int8_t {
N_PAST_END, // Must be last nonterminal N_PAST_END, // Must be last nonterminal
}; };
const char *symbolNames[] = { static const char *symbolNames[N_PAST_END] = {
"T_STRING", "COLON",
"T_COLON", "TRUE",
"N_VALUE", "FALSE",
"N_ARRAY_VALUE_OR_END", "NULL",
"N_OBJECT_VALUE_OR_END", "R",
"N_ARRAY_MAYBE_CONTINUE", "U",
"N_OBJECT_MAYBE_CONTINUE", "A",
"L",
"S",
"DUBQUOTE",
"STRING",
"NUMBER",
"VALUE",
"ARRAY_VALUE_OR_END",
"OBJECT_VALUE_OR_END",
"ARRAY_MAYBE_CONTINUE",
"OBJECT_MAYBE_CONTINUE",
}; };
constexpr static struct Tables { constexpr static struct Tables {
@@ -416,7 +436,9 @@ private:
// [0-9.]+. Could be adapted to have a streaming interface. Uses O(1) memory. // [0-9.]+. Could be adapted to have a streaming interface. Uses O(1) memory.
struct Parser2 { struct Parser2 {
Parser2(const Callbacks *callbacks, void *data) Parser2(const Callbacks *callbacks, void *data)
: callbacks(callbacks), data(data) {} : callbacks(callbacks), data(data) {
std::ignore = push({N_VALUE});
}
void prime(char *buf, int len) { void prime(char *buf, int len) {
this->buf = buf; this->buf = buf;
@@ -434,12 +456,7 @@ struct Parser2 {
S_OVERFLOW, S_OVERFLOW,
}; };
[[nodiscard]] Status parse() { [[nodiscard]] Status parse() { return keepGoing(this); }
if (Status s = push({N_VALUE})) {
return s;
}
return keepGoing(this);
}
Parser2(Parser2 const &) = delete; Parser2(Parser2 const &) = delete;
Parser2 &operator=(Parser2 const &) = delete; Parser2 &operator=(Parser2 const &) = delete;
@@ -485,9 +502,6 @@ private:
return S_OK; return S_OK;
} }
Status parse_string() { Status parse_string() {
if (Status s = parseLiteral("\"")) {
return s;
}
callbacks->on_begin_string(data); callbacks->on_begin_string(data);
auto *result = (char *)memchr(buf, '"', len()); auto *result = (char *)memchr(buf, '"', len());
if (result == nullptr) { if (result == nullptr) {
@@ -518,6 +532,9 @@ private:
return S_OK; return S_OK;
} }
auto top = *(self->stackPtr - 1); auto top = *(self->stackPtr - 1);
if (self->len() == 0) {
return S_AGAIN;
}
self->maybeSkipWs(); self->maybeSkipWs();
MUSTTAIL return table[top](self); MUSTTAIL return table[top](self);
} }
@@ -529,48 +546,65 @@ private:
self->pop(); self->pop();
MUSTTAIL return keepGoing(self); MUSTTAIL return keepGoing(self);
} }
static Status colon(Parser2 *self) { static Status number(Parser2 *self) {
if (Status s = self->parseLiteral(":")) { if (Status s = self->parse_number()) {
return s; return s;
} }
self->pop(); self->pop();
MUSTTAIL return keepGoing(self); MUSTTAIL return keepGoing(self);
} }
static Status value(Parser2 *self) { static Status value(Parser2 *self) {
if (self->parse_string() == S_OK) { switch (*self->buf) {
case '{':
++self->buf;
self->pop(); self->pop();
MUSTTAIL return keepGoing(self);
} else if (self->parse_number() == S_OK) {
self->pop();
MUSTTAIL return keepGoing(self);
} else if (self->parseLiteral("{") == S_OK) {
self->pop();
self->callbacks->on_begin_object(self->data);
if (Status s = self->push({N_OBJECT_VALUE_OR_END})) { if (Status s = self->push({N_OBJECT_VALUE_OR_END})) {
return s; return s;
} }
MUSTTAIL return keepGoing(self); break;
} else if (self->parseLiteral("[") == S_OK) { case '[':
++self->buf;
self->pop(); self->pop();
self->callbacks->on_begin_array(self->data);
if (Status s = self->push({N_ARRAY_VALUE_OR_END})) { if (Status s = self->push({N_ARRAY_VALUE_OR_END})) {
return s; return s;
} }
MUSTTAIL return keepGoing(self); break;
} else if (self->parseLiteral("true") == S_OK) { case '"':
++self->buf;
self->pop(); self->pop();
self->callbacks->on_true_literal(self->data); if (Status s = self->push({N_STRING})) {
MUSTTAIL return keepGoing(self); return s;
} else if (self->parseLiteral("false") == S_OK) { }
break;
case 't':
++self->buf;
self->pop(); self->pop();
self->callbacks->on_false_literal(self->data); if (Status s = self->push({T_R, T_U, T_TRUE})) {
MUSTTAIL return keepGoing(self); return s;
} else if (self->parseLiteral("null") == S_OK) { }
break;
case 'f':
++self->buf;
self->pop(); self->pop();
self->callbacks->on_null_literal(self->data); if (Status s = self->push({T_A, T_L, T_S, T_FALSE})) {
MUSTTAIL return keepGoing(self); return s;
}
break;
case 'n':
++self->buf;
self->pop();
if (Status s = self->push({T_U, T_L, T_NULL})) {
return s;
}
break;
default:
self->pop();
if (Status s = self->push({N_NUMBER})) {
return s;
}
break;
} }
return S_REJECT; MUSTTAIL return keepGoing(self);
} }
static Status arrayOrEnd(Parser2 *self) { static Status arrayOrEnd(Parser2 *self) {
if (self->parseLiteral("]") == S_OK) { if (self->parseLiteral("]") == S_OK) {
@@ -590,10 +624,10 @@ private:
self->pop(); self->pop();
self->callbacks->on_end_object(self->data); self->callbacks->on_end_object(self->data);
MUSTTAIL return keepGoing(self); MUSTTAIL return keepGoing(self);
} else { } else if (self->parseLiteral("\"") == S_OK) {
self->pop(); self->pop();
if (Status s = self->push( if (Status s = self->push(
{T_STRING, T_COLON, N_VALUE, N_OBJECT_MAYBE_CONTINUE})) { {N_STRING, T_COLON, N_VALUE, N_OBJECT_MAYBE_CONTINUE})) {
return s; return s;
} }
MUSTTAIL return keepGoing(self); MUSTTAIL return keepGoing(self);
@@ -617,8 +651,8 @@ private:
static Status objectContinue(Parser2 *self) { static Status objectContinue(Parser2 *self) {
if (self->parseLiteral(",") == S_OK) { if (self->parseLiteral(",") == S_OK) {
self->pop(); self->pop();
if (Status s = self->push( if (Status s = self->push({T_DUBQUOTE, N_STRING, T_COLON, N_VALUE,
{T_STRING, T_COLON, N_VALUE, N_OBJECT_MAYBE_CONTINUE})) { N_OBJECT_MAYBE_CONTINUE})) {
return s; return s;
} }
MUSTTAIL return keepGoing(self); MUSTTAIL return keepGoing(self);
@@ -629,10 +663,59 @@ private:
} }
return S_REJECT; return S_REJECT;
} }
// Handler for the T_COLON terminal: expects the ':' that separates an object
// key from its value.
//
// Returns S_AGAIN if no input bytes remain (the symbol stays on the stack so
// parsing can resume once more data is supplied), S_REJECT if the next byte is
// not ':', and otherwise whatever keepGoing() returns after the byte is
// consumed and the symbol is popped.
static Status colon(Parser2 *self) {
  // keepGoing() tests len() before it skips whitespace, so the buffer may
  // presumably be drained by the time we get here; never read past the end.
  if (self->len() == 0) {
    return S_AGAIN;
  }
  // Only advance on a match so a rejected parse leaves the cursor on the
  // offending byte, consistent with singleChar.
  if (*self->buf == ':') {
    ++self->buf;
    self->pop();
    MUSTTAIL return keepGoing(self);
  }
  return S_REJECT;
}
// Handler for the T_TRUE terminal: matches the final 'e' of the literal
// "true" and, on success, fires the on_true_literal callback.
//
// Returns S_AGAIN if no input bytes remain (the symbol stays on the stack so
// parsing can resume later), S_REJECT if the next byte is not 'e', and
// otherwise whatever keepGoing() returns.
static Status finishTrue(Parser2 *self) {
  // keepGoing() tests len() before it skips whitespace, so the buffer may
  // presumably be drained by the time we get here; never read past the end.
  if (self->len() == 0) {
    return S_AGAIN;
  }
  // Only advance on a match so a rejected parse leaves the cursor on the
  // offending byte, consistent with singleChar.
  if (*self->buf == 'e') {
    ++self->buf;
    self->pop();
    self->callbacks->on_true_literal(self->data);
    MUSTTAIL return keepGoing(self);
  }
  return S_REJECT;
}
// Handler for the T_FALSE terminal: matches the final 'e' of the literal
// "false" and, on success, fires the on_false_literal callback.
//
// Returns S_AGAIN if no input bytes remain (the symbol stays on the stack so
// parsing can resume later), S_REJECT if the next byte is not 'e', and
// otherwise whatever keepGoing() returns.
static Status finishFalse(Parser2 *self) {
  // keepGoing() tests len() before it skips whitespace, so the buffer may
  // presumably be drained by the time we get here; never read past the end.
  if (self->len() == 0) {
    return S_AGAIN;
  }
  // Only advance on a match so a rejected parse leaves the cursor on the
  // offending byte, consistent with singleChar.
  if (*self->buf == 'e') {
    ++self->buf;
    self->pop();
    self->callbacks->on_false_literal(self->data);
    MUSTTAIL return keepGoing(self);
  }
  return S_REJECT;
}
// Handler for the T_NULL terminal: matches the final 'l' of the literal
// "null" and, on success, fires the on_null_literal callback.
//
// Returns S_AGAIN if no input bytes remain (the symbol stays on the stack so
// parsing can resume later), S_REJECT if the next byte is not 'l', and
// otherwise whatever keepGoing() returns.
static Status finishNull(Parser2 *self) {
  // keepGoing() tests len() before it skips whitespace, so the buffer may
  // presumably be drained by the time we get here; never read past the end.
  if (self->len() == 0) {
    return S_AGAIN;
  }
  // Only advance on a match so a rejected parse leaves the cursor on the
  // offending byte, consistent with singleChar.
  if (*self->buf == 'l') {
    ++self->buf;
    self->pop();
    self->callbacks->on_null_literal(self->data);
    MUSTTAIL return keepGoing(self);
  }
  return S_REJECT;
}
// Generic handler for a terminal that is exactly one byte, kChar. Used for the
// interior letters of the true/false/null literals and for the opening double
// quote of an object key.
//
// Returns S_AGAIN if no input bytes remain (the symbol stays on the stack so
// parsing can resume later), S_REJECT if the next byte is not kChar (the
// cursor is left on the offending byte), and otherwise whatever keepGoing()
// returns after the byte is consumed and the symbol is popped.
template <char kChar> static Status singleChar(Parser2 *self) {
  // keepGoing() tests len() before it skips whitespace, so the buffer may
  // presumably be drained by the time we get here; never read past the end.
  if (self->len() == 0) {
    return S_AGAIN;
  }
  if (*self->buf == kChar) {
    ++self->buf;
    self->pop();
    MUSTTAIL return keepGoing(self);
  }
  return S_REJECT;
}
static constexpr continuation table[N_PAST_END] = { static constexpr continuation table[N_PAST_END] = {
/*T_STRING*/ string,
/*T_COLON*/ colon, /*T_COLON*/ colon,
/*T_TRUE*/ finishTrue,
/*T_FALSE*/ finishFalse,
/*T_NULL*/ finishNull,
/*T_R*/ singleChar<'r'>,
/*T_U*/ singleChar<'u'>,
/*T_A*/ singleChar<'a'>,
/*T_L*/ singleChar<'l'>,
/*T_S*/ singleChar<'s'>,
/*T_DUBQUOTE*/ singleChar<'"'>,
/*N_STRING*/ string,
/*N_NUMBER*/ number,
/*N_VALUE*/ value, /*N_VALUE*/ value,
/*N_ARRAY_VALUE_OR_END*/ arrayOrEnd, /*N_ARRAY_VALUE_OR_END*/ arrayOrEnd,
/*N_OBJECT_VALUE_OR_END*/ objectOrEnd, /*N_OBJECT_VALUE_OR_END*/ objectOrEnd,