Close to streaming parser
This commit is contained in:
179
src/test.cpp
179
src/test.cpp
@@ -139,9 +139,19 @@ private:
|
|||||||
// Terminals and Nonterminals. These appear in the stack of the pushdown
|
// Terminals and Nonterminals. These appear in the stack of the pushdown
|
||||||
// automata
|
// automata
|
||||||
enum Symbol : int8_t {
|
enum Symbol : int8_t {
|
||||||
T_STRING, // Multibyte!
|
|
||||||
T_COLON,
|
T_COLON,
|
||||||
|
T_TRUE,
|
||||||
|
T_FALSE,
|
||||||
|
T_NULL,
|
||||||
|
T_R,
|
||||||
|
T_U,
|
||||||
|
T_A,
|
||||||
|
T_L,
|
||||||
|
T_S,
|
||||||
|
T_DUBQUOTE,
|
||||||
// Nonterminals
|
// Nonterminals
|
||||||
|
N_STRING, // Not including leading double quote, but including trailing quote
|
||||||
|
N_NUMBER,
|
||||||
N_VALUE,
|
N_VALUE,
|
||||||
N_ARRAY_VALUE_OR_END,
|
N_ARRAY_VALUE_OR_END,
|
||||||
N_OBJECT_VALUE_OR_END,
|
N_OBJECT_VALUE_OR_END,
|
||||||
@@ -150,14 +160,24 @@ enum Symbol : int8_t {
|
|||||||
N_PAST_END, // Must be last nonterminal
|
N_PAST_END, // Must be last nonterminal
|
||||||
};
|
};
|
||||||
|
|
||||||
const char *symbolNames[] = {
|
static const char *symbolNames[N_PAST_END] = {
|
||||||
"T_STRING",
|
"COLON",
|
||||||
"T_COLON",
|
"TRUE",
|
||||||
"N_VALUE",
|
"FALSE",
|
||||||
"N_ARRAY_VALUE_OR_END",
|
"NULL",
|
||||||
"N_OBJECT_VALUE_OR_END",
|
"R",
|
||||||
"N_ARRAY_MAYBE_CONTINUE",
|
"U",
|
||||||
"N_OBJECT_MAYBE_CONTINUE",
|
"A",
|
||||||
|
"L",
|
||||||
|
"S",
|
||||||
|
"DUBQUOTE",
|
||||||
|
"STRING",
|
||||||
|
"NUMBER",
|
||||||
|
"VALUE",
|
||||||
|
"ARRAY_VALUE_OR_END",
|
||||||
|
"OBJECT_VALUE_OR_END",
|
||||||
|
"ARRAY_MAYBE_CONTINUE",
|
||||||
|
"OBJECT_MAYBE_CONTINUE",
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr static struct Tables {
|
constexpr static struct Tables {
|
||||||
@@ -416,7 +436,9 @@ private:
|
|||||||
// [0-9.]+. Could be adapted to have a streaming interface. Uses O(1) memory.
|
// [0-9.]+. Could be adapted to have a streaming interface. Uses O(1) memory.
|
||||||
struct Parser2 {
|
struct Parser2 {
|
||||||
Parser2(const Callbacks *callbacks, void *data)
|
Parser2(const Callbacks *callbacks, void *data)
|
||||||
: callbacks(callbacks), data(data) {}
|
: callbacks(callbacks), data(data) {
|
||||||
|
std::ignore = push({N_VALUE});
|
||||||
|
}
|
||||||
|
|
||||||
void prime(char *buf, int len) {
|
void prime(char *buf, int len) {
|
||||||
this->buf = buf;
|
this->buf = buf;
|
||||||
@@ -434,12 +456,7 @@ struct Parser2 {
|
|||||||
S_OVERFLOW,
|
S_OVERFLOW,
|
||||||
};
|
};
|
||||||
|
|
||||||
[[nodiscard]] Status parse() {
|
[[nodiscard]] Status parse() { return keepGoing(this); }
|
||||||
if (Status s = push({N_VALUE})) {
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
return keepGoing(this);
|
|
||||||
}
|
|
||||||
|
|
||||||
Parser2(Parser2 const &) = delete;
|
Parser2(Parser2 const &) = delete;
|
||||||
Parser2 &operator=(Parser2 const &) = delete;
|
Parser2 &operator=(Parser2 const &) = delete;
|
||||||
@@ -485,9 +502,6 @@ private:
|
|||||||
return S_OK;
|
return S_OK;
|
||||||
}
|
}
|
||||||
Status parse_string() {
|
Status parse_string() {
|
||||||
if (Status s = parseLiteral("\"")) {
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
callbacks->on_begin_string(data);
|
callbacks->on_begin_string(data);
|
||||||
auto *result = (char *)memchr(buf, '"', len());
|
auto *result = (char *)memchr(buf, '"', len());
|
||||||
if (result == nullptr) {
|
if (result == nullptr) {
|
||||||
@@ -518,6 +532,9 @@ private:
|
|||||||
return S_OK;
|
return S_OK;
|
||||||
}
|
}
|
||||||
auto top = *(self->stackPtr - 1);
|
auto top = *(self->stackPtr - 1);
|
||||||
|
if (self->len() == 0) {
|
||||||
|
return S_AGAIN;
|
||||||
|
}
|
||||||
self->maybeSkipWs();
|
self->maybeSkipWs();
|
||||||
MUSTTAIL return table[top](self);
|
MUSTTAIL return table[top](self);
|
||||||
}
|
}
|
||||||
@@ -529,48 +546,65 @@ private:
|
|||||||
self->pop();
|
self->pop();
|
||||||
MUSTTAIL return keepGoing(self);
|
MUSTTAIL return keepGoing(self);
|
||||||
}
|
}
|
||||||
static Status colon(Parser2 *self) {
|
static Status number(Parser2 *self) {
|
||||||
if (Status s = self->parseLiteral(":")) {
|
if (Status s = self->parse_number()) {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
self->pop();
|
self->pop();
|
||||||
MUSTTAIL return keepGoing(self);
|
MUSTTAIL return keepGoing(self);
|
||||||
}
|
}
|
||||||
static Status value(Parser2 *self) {
|
static Status value(Parser2 *self) {
|
||||||
if (self->parse_string() == S_OK) {
|
switch (*self->buf) {
|
||||||
|
case '{':
|
||||||
|
++self->buf;
|
||||||
self->pop();
|
self->pop();
|
||||||
MUSTTAIL return keepGoing(self);
|
|
||||||
} else if (self->parse_number() == S_OK) {
|
|
||||||
self->pop();
|
|
||||||
MUSTTAIL return keepGoing(self);
|
|
||||||
} else if (self->parseLiteral("{") == S_OK) {
|
|
||||||
self->pop();
|
|
||||||
self->callbacks->on_begin_object(self->data);
|
|
||||||
if (Status s = self->push({N_OBJECT_VALUE_OR_END})) {
|
if (Status s = self->push({N_OBJECT_VALUE_OR_END})) {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
MUSTTAIL return keepGoing(self);
|
break;
|
||||||
} else if (self->parseLiteral("[") == S_OK) {
|
case '[':
|
||||||
|
++self->buf;
|
||||||
self->pop();
|
self->pop();
|
||||||
self->callbacks->on_begin_array(self->data);
|
|
||||||
if (Status s = self->push({N_ARRAY_VALUE_OR_END})) {
|
if (Status s = self->push({N_ARRAY_VALUE_OR_END})) {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
MUSTTAIL return keepGoing(self);
|
break;
|
||||||
} else if (self->parseLiteral("true") == S_OK) {
|
case '"':
|
||||||
|
++self->buf;
|
||||||
self->pop();
|
self->pop();
|
||||||
self->callbacks->on_true_literal(self->data);
|
if (Status s = self->push({N_STRING})) {
|
||||||
MUSTTAIL return keepGoing(self);
|
return s;
|
||||||
} else if (self->parseLiteral("false") == S_OK) {
|
}
|
||||||
|
break;
|
||||||
|
case 't':
|
||||||
|
++self->buf;
|
||||||
self->pop();
|
self->pop();
|
||||||
self->callbacks->on_false_literal(self->data);
|
if (Status s = self->push({T_R, T_U, T_TRUE})) {
|
||||||
MUSTTAIL return keepGoing(self);
|
return s;
|
||||||
} else if (self->parseLiteral("null") == S_OK) {
|
}
|
||||||
|
break;
|
||||||
|
case 'f':
|
||||||
|
++self->buf;
|
||||||
self->pop();
|
self->pop();
|
||||||
self->callbacks->on_null_literal(self->data);
|
if (Status s = self->push({T_A, T_L, T_S, T_FALSE})) {
|
||||||
MUSTTAIL return keepGoing(self);
|
return s;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'n':
|
||||||
|
++self->buf;
|
||||||
|
self->pop();
|
||||||
|
if (Status s = self->push({T_U, T_L, T_NULL})) {
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
self->pop();
|
||||||
|
if (Status s = self->push({N_NUMBER})) {
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
return S_REJECT;
|
MUSTTAIL return keepGoing(self);
|
||||||
}
|
}
|
||||||
static Status arrayOrEnd(Parser2 *self) {
|
static Status arrayOrEnd(Parser2 *self) {
|
||||||
if (self->parseLiteral("]") == S_OK) {
|
if (self->parseLiteral("]") == S_OK) {
|
||||||
@@ -590,10 +624,10 @@ private:
|
|||||||
self->pop();
|
self->pop();
|
||||||
self->callbacks->on_end_object(self->data);
|
self->callbacks->on_end_object(self->data);
|
||||||
MUSTTAIL return keepGoing(self);
|
MUSTTAIL return keepGoing(self);
|
||||||
} else {
|
} else if (self->parseLiteral("\"") == S_OK) {
|
||||||
self->pop();
|
self->pop();
|
||||||
if (Status s = self->push(
|
if (Status s = self->push(
|
||||||
{T_STRING, T_COLON, N_VALUE, N_OBJECT_MAYBE_CONTINUE})) {
|
{N_STRING, T_COLON, N_VALUE, N_OBJECT_MAYBE_CONTINUE})) {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
MUSTTAIL return keepGoing(self);
|
MUSTTAIL return keepGoing(self);
|
||||||
@@ -617,8 +651,8 @@ private:
|
|||||||
static Status objectContinue(Parser2 *self) {
|
static Status objectContinue(Parser2 *self) {
|
||||||
if (self->parseLiteral(",") == S_OK) {
|
if (self->parseLiteral(",") == S_OK) {
|
||||||
self->pop();
|
self->pop();
|
||||||
if (Status s = self->push(
|
if (Status s = self->push({T_DUBQUOTE, N_STRING, T_COLON, N_VALUE,
|
||||||
{T_STRING, T_COLON, N_VALUE, N_OBJECT_MAYBE_CONTINUE})) {
|
N_OBJECT_MAYBE_CONTINUE})) {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
MUSTTAIL return keepGoing(self);
|
MUSTTAIL return keepGoing(self);
|
||||||
@@ -629,10 +663,59 @@ private:
|
|||||||
}
|
}
|
||||||
return S_REJECT;
|
return S_REJECT;
|
||||||
}
|
}
|
||||||
|
static Status colon(Parser2 *self) {
|
||||||
|
if (*self->buf++ == ':') {
|
||||||
|
self->pop();
|
||||||
|
MUSTTAIL return keepGoing(self);
|
||||||
|
}
|
||||||
|
return S_REJECT;
|
||||||
|
}
|
||||||
|
static Status finishTrue(Parser2 *self) {
|
||||||
|
if (*self->buf++ == 'e') {
|
||||||
|
self->pop();
|
||||||
|
self->callbacks->on_true_literal(self->data);
|
||||||
|
MUSTTAIL return keepGoing(self);
|
||||||
|
}
|
||||||
|
return S_REJECT;
|
||||||
|
}
|
||||||
|
static Status finishFalse(Parser2 *self) {
|
||||||
|
if (*self->buf++ == 'e') {
|
||||||
|
self->pop();
|
||||||
|
self->callbacks->on_false_literal(self->data);
|
||||||
|
MUSTTAIL return keepGoing(self);
|
||||||
|
}
|
||||||
|
return S_REJECT;
|
||||||
|
}
|
||||||
|
static Status finishNull(Parser2 *self) {
|
||||||
|
if (*self->buf++ == 'l') {
|
||||||
|
self->pop();
|
||||||
|
self->callbacks->on_null_literal(self->data);
|
||||||
|
MUSTTAIL return keepGoing(self);
|
||||||
|
}
|
||||||
|
return S_REJECT;
|
||||||
|
}
|
||||||
|
template <char kChar> static Status singleChar(Parser2 *self) {
|
||||||
|
if (*self->buf == kChar) {
|
||||||
|
++self->buf;
|
||||||
|
self->pop();
|
||||||
|
MUSTTAIL return keepGoing(self);
|
||||||
|
}
|
||||||
|
return S_REJECT;
|
||||||
|
}
|
||||||
|
|
||||||
static constexpr continuation table[N_PAST_END] = {
|
static constexpr continuation table[N_PAST_END] = {
|
||||||
/*T_STRING*/ string,
|
|
||||||
/*T_COLON*/ colon,
|
/*T_COLON*/ colon,
|
||||||
|
/*T_TRUE*/ finishTrue,
|
||||||
|
/*T_FALSE*/ finishFalse,
|
||||||
|
/*T_NULL*/ finishNull,
|
||||||
|
/*T_R*/ singleChar<'r'>,
|
||||||
|
/*T_U*/ singleChar<'u'>,
|
||||||
|
/*T_A*/ singleChar<'a'>,
|
||||||
|
/*T_L*/ singleChar<'l'>,
|
||||||
|
/*T_S*/ singleChar<'s'>,
|
||||||
|
/*T_DUBQUOTE*/ singleChar<'"'>,
|
||||||
|
/*N_STRING*/ string,
|
||||||
|
/*N_NUMBER*/ number,
|
||||||
/*N_VALUE*/ value,
|
/*N_VALUE*/ value,
|
||||||
/*N_ARRAY_VALUE_OR_END*/ arrayOrEnd,
|
/*N_ARRAY_VALUE_OR_END*/ arrayOrEnd,
|
||||||
/*N_OBJECT_VALUE_OR_END*/ objectOrEnd,
|
/*N_OBJECT_VALUE_OR_END*/ objectOrEnd,
|
||||||
|
|||||||
Reference in New Issue
Block a user