Remove parser2

This commit is contained in:
2025-05-18 11:42:48 -04:00
parent 19208c0e0a
commit dd8b316e29

View File

@@ -1,448 +0,0 @@
#pragma once
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <initializer_list>
#include <iterator>
#include <tuple>
#include <utility>
#include "musttail.h"
#include "tables.h"
#include "weaseljson.h"
// Grammar symbols stored on the stack of the pushdown automaton. Terminals
// come first, then nonterminals. The numeric value of each symbol doubles as
// an index into the per-symbol lookup tables, so the order here is
// significant.
enum Symbol : int8_t {
  // --- Terminals ---
  T_COLON = 0, // ':' between an object key and its value
  T_TRUE,      // Final 'e' of `true`
  T_FALSE,     // Final 'e' of `false`
  T_NULL,      // Final 'l' of `null`
  T_R,         // 'r'
  T_U,         // 'u'
  T_A,         // 'a'
  T_L,         // 'l'
  T_S,         // 's'
  T_DUBQUOTE,  // '"' that opens a string
  T_EOF,       // End of input

  // --- Nonterminals ---
  // Rest of a string: excludes the leading double quote, includes the
  // trailing one
  N_STRING,
  // Rest of a string, resuming immediately after a backslash
  N_STRING_FROM_ESCAPE,
  N_NUMBER,
  N_VALUE,
  N_ARRAY_VALUE_OR_END,
  N_OBJECT_VALUE_OR_END,
  N_ARRAY_MAYBE_CONTINUE,
  N_OBJECT_MAYBE_CONTINUE,
  N_WHITESPACE,

  // Sentinel equal to the number of real symbols. Must stay last.
  N_PAST_END,
};
// Printable name of each Symbol, indexed by the symbol's numeric value. The
// entries must stay in the same order as the enumerators of Symbol.
// Declared constexpr so the table is immutable: it lives in a header, and a
// plain `inline` array would let any translation unit reassign its entries.
inline constexpr const char *symbolNames[] = {
    "T_COLON",
    "T_TRUE",
    "T_FALSE",
    "T_NULL",
    "T_R",
    "T_U",
    "T_A",
    "T_L",
    "T_S",
    "T_DUBQUOTE",
    "T_EOF",
    "N_STRING",
    "N_STRING_FROM_ESCAPE",
    "N_NUMBER",
    "N_VALUE",
    "N_ARRAY_VALUE_OR_END",
    "N_OBJECT_VALUE_OR_END",
    "N_ARRAY_MAYBE_CONTINUE",
    "N_OBJECT_MAYBE_CONTINUE",
    "N_WHITESPACE",
};
// Compile-time check that there is exactly one name per symbol.
static_assert(std::size(symbolNames) == N_PAST_END);
// Table-based ll(1) parser that doesn't handle escaping and all numbers, with a
// streaming interface. Does not validate utf-8. Uses O(1) memory.
struct Parser2 {
Parser2(const Callbacks *callbacks, void *data)
: callbacks(callbacks), data(data) {
std::ignore = push({N_WHITESPACE, N_VALUE, N_WHITESPACE, T_EOF});
}
enum Status {
// Accept input
S_OK,
// Consumed available input. Prime more and parse again
S_AGAIN,
// Invalid json
S_REJECT,
// json is too deeply nested
S_OVERFLOW,
};
[[nodiscard]] Status parse(char *buf, int len) {
complete = len == 0;
this->buf = buf;
this->bufEnd = buf + len;
return keepGoing(this);
}
Parser2(Parser2 const &) = delete;
Parser2 &operator=(Parser2 const &) = delete;
Parser2(Parser2 &&) = delete;
Parser2 &operator=(Parser2 &&) = delete;
static constexpr int kMaxStackSize = 1 << 10;
private:
// Helpers
void maybeSkipWs() {
while (buf != bufEnd && tables.whitespace[*buf]) {
++buf;
}
}
Status parse_number() {
char *const bufBefore = buf;
while (len() > 0) {
if (tables.number[*buf]) {
++buf;
} else {
break;
}
}
if (buf != bufBefore) {
callbacks->on_number_data(data, bufBefore, buf - bufBefore);
}
if (len() == 0 && !complete) {
return S_AGAIN;
}
callbacks->on_end_number(data);
return S_OK;
}
Status parse_string(bool fromEscape) {
auto *result = buf;
if (fromEscape) {
if (*result == '\"') {
++result;
}
pop();
if (Status s = push({N_STRING})) {
return s;
}
}
for (;;) {
result = result == nullptr ? nullptr
: (char *)memchr(result, '"', bufEnd - result);
if (result == nullptr) {
if (complete) {
return S_REJECT;
}
callbacks->on_string_data(data, buf, len());
if (bufEnd[-1] == '\\') {
pop();
if (Status s = push({N_STRING_FROM_ESCAPE})) {
return s;
}
}
return S_AGAIN;
}
if (result != buf && result[-1] == '\\') {
++result;
if (result == bufEnd) {
if (complete) {
return S_REJECT;
}
callbacks->on_string_data(data, buf, len());
return S_AGAIN;
}
continue;
}
break;
}
int stringLen = result - buf;
if (stringLen > 0) {
callbacks->on_string_data(data, buf, stringLen);
}
buf += stringLen + 1;
callbacks->on_end_string(data);
return S_OK;
}
typedef Status (*continuation)(Parser2 *);
[[maybe_unused]] void debugPrint() {
for (int i = 0; i < stackPtr - stack; ++i) {
printf("%s ", symbolNames[stack[i]]);
}
printf("\n");
}
static Status keepGoing(Parser2 *self) {
if (self->len() == 0 && !self->complete) {
return S_AGAIN;
}
// self->debugPrint();
MUSTTAIL return table[*(self->stackPtr - 1)](self);
}
static Status string(Parser2 *self) {
if (Status s = self->parse_string(false)) {
return s;
}
self->pop();
MUSTTAIL return keepGoing(self);
}
static Status stringFromEscape(Parser2 *self) {
if (Status s = self->parse_string(true)) {
return s;
}
self->pop();
MUSTTAIL return keepGoing(self);
}
static Status number(Parser2 *self) {
if (Status s = self->parse_number()) {
return s;
}
self->pop();
MUSTTAIL return keepGoing(self);
}
static Status value(Parser2 *self) {
switch (*self->buf) {
case '{':
++self->buf;
self->callbacks->on_begin_object(self->data);
self->pop();
if (Status s = self->push({N_WHITESPACE, N_OBJECT_VALUE_OR_END})) {
return s;
}
break;
case '[':
++self->buf;
self->callbacks->on_begin_array(self->data);
self->pop();
if (Status s = self->push({N_WHITESPACE, N_ARRAY_VALUE_OR_END})) {
return s;
}
break;
case '"':
++self->buf;
self->pop();
self->callbacks->on_begin_string(self->data);
if (Status s = self->push({N_STRING})) {
return s;
}
break;
case 't':
++self->buf;
self->pop();
if (Status s = self->push({T_R, T_U, T_TRUE})) {
return s;
}
break;
case 'f':
++self->buf;
self->pop();
if (Status s = self->push({T_A, T_L, T_S, T_FALSE})) {
return s;
}
break;
case 'n':
++self->buf;
self->pop();
if (Status s = self->push({T_U, T_L, T_NULL})) {
return s;
}
break;
default:
if (tables.number[*self->buf]) {
self->pop();
self->callbacks->on_begin_number(self->data);
if (Status s = self->push({N_NUMBER})) {
return s;
}
break;
}
return S_REJECT;
}
MUSTTAIL return keepGoing(self);
}
static Status arrayOrEnd(Parser2 *self) {
if (*self->buf == ']') {
++self->buf;
self->pop();
self->callbacks->on_end_array(self->data);
MUSTTAIL return keepGoing(self);
} else {
self->pop();
if (Status s =
self->push({N_VALUE, N_WHITESPACE, N_ARRAY_MAYBE_CONTINUE})) {
return s;
}
MUSTTAIL return keepGoing(self);
}
}
static Status objectOrEnd(Parser2 *self) {
if (*self->buf == '}') {
++self->buf;
self->pop();
self->callbacks->on_end_object(self->data);
MUSTTAIL return keepGoing(self);
} else if (*self->buf == '"') {
self->callbacks->on_begin_string(self->data);
++self->buf;
self->pop();
if (Status s =
self->push({N_STRING, N_WHITESPACE, T_COLON, N_WHITESPACE,
N_VALUE, N_WHITESPACE, N_OBJECT_MAYBE_CONTINUE})) {
return s;
}
MUSTTAIL return keepGoing(self);
}
return S_REJECT;
}
static Status arrayContinue(Parser2 *self) {
if (*self->buf == ',') {
++self->buf;
self->pop();
if (Status s = self->push(
{N_WHITESPACE, N_VALUE, N_WHITESPACE, N_ARRAY_MAYBE_CONTINUE})) {
return s;
}
MUSTTAIL return keepGoing(self);
} else if (*self->buf == ']') {
++self->buf;
self->pop();
self->callbacks->on_end_array(self->data);
MUSTTAIL return keepGoing(self);
}
return S_REJECT;
}
static Status objectContinue(Parser2 *self) {
if (*self->buf == ',') {
++self->buf;
self->pop();
if (Status s = self->push({N_WHITESPACE, T_DUBQUOTE, N_STRING,
N_WHITESPACE, T_COLON, N_WHITESPACE, N_VALUE,
N_WHITESPACE, N_OBJECT_MAYBE_CONTINUE})) {
return s;
}
MUSTTAIL return keepGoing(self);
} else if (*self->buf == '}') {
++self->buf;
self->pop();
self->callbacks->on_end_object(self->data);
MUSTTAIL return keepGoing(self);
}
return S_REJECT;
}
static Status finishTrue(Parser2 *self) {
if (*self->buf++ == 'e') {
self->pop();
self->callbacks->on_true_literal(self->data);
MUSTTAIL return keepGoing(self);
}
return S_REJECT;
}
static Status finishFalse(Parser2 *self) {
if (*self->buf++ == 'e') {
self->pop();
self->callbacks->on_false_literal(self->data);
MUSTTAIL return keepGoing(self);
}
return S_REJECT;
}
static Status finishNull(Parser2 *self) {
if (*self->buf++ == 'l') {
self->pop();
self->callbacks->on_null_literal(self->data);
MUSTTAIL return keepGoing(self);
}
return S_REJECT;
}
template <char kChar> static Status singleChar(Parser2 *self) {
if (*self->buf++ == kChar) {
self->pop();
MUSTTAIL return keepGoing(self);
}
return S_REJECT;
}
static Status dubquote(Parser2 *self) {
if (*self->buf++ == '"') {
self->callbacks->on_begin_string(self->data);
self->pop();
MUSTTAIL return keepGoing(self);
}
return S_REJECT;
}
static Status whitespace(Parser2 *self) {
self->maybeSkipWs();
if (self->len() == 0 && !self->complete) {
return S_AGAIN;
}
self->pop();
MUSTTAIL return keepGoing(self);
}
static Status eof(Parser2 *self) {
if (self->len() > 0) {
return S_REJECT;
}
return self->complete ? S_OK : S_AGAIN;
}
static constexpr continuation table[] = {
/*T_COLON*/ singleChar<':'>,
/*T_TRUE*/ finishTrue,
/*T_FALSE*/ finishFalse,
/*T_NULL*/ finishNull,
/*T_R*/ singleChar<'r'>,
/*T_U*/ singleChar<'u'>,
/*T_A*/ singleChar<'a'>,
/*T_L*/ singleChar<'l'>,
/*T_S*/ singleChar<'s'>,
/*T_DUBQUOTE*/ dubquote,
/*T_EOF*/ eof,
/*N_STRING*/ string,
/*N_STRING_FROM_ESCAPE*/ stringFromEscape,
/*N_NUMBER*/ number,
/*N_VALUE*/ value,
/*N_ARRAY_VALUE_OR_END*/ arrayOrEnd,
/*N_OBJECT_VALUE_OR_END*/ objectOrEnd,
/*N_ARRAY_MAYBE_CONTINUE*/ arrayContinue,
/*N_OBJECT_MAYBE_CONTINUE*/ objectContinue,
/*N_WHITESPACE*/ whitespace,
};
static_assert(sizeof(table) / sizeof(table[0]) == N_PAST_END);
bool empty() const { return stackPtr == stack; }
void pop() {
assert(!empty());
--stackPtr;
}
[[nodiscard]] Status push(std::initializer_list<Symbol> symbols) {
if (stackPtr >= std::end(stack) - symbols.size()) [[unlikely]] {
return S_OVERFLOW;
}
for (int i = symbols.size() - 1; i >= 0; --i) {
*stackPtr++ = *(symbols.begin() + i);
}
return S_OK;
}
int len() const {
auto result = bufEnd - buf;
assert(result >= 0);
return result;
}
char *buf = nullptr;
char *bufEnd = nullptr;
const Callbacks *const callbacks;
void *const data;
Symbol stack[kMaxStackSize];
Symbol *stackPtr = stack;
bool complete = false;
};