Start organizing code
This commit is contained in:
@@ -45,5 +45,6 @@ include(CTest)
|
||||
include(doctest)
|
||||
|
||||
add_executable(mytest src/test.cpp)
|
||||
target_include_directories(mytest PRIVATE include)
|
||||
target_link_libraries(mytest PRIVATE doctest nanobench simdjson)
|
||||
doctest_discover_tests(mytest)
|
||||
|
||||
20
include/weaseljson.h
Normal file
20
include/weaseljson.h
Normal file
@@ -0,0 +1,20 @@
|
||||
#ifndef WEASELJSON_H
|
||||
#define WEASELJSON_H
|
||||
|
||||
// Table of event callbacks through which the parser reports what it reads.
// Every callback receives, as its first argument, the opaque `data` pointer
// that was handed to the parser together with this table.
//
// The *_data callbacks receive a (buf, len) slice of the input. Parser2 (see
// src/parser.h) may report a single string or number as several slices when
// the input arrives in more than one chunk, bracketed by the matching
// on_begin_* / on_end_* calls.
//
// Parser2 calls these pointers without checking them for null, so every
// member must be set before parsing starts.
struct Callbacks {
|
||||
void (*on_begin_object)(void *data);
|
||||
void (*on_end_object)(void *data);
|
||||
void (*on_begin_string)(void *data);
|
||||
void (*on_string_data)(void *data, const char *buf, int len);
|
||||
void (*on_end_string)(void *data);
|
||||
void (*on_begin_array)(void *data);
|
||||
void (*on_end_array)(void *data);
|
||||
void (*on_begin_number)(void *data);
|
||||
void (*on_number_data)(void *data, const char *buf, int len);
|
||||
void (*on_end_number)(void *data);
|
||||
void (*on_true_literal)(void *data);
|
||||
void (*on_false_literal)(void *data);
|
||||
void (*on_null_literal)(void *data);
|
||||
};
|
||||
|
||||
#endif
|
||||
11
src/musttail.h
Normal file
11
src/musttail.h
Normal file
@@ -0,0 +1,11 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef __has_attribute
|
||||
#define __has_attribute(x) 0
|
||||
#endif
|
||||
|
||||
#if __has_attribute(musttail)
|
||||
#define MUSTTAIL __attribute__((musttail))
|
||||
#else
|
||||
#define MUSTTAIL
|
||||
#endif
|
||||
452
src/parser.h
Normal file
452
src/parser.h
Normal file
@@ -0,0 +1,452 @@
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <initializer_list>
|
||||
#include <utility>
|
||||
|
||||
#include "musttail.h"
|
||||
#include "tables.h"
|
||||
#include "weaseljson.h"
|
||||
|
||||
// Terminals and nonterminals. These appear on the stack of the pushdown
|
||||
// automaton.
|
||||
// Grammar symbols kept on the parser stack. The numeric values are used as
// indices into `symbolNames` and into Parser2's handler table, so the order
// here must match both arrays (each is checked against N_PAST_END with a
// static_assert).
enum Symbol : int8_t {
|
||||
T_COLON,
|
||||
T_TRUE,
|
||||
T_FALSE,
|
||||
T_NULL,
|
||||
T_R,
|
||||
T_U,
|
||||
T_A,
|
||||
T_L,
|
||||
T_S,
|
||||
T_DUBQUOTE,
|
||||
// Nonterminals
|
||||
N_STRING, // Not including leading double quote, but including trailing quote
|
||||
N_STRING_FROM_ESCAPE, // Immediately after a backslash
|
||||
N_NUMBER,
|
||||
N_VALUE,
|
||||
N_ARRAY_VALUE_OR_END,
|
||||
N_OBJECT_VALUE_OR_END,
|
||||
N_ARRAY_MAYBE_CONTINUE,
|
||||
N_OBJECT_MAYBE_CONTINUE,
|
||||
N_WHITESPACE,
|
||||
N_PAST_END, // Must be last nonterminal
|
||||
};
|
||||
|
||||
inline const char *symbolNames[] = {
|
||||
"T_COLON",
|
||||
"T_TRUE",
|
||||
"T_FALSE",
|
||||
"T_NULL",
|
||||
"T_R",
|
||||
"T_U",
|
||||
"T_A",
|
||||
"T_L",
|
||||
"T_S",
|
||||
"T_DUBQUOTE",
|
||||
"N_STRING",
|
||||
"N_STRING_FROM_ESCAPE",
|
||||
"N_NUMBER",
|
||||
"N_VALUE",
|
||||
"N_ARRAY_VALUE_OR_END",
|
||||
"N_OBJECT_VALUE_OR_END",
|
||||
"N_ARRAY_MAYBE_CONTINUE",
|
||||
"N_OBJECT_MAYBE_CONTINUE",
|
||||
"N_WHITESPACE",
|
||||
};
|
||||
|
||||
static_assert(sizeof(symbolNames) / sizeof(symbolNames[0]) == N_PAST_END);
|
||||
|
||||
// Table-based LL(1) parser with a streaming interface. It does not handle
|
||||
// string escaping or all numbers, and does not validate UTF-8. Uses O(1) memory.
|
||||
struct Parser2 {
|
||||
Parser2(const Callbacks *callbacks, void *data)
|
||||
: callbacks(callbacks), data(data) {
|
||||
std::ignore = push({N_WHITESPACE, N_VALUE});
|
||||
}
|
||||
|
||||
void prime(char *buf, int len) {
|
||||
this->buf = buf;
|
||||
this->bufEnd = buf + len;
|
||||
}
|
||||
|
||||
enum Status {
|
||||
// Accept input
|
||||
S_OK,
|
||||
// Consumed available input. Prime more and parse again
|
||||
S_AGAIN,
|
||||
// Invalid json
|
||||
S_REJECT,
|
||||
// json is too deeply nested
|
||||
S_OVERFLOW,
|
||||
};
|
||||
|
||||
[[nodiscard]] Status parse() { return keepGoing(this); }
|
||||
|
||||
Parser2(Parser2 const &) = delete;
|
||||
Parser2 &operator=(Parser2 const &) = delete;
|
||||
Parser2(Parser2 &&) = delete;
|
||||
Parser2 &operator=(Parser2 &&) = delete;
|
||||
|
||||
static constexpr int kMaxStackSize = 1 << 10;
|
||||
|
||||
private:
|
||||
// Helpers
|
||||
void maybeSkipWs() {
|
||||
while (buf != bufEnd && tables.whitespace[*buf]) {
|
||||
++buf;
|
||||
}
|
||||
}
|
||||
// Consumes the longest prefix of the current buffer made of number bytes (as
// classified by tables.number) and reports it through on_number_data.
//
// Returns S_AGAIN when the buffer ran out, because the number may continue
// in the next chunk; on_end_number is not called in that case. Returns S_OK
// after calling on_end_number when a non-number byte was found; that byte is
// left unconsumed.
//
// NOTE(review): if the very first byte is not a number byte, no data is
// reported and the number ends immediately with S_OK. Whether that is an
// empty number or the tail of one started in an earlier chunk cannot be told
// apart here - confirm this is acceptable for callers.
Status parse_number() {
  char *const bufBefore = buf;
  // Index the 256-entry table through unsigned char: with a signed plain
  // `char`, a byte >= 0x80 would otherwise yield a negative index and read
  // outside the table.
  while (len() > 0 && tables.number[static_cast<unsigned char>(*buf)]) {
    ++buf;
  }
  if (buf != bufBefore) {
    callbacks->on_number_data(data, bufBefore, buf - bufBefore);
  }
  if (len() == 0) {
    return S_AGAIN;
  }
  callbacks->on_end_number(data);
  return S_OK;
}
|
||||
// Scans the current buffer for the double quote that closes the string whose
// opening quote has already been consumed.
//
// fromEscape: true when the previous chunk ended with a backslash (the stack
//   top is N_STRING_FROM_ESCAPE). A quote in the first byte is then treated
//   as escaped and skipped, and the stack top is switched back to N_STRING.
//
// Returns:
//   S_OK       - closing quote found. The bytes before it (if any) were
//                reported through on_string_data, on_end_string was called
//                and `buf` now points just past the quote.
//   S_AGAIN    - the buffer ended inside the string. Everything in the
//                buffer was reported through on_string_data; `buf` is left
//                untouched. If the last byte is a backslash the stack top
//                becomes N_STRING_FROM_ESCAPE so the next chunk knows.
//   S_OVERFLOW - propagated from push().
//
// A quote counts as escaped when the byte directly before it is a backslash;
// longer runs of backslashes are not analysed.
Status parse_string(bool fromEscape) {
  char *result = buf;
  if (fromEscape) {
    if (*result == '"') {
      ++result;
    }
    pop();
    if (Status s = push({N_STRING})) {
      return s;
    }
  }
  for (;;) {
    result = static_cast<char *>(memchr(result, '"', bufEnd - result));
    if (result == nullptr) {
      // No quote left in this chunk: hand over everything we have.
      callbacks->on_string_data(data, buf, len());
      if (bufEnd[-1] == '\\') {
        // Remember the dangling backslash for the next chunk.
        pop();
        if (Status s = push({N_STRING_FROM_ESCAPE})) {
          return s;
        }
      }
      return S_AGAIN;
    }
    if (result != buf && result[-1] == '\\') {
      // Escaped quote: keep searching after it.
      ++result;
      if (result == bufEnd) {
        callbacks->on_string_data(data, buf, len());
        return S_AGAIN;
      }
      continue;
    }
    break;
  }
  const int stringLen = static_cast<int>(result - buf);
  if (stringLen > 0) {
    callbacks->on_string_data(data, buf, stringLen);
  }
  buf += stringLen + 1; // also step over the closing quote
  callbacks->on_end_string(data);
  return S_OK;
}
|
||||
|
||||
typedef Status (*continuation)(Parser2 *);
|
||||
|
||||
[[maybe_unused]] void debugPrint() {
|
||||
for (int i = 0; i < stackPtr - stack; ++i) {
|
||||
printf("%s ", symbolNames[stack[i]]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
// Dispatch step of the automaton: returns S_AGAIN when the current buffer is
// exhausted, otherwise tail-calls the handler registered in `table` for the
// symbol on top of the stack. Assumes the stack is not empty.
static Status keepGoing(Parser2 *self) {
  const bool drained = self->len() == 0;
  if (drained) {
    return S_AGAIN;
  }
  // self->debugPrint();
  const Symbol top = self->stackPtr[-1];
  MUSTTAIL return table[top](self);
}
|
||||
|
||||
// Handler for N_STRING: scans the string body. Any non-OK status from
// parse_string is returned as is; once the string is complete the symbol is
// popped and parsing either finishes (empty stack) or continues.
static Status string(Parser2 *self) {
  const Status status = self->parse_string(false);
  if (status != S_OK) {
    return status;
  }
  self->pop();
  if (!self->empty()) {
    MUSTTAIL return keepGoing(self);
  }
  return S_OK;
}
|
||||
// Handler for N_STRING_FROM_ESCAPE: like `string`, but tells parse_string
// that the previous chunk ended with a backslash.
static Status stringFromEscape(Parser2 *self) {
  const Status status = self->parse_string(true);
  if (status != S_OK) {
    return status;
  }
  self->pop();
  if (!self->empty()) {
    MUSTTAIL return keepGoing(self);
  }
  return S_OK;
}
|
||||
// Handler for N_NUMBER: consumes number bytes. Any non-OK status from
// parse_number is returned as is; once the number has ended the symbol is
// popped and parsing either finishes (empty stack) or continues.
static Status number(Parser2 *self) {
  const Status status = self->parse_number();
  if (status != S_OK) {
    return status;
  }
  self->pop();
  if (!self->empty()) {
    MUSTTAIL return keepGoing(self);
  }
  return S_OK;
}
|
||||
// Handler for N_VALUE: looks at the next byte to decide which kind of value
// starts here, replaces N_VALUE on the stack with the symbols needed to
// parse that kind, and continues. Any byte that does not start an object,
// array, string or literal is treated as the start of a number (and is not
// consumed here).
static Status value(Parser2 *self) {
  const char lead = *self->buf;
  // Every alternative replaces N_VALUE, so drop it up front.
  self->pop();
  Status pushed = S_OK;
  switch (lead) {
  case '{':
    ++self->buf;
    self->callbacks->on_begin_object(self->data);
    pushed = self->push({N_WHITESPACE, N_OBJECT_VALUE_OR_END});
    break;
  case '[':
    ++self->buf;
    self->callbacks->on_begin_array(self->data);
    pushed = self->push({N_WHITESPACE, N_ARRAY_VALUE_OR_END});
    break;
  case '"':
    ++self->buf;
    self->callbacks->on_begin_string(self->data);
    pushed = self->push({N_STRING});
    break;
  case 't':
    // Remaining letters of "true"; T_TRUE checks the final 'e'.
    ++self->buf;
    pushed = self->push({T_R, T_U, T_TRUE});
    break;
  case 'f':
    // Remaining letters of "false"; T_FALSE checks the final 'e'.
    ++self->buf;
    pushed = self->push({T_A, T_L, T_S, T_FALSE});
    break;
  case 'n':
    // Remaining letters of "null"; T_NULL checks the final 'l'.
    ++self->buf;
    pushed = self->push({T_U, T_L, T_NULL});
    break;
  default:
    self->callbacks->on_begin_number(self->data);
    pushed = self->push({N_NUMBER});
    break;
  }
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return keepGoing(self);
}
|
||||
// Handler for N_ARRAY_VALUE_OR_END (just after '['): either the array closes
// immediately with ']' or its first element follows.
static Status arrayOrEnd(Parser2 *self) {
  if (*self->buf != ']') {
    // First element: value, then optional ",value" repetitions or ']'.
    self->pop();
    const Status pushed =
        self->push({N_VALUE, N_WHITESPACE, N_ARRAY_MAYBE_CONTINUE});
    if (pushed != S_OK) {
      return pushed;
    }
    MUSTTAIL return keepGoing(self);
  }
  // Empty array.
  ++self->buf;
  self->pop();
  self->callbacks->on_end_array(self->data);
  if (self->empty()) {
    return S_OK;
  }
  MUSTTAIL return keepGoing(self);
}
|
||||
// Handler for N_OBJECT_VALUE_OR_END (just after '{'): accepts '}' for an
// empty object or '"' starting the first key; anything else is rejected.
static Status objectOrEnd(Parser2 *self) {
  switch (*self->buf) {
  case '}':
    ++self->buf;
    self->pop();
    self->callbacks->on_end_object(self->data);
    if (self->empty()) {
      return S_OK;
    }
    break;
  case '"': {
    // The key's opening quote is consumed here, so the key is parsed as
    // N_STRING directly, followed by ':' value and the continuation.
    self->callbacks->on_begin_string(self->data);
    ++self->buf;
    self->pop();
    const Status pushed =
        self->push({N_STRING, N_WHITESPACE, T_COLON, N_WHITESPACE, N_VALUE,
                    N_WHITESPACE, N_OBJECT_MAYBE_CONTINUE});
    if (pushed != S_OK) {
      return pushed;
    }
    break;
  }
  default:
    return S_REJECT;
  }
  MUSTTAIL return keepGoing(self);
}
|
||||
// Handler for N_ARRAY_MAYBE_CONTINUE (after an array element): ',' introduces
// another element, ']' closes the array, anything else is rejected.
static Status arrayContinue(Parser2 *self) {
  switch (*self->buf) {
  case ',': {
    ++self->buf;
    self->pop();
    const Status pushed = self->push(
        {N_WHITESPACE, N_VALUE, N_WHITESPACE, N_ARRAY_MAYBE_CONTINUE});
    if (pushed != S_OK) {
      return pushed;
    }
    break;
  }
  case ']':
    ++self->buf;
    self->pop();
    self->callbacks->on_end_array(self->data);
    if (self->empty()) {
      return S_OK;
    }
    break;
  default:
    return S_REJECT;
  }
  MUSTTAIL return keepGoing(self);
}
|
||||
// Handler for N_OBJECT_MAYBE_CONTINUE (after a member value): ',' introduces
// another "key": value pair, '}' closes the object, anything else is
// rejected.
static Status objectContinue(Parser2 *self) {
  switch (*self->buf) {
  case ',': {
    ++self->buf;
    self->pop();
    // Unlike the first member, the key's opening quote has not been seen
    // yet, hence the explicit T_DUBQUOTE before N_STRING.
    const Status pushed = self->push(
        {N_WHITESPACE, T_DUBQUOTE, N_STRING, N_WHITESPACE, T_COLON,
         N_WHITESPACE, N_VALUE, N_WHITESPACE, N_OBJECT_MAYBE_CONTINUE});
    if (pushed != S_OK) {
      return pushed;
    }
    break;
  }
  case '}':
    ++self->buf;
    self->pop();
    self->callbacks->on_end_object(self->data);
    if (self->empty()) {
      return S_OK;
    }
    break;
  default:
    return S_REJECT;
  }
  MUSTTAIL return keepGoing(self);
}
|
||||
// Handler for T_TRUE: consumes one byte, which must be the final 'e' of
// "true", then reports the literal. The byte is consumed even on rejection.
static Status finishTrue(Parser2 *self) {
  const char last = *self->buf++;
  if (last != 'e') {
    return S_REJECT;
  }
  self->pop();
  self->callbacks->on_true_literal(self->data);
  if (self->empty()) {
    return S_OK;
  }
  MUSTTAIL return keepGoing(self);
}
|
||||
// Handler for T_FALSE: consumes one byte, which must be the final 'e' of
// "false", then reports the literal. The byte is consumed even on rejection.
static Status finishFalse(Parser2 *self) {
  const char last = *self->buf++;
  if (last != 'e') {
    return S_REJECT;
  }
  self->pop();
  self->callbacks->on_false_literal(self->data);
  if (self->empty()) {
    return S_OK;
  }
  MUSTTAIL return keepGoing(self);
}
|
||||
// Handler for T_NULL: consumes one byte, which must be the final 'l' of
// "null", then reports the literal. The byte is consumed even on rejection.
static Status finishNull(Parser2 *self) {
  const char last = *self->buf++;
  if (last != 'l') {
    return S_REJECT;
  }
  self->pop();
  self->callbacks->on_null_literal(self->data);
  if (self->empty()) {
    return S_OK;
  }
  MUSTTAIL return keepGoing(self);
}
|
||||
// Handler for terminals that match exactly one byte: consumes the next byte
// and rejects the input unless it equals kChar. The byte is consumed even on
// rejection.
template <char kChar> static Status singleChar(Parser2 *self) {
  const char seen = *self->buf++;
  if (seen != kChar) {
    return S_REJECT;
  }
  self->pop();
  MUSTTAIL return keepGoing(self);
}
|
||||
// Handler for T_DUBQUOTE: consumes the next byte, which must be the opening
// double quote of a string, and announces the string via on_begin_string.
// The byte is consumed even on rejection.
static Status dubquote(Parser2 *self) {
  const char seen = *self->buf++;
  if (seen != '"') {
    return S_REJECT;
  }
  self->callbacks->on_begin_string(self->data);
  self->pop();
  MUSTTAIL return keepGoing(self);
}
|
||||
// Handler for N_WHITESPACE: skips any whitespace. If that exhausts the
// buffer the symbol stays on the stack (more whitespace may follow in the
// next chunk) and S_AGAIN is returned; otherwise the symbol is popped and
// parsing continues.
static Status whitespace(Parser2 *self) {
  self->maybeSkipWs();
  const bool drained = self->len() == 0;
  if (drained) {
    return S_AGAIN;
  }
  self->pop();
  MUSTTAIL return keepGoing(self);
}
|
||||
|
||||
static constexpr continuation table[] = {
|
||||
/*T_COLON*/ singleChar<':'>,
|
||||
/*T_TRUE*/ finishTrue,
|
||||
/*T_FALSE*/ finishFalse,
|
||||
/*T_NULL*/ finishNull,
|
||||
/*T_R*/ singleChar<'r'>,
|
||||
/*T_U*/ singleChar<'u'>,
|
||||
/*T_A*/ singleChar<'a'>,
|
||||
/*T_L*/ singleChar<'l'>,
|
||||
/*T_S*/ singleChar<'s'>,
|
||||
/*T_DUBQUOTE*/ dubquote,
|
||||
/*N_STRING*/ string,
|
||||
/*N_STRING_FROM_ESCAPE*/ stringFromEscape,
|
||||
/*N_NUMBER*/ number,
|
||||
/*N_VALUE*/ value,
|
||||
/*N_ARRAY_VALUE_OR_END*/ arrayOrEnd,
|
||||
/*N_OBJECT_VALUE_OR_END*/ objectOrEnd,
|
||||
/*N_ARRAY_MAYBE_CONTINUE*/ arrayContinue,
|
||||
/*N_OBJECT_MAYBE_CONTINUE*/ objectContinue,
|
||||
/*N_WHITESPACE*/ whitespace,
|
||||
};
|
||||
|
||||
static_assert(sizeof(table) / sizeof(table[0]) == N_PAST_END);
|
||||
|
||||
char *buf = nullptr;
|
||||
char *bufEnd = nullptr;
|
||||
int len() const { return bufEnd - buf; }
|
||||
const Callbacks *const callbacks;
|
||||
void *const data;
|
||||
Symbol stack[kMaxStackSize];
|
||||
Symbol *stackPtr = stack;
|
||||
bool empty() const { return stackPtr == stack; }
|
||||
void pop() {
|
||||
assert(!empty());
|
||||
--stackPtr;
|
||||
}
|
||||
// Pushes `symbols` so that the FIRST listed symbol ends up on top of the
// stack (i.e. the list is stored in reverse and is processed in the order
// written). Returns S_OVERFLOW without modifying the stack when there is not
// enough room, S_OK otherwise.
//
// The capacity check is conservative: a push that would fill the stack
// exactly is rejected as well.
[[nodiscard]] Status push(std::initializer_list<Symbol> symbols) {
  const auto freeSlots = static_cast<std::size_t>(std::end(stack) - stackPtr);
  if (freeSlots <= symbols.size()) [[unlikely]] {
    return S_OVERFLOW;
  }
  // Walk the list back to front.
  for (const Symbol *src = symbols.end(); src != symbols.begin();) {
    *stackPtr++ = *--src;
  }
  return S_OK;
}
|
||||
};
|
||||
18
src/tables.h
Normal file
18
src/tables.h
Normal file
@@ -0,0 +1,18 @@
|
||||
#pragma once
|
||||
|
||||
// Compile-time byte classification tables, indexed by byte value (0..255).
// Callers must index with an unsigned byte value; a negative (signed char)
// index would read outside the arrays.
constexpr inline struct Tables {
  constexpr Tables() {
    // JSON insignificant whitespace: space, line feed, carriage return, tab.
    whitespace[' '] = true;
    whitespace['\n'] = true;
    whitespace['\r'] = true;
    whitespace['\t'] = true;

    // Bytes that may appear inside a JSON number. This is a character class
    // only; it does not check that the bytes form a well-formed number.
    for (int i = 0; i < 10; ++i) {
      number['0' + i] = true;
    }
    number['.'] = true;
    number['+'] = true;
    number['-'] = true;
    // Exponent markers: without them a valid number such as 1e5 or 2.5E-3
    // would be cut off at the 'e' and the document rejected.
    number['e'] = true;
    number['E'] = true;
  }
  // whitespace[b] is true when byte b is JSON whitespace.
  alignas(16) bool whitespace[256]{};
  // number[b] is true when byte b may be part of a JSON number.
  alignas(16) bool number[256]{};
} tables;
|
||||
501
src/test.cpp
501
src/test.cpp
@@ -4,7 +4,6 @@
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
|
||||
#include <initializer_list>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
@@ -12,6 +11,8 @@
|
||||
#include <nanobench.h>
|
||||
#include <simdjson.h>
|
||||
|
||||
#include "parser.h"
|
||||
|
||||
// This is the JSON grammar in McKeeman Form.
|
||||
|
||||
// json
|
||||
@@ -116,93 +117,6 @@
|
||||
// '000D' ws
|
||||
// '0009' ws
|
||||
|
||||
struct Callbacks {
|
||||
void (*on_begin_object)(void *data) = noop;
|
||||
void (*on_end_object)(void *data) = noop;
|
||||
void (*on_begin_string)(void *data) = noop;
|
||||
void (*on_string_data)(void *data, const char *buf, int len) = noop;
|
||||
void (*on_end_string)(void *data) = noop;
|
||||
void (*on_begin_array)(void *data) = noop;
|
||||
void (*on_end_array)(void *data) = noop;
|
||||
void (*on_begin_number)(void *data) = noop;
|
||||
void (*on_number_data)(void *data, const char *buf, int len) = noop;
|
||||
void (*on_end_number)(void *data) = noop;
|
||||
void (*on_true_literal)(void *data) = noop;
|
||||
void (*on_false_literal)(void *data) = noop;
|
||||
void (*on_null_literal)(void *data) = noop;
|
||||
|
||||
private:
|
||||
static void noop(void *) {}
|
||||
static void noop(void *, const char *, int) {}
|
||||
};
|
||||
|
||||
// Terminals and Nonterminals. These appear in the stack of the pushdown
|
||||
// automata
|
||||
enum Symbol : int8_t {
|
||||
T_COLON,
|
||||
T_TRUE,
|
||||
T_FALSE,
|
||||
T_NULL,
|
||||
T_R,
|
||||
T_U,
|
||||
T_A,
|
||||
T_L,
|
||||
T_S,
|
||||
T_DUBQUOTE,
|
||||
// Nonterminals
|
||||
N_STRING, // Not including leading double quote, but including trailing quote
|
||||
N_STRING_FROM_ESCAPE, // Immediately after a backslach
|
||||
N_NUMBER,
|
||||
N_VALUE,
|
||||
N_ARRAY_VALUE_OR_END,
|
||||
N_OBJECT_VALUE_OR_END,
|
||||
N_ARRAY_MAYBE_CONTINUE,
|
||||
N_OBJECT_MAYBE_CONTINUE,
|
||||
N_WHITESPACE,
|
||||
N_PAST_END, // Must be last nonterminal
|
||||
};
|
||||
|
||||
static const char *symbolNames[] = {
|
||||
"T_COLON",
|
||||
"T_TRUE",
|
||||
"T_FALSE",
|
||||
"T_NULL",
|
||||
"T_R",
|
||||
"T_U",
|
||||
"T_A",
|
||||
"T_L",
|
||||
"T_S",
|
||||
"T_DUBQUOTE",
|
||||
"N_STRING",
|
||||
"N_STRING_FROM_ESCAPE",
|
||||
"N_NUMBER",
|
||||
"N_VALUE",
|
||||
"N_ARRAY_VALUE_OR_END",
|
||||
"N_OBJECT_VALUE_OR_END",
|
||||
"N_ARRAY_MAYBE_CONTINUE",
|
||||
"N_OBJECT_MAYBE_CONTINUE",
|
||||
"N_WHITESPACE",
|
||||
};
|
||||
|
||||
static_assert(sizeof(symbolNames) / sizeof(symbolNames[0]) == N_PAST_END);
|
||||
|
||||
constexpr static struct Tables {
|
||||
constexpr Tables() {
|
||||
whitespace[' '] = true;
|
||||
whitespace['\n'] = true;
|
||||
whitespace['\r'] = true;
|
||||
whitespace['\t'] = true;
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
number['0' + i] = true;
|
||||
}
|
||||
number['.'] = true;
|
||||
number['+'] = true;
|
||||
number['-'] = true;
|
||||
}
|
||||
alignas(16) bool whitespace[256]{};
|
||||
alignas(16) bool number[256]{};
|
||||
} tables;
|
||||
|
||||
namespace {
|
||||
|
||||
// Straightforward recursive descent that doesn't handle string escaping and
|
||||
@@ -453,395 +367,6 @@ private:
|
||||
#define MUSTTAIL
|
||||
#endif
|
||||
|
||||
// Table-based ll(1) parser that doesn't handle escaping and all numbers, with a
|
||||
// streaming interface. Does not validate utf-8. Uses O(1) memory.
|
||||
struct Parser2 {
|
||||
Parser2(const Callbacks *callbacks, void *data)
|
||||
: callbacks(callbacks), data(data) {
|
||||
std::ignore = push({N_WHITESPACE, N_VALUE});
|
||||
}
|
||||
|
||||
void prime(char *buf, int len) {
|
||||
this->buf = buf;
|
||||
this->bufEnd = buf + len;
|
||||
}
|
||||
|
||||
enum Status {
|
||||
// Accept input
|
||||
S_OK,
|
||||
// Consumed available input. Prime more and parse again
|
||||
S_AGAIN,
|
||||
// Invalid json
|
||||
S_REJECT,
|
||||
// json is too deeply nested
|
||||
S_OVERFLOW,
|
||||
};
|
||||
|
||||
[[nodiscard]] Status parse() { return keepGoing(this); }
|
||||
|
||||
Parser2(Parser2 const &) = delete;
|
||||
Parser2 &operator=(Parser2 const &) = delete;
|
||||
Parser2(Parser2 &&) = delete;
|
||||
Parser2 &operator=(Parser2 &&) = delete;
|
||||
|
||||
static constexpr int kMaxStackSize = 1 << 10;
|
||||
|
||||
private:
|
||||
// Helpers
|
||||
void maybeSkipWs() {
|
||||
while (buf != bufEnd && tables.whitespace[*buf]) {
|
||||
++buf;
|
||||
}
|
||||
}
|
||||
Status parse_number() {
|
||||
char *const bufBefore = buf;
|
||||
while (len() > 0) {
|
||||
if (tables.number[*buf]) {
|
||||
++buf;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (buf != bufBefore) {
|
||||
callbacks->on_number_data(data, bufBefore, buf - bufBefore);
|
||||
}
|
||||
if (len() == 0) {
|
||||
return S_AGAIN;
|
||||
}
|
||||
callbacks->on_end_number(data);
|
||||
return S_OK;
|
||||
}
|
||||
Status parse_string(bool fromEscape) {
|
||||
auto *result = buf;
|
||||
if (fromEscape) {
|
||||
if (*result == '\"') {
|
||||
++result;
|
||||
}
|
||||
pop();
|
||||
if (Status s = push({N_STRING})) {
|
||||
return s;
|
||||
}
|
||||
}
|
||||
for (;;) {
|
||||
result = (char *)memchr(result, '"', bufEnd - result);
|
||||
if (result == nullptr) {
|
||||
callbacks->on_string_data(data, buf, len());
|
||||
if (bufEnd[-1] == '\\') {
|
||||
pop();
|
||||
if (Status s = push({N_STRING_FROM_ESCAPE})) {
|
||||
return s;
|
||||
}
|
||||
}
|
||||
return S_AGAIN;
|
||||
}
|
||||
if (result != buf && result[-1] == '\\') {
|
||||
++result;
|
||||
if (result == bufEnd) {
|
||||
callbacks->on_string_data(data, buf, len());
|
||||
return S_AGAIN;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
int stringLen = result - buf;
|
||||
if (stringLen > 0) {
|
||||
callbacks->on_string_data(data, buf, stringLen);
|
||||
}
|
||||
buf += stringLen + 1;
|
||||
callbacks->on_end_string(data);
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
typedef Status (*continuation)(Parser2 *);
|
||||
|
||||
[[maybe_unused]] void debugPrint() {
|
||||
for (int i = 0; i < stackPtr - stack; ++i) {
|
||||
printf("%s ", symbolNames[stack[i]]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
static Status keepGoing(Parser2 *self) {
|
||||
if (self->len() == 0) {
|
||||
return S_AGAIN;
|
||||
}
|
||||
// self->debugPrint();
|
||||
MUSTTAIL return table[*(self->stackPtr - 1)](self);
|
||||
}
|
||||
|
||||
static Status string(Parser2 *self) {
|
||||
if (Status s = self->parse_string(false)) {
|
||||
return s;
|
||||
}
|
||||
self->pop();
|
||||
if (self->empty()) {
|
||||
return S_OK;
|
||||
}
|
||||
MUSTTAIL return keepGoing(self);
|
||||
}
|
||||
static Status stringFromEscape(Parser2 *self) {
|
||||
if (Status s = self->parse_string(true)) {
|
||||
return s;
|
||||
}
|
||||
self->pop();
|
||||
if (self->empty()) {
|
||||
return S_OK;
|
||||
}
|
||||
MUSTTAIL return keepGoing(self);
|
||||
}
|
||||
static Status number(Parser2 *self) {
|
||||
if (Status s = self->parse_number()) {
|
||||
return s;
|
||||
}
|
||||
self->pop();
|
||||
if (self->empty()) {
|
||||
return S_OK;
|
||||
}
|
||||
MUSTTAIL return keepGoing(self);
|
||||
}
|
||||
static Status value(Parser2 *self) {
|
||||
switch (*self->buf) {
|
||||
case '{':
|
||||
++self->buf;
|
||||
self->callbacks->on_begin_object(self->data);
|
||||
self->pop();
|
||||
if (Status s = self->push({N_WHITESPACE, N_OBJECT_VALUE_OR_END})) {
|
||||
return s;
|
||||
}
|
||||
break;
|
||||
case '[':
|
||||
++self->buf;
|
||||
self->callbacks->on_begin_array(self->data);
|
||||
self->pop();
|
||||
if (Status s = self->push({N_WHITESPACE, N_ARRAY_VALUE_OR_END})) {
|
||||
return s;
|
||||
}
|
||||
break;
|
||||
case '"':
|
||||
++self->buf;
|
||||
self->pop();
|
||||
self->callbacks->on_begin_string(self->data);
|
||||
if (Status s = self->push({N_STRING})) {
|
||||
return s;
|
||||
}
|
||||
break;
|
||||
case 't':
|
||||
++self->buf;
|
||||
self->pop();
|
||||
if (Status s = self->push({T_R, T_U, T_TRUE})) {
|
||||
return s;
|
||||
}
|
||||
break;
|
||||
case 'f':
|
||||
++self->buf;
|
||||
self->pop();
|
||||
if (Status s = self->push({T_A, T_L, T_S, T_FALSE})) {
|
||||
return s;
|
||||
}
|
||||
break;
|
||||
case 'n':
|
||||
++self->buf;
|
||||
self->pop();
|
||||
if (Status s = self->push({T_U, T_L, T_NULL})) {
|
||||
return s;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
self->pop();
|
||||
self->callbacks->on_begin_number(self->data);
|
||||
if (Status s = self->push({N_NUMBER})) {
|
||||
return s;
|
||||
}
|
||||
break;
|
||||
}
|
||||
MUSTTAIL return keepGoing(self);
|
||||
}
|
||||
static Status arrayOrEnd(Parser2 *self) {
|
||||
if (*self->buf == ']') {
|
||||
++self->buf;
|
||||
self->pop();
|
||||
self->callbacks->on_end_array(self->data);
|
||||
if (self->empty()) {
|
||||
return S_OK;
|
||||
}
|
||||
MUSTTAIL return keepGoing(self);
|
||||
} else {
|
||||
self->pop();
|
||||
if (Status s =
|
||||
self->push({N_VALUE, N_WHITESPACE, N_ARRAY_MAYBE_CONTINUE})) {
|
||||
return s;
|
||||
}
|
||||
MUSTTAIL return keepGoing(self);
|
||||
}
|
||||
}
|
||||
static Status objectOrEnd(Parser2 *self) {
|
||||
if (*self->buf == '}') {
|
||||
++self->buf;
|
||||
self->pop();
|
||||
self->callbacks->on_end_object(self->data);
|
||||
if (self->empty()) {
|
||||
return S_OK;
|
||||
}
|
||||
MUSTTAIL return keepGoing(self);
|
||||
} else if (*self->buf == '"') {
|
||||
self->callbacks->on_begin_string(self->data);
|
||||
++self->buf;
|
||||
self->pop();
|
||||
if (Status s =
|
||||
self->push({N_STRING, N_WHITESPACE, T_COLON, N_WHITESPACE,
|
||||
N_VALUE, N_WHITESPACE, N_OBJECT_MAYBE_CONTINUE})) {
|
||||
return s;
|
||||
}
|
||||
MUSTTAIL return keepGoing(self);
|
||||
}
|
||||
return S_REJECT;
|
||||
}
|
||||
static Status arrayContinue(Parser2 *self) {
|
||||
if (*self->buf == ',') {
|
||||
++self->buf;
|
||||
self->pop();
|
||||
if (Status s = self->push(
|
||||
{N_WHITESPACE, N_VALUE, N_WHITESPACE, N_ARRAY_MAYBE_CONTINUE})) {
|
||||
return s;
|
||||
}
|
||||
MUSTTAIL return keepGoing(self);
|
||||
} else if (*self->buf == ']') {
|
||||
++self->buf;
|
||||
self->pop();
|
||||
self->callbacks->on_end_array(self->data);
|
||||
if (self->empty()) {
|
||||
return S_OK;
|
||||
}
|
||||
MUSTTAIL return keepGoing(self);
|
||||
}
|
||||
return S_REJECT;
|
||||
}
|
||||
static Status objectContinue(Parser2 *self) {
|
||||
if (*self->buf == ',') {
|
||||
++self->buf;
|
||||
self->pop();
|
||||
if (Status s = self->push({N_WHITESPACE, T_DUBQUOTE, N_STRING,
|
||||
N_WHITESPACE, T_COLON, N_WHITESPACE, N_VALUE,
|
||||
N_WHITESPACE, N_OBJECT_MAYBE_CONTINUE})) {
|
||||
return s;
|
||||
}
|
||||
MUSTTAIL return keepGoing(self);
|
||||
} else if (*self->buf == '}') {
|
||||
++self->buf;
|
||||
self->pop();
|
||||
self->callbacks->on_end_object(self->data);
|
||||
if (self->empty()) {
|
||||
return S_OK;
|
||||
}
|
||||
MUSTTAIL return keepGoing(self);
|
||||
}
|
||||
return S_REJECT;
|
||||
}
|
||||
static Status finishTrue(Parser2 *self) {
|
||||
if (*self->buf++ == 'e') {
|
||||
self->pop();
|
||||
self->callbacks->on_true_literal(self->data);
|
||||
if (self->empty()) {
|
||||
return S_OK;
|
||||
}
|
||||
MUSTTAIL return keepGoing(self);
|
||||
}
|
||||
return S_REJECT;
|
||||
}
|
||||
static Status finishFalse(Parser2 *self) {
|
||||
if (*self->buf++ == 'e') {
|
||||
self->pop();
|
||||
self->callbacks->on_false_literal(self->data);
|
||||
if (self->empty()) {
|
||||
return S_OK;
|
||||
}
|
||||
MUSTTAIL return keepGoing(self);
|
||||
}
|
||||
return S_REJECT;
|
||||
}
|
||||
static Status finishNull(Parser2 *self) {
|
||||
if (*self->buf++ == 'l') {
|
||||
self->pop();
|
||||
self->callbacks->on_null_literal(self->data);
|
||||
if (self->empty()) {
|
||||
return S_OK;
|
||||
}
|
||||
MUSTTAIL return keepGoing(self);
|
||||
}
|
||||
return S_REJECT;
|
||||
}
|
||||
template <char kChar> static Status singleChar(Parser2 *self) {
|
||||
if (*self->buf++ == kChar) {
|
||||
self->pop();
|
||||
MUSTTAIL return keepGoing(self);
|
||||
}
|
||||
return S_REJECT;
|
||||
}
|
||||
static Status dubquote(Parser2 *self) {
|
||||
if (*self->buf++ == '"') {
|
||||
self->callbacks->on_begin_string(self->data);
|
||||
self->pop();
|
||||
MUSTTAIL return keepGoing(self);
|
||||
}
|
||||
return S_REJECT;
|
||||
}
|
||||
static Status whitespace(Parser2 *self) {
|
||||
self->maybeSkipWs();
|
||||
if (self->len() == 0) {
|
||||
return S_AGAIN;
|
||||
}
|
||||
self->pop();
|
||||
MUSTTAIL return keepGoing(self);
|
||||
}
|
||||
|
||||
static constexpr continuation table[] = {
|
||||
/*T_COLON*/ singleChar<':'>,
|
||||
/*T_TRUE*/ finishTrue,
|
||||
/*T_FALSE*/ finishFalse,
|
||||
/*T_NULL*/ finishNull,
|
||||
/*T_R*/ singleChar<'r'>,
|
||||
/*T_U*/ singleChar<'u'>,
|
||||
/*T_A*/ singleChar<'a'>,
|
||||
/*T_L*/ singleChar<'l'>,
|
||||
/*T_S*/ singleChar<'s'>,
|
||||
/*T_DUBQUOTE*/ dubquote,
|
||||
/*N_STRING*/ string,
|
||||
/*N_STRING_FROM_ESCAPE*/ stringFromEscape,
|
||||
/*N_NUMBER*/ number,
|
||||
/*N_VALUE*/ value,
|
||||
/*N_ARRAY_VALUE_OR_END*/ arrayOrEnd,
|
||||
/*N_OBJECT_VALUE_OR_END*/ objectOrEnd,
|
||||
/*N_ARRAY_MAYBE_CONTINUE*/ arrayContinue,
|
||||
/*N_OBJECT_MAYBE_CONTINUE*/ objectContinue,
|
||||
/*N_WHITESPACE*/ whitespace,
|
||||
};
|
||||
|
||||
static_assert(sizeof(table) / sizeof(table[0]) == N_PAST_END);
|
||||
|
||||
char *buf = nullptr;
|
||||
char *bufEnd = nullptr;
|
||||
int len() const { return bufEnd - buf; }
|
||||
const Callbacks *const callbacks;
|
||||
void *const data;
|
||||
Symbol stack[kMaxStackSize];
|
||||
Symbol *stackPtr = stack;
|
||||
bool empty() const { return stackPtr == stack; }
|
||||
void pop() {
|
||||
assert(!empty());
|
||||
--stackPtr;
|
||||
}
|
||||
[[nodiscard]] Status push(std::initializer_list<Symbol> symbols) {
|
||||
if (stackPtr >= std::end(stack) - symbols.size()) [[unlikely]] {
|
||||
return S_OVERFLOW;
|
||||
}
|
||||
for (int i = symbols.size() - 1; i >= 0; --i) {
|
||||
*stackPtr++ = *(symbols.begin() + i);
|
||||
}
|
||||
return S_OK;
|
||||
}
|
||||
};
|
||||
|
||||
const std::string json = R"({
|
||||
"a number": 12345,
|
||||
"true": true,
|
||||
@@ -972,6 +497,24 @@ Callbacks minifyCallbacks() {
|
||||
return result;
|
||||
}
|
||||
|
||||
// Builds a Callbacks table in which every event is ignored. Used where only
// the parser's own work matters (e.g. the benchmarks), since the parser
// calls every callback unconditionally and none may be left unset.
Callbacks noopCallbacks() {
  // One do-nothing function per callback signature.
  void (*const ignoreEvent)(void *) = +[](void *) {};
  void (*const ignoreSlice)(void *, const char *, int) =
      +[](void *, const char *, int) {};

  Callbacks result;
  result.on_begin_object = ignoreEvent;
  result.on_end_object = ignoreEvent;
  result.on_begin_string = ignoreEvent;
  result.on_string_data = ignoreSlice;
  result.on_end_string = ignoreEvent;
  result.on_begin_array = ignoreEvent;
  result.on_end_array = ignoreEvent;
  result.on_begin_number = ignoreEvent;
  result.on_number_data = ignoreSlice;
  result.on_end_number = ignoreEvent;
  result.on_true_literal = ignoreEvent;
  result.on_false_literal = ignoreEvent;
  result.on_null_literal = ignoreEvent;
  return result;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
TEST_CASE("parser1") {
|
||||
@@ -1016,7 +559,7 @@ TEST_CASE("parser2") {
|
||||
}
|
||||
|
||||
TEST_CASE("bench1") {
|
||||
auto c = Callbacks{};
|
||||
auto c = noopCallbacks();
|
||||
ankerl::nanobench::Bench bench;
|
||||
bench.batch(json.size());
|
||||
bench.unit("byte");
|
||||
@@ -1028,7 +571,7 @@ TEST_CASE("bench1") {
|
||||
}
|
||||
|
||||
TEST_CASE("bench2") {
|
||||
auto c = Callbacks{};
|
||||
auto c = noopCallbacks();
|
||||
ankerl::nanobench::Bench bench;
|
||||
bench.batch(json.size());
|
||||
bench.unit("byte");
|
||||
|
||||
Reference in New Issue
Block a user