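// JSON parsing experiments: a recursive-descent parser (Parser1) and a
// table-driven pushdown-automaton parser (Parser2), with tests and benchmarks.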
#include <cassert>
|
|
#include <cstdint>
|
|
#include <cstdio>
|
|
#include <cstring>
|
|
|
|
#include <initializer_list>
|
|
#include <string>
|
|
#include <utility>
|
|
|
|
#include <doctest.h>
|
|
#include <nanobench.h>
|
|
|
|
// This is the JSON grammar in McKeeman Form.
//
// json
//     element
//
// value
//     object
//     array
//     string
//     number
//     "true"
//     "false"
//     "null"
//
// object
//     '{' ws '}'
//     '{' members '}'
//
// members
//     member
//     member ',' members
//
// member
//     ws string ws ':' element
//
// array
//     '[' ws ']'
//     '[' elements ']'
//
// elements
//     element
//     element ',' elements
//
// element
//     ws value ws
//
// string
//     '"' characters '"'
//
// characters
//     ""
//     character characters
//
// character
//     '0020' . '10FFFF' - '"' - '\'
//     '\' escape
//
// escape
//     '"'
//     '\'
//     '/'
//     'b'
//     'f'
//     'n'
//     'r'
//     't'
//     'u' hex hex hex hex
//
// hex
//     digit
//     'A' . 'F'
//     'a' . 'f'
//
// number
//     integer fraction exponent
//
// integer
//     digit
//     onenine digits
//     '-' digit
//     '-' onenine digits
//
// digits
//     digit
//     digit digits
//
// digit
//     '0'
//     onenine
//
// onenine
//     '1' . '9'
//
// fraction
//     ""
//     '.' digits
//
// exponent
//     ""
//     'E' sign digits
//     'e' sign digits
//
// sign
//     ""
//     '+'
//     '-'
//
// ws
//     ""
//     '0020' ws
//     '000A' ws
//     '000D' ws
//     '0009' ws
|
|
|
|
// Event hooks the parsers invoke as they recognize JSON constructs.
//
// Every hook receives the opaque `data` pointer that was handed to the parser.
// The two `*_data` hooks additionally receive a (pointer, length) slice of the
// input. Each hook defaults to a function that does nothing, so callers only
// assign the ones they are interested in.
struct Callbacks {
private:
  // The two hook shapes used below.
  using Event = void (*)(void *data);
  using Chunk = void (*)(void *data, const char *buf, int len);

  // Default targets; the overload is picked by the type of the hook it
  // initializes.
  static void noop(void *) {}
  static void noop(void *, const char *, int) {}

public:
  // Objects
  Event on_begin_object = noop;
  Event on_end_object = noop;

  // Strings
  Event on_begin_string = noop;
  Chunk on_string_data = noop;
  Event on_end_string = noop;

  // Arrays
  Event on_begin_array = noop;
  Event on_end_array = noop;

  // Numbers
  Event on_begin_number = noop;
  Chunk on_number_data = noop;
  Event on_end_number = noop;

  // Literals
  Event on_true_literal = noop;
  Event on_false_literal = noop;
  Event on_null_literal = noop;
};
|
|
|
|
// Grammar symbols: terminals (token kinds) followed by nonterminals. These are
// the values stored on the stack of the pushdown automaton. Nonterminals
// continue the numbering immediately after the last terminal.
enum Symbol : int8_t {
  // --- Terminals ---
  T_INVALID = 0,
  T_EOF = 1,
  T_LBRACE = 2,
  T_RBRACE = 3,
  T_COMMA = 4,
  T_ATOM = 5,   // Multibyte!
  T_STRING = 6, // Multibyte!
  T_LBRACKET = 7,
  T_RBRACKET = 8,
  T_COLON = 9,
  T_PAST_END = 10, // Must be last terminal

  // --- Nonterminals ---
  N_VALUE = T_PAST_END,
  N_ARRAY_MAYBE_CONTINUE = 11,
  N_OBJECT = 12,
  N_OBJECT_MAYBE_CONTINUE = 13,
  N_PAST_END = 14, // Must be last nonterminal
};
|
|
|
|
// Printable name for each grammar symbol, indexed by the symbol's numeric
// value. The entries must stay in the same order as the enumerators.
const char *symbolNames[] = {
    // Terminals
    "T_INVALID",
    "T_EOF",
    "T_LBRACE",
    "T_RBRACE",
    "T_COMMA",
    "T_ATOM",
    "T_STRING",
    "T_LBRACKET",
    "T_RBRACKET",
    "T_COLON",
    // Nonterminals
    "N_VALUE",
    "N_ARRAY_MAYBE_CONTINUE",
    "N_OBJECT",
    "N_OBJECT_MAYBE_CONTINUE",
    "N_PAST_END",
};
|
|
|
|
namespace {
|
|
|
|
// True when `x` is one of the four whitespace bytes of the JSON grammar:
// space, line feed, carriage return or horizontal tab.
bool whitespace(char x) {
  switch (x) {
  case 0x20: // space
  case 0x0A: // line feed
  case 0x0D: // carriage return
  case 0x09: // horizontal tab
    return true;
  default:
    return false;
  }
}
|
|
|
|
// Straightforward recursive descent that doesn't handle string escaping and
|
|
// treats numbers as [0-9]+
|
|
struct Parser1 {
|
|
Parser1(char *buf, int len, const Callbacks *callbacks, void *data)
|
|
: buf(buf), len(len), callbacks(callbacks), data(data) {}
|
|
|
|
// Returns false to reject
|
|
[[nodiscard]] bool parse() { return parse_element(); }
|
|
|
|
Parser1(Parser1 const &) = delete;
|
|
Parser1 &operator=(Parser1 const &) = delete;
|
|
Parser1(Parser1 &&) = delete;
|
|
Parser1 &operator=(Parser1 &&) = delete;
|
|
|
|
private:
|
|
char *buf;
|
|
int len;
|
|
const Callbacks *const callbacks;
|
|
void *const data;
|
|
|
|
// Helpers
|
|
void maybeSkipWs() {
|
|
while (len > 0 && whitespace(*buf)) {
|
|
++buf;
|
|
--len;
|
|
}
|
|
}
|
|
bool parseLiteral(const char *literal) {
|
|
const int litLen = strlen(literal);
|
|
if (len < litLen) {
|
|
return false;
|
|
}
|
|
len -= litLen;
|
|
return memcmp(std::exchange(buf, buf + litLen), literal, litLen) == 0;
|
|
}
|
|
|
|
// functions corresponding to productions
|
|
bool parse_element() {
|
|
maybeSkipWs();
|
|
if (len == 0) {
|
|
return false;
|
|
}
|
|
if (*buf == '{') {
|
|
if (!parse_object()) {
|
|
return false;
|
|
}
|
|
} else if (*buf == '[') {
|
|
if (!parse_array()) {
|
|
return false;
|
|
}
|
|
} else if (*buf == '"') {
|
|
if (!parse_string()) {
|
|
return false;
|
|
}
|
|
} else if (*buf == 't') {
|
|
if (!parse_true()) {
|
|
return false;
|
|
}
|
|
} else if (*buf == 'f') {
|
|
if (!parse_false()) {
|
|
return false;
|
|
}
|
|
} else if (*buf == 'n') {
|
|
if (!parse_null()) {
|
|
return false;
|
|
}
|
|
} else {
|
|
if (!parse_number()) {
|
|
return false;
|
|
}
|
|
}
|
|
maybeSkipWs();
|
|
return true;
|
|
}
|
|
|
|
bool parse_object() {
|
|
if (!parseLiteral("{")) {
|
|
return false;
|
|
}
|
|
callbacks->on_begin_object(data);
|
|
maybeSkipWs();
|
|
if (len == 0) {
|
|
return false;
|
|
}
|
|
if (*buf != '}') {
|
|
if (!parse_members()) {
|
|
}
|
|
}
|
|
if (!parseLiteral("}")) {
|
|
return false;
|
|
}
|
|
callbacks->on_end_object(data);
|
|
return true;
|
|
}
|
|
|
|
bool parse_members() {
|
|
begin:
|
|
if (!parse_member()) {
|
|
return false;
|
|
}
|
|
if (len == 0) {
|
|
return false;
|
|
}
|
|
if (*buf == ',') {
|
|
if (!parseLiteral(",")) {
|
|
return false;
|
|
}
|
|
goto begin; // tail call
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool parse_member() {
|
|
maybeSkipWs();
|
|
if (!parse_string()) {
|
|
return false;
|
|
}
|
|
maybeSkipWs();
|
|
if (!parseLiteral(":")) {
|
|
return false;
|
|
}
|
|
if (!parse_element()) {
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool parse_array() {
|
|
if (!parseLiteral("[")) {
|
|
return false;
|
|
}
|
|
callbacks->on_begin_array(data);
|
|
maybeSkipWs();
|
|
if (len == 0) {
|
|
return false;
|
|
}
|
|
if (*buf != ']') {
|
|
if (!parse_elements()) {
|
|
return false;
|
|
}
|
|
}
|
|
if (!parseLiteral("]")) {
|
|
return false;
|
|
}
|
|
callbacks->on_end_array(data);
|
|
return true;
|
|
}
|
|
|
|
bool parse_elements() {
|
|
begin:
|
|
if (!parse_element()) {
|
|
return false;
|
|
}
|
|
if (len == 0) {
|
|
return false;
|
|
}
|
|
if (*buf == ',') {
|
|
if (!parseLiteral(",")) {
|
|
return false;
|
|
}
|
|
goto begin; // tail call
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool parse_string() {
|
|
callbacks->on_begin_string(data);
|
|
if (!parseLiteral("\"")) {
|
|
return false;
|
|
}
|
|
auto *result = (char *)memchr(buf, '"', len);
|
|
if (result == nullptr) {
|
|
return false;
|
|
}
|
|
int stringLen = result - buf;
|
|
callbacks->on_string_data(data, buf, stringLen);
|
|
buf += stringLen;
|
|
len -= stringLen;
|
|
if (!parseLiteral("\"")) {
|
|
return false;
|
|
}
|
|
callbacks->on_end_string(data);
|
|
return true;
|
|
}
|
|
|
|
bool parse_number() {
|
|
callbacks->on_begin_number(data);
|
|
char *const bufBefore = buf;
|
|
for (;;) {
|
|
if (len == 0) {
|
|
return false;
|
|
}
|
|
if ('0' <= *buf && *buf <= '9') {
|
|
++buf;
|
|
--len;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
if (buf == bufBefore) {
|
|
return false;
|
|
}
|
|
callbacks->on_number_data(data, bufBefore, buf - bufBefore);
|
|
callbacks->on_end_number(data);
|
|
return true;
|
|
}
|
|
|
|
bool parse_true() {
|
|
if (!parseLiteral("true")) {
|
|
return false;
|
|
}
|
|
callbacks->on_true_literal(data);
|
|
return true;
|
|
}
|
|
|
|
bool parse_false() {
|
|
if (!parseLiteral("false")) {
|
|
return false;
|
|
}
|
|
callbacks->on_false_literal(data);
|
|
return true;
|
|
}
|
|
|
|
bool parse_null() {
|
|
if (!parseLiteral("null")) {
|
|
return false;
|
|
}
|
|
callbacks->on_null_literal(data);
|
|
return true;
|
|
}
|
|
};
|
|
|
|
// MUSTTAIL is placed in front of a `return` statement. It expands to
// __attribute__((musttail)) when the compiler reports support for that
// attribute and to nothing otherwise. Compilers that lack __has_attribute
// altogether get a fallback that reports every attribute as unsupported.
#if !defined(__has_attribute)
#define __has_attribute(x) 0
#endif

#if !__has_attribute(musttail)
#define MUSTTAIL
#else
#define MUSTTAIL __attribute__((musttail))
#endif
|
|
|
|
struct Parser2 {
|
|
Parser2(char *buf, int len, const Callbacks *callbacks, void *data)
|
|
: buf(buf), len(len), callbacks(callbacks), data(data) {}
|
|
|
|
// Returns false to reject
|
|
[[nodiscard]] bool parse() {
|
|
if (!push({N_VALUE})) {
|
|
return false;
|
|
}
|
|
return keepGoing(this);
|
|
}
|
|
|
|
Parser2(Parser2 const &) = delete;
|
|
Parser2 &operator=(Parser2 const &) = delete;
|
|
Parser2(Parser2 &&) = delete;
|
|
Parser2 &operator=(Parser2 &&) = delete;
|
|
|
|
static constexpr int kMaxStackSize = 1 << 10;
|
|
|
|
private:
|
|
// Helpers
|
|
void maybeSkipWs() {
|
|
while (len > 0 && whitespace(*buf)) {
|
|
++buf;
|
|
--len;
|
|
}
|
|
}
|
|
bool parseLiteral(const char *literal) {
|
|
const int litLen = strlen(literal);
|
|
if (len < litLen) {
|
|
return false;
|
|
}
|
|
len -= litLen;
|
|
return memcmp(std::exchange(buf, buf + litLen), literal, litLen) == 0;
|
|
}
|
|
bool parse_number() {
|
|
callbacks->on_begin_number(data);
|
|
char *const bufBefore = buf;
|
|
for (;;) {
|
|
if (len == 0) {
|
|
return false;
|
|
}
|
|
if ('0' <= *buf && *buf <= '9') {
|
|
++buf;
|
|
--len;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
if (buf == bufBefore) {
|
|
return false;
|
|
}
|
|
callbacks->on_number_data(data, bufBefore, buf - bufBefore);
|
|
callbacks->on_end_number(data);
|
|
return true;
|
|
}
|
|
bool parse_string() {
|
|
callbacks->on_begin_string(data);
|
|
if (!parseLiteral("\"")) {
|
|
return false;
|
|
}
|
|
auto *result = (char *)memchr(buf, '"', len);
|
|
if (result == nullptr) {
|
|
return false;
|
|
}
|
|
int stringLen = result - buf;
|
|
callbacks->on_string_data(data, buf, stringLen);
|
|
buf += stringLen;
|
|
len -= stringLen;
|
|
if (!parseLiteral("\"")) {
|
|
return false;
|
|
}
|
|
callbacks->on_end_string(data);
|
|
return true;
|
|
}
|
|
|
|
typedef bool (*continuation)(Parser2 *);
|
|
|
|
[[maybe_unused]] void debugPrint(Symbol token) {
|
|
printf("token: %s\n", symbolNames[token]);
|
|
for (int i = 0; i < stackPtr - stack; ++i) {
|
|
printf("%s ", symbolNames[stack[i]]);
|
|
}
|
|
printf("\n");
|
|
}
|
|
|
|
static bool tokenMatch(Parser2 *self) {
|
|
self->pop();
|
|
MUSTTAIL return keepGoing(self);
|
|
}
|
|
|
|
static bool keepGoing(Parser2 *self) {
|
|
if (self->empty()) {
|
|
return true;
|
|
}
|
|
auto token = self->nextToken();
|
|
// self->debugPrint(token);
|
|
MUSTTAIL return table[*(self->stackPtr - 1)][token](self);
|
|
}
|
|
|
|
static bool reject(Parser2 *self) {
|
|
self->pop();
|
|
return false;
|
|
}
|
|
static bool object(Parser2 *self) {
|
|
self->pop();
|
|
self->callbacks->on_begin_object(self->data);
|
|
if (!self->push({T_STRING, T_COLON, N_VALUE, N_OBJECT_MAYBE_CONTINUE})) {
|
|
return false;
|
|
}
|
|
MUSTTAIL return keepGoing(self);
|
|
}
|
|
static bool atom(Parser2 *self) {
|
|
self->pop();
|
|
if (*self->bufBefore == 't') {
|
|
self->callbacks->on_true_literal(self->data);
|
|
} else if (*self->bufBefore == 'f') {
|
|
self->callbacks->on_false_literal(self->data);
|
|
} else if (*self->bufBefore == 'n') {
|
|
self->callbacks->on_null_literal(self->data);
|
|
} else {
|
|
self->callbacks->on_begin_number(self->data);
|
|
self->callbacks->on_number_data(self->data, self->bufBefore + 1,
|
|
self->buf - self->bufBefore - 2);
|
|
self->callbacks->on_end_number(self->data);
|
|
}
|
|
MUSTTAIL return keepGoing(self);
|
|
}
|
|
static bool string(Parser2 *self) {
|
|
self->pop();
|
|
MUSTTAIL return keepGoing(self);
|
|
}
|
|
static bool array(Parser2 *self) {
|
|
self->pop();
|
|
self->callbacks->on_begin_array(self->data);
|
|
if (!self->push({N_VALUE, N_ARRAY_MAYBE_CONTINUE})) {
|
|
return false;
|
|
}
|
|
MUSTTAIL return keepGoing(self);
|
|
}
|
|
static bool continueArray(Parser2 *self) {
|
|
self->pop();
|
|
if (!self->push({N_VALUE, N_ARRAY_MAYBE_CONTINUE})) {
|
|
return false;
|
|
}
|
|
MUSTTAIL return keepGoing(self);
|
|
}
|
|
static bool continueObject(Parser2 *self) {
|
|
self->pop();
|
|
if (!self->push({T_STRING, T_COLON, N_VALUE, N_OBJECT_MAYBE_CONTINUE})) {
|
|
return false;
|
|
}
|
|
MUSTTAIL return keepGoing(self);
|
|
}
|
|
static bool finishArray(Parser2 *self) {
|
|
self->pop();
|
|
self->callbacks->on_end_array(self->data);
|
|
MUSTTAIL return keepGoing(self);
|
|
}
|
|
static bool finishObject(Parser2 *self) {
|
|
self->pop();
|
|
self->callbacks->on_end_object(self->data);
|
|
MUSTTAIL return keepGoing(self);
|
|
}
|
|
|
|
// table[nonterminal][terminal]
|
|
static constexpr continuation table[N_PAST_END][T_PAST_END] = {
|
|
/*T_INVALID*/
|
|
{
|
|
/*T_INVALID*/ reject,
|
|
/*T_EOF*/ reject,
|
|
/*T_LBRACE*/ reject,
|
|
/*T_RBRACE*/ reject,
|
|
/*T_COMMA*/ reject,
|
|
/*T_ATOM*/ reject,
|
|
/*T_STRING*/ reject,
|
|
/*T_LBRACKET*/ reject,
|
|
/*T_RBRACKET*/ reject,
|
|
/*T_COLON*/ reject,
|
|
},
|
|
/*T_EOF*/
|
|
{
|
|
/*T_INVALID*/ reject,
|
|
/*T_EOF*/ tokenMatch,
|
|
/*T_LBRACE*/ reject,
|
|
/*T_RBRACE*/ reject,
|
|
/*T_COMMA*/ reject,
|
|
/*T_ATOM*/ reject,
|
|
/*T_STRING*/ reject,
|
|
/*T_LBRACKET*/ reject,
|
|
/*T_RBRACKET*/ reject,
|
|
/*T_COLON*/ reject,
|
|
},
|
|
/*T_LBRACE*/
|
|
{
|
|
/*T_INVALID*/ reject,
|
|
/*T_EOF*/ reject,
|
|
/*T_LBRACE*/ tokenMatch,
|
|
/*T_RBRACE*/ reject,
|
|
/*T_COMMA*/ reject,
|
|
/*T_ATOM*/ reject,
|
|
/*T_STRING*/ reject,
|
|
/*T_LBRACKET*/ reject,
|
|
/*T_RBRACKET*/ reject,
|
|
/*T_COLON*/ reject,
|
|
},
|
|
/*T_RBRACE*/
|
|
{
|
|
/*T_INVALID*/ reject,
|
|
/*T_EOF*/ reject,
|
|
/*T_LBRACE*/ reject,
|
|
/*T_RBRACE*/ tokenMatch,
|
|
/*T_COMMA*/ reject,
|
|
/*T_ATOM*/ reject,
|
|
/*T_STRING*/ reject,
|
|
/*T_LBRACKET*/ reject,
|
|
/*T_RBRACKET*/ reject,
|
|
/*T_COLON*/ reject,
|
|
},
|
|
/*T_COMMA*/
|
|
{
|
|
/*T_INVALID*/ reject,
|
|
/*T_EOF*/ reject,
|
|
/*T_LBRACE*/ reject,
|
|
/*T_RBRACE*/ reject,
|
|
/*T_COMMA*/ tokenMatch,
|
|
/*T_ATOM*/ reject,
|
|
/*T_STRING*/ reject,
|
|
/*T_LBRACKET*/ reject,
|
|
/*T_RBRACKET*/ reject,
|
|
/*T_COLON*/ reject,
|
|
},
|
|
/*T_ATOM*/
|
|
{
|
|
/*T_INVALID*/ reject,
|
|
/*T_EOF*/ reject,
|
|
/*T_LBRACE*/ reject,
|
|
/*T_RBRACE*/ reject,
|
|
/*T_COMMA*/ reject,
|
|
/*T_ATOM*/ tokenMatch,
|
|
/*T_STRING*/ reject,
|
|
/*T_LBRACKET*/ reject,
|
|
/*T_RBRACKET*/ reject,
|
|
/*T_COLON*/ reject,
|
|
},
|
|
/*T_STRING*/
|
|
{
|
|
/*T_INVALID*/ reject,
|
|
/*T_EOF*/ reject,
|
|
/*T_LBRACE*/ reject,
|
|
/*T_RBRACE*/ reject,
|
|
/*T_COMMA*/ reject,
|
|
/*T_ATOM*/ reject,
|
|
/*T_STRING*/ tokenMatch,
|
|
/*T_LBRACKET*/ reject,
|
|
/*T_RBRACKET*/ reject,
|
|
/*T_COLON*/ reject,
|
|
},
|
|
/*T_LBRACKET*/
|
|
{
|
|
/*T_INVALID*/ reject,
|
|
/*T_EOF*/ reject,
|
|
/*T_LBRACE*/ reject,
|
|
/*T_RBRACE*/ reject,
|
|
/*T_COMMA*/ reject,
|
|
/*T_ATOM*/ reject,
|
|
/*T_STRING*/ reject,
|
|
/*T_LBRACKET*/ tokenMatch,
|
|
/*T_RBRACKET*/ reject,
|
|
/*T_COLON*/ reject,
|
|
},
|
|
/*T_RBRACKET*/
|
|
{
|
|
/*T_INVALID*/ reject,
|
|
/*T_EOF*/ reject,
|
|
/*T_LBRACE*/ reject,
|
|
/*T_RBRACE*/ reject,
|
|
/*T_COMMA*/ reject,
|
|
/*T_ATOM*/ reject,
|
|
/*T_STRING*/ reject,
|
|
/*T_LBRACKET*/ reject,
|
|
/*T_RBRACKET*/ tokenMatch,
|
|
/*T_COLON*/ reject,
|
|
},
|
|
/*T_COLON*/
|
|
{
|
|
/*T_INVALID*/ reject,
|
|
/*T_EOF*/ reject,
|
|
/*T_LBRACE*/ reject,
|
|
/*T_RBRACE*/ reject,
|
|
/*T_COMMA*/ reject,
|
|
/*T_ATOM*/ reject,
|
|
/*T_STRING*/ reject,
|
|
/*T_LBRACKET*/ reject,
|
|
/*T_RBRACKET*/ reject,
|
|
/*T_COLON*/ tokenMatch,
|
|
},
|
|
/*N_VALUE*/
|
|
{
|
|
/*T_INVALID*/ reject,
|
|
/*T_EOF*/ reject,
|
|
/*T_LBRACE*/ object,
|
|
/*T_RBRACE*/ reject,
|
|
/*T_COMMA*/ reject,
|
|
/*T_ATOM*/ atom,
|
|
/*T_STRING*/ string,
|
|
/*T_LBRACKET*/ array,
|
|
/*T_RBRACKET*/ reject,
|
|
/*T_COLON*/ reject,
|
|
},
|
|
/*N_ARRAY_MAYBE_CONTINUE*/
|
|
{
|
|
/*T_INVALID*/ reject,
|
|
/*T_EOF*/ reject,
|
|
/*T_LBRACE*/ reject,
|
|
/*T_RBRACE*/ reject,
|
|
/*T_COMMA*/ continueArray,
|
|
/*T_ATOM*/ reject,
|
|
/*T_STRING*/ reject,
|
|
/*T_LBRACKET*/ reject,
|
|
/*T_RBRACKET*/ finishArray,
|
|
/*T_COLON*/ reject,
|
|
},
|
|
/*N_OBJECT*/
|
|
{
|
|
/*T_INVALID*/ reject,
|
|
/*T_EOF*/ reject,
|
|
/*T_LBRACE*/ object,
|
|
/*T_RBRACE*/ reject,
|
|
/*T_COMMA*/ reject,
|
|
/*T_ATOM*/ reject,
|
|
/*T_STRING*/ reject,
|
|
/*T_LBRACKET*/ reject,
|
|
/*T_RBRACKET*/ reject,
|
|
/*T_COLON*/ reject,
|
|
},
|
|
/*N_OBJECT_MAYBE_CONTINUE*/
|
|
{
|
|
/*T_INVALID*/ reject,
|
|
/*T_EOF*/ reject,
|
|
/*T_LBRACE*/ reject,
|
|
/*T_RBRACE*/ finishObject,
|
|
/*T_COMMA*/ continueObject,
|
|
/*T_ATOM*/ reject,
|
|
/*T_STRING*/ reject,
|
|
/*T_LBRACKET*/ reject,
|
|
/*T_RBRACKET*/ reject,
|
|
/*T_COLON*/ reject,
|
|
},
|
|
};
|
|
|
|
const char *bufBefore;
|
|
Symbol nextToken() {
|
|
maybeSkipWs();
|
|
bufBefore = buf;
|
|
if (len == 0) {
|
|
return T_EOF;
|
|
}
|
|
if (*buf == '{') {
|
|
parseLiteral("{");
|
|
return T_LBRACE;
|
|
} else if (*buf == '[') {
|
|
parseLiteral("[");
|
|
return T_LBRACKET;
|
|
} else if (*buf == '}') {
|
|
parseLiteral("}");
|
|
return T_RBRACE;
|
|
} else if (*buf == ']') {
|
|
parseLiteral("]");
|
|
return T_RBRACKET;
|
|
} else if (*buf == ':') {
|
|
parseLiteral(":");
|
|
return T_COLON;
|
|
} else if (*buf == ',') {
|
|
parseLiteral(",");
|
|
return T_COMMA;
|
|
} else if (*buf == '"') {
|
|
if (!parse_string()) {
|
|
return T_INVALID;
|
|
}
|
|
return T_STRING;
|
|
} else if (*buf == 't') {
|
|
if (!parseLiteral("true")) {
|
|
return T_INVALID;
|
|
}
|
|
return T_ATOM;
|
|
} else if (*buf == 'f') {
|
|
if (!parseLiteral("false")) {
|
|
return T_INVALID;
|
|
}
|
|
} else if (*buf == 'n') {
|
|
if (!parseLiteral("null")) {
|
|
return T_INVALID;
|
|
}
|
|
} else {
|
|
if (!parse_number()) {
|
|
return T_INVALID;
|
|
}
|
|
}
|
|
return T_ATOM;
|
|
}
|
|
|
|
char *buf;
|
|
int len;
|
|
const Callbacks *const callbacks;
|
|
void *const data;
|
|
Symbol stack[kMaxStackSize];
|
|
Symbol *stackPtr = stack;
|
|
bool empty() { return stackPtr == stack; }
|
|
void pop() {
|
|
assert(!empty());
|
|
--stackPtr;
|
|
}
|
|
[[nodiscard]] bool push(std::initializer_list<Symbol> symbols) {
|
|
if (stackPtr >= std::end(stack) - symbols.size()) [[unlikely]] {
|
|
return false;
|
|
}
|
|
for (int i = symbols.size() - 1; i >= 0; --i) {
|
|
*stackPtr++ = *(symbols.begin() + i);
|
|
}
|
|
return true;
|
|
}
|
|
};
|
|
|
|
// Sample JSON document used as input by the tests and benchmarks. It contains
// nested objects, strings and one array of strings.
const std::string json = R"({
    "glossary": {
        "title": "example glossary",
        "GlossDiv": {
            "title": "S",
            "GlossList": {
                "GlossEntry": {
                    "ID": "SGML",
                    "SortAs": "SGML",
                    "GlossTerm": "Standard Generalized Markup Language",
                    "Acronym": "SGML",
                    "Abbrev": "ISO 8879:1986",
                    "GlossDef": {
                        "para": "A meta-markup language, used to create markup languages such as DocBook.",
                        "GlossSeeAlso": ["GML", "XML"]
                    },
                    "GlossSee": "markup"
                }
            }
        }
    }
})";
|
|
|
|
// Builds a Callbacks in which every hook writes its own name to stdout. The
// two data hooks also print the slice they were given, wrapped in backticks.
Callbacks printCallbacks() {
  // One static function per hook; none of them uses the `data` pointer.
  struct Print {
    static void beginObject(void *) { puts("on_begin_object"); }
    static void endObject(void *) { puts("on_end_object"); }
    static void beginString(void *) { puts("on_begin_string"); }
    static void stringData(void *, const char *buf, int len) {
      printf("on_string_data `%.*s`\n", len, buf);
    }
    static void endString(void *) { puts("on_end_string"); }
    static void beginArray(void *) { puts("on_begin_array"); }
    static void endArray(void *) { puts("on_end_array"); }
    static void beginNumber(void *) { puts("on_begin_number"); }
    static void numberData(void *, const char *buf, int len) {
      printf("on_number_data `%.*s`\n", len, buf);
    }
    static void endNumber(void *) { puts("on_end_number"); }
    static void trueLiteral(void *) { puts("on_true_literal"); }
    static void falseLiteral(void *) { puts("on_false_literal"); }
    static void nullLiteral(void *) { puts("on_null_literal"); }
  };

  Callbacks result;
  result.on_begin_object = Print::beginObject;
  result.on_end_object = Print::endObject;
  result.on_begin_string = Print::beginString;
  result.on_string_data = Print::stringData;
  result.on_end_string = Print::endString;
  result.on_begin_array = Print::beginArray;
  result.on_end_array = Print::endArray;
  result.on_begin_number = Print::beginNumber;
  result.on_number_data = Print::numberData;
  result.on_end_number = Print::endNumber;
  result.on_true_literal = Print::trueLiteral;
  result.on_false_literal = Print::falseLiteral;
  result.on_null_literal = Print::nullLiteral;
  return result;
}
|
|
|
|
} // namespace
|
|
|
|
// Parser1 must accept the sample document. Every callback is echoed to
// stdout.
TEST_CASE("parser1") {
  const Callbacks printer = printCallbacks();
  // The parser takes a mutable buffer, so it works on a private copy.
  std::string input = json;
  Parser1 parser(input.data(), input.length(), &printer, nullptr);
  CHECK(parser.parse());
}
|
|
|
|
// Parser2 must accept the sample document. Every callback is echoed to
// stdout.
TEST_CASE("parser2") {
  const Callbacks printer = printCallbacks();
  // The parser takes a mutable buffer, so it works on a private copy.
  std::string input = json;
  Parser2 parser(input.data(), input.length(), &printer, nullptr);
  CHECK(parser.parse());
}
|
|
|
|
// Measures Parser1 on the sample document with do-nothing callbacks; results
// are reported per input byte. Each iteration includes copying the document.
TEST_CASE("bench1") {
  const Callbacks silent{};
  ankerl::nanobench::Bench bench;
  bench.batch(json.size()).unit("byte");
  bench.run("parser1", [&]() {
    std::string input = json;
    Parser1 parser(input.data(), input.length(), &silent, nullptr);
    bench.doNotOptimizeAway(parser.parse());
  });
}
|
|
|
|
// Measures Parser2 on the sample document with do-nothing callbacks; results
// are reported per input byte. Each iteration includes copying the document.
TEST_CASE("bench2") {
  const Callbacks silent{};
  ankerl::nanobench::Bench bench;
  bench.batch(json.size()).unit("byte");
  bench.run("parser2", [&]() {
    std::string input = json;
    Parser2 parser(input.data(), input.length(), &silent, nullptr);
    bench.doNotOptimizeAway(parser.parse());
  });
}
|