diff --git a/CMakeLists.txt b/CMakeLists.txt index 22a038d..ce89c5d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,5 +45,6 @@ include(CTest) include(doctest) add_executable(mytest src/test.cpp) +target_include_directories(mytest PRIVATE include) target_link_libraries(mytest PRIVATE doctest nanobench simdjson) doctest_discover_tests(mytest) diff --git a/include/weaseljson.h b/include/weaseljson.h new file mode 100644 index 0000000..7dbe201 --- /dev/null +++ b/include/weaseljson.h @@ -0,0 +1,20 @@ +#ifndef WEASELJSON_H +#define WEASELJSON_H + +/* Table of event callbacks. Every callback receives an opaque data pointer; on_string_data and on_number_data additionally receive a buffer pointer and a length. */ +struct Callbacks { + void (*on_begin_object)(void *data); + void (*on_end_object)(void *data); + void (*on_begin_string)(void *data); + void (*on_string_data)(void *data, const char *buf, int len); + void (*on_end_string)(void *data); + void (*on_begin_array)(void *data); + void (*on_end_array)(void *data); + void (*on_begin_number)(void *data); + void (*on_number_data)(void *data, const char *buf, int len); + void (*on_end_number)(void *data); + void (*on_true_literal)(void *data); + void (*on_false_literal)(void *data); + void (*on_null_literal)(void *data); +}; + +#endif diff --git a/src/musttail.h b/src/musttail.h new file mode 100644 index 0000000..667e6d1 --- /dev/null +++ b/src/musttail.h @@ -0,0 +1,11 @@ +#pragma once + +/* MUSTTAIL expands to __attribute__((musttail)) when __has_attribute reports the attribute, and to nothing otherwise. */ +#ifndef __has_attribute +#define __has_attribute(x) 0 +#endif + +#if __has_attribute(musttail) +#define MUSTTAIL __attribute__((musttail)) +#else +#define MUSTTAIL +#endif diff --git a/src/parser.h b/src/parser.h new file mode 100644 index 0000000..b8e0898 --- /dev/null +++ b/src/parser.h @@ -0,0 +1,452 @@ +#pragma once + +/* NOTE(review): the header names on the #include lines below are missing from this copy. The code in this header uses assert, int8_t, printf, memchr, std::initializer_list, std::end and std::ignore -- confirm the include list against the repository. */ +#include +#include +#include +#include +#include +#include +#include + +#include "musttail.h" +#include "tables.h" +#include "weaseljson.h" + +// Terminals and Nonterminals. 
These appear in the stack of the pushdown +// automaton +enum Symbol : int8_t { + T_COLON, + T_TRUE, + T_FALSE, + T_NULL, + T_R, + T_U, + T_A, + T_L, + T_S, + T_DUBQUOTE, + // Nonterminals + N_STRING, // Not including leading double quote, but including trailing quote + N_STRING_FROM_ESCAPE, // Immediately after a backslash + N_NUMBER, + N_VALUE, + N_ARRAY_VALUE_OR_END, + N_OBJECT_VALUE_OR_END, + N_ARRAY_MAYBE_CONTINUE, + N_OBJECT_MAYBE_CONTINUE, + N_WHITESPACE, + N_PAST_END, // Must be last nonterminal +}; + +/* Printable name of each Symbol, indexed by enumerator value; the static_assert below keeps the array the same length as the enum. */ +inline const char *symbolNames[] = { + "T_COLON", + "T_TRUE", + "T_FALSE", + "T_NULL", + "T_R", + "T_U", + "T_A", + "T_L", + "T_S", + "T_DUBQUOTE", + "N_STRING", + "N_STRING_FROM_ESCAPE", + "N_NUMBER", + "N_VALUE", + "N_ARRAY_VALUE_OR_END", + "N_OBJECT_VALUE_OR_END", + "N_ARRAY_MAYBE_CONTINUE", + "N_OBJECT_MAYBE_CONTINUE", + "N_WHITESPACE", +}; + +static_assert(sizeof(symbolNames) / sizeof(symbolNames[0]) == N_PAST_END); + +// Table-based LL(1) parser that doesn't handle escaping and all numbers, with a +// streaming interface. Does not validate UTF-8. Uses O(1) memory. +struct Parser2 { + Parser2(const Callbacks *callbacks, void *data) + : callbacks(callbacks), data(data) { + /* The stack starts empty, so this two-symbol push cannot overflow; its status is discarded on purpose. */ + std::ignore = push({N_WHITESPACE, N_VALUE}); + } + + /* Supplies the next chunk of input: len bytes starting at buf. */ + void prime(char *buf, int len) { + this->buf = buf; + this->bufEnd = buf + len; + } + + enum Status { + // Accept input + S_OK, + // Consumed available input. 
Prime more and parse again + S_AGAIN, + // Invalid json + S_REJECT, + // json is too deeply nested + S_OVERFLOW, + }; + + /* Runs the automaton on the chunk most recently passed to prime(). */ + [[nodiscard]] Status parse() { return keepGoing(this); } + + Parser2(Parser2 const &) = delete; + Parser2 &operator=(Parser2 const &) = delete; + Parser2(Parser2 &&) = delete; + Parser2 &operator=(Parser2 &&) = delete; + + static constexpr int kMaxStackSize = 1 << 10; + +private: + // Helpers + /* Skips whitespace in the current chunk. The byte is converted to unsigned char before indexing the 256-entry table: plain char may be signed, and a byte >= 0x80 would otherwise index out of bounds. */ + void maybeSkipWs() { + while (buf != bufEnd && tables.whitespace[static_cast<unsigned char>(*buf)]) { + ++buf; + } + } + /* Consumes number characters and forwards them through on_number_data. Returns S_AGAIN when the chunk is exhausted (the number may continue in the next chunk); otherwise calls on_end_number and returns S_OK. */ + Status parse_number() { + char *const bufBefore = buf; + while (len() > 0) { + if (tables.number[static_cast<unsigned char>(*buf)]) { + ++buf; + } else { + break; + } + } + if (buf != bufBefore) { + callbacks->on_number_data(data, bufBefore, buf - bufBefore); + } + if (len() == 0) { + return S_AGAIN; + } + callbacks->on_end_number(data); + return S_OK; + } + /* Scans for the closing quote and forwards string bytes through on_string_data. fromEscape is set when the previous chunk ended in a backslash, so a quote in the first position is skipped as escaped. NOTE(review): a quote counts as escaped whenever the byte before it is a backslash, so a string ending in an escaped backslash appears unterminated here -- confirm whether that is intended. */ + Status parse_string(bool fromEscape) { + auto *result = buf; + if (fromEscape) { + if (*result == '\"') { + ++result; + } + pop(); + if (Status s = push({N_STRING})) { + return s; + } + } + for (;;) { + result = static_cast<char *>(memchr(result, '"', bufEnd - result)); + if (result == nullptr) { + callbacks->on_string_data(data, buf, len()); + /* The chunk ended inside the string; remember a trailing backslash so the next chunk starts in the escaped state. */ + if (bufEnd[-1] == '\\') { + pop(); + if (Status s = push({N_STRING_FROM_ESCAPE})) { + return s; + } + } + return S_AGAIN; + } + if (result != buf && result[-1] == '\\') { + ++result; + if (result == bufEnd) { + callbacks->on_string_data(data, buf, len()); + return S_AGAIN; + } + continue; + } + break; + } + int stringLen = result - buf; + if (stringLen > 0) { + callbacks->on_string_data(data, buf, stringLen); + } + /* Step over the string bytes and the closing quote. */ + buf += stringLen + 1; + callbacks->on_end_string(data); + return S_OK; + } + + typedef Status (*continuation)(Parser2 *); + + /* Prints the symbol stack from bottom to top on one line. */ + [[maybe_unused]] void debugPrint() { + for (int i = 0; i < stackPtr - stack; ++i) { + printf("%s ", symbolNames[stack[i]]); + } + printf("\n"); + } + + /* Dispatches to the handler of the symbol on top of the stack, or asks for more input when the chunk is exhausted. */ + static Status keepGoing(Parser2 *self) { + if (self->len() == 0) { + return S_AGAIN; + } + // self->debugPrint(); + MUSTTAIL 
return table[*(self->stackPtr - 1)](self); + } + + /* Handlers for N_STRING, N_STRING_FROM_ESCAPE and N_NUMBER: a non-S_OK status from the helper is returned unchanged; otherwise the symbol is popped and parsing either finishes (empty stack) or continues through keepGoing. */ + static Status string(Parser2 *self) { + if (Status s = self->parse_string(false)) { + return s; + } + self->pop(); + if (self->empty()) { + return S_OK; + } + MUSTTAIL return keepGoing(self); + } + static Status stringFromEscape(Parser2 *self) { + if (Status s = self->parse_string(true)) { + return s; + } + self->pop(); + if (self->empty()) { + return S_OK; + } + MUSTTAIL return keepGoing(self); + } + static Status number(Parser2 *self) { + if (Status s = self->parse_number()) { + return s; + } + self->pop(); + if (self->empty()) { + return S_OK; + } + MUSTTAIL return keepGoing(self); + } + /* Dispatches on the first byte of a value and replaces N_VALUE with the symbols needed to parse the rest of it. */ + static Status value(Parser2 *self) { + switch (*self->buf) { + case '{': + ++self->buf; + self->callbacks->on_begin_object(self->data); + self->pop(); + if (Status s = self->push({N_WHITESPACE, N_OBJECT_VALUE_OR_END})) { + return s; + } + break; + case '[': + ++self->buf; + self->callbacks->on_begin_array(self->data); + self->pop(); + if (Status s = self->push({N_WHITESPACE, N_ARRAY_VALUE_OR_END})) { + return s; + } + break; + case '"': + ++self->buf; + self->pop(); + self->callbacks->on_begin_string(self->data); + if (Status s = self->push({N_STRING})) { + return s; + } + break; + case 't': + ++self->buf; + self->pop(); + if (Status s = self->push({T_R, T_U, T_TRUE})) { + return s; + } + break; + case 'f': + ++self->buf; + self->pop(); + if (Status s = self->push({T_A, T_L, T_S, T_FALSE})) { + return s; + } + break; + case 'n': + ++self->buf; + self->pop(); + if (Status s = self->push({T_U, T_L, T_NULL})) { + return s; + } + break; + default: + /* Anything else must be a number. Reject a byte that cannot start one; otherwise an empty number would be reported and input such as a bare x or a trailing comma in an array would be accepted. */ + if (!tables.number[static_cast<unsigned char>(*self->buf)]) { + return S_REJECT; + } + self->pop(); + self->callbacks->on_begin_number(self->data); + if (Status s = self->push({N_NUMBER})) { + return s; + } + break; + } + MUSTTAIL return keepGoing(self); + } + /* After '[': either ']' closes an empty array or a first element follows. */ + static Status arrayOrEnd(Parser2 *self) { + if (*self->buf == ']') { + ++self->buf; + self->pop(); + self->callbacks->on_end_array(self->data); + if (self->empty()) { + return S_OK; + } + MUSTTAIL return 
keepGoing(self); + } else { + /* Not an immediate ']': expect a first element, then an optional continuation. */ + self->pop(); + if (Status s = + self->push({N_VALUE, N_WHITESPACE, N_ARRAY_MAYBE_CONTINUE})) { + return s; + } + MUSTTAIL return keepGoing(self); + } + } + /* After '{': '}' closes an empty object, '"' opens the first key, anything else is rejected. The key's opening quote is consumed here, so N_STRING is pushed without T_DUBQUOTE. */ + static Status objectOrEnd(Parser2 *self) { + if (*self->buf == '}') { + ++self->buf; + self->pop(); + self->callbacks->on_end_object(self->data); + if (self->empty()) { + return S_OK; + } + MUSTTAIL return keepGoing(self); + } else if (*self->buf == '"') { + self->callbacks->on_begin_string(self->data); + ++self->buf; + self->pop(); + if (Status s = + self->push({N_STRING, N_WHITESPACE, T_COLON, N_WHITESPACE, + N_VALUE, N_WHITESPACE, N_OBJECT_MAYBE_CONTINUE})) { + return s; + } + MUSTTAIL return keepGoing(self); + } + return S_REJECT; + } + /* After an array element: ',' expects another value, ']' ends the array, anything else is rejected. */ + static Status arrayContinue(Parser2 *self) { + if (*self->buf == ',') { + ++self->buf; + self->pop(); + if (Status s = self->push( + {N_WHITESPACE, N_VALUE, N_WHITESPACE, N_ARRAY_MAYBE_CONTINUE})) { + return s; + } + MUSTTAIL return keepGoing(self); + } else if (*self->buf == ']') { + ++self->buf; + self->pop(); + self->callbacks->on_end_array(self->data); + if (self->empty()) { + return S_OK; + } + MUSTTAIL return keepGoing(self); + } + return S_REJECT; + } + /* After an object member: ',' expects another quoted key, colon and value; '}' ends the object; anything else is rejected. */ + static Status objectContinue(Parser2 *self) { + if (*self->buf == ',') { + ++self->buf; + self->pop(); + if (Status s = self->push({N_WHITESPACE, T_DUBQUOTE, N_STRING, + N_WHITESPACE, T_COLON, N_WHITESPACE, N_VALUE, + N_WHITESPACE, N_OBJECT_MAYBE_CONTINUE})) { + return s; + } + MUSTTAIL return keepGoing(self); + } else if (*self->buf == '}') { + ++self->buf; + self->pop(); + self->callbacks->on_end_object(self->data); + if (self->empty()) { + return S_OK; + } + MUSTTAIL return keepGoing(self); + } + return S_REJECT; + } + /* T_TRUE, T_FALSE and T_NULL consume the last letter of their literal and report the matching callback. */ + static Status finishTrue(Parser2 *self) { + if (*self->buf++ == 'e') { + self->pop(); + self->callbacks->on_true_literal(self->data); + if (self->empty()) { + return S_OK; + } + MUSTTAIL return keepGoing(self); + } + return S_REJECT; + } + static Status finishFalse(Parser2 
*self) { + if (*self->buf++ == 'e') { + self->pop(); + self->callbacks->on_false_literal(self->data); + if (self->empty()) { + return S_OK; + } + MUSTTAIL return keepGoing(self); + } + return S_REJECT; + } + static Status finishNull(Parser2 *self) { + if (*self->buf++ == 'l') { + self->pop(); + self->callbacks->on_null_literal(self->data); + if (self->empty()) { + return S_OK; + } + MUSTTAIL return keepGoing(self); + } + return S_REJECT; + } + /* Consumes one byte and rejects unless it equals kChar. */ + template <char kChar> static Status singleChar(Parser2 *self) { + if (*self->buf++ == kChar) { + self->pop(); + MUSTTAIL return keepGoing(self); + } + return S_REJECT; + } + /* Consumes the opening quote of a string and reports on_begin_string. */ + static Status dubquote(Parser2 *self) { + if (*self->buf++ == '"') { + self->callbacks->on_begin_string(self->data); + self->pop(); + MUSTTAIL return keepGoing(self); + } + return S_REJECT; + } + /* Skips whitespace; N_WHITESPACE stays on the stack when the chunk runs out, so skipping resumes with the next chunk. */ + static Status whitespace(Parser2 *self) { + self->maybeSkipWs(); + if (self->len() == 0) { + return S_AGAIN; + } + self->pop(); + MUSTTAIL return keepGoing(self); + } + + /* Handler for each Symbol, indexed by enumerator value; the static_assert below keeps the table the same length as the enum. */ + static constexpr continuation table[] = { + /*T_COLON*/ singleChar<':'>, + /*T_TRUE*/ finishTrue, + /*T_FALSE*/ finishFalse, + /*T_NULL*/ finishNull, + /*T_R*/ singleChar<'r'>, + /*T_U*/ singleChar<'u'>, + /*T_A*/ singleChar<'a'>, + /*T_L*/ singleChar<'l'>, + /*T_S*/ singleChar<'s'>, + /*T_DUBQUOTE*/ dubquote, + /*N_STRING*/ string, + /*N_STRING_FROM_ESCAPE*/ stringFromEscape, + /*N_NUMBER*/ number, + /*N_VALUE*/ value, + /*N_ARRAY_VALUE_OR_END*/ arrayOrEnd, + /*N_OBJECT_VALUE_OR_END*/ objectOrEnd, + /*N_ARRAY_MAYBE_CONTINUE*/ arrayContinue, + /*N_OBJECT_MAYBE_CONTINUE*/ objectContinue, + /*N_WHITESPACE*/ whitespace, + }; + + static_assert(sizeof(table) / sizeof(table[0]) == N_PAST_END); + + /* Unconsumed input of the current chunk is [buf, bufEnd). */ + char *buf = nullptr; + char *bufEnd = nullptr; + int len() const { return bufEnd - buf; } + const Callbacks *const callbacks; + void *const data; + /* Symbol stack; stackPtr points one past the top element. */ + Symbol stack[kMaxStackSize]; + Symbol *stackPtr = stack; + bool empty() const { return stackPtr == stack; } + void pop() { + assert(!empty()); + --stackPtr; + } + 
/* Pushes symbols in reverse order so that the first listed symbol ends up on top of the stack. Returns S_OVERFLOW, leaving the stack unchanged, when the capacity check fails. */ [[nodiscard]] Status push(std::initializer_list<Symbol> symbols) { + if (stackPtr >= std::end(stack) - symbols.size()) [[unlikely]] { + return S_OVERFLOW; + } + for (int i = symbols.size() - 1; i >= 0; --i) { + *stackPtr++ = *(symbols.begin() + i); + } + return S_OK; + } +}; diff --git a/src/tables.h b/src/tables.h new file mode 100644 index 0000000..0a725a8 --- /dev/null +++ b/src/tables.h @@ -0,0 +1,18 @@ +#pragma once + +/* Byte-indexed lookup tables built at compile time: whitespace marks space, newline, carriage return and tab; number marks the ten digits plus '.', '+' and '-'. */ +constexpr inline struct Tables { + constexpr Tables() { + whitespace[' '] = true; + whitespace['\n'] = true; + whitespace['\r'] = true; + whitespace['\t'] = true; + for (int i = 0; i < 10; ++i) { + number['0' + i] = true; + } + number['.'] = true; + number['+'] = true; + number['-'] = true; + } + alignas(16) bool whitespace[256]{}; + alignas(16) bool number[256]{}; +} tables; diff --git a/src/test.cpp b/src/test.cpp index 5422466..6c0bd57 100644 --- a/src/test.cpp +++ b/src/test.cpp @@ -4,7 +4,6 @@ #include #include -#include #include #include @@ -12,6 +11,8 @@ #include #include +#include "parser.h" + // This is the JSON grammar in McKeeman Form. // json @@ -116,93 +117,6 @@ // '000D' ws // '0009' ws -struct Callbacks { - void (*on_begin_object)(void *data) = noop; - void (*on_end_object)(void *data) = noop; - void (*on_begin_string)(void *data) = noop; - void (*on_string_data)(void *data, const char *buf, int len) = noop; - void (*on_end_string)(void *data) = noop; - void (*on_begin_array)(void *data) = noop; - void (*on_end_array)(void *data) = noop; - void (*on_begin_number)(void *data) = noop; - void (*on_number_data)(void *data, const char *buf, int len) = noop; - void (*on_end_number)(void *data) = noop; - void (*on_true_literal)(void *data) = noop; - void (*on_false_literal)(void *data) = noop; - void (*on_null_literal)(void *data) = noop; - -private: - static void noop(void *) {} - static void noop(void *, const char *, int) {} -}; - -// Terminals and Nonterminals. 
These appear in the stack of the pushdown -// automata -enum Symbol : int8_t { - T_COLON, - T_TRUE, - T_FALSE, - T_NULL, - T_R, - T_U, - T_A, - T_L, - T_S, - T_DUBQUOTE, - // Nonterminals - N_STRING, // Not including leading double quote, but including trailing quote - N_STRING_FROM_ESCAPE, // Immediately after a backslach - N_NUMBER, - N_VALUE, - N_ARRAY_VALUE_OR_END, - N_OBJECT_VALUE_OR_END, - N_ARRAY_MAYBE_CONTINUE, - N_OBJECT_MAYBE_CONTINUE, - N_WHITESPACE, - N_PAST_END, // Must be last nonterminal -}; - -static const char *symbolNames[] = { - "T_COLON", - "T_TRUE", - "T_FALSE", - "T_NULL", - "T_R", - "T_U", - "T_A", - "T_L", - "T_S", - "T_DUBQUOTE", - "N_STRING", - "N_STRING_FROM_ESCAPE", - "N_NUMBER", - "N_VALUE", - "N_ARRAY_VALUE_OR_END", - "N_OBJECT_VALUE_OR_END", - "N_ARRAY_MAYBE_CONTINUE", - "N_OBJECT_MAYBE_CONTINUE", - "N_WHITESPACE", -}; - -static_assert(sizeof(symbolNames) / sizeof(symbolNames[0]) == N_PAST_END); - -constexpr static struct Tables { - constexpr Tables() { - whitespace[' '] = true; - whitespace['\n'] = true; - whitespace['\r'] = true; - whitespace['\t'] = true; - for (int i = 0; i < 10; ++i) { - number['0' + i] = true; - } - number['.'] = true; - number['+'] = true; - number['-'] = true; - } - alignas(16) bool whitespace[256]{}; - alignas(16) bool number[256]{}; -} tables; - namespace { // Straightforward recursive descent that doesn't handle string escaping and @@ -453,395 +367,6 @@ private: #define MUSTTAIL #endif -// Table-based ll(1) parser that doesn't handle escaping and all numbers, with a -// streaming interface. Does not validate utf-8. Uses O(1) memory. -struct Parser2 { - Parser2(const Callbacks *callbacks, void *data) - : callbacks(callbacks), data(data) { - std::ignore = push({N_WHITESPACE, N_VALUE}); - } - - void prime(char *buf, int len) { - this->buf = buf; - this->bufEnd = buf + len; - } - - enum Status { - // Accept input - S_OK, - // Consumed available input. 
Prime more and parse again - S_AGAIN, - // Invalid json - S_REJECT, - // json is too deeply nested - S_OVERFLOW, - }; - - [[nodiscard]] Status parse() { return keepGoing(this); } - - Parser2(Parser2 const &) = delete; - Parser2 &operator=(Parser2 const &) = delete; - Parser2(Parser2 &&) = delete; - Parser2 &operator=(Parser2 &&) = delete; - - static constexpr int kMaxStackSize = 1 << 10; - -private: - // Helpers - void maybeSkipWs() { - while (buf != bufEnd && tables.whitespace[*buf]) { - ++buf; - } - } - Status parse_number() { - char *const bufBefore = buf; - while (len() > 0) { - if (tables.number[*buf]) { - ++buf; - } else { - break; - } - } - if (buf != bufBefore) { - callbacks->on_number_data(data, bufBefore, buf - bufBefore); - } - if (len() == 0) { - return S_AGAIN; - } - callbacks->on_end_number(data); - return S_OK; - } - Status parse_string(bool fromEscape) { - auto *result = buf; - if (fromEscape) { - if (*result == '\"') { - ++result; - } - pop(); - if (Status s = push({N_STRING})) { - return s; - } - } - for (;;) { - result = (char *)memchr(result, '"', bufEnd - result); - if (result == nullptr) { - callbacks->on_string_data(data, buf, len()); - if (bufEnd[-1] == '\\') { - pop(); - if (Status s = push({N_STRING_FROM_ESCAPE})) { - return s; - } - } - return S_AGAIN; - } - if (result != buf && result[-1] == '\\') { - ++result; - if (result == bufEnd) { - callbacks->on_string_data(data, buf, len()); - return S_AGAIN; - } - continue; - } - break; - } - int stringLen = result - buf; - if (stringLen > 0) { - callbacks->on_string_data(data, buf, stringLen); - } - buf += stringLen + 1; - callbacks->on_end_string(data); - return S_OK; - } - - typedef Status (*continuation)(Parser2 *); - - [[maybe_unused]] void debugPrint() { - for (int i = 0; i < stackPtr - stack; ++i) { - printf("%s ", symbolNames[stack[i]]); - } - printf("\n"); - } - - static Status keepGoing(Parser2 *self) { - if (self->len() == 0) { - return S_AGAIN; - } - // self->debugPrint(); - MUSTTAIL 
return table[*(self->stackPtr - 1)](self); - } - - static Status string(Parser2 *self) { - if (Status s = self->parse_string(false)) { - return s; - } - self->pop(); - if (self->empty()) { - return S_OK; - } - MUSTTAIL return keepGoing(self); - } - static Status stringFromEscape(Parser2 *self) { - if (Status s = self->parse_string(true)) { - return s; - } - self->pop(); - if (self->empty()) { - return S_OK; - } - MUSTTAIL return keepGoing(self); - } - static Status number(Parser2 *self) { - if (Status s = self->parse_number()) { - return s; - } - self->pop(); - if (self->empty()) { - return S_OK; - } - MUSTTAIL return keepGoing(self); - } - static Status value(Parser2 *self) { - switch (*self->buf) { - case '{': - ++self->buf; - self->callbacks->on_begin_object(self->data); - self->pop(); - if (Status s = self->push({N_WHITESPACE, N_OBJECT_VALUE_OR_END})) { - return s; - } - break; - case '[': - ++self->buf; - self->callbacks->on_begin_array(self->data); - self->pop(); - if (Status s = self->push({N_WHITESPACE, N_ARRAY_VALUE_OR_END})) { - return s; - } - break; - case '"': - ++self->buf; - self->pop(); - self->callbacks->on_begin_string(self->data); - if (Status s = self->push({N_STRING})) { - return s; - } - break; - case 't': - ++self->buf; - self->pop(); - if (Status s = self->push({T_R, T_U, T_TRUE})) { - return s; - } - break; - case 'f': - ++self->buf; - self->pop(); - if (Status s = self->push({T_A, T_L, T_S, T_FALSE})) { - return s; - } - break; - case 'n': - ++self->buf; - self->pop(); - if (Status s = self->push({T_U, T_L, T_NULL})) { - return s; - } - break; - default: - self->pop(); - self->callbacks->on_begin_number(self->data); - if (Status s = self->push({N_NUMBER})) { - return s; - } - break; - } - MUSTTAIL return keepGoing(self); - } - static Status arrayOrEnd(Parser2 *self) { - if (*self->buf == ']') { - ++self->buf; - self->pop(); - self->callbacks->on_end_array(self->data); - if (self->empty()) { - return S_OK; - } - MUSTTAIL return 
keepGoing(self); - } else { - self->pop(); - if (Status s = - self->push({N_VALUE, N_WHITESPACE, N_ARRAY_MAYBE_CONTINUE})) { - return s; - } - MUSTTAIL return keepGoing(self); - } - } - static Status objectOrEnd(Parser2 *self) { - if (*self->buf == '}') { - ++self->buf; - self->pop(); - self->callbacks->on_end_object(self->data); - if (self->empty()) { - return S_OK; - } - MUSTTAIL return keepGoing(self); - } else if (*self->buf == '"') { - self->callbacks->on_begin_string(self->data); - ++self->buf; - self->pop(); - if (Status s = - self->push({N_STRING, N_WHITESPACE, T_COLON, N_WHITESPACE, - N_VALUE, N_WHITESPACE, N_OBJECT_MAYBE_CONTINUE})) { - return s; - } - MUSTTAIL return keepGoing(self); - } - return S_REJECT; - } - static Status arrayContinue(Parser2 *self) { - if (*self->buf == ',') { - ++self->buf; - self->pop(); - if (Status s = self->push( - {N_WHITESPACE, N_VALUE, N_WHITESPACE, N_ARRAY_MAYBE_CONTINUE})) { - return s; - } - MUSTTAIL return keepGoing(self); - } else if (*self->buf == ']') { - ++self->buf; - self->pop(); - self->callbacks->on_end_array(self->data); - if (self->empty()) { - return S_OK; - } - MUSTTAIL return keepGoing(self); - } - return S_REJECT; - } - static Status objectContinue(Parser2 *self) { - if (*self->buf == ',') { - ++self->buf; - self->pop(); - if (Status s = self->push({N_WHITESPACE, T_DUBQUOTE, N_STRING, - N_WHITESPACE, T_COLON, N_WHITESPACE, N_VALUE, - N_WHITESPACE, N_OBJECT_MAYBE_CONTINUE})) { - return s; - } - MUSTTAIL return keepGoing(self); - } else if (*self->buf == '}') { - ++self->buf; - self->pop(); - self->callbacks->on_end_object(self->data); - if (self->empty()) { - return S_OK; - } - MUSTTAIL return keepGoing(self); - } - return S_REJECT; - } - static Status finishTrue(Parser2 *self) { - if (*self->buf++ == 'e') { - self->pop(); - self->callbacks->on_true_literal(self->data); - if (self->empty()) { - return S_OK; - } - MUSTTAIL return keepGoing(self); - } - return S_REJECT; - } - static Status finishFalse(Parser2 
*self) { - if (*self->buf++ == 'e') { - self->pop(); - self->callbacks->on_false_literal(self->data); - if (self->empty()) { - return S_OK; - } - MUSTTAIL return keepGoing(self); - } - return S_REJECT; - } - static Status finishNull(Parser2 *self) { - if (*self->buf++ == 'l') { - self->pop(); - self->callbacks->on_null_literal(self->data); - if (self->empty()) { - return S_OK; - } - MUSTTAIL return keepGoing(self); - } - return S_REJECT; - } - template static Status singleChar(Parser2 *self) { - if (*self->buf++ == kChar) { - self->pop(); - MUSTTAIL return keepGoing(self); - } - return S_REJECT; - } - static Status dubquote(Parser2 *self) { - if (*self->buf++ == '"') { - self->callbacks->on_begin_string(self->data); - self->pop(); - MUSTTAIL return keepGoing(self); - } - return S_REJECT; - } - static Status whitespace(Parser2 *self) { - self->maybeSkipWs(); - if (self->len() == 0) { - return S_AGAIN; - } - self->pop(); - MUSTTAIL return keepGoing(self); - } - - static constexpr continuation table[] = { - /*T_COLON*/ singleChar<':'>, - /*T_TRUE*/ finishTrue, - /*T_FALSE*/ finishFalse, - /*T_NULL*/ finishNull, - /*T_R*/ singleChar<'r'>, - /*T_U*/ singleChar<'u'>, - /*T_A*/ singleChar<'a'>, - /*T_L*/ singleChar<'l'>, - /*T_S*/ singleChar<'s'>, - /*T_DUBQUOTE*/ dubquote, - /*N_STRING*/ string, - /*N_STRING_FROM_ESCAPE*/ stringFromEscape, - /*N_NUMBER*/ number, - /*N_VALUE*/ value, - /*N_ARRAY_VALUE_OR_END*/ arrayOrEnd, - /*N_OBJECT_VALUE_OR_END*/ objectOrEnd, - /*N_ARRAY_MAYBE_CONTINUE*/ arrayContinue, - /*N_OBJECT_MAYBE_CONTINUE*/ objectContinue, - /*N_WHITESPACE*/ whitespace, - }; - - static_assert(sizeof(table) / sizeof(table[0]) == N_PAST_END); - - char *buf = nullptr; - char *bufEnd = nullptr; - int len() const { return bufEnd - buf; } - const Callbacks *const callbacks; - void *const data; - Symbol stack[kMaxStackSize]; - Symbol *stackPtr = stack; - bool empty() const { return stackPtr == stack; } - void pop() { - assert(!empty()); - --stackPtr; - } - 
[[nodiscard]] Status push(std::initializer_list symbols) { - if (stackPtr >= std::end(stack) - symbols.size()) [[unlikely]] { - return S_OVERFLOW; - } - for (int i = symbols.size() - 1; i >= 0; --i) { - *stackPtr++ = *(symbols.begin() + i); - } - return S_OK; - } -}; - const std::string json = R"({ "a number": 12345, "true": true, @@ -972,6 +497,24 @@ Callbacks minifyCallbacks() { return result; } +/* Returns a Callbacks whose every entry is a function that does nothing. The lambda parameters are unnamed because they are never used; result is value-initialised so that no entry is ever left indeterminate. */ +Callbacks noopCallbacks() { + Callbacks result{}; + result.on_begin_object = +[](void *) {}; + result.on_end_object = +[](void *) {}; + result.on_begin_string = +[](void *) {}; + result.on_string_data = +[](void *, const char *, int) {}; + result.on_end_string = +[](void *) {}; + result.on_begin_array = +[](void *) {}; + result.on_end_array = +[](void *) {}; + result.on_begin_number = +[](void *) {}; + result.on_number_data = +[](void *, const char *, int) {}; + result.on_end_number = +[](void *) {}; + result.on_true_literal = +[](void *) {}; + result.on_false_literal = +[](void *) {}; + result.on_null_literal = +[](void *) {}; + return result; +} + } // namespace TEST_CASE("parser1") { @@ -1016,7 +559,7 @@ TEST_CASE("parser2") { } TEST_CASE("bench1") { - auto c = Callbacks{}; + auto c = noopCallbacks(); ankerl::nanobench::Bench bench; bench.batch(json.size()); bench.unit("byte"); @@ -1028,7 +571,7 @@ TEST_CASE("bench1") { } TEST_CASE("bench2") { - auto c = Callbacks{}; + auto c = noopCallbacks(); ankerl::nanobench::Bench bench; bench.batch(json.size()); bench.unit("byte");