Pivot to simpler approach. Passes JSONTestSuite

This commit is contained in:
2025-05-18 11:34:12 -04:00
parent 6cb7645675
commit 19208c0e0a
5 changed files with 998 additions and 13 deletions

127
src/callbacks.h Normal file
View File

@@ -0,0 +1,127 @@
#pragma once
#include "weaseljson.h"
#include <cstdint>
#include <cstdio>
#include <vector>
// Builds a Callbacks table in which every handler traces its event to stdout.
// Structural events print only their own name; the two data events also print
// the received bytes between backticks. The user pointer is ignored.
inline Callbacks printCallbacks() {
  Callbacks cb;

  // Events that carry a payload.
  cb.on_string_data = +[](void *, const char *bytes, int count) {
    printf("on_string_data `%.*s`\n", count, bytes);
  };
  cb.on_number_data = +[](void *, const char *bytes, int count) {
    printf("on_number_data `%.*s`\n", count, bytes);
  };

  // Begin/end pairs.
  cb.on_begin_object = +[](void *) { puts("on_begin_object"); };
  cb.on_end_object = +[](void *) { puts("on_end_object"); };
  cb.on_begin_array = +[](void *) { puts("on_begin_array"); };
  cb.on_end_array = +[](void *) { puts("on_end_array"); };
  cb.on_begin_string = +[](void *) { puts("on_begin_string"); };
  cb.on_end_string = +[](void *) { puts("on_end_string"); };
  cb.on_begin_number = +[](void *) { puts("on_begin_number"); };
  cb.on_end_number = +[](void *) { puts("on_end_number"); };

  // Literals.
  cb.on_true_literal = +[](void *) { puts("on_true_literal"); };
  cb.on_false_literal = +[](void *) { puts("on_false_literal"); };
  cb.on_null_literal = +[](void *) { puts("on_null_literal"); };

  return cb;
}
// Bookkeeping for the minifying callbacks: one Cursor per open container,
// used to decide which separator (if any) must precede the next value.
struct MinifyState {
  bool isKey = false;

  // Position inside one open object or array.
  struct Cursor {
    // Number of values already started in this container. Inside an object
    // keys and values are both counted, so an odd index means "a key was just
    // written".
    int64_t index;
    bool isObject;
  };

  // Call before emitting any value: prints the separator owed to the
  // enclosing container and advances its cursor. Does nothing at top level.
  void on_begin_value() {
    if (stack.empty()) {
      return;
    }
    Cursor &top = stack.back();
    if (top.index > 0) {
      // Only the slot right after an object key takes a colon; every other
      // non-first slot (next key, next array element) takes a comma.
      const bool afterKey = top.isObject && top.index % 2 == 1;
      if (afterKey) {
        printf(":");
      } else {
        printf(",");
      }
    }
    ++top.index;
  }

  std::vector<Cursor> stack;
};
// Builds a Callbacks table that re-emits the parsed document on stdout without
// insignificant whitespace. Every callback that reads the user pointer treats
// it as a MinifyState*, which tracks where separators are due.
inline Callbacks minifyCallbacks() {
  Callbacks cb;

  // Containers: emit the pending separator, open a nesting level, print the
  // opening bracket; closing pops the level again.
  cb.on_begin_object = +[](void *user) {
    auto *minify = static_cast<MinifyState *>(user);
    minify->on_begin_value();
    minify->stack.push_back({0, true});
    printf("{");
  };
  cb.on_end_object = +[](void *user) {
    static_cast<MinifyState *>(user)->stack.pop_back();
    printf("}");
  };
  cb.on_begin_array = +[](void *user) {
    auto *minify = static_cast<MinifyState *>(user);
    minify->on_begin_value();
    minify->stack.push_back({0, false});
    printf("[");
  };
  cb.on_end_array = +[](void *user) {
    static_cast<MinifyState *>(user)->stack.pop_back();
    printf("]");
  };

  // Strings: the quotes are printed here, the payload is copied through as-is.
  cb.on_begin_string = +[](void *user) {
    static_cast<MinifyState *>(user)->on_begin_value();
    printf("\"");
  };
  cb.on_string_data = +[](void *, const char *bytes, int count) {
    printf("%.*s", count, bytes);
  };
  cb.on_end_string = +[](void *) { printf("\""); };

  // Numbers: only the separator and the raw digits are written.
  cb.on_begin_number = +[](void *user) {
    static_cast<MinifyState *>(user)->on_begin_value();
  };
  cb.on_number_data = +[](void *, const char *bytes, int count) {
    printf("%.*s", count, bytes);
  };
  cb.on_end_number = +[](void *) {};

  // Literals.
  cb.on_true_literal = +[](void *user) {
    static_cast<MinifyState *>(user)->on_begin_value();
    printf("true");
  };
  cb.on_false_literal = +[](void *user) {
    static_cast<MinifyState *>(user)->on_begin_value();
    printf("false");
  };
  cb.on_null_literal = +[](void *user) {
    static_cast<MinifyState *>(user)->on_begin_value();
    printf("null");
  };

  return cb;
}
// Builds a Callbacks table in which every handler is an empty function, for
// callers that only care about the parser's accept/reject result.
inline Callbacks noopCallbacks() {
  Callbacks cb;

  // Handlers taking (user, bytes, length).
  cb.on_string_data = +[](void *, const char *, int) {};
  cb.on_number_data = +[](void *, const char *, int) {};

  // Handlers taking only the user pointer.
  cb.on_begin_object = +[](void *) {};
  cb.on_end_object = +[](void *) {};
  cb.on_begin_array = +[](void *) {};
  cb.on_end_array = +[](void *) {};
  cb.on_begin_string = +[](void *) {};
  cb.on_end_string = +[](void *) {};
  cb.on_begin_number = +[](void *) {};
  cb.on_end_number = +[](void *) {};
  cb.on_true_literal = +[](void *) {};
  cb.on_false_literal = +[](void *) {};
  cb.on_null_literal = +[](void *) {};

  return cb;
}

View File

@@ -88,7 +88,7 @@ struct Parser2 {
complete = len == 0; complete = len == 0;
this->buf = buf; this->buf = buf;
this->bufEnd = buf + len; this->bufEnd = buf + len;
return table[*(stackPtr - 1)](this); return keepGoing(this);
} }
Parser2(Parser2 const &) = delete; Parser2(Parser2 const &) = delete;

812
src/parser3.h Normal file
View File

@@ -0,0 +1,812 @@
#pragma once
#include <cassert>
#include <cctype>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <initializer_list>
#include <iterator>
#include <tuple>
#include <utility>
#include "musttail.h"
#include "tables.h"
#include "weaseljson.h"
namespace parser3 {
// Result of a parse step.
//
// S_OK must remain the first enumerator (value 0): the symbol handlers test
// `if (auto s = self->push(...))` and treat any non-zero Status as a failure
// to be returned immediately.
enum Status {
  // Accept input
  S_OK,
  // Consumed all available input. More input is needed: call parse() again
  // with the next chunk, or with len == 0 to signal end of input.
  S_AGAIN,
  // Invalid json
  S_REJECT,
  // json is too deeply nested (the parser's fixed-size symbol stack is full)
  S_OVERFLOW,
};
// Handler for one stack symbol. Parser3::keepGoing looks up the handler for
// the symbol on top of the stack and tail-calls it with the parser.
typedef Status (*Continuation)(struct Parser3 *);
// These appear in the stack of the pushdown automaton. Each enumerator is
// also the index of its handler and of its printable name in the lookup
// tables, so the values must stay dense and start at 0.
//
// NOTE(review): the N_/T_ prefixes presumably stand for nonterminal/terminal;
// the T_ entries are the ones bound to handlers that match a single input
// character or character class.
enum Symbol : uint8_t {
  N_JSON,
  N_VALUE,
  N_OBJECT,
  N_OBJECT2,
  N_OBJECT3,
  N_ARRAY,
  N_ARRAY2,
  N_ARRAY3,
  N_ELEMENT,
  N_STRING,
  N_STRING2,
  N_STRING_FOLLOWING_ESCAPE,
  N_NUMBER,
  N_INTEGER,
  N_INTEGER2,
  N_DIGITS,
  N_DIGITS2,
  N_FRACTION,
  N_EXPONENT,
  N_SIGN,
  N_WHITESPACE,
  N_TRUE,
  N_FALSE,
  N_NULL,
  T_R,
  T_U,
  T_A,
  T_L,
  T_S,
  T_COLON,
  T_UTF8_CONTINUATION_BYTE,
  T_HEX,
  T_DIGIT,
  T_ONENINE,
  // Matches end of input; pushed beneath N_JSON by the Parser3 constructor.
  T_EOF,
  // Number of symbols above; used to size the handler and name tables.
  N_SYMBOL_COUNT, // Must be last
};
// Incremental JSON recognizer implemented as a pushdown automaton.
//
// Input is fed in chunks through parse(); the automaton's state is the stack
// of Symbols, so parsing can stop at any byte boundary and resume later.
// A call with len == 0 marks the end of input.
struct Parser3 {
  // Stores the callback table and user pointer and seeds the stack with the
  // start symbol (on top) followed by the end-of-input marker.
  Parser3(const Callbacks *callbacks, void *data)
      : callbacks(callbacks), data(data) {
    // Two symbols always fit into the empty stack, so the result is ignored.
    std::ignore = push({N_JSON, T_EOF});
  }

  // Feeds the next chunk [buf, buf + len) to the automaton.
  // Passing len == 0 signals that no more input will follow.
  // Returns S_AGAIN when the chunk was consumed and more input is required,
  // S_OK / S_REJECT / S_OVERFLOW once the outcome is known.
  [[nodiscard]] Status parse(char *buf, int len) {
    complete = len == 0;
    this->buf = buf;
    this->bufEnd = buf + len;
    return keepGoing(this);
  }

  // True when no symbols are left on the stack.
  [[nodiscard]] bool empty() const { return stackPtr == stack; }

  // Discards the top symbol. The stack must not be empty.
  void pop() {
    assert(!empty());
    --stackPtr;
  }

  // Pushes `symbols` so that the first listed symbol ends up on top of the
  // stack (i.e. is processed first). Returns S_OVERFLOW, leaving the stack
  // untouched, when the symbols do not fit; S_OK otherwise.
  [[nodiscard]] Status push(std::initializer_list<Symbol> symbols) {
    // Compare against the number of free slots so that a push which exactly
    // fills the stack is accepted; all kMaxStackSize entries are usable.
    const auto freeSlots = static_cast<std::size_t>(std::end(stack) - stackPtr);
    if (symbols.size() > freeSlots) [[unlikely]] {
      return S_OVERFLOW;
    }
    // Store back-to-front so the first listed symbol is written last.
    for (const Symbol *it = symbols.end(); it != symbols.begin();) {
      *stackPtr++ = *--it;
    }
    return S_OK;
  }

  // Number of unread bytes left in the current chunk.
  [[nodiscard]] int len() const {
    auto result = bufEnd - buf;
    assert(result >= 0);
    return result;
  }

  // Symbol on top of the stack. The stack must not be empty.
  Symbol top() const {
    assert(!empty());
    return *(stackPtr - 1);
  }

  // Runs the automaton from the current state; defined after the handler
  // table it dispatches through.
  static Status keepGoing(Parser3 *self);

  // Capacity of the symbol stack; exceeding it yields S_OVERFLOW.
  constexpr static int kMaxStackSize = 1024;

  // Dumps the stack and the unread input to stdout.
  [[maybe_unused]] void debugPrint();

  // Read cursor and end of the chunk passed to the latest parse() call.
  char *buf = nullptr;
  char *bufEnd = nullptr;
  // Callback table and user pointer supplied at construction.
  const Callbacks *const callbacks;
  void *const data;
  // Symbol stack; stackPtr points one past the top element.
  Symbol stack[kMaxStackSize];
  Symbol *stackPtr = stack;
  // Set when the latest parse() call signalled end of input.
  bool complete = false;
};
// N_JSON: replaces itself with N_ELEMENT without consuming any input.
inline Status n_json(Parser3 *self) {
  self->pop();
  const Status pushed = self->push({N_ELEMENT});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}
// N_VALUE: picks the kind of value from its first byte.
// Containers, strings and numbers leave that byte for their own handler;
// the literals true/false/null consume it here and push the symbols that
// match the remaining letters.
inline Status n_value(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  const char lead = *self->buf;
  Status pushed = S_OK;
  if (lead == '{') {
    self->pop();
    pushed = self->push({N_OBJECT});
  } else if (lead == '[') {
    self->pop();
    pushed = self->push({N_ARRAY});
  } else if (lead == '"') {
    self->pop();
    pushed = self->push({N_STRING});
  } else if (lead == '-' || (lead >= '0' && lead <= '9')) {
    self->pop();
    pushed = self->push({N_NUMBER});
  } else if (lead == 't') {
    ++self->buf;
    self->pop();
    pushed = self->push({T_R, T_U, N_TRUE});
  } else if (lead == 'f') {
    ++self->buf;
    self->pop();
    pushed = self->push({T_A, T_L, T_S, N_FALSE});
  } else if (lead == 'n') {
    ++self->buf;
    self->pop();
    pushed = self->push({T_U, T_L, N_NULL});
  } else {
    return S_REJECT;
  }
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}
// N_OBJECT: consumes the opening '{', then expects optional whitespace
// followed by the object body (N_OBJECT2).
inline Status n_object(Parser3 *self) {
  const bool atOpenBrace = self->len() != 0 && *self->buf == '{';
  if (!atOpenBrace) {
    return S_REJECT;
  }
  ++self->buf;
  self->pop();
  const Status pushed = self->push({N_WHITESPACE, N_OBJECT2});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}
// N_OBJECT2: first position inside an object. Either the object closes right
// away, or a first member (string key, ':', element) follows.
inline Status n_object2(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  const char next = *self->buf;
  if (next == '}') {
    ++self->buf;
    self->pop();
    MUSTTAIL return Parser3::keepGoing(self);
  }
  if (next != '"') {
    return S_REJECT;
  }
  // The quote itself is left for N_STRING to consume.
  self->pop();
  const Status pushed =
      self->push({N_STRING, N_WHITESPACE, T_COLON, N_ELEMENT, N_OBJECT3});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}
// N_OBJECT3: position after a member. '}' ends the object; ',' must be
// followed by another member, after which this state is entered again.
inline Status n_object3(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  const char next = *self->buf;
  if (next == '}') {
    ++self->buf;
    self->pop();
    MUSTTAIL return Parser3::keepGoing(self);
  }
  if (next != ',') {
    return S_REJECT;
  }
  ++self->buf;
  self->pop();
  const Status pushed = self->push(
      {N_WHITESPACE, N_STRING, N_WHITESPACE, T_COLON, N_ELEMENT, N_OBJECT3});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}
// N_ARRAY: consumes the opening '[', then expects optional whitespace
// followed by the array body (N_ARRAY2).
inline Status n_array(Parser3 *self) {
  const bool atOpenBracket = self->len() != 0 && *self->buf == '[';
  if (!atOpenBracket) {
    return S_REJECT;
  }
  ++self->buf;
  self->pop();
  const Status pushed = self->push({N_WHITESPACE, N_ARRAY2});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}
// N_ARRAY2: first position inside an array. ']' closes an empty array; any
// other byte is left in place and must start the first value.
inline Status n_array2(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  if (*self->buf == ']') {
    ++self->buf;
    self->pop();
    MUSTTAIL return Parser3::keepGoing(self);
  }
  self->pop();
  const Status pushed = self->push({N_VALUE, N_WHITESPACE, N_ARRAY3});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}
// N_ARRAY3: position after an array value. ']' ends the array; ',' must be
// followed by another element, after which this state is entered again.
inline Status n_array3(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  const char next = *self->buf;
  if (next == ']') {
    ++self->buf;
    self->pop();
    MUSTTAIL return Parser3::keepGoing(self);
  }
  if (next != ',') {
    return S_REJECT;
  }
  ++self->buf;
  self->pop();
  const Status pushed = self->push({N_ELEMENT, N_ARRAY3});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}
// N_ELEMENT: a value surrounded by optional whitespace. Consumes no input.
inline Status n_element(Parser3 *self) {
  self->pop();
  const Status pushed = self->push({N_WHITESPACE, N_VALUE, N_WHITESPACE});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}
// N_STRING: consumes the opening quote and switches to the string body.
inline Status n_string(Parser3 *self) {
  const bool atQuote = self->len() != 0 && *self->buf == '"';
  if (!atQuote) {
    return S_REJECT;
  }
  ++self->buf;
  self->pop();
  const Status pushed = self->push({N_STRING2});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}
// N_STRING2: one step inside a string body. Handles the closing quote, the
// start of an escape, a plain one-byte character, or the lead byte of a
// multi-byte UTF-8 sequence (whose continuation bytes are then required via
// T_UTF8_CONTINUATION_BYTE). Bytes below 0x20 and bytes that are not a valid
// lead byte are rejected.
inline Status n_string2(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  const uint8_t byte = uint8_t(*self->buf);
  if (byte < 0x20) {
    return S_REJECT;
  }

  if (byte < 0x80) {
    // Single-byte encoding.
    ++self->buf;
    if (byte == '"') {
      self->pop();
      MUSTTAIL return Parser3::keepGoing(self);
    }
    if (byte == '\\') {
      self->pop();
      const Status pushed = self->push({N_STRING_FOLLOWING_ESCAPE});
      if (pushed != S_OK) {
        return pushed;
      }
      MUSTTAIL return Parser3::keepGoing(self);
    }
    // Ordinary character: stay in this state.
    MUSTTAIL return Parser3::keepGoing(self);
  }

  // Multi-byte encodings: the lead byte's high bits give the sequence length.
  Status pushed = S_OK;
  if ((byte & 0xE0) == 0xC0) {
    ++self->buf;
    self->pop();
    pushed = self->push({T_UTF8_CONTINUATION_BYTE, N_STRING2});
  } else if ((byte & 0xF0) == 0xE0) {
    ++self->buf;
    self->pop();
    pushed = self->push(
        {T_UTF8_CONTINUATION_BYTE, T_UTF8_CONTINUATION_BYTE, N_STRING2});
  } else if ((byte & 0xF8) == 0xF0) {
    ++self->buf;
    self->pop();
    pushed = self->push({T_UTF8_CONTINUATION_BYTE, T_UTF8_CONTINUATION_BYTE,
                         T_UTF8_CONTINUATION_BYTE, N_STRING2});
  } else {
    return S_REJECT;
  }
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}
// N_STRING_FOLLOWING_ESCAPE: the byte right after a backslash. A 'u' must be
// followed by four hex digits; the other accepted escapes are one character.
// Either way the string body resumes afterwards.
inline Status n_string_following_escape(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  const char escape = *self->buf;
  if (escape == 'u') {
    ++self->buf;
    self->pop();
    const Status pushed =
        self->push({T_HEX, T_HEX, T_HEX, T_HEX, N_STRING2});
    if (pushed != S_OK) {
      return pushed;
    }
    MUSTTAIL return Parser3::keepGoing(self);
  }
  const bool singleCharEscape = escape == '"' || escape == '\\' ||
                                escape == '/' || escape == 'b' ||
                                escape == 'f' || escape == 'n' ||
                                escape == 'r' || escape == 't';
  if (!singleCharEscape) {
    return S_REJECT;
  }
  ++self->buf;
  self->pop();
  const Status pushed = self->push({N_STRING2});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}
// T_UTF8_CONTINUATION_BYTE: consumes one byte of the form 10xxxxxx.
inline Status t_utf8_continuation_byte(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  const bool isContinuation = (*self->buf & 0xC0) == 0x80;
  if (!isContinuation) {
    return S_REJECT;
  }
  ++self->buf;
  self->pop();
  MUSTTAIL return Parser3::keepGoing(self);
}
// T_DIGIT: consumes exactly one decimal digit.
inline Status t_digit(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  const char c = *self->buf;
  if (c < '0' || c > '9') {
    return S_REJECT;
  }
  ++self->buf;
  self->pop();
  MUSTTAIL return Parser3::keepGoing(self);
}
// T_ONENINE: consumes exactly one non-zero decimal digit.
inline Status t_onenine(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  const char c = *self->buf;
  if (c < '1' || c > '9') {
    return S_REJECT;
  }
  ++self->buf;
  self->pop();
  MUSTTAIL return Parser3::keepGoing(self);
}
// T_HEX: consumes exactly one hexadecimal digit (either letter case).
inline Status t_hex(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  const char c = *self->buf;
  const bool decimal = c >= '0' && c <= '9';
  const bool lower = c >= 'a' && c <= 'f';
  const bool upper = c >= 'A' && c <= 'F';
  if (!(decimal || lower || upper)) {
    return S_REJECT;
  }
  ++self->buf;
  self->pop();
  MUSTTAIL return Parser3::keepGoing(self);
}
// N_NUMBER: integer part, then fraction and exponent handlers (each of which
// may match nothing). Consumes no input itself.
inline Status n_number(Parser3 *self) {
  self->pop();
  const Status pushed = self->push({N_INTEGER, N_FRACTION, N_EXPONENT});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}
// N_INTEGER: start of a number. A lone '0' is complete on its own, 1-9 may be
// followed by more digits, and '-' defers to N_INTEGER2 for the digits.
inline Status n_integer(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  const char lead = *self->buf;
  if (lead == '0') {
    ++self->buf;
    self->pop();
    MUSTTAIL return Parser3::keepGoing(self);
  }
  const bool nonZeroDigit = lead >= '1' && lead <= '9';
  if (!nonZeroDigit && lead != '-') {
    return S_REJECT;
  }
  ++self->buf;
  self->pop();
  const Status pushed =
      nonZeroDigit ? self->push({N_DIGITS2}) : self->push({N_INTEGER2});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}
// N_INTEGER2: the digits after a leading '-'. Same as N_INTEGER except that a
// second sign is not accepted.
inline Status n_integer2(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  const char lead = *self->buf;
  if (lead == '0') {
    ++self->buf;
    self->pop();
    MUSTTAIL return Parser3::keepGoing(self);
  }
  if (lead < '1' || lead > '9') {
    return S_REJECT;
  }
  ++self->buf;
  self->pop();
  const Status pushed = self->push({N_DIGITS2});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}
// N_DIGITS: one or more decimal digits. Consumes the mandatory first digit
// and hands the optional rest to N_DIGITS2.
inline Status n_digits(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  const char c = *self->buf;
  if (c < '0' || c > '9') {
    return S_REJECT;
  }
  ++self->buf;
  self->pop();
  const Status pushed = self->push({N_DIGITS2});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}
// N_DIGITS2: zero or more decimal digits. Stays on the stack while digits
// keep coming; pops itself on the first non-digit or at end of input.
inline Status n_digits2(Parser3 *self) {
  const bool atDigit =
      self->len() != 0 && *self->buf >= '0' && *self->buf <= '9';
  if (atDigit) {
    ++self->buf;
  } else {
    self->pop();
  }
  MUSTTAIL return Parser3::keepGoing(self);
}
// N_FRACTION: optional fractional part. A '.' is consumed and must be
// followed by digits; anything else (or end of input) matches nothing.
inline Status n_fraction(Parser3 *self) {
  const bool atDot = self->len() != 0 && *self->buf == '.';
  self->pop();
  if (!atDot) {
    MUSTTAIL return Parser3::keepGoing(self);
  }
  ++self->buf;
  const Status pushed = self->push({N_DIGITS});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}
// N_EXPONENT: optional exponent. An 'e' or 'E' is consumed and must be
// followed by an optional sign and digits; anything else (or end of input)
// matches nothing.
inline Status n_exponent(Parser3 *self) {
  const bool atMarker =
      self->len() != 0 && (*self->buf == 'e' || *self->buf == 'E');
  self->pop();
  if (!atMarker) {
    MUSTTAIL return Parser3::keepGoing(self);
  }
  ++self->buf;
  const Status pushed = self->push({N_SIGN, N_DIGITS});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}
// N_SIGN: optional '+' or '-'. Consumes the sign if present; always pops.
inline Status n_sign(Parser3 *self) {
  const bool atSign =
      self->len() != 0 && (*self->buf == '+' || *self->buf == '-');
  if (atSign) {
    ++self->buf;
  }
  self->pop();
  MUSTTAIL return Parser3::keepGoing(self);
}
// N_WHITESPACE: zero or more whitespace bytes, as classified by
// tables.whitespace. Stays on the stack while whitespace keeps coming; pops
// itself on the first other byte or at end of input.
inline Status n_whitespace(Parser3 *self) {
  const bool atWhitespace =
      self->len() != 0 && tables.whitespace[uint8_t(*self->buf)];
  if (atWhitespace) {
    ++self->buf;
  } else {
    self->pop();
  }
  MUSTTAIL return Parser3::keepGoing(self);
}
// N_TRUE: consumes the final 'e' of the literal `true` (the earlier letters
// are matched by n_value and the T_R / T_U symbols).
inline Status n_true(Parser3 *self) {
  // This handler is reached with an empty buffer when end of input was
  // signalled in the middle of the literal; the buffer must not be read then.
  if (self->len() == 0) {
    return S_REJECT;
  }
  if (*self->buf == 'e') {
    ++self->buf;
    self->pop();
    MUSTTAIL return Parser3::keepGoing(self);
  }
  return S_REJECT;
}
// N_FALSE: consumes the final 'e' of the literal `false` (the earlier letters
// are matched by n_value and the T_A / T_L / T_S symbols).
inline Status n_false(Parser3 *self) {
  // This handler is reached with an empty buffer when end of input was
  // signalled in the middle of the literal; the buffer must not be read then.
  if (self->len() == 0) {
    return S_REJECT;
  }
  if (*self->buf == 'e') {
    ++self->buf;
    self->pop();
    MUSTTAIL return Parser3::keepGoing(self);
  }
  return S_REJECT;
}
// N_NULL: consumes the final 'l' of the literal `null` (the earlier letters
// are matched by n_value and the T_U / T_L symbols).
inline Status n_null(Parser3 *self) {
  // This handler is reached with an empty buffer when end of input was
  // signalled in the middle of the literal; the buffer must not be read then.
  if (self->len() == 0) {
    return S_REJECT;
  }
  if (*self->buf == 'l') {
    ++self->buf;
    self->pop();
    MUSTTAIL return Parser3::keepGoing(self);
  }
  return S_REJECT;
}
// Handler template for symbols that match exactly one fixed character:
// consumes kChar and pops, or rejects on any other byte.
template <char kChar> inline Status singleChar(Parser3 *self) {
  // This handler is reached with an empty buffer when end of input was
  // signalled while the character was still expected; the buffer must not be
  // read then.
  if (self->len() == 0) {
    return S_REJECT;
  }
  if (*self->buf == kChar) {
    ++self->buf;
    self->pop();
    MUSTTAIL return Parser3::keepGoing(self);
  }
  return S_REJECT;
}
// T_EOF: bottom-of-stack marker. Any unread input at this point is trailing
// garbage. Otherwise the document is accepted once end of input has been
// signalled, and more input is requested until then.
inline Status t_eof(Parser3 *self) {
  const bool trailingInput = self->len() > 0;
  if (trailingInput) {
    return S_REJECT;
  }
  if (self->complete) {
    return S_OK;
  }
  return S_AGAIN;
}
constexpr inline struct ContinuationTable {
constexpr ContinuationTable() {
// Defaults
for (int i = 0; i < N_SYMBOL_COUNT; ++i) {
continuations[i] = +[](struct Parser3 *) {
printf("unimplemented\n");
return S_REJECT;
};
}
continuations[N_JSON] = n_json;
continuations[N_VALUE] = n_value;
continuations[N_OBJECT] = n_object;
continuations[N_OBJECT2] = n_object2;
continuations[N_OBJECT3] = n_object3;
continuations[N_ARRAY] = n_array;
continuations[N_ARRAY2] = n_array2;
continuations[N_ARRAY3] = n_array3;
continuations[N_ELEMENT] = n_element;
continuations[N_STRING] = n_string;
continuations[N_STRING2] = n_string2;
continuations[N_STRING_FOLLOWING_ESCAPE] = n_string_following_escape;
continuations[N_NUMBER] = n_number;
continuations[N_INTEGER] = n_integer;
continuations[N_INTEGER2] = n_integer2;
continuations[N_DIGITS] = n_digits;
continuations[N_DIGITS2] = n_digits2;
continuations[N_FRACTION] = n_fraction;
continuations[N_EXPONENT] = n_exponent;
continuations[N_SIGN] = n_sign;
continuations[N_WHITESPACE] = n_whitespace;
continuations[N_TRUE] = n_true;
continuations[N_FALSE] = n_false;
continuations[N_NULL] = n_null;
continuations[T_R] = singleChar<'r'>;
continuations[T_U] = singleChar<'u'>;
continuations[T_A] = singleChar<'a'>;
continuations[T_L] = singleChar<'l'>;
continuations[T_S] = singleChar<'s'>;
continuations[T_COLON] = singleChar<':'>;
continuations[T_UTF8_CONTINUATION_BYTE] = t_utf8_continuation_byte;
continuations[T_HEX] = t_hex;
continuations[T_DIGIT] = t_digit;
continuations[T_ONENINE] = t_onenine;
continuations[T_EOF] = t_eof;
symbolNames[N_JSON] = "n_json";
symbolNames[N_VALUE] = "n_value";
symbolNames[N_OBJECT] = "n_object";
symbolNames[N_OBJECT2] = "n_object2";
symbolNames[N_OBJECT3] = "n_object3";
symbolNames[N_ARRAY] = "n_array";
symbolNames[N_ARRAY2] = "n_array2";
symbolNames[N_ARRAY3] = "n_array3";
symbolNames[N_ELEMENT] = "n_element";
symbolNames[N_STRING] = "n_string";
symbolNames[N_STRING2] = "n_string2";
symbolNames[N_STRING_FOLLOWING_ESCAPE] = "n_string_following_escape";
symbolNames[N_NUMBER] = "n_number";
symbolNames[N_INTEGER] = "n_integer";
symbolNames[N_INTEGER2] = "n_integer2";
symbolNames[N_DIGITS] = "n_digits";
symbolNames[N_DIGITS2] = "n_digits2";
symbolNames[N_FRACTION] = "n_fraction";
symbolNames[N_EXPONENT] = "n_exponent";
symbolNames[N_SIGN] = "n_sign";
symbolNames[N_WHITESPACE] = "n_whitespace";
symbolNames[N_TRUE] = "n_true";
symbolNames[N_FALSE] = "n_false";
symbolNames[N_NULL] = "n_null";
symbolNames[T_R] = "singleChar<'r'>";
symbolNames[T_U] = "singleChar<'u'>";
symbolNames[T_A] = "singleChar<'a'>";
symbolNames[T_L] = "singleChar<'l'>";
symbolNames[T_S] = "singleChar<'s'>";
symbolNames[T_COLON] = "singleChar<':'>";
symbolNames[T_UTF8_CONTINUATION_BYTE] = "t_utf8_continuation_byte";
symbolNames[T_HEX] = "t_hex";
symbolNames[T_DIGIT] = "t_digit";
symbolNames[T_ONENINE] = "t_onenine";
symbolNames[T_EOF] = "t_eof";
}
Continuation continuations[N_SYMBOL_COUNT]{};
const char *symbolNames[N_SYMBOL_COUNT]{};
} symbolTables;
// One step of the automaton: dispatches to the handler of the symbol on top
// of the stack. Handlers tail-call back into this function, so a whole chunk
// is processed without growing the call stack.
//
// Returns S_AGAIN as soon as the current chunk is exhausted, unless end of
// input has been signalled, in which case handlers are still run so they can
// accept or reject on the empty buffer.
inline Status Parser3::keepGoing(Parser3 *self) {
  if (self->len() == 0 && !self->complete) {
    return S_AGAIN;
  }
  // Tracing dumps the whole stack and the remaining input on every step,
  // which swamps stdout and dominates run time. It is therefore opt-in:
  // build with -DPARSER3_TRACE to enable it.
#ifdef PARSER3_TRACE
  self->debugPrint();
#endif
  MUSTTAIL return symbolTables.continuations[self->top()](self);
}
inline void Parser3::debugPrint() {
for (int i = 0; i < stackPtr - stack; ++i) {
printf("%s ", symbolTables.symbolNames[stack[i]]);
}
printf("\n");
for (int i = 0; i < len(); ++i) {
if (isprint(buf[i])) {
printf("%c", buf[i]);
} else {
printf("\\x%02x", uint8_t(buf[i]));
}
}
printf("\n");
}
} // namespace parser3

View File

@@ -11,7 +11,7 @@
#include <nanobench.h> #include <nanobench.h>
#include <simdjson.h> #include <simdjson.h>
#include "parser.h" #include "parser3.h"
// This is the JSON grammar in McKeeman Form. // This is the JSON grammar in McKeeman Form.
@@ -539,28 +539,35 @@ TEST_CASE("parser2") {
MinifyState state; MinifyState state;
{ {
auto copy = json; auto copy = json;
Parser2 parser(&c, &state); parser3::Parser3 parser(&c, &state);
int i = 0; int i = 0;
for (; i < copy.length() - 1; ++i) { for (; i < copy.length() - 1; ++i) {
REQUIRE(parser.parse(copy.data() + i, 1) == Parser2::S_AGAIN); REQUIRE(parser.parse(copy.data() + i, 1) == parser3::S_AGAIN);
} }
CHECK(parser.parse(copy.data() + i, 1) == Parser2::S_AGAIN); CHECK(parser.parse(copy.data() + i, 1) == parser3::S_AGAIN);
CHECK(parser.parse(nullptr, 0) == Parser2::S_OK); CHECK(parser.parse(nullptr, 0) == parser3::S_OK);
puts(""); puts("");
} }
{ {
std::string copy = "{\"x\": [], \"y\": {}}"; std::string copy = "{\"x\": [], \"y\": {}}";
Parser2 parser(&c, &state); parser3::Parser3 parser(&c, &state);
CHECK(parser.parse(copy.data(), copy.length()) == Parser2::S_AGAIN); CHECK(parser.parse(copy.data(), copy.length()) == parser3::S_AGAIN);
CHECK(parser.parse(nullptr, 0) == Parser2::S_OK); CHECK(parser.parse(nullptr, 0) == parser3::S_OK);
puts(""); puts("");
} }
{ {
auto c = noopCallbacks(); auto c = noopCallbacks();
std::string copy = "{\"a\":\"a"; std::string copy = "{\"a\":\"a";
Parser2 parser(&c, &state); parser3::Parser3 parser(&c, &state);
CHECK(parser.parse(copy.data(), copy.length()) == Parser2::S_AGAIN); CHECK(parser.parse(copy.data(), copy.length()) == parser3::S_AGAIN);
CHECK(parser.parse(nullptr, 0) == Parser2::S_REJECT); CHECK(parser.parse(nullptr, 0) == parser3::S_REJECT);
}
{
auto c = noopCallbacks();
std::string copy = "[";
parser3::Parser3 parser(&c, &state);
CHECK(parser.parse(copy.data(), copy.length()) == parser3::S_AGAIN);
CHECK(parser.parse(nullptr, 0) == parser3::S_REJECT);
} }
} }
@@ -583,7 +590,7 @@ TEST_CASE("bench2") {
bench.unit("byte"); bench.unit("byte");
bench.run("parser2", [&]() { bench.run("parser2", [&]() {
auto copy = json; auto copy = json;
Parser2 parser(&c, nullptr); parser3::Parser3 parser(&c, nullptr);
bench.doNotOptimizeAway(parser.parse(copy.data(), copy.length())); bench.doNotOptimizeAway(parser.parse(copy.data(), copy.length()));
}); });
} }

39
src/validate.cpp Normal file
View File

@@ -0,0 +1,39 @@
#include <fcntl.h>
#include <unistd.h>
#include "callbacks.h"
#include "parser3.h"
// Validates the file named by argv[1] as JSON.
// Exit status 0 means the parser accepted the file; 1 means it was rejected,
// nested too deeply, could not be read, or no path was given.
int main(int argc, char **argv) {
  if (argc < 2) {
    printf("Usage: %s <path>\n", argv[0]);
    return 1;
  }
  const int fd = open(argv[1], O_RDONLY);
  if (fd == -1) {
    perror("open");
    return 1;
  }
  auto c = noopCallbacks();
  parser3::Parser3 parser(&c, nullptr);

  int exitCode = 1;
  for (bool done = false; !done;) {
    char buf[1024];
    const ssize_t l = read(fd, buf, sizeof(buf));
    if (l == -1) {
      perror("read");
      break;
    }
    // A read of 0 bytes is end of file; passing len == 0 tells the parser
    // that the input is complete.
    switch (parser.parse(buf, static_cast<int>(l))) {
    case parser3::S_OK:
      exitCode = 0;
      done = true;
      break;
    case parser3::S_AGAIN:
      // The parser is expected to reach a verdict once end of input has been
      // signalled. If it still asks for more, fail instead of looping on
      // zero-length reads forever.
      done = (l == 0);
      break;
    case parser3::S_REJECT:
    case parser3::S_OVERFLOW:
      done = true;
      break;
    }
  }
  close(fd);
  return exitCode;
}