Start working on c api

This commit is contained in:
2025-05-22 10:55:15 -04:00
parent 96ef50d52f
commit 6e602d8fd5
6 changed files with 150 additions and 121 deletions

View File

@@ -1,6 +1,10 @@
#ifndef WEASELJSON_H #ifndef WEASELJSON_H
#define WEASELJSON_H #define WEASELJSON_H
#ifdef __cplusplus
extern "C" {
#endif
struct WeaselJsonCallbacks { struct WeaselJsonCallbacks {
void (*on_begin_object)(void *data); void (*on_begin_object)(void *data);
void (*on_end_object)(void *data); void (*on_end_object)(void *data);
@@ -17,4 +21,37 @@ struct WeaselJsonCallbacks {
void (*on_null_literal)(void *data); void (*on_null_literal)(void *data);
}; };
/** Status codes reported by the parser.
 *
 * Declared through a typedef so that the bare name `WeaselJsonStatus` is a
 * valid type in C as well as in C++; without the typedef, C code would have to
 * spell the type `enum WeaselJsonStatus`. */
typedef enum WeaselJsonStatus {
  /** Accept input */
  WeaselJson_OK,
  /** Consumed all available input. Call WeaselJsonParser_parse with more data
   * to provide more input, or call with length 0 to indicate end of data. */
  WeaselJson_AGAIN,
  /** Invalid json */
  WeaselJson_REJECT,
  /** json is too deeply nested */
  WeaselJson_OVERFLOW,
} WeaselJsonStatus;
/** Opaque parser handle. Only the forward declaration is visible in this
 * header; callers hold and pass pointers to it. */
typedef struct WeaselJsonParser WeaselJsonParser;
/** Create a parser. Increasing `stackSize` increases memory usage but also
 * increases the depth of nested json accepted.
 * NOTE(review): the unit of `stackSize` and the value returned on failure are
 * not stated here - confirm. No parameter for WeaselJsonCallbacks or user data
 * is declared yet - confirm how callbacks are meant to be attached. */
WeaselJsonParser *WeaselJsonParser_create(int stackSize);
/** Restore the parser to its newly-created state. */
void WeaselJsonParser_reset(WeaselJsonParser *parser);
/** Destroy the parser.
 * NOTE(review): presumably `parser` must not be used after this call -
 * confirm. */
void WeaselJsonParser_destroy(WeaselJsonParser *parser);
/** Incrementally parse `len` more bytes starting at `buf`. `buf` may be
 * modified. Call with `len` 0 to indicate end of data.
 * The result is one of the WeaselJsonStatus values; see that enum for their
 * meaning. */
WeaselJsonStatus WeaselJsonParser_parse(WeaselJsonParser *parser, char *buf,
int len);
#ifdef __cplusplus
}
#endif
#endif #endif

View File

@@ -1,48 +1,49 @@
#include "callbacks.h" #include "callbacks.h"
#include "json_value.h" #include "json_value.h"
#include "parser3.h" #include "parser3.h"
#include "weaseljson.h"
#include <simdjson.h> #include <simdjson.h>
// Parses `copy` with a fresh Parser3 whose callbacks write into a local
// SerializeState, and returns {state.result, status}.
// When `stride` is 0 the whole buffer is passed in a single parse() call;
// otherwise it is passed in consecutive chunks of at most `stride` bytes.
// If any of those calls returns something other than the "again" status, that
// status is returned immediately together with whatever has been serialized
// so far. Otherwise parse(nullptr, 0) is called to signal end of data and its
// status is returned.
// `copy` is taken by value and handed to parse() through the non-const
// data() pointer, so the parser works on this function's own buffer.
std::pair<std::string, parser3::Status> runStreaming(std::string copy, std::pair<std::string, WeaselJsonStatus> runStreaming(std::string copy,
int stride) { int stride) {
SerializeState state; SerializeState state;
auto c = serializeCallbacks(); auto c = serializeCallbacks();
parser3::Parser3 parser(&c, &state); parser3::Parser3 parser(&c, &state);
if (stride == 0) { if (stride == 0) {
auto s = parser.parse(copy.data(), copy.size()); auto s = parser.parse(copy.data(), copy.size());
if (s != parser3::S_AGAIN) { if (s != WeaselJson_AGAIN) {
return {state.result, s}; return {state.result, s};
} }
} else { } else {
for (int i = 0; i < copy.size(); i += stride) { for (int i = 0; i < copy.size(); i += stride) {
auto s = auto s =
parser.parse(copy.data() + i, std::min<int>(stride, copy.size() - i)); parser.parse(copy.data() + i, std::min<int>(stride, copy.size() - i));
if (s != parser3::S_AGAIN) { if (s != WeaselJson_AGAIN) {
return {state.result, s}; return {state.result, s};
} }
} }
} }
auto s = parser.parse(nullptr, 0); auto s = parser.parse(nullptr, 0);
if (s != parser3::S_OK) { if (s != WeaselJson_OK) {
return {state.result, s}; return {state.result, s};
} }
return {state.result, parser3::S_OK}; return {state.result, WeaselJson_OK};
} }
// Parses `copy` in one shot: a single parse() call over the whole buffer,
// followed by parse(nullptr, 0) to signal end of data.
// Returns {state.result, status}. If the first call returns anything other
// than the "again" status, that status is returned without signalling end of
// data; otherwise the status of the end-of-data call is returned.
std::pair<std::string, parser3::Status> runBatch(std::string copy) { std::pair<std::string, WeaselJsonStatus> runBatch(std::string copy) {
SerializeState state; SerializeState state;
auto c = serializeCallbacks(); auto c = serializeCallbacks();
parser3::Parser3 parser(&c, &state); parser3::Parser3 parser(&c, &state);
auto s = parser.parse(copy.data(), copy.size()); auto s = parser.parse(copy.data(), copy.size());
if (s != parser3::S_AGAIN) { if (s != WeaselJson_AGAIN) {
return {state.result, s}; return {state.result, s};
} }
s = parser.parse(nullptr, 0); s = parser.parse(nullptr, 0);
if (s != parser3::S_OK) { if (s != WeaselJson_OK) {
return {state.result, s}; return {state.result, s};
} }
return {state.result, parser3::S_OK}; return {state.result, WeaselJson_OK};
} }
void testStreaming(std::string const &json) { void testStreaming(std::string const &json) {
@@ -51,15 +52,15 @@ void testStreaming(std::string const &json) {
auto streaming = runStreaming(json, stride); auto streaming = runStreaming(json, stride);
if (streaming != batch) { if (streaming != batch) {
if (streaming.second == batch.second && if (streaming.second == batch.second &&
streaming.second != parser3::S_OK) { streaming.second != WeaselJson_OK) {
// It's ok if the processed data doesn't match if parsing failed // It's ok if the processed data doesn't match if parsing failed
return; return;
} }
printf("streaming: %s, %s\n", printf("streaming: %s, %s\n",
streaming.second == parser3::S_OK ? "accept" : "reject", streaming.second == WeaselJson_OK ? "accept" : "reject",
streaming.first.c_str()); streaming.first.c_str());
printf("batch: %s, %s\n", printf("batch: %s, %s\n",
streaming.second == parser3::S_OK ? "accept" : "reject", streaming.second == WeaselJson_OK ? "accept" : "reject",
batch.first.c_str()); batch.first.c_str());
abort(); abort();
} }
@@ -67,13 +68,13 @@ void testStreaming(std::string const &json) {
} }
void compareWithSimdjson(std::string const &json) { void compareWithSimdjson(std::string const &json) {
parser3::Status ours; WeaselJsonStatus ours;
{ {
auto copy = json; auto copy = json;
auto c = noopCallbacks(); auto c = noopCallbacks();
parser3::Parser3 parser3(&c, nullptr); parser3::Parser3 parser3(&c, nullptr);
ours = parser3.parse(copy.data(), copy.size()); ours = parser3.parse(copy.data(), copy.size());
if (ours == parser3::S_AGAIN) { if (ours == WeaselJson_AGAIN) {
ours = parser3.parse(nullptr, 0); ours = parser3.parse(nullptr, 0);
} }
} }
@@ -83,10 +84,10 @@ void compareWithSimdjson(std::string const &json) {
simdjson::dom::parser parser; simdjson::dom::parser parser;
auto doc = parser.parse(my_padded_data); auto doc = parser.parse(my_padded_data);
auto theirs = doc.error(); auto theirs = doc.error();
if (ours == parser3::S_OVERFLOW || theirs == simdjson::DEPTH_ERROR) { if (ours == WeaselJson_OVERFLOW || theirs == simdjson::DEPTH_ERROR) {
return; return;
} }
if ((ours == parser3::S_OK) != (theirs == simdjson::SUCCESS)) { if ((ours == WeaselJson_OK) != (theirs == simdjson::SUCCESS)) {
if (json.starts_with("\xef\xbb\xbf")) { if (json.starts_with("\xef\xbb\xbf")) {
// What to do with byte order mark? // What to do with byte order mark?
return; return;

View File

@@ -197,19 +197,19 @@ inline std::optional<JsonValue> toValue(std::string copy, int stride) {
auto c = readValueCallbacks(); auto c = readValueCallbacks();
parser3::Parser3 parser(&c, &state); parser3::Parser3 parser(&c, &state);
if (stride == 0) { if (stride == 0) {
if (parser.parse(copy.data(), copy.size()) != parser3::S_AGAIN) { if (parser.parse(copy.data(), copy.size()) != WeaselJson_AGAIN) {
return std::nullopt; return std::nullopt;
} }
} else { } else {
for (int i = 0; i < copy.size(); i += stride) { for (int i = 0; i < copy.size(); i += stride) {
if (parser.parse(copy.data() + i, if (parser.parse(copy.data() + i,
std::min<int>(stride, copy.size() - i)) != std::min<int>(stride, copy.size() - i)) !=
parser3::S_AGAIN) { WeaselJson_AGAIN) {
return std::nullopt; return std::nullopt;
} }
} }
} }
if (parser.parse(nullptr, 0) != parser3::S_OK) { if (parser.parse(nullptr, 0) != WeaselJson_OK) {
return std::nullopt; return std::nullopt;
} }
return std::move(state.result); return std::move(state.result);

View File

@@ -16,18 +16,7 @@
namespace parser3 { namespace parser3 {
enum Status { typedef WeaselJsonStatus (*Continuation)(struct Parser3 *);
// Accept input
S_OK,
// Consumed all available input.
S_AGAIN,
// Invalid json
S_REJECT,
// json is too deeply nested
S_OVERFLOW,
};
typedef Status (*Continuation)(struct Parser3 *);
// These appear in the stack of the pushdown // These appear in the stack of the pushdown
// automata // automata
@@ -79,7 +68,7 @@ struct Parser3 {
std::ignore = push({N_WHITESPACE, N_VALUE, N_WHITESPACE, T_EOF}); std::ignore = push({N_WHITESPACE, N_VALUE, N_WHITESPACE, T_EOF});
} }
[[nodiscard]] Status parse(char *buf, int len) { [[nodiscard]] WeaselJsonStatus parse(char *buf, int len) {
complete = len == 0; complete = len == 0;
this->buf = this->dataBegin = this->writeBuf = buf; this->buf = this->dataBegin = this->writeBuf = buf;
this->bufEnd = buf + len; this->bufEnd = buf + len;
@@ -106,14 +95,14 @@ struct Parser3 {
assert(!empty()); assert(!empty());
--stackPtr; --stackPtr;
} }
// Pushes `symbols` onto the parser stack and returns the "ok" status, or
// returns the "overflow" status without pushing anything when the capacity
// check fails.
// The symbols are written last-to-first, so the first element of `symbols`
// is the last one written and ends up on top of the stack.
// NOTE(review): because the capacity check uses `>=`, a push that would
// exactly fill the stack is refused as well - confirm this is intended.
[[nodiscard]] Status push(std::initializer_list<Symbol> symbols) { [[nodiscard]] WeaselJsonStatus push(std::initializer_list<Symbol> symbols) {
if (stackPtr >= std::end(stack) - symbols.size()) [[unlikely]] { if (stackPtr >= std::end(stack) - symbols.size()) [[unlikely]] {
return S_OVERFLOW; return WeaselJson_OVERFLOW;
} }
for (int i = symbols.size() - 1; i >= 0; --i) { for (int i = symbols.size() - 1; i >= 0; --i) {
*stackPtr++ = *(symbols.begin() + i); *stackPtr++ = *(symbols.begin() + i);
} }
return S_OK; return WeaselJson_OK;
} }
[[nodiscard]] int len() const { [[nodiscard]] int len() const {
auto result = bufEnd - buf; auto result = bufEnd - buf;
@@ -125,7 +114,7 @@ struct Parser3 {
return *(stackPtr - 1); return *(stackPtr - 1);
} }
static Status keepGoing(Parser3 *self); static WeaselJsonStatus keepGoing(Parser3 *self);
constexpr static int kMaxStackSize = 1024; constexpr static int kMaxStackSize = 1024;
@@ -148,7 +137,7 @@ struct Parser3 {
uint32_t minCodepoint; uint32_t minCodepoint;
}; };
inline Status n_value(Parser3 *self) { inline WeaselJsonStatus n_value(Parser3 *self) {
switch (*self->buf) { switch (*self->buf) {
case '{': case '{':
self->pop(); self->pop();
@@ -207,14 +196,14 @@ inline Status n_value(Parser3 *self) {
} }
break; break;
default: default:
return S_REJECT; return WeaselJson_REJECT;
} }
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
} }
inline Status n_object(Parser3 *self) { inline WeaselJsonStatus n_object(Parser3 *self) {
if (*self->buf != '{') { if (*self->buf != '{') {
return S_REJECT; return WeaselJson_REJECT;
} }
self->callbacks->on_begin_object(self->data); self->callbacks->on_begin_object(self->data);
++self->buf; ++self->buf;
@@ -225,7 +214,7 @@ inline Status n_object(Parser3 *self) {
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
} }
inline Status n_object2(Parser3 *self) { inline WeaselJsonStatus n_object2(Parser3 *self) {
switch (*self->buf) { switch (*self->buf) {
case '}': case '}':
++self->buf; ++self->buf;
@@ -240,11 +229,11 @@ inline Status n_object2(Parser3 *self) {
} }
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
default: default:
return S_REJECT; return WeaselJson_REJECT;
} }
} }
inline Status n_object3(Parser3 *self) { inline WeaselJsonStatus n_object3(Parser3 *self) {
switch (*self->buf) { switch (*self->buf) {
case '}': case '}':
++self->buf; ++self->buf;
@@ -260,13 +249,13 @@ inline Status n_object3(Parser3 *self) {
} }
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
default: default:
return S_REJECT; return WeaselJson_REJECT;
} }
} }
inline Status n_array(Parser3 *self) { inline WeaselJsonStatus n_array(Parser3 *self) {
if (*self->buf != '[') { if (*self->buf != '[') {
return S_REJECT; return WeaselJson_REJECT;
} }
self->callbacks->on_begin_array(self->data); self->callbacks->on_begin_array(self->data);
++self->buf; ++self->buf;
@@ -277,7 +266,7 @@ inline Status n_array(Parser3 *self) {
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
} }
inline Status n_array2(Parser3 *self) { inline WeaselJsonStatus n_array2(Parser3 *self) {
switch (*self->buf) { switch (*self->buf) {
case ']': case ']':
++self->buf; ++self->buf;
@@ -293,7 +282,7 @@ inline Status n_array2(Parser3 *self) {
} }
} }
inline Status n_array3(Parser3 *self) { inline WeaselJsonStatus n_array3(Parser3 *self) {
switch (*self->buf) { switch (*self->buf) {
case ']': case ']':
++self->buf; ++self->buf;
@@ -308,13 +297,13 @@ inline Status n_array3(Parser3 *self) {
} }
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
default: default:
return S_REJECT; return WeaselJson_REJECT;
} }
} }
inline Status n_string(Parser3 *self) { inline WeaselJsonStatus n_string(Parser3 *self) {
if (*self->buf != '"') { if (*self->buf != '"') {
return S_REJECT; return WeaselJson_REJECT;
} }
self->callbacks->on_begin_string(self->data); self->callbacks->on_begin_string(self->data);
++self->buf; ++self->buf;
@@ -326,7 +315,7 @@ inline Status n_string(Parser3 *self) {
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
} }
inline Status n_string2(Parser3 *self) { inline WeaselJsonStatus n_string2(Parser3 *self) {
auto commit = [self, before = self->buf]() { auto commit = [self, before = self->buf]() {
int len = self->buf - before; int len = self->buf - before;
if (self->writeBuf != before) { if (self->writeBuf != before) {
@@ -395,11 +384,11 @@ begin:
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
case Tables::CONTINUATION_BYTE: case Tables::CONTINUATION_BYTE:
case Tables::INVALID: case Tables::INVALID:
return S_REJECT; return WeaselJson_REJECT;
} }
} }
inline Status n_string_following_escape(Parser3 *self) { inline WeaselJsonStatus n_string_following_escape(Parser3 *self) {
switch (*self->buf) { switch (*self->buf) {
case '"': case '"':
case '\\': case '\\':
@@ -424,14 +413,14 @@ inline Status n_string_following_escape(Parser3 *self) {
} }
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
default: default:
return S_REJECT; return WeaselJson_REJECT;
} }
} }
inline Status t_utf8_continuation_byte(Parser3 *self) { inline WeaselJsonStatus t_utf8_continuation_byte(Parser3 *self) {
if (tables.stringByteMeaning[uint8_t(*self->buf)] != if (tables.stringByteMeaning[uint8_t(*self->buf)] !=
Tables::CONTINUATION_BYTE) { Tables::CONTINUATION_BYTE) {
return S_REJECT; return WeaselJson_REJECT;
} }
self->utf8Codepoint <<= 6; self->utf8Codepoint <<= 6;
self->utf8Codepoint |= *self->buf & 0b00111111; self->utf8Codepoint |= *self->buf & 0b00111111;
@@ -440,17 +429,17 @@ inline Status t_utf8_continuation_byte(Parser3 *self) {
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
} }
inline Status t_utf8_last_continuation_byte(Parser3 *self) { inline WeaselJsonStatus t_utf8_last_continuation_byte(Parser3 *self) {
if (tables.stringByteMeaning[uint8_t(*self->buf)] != if (tables.stringByteMeaning[uint8_t(*self->buf)] !=
Tables::CONTINUATION_BYTE) { Tables::CONTINUATION_BYTE) {
return S_REJECT; return WeaselJson_REJECT;
} }
self->utf8Codepoint <<= 6; self->utf8Codepoint <<= 6;
self->utf8Codepoint |= *self->buf & 0b00111111; self->utf8Codepoint |= *self->buf & 0b00111111;
if (self->utf8Codepoint < self->minCodepoint || if (self->utf8Codepoint < self->minCodepoint ||
self->utf8Codepoint > 0x10ffff || self->utf8Codepoint > 0x10ffff ||
(0xd800 <= self->utf8Codepoint && self->utf8Codepoint <= 0xdfff)) { (0xd800 <= self->utf8Codepoint && self->utf8Codepoint <= 0xdfff)) {
return S_REJECT; return WeaselJson_REJECT;
} }
// TODO tell valgrind utf8Codepoint and minCodepoint are uninitialized // TODO tell valgrind utf8Codepoint and minCodepoint are uninitialized
*self->writeBuf++ = *self->buf++; *self->writeBuf++ = *self->buf++;
@@ -458,25 +447,25 @@ inline Status t_utf8_last_continuation_byte(Parser3 *self) {
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
} }
// Matches a single ASCII digit '0'-'9' at self->buf.
// On a match: advances buf past the byte, pops the current stack symbol and
// tail-calls keepGoing. Any other byte yields the "reject" status.
inline Status t_digit(Parser3 *self) { inline WeaselJsonStatus t_digit(Parser3 *self) {
if ('0' <= *self->buf && *self->buf <= '9') { if ('0' <= *self->buf && *self->buf <= '9') {
++self->buf; ++self->buf;
self->pop(); self->pop();
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
} }
return S_REJECT; return WeaselJson_REJECT;
} }
// Matches a single non-zero ASCII digit '1'-'9' at self->buf.
// On a match: advances buf past the byte, pops the current stack symbol and
// tail-calls keepGoing. Any other byte (including '0') yields the "reject"
// status.
inline Status t_onenine(Parser3 *self) { inline WeaselJsonStatus t_onenine(Parser3 *self) {
if ('1' <= *self->buf && *self->buf <= '9') { if ('1' <= *self->buf && *self->buf <= '9') {
++self->buf; ++self->buf;
self->pop(); self->pop();
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
} }
return S_REJECT; return WeaselJson_REJECT;
} }
inline Status t_hex(Parser3 *self) { inline WeaselJsonStatus t_hex(Parser3 *self) {
self->utf8Codepoint <<= 4; self->utf8Codepoint <<= 4;
if (('0' <= *self->buf && *self->buf <= '9')) { if (('0' <= *self->buf && *self->buf <= '9')) {
self->utf8Codepoint |= *self->buf - '0'; self->utf8Codepoint |= *self->buf - '0';
@@ -485,14 +474,14 @@ inline Status t_hex(Parser3 *self) {
} else if ('A' <= *self->buf && *self->buf <= 'F') { } else if ('A' <= *self->buf && *self->buf <= 'F') {
self->utf8Codepoint |= 10 + *self->buf - 'A'; self->utf8Codepoint |= 10 + *self->buf - 'A';
} else { } else {
return S_REJECT; return WeaselJson_REJECT;
} }
++self->buf; ++self->buf;
self->pop(); self->pop();
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
} }
inline Status t_hex2(Parser3 *self) { inline WeaselJsonStatus t_hex2(Parser3 *self) {
self->utf8Codepoint <<= 4; self->utf8Codepoint <<= 4;
if (('0' <= *self->buf && *self->buf <= '9')) { if (('0' <= *self->buf && *self->buf <= '9')) {
self->utf8Codepoint |= *self->buf - '0'; self->utf8Codepoint |= *self->buf - '0';
@@ -501,7 +490,7 @@ inline Status t_hex2(Parser3 *self) {
} else if ('A' <= *self->buf && *self->buf <= 'F') { } else if ('A' <= *self->buf && *self->buf <= 'F') {
self->utf8Codepoint |= 10 + *self->buf - 'A'; self->utf8Codepoint |= 10 + *self->buf - 'A';
} else { } else {
return S_REJECT; return WeaselJson_REJECT;
} }
++self->buf; ++self->buf;
@@ -559,7 +548,7 @@ inline Status t_hex2(Parser3 *self) {
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
} }
inline Status t_hex3(Parser3 *self) { inline WeaselJsonStatus t_hex3(Parser3 *self) {
self->utf8Codepoint <<= 4; self->utf8Codepoint <<= 4;
if (('0' <= *self->buf && *self->buf <= '9')) { if (('0' <= *self->buf && *self->buf <= '9')) {
self->utf8Codepoint |= *self->buf - '0'; self->utf8Codepoint |= *self->buf - '0';
@@ -568,12 +557,12 @@ inline Status t_hex3(Parser3 *self) {
} else if ('A' <= *self->buf && *self->buf <= 'F') { } else if ('A' <= *self->buf && *self->buf <= 'F') {
self->utf8Codepoint |= 10 + *self->buf - 'A'; self->utf8Codepoint |= 10 + *self->buf - 'A';
} else { } else {
return S_REJECT; return WeaselJson_REJECT;
} }
++self->buf; ++self->buf;
if (!(0xdc00 <= self->utf8Codepoint && self->utf8Codepoint <= 0xdfff)) { if (!(0xdc00 <= self->utf8Codepoint && self->utf8Codepoint <= 0xdfff)) {
return S_REJECT; return WeaselJson_REJECT;
} }
// Decode utf16 surrogate pair // Decode utf16 surrogate pair
@@ -585,7 +574,7 @@ inline Status t_hex3(Parser3 *self) {
char tmp[4]; char tmp[4];
assert(self->utf8Codepoint >= 0x10000); assert(self->utf8Codepoint >= 0x10000);
if (self->utf8Codepoint > 0x10FFFF) { if (self->utf8Codepoint > 0x10FFFF) {
return S_REJECT; return WeaselJson_REJECT;
} }
bool useTmp = self->buf - self->writeBuf < 4; bool useTmp = self->buf - self->writeBuf < 4;
char *p = tmp; char *p = tmp;
@@ -609,7 +598,7 @@ inline Status t_hex3(Parser3 *self) {
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
} }
inline Status n_integer(Parser3 *self) { inline WeaselJsonStatus n_integer(Parser3 *self) {
self->callbacks->on_begin_number(self->data); self->callbacks->on_begin_number(self->data);
self->dataBegin = self->buf; self->dataBegin = self->buf;
switch (*self->buf) { switch (*self->buf) {
@@ -640,11 +629,11 @@ inline Status n_integer(Parser3 *self) {
} }
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
default: default:
return S_REJECT; return WeaselJson_REJECT;
} }
} }
inline Status n_integer2(Parser3 *self) { inline WeaselJsonStatus n_integer2(Parser3 *self) {
switch (*self->buf) { switch (*self->buf) {
case '0': case '0':
++self->buf; ++self->buf;
@@ -666,11 +655,11 @@ inline Status n_integer2(Parser3 *self) {
} }
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
default: default:
return S_REJECT; return WeaselJson_REJECT;
} }
} }
inline Status n_digits(Parser3 *self) { inline WeaselJsonStatus n_digits(Parser3 *self) {
switch (*self->buf) { switch (*self->buf) {
case '0': case '0':
case '1': case '1':
@@ -689,11 +678,11 @@ inline Status n_digits(Parser3 *self) {
} }
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
default: default:
return S_REJECT; return WeaselJson_REJECT;
} }
} }
inline Status n_digits2(Parser3 *self) { inline WeaselJsonStatus n_digits2(Parser3 *self) {
if (self->len() == 0) { if (self->len() == 0) {
self->pop(); self->pop();
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
@@ -717,7 +706,7 @@ inline Status n_digits2(Parser3 *self) {
} }
} }
inline Status n_fraction(Parser3 *self) { inline WeaselJsonStatus n_fraction(Parser3 *self) {
if (self->len() == 0) { if (self->len() == 0) {
self->pop(); self->pop();
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
@@ -736,7 +725,7 @@ inline Status n_fraction(Parser3 *self) {
} }
} }
inline Status n_exponent(Parser3 *self) { inline WeaselJsonStatus n_exponent(Parser3 *self) {
if (self->len() == 0) { if (self->len() == 0) {
self->pop(); self->pop();
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
@@ -756,7 +745,7 @@ inline Status n_exponent(Parser3 *self) {
} }
} }
inline Status n_sign(Parser3 *self) { inline WeaselJsonStatus n_sign(Parser3 *self) {
if (self->len() == 0) { if (self->len() == 0) {
self->pop(); self->pop();
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
@@ -773,7 +762,7 @@ inline Status n_sign(Parser3 *self) {
} }
} }
inline Status n_whitespace(Parser3 *self) { inline WeaselJsonStatus n_whitespace(Parser3 *self) {
if (self->len() == 0) { if (self->len() == 0) {
self->pop(); self->pop();
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
@@ -781,60 +770,60 @@ inline Status n_whitespace(Parser3 *self) {
while (tables.whitespace[uint8_t(*self->buf)]) { while (tables.whitespace[uint8_t(*self->buf)]) {
++self->buf; ++self->buf;
if (self->buf == self->bufEnd) { if (self->buf == self->bufEnd) {
return S_AGAIN; return WeaselJson_AGAIN;
} }
} }
self->pop(); self->pop();
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
} }
// Matches the byte 'e' at self->buf; on a match advances buf, pops the
// current stack symbol, invokes the on_true_literal callback and tail-calls
// keepGoing. Any other byte yields the "reject" status.
// NOTE(review): only the final 'e' is checked here - presumably the leading
// "tru" is matched by symbols handled before this one; confirm against the
// code that pushes this symbol.
inline Status n_true(Parser3 *self) { inline WeaselJsonStatus n_true(Parser3 *self) {
if (*self->buf == 'e') { if (*self->buf == 'e') {
++self->buf; ++self->buf;
self->pop(); self->pop();
self->callbacks->on_true_literal(self->data); self->callbacks->on_true_literal(self->data);
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
} }
return S_REJECT; return WeaselJson_REJECT;
} }
// Matches the byte 'e' at self->buf; on a match advances buf, pops the
// current stack symbol, invokes the on_false_literal callback and tail-calls
// keepGoing. Any other byte yields the "reject" status.
// NOTE(review): only the final 'e' is checked here - presumably the leading
// "fals" is matched by symbols handled before this one; confirm against the
// code that pushes this symbol.
inline Status n_false(Parser3 *self) { inline WeaselJsonStatus n_false(Parser3 *self) {
if (*self->buf == 'e') { if (*self->buf == 'e') {
++self->buf; ++self->buf;
self->pop(); self->pop();
self->callbacks->on_false_literal(self->data); self->callbacks->on_false_literal(self->data);
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
} }
return S_REJECT; return WeaselJson_REJECT;
} }
// Matches the byte 'l' at self->buf; on a match advances buf, pops the
// current stack symbol, invokes the on_null_literal callback and tail-calls
// keepGoing. Any other byte yields the "reject" status.
// NOTE(review): only the final 'l' is checked here - presumably the leading
// "nul" is matched by symbols handled before this one; confirm against the
// code that pushes this symbol.
inline Status n_null(Parser3 *self) { inline WeaselJsonStatus n_null(Parser3 *self) {
if (*self->buf == 'l') { if (*self->buf == 'l') {
++self->buf; ++self->buf;
self->pop(); self->pop();
self->callbacks->on_null_literal(self->data); self->callbacks->on_null_literal(self->data);
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
} }
return S_REJECT; return WeaselJson_REJECT;
} }
// Matches exactly the byte `kChar` (a compile-time template argument) at
// self->buf. On a match: advances buf past the byte, pops the current stack
// symbol and tail-calls keepGoing. Any other byte yields the "reject" status.
template <char kChar> inline Status singleChar(Parser3 *self) { template <char kChar> inline WeaselJsonStatus singleChar(Parser3 *self) {
if (*self->buf == kChar) { if (*self->buf == kChar) {
++self->buf; ++self->buf;
self->pop(); self->pop();
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
} }
return S_REJECT; return WeaselJson_REJECT;
} }
// End-of-input symbol. Rejects if self->len() reports that bytes are still
// left. Otherwise returns the "ok" status when self->complete is set, and the
// "again" status when it is not. Unlike the other symbol handlers it neither
// pops the stack nor calls keepGoing.
inline Status t_eof(Parser3 *self) { inline WeaselJsonStatus t_eof(Parser3 *self) {
if (self->len() > 0) { if (self->len() > 0) {
return S_REJECT; return WeaselJson_REJECT;
} }
return self->complete ? S_OK : S_AGAIN; return self->complete ? WeaselJson_OK : WeaselJson_AGAIN;
} }
inline Status t_end_number(Parser3 *self) { inline WeaselJsonStatus t_end_number(Parser3 *self) {
self->pop(); self->pop();
self->flushNumber(); self->flushNumber();
self->callbacks->on_end_number(self->data); self->callbacks->on_end_number(self->data);
@@ -847,7 +836,7 @@ constexpr inline struct ContinuationTable {
for (int i = 0; i < N_SYMBOL_COUNT; ++i) { for (int i = 0; i < N_SYMBOL_COUNT; ++i) {
continuations[i] = +[](struct Parser3 *) { continuations[i] = +[](struct Parser3 *) {
printf("unimplemented\n"); printf("unimplemented\n");
return S_REJECT; return WeaselJson_REJECT;
}; };
} }
continuations[N_VALUE] = n_value; continuations[N_VALUE] = n_value;
@@ -942,7 +931,7 @@ constexpr inline struct ContinuationTable {
bool acceptsEmptyString[N_SYMBOL_COUNT]{}; bool acceptsEmptyString[N_SYMBOL_COUNT]{};
} symbolTables; } symbolTables;
inline Status Parser3::keepGoing(Parser3 *self) { inline WeaselJsonStatus Parser3::keepGoing(Parser3 *self) {
// self->debugPrint(); // self->debugPrint();
if (self->len() == 0) { if (self->len() == 0) {
if (!self->complete) { if (!self->complete) {
@@ -992,10 +981,10 @@ inline Status Parser3::keepGoing(Parser3 *self) {
case N_SYMBOL_COUNT: case N_SYMBOL_COUNT:
break; break;
} }
return S_AGAIN; return WeaselJson_AGAIN;
} }
if (!symbolTables.acceptsEmptyString[self->top()]) { if (!symbolTables.acceptsEmptyString[self->top()]) {
return S_REJECT; return WeaselJson_REJECT;
} }
} }
MUSTTAIL return symbolTables.continuations[self->top()](self); MUSTTAIL return symbolTables.continuations[self->top()](self);

View File

@@ -12,6 +12,7 @@
#include "callbacks.h" #include "callbacks.h"
#include "parser3.h" #include "parser3.h"
#include "weaseljson.h"
// This is the JSON grammar in McKeeman Form. // This is the JSON grammar in McKeeman Form.
@@ -154,15 +155,15 @@ void testStreaming(std::string const &json) {
auto copy = json; auto copy = json;
parser3::Parser3 parser(&c, &streaming); parser3::Parser3 parser(&c, &streaming);
for (int i = 0; i < copy.size(); ++i) { for (int i = 0; i < copy.size(); ++i) {
REQUIRE(parser.parse(copy.data() + i, 1) == parser3::S_AGAIN); REQUIRE(parser.parse(copy.data() + i, 1) == WeaselJson_AGAIN);
} }
CHECK(parser.parse(nullptr, 0) == parser3::S_OK); CHECK(parser.parse(nullptr, 0) == WeaselJson_OK);
} }
{ {
auto copy = json; auto copy = json;
parser3::Parser3 parser(&c, &batch); parser3::Parser3 parser(&c, &batch);
REQUIRE(parser.parse(copy.data(), copy.size()) == parser3::S_AGAIN); REQUIRE(parser.parse(copy.data(), copy.size()) == WeaselJson_AGAIN);
CHECK(parser.parse(nullptr, 0) == parser3::S_OK); CHECK(parser.parse(nullptr, 0) == WeaselJson_OK);
} }
CHECK(streaming.result == batch.result); CHECK(streaming.result == batch.result);
} }
@@ -177,32 +178,32 @@ TEST_CASE("parser3") {
parser3::Parser3 parser(&c, &state); parser3::Parser3 parser(&c, &state);
int i = 0; int i = 0;
for (; i < copy.length() - 1; ++i) { for (; i < copy.length() - 1; ++i) {
REQUIRE(parser.parse(copy.data() + i, 1) == parser3::S_AGAIN); REQUIRE(parser.parse(copy.data() + i, 1) == WeaselJson_AGAIN);
} }
CHECK(parser.parse(copy.data() + i, 1) == parser3::S_AGAIN); CHECK(parser.parse(copy.data() + i, 1) == WeaselJson_AGAIN);
CHECK(parser.parse(nullptr, 0) == parser3::S_OK); CHECK(parser.parse(nullptr, 0) == WeaselJson_OK);
puts(""); puts("");
} }
{ {
std::string copy = "{\"x\": [], \"y\": {}}"; std::string copy = "{\"x\": [], \"y\": {}}";
parser3::Parser3 parser(&c, &state); parser3::Parser3 parser(&c, &state);
CHECK(parser.parse(copy.data(), copy.length()) == parser3::S_AGAIN); CHECK(parser.parse(copy.data(), copy.length()) == WeaselJson_AGAIN);
CHECK(parser.parse(nullptr, 0) == parser3::S_OK); CHECK(parser.parse(nullptr, 0) == WeaselJson_OK);
puts(""); puts("");
} }
{ {
auto c = noopCallbacks(); auto c = noopCallbacks();
std::string copy = "{\"a\":\"a"; std::string copy = "{\"a\":\"a";
parser3::Parser3 parser(&c, &state); parser3::Parser3 parser(&c, &state);
CHECK(parser.parse(copy.data(), copy.length()) == parser3::S_AGAIN); CHECK(parser.parse(copy.data(), copy.length()) == WeaselJson_AGAIN);
CHECK(parser.parse(nullptr, 0) == parser3::S_REJECT); CHECK(parser.parse(nullptr, 0) == WeaselJson_REJECT);
} }
{ {
auto c = noopCallbacks(); auto c = noopCallbacks();
std::string copy = "["; std::string copy = "[";
parser3::Parser3 parser(&c, &state); parser3::Parser3 parser(&c, &state);
CHECK(parser.parse(copy.data(), copy.length()) == parser3::S_AGAIN); CHECK(parser.parse(copy.data(), copy.length()) == WeaselJson_AGAIN);
CHECK(parser.parse(nullptr, 0) == parser3::S_REJECT); CHECK(parser.parse(nullptr, 0) == WeaselJson_REJECT);
} }
} }
@@ -225,9 +226,9 @@ void doTestUnescapingUtf8(std::string const &escaped,
CAPTURE(i); CAPTURE(i);
CHECK( CHECK(
parser.parse(copy.data() + i, std::min<int>(stride, copy.size() - i)) == parser.parse(copy.data() + i, std::min<int>(stride, copy.size() - i)) ==
parser3::S_AGAIN); WeaselJson_AGAIN);
} }
CHECK(parser.parse(nullptr, 0) == parser3::S_OK); CHECK(parser.parse(nullptr, 0) == WeaselJson_OK);
CHECK(result.size() == expected.size()); CHECK(result.size() == expected.size());
CHECK(result == expected); CHECK(result == expected);
} }
@@ -269,7 +270,7 @@ TEST_CASE("bench3") {
auto copy = json; auto copy = json;
parser3::Parser3 parser(&c, nullptr); parser3::Parser3 parser(&c, nullptr);
bench.doNotOptimizeAway(parser.parse(copy.data(), copy.length())); bench.doNotOptimizeAway(parser.parse(copy.data(), copy.length()));
bench.doNotOptimizeAway(parser.parse(nullptr, 0) == parser3::S_OK); bench.doNotOptimizeAway(parser.parse(nullptr, 0) == WeaselJson_OK);
}); });
} }

View File

@@ -3,6 +3,7 @@
#include "callbacks.h" #include "callbacks.h"
#include "parser3.h" #include "parser3.h"
#include "weaseljson.h"
int main(int argc, char **argv) { int main(int argc, char **argv) {
if (argc < 2) { if (argc < 2) {
@@ -24,12 +25,12 @@ int main(int argc, char **argv) {
return 1; return 1;
} }
switch (parser.parse(buf, l)) { switch (parser.parse(buf, l)) {
case parser3::S_OK: case WeaselJson_OK:
return 0; return 0;
case parser3::S_AGAIN: case WeaselJson_AGAIN:
continue; continue;
case parser3::S_REJECT: case WeaselJson_REJECT:
case parser3::S_OVERFLOW: case WeaselJson_OVERFLOW:
return 1; return 1;
} }
if (l == 0) { if (l == 0) {