Start working on C API

This commit is contained in:
2025-05-22 10:55:15 -04:00
parent 96ef50d52f
commit 6e602d8fd5
6 changed files with 150 additions and 121 deletions

View File

@@ -16,18 +16,7 @@
namespace parser3 {
enum Status {
// Accept input
S_OK,
// Consumed all available input.
S_AGAIN,
// Invalid json
S_REJECT,
// json is too deeply nested
S_OVERFLOW,
};
typedef Status (*Continuation)(struct Parser3 *);
typedef WeaselJsonStatus (*Continuation)(struct Parser3 *);
// These appear in the stack of the pushdown
// automaton
@@ -79,7 +68,7 @@ struct Parser3 {
std::ignore = push({N_WHITESPACE, N_VALUE, N_WHITESPACE, T_EOF});
}
[[nodiscard]] Status parse(char *buf, int len) {
[[nodiscard]] WeaselJsonStatus parse(char *buf, int len) {
complete = len == 0;
this->buf = this->dataBegin = this->writeBuf = buf;
this->bufEnd = buf + len;
@@ -106,14 +95,14 @@ struct Parser3 {
assert(!empty());
--stackPtr;
}
[[nodiscard]] Status push(std::initializer_list<Symbol> symbols) {
[[nodiscard]] WeaselJsonStatus push(std::initializer_list<Symbol> symbols) {
if (stackPtr >= std::end(stack) - symbols.size()) [[unlikely]] {
return S_OVERFLOW;
return WeaselJson_OVERFLOW;
}
for (int i = symbols.size() - 1; i >= 0; --i) {
*stackPtr++ = *(symbols.begin() + i);
}
return S_OK;
return WeaselJson_OK;
}
[[nodiscard]] int len() const {
auto result = bufEnd - buf;
@@ -125,7 +114,7 @@ struct Parser3 {
return *(stackPtr - 1);
}
static Status keepGoing(Parser3 *self);
static WeaselJsonStatus keepGoing(Parser3 *self);
constexpr static int kMaxStackSize = 1024;
@@ -148,7 +137,7 @@ struct Parser3 {
uint32_t minCodepoint;
};
inline Status n_value(Parser3 *self) {
inline WeaselJsonStatus n_value(Parser3 *self) {
switch (*self->buf) {
case '{':
self->pop();
@@ -207,14 +196,14 @@ inline Status n_value(Parser3 *self) {
}
break;
default:
return S_REJECT;
return WeaselJson_REJECT;
}
MUSTTAIL return Parser3::keepGoing(self);
}
inline Status n_object(Parser3 *self) {
inline WeaselJsonStatus n_object(Parser3 *self) {
if (*self->buf != '{') {
return S_REJECT;
return WeaselJson_REJECT;
}
self->callbacks->on_begin_object(self->data);
++self->buf;
@@ -225,7 +214,7 @@ inline Status n_object(Parser3 *self) {
MUSTTAIL return Parser3::keepGoing(self);
}
inline Status n_object2(Parser3 *self) {
inline WeaselJsonStatus n_object2(Parser3 *self) {
switch (*self->buf) {
case '}':
++self->buf;
@@ -240,11 +229,11 @@ inline Status n_object2(Parser3 *self) {
}
MUSTTAIL return Parser3::keepGoing(self);
default:
return S_REJECT;
return WeaselJson_REJECT;
}
}
inline Status n_object3(Parser3 *self) {
inline WeaselJsonStatus n_object3(Parser3 *self) {
switch (*self->buf) {
case '}':
++self->buf;
@@ -260,13 +249,13 @@ inline Status n_object3(Parser3 *self) {
}
MUSTTAIL return Parser3::keepGoing(self);
default:
return S_REJECT;
return WeaselJson_REJECT;
}
}
inline Status n_array(Parser3 *self) {
inline WeaselJsonStatus n_array(Parser3 *self) {
if (*self->buf != '[') {
return S_REJECT;
return WeaselJson_REJECT;
}
self->callbacks->on_begin_array(self->data);
++self->buf;
@@ -277,7 +266,7 @@ inline Status n_array(Parser3 *self) {
MUSTTAIL return Parser3::keepGoing(self);
}
inline Status n_array2(Parser3 *self) {
inline WeaselJsonStatus n_array2(Parser3 *self) {
switch (*self->buf) {
case ']':
++self->buf;
@@ -293,7 +282,7 @@ inline Status n_array2(Parser3 *self) {
}
}
inline Status n_array3(Parser3 *self) {
inline WeaselJsonStatus n_array3(Parser3 *self) {
switch (*self->buf) {
case ']':
++self->buf;
@@ -308,13 +297,13 @@ inline Status n_array3(Parser3 *self) {
}
MUSTTAIL return Parser3::keepGoing(self);
default:
return S_REJECT;
return WeaselJson_REJECT;
}
}
inline Status n_string(Parser3 *self) {
inline WeaselJsonStatus n_string(Parser3 *self) {
if (*self->buf != '"') {
return S_REJECT;
return WeaselJson_REJECT;
}
self->callbacks->on_begin_string(self->data);
++self->buf;
@@ -326,7 +315,7 @@ inline Status n_string(Parser3 *self) {
MUSTTAIL return Parser3::keepGoing(self);
}
inline Status n_string2(Parser3 *self) {
inline WeaselJsonStatus n_string2(Parser3 *self) {
auto commit = [self, before = self->buf]() {
int len = self->buf - before;
if (self->writeBuf != before) {
@@ -395,11 +384,11 @@ begin:
MUSTTAIL return Parser3::keepGoing(self);
case Tables::CONTINUATION_BYTE:
case Tables::INVALID:
return S_REJECT;
return WeaselJson_REJECT;
}
}
inline Status n_string_following_escape(Parser3 *self) {
inline WeaselJsonStatus n_string_following_escape(Parser3 *self) {
switch (*self->buf) {
case '"':
case '\\':
@@ -424,14 +413,14 @@ inline Status n_string_following_escape(Parser3 *self) {
}
MUSTTAIL return Parser3::keepGoing(self);
default:
return S_REJECT;
return WeaselJson_REJECT;
}
}
inline Status t_utf8_continuation_byte(Parser3 *self) {
inline WeaselJsonStatus t_utf8_continuation_byte(Parser3 *self) {
if (tables.stringByteMeaning[uint8_t(*self->buf)] !=
Tables::CONTINUATION_BYTE) {
return S_REJECT;
return WeaselJson_REJECT;
}
self->utf8Codepoint <<= 6;
self->utf8Codepoint |= *self->buf & 0b00111111;
@@ -440,17 +429,17 @@ inline Status t_utf8_continuation_byte(Parser3 *self) {
MUSTTAIL return Parser3::keepGoing(self);
}
inline Status t_utf8_last_continuation_byte(Parser3 *self) {
inline WeaselJsonStatus t_utf8_last_continuation_byte(Parser3 *self) {
if (tables.stringByteMeaning[uint8_t(*self->buf)] !=
Tables::CONTINUATION_BYTE) {
return S_REJECT;
return WeaselJson_REJECT;
}
self->utf8Codepoint <<= 6;
self->utf8Codepoint |= *self->buf & 0b00111111;
if (self->utf8Codepoint < self->minCodepoint ||
self->utf8Codepoint > 0x10ffff ||
(0xd800 <= self->utf8Codepoint && self->utf8Codepoint <= 0xdfff)) {
return S_REJECT;
return WeaselJson_REJECT;
}
// TODO tell valgrind utf8Codepoint and minCodepoint are uninitialized
*self->writeBuf++ = *self->buf++;
@@ -458,25 +447,25 @@ inline Status t_utf8_last_continuation_byte(Parser3 *self) {
MUSTTAIL return Parser3::keepGoing(self);
}
inline Status t_digit(Parser3 *self) {
inline WeaselJsonStatus t_digit(Parser3 *self) {
if ('0' <= *self->buf && *self->buf <= '9') {
++self->buf;
self->pop();
MUSTTAIL return Parser3::keepGoing(self);
}
return S_REJECT;
return WeaselJson_REJECT;
}
inline Status t_onenine(Parser3 *self) {
inline WeaselJsonStatus t_onenine(Parser3 *self) {
if ('1' <= *self->buf && *self->buf <= '9') {
++self->buf;
self->pop();
MUSTTAIL return Parser3::keepGoing(self);
}
return S_REJECT;
return WeaselJson_REJECT;
}
inline Status t_hex(Parser3 *self) {
inline WeaselJsonStatus t_hex(Parser3 *self) {
self->utf8Codepoint <<= 4;
if (('0' <= *self->buf && *self->buf <= '9')) {
self->utf8Codepoint |= *self->buf - '0';
@@ -485,14 +474,14 @@ inline Status t_hex(Parser3 *self) {
} else if ('A' <= *self->buf && *self->buf <= 'F') {
self->utf8Codepoint |= 10 + *self->buf - 'A';
} else {
return S_REJECT;
return WeaselJson_REJECT;
}
++self->buf;
self->pop();
MUSTTAIL return Parser3::keepGoing(self);
}
inline Status t_hex2(Parser3 *self) {
inline WeaselJsonStatus t_hex2(Parser3 *self) {
self->utf8Codepoint <<= 4;
if (('0' <= *self->buf && *self->buf <= '9')) {
self->utf8Codepoint |= *self->buf - '0';
@@ -501,7 +490,7 @@ inline Status t_hex2(Parser3 *self) {
} else if ('A' <= *self->buf && *self->buf <= 'F') {
self->utf8Codepoint |= 10 + *self->buf - 'A';
} else {
return S_REJECT;
return WeaselJson_REJECT;
}
++self->buf;
@@ -559,7 +548,7 @@ inline Status t_hex2(Parser3 *self) {
MUSTTAIL return Parser3::keepGoing(self);
}
inline Status t_hex3(Parser3 *self) {
inline WeaselJsonStatus t_hex3(Parser3 *self) {
self->utf8Codepoint <<= 4;
if (('0' <= *self->buf && *self->buf <= '9')) {
self->utf8Codepoint |= *self->buf - '0';
@@ -568,12 +557,12 @@ inline Status t_hex3(Parser3 *self) {
} else if ('A' <= *self->buf && *self->buf <= 'F') {
self->utf8Codepoint |= 10 + *self->buf - 'A';
} else {
return S_REJECT;
return WeaselJson_REJECT;
}
++self->buf;
if (!(0xdc00 <= self->utf8Codepoint && self->utf8Codepoint <= 0xdfff)) {
return S_REJECT;
return WeaselJson_REJECT;
}
// Decode utf16 surrogate pair
@@ -585,7 +574,7 @@ inline Status t_hex3(Parser3 *self) {
char tmp[4];
assert(self->utf8Codepoint >= 0x10000);
if (self->utf8Codepoint > 0x10FFFF) {
return S_REJECT;
return WeaselJson_REJECT;
}
bool useTmp = self->buf - self->writeBuf < 4;
char *p = tmp;
@@ -609,7 +598,7 @@ inline Status t_hex3(Parser3 *self) {
MUSTTAIL return Parser3::keepGoing(self);
}
inline Status n_integer(Parser3 *self) {
inline WeaselJsonStatus n_integer(Parser3 *self) {
self->callbacks->on_begin_number(self->data);
self->dataBegin = self->buf;
switch (*self->buf) {
@@ -640,11 +629,11 @@ inline Status n_integer(Parser3 *self) {
}
MUSTTAIL return Parser3::keepGoing(self);
default:
return S_REJECT;
return WeaselJson_REJECT;
}
}
inline Status n_integer2(Parser3 *self) {
inline WeaselJsonStatus n_integer2(Parser3 *self) {
switch (*self->buf) {
case '0':
++self->buf;
@@ -666,11 +655,11 @@ inline Status n_integer2(Parser3 *self) {
}
MUSTTAIL return Parser3::keepGoing(self);
default:
return S_REJECT;
return WeaselJson_REJECT;
}
}
inline Status n_digits(Parser3 *self) {
inline WeaselJsonStatus n_digits(Parser3 *self) {
switch (*self->buf) {
case '0':
case '1':
@@ -689,11 +678,11 @@ inline Status n_digits(Parser3 *self) {
}
MUSTTAIL return Parser3::keepGoing(self);
default:
return S_REJECT;
return WeaselJson_REJECT;
}
}
inline Status n_digits2(Parser3 *self) {
inline WeaselJsonStatus n_digits2(Parser3 *self) {
if (self->len() == 0) {
self->pop();
MUSTTAIL return Parser3::keepGoing(self);
@@ -717,7 +706,7 @@ inline Status n_digits2(Parser3 *self) {
}
}
inline Status n_fraction(Parser3 *self) {
inline WeaselJsonStatus n_fraction(Parser3 *self) {
if (self->len() == 0) {
self->pop();
MUSTTAIL return Parser3::keepGoing(self);
@@ -736,7 +725,7 @@ inline Status n_fraction(Parser3 *self) {
}
}
inline Status n_exponent(Parser3 *self) {
inline WeaselJsonStatus n_exponent(Parser3 *self) {
if (self->len() == 0) {
self->pop();
MUSTTAIL return Parser3::keepGoing(self);
@@ -756,7 +745,7 @@ inline Status n_exponent(Parser3 *self) {
}
}
inline Status n_sign(Parser3 *self) {
inline WeaselJsonStatus n_sign(Parser3 *self) {
if (self->len() == 0) {
self->pop();
MUSTTAIL return Parser3::keepGoing(self);
@@ -773,7 +762,7 @@ inline Status n_sign(Parser3 *self) {
}
}
inline Status n_whitespace(Parser3 *self) {
inline WeaselJsonStatus n_whitespace(Parser3 *self) {
if (self->len() == 0) {
self->pop();
MUSTTAIL return Parser3::keepGoing(self);
@@ -781,60 +770,60 @@ inline Status n_whitespace(Parser3 *self) {
while (tables.whitespace[uint8_t(*self->buf)]) {
++self->buf;
if (self->buf == self->bufEnd) {
return S_AGAIN;
return WeaselJson_AGAIN;
}
}
self->pop();
MUSTTAIL return Parser3::keepGoing(self);
}
inline Status n_true(Parser3 *self) {
inline WeaselJsonStatus n_true(Parser3 *self) {
if (*self->buf == 'e') {
++self->buf;
self->pop();
self->callbacks->on_true_literal(self->data);
MUSTTAIL return Parser3::keepGoing(self);
}
return S_REJECT;
return WeaselJson_REJECT;
}
inline Status n_false(Parser3 *self) {
inline WeaselJsonStatus n_false(Parser3 *self) {
if (*self->buf == 'e') {
++self->buf;
self->pop();
self->callbacks->on_false_literal(self->data);
MUSTTAIL return Parser3::keepGoing(self);
}
return S_REJECT;
return WeaselJson_REJECT;
}
inline Status n_null(Parser3 *self) {
inline WeaselJsonStatus n_null(Parser3 *self) {
if (*self->buf == 'l') {
++self->buf;
self->pop();
self->callbacks->on_null_literal(self->data);
MUSTTAIL return Parser3::keepGoing(self);
}
return S_REJECT;
return WeaselJson_REJECT;
}
template <char kChar> inline Status singleChar(Parser3 *self) {
template <char kChar> inline WeaselJsonStatus singleChar(Parser3 *self) {
if (*self->buf == kChar) {
++self->buf;
self->pop();
MUSTTAIL return Parser3::keepGoing(self);
}
return S_REJECT;
return WeaselJson_REJECT;
}
inline Status t_eof(Parser3 *self) {
inline WeaselJsonStatus t_eof(Parser3 *self) {
if (self->len() > 0) {
return S_REJECT;
return WeaselJson_REJECT;
}
return self->complete ? S_OK : S_AGAIN;
return self->complete ? WeaselJson_OK : WeaselJson_AGAIN;
}
inline Status t_end_number(Parser3 *self) {
inline WeaselJsonStatus t_end_number(Parser3 *self) {
self->pop();
self->flushNumber();
self->callbacks->on_end_number(self->data);
@@ -847,7 +836,7 @@ constexpr inline struct ContinuationTable {
for (int i = 0; i < N_SYMBOL_COUNT; ++i) {
continuations[i] = +[](struct Parser3 *) {
printf("unimplemented\n");
return S_REJECT;
return WeaselJson_REJECT;
};
}
continuations[N_VALUE] = n_value;
@@ -942,7 +931,7 @@ constexpr inline struct ContinuationTable {
bool acceptsEmptyString[N_SYMBOL_COUNT]{};
} symbolTables;
inline Status Parser3::keepGoing(Parser3 *self) {
inline WeaselJsonStatus Parser3::keepGoing(Parser3 *self) {
// self->debugPrint();
if (self->len() == 0) {
if (!self->complete) {
@@ -992,10 +981,10 @@ inline Status Parser3::keepGoing(Parser3 *self) {
case N_SYMBOL_COUNT:
break;
}
return S_AGAIN;
return WeaselJson_AGAIN;
}
if (!symbolTables.acceptsEmptyString[self->top()]) {
return S_REJECT;
return WeaselJson_REJECT;
}
}
MUSTTAIL return symbolTables.continuations[self->top()](self);