Compare commits
12 Commits
9803364adb
...
2e803b5a76
| Author | SHA1 | Date | |
|---|---|---|---|
| 2e803b5a76 | |||
| a8aab0187e | |||
| 63a1be497b | |||
| 7470c69845 | |||
| c2f5d6983a | |||
| 575b6e5c62 | |||
| 67e63dc611 | |||
| 9319076b44 | |||
| d1de15a0ca | |||
| dfce1ae412 | |||
| 5e3fa62a06 | |||
| 330101a937 |
@@ -8,7 +8,7 @@ extern "C" {
|
||||
__attribute__((visibility("default"))) WeaselJsonParser *
|
||||
WeaselJsonParser_create(int stackSize, const WeaselJsonCallbacks *callbacks,
|
||||
void *userdata) {
|
||||
auto *buf = malloc(sizeof(Parser3) + stackSize);
|
||||
auto *buf = malloc(sizeof(Parser3) + stackSize * sizeof(*Parser3::stackPtr));
|
||||
if (buf == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
388
src/parser3.h
388
src/parser3.h
@@ -2,6 +2,7 @@
|
||||
|
||||
#include <cassert>
|
||||
#include <cctype>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
@@ -16,9 +17,21 @@
|
||||
|
||||
namespace parser3 {
|
||||
|
||||
// Calling a continuation with buf == bufEnd means end of input
|
||||
typedef PRESERVE_NONE WeaselJsonStatus (*Continuation)(struct Parser3 *,
|
||||
char *buf, char *bufEnd);
|
||||
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_object2(Parser3 *self, char *buf,
|
||||
char *bufEnd);
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_array2(Parser3 *self, char *buf,
|
||||
char *bufEnd);
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_string2(Parser3 *self, char *buf,
|
||||
char *bufEnd);
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_string(Parser3 *self, char *buf,
|
||||
char *bufEnd);
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_number(Parser3 *self, char *buf,
|
||||
char *bufEnd);
|
||||
|
||||
// These appear in the stack of the pushdown
|
||||
// automata
|
||||
enum Symbol : uint8_t {
|
||||
@@ -52,15 +65,12 @@ enum Symbol : uint8_t {
|
||||
};
|
||||
struct Parser3 {
|
||||
Parser3(const WeaselJsonCallbacks *callbacks, void *userdata, int stackSize)
|
||||
: callbacks(callbacks), userdata(userdata), stackSize(stackSize) {
|
||||
: callbacks(callbacks), userdata(userdata),
|
||||
stackEnd(stack() + stackSize) {
|
||||
reset();
|
||||
}
|
||||
|
||||
[[nodiscard]] WeaselJsonStatus parse(char *buf, int len) {
|
||||
complete = len == 0;
|
||||
this->dataBegin = this->writeBuf = buf;
|
||||
return keepGoing(this, buf, buf + len);
|
||||
}
|
||||
[[nodiscard]] WeaselJsonStatus parse(char *buf, int len);
|
||||
|
||||
void flushNumber(bool done, char *buf) {
|
||||
int len = buf - dataBegin;
|
||||
@@ -83,8 +93,11 @@ struct Parser3 {
|
||||
assert(!empty());
|
||||
--stackPtr;
|
||||
}
|
||||
|
||||
// Pushing symbols onto the stack roughly corresponds with "parse these
|
||||
// nonterminals/terminals in order"
|
||||
[[nodiscard]] WeaselJsonStatus push(std::initializer_list<Symbol> symbols) {
|
||||
if (stackPtr >= stack() + stackSize - symbols.size()) [[unlikely]] {
|
||||
if (stackEnd - stackPtr < ptrdiff_t(symbols.size())) [[unlikely]] {
|
||||
return WeaselJson_OVERFLOW;
|
||||
}
|
||||
for (int i = symbols.size() - 1; i >= 0; --i) {
|
||||
@@ -92,6 +105,7 @@ struct Parser3 {
|
||||
}
|
||||
return WeaselJson_OK;
|
||||
}
|
||||
|
||||
[[nodiscard]] Symbol top() const {
|
||||
assert(!empty());
|
||||
return *(stackPtr - 1);
|
||||
@@ -104,7 +118,6 @@ struct Parser3 {
|
||||
|
||||
void reset() {
|
||||
stackPtr = stack();
|
||||
complete = false;
|
||||
std::ignore = push({N_VALUE, N_WHITESPACE, T_EOF});
|
||||
}
|
||||
|
||||
@@ -115,11 +128,10 @@ struct Parser3 {
|
||||
WeaselJsonCallbacks const *const callbacks;
|
||||
void *const userdata;
|
||||
Symbol *stackPtr;
|
||||
Symbol *const stackEnd;
|
||||
uint32_t utf8Codepoint;
|
||||
uint32_t utf16Surrogate;
|
||||
uint32_t minCodepoint;
|
||||
int const stackSize;
|
||||
bool complete;
|
||||
NumDfa numDfa;
|
||||
Utf8Dfa strDfa;
|
||||
};
|
||||
@@ -158,10 +170,12 @@ inline PRESERVE_NONE WeaselJsonStatus n_whitespace(Parser3 *self, char *buf,
|
||||
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_number(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
buf = (char *)self->numDfa.scan(buf, bufEnd);
|
||||
if (buf == bufEnd && !self->complete) {
|
||||
self->flushNumber(false, buf);
|
||||
return WeaselJson_AGAIN;
|
||||
if (buf != bufEnd) {
|
||||
buf = (char *)self->numDfa.scan(buf, bufEnd);
|
||||
if (buf == bufEnd) {
|
||||
self->flushNumber(false, buf);
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
}
|
||||
if (!self->numDfa.accept()) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
@@ -245,7 +259,9 @@ inline PRESERVE_NONE WeaselJsonStatus scan_string(Parser3 *self, char *buf,
|
||||
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
assert(bufEnd - buf != 0);
|
||||
if (buf == bufEnd) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
if (auto s = skipWhitespace(buf, bufEnd)) {
|
||||
return s;
|
||||
}
|
||||
@@ -254,27 +270,30 @@ inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self, char *buf,
|
||||
self->callbacks->on_begin_object(self->userdata);
|
||||
++buf;
|
||||
self->pop();
|
||||
if (auto s = self->push({N_OBJECT2})) {
|
||||
return s;
|
||||
std::ignore = self->push({N_OBJECT2});
|
||||
if (buf == bufEnd) {
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
break;
|
||||
MUSTTAIL return n_object2(self, buf, bufEnd);
|
||||
case '[':
|
||||
self->callbacks->on_begin_array(self->userdata);
|
||||
++buf;
|
||||
self->pop();
|
||||
if (auto s = self->push({N_ARRAY2})) {
|
||||
return s;
|
||||
std::ignore = self->push({N_ARRAY2});
|
||||
if (buf == bufEnd) {
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
break;
|
||||
MUSTTAIL return n_array2(self, buf, bufEnd);
|
||||
case '"':
|
||||
++buf;
|
||||
self->dataBegin = self->writeBuf = buf;
|
||||
self->pop();
|
||||
self->strDfa.reset();
|
||||
if (auto s2 = self->push({N_STRING2})) {
|
||||
return s2;
|
||||
std::ignore = self->push({N_STRING2});
|
||||
if (buf == bufEnd) {
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
break;
|
||||
MUSTTAIL return n_string2(self, buf, bufEnd);
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
@@ -289,10 +308,11 @@ inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self, char *buf,
|
||||
self->dataBegin = buf;
|
||||
self->pop();
|
||||
self->numDfa.reset();
|
||||
if (auto s2 = self->push({N_NUMBER})) {
|
||||
return s2;
|
||||
std::ignore = self->push({N_NUMBER});
|
||||
if (buf == bufEnd) {
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
break;
|
||||
MUSTTAIL return n_number(self, buf, bufEnd);
|
||||
case 't':
|
||||
++buf;
|
||||
self->pop();
|
||||
@@ -344,12 +364,17 @@ inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self, char *buf,
|
||||
default:
|
||||
[[unlikely]] return WeaselJson_REJECT;
|
||||
}
|
||||
if (buf == bufEnd) {
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
}
|
||||
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_object2(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
assert(bufEnd - buf != 0);
|
||||
if (buf == bufEnd) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
if (auto s = skipWhitespace(buf, bufEnd)) {
|
||||
return s;
|
||||
}
|
||||
@@ -358,6 +383,9 @@ inline PRESERVE_NONE WeaselJsonStatus n_object2(Parser3 *self, char *buf,
|
||||
++buf;
|
||||
self->pop();
|
||||
self->callbacks->on_end_object(self->userdata);
|
||||
if (buf == bufEnd) {
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
case '"':
|
||||
++buf;
|
||||
@@ -367,7 +395,10 @@ inline PRESERVE_NONE WeaselJsonStatus n_object2(Parser3 *self, char *buf,
|
||||
if (auto s = self->push({N_STRING2, T_COLON, N_VALUE, N_OBJECT3})) {
|
||||
return s;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
if (buf == bufEnd) {
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
MUSTTAIL return n_string2(self, buf, bufEnd);
|
||||
default:
|
||||
[[unlikely]] return WeaselJson_REJECT;
|
||||
}
|
||||
@@ -375,7 +406,9 @@ inline PRESERVE_NONE WeaselJsonStatus n_object2(Parser3 *self, char *buf,
|
||||
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_object3(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
assert(bufEnd - buf != 0);
|
||||
if (buf == bufEnd) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
if (auto s = skipWhitespace(buf, bufEnd)) {
|
||||
return s;
|
||||
}
|
||||
@@ -384,6 +417,9 @@ inline PRESERVE_NONE WeaselJsonStatus n_object3(Parser3 *self, char *buf,
|
||||
++buf;
|
||||
self->pop();
|
||||
self->callbacks->on_end_object(self->userdata);
|
||||
if (buf == bufEnd) {
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
case ',':
|
||||
++buf;
|
||||
@@ -391,7 +427,10 @@ inline PRESERVE_NONE WeaselJsonStatus n_object3(Parser3 *self, char *buf,
|
||||
if (auto s = self->push({N_STRING, T_COLON, N_VALUE, N_OBJECT3})) {
|
||||
return s;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
if (buf == bufEnd) {
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
MUSTTAIL return n_string(self, buf, bufEnd);
|
||||
default:
|
||||
[[unlikely]] return WeaselJson_REJECT;
|
||||
}
|
||||
@@ -399,7 +438,9 @@ inline PRESERVE_NONE WeaselJsonStatus n_object3(Parser3 *self, char *buf,
|
||||
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_array2(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
assert(bufEnd - buf != 0);
|
||||
if (buf == bufEnd) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
if (auto s = skipWhitespace(buf, bufEnd)) {
|
||||
return s;
|
||||
}
|
||||
@@ -408,19 +449,24 @@ inline PRESERVE_NONE WeaselJsonStatus n_array2(Parser3 *self, char *buf,
|
||||
++buf;
|
||||
self->pop();
|
||||
self->callbacks->on_end_array(self->userdata);
|
||||
if (buf == bufEnd) {
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
default:
|
||||
self->pop();
|
||||
if (auto s = self->push({N_VALUE, N_ARRAY3})) {
|
||||
return s;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
MUSTTAIL return n_value(self, buf, bufEnd);
|
||||
}
|
||||
}
|
||||
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_array3(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
assert(bufEnd - buf != 0);
|
||||
if (buf == bufEnd) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
if (auto s = skipWhitespace(buf, bufEnd)) {
|
||||
return s;
|
||||
}
|
||||
@@ -429,6 +475,9 @@ inline PRESERVE_NONE WeaselJsonStatus n_array3(Parser3 *self, char *buf,
|
||||
++buf;
|
||||
self->pop();
|
||||
self->callbacks->on_end_array(self->userdata);
|
||||
if (buf == bufEnd) {
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
case ',':
|
||||
++buf;
|
||||
@@ -436,7 +485,10 @@ inline PRESERVE_NONE WeaselJsonStatus n_array3(Parser3 *self, char *buf,
|
||||
if (auto s = self->push({N_VALUE, N_ARRAY3})) {
|
||||
return s;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
if (buf == bufEnd) {
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
MUSTTAIL return n_value(self, buf, bufEnd);
|
||||
default:
|
||||
[[unlikely]] return WeaselJson_REJECT;
|
||||
}
|
||||
@@ -444,7 +496,9 @@ inline PRESERVE_NONE WeaselJsonStatus n_array3(Parser3 *self, char *buf,
|
||||
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_string(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
assert(bufEnd - buf != 0);
|
||||
if (buf == bufEnd) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
if (auto s = skipWhitespace(buf, bufEnd)) {
|
||||
return s;
|
||||
}
|
||||
@@ -455,14 +509,23 @@ inline PRESERVE_NONE WeaselJsonStatus n_string(Parser3 *self, char *buf,
|
||||
self->dataBegin = self->writeBuf = buf;
|
||||
self->pop();
|
||||
self->strDfa.reset();
|
||||
if (auto s = self->push({N_STRING2})) {
|
||||
return s;
|
||||
std::ignore = self->push({N_STRING2});
|
||||
if (buf == bufEnd) {
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
MUSTTAIL return n_string2(self, buf, bufEnd);
|
||||
}
|
||||
|
||||
inline int32_t read4_hex(const char *buf) {
|
||||
return tables.hex[uint8_t(buf[0])] << 12 | tables.hex[uint8_t(buf[1])] << 8 |
|
||||
tables.hex[uint8_t(buf[2])] << 4 | tables.hex[uint8_t(buf[3])] << 0;
|
||||
}
|
||||
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_string2(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
if (buf == bufEnd) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
if (auto s = scan_string(self, buf, bufEnd)) {
|
||||
return s;
|
||||
}
|
||||
@@ -471,13 +534,83 @@ inline PRESERVE_NONE WeaselJsonStatus n_string2(Parser3 *self, char *buf,
|
||||
self->flushString(true);
|
||||
++buf;
|
||||
self->pop();
|
||||
if (buf == bufEnd) {
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
case '\\':
|
||||
++buf;
|
||||
if (auto s = self->push({N_STRING_FOLLOWING_ESCAPE})) {
|
||||
return s;
|
||||
if (bufEnd - buf < /*strlen("u0000\\u0000")*/ 11) {
|
||||
if (auto s = self->push({N_STRING_FOLLOWING_ESCAPE})) {
|
||||
return s;
|
||||
}
|
||||
if (buf == bufEnd) {
|
||||
self->flushString(false);
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
} else {
|
||||
if (*buf == 'u') {
|
||||
++buf;
|
||||
int32_t codepoint = read4_hex(buf);
|
||||
if (codepoint < 0) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
buf += 4;
|
||||
if (0xd800 <= codepoint && codepoint <= 0xdfff) {
|
||||
// utf-16 surrogate
|
||||
int32_t codepoint2 = read4_hex(buf + 2);
|
||||
if (!(buf[0] == '\\' && buf[1] == 'u' && 0xdc00 <= codepoint2 &&
|
||||
codepoint2 <= 0xdfff)) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
codepoint =
|
||||
0x10000 + (codepoint - 0xd800) * 0x400 + (codepoint2 - 0xdc00);
|
||||
assert(codepoint >= 0x10000);
|
||||
if (codepoint > 0x10FFFF) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
buf += 6;
|
||||
assert(codepoint <= 0x10ffff);
|
||||
self->writeBuf[3] = (0b00111111 & codepoint) | 0b10000000;
|
||||
codepoint >>= 6;
|
||||
self->writeBuf[2] = (0b00111111 & codepoint) | 0b10000000;
|
||||
codepoint >>= 6;
|
||||
self->writeBuf[1] = (0b00111111 & codepoint) | 0b10000000;
|
||||
codepoint >>= 6;
|
||||
self->writeBuf[0] = (0b00000111 & codepoint) | 0b11110000;
|
||||
self->writeBuf += 4;
|
||||
} else {
|
||||
if (codepoint < 0x80) {
|
||||
*self->writeBuf++ = codepoint;
|
||||
} else if (codepoint < 0x800) {
|
||||
self->writeBuf[1] = (0b00111111 & codepoint) | 0b10000000;
|
||||
codepoint >>= 6;
|
||||
self->writeBuf[0] = (0b00011111 & codepoint) | 0b11000000;
|
||||
self->writeBuf += 2;
|
||||
} else {
|
||||
assert(codepoint < 0x10000);
|
||||
self->writeBuf[2] = (0b00111111 & codepoint) | 0b10000000;
|
||||
codepoint >>= 6;
|
||||
self->writeBuf[1] = (0b00111111 & codepoint) | 0b10000000;
|
||||
codepoint >>= 6;
|
||||
self->writeBuf[0] = (0b00001111 & codepoint) | 0b11100000;
|
||||
self->writeBuf += 3;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
auto unescaped = tables.unescape[uint8_t(*buf++)];
|
||||
if (unescaped == 0) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
*self->writeBuf++ = unescaped;
|
||||
}
|
||||
if (buf == bufEnd) {
|
||||
self->flushString(false);
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
MUSTTAIL return n_string2(self, buf, bufEnd);
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
default:
|
||||
[[unlikely]] return WeaselJson_REJECT;
|
||||
}
|
||||
@@ -486,6 +619,9 @@ inline PRESERVE_NONE WeaselJsonStatus n_string2(Parser3 *self, char *buf,
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_string_following_escape(Parser3 *self,
|
||||
char *buf,
|
||||
char *bufEnd) {
|
||||
if (buf == bufEnd) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
assert(self->strDfa.accept());
|
||||
switch (*buf) {
|
||||
case '"':
|
||||
@@ -498,7 +634,7 @@ inline PRESERVE_NONE WeaselJsonStatus n_string_following_escape(Parser3 *self,
|
||||
case 't':
|
||||
*self->writeBuf++ = tables.unescape[uint8_t(*buf++)];
|
||||
self->pop();
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
break;
|
||||
case 'u':
|
||||
++buf;
|
||||
self->utf8Codepoint = 0;
|
||||
@@ -506,14 +642,22 @@ inline PRESERVE_NONE WeaselJsonStatus n_string_following_escape(Parser3 *self,
|
||||
if (auto s = self->push({T_HEX, T_HEX, T_HEX, T_HEX2})) {
|
||||
return s;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
break;
|
||||
default:
|
||||
[[unlikely]] return WeaselJson_REJECT;
|
||||
}
|
||||
if (buf == bufEnd) {
|
||||
self->flushString(false);
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
}
|
||||
|
||||
inline PRESERVE_NONE WeaselJsonStatus t_hex(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
if (buf == bufEnd) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
auto hexVal = tables.hex[uint8_t(*buf)];
|
||||
if (hexVal < 0) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
@@ -522,11 +666,18 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex(Parser3 *self, char *buf,
|
||||
self->utf8Codepoint |= hexVal;
|
||||
++buf;
|
||||
self->pop();
|
||||
if (buf == bufEnd) {
|
||||
self->flushString(false);
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
}
|
||||
|
||||
inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
if (buf == bufEnd) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
auto hexVal = tables.hex[uint8_t(*buf)];
|
||||
if (hexVal < 0) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
@@ -566,6 +717,10 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self, char *buf,
|
||||
self->push({T_BACKSLASH, T_U2, T_HEX, T_HEX, T_HEX, T_HEX3})) {
|
||||
return s;
|
||||
}
|
||||
if (buf == bufEnd) {
|
||||
self->flushString(false);
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
}
|
||||
bool useTmp = buf - self->writeBuf < 3;
|
||||
@@ -586,11 +741,18 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self, char *buf,
|
||||
}
|
||||
|
||||
self->pop();
|
||||
if (buf == bufEnd) {
|
||||
self->flushString(false);
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
}
|
||||
|
||||
inline PRESERVE_NONE WeaselJsonStatus t_hex3(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
if (buf == bufEnd) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
auto hexVal = tables.hex[uint8_t(*buf)];
|
||||
if (hexVal < 0) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
@@ -634,15 +796,45 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex3(Parser3 *self, char *buf,
|
||||
}
|
||||
|
||||
self->pop();
|
||||
if (buf == bufEnd) {
|
||||
self->flushString(false);
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
}
|
||||
|
||||
template <char kChar>
|
||||
inline PRESERVE_NONE WeaselJsonStatus singleCharInString(Parser3 *self,
|
||||
char *buf,
|
||||
char *bufEnd) {
|
||||
if (buf == bufEnd) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
if (*buf == kChar) {
|
||||
++buf;
|
||||
self->pop();
|
||||
if (buf == bufEnd) {
|
||||
self->flushString(false);
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
} else [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
}
|
||||
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_true(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
if (buf == bufEnd) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
if (*buf == 'e') {
|
||||
++buf;
|
||||
self->pop();
|
||||
self->callbacks->on_true_literal(self->userdata);
|
||||
if (buf == bufEnd) {
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
} else [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
@@ -651,10 +843,16 @@ inline PRESERVE_NONE WeaselJsonStatus n_true(Parser3 *self, char *buf,
|
||||
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_false(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
if (buf == bufEnd) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
if (*buf == 'e') {
|
||||
++buf;
|
||||
self->pop();
|
||||
self->callbacks->on_false_literal(self->userdata);
|
||||
if (buf == bufEnd) {
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
} else [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
@@ -663,10 +861,16 @@ inline PRESERVE_NONE WeaselJsonStatus n_false(Parser3 *self, char *buf,
|
||||
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_null(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
if (buf == bufEnd) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
if (*buf == 'l') {
|
||||
++buf;
|
||||
self->pop();
|
||||
self->callbacks->on_null_literal(self->userdata);
|
||||
if (buf == bufEnd) {
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
} else [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
@@ -676,8 +880,10 @@ inline PRESERVE_NONE WeaselJsonStatus n_null(Parser3 *self, char *buf,
|
||||
template <char kChar, bool kSkipWhitespace = false>
|
||||
inline PRESERVE_NONE WeaselJsonStatus singleChar(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
if (buf == bufEnd) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
if constexpr (kSkipWhitespace) {
|
||||
assert(bufEnd - buf != 0);
|
||||
if (auto s = skipWhitespace(buf, bufEnd)) {
|
||||
return s;
|
||||
}
|
||||
@@ -685,18 +891,21 @@ inline PRESERVE_NONE WeaselJsonStatus singleChar(Parser3 *self, char *buf,
|
||||
if (*buf == kChar) {
|
||||
++buf;
|
||||
self->pop();
|
||||
if (buf == bufEnd) {
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
} else [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
}
|
||||
|
||||
inline PRESERVE_NONE WeaselJsonStatus t_eof(Parser3 *self, char *buf,
|
||||
inline PRESERVE_NONE WeaselJsonStatus t_eof(Parser3 *, char *buf,
|
||||
char *bufEnd) {
|
||||
if (buf != bufEnd) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
return self->complete ? WeaselJson_OK : WeaselJson_AGAIN;
|
||||
return WeaselJson_OK;
|
||||
}
|
||||
|
||||
constexpr inline struct ContinuationTable {
|
||||
@@ -723,7 +932,7 @@ constexpr inline struct ContinuationTable {
|
||||
continuations[N_NULL] = n_null;
|
||||
continuations[T_R] = singleChar<'r'>;
|
||||
continuations[T_U] = singleChar<'u'>;
|
||||
continuations[T_U2] = singleChar<'u'>;
|
||||
continuations[T_U2] = singleCharInString<'u'>;
|
||||
continuations[T_A] = singleChar<'a'>;
|
||||
continuations[T_L] = singleChar<'l'>;
|
||||
continuations[T_S] = singleChar<'s'>;
|
||||
@@ -732,90 +941,19 @@ constexpr inline struct ContinuationTable {
|
||||
continuations[T_HEX2] = t_hex2;
|
||||
continuations[T_HEX3] = t_hex3;
|
||||
continuations[T_EOF] = t_eof;
|
||||
continuations[T_BACKSLASH] = singleChar<'\\'>;
|
||||
|
||||
symbolNames[N_VALUE] = "n_value";
|
||||
symbolNames[N_OBJECT2] = "n_object2";
|
||||
symbolNames[N_OBJECT3] = "n_object3";
|
||||
symbolNames[N_ARRAY2] = "n_array2";
|
||||
symbolNames[N_ARRAY3] = "n_array3";
|
||||
symbolNames[N_STRING] = "n_string";
|
||||
symbolNames[N_STRING2] = "n_string2";
|
||||
symbolNames[N_STRING_FOLLOWING_ESCAPE] = "n_string_following_escape";
|
||||
symbolNames[N_WHITESPACE] = "n_whitespace";
|
||||
symbolNames[N_NUMBER] = "n_number";
|
||||
symbolNames[N_TRUE] = "n_true";
|
||||
symbolNames[N_FALSE] = "n_false";
|
||||
symbolNames[N_NULL] = "n_null";
|
||||
symbolNames[T_R] = "singleChar<'r'>";
|
||||
symbolNames[T_U] = "singleChar<'u'>";
|
||||
symbolNames[T_U2] = "singleChar<'u'> (in string)";
|
||||
symbolNames[T_A] = "singleChar<'a'>";
|
||||
symbolNames[T_L] = "singleChar<'l'>";
|
||||
symbolNames[T_S] = "singleChar<'s'>";
|
||||
symbolNames[T_COLON] = "singleChar<':'>";
|
||||
symbolNames[T_HEX] = "t_hex";
|
||||
symbolNames[T_HEX2] = "t_hex2";
|
||||
symbolNames[T_HEX3] = "t_hex3";
|
||||
symbolNames[T_EOF] = "t_eof";
|
||||
symbolNames[T_BACKSLASH] = "singleChar<'\\'>";
|
||||
|
||||
// All others can assume that there's at least one byte when they're called
|
||||
acceptsEmptyString[N_NUMBER] = true;
|
||||
acceptsEmptyString[N_WHITESPACE] = true;
|
||||
acceptsEmptyString[T_EOF] = true;
|
||||
continuations[T_BACKSLASH] = singleCharInString<'\\'>;
|
||||
}
|
||||
Continuation continuations[N_SYMBOL_COUNT]{};
|
||||
const char *symbolNames[N_SYMBOL_COUNT]{};
|
||||
bool acceptsEmptyString[N_SYMBOL_COUNT]{};
|
||||
} symbolTables;
|
||||
|
||||
inline WeaselJsonStatus Parser3::parse(char *buf, int len) {
|
||||
this->dataBegin = this->writeBuf = buf;
|
||||
return symbolTables.continuations[top()](this, buf, buf + len);
|
||||
}
|
||||
|
||||
inline PRESERVE_NONE WeaselJsonStatus Parser3::keepGoing(Parser3 *self,
|
||||
char *buf,
|
||||
char *bufEnd) {
|
||||
if (bufEnd - buf == 0) {
|
||||
if (!self->complete) {
|
||||
switch (self->top()) {
|
||||
case N_STRING2:
|
||||
case N_STRING_FOLLOWING_ESCAPE:
|
||||
case T_HEX:
|
||||
case T_HEX2:
|
||||
case T_HEX3:
|
||||
case T_BACKSLASH:
|
||||
case T_U2:
|
||||
self->flushString(false);
|
||||
break;
|
||||
case N_STRING: // The beginning of the string is in the future in this
|
||||
// state. There's no data to flush yet
|
||||
case N_VALUE:
|
||||
case N_OBJECT2:
|
||||
case N_OBJECT3:
|
||||
case N_ARRAY2:
|
||||
case N_ARRAY3:
|
||||
case N_WHITESPACE:
|
||||
case N_NUMBER:
|
||||
case N_TRUE:
|
||||
case N_FALSE:
|
||||
case N_NULL:
|
||||
case T_R:
|
||||
case T_U:
|
||||
case T_A:
|
||||
case T_L:
|
||||
case T_S:
|
||||
case T_COLON:
|
||||
case T_EOF:
|
||||
case N_SYMBOL_COUNT:
|
||||
break;
|
||||
default:
|
||||
__builtin_unreachable();
|
||||
}
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
if (!symbolTables.acceptsEmptyString[self->top()]) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
}
|
||||
// printf("%s\n", symbolTables.symbolNames[self->top()]);
|
||||
MUSTTAIL return symbolTables.continuations[self->top()](self, buf, bufEnd);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user