Merge remote-tracking branch 'origin/flags'

This commit is contained in:
2025-08-04 12:36:20 -04:00
8 changed files with 180 additions and 118 deletions

View File

@@ -36,13 +36,18 @@ enum WeaselJsonStatus {
typedef struct WeaselJsonParser WeaselJsonParser; typedef struct WeaselJsonParser WeaselJsonParser;
enum WeaselJsonFlags {
/** Do not unescape strings or write to the supplied buffer at all. */
WeaselJsonRaw = 1,
};
/** Create a parser. Increasing stack size increases memory usage but also /** Create a parser. Increasing stack size increases memory usage but also
* increases the depth of nested json accepted. `callbacks` and `userdata` must * increases the depth of nested json accepted. `callbacks` and `userdata` must
* outlive the returned parser. Returns null if there's insufficient available * outlive the returned parser. Returns null if there's insufficient available
* memory */ * memory */
WeaselJsonParser *WeaselJsonParser_create(int stackSize, WeaselJsonParser *WeaselJsonParser_create(int stackSize,
const WeaselJsonCallbacks *callbacks, const WeaselJsonCallbacks *callbacks,
void *userdata); void *userdata, int flags);
/** Restore the parser to its newly-created state */ /** Restore the parser to its newly-created state */
void WeaselJsonParser_reset(WeaselJsonParser *parser); void WeaselJsonParser_reset(WeaselJsonParser *parser);

View File

@@ -10,7 +10,9 @@ std::pair<std::string, WeaselJsonStatus> runStreaming(std::string copy,
SerializeState state; SerializeState state;
auto c = serializeCallbacks(); auto c = serializeCallbacks();
std::unique_ptr<WeaselJsonParser, decltype(&WeaselJsonParser_destroy)> parser{ std::unique_ptr<WeaselJsonParser, decltype(&WeaselJsonParser_destroy)> parser{
WeaselJsonParser_create(1024, &c, &state), WeaselJsonParser_destroy}; WeaselJsonParser_create(1024, &c, &state,
copy.size() % 2 == 0 ? WeaselJsonRaw : 0),
WeaselJsonParser_destroy};
if (stride == 0) { if (stride == 0) {
auto s = WeaselJsonParser_parse(parser.get(), copy.data(), copy.size()); auto s = WeaselJsonParser_parse(parser.get(), copy.data(), copy.size());
if (s != WeaselJson_AGAIN) { if (s != WeaselJson_AGAIN) {
@@ -33,7 +35,9 @@ std::pair<std::string, WeaselJsonStatus> runBatch(std::string copy) {
SerializeState state; SerializeState state;
auto c = serializeCallbacks(); auto c = serializeCallbacks();
std::unique_ptr<WeaselJsonParser, decltype(&WeaselJsonParser_destroy)> parser{ std::unique_ptr<WeaselJsonParser, decltype(&WeaselJsonParser_destroy)> parser{
WeaselJsonParser_create(1024, &c, &state), WeaselJsonParser_destroy}; WeaselJsonParser_create(1024, &c, &state,
copy.size() % 2 == 0 ? WeaselJsonRaw : 0),
WeaselJsonParser_destroy};
auto s = WeaselJsonParser_parse(parser.get(), copy.data(), copy.size()); auto s = WeaselJsonParser_parse(parser.get(), copy.data(), copy.size());
if (s != WeaselJson_AGAIN) { if (s != WeaselJson_AGAIN) {
return {state.result, s}; return {state.result, s};
@@ -47,7 +51,9 @@ std::pair<std::string, WeaselJsonStatus> runPrefix(std::string copy,
SerializeState state; SerializeState state;
auto c = serializeCallbacks(); auto c = serializeCallbacks();
std::unique_ptr<WeaselJsonParser, decltype(&WeaselJsonParser_destroy)> parser{ std::unique_ptr<WeaselJsonParser, decltype(&WeaselJsonParser_destroy)> parser{
WeaselJsonParser_create(1024, &c, &state), WeaselJsonParser_destroy}; WeaselJsonParser_create(1024, &c, &state,
copy.size() % 2 == 0 ? WeaselJsonRaw : 0),
WeaselJsonParser_destroy};
auto s = WeaselJsonParser_parse(parser.get(), copy.data(), prefix); auto s = WeaselJsonParser_parse(parser.get(), copy.data(), prefix);
if (s != WeaselJson_AGAIN) { if (s != WeaselJson_AGAIN) {
return {state.result, s}; return {state.result, s};
@@ -116,7 +122,8 @@ void compareWithSimdjson(std::string const &json) {
auto copy = json; auto copy = json;
auto c = noopCallbacks(); auto c = noopCallbacks();
std::unique_ptr<WeaselJsonParser, decltype(&WeaselJsonParser_destroy)> std::unique_ptr<WeaselJsonParser, decltype(&WeaselJsonParser_destroy)>
parser{WeaselJsonParser_create(1024, &c, nullptr), parser{WeaselJsonParser_create(
1024, &c, nullptr, json.size() % 2 == 0 ? WeaselJsonRaw : 0),
WeaselJsonParser_destroy}; WeaselJsonParser_destroy};
ours = WeaselJsonParser_parse(parser.get(), copy.data(), copy.size()); ours = WeaselJsonParser_parse(parser.get(), copy.data(), copy.size());
if (ours == WeaselJson_AGAIN) { if (ours == WeaselJson_AGAIN) {

View File

@@ -196,7 +196,7 @@ inline std::optional<JsonValue> toValue(std::string copy, int stride) {
ReadValueState state; ReadValueState state;
auto c = readValueCallbacks(); auto c = readValueCallbacks();
std::unique_ptr<WeaselJsonParser, decltype(&WeaselJsonParser_destroy)> parser{ std::unique_ptr<WeaselJsonParser, decltype(&WeaselJsonParser_destroy)> parser{
WeaselJsonParser_create(1024, &c, &state), WeaselJsonParser_destroy}; WeaselJsonParser_create(1024, &c, &state, 0), WeaselJsonParser_destroy};
if (stride == 0) { if (stride == 0) {
if (WeaselJsonParser_parse(parser.get(), copy.data(), copy.size()) != if (WeaselJsonParser_parse(parser.get(), copy.data(), copy.size()) !=
WeaselJson_AGAIN) { WeaselJson_AGAIN) {

View File

@@ -7,12 +7,13 @@ extern "C" {
__attribute__((visibility("default"))) WeaselJsonParser * __attribute__((visibility("default"))) WeaselJsonParser *
WeaselJsonParser_create(int stackSize, const WeaselJsonCallbacks *callbacks, WeaselJsonParser_create(int stackSize, const WeaselJsonCallbacks *callbacks,
void *userdata) { void *userdata, int flags) {
auto *buf = malloc(sizeof(Parser3) + stackSize * sizeof(*Parser3::stackPtr)); auto *buf = malloc(sizeof(Parser3) + stackSize * sizeof(*Parser3::stackPtr));
if (buf == nullptr) { if (buf == nullptr) {
return nullptr; return nullptr;
} }
return (WeaselJsonParser *)new (buf) Parser3{callbacks, userdata, stackSize}; return (WeaselJsonParser *)new (buf)
Parser3{callbacks, userdata, stackSize, flags};
} }
__attribute__((visibility("default"))) void __attribute__((visibility("default"))) void

View File

@@ -64,9 +64,10 @@ enum Symbol : uint8_t {
N_SYMBOL_COUNT, // Must be last N_SYMBOL_COUNT, // Must be last
}; };
struct Parser3 { struct Parser3 {
Parser3(const WeaselJsonCallbacks *callbacks, void *userdata, int stackSize) Parser3(const WeaselJsonCallbacks *callbacks, void *userdata, int stackSize,
: callbacks(callbacks), userdata(userdata), int flags)
stackEnd(stack() + stackSize) { : callbacks(callbacks), userdata(userdata), stackEnd(stack() + stackSize),
flags(flags) {
reset(); reset();
} }
@@ -80,8 +81,13 @@ struct Parser3 {
} }
} }
void flushString(bool done) { void flushString(bool done, char *buf) {
int len = writeBuf - dataBegin; int len;
if (!(flags & WeaselJsonRaw)) {
len = writeBuf - dataBegin;
} else {
len = buf - dataBegin;
}
assert(len >= 0); assert(len >= 0);
if (done || len > 0) { if (done || len > 0) {
callbacks->on_string_data(userdata, dataBegin, len, done); callbacks->on_string_data(userdata, dataBegin, len, done);
@@ -129,6 +135,7 @@ struct Parser3 {
void *const userdata; void *const userdata;
Symbol *stackPtr; Symbol *stackPtr;
Symbol *const stackEnd; Symbol *const stackEnd;
int const flags;
uint32_t utf8Codepoint; uint32_t utf8Codepoint;
uint32_t utf16Surrogate; uint32_t utf16Surrogate;
uint32_t minCodepoint; uint32_t minCodepoint;
@@ -213,13 +220,15 @@ inline PRESERVE_NONE WeaselJsonStatus scan_string_impl(Parser3 *self,
buf = (char *)self->strDfa.scan(buf, bufEnd); buf = (char *)self->strDfa.scan(buf, bufEnd);
int len = buf - before; int len = buf - before;
if (self->writeBuf != before) { if (!(self->flags & WeaselJsonRaw)) {
memmove(self->writeBuf, before, len); if (self->writeBuf != before) {
memmove(self->writeBuf, before, len);
}
self->writeBuf += len;
} }
self->writeBuf += len;
if (buf == bufEnd) { if (buf == bufEnd) {
self->flushString(false); self->flushString(false, buf);
return WeaselJson_AGAIN; return WeaselJson_AGAIN;
} }
@@ -531,7 +540,7 @@ inline PRESERVE_NONE WeaselJsonStatus n_string2(Parser3 *self, char *buf,
} }
switch (*buf) { switch (*buf) {
case '"': case '"':
self->flushString(true); self->flushString(true, buf);
++buf; ++buf;
self->pop(); self->pop();
if (buf == bufEnd) { if (buf == bufEnd) {
@@ -545,7 +554,7 @@ inline PRESERVE_NONE WeaselJsonStatus n_string2(Parser3 *self, char *buf,
return s; return s;
} }
if (buf == bufEnd) { if (buf == bufEnd) {
self->flushString(false); self->flushString(false, buf);
return WeaselJson_AGAIN; return WeaselJson_AGAIN;
} }
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
@@ -571,42 +580,49 @@ inline PRESERVE_NONE WeaselJsonStatus n_string2(Parser3 *self, char *buf,
return WeaselJson_REJECT; return WeaselJson_REJECT;
} }
buf += 6; buf += 6;
assert(codepoint <= 0x10ffff); if (!(self->flags & WeaselJsonRaw)) {
self->writeBuf[3] = (0b00111111 & codepoint) | 0b10000000; assert(codepoint <= 0x10ffff);
codepoint >>= 6; self->writeBuf[3] = (0b00111111 & codepoint) | 0b10000000;
self->writeBuf[2] = (0b00111111 & codepoint) | 0b10000000;
codepoint >>= 6;
self->writeBuf[1] = (0b00111111 & codepoint) | 0b10000000;
codepoint >>= 6;
self->writeBuf[0] = (0b00000111 & codepoint) | 0b11110000;
self->writeBuf += 4;
} else {
if (codepoint < 0x80) {
*self->writeBuf++ = codepoint;
} else if (codepoint < 0x800) {
self->writeBuf[1] = (0b00111111 & codepoint) | 0b10000000;
codepoint >>= 6; codepoint >>= 6;
self->writeBuf[0] = (0b00011111 & codepoint) | 0b11000000;
self->writeBuf += 2;
} else {
assert(codepoint < 0x10000);
self->writeBuf[2] = (0b00111111 & codepoint) | 0b10000000; self->writeBuf[2] = (0b00111111 & codepoint) | 0b10000000;
codepoint >>= 6; codepoint >>= 6;
self->writeBuf[1] = (0b00111111 & codepoint) | 0b10000000; self->writeBuf[1] = (0b00111111 & codepoint) | 0b10000000;
codepoint >>= 6; codepoint >>= 6;
self->writeBuf[0] = (0b00001111 & codepoint) | 0b11100000; self->writeBuf[0] = (0b00000111 & codepoint) | 0b11110000;
self->writeBuf += 3; self->writeBuf += 4;
}
} else {
if (!(self->flags & WeaselJsonRaw)) {
if (codepoint < 0x80) {
*self->writeBuf++ = codepoint;
} else if (codepoint < 0x800) {
self->writeBuf[1] = (0b00111111 & codepoint) | 0b10000000;
codepoint >>= 6;
self->writeBuf[0] = (0b00011111 & codepoint) | 0b11000000;
self->writeBuf += 2;
} else {
assert(codepoint < 0x10000);
self->writeBuf[2] = (0b00111111 & codepoint) | 0b10000000;
codepoint >>= 6;
self->writeBuf[1] = (0b00111111 & codepoint) | 0b10000000;
codepoint >>= 6;
self->writeBuf[0] = (0b00001111 & codepoint) | 0b11100000;
self->writeBuf += 3;
}
} }
} }
} else { } else {
auto unescaped = tables.unescape[uint8_t(*buf++)]; auto unescaped = tables.unescape[uint8_t(*buf)];
if (unescaped == 0) [[unlikely]] { if (unescaped == 0) [[unlikely]] {
return WeaselJson_REJECT; return WeaselJson_REJECT;
} }
*self->writeBuf++ = unescaped; if (!(self->flags & WeaselJsonRaw)) {
*self->writeBuf++ = unescaped;
}
++buf;
} }
if (buf == bufEnd) { if (buf == bufEnd) {
self->flushString(false); self->flushString(false, buf);
return WeaselJson_AGAIN; return WeaselJson_AGAIN;
} }
MUSTTAIL return n_string2(self, buf, bufEnd); MUSTTAIL return n_string2(self, buf, bufEnd);
@@ -632,7 +648,10 @@ inline PRESERVE_NONE WeaselJsonStatus n_string_following_escape(Parser3 *self,
case 'n': case 'n':
case 'r': case 'r':
case 't': case 't':
*self->writeBuf++ = tables.unescape[uint8_t(*buf++)]; if (!(self->flags & WeaselJsonRaw)) {
*self->writeBuf++ = tables.unescape[uint8_t(*buf)];
}
++buf;
self->pop(); self->pop();
break; break;
case 'u': case 'u':
@@ -647,7 +666,7 @@ inline PRESERVE_NONE WeaselJsonStatus n_string_following_escape(Parser3 *self,
[[unlikely]] return WeaselJson_REJECT; [[unlikely]] return WeaselJson_REJECT;
} }
if (buf == bufEnd) { if (buf == bufEnd) {
self->flushString(false); self->flushString(false, buf);
return WeaselJson_AGAIN; return WeaselJson_AGAIN;
} }
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
@@ -667,7 +686,7 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex(Parser3 *self, char *buf,
++buf; ++buf;
self->pop(); self->pop();
if (buf == bufEnd) { if (buf == bufEnd) {
self->flushString(false); self->flushString(false, buf);
return WeaselJson_AGAIN; return WeaselJson_AGAIN;
} }
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
@@ -690,21 +709,25 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self, char *buf,
// there's not room, flush, write into a temp buffer, and flush again. // there's not room, flush, write into a temp buffer, and flush again.
char tmp[3]; char tmp[3];
if (self->utf8Codepoint < 0x80) { if (self->utf8Codepoint < 0x80) {
assert(buf - self->writeBuf >= 1); if (!(self->flags & WeaselJsonRaw)) {
*self->writeBuf++ = self->utf8Codepoint; assert(buf - self->writeBuf >= 1);
} else if (self->utf8Codepoint < 0x800) { *self->writeBuf++ = self->utf8Codepoint;
bool useTmp = buf - self->writeBuf < 2;
char *p = tmp;
if (useTmp) [[unlikely]] {
self->flushString(false);
} }
auto &w = useTmp ? p : self->writeBuf; } else if (self->utf8Codepoint < 0x800) {
w[1] = (0b00111111 & self->utf8Codepoint) | 0b10000000; if (!(self->flags & WeaselJsonRaw)) {
self->utf8Codepoint >>= 6; bool useTmp = buf - self->writeBuf < 2;
w[0] = (0b00011111 & self->utf8Codepoint) | 0b11000000; char *p = tmp;
w += 2; if (useTmp) [[unlikely]] {
if (useTmp) [[unlikely]] { self->flushString(false, buf);
self->callbacks->on_string_data(self->userdata, tmp, 2, false); }
auto &w = useTmp ? p : self->writeBuf;
w[1] = (0b00111111 & self->utf8Codepoint) | 0b10000000;
self->utf8Codepoint >>= 6;
w[0] = (0b00011111 & self->utf8Codepoint) | 0b11000000;
w += 2;
if (useTmp) [[unlikely]] {
self->callbacks->on_string_data(self->userdata, tmp, 2, false);
}
} }
} else { } else {
assert(self->utf8Codepoint < 0x10000); assert(self->utf8Codepoint < 0x10000);
@@ -718,31 +741,33 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self, char *buf,
return s; return s;
} }
if (buf == bufEnd) { if (buf == bufEnd) {
self->flushString(false); self->flushString(false, buf);
return WeaselJson_AGAIN; return WeaselJson_AGAIN;
} }
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
} }
bool useTmp = buf - self->writeBuf < 3; if (!(self->flags & WeaselJsonRaw)) {
char *p = tmp; bool useTmp = buf - self->writeBuf < 3;
if (useTmp) [[unlikely]] { char *p = tmp;
self->flushString(false); if (useTmp) [[unlikely]] {
} self->flushString(false, buf);
auto &w = useTmp ? p : self->writeBuf; }
w[2] = (0b00111111 & self->utf8Codepoint) | 0b10000000; auto &w = useTmp ? p : self->writeBuf;
self->utf8Codepoint >>= 6; w[2] = (0b00111111 & self->utf8Codepoint) | 0b10000000;
w[1] = (0b00111111 & self->utf8Codepoint) | 0b10000000; self->utf8Codepoint >>= 6;
self->utf8Codepoint >>= 6; w[1] = (0b00111111 & self->utf8Codepoint) | 0b10000000;
w[0] = (0b00001111 & self->utf8Codepoint) | 0b11100000; self->utf8Codepoint >>= 6;
w += 3; w[0] = (0b00001111 & self->utf8Codepoint) | 0b11100000;
if (useTmp) [[unlikely]] { w += 3;
self->callbacks->on_string_data(self->userdata, tmp, 3, false); if (useTmp) [[unlikely]] {
self->callbacks->on_string_data(self->userdata, tmp, 3, false);
}
} }
} }
self->pop(); self->pop();
if (buf == bufEnd) { if (buf == bufEnd) {
self->flushString(false); self->flushString(false, buf);
return WeaselJson_AGAIN; return WeaselJson_AGAIN;
} }
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
@@ -777,27 +802,29 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex3(Parser3 *self, char *buf,
if (self->utf8Codepoint > 0x10FFFF) [[unlikely]] { if (self->utf8Codepoint > 0x10FFFF) [[unlikely]] {
return WeaselJson_REJECT; return WeaselJson_REJECT;
} }
bool useTmp = buf - self->writeBuf < 4; if (!(self->flags & WeaselJsonRaw)) {
char *p = tmp; bool useTmp = buf - self->writeBuf < 4;
if (useTmp) [[unlikely]] { char *p = tmp;
self->flushString(false); if (useTmp) [[unlikely]] {
} self->flushString(false, buf);
auto &w = useTmp ? p : self->writeBuf; }
w[3] = (0b00111111 & self->utf8Codepoint) | 0b10000000; auto &w = useTmp ? p : self->writeBuf;
self->utf8Codepoint >>= 6; w[3] = (0b00111111 & self->utf8Codepoint) | 0b10000000;
w[2] = (0b00111111 & self->utf8Codepoint) | 0b10000000; self->utf8Codepoint >>= 6;
self->utf8Codepoint >>= 6; w[2] = (0b00111111 & self->utf8Codepoint) | 0b10000000;
w[1] = (0b00111111 & self->utf8Codepoint) | 0b10000000; self->utf8Codepoint >>= 6;
self->utf8Codepoint >>= 6; w[1] = (0b00111111 & self->utf8Codepoint) | 0b10000000;
w[0] = (0b00000111 & self->utf8Codepoint) | 0b11110000; self->utf8Codepoint >>= 6;
w += 4; w[0] = (0b00000111 & self->utf8Codepoint) | 0b11110000;
if (useTmp) [[unlikely]] { w += 4;
self->callbacks->on_string_data(self->userdata, tmp, 4, false); if (useTmp) [[unlikely]] {
self->callbacks->on_string_data(self->userdata, tmp, 4, false);
}
} }
self->pop(); self->pop();
if (buf == bufEnd) { if (buf == bufEnd) {
self->flushString(false); self->flushString(false, buf);
return WeaselJson_AGAIN; return WeaselJson_AGAIN;
} }
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
@@ -814,7 +841,7 @@ inline PRESERVE_NONE WeaselJsonStatus singleCharInString(Parser3 *self,
++buf; ++buf;
self->pop(); self->pop();
if (buf == bufEnd) { if (buf == bufEnd) {
self->flushString(false); self->flushString(false, buf);
return WeaselJson_AGAIN; return WeaselJson_AGAIN;
} }
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);

View File

@@ -133,7 +133,7 @@ void testStreaming(std::string const &json) {
auto c = serializeCallbacks(); auto c = serializeCallbacks();
{ {
auto copy = json; auto copy = json;
auto *parser = WeaselJsonParser_create(1024, &c, &streaming); auto *parser = WeaselJsonParser_create(1024, &c, &streaming, 0);
for (size_t i = 0; i < copy.size(); ++i) { for (size_t i = 0; i < copy.size(); ++i) {
REQUIRE(WeaselJsonParser_parse(parser, copy.data() + i, 1) == REQUIRE(WeaselJsonParser_parse(parser, copy.data() + i, 1) ==
WeaselJson_AGAIN); WeaselJson_AGAIN);
@@ -143,7 +143,7 @@ void testStreaming(std::string const &json) {
} }
{ {
auto copy = json; auto copy = json;
auto *parser = WeaselJsonParser_create(1024, &c, &batch); auto *parser = WeaselJsonParser_create(1024, &c, &batch, 0);
REQUIRE(WeaselJsonParser_parse(parser, copy.data(), copy.size()) == REQUIRE(WeaselJsonParser_parse(parser, copy.data(), copy.size()) ==
WeaselJson_AGAIN); WeaselJson_AGAIN);
REQUIRE(WeaselJsonParser_parse(parser, nullptr, 0) == WeaselJson_OK); REQUIRE(WeaselJsonParser_parse(parser, nullptr, 0) == WeaselJson_OK);
@@ -159,7 +159,7 @@ TEST_CASE("parser3") {
SerializeState state; SerializeState state;
{ {
auto copy = json; auto copy = json;
auto *parser = WeaselJsonParser_create(1024, &c, &state); auto *parser = WeaselJsonParser_create(1024, &c, &state, 0);
for (size_t i = 0; i < copy.size(); ++i) { for (size_t i = 0; i < copy.size(); ++i) {
REQUIRE(WeaselJsonParser_parse(parser, copy.data() + i, 1) == REQUIRE(WeaselJsonParser_parse(parser, copy.data() + i, 1) ==
WeaselJson_AGAIN); WeaselJson_AGAIN);
@@ -169,7 +169,7 @@ TEST_CASE("parser3") {
} }
{ {
std::string copy = "{\"x\": [], \"y\": {}}"; std::string copy = "{\"x\": [], \"y\": {}}";
auto *parser = WeaselJsonParser_create(1024, &c, &state); auto *parser = WeaselJsonParser_create(1024, &c, &state, 0);
for (size_t i = 0; i < copy.size(); ++i) { for (size_t i = 0; i < copy.size(); ++i) {
REQUIRE(WeaselJsonParser_parse(parser, copy.data() + i, 1) == REQUIRE(WeaselJsonParser_parse(parser, copy.data() + i, 1) ==
WeaselJson_AGAIN); WeaselJson_AGAIN);
@@ -181,7 +181,7 @@ TEST_CASE("parser3") {
{ {
auto c = noopCallbacks(); auto c = noopCallbacks();
std::string copy = "{\"a\":\"a"; std::string copy = "{\"a\":\"a";
auto *parser = WeaselJsonParser_create(1024, &c, &state); auto *parser = WeaselJsonParser_create(1024, &c, &state, 0);
for (size_t i = 0; i < copy.size(); ++i) { for (size_t i = 0; i < copy.size(); ++i) {
REQUIRE(WeaselJsonParser_parse(parser, copy.data() + i, 1) == REQUIRE(WeaselJsonParser_parse(parser, copy.data() + i, 1) ==
WeaselJson_AGAIN); WeaselJson_AGAIN);
@@ -192,7 +192,7 @@ TEST_CASE("parser3") {
{ {
auto c = noopCallbacks(); auto c = noopCallbacks();
std::string copy = "["; std::string copy = "[";
auto *parser = WeaselJsonParser_create(1024, &c, &state); auto *parser = WeaselJsonParser_create(1024, &c, &state, 0);
for (size_t i = 0; i < copy.size(); ++i) { for (size_t i = 0; i < copy.size(); ++i) {
REQUIRE(WeaselJsonParser_parse(parser, copy.data() + i, 1) == REQUIRE(WeaselJsonParser_parse(parser, copy.data() + i, 1) ==
WeaselJson_AGAIN); WeaselJson_AGAIN);
@@ -205,7 +205,7 @@ TEST_CASE("parser3") {
TEST_CASE("streaming") { testStreaming(json); } TEST_CASE("streaming") { testStreaming(json); }
void doTestUnescapingUtf8(std::string const &escaped, void doTestUnescapingUtf8(std::string const &escaped,
std::string const &expected, int stride) { std::string const &expected, int stride, int flags) {
CAPTURE(escaped); CAPTURE(escaped);
CAPTURE(expected); CAPTURE(expected);
CAPTURE(stride); CAPTURE(stride);
@@ -215,7 +215,7 @@ void doTestUnescapingUtf8(std::string const &escaped,
auto &s = *(std::string *)p; auto &s = *(std::string *)p;
s.append(buf, len); s.append(buf, len);
}; };
auto *parser = WeaselJsonParser_create(1024, &c, &result); auto *parser = WeaselJsonParser_create(1024, &c, &result, flags);
auto copy = escaped; auto copy = escaped;
for (size_t i = 0; i < copy.size(); i += stride) { for (size_t i = 0; i < copy.size(); i += stride) {
CAPTURE(i); CAPTURE(i);
@@ -233,8 +233,11 @@ void testUnescapingUtf8(std::string const &escaped,
std::string const &expected) { std::string const &expected) {
for (int stride = 0; stride < 10; ++stride) { for (int stride = 0; stride < 10; ++stride) {
doTestUnescapingUtf8(escaped, expected, doTestUnescapingUtf8(escaped, expected,
stride == 0 ? std::numeric_limits<int>::max() stride == 0 ? std::numeric_limits<int>::max() : stride,
: stride); 0);
doTestUnescapingUtf8(
escaped, escaped.substr(1).substr(0, escaped.size() - 2),
stride == 0 ? std::numeric_limits<int>::max() : stride, WeaselJsonRaw);
} }
} }
@@ -262,7 +265,7 @@ TEST_CASE("bench3") {
ankerl::nanobench::Bench bench; ankerl::nanobench::Bench bench;
bench.batch(json.size()); bench.batch(json.size());
bench.unit("byte"); bench.unit("byte");
auto *parser = WeaselJsonParser_create(1024, &c, nullptr); auto *parser = WeaselJsonParser_create(1024, &c, nullptr, 0);
for (size_t stride = 128; stride <= json.size(); stride *= 2) { for (size_t stride = 128; stride <= json.size(); stride *= 2) {
bench.run("parser3 (stride: " + std::to_string(stride) + ")", [&]() { bench.run("parser3 (stride: " + std::to_string(stride) + ")", [&]() {
auto copy = json; auto copy = json;
@@ -376,19 +379,36 @@ TEST_CASE("bench input types") {
bench.doNotOptimizeAway(doc); bench.doNotOptimizeAway(doc);
}); });
auto *parser = WeaselJsonParser_create(1024, &c, nullptr); {
bench.run("parser3 " + name, [&]() { auto *parser = WeaselJsonParser_create(1024, &c, nullptr, 0);
auto copy = json; bench.run("parser3 " + name, [&]() {
WeaselJsonParser_reset(parser); auto copy = json;
if (WeaselJsonParser_parse(parser, copy.data(), copy.size()) != WeaselJsonParser_reset(parser);
WeaselJson_AGAIN) { if (WeaselJsonParser_parse(parser, copy.data(), copy.size()) !=
abort(); WeaselJson_AGAIN) {
} abort();
if (WeaselJsonParser_parse(parser, nullptr, 0) != WeaselJson_OK) { }
abort(); if (WeaselJsonParser_parse(parser, nullptr, 0) != WeaselJson_OK) {
} abort();
}); }
WeaselJsonParser_destroy(parser); });
WeaselJsonParser_destroy(parser);
}
{
auto *parser = WeaselJsonParser_create(1024, &c, nullptr, WeaselJsonRaw);
bench.run("parser3 (raw) " + name, [&]() {
auto copy = json;
WeaselJsonParser_reset(parser);
if (WeaselJsonParser_parse(parser, copy.data(), copy.size()) !=
WeaselJson_AGAIN) {
abort();
}
if (WeaselJsonParser_parse(parser, nullptr, 0) != WeaselJson_OK) {
abort();
}
});
WeaselJsonParser_destroy(parser);
}
}; };
bench("numbers", "[-123456789.000000000000000123456789e+12, " bench("numbers", "[-123456789.000000000000000123456789e+12, "

View File

@@ -17,7 +17,7 @@ int main(int argc, char **argv) {
} }
auto c = noopCallbacks(); auto c = noopCallbacks();
std::unique_ptr<WeaselJsonParser, decltype(&WeaselJsonParser_destroy)> parser{ std::unique_ptr<WeaselJsonParser, decltype(&WeaselJsonParser_destroy)> parser{
WeaselJsonParser_create(1024, &c, nullptr), WeaselJsonParser_destroy}; WeaselJsonParser_create(1024, &c, nullptr, 0), WeaselJsonParser_destroy};
for (;;) { for (;;) {
char buf[1024]; char buf[1024];
int l = read(fd, buf, sizeof(buf)); int l = read(fd, buf, sizeof(buf));

View File

@@ -92,6 +92,7 @@ class WeaselJsonParser:
ctypes.c_int, ctypes.c_int,
ctypes.POINTER(WeaselJsonCallbacks), ctypes.POINTER(WeaselJsonCallbacks),
ctypes.c_void_p, ctypes.c_void_p,
ctypes.c_int,
) )
self._lib.WeaselJsonParser_create.restype = ctypes.c_void_p self._lib.WeaselJsonParser_create.restype = ctypes.c_void_p
self._lib.WeaselJsonParser_reset.argtypes = (ctypes.c_void_p,) self._lib.WeaselJsonParser_reset.argtypes = (ctypes.c_void_p,)
@@ -110,6 +111,7 @@ class WeaselJsonParser:
stackSize, stackSize,
c_callbacks, c_callbacks,
self.voidp_callbacks, self.voidp_callbacks,
0,
) )
def parse(self, data: bytes) -> WeaselJsonStatus: def parse(self, data: bytes) -> WeaselJsonStatus: