From f6cd807da3c4eb7743c90ae24a400d40b980eae0 Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Sun, 25 May 2025 21:01:37 -0400 Subject: [PATCH] Remove on_{begin,end}_{string,number} And add `done` arg to data callback --- include/weaseljson.h | 35 +++++++++++------------ src/callbacks.h | 57 +++++++++++++++++--------------------- src/json_value.h | 37 +++++++++++++------------ src/parser3.h | 52 +++++++++++++++------------------- src/test.cpp | 2 +- weaseljson.py | 66 ++++++++------------------------------------ 6 files changed, 95 insertions(+), 154 deletions(-) diff --git a/include/weaseljson.h b/include/weaseljson.h index ea7b70c..1442376 100644 --- a/include/weaseljson.h +++ b/include/weaseljson.h @@ -6,23 +6,20 @@ extern "C" { #endif struct WeaselJsonCallbacks { - void (*on_begin_object)(void *data); - void (*on_end_object)(void *data); - void (*on_begin_string)(void *data); - /** May be called multiple times per string if not all string data is - * available yet. The string data provided is unescaped. */ - void (*on_string_data)(void *data, const char *buf, int len); - void (*on_end_string)(void *data); - void (*on_begin_array)(void *data); - void (*on_end_array)(void *data); - void (*on_begin_number)(void *data); - /** May be called multiple times per number if not all number data is - * available yet */ - void (*on_number_data)(void *data, const char *buf, int len); - void (*on_end_number)(void *data); - void (*on_true_literal)(void *data); - void (*on_false_literal)(void *data); - void (*on_null_literal)(void *data); + void (*on_begin_object)(void *userdata); + void (*on_end_object)(void *userdata); + /** The string data provided has already been unescaped. If `done` is false, + * this string may be incomplete and there will be another call with more data + */ + void (*on_string_data)(void *userdata, const char *buf, int len, int done); + void (*on_begin_array)(void *userdata); + void (*on_end_array)(void *userdata); + /*If `done` is false, this number may be incomplete and there will be another + * call with more data*/ + void (*on_number_data)(void *userdata, const char *buf, int len, int done); + void (*on_true_literal)(void *userdata); + void (*on_false_literal)(void *userdata); + void (*on_null_literal)(void *userdata); }; enum WeaselJsonStatus { @@ -40,12 +37,12 @@ enum WeaselJsonStatus { typedef struct WeaselJsonParser WeaselJsonParser; /** Create a parser. Increasing stack size increases memory usage but also - * increases the depth of nested json accepted. `callbacks` and `data` must + * increases the depth of nested json accepted. `callbacks` and `userdata` must * outlive the returned parser. Returns null if there's insufficient available * memory */ WeaselJsonParser *WeaselJsonParser_create(int stackSize, const WeaselJsonCallbacks *callbacks, - void *data); + void *userdata); /** Restore the parser to its newly-created state */ void WeaselJsonParser_reset(WeaselJsonParser *parser); diff --git a/src/callbacks.h b/src/callbacks.h index a00a334..cb9cd34 100644 --- a/src/callbacks.h +++ b/src/callbacks.h @@ -11,18 +11,14 @@ inline WeaselJsonCallbacks printCallbacks() { WeaselJsonCallbacks result; result.on_begin_object = +[](void *) { puts("on_begin_object"); }; result.on_end_object = +[](void *) { puts("on_end_object"); }; - result.on_begin_string = +[](void *) { puts("on_begin_string"); }; - result.on_string_data = +[](void *, const char *buf, int len) { + result.on_string_data = +[](void *, const char *buf, int len, int /*done*/) { printf("on_string_data `%.*s`\n", len, buf); }; - result.on_end_string = +[](void *) { puts("on_end_string"); }; result.on_begin_array = +[](void *) { puts("on_begin_array"); }; result.on_end_array = +[](void *) { puts("on_end_array"); }; - result.on_begin_number = +[](void *) { puts("on_begin_number"); }; - result.on_number_data = +[](void *, const char *buf, int len) { + result.on_number_data = +[](void *, const char *buf, int len, int /*done*/) { printf("on_number_data `%.*s`\n", len, buf); }; - result.on_end_number = +[](void *) { puts("on_end_number"); }; result.on_true_literal = +[](void *) { puts("on_true_literal"); }; result.on_false_literal = +[](void *) { puts("on_false_literal"); }; result.on_null_literal = +[](void *) { puts("on_null_literal"); }; @@ -33,14 +29,10 @@ inline WeaselJsonCallbacks noopCallbacks() { WeaselJsonCallbacks result; result.on_begin_object = +[](void *) {}; result.on_end_object = +[](void *) {}; - result.on_begin_string = +[](void *) {}; - result.on_string_data = +[](void *, const char *, int) {}; - result.on_end_string = +[](void *) {}; + result.on_string_data = +[](void *, const char *, int, int) {}; result.on_begin_array = +[](void *) {}; result.on_end_array = +[](void *) {}; - result.on_begin_number = +[](void *) {}; - result.on_number_data = +[](void *, const char *, int) {}; - result.on_end_number = +[](void *) {}; + result.on_number_data = +[](void *, const char *, int, int) {}; result.on_true_literal = +[](void *) {}; result.on_false_literal = +[](void *) {}; result.on_null_literal = +[](void *) {}; @@ -70,6 +62,7 @@ struct SerializeState { } } std::vector stack; + bool startedData = false; }; inline WeaselJsonCallbacks serializeCallbacks() { @@ -85,18 +78,18 @@ inline WeaselJsonCallbacks serializeCallbacks() { state->stack.pop_back(); state->result.append("}"); }; - result.on_begin_string = +[](void *p) { - auto *state = (SerializeState *)p; - state->on_begin_value(); - state->result.append("<"); - }; - result.on_string_data = +[](void *p, const char *buf, int len) { + result.on_string_data = +[](void *p, const char *buf, int len, int done) { auto *state = (SerializeState *)p; + if (!state->startedData) { + state->startedData = true; + state->on_begin_value(); + state->result.append("<"); + } state->result.append(std::string(buf, len)); - }; - result.on_end_string = +[](void *p) { - auto *state = (SerializeState *)p; - state->result.append(">"); + if (done) { + state->startedData = false; + state->result.append(">"); + } }; result.on_begin_array = +[](void *p) { auto *state = (SerializeState *)p; @@ -109,18 +102,18 @@ inline WeaselJsonCallbacks serializeCallbacks() { state->stack.pop_back(); state->result.append("]"); }; - result.on_begin_number = +[](void *p) { - auto *state = (SerializeState *)p; - state->on_begin_value(); - state->result.append("("); - }; - result.on_number_data = +[](void *p, const char *buf, int len) { + result.on_number_data = +[](void *p, const char *buf, int len, int done) { auto *state = (SerializeState *)p; + if (!state->startedData) { + state->startedData = true; + state->on_begin_value(); + state->result.append("("); + } state->result.append(std::string(buf, len)); - }; - result.on_end_number = +[](void *p) { - auto *state = (SerializeState *)p; - state->result.append(")"); + if (done) { + state->startedData = false; + state->result.append(")"); + } }; result.on_true_literal = +[](void *p) { auto *state = (SerializeState *)p; diff --git a/src/json_value.h b/src/json_value.h index 8cb4782..496ed35 100644 --- a/src/json_value.h +++ b/src/json_value.h @@ -52,6 +52,7 @@ struct ReadValueState { return; } } + bool startedData = false; }; inline WeaselJsonCallbacks readValueCallbacks() { @@ -66,17 +67,17 @@ inline WeaselJsonCallbacks readValueCallbacks() { state->isKeyStack.pop_back(); state->on_end_value(); }; - result.on_begin_string = +[](void *p) { - auto *state = (ReadValueState *)p; - state->valueStack.emplace_back(std::string()); - }; - result.on_string_data = +[](void *p, const char *buf, int len) { + result.on_string_data = +[](void *p, const char *buf, int len, int done) { auto *state = (ReadValueState *)p; + if (!state->startedData) { + state->startedData = true; + state->valueStack.emplace_back(std::string()); + } std::get(state->valueStack.back()).append(buf, len); - }; - result.on_end_string = +[](void *p) { - auto *state = (ReadValueState *)p; - state->on_end_value(); + if (done) { + state->startedData = false; + state->on_end_value(); + } }; result.on_begin_array = +[](void *p) { auto *state = (ReadValueState *)p; @@ -86,17 +87,17 @@ inline WeaselJsonCallbacks readValueCallbacks() { auto *state = (ReadValueState *)p; state->on_end_value(); }; - result.on_begin_number = +[](void *p) { - auto *state = (ReadValueState *)p; - state->valueStack.emplace_back(JsonNumber()); - }; - result.on_number_data = +[](void *p, const char *buf, int len) { + result.on_number_data = +[](void *p, const char *buf, int len, int done) { auto *state = (ReadValueState *)p; + if (!state->startedData) { + state->startedData = true; + state->valueStack.emplace_back(JsonNumber()); + } std::get(state->valueStack.back()).append(buf, len); - }; - result.on_end_number = +[](void *p) { - auto *state = (ReadValueState *)p; - state->on_end_value(); + if (done) { + state->startedData = false; + state->on_end_value(); + } }; result.on_true_literal = +[](void *p) { auto *state = (ReadValueState *)p; diff --git a/src/parser3.h b/src/parser3.h index b7e8c04..394262f 100644 --- a/src/parser3.h +++ b/src/parser3.h @@ -80,17 +80,19 @@ struct Parser3 { return keepGoing(this); } - void flushNumber() { + void flushNumber(bool done) { int len = buf - dataBegin; - if (len > 0) { - callbacks->on_number_data(data, dataBegin, len); + assert(len >= 0); + if (done || len > 0) { + callbacks->on_number_data(data, dataBegin, len, done); } } - void flushString() { + void flushString(bool done) { int len = writeBuf - dataBegin; - if (len > 0) { - callbacks->on_string_data(data, dataBegin, len); + assert(len >= 0); + if (done || len > 0) { + callbacks->on_string_data(data, dataBegin, len, done); } dataBegin = writeBuf; } @@ -192,7 +194,6 @@ inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self) { } break; case '"': - self->callbacks->on_begin_string(self->data); ++self->buf; self->dataBegin = self->writeBuf = self->buf; self->pop(); @@ -205,7 +206,6 @@ inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self) { if (auto s = self->push({N_FRACTION, N_EXPONENT})) { return s; } - self->callbacks->on_begin_number(self->data); self->dataBegin = self->buf; ++self->buf; MUSTTAIL return Parser3::keepGoing(self); @@ -219,7 +219,6 @@ inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self) { case '8': case '9': self->pop(); - self->callbacks->on_begin_number(self->data); self->dataBegin = self->buf; ++self->buf; if (auto s = self->push({N_DIGITS2, N_FRACTION, N_EXPONENT})) { @@ -228,7 +227,6 @@ inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self) { MUSTTAIL return Parser3::keepGoing(self); case '-': self->pop(); - self->callbacks->on_begin_number(self->data); self->dataBegin = self->buf; ++self->buf; if (auto s = self->push({N_INTEGER2, N_FRACTION, N_EXPONENT})) { @@ -304,7 +302,6 @@ inline PRESERVE_NONE WeaselJsonStatus n_object2(Parser3 *self) { self->callbacks->on_end_object(self->data); MUSTTAIL return Parser3::keepGoing(self); case '"': - self->callbacks->on_begin_string(self->data); ++self->buf; self->dataBegin = self->writeBuf = self->buf; self->pop(); @@ -403,7 +400,6 @@ inline PRESERVE_NONE WeaselJsonStatus n_string(Parser3 *self) { if (*self->buf != '"') [[unlikely]] { return WeaselJson_REJECT; } - self->callbacks->on_begin_string(self->data); ++self->buf; self->dataBegin = self->writeBuf = self->buf; self->pop(); @@ -480,7 +476,7 @@ inline PRESERVE_NONE WeaselJsonStatus n_string2(Parser3 *self) { self->writeBuf += len; if (self->buf == self->bufEnd) { - self->flushString(); + self->flushString(false); return WeaselJson_AGAIN; } @@ -488,8 +484,7 @@ inline PRESERVE_NONE WeaselJsonStatus n_string2(Parser3 *self) { case Tables::NORMAL: __builtin_unreachable(); case Tables::DUBQUOTE: - self->flushString(); - self->callbacks->on_end_string(self->data); + self->flushString(true); ++self->buf; self->pop(); MUSTTAIL return Parser3::keepGoing(self); @@ -660,7 +655,7 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self) { bool useTmp = self->buf - self->writeBuf < 2; char *p = tmp; if (useTmp) { - self->flushString(); + self->flushString(false); } auto &w = useTmp ? p : self->writeBuf; w[1] = (0b00111111 & self->utf8Codepoint) | 0b10000000; @@ -668,7 +663,7 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self) { w[0] = (0b00011111 & self->utf8Codepoint) | 0b11000000; w += 2; if (useTmp) { - self->callbacks->on_string_data(self->data, tmp, 2); + self->callbacks->on_string_data(self->data, tmp, 2, false); } } else { assert(self->utf8Codepoint < 0x10000); @@ -686,7 +681,7 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self) { bool useTmp = self->buf - self->writeBuf < 3; char *p = tmp; if (useTmp) { - self->flushString(); + self->flushString(false); } auto &w = useTmp ? p : self->writeBuf; w[2] = (0b00111111 & self->utf8Codepoint) | 0b10000000; @@ -696,7 +691,7 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self) { w[0] = (0b00001111 & self->utf8Codepoint) | 0b11100000; w += 3; if (useTmp) { - self->callbacks->on_string_data(self->data, tmp, 3); + self->callbacks->on_string_data(self->data, tmp, 3, false); } } @@ -736,7 +731,7 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex3(Parser3 *self) { bool useTmp = self->buf - self->writeBuf < 4; char *p = tmp; if (useTmp) { - self->flushString(); + self->flushString(false); } auto &w = useTmp ? p : self->writeBuf; w[3] = (0b00111111 & self->utf8Codepoint) | 0b10000000; @@ -748,7 +743,7 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex3(Parser3 *self) { w[0] = (0b00000111 & self->utf8Codepoint) | 0b11110000; w += 4; if (useTmp) { - self->callbacks->on_string_data(self->data, tmp, 4); + self->callbacks->on_string_data(self->data, tmp, 4, false); } self->pop(); @@ -756,7 +751,6 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex3(Parser3 *self) { } inline PRESERVE_NONE WeaselJsonStatus n_integer(Parser3 *self) { - self->callbacks->on_begin_number(self->data); self->dataBegin = self->buf; switch (*self->buf) { case '0': @@ -882,7 +876,6 @@ inline PRESERVE_NONE WeaselJsonStatus n_fraction(Parser3 *self) { } } -// Responsible for ensuring that on_end_number gets called inline PRESERVE_NONE WeaselJsonStatus n_exponent(Parser3 *self) { if (self->len() == 0) { self->pop(); @@ -899,8 +892,7 @@ inline PRESERVE_NONE WeaselJsonStatus n_exponent(Parser3 *self) { MUSTTAIL return Parser3::keepGoing(self); default: self->pop(); - self->flushNumber(); - self->callbacks->on_end_number(self->data); + self->flushNumber(true); MUSTTAIL return Parser3::keepGoing(self); } } @@ -984,8 +976,7 @@ inline PRESERVE_NONE WeaselJsonStatus t_eof(Parser3 *self) { inline PRESERVE_NONE WeaselJsonStatus t_end_number(Parser3 *self) { self->pop(); - self->flushNumber(); - self->callbacks->on_end_number(self->data); + self->flushNumber(true); MUSTTAIL return Parser3::keepGoing(self); } @@ -1098,9 +1089,8 @@ inline PRESERVE_NONE WeaselJsonStatus Parser3::keepGoing(Parser3 *self) { case T_DIGIT: case T_ONENINE: case T_END_NUMBER: - self->flushNumber(); + self->flushNumber(false); break; - case N_STRING: case N_STRING2: case N_STRING_FOLLOWING_ESCAPE: case T_UTF8_CONTINUATION_BYTE: @@ -1110,8 +1100,10 @@ inline PRESERVE_NONE WeaselJsonStatus Parser3::keepGoing(Parser3 *self) { case T_HEX3: case T_BACKSLASH: case T_U2: - self->flushString(); + self->flushString(false); break; + case N_STRING: // The beginning of the string is in the future in this + // state. There's no data to flush yet case N_VALUE: case N_OBJECT2: case N_OBJECT3: diff --git a/src/test.cpp b/src/test.cpp index 3a41931..3c40df7 100644 --- a/src/test.cpp +++ b/src/test.cpp @@ -230,7 +230,7 @@ void doTestUnescapingUtf8(std::string const &escaped, CAPTURE(stride); auto c = noopCallbacks(); std::string result; - c.on_string_data = +[](void *p, const char *buf, int len) { + c.on_string_data = +[](void *p, const char *buf, int len, int /*done*/) { auto &s = *(std::string *)p; s.append(buf, len); }; diff --git a/weaseljson.py b/weaseljson.py index 4b20012..b792083 100644 --- a/weaseljson.py +++ b/weaseljson.py @@ -5,21 +5,19 @@ import os from typing import Optional event_callback = ctypes.CFUNCTYPE(None, ctypes.c_void_p) -data_callback = ctypes.CFUNCTYPE(None, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int) +data_callback = ctypes.CFUNCTYPE( + None, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_int +) class WeaselJsonCallbacks(ctypes.Structure): _fields_ = [ ("on_begin_object", event_callback), ("on_end_object", event_callback), - ("on_begin_string", event_callback), ("on_string_data", data_callback), - ("on_end_string", event_callback), ("on_begin_array", event_callback), ("on_end_array", event_callback), - ("on_begin_number", event_callback), ("on_number_data", data_callback), - ("on_end_number", event_callback), ("on_true_literal", event_callback), ("on_false_literal", event_callback), ("on_null_literal", event_callback), @@ -40,13 +38,7 @@ class WeaselJsonCallbacksBase: def on_end_object(self): pass - def on_begin_string(self): - pass - - def on_string_data(self, data): - pass - - def on_end_string(self): + def on_string_data(self, data, done): pass def on_begin_array(self): @@ -55,13 +47,7 @@ class WeaselJsonCallbacksBase: def on_end_array(self): pass - def on_begin_number(self): - pass - - def on_number_data(self, data): - pass - - def on_end_number(self): + def on_number_data(self, data, done): pass def on_true_literal(self): @@ -155,22 +141,10 @@ def on_end_object(p): self.on_end_object() -@ctypes.CFUNCTYPE(None, ctypes.c_void_p) -def on_begin_string(p): +@ctypes.CFUNCTYPE(None, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_int) +def on_string_data(p, buf, len, done): self = ctypes.cast(p, ctypes.POINTER(ctypes.py_object)).contents.value - self.on_begin_string() - - -@ctypes.CFUNCTYPE(None, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int) -def on_string_data(p, buf, len): - self = ctypes.cast(p, ctypes.POINTER(ctypes.py_object)).contents.value - self.on_string_data(bytes(ctypes.string_at(buf, len))) - - -@ctypes.CFUNCTYPE(None, ctypes.c_void_p) -def on_end_string(p): - self = ctypes.cast(p, ctypes.POINTER(ctypes.py_object)).contents.value - self.on_end_string() + self.on_string_data(bytes(ctypes.string_at(buf, len)), bool(done)) @ctypes.CFUNCTYPE(None, ctypes.c_void_p) @@ -185,22 +159,10 @@ def on_end_array(p): self.on_end_array() -@ctypes.CFUNCTYPE(None, ctypes.c_void_p) -def on_begin_number(p): +@ctypes.CFUNCTYPE(None, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_int) +def on_number_data(p, buf, len, done): self = ctypes.cast(p, ctypes.POINTER(ctypes.py_object)).contents.value - self.on_begin_number() - - -@ctypes.CFUNCTYPE(None, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int) -def on_number_data(p, buf, len): - self = ctypes.cast(p, ctypes.POINTER(ctypes.py_object)).contents.value - self.on_number_data(bytes(ctypes.string_at(buf, len))) - - -@ctypes.CFUNCTYPE(None, ctypes.c_void_p) -def on_end_number(p): - self = ctypes.cast(p, ctypes.POINTER(ctypes.py_object)).contents.value - self.on_end_number() + self.on_number_data(bytes(ctypes.string_at(buf, len)), bool(done)) @ctypes.CFUNCTYPE(None, ctypes.c_void_p) @@ -224,14 +186,10 @@ def on_null_literal(p): c_callbacks = WeaselJsonCallbacks( on_begin_object, on_end_object, - on_begin_string, on_string_data, - on_end_string, on_begin_array, on_end_array, - on_begin_number, on_number_data, - on_end_number, on_true_literal, on_false_literal, on_null_literal, @@ -240,7 +198,7 @@ c_callbacks = WeaselJsonCallbacks( class MyCallbacks(WeaselJsonCallbacksBase): # override callbacks - def on_string_data(self, data): + def on_string_data(self, data, done): print(data)