Remove on_{begin,end}_{string,number} callbacks

Add a `done` argument to the on_string_data and on_number_data callbacks
This commit is contained in:
2025-05-25 21:01:37 -04:00
parent f92b33eec3
commit f6cd807da3
6 changed files with 95 additions and 154 deletions

View File

@@ -6,23 +6,20 @@ extern "C" {
#endif
/** Table of function pointers that the parser calls as it recognizes JSON
* events. */
struct WeaselJsonCallbacks {
void (*on_begin_object)(void *data);
void (*on_end_object)(void *data);
void (*on_begin_string)(void *data);
/** May be called multiple times per string if not all string data is
* available yet. The string data provided is unescaped. */
void (*on_string_data)(void *data, const char *buf, int len);
void (*on_end_string)(void *data);
void (*on_begin_array)(void *data);
void (*on_end_array)(void *data);
void (*on_begin_number)(void *data);
/** May be called multiple times per number if not all number data is
* available yet */
void (*on_number_data)(void *data, const char *buf, int len);
void (*on_end_number)(void *data);
void (*on_true_literal)(void *data);
void (*on_false_literal)(void *data);
void (*on_null_literal)(void *data);
void (*on_begin_object)(void *userdata);
void (*on_end_object)(void *userdata);
/** Receives `len` bytes of string data at `buf`, already unescaped. If `done`
* is false, this string may be incomplete and there will be another call with
* more data. The call with `done` set may carry `len == 0`. */
void (*on_string_data)(void *userdata, const char *buf, int len, int done);
void (*on_begin_array)(void *userdata);
void (*on_end_array)(void *userdata);
/** Receives `len` bytes of number text at `buf`. If `done` is false, this
* number may be incomplete and there will be another call with more data. The
* call with `done` set may carry `len == 0`. */
void (*on_number_data)(void *userdata, const char *buf, int len, int done);
void (*on_true_literal)(void *userdata);
void (*on_false_literal)(void *userdata);
void (*on_null_literal)(void *userdata);
};
enum WeaselJsonStatus {
@@ -40,12 +37,12 @@ enum WeaselJsonStatus {
typedef struct WeaselJsonParser WeaselJsonParser;
/** Create a parser. Increasing stack size increases memory usage but also
* increases the depth of nested json accepted. `callbacks` and `data` must
* increases the depth of nested json accepted. `callbacks` and `userdata` must
* outlive the returned parser. Returns null if there's insufficient available
* memory */
WeaselJsonParser *WeaselJsonParser_create(int stackSize,
const WeaselJsonCallbacks *callbacks,
void *data);
void *userdata);
/** Restore the parser to its newly-created state */
void WeaselJsonParser_reset(WeaselJsonParser *parser);

View File

@@ -11,18 +11,14 @@ inline WeaselJsonCallbacks printCallbacks() {
WeaselJsonCallbacks result;
result.on_begin_object = +[](void *) { puts("on_begin_object"); };
result.on_end_object = +[](void *) { puts("on_end_object"); };
result.on_begin_string = +[](void *) { puts("on_begin_string"); };
result.on_string_data = +[](void *, const char *buf, int len) {
result.on_string_data = +[](void *, const char *buf, int len, int /*done*/) {
printf("on_string_data `%.*s`\n", len, buf);
};
result.on_end_string = +[](void *) { puts("on_end_string"); };
result.on_begin_array = +[](void *) { puts("on_begin_array"); };
result.on_end_array = +[](void *) { puts("on_end_array"); };
result.on_begin_number = +[](void *) { puts("on_begin_number"); };
result.on_number_data = +[](void *, const char *buf, int len) {
result.on_number_data = +[](void *, const char *buf, int len, int /*done*/) {
printf("on_number_data `%.*s`\n", len, buf);
};
result.on_end_number = +[](void *) { puts("on_end_number"); };
result.on_true_literal = +[](void *) { puts("on_true_literal"); };
result.on_false_literal = +[](void *) { puts("on_false_literal"); };
result.on_null_literal = +[](void *) { puts("on_null_literal"); };
@@ -33,14 +29,10 @@ inline WeaselJsonCallbacks noopCallbacks() {
WeaselJsonCallbacks result;
result.on_begin_object = +[](void *) {};
result.on_end_object = +[](void *) {};
result.on_begin_string = +[](void *) {};
result.on_string_data = +[](void *, const char *, int) {};
result.on_end_string = +[](void *) {};
result.on_string_data = +[](void *, const char *, int, int) {};
result.on_begin_array = +[](void *) {};
result.on_end_array = +[](void *) {};
result.on_begin_number = +[](void *) {};
result.on_number_data = +[](void *, const char *, int) {};
result.on_end_number = +[](void *) {};
result.on_number_data = +[](void *, const char *, int, int) {};
result.on_true_literal = +[](void *) {};
result.on_false_literal = +[](void *) {};
result.on_null_literal = +[](void *) {};
@@ -70,6 +62,7 @@ struct SerializeState {
}
}
std::vector<Cursor> stack;
bool startedData = false;
};
inline WeaselJsonCallbacks serializeCallbacks() {
@@ -85,18 +78,18 @@ inline WeaselJsonCallbacks serializeCallbacks() {
state->stack.pop_back();
state->result.append("}");
};
result.on_begin_string = +[](void *p) {
result.on_string_data = +[](void *p, const char *buf, int len, int done) {
auto *state = (SerializeState *)p;
if (!state->startedData) {
state->startedData = true;
state->on_begin_value();
state->result.append("<");
};
result.on_string_data = +[](void *p, const char *buf, int len) {
auto *state = (SerializeState *)p;
}
state->result.append(std::string(buf, len));
};
result.on_end_string = +[](void *p) {
auto *state = (SerializeState *)p;
if (done) {
state->startedData = false;
state->result.append(">");
}
};
result.on_begin_array = +[](void *p) {
auto *state = (SerializeState *)p;
@@ -109,18 +102,18 @@ inline WeaselJsonCallbacks serializeCallbacks() {
state->stack.pop_back();
state->result.append("]");
};
result.on_begin_number = +[](void *p) {
result.on_number_data = +[](void *p, const char *buf, int len, int done) {
auto *state = (SerializeState *)p;
if (!state->startedData) {
state->startedData = true;
state->on_begin_value();
state->result.append("(");
};
result.on_number_data = +[](void *p, const char *buf, int len) {
auto *state = (SerializeState *)p;
}
state->result.append(std::string(buf, len));
};
result.on_end_number = +[](void *p) {
auto *state = (SerializeState *)p;
if (done) {
state->startedData = false;
state->result.append(")");
}
};
result.on_true_literal = +[](void *p) {
auto *state = (SerializeState *)p;

View File

@@ -52,6 +52,7 @@ struct ReadValueState {
return;
}
}
bool startedData = false;
};
inline WeaselJsonCallbacks readValueCallbacks() {
@@ -66,17 +67,17 @@ inline WeaselJsonCallbacks readValueCallbacks() {
state->isKeyStack.pop_back();
state->on_end_value();
};
result.on_begin_string = +[](void *p) {
result.on_string_data = +[](void *p, const char *buf, int len, int done) {
auto *state = (ReadValueState *)p;
if (!state->startedData) {
state->startedData = true;
state->valueStack.emplace_back(std::string());
};
result.on_string_data = +[](void *p, const char *buf, int len) {
auto *state = (ReadValueState *)p;
}
std::get<std::string>(state->valueStack.back()).append(buf, len);
};
result.on_end_string = +[](void *p) {
auto *state = (ReadValueState *)p;
if (done) {
state->startedData = false;
state->on_end_value();
}
};
result.on_begin_array = +[](void *p) {
auto *state = (ReadValueState *)p;
@@ -86,17 +87,17 @@ inline WeaselJsonCallbacks readValueCallbacks() {
auto *state = (ReadValueState *)p;
state->on_end_value();
};
result.on_begin_number = +[](void *p) {
result.on_number_data = +[](void *p, const char *buf, int len, int done) {
auto *state = (ReadValueState *)p;
if (!state->startedData) {
state->startedData = true;
state->valueStack.emplace_back(JsonNumber());
};
result.on_number_data = +[](void *p, const char *buf, int len) {
auto *state = (ReadValueState *)p;
}
std::get<JsonNumber>(state->valueStack.back()).append(buf, len);
};
result.on_end_number = +[](void *p) {
auto *state = (ReadValueState *)p;
if (done) {
state->startedData = false;
state->on_end_value();
}
};
result.on_true_literal = +[](void *p) {
auto *state = (ReadValueState *)p;

View File

@@ -80,17 +80,19 @@ struct Parser3 {
return keepGoing(this);
}
void flushNumber() {
void flushNumber(bool done) {
int len = buf - dataBegin;
if (len > 0) {
callbacks->on_number_data(data, dataBegin, len);
assert(len >= 0);
if (done || len > 0) {
callbacks->on_number_data(data, dataBegin, len, done);
}
}
void flushString() {
void flushString(bool done) {
int len = writeBuf - dataBegin;
if (len > 0) {
callbacks->on_string_data(data, dataBegin, len);
assert(len >= 0);
if (done || len > 0) {
callbacks->on_string_data(data, dataBegin, len, done);
}
dataBegin = writeBuf;
}
@@ -192,7 +194,6 @@ inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self) {
}
break;
case '"':
self->callbacks->on_begin_string(self->data);
++self->buf;
self->dataBegin = self->writeBuf = self->buf;
self->pop();
@@ -205,7 +206,6 @@ inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self) {
if (auto s = self->push({N_FRACTION, N_EXPONENT})) {
return s;
}
self->callbacks->on_begin_number(self->data);
self->dataBegin = self->buf;
++self->buf;
MUSTTAIL return Parser3::keepGoing(self);
@@ -219,7 +219,6 @@ inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self) {
case '8':
case '9':
self->pop();
self->callbacks->on_begin_number(self->data);
self->dataBegin = self->buf;
++self->buf;
if (auto s = self->push({N_DIGITS2, N_FRACTION, N_EXPONENT})) {
@@ -228,7 +227,6 @@ inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self) {
MUSTTAIL return Parser3::keepGoing(self);
case '-':
self->pop();
self->callbacks->on_begin_number(self->data);
self->dataBegin = self->buf;
++self->buf;
if (auto s = self->push({N_INTEGER2, N_FRACTION, N_EXPONENT})) {
@@ -304,7 +302,6 @@ inline PRESERVE_NONE WeaselJsonStatus n_object2(Parser3 *self) {
self->callbacks->on_end_object(self->data);
MUSTTAIL return Parser3::keepGoing(self);
case '"':
self->callbacks->on_begin_string(self->data);
++self->buf;
self->dataBegin = self->writeBuf = self->buf;
self->pop();
@@ -403,7 +400,6 @@ inline PRESERVE_NONE WeaselJsonStatus n_string(Parser3 *self) {
if (*self->buf != '"') [[unlikely]] {
return WeaselJson_REJECT;
}
self->callbacks->on_begin_string(self->data);
++self->buf;
self->dataBegin = self->writeBuf = self->buf;
self->pop();
@@ -480,7 +476,7 @@ inline PRESERVE_NONE WeaselJsonStatus n_string2(Parser3 *self) {
self->writeBuf += len;
if (self->buf == self->bufEnd) {
self->flushString();
self->flushString(false);
return WeaselJson_AGAIN;
}
@@ -488,8 +484,7 @@ inline PRESERVE_NONE WeaselJsonStatus n_string2(Parser3 *self) {
case Tables::NORMAL:
__builtin_unreachable();
case Tables::DUBQUOTE:
self->flushString();
self->callbacks->on_end_string(self->data);
self->flushString(true);
++self->buf;
self->pop();
MUSTTAIL return Parser3::keepGoing(self);
@@ -660,7 +655,7 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self) {
bool useTmp = self->buf - self->writeBuf < 2;
char *p = tmp;
if (useTmp) {
self->flushString();
self->flushString(false);
}
auto &w = useTmp ? p : self->writeBuf;
w[1] = (0b00111111 & self->utf8Codepoint) | 0b10000000;
@@ -668,7 +663,7 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self) {
w[0] = (0b00011111 & self->utf8Codepoint) | 0b11000000;
w += 2;
if (useTmp) {
self->callbacks->on_string_data(self->data, tmp, 2);
self->callbacks->on_string_data(self->data, tmp, 2, false);
}
} else {
assert(self->utf8Codepoint < 0x10000);
@@ -686,7 +681,7 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self) {
bool useTmp = self->buf - self->writeBuf < 3;
char *p = tmp;
if (useTmp) {
self->flushString();
self->flushString(false);
}
auto &w = useTmp ? p : self->writeBuf;
w[2] = (0b00111111 & self->utf8Codepoint) | 0b10000000;
@@ -696,7 +691,7 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self) {
w[0] = (0b00001111 & self->utf8Codepoint) | 0b11100000;
w += 3;
if (useTmp) {
self->callbacks->on_string_data(self->data, tmp, 3);
self->callbacks->on_string_data(self->data, tmp, 3, false);
}
}
@@ -736,7 +731,7 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex3(Parser3 *self) {
bool useTmp = self->buf - self->writeBuf < 4;
char *p = tmp;
if (useTmp) {
self->flushString();
self->flushString(false);
}
auto &w = useTmp ? p : self->writeBuf;
w[3] = (0b00111111 & self->utf8Codepoint) | 0b10000000;
@@ -748,7 +743,7 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex3(Parser3 *self) {
w[0] = (0b00000111 & self->utf8Codepoint) | 0b11110000;
w += 4;
if (useTmp) {
self->callbacks->on_string_data(self->data, tmp, 4);
self->callbacks->on_string_data(self->data, tmp, 4, false);
}
self->pop();
@@ -756,7 +751,6 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex3(Parser3 *self) {
}
inline PRESERVE_NONE WeaselJsonStatus n_integer(Parser3 *self) {
self->callbacks->on_begin_number(self->data);
self->dataBegin = self->buf;
switch (*self->buf) {
case '0':
@@ -882,7 +876,6 @@ inline PRESERVE_NONE WeaselJsonStatus n_fraction(Parser3 *self) {
}
}
// Responsible for ensuring that on_end_number gets called
inline PRESERVE_NONE WeaselJsonStatus n_exponent(Parser3 *self) {
if (self->len() == 0) {
self->pop();
@@ -899,8 +892,7 @@ inline PRESERVE_NONE WeaselJsonStatus n_exponent(Parser3 *self) {
MUSTTAIL return Parser3::keepGoing(self);
default:
self->pop();
self->flushNumber();
self->callbacks->on_end_number(self->data);
self->flushNumber(true);
MUSTTAIL return Parser3::keepGoing(self);
}
}
@@ -984,8 +976,7 @@ inline PRESERVE_NONE WeaselJsonStatus t_eof(Parser3 *self) {
inline PRESERVE_NONE WeaselJsonStatus t_end_number(Parser3 *self) {
self->pop();
self->flushNumber();
self->callbacks->on_end_number(self->data);
self->flushNumber(true);
MUSTTAIL return Parser3::keepGoing(self);
}
@@ -1098,9 +1089,8 @@ inline PRESERVE_NONE WeaselJsonStatus Parser3::keepGoing(Parser3 *self) {
case T_DIGIT:
case T_ONENINE:
case T_END_NUMBER:
self->flushNumber();
self->flushNumber(false);
break;
case N_STRING:
case N_STRING2:
case N_STRING_FOLLOWING_ESCAPE:
case T_UTF8_CONTINUATION_BYTE:
@@ -1110,8 +1100,10 @@ inline PRESERVE_NONE WeaselJsonStatus Parser3::keepGoing(Parser3 *self) {
case T_HEX3:
case T_BACKSLASH:
case T_U2:
self->flushString();
self->flushString(false);
break;
case N_STRING: // The beginning of the string is in the future in this
// state. There's no data to flush yet
case N_VALUE:
case N_OBJECT2:
case N_OBJECT3:

View File

@@ -230,7 +230,7 @@ void doTestUnescapingUtf8(std::string const &escaped,
CAPTURE(stride);
auto c = noopCallbacks();
std::string result;
c.on_string_data = +[](void *p, const char *buf, int len) {
c.on_string_data = +[](void *p, const char *buf, int len, int /*done*/) {
auto &s = *(std::string *)p;
s.append(buf, len);
};

View File

@@ -5,21 +5,19 @@ import os
from typing import Optional
event_callback = ctypes.CFUNCTYPE(None, ctypes.c_void_p)
data_callback = ctypes.CFUNCTYPE(None, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int)
data_callback = ctypes.CFUNCTYPE(
None, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_int
)
class WeaselJsonCallbacks(ctypes.Structure):
_fields_ = [
("on_begin_object", event_callback),
("on_end_object", event_callback),
("on_begin_string", event_callback),
("on_string_data", data_callback),
("on_end_string", event_callback),
("on_begin_array", event_callback),
("on_end_array", event_callback),
("on_begin_number", event_callback),
("on_number_data", data_callback),
("on_end_number", event_callback),
("on_true_literal", event_callback),
("on_false_literal", event_callback),
("on_null_literal", event_callback),
@@ -40,13 +38,7 @@ class WeaselJsonCallbacksBase:
def on_end_object(self):
pass
def on_begin_string(self):
pass
def on_string_data(self, data):
pass
def on_end_string(self):
def on_string_data(self, data, done):
pass
def on_begin_array(self):
@@ -55,13 +47,7 @@ class WeaselJsonCallbacksBase:
def on_end_array(self):
pass
def on_begin_number(self):
pass
def on_number_data(self, data):
pass
def on_end_number(self):
def on_number_data(self, data, done):
pass
def on_true_literal(self):
@@ -155,22 +141,10 @@ def on_end_object(p):
self.on_end_object()
@ctypes.CFUNCTYPE(None, ctypes.c_void_p)
def on_begin_string(p):
@ctypes.CFUNCTYPE(None, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_int)
def on_string_data(p, buf, len, done):
self = ctypes.cast(p, ctypes.POINTER(ctypes.py_object)).contents.value
self.on_begin_string()
@ctypes.CFUNCTYPE(None, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int)
def on_string_data(p, buf, len):
self = ctypes.cast(p, ctypes.POINTER(ctypes.py_object)).contents.value
self.on_string_data(bytes(ctypes.string_at(buf, len)))
@ctypes.CFUNCTYPE(None, ctypes.c_void_p)
def on_end_string(p):
self = ctypes.cast(p, ctypes.POINTER(ctypes.py_object)).contents.value
self.on_end_string()
self.on_string_data(bytes(ctypes.string_at(buf, len)), bool(done))
@ctypes.CFUNCTYPE(None, ctypes.c_void_p)
@@ -185,22 +159,10 @@ def on_end_array(p):
self.on_end_array()
@ctypes.CFUNCTYPE(None, ctypes.c_void_p)
def on_begin_number(p):
@ctypes.CFUNCTYPE(None, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_int)
def on_number_data(p, buf, len, done):
self = ctypes.cast(p, ctypes.POINTER(ctypes.py_object)).contents.value
self.on_begin_number()
@ctypes.CFUNCTYPE(None, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int)
def on_number_data(p, buf, len):
self = ctypes.cast(p, ctypes.POINTER(ctypes.py_object)).contents.value
self.on_number_data(bytes(ctypes.string_at(buf, len)))
@ctypes.CFUNCTYPE(None, ctypes.c_void_p)
def on_end_number(p):
self = ctypes.cast(p, ctypes.POINTER(ctypes.py_object)).contents.value
self.on_end_number()
self.on_number_data(bytes(ctypes.string_at(buf, len)), bool(done))
@ctypes.CFUNCTYPE(None, ctypes.c_void_p)
@@ -224,14 +186,10 @@ def on_null_literal(p):
c_callbacks = WeaselJsonCallbacks(
on_begin_object,
on_end_object,
on_begin_string,
on_string_data,
on_end_string,
on_begin_array,
on_end_array,
on_begin_number,
on_number_data,
on_end_number,
on_true_literal,
on_false_literal,
on_null_literal,
@@ -240,7 +198,7 @@ c_callbacks = WeaselJsonCallbacks(
class MyCallbacks(WeaselJsonCallbacksBase):
# override callbacks
def on_string_data(self, data):
def on_string_data(self, data, done):
print(data)