Remove on_{begin,end}_{string,number}

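Also add a `done` argument to the data callbacks (`on_string_data` and `on_number_data`); it is true on the final chunk of a value, so callers detect the start and end of a string or number from the data callback itself.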
This commit is contained in:
2025-05-25 21:01:37 -04:00
parent f92b33eec3
commit f6cd807da3
6 changed files with 95 additions and 154 deletions

View File

@@ -11,18 +11,14 @@ inline WeaselJsonCallbacks printCallbacks() {
WeaselJsonCallbacks result;
result.on_begin_object = +[](void *) { puts("on_begin_object"); };
result.on_end_object = +[](void *) { puts("on_end_object"); };
result.on_begin_string = +[](void *) { puts("on_begin_string"); };
result.on_string_data = +[](void *, const char *buf, int len) {
result.on_string_data = +[](void *, const char *buf, int len, int /*done*/) {
printf("on_string_data `%.*s`\n", len, buf);
};
result.on_end_string = +[](void *) { puts("on_end_string"); };
result.on_begin_array = +[](void *) { puts("on_begin_array"); };
result.on_end_array = +[](void *) { puts("on_end_array"); };
result.on_begin_number = +[](void *) { puts("on_begin_number"); };
result.on_number_data = +[](void *, const char *buf, int len) {
result.on_number_data = +[](void *, const char *buf, int len, int /*done*/) {
printf("on_number_data `%.*s`\n", len, buf);
};
result.on_end_number = +[](void *) { puts("on_end_number"); };
result.on_true_literal = +[](void *) { puts("on_true_literal"); };
result.on_false_literal = +[](void *) { puts("on_false_literal"); };
result.on_null_literal = +[](void *) { puts("on_null_literal"); };
@@ -33,14 +29,10 @@ inline WeaselJsonCallbacks noopCallbacks() {
WeaselJsonCallbacks result;
result.on_begin_object = +[](void *) {};
result.on_end_object = +[](void *) {};
result.on_begin_string = +[](void *) {};
result.on_string_data = +[](void *, const char *, int) {};
result.on_end_string = +[](void *) {};
result.on_string_data = +[](void *, const char *, int, int) {};
result.on_begin_array = +[](void *) {};
result.on_end_array = +[](void *) {};
result.on_begin_number = +[](void *) {};
result.on_number_data = +[](void *, const char *, int) {};
result.on_end_number = +[](void *) {};
result.on_number_data = +[](void *, const char *, int, int) {};
result.on_true_literal = +[](void *) {};
result.on_false_literal = +[](void *) {};
result.on_null_literal = +[](void *) {};
@@ -70,6 +62,7 @@ struct SerializeState {
}
}
std::vector<Cursor> stack;
bool startedData = false;
};
inline WeaselJsonCallbacks serializeCallbacks() {
@@ -85,18 +78,18 @@ inline WeaselJsonCallbacks serializeCallbacks() {
state->stack.pop_back();
state->result.append("}");
};
result.on_begin_string = +[](void *p) {
auto *state = (SerializeState *)p;
state->on_begin_value();
state->result.append("<");
};
result.on_string_data = +[](void *p, const char *buf, int len) {
result.on_string_data = +[](void *p, const char *buf, int len, int done) {
auto *state = (SerializeState *)p;
if (!state->startedData) {
state->startedData = true;
state->on_begin_value();
state->result.append("<");
}
state->result.append(std::string(buf, len));
};
result.on_end_string = +[](void *p) {
auto *state = (SerializeState *)p;
state->result.append(">");
if (done) {
state->startedData = false;
state->result.append(">");
}
};
result.on_begin_array = +[](void *p) {
auto *state = (SerializeState *)p;
@@ -109,18 +102,18 @@ inline WeaselJsonCallbacks serializeCallbacks() {
state->stack.pop_back();
state->result.append("]");
};
result.on_begin_number = +[](void *p) {
auto *state = (SerializeState *)p;
state->on_begin_value();
state->result.append("(");
};
result.on_number_data = +[](void *p, const char *buf, int len) {
result.on_number_data = +[](void *p, const char *buf, int len, int done) {
auto *state = (SerializeState *)p;
if (!state->startedData) {
state->startedData = true;
state->on_begin_value();
state->result.append("(");
}
state->result.append(std::string(buf, len));
};
result.on_end_number = +[](void *p) {
auto *state = (SerializeState *)p;
state->result.append(")");
if (done) {
state->startedData = false;
state->result.append(")");
}
};
result.on_true_literal = +[](void *p) {
auto *state = (SerializeState *)p;

View File

@@ -52,6 +52,7 @@ struct ReadValueState {
return;
}
}
bool startedData = false;
};
inline WeaselJsonCallbacks readValueCallbacks() {
@@ -66,17 +67,17 @@ inline WeaselJsonCallbacks readValueCallbacks() {
state->isKeyStack.pop_back();
state->on_end_value();
};
result.on_begin_string = +[](void *p) {
auto *state = (ReadValueState *)p;
state->valueStack.emplace_back(std::string());
};
result.on_string_data = +[](void *p, const char *buf, int len) {
result.on_string_data = +[](void *p, const char *buf, int len, int done) {
auto *state = (ReadValueState *)p;
if (!state->startedData) {
state->startedData = true;
state->valueStack.emplace_back(std::string());
}
std::get<std::string>(state->valueStack.back()).append(buf, len);
};
result.on_end_string = +[](void *p) {
auto *state = (ReadValueState *)p;
state->on_end_value();
if (done) {
state->startedData = false;
state->on_end_value();
}
};
result.on_begin_array = +[](void *p) {
auto *state = (ReadValueState *)p;
@@ -86,17 +87,17 @@ inline WeaselJsonCallbacks readValueCallbacks() {
auto *state = (ReadValueState *)p;
state->on_end_value();
};
result.on_begin_number = +[](void *p) {
auto *state = (ReadValueState *)p;
state->valueStack.emplace_back(JsonNumber());
};
result.on_number_data = +[](void *p, const char *buf, int len) {
result.on_number_data = +[](void *p, const char *buf, int len, int done) {
auto *state = (ReadValueState *)p;
if (!state->startedData) {
state->startedData = true;
state->valueStack.emplace_back(JsonNumber());
}
std::get<JsonNumber>(state->valueStack.back()).append(buf, len);
};
result.on_end_number = +[](void *p) {
auto *state = (ReadValueState *)p;
state->on_end_value();
if (done) {
state->startedData = false;
state->on_end_value();
}
};
result.on_true_literal = +[](void *p) {
auto *state = (ReadValueState *)p;

View File

@@ -80,17 +80,19 @@ struct Parser3 {
return keepGoing(this);
}
void flushNumber() {
void flushNumber(bool done) {
int len = buf - dataBegin;
if (len > 0) {
callbacks->on_number_data(data, dataBegin, len);
assert(len >= 0);
if (done || len > 0) {
callbacks->on_number_data(data, dataBegin, len, done);
}
}
void flushString() {
void flushString(bool done) {
int len = writeBuf - dataBegin;
if (len > 0) {
callbacks->on_string_data(data, dataBegin, len);
assert(len >= 0);
if (done || len > 0) {
callbacks->on_string_data(data, dataBegin, len, done);
}
dataBegin = writeBuf;
}
@@ -192,7 +194,6 @@ inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self) {
}
break;
case '"':
self->callbacks->on_begin_string(self->data);
++self->buf;
self->dataBegin = self->writeBuf = self->buf;
self->pop();
@@ -205,7 +206,6 @@ inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self) {
if (auto s = self->push({N_FRACTION, N_EXPONENT})) {
return s;
}
self->callbacks->on_begin_number(self->data);
self->dataBegin = self->buf;
++self->buf;
MUSTTAIL return Parser3::keepGoing(self);
@@ -219,7 +219,6 @@ inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self) {
case '8':
case '9':
self->pop();
self->callbacks->on_begin_number(self->data);
self->dataBegin = self->buf;
++self->buf;
if (auto s = self->push({N_DIGITS2, N_FRACTION, N_EXPONENT})) {
@@ -228,7 +227,6 @@ inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self) {
MUSTTAIL return Parser3::keepGoing(self);
case '-':
self->pop();
self->callbacks->on_begin_number(self->data);
self->dataBegin = self->buf;
++self->buf;
if (auto s = self->push({N_INTEGER2, N_FRACTION, N_EXPONENT})) {
@@ -304,7 +302,6 @@ inline PRESERVE_NONE WeaselJsonStatus n_object2(Parser3 *self) {
self->callbacks->on_end_object(self->data);
MUSTTAIL return Parser3::keepGoing(self);
case '"':
self->callbacks->on_begin_string(self->data);
++self->buf;
self->dataBegin = self->writeBuf = self->buf;
self->pop();
@@ -403,7 +400,6 @@ inline PRESERVE_NONE WeaselJsonStatus n_string(Parser3 *self) {
if (*self->buf != '"') [[unlikely]] {
return WeaselJson_REJECT;
}
self->callbacks->on_begin_string(self->data);
++self->buf;
self->dataBegin = self->writeBuf = self->buf;
self->pop();
@@ -480,7 +476,7 @@ inline PRESERVE_NONE WeaselJsonStatus n_string2(Parser3 *self) {
self->writeBuf += len;
if (self->buf == self->bufEnd) {
self->flushString();
self->flushString(false);
return WeaselJson_AGAIN;
}
@@ -488,8 +484,7 @@ inline PRESERVE_NONE WeaselJsonStatus n_string2(Parser3 *self) {
case Tables::NORMAL:
__builtin_unreachable();
case Tables::DUBQUOTE:
self->flushString();
self->callbacks->on_end_string(self->data);
self->flushString(true);
++self->buf;
self->pop();
MUSTTAIL return Parser3::keepGoing(self);
@@ -660,7 +655,7 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self) {
bool useTmp = self->buf - self->writeBuf < 2;
char *p = tmp;
if (useTmp) {
self->flushString();
self->flushString(false);
}
auto &w = useTmp ? p : self->writeBuf;
w[1] = (0b00111111 & self->utf8Codepoint) | 0b10000000;
@@ -668,7 +663,7 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self) {
w[0] = (0b00011111 & self->utf8Codepoint) | 0b11000000;
w += 2;
if (useTmp) {
self->callbacks->on_string_data(self->data, tmp, 2);
self->callbacks->on_string_data(self->data, tmp, 2, false);
}
} else {
assert(self->utf8Codepoint < 0x10000);
@@ -686,7 +681,7 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self) {
bool useTmp = self->buf - self->writeBuf < 3;
char *p = tmp;
if (useTmp) {
self->flushString();
self->flushString(false);
}
auto &w = useTmp ? p : self->writeBuf;
w[2] = (0b00111111 & self->utf8Codepoint) | 0b10000000;
@@ -696,7 +691,7 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self) {
w[0] = (0b00001111 & self->utf8Codepoint) | 0b11100000;
w += 3;
if (useTmp) {
self->callbacks->on_string_data(self->data, tmp, 3);
self->callbacks->on_string_data(self->data, tmp, 3, false);
}
}
@@ -736,7 +731,7 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex3(Parser3 *self) {
bool useTmp = self->buf - self->writeBuf < 4;
char *p = tmp;
if (useTmp) {
self->flushString();
self->flushString(false);
}
auto &w = useTmp ? p : self->writeBuf;
w[3] = (0b00111111 & self->utf8Codepoint) | 0b10000000;
@@ -748,7 +743,7 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex3(Parser3 *self) {
w[0] = (0b00000111 & self->utf8Codepoint) | 0b11110000;
w += 4;
if (useTmp) {
self->callbacks->on_string_data(self->data, tmp, 4);
self->callbacks->on_string_data(self->data, tmp, 4, false);
}
self->pop();
@@ -756,7 +751,6 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex3(Parser3 *self) {
}
inline PRESERVE_NONE WeaselJsonStatus n_integer(Parser3 *self) {
self->callbacks->on_begin_number(self->data);
self->dataBegin = self->buf;
switch (*self->buf) {
case '0':
@@ -882,7 +876,6 @@ inline PRESERVE_NONE WeaselJsonStatus n_fraction(Parser3 *self) {
}
}
// Responsible for ensuring that on_end_number gets called
inline PRESERVE_NONE WeaselJsonStatus n_exponent(Parser3 *self) {
if (self->len() == 0) {
self->pop();
@@ -899,8 +892,7 @@ inline PRESERVE_NONE WeaselJsonStatus n_exponent(Parser3 *self) {
MUSTTAIL return Parser3::keepGoing(self);
default:
self->pop();
self->flushNumber();
self->callbacks->on_end_number(self->data);
self->flushNumber(true);
MUSTTAIL return Parser3::keepGoing(self);
}
}
@@ -984,8 +976,7 @@ inline PRESERVE_NONE WeaselJsonStatus t_eof(Parser3 *self) {
inline PRESERVE_NONE WeaselJsonStatus t_end_number(Parser3 *self) {
self->pop();
self->flushNumber();
self->callbacks->on_end_number(self->data);
self->flushNumber(true);
MUSTTAIL return Parser3::keepGoing(self);
}
@@ -1098,9 +1089,8 @@ inline PRESERVE_NONE WeaselJsonStatus Parser3::keepGoing(Parser3 *self) {
case T_DIGIT:
case T_ONENINE:
case T_END_NUMBER:
self->flushNumber();
self->flushNumber(false);
break;
case N_STRING:
case N_STRING2:
case N_STRING_FOLLOWING_ESCAPE:
case T_UTF8_CONTINUATION_BYTE:
@@ -1110,8 +1100,10 @@ inline PRESERVE_NONE WeaselJsonStatus Parser3::keepGoing(Parser3 *self) {
case T_HEX3:
case T_BACKSLASH:
case T_U2:
self->flushString();
self->flushString(false);
break;
case N_STRING: // The beginning of the string is in the future in this
// state. There's no data to flush yet
case N_VALUE:
case N_OBJECT2:
case N_OBJECT3:

View File

@@ -230,7 +230,7 @@ void doTestUnescapingUtf8(std::string const &escaped,
CAPTURE(stride);
auto c = noopCallbacks();
std::string result;
c.on_string_data = +[](void *p, const char *buf, int len) {
c.on_string_data = +[](void *p, const char *buf, int len, int /*done*/) {
auto &s = *(std::string *)p;
s.append(buf, len);
};