Distinguish on_string_data and on_key_data

This commit is contained in:
2025-08-04 14:15:49 -04:00
parent 39fe9be4dc
commit bcb5a20f27
5 changed files with 107 additions and 11 deletions

View File

@@ -13,6 +13,11 @@ struct WeaselJsonCallbacks {
* incomplete and there will be another call, potentially with more data
*/
void (*on_string_data)(void *userdata, const char *buf, int len, int done);
/** The key data provided has already been unescaped, unless the
* WeaselJsonRaw flag is used. If `done` is false, this key may be
* incomplete and there will be another call, potentially with more data
*/
void (*on_key_data)(void *userdata, const char *buf, int len, int done);
void (*on_begin_array)(void *userdata);
void (*on_end_array)(void *userdata);
/*If `done` is false, this number may be incomplete and there will be another

View File

@@ -1,5 +1,6 @@
#pragma once
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <string>
@@ -14,6 +15,9 @@ inline WeaselJsonCallbacks printCallbacks() {
result.on_string_data = +[](void *, const char *buf, int len, int /*done*/) {
printf("on_string_data `%.*s`\n", len, buf);
};
// Debug callback for object keys: prints every chunk it receives on its own
// line. `done` is ignored, so a key delivered in several chunks produces
// several lines of output.
result.on_key_data = +[](void * /*userdata*/, const char *chunk, int chunkLen,
                         int /*done*/) {
  std::printf("on_key_data `%.*s`\n", chunkLen, chunk);
};
result.on_begin_array = +[](void *) { puts("on_begin_array"); };
result.on_end_array = +[](void *) { puts("on_end_array"); };
result.on_number_data = +[](void *, const char *buf, int len, int /*done*/) {
@@ -30,6 +34,7 @@ inline WeaselJsonCallbacks noopCallbacks() {
result.on_begin_object = +[](void *) {};
result.on_end_object = +[](void *) {};
result.on_string_data = +[](void *, const char *, int, int) {};
result.on_key_data = +[](void *, const char *, int, int) {};
result.on_begin_array = +[](void *) {};
result.on_end_array = +[](void *) {};
result.on_number_data = +[](void *, const char *, int, int) {};
@@ -58,6 +63,11 @@ struct SerializeState {
if (!back.isObject && back.index > 0) {
result.append(",");
}
}
}
// Records that one more value has been completed inside the entry currently
// on top of `stack` by incrementing its `index`. Does nothing when `stack`
// is empty.
void on_end_value() {
  if (stack.empty()) {
    return;
  }
  ++stack.back().index;
}
@@ -77,6 +87,7 @@ inline WeaselJsonCallbacks serializeCallbacks() {
auto *state = (SerializeState *)p;
state->stack.pop_back();
state->result.append("}");
state->on_end_value();
};
result.on_string_data = +[](void *p, const char *buf, int len, int done) {
auto *state = (SerializeState *)p;
@@ -86,7 +97,28 @@ inline WeaselJsonCallbacks serializeCallbacks() {
state->result.append("<");
}
state->result.append(std::string(buf, len));
if (!state->stack.empty() && state->stack.back().isObject) {
assert(state->stack.back().index % 2 == 1);
}
if (done) {
state->on_end_value();
state->startedData = false;
state->result.append(">");
}
};
result.on_key_data = +[](void *p, const char *buf, int len, int done) {
auto *state = (SerializeState *)p;
if (!state->startedData) {
state->startedData = true;
state->on_begin_value();
state->result.append("<");
}
state->result.append(std::string(buf, len));
assert(!state->stack.empty());
assert(state->stack.back().isObject);
assert(state->stack.back().index % 2 == 0);
if (done) {
state->on_end_value();
state->startedData = false;
state->result.append(">");
}
@@ -101,6 +133,7 @@ inline WeaselJsonCallbacks serializeCallbacks() {
auto *state = (SerializeState *)p;
state->stack.pop_back();
state->result.append("]");
state->on_end_value();
};
result.on_number_data = +[](void *p, const char *buf, int len, int done) {
auto *state = (SerializeState *)p;
@@ -112,6 +145,7 @@ inline WeaselJsonCallbacks serializeCallbacks() {
state->result.append(std::string(buf, len));
if (done) {
state->startedData = false;
state->on_end_value();
state->result.append(")");
}
};
@@ -119,16 +153,19 @@ inline WeaselJsonCallbacks serializeCallbacks() {
auto *state = (SerializeState *)p;
state->on_begin_value();
state->result.append("true");
state->on_end_value();
};
result.on_false_literal = +[](void *p) {
auto *state = (SerializeState *)p;
state->on_begin_value();
state->result.append("false");
state->on_end_value();
};
result.on_null_literal = +[](void *p) {
auto *state = (SerializeState *)p;
state->on_begin_value();
state->result.append("null");
state->on_end_value();
};
return result;
}

View File

@@ -79,6 +79,18 @@ inline WeaselJsonCallbacks readValueCallbacks() {
state->on_end_value();
}
};
// Key callback: accumulates the bytes of an object key, which may arrive in
// several chunks, into a std::string held at the top of `valueStack`.
result.on_key_data = +[](void *p, const char *buf, int len, int done) {
  auto *state = static_cast<ReadValueState *>(p);
  if (!state->startedData) {
    // First chunk of this key: start a fresh string to append into.
    state->startedData = true;
    state->valueStack.emplace_back(std::string());
  }
  auto &key = std::get<std::string>(state->valueStack.back());
  key.append(buf, len);
  if (!done) {
    // More chunks of the same key will follow.
    return;
  }
  state->startedData = false;
  state->on_end_value();
};
result.on_begin_array = +[](void *p) {
auto *state = (ReadValueState *)p;
state->valueStack.emplace_back(std::make_unique<JsonArray>());

View File

@@ -90,7 +90,11 @@ struct Parser3 {
}
assert(len >= 0);
if (done || len > 0) {
callbacks->on_string_data(userdata, dataBegin, len, done);
if (inKey) {
callbacks->on_key_data(userdata, dataBegin, len, done);
} else {
callbacks->on_string_data(userdata, dataBegin, len, done);
}
}
}
@@ -141,6 +145,7 @@ struct Parser3 {
uint32_t minCodepoint;
NumDfa numDfa;
Utf8Dfa strDfa;
bool inKey = false;
};
inline PRESERVE_NONE WeaselJsonStatus skipWhitespace(char *&buf, char *bufEnd) {
@@ -397,6 +402,8 @@ inline PRESERVE_NONE WeaselJsonStatus n_object2(Parser3 *self, char *buf,
}
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
case '"':
assert(!self->inKey);
self->inKey = true;
++buf;
self->dataBegin = self->writeBuf = buf;
self->pop();
@@ -432,6 +439,8 @@ inline PRESERVE_NONE WeaselJsonStatus n_object3(Parser3 *self, char *buf,
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
case ',':
++buf;
assert(!self->inKey);
self->inKey = true;
self->pop();
if (auto s = self->push({N_STRING, T_COLON, N_VALUE, N_OBJECT3})) {
return s;
@@ -726,7 +735,11 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self, char *buf,
w[0] = (0b00011111 & self->utf8Codepoint) | 0b11000000;
w += 2;
if (useTmp) [[unlikely]] {
self->callbacks->on_string_data(self->userdata, tmp, 2, false);
if (self->inKey) {
self->callbacks->on_key_data(self->userdata, tmp, 2, false);
} else {
self->callbacks->on_string_data(self->userdata, tmp, 2, false);
}
}
}
} else {
@@ -760,7 +773,11 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self, char *buf,
w[0] = (0b00001111 & self->utf8Codepoint) | 0b11100000;
w += 3;
if (useTmp) [[unlikely]] {
self->callbacks->on_string_data(self->userdata, tmp, 3, false);
if (self->inKey) {
self->callbacks->on_key_data(self->userdata, tmp, 3, false);
} else {
self->callbacks->on_string_data(self->userdata, tmp, 3, false);
}
}
}
}
@@ -818,7 +835,11 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex3(Parser3 *self, char *buf,
w[0] = (0b00000111 & self->utf8Codepoint) | 0b11110000;
w += 4;
if (useTmp) [[unlikely]] {
self->callbacks->on_string_data(self->userdata, tmp, 4, false);
if (self->inKey) {
self->callbacks->on_key_data(self->userdata, tmp, 4, false);
} else {
self->callbacks->on_string_data(self->userdata, tmp, 4, false);
}
}
}
@@ -904,17 +925,12 @@ inline PRESERVE_NONE WeaselJsonStatus n_null(Parser3 *self, char *buf,
}
}
template <char kChar, bool kSkipWhitespace = false>
template <char kChar>
inline PRESERVE_NONE WeaselJsonStatus singleChar(Parser3 *self, char *buf,
char *bufEnd) {
if (buf == bufEnd) [[unlikely]] {
return WeaselJson_REJECT;
}
if constexpr (kSkipWhitespace) {
if (auto s = skipWhitespace(buf, bufEnd)) {
return s;
}
}
if (*buf == kChar) {
++buf;
self->pop();
@@ -927,6 +943,28 @@ inline PRESERVE_NONE WeaselJsonStatus singleChar(Parser3 *self, char *buf,
}
}
// Continuation for the ':' that separates an object key from its value.
//
// Skips whitespace, consumes the colon, clears `self->inKey` (the flag that
// routes string data to `on_key_data` rather than `on_string_data`), calls
// `self->pop()` and hands control back to `Parser3::keepGoing`.
//
// Returns WeaselJson_REJECT when called with an empty range or when the next
// byte after whitespace is not ':', any truthy status reported by
// `skipWhitespace`, and WeaselJson_AGAIN when the input ends right after the
// colon.
inline PRESERVE_NONE WeaselJsonStatus t_colon(Parser3 *self, char *buf,
                                              char *bufEnd) {
  if (buf == bufEnd) [[unlikely]] {
    return WeaselJson_REJECT;
  }
  if (auto status = skipWhitespace(buf, bufEnd)) {
    return status;
  }
  if (*buf != ':') [[unlikely]] {
    return WeaselJson_REJECT;
  }
  ++buf;
  // A colon is only expected while a key is being parsed; the key ends here.
  assert(self->inKey);
  self->inKey = false;
  self->pop();
  if (buf == bufEnd) {
    return WeaselJson_AGAIN;
  }
  MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
}
inline PRESERVE_NONE WeaselJsonStatus t_eof(Parser3 *, char *buf,
char *bufEnd) {
if (buf != bufEnd) [[unlikely]] {
@@ -963,7 +1001,7 @@ constexpr inline struct ContinuationTable {
continuations[T_A] = singleChar<'a'>;
continuations[T_L] = singleChar<'l'>;
continuations[T_S] = singleChar<'s'>;
continuations[T_COLON] = singleChar<':', true>;
continuations[T_COLON] = t_colon;
continuations[T_HEX] = t_hex;
continuations[T_HEX2] = t_hex2;
continuations[T_HEX3] = t_hex3;

View File

@@ -215,6 +215,10 @@ void doTestUnescapingUtf8(std::string const &escaped,
auto &s = *(std::string *)p;
s.append(buf, len);
};
// Appends each chunk of key data to the std::string passed as userdata.
c.on_key_data = +[](void *p, const char *buf, int len, int /*done*/) {
  static_cast<std::string *>(p)->append(buf, len);
};
auto *parser = WeaselJsonParser_create(1024, &c, &result, flags);
auto copy = escaped;
for (size_t i = 0; i < copy.size(); i += stride) {