diff --git a/include/weaseljson.h b/include/weaseljson.h index cafce4a..54ab0cb 100644 --- a/include/weaseljson.h +++ b/include/weaseljson.h @@ -13,6 +13,11 @@ struct WeaselJsonCallbacks { * incomplete and there will be another call, potentially with more data */ void (*on_string_data)(void *userdata, const char *buf, int len, int done); + /** The key data provided has already been unescaped, unless the + * WeaselJsonRaw flag is used. If `done` is false, this key may be + * incomplete and there will be another call, potentially with more data + */ + void (*on_key_data)(void *userdata, const char *buf, int len, int done); void (*on_begin_array)(void *userdata); void (*on_end_array)(void *userdata); /*If `done` is false, this number may be incomplete and there will be another diff --git a/src/callbacks.h b/src/callbacks.h index cb9cd34..b6bb39f 100644 --- a/src/callbacks.h +++ b/src/callbacks.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -14,6 +15,9 @@ inline WeaselJsonCallbacks printCallbacks() { result.on_string_data = +[](void *, const char *buf, int len, int /*done*/) { printf("on_string_data `%.*s`\n", len, buf); }; + result.on_key_data = +[](void *, const char *buf, int len, int /*done*/) { + printf("on_key_data `%.*s`\n", len, buf); + }; result.on_begin_array = +[](void *) { puts("on_begin_array"); }; result.on_end_array = +[](void *) { puts("on_end_array"); }; result.on_number_data = +[](void *, const char *buf, int len, int /*done*/) { @@ -30,6 +34,7 @@ inline WeaselJsonCallbacks noopCallbacks() { result.on_begin_object = +[](void *) {}; result.on_end_object = +[](void *) {}; result.on_string_data = +[](void *, const char *, int, int) {}; + result.on_key_data = +[](void *, const char *, int, int) {}; result.on_begin_array = +[](void *) {}; result.on_end_array = +[](void *) {}; result.on_number_data = +[](void *, const char *, int, int) {}; @@ -58,6 +63,11 @@ struct SerializeState { if (!back.isObject && back.index > 0) { result.append(","); } + } + } + void on_end_value() { + if (!stack.empty()) { + auto &back = stack.back(); ++back.index; } } @@ -77,6 +87,7 @@ inline WeaselJsonCallbacks serializeCallbacks() { auto *state = (SerializeState *)p; state->stack.pop_back(); state->result.append("}"); + state->on_end_value(); }; result.on_string_data = +[](void *p, const char *buf, int len, int done) { auto *state = (SerializeState *)p; @@ -86,7 +97,28 @@ inline WeaselJsonCallbacks serializeCallbacks() { state->result.append("<"); } state->result.append(std::string(buf, len)); + if (!state->stack.empty() && state->stack.back().isObject) { + assert(state->stack.back().index % 2 == 1); + } if (done) { + state->on_end_value(); + state->startedData = false; + state->result.append(">"); + } + }; + result.on_key_data = +[](void *p, const char *buf, int len, int done) { + auto *state = (SerializeState *)p; + if (!state->startedData) { + state->startedData = true; + state->on_begin_value(); + state->result.append("<"); + } + state->result.append(std::string(buf, len)); + assert(!state->stack.empty()); + assert(state->stack.back().isObject); + assert(state->stack.back().index % 2 == 0); + if (done) { + state->on_end_value(); state->startedData = false; state->result.append(">"); } @@ -101,6 +133,7 @@ inline WeaselJsonCallbacks serializeCallbacks() { auto *state = (SerializeState *)p; state->stack.pop_back(); state->result.append("]"); + state->on_end_value(); }; result.on_number_data = +[](void *p, const char *buf, int len, int done) { auto *state = (SerializeState *)p; @@ -112,6 +145,7 @@ inline WeaselJsonCallbacks serializeCallbacks() { state->result.append(std::string(buf, len)); if (done) { state->startedData = false; + state->on_end_value(); state->result.append(")"); } }; @@ -119,16 +153,19 @@ inline WeaselJsonCallbacks serializeCallbacks() { auto *state = (SerializeState *)p; state->on_begin_value(); state->result.append("true"); + state->on_end_value(); }; result.on_false_literal = +[](void *p) { auto *state = (SerializeState *)p; state->on_begin_value(); state->result.append("false"); + state->on_end_value(); }; result.on_null_literal = +[](void *p) { auto *state = (SerializeState *)p; state->on_begin_value(); state->result.append("null"); + state->on_end_value(); }; return result; } diff --git a/src/json_value.h b/src/json_value.h index 76b79f9..cdf797a 100644 --- a/src/json_value.h +++ b/src/json_value.h @@ -79,6 +79,18 @@ inline WeaselJsonCallbacks readValueCallbacks() { state->on_end_value(); } }; + result.on_key_data = +[](void *p, const char *buf, int len, int done) { + auto *state = (ReadValueState *)p; + if (!state->startedData) { + state->startedData = true; + state->valueStack.emplace_back(std::string()); + } + std::get(state->valueStack.back()).append(buf, len); + if (done) { + state->startedData = false; + state->on_end_value(); + } + }; result.on_begin_array = +[](void *p) { auto *state = (ReadValueState *)p; state->valueStack.emplace_back(std::make_unique()); diff --git a/src/parser3.h b/src/parser3.h index 4d4f10c..668f983 100644 --- a/src/parser3.h +++ b/src/parser3.h @@ -90,7 +90,11 @@ struct Parser3 { } assert(len >= 0); if (done || len > 0) { - callbacks->on_string_data(userdata, dataBegin, len, done); + if (inKey) { + callbacks->on_key_data(userdata, dataBegin, len, done); + } else { + callbacks->on_string_data(userdata, dataBegin, len, done); + } } } @@ -141,6 +145,7 @@ struct Parser3 { uint32_t minCodepoint; NumDfa numDfa; Utf8Dfa strDfa; + bool inKey = false; }; inline PRESERVE_NONE WeaselJsonStatus skipWhitespace(char *&buf, char *bufEnd) { @@ -397,6 +402,8 @@ inline PRESERVE_NONE WeaselJsonStatus n_object2(Parser3 *self, char *buf, } MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); case '"': + assert(!self->inKey); + self->inKey = true; ++buf; self->dataBegin = self->writeBuf = buf; self->pop(); @@ -432,6 +439,8 @@ inline PRESERVE_NONE WeaselJsonStatus n_object3(Parser3 *self, char *buf, MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); case ',': ++buf; + assert(!self->inKey); + self->inKey = true; self->pop(); if (auto s = self->push({N_STRING, T_COLON, N_VALUE, N_OBJECT3})) { return s; @@ -726,7 +735,11 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self, char *buf, w[0] = (0b00011111 & self->utf8Codepoint) | 0b11000000; w += 2; if (useTmp) [[unlikely]] { - self->callbacks->on_string_data(self->userdata, tmp, 2, false); + if (self->inKey) { + self->callbacks->on_key_data(self->userdata, tmp, 2, false); + } else { + self->callbacks->on_string_data(self->userdata, tmp, 2, false); + } } } } else { @@ -760,7 +773,11 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self, char *buf, w[0] = (0b00001111 & self->utf8Codepoint) | 0b11100000; w += 3; if (useTmp) [[unlikely]] { - self->callbacks->on_string_data(self->userdata, tmp, 3, false); + if (self->inKey) { + self->callbacks->on_key_data(self->userdata, tmp, 3, false); + } else { + self->callbacks->on_string_data(self->userdata, tmp, 3, false); + } } } } @@ -818,7 +835,11 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex3(Parser3 *self, char *buf, w[0] = (0b00000111 & self->utf8Codepoint) | 0b11110000; w += 4; if (useTmp) [[unlikely]] { - self->callbacks->on_string_data(self->userdata, tmp, 4, false); + if (self->inKey) { + self->callbacks->on_key_data(self->userdata, tmp, 4, false); + } else { + self->callbacks->on_string_data(self->userdata, tmp, 4, false); + } } } @@ -904,17 +925,12 @@ inline PRESERVE_NONE WeaselJsonStatus n_null(Parser3 *self, char *buf, } } -template +template inline PRESERVE_NONE WeaselJsonStatus singleChar(Parser3 *self, char *buf, char *bufEnd) { if (buf == bufEnd) [[unlikely]] { return WeaselJson_REJECT; } - if constexpr (kSkipWhitespace) { - if (auto s = skipWhitespace(buf, bufEnd)) { - return s; - } - } if (*buf == kChar) { ++buf; self->pop(); @@ -927,6 +943,28 @@ inline PRESERVE_NONE WeaselJsonStatus singleChar(Parser3 *self, char *buf, } } +inline PRESERVE_NONE WeaselJsonStatus t_colon(Parser3 *self, char *buf, + char *bufEnd) { + if (buf == bufEnd) [[unlikely]] { + return WeaselJson_REJECT; + } + if (auto s = skipWhitespace(buf, bufEnd)) { + return s; + } + if (*buf == ':') { + ++buf; + assert(self->inKey); + self->inKey = false; + self->pop(); + if (buf == bufEnd) { + return WeaselJson_AGAIN; + } + MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); + } else [[unlikely]] { + return WeaselJson_REJECT; + } +} + inline PRESERVE_NONE WeaselJsonStatus t_eof(Parser3 *, char *buf, char *bufEnd) { if (buf != bufEnd) [[unlikely]] { @@ -963,7 +1001,7 @@ constexpr inline struct ContinuationTable { continuations[T_A] = singleChar<'a'>; continuations[T_L] = singleChar<'l'>; continuations[T_S] = singleChar<'s'>; - continuations[T_COLON] = singleChar<':', true>; + continuations[T_COLON] = t_colon; continuations[T_HEX] = t_hex; continuations[T_HEX2] = t_hex2; continuations[T_HEX3] = t_hex3; diff --git a/src/test.cpp b/src/test.cpp index eb70738..280d4f0 100644 --- a/src/test.cpp +++ b/src/test.cpp @@ -215,6 +215,10 @@ void doTestUnescapingUtf8(std::string const &escaped, auto &s = *(std::string *)p; s.append(buf, len); }; + c.on_key_data = +[](void *p, const char *buf, int len, int /*done*/) { + auto &s = *(std::string *)p; + s.append(buf, len); + }; auto *parser = WeaselJsonParser_create(1024, &c, &result, flags); auto copy = escaped; for (size_t i = 0; i < copy.size(); i += stride) {