Call on_string_data with all available data

This commit is contained in:
2025-05-19 12:41:12 -04:00
parent b7cacf13f4
commit d5bd9fc018

View File

@@ -86,7 +86,16 @@ struct Parser3 {
void flushNumber() { void flushNumber() {
int len = buf - dataBegin; int len = buf - dataBegin;
callbacks->on_number_data(data, dataBegin, len); if (len > 0) {
callbacks->on_number_data(data, dataBegin, len);
}
}
void flushString() {
int len = buf - dataBegin;
if (len > 0) {
callbacks->on_string_data(data, dataBegin, len);
}
} }
[[nodiscard]] bool empty() const { return stackPtr == stack; } [[nodiscard]] bool empty() const { return stackPtr == stack; }
@@ -340,6 +349,7 @@ inline Status n_string(Parser3 *self) {
} }
self->callbacks->on_begin_string(self->data); self->callbacks->on_begin_string(self->data);
++self->buf; ++self->buf;
self->dataBegin = self->buf;
self->pop(); self->pop();
if (auto s = self->push({N_STRING2})) { if (auto s = self->push({N_STRING2})) {
return s; return s;
@@ -354,13 +364,11 @@ inline Status n_string2(Parser3 *self) {
if (tables.invalidStringByte[uint8_t(*self->buf)]) { if (tables.invalidStringByte[uint8_t(*self->buf)]) {
return S_REJECT; return S_REJECT;
} }
if (*self->buf != '"') {
self->callbacks->on_string_data(self->data, self->buf, 1);
}
if (int8_t(*self->buf) > 0) { if (int8_t(*self->buf) > 0) {
// one byte utf-8 encoding // one byte utf-8 encoding
switch (*self->buf) { switch (*self->buf) {
case '"': case '"':
self->flushString();
++self->buf; ++self->buf;
self->pop(); self->pop();
self->callbacks->on_end_string(self->data); self->callbacks->on_end_string(self->data);
@@ -417,7 +425,6 @@ inline Status n_string_following_escape(Parser3 *self) {
if (self->len() == 0) { if (self->len() == 0) {
return S_REJECT; return S_REJECT;
} }
self->callbacks->on_string_data(self->data, self->buf, 1);
switch (*self->buf) { switch (*self->buf) {
case '"': case '"':
case '\\': case '\\':
@@ -455,7 +462,6 @@ inline Status t_utf8_continuation_byte(Parser3 *self) {
if ((*self->buf & 0b11000000) == 0b10000000) { if ((*self->buf & 0b11000000) == 0b10000000) {
self->utf8Codepoint <<= 6; self->utf8Codepoint <<= 6;
self->utf8Codepoint |= *self->buf & 0b00111111; self->utf8Codepoint |= *self->buf & 0b00111111;
self->callbacks->on_string_data(self->data, self->buf, 1);
++self->buf; ++self->buf;
self->pop(); self->pop();
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
@@ -479,7 +485,6 @@ inline Status t_utf8_last_continuation_byte(Parser3 *self) {
return S_REJECT; return S_REJECT;
} }
// TODO tell valgrind utf8Codepoint and minCodepoint are uninitialized // TODO tell valgrind utf8Codepoint and minCodepoint are uninitialized
self->callbacks->on_string_data(self->data, self->buf, 1);
++self->buf; ++self->buf;
self->pop(); self->pop();
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
@@ -518,7 +523,6 @@ inline Status t_hex(Parser3 *self) {
if (('0' <= *self->buf && *self->buf <= '9') || if (('0' <= *self->buf && *self->buf <= '9') ||
('a' <= *self->buf && *self->buf <= 'f') || ('a' <= *self->buf && *self->buf <= 'f') ||
('A' <= *self->buf && *self->buf <= 'F')) { ('A' <= *self->buf && *self->buf <= 'F')) {
self->callbacks->on_string_data(self->data, self->buf, 1);
++self->buf; ++self->buf;
self->pop(); self->pop();
MUSTTAIL return Parser3::keepGoing(self); MUSTTAIL return Parser3::keepGoing(self);
@@ -889,6 +893,14 @@ inline Status Parser3::keepGoing(Parser3 *self) {
case T_END_NUMBER: case T_END_NUMBER:
self->flushNumber(); self->flushNumber();
break; break;
case N_STRING:
case N_STRING2:
case N_STRING_FOLLOWING_ESCAPE:
case T_UTF8_CONTINUATION_BYTE:
case T_UTF8_LAST_CONTINUATION_BYTE:
case T_HEX:
self->flushString();
break;
case N_JSON: case N_JSON:
case N_VALUE: case N_VALUE:
case N_OBJECT: case N_OBJECT:
@@ -898,9 +910,6 @@ inline Status Parser3::keepGoing(Parser3 *self) {
case N_ARRAY2: case N_ARRAY2:
case N_ARRAY3: case N_ARRAY3:
case N_ELEMENT: case N_ELEMENT:
case N_STRING:
case N_STRING2:
case N_STRING_FOLLOWING_ESCAPE:
case N_WHITESPACE: case N_WHITESPACE:
case N_TRUE: case N_TRUE:
case N_FALSE: case N_FALSE:
@@ -911,9 +920,6 @@ inline Status Parser3::keepGoing(Parser3 *self) {
case T_L: case T_L:
case T_S: case T_S:
case T_COLON: case T_COLON:
case T_UTF8_CONTINUATION_BYTE:
case T_UTF8_LAST_CONTINUATION_BYTE:
case T_HEX:
case T_EOF: case T_EOF:
case N_SYMBOL_COUNT: case N_SYMBOL_COUNT:
break; break;