Fix several unescaping issues

This commit is contained in:
2025-05-19 13:10:24 -04:00
parent 553a273a1b
commit d9bb22e6b1
2 changed files with 23 additions and 19 deletions

View File

@@ -37,9 +37,19 @@ std::pair<std::string, parser3::Status> runBatch(std::string copy) {
}
void testStreaming(std::string const &json) {
auto result1 = runStreaming(json);
auto result2 = runBatch(json);
if (result1 != result2) {
auto streaming = runStreaming(json);
auto batch = runBatch(json);
if (streaming != batch) {
if (streaming.second == batch.second && streaming.second != parser3::S_OK) {
// It's ok if the processed data doesn't match if parsing failed
return;
}
printf("streaming: %s, %s\n",
streaming.second == parser3::S_OK ? "accept" : "reject",
streaming.first.c_str());
printf("batch: %s, %s\n",
streaming.second == parser3::S_OK ? "accept" : "reject",
batch.first.c_str());
abort();
}
}

View File

@@ -353,7 +353,7 @@ inline Status n_string(Parser3 *self) {
}
self->callbacks->on_begin_string(self->data);
++self->buf;
self->dataBegin = self->buf;
self->dataBegin = self->writeBuf = self->buf;
self->pop();
if (auto s = self->push({N_STRING2})) {
return s;
@@ -372,10 +372,10 @@ inline Status n_string2(Parser3 *self) {
// one byte utf-8 encoding
switch (*self->buf) {
case '"':
++self->buf;
self->pop();
self->flushString();
self->callbacks->on_end_string(self->data);
++self->buf;
self->pop();
MUSTTAIL return Parser3::keepGoing(self);
case '\\':
++self->buf;
@@ -385,15 +385,14 @@ inline Status n_string2(Parser3 *self) {
}
MUSTTAIL return Parser3::keepGoing(self);
default:
++self->buf;
*self->writeBuf++ = *self->buf++;
MUSTTAIL return Parser3::keepGoing(self);
}
} else if ((*self->buf & 0b11100000) == 0b11000000) {
// two byte utf-8 encoding
self->utf8Codepoint = *self->buf & 0b00011111;
self->minCodepoint = 0x80;
++self->buf;
++self->writeBuf;
*self->writeBuf++ = *self->buf++;
self->pop();
if (auto s = self->push({T_UTF8_LAST_CONTINUATION_BYTE, N_STRING2})) {
return s;
@@ -404,8 +403,7 @@ inline Status n_string2(Parser3 *self) {
// three byte utf-8 encoding
self->utf8Codepoint = *self->buf & 0b00001111;
self->minCodepoint = 0x800;
++self->buf;
++self->writeBuf;
*self->writeBuf++ = *self->buf++;
self->pop();
if (auto s = self->push({T_UTF8_CONTINUATION_BYTE,
T_UTF8_LAST_CONTINUATION_BYTE, N_STRING2})) {
@@ -416,8 +414,7 @@ inline Status n_string2(Parser3 *self) {
// four byte utf-8 encoding
self->utf8Codepoint = *self->buf & 0b00000111;
self->minCodepoint = 0x10000;
++self->buf;
++self->writeBuf;
*self->writeBuf++ = *self->buf++;
self->pop();
if (auto s = self->push({T_UTF8_CONTINUATION_BYTE, T_UTF8_CONTINUATION_BYTE,
T_UTF8_LAST_CONTINUATION_BYTE, N_STRING2})) {
@@ -470,8 +467,7 @@ inline Status t_utf8_continuation_byte(Parser3 *self) {
if ((*self->buf & 0b11000000) == 0b10000000) {
self->utf8Codepoint <<= 6;
self->utf8Codepoint |= *self->buf & 0b00111111;
++self->buf;
++self->writeBuf;
*self->writeBuf++ = *self->buf++;
self->pop();
MUSTTAIL return Parser3::keepGoing(self);
}
@@ -494,8 +490,7 @@ inline Status t_utf8_last_continuation_byte(Parser3 *self) {
return S_REJECT;
}
// TODO tell valgrind utf8Codepoint and minCodepoint are uninitialized
++self->buf;
++self->writeBuf;
*self->writeBuf++ = *self->buf++;
self->pop();
MUSTTAIL return Parser3::keepGoing(self);
}
@@ -533,8 +528,7 @@ inline Status t_hex(Parser3 *self) {
if (('0' <= *self->buf && *self->buf <= '9') ||
('a' <= *self->buf && *self->buf <= 'f') ||
('A' <= *self->buf && *self->buf <= 'F')) {
++self->buf;
++self->writeBuf;
*self->writeBuf++ = *self->buf++;
self->pop();
MUSTTAIL return Parser3::keepGoing(self);
}