Fix several unescaping issues

This commit is contained in:
2025-05-19 13:10:24 -04:00
parent 553a273a1b
commit d9bb22e6b1
2 changed files with 23 additions and 19 deletions

View File

@@ -353,7 +353,7 @@ inline Status n_string(Parser3 *self) {
}
self->callbacks->on_begin_string(self->data);
++self->buf;
self->dataBegin = self->buf;
self->dataBegin = self->writeBuf = self->buf;
self->pop();
if (auto s = self->push({N_STRING2})) {
return s;
@@ -372,10 +372,10 @@ inline Status n_string2(Parser3 *self) {
// one byte utf-8 encoding
switch (*self->buf) {
case '"':
++self->buf;
self->pop();
self->flushString();
self->callbacks->on_end_string(self->data);
++self->buf;
self->pop();
MUSTTAIL return Parser3::keepGoing(self);
case '\\':
++self->buf;
@@ -385,15 +385,14 @@ inline Status n_string2(Parser3 *self) {
}
MUSTTAIL return Parser3::keepGoing(self);
default:
++self->buf;
*self->writeBuf++ = *self->buf++;
MUSTTAIL return Parser3::keepGoing(self);
}
} else if ((*self->buf & 0b11100000) == 0b11000000) {
// two byte utf-8 encoding
self->utf8Codepoint = *self->buf & 0b00011111;
self->minCodepoint = 0x80;
++self->buf;
++self->writeBuf;
*self->writeBuf++ = *self->buf++;
self->pop();
if (auto s = self->push({T_UTF8_LAST_CONTINUATION_BYTE, N_STRING2})) {
return s;
@@ -404,8 +403,7 @@ inline Status n_string2(Parser3 *self) {
// three byte utf-8 encoding
self->utf8Codepoint = *self->buf & 0b00001111;
self->minCodepoint = 0x800;
++self->buf;
++self->writeBuf;
*self->writeBuf++ = *self->buf++;
self->pop();
if (auto s = self->push({T_UTF8_CONTINUATION_BYTE,
T_UTF8_LAST_CONTINUATION_BYTE, N_STRING2})) {
@@ -416,8 +414,7 @@ inline Status n_string2(Parser3 *self) {
// four byte utf-8 encoding
self->utf8Codepoint = *self->buf & 0b00000111;
self->minCodepoint = 0x10000;
++self->buf;
++self->writeBuf;
*self->writeBuf++ = *self->buf++;
self->pop();
if (auto s = self->push({T_UTF8_CONTINUATION_BYTE, T_UTF8_CONTINUATION_BYTE,
T_UTF8_LAST_CONTINUATION_BYTE, N_STRING2})) {
@@ -470,8 +467,7 @@ inline Status t_utf8_continuation_byte(Parser3 *self) {
if ((*self->buf & 0b11000000) == 0b10000000) {
self->utf8Codepoint <<= 6;
self->utf8Codepoint |= *self->buf & 0b00111111;
++self->buf;
++self->writeBuf;
*self->writeBuf++ = *self->buf++;
self->pop();
MUSTTAIL return Parser3::keepGoing(self);
}
@@ -494,8 +490,7 @@ inline Status t_utf8_last_continuation_byte(Parser3 *self) {
return S_REJECT;
}
// TODO tell valgrind utf8Codepoint and minCodepoint are uninitialized
++self->buf;
++self->writeBuf;
*self->writeBuf++ = *self->buf++;
self->pop();
MUSTTAIL return Parser3::keepGoing(self);
}
@@ -533,8 +528,7 @@ inline Status t_hex(Parser3 *self) {
if (('0' <= *self->buf && *self->buf <= '9') ||
('a' <= *self->buf && *self->buf <= 'f') ||
('A' <= *self->buf && *self->buf <= 'F')) {
++self->buf;
++self->writeBuf;
*self->writeBuf++ = *self->buf++;
self->pop();
MUSTTAIL return Parser3::keepGoing(self);
}