Loop in string2 in normal case

This commit is contained in:
2025-05-19 17:50:48 -04:00
parent 918950d7f8
commit a4d7d1f91e
2 changed files with 80 additions and 56 deletions

View File

@@ -327,30 +327,28 @@ inline Status n_string(Parser3 *self) {
}
inline Status n_string2(Parser3 *self) {
if (tables.invalidStringByte[uint8_t(*self->buf)]) {
return S_REJECT;
}
if (int8_t(*self->buf) > 0) {
// one byte utf-8 encoding
switch (*self->buf) {
case '"':
self->flushString();
self->callbacks->on_end_string(self->data);
++self->buf;
self->pop();
MUSTTAIL return Parser3::keepGoing(self);
case '\\':
++self->buf;
self->pop();
if (auto s = self->push({N_STRING_FOLLOWING_ESCAPE})) {
return s;
}
MUSTTAIL return Parser3::keepGoing(self);
default:
*self->writeBuf++ = *self->buf++;
begin:
switch (tables.stringByteMeaning[uint8_t(*self->buf)]) {
case Tables::NORMAL:
*self->writeBuf++ = *self->buf++;
if (self->buf == self->bufEnd) {
MUSTTAIL return Parser3::keepGoing(self);
}
} else if ((*self->buf & 0b11100000) == 0b11000000) {
goto begin;
case Tables::DUBQUOTE:
self->flushString();
self->callbacks->on_end_string(self->data);
++self->buf;
self->pop();
MUSTTAIL return Parser3::keepGoing(self);
case Tables::BACKSLASH:
++self->buf;
self->pop();
if (auto s = self->push({N_STRING_FOLLOWING_ESCAPE})) {
return s;
}
MUSTTAIL return Parser3::keepGoing(self);
case Tables::TWO_BYTE_UTF8:
// two byte utf-8 encoding
self->utf8Codepoint = *self->buf & 0b00011111;
self->minCodepoint = 0x80;
@@ -360,8 +358,7 @@ inline Status n_string2(Parser3 *self) {
return s;
}
MUSTTAIL return Parser3::keepGoing(self);
}
if ((*self->buf & 0b11110000) == 0b11100000) {
case Tables::THREE_BYTE_UTF8:
// three byte utf-8 encoding
self->utf8Codepoint = *self->buf & 0b00001111;
self->minCodepoint = 0x800;
@@ -372,7 +369,7 @@ inline Status n_string2(Parser3 *self) {
return s;
}
MUSTTAIL return Parser3::keepGoing(self);
} else if ((*self->buf & 0b11111000) == 0b11110000) {
case Tables::FOUR_BYTE_UTF8:
// four byte utf-8 encoding
self->utf8Codepoint = *self->buf & 0b00000111;
self->minCodepoint = 0x10000;
@@ -383,8 +380,10 @@ inline Status n_string2(Parser3 *self) {
return s;
}
MUSTTAIL return Parser3::keepGoing(self);
case Tables::CONTINUATION_BYTE:
case Tables::INVALID:
return S_REJECT;
}
return S_REJECT;
}
inline Status n_string_following_escape(Parser3 *self) {
@@ -417,37 +416,33 @@ inline Status n_string_following_escape(Parser3 *self) {
}
inline Status t_utf8_continuation_byte(Parser3 *self) {
if (tables.invalidStringByte[uint8_t(*self->buf)]) {
if (tables.stringByteMeaning[uint8_t(*self->buf)] !=
Tables::CONTINUATION_BYTE) {
return S_REJECT;
}
if ((*self->buf & 0b11000000) == 0b10000000) {
self->utf8Codepoint <<= 6;
self->utf8Codepoint |= *self->buf & 0b00111111;
*self->writeBuf++ = *self->buf++;
self->pop();
MUSTTAIL return Parser3::keepGoing(self);
}
return S_REJECT;
self->utf8Codepoint <<= 6;
self->utf8Codepoint |= *self->buf & 0b00111111;
*self->writeBuf++ = *self->buf++;
self->pop();
MUSTTAIL return Parser3::keepGoing(self);
}
inline Status t_utf8_last_continuation_byte(Parser3 *self) {
if (tables.invalidStringByte[uint8_t(*self->buf)]) {
if (tables.stringByteMeaning[uint8_t(*self->buf)] !=
Tables::CONTINUATION_BYTE) {
return S_REJECT;
}
if ((*self->buf & 0b11000000) == 0b10000000) {
self->utf8Codepoint <<= 6;
self->utf8Codepoint |= *self->buf & 0b00111111;
if (self->utf8Codepoint < self->minCodepoint ||
self->utf8Codepoint > 0x10ffff ||
(0xd800 <= self->utf8Codepoint && self->utf8Codepoint <= 0xdfff)) {
return S_REJECT;
}
// TODO tell valgrind utf8Codepoint and minCodepoint are uninitialized
*self->writeBuf++ = *self->buf++;
self->pop();
MUSTTAIL return Parser3::keepGoing(self);
self->utf8Codepoint <<= 6;
self->utf8Codepoint |= *self->buf & 0b00111111;
if (self->utf8Codepoint < self->minCodepoint ||
self->utf8Codepoint > 0x10ffff ||
(0xd800 <= self->utf8Codepoint && self->utf8Codepoint <= 0xdfff)) {
return S_REJECT;
}
return S_REJECT;
// TODO tell valgrind utf8Codepoint and minCodepoint are uninitialized
*self->writeBuf++ = *self->buf++;
self->pop();
MUSTTAIL return Parser3::keepGoing(self);
}
inline Status t_digit(Parser3 *self) {