Handle unescaping directly in n_string2 if space permits
This commit is contained in:
@@ -461,6 +461,11 @@ inline PRESERVE_NONE WeaselJsonStatus n_string(Parser3 *self, char *buf,
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
}
|
||||
|
||||
inline int32_t read4_hex(const char *buf) {
|
||||
return tables.hex[uint8_t(buf[0])] << 12 | tables.hex[uint8_t(buf[1])] << 8 |
|
||||
tables.hex[uint8_t(buf[2])] << 4 | tables.hex[uint8_t(buf[3])] << 0;
|
||||
}
|
||||
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_string2(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
if (auto s = scan_string(self, buf, bufEnd)) {
|
||||
@@ -474,10 +479,69 @@ inline PRESERVE_NONE WeaselJsonStatus n_string2(Parser3 *self, char *buf,
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
case '\\':
|
||||
++buf;
|
||||
if (auto s = self->push({N_STRING_FOLLOWING_ESCAPE})) {
|
||||
return s;
|
||||
if (bufEnd - buf < /*strlen("u0000\\u0000")*/ 11) {
|
||||
if (auto s = self->push({N_STRING_FOLLOWING_ESCAPE})) {
|
||||
return s;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
} else {
|
||||
if (*buf == 'u') {
|
||||
++buf;
|
||||
int32_t codepoint = read4_hex(buf);
|
||||
if (codepoint < 0) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
buf += 4;
|
||||
if (0xd800 <= codepoint && codepoint <= 0xdfff) {
|
||||
// utf-16 surrogate
|
||||
int32_t codepoint2 = read4_hex(buf + 2);
|
||||
if (!(buf[0] == '\\' && buf[1] == 'u' && codepoint2 >= 0))
|
||||
[[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
codepoint =
|
||||
0x10000 + (codepoint - 0xd800) * 0x400 + (codepoint2 - 0xdc00);
|
||||
assert(codepoint >= 0x10000);
|
||||
if (codepoint > 0x10FFFF) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
buf += 6;
|
||||
assert(codepoint < 0x10ffff);
|
||||
self->writeBuf[3] = (0b00111111 & codepoint) | 0b10000000;
|
||||
codepoint >>= 6;
|
||||
self->writeBuf[2] = (0b00111111 & codepoint) | 0b10000000;
|
||||
codepoint >>= 6;
|
||||
self->writeBuf[1] = (0b00111111 & codepoint) | 0b10000000;
|
||||
codepoint >>= 6;
|
||||
self->writeBuf[0] = (0b00000111 & codepoint) | 0b11110000;
|
||||
self->writeBuf += 4;
|
||||
} else {
|
||||
if (codepoint < 0x80) {
|
||||
*self->writeBuf++ = codepoint;
|
||||
} else if (codepoint < 0x800) {
|
||||
self->writeBuf[1] = (0b00111111 & codepoint) | 0b10000000;
|
||||
codepoint >>= 6;
|
||||
self->writeBuf[0] = (0b00011111 & codepoint) | 0b11000000;
|
||||
self->writeBuf += 2;
|
||||
} else {
|
||||
assert(codepoint < 0x10000);
|
||||
self->writeBuf[2] = (0b00111111 & codepoint) | 0b10000000;
|
||||
codepoint >>= 6;
|
||||
self->writeBuf[1] = (0b00111111 & codepoint) | 0b10000000;
|
||||
codepoint >>= 6;
|
||||
self->writeBuf[0] = (0b00001111 & codepoint) | 0b11100000;
|
||||
self->writeBuf += 3;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
auto unescaped = tables.unescape[uint8_t(*buf++)];
|
||||
if (unescaped == 0) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
*self->writeBuf++ = unescaped;
|
||||
}
|
||||
MUSTTAIL return n_string2(self, buf, bufEnd);
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
default:
|
||||
[[unlikely]] return WeaselJson_REJECT;
|
||||
}
|
||||
@@ -760,7 +824,8 @@ constexpr inline struct ContinuationTable {
|
||||
symbolNames[T_EOF] = "t_eof";
|
||||
symbolNames[T_BACKSLASH] = "singleChar<'\\'>";
|
||||
|
||||
// All others can assume that there's at least one byte when they're called
|
||||
// All others can assume that there's at least one byte when they're
|
||||
// called
|
||||
acceptsEmptyString[N_NUMBER] = true;
|
||||
acceptsEmptyString[N_WHITESPACE] = true;
|
||||
acceptsEmptyString[T_EOF] = true;
|
||||
|
||||
Reference in New Issue
Block a user