invalidUtf8 -> invalidStringByte: rename the lookup table and fold the control-character (< 0x20) check into it

This commit is contained in:
2025-05-19 12:25:13 -04:00
parent a271d698d0
commit fc747f96df
2 changed files with 21 additions and 12 deletions

View File

@@ -343,16 +343,12 @@ inline Status n_string2(Parser3 *self) {
if (self->len() == 0) {
return S_REJECT;
}
// Try subtract and unsigned compare to save a branch?
if (uint8_t(*self->buf) < 0x20) {
if (tables.invalidStringByte[uint8_t(*self->buf)]) {
return S_REJECT;
}
if (*self->buf != '"') {
self->callbacks->on_string_data(self->data, self->buf, 1);
}
if (tables.invalidUtf8[uint8_t(*self->buf)]) {
return S_REJECT;
}
if (int8_t(*self->buf) > 0) {
// one byte utf-8 encoding
switch (*self->buf) {
@@ -445,7 +441,7 @@ inline Status t_utf8_continuation_byte(Parser3 *self) {
if (self->len() == 0) {
return S_REJECT;
}
if (tables.invalidUtf8[uint8_t(*self->buf)]) {
if (tables.invalidStringByte[uint8_t(*self->buf)]) {
return S_REJECT;
}
if ((*self->buf & 0b11000000) == 0b10000000) {
@@ -463,7 +459,7 @@ inline Status t_utf8_last_continuation_byte(Parser3 *self) {
if (self->len() == 0) {
return S_REJECT;
}
if (tables.invalidUtf8[uint8_t(*self->buf)]) {
if (tables.invalidStringByte[uint8_t(*self->buf)]) {
return S_REJECT;
}
if ((*self->buf & 0b11000000) == 0b10000000) {

View File

@@ -7,12 +7,25 @@ constexpr inline struct Tables {
whitespace['\r'] = true;
whitespace['\t'] = true;
invalidUtf8[0xc0] = true;
invalidUtf8[0xc1] = true;
for (int i = 0xf5; i <= 0xff; ++i) {
invalidUtf8[i] = true;
for (int i = 0; i < 0x20; ++i) {
invalidStringByte[i] = true;
}
invalidStringByte[0xc0] = true;
invalidStringByte[0xc1] = true;
for (int i = 0xf5; i <= 0xff; ++i) {
invalidStringByte[i] = true;
}
unescape['n'] = '\n';
unescape['r'] = '\r';
unescape['t'] = '\t';
unescape['"'] = '"';
unescape['b'] = '\b';
unescape['f'] = '\f';
unescape['\\'] = '\\';
unescape['/'] = '/';
}
alignas(16) bool whitespace[256]{};
alignas(16) bool invalidUtf8[256]{};
alignas(16) bool invalidStringByte[256]{};
alignas(16) char unescape[256]{};
} tables;