invalidUtf8 -> invalidStringByte

This commit is contained in:
2025-05-19 12:25:13 -04:00
parent a271d698d0
commit fc747f96df
2 changed files with 21 additions and 12 deletions

View File

@@ -343,16 +343,12 @@ inline Status n_string2(Parser3 *self) {
if (self->len() == 0) { if (self->len() == 0) {
return S_REJECT; return S_REJECT;
} }
// Try subtract and unsigned compare to save a branch? if (tables.invalidStringByte[uint8_t(*self->buf)]) {
if (uint8_t(*self->buf) < 0x20) {
return S_REJECT; return S_REJECT;
} }
if (*self->buf != '"') { if (*self->buf != '"') {
self->callbacks->on_string_data(self->data, self->buf, 1); self->callbacks->on_string_data(self->data, self->buf, 1);
} }
if (tables.invalidUtf8[uint8_t(*self->buf)]) {
return S_REJECT;
}
if (int8_t(*self->buf) > 0) { if (int8_t(*self->buf) > 0) {
// one byte utf-8 encoding // one byte utf-8 encoding
switch (*self->buf) { switch (*self->buf) {
@@ -445,7 +441,7 @@ inline Status t_utf8_continuation_byte(Parser3 *self) {
if (self->len() == 0) { if (self->len() == 0) {
return S_REJECT; return S_REJECT;
} }
if (tables.invalidUtf8[uint8_t(*self->buf)]) { if (tables.invalidStringByte[uint8_t(*self->buf)]) {
return S_REJECT; return S_REJECT;
} }
if ((*self->buf & 0b11000000) == 0b10000000) { if ((*self->buf & 0b11000000) == 0b10000000) {
@@ -463,7 +459,7 @@ inline Status t_utf8_last_continuation_byte(Parser3 *self) {
if (self->len() == 0) { if (self->len() == 0) {
return S_REJECT; return S_REJECT;
} }
if (tables.invalidUtf8[uint8_t(*self->buf)]) { if (tables.invalidStringByte[uint8_t(*self->buf)]) {
return S_REJECT; return S_REJECT;
} }
if ((*self->buf & 0b11000000) == 0b10000000) { if ((*self->buf & 0b11000000) == 0b10000000) {

View File

@@ -7,12 +7,25 @@ constexpr inline struct Tables {
whitespace['\r'] = true; whitespace['\r'] = true;
whitespace['\t'] = true; whitespace['\t'] = true;
invalidUtf8[0xc0] = true; for (int i = 0; i < 0x20; ++i) {
invalidUtf8[0xc1] = true; invalidStringByte[i] = true;
for (int i = 0xf5; i <= 0xff; ++i) {
invalidUtf8[i] = true;
} }
invalidStringByte[0xc0] = true;
invalidStringByte[0xc1] = true;
for (int i = 0xf5; i <= 0xff; ++i) {
invalidStringByte[i] = true;
}
unescape['n'] = '\n';
unescape['r'] = '\r';
unescape['t'] = '\t';
unescape['"'] = '"';
unescape['b'] = '\b';
unescape['f'] = '\f';
unescape['\\'] = '\\';
unescape['/'] = '/';
} }
alignas(16) bool whitespace[256]{}; alignas(16) bool whitespace[256]{};
alignas(16) bool invalidUtf8[256]{}; alignas(16) bool invalidStringByte[256]{};
alignas(16) char unescape[256]{};
} tables; } tables;