Loop in n_string2 in the normal case
This commit is contained in:
@@ -327,30 +327,28 @@ inline Status n_string(Parser3 *self) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
inline Status n_string2(Parser3 *self) {
|
inline Status n_string2(Parser3 *self) {
|
||||||
if (tables.invalidStringByte[uint8_t(*self->buf)]) {
|
begin:
|
||||||
return S_REJECT;
|
switch (tables.stringByteMeaning[uint8_t(*self->buf)]) {
|
||||||
}
|
case Tables::NORMAL:
|
||||||
if (int8_t(*self->buf) > 0) {
|
*self->writeBuf++ = *self->buf++;
|
||||||
// one byte utf-8 encoding
|
if (self->buf == self->bufEnd) {
|
||||||
switch (*self->buf) {
|
|
||||||
case '"':
|
|
||||||
self->flushString();
|
|
||||||
self->callbacks->on_end_string(self->data);
|
|
||||||
++self->buf;
|
|
||||||
self->pop();
|
|
||||||
MUSTTAIL return Parser3::keepGoing(self);
|
|
||||||
case '\\':
|
|
||||||
++self->buf;
|
|
||||||
self->pop();
|
|
||||||
if (auto s = self->push({N_STRING_FOLLOWING_ESCAPE})) {
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
MUSTTAIL return Parser3::keepGoing(self);
|
|
||||||
default:
|
|
||||||
*self->writeBuf++ = *self->buf++;
|
|
||||||
MUSTTAIL return Parser3::keepGoing(self);
|
MUSTTAIL return Parser3::keepGoing(self);
|
||||||
}
|
}
|
||||||
} else if ((*self->buf & 0b11100000) == 0b11000000) {
|
goto begin;
|
||||||
|
case Tables::DUBQUOTE:
|
||||||
|
self->flushString();
|
||||||
|
self->callbacks->on_end_string(self->data);
|
||||||
|
++self->buf;
|
||||||
|
self->pop();
|
||||||
|
MUSTTAIL return Parser3::keepGoing(self);
|
||||||
|
case Tables::BACKSLASH:
|
||||||
|
++self->buf;
|
||||||
|
self->pop();
|
||||||
|
if (auto s = self->push({N_STRING_FOLLOWING_ESCAPE})) {
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
MUSTTAIL return Parser3::keepGoing(self);
|
||||||
|
case Tables::TWO_BYTE_UTF8:
|
||||||
// two byte utf-8 encoding
|
// two byte utf-8 encoding
|
||||||
self->utf8Codepoint = *self->buf & 0b00011111;
|
self->utf8Codepoint = *self->buf & 0b00011111;
|
||||||
self->minCodepoint = 0x80;
|
self->minCodepoint = 0x80;
|
||||||
@@ -360,8 +358,7 @@ inline Status n_string2(Parser3 *self) {
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
MUSTTAIL return Parser3::keepGoing(self);
|
MUSTTAIL return Parser3::keepGoing(self);
|
||||||
}
|
case Tables::THREE_BYTE_UTF8:
|
||||||
if ((*self->buf & 0b11110000) == 0b11100000) {
|
|
||||||
// three byte utf-8 encoding
|
// three byte utf-8 encoding
|
||||||
self->utf8Codepoint = *self->buf & 0b00001111;
|
self->utf8Codepoint = *self->buf & 0b00001111;
|
||||||
self->minCodepoint = 0x800;
|
self->minCodepoint = 0x800;
|
||||||
@@ -372,7 +369,7 @@ inline Status n_string2(Parser3 *self) {
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
MUSTTAIL return Parser3::keepGoing(self);
|
MUSTTAIL return Parser3::keepGoing(self);
|
||||||
} else if ((*self->buf & 0b11111000) == 0b11110000) {
|
case Tables::FOUR_BYTE_UTF8:
|
||||||
// four byte utf-8 encoding
|
// four byte utf-8 encoding
|
||||||
self->utf8Codepoint = *self->buf & 0b00000111;
|
self->utf8Codepoint = *self->buf & 0b00000111;
|
||||||
self->minCodepoint = 0x10000;
|
self->minCodepoint = 0x10000;
|
||||||
@@ -383,8 +380,10 @@ inline Status n_string2(Parser3 *self) {
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
MUSTTAIL return Parser3::keepGoing(self);
|
MUSTTAIL return Parser3::keepGoing(self);
|
||||||
|
case Tables::CONTINUATION_BYTE:
|
||||||
|
case Tables::INVALID:
|
||||||
|
return S_REJECT;
|
||||||
}
|
}
|
||||||
return S_REJECT;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inline Status n_string_following_escape(Parser3 *self) {
|
inline Status n_string_following_escape(Parser3 *self) {
|
||||||
@@ -417,37 +416,33 @@ inline Status n_string_following_escape(Parser3 *self) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// t_utf8_continuation_byte: consume one UTF-8 continuation byte at *self->buf.
//
// NOTE: this span is a flattened side-by-side diff. Each pre-change line is
// followed by its post-change counterpart; `|` and `||||||` are table residue.
//
// Pre-change: returns S_REJECT when tables.invalidStringByte flags the byte,
// and again at the end when the byte does not match the 10xxxxxx bit pattern.
// Post-change: a single lookup replaces both checks; returns S_REJECT unless
// tables.stringByteMeaning for the byte equals Tables::CONTINUATION_BYTE.
//
// On acceptance (both versions): shifts self->utf8Codepoint left by 6, ORs in
// the low 6 bits of the byte, copies the byte to *self->writeBuf (advancing
// both pointers), calls self->pop(), and returns via a MUSTTAIL call to
// Parser3::keepGoing(self). No code point range validation happens here.
// NOTE(review): presumably used for the non-final continuation bytes of a
// multi-byte sequence -- confirm against the states pushed by n_string2.
inline Status t_utf8_continuation_byte(Parser3 *self) {
|
inline Status t_utf8_continuation_byte(Parser3 *self) {
|
||||||
if (tables.invalidStringByte[uint8_t(*self->buf)]) {
|
if (tables.stringByteMeaning[uint8_t(*self->buf)] !=
|
||||||

Tables::CONTINUATION_BYTE) {
|
||||||
return S_REJECT;
|
return S_REJECT;
|
||||||
}
|
}
|
||||||
if ((*self->buf & 0b11000000) == 0b10000000) {
|
self->utf8Codepoint <<= 6;
|
||||||
self->utf8Codepoint <<= 6;
|
self->utf8Codepoint |= *self->buf & 0b00111111;
|
||||||
self->utf8Codepoint |= *self->buf & 0b00111111;
|
*self->writeBuf++ = *self->buf++;
|
||||||
*self->writeBuf++ = *self->buf++;
|
self->pop();
|
||||||
self->pop();
|
MUSTTAIL return Parser3::keepGoing(self);
|
||||||
MUSTTAIL return Parser3::keepGoing(self);
|
|
||||||
}
|
|
||||||
return S_REJECT;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// t_utf8_last_continuation_byte: consume a UTF-8 continuation byte at
// *self->buf and validate the accumulated code point.
//
// NOTE: this span is a flattened side-by-side diff. Each pre-change line is
// followed by its post-change counterpart; `|` and `||||||` are table residue.
//
// Pre-change: returns S_REJECT when tables.invalidStringByte flags the byte,
// and again at the end when the byte does not match the 10xxxxxx bit pattern.
// Post-change: a single lookup replaces both checks; returns S_REJECT unless
// tables.stringByteMeaning for the byte equals Tables::CONTINUATION_BYTE.
//
// After folding the low 6 bits of the byte into self->utf8Codepoint, both
// versions return S_REJECT when the code point is
//   - below self->minCodepoint (n_string2 sets 0x80 / 0x800 / 0x10000 for
//     2-/3-/4-byte lead bytes, so this rejects overlong encodings),
//   - above 0x10ffff, or
//   - inside 0xd800..0xdfff (the surrogate range).
// Otherwise the byte is copied to *self->writeBuf (advancing both pointers),
// self->pop() is called, and control returns via a MUSTTAIL call to
// Parser3::keepGoing(self).
inline Status t_utf8_last_continuation_byte(Parser3 *self) {
|
inline Status t_utf8_last_continuation_byte(Parser3 *self) {
|
||||||
if (tables.invalidStringByte[uint8_t(*self->buf)]) {
|
if (tables.stringByteMeaning[uint8_t(*self->buf)] !=
|
||||||

Tables::CONTINUATION_BYTE) {
|
||||||
return S_REJECT;
|
return S_REJECT;
|
||||||
}
|
}
|
||||||
if ((*self->buf & 0b11000000) == 0b10000000) {
|
self->utf8Codepoint <<= 6;
|
||||||
self->utf8Codepoint <<= 6;
|
self->utf8Codepoint |= *self->buf & 0b00111111;
|
||||||
self->utf8Codepoint |= *self->buf & 0b00111111;
|
if (self->utf8Codepoint < self->minCodepoint ||
|
||||||
if (self->utf8Codepoint < self->minCodepoint ||
|
self->utf8Codepoint > 0x10ffff ||
|
||||||
self->utf8Codepoint > 0x10ffff ||
|
(0xd800 <= self->utf8Codepoint && self->utf8Codepoint <= 0xdfff)) {
|
||||||
(0xd800 <= self->utf8Codepoint && self->utf8Codepoint <= 0xdfff)) {
|
return S_REJECT;
|
||||||
return S_REJECT;
|
|
||||||
}
|
|
||||||
// TODO tell valgrind utf8Codepoint and minCodepoint are uninitialized
|
|
||||||
*self->writeBuf++ = *self->buf++;
|
|
||||||
self->pop();
|
|
||||||
MUSTTAIL return Parser3::keepGoing(self);
|
|
||||||
}
|
}
|
||||||
return S_REJECT;
|
// TODO tell valgrind utf8Codepoint and minCodepoint are uninitialized
|
||||||

*self->writeBuf++ = *self->buf++;
|
||||||

self->pop();
|
||||||

MUSTTAIL return Parser3::keepGoing(self);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline Status t_digit(Parser3 *self) {
|
inline Status t_digit(Parser3 *self) {
|
||||||
|
|||||||
43
src/tables.h
43
src/tables.h
@@ -1,19 +1,48 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
constexpr inline struct Tables {
|
constexpr inline struct Tables {
|
||||||
|
// Classification of a byte encountered inside a string, used as the element
// type of the 256-entry stringByteMeaning lookup table (added on the
// post-change side of this diff; `|` and `||||||` lines are diff-table residue).
//
// As assigned by the visible Tables constructor lines:
//   INVALID           - first enumerator (value 0), so every entry of the
//                       value-initialized table that is never assigned stays
//                       INVALID (e.g. bytes below 0x20); also set explicitly
//                       for 0xc0, 0xc1 and 0xF5..0xFF.
//   NORMAL            - bytes 0x20..127, except '"' and '\\' which are
//                       overridden afterwards.
//   DUBQUOTE          - the '"' byte.
//   BACKSLASH         - the '\\' byte.
//   TWO_BYTE_UTF8     - bytes matching 110xxxxx (minus 0xc0/0xc1, see INVALID).
//   THREE_BYTE_UTF8   - bytes matching 1110xxxx.
//   FOUR_BYTE_UTF8    - bytes matching 11110xxx (minus 0xF5 and above).
//   CONTINUATION_BYTE - bytes matching 10xxxxxx.
enum StringByteMeaning {
|
||||||

INVALID,
|
||||||

NORMAL,
|
||||||

DUBQUOTE,
|
||||||

BACKSLASH,
|
||||||

TWO_BYTE_UTF8,
|
||||||

THREE_BYTE_UTF8,
|
||||||

FOUR_BYTE_UTF8,
|
||||||

CONTINUATION_BYTE,
|
||||||

};
|
||||||
|
|
||||||
constexpr Tables() {
|
constexpr Tables() {
|
||||||
whitespace[' '] = true;
|
whitespace[' '] = true;
|
||||||
whitespace['\n'] = true;
|
whitespace['\n'] = true;
|
||||||
whitespace['\r'] = true;
|
whitespace['\r'] = true;
|
||||||
whitespace['\t'] = true;
|
whitespace['\t'] = true;
|
||||||
|
|
||||||
for (int i = 0; i < 0x20; ++i) {
|
for (int i = 0; i < 256; ++i) {
|
||||||
invalidStringByte[i] = true;
|
if ((i & 0b11000000) == 0b10000000) {
|
||||||
|
stringByteMeaning[i] = CONTINUATION_BYTE;
|
||||||
|
}
|
||||||
|
if ((i & 0b11100000) == 0b11000000) {
|
||||||
|
stringByteMeaning[i] = TWO_BYTE_UTF8;
|
||||||
|
}
|
||||||
|
if ((i & 0b11110000) == 0b11100000) {
|
||||||
|
stringByteMeaning[i] = THREE_BYTE_UTF8;
|
||||||
|
}
|
||||||
|
if ((i & 0b11111000) == 0b11110000) {
|
||||||
|
stringByteMeaning[i] = FOUR_BYTE_UTF8;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
invalidStringByte[0xc0] = true;
|
|
||||||
invalidStringByte[0xc1] = true;
|
for (int i = 0x20; i < 128; ++i) {
|
||||||
for (int i = 0xf5; i <= 0xff; ++i) {
|
stringByteMeaning[i] = NORMAL;
|
||||||
invalidStringByte[i] = true;
|
}
|
||||||
|
stringByteMeaning['"'] = DUBQUOTE;
|
||||||
|
stringByteMeaning['\\'] = BACKSLASH;
|
||||||
|
|
||||||
|
stringByteMeaning[0xc0] = INVALID;
|
||||||
|
stringByteMeaning[0xc1] = INVALID;
|
||||||
|
for (int i = 0xF5; i < 0x100; ++i) {
|
||||||
|
stringByteMeaning[i] = INVALID;
|
||||||
}
|
}
|
||||||
|
|
||||||
unescape['n'] = '\n';
|
unescape['n'] = '\n';
|
||||||
@@ -26,6 +55,6 @@ constexpr inline struct Tables {
|
|||||||
unescape['/'] = '/';
|
unescape['/'] = '/';
|
||||||
}
|
}
|
||||||
alignas(16) bool whitespace[256]{};
|
alignas(16) bool whitespace[256]{};
|
||||||
alignas(16) bool invalidStringByte[256]{};
|
alignas(16) StringByteMeaning stringByteMeaning[256]{};
|
||||||
alignas(16) char unescape[256]{};
|
alignas(16) char unescape[256]{};
|
||||||
} tables;
|
} tables;
|
||||||
|
|||||||
Reference in New Issue
Block a user