Basic unescaping
This commit is contained in:
@@ -79,7 +79,7 @@ struct Parser3 {

  [[nodiscard]] Status parse(char *buf, int len) {
  complete = len == 0;
- this->buf = this->dataBegin = buf;
+ this->buf = this->dataBegin = this->writeBuf = buf;
  this->bufEnd = buf + len;
  return keepGoing(this);
  }
@@ -92,7 +92,7 @@ struct Parser3 {
  }

  void flushString() {
- int len = buf - dataBegin;
+ int len = writeBuf - dataBegin;
  if (len > 0) {
  callbacks->on_string_data(data, dataBegin, len);
  }
@@ -127,10 +127,14 @@ struct Parser3 {
  constexpr static int kMaxStackSize = 1024;

  [[maybe_unused]] void debugPrint();
+ // Pointer to the next byte in the input to consume
  char *buf = nullptr;
+ // Pointer past the end of the last byte available to consume
  char *bufEnd = nullptr;
  // Used for flushing pending data with on_*_data callbacks
  char *dataBegin;
+ // Used for unescaping string data in place
+ char *writeBuf;
  const Callbacks *const callbacks;
  void *const data;
  Symbol stack[kMaxStackSize];
@@ -368,9 +372,9 @@ inline Status n_string2(Parser3 *self) {
  // one byte utf-8 encoding
  switch (*self->buf) {
  case '"':
- self->flushString();
  ++self->buf;
  self->pop();
+ self->flushString();
  self->callbacks->on_end_string(self->data);
  MUSTTAIL return Parser3::keepGoing(self);
  case '\\':
@@ -389,6 +393,7 @@ inline Status n_string2(Parser3 *self) {
  self->utf8Codepoint = *self->buf & 0b00011111;
  self->minCodepoint = 0x80;
  ++self->buf;
+ ++self->writeBuf;
  self->pop();
  if (auto s = self->push({T_UTF8_LAST_CONTINUATION_BYTE, N_STRING2})) {
  return s;
@@ -400,6 +405,7 @@ inline Status n_string2(Parser3 *self) {
  self->utf8Codepoint = *self->buf & 0b00001111;
  self->minCodepoint = 0x800;
  ++self->buf;
+ ++self->writeBuf;
  self->pop();
  if (auto s = self->push({T_UTF8_CONTINUATION_BYTE,
  T_UTF8_LAST_CONTINUATION_BYTE, N_STRING2})) {
@@ -411,6 +417,7 @@ inline Status n_string2(Parser3 *self) {
  self->utf8Codepoint = *self->buf & 0b00000111;
  self->minCodepoint = 0x10000;
  ++self->buf;
+ ++self->writeBuf;
  self->pop();
  if (auto s = self->push({T_UTF8_CONTINUATION_BYTE, T_UTF8_CONTINUATION_BYTE,
  T_UTF8_LAST_CONTINUATION_BYTE, N_STRING2})) {
@@ -434,7 +441,7 @@ inline Status n_string_following_escape(Parser3 *self) {
  case 'n':
  case 'r':
  case 't':
- ++self->buf;
+ *self->writeBuf++ = tables.unescape[*self->buf++];
  self->pop();
  if (auto s = self->push({N_STRING2})) {
  return s;
@@ -442,6 +449,7 @@ inline Status n_string_following_escape(Parser3 *self) {
  MUSTTAIL return Parser3::keepGoing(self);
  case 'u':
  ++self->buf;
+ // TODO unescape
  self->pop();
  if (auto s = self->push({T_HEX, T_HEX, T_HEX, T_HEX, N_STRING2})) {
  return s;
@@ -463,6 +471,7 @@ inline Status t_utf8_continuation_byte(Parser3 *self) {
  self->utf8Codepoint <<= 6;
  self->utf8Codepoint |= *self->buf & 0b00111111;
  ++self->buf;
+ ++self->writeBuf;
  self->pop();
  MUSTTAIL return Parser3::keepGoing(self);
  }
@@ -486,6 +495,7 @@ inline Status t_utf8_last_continuation_byte(Parser3 *self) {
  }
  // TODO tell valgrind utf8Codepoint and minCodepoint are uninitialized
  ++self->buf;
+ ++self->writeBuf;
  self->pop();
  MUSTTAIL return Parser3::keepGoing(self);
  }
@@ -524,6 +534,7 @@ inline Status t_hex(Parser3 *self) {
  ('a' <= *self->buf && *self->buf <= 'f') ||
  ('A' <= *self->buf && *self->buf <= 'F')) {
  ++self->buf;
+ ++self->writeBuf;
  self->pop();
  MUSTTAIL return Parser3::keepGoing(self);
  }
Reference in New Issue
Block a user