Basic unescaping
This commit is contained in:
@@ -79,7 +79,7 @@ struct Parser3 {
|
||||
|
||||
[[nodiscard]] Status parse(char *buf, int len) {
|
||||
complete = len == 0;
|
||||
this->buf = this->dataBegin = buf;
|
||||
this->buf = this->dataBegin = this->writeBuf = buf;
|
||||
this->bufEnd = buf + len;
|
||||
return keepGoing(this);
|
||||
}
|
||||
@@ -92,7 +92,7 @@ struct Parser3 {
|
||||
}
|
||||
|
||||
void flushString() {
|
||||
int len = buf - dataBegin;
|
||||
int len = writeBuf - dataBegin;
|
||||
if (len > 0) {
|
||||
callbacks->on_string_data(data, dataBegin, len);
|
||||
}
|
||||
@@ -127,10 +127,14 @@ struct Parser3 {
|
||||
constexpr static int kMaxStackSize = 1024;
|
||||
|
||||
[[maybe_unused]] void debugPrint();
|
||||
// Pointer to the next byte in the input to consume
|
||||
char *buf = nullptr;
|
||||
// Pointer past the end of the last byte available to consume
|
||||
char *bufEnd = nullptr;
|
||||
// Used for flushing pending data with on_*_data callbacks
|
||||
char *dataBegin;
|
||||
// Used for unescaping string data in place
|
||||
char *writeBuf;
|
||||
const Callbacks *const callbacks;
|
||||
void *const data;
|
||||
Symbol stack[kMaxStackSize];
|
||||
@@ -368,9 +372,9 @@ inline Status n_string2(Parser3 *self) {
|
||||
// one byte utf-8 encoding
|
||||
switch (*self->buf) {
|
||||
case '"':
|
||||
self->flushString();
|
||||
++self->buf;
|
||||
self->pop();
|
||||
self->flushString();
|
||||
self->callbacks->on_end_string(self->data);
|
||||
MUSTTAIL return Parser3::keepGoing(self);
|
||||
case '\\':
|
||||
@@ -389,6 +393,7 @@ inline Status n_string2(Parser3 *self) {
|
||||
self->utf8Codepoint = *self->buf & 0b00011111;
|
||||
self->minCodepoint = 0x80;
|
||||
++self->buf;
|
||||
++self->writeBuf;
|
||||
self->pop();
|
||||
if (auto s = self->push({T_UTF8_LAST_CONTINUATION_BYTE, N_STRING2})) {
|
||||
return s;
|
||||
@@ -400,6 +405,7 @@ inline Status n_string2(Parser3 *self) {
|
||||
self->utf8Codepoint = *self->buf & 0b00001111;
|
||||
self->minCodepoint = 0x800;
|
||||
++self->buf;
|
||||
++self->writeBuf;
|
||||
self->pop();
|
||||
if (auto s = self->push({T_UTF8_CONTINUATION_BYTE,
|
||||
T_UTF8_LAST_CONTINUATION_BYTE, N_STRING2})) {
|
||||
@@ -411,6 +417,7 @@ inline Status n_string2(Parser3 *self) {
|
||||
self->utf8Codepoint = *self->buf & 0b00000111;
|
||||
self->minCodepoint = 0x10000;
|
||||
++self->buf;
|
||||
++self->writeBuf;
|
||||
self->pop();
|
||||
if (auto s = self->push({T_UTF8_CONTINUATION_BYTE, T_UTF8_CONTINUATION_BYTE,
|
||||
T_UTF8_LAST_CONTINUATION_BYTE, N_STRING2})) {
|
||||
@@ -434,7 +441,7 @@ inline Status n_string_following_escape(Parser3 *self) {
|
||||
case 'n':
|
||||
case 'r':
|
||||
case 't':
|
||||
++self->buf;
|
||||
*self->writeBuf++ = tables.unescape[*self->buf++];
|
||||
self->pop();
|
||||
if (auto s = self->push({N_STRING2})) {
|
||||
return s;
|
||||
@@ -442,6 +449,7 @@ inline Status n_string_following_escape(Parser3 *self) {
|
||||
MUSTTAIL return Parser3::keepGoing(self);
|
||||
case 'u':
|
||||
++self->buf;
|
||||
// TODO unescape
|
||||
self->pop();
|
||||
if (auto s = self->push({T_HEX, T_HEX, T_HEX, T_HEX, N_STRING2})) {
|
||||
return s;
|
||||
@@ -463,6 +471,7 @@ inline Status t_utf8_continuation_byte(Parser3 *self) {
|
||||
self->utf8Codepoint <<= 6;
|
||||
self->utf8Codepoint |= *self->buf & 0b00111111;
|
||||
++self->buf;
|
||||
++self->writeBuf;
|
||||
self->pop();
|
||||
MUSTTAIL return Parser3::keepGoing(self);
|
||||
}
|
||||
@@ -486,6 +495,7 @@ inline Status t_utf8_last_continuation_byte(Parser3 *self) {
|
||||
}
|
||||
// TODO tell valgrind utf8Codepoint and minCodepoint are uninitialized
|
||||
++self->buf;
|
||||
++self->writeBuf;
|
||||
self->pop();
|
||||
MUSTTAIL return Parser3::keepGoing(self);
|
||||
}
|
||||
@@ -524,6 +534,7 @@ inline Status t_hex(Parser3 *self) {
|
||||
('a' <= *self->buf && *self->buf <= 'f') ||
|
||||
('A' <= *self->buf && *self->buf <= 'F')) {
|
||||
++self->buf;
|
||||
++self->writeBuf;
|
||||
self->pop();
|
||||
MUSTTAIL return Parser3::keepGoing(self);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user