Handle string framing (i.e. \")

This commit is contained in:
2025-05-16 12:38:38 -04:00
parent aa25751c17
commit 545836f6d2

View File

@@ -151,6 +151,7 @@ enum Symbol : int8_t {
T_DUBQUOTE,
// Nonterminals
N_STRING, // Not including leading double quote, but including trailing quote
N_STRING_FROM_ESCAPE, // Immediately after a backslash
N_NUMBER,
N_VALUE,
N_ARRAY_VALUE_OR_END,
@@ -173,6 +174,7 @@ static const char *symbolNames[] = {
"T_S",
"T_DUBQUOTE",
"N_STRING",
"N_STRING_FROM_ESCAPE",
"N_NUMBER",
"N_VALUE",
"N_ARRAY_VALUE_OR_END",
@@ -366,16 +368,23 @@ private:
if (!parseLiteral("\"")) {
return false;
}
auto *result = (char *)memchr(buf, '"', len());
if (result == nullptr) {
return false;
char *result = buf;
for (;;) {
result = (char *)memchr(result, '"', bufEnd - result);
if (result == nullptr) {
return false;
}
if (result != buf && result[-1] == '\\') {
++result;
continue;
}
break;
}
int stringLen = result - buf;
callbacks->on_string_data(data, buf, stringLen);
buf += stringLen;
if (!parseLiteral("\"")) {
return false;
if (stringLen > 0) {
callbacks->on_string_data(data, buf, stringLen);
}
buf += stringLen + 1;
callbacks->on_end_string(data);
return true;
}
@@ -494,12 +503,38 @@ private:
callbacks->on_end_number(data);
return S_OK;
}
Status parse_string() {
auto *result = (char *)memchr(buf, '"', len());
if (result == nullptr) {
callbacks->on_string_data(data, buf, len());
buf += len();
return S_AGAIN;
Status parse_string(bool fromEscape) {
auto *result = buf;
if (fromEscape) {
if (*result == '\"') {
++result;
}
pop();
if (Status s = push({N_STRING})) {
return s;
}
}
for (;;) {
result = (char *)memchr(result, '"', bufEnd - result);
if (result == nullptr) {
callbacks->on_string_data(data, buf, len());
if (bufEnd[-1] == '\\') {
pop();
if (Status s = push({N_STRING_FROM_ESCAPE})) {
return s;
}
}
return S_AGAIN;
}
if (result != buf && result[-1] == '\\') {
++result;
if (result == bufEnd) {
callbacks->on_string_data(data, buf, len());
return S_AGAIN;
}
continue;
}
break;
}
int stringLen = result - buf;
if (stringLen > 0) {
@@ -531,7 +566,14 @@ private:
}
static Status string(Parser2 *self) {
if (Status s = self->parse_string()) {
if (Status s = self->parse_string(false)) {
return s;
}
self->pop();
MUSTTAIL return keepGoing(self);
}
static Status stringFromEscape(Parser2 *self) {
if (Status s = self->parse_string(true)) {
return s;
}
self->pop();
@@ -730,6 +772,7 @@ private:
/*T_S*/ singleChar<'s'>,
/*T_DUBQUOTE*/ dubquote,
/*N_STRING*/ string,
/*N_STRING_FROM_ESCAPE*/ stringFromEscape,
/*N_NUMBER*/ number,
/*N_VALUE*/ value,
/*N_ARRAY_VALUE_OR_END*/ arrayOrEnd,
@@ -813,10 +856,30 @@ Callbacks printCallbacks() {
return result;
}
// Builds a Callbacks table whose handlers write each parse event to stdout
// with no indentation or separators added. Structural tokens and the three
// literals are written as fixed text; string and number payloads are passed
// through as received. Note that closing an object or an array also emits a
// newline after the bracket.
Callbacks minifyCallbacks() {
  // Writes at most `count` bytes of `bytes` (printf "%.*s" semantics); shared
  // by the string and number payload handlers, which behave identically.
  auto emitPayload = +[](void *, const char *bytes, int count) {
    printf("%.*s", count, bytes);
  };
  // Numbers carry no framing characters, so begin/end have nothing to write.
  auto emitNothing = +[](void *) {};

  Callbacks table;

  // Objects and arrays.
  table.on_begin_object = +[](void *) { fputs("{", stdout); };
  table.on_end_object = +[](void *) { fputs("}\n", stdout); };
  table.on_begin_array = +[](void *) { fputs("[", stdout); };
  table.on_end_array = +[](void *) { fputs("]\n", stdout); };

  // Strings: re-emit the surrounding double quotes around the payload.
  table.on_begin_string = +[](void *) { fputs("\"", stdout); };
  table.on_string_data = emitPayload;
  table.on_end_string = +[](void *) { fputs("\"", stdout); };

  // Numbers: payload only.
  table.on_begin_number = emitNothing;
  table.on_number_data = emitPayload;
  table.on_end_number = emitNothing;

  // Literals.
  table.on_true_literal = +[](void *) { fputs("true", stdout); };
  table.on_false_literal = +[](void *) { fputs("false", stdout); };
  table.on_null_literal = +[](void *) { fputs("null", stdout); };

  return table;
}
} // namespace
TEST_CASE("parser1") {
Callbacks c = printCallbacks();
Callbacks c = minifyCallbacks();
{
auto copy = json;
Parser1 parser(copy.data(), copy.length(), &c, nullptr);
@@ -830,7 +893,7 @@ TEST_CASE("parser1") {
}
TEST_CASE("parser2") {
Callbacks c = printCallbacks();
Callbacks c = minifyCallbacks();
{
auto copy = json;
Parser2 parser(&c, nullptr);