Handle string framing (i.e. \")

This commit is contained in:
2025-05-16 12:38:38 -04:00
parent aa25751c17
commit 545836f6d2

View File

@@ -151,6 +151,7 @@ enum Symbol : int8_t {
T_DUBQUOTE, T_DUBQUOTE,
// Nonterminals // Nonterminals
N_STRING, // Not including leading double quote, but including trailing quote N_STRING, // Not including leading double quote, but including trailing quote
N_STRING_FROM_ESCAPE, // Immediately after a backslash
N_NUMBER, N_NUMBER,
N_VALUE, N_VALUE,
N_ARRAY_VALUE_OR_END, N_ARRAY_VALUE_OR_END,
@@ -173,6 +174,7 @@ static const char *symbolNames[] = {
"T_S", "T_S",
"T_DUBQUOTE", "T_DUBQUOTE",
"N_STRING", "N_STRING",
"N_STRING_FROM_ESCAPE",
"N_NUMBER", "N_NUMBER",
"N_VALUE", "N_VALUE",
"N_ARRAY_VALUE_OR_END", "N_ARRAY_VALUE_OR_END",
@@ -366,16 +368,23 @@ private:
if (!parseLiteral("\"")) { if (!parseLiteral("\"")) {
return false; return false;
} }
auto *result = (char *)memchr(buf, '"', len()); char *result = buf;
if (result == nullptr) { for (;;) {
return false; result = (char *)memchr(result, '"', bufEnd - result);
if (result == nullptr) {
return false;
}
if (result != buf && result[-1] == '\\') {
++result;
continue;
}
break;
} }
int stringLen = result - buf; int stringLen = result - buf;
callbacks->on_string_data(data, buf, stringLen); if (stringLen > 0) {
buf += stringLen; callbacks->on_string_data(data, buf, stringLen);
if (!parseLiteral("\"")) {
return false;
} }
buf += stringLen + 1;
callbacks->on_end_string(data); callbacks->on_end_string(data);
return true; return true;
} }
@@ -494,12 +503,38 @@ private:
callbacks->on_end_number(data); callbacks->on_end_number(data);
return S_OK; return S_OK;
} }
Status parse_string() { Status parse_string(bool fromEscape) {
auto *result = (char *)memchr(buf, '"', len()); auto *result = buf;
if (result == nullptr) { if (fromEscape) {
callbacks->on_string_data(data, buf, len()); if (*result == '\"') {
buf += len(); ++result;
return S_AGAIN; }
pop();
if (Status s = push({N_STRING})) {
return s;
}
}
for (;;) {
result = (char *)memchr(result, '"', bufEnd - result);
if (result == nullptr) {
callbacks->on_string_data(data, buf, len());
if (bufEnd[-1] == '\\') {
pop();
if (Status s = push({N_STRING_FROM_ESCAPE})) {
return s;
}
}
return S_AGAIN;
}
if (result != buf && result[-1] == '\\') {
++result;
if (result == bufEnd) {
callbacks->on_string_data(data, buf, len());
return S_AGAIN;
}
continue;
}
break;
} }
int stringLen = result - buf; int stringLen = result - buf;
if (stringLen > 0) { if (stringLen > 0) {
@@ -531,7 +566,14 @@ private:
} }
static Status string(Parser2 *self) { static Status string(Parser2 *self) {
if (Status s = self->parse_string()) { if (Status s = self->parse_string(false)) {
return s;
}
self->pop();
MUSTTAIL return keepGoing(self);
}
static Status stringFromEscape(Parser2 *self) {
if (Status s = self->parse_string(true)) {
return s; return s;
} }
self->pop(); self->pop();
@@ -730,6 +772,7 @@ private:
/*T_S*/ singleChar<'s'>, /*T_S*/ singleChar<'s'>,
/*T_DUBQUOTE*/ dubquote, /*T_DUBQUOTE*/ dubquote,
/*N_STRING*/ string, /*N_STRING*/ string,
/*N_STRING_FROM_ESCAPE*/ stringFromEscape,
/*N_NUMBER*/ number, /*N_NUMBER*/ number,
/*N_VALUE*/ value, /*N_VALUE*/ value,
/*N_ARRAY_VALUE_OR_END*/ arrayOrEnd, /*N_ARRAY_VALUE_OR_END*/ arrayOrEnd,
@@ -813,10 +856,30 @@ Callbacks printCallbacks() {
return result; return result;
} }
// Builds the callback table the parser test cases use to echo parsed JSON
// events to stdout.
//
// Fixed punctuation and the true/false/null literals are written verbatim.
// String and number payloads are printed with "%.*s", i.e. at most `len`
// bytes of `buf`. A newline follows every closing '}' and ']'. The number
// begin/end hooks intentionally emit nothing.
Callbacks minifyCallbacks() {
  // Handlers shared by more than one event.
  const auto echoBytes =
      +[](void *, const char *buf, int len) { printf("%.*s", len, buf); };
  const auto emitNothing = +[](void *) {};

  Callbacks cb;

  // Containers.
  cb.on_begin_object = +[](void *) { fputs("{", stdout); };
  cb.on_end_object = +[](void *) { fputs("}\n", stdout); };
  cb.on_begin_array = +[](void *) { fputs("[", stdout); };
  cb.on_end_array = +[](void *) { fputs("]\n", stdout); };

  // Strings: the surrounding quotes are re-emitted around the raw payload.
  cb.on_begin_string = +[](void *) { fputs("\"", stdout); };
  cb.on_string_data = echoBytes;
  cb.on_end_string = +[](void *) { fputs("\"", stdout); };

  // Numbers: only the digits themselves are written.
  cb.on_begin_number = emitNothing;
  cb.on_number_data = echoBytes;
  cb.on_end_number = emitNothing;

  // Keyword literals.
  cb.on_true_literal = +[](void *) { fputs("true", stdout); };
  cb.on_false_literal = +[](void *) { fputs("false", stdout); };
  cb.on_null_literal = +[](void *) { fputs("null", stdout); };

  return cb;
}
} // namespace } // namespace
TEST_CASE("parser1") { TEST_CASE("parser1") {
Callbacks c = printCallbacks(); Callbacks c = minifyCallbacks();
{ {
auto copy = json; auto copy = json;
Parser1 parser(copy.data(), copy.length(), &c, nullptr); Parser1 parser(copy.data(), copy.length(), &c, nullptr);
@@ -830,7 +893,7 @@ TEST_CASE("parser1") {
} }
TEST_CASE("parser2") { TEST_CASE("parser2") {
Callbacks c = printCallbacks(); Callbacks c = minifyCallbacks();
{ {
auto copy = json; auto copy = json;
Parser2 parser(&c, nullptr); Parser2 parser(&c, nullptr);