diff --git a/src/test.cpp b/src/test.cpp index d9e4bb2..3b93de9 100644 --- a/src/test.cpp +++ b/src/test.cpp @@ -151,6 +151,7 @@ enum Symbol : int8_t { T_DUBQUOTE, // Nonterminals N_STRING, // Not including leading double quote, but including trailing quote + N_STRING_FROM_ESCAPE, // Immediately after a backslash N_NUMBER, N_VALUE, N_ARRAY_VALUE_OR_END, @@ -173,6 +174,7 @@ static const char *symbolNames[] = { "T_S", "T_DUBQUOTE", "N_STRING", + "N_STRING_FROM_ESCAPE", "N_NUMBER", "N_VALUE", "N_ARRAY_VALUE_OR_END", @@ -366,16 +368,23 @@ private: if (!parseLiteral("\"")) { return false; } - auto *result = (char *)memchr(buf, '"', len()); - if (result == nullptr) { - return false; + char *result = buf; + for (;;) { + result = (char *)memchr(result, '"', bufEnd - result); + if (result == nullptr) { + return false; + } + if (result != buf && result[-1] == '\\') { + ++result; + continue; + } + break; } int stringLen = result - buf; - callbacks->on_string_data(data, buf, stringLen); - buf += stringLen; - if (!parseLiteral("\"")) { - return false; + if (stringLen > 0) { + callbacks->on_string_data(data, buf, stringLen); } + buf += stringLen + 1; callbacks->on_end_string(data); return true; } @@ -494,12 +503,38 @@ private: callbacks->on_end_number(data); return S_OK; } - Status parse_string() { - auto *result = (char *)memchr(buf, '"', len()); - if (result == nullptr) { - callbacks->on_string_data(data, buf, len()); - buf += len(); - return S_AGAIN; + Status parse_string(bool fromEscape) { + auto *result = buf; + if (fromEscape) { + if (*result == '\"') { + ++result; + } + pop(); + if (Status s = push({N_STRING})) { + return s; + } + } + for (;;) { + result = (char *)memchr(result, '"', bufEnd - result); + if (result == nullptr) { + callbacks->on_string_data(data, buf, len()); + if (bufEnd[-1] == '\\') { + pop(); + if (Status s = push({N_STRING_FROM_ESCAPE})) { + return s; + } + } + return S_AGAIN; + } + if (result != buf && result[-1] == '\\') { + ++result; + if (result == 
bufEnd) { + callbacks->on_string_data(data, buf, len()); + return S_AGAIN; + } + continue; + } + break; } int stringLen = result - buf; if (stringLen > 0) { @@ -531,7 +566,14 @@ private: } static Status string(Parser2 *self) { - if (Status s = self->parse_string()) { + if (Status s = self->parse_string(false)) { + return s; + } + self->pop(); + MUSTTAIL return keepGoing(self); + } + static Status stringFromEscape(Parser2 *self) { + if (Status s = self->parse_string(true)) { return s; } self->pop(); @@ -730,6 +772,7 @@ private: /*T_S*/ singleChar<'s'>, /*T_DUBQUOTE*/ dubquote, /*N_STRING*/ string, + /*N_STRING_FROM_ESCAPE*/ stringFromEscape, /*N_NUMBER*/ number, /*N_VALUE*/ value, /*N_ARRAY_VALUE_OR_END*/ arrayOrEnd, @@ -813,10 +856,30 @@ Callbacks printCallbacks() { return result; } +Callbacks minifyCallbacks() { + Callbacks result; + result.on_begin_object = +[](void *) { printf("{"); }; + result.on_end_object = +[](void *) { printf("}\n"); }; + result.on_begin_string = +[](void *) { printf("\""); }; + result.on_string_data = + +[](void *, const char *buf, int len) { printf("%.*s", len, buf); }; + result.on_end_string = +[](void *) { printf("\""); }; + result.on_begin_array = +[](void *) { printf("["); }; + result.on_end_array = +[](void *) { printf("]\n"); }; + result.on_begin_number = +[](void *) {}; + result.on_number_data = + +[](void *, const char *buf, int len) { printf("%.*s", len, buf); }; + result.on_end_number = +[](void *) {}; + result.on_true_literal = +[](void *) { printf("true"); }; + result.on_false_literal = +[](void *) { printf("false"); }; + result.on_null_literal = +[](void *) { printf("null"); }; + return result; +} + } // namespace TEST_CASE("parser1") { - Callbacks c = printCallbacks(); + Callbacks c = minifyCallbacks(); { auto copy = json; Parser1 parser(copy.data(), copy.length(), &c, nullptr); @@ -830,7 +893,7 @@ TEST_CASE("parser1") { } TEST_CASE("parser2") { - Callbacks c = printCallbacks(); + Callbacks c = minifyCallbacks(); { auto copy 
= json; Parser2 parser(&c, nullptr);