diff --git a/CMakeLists.txt b/CMakeLists.txt index ce89c5d..4dbd385 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -48,3 +48,6 @@ add_executable(mytest src/test.cpp) target_include_directories(mytest PRIVATE include) target_link_libraries(mytest PRIVATE doctest nanobench simdjson) doctest_discover_tests(mytest) + +add_executable(validate src/validate.cpp) +target_include_directories(validate PRIVATE include) diff --git a/src/parser.h b/src/parser.h index 147af3a..16e72a4 100644 --- a/src/parser.h +++ b/src/parser.h @@ -26,6 +26,7 @@ enum Symbol : int8_t { T_L, T_S, T_DUBQUOTE, + T_EOF, // Nonterminals N_STRING, // Not including leading double quote, but including trailing quote N_STRING_FROM_ESCAPE, // Immediately after a backslach @@ -50,6 +51,7 @@ inline const char *symbolNames[] = { "T_L", "T_S", "T_DUBQUOTE", + "T_EOF", "N_STRING", "N_STRING_FROM_ESCAPE", "N_NUMBER", @@ -68,7 +70,7 @@ static_assert(sizeof(symbolNames) / sizeof(symbolNames[0]) == N_PAST_END); struct Parser2 { Parser2(const Callbacks *callbacks, void *data) : callbacks(callbacks), data(data) { - std::ignore = push({N_WHITESPACE, N_VALUE}); + std::ignore = push({N_WHITESPACE, N_VALUE, N_WHITESPACE, T_EOF}); } enum Status { @@ -83,9 +85,10 @@ struct Parser2 { }; [[nodiscard]] Status parse(char *buf, int len) { + complete = len == 0; this->buf = buf; this->bufEnd = buf + len; - return keepGoing(this); + return table[*(stackPtr - 1)](this); } Parser2(Parser2 const &) = delete; @@ -96,6 +99,7 @@ struct Parser2 { static constexpr int kMaxStackSize = 1 << 10; private: + bool complete = false; // Helpers void maybeSkipWs() { while (buf != bufEnd && tables.whitespace[*buf]) { @@ -114,7 +118,7 @@ private: if (buf != bufBefore) { callbacks->on_number_data(data, bufBefore, buf - bufBefore); } - if (len() == 0) { + if (len() == 0 && !complete) { return S_AGAIN; } callbacks->on_end_number(data); @@ -134,6 +138,9 @@ private: for (;;) { result = (char *)memchr(result, '"', bufEnd - result); if (result == nullptr) { + if (complete) { + return S_REJECT; + } callbacks->on_string_data(data, buf, len()); if (bufEnd[-1] == '\\') { pop(); @@ -146,6 +153,9 @@ private: if (result != buf && result[-1] == '\\') { ++result; if (result == bufEnd) { + if (complete) { + return S_REJECT; + } callbacks->on_string_data(data, buf, len()); return S_AGAIN; } @@ -172,6 +182,7 @@ private: } static Status keepGoing(Parser2 *self) { + assert(!self->complete); if (self->len() == 0) { return S_AGAIN; } @@ -184,9 +195,6 @@ private: return s; } self->pop(); - if (self->empty()) { - return S_OK; - } MUSTTAIL return keepGoing(self); } static Status stringFromEscape(Parser2 *self) { @@ -194,9 +202,6 @@ private: return s; } self->pop(); - if (self->empty()) { - return S_OK; - } MUSTTAIL return keepGoing(self); } static Status number(Parser2 *self) { @@ -204,9 +209,6 @@ private: return s; } self->pop(); - if (self->empty()) { - return S_OK; - } MUSTTAIL return keepGoing(self); } static Status value(Parser2 *self) { @@ -257,12 +259,15 @@ private: } break; default: - self->pop(); - self->callbacks->on_begin_number(self->data); - if (Status s = self->push({N_NUMBER})) { - return s; + if (tables.number[*self->buf]) { + self->pop(); + self->callbacks->on_begin_number(self->data); + if (Status s = self->push({N_NUMBER})) { + return s; + } + break; } - break; + return S_REJECT; } MUSTTAIL return keepGoing(self); } @@ -271,9 +276,6 @@ private: ++self->buf; self->pop(); self->callbacks->on_end_array(self->data); - if (self->empty()) { - return S_OK; - } MUSTTAIL return keepGoing(self); } else { self->pop(); @@ -289,9 +291,6 @@ private: ++self->buf; self->pop(); self->callbacks->on_end_object(self->data); - if (self->empty()) { - return S_OK; - } MUSTTAIL return keepGoing(self); } else if (*self->buf == '"') { self->callbacks->on_begin_string(self->data); @@ -319,9 +318,6 @@ private: ++self->buf; self->pop(); self->callbacks->on_end_array(self->data); - if (self->empty()) { - return S_OK; - } MUSTTAIL return keepGoing(self); } return S_REJECT; @@ -340,9 +336,6 @@ private: ++self->buf; self->pop(); self->callbacks->on_end_object(self->data); - if (self->empty()) { - return S_OK; - } MUSTTAIL return keepGoing(self); } return S_REJECT; @@ -351,9 +344,6 @@ private: if (*self->buf++ == 'e') { self->pop(); self->callbacks->on_true_literal(self->data); - if (self->empty()) { - return S_OK; - } MUSTTAIL return keepGoing(self); } return S_REJECT; @@ -362,9 +352,6 @@ private: if (*self->buf++ == 'e') { self->pop(); self->callbacks->on_false_literal(self->data); - if (self->empty()) { - return S_OK; - } MUSTTAIL return keepGoing(self); } return S_REJECT; @@ -373,9 +360,6 @@ private: if (*self->buf++ == 'l') { self->pop(); self->callbacks->on_null_literal(self->data); - if (self->empty()) { - return S_OK; - } MUSTTAIL return keepGoing(self); } return S_REJECT; @@ -397,12 +381,18 @@ private: } static Status whitespace(Parser2 *self) { self->maybeSkipWs(); - if (self->len() == 0) { + if (self->len() == 0 && !self->complete) { return S_AGAIN; } self->pop(); MUSTTAIL return keepGoing(self); } + static Status eof(Parser2 *self) { + if (self->complete) { + return S_OK; + } + return S_REJECT; + } static constexpr continuation table[] = { /*T_COLON*/ singleChar<':'>, @@ -415,6 +405,7 @@ private: /*T_L*/ singleChar<'l'>, /*T_S*/ singleChar<'s'>, /*T_DUBQUOTE*/ dubquote, + /*T_EOF*/ eof, /*N_STRING*/ string, /*N_STRING_FROM_ESCAPE*/ stringFromEscape, /*N_NUMBER*/ number, diff --git a/src/test.cpp b/src/test.cpp index 88b2aaf..a846cc2 100644 --- a/src/test.cpp +++ b/src/test.cpp @@ -544,15 +544,24 @@ TEST_CASE("parser2") { for (; i < copy.length() - 1; ++i) { REQUIRE(parser.parse(copy.data() + i, 1) == Parser2::S_AGAIN); } - CHECK(parser.parse(copy.data() + i, 1) == Parser2::S_OK); + CHECK(parser.parse(copy.data() + i, 1) == Parser2::S_AGAIN); + CHECK(parser.parse(nullptr, 0) == Parser2::S_OK); puts(""); } { std::string copy = "{\"x\": [], \"y\": {}}"; Parser2 parser(&c, &state); - CHECK(parser.parse(copy.data(), copy.length()) == Parser2::S_OK); + CHECK(parser.parse(copy.data(), copy.length()) == Parser2::S_AGAIN); + CHECK(parser.parse(nullptr, 0) == Parser2::S_OK); puts(""); } + { + auto c = noopCallbacks(); + std::string copy = "{\"a\":\"a"; + Parser2 parser(&c, &state); + CHECK(parser.parse(copy.data(), copy.length()) == Parser2::S_AGAIN); + CHECK(parser.parse(nullptr, 0) == Parser2::S_REJECT); + } } TEST_CASE("bench1") {