Add T_EOF

This commit is contained in:
2025-05-17 17:15:01 -04:00
parent 37c860ce62
commit 925429f8e4
3 changed files with 44 additions and 41 deletions

View File

@@ -48,3 +48,6 @@ add_executable(mytest src/test.cpp)
target_include_directories(mytest PRIVATE include) target_include_directories(mytest PRIVATE include)
target_link_libraries(mytest PRIVATE doctest nanobench simdjson) target_link_libraries(mytest PRIVATE doctest nanobench simdjson)
doctest_discover_tests(mytest) doctest_discover_tests(mytest)
add_executable(validate src/validate.cpp)
target_include_directories(validate PRIVATE include)

View File

@@ -26,6 +26,7 @@ enum Symbol : int8_t {
T_L, T_L,
T_S, T_S,
T_DUBQUOTE, T_DUBQUOTE,
T_EOF,
// Nonterminals // Nonterminals
N_STRING, // Not including leading double quote, but including trailing quote N_STRING, // Not including leading double quote, but including trailing quote
N_STRING_FROM_ESCAPE, // Immediately after a backslash N_STRING_FROM_ESCAPE, // Immediately after a backslash
@@ -50,6 +51,7 @@ inline const char *symbolNames[] = {
"T_L", "T_L",
"T_S", "T_S",
"T_DUBQUOTE", "T_DUBQUOTE",
"T_EOF",
"N_STRING", "N_STRING",
"N_STRING_FROM_ESCAPE", "N_STRING_FROM_ESCAPE",
"N_NUMBER", "N_NUMBER",
@@ -68,7 +70,7 @@ static_assert(sizeof(symbolNames) / sizeof(symbolNames[0]) == N_PAST_END);
struct Parser2 { struct Parser2 {
Parser2(const Callbacks *callbacks, void *data) Parser2(const Callbacks *callbacks, void *data)
: callbacks(callbacks), data(data) { : callbacks(callbacks), data(data) {
std::ignore = push({N_WHITESPACE, N_VALUE}); std::ignore = push({N_WHITESPACE, N_VALUE, N_WHITESPACE, T_EOF});
} }
enum Status { enum Status {
@@ -83,9 +85,10 @@ struct Parser2 {
}; };
[[nodiscard]] Status parse(char *buf, int len) { [[nodiscard]] Status parse(char *buf, int len) {
complete = len == 0;
this->buf = buf; this->buf = buf;
this->bufEnd = buf + len; this->bufEnd = buf + len;
return keepGoing(this); return table[*(stackPtr - 1)](this);
} }
Parser2(Parser2 const &) = delete; Parser2(Parser2 const &) = delete;
@@ -96,6 +99,7 @@ struct Parser2 {
static constexpr int kMaxStackSize = 1 << 10; static constexpr int kMaxStackSize = 1 << 10;
private: private:
bool complete = false;
// Helpers // Helpers
void maybeSkipWs() { void maybeSkipWs() {
while (buf != bufEnd && tables.whitespace[*buf]) { while (buf != bufEnd && tables.whitespace[*buf]) {
@@ -114,7 +118,7 @@ private:
if (buf != bufBefore) { if (buf != bufBefore) {
callbacks->on_number_data(data, bufBefore, buf - bufBefore); callbacks->on_number_data(data, bufBefore, buf - bufBefore);
} }
if (len() == 0) { if (len() == 0 && !complete) {
return S_AGAIN; return S_AGAIN;
} }
callbacks->on_end_number(data); callbacks->on_end_number(data);
@@ -134,6 +138,9 @@ private:
for (;;) { for (;;) {
result = (char *)memchr(result, '"', bufEnd - result); result = (char *)memchr(result, '"', bufEnd - result);
if (result == nullptr) { if (result == nullptr) {
if (complete) {
return S_REJECT;
}
callbacks->on_string_data(data, buf, len()); callbacks->on_string_data(data, buf, len());
if (bufEnd[-1] == '\\') { if (bufEnd[-1] == '\\') {
pop(); pop();
@@ -146,6 +153,9 @@ private:
if (result != buf && result[-1] == '\\') { if (result != buf && result[-1] == '\\') {
++result; ++result;
if (result == bufEnd) { if (result == bufEnd) {
if (complete) {
return S_REJECT;
}
callbacks->on_string_data(data, buf, len()); callbacks->on_string_data(data, buf, len());
return S_AGAIN; return S_AGAIN;
} }
@@ -172,6 +182,7 @@ private:
} }
static Status keepGoing(Parser2 *self) { static Status keepGoing(Parser2 *self) {
assert(!self->complete);
if (self->len() == 0) { if (self->len() == 0) {
return S_AGAIN; return S_AGAIN;
} }
@@ -184,9 +195,6 @@ private:
return s; return s;
} }
self->pop(); self->pop();
if (self->empty()) {
return S_OK;
}
MUSTTAIL return keepGoing(self); MUSTTAIL return keepGoing(self);
} }
static Status stringFromEscape(Parser2 *self) { static Status stringFromEscape(Parser2 *self) {
@@ -194,9 +202,6 @@ private:
return s; return s;
} }
self->pop(); self->pop();
if (self->empty()) {
return S_OK;
}
MUSTTAIL return keepGoing(self); MUSTTAIL return keepGoing(self);
} }
static Status number(Parser2 *self) { static Status number(Parser2 *self) {
@@ -204,9 +209,6 @@ private:
return s; return s;
} }
self->pop(); self->pop();
if (self->empty()) {
return S_OK;
}
MUSTTAIL return keepGoing(self); MUSTTAIL return keepGoing(self);
} }
static Status value(Parser2 *self) { static Status value(Parser2 *self) {
@@ -257,12 +259,15 @@ private:
} }
break; break;
default: default:
self->pop(); if (tables.number[*self->buf]) {
self->callbacks->on_begin_number(self->data); self->pop();
if (Status s = self->push({N_NUMBER})) { self->callbacks->on_begin_number(self->data);
return s; if (Status s = self->push({N_NUMBER})) {
return s;
}
break;
} }
break; return S_REJECT;
} }
MUSTTAIL return keepGoing(self); MUSTTAIL return keepGoing(self);
} }
@@ -271,9 +276,6 @@ private:
++self->buf; ++self->buf;
self->pop(); self->pop();
self->callbacks->on_end_array(self->data); self->callbacks->on_end_array(self->data);
if (self->empty()) {
return S_OK;
}
MUSTTAIL return keepGoing(self); MUSTTAIL return keepGoing(self);
} else { } else {
self->pop(); self->pop();
@@ -289,9 +291,6 @@ private:
++self->buf; ++self->buf;
self->pop(); self->pop();
self->callbacks->on_end_object(self->data); self->callbacks->on_end_object(self->data);
if (self->empty()) {
return S_OK;
}
MUSTTAIL return keepGoing(self); MUSTTAIL return keepGoing(self);
} else if (*self->buf == '"') { } else if (*self->buf == '"') {
self->callbacks->on_begin_string(self->data); self->callbacks->on_begin_string(self->data);
@@ -319,9 +318,6 @@ private:
++self->buf; ++self->buf;
self->pop(); self->pop();
self->callbacks->on_end_array(self->data); self->callbacks->on_end_array(self->data);
if (self->empty()) {
return S_OK;
}
MUSTTAIL return keepGoing(self); MUSTTAIL return keepGoing(self);
} }
return S_REJECT; return S_REJECT;
@@ -340,9 +336,6 @@ private:
++self->buf; ++self->buf;
self->pop(); self->pop();
self->callbacks->on_end_object(self->data); self->callbacks->on_end_object(self->data);
if (self->empty()) {
return S_OK;
}
MUSTTAIL return keepGoing(self); MUSTTAIL return keepGoing(self);
} }
return S_REJECT; return S_REJECT;
@@ -351,9 +344,6 @@ private:
if (*self->buf++ == 'e') { if (*self->buf++ == 'e') {
self->pop(); self->pop();
self->callbacks->on_true_literal(self->data); self->callbacks->on_true_literal(self->data);
if (self->empty()) {
return S_OK;
}
MUSTTAIL return keepGoing(self); MUSTTAIL return keepGoing(self);
} }
return S_REJECT; return S_REJECT;
@@ -362,9 +352,6 @@ private:
if (*self->buf++ == 'e') { if (*self->buf++ == 'e') {
self->pop(); self->pop();
self->callbacks->on_false_literal(self->data); self->callbacks->on_false_literal(self->data);
if (self->empty()) {
return S_OK;
}
MUSTTAIL return keepGoing(self); MUSTTAIL return keepGoing(self);
} }
return S_REJECT; return S_REJECT;
@@ -373,9 +360,6 @@ private:
if (*self->buf++ == 'l') { if (*self->buf++ == 'l') {
self->pop(); self->pop();
self->callbacks->on_null_literal(self->data); self->callbacks->on_null_literal(self->data);
if (self->empty()) {
return S_OK;
}
MUSTTAIL return keepGoing(self); MUSTTAIL return keepGoing(self);
} }
return S_REJECT; return S_REJECT;
@@ -397,12 +381,18 @@ private:
} }
static Status whitespace(Parser2 *self) { static Status whitespace(Parser2 *self) {
self->maybeSkipWs(); self->maybeSkipWs();
if (self->len() == 0) { if (self->len() == 0 && !self->complete) {
return S_AGAIN; return S_AGAIN;
} }
self->pop(); self->pop();
MUSTTAIL return keepGoing(self); MUSTTAIL return keepGoing(self);
} }
// Continuation registered for T_EOF in the dispatch table.
// Accepts only when the caller has signalled end of input: `complete` is set
// by parse() when it is invoked with len == 0. If this symbol is reached while
// `complete` is still false, the input is rejected.
static Status eof(Parser2 *self) {
  return self->complete ? S_OK : S_REJECT;
}
static constexpr continuation table[] = { static constexpr continuation table[] = {
/*T_COLON*/ singleChar<':'>, /*T_COLON*/ singleChar<':'>,
@@ -415,6 +405,7 @@ private:
/*T_L*/ singleChar<'l'>, /*T_L*/ singleChar<'l'>,
/*T_S*/ singleChar<'s'>, /*T_S*/ singleChar<'s'>,
/*T_DUBQUOTE*/ dubquote, /*T_DUBQUOTE*/ dubquote,
/*T_EOF*/ eof,
/*N_STRING*/ string, /*N_STRING*/ string,
/*N_STRING_FROM_ESCAPE*/ stringFromEscape, /*N_STRING_FROM_ESCAPE*/ stringFromEscape,
/*N_NUMBER*/ number, /*N_NUMBER*/ number,

View File

@@ -544,15 +544,24 @@ TEST_CASE("parser2") {
for (; i < copy.length() - 1; ++i) { for (; i < copy.length() - 1; ++i) {
REQUIRE(parser.parse(copy.data() + i, 1) == Parser2::S_AGAIN); REQUIRE(parser.parse(copy.data() + i, 1) == Parser2::S_AGAIN);
} }
CHECK(parser.parse(copy.data() + i, 1) == Parser2::S_OK); CHECK(parser.parse(copy.data() + i, 1) == Parser2::S_AGAIN);
CHECK(parser.parse(nullptr, 0) == Parser2::S_OK);
puts(""); puts("");
} }
{ {
std::string copy = "{\"x\": [], \"y\": {}}"; std::string copy = "{\"x\": [], \"y\": {}}";
Parser2 parser(&c, &state); Parser2 parser(&c, &state);
CHECK(parser.parse(copy.data(), copy.length()) == Parser2::S_OK); CHECK(parser.parse(copy.data(), copy.length()) == Parser2::S_AGAIN);
CHECK(parser.parse(nullptr, 0) == Parser2::S_OK);
puts(""); puts("");
} }
{
auto c = noopCallbacks();
std::string copy = "{\"a\":\"a";
Parser2 parser(&c, &state);
CHECK(parser.parse(copy.data(), copy.length()) == Parser2::S_AGAIN);
CHECK(parser.parse(nullptr, 0) == Parser2::S_REJECT);
}
} }
TEST_CASE("bench1") { TEST_CASE("bench1") {