Add T_EOF
This commit is contained in:
@@ -48,3 +48,6 @@ add_executable(mytest src/test.cpp)
|
||||
target_include_directories(mytest PRIVATE include)
|
||||
target_link_libraries(mytest PRIVATE doctest nanobench simdjson)
|
||||
doctest_discover_tests(mytest)
|
||||
|
||||
add_executable(validate src/validate.cpp)
|
||||
target_include_directories(validate PRIVATE include)
|
||||
|
||||
69
src/parser.h
69
src/parser.h
@@ -26,6 +26,7 @@ enum Symbol : int8_t {
|
||||
T_L,
|
||||
T_S,
|
||||
T_DUBQUOTE,
|
||||
T_EOF,
|
||||
// Nonterminals
|
||||
N_STRING, // Not including leading double quote, but including trailing quote
|
||||
N_STRING_FROM_ESCAPE, // Immediately after a backslash
|
||||
@@ -50,6 +51,7 @@ inline const char *symbolNames[] = {
|
||||
"T_L",
|
||||
"T_S",
|
||||
"T_DUBQUOTE",
|
||||
"T_EOF",
|
||||
"N_STRING",
|
||||
"N_STRING_FROM_ESCAPE",
|
||||
"N_NUMBER",
|
||||
@@ -68,7 +70,7 @@ static_assert(sizeof(symbolNames) / sizeof(symbolNames[0]) == N_PAST_END);
|
||||
struct Parser2 {
|
||||
Parser2(const Callbacks *callbacks, void *data)
|
||||
: callbacks(callbacks), data(data) {
|
||||
std::ignore = push({N_WHITESPACE, N_VALUE});
|
||||
std::ignore = push({N_WHITESPACE, N_VALUE, N_WHITESPACE, T_EOF});
|
||||
}
|
||||
|
||||
enum Status {
|
||||
@@ -83,9 +85,10 @@ struct Parser2 {
|
||||
};
|
||||
|
||||
[[nodiscard]] Status parse(char *buf, int len) {
|
||||
complete = len == 0;
|
||||
this->buf = buf;
|
||||
this->bufEnd = buf + len;
|
||||
return keepGoing(this);
|
||||
return table[*(stackPtr - 1)](this);
|
||||
}
|
||||
|
||||
Parser2(Parser2 const &) = delete;
|
||||
@@ -96,6 +99,7 @@ struct Parser2 {
|
||||
static constexpr int kMaxStackSize = 1 << 10;
|
||||
|
||||
private:
|
||||
bool complete = false;
|
||||
// Helpers
|
||||
void maybeSkipWs() {
|
||||
while (buf != bufEnd && tables.whitespace[*buf]) {
|
||||
@@ -114,7 +118,7 @@ private:
|
||||
if (buf != bufBefore) {
|
||||
callbacks->on_number_data(data, bufBefore, buf - bufBefore);
|
||||
}
|
||||
if (len() == 0) {
|
||||
if (len() == 0 && !complete) {
|
||||
return S_AGAIN;
|
||||
}
|
||||
callbacks->on_end_number(data);
|
||||
@@ -134,6 +138,9 @@ private:
|
||||
for (;;) {
|
||||
result = (char *)memchr(result, '"', bufEnd - result);
|
||||
if (result == nullptr) {
|
||||
if (complete) {
|
||||
return S_REJECT;
|
||||
}
|
||||
callbacks->on_string_data(data, buf, len());
|
||||
if (bufEnd[-1] == '\\') {
|
||||
pop();
|
||||
@@ -146,6 +153,9 @@ private:
|
||||
if (result != buf && result[-1] == '\\') {
|
||||
++result;
|
||||
if (result == bufEnd) {
|
||||
if (complete) {
|
||||
return S_REJECT;
|
||||
}
|
||||
callbacks->on_string_data(data, buf, len());
|
||||
return S_AGAIN;
|
||||
}
|
||||
@@ -172,6 +182,7 @@ private:
|
||||
}
|
||||
|
||||
static Status keepGoing(Parser2 *self) {
|
||||
assert(!self->complete);
|
||||
if (self->len() == 0) {
|
||||
return S_AGAIN;
|
||||
}
|
||||
@@ -184,9 +195,6 @@ private:
|
||||
return s;
|
||||
}
|
||||
self->pop();
|
||||
if (self->empty()) {
|
||||
return S_OK;
|
||||
}
|
||||
MUSTTAIL return keepGoing(self);
|
||||
}
|
||||
static Status stringFromEscape(Parser2 *self) {
|
||||
@@ -194,9 +202,6 @@ private:
|
||||
return s;
|
||||
}
|
||||
self->pop();
|
||||
if (self->empty()) {
|
||||
return S_OK;
|
||||
}
|
||||
MUSTTAIL return keepGoing(self);
|
||||
}
|
||||
static Status number(Parser2 *self) {
|
||||
@@ -204,9 +209,6 @@ private:
|
||||
return s;
|
||||
}
|
||||
self->pop();
|
||||
if (self->empty()) {
|
||||
return S_OK;
|
||||
}
|
||||
MUSTTAIL return keepGoing(self);
|
||||
}
|
||||
static Status value(Parser2 *self) {
|
||||
@@ -257,12 +259,15 @@ private:
|
||||
}
|
||||
break;
|
||||
default:
|
||||
self->pop();
|
||||
self->callbacks->on_begin_number(self->data);
|
||||
if (Status s = self->push({N_NUMBER})) {
|
||||
return s;
|
||||
if (tables.number[*self->buf]) {
|
||||
self->pop();
|
||||
self->callbacks->on_begin_number(self->data);
|
||||
if (Status s = self->push({N_NUMBER})) {
|
||||
return s;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
return S_REJECT;
|
||||
}
|
||||
MUSTTAIL return keepGoing(self);
|
||||
}
|
||||
@@ -271,9 +276,6 @@ private:
|
||||
++self->buf;
|
||||
self->pop();
|
||||
self->callbacks->on_end_array(self->data);
|
||||
if (self->empty()) {
|
||||
return S_OK;
|
||||
}
|
||||
MUSTTAIL return keepGoing(self);
|
||||
} else {
|
||||
self->pop();
|
||||
@@ -289,9 +291,6 @@ private:
|
||||
++self->buf;
|
||||
self->pop();
|
||||
self->callbacks->on_end_object(self->data);
|
||||
if (self->empty()) {
|
||||
return S_OK;
|
||||
}
|
||||
MUSTTAIL return keepGoing(self);
|
||||
} else if (*self->buf == '"') {
|
||||
self->callbacks->on_begin_string(self->data);
|
||||
@@ -319,9 +318,6 @@ private:
|
||||
++self->buf;
|
||||
self->pop();
|
||||
self->callbacks->on_end_array(self->data);
|
||||
if (self->empty()) {
|
||||
return S_OK;
|
||||
}
|
||||
MUSTTAIL return keepGoing(self);
|
||||
}
|
||||
return S_REJECT;
|
||||
@@ -340,9 +336,6 @@ private:
|
||||
++self->buf;
|
||||
self->pop();
|
||||
self->callbacks->on_end_object(self->data);
|
||||
if (self->empty()) {
|
||||
return S_OK;
|
||||
}
|
||||
MUSTTAIL return keepGoing(self);
|
||||
}
|
||||
return S_REJECT;
|
||||
@@ -351,9 +344,6 @@ private:
|
||||
if (*self->buf++ == 'e') {
|
||||
self->pop();
|
||||
self->callbacks->on_true_literal(self->data);
|
||||
if (self->empty()) {
|
||||
return S_OK;
|
||||
}
|
||||
MUSTTAIL return keepGoing(self);
|
||||
}
|
||||
return S_REJECT;
|
||||
@@ -362,9 +352,6 @@ private:
|
||||
if (*self->buf++ == 'e') {
|
||||
self->pop();
|
||||
self->callbacks->on_false_literal(self->data);
|
||||
if (self->empty()) {
|
||||
return S_OK;
|
||||
}
|
||||
MUSTTAIL return keepGoing(self);
|
||||
}
|
||||
return S_REJECT;
|
||||
@@ -373,9 +360,6 @@ private:
|
||||
if (*self->buf++ == 'l') {
|
||||
self->pop();
|
||||
self->callbacks->on_null_literal(self->data);
|
||||
if (self->empty()) {
|
||||
return S_OK;
|
||||
}
|
||||
MUSTTAIL return keepGoing(self);
|
||||
}
|
||||
return S_REJECT;
|
||||
@@ -397,12 +381,18 @@ private:
|
||||
}
|
||||
static Status whitespace(Parser2 *self) {
|
||||
self->maybeSkipWs();
|
||||
if (self->len() == 0) {
|
||||
if (self->len() == 0 && !self->complete) {
|
||||
return S_AGAIN;
|
||||
}
|
||||
self->pop();
|
||||
MUSTTAIL return keepGoing(self);
|
||||
}
|
||||
static Status eof(Parser2 *self) {
|
||||
if (self->complete) {
|
||||
return S_OK;
|
||||
}
|
||||
return S_REJECT;
|
||||
}
|
||||
|
||||
static constexpr continuation table[] = {
|
||||
/*T_COLON*/ singleChar<':'>,
|
||||
@@ -415,6 +405,7 @@ private:
|
||||
/*T_L*/ singleChar<'l'>,
|
||||
/*T_S*/ singleChar<'s'>,
|
||||
/*T_DUBQUOTE*/ dubquote,
|
||||
/*T_EOF*/ eof,
|
||||
/*N_STRING*/ string,
|
||||
/*N_STRING_FROM_ESCAPE*/ stringFromEscape,
|
||||
/*N_NUMBER*/ number,
|
||||
|
||||
13
src/test.cpp
13
src/test.cpp
@@ -544,15 +544,24 @@ TEST_CASE("parser2") {
|
||||
for (; i < copy.length() - 1; ++i) {
|
||||
REQUIRE(parser.parse(copy.data() + i, 1) == Parser2::S_AGAIN);
|
||||
}
|
||||
CHECK(parser.parse(copy.data() + i, 1) == Parser2::S_OK);
|
||||
CHECK(parser.parse(copy.data() + i, 1) == Parser2::S_AGAIN);
|
||||
CHECK(parser.parse(nullptr, 0) == Parser2::S_OK);
|
||||
puts("");
|
||||
}
|
||||
{
|
||||
std::string copy = "{\"x\": [], \"y\": {}}";
|
||||
Parser2 parser(&c, &state);
|
||||
CHECK(parser.parse(copy.data(), copy.length()) == Parser2::S_OK);
|
||||
CHECK(parser.parse(copy.data(), copy.length()) == Parser2::S_AGAIN);
|
||||
CHECK(parser.parse(nullptr, 0) == Parser2::S_OK);
|
||||
puts("");
|
||||
}
|
||||
{
|
||||
auto c = noopCallbacks();
|
||||
std::string copy = "{\"a\":\"a";
|
||||
Parser2 parser(&c, &state);
|
||||
CHECK(parser.parse(copy.data(), copy.length()) == Parser2::S_AGAIN);
|
||||
CHECK(parser.parse(nullptr, 0) == Parser2::S_REJECT);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("bench1") {
|
||||
|
||||
Reference in New Issue
Block a user