Add T_EOF
This commit is contained in:
@@ -48,3 +48,6 @@ add_executable(mytest src/test.cpp)
|
|||||||
target_include_directories(mytest PRIVATE include)
|
target_include_directories(mytest PRIVATE include)
|
||||||
target_link_libraries(mytest PRIVATE doctest nanobench simdjson)
|
target_link_libraries(mytest PRIVATE doctest nanobench simdjson)
|
||||||
doctest_discover_tests(mytest)
|
doctest_discover_tests(mytest)
|
||||||
|
|
||||||
|
add_executable(validate src/validate.cpp)
|
||||||
|
target_include_directories(validate PRIVATE include)
|
||||||
|
|||||||
69
src/parser.h
69
src/parser.h
@@ -26,6 +26,7 @@ enum Symbol : int8_t {
|
|||||||
T_L,
|
T_L,
|
||||||
T_S,
|
T_S,
|
||||||
T_DUBQUOTE,
|
T_DUBQUOTE,
|
||||||
|
T_EOF,
|
||||||
// Nonterminals
|
// Nonterminals
|
||||||
N_STRING, // Not including leading double quote, but including trailing quote
|
N_STRING, // Not including leading double quote, but including trailing quote
|
||||||
N_STRING_FROM_ESCAPE, // Immediately after a backslash
|
N_STRING_FROM_ESCAPE, // Immediately after a backslash
|
||||||
@@ -50,6 +51,7 @@ inline const char *symbolNames[] = {
|
|||||||
"T_L",
|
"T_L",
|
||||||
"T_S",
|
"T_S",
|
||||||
"T_DUBQUOTE",
|
"T_DUBQUOTE",
|
||||||
|
"T_EOF",
|
||||||
"N_STRING",
|
"N_STRING",
|
||||||
"N_STRING_FROM_ESCAPE",
|
"N_STRING_FROM_ESCAPE",
|
||||||
"N_NUMBER",
|
"N_NUMBER",
|
||||||
@@ -68,7 +70,7 @@ static_assert(sizeof(symbolNames) / sizeof(symbolNames[0]) == N_PAST_END);
|
|||||||
struct Parser2 {
|
struct Parser2 {
|
||||||
Parser2(const Callbacks *callbacks, void *data)
|
Parser2(const Callbacks *callbacks, void *data)
|
||||||
: callbacks(callbacks), data(data) {
|
: callbacks(callbacks), data(data) {
|
||||||
std::ignore = push({N_WHITESPACE, N_VALUE});
|
std::ignore = push({N_WHITESPACE, N_VALUE, N_WHITESPACE, T_EOF});
|
||||||
}
|
}
|
||||||
|
|
||||||
enum Status {
|
enum Status {
|
||||||
@@ -83,9 +85,10 @@ struct Parser2 {
|
|||||||
};
|
};
|
||||||
|
|
||||||
[[nodiscard]] Status parse(char *buf, int len) {
|
[[nodiscard]] Status parse(char *buf, int len) {
|
||||||
|
complete = len == 0;
|
||||||
this->buf = buf;
|
this->buf = buf;
|
||||||
this->bufEnd = buf + len;
|
this->bufEnd = buf + len;
|
||||||
return keepGoing(this);
|
return table[*(stackPtr - 1)](this);
|
||||||
}
|
}
|
||||||
|
|
||||||
Parser2(Parser2 const &) = delete;
|
Parser2(Parser2 const &) = delete;
|
||||||
@@ -96,6 +99,7 @@ struct Parser2 {
|
|||||||
static constexpr int kMaxStackSize = 1 << 10;
|
static constexpr int kMaxStackSize = 1 << 10;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
bool complete = false;
|
||||||
// Helpers
|
// Helpers
|
||||||
void maybeSkipWs() {
|
void maybeSkipWs() {
|
||||||
while (buf != bufEnd && tables.whitespace[*buf]) {
|
while (buf != bufEnd && tables.whitespace[*buf]) {
|
||||||
@@ -114,7 +118,7 @@ private:
|
|||||||
if (buf != bufBefore) {
|
if (buf != bufBefore) {
|
||||||
callbacks->on_number_data(data, bufBefore, buf - bufBefore);
|
callbacks->on_number_data(data, bufBefore, buf - bufBefore);
|
||||||
}
|
}
|
||||||
if (len() == 0) {
|
if (len() == 0 && !complete) {
|
||||||
return S_AGAIN;
|
return S_AGAIN;
|
||||||
}
|
}
|
||||||
callbacks->on_end_number(data);
|
callbacks->on_end_number(data);
|
||||||
@@ -134,6 +138,9 @@ private:
|
|||||||
for (;;) {
|
for (;;) {
|
||||||
result = (char *)memchr(result, '"', bufEnd - result);
|
result = (char *)memchr(result, '"', bufEnd - result);
|
||||||
if (result == nullptr) {
|
if (result == nullptr) {
|
||||||
|
if (complete) {
|
||||||
|
return S_REJECT;
|
||||||
|
}
|
||||||
callbacks->on_string_data(data, buf, len());
|
callbacks->on_string_data(data, buf, len());
|
||||||
if (bufEnd[-1] == '\\') {
|
if (bufEnd[-1] == '\\') {
|
||||||
pop();
|
pop();
|
||||||
@@ -146,6 +153,9 @@ private:
|
|||||||
if (result != buf && result[-1] == '\\') {
|
if (result != buf && result[-1] == '\\') {
|
||||||
++result;
|
++result;
|
||||||
if (result == bufEnd) {
|
if (result == bufEnd) {
|
||||||
|
if (complete) {
|
||||||
|
return S_REJECT;
|
||||||
|
}
|
||||||
callbacks->on_string_data(data, buf, len());
|
callbacks->on_string_data(data, buf, len());
|
||||||
return S_AGAIN;
|
return S_AGAIN;
|
||||||
}
|
}
|
||||||
@@ -172,6 +182,7 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
static Status keepGoing(Parser2 *self) {
|
static Status keepGoing(Parser2 *self) {
|
||||||
|
assert(!self->complete);
|
||||||
if (self->len() == 0) {
|
if (self->len() == 0) {
|
||||||
return S_AGAIN;
|
return S_AGAIN;
|
||||||
}
|
}
|
||||||
@@ -184,9 +195,6 @@ private:
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
self->pop();
|
self->pop();
|
||||||
if (self->empty()) {
|
|
||||||
return S_OK;
|
|
||||||
}
|
|
||||||
MUSTTAIL return keepGoing(self);
|
MUSTTAIL return keepGoing(self);
|
||||||
}
|
}
|
||||||
static Status stringFromEscape(Parser2 *self) {
|
static Status stringFromEscape(Parser2 *self) {
|
||||||
@@ -194,9 +202,6 @@ private:
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
self->pop();
|
self->pop();
|
||||||
if (self->empty()) {
|
|
||||||
return S_OK;
|
|
||||||
}
|
|
||||||
MUSTTAIL return keepGoing(self);
|
MUSTTAIL return keepGoing(self);
|
||||||
}
|
}
|
||||||
static Status number(Parser2 *self) {
|
static Status number(Parser2 *self) {
|
||||||
@@ -204,9 +209,6 @@ private:
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
self->pop();
|
self->pop();
|
||||||
if (self->empty()) {
|
|
||||||
return S_OK;
|
|
||||||
}
|
|
||||||
MUSTTAIL return keepGoing(self);
|
MUSTTAIL return keepGoing(self);
|
||||||
}
|
}
|
||||||
static Status value(Parser2 *self) {
|
static Status value(Parser2 *self) {
|
||||||
@@ -257,12 +259,15 @@ private:
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
self->pop();
|
if (tables.number[*self->buf]) {
|
||||||
self->callbacks->on_begin_number(self->data);
|
self->pop();
|
||||||
if (Status s = self->push({N_NUMBER})) {
|
self->callbacks->on_begin_number(self->data);
|
||||||
return s;
|
if (Status s = self->push({N_NUMBER})) {
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
break;
|
return S_REJECT;
|
||||||
}
|
}
|
||||||
MUSTTAIL return keepGoing(self);
|
MUSTTAIL return keepGoing(self);
|
||||||
}
|
}
|
||||||
@@ -271,9 +276,6 @@ private:
|
|||||||
++self->buf;
|
++self->buf;
|
||||||
self->pop();
|
self->pop();
|
||||||
self->callbacks->on_end_array(self->data);
|
self->callbacks->on_end_array(self->data);
|
||||||
if (self->empty()) {
|
|
||||||
return S_OK;
|
|
||||||
}
|
|
||||||
MUSTTAIL return keepGoing(self);
|
MUSTTAIL return keepGoing(self);
|
||||||
} else {
|
} else {
|
||||||
self->pop();
|
self->pop();
|
||||||
@@ -289,9 +291,6 @@ private:
|
|||||||
++self->buf;
|
++self->buf;
|
||||||
self->pop();
|
self->pop();
|
||||||
self->callbacks->on_end_object(self->data);
|
self->callbacks->on_end_object(self->data);
|
||||||
if (self->empty()) {
|
|
||||||
return S_OK;
|
|
||||||
}
|
|
||||||
MUSTTAIL return keepGoing(self);
|
MUSTTAIL return keepGoing(self);
|
||||||
} else if (*self->buf == '"') {
|
} else if (*self->buf == '"') {
|
||||||
self->callbacks->on_begin_string(self->data);
|
self->callbacks->on_begin_string(self->data);
|
||||||
@@ -319,9 +318,6 @@ private:
|
|||||||
++self->buf;
|
++self->buf;
|
||||||
self->pop();
|
self->pop();
|
||||||
self->callbacks->on_end_array(self->data);
|
self->callbacks->on_end_array(self->data);
|
||||||
if (self->empty()) {
|
|
||||||
return S_OK;
|
|
||||||
}
|
|
||||||
MUSTTAIL return keepGoing(self);
|
MUSTTAIL return keepGoing(self);
|
||||||
}
|
}
|
||||||
return S_REJECT;
|
return S_REJECT;
|
||||||
@@ -340,9 +336,6 @@ private:
|
|||||||
++self->buf;
|
++self->buf;
|
||||||
self->pop();
|
self->pop();
|
||||||
self->callbacks->on_end_object(self->data);
|
self->callbacks->on_end_object(self->data);
|
||||||
if (self->empty()) {
|
|
||||||
return S_OK;
|
|
||||||
}
|
|
||||||
MUSTTAIL return keepGoing(self);
|
MUSTTAIL return keepGoing(self);
|
||||||
}
|
}
|
||||||
return S_REJECT;
|
return S_REJECT;
|
||||||
@@ -351,9 +344,6 @@ private:
|
|||||||
if (*self->buf++ == 'e') {
|
if (*self->buf++ == 'e') {
|
||||||
self->pop();
|
self->pop();
|
||||||
self->callbacks->on_true_literal(self->data);
|
self->callbacks->on_true_literal(self->data);
|
||||||
if (self->empty()) {
|
|
||||||
return S_OK;
|
|
||||||
}
|
|
||||||
MUSTTAIL return keepGoing(self);
|
MUSTTAIL return keepGoing(self);
|
||||||
}
|
}
|
||||||
return S_REJECT;
|
return S_REJECT;
|
||||||
@@ -362,9 +352,6 @@ private:
|
|||||||
if (*self->buf++ == 'e') {
|
if (*self->buf++ == 'e') {
|
||||||
self->pop();
|
self->pop();
|
||||||
self->callbacks->on_false_literal(self->data);
|
self->callbacks->on_false_literal(self->data);
|
||||||
if (self->empty()) {
|
|
||||||
return S_OK;
|
|
||||||
}
|
|
||||||
MUSTTAIL return keepGoing(self);
|
MUSTTAIL return keepGoing(self);
|
||||||
}
|
}
|
||||||
return S_REJECT;
|
return S_REJECT;
|
||||||
@@ -373,9 +360,6 @@ private:
|
|||||||
if (*self->buf++ == 'l') {
|
if (*self->buf++ == 'l') {
|
||||||
self->pop();
|
self->pop();
|
||||||
self->callbacks->on_null_literal(self->data);
|
self->callbacks->on_null_literal(self->data);
|
||||||
if (self->empty()) {
|
|
||||||
return S_OK;
|
|
||||||
}
|
|
||||||
MUSTTAIL return keepGoing(self);
|
MUSTTAIL return keepGoing(self);
|
||||||
}
|
}
|
||||||
return S_REJECT;
|
return S_REJECT;
|
||||||
@@ -397,12 +381,18 @@ private:
|
|||||||
}
|
}
|
||||||
static Status whitespace(Parser2 *self) {
|
static Status whitespace(Parser2 *self) {
|
||||||
self->maybeSkipWs();
|
self->maybeSkipWs();
|
||||||
if (self->len() == 0) {
|
if (self->len() == 0 && !self->complete) {
|
||||||
return S_AGAIN;
|
return S_AGAIN;
|
||||||
}
|
}
|
||||||
self->pop();
|
self->pop();
|
||||||
MUSTTAIL return keepGoing(self);
|
MUSTTAIL return keepGoing(self);
|
||||||
}
|
}
|
||||||
|
static Status eof(Parser2 *self) {
|
||||||
|
if (self->complete) {
|
||||||
|
return S_OK;
|
||||||
|
}
|
||||||
|
return S_REJECT;
|
||||||
|
}
|
||||||
|
|
||||||
static constexpr continuation table[] = {
|
static constexpr continuation table[] = {
|
||||||
/*T_COLON*/ singleChar<':'>,
|
/*T_COLON*/ singleChar<':'>,
|
||||||
@@ -415,6 +405,7 @@ private:
|
|||||||
/*T_L*/ singleChar<'l'>,
|
/*T_L*/ singleChar<'l'>,
|
||||||
/*T_S*/ singleChar<'s'>,
|
/*T_S*/ singleChar<'s'>,
|
||||||
/*T_DUBQUOTE*/ dubquote,
|
/*T_DUBQUOTE*/ dubquote,
|
||||||
|
/*T_EOF*/ eof,
|
||||||
/*N_STRING*/ string,
|
/*N_STRING*/ string,
|
||||||
/*N_STRING_FROM_ESCAPE*/ stringFromEscape,
|
/*N_STRING_FROM_ESCAPE*/ stringFromEscape,
|
||||||
/*N_NUMBER*/ number,
|
/*N_NUMBER*/ number,
|
||||||
|
|||||||
13
src/test.cpp
13
src/test.cpp
@@ -544,15 +544,24 @@ TEST_CASE("parser2") {
|
|||||||
for (; i < copy.length() - 1; ++i) {
|
for (; i < copy.length() - 1; ++i) {
|
||||||
REQUIRE(parser.parse(copy.data() + i, 1) == Parser2::S_AGAIN);
|
REQUIRE(parser.parse(copy.data() + i, 1) == Parser2::S_AGAIN);
|
||||||
}
|
}
|
||||||
CHECK(parser.parse(copy.data() + i, 1) == Parser2::S_OK);
|
CHECK(parser.parse(copy.data() + i, 1) == Parser2::S_AGAIN);
|
||||||
|
CHECK(parser.parse(nullptr, 0) == Parser2::S_OK);
|
||||||
puts("");
|
puts("");
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
std::string copy = "{\"x\": [], \"y\": {}}";
|
std::string copy = "{\"x\": [], \"y\": {}}";
|
||||||
Parser2 parser(&c, &state);
|
Parser2 parser(&c, &state);
|
||||||
CHECK(parser.parse(copy.data(), copy.length()) == Parser2::S_OK);
|
CHECK(parser.parse(copy.data(), copy.length()) == Parser2::S_AGAIN);
|
||||||
|
CHECK(parser.parse(nullptr, 0) == Parser2::S_OK);
|
||||||
puts("");
|
puts("");
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
auto c = noopCallbacks();
|
||||||
|
std::string copy = "{\"a\":\"a";
|
||||||
|
Parser2 parser(&c, &state);
|
||||||
|
CHECK(parser.parse(copy.data(), copy.length()) == Parser2::S_AGAIN);
|
||||||
|
CHECK(parser.parse(nullptr, 0) == Parser2::S_REJECT);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_CASE("bench1") {
|
TEST_CASE("bench1") {
|
||||||
|
|||||||
Reference in New Issue
Block a user