Remove Parser1

This commit is contained in:
2025-05-18 11:54:37 -04:00
parent c65cf44cc8
commit 9f6a04c3e8

View File

@@ -5,7 +5,6 @@
#include <cstring> #include <cstring>
#include <string> #include <string>
#include <utility>
#include <doctest.h> #include <doctest.h>
#include <nanobench.h> #include <nanobench.h>
@@ -119,244 +118,6 @@
namespace { namespace {
// Straightforward recursive-descent JSON parser. Strings and numbers are
// reported to `callbacks` as raw byte ranges: escape sequences are not decoded
// and a number is any non-empty run of bytes accepted by `tables.number`.
// Does not validate utf-8. Nesting is handled by recursion, so deeply nested
// json documents may overflow the stack.
struct Parser1 {
  // `buf` points at `len` bytes of input; `data` is passed back unchanged as
  // the first argument of every callback.
  Parser1(char *buf, int len, const Callbacks *callbacks, void *data)
      : buf(buf), bufEnd(buf + len), callbacks(callbacks), data(data) {}

  // Parses one element (with optional surrounding whitespace) from the start
  // of the buffer. Returns false to reject.
  // NOTE(review): nothing checks that the whole buffer was consumed, so bytes
  // following the first element are ignored -- confirm this is intended.
  [[nodiscard]] bool parse() { return parse_element(); }

  Parser1(Parser1 const &) = delete;
  Parser1 &operator=(Parser1 const &) = delete;
  Parser1(Parser1 &&) = delete;
  Parser1 &operator=(Parser1 &&) = delete;

private:
  char *buf;    // Read cursor; advances as input is consumed
  char *bufEnd; // One past the last input byte
  const Callbacks *const callbacks;
  void *const data;

  // Number of unread bytes
  int len() const { return static_cast<int>(bufEnd - buf); }

  // Helpers

  // Advances the cursor past any whitespace bytes.
  void maybeSkipWs() {
    // Index through unsigned char so that bytes >= 0x80 cannot become a
    // negative table index on platforms where char is signed.
    while (buf != bufEnd && tables.whitespace[static_cast<unsigned char>(*buf)]) {
      ++buf;
    }
  }

  // Consumes `literal` if the unread input starts with it. On a mismatch the
  // cursor is left where it was and false is returned.
  bool parseLiteral(const char *literal) {
    const int litLen = static_cast<int>(strlen(literal));
    if (len() < litLen || memcmp(buf, literal, litLen) != 0) {
      return false;
    }
    buf += litLen;
    return true;
  }

  // functions corresponding to productions

  // element: ws value ws, dispatching on the first byte of the value.
  bool parse_element() {
    maybeSkipWs();
    if (buf == bufEnd) {
      return false;
    }
    bool ok = false;
    switch (*buf) {
    case '{':
      ok = parse_object();
      break;
    case '[':
      ok = parse_array();
      break;
    case '"':
      ok = parse_string();
      break;
    case 't':
      ok = parse_true();
      break;
    case 'f':
      ok = parse_false();
      break;
    case 'n':
      ok = parse_null();
      break;
    default:
      ok = parse_number();
      break;
    }
    if (!ok) {
      return false;
    }
    maybeSkipWs();
    return true;
  }

  // object: '{' ws '}' | '{' members '}'
  bool parse_object() {
    if (!parseLiteral("{")) {
      return false;
    }
    callbacks->on_begin_object(data);
    maybeSkipWs();
    if (buf == bufEnd) {
      return false;
    }
    if (*buf != '}') {
      // A malformed member list must reject the whole object; previously the
      // failure was dropped and inputs such as `{"a"}}` were accepted.
      if (!parse_members()) {
        return false;
      }
    }
    if (!parseLiteral("}")) {
      return false;
    }
    callbacks->on_end_object(data);
    return true;
  }

  // members: member (',' member)*
  bool parse_members() {
    for (;;) {
      if (!parse_member()) {
        return false;
      }
      if (buf == bufEnd) {
        return false;
      }
      if (*buf != ',') {
        return true;
      }
      ++buf; // consume ','
    }
  }

  // member: ws string ws ':' element
  bool parse_member() {
    maybeSkipWs();
    if (!parse_string()) {
      return false;
    }
    maybeSkipWs();
    if (!parseLiteral(":")) {
      return false;
    }
    return parse_element();
  }

  // array: '[' ws ']' | '[' elements ']'
  bool parse_array() {
    if (!parseLiteral("[")) {
      return false;
    }
    callbacks->on_begin_array(data);
    maybeSkipWs();
    if (buf == bufEnd) {
      return false;
    }
    if (*buf != ']') {
      if (!parse_elements()) {
        return false;
      }
    }
    if (!parseLiteral("]")) {
      return false;
    }
    callbacks->on_end_array(data);
    return true;
  }

  // elements: element (',' element)*
  bool parse_elements() {
    for (;;) {
      if (!parse_element()) {
        return false;
      }
      if (buf == bufEnd) {
        return false;
      }
      if (*buf != ',') {
        return true;
      }
      ++buf; // consume ','
    }
  }

  // string: '"' characters '"'. The bytes between the quotes are reported
  // verbatim (escapes included) through on_string_data; nothing is reported
  // for an empty string.
  bool parse_string() {
    callbacks->on_begin_string(data);
    if (!parseLiteral("\"")) {
      return false;
    }
    char *closing = buf;
    for (;;) {
      closing = static_cast<char *>(memchr(closing, '"', bufEnd - closing));
      if (closing == nullptr) {
        return false;
      }
      // A quote is escaped only when preceded by an odd number of
      // backslashes; an even run (e.g. `\\"`) is escaped backslashes followed
      // by the real terminator.
      const char *runStart = closing;
      while (runStart != buf && runStart[-1] == '\\') {
        --runStart;
      }
      if ((closing - runStart) % 2 == 0) {
        break;
      }
      ++closing;
    }
    const int stringLen = static_cast<int>(closing - buf);
    if (stringLen > 0) {
      callbacks->on_string_data(data, buf, stringLen);
    }
    buf = closing + 1; // step over the closing quote
    callbacks->on_end_string(data);
    return true;
  }

  // number: a non-empty run of bytes accepted by tables.number. The run may
  // extend to the end of the input, so a bare top-level number is accepted.
  bool parse_number() {
    callbacks->on_begin_number(data);
    char *const numberBegin = buf;
    while (buf != bufEnd && tables.number[static_cast<unsigned char>(*buf)]) {
      ++buf;
    }
    if (buf == numberBegin) {
      return false;
    }
    callbacks->on_number_data(data, numberBegin, buf - numberBegin);
    callbacks->on_end_number(data);
    return true;
  }

  bool parse_true() {
    if (!parseLiteral("true")) {
      return false;
    }
    callbacks->on_true_literal(data);
    return true;
  }

  bool parse_false() {
    if (!parseLiteral("false")) {
      return false;
    }
    callbacks->on_false_literal(data);
    return true;
  }

  bool parse_null() {
    if (!parseLiteral("null")) {
      return false;
    }
    callbacks->on_null_literal(data);
    return true;
  }
};
#ifndef __has_attribute #ifndef __has_attribute
#define __has_attribute(x) 0 #define __has_attribute(x) 0
#endif #endif
@@ -517,23 +278,6 @@ Callbacks noopCallbacks() {
} // namespace } // namespace
// Smoke test: Parser1 must accept both the shared `json` fixture and a small
// hand-written document while driving the minify callbacks.
TEST_CASE("parser1") {
  Callbacks c = minifyCallbacks();
  MinifyState state;

  // Parser1 takes a mutable buffer, so every document is parsed from its own
  // private copy (the by-value parameter).
  const auto expectAccepted = [&](std::string document) {
    Parser1 parser(document.data(), document.length(), &c, &state);
    CHECK(parser.parse());
    puts("");
  };

  expectAccepted(json);
  expectAccepted("{\"x\": [], \"y\": {}}");
}
TEST_CASE("parser2") { TEST_CASE("parser2") {
Callbacks c = minifyCallbacks(); Callbacks c = minifyCallbacks();
MinifyState state; MinifyState state;
@@ -571,18 +315,6 @@ TEST_CASE("parser2") {
} }
} }
// Throughput benchmark for Parser1 over the shared `json` fixture, reported
// per input byte. Callbacks are no-ops so only parsing is exercised.
TEST_CASE("bench1") {
  auto c = noopCallbacks();
  ankerl::nanobench::Bench bench;
  bench.batch(json.size()).unit("byte").run("parser1", [&]() {
    // Each iteration parses a fresh copy of the fixture.
    auto input = json;
    Parser1 parser(input.data(), input.length(), &c, nullptr);
    bench.doNotOptimizeAway(parser.parse());
  });
}
TEST_CASE("bench2") { TEST_CASE("bench2") {
auto c = noopCallbacks(); auto c = noopCallbacks();
ankerl::nanobench::Bench bench; ankerl::nanobench::Bench bench;