Wee fuzz test and associated bug fixes

This commit is contained in:
2025-05-18 13:46:00 -04:00
parent 9543aba2ad
commit b7f6ed1c9c
5 changed files with 179 additions and 106 deletions

View File

@@ -49,5 +49,10 @@ target_include_directories(mytest PRIVATE include)
target_link_libraries(mytest PRIVATE doctest nanobench simdjson)
doctest_discover_tests(mytest)
# Fuzzing harness built from src/fuzz.cpp.
# -fsanitize=fuzzer is passed at both compile and link time.
# NOTE(review): this flag is presumably only accepted by Clang (libFuzzer) --
# confirm the toolchain before configuring with another compiler.
add_executable(fuzz src/fuzz.cpp)
target_include_directories(fuzz PRIVATE include)
target_compile_options(fuzz PRIVATE -fsanitize=fuzzer)
target_link_options(fuzz PRIVATE -fsanitize=fuzzer)
add_executable(validate src/validate.cpp)
target_include_directories(validate PRIVATE include)

51
src/fuzz.cpp Normal file
View File

@@ -0,0 +1,51 @@
#include "minify.h"
#include "parser3.h"
void testStreaming(std::string const &json) {
MinifyState streaming;
MinifyState batch;
auto c = minifyCallbacks();
parser3::Status streamingStatus = parser3::S_OK;
parser3::Status batchStatus = parser3::S_OK;
do {
auto copy = json;
parser3::Parser3 parser(&c, &streaming);
for (int i = 0; i < copy.size(); ++i) {
auto s = parser.parse(copy.data() + i, 1);
if (s != parser3::S_AGAIN) {
streamingStatus = s;
break;
}
}
auto s = parser.parse(nullptr, 0);
if (s != parser3::S_OK) {
streamingStatus = s;
break;
}
} while (0);
do {
auto copy = json;
parser3::Parser3 parser(&c, &batch);
auto s = parser.parse(copy.data(), copy.size());
if (s != parser3::S_AGAIN) {
batchStatus = s;
break;
}
s = parser.parse(nullptr, 0);
if (s != parser3::S_OK) {
batchStatus = s;
break;
}
} while (0);
if (streamingStatus != batchStatus) {
abort();
}
if (streaming.result != batch.result) {
abort();
}
}
// Fuzzer entry point: copies the raw input bytes into a std::string and runs
// the streaming-vs-batch comparison on it. Always returns 0.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  auto const *bytes = reinterpret_cast<const char *>(data);
  std::string const input(bytes, size);
  testStreaming(input);
  return 0;
}

94
src/minify.h Normal file
View File

@@ -0,0 +1,94 @@
#pragma once
#include "weaseljson.h"
#include <cstdint>
#include <string>
#include <vector>
// Accumulates minified JSON text in `result` and tracks, per open container,
// how many values have been started so the right separator can be emitted.
struct MinifyState {
  // Not read or written by anything inside this struct.
  bool isKey = false;

  // Bookkeeping for one open container.
  struct Cursor {
    int64_t index; // incremented once per on_begin_value() call
    bool isObject; // selects object (":" / ",") vs array (",") separators
  };

  std::string result;

  // Emits the separator that must precede the value about to begin, then
  // counts that value against the innermost open container. Does nothing when
  // no container is open.
  void on_begin_value() {
    if (stack.empty()) {
      return;
    }
    Cursor &top = stack.back();
    if (top.index > 0) {
      // Inside an object, odd positions get ':' and even positions get ',';
      // inside an array every position after the first gets ','.
      bool const needsColon = top.isObject && top.index % 2 == 1;
      result.push_back(needsColon ? ':' : ',');
    }
    ++top.index;
  }

  std::vector<Cursor> stack;
};
// Builds a Callbacks table that re-serialises the parsed document as minified
// JSON. Every callback expects its `void *` argument to point at a
// MinifyState; output is appended to that state's `result`.
//
// Declared `inline` because this definition lives in a header: without it,
// including the header from more than one translation unit of the same
// program yields multiple definitions of the function.
inline Callbacks minifyCallbacks() {
  Callbacks result;

  // Containers: emit any pending separator, open a cursor, write the bracket.
  result.on_begin_object = +[](void *p) {
    auto *state = static_cast<MinifyState *>(p);
    state->on_begin_value();
    state->stack.push_back({0, true});
    state->result.append("{");
  };
  result.on_end_object = +[](void *p) {
    auto *state = static_cast<MinifyState *>(p);
    state->stack.pop_back();
    state->result.append("}");
  };
  result.on_begin_array = +[](void *p) {
    auto *state = static_cast<MinifyState *>(p);
    state->on_begin_value();
    state->stack.push_back({0, false});
    state->result.append("[");
  };
  result.on_end_array = +[](void *p) {
    auto *state = static_cast<MinifyState *>(p);
    state->stack.pop_back();
    state->result.append("]");
  };

  // Strings: quotes are written by begin/end; the payload is copied verbatim.
  result.on_begin_string = +[](void *p) {
    auto *state = static_cast<MinifyState *>(p);
    state->on_begin_value();
    state->result.append("\"");
  };
  result.on_string_data = +[](void *p, const char *buf, int len) {
    auto *state = static_cast<MinifyState *>(p);
    // Append straight from the buffer instead of via a temporary std::string.
    state->result.append(buf, static_cast<std::string::size_type>(len));
  };
  result.on_end_string = +[](void *p) {
    auto *state = static_cast<MinifyState *>(p);
    state->result.append("\"");
  };

  // Numbers: begin only emits the separator; the digits arrive through
  // on_number_data, so on_end_number has nothing to write.
  result.on_begin_number = +[](void *p) {
    auto *state = static_cast<MinifyState *>(p);
    state->on_begin_value();
  };
  result.on_number_data = +[](void *p, const char *buf, int len) {
    auto *state = static_cast<MinifyState *>(p);
    state->result.append(buf, static_cast<std::string::size_type>(len));
  };
  result.on_end_number = +[](void *) {};

  // Literals: separator followed by the fixed token.
  result.on_true_literal = +[](void *p) {
    auto *state = static_cast<MinifyState *>(p);
    state->on_begin_value();
    state->result.append("true");
  };
  result.on_false_literal = +[](void *p) {
    auto *state = static_cast<MinifyState *>(p);
    state->on_begin_value();
    state->result.append("false");
  };
  result.on_null_literal = +[](void *p) {
    auto *state = static_cast<MinifyState *>(p);
    state->on_begin_value();
    state->result.append("null");
  };
  return result;
}

View File

@@ -135,14 +135,12 @@ inline Status n_value(Parser3 *self) {
switch (*self->buf) {
case '{':
self->pop();
self->callbacks->on_begin_object(self->data);
if (auto s = self->push({N_OBJECT})) {
return s;
}
break;
case '[':
self->pop();
self->callbacks->on_begin_array(self->data);
if (auto s = self->push({N_ARRAY})) {
return s;
}
@@ -165,7 +163,6 @@ inline Status n_value(Parser3 *self) {
case '9':
case '-':
self->pop();
self->callbacks->on_begin_number(self->data);
if (auto s = self->push({N_NUMBER})) {
return s;
}
@@ -204,6 +201,7 @@ inline Status n_object(Parser3 *self) {
if (*self->buf != '{') {
return S_REJECT;
}
self->callbacks->on_begin_object(self->data);
++self->buf;
self->pop();
if (auto s = self->push({N_WHITESPACE, N_OBJECT2})) {
@@ -264,6 +262,7 @@ inline Status n_array(Parser3 *self) {
if (*self->buf != '[') {
return S_REJECT;
}
self->callbacks->on_begin_array(self->data);
++self->buf;
self->pop();
if (auto s = self->push({N_WHITESPACE, N_ARRAY2})) {
@@ -494,6 +493,7 @@ inline Status n_integer(Parser3 *self) {
if (self->len() == 0) {
return S_REJECT;
}
self->callbacks->on_begin_number(self->data);
switch (*self->buf) {
case '0':
self->callbacks->on_number_data(self->data, self->buf, 1);
@@ -685,6 +685,9 @@ inline Status n_whitespace(Parser3 *self) {
}
inline Status n_true(Parser3 *self) {
if (self->len() == 0) {
return S_REJECT;
}
if (*self->buf == 'e') {
++self->buf;
self->pop();
@@ -695,6 +698,9 @@ inline Status n_true(Parser3 *self) {
}
inline Status n_false(Parser3 *self) {
if (self->len() == 0) {
return S_REJECT;
}
if (*self->buf == 'e') {
++self->buf;
self->pop();
@@ -705,6 +711,9 @@ inline Status n_false(Parser3 *self) {
}
inline Status n_null(Parser3 *self) {
if (self->len() == 0) {
return S_REJECT;
}
if (*self->buf == 'l') {
++self->buf;
self->pop();
@@ -715,6 +724,9 @@ inline Status n_null(Parser3 *self) {
}
template <char kChar> inline Status singleChar(Parser3 *self) {
if (self->len() == 0) {
return S_REJECT;
}
if (*self->buf == kChar) {
++self->buf;
self->pop();

View File

@@ -1,6 +1,5 @@
#include <cassert>
#include <cctype>
#include <cstdint>
#include <cstdio>
#include <cstring>
@@ -10,6 +9,7 @@
#include <nanobench.h>
#include <simdjson.h>
#include "minify.h"
#include "parser3.h"
// This is the JSON grammar in McKeeman Form.
@@ -167,92 +167,21 @@ Callbacks printCallbacks() {
return result;
}
struct MinifyState {
bool isKey = false;
struct Cursor {
int64_t index;
bool isObject;
};
std::string result;
void on_begin_value() {
if (!stack.empty()) {
auto &back = stack.back();
if (back.isObject && back.index % 2 == 0 && back.index > 0) {
result.append(",");
}
if (back.isObject && back.index % 2 == 1 && back.index > 0) {
result.append(":");
}
if (!back.isObject && back.index > 0) {
result.append(",");
}
++back.index;
}
}
std::vector<Cursor> stack;
};
Callbacks minifyCallbacks() {
Callbacks noopCallbacks() {
Callbacks result;
result.on_begin_object = +[](void *p) {
auto *state = (MinifyState *)p;
state->on_begin_value();
state->stack.push_back({0, true});
state->result.append("{");
};
result.on_end_object = +[](void *p) {
auto *state = (MinifyState *)p;
state->stack.pop_back();
state->result.append("}");
};
result.on_begin_string = +[](void *p) {
auto *state = (MinifyState *)p;
state->on_begin_value();
state->result.append("\"");
};
result.on_string_data = +[](void *p, const char *buf, int len) {
auto *state = (MinifyState *)p;
state->result.append(std::string(buf, len));
};
result.on_end_string = +[](void *p) {
auto *state = (MinifyState *)p;
state->result.append("\"");
};
result.on_begin_array = +[](void *p) {
auto *state = (MinifyState *)p;
state->on_begin_value();
state->stack.push_back({0, false});
state->result.append("[");
};
result.on_end_array = +[](void *p) {
auto *state = (MinifyState *)p;
state->stack.pop_back();
state->result.append("]");
};
result.on_begin_number = +[](void *p) {
auto *state = (MinifyState *)p;
state->on_begin_value();
};
result.on_number_data = +[](void *p, const char *buf, int len) {
auto *state = (MinifyState *)p;
state->result.append(std::string(buf, len));
};
result.on_begin_object = +[](void *) {};
result.on_end_object = +[](void *) {};
result.on_begin_string = +[](void *) {};
result.on_string_data = +[](void *, const char *buf, int len) {};
result.on_end_string = +[](void *) {};
result.on_begin_array = +[](void *) {};
result.on_end_array = +[](void *) {};
result.on_begin_number = +[](void *) {};
result.on_number_data = +[](void *, const char *buf, int len) {};
result.on_end_number = +[](void *) {};
result.on_true_literal = +[](void *p) {
auto *state = (MinifyState *)p;
state->on_begin_value();
state->result.append("true");
};
result.on_false_literal = +[](void *p) {
auto *state = (MinifyState *)p;
state->on_begin_value();
state->result.append("false");
};
result.on_null_literal = +[](void *p) {
auto *state = (MinifyState *)p;
state->on_begin_value();
state->result.append("null");
};
result.on_true_literal = +[](void *) {};
result.on_false_literal = +[](void *) {};
result.on_null_literal = +[](void *) {};
return result;
}
@@ -277,24 +206,6 @@ void testStreaming(std::string const &json) {
CHECK(streaming.result == batch.result);
}
Callbacks noopCallbacks() {
Callbacks result;
result.on_begin_object = +[](void *) {};
result.on_end_object = +[](void *) {};
result.on_begin_string = +[](void *) {};
result.on_string_data = +[](void *, const char *buf, int len) {};
result.on_end_string = +[](void *) {};
result.on_begin_array = +[](void *) {};
result.on_end_array = +[](void *) {};
result.on_begin_number = +[](void *) {};
result.on_number_data = +[](void *, const char *buf, int len) {};
result.on_end_number = +[](void *) {};
result.on_true_literal = +[](void *) {};
result.on_false_literal = +[](void *) {};
result.on_null_literal = +[](void *) {};
return result;
}
} // namespace
TEST_CASE("parser3") {