diff --git a/CMakeLists.txt b/CMakeLists.txt index 4dbd385..0b646fa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,5 +49,10 @@ target_include_directories(mytest PRIVATE include) target_link_libraries(mytest PRIVATE doctest nanobench simdjson) doctest_discover_tests(mytest) +add_executable(fuzz src/fuzz.cpp) +target_include_directories(fuzz PRIVATE include) +target_compile_options(fuzz PRIVATE -fsanitize=fuzzer) +target_link_options(fuzz PRIVATE -fsanitize=fuzzer) + add_executable(validate src/validate.cpp) target_include_directories(validate PRIVATE include) diff --git a/src/fuzz.cpp b/src/fuzz.cpp new file mode 100644 index 0000000..f3282dd --- /dev/null +++ b/src/fuzz.cpp @@ -0,0 +1,51 @@ +#include "minify.h" +#include "parser3.h" + +void testStreaming(std::string const &json) { + MinifyState streaming; + MinifyState batch; + auto c = minifyCallbacks(); + parser3::Status streamingStatus = parser3::S_OK; + parser3::Status batchStatus = parser3::S_OK; + do { + auto copy = json; + parser3::Parser3 parser(&c, &streaming); + for (int i = 0; i < copy.size(); ++i) { + auto s = parser.parse(copy.data() + i, 1); + if (s != parser3::S_AGAIN) { + streamingStatus = s; + break; + } + } + auto s = parser.parse(nullptr, 0); + if (s != parser3::S_OK) { + streamingStatus = s; + break; + } + } while (0); + do { + auto copy = json; + parser3::Parser3 parser(&c, &batch); + auto s = parser.parse(copy.data(), copy.size()); + if (s != parser3::S_AGAIN) { + batchStatus = s; + break; + } + s = parser.parse(nullptr, 0); + if (s != parser3::S_OK) { + batchStatus = s; + break; + } + } while (0); + if (streamingStatus != batchStatus) { + abort(); + } + if (streaming.result != batch.result) { + abort(); + } +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + testStreaming(std::string((const char *)data, size)); + return 0; +} diff --git a/src/minify.h b/src/minify.h new file mode 100644 index 0000000..9c3fe5a --- /dev/null +++ b/src/minify.h @@ -0,0 +1,94 @@ +#pragma once + +#include "weaseljson.h" +#include +#include +#include +struct MinifyState { + bool isKey = false; + struct Cursor { + int64_t index; + bool isObject; + }; + std::string result; + void on_begin_value() { + if (!stack.empty()) { + auto &back = stack.back(); + if (back.isObject && back.index % 2 == 0 && back.index > 0) { + result.append(","); + } + if (back.isObject && back.index % 2 == 1 && back.index > 0) { + result.append(":"); + } + if (!back.isObject && back.index > 0) { + result.append(","); + } + ++back.index; + } + } + std::vector stack; +}; + +Callbacks minifyCallbacks() { + Callbacks result; + result.on_begin_object = +[](void *p) { + auto *state = (MinifyState *)p; + state->on_begin_value(); + state->stack.push_back({0, true}); + state->result.append("{"); + }; + result.on_end_object = +[](void *p) { + auto *state = (MinifyState *)p; + state->stack.pop_back(); + state->result.append("}"); + }; + result.on_begin_string = +[](void *p) { + auto *state = (MinifyState *)p; + state->on_begin_value(); + state->result.append("\""); + }; + result.on_string_data = +[](void *p, const char *buf, int len) { + auto *state = (MinifyState *)p; + state->result.append(std::string(buf, len)); + }; + result.on_end_string = +[](void *p) { + auto *state = (MinifyState *)p; + state->result.append("\""); + }; + result.on_begin_array = +[](void *p) { + auto *state = (MinifyState *)p; + state->on_begin_value(); + state->stack.push_back({0, false}); + state->result.append("["); + }; + result.on_end_array = +[](void *p) { + auto *state = (MinifyState *)p; + state->stack.pop_back(); + state->result.append("]"); + }; + result.on_begin_number = +[](void *p) { + auto *state = (MinifyState *)p; + state->on_begin_value(); + }; + result.on_number_data = +[](void *p, const char *buf, int len) { + auto *state = (MinifyState *)p; + state->result.append(std::string(buf, len)); + }; + result.on_end_number = +[](void *) {}; + result.on_true_literal = +[](void *p) { + auto *state = (MinifyState *)p; + state->on_begin_value(); + state->result.append("true"); + }; + result.on_false_literal = +[](void *p) { + auto *state = (MinifyState *)p; + state->on_begin_value(); + state->result.append("false"); + }; + result.on_null_literal = +[](void *p) { + auto *state = (MinifyState *)p; + state->on_begin_value(); + state->result.append("null"); + }; + return result; +} diff --git a/src/parser3.h b/src/parser3.h index 9d55868..9cc0794 100644 --- a/src/parser3.h +++ b/src/parser3.h @@ -135,14 +135,12 @@ inline Status n_value(Parser3 *self) { switch (*self->buf) { case '{': self->pop(); - self->callbacks->on_begin_object(self->data); if (auto s = self->push({N_OBJECT})) { return s; } break; case '[': self->pop(); - self->callbacks->on_begin_array(self->data); if (auto s = self->push({N_ARRAY})) { return s; } @@ -165,7 +163,6 @@ inline Status n_value(Parser3 *self) { case '9': case '-': self->pop(); - self->callbacks->on_begin_number(self->data); if (auto s = self->push({N_NUMBER})) { return s; } @@ -204,6 +201,7 @@ inline Status n_object(Parser3 *self) { if (*self->buf != '{') { return S_REJECT; } + self->callbacks->on_begin_object(self->data); ++self->buf; self->pop(); if (auto s = self->push({N_WHITESPACE, N_OBJECT2})) { @@ -264,6 +262,7 @@ inline Status n_array(Parser3 *self) { if (*self->buf != '[') { return S_REJECT; } + self->callbacks->on_begin_array(self->data); ++self->buf; self->pop(); if (auto s = self->push({N_WHITESPACE, N_ARRAY2})) { @@ -494,6 +493,7 @@ inline Status n_integer(Parser3 *self) { if (self->len() == 0) { return S_REJECT; } + self->callbacks->on_begin_number(self->data); switch (*self->buf) { case '0': self->callbacks->on_number_data(self->data, self->buf, 1); @@ -685,6 +685,9 @@ inline Status n_whitespace(Parser3 *self) { } inline Status n_true(Parser3 *self) { + if (self->len() == 0) { + return S_REJECT; + } if (*self->buf == 'e') { ++self->buf; self->pop(); @@ -695,6 +698,9 @@ inline Status n_true(Parser3 *self) { } inline Status n_false(Parser3 *self) { + if (self->len() == 0) { + return S_REJECT; + } if (*self->buf == 'e') { ++self->buf; self->pop(); @@ -705,6 +711,9 @@ inline Status n_false(Parser3 *self) { } inline Status n_null(Parser3 *self) { + if (self->len() == 0) { + return S_REJECT; + } if (*self->buf == 'l') { ++self->buf; self->pop(); @@ -715,6 +724,9 @@ inline Status n_null(Parser3 *self) { } template inline Status singleChar(Parser3 *self) { + if (self->len() == 0) { + return S_REJECT; + } if (*self->buf == kChar) { ++self->buf; self->pop(); diff --git a/src/test.cpp b/src/test.cpp index 4127be2..6dbb2e3 100644 --- a/src/test.cpp +++ b/src/test.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include @@ -10,6 +9,7 @@ #include #include +#include "minify.h" #include "parser3.h" // This is the JSON grammar in McKeeman Form. @@ -167,92 +167,21 @@ Callbacks printCallbacks() { return result; } -struct MinifyState { - bool isKey = false; - struct Cursor { - int64_t index; - bool isObject; - }; - std::string result; - void on_begin_value() { - if (!stack.empty()) { - auto &back = stack.back(); - if (back.isObject && back.index % 2 == 0 && back.index > 0) { - result.append(","); - } - if (back.isObject && back.index % 2 == 1 && back.index > 0) { - result.append(":"); - } - if (!back.isObject && back.index > 0) { - result.append(","); - } - ++back.index; - } - } - std::vector stack; -}; - -Callbacks minifyCallbacks() { +Callbacks noopCallbacks() { Callbacks result; - result.on_begin_object = +[](void *p) { - auto *state = (MinifyState *)p; - state->on_begin_value(); - state->stack.push_back({0, true}); - state->result.append("{"); - }; - result.on_end_object = +[](void *p) { - auto *state = (MinifyState *)p; - state->stack.pop_back(); - state->result.append("}"); - }; - result.on_begin_string = +[](void *p) { - auto *state = (MinifyState *)p; - state->on_begin_value(); - state->result.append("\""); - }; - result.on_string_data = +[](void *p, const char *buf, int len) { - auto *state = (MinifyState *)p; - state->result.append(std::string(buf, len)); - }; - result.on_end_string = +[](void *p) { - auto *state = (MinifyState *)p; - state->result.append("\""); - }; - result.on_begin_array = +[](void *p) { - auto *state = (MinifyState *)p; - state->on_begin_value(); - state->stack.push_back({0, false}); - state->result.append("["); - }; - result.on_end_array = +[](void *p) { - auto *state = (MinifyState *)p; - state->stack.pop_back(); - state->result.append("]"); - }; - result.on_begin_number = +[](void *p) { - auto *state = (MinifyState *)p; - state->on_begin_value(); - }; - result.on_number_data = +[](void *p, const char *buf, int len) { - auto *state = (MinifyState *)p; - state->result.append(std::string(buf, len)); - }; + result.on_begin_object = +[](void *) {}; + result.on_end_object = +[](void *) {}; + result.on_begin_string = +[](void *) {}; + result.on_string_data = +[](void *, const char *buf, int len) {}; + result.on_end_string = +[](void *) {}; + result.on_begin_array = +[](void *) {}; + result.on_end_array = +[](void *) {}; + result.on_begin_number = +[](void *) {}; + result.on_number_data = +[](void *, const char *buf, int len) {}; result.on_end_number = +[](void *) {}; - result.on_true_literal = +[](void *p) { - auto *state = (MinifyState *)p; - state->on_begin_value(); - state->result.append("true"); - }; - result.on_false_literal = +[](void *p) { - auto *state = (MinifyState *)p; - state->on_begin_value(); - state->result.append("false"); - }; - result.on_null_literal = +[](void *p) { - auto *state = (MinifyState *)p; - state->on_begin_value(); - state->result.append("null"); - }; + result.on_true_literal = +[](void *) {}; + result.on_false_literal = +[](void *) {}; + result.on_null_literal = +[](void *) {}; return result; } @@ -277,24 +206,6 @@ void testStreaming(std::string const &json) { CHECK(streaming.result == batch.result); } -Callbacks noopCallbacks() { - Callbacks result; - result.on_begin_object = +[](void *) {}; - result.on_end_object = +[](void *) {}; - result.on_begin_string = +[](void *) {}; - result.on_string_data = +[](void *, const char *buf, int len) {}; - result.on_end_string = +[](void *) {}; - result.on_begin_array = +[](void *) {}; - result.on_end_array = +[](void *) {}; - result.on_begin_number = +[](void *) {}; - result.on_number_data = +[](void *, const char *buf, int len) {}; - result.on_end_number = +[](void *) {}; - result.on_true_literal = +[](void *) {}; - result.on_false_literal = +[](void *) {}; - result.on_null_literal = +[](void *) {}; - return result; -} - } // namespace TEST_CASE("parser3") {