#include "callbacks.h"
#include "json_value.h"
#include "parser3.h"
#include "weaseljson.h"

// NOTE(review): the simdjson:: names used below require this header; confirm
// the exact include spelling against the build configuration.
#include <simdjson.h>

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <string>
#include <string_view>
#include <utility>
#include <variant>

namespace {

/// Human-readable verdict used in the mismatch diagnostics.
const char *verdict(WeaselJsonStatus status) {
  return status == WeaselJson_OK ? "accept" : "reject";
}

} // namespace

/// Parses `copy` with the serializing callbacks, feeding the parser `stride`
/// bytes at a time, and returns the serialized output together with the final
/// parser status.
///
/// @param copy   Input document. Taken by value because the parser is handed
///               a non-const pointer into the buffer.
/// @param stride Chunk size in bytes. A non-positive stride feeds the whole
///               buffer in a single call.
/// @return {serialized output so far, status}. The first status other than
///         WeaselJson_AGAIN ends the run early; otherwise the status of the
///         closing `parse(nullptr, 0)` call is returned.
std::pair<std::string, WeaselJsonStatus> runStreaming(std::string copy,
                                                      int stride) {
  SerializeState state;
  auto c = serializeCallbacks();
  parser3::Parser3 parser(&c, &state);

  const std::size_t total = copy.size();
  if (stride <= 0) {
    // Single chunk. This call is made even for an empty buffer.
    const auto status = parser.parse(copy.data(), total);
    if (status != WeaselJson_AGAIN) {
      return {state.result, status};
    }
  } else {
    const auto step = static_cast<std::size_t>(stride);
    for (std::size_t offset = 0; offset < total; offset += step) {
      // The last chunk may be shorter than the stride.
      const std::size_t length = std::min(step, total - offset);
      const auto status = parser.parse(copy.data() + offset, length);
      if (status != WeaselJson_AGAIN) {
        return {state.result, status};
      }
    }
  }

  // All input has been fed; the null/zero call yields the final status
  // (presumably it signals end of input to the parser).
  const auto status = parser.parse(nullptr, 0);
  return {state.result, status};
}

/// Parses `copy` in one call with the serializing callbacks.
///
/// @param copy Input document, taken by value because the parser is handed a
///             non-const pointer into the buffer.
/// @return {serialized output, status}, with the same early-exit rule as
///         runStreaming().
std::pair<std::string, WeaselJsonStatus> runBatch(std::string copy) {
  SerializeState state;
  auto c = serializeCallbacks();
  parser3::Parser3 parser(&c, &state);

  auto status = parser.parse(copy.data(), copy.size());
  if (status != WeaselJson_AGAIN) {
    return {state.result, status};
  }
  status = parser.parse(nullptr, 0);
  return {state.result, status};
}

/// Checks that chunked parsing (strides 1..15) produces the same output and
/// status as parsing the whole document at once. Prints both results and
/// aborts on a mismatch.
void testStreaming(std::string const &json) {
  const auto batch = runBatch(json);
  for (int stride = 1; stride < 16; ++stride) {
    const auto streaming = runStreaming(json, stride);
    if (streaming == batch) {
      continue;
    }
    if (streaming.second == batch.second && streaming.second != WeaselJson_OK) {
      // It's ok if the processed data doesn't match if parsing failed.
      // NOTE(review): this return also skips every remaining stride; confirm
      // whether `continue` was intended.
      return;
    }
    printf("streaming: %s, %s\n", verdict(streaming.second),
           streaming.first.c_str());
    // Report the batch run's own status here (the streaming status used to be
    // printed on this line as well, hiding accept/reject disagreements).
    printf("batch: %s, %s\n", verdict(batch.second), batch.first.c_str());
    abort();
  }
}

/// Checks that this parser and simdjson agree on whether `json` is accepted.
/// Known, tolerated differences return silently; any other disagreement
/// prints both status codes and aborts.
void compareWithSimdjson(std::string const &json) {
  WeaselJsonStatus ours;
  {
    // parse() is handed a non-const pointer, so work on a private copy.
    auto copy = json;
    auto c = noopCallbacks();
    parser3::Parser3 weasel(&c, nullptr);
    ours = weasel.parse(copy.data(), copy.size());
    if (ours == WeaselJson_AGAIN) {
      ours = weasel.parse(nullptr, 0);
    }
  }

  simdjson::padded_string my_padded_data(json.data(), json.size());
  simdjson::dom::parser parser;
  auto doc = parser.parse(my_padded_data);
  const auto theirs = doc.error();

  // Nesting-depth limits are not compared between the two parsers.
  if (ours == WeaselJson_OVERFLOW || theirs == simdjson::DEPTH_ERROR) {
    return;
  }
  if ((ours == WeaselJson_OK) == (theirs == simdjson::SUCCESS)) {
    return;
  }

  if (json.starts_with("\xef\xbb\xbf")) {
    // What to do with byte order mark?
    return;
  }
  if (theirs == simdjson::NUMBER_ERROR || theirs == simdjson::BIGINT_ERROR) {
    // This gets returned for precision errors sometimes?
    return;
  }
  if (theirs == simdjson::NUMBER_OUT_OF_RANGE) {
    // We don't validate the precision of numbers
    return;
  }
  printf("ours: %d\n", static_cast<int>(ours));
  printf("theirs: %d\n", static_cast<int>(theirs));
  abort();
}

/// Escapes `s` as a JSON string literal, parses it back with every stride in
/// 0..15, and aborts unless the parsed value equals `s`. Input that is not
/// valid UTF-8 is skipped.
void testStringRoundTrip(std::string_view s) {
  if (!simdjson::validate_utf8(s.data(), s.size())) {
    // You can't encode non utf-8 data in a json string, even with escaping
    return;
  }
  for (int stride = 0; stride < 16; ++stride) {
    // Rebuilt on every iteration: toValue()'s signature is not visible here,
    // so it cannot be assumed to leave its argument untouched.
    auto escaped = "\"" + escapeAsJsonString(s) + "\"";
    auto parsed = toValue(escaped, stride);
    if (!parsed.has_value()) {
      abort();
    }
    // NOTE(review): assumes the string alternative of the parsed value is
    // std::string; confirm against json_value.h.
    if (std::get<std::string>(*parsed) != s) {
      abort();
    }
  }
}

/// libFuzzer entry point: runs all three consistency checks on the input.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  const auto s = std::string(reinterpret_cast<const char *>(data), size);
  testStreaming(s);
  compareWithSimdjson(s);
  testStringRoundTrip(s);
  return 0;
}