#include "callbacks.h"
#include "json_value.h"
#include "parser3.h"
#include "weaseljson.h"

// NOTE(review): the header name of this include was missing from the source
// this file was restored from; <simdjson.h> is inferred from the simdjson::
// names used below - confirm.
#include <simdjson.h>

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <memory>
#include <string>
#include <string_view>
#include <utility>

namespace {

// Serialized callback output paired with the final parser status.
using ParseOutcome = std::pair<std::string, WeaselJsonStatus>;

// Owning parser handle that is released with WeaselJsonParser_destroy.
// NOTE(review): the template arguments were missing from the source; the
// element type name WeaselJsonParser is inferred from the API naming - confirm.
using ParserPtr =
    std::unique_ptr<WeaselJsonParser, decltype(&WeaselJsonParser_destroy)>;

// First argument handed to WeaselJsonParser_create by every test in this file.
// NOTE(review): presumably a nesting/stack capacity (compareWithSimdjson pairs
// WeaselJson_OVERFLOW with simdjson's DEPTH_ERROR) - confirm against the API.
constexpr int kParserCreateArg = 1024;

// Creates a parser bound to `callbacks` and `userdata`. The callbacks object is
// passed by address, so the caller keeps it alive for as long as the parser.
template <class Callbacks>
ParserPtr makeParser(Callbacks *callbacks, void *userdata) {
  return ParserPtr{
      WeaselJsonParser_create(kParserCreateArg, callbacks, userdata),
      WeaselJsonParser_destroy};
}

// Makes the closing call (null buffer, zero length) that every test issues
// once all input has been fed and the parser still reports WeaselJson_AGAIN.
WeaselJsonStatus finishParse(WeaselJsonParser *parser) {
  return WeaselJsonParser_parse(parser, nullptr, 0);
}

// Aborts unless `candidate` is an acceptable counterpart to the `batch` run.
// Identical outcomes pass. Otherwise the only tolerated difference is both runs
// rejecting the input (the partial output may then differ). `label` names the
// candidate run in the diagnostic printed before aborting.
void requireAgreement(const char *label, const ParseOutcome &candidate,
                      const ParseOutcome &batch) {
  if (candidate == batch) {
    return;
  }
  if (candidate.second == WeaselJson_AGAIN) {
    // A finished run must never end in the "need more input" state.
    std::abort();
  }
  const bool candidateOk = candidate.second == WeaselJson_OK;
  const bool batchOk = batch.second == WeaselJson_OK;
  if (!candidateOk && !batchOk) {
    // It's ok if the processed data doesn't match if parsing failed
    return;
  }
  std::printf("%s: %s, %s\n", label, candidateOk ? "accept" : "reject",
              candidate.first.c_str());
  std::printf("batch: %s, %s\n", batchOk ? "accept" : "reject",
              batch.first.c_str());
  std::abort();
}

} // namespace

// Parses `copy` with a fresh parser, feeding it `stride` bytes per call, or the
// whole buffer in a single call when `stride` is 0. Stops at the first status
// other than WeaselJson_AGAIN; if all input is consumed while the parser still
// reports AGAIN, one closing call is made. Returns SerializeState::result and
// the last status.
//
// `copy` is taken by value because the parser is given a mutable pointer into
// it. `stride` must not be negative (it is converted to size_t).
std::pair<std::string, WeaselJsonStatus> runStreaming(std::string copy,
                                                      int stride) {
  SerializeState state;
  auto callbacks = serializeCallbacks();
  const ParserPtr parser = makeParser(&callbacks, &state);

  if (stride == 0) {
    const auto status =
        WeaselJsonParser_parse(parser.get(), copy.data(), copy.size());
    if (status != WeaselJson_AGAIN) {
      return {state.result, status};
    }
  } else {
    const auto step = static_cast<std::size_t>(stride);
    for (std::size_t offset = 0; offset < copy.size(); offset += step) {
      // The last chunk may be shorter than a full stride.
      const std::size_t len = std::min<std::size_t>(step, copy.size() - offset);
      const auto status =
          WeaselJsonParser_parse(parser.get(), copy.data() + offset, len);
      if (status != WeaselJson_AGAIN) {
        return {state.result, status};
      }
    }
  }

  // Run the closing call before reading state.result so the returned text
  // includes anything the callbacks emit during that call.
  const auto last = finishParse(parser.get());
  return {state.result, last};
}

// Parses `copy` in a single call, followed by the closing call if the parser
// still reports WeaselJson_AGAIN. This is the reference run that the chunked
// runs are compared against. Returns SerializeState::result and the last
// status.
std::pair<std::string, WeaselJsonStatus> runBatch(std::string copy) {
  SerializeState state;
  auto callbacks = serializeCallbacks();
  const ParserPtr parser = makeParser(&callbacks, &state);

  const auto status =
      WeaselJsonParser_parse(parser.get(), copy.data(), copy.size());
  if (status != WeaselJson_AGAIN) {
    return {state.result, status};
  }
  const auto last = finishParse(parser.get());
  return {state.result, last};
}

// Parses `copy` in exactly two pieces: the first `prefix` bytes, then the rest,
// followed by the closing call if the parser still reports WeaselJson_AGAIN.
// Returns SerializeState::result and the last status.
//
// Precondition: 0 <= prefix <= copy.size(). Otherwise the remainder length
// `copy.size() - prefix` wraps around.
std::pair<std::string, WeaselJsonStatus> runPrefix(std::string copy,
                                                   int prefix) {
  SerializeState state;
  auto callbacks = serializeCallbacks();
  const ParserPtr parser = makeParser(&callbacks, &state);

  const auto head = WeaselJsonParser_parse(parser.get(), copy.data(), prefix);
  if (head != WeaselJson_AGAIN) {
    return {state.result, head};
  }
  const auto tail = WeaselJsonParser_parse(parser.get(), copy.data() + prefix,
                                           copy.size() - prefix);
  if (tail != WeaselJson_AGAIN) {
    return {state.result, tail};
  }
  const auto last = finishParse(parser.get());
  return {state.result, last};
}

// Checks that chunked parsing agrees with single-call parsing. For every
// stride in [1, 15] the input is parsed stride bytes at a time and, when it is
// longer than the stride, also as a two-piece prefix/remainder split. Aborts
// on any disagreement other than both runs rejecting the input.
void testStreaming(std::string const &json) {
  const auto batch = runBatch(json);
  if (batch.second == WeaselJson_AGAIN) {
    std::abort();
  }
  for (int stride = 1; stride < 16; ++stride) {
    requireAgreement("streaming", runStreaming(json, stride), batch);
    // Compared as size_t so very large inputs are not truncated to int.
    if (json.size() > static_cast<std::size_t>(stride)) {
      requireAgreement("prefix", runPrefix(json, stride), batch);
    }
  }
}

// Checks that WeaselJson and simdjson agree on whether `json` is accepted.
// Known, tolerated differences return silently; any other disagreement prints
// both status codes and aborts.
void compareWithSimdjson(std::string const &json) {
  const WeaselJsonStatus ours = [&json] {
    auto copy = json;
    auto callbacks = noopCallbacks();
    const ParserPtr parser = makeParser(&callbacks, nullptr);
    const auto status =
        WeaselJsonParser_parse(parser.get(), copy.data(), copy.size());
    return status == WeaselJson_AGAIN ? finishParse(parser.get()) : status;
  }();

  simdjson::padded_string padded(json.data(), json.size());
  simdjson::dom::parser parser;
  auto doc = parser.parse(padded);
  const auto theirs = doc.error();

  if (ours == WeaselJson_OVERFLOW || theirs == simdjson::DEPTH_ERROR) {
    return;
  }
  if ((ours == WeaselJson_OK) == (theirs == simdjson::SUCCESS)) {
    return;
  }
  if (json.starts_with("\xef\xbb\xbf")) {
    // What to do with byte order mark?
    return;
  }
  if (theirs == simdjson::NUMBER_ERROR || theirs == simdjson::BIGINT_ERROR) {
    // This gets returned for precision errors sometimes?
    return;
  }
  if (theirs == simdjson::NUMBER_OUT_OF_RANGE) {
    // We don't validate the precision of numbers
    return;
  }
  std::printf("ours: %d\n", static_cast<int>(ours));
  std::printf("theirs: %d\n", static_cast<int>(theirs));
  std::abort();
}

// Checks that an arbitrary UTF-8 string survives escape -> parse unchanged, for
// every stride in [0, 15]. Inputs that are not valid UTF-8 are skipped.
void testStringRoundTrip(std::string_view s) {
  if (!simdjson::validate_utf8(s.data(), s.size())) {
    // You can't encode non utf-8 data in a json string, even with escaping
    return;
  }
  // The document does not depend on the stride, so it is built once. It is
  // const so every stride is guaranteed to parse identical text.
  const auto escaped = "\"" + escapeAsJsonString(s) + "\"";
  for (int stride = 0; stride < 16; ++stride) {
    const auto parsed = toValue(escaped, stride);
    if (!parsed.has_value()) {
      std::abort();
    }
    // NOTE(review): the template argument of std::get was missing from the
    // source; std::string is inferred from the comparison with `s` - confirm.
    if (std::get<std::string>(*parsed) != s) {
      std::abort();
    }
  }
}

// libFuzzer entry point. Runs the three differential checks above on the raw
// input. If the input contains no byte below 0x20, no '"' and no '\\', it also
// checks that parser3::Utf8Dfa and simdjson agree on whether the input is
// valid UTF-8. Always returns 0; failures are reported by aborting.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  const auto *bytes = reinterpret_cast<const char *>(data);
  const std::string input(bytes, size);

  testStreaming(input);
  compareWithSimdjson(input);
  testStringRoundTrip(input);

  const bool plainStringBytes = std::all_of(
      data, data + size,
      [](uint8_t c) { return c >= 0x20 && c != '"' && c != '\\'; });
  if (plainStringBytes) {
    parser3::Utf8Dfa dfa;
    const char *end = bytes + size;
    const auto result = dfa.scan(bytes, end);
    // Accepting means the scan consumed everything and ended in an accept state.
    const bool ok = result == end && dfa.accept();
    const bool valid = simdjson::validate_utf8(input.data(), input.size());
    if (ok != valid) {
      std::abort();
    }
  }
  return 0;
}