Files
weaseljson/src/fuzz.cpp

192 lines
6.3 KiB
C++

#include "callbacks.h"
#include "json_value.h"
#include "parser3.h"
#include "weaseljson.h"
#include <simdjson.h>
// Parses `copy` by feeding it to a fresh parser in chunks of at most `stride`
// bytes, collecting the serialized callback output.
//
// copy:   the input buffer; taken by value and handed to the parser through
//         the non-const data() pointer.
// stride: chunk size in bytes. A stride of 0 (or any non-positive value)
//         feeds the whole buffer in a single call.
//
// Returns the accumulated `SerializeState::result` together with the first
// status that is not WeaselJson_AGAIN. If every chunk reports
// WeaselJson_AGAIN, one more call with (nullptr, 0) is made and its status is
// returned (presumably this signals end of input -- confirm against the
// weaseljson API).
std::pair<std::string, WeaselJsonStatus> runStreaming(std::string copy,
                                                      int stride) {
  SerializeState state;
  auto callbacks = serializeCallbacks();
  // Declared after `callbacks` and `state` so the parser is destroyed first.
  std::unique_ptr<WeaselJsonParser, decltype(&WeaselJsonParser_destroy)> parser{
      WeaselJsonParser_create(1024, &callbacks, &state),
      WeaselJsonParser_destroy};
  if (stride <= 0) {
    // No chunking requested: hand over the whole buffer at once. A negative
    // stride is treated the same way instead of wrapping the unsigned index.
    auto s = WeaselJsonParser_parse(parser.get(), copy.data(), copy.size());
    if (s != WeaselJson_AGAIN) {
      return {state.result, s};
    }
  } else {
    const size_t step = static_cast<size_t>(stride);
    size_t offset = 0;
    while (offset < copy.size()) {
      // Keep the arithmetic in size_t: narrowing the remaining length to int
      // could produce a negative (and then enormous) chunk length for very
      // large buffers.
      const size_t chunk = std::min<size_t>(step, copy.size() - offset);
      auto s =
          WeaselJsonParser_parse(parser.get(), copy.data() + offset, chunk);
      if (s != WeaselJson_AGAIN) {
        return {state.result, s};
      }
      offset += chunk;
    }
  }
  auto s = WeaselJsonParser_parse(parser.get(), nullptr, 0);
  return {state.result, s};
}
std::pair<std::string, WeaselJsonStatus> runBatch(std::string copy) {
SerializeState state;
auto c = serializeCallbacks();
std::unique_ptr<WeaselJsonParser, decltype(&WeaselJsonParser_destroy)> parser{
WeaselJsonParser_create(1024, &c, &state), WeaselJsonParser_destroy};
auto s = WeaselJsonParser_parse(parser.get(), copy.data(), copy.size());
if (s != WeaselJson_AGAIN) {
return {state.result, s};
}
s = WeaselJsonParser_parse(parser.get(), nullptr, 0);
return {state.result, s};
}
// Parses `copy` in exactly two pieces: the first `prefix` bytes, then the
// remainder.
//
// copy:   the input buffer; taken by value and handed to the parser through
//         the non-const data() pointer.
// prefix: length of the first piece. Values outside [0, copy.size()] are
//         clamped to that range so that neither the pointer offset nor the
//         remaining-length subtraction can leave the buffer.
//
// Returns the serialized callback output paired with the first status that is
// not WeaselJson_AGAIN. If both pieces report WeaselJson_AGAIN, a final call
// with (nullptr, 0) is made and its status is returned.
std::pair<std::string, WeaselJsonStatus> runPrefix(std::string copy,
                                                   int prefix) {
  SerializeState state;
  auto callbacks = serializeCallbacks();
  // Declared after `callbacks` and `state` so the parser is destroyed first.
  std::unique_ptr<WeaselJsonParser, decltype(&WeaselJsonParser_destroy)> parser{
      WeaselJsonParser_create(1024, &callbacks, &state),
      WeaselJsonParser_destroy};

  const size_t total = copy.size();
  size_t split = 0;
  if (prefix > 0) {
    const size_t requested = static_cast<size_t>(prefix);
    split = requested < total ? requested : total;
  }

  auto s = WeaselJsonParser_parse(parser.get(), copy.data(), split);
  if (s != WeaselJson_AGAIN) {
    return {state.result, s};
  }
  s = WeaselJsonParser_parse(parser.get(), copy.data() + split, total - split);
  if (s != WeaselJson_AGAIN) {
    return {state.result, s};
  }
  s = WeaselJsonParser_parse(parser.get(), nullptr, 0);
  return {state.result, s};
}
void testStreaming(std::string const &json) {
auto batch = runBatch(json);
if (batch.second == WeaselJson_AGAIN) {
abort();
}
for (int stride = 1; stride < 16; ++stride) {
auto streaming = runStreaming(json, stride);
if (streaming != batch) {
if (streaming.second == WeaselJson_AGAIN) {
abort();
}
bool streamingOk = streaming.second == WeaselJson_OK;
bool batchOk = batch.second == WeaselJson_OK;
if (streamingOk == batchOk && !batchOk) {
// It's ok if the processed data doesn't match if parsing failed
} else {
printf("streaming: %s, %s\n",
streaming.second == WeaselJson_OK ? "accept" : "reject",
streaming.first.c_str());
printf("batch: %s, %s\n",
batch.second == WeaselJson_OK ? "accept" : "reject",
batch.first.c_str());
abort();
}
}
if (int(json.size()) > stride) {
auto prefix = runPrefix(json, stride);
if (prefix != batch) {
if (prefix.second == WeaselJson_AGAIN) {
abort();
}
bool prefixOk = prefix.second == WeaselJson_OK;
bool batchOk = batch.second == WeaselJson_OK;
if (prefixOk == batchOk && !batchOk) {
// It's ok if the processed data doesn't match if parsing failed
} else {
printf("prefix: %s, %s\n",
prefix.second == WeaselJson_OK ? "accept" : "reject",
prefix.first.c_str());
printf("batch: %s, %s\n",
batch.second == WeaselJson_OK ? "accept" : "reject",
batch.first.c_str());
abort();
}
}
}
}
}
void compareWithSimdjson(std::string const &json) {
WeaselJsonStatus ours;
{
auto copy = json;
auto c = noopCallbacks();
std::unique_ptr<WeaselJsonParser, decltype(&WeaselJsonParser_destroy)>
parser{WeaselJsonParser_create(1024, &c, nullptr),
WeaselJsonParser_destroy};
ours = WeaselJsonParser_parse(parser.get(), copy.data(), copy.size());
if (ours == WeaselJson_AGAIN) {
ours = WeaselJsonParser_parse(parser.get(), nullptr, 0);
}
}
using namespace simdjson;
simdjson::padded_string my_padded_data(json.data(), json.size());
simdjson::dom::parser parser;
auto doc = parser.parse(my_padded_data);
auto theirs = doc.error();
if (ours == WeaselJson_OVERFLOW || theirs == simdjson::DEPTH_ERROR) {
return;
}
if ((ours == WeaselJson_OK) != (theirs == simdjson::SUCCESS)) {
if (json.starts_with("\xef\xbb\xbf")) {
// What to do with byte order mark?
return;
}
if (theirs == simdjson::NUMBER_ERROR || theirs == simdjson::BIGINT_ERROR) {
// This gets returned for precision errors sometimes?
return;
}
if (theirs == simdjson::NUMBER_OUT_OF_RANGE) {
// We don't validate the precision of numbers
return;
}
printf("ours: %d\n", ours);
printf("theirs: %d\n", theirs);
abort();
}
}
// Checks that `s`, escaped and quoted as a JSON string literal, parses back
// to exactly `s`. The check is repeated with every stride in [0, 15] passed
// to toValue (presumably the chunk size used while parsing -- confirm in
// json_value.h). Any failure aborts the process.
void testStringRoundTrip(std::string_view s) {
  const bool isUtf8 = simdjson::validate_utf8(s.data(), s.size());
  if (!isUtf8) {
    // You can't encode non utf-8 data in a json string, even with escaping
    return;
  }
  int stride = 0;
  while (stride < 16) {
    auto quoted = "\"" + escapeAsJsonString(s) + "\"";
    auto decoded = toValue(quoted, stride);
    // The document must parse, and must yield the original string.
    if (!decoded.has_value() || std::get<std::string>(*decoded) != s) {
      abort();
    }
    ++stride;
  }
}
// libFuzzer entry point. Runs every consistency check in this file on the
// input bytes and aborts (inside the checks or here) on any mismatch.
//
// data, size: the fuzzer-provided input.
// Always returns 0.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  const char *const begin = reinterpret_cast<const char *>(data);
  const char *const end = begin + size;

  auto s = std::string(begin, size);
  testStreaming(s);
  compareWithSimdjson(s);
  testStringRoundTrip(s);

  // The DFA comparison below only applies to inputs free of control
  // characters (< 0x20), double quotes and backslashes. Index with size_t:
  // narrowing `size` to int would truncate or skip this filter for very large
  // inputs.
  bool json_utf8 = true;
  for (size_t i = 0; i < size; ++i) {
    const uint8_t c = data[i];
    if (c < 0x20 || c == '"' || c == '\\') {
      json_utf8 = false;
      break;
    }
  }

  if (json_utf8) {
    // Our UTF-8 DFA must agree with simdjson's validator: it has to consume
    // the whole input and finish in an accepting state exactly when simdjson
    // says the bytes are valid UTF-8.
    parser3::Utf8Dfa dfa;
    auto result = dfa.scan(begin, end);
    const bool ok = result == end && dfa.accept();
    const bool valid = simdjson::validate_utf8(s.data(), s.size());
    if (ok != valid) {
      abort();
    }
  }
  return 0;
}