133 lines
3.7 KiB
C++
133 lines
3.7 KiB
C++
#include "callbacks.h"
|
|
#include "json_value.h"
|
|
#include "parser3.h"
|
|
#include "weaseljson.h"
|
|
|
|
#include <simdjson.h>
|
|
|
|
std::pair<std::string, WeaselJsonStatus> runStreaming(std::string copy,
|
|
int stride) {
|
|
SerializeState state;
|
|
auto c = serializeCallbacks();
|
|
parser3::Parser3 parser(&c, &state);
|
|
if (stride == 0) {
|
|
auto s = parser.parse(copy.data(), copy.size());
|
|
if (s != WeaselJson_AGAIN) {
|
|
return {state.result, s};
|
|
}
|
|
} else {
|
|
for (int i = 0; i < copy.size(); i += stride) {
|
|
auto s =
|
|
parser.parse(copy.data() + i, std::min<int>(stride, copy.size() - i));
|
|
if (s != WeaselJson_AGAIN) {
|
|
return {state.result, s};
|
|
}
|
|
}
|
|
}
|
|
auto s = parser.parse(nullptr, 0);
|
|
if (s != WeaselJson_OK) {
|
|
return {state.result, s};
|
|
}
|
|
return {state.result, WeaselJson_OK};
|
|
}
|
|
|
|
std::pair<std::string, WeaselJsonStatus> runBatch(std::string copy) {
|
|
SerializeState state;
|
|
auto c = serializeCallbacks();
|
|
parser3::Parser3 parser(&c, &state);
|
|
auto s = parser.parse(copy.data(), copy.size());
|
|
if (s != WeaselJson_AGAIN) {
|
|
return {state.result, s};
|
|
}
|
|
s = parser.parse(nullptr, 0);
|
|
if (s != WeaselJson_OK) {
|
|
return {state.result, s};
|
|
}
|
|
return {state.result, WeaselJson_OK};
|
|
}
|
|
|
|
void testStreaming(std::string const &json) {
|
|
auto batch = runBatch(json);
|
|
for (int stride = 1; stride < 16; ++stride) {
|
|
auto streaming = runStreaming(json, stride);
|
|
if (streaming != batch) {
|
|
if (streaming.second == batch.second &&
|
|
streaming.second != WeaselJson_OK) {
|
|
// It's ok if the processed data doesn't match if parsing failed
|
|
return;
|
|
}
|
|
printf("streaming: %s, %s\n",
|
|
streaming.second == WeaselJson_OK ? "accept" : "reject",
|
|
streaming.first.c_str());
|
|
printf("batch: %s, %s\n",
|
|
streaming.second == WeaselJson_OK ? "accept" : "reject",
|
|
batch.first.c_str());
|
|
abort();
|
|
}
|
|
}
|
|
}
|
|
|
|
void compareWithSimdjson(std::string const &json) {
|
|
WeaselJsonStatus ours;
|
|
{
|
|
auto copy = json;
|
|
auto c = noopCallbacks();
|
|
parser3::Parser3 parser3(&c, nullptr);
|
|
ours = parser3.parse(copy.data(), copy.size());
|
|
if (ours == WeaselJson_AGAIN) {
|
|
ours = parser3.parse(nullptr, 0);
|
|
}
|
|
}
|
|
|
|
using namespace simdjson;
|
|
simdjson::padded_string my_padded_data(json.data(), json.size());
|
|
simdjson::dom::parser parser;
|
|
auto doc = parser.parse(my_padded_data);
|
|
auto theirs = doc.error();
|
|
if (ours == WeaselJson_OVERFLOW || theirs == simdjson::DEPTH_ERROR) {
|
|
return;
|
|
}
|
|
if ((ours == WeaselJson_OK) != (theirs == simdjson::SUCCESS)) {
|
|
if (json.starts_with("\xef\xbb\xbf")) {
|
|
// What to do with byte order mark?
|
|
return;
|
|
}
|
|
if (theirs == simdjson::NUMBER_ERROR || theirs == simdjson::BIGINT_ERROR) {
|
|
// This gets returned for precision errors sometimes?
|
|
return;
|
|
}
|
|
if (theirs == simdjson::NUMBER_OUT_OF_RANGE) {
|
|
// We don't validate the precision of numbers
|
|
return;
|
|
}
|
|
printf("ours: %d\n", ours);
|
|
printf("theirs: %d\n", theirs);
|
|
abort();
|
|
}
|
|
}
|
|
|
|
void testStringRoundTrip(std::string_view s) {
|
|
if (!simdjson::validate_utf8(s.data(), s.size())) {
|
|
// You can't encode non utf-8 data in a json string, even with escaping
|
|
return;
|
|
}
|
|
for (int stride = 0; stride < 16; ++stride) {
|
|
auto escaped = "\"" + escapeAsJsonString(s) + "\"";
|
|
auto parsed = toValue(escaped, stride);
|
|
if (!parsed.has_value()) {
|
|
abort();
|
|
}
|
|
if (std::get<std::string>(*parsed) != s) {
|
|
abort();
|
|
}
|
|
}
|
|
}
|
|
|
|
// libFuzzer entry point: copies the raw bytes into a std::string and runs the
// streaming-consistency, simdjson-comparison and string round-trip checks on
// it. Always returns 0; failures are reported by the checks calling abort().
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  const std::string input(reinterpret_cast<const char *>(data), size);
  testStreaming(input);
  compareWithSimdjson(input);
  testStringRoundTrip(input);
  return 0;
}
|