Validate correct string data in fuzz test
This commit is contained in:
@@ -7,8 +7,8 @@
|
||||
|
||||
#include "weaseljson.h"
|
||||
|
||||
inline Callbacks printCallbacks() {
|
||||
Callbacks result;
|
||||
inline WeaselJsonCallbacks printCallbacks() {
|
||||
WeaselJsonCallbacks result;
|
||||
result.on_begin_object = +[](void *) { puts("on_begin_object"); };
|
||||
result.on_end_object = +[](void *) { puts("on_end_object"); };
|
||||
result.on_begin_string = +[](void *) { puts("on_begin_string"); };
|
||||
@@ -29,8 +29,8 @@ inline Callbacks printCallbacks() {
|
||||
return result;
|
||||
}
|
||||
|
||||
inline Callbacks noopCallbacks() {
|
||||
Callbacks result;
|
||||
inline WeaselJsonCallbacks noopCallbacks() {
|
||||
WeaselJsonCallbacks result;
|
||||
result.on_begin_object = +[](void *) {};
|
||||
result.on_end_object = +[](void *) {};
|
||||
result.on_begin_string = +[](void *) {};
|
||||
@@ -72,8 +72,8 @@ struct SerializeState {
|
||||
std::vector<Cursor> stack;
|
||||
};
|
||||
|
||||
inline Callbacks serializeCallbacks() {
|
||||
Callbacks result;
|
||||
inline WeaselJsonCallbacks serializeCallbacks() {
|
||||
WeaselJsonCallbacks result;
|
||||
result.on_begin_object = +[](void *p) {
|
||||
auto *state = (SerializeState *)p;
|
||||
state->on_begin_value();
|
||||
|
||||
54
src/fuzz.cpp
54
src/fuzz.cpp
@@ -1,14 +1,17 @@
|
||||
#include "callbacks.h"
|
||||
#include "json_value.h"
|
||||
#include "parser3.h"
|
||||
|
||||
#include <simdjson.h>
|
||||
|
||||
std::pair<std::string, parser3::Status> runStreaming(std::string copy) {
|
||||
std::pair<std::string, parser3::Status> runStreaming(std::string copy,
|
||||
int stride) {
|
||||
SerializeState state;
|
||||
auto c = serializeCallbacks();
|
||||
parser3::Parser3 parser(&c, &state);
|
||||
for (int i = 0; i < copy.size(); ++i) {
|
||||
auto s = parser.parse(copy.data() + i, 1);
|
||||
for (int i = 0; i < copy.size(); i += stride) {
|
||||
auto s =
|
||||
parser.parse(copy.data() + i, std::min<int>(stride, copy.size() - i));
|
||||
if (s != parser3::S_AGAIN) {
|
||||
return {state.result, s};
|
||||
}
|
||||
@@ -36,20 +39,23 @@ std::pair<std::string, parser3::Status> runBatch(std::string copy) {
|
||||
}
|
||||
|
||||
void testStreaming(std::string const &json) {
|
||||
auto streaming = runStreaming(json);
|
||||
auto batch = runBatch(json);
|
||||
if (streaming != batch) {
|
||||
if (streaming.second == batch.second && streaming.second != parser3::S_OK) {
|
||||
// It's ok if the processed data doesn't match if parsing failed
|
||||
return;
|
||||
for (int stride = 1; stride < 16; ++stride) {
|
||||
auto streaming = runStreaming(json, stride);
|
||||
if (streaming != batch) {
|
||||
if (streaming.second == batch.second &&
|
||||
streaming.second != parser3::S_OK) {
|
||||
// It's ok if the processed data doesn't match if parsing failed
|
||||
return;
|
||||
}
|
||||
printf("streaming: %s, %s\n",
|
||||
streaming.second == parser3::S_OK ? "accept" : "reject",
|
||||
streaming.first.c_str());
|
||||
printf("batch: %s, %s\n",
|
||||
streaming.second == parser3::S_OK ? "accept" : "reject",
|
||||
batch.first.c_str());
|
||||
abort();
|
||||
}
|
||||
printf("streaming: %s, %s\n",
|
||||
streaming.second == parser3::S_OK ? "accept" : "reject",
|
||||
streaming.first.c_str());
|
||||
printf("batch: %s, %s\n",
|
||||
streaming.second == parser3::S_OK ? "accept" : "reject",
|
||||
batch.first.c_str());
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -92,9 +98,27 @@ void compareWithSimdjson(std::string const &json) {
|
||||
}
|
||||
}
|
||||
|
||||
void testStringRoundTrip(std::string_view s) {
|
||||
if (!simdjson::validate_utf8(s.data(), s.size())) {
|
||||
// You can't encode non utf-8 data in a json string, even with escaping
|
||||
return;
|
||||
}
|
||||
for (int stride = 0; stride < 16; ++stride) {
|
||||
auto escaped = "\"" + escapeAsJsonString(s) + "\"";
|
||||
auto parsed = toValue(std::move(escaped));
|
||||
if (!parsed.has_value()) {
|
||||
abort();
|
||||
}
|
||||
if (std::get<std::string>(*parsed) != s) {
|
||||
abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// libFuzzer entry point: copies the raw input bytes into a std::string and
// runs the three checks on it in order. Always returns 0; failures are
// reported by the checks themselves.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  const std::string input(reinterpret_cast<const char *>(data), size);
  testStreaming(input);
  compareWithSimdjson(input);
  testStringRoundTrip(input);
  return 0;
}
|
||||
|
||||
215
src/json_value.h
Normal file
215
src/json_value.h
Normal file
@@ -0,0 +1,215 @@
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <map>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
#include <utility>
#include <variant>
#include <vector>

#include "parser3.h"
#include "weaseljson.h"
|
||||
|
||||
// Containers are forward-declared so JsonValue can hold owning pointers to
// them before their definitions (which in turn contain JsonValue) appear.
struct JsonArray;
struct JsonObject;

// A number kept as text; a distinct type so the variant can tell it apart
// from a JSON string.
struct JsonNumber : std::string {};

// A JSON value. Alternative order matters: code in this header dispatches on
// index() (0 null, 1 bool, 2 string, 3 number, 4 array, 5 object).
using JsonValue =
    std::variant<std::nullptr_t, bool, std::string, JsonNumber,
                 std::unique_ptr<JsonArray>, std::unique_ptr<JsonObject>>;

// Ordered sequence of values.
struct JsonArray : std::vector<JsonValue> {};

// Key/value members, held in a std::map keyed by member name.
struct JsonObject : std::map<std::string, JsonValue> {};
|
||||
|
||||
struct ReadValueState {
|
||||
JsonValue result;
|
||||
std::vector<JsonValue> valueStack;
|
||||
std::vector<std::string> keyStack;
|
||||
std::vector</*bool*/ int> isKeyStack;
|
||||
void on_end_value() {
|
||||
auto object = std::move(valueStack.back());
|
||||
valueStack.pop_back();
|
||||
if (valueStack.empty()) {
|
||||
result = std::move(object);
|
||||
return;
|
||||
}
|
||||
auto i = valueStack.back().index();
|
||||
switch (i) {
|
||||
case 0: // null
|
||||
case 1: // bool
|
||||
case 2: // string
|
||||
case 3: // number
|
||||
__builtin_unreachable();
|
||||
case 4: // array
|
||||
std::get<std::unique_ptr<JsonArray>>(valueStack.back())
|
||||
->push_back(std::move(object));
|
||||
return;
|
||||
case 5: // object
|
||||
if (std::exchange(isKeyStack.back(), !isKeyStack.back())) {
|
||||
keyStack.push_back(std::move(std::get<std::string>(object)));
|
||||
} else {
|
||||
std::get<std::unique_ptr<JsonObject>>(valueStack.back())
|
||||
->emplace(std::move(keyStack.back()), std::move(object));
|
||||
keyStack.pop_back();
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Builds the callback table that materialises parse events into a JsonValue.
// Every handler expects its user-data pointer to be a ReadValueState*.
// "begin" handlers push a fresh value onto valueStack, "data" handlers append
// to the value on top, and "end"/literal handlers call on_end_value() to
// attach the finished value to its parent.
inline WeaselJsonCallbacks readValueCallbacks() {
  WeaselJsonCallbacks callbacks;

  // Objects: also track whether the next completed value is a key.
  callbacks.on_begin_object = +[](void *p) {
    auto &state = *static_cast<ReadValueState *>(p);
    state.valueStack.emplace_back(std::make_unique<JsonObject>());
    state.isKeyStack.push_back(true);
  };
  callbacks.on_end_object = +[](void *p) {
    auto &state = *static_cast<ReadValueState *>(p);
    state.isKeyStack.pop_back();
    state.on_end_value();
  };

  // Strings: data may arrive in several pieces.
  callbacks.on_begin_string = +[](void *p) {
    auto &state = *static_cast<ReadValueState *>(p);
    state.valueStack.emplace_back(std::string{});
  };
  callbacks.on_string_data = +[](void *p, const char *buf, int len) {
    auto &state = *static_cast<ReadValueState *>(p);
    auto &text = std::get<std::string>(state.valueStack.back());
    text.append(buf, len);
  };
  callbacks.on_end_string = +[](void *p) {
    static_cast<ReadValueState *>(p)->on_end_value();
  };

  // Arrays.
  callbacks.on_begin_array = +[](void *p) {
    auto &state = *static_cast<ReadValueState *>(p);
    state.valueStack.emplace_back(std::make_unique<JsonArray>());
  };
  callbacks.on_end_array = +[](void *p) {
    static_cast<ReadValueState *>(p)->on_end_value();
  };

  // Numbers: kept as their source text.
  callbacks.on_begin_number = +[](void *p) {
    auto &state = *static_cast<ReadValueState *>(p);
    state.valueStack.emplace_back(JsonNumber{});
  };
  callbacks.on_number_data = +[](void *p, const char *buf, int len) {
    auto &state = *static_cast<ReadValueState *>(p);
    auto &digits = std::get<JsonNumber>(state.valueStack.back());
    digits.append(buf, len);
  };
  callbacks.on_end_number = +[](void *p) {
    static_cast<ReadValueState *>(p)->on_end_value();
  };

  // Literals are complete as soon as they are seen.
  callbacks.on_true_literal = +[](void *p) {
    auto &state = *static_cast<ReadValueState *>(p);
    state.valueStack.emplace_back(true);
    state.on_end_value();
  };
  callbacks.on_false_literal = +[](void *p) {
    auto &state = *static_cast<ReadValueState *>(p);
    state.valueStack.emplace_back(false);
    state.on_end_value();
  };
  callbacks.on_null_literal = +[](void *p) {
    auto &state = *static_cast<ReadValueState *>(p);
    state.valueStack.emplace_back(nullptr);
    state.on_end_value();
  };

  return callbacks;
}
|
||||
|
||||
// Returns the contents of `s` escaped for use inside a JSON string literal
// (the surrounding quotes are NOT added). Quote, backslash and the
// \b \f \n \r \t controls use their two-character escapes; every other byte
// below 0x20 becomes \u00XX with lowercase hex; all remaining bytes are
// copied through unchanged (no UTF-8 validation is performed here).
inline std::string escapeAsJsonString(std::string_view s) {
  static constexpr char kHexDigits[] = "0123456789abcdef";

  // Two-character escape for `byte`, or nullptr when it has none.
  const auto shorthandFor = [](unsigned char byte) -> const char * {
    switch (byte) {
    case '\"':
      return R"(\")";
    case '\\':
      return R"(\\)";
    case '\b':
      return R"(\b)";
    case '\f':
      return R"(\f)";
    case '\n':
      return R"(\n)";
    case '\r':
      return R"(\r)";
    case '\t':
      return R"(\t)";
    default:
      return nullptr;
    }
  };

  std::string escaped;
  for (const char raw : s) {
    const auto byte = static_cast<unsigned char>(raw);
    if (const char *shorthand = shorthandFor(byte)) {
      escaped += shorthand;
    } else if (byte < 0x20) {
      // Remaining control characters must be written as \u00XX.
      escaped += R"(\u00)";
      escaped += kHexDigits[byte >> 4];
      escaped += kHexDigits[byte & 15];
    } else {
      // TODO check if valid utf-8
      escaped += raw;
    }
  }
  return escaped;
}
|
||||
|
||||
// Serialises a JsonValue back to JSON text. Strings and object keys are
// escaped with escapeAsJsonString(); numbers are emitted as their stored
// text; array elements and object members are separated by ", " and a member
// is written as "key": value. Object members come out in the iteration order
// of the underlying std::map.
inline std::string toString(JsonValue const &jsonValue) {
  if (std::holds_alternative<std::nullptr_t>(jsonValue)) {
    return "null";
  }
  if (const bool *flag = std::get_if<bool>(&jsonValue)) {
    return *flag ? "true" : "false";
  }
  if (const std::string *text = std::get_if<std::string>(&jsonValue)) {
    return "\"" + escapeAsJsonString(*text) + "\"";
  }
  if (const JsonNumber *number = std::get_if<JsonNumber>(&jsonValue)) {
    return *number;
  }
  if (const auto *array =
          std::get_if<std::unique_ptr<JsonArray>>(&jsonValue)) {
    std::string out = "[";
    bool first = true;
    for (JsonValue const &element : **array) {
      if (!first) {
        out += ", ";
      }
      first = false;
      out += toString(element);
    }
    out += "]";
    return out;
  }
  if (const auto *object =
          std::get_if<std::unique_ptr<JsonObject>>(&jsonValue)) {
    std::string out = "{";
    bool first = true;
    for (auto const &[key, member] : **object) {
      if (!first) {
        out += ", ";
      }
      first = false;
      out += "\"";
      out += escapeAsJsonString(key);
      out += "\": ";
      out += toString(member);
    }
    out += "}";
    return out;
  }
  __builtin_unreachable();
}
|
||||
|
||||
inline std::optional<JsonValue> toValue(std::string copy, int stride = 0) {
|
||||
ReadValueState state;
|
||||
auto c = readValueCallbacks();
|
||||
parser3::Parser3 parser(&c, &state);
|
||||
if (stride == 0) {
|
||||
if (parser.parse(copy.data(), copy.size()) != parser3::S_AGAIN) {
|
||||
return std::nullopt;
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < copy.size(); i += stride) {
|
||||
if (parser.parse(copy.data(), std::min<int>(stride, copy.size() - i)) !=
|
||||
parser3::S_AGAIN) {
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (parser.parse(nullptr, 0) != parser3::S_OK) {
|
||||
return std::nullopt;
|
||||
}
|
||||
return std::move(state.result);
|
||||
}
|
||||
@@ -74,7 +74,7 @@ enum Symbol : uint8_t {
|
||||
N_SYMBOL_COUNT, // Must be last
|
||||
};
|
||||
struct Parser3 {
|
||||
Parser3(const Callbacks *callbacks, void *data)
|
||||
Parser3(const WeaselJsonCallbacks *callbacks, void *data)
|
||||
: callbacks(callbacks), data(data) {
|
||||
std::ignore = push({N_WHITESPACE, N_VALUE, N_WHITESPACE, T_EOF});
|
||||
}
|
||||
@@ -138,7 +138,7 @@ struct Parser3 {
|
||||
char *dataBegin;
|
||||
// Used for unescaping string data in place
|
||||
char *writeBuf;
|
||||
const Callbacks *const callbacks;
|
||||
const WeaselJsonCallbacks *const callbacks;
|
||||
void *const data;
|
||||
Symbol stack[kMaxStackSize];
|
||||
Symbol *stackPtr = stack;
|
||||
|
||||
@@ -170,7 +170,7 @@ void testStreaming(std::string const &json) {
|
||||
} // namespace
|
||||
|
||||
TEST_CASE("parser3") {
|
||||
Callbacks c = serializeCallbacks();
|
||||
WeaselJsonCallbacks c = serializeCallbacks();
|
||||
SerializeState state;
|
||||
{
|
||||
auto copy = json;
|
||||
|
||||
Reference in New Issue
Block a user