Validate utf8

This commit is contained in:
2025-05-18 17:23:22 -04:00
parent 452bbd3d9c
commit d279173482
6 changed files with 105 additions and 95 deletions

View File

@@ -1,6 +1,9 @@
#include "callbacks.h"
#include "minify.h"
#include "parser3.h"
#include <simdjson.h>
std::pair<std::string, parser3::Status> runStreaming(std::string copy) {
MinifyState state;
auto c = minifyCallbacks();
@@ -41,7 +44,52 @@ void testStreaming(std::string const &json) {
}
}
void compareWithSimdjson(std::string const &json) {
parser3::Status ours;
{
auto copy = json;
auto c = noopCallbacks();
parser3::Parser3 parser3(&c, nullptr);
ours = parser3.parse(copy.data(), copy.size());
if (ours == parser3::S_AGAIN) {
ours = parser3.parse(nullptr, 0);
}
}
using namespace simdjson;
simdjson::padded_string my_padded_data(json.data(), json.size());
simdjson::dom::parser parser;
auto doc = parser.parse(my_padded_data);
auto theirs = doc.error();
if (ours == parser3::S_OVERFLOW || theirs == simdjson::DEPTH_ERROR) {
return;
}
if ((ours == parser3::S_OK) != (theirs == simdjson::SUCCESS)) {
if (json.starts_with("\xef\xbb\xbf")) {
// What to do with byte order mark?
return;
}
if (theirs == simdjson::NUMBER_ERROR || theirs == simdjson::BIGINT_ERROR) {
// This gets returned for precision errors sometimes?
return;
}
if (theirs == simdjson::STRING_ERROR) {
// why god why god do I gotta suffer
return;
}
if (theirs == simdjson::NUMBER_OUT_OF_RANGE) {
// We don't validate the precision of numbers
return;
}
printf("ours: %d\n", ours);
printf("theirs: %d\n", theirs);
abort();
}
}
// libFuzzer entry point: wraps the raw fuzz input in a std::string once and
// runs both the streaming test and the simdjson comparison on it.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  // Build the string a single time; both checks take it by const reference.
  const std::string s(reinterpret_cast<const char *>(data), size);
  testStreaming(s);
  compareWithSimdjson(s);
  return 0;
}