Add automata to recognize UTF-8 in strings

This commit is contained in:
2025-06-21 15:56:15 -04:00
parent d1523acf94
commit 5613303d52
3 changed files with 351 additions and 2 deletions

View File

@@ -1,5 +1,6 @@
#include "callbacks.h"
#include "json_value.h"
#include "parser3.h"
#include "weaseljson.h"
#include <simdjson.h>
@@ -132,5 +133,19 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
testStreaming(s);
compareWithSimdjson(s);
testStringRoundTrip(s);
bool json_utf8 = true;
for (int i = 0; i < int(size); ++i) {
uint8_t c = data[i];
json_utf8 = json_utf8 && c >= 0x20 && c != '"' && c != '\\';
}
if (json_utf8) {
parser3::Utf8Dfa dfa;
auto result = dfa.scan((const char *)data, (const char *)data + size);
bool ok = result == (const char *)data + size && dfa.accept();
bool valid = simdjson::validate_utf8(s.data(), s.size());
if (ok != valid) {
abort();
}
}
return 0;
}