Files
weaseljson/src/test.cpp

302 lines
6.6 KiB
C++

#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstddef>
#include <cstdio>
#include <cstring>
#include <limits>
#include <string>
#include <doctest.h>
#include <nanobench.h>
#include <simdjson.h>
#include "callbacks.h"
#include "parser3.h"
// This is the JSON grammar in McKeeman Form.
//
// json
//     element
//
// value
//     object
//     array
//     string
//     number
//     "true"
//     "false"
//     "null"
//
// object
//     '{' ws '}'
//     '{' members '}'
//
// members
//     member
//     member ',' members
//
// member
//     ws string ws ':' element
//
// array
//     '[' ws ']'
//     '[' elements ']'
//
// elements
//     element
//     element ',' elements
//
// element
//     ws value ws
//
// string
//     '"' characters '"'
//
// characters
//     ""
//     character characters
//
// character
//     '0020' . '10FFFF' - '"' - '\'
//     '\' escape
//
// escape
//     '"'
//     '\'
//     '/'
//     'b'
//     'f'
//     'n'
//     'r'
//     't'
//     'u' hex hex hex hex
//
// hex
//     digit
//     'A' . 'F'
//     'a' . 'f'
//
// number
//     integer fraction exponent
//
// integer
//     digit
//     onenine digits
//     '-' digit
//     '-' onenine digits
//
// digits
//     digit
//     digit digits
//
// digit
//     '0'
//     onenine
//
// onenine
//     '1' . '9'
//
// fraction
//     ""
//     '.' digits
//
// exponent
//     ""
//     'E' sign digits
//     'e' sign digits
//
// sign
//     ""
//     '+'
//     '-'
//
// ws
//     ""
//     '0020' ws
//     '000A' ws
//     '000D' ws
//     '0009' ws
namespace {
const std::string json = R"({
"a number": 12345,
"true": true,
"false": false,
"null": null,
"glossary": {
"title": "example glossary",
"GlossDiv": {
"title": "S",
"GlossList": {
"GlossEntry": {
"ID": "SGML",
"SortAs": "SGML",
"GlossTerm": "Standard Generalized Markup Language",
"Acronym": "SGML",
"Abbrev": "ISO 8879:1986",
"GlossDef": {
"para": "A meta-markup language, used to create markup languages such as DocBook.",
"GlossSeeAlso": ["GML", "XML"]
},
"GlossSee": "markup"
}
}
}
}
})";
void testStreaming(std::string const &json) {
SerializeState streaming;
SerializeState batch;
auto c = serializeCallbacks();
{
auto copy = json;
parser3::Parser3 parser(&c, &streaming);
for (int i = 0; i < copy.size(); ++i) {
REQUIRE(parser.parse(copy.data() + i, 1) == parser3::S_AGAIN);
}
CHECK(parser.parse(nullptr, 0) == parser3::S_OK);
}
{
auto copy = json;
parser3::Parser3 parser(&c, &batch);
REQUIRE(parser.parse(copy.data(), copy.size()) == parser3::S_AGAIN);
CHECK(parser.parse(nullptr, 0) == parser3::S_OK);
}
CHECK(streaming.result == batch.result);
}
} // namespace
// Drives parser3 through four scenarios: the sample document fed byte by
// byte, a small document fed in one call, and two truncated documents that
// must be rejected once end of input is signalled. All four parsers are
// handed the same `state` object.
TEST_CASE("parser3") {
  Callbacks c = serializeCallbacks();
  SerializeState state;
  {
    // Sample document, one byte per call. All bytes but the last are hard
    // requirements; the last byte and the end-of-input call are soft checks.
    auto copy = json;
    parser3::Parser3 parser(&c, &state);
    const std::size_t last = copy.length() - 1;
    for (std::size_t i = 0; i < last; ++i) {
      REQUIRE(parser.parse(copy.data() + i, 1) == parser3::S_AGAIN);
    }
    CHECK(parser.parse(copy.data() + last, 1) == parser3::S_AGAIN);
    CHECK(parser.parse(nullptr, 0) == parser3::S_OK);
    puts("");
  }
  {
    // Empty array and empty object as member values, fed in a single call.
    std::string copy = "{\"x\": [], \"y\": {}}";
    parser3::Parser3 parser(&c, &state);
    CHECK(parser.parse(copy.data(), copy.length()) == parser3::S_AGAIN);
    CHECK(parser.parse(nullptr, 0) == parser3::S_OK);
    puts("");
  }
  {
    // Input ends inside a string: accepted so far, rejected at end of input.
    auto noop = noopCallbacks();
    std::string copy = "{\"a\":\"a";
    parser3::Parser3 parser(&noop, &state);
    CHECK(parser.parse(copy.data(), copy.length()) == parser3::S_AGAIN);
    CHECK(parser.parse(nullptr, 0) == parser3::S_REJECT);
  }
  {
    // Input ends right after an opening bracket: rejected at end of input.
    auto noop = noopCallbacks();
    std::string copy = "[";
    parser3::Parser3 parser(&noop, &state);
    CHECK(parser.parse(copy.data(), copy.length()) == parser3::S_AGAIN);
    CHECK(parser.parse(nullptr, 0) == parser3::S_REJECT);
  }
}
// Compares byte-at-a-time parsing with single-call parsing on the sample
// document.
TEST_CASE("streaming") {
  testStreaming(json);
}
void doTestUnescapingUtf8(std::string const &escaped,
std::string const &expected, int stride) {
CAPTURE(escaped);
CAPTURE(expected);
CAPTURE(stride);
auto c = noopCallbacks();
std::string result;
c.on_string_data = +[](void *p, const char *buf, int len) {
auto &s = *(std::string *)p;
s.append(buf, len);
};
parser3::Parser3 parser(&c, &result);
auto copy = escaped;
for (int i = 0; i < copy.size(); i += stride) {
CAPTURE(i);
CHECK(
parser.parse(copy.data() + i, std::min<int>(stride, copy.size() - i)) ==
parser3::S_AGAIN);
}
CHECK(parser.parse(nullptr, 0) == parser3::S_OK);
CHECK(result.size() == expected.size());
CHECK(result == expected);
}
// Runs doTestUnescapingUtf8 once with the whole input in a single chunk and
// then once for each chunk size from 1 through 9 bytes.
void testUnescapingUtf8(std::string const &escaped,
                        std::string const &expected) {
  // A stride of INT_MAX means "everything in one parse() call".
  doTestUnescapingUtf8(escaped, expected, std::numeric_limits<int>::max());
  for (int chunkSize = 1; chunkSize < 10; ++chunkSize) {
    doTestUnescapingUtf8(escaped, expected, chunkSize);
  }
}
// Checks that JSON string escapes are delivered to the string-data callback
// as the corresponding bytes, for every chunk size tried by
// testUnescapingUtf8.
TEST_CASE("unescaping utf-8") {
  // 4 byte encoding (utf-16 surrogate pair)
  testUnescapingUtf8("\"\\ud801\\udc37\"", "𐐷");
  // Basic
  testUnescapingUtf8("\"\\\"\"", "\"");
  testUnescapingUtf8("\"\\\\\"", "\\");
  testUnescapingUtf8("\"\\/\"", "/");
  testUnescapingUtf8("\"\\b\"", "\b");
  testUnescapingUtf8("\"\\f\"", "\f");
  testUnescapingUtf8("\"\\n\"", "\n");
  testUnescapingUtf8("\"\\r\"", "\r");
  testUnescapingUtf8("\"\\t\"", "\t");
  // 2 byte encoding
  testUnescapingUtf8("\"\\u07aB 1234\"", "\u07aB 1234");
  // 3 byte encoding
  testUnescapingUtf8("\"\\uaB34 5678\"", "\uaB34 5678");
}
// Benchmarks parser3 with no-op callbacks on the sample document; throughput
// is reported per byte of input.
TEST_CASE("bench3") {
  auto callbacks = noopCallbacks();
  ankerl::nanobench::Bench bench;
  bench.batch(json.size()).unit("byte");
  bench.run("parser3", [&]() {
    // A fresh copy of the document is made inside the measured region on
    // every iteration.
    auto input = json;
    parser3::Parser3 parser(&callbacks, nullptr);
    bench.doNotOptimizeAway(parser.parse(input.data(), input.length()));
    bench.doNotOptimizeAway(parser.parse(nullptr, 0) == parser3::S_OK);
  });
}
// Benchmarks simdjson's DOM parser on the sample document; throughput is
// reported per byte of input. The padded copy and the parser are both
// constructed inside the measured region.
TEST_CASE("bench4") {
  ankerl::nanobench::Bench bench;
  bench.batch(json.size()).unit("byte");
  bench.run("parser4", [&]() {
    simdjson::padded_string padded(json.data(), json.size());
    simdjson::dom::parser domParser;
    auto parsed = domParser.parse(padded);
    bench.doNotOptimizeAway(parsed);
  });
}
// Benchmarks simdjson's On Demand parser on the sample document; throughput
// is reported per byte of input. The padded copy and the parser are both
// constructed inside the measured region.
//
// NOTE(review): only iterate() is called and the document is never walked.
// On Demand is documented as lazy, so this presumably measures less work
// than the other benchmarks -- confirm before comparing the numbers.
TEST_CASE("bench5") {
  ankerl::nanobench::Bench bench;
  bench.batch(json.size()).unit("byte");
  bench.run("parser5", [&]() {
    simdjson::padded_string padded(json.data(), json.size());
    simdjson::ondemand::parser onDemandParser;
    auto document = onDemandParser.iterate(padded);
    bench.doNotOptimizeAway(document);
  });
}