#include #include #include #include #include #include #include #include #include #include "callbacks.h" #include "parser3.h" // This is the JSON grammar in McKeeman Form. // json // element // value // object // array // string // number // "true" // "false" // "null" // object // '{' ws '}' // '{' members '}' // members // member // member ',' members // member // ws string ws ':' element // array // '[' ws ']' // '[' elements ']' // elements // element // element ',' elements // element // ws value ws // string // '"' characters '"' // characters // "" // character characters // character // '0020' . '10FFFF' - '"' - '\' // '\' escape // escape // '"' // '\' // '/' // 'b' // 'f' // 'n' // 'r' // 't' // 'u' hex hex hex hex // hex // digit // 'A' . 'F' // 'a' . 'f' // number // integer fraction exponent // integer // digit // onenine digits // '-' digit // '-' onenine digits // digits // digit // digit digits // digit // '0' // onenine // onenine // '1' . '9' // fraction // "" // '.' digits // exponent // "" // 'E' sign digits // 'e' sign digits // sign // "" // '+' // '-' // ws // "" // '0020' ws // '000A' ws // '000D' ws // '0009' ws namespace { const std::string json = R"({ "a number": 12345, "true": true, "false": false, "null": null, "glossary": { "title": "example glossary", "GlossDiv": { "title": "S", "GlossList": { "GlossEntry": { "ID": "SGML", "SortAs": "SGML", "GlossTerm": "Standard Generalized Markup Language", "Acronym": "SGML", "Abbrev": "ISO 8879:1986", "GlossDef": { "para": "A meta-markup language, used to create markup languages such as DocBook.", "GlossSeeAlso": ["GML", "XML"] }, "GlossSee": "markup" } } } } })"; void testStreaming(std::string const &json) { SerializeState streaming; SerializeState batch; auto c = serializeCallbacks(); { auto copy = json; parser3::Parser3 parser(&c, &streaming); for (int i = 0; i < copy.size(); ++i) { REQUIRE(parser.parse(copy.data() + i, 1) == parser3::S_AGAIN); } CHECK(parser.parse(nullptr, 0) == parser3::S_OK); } { auto copy = json; parser3::Parser3 parser(&c, &batch); REQUIRE(parser.parse(copy.data(), copy.size()) == parser3::S_AGAIN); CHECK(parser.parse(nullptr, 0) == parser3::S_OK); } CHECK(streaming.result == batch.result); } } // namespace TEST_CASE("parser3") { Callbacks c = serializeCallbacks(); SerializeState state; { auto copy = json; parser3::Parser3 parser(&c, &state); int i = 0; for (; i < copy.length() - 1; ++i) { REQUIRE(parser.parse(copy.data() + i, 1) == parser3::S_AGAIN); } CHECK(parser.parse(copy.data() + i, 1) == parser3::S_AGAIN); CHECK(parser.parse(nullptr, 0) == parser3::S_OK); puts(""); } { std::string copy = "{\"x\": [], \"y\": {}}"; parser3::Parser3 parser(&c, &state); CHECK(parser.parse(copy.data(), copy.length()) == parser3::S_AGAIN); CHECK(parser.parse(nullptr, 0) == parser3::S_OK); puts(""); } { auto c = noopCallbacks(); std::string copy = "{\"a\":\"a"; parser3::Parser3 parser(&c, &state); CHECK(parser.parse(copy.data(), copy.length()) == parser3::S_AGAIN); CHECK(parser.parse(nullptr, 0) == parser3::S_REJECT); } { auto c = noopCallbacks(); std::string copy = "["; parser3::Parser3 parser(&c, &state); CHECK(parser.parse(copy.data(), copy.length()) == parser3::S_AGAIN); CHECK(parser.parse(nullptr, 0) == parser3::S_REJECT); } } TEST_CASE("streaming") { testStreaming(json); } void doTestUnescapingUtf8(std::string const &escaped, std::string const &expected, int stride) { CAPTURE(escaped); CAPTURE(expected); CAPTURE(stride); auto c = noopCallbacks(); std::string result; c.on_string_data = +[](void *p, const char *buf, int len) { auto &s = *(std::string *)p; s.append(buf, len); }; parser3::Parser3 parser(&c, &result); auto copy = escaped; for (int i = 0; i < copy.size(); i += stride) { CAPTURE(i); CHECK( parser.parse(copy.data() + i, std::min(stride, copy.size() - i)) == parser3::S_AGAIN); } CHECK(parser.parse(nullptr, 0) == parser3::S_OK); CHECK(result.size() == expected.size()); CHECK(result == expected); } void testUnescapingUtf8(std::string const &escaped, std::string const &expected) { for (int stride = 0; stride < 10; ++stride) { doTestUnescapingUtf8(escaped, expected, stride == 0 ? std::numeric_limits::max() : stride); } } TEST_CASE("unescaping utf-8") { // 4 byte encoding (utf-16 surrogate pair) testUnescapingUtf8("\"\\ud801\\udc37\"", "𐐷"); return; // Basic testUnescapingUtf8("\"\\\"\"", "\""); testUnescapingUtf8("\"\\\\\"", "\\"); testUnescapingUtf8("\"\\/\"", "/"); testUnescapingUtf8("\"\\b\"", "\b"); testUnescapingUtf8("\"\\f\"", "\f"); testUnescapingUtf8("\"\\n\"", "\n"); testUnescapingUtf8("\"\\r\"", "\r"); testUnescapingUtf8("\"\\t\"", "\t"); // 2 byte encoding testUnescapingUtf8("\"\\u07aB 1234\"", "\u07aB 1234"); // 3 byte encoding testUnescapingUtf8("\"\\uaB34 5678\"", "\uaB34 5678"); } TEST_CASE("bench3") { auto c = noopCallbacks(); ankerl::nanobench::Bench bench; bench.batch(json.size()); bench.unit("byte"); bench.run("parser3", [&]() { auto copy = json; parser3::Parser3 parser(&c, nullptr); bench.doNotOptimizeAway(parser.parse(copy.data(), copy.length())); bench.doNotOptimizeAway(parser.parse(nullptr, 0) == parser3::S_OK); }); } TEST_CASE("bench4") { using namespace simdjson; ankerl::nanobench::Bench bench; bench.batch(json.size()); bench.unit("byte"); bench.run("parser4", [&]() { simdjson::padded_string my_padded_data(json.data(), json.size()); simdjson::dom::parser parser; auto doc = parser.parse(my_padded_data); bench.doNotOptimizeAway(doc); }); } TEST_CASE("bench5") { using namespace simdjson; ankerl::nanobench::Bench bench; bench.batch(json.size()); bench.unit("byte"); bench.run("parser5", [&]() { padded_string my_padded_data(json.data(), json.size()); ondemand::parser parser; auto doc = parser.iterate(my_padded_data); bench.doNotOptimizeAway(doc); }); }