diff --git a/src/test.cpp b/src/test.cpp
index 6f3b581..daae7fa 100644
--- a/src/test.cpp
+++ b/src/test.cpp
@@ -333,12 +333,16 @@ TEST_CASE("num dfa") {
   });
 }
 
+// Multi-byte UTF-8 sample text shared by the "utf8 dfa" test and the "utf-8"
+// input benchmark.
+const char *utf8str =
+    "💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩"
+    "💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩"
+    "💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩";
+
 TEST_CASE("utf8 dfa") {
   parser3::Utf8Dfa dfa;
-  std::string match =
-      "💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩"
-      "💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩"
-      "💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩";
+  std::string match = utf8str;
   auto *buf = dfa.scan(match.data(), match.data() + match.size());
   CHECK(buf == match.data() + match.size());
   CHECK(dfa.accept());
@@ -352,3 +356,118 @@ TEST_CASE("utf8 dfa") {
         dfa.scan(match.data(), match.data() + match.size()));
   });
 }
+
+// Benchmarks different input structures that get special care in the
+// implementation, performance-wise.
+TEST_CASE("bench input types") {
+  // Measures a complete parse of `json` with nanobench, reporting throughput
+  // per byte, and aborts if either parse call returns an unexpected status.
+  auto bench = [](const std::string &name, const std::string &json) {
+    auto c = noopCallbacks();
+    ankerl::nanobench::Bench runner;
+    runner.batch(json.size());
+    runner.unit("byte");
+    auto *parser = WeaselJsonParser_create(1024, &c, nullptr);
+    if (parser == nullptr) {
+      abort();
+    }
+    // Each iteration parses a fresh copy of the input. NOTE(review): presumably
+    // the parser may modify its buffer in place -- confirm. The copy's storage
+    // is allocated once here so the allocation is not part of the timing.
+    std::string copy;
+    copy.reserve(json.size());
+    runner.run("parser3 " + name, [&]() {
+      copy = json;
+      WeaselJsonParser_reset(parser);
+      if (WeaselJsonParser_parse(parser, copy.data(), copy.size()) !=
+          WeaselJson_AGAIN) {
+        abort();
+      }
+      if (WeaselJsonParser_parse(parser, nullptr, 0) != WeaselJson_OK) {
+        abort();
+      }
+    });
+    WeaselJsonParser_destroy(parser);
+  };
+
+  bench("numbers", "[-123456789.000000000000000123456789e+12, "
+                   "-123456789.000000000000000123456789E+12, "
+                   "-123456789.000000000000000123456789e-12, "
+                   "-123456789.000000000000000123456789E-12, "
+                   "-123456789.000000000000000123456789e+12, "
+                   "-123456789.000000000000000123456789E+12, "
+                   "-123456789.000000000000000123456789e-12, "
+                   "-123456789.000000000000000123456789E-12, "
+                   "-123456789.000000000000000123456789e+12, "
+                   "-123456789.000000000000000123456789E+12, "
+                   "-123456789.000000000000000123456789e-12, "
+                   "-123456789.000000000000000123456789E-12, "
+                   "-123456789.000000000000000123456789e+12, "
+                   "-123456789.000000000000000123456789E+12, "
+                   "-123456789.000000000000000123456789e-12, "
+                   "-123456789.000000000000000123456789E-12, "
+                   "-123456789.000000000000000123456789e+12]");
+  bench("ascii",
+        "\"Donec lobortis eleifend condimentum. Cras dictum dolor lacinia "
+        "lectus vehicula rutrum. Maecenas quis nisi nunc. Nam tristique "
+        "feugiat est vitae mollis. Maecenas quis nisi nunc.\"");
+  bench("utf-8", std::string("\"") + utf8str + "\"");
+  bench("escapes",
+        R"(
+["\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37",
+"\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37",
+"\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37",
+"\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37",
+"\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37",
+"\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37",
+"\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37",
+"\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37",
+"\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37",
+"\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37",
+"\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37",
+"\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37",
+"\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37",
+"\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37",
+"\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37"]
+)");
+  bench("structural",
+        R"(
+[
+{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
+{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
+{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
+{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
+{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
+{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
+{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
+{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
+{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
+{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
+{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
+{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
+{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
+{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
+{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
+{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]}
+]
+)");
+  bench("whitespace", R"(
+  [
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0
+  ]
+  )");
+}