Benchmarks that push the implementation in a specific way

This commit is contained in:
2025-06-23 12:19:28 -04:00
parent 56fc45ab52
commit 03156d2036

View File

@@ -333,12 +333,14 @@ TEST_CASE("num dfa") {
});
}
// Text consisting solely of repeated U+1F4A9 characters (a four-byte sequence
// when encoded as UTF-8). Shared by the "utf8 dfa" test case and the "utf-8"
// input of the "bench input types" benchmark.
const char *utf8str =
"💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩"
"💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩"
"💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩";
TEST_CASE("utf8 dfa") {
parser3::Utf8Dfa dfa;
std::string match =
"💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩"
"💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩"
"💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩";
std::string match = utf8str;
auto *buf = dfa.scan(match.data(), match.data() + match.size());
CHECK(buf == match.data() + match.size());
CHECK(dfa.accept());
@@ -352,3 +354,108 @@ TEST_CASE("utf8 dfa") {
dfa.scan(match.data(), match.data() + match.size()));
});
}
// Throughput benchmarks for the JSON parser. Each case feeds one document that
// is dominated by a single kind of content (numbers, ASCII text, UTF-8 text,
// escape sequences, nesting, whitespace), i.e. the input shapes that get
// special treatment in the implementation for performance reasons.
TEST_CASE("bench input types") {
  // Measures a complete parse of `document`; the result is reported as
  // "parser3 <label>" and normalised per input byte.
  auto runCase = [](std::string label, std::string document) {
    auto callbacks = noopCallbacks();
    ankerl::nanobench::Bench timer;
    timer.batch(document.size()).unit("byte");
    auto *parser = WeaselJsonParser_create(1024, &callbacks, nullptr);
    timer.run("parser3 " + label, [&]() {
      // Every iteration parses its own copy of the document.
      // NOTE(review): presumably because the parser may modify the buffer it
      // is handed — confirm against the parser API.
      auto scratch = document;
      WeaselJsonParser_reset(parser);
      // Feeding the whole document must report "more input wanted" ...
      const auto fedStatus =
          WeaselJsonParser_parse(parser, scratch.data(), scratch.size());
      if (fedStatus != WeaselJson_AGAIN) {
        abort();
      }
      // ... and the follow-up call with an empty buffer must report success.
      const auto finalStatus = WeaselJsonParser_parse(parser, nullptr, 0);
      if (finalStatus != WeaselJson_OK) {
        abort();
      }
    });
    WeaselJsonParser_destroy(parser);
  };

  runCase("numbers", "[-123456789.000000000000000123456789e+12, "
                     "-123456789.000000000000000123456789E+12, "
                     "-123456789.000000000000000123456789e-12, "
                     "-123456789.000000000000000123456789E-12, "
                     "-123456789.000000000000000123456789e+12, "
                     "-123456789.000000000000000123456789E+12, "
                     "-123456789.000000000000000123456789e-12, "
                     "-123456789.000000000000000123456789E-12, "
                     "-123456789.000000000000000123456789e+12, "
                     "-123456789.000000000000000123456789E+12, "
                     "-123456789.000000000000000123456789e-12, "
                     "-123456789.000000000000000123456789E-12, "
                     "-123456789.000000000000000123456789e+12, "
                     "-123456789.000000000000000123456789E+12, "
                     "-123456789.000000000000000123456789e-12, "
                     "-123456789.000000000000000123456789E-12, "
                     "-123456789.000000000000000123456789e+12]");

  runCase("ascii",
          "\"Donec lobortis eleifend condimentum. Cras dictum dolor lacinia "
          "lectus vehicula rutrum. Maecenas quis nisi nunc. Nam tristique "
          "feugiat est vitae mollis. Maecenas quis nisi nunc.\"");

  // The shared UTF-8 sample wrapped in quotes so it forms a JSON string.
  runCase("utf-8", std::string("\"") + utf8str + "\"");

  runCase("escapes",
          R"(
["\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37",
"\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37",
"\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37",
"\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37",
"\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37",
"\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37",
"\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37",
"\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37",
"\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37",
"\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37",
"\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37",
"\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37",
"\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37",
"\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37",
"\n\r\t\"\b\f\\\/\uabcd\u1234\ud801\udc37"]
)");

  runCase("structural",
          R"(
[
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]}
]
)");

  runCase("whitespace", R"(
[
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
]
)");
}