Add automata to recognize utf8 in strings
This commit is contained in:
31
src/test.cpp
31
src/test.cpp
@@ -310,8 +310,15 @@ TEST_CASE("bench5") {
|
||||
|
||||
TEST_CASE("num dfa") {
|
||||
parser3::NumDfa dfa;
|
||||
std::string match = "-1231279127389127389127398127389712893791287389217327482"
|
||||
"374.0e69010101010101010101010101010101";
|
||||
std::string match =
|
||||
"111111111111111111111111111111111111111111111111111111111111111111111111"
|
||||
"111111111111111111111111111111111111111111111111111111111111111111111111"
|
||||
"111111111111111111111111111111111111111111111111111111111111111111111111"
|
||||
"111111111111111111111111111111111111111111111111111111111111111111111111"
|
||||
"111111111111111111111111111111111111111111111111111111111111111111111111"
|
||||
"111111111111111111111111111111111111111111111111111111111111111111111111"
|
||||
"111111111111111111111111111111111111111111111111111111111111111111111111"
|
||||
"11111111";
|
||||
auto *buf = dfa.scan(match.data(), match.data() + match.size());
|
||||
CHECK(buf == match.data() + match.size());
|
||||
CHECK(dfa.accept());
|
||||
@@ -325,3 +332,23 @@ TEST_CASE("num dfa") {
|
||||
dfa.scan(match.data(), match.data() + match.size()));
|
||||
});
|
||||
}
|
||||
|
||||
// Checks that parser3::Utf8Dfa scans a buffer of multi-byte UTF-8 text to the
// end and accepts it, then benchmarks the scan with nanobench.
TEST_CASE("utf8 dfa") {
  parser3::Utf8Dfa dfa;

  // Input built solely from U+1F4A9, a four-byte UTF-8 sequence; it is split
  // across adjacent literals only to keep the source lines short.
  std::string text =
      "💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩"
      "💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩"
      "💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩";
  auto *const first = text.data();
  auto *const last = first + text.size();

  // One scan over [first, last) must stop exactly at `last` and leave the
  // automaton reporting accept().
  auto *stop = dfa.scan(first, last);
  CHECK(stop == last);
  CHECK(dfa.accept());

  // Benchmark the same scan; results are reported per byte of input.
  ankerl::nanobench::Bench bench;
  bench.batch(text.size());
  bench.unit("byte");
  bench.run("utf8 dfa", [&]() {
    // Reset before every run so each iteration scans from the same state.
    dfa.reset();
    // Hand the result to nanobench so the scan is not optimized away.
    bench.doNotOptimizeAway(dfa.scan(first, last));
  });
}
|
||||
|
||||
Reference in New Issue
Block a user