Compare commits
26 Commits
d1523acf94
...
9803364adb
| Author | SHA1 | Date | |
|---|---|---|---|
| 9803364adb | |||
| 2299904557 | |||
| 578e507b96 | |||
| d536f7ba69 | |||
| 851d07bc43 | |||
| 451c07747e | |||
| 5df9d958ab | |||
| 47a418b689 | |||
| fe60881476 | |||
| 4a9dfe0b45 | |||
| 7ae2ef5443 | |||
| 2cd2975ad4 | |||
| f4c4ed4c36 | |||
| 03156d2036 | |||
| 56fc45ab52 | |||
| e477ff095a | |||
| fc1b3ac147 | |||
| 95250d1668 | |||
| 5a74124cae | |||
| 337d93bcea | |||
| fa0cc1a970 | |||
| 229a68bfdd | |||
| 6c48c40d67 | |||
| 1df18b3c57 | |||
| 7806f6420f | |||
| 5613303d52 |
@@ -1,6 +1,6 @@
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/mirrors-clang-format
|
||||
rev: 6d365699efc33b1b432eab5b4ae331a19e1857de # frozen: v18.1.2
|
||||
rev: 64827eb3528d4dc019b01153e9fb79107241405f # frozen: v20.1.6
|
||||
hooks:
|
||||
- id: clang-format
|
||||
exclude: ".*third_party/.*"
|
||||
@@ -9,6 +9,6 @@ repos:
|
||||
hooks:
|
||||
- id: cmake-format
|
||||
- repo: https://github.com/psf/black
|
||||
rev: 552baf822992936134cbd31a38f69c8cfe7c0f05 # frozen: 24.3.0
|
||||
rev: 8a737e727ac5ab2f1d4cf5876720ed276dc8dc4b # frozen: 25.1.0
|
||||
hooks:
|
||||
- id: black
|
||||
|
||||
1
corpus/0bf54d3e79f640685f6c1b95575c68b79b8a1066
Normal file
1
corpus/0bf54d3e79f640685f6c1b95575c68b79b8a1066
Normal file
@@ -0,0 +1 @@
|
||||
[5,[5K,<2C>ǀ<EFBFBD><C780><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
85
corpus/0d4be07052ed814953dba94e9d1ac96b5e331b38
Normal file
85
corpus/0d4be07052ed814953dba94e9d1ac96b5e331b38
Normal file
@@ -0,0 +1,85 @@
|
||||
[8,[[[[ [8,[[[[ [[8,[8,[8,[[[
|
||||
[[-41.8,[8,[8,[[[[[-41.8,[8,[8,[[[[ [[[
|
||||
[[-418,[8,[[[[[-41.8,[8,[8,[[[[[-41.8,[3,[8,[[41.8,[3,[8,[[[[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-41.8,[8,[8,
|
||||
[[-41.8,[-41.8,[8,[8,[[[[[-41.8,[8,[8,[[[[ [[[ [[-41.8,[3,[8,[[[[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-41.8,[8,[8,
|
||||
[[-41.8,[-41.8,[8,[8,[[[[[-41.8,[8,[
|
||||
[[-418,[8,[[[[[[-41.8,[8,[8,[[[[ [[[
|
||||
[[-418,[8,[[[[[-41.8,[8,[8,[[[[[-41.8,[3,[8,[[[[ [[[
|
||||
[
|
||||
[[-41.8,[8,[8,[[[
|
||||
[8,[8,[8,[[[
|
||||
[[-41.8,[8,[8,[[[[[-41.8,[8,[8,[[[[ [[[ [[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-41.8,[8,[8,
|
||||
[[-41.8,[-41.8,[8,[8,[[[[[-41.8,[8,[8,[[[[ [[[ [[-41.8,[3,[8,[[[[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-41.8,[8,[8,
|
||||
[[-41.8,[-41.8,[8,[8,[[[[[-41.8,[8,[
|
||||
[[-418,[8,[[[[[[-41.8,[8,[8,[[[[ [[[
|
||||
[[-418,[8,[[[[[-41.8,[8,[8,[[[[[-41.8,[3,[8,[[[[ [[[
|
||||
[
|
||||
[[-41.8,[8,[8,[[[
|
||||
[8,8,[[[[ [[8,[8,[8,[[[
|
||||
[[-41.8,[8,[8,[[[[[-41.8,[8,[8,[[[[ [[[
|
||||
[[-418,[8,[[[[[-41.8,[8,[8,[[[[[-41.8,[3,[8,[[41.8,[3,[8,[[[[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-41.8,[8,[8,
|
||||
[[-41.8,[-41.8,[8,[8,[[[[[-41.8,[8,[8,[[[[ [[[ [[-41.8,[3,[8,[[[[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-41.8,[8,[8,
|
||||
[[-41.8,[-41.8,[8,[8,[[[[[-41.8,[8,[
|
||||
[[-418,[8,[[[[[[-41.8,[8,[8,[[[[ [[[
|
||||
[[-418,[8,[[[[[-41.8,[8,[8,[[[[[-41.8,[3,[8,[[[[ [[[
|
||||
[
|
||||
[[-41.8,[8,[8,[[[
|
||||
[8,[8,[8,[[[
|
||||
[[-41.8,[8,[8,[[[[[-41.8,[8,[8,[[[[ [[[ [[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-41.8,[8,[8,
|
||||
[[-41.8,[-41.8,[8,[8,[[[[[-41.8,[8,[8,[[[[ [[[ [[-41.8,[3,[8,[[[[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-41.8,[8,[8,
|
||||
[[-41.8,[-41.8,[8,[8,[[[[[-41.8,[8,[
|
||||
[[-418,[8,[[[[[[-41.8,[8,[8,[[[[ [[[[-418,[8,[[[[[-41.8,[8,[8,[[[[[-41.8,[3,[8,[[[[ [[[
|
||||
[
|
||||
[[-41.8,[8,[8,[[[
|
||||
[8,8,[[[[ [[8,[8,[8,[[[
|
||||
[[-41.8,[8,[8,[[[[[-41.8,[8,[8,[[[[ [[[
|
||||
[[-418,[8,[[[[[-41.8,[8,[8,[[[[[-41.8,[3,[8,[[41.8,[3,[8,[[[[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-41.8,[8,[8,
|
||||
[[-41.8,[-41.8,[8,[8,[[[[[-41.8,[8,[8,[[[[ [[[ [[-41.8,[3,[8,[[[[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-41.8,[8,[8,
|
||||
[[-41.8,[-41.8,[8,[8,[[[[[-41.8,[8,[
|
||||
[[-418,[8,[[[[[[-41.8,[8,[8,[[[[ [[[
|
||||
[[-418,[8,[[[[[-41.8,[8,[8,[[[[[-41.8,[3,[8,[[[[ [[[
|
||||
[
|
||||
[[-41.8,[8,[8,[[[
|
||||
[8,[8,[8,[[[8,[8,[[[[[-41.8,[8,[8,[[[[ [[[ [[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-41.8,[8,[8,
|
||||
[[-41.8,[-41.8,[8,[8,[[[[[-41.8,[8,[8,[[[[ [[[ [[-41.8,[3,[8,[[[[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-41.8,[8,[8,
|
||||
[[-41.8,[-41.8,[8,[8,[[[[[-41.8,[8,[
|
||||
[[-418,[8,[[[[[[-41.8,[8,[8,[[[[ [[[
|
||||
[[-418,[8,[[[[[-41.8,[8,[8,[[[[[-41.8,[3,[8,[[[[ [[[
|
||||
[
|
||||
[[-41.8,[8,[8,[[[
|
||||
[8,[8,[8,[[[
|
||||
[[-41.8,[8,[8,[[[[[-41.8,[8,[8,[[[[ [[[
|
||||
[[-418,[8,[[[[[-41.8,[8,8[,[[[[[-41.8,[3,[8,[[[[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-41.8,[8,[8,
|
||||
[[641.8,[-41.8,[8,[8,[[[[[-41.8,[8,[8,[[[
|
||||
[[-418,[8,[[[[[-41.8,[8,[8,[[[[[-41.8,[3,[8,[[[[ [[[
|
||||
[
|
||||
[[-41.8,[8,[8,6[[[
|
||||
[8,[8,[8,[[[
|
||||
[[-41.8,[8,[8,[[[[[-41.8,[8,[8,[[[[ [[[
|
||||
[
|
||||
1
corpus/111581481e1124ca30d38e1678327ed0740760cd
Normal file
1
corpus/111581481e1124ca30d38e1678327ed0740760cd
Normal file
@@ -0,0 +1 @@
|
||||
[5,7.777E-5,[57E-5,77.777E-<2D>7.77E-ǀ
|
||||
1
corpus/2ace62c1befa19e3ea37dd52be9f6d508c5163e6
Normal file
1
corpus/2ace62c1befa19e3ea37dd52be9f6d508c5163e6
Normal file
@@ -0,0 +1 @@
|
||||
"
|
||||
1
corpus/431b38734fe7440fbaaea6a2b9bbdb17ce6eaba3
Normal file
1
corpus/431b38734fe7440fbaaea6a2b9bbdb17ce6eaba3
Normal file
@@ -0,0 +1 @@
|
||||
[-5,-1<>MMMM<4D><4D>MM<4D>
|
||||
60
corpus/44e1340b726d0134805bdce3ca40cd8b93c636f6
Normal file
60
corpus/44e1340b726d0134805bdce3ca40cd8b93c636f6
Normal file
@@ -0,0 +1,60 @@
|
||||
[[],[[],[],[[],[[],[[],[[],[],[[],[3,[3,[],[],[3,[],[],[[],[[],[[],[],[3,[3,[],[],[3,[],[[],[],[[],[[],[[],[[[],[],[[],[ [[[{},8,[-41.8,[8,[8,[[[[[-41.8,[8,[8,[[[[ [[[ [[-41.8,[3,[8,[[[[ [[[[[[[[[[{},[[[[[[
|
||||
[[-418,[8,[[[
|
||||
[[-41.8,[8,[8,
|
||||
[[-41.8,[-41.8,[8,[8,[[[[[-41.8,[8,[
|
||||
[[-418,[8,[[[[[[-41.8,[8,[8,[[[[ [[[
|
||||
[[-41,{},[[[[{},{}, [[[{},{},[[false, [[false, [[[{},8,[-41.8,[8,[8,[[[[[-20.8,[8,[8,[[1.8,[3,[8,[[[[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-41,{},[[false, [[false, [[[{},8,[-41.8,[8,[8,[[[[[-41.8,[8,[8,[[[[ [[[ [[-41.8,[3,[8,[[[[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-41.8,[8,[8,
|
||||
[[-41.8,[-41.8,[8,[8,[[[[[-41.8,[8,[
|
||||
[[-418,[8,[[[[[[-41.8,[8,[7,[[[[41.8,[8,[8,[[[[ [[[ [[-41.8,[3,[8,[[[[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-41.8,[8,[8,
|
||||
[[-41.8,[-41.8,[8,[8,[[[[[-41.8,[8,[
|
||||
[[-418444444444444444444444444444,[8,[[[[[[-41.8,[8,[8,[[[[ [[[
|
||||
[[-41,{},[[[[{},{}, [[[{},{},[[false, [[false, [[[{},8,[-41.8,[8,[8,[[[[[-20.8,[8,[8,[[1.8,[3,[8,[[[[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-41,{},[[false, [[false, [[[{},8,[-41.8,[8,[8,[[[[[-41.8,[8,[8,[[[[ [[[ [[-41.8,[3,[8,[[[[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-41.8,[8,[8,
|
||||
[[-41.8,[-41.8,[8,[8,8,[8,[8,[[[[[-41.8,[8,[
|
||||
[[-418,[8,[[[[[[-41.8,[8,[8,[[[[ [[[
|
||||
[[-41,{},[[[[{},{}, [[[{},{},[[false, [[false, [[[{},8,[-41.8,[8,[8.8,[8,[[[{},{}, [[[{},{},[[false, [[false, [[[{},8, [[[
|
||||
[[-41,{},[[[[{},{}, [[[{},{},[[false, [[false, [[-418,[8,[[[
|
||||
[[-41.8,[8,[8,
|
||||
[[-41.8,[-41.8,[8,[8,[[[[[-41.8,[8,[
|
||||
[[-418,[8,[[
|
||||
[[[[-41.8,[8,[8,[[[[ [[[
|
||||
[[-41,{},[[[[{},{}, [[[{},{},[[false, [[false, [[[{},8,[-41.8,[8,[8,[[[[[-20.8,[8,[8,[[1.8,[3,[8,[[[[ [[[3,[8,[[[[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-41.9,[8,[8,
|
||||
[[-41.8,[-41.8,[8,[8,[[[[[-41.8,[8,[
|
||||
[[-418,[8,[[[[[[-41.8,[8,[8,[[[[41.8,[8,[8,[[[[ [[[ [[-41.8,[3,[8,[[[[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-41.8,[8,[8,
|
||||
[[-41.8,[-41.8,[8,[8,[[[[[-41.8,[8,[
|
||||
[[-418,[8,[[[[[[-41.8,[8,[8,[[[[ [[[
|
||||
[[-41,{},[[[[{},{}, [[[{},{},[[false, [[false, [[[{},8,[-41.8,[8,[8,[[[[[8,[8,[8,
|
||||
[[-41.8,[-41.8,[8,[8,[[[[[-41.8,[8,[
|
||||
[[-418,[8,[[[[[[-41.8,[8,[8,[[[[ [[[
|
||||
[[-41,{},[[[[{},{}, [[[{},{},[[false, [[false, [[[{},8,[-41.8,[8,[8,[[[[[-20.8,[8,[8,[[1.8,[3,[8,[[[[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-41,{},[[false, [[false, [[[{},8,[-41.8,[8,[8,[[[[[-41.8,[8,[8,[[[[ [[[ [[-41.8,[3,[8,[[[[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-41.8,[8,[8,
|
||||
[[-41.8,[-41.8,[8,[8,[[[[[-41.8,[8,[
|
||||
[[-418,[8,[[[[[[-41.8,[8,[7,[[[[41.8,[8,[8,[[[[ [[[ [[-41.8,[3,[8,[[[[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-41.8,[[ [[-41.8,[3,[8,[[[[ [[[
|
||||
[[-418,[8,[8,[8,
|
||||
[[-41.8,[-41.8,[8,[8,[[[[[-41.8,[8,[
|
||||
[[-418,[8,[[[[[[-41.8,[8,[8,[[[[ [[[
|
||||
[[-41,{},[[[[{},{}, [[[{},{},[[false, [[false, [[[{},8,[-417.777E-8,[[[[[-20.8,[8,[8,[[1.8,[3,[8,[[[[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-41,{},[[false, [[false, [[[{},8,[-41.8,[8,[8,[[[[[-41.8,[8,[8,[[[[ [[[ [[-41.8,[3,[8,[[[[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-941.8,[8,[8,
|
||||
[[-41.8,[-41.8,[8,[8,[[[[[-41.8,[8,[
|
||||
[[-418,[8,[[[[[[-41.8,[8,[8,[[[[ [[[
|
||||
BIN
corpus/4708949ac5161603ddf9482358a93e3ec6c0b04d
Normal file
BIN
corpus/4708949ac5161603ddf9482358a93e3ec6c0b04d
Normal file
Binary file not shown.
BIN
corpus/622e31f06262e75ef5a9f934e2f490c8fb1af445
Normal file
BIN
corpus/622e31f06262e75ef5a9f934e2f490c8fb1af445
Normal file
Binary file not shown.
1
corpus/6ab0d6a64c75782c995ae3482a5d861ed4b5492d
Normal file
1
corpus/6ab0d6a64c75782c995ae3482a5d861ed4b5492d
Normal file
@@ -0,0 +1 @@
|
||||
[[[1,334e55,335,336e5,34e55,3352071e5,33117e5,334e55,352234e5,33117e5,334e55,3e5,334e55,3352234e5,334e55,33,334e5,352234e5,364e55,33e5,334e55,3e5,33455,334e5,34e55,3352234e5,33117e5,334e54,3e5,334e55,3352234e5,334e55,33e5,355,3e5,334e5,334e5,34e55,3352234e5,33117,33,334e5,35e55,33952234e5,334e55,334e5,34e55,33522344e54,37e5,e5,334e55e55
|
||||
1
corpus/8962517580c6484fb977f13e619b16c3f69b00fd
Normal file
1
corpus/8962517580c6484fb977f13e619b16c3f69b00fd
Normal file
@@ -0,0 +1 @@
|
||||
[<5B><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>501
|
||||
1
corpus/8c7d3ac92793c34a26aef0e49dda58c8ebfcab02
Normal file
1
corpus/8c7d3ac92793c34a26aef0e49dda58c8ebfcab02
Normal file
@@ -0,0 +1 @@
|
||||
[[33<33>3666p<36>89501
|
||||
1
corpus/93bfb71671a09937d39790ad289057e76f774966
Normal file
1
corpus/93bfb71671a09937d39790ad289057e76f774966
Normal file
@@ -0,0 +1 @@
|
||||

|
||||
1
corpus/a0849416d07b4e9fd966643af527de269e2301b8
Normal file
1
corpus/a0849416d07b4e9fd966643af527de269e2301b8
Normal file
@@ -0,0 +1 @@
|
||||
[[330.5026663,666666891.5026664333333,666666666666666000001891.50266643,66666000001843,6666666666666391.5026664333333,66666666666666603,666666666666666000001891.50266643,666666891.5026664333333,666666666666666000001891.50266643,666666666666633333363,666666666666666000001333333,666666666666666000001866000001891.50266643,5026664333333,666666666666666000001891.50266643,666666891.5026664333333,666666666666666000001891.50266643,6666<36>~0
|
||||
13
corpus/abd07f4f11688a05dea4eb58e034605bf39edc72
Normal file
13
corpus/abd07f4f11688a05dea4eb58e034605bf39edc72
Normal file
@@ -0,0 +1,13 @@
|
||||
[[[{},{}, [8,[8,
|
||||
[[-41.8,[-48,[8,[
|
||||
[[-418,[8,[[[1.8,[3,[8,[[[[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-41.8,[8,[[[{},{}, [[[{},{},[[false, [[false, [[[{},8,[-41.8,[8,[8,[[[[[-41.8,[8,[8,[[[[ [[[ [[-41.8,[3,[8,[[[[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-3,[8,[[[[[[{},{},[[[[{},{}, [[[{},{},[[false, [[false, [[[{},8,[-41.8,[8,[8,[[[[[-41.8,[8,[8,[[[[ [[[ [[-41.8,[-41.8,[8,[8,[[[[[-41.8,[8,[
|
||||
[[-418,[8,[[[[[[-41.8,[8,[8,[[[[ [[[
|
||||
[[-41,{},[[[[{},{}, [[[{},{},[[false, [[false, [[[{},8,[-41.8,[8,[8,[[[[[-20.8,[8,[8,[[1.8,[3,[8,[[[[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-41.8,[8,[[[{},{}, [[[{},{},[[false, [[false, [[[{},8,[-41.8,[8,[8,[[[[[-41.8,[8,[8,[[[[ [[[ [[],[[],[],[[],[[],[][[-41.8,[3,[8,[[[[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-3,[8,[[[[ [[[
|
||||
BIN
corpus/ade1900cfb6e2749cd638c55cf2aaa04c5ec0bf0
Normal file
BIN
corpus/ade1900cfb6e2749cd638c55cf2aaa04c5ec0bf0
Normal file
Binary file not shown.
1
corpus/bb4bee5c345bba47b3c20a88869832aeedaa2a01
Normal file
1
corpus/bb4bee5c345bba47b3c20a88869832aeedaa2a01
Normal file
@@ -0,0 +1 @@
|
||||
nul<EFBFBD>,<2C><>+<2B>lllllll
|
||||
1
corpus/bc513ad0296d6f179e7edd547976c853c4669a49
Normal file
1
corpus/bc513ad0296d6f179e7edd547976c853c4669a49
Normal file
@@ -0,0 +1 @@
|
||||
[8,[8,[8,-.[<5B><>..
|
||||
1
corpus/bec621e565673e9f0b6645d91ca3087980d1e50f
Normal file
1
corpus/bec621e565673e9f0b6645d91ca3087980d1e50f
Normal file
@@ -0,0 +1 @@
|
||||
[5,[5,ǀ<><C780><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
1
corpus/c403bd6afed0850c403dac9fd511f232cf3d17b3
Normal file
1
corpus/c403bd6afed0850c403dac9fd511f232cf3d17b3
Normal file
@@ -0,0 +1 @@
|
||||
[8,[8,[8399,-<2D>8,
|
||||
15
corpus/d9457eb9150519596d43890dbbfbafd0d0d75a40
Normal file
15
corpus/d9457eb9150519596d43890dbbfbafd0d0d75a40
Normal file
@@ -0,0 +1,15 @@
|
||||
[[[{},{}, [[[{},{},[[[[{},{}, [[[{},{},[[false,[8,[8,[[1.8,[3,[8,[[[[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-41,{},[[false, [[false, [[[{},[
|
||||
[[-41,{},[[[[{},{}, [[[{},{},[[false, [[false, [[[{},8,[-41.8,[8,[8.8,[8,[[[{},{}, [[[{},{},[[false, [[false, [[[{},8, [[[
|
||||
[[-41,{},[[[[{},{}, [[[{},{},[[false, [[false, [[[{},8,[-41.8,[8,[8.8,[8,[[[{},{}, [[[{}, [[false, [[[{},8,[-41.8,[8,[8,[[[[[-41.8,[8,[8,[[[[ [[[ [[-41,[8,[8,[[[[ [[[
|
||||
[[-41,{},[[[[{},{}, [[[{},{},[[false, [[false, [[[{},8,[-41.8,[8,[8,[[[[[-20.8,[8,[8,[[1.8,[3,[8,[[[[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-41,{},[[false, [[false, [[[{},8,[-41.8,[8,[8,[[[[[-41.8,[8,[8,[[[[ [[[ [[-41.8,[3,[8,[[[[ [[[
|
||||
[[-418,[8,[[[
|
||||
[[-41.8,[8,[[[[[{},{}, [[[{},{},[[false, [[false, [[[{},8,[-41.8,[8,[8.8,[8,[[[{},{}, [[[{},{},[[false, [[false, [[[[false, [[false, [[8,[[[
|
||||
[[-41,{},[[false, [[false, [[[{},8,[-41.8,[8,[8,[[[[[-41.8,[8,[8,[[[[ [[[ [[-41.8,[3,[8,[[[[[-41.8,[8,[
|
||||
[[-418,[8,[[[[[[-41.8,[8,[8,[[[[ [[{},{}, [[[{},{},[[false, [[false, [[[{},8,[-41.8,[8,[8,[[[[[-20.8,[8,[8,[[1.8,[3,[[[false, [[[{},[
|
||||
[[-41,{},[[[[{},{}, [[[{},{},[[false, [[false, [[[{},8,[-41.8,[8,[8.8,[8,[[[{},{}, [[[{},{},[[false, [[false, [[[{},8, [[[
|
||||
[[-41,{},[[[[{},{}, [[[{},{},[[false, [[false, [[[{},8,[-41.8,[8,[8.8,[8,[[[{},{[[[
|
||||
[8{},[-[
|
||||
1
corpus/e5733279bdbbc7ba127f87b706c26f12888fe1c0
Normal file
1
corpus/e5733279bdbbc7ba127f87b706c26f12888fe1c0
Normal file
@@ -0,0 +1 @@
|
||||
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>-8<><38><EFBFBD><EFBFBD><EFBFBD>0
|
||||
1
corpus/ec4b74c9a120f34fdeb4119c8c715de6d10fd6ca
Normal file
1
corpus/ec4b74c9a120f34fdeb4119c8c715de6d10fd6ca
Normal file
@@ -0,0 +1 @@
|
||||
2.6
|
||||
1
corpus/example.bin
Normal file
1
corpus/example.bin
Normal file
@@ -0,0 +1 @@
|
||||
"<22> <20> "
|
||||
71
src/fuzz.cpp
71
src/fuzz.cpp
@@ -1,5 +1,6 @@
|
||||
#include "callbacks.h"
|
||||
#include "json_value.h"
|
||||
#include "parser3.h"
|
||||
#include "weaseljson.h"
|
||||
|
||||
#include <simdjson.h>
|
||||
@@ -41,6 +42,25 @@ std::pair<std::string, WeaselJsonStatus> runBatch(std::string copy) {
|
||||
return {state.result, s};
|
||||
}
|
||||
|
||||
std::pair<std::string, WeaselJsonStatus> runPrefix(std::string copy,
|
||||
int prefix) {
|
||||
SerializeState state;
|
||||
auto c = serializeCallbacks();
|
||||
std::unique_ptr<WeaselJsonParser, decltype(&WeaselJsonParser_destroy)> parser{
|
||||
WeaselJsonParser_create(1024, &c, &state), WeaselJsonParser_destroy};
|
||||
auto s = WeaselJsonParser_parse(parser.get(), copy.data(), prefix);
|
||||
if (s != WeaselJson_AGAIN) {
|
||||
return {state.result, s};
|
||||
}
|
||||
s = WeaselJsonParser_parse(parser.get(), copy.data() + prefix,
|
||||
copy.size() - prefix);
|
||||
if (s != WeaselJson_AGAIN) {
|
||||
return {state.result, s};
|
||||
}
|
||||
s = WeaselJsonParser_parse(parser.get(), nullptr, 0);
|
||||
return {state.result, s};
|
||||
}
|
||||
|
||||
void testStreaming(std::string const &json) {
|
||||
auto batch = runBatch(json);
|
||||
if (batch.second == WeaselJson_AGAIN) {
|
||||
@@ -56,15 +76,36 @@ void testStreaming(std::string const &json) {
|
||||
bool batchOk = batch.second == WeaselJson_OK;
|
||||
if (streamingOk == batchOk && !batchOk) {
|
||||
// It's ok if the processed data doesn't match if parsing failed
|
||||
continue;
|
||||
} else {
|
||||
printf("streaming: %s, %s\n",
|
||||
streaming.second == WeaselJson_OK ? "accept" : "reject",
|
||||
streaming.first.c_str());
|
||||
printf("batch: %s, %s\n",
|
||||
batch.second == WeaselJson_OK ? "accept" : "reject",
|
||||
batch.first.c_str());
|
||||
abort();
|
||||
}
|
||||
}
|
||||
if (int(json.size()) > stride) {
|
||||
auto prefix = runPrefix(json, stride);
|
||||
if (prefix != batch) {
|
||||
if (prefix.second == WeaselJson_AGAIN) {
|
||||
abort();
|
||||
}
|
||||
bool prefixOk = prefix.second == WeaselJson_OK;
|
||||
bool batchOk = batch.second == WeaselJson_OK;
|
||||
if (prefixOk == batchOk && !batchOk) {
|
||||
// It's ok if the processed data doesn't match if parsing failed
|
||||
} else {
|
||||
printf("prefix: %s, %s\n",
|
||||
prefix.second == WeaselJson_OK ? "accept" : "reject",
|
||||
prefix.first.c_str());
|
||||
printf("batch: %s, %s\n",
|
||||
batch.second == WeaselJson_OK ? "accept" : "reject",
|
||||
batch.first.c_str());
|
||||
abort();
|
||||
}
|
||||
}
|
||||
printf("streaming: %s, %s\n",
|
||||
streaming.second == WeaselJson_OK ? "accept" : "reject",
|
||||
streaming.first.c_str());
|
||||
printf("batch: %s, %s\n",
|
||||
batch.second == WeaselJson_OK ? "accept" : "reject",
|
||||
batch.first.c_str());
|
||||
abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -132,5 +173,19 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
||||
testStreaming(s);
|
||||
compareWithSimdjson(s);
|
||||
testStringRoundTrip(s);
|
||||
bool json_utf8 = true;
|
||||
for (int i = 0; i < int(size); ++i) {
|
||||
uint8_t c = data[i];
|
||||
json_utf8 = json_utf8 && c >= 0x20 && c != '"' && c != '\\';
|
||||
}
|
||||
if (json_utf8) {
|
||||
Utf8Dfa dfa;
|
||||
auto result = dfa.scan((const char *)data, (const char *)data + size);
|
||||
bool ok = result == (const char *)data + size && dfa.accept();
|
||||
bool valid = simdjson::validate_utf8(s.data(), s.size());
|
||||
if (ok != valid) {
|
||||
abort();
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
706
src/parser3.h
706
src/parser3.h
@@ -16,316 +16,6 @@
|
||||
|
||||
namespace parser3 {
|
||||
|
||||
struct NumDfa {
|
||||
constexpr static uint64_t num_dfa_table[256] = {
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x36000ull,
|
||||
0x0ull,
|
||||
0x36600ull,
|
||||
0x12480000000000ull,
|
||||
0x0ull,
|
||||
0x780aa47b091ec00ull,
|
||||
0x780aa47aa91ea80ull,
|
||||
0x780aa47aa91ea80ull,
|
||||
0x780aa47aa91ea80ull,
|
||||
0x780aa47aa91ea80ull,
|
||||
0x780aa47aa91ea80ull,
|
||||
0x780aa47aa91ea80ull,
|
||||
0x780aa47aa91ea80ull,
|
||||
0x780aa47aa91ea80ull,
|
||||
0x780aa47aa91ea80ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0xc30c000000000ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0xc30c000000000ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
};
|
||||
// Restore this dfa to its start state
|
||||
void reset() { state = 6; }
|
||||
// Return true if this dfa is in an accept state. You probably want to call
|
||||
// scan until the match ends first.
|
||||
bool accept() const {
|
||||
return (state & 63) == 30 || (state & 63) == 36 || (state & 63) == 48 ||
|
||||
(state & 63) == 42;
|
||||
}
|
||||
// return value either points to the first byte which does not match, or
|
||||
// bufEnd. Leaves the dfa in the last state of the match.
|
||||
#ifdef __x86_64__
|
||||
__attribute__((target_clones("default", "bmi2")))
|
||||
#endif
|
||||
const char *
|
||||
scan(const char *buf, const char *bufEnd) {
|
||||
auto state_ = state;
|
||||
for (;;) {
|
||||
constexpr int kStride = 16;
|
||||
if (bufEnd - buf < kStride) [[unlikely]] {
|
||||
while (buf != bufEnd) {
|
||||
uint64_t row = num_dfa_table[uint8_t(*buf)];
|
||||
auto prev = state_;
|
||||
state_ = (row >> (state_ & 63)) & 63;
|
||||
if (state_ == 0) {
|
||||
state_ = prev;
|
||||
break;
|
||||
}
|
||||
++buf;
|
||||
}
|
||||
state = state_;
|
||||
return buf;
|
||||
}
|
||||
uint8_t prev[kStride + 1];
|
||||
prev[0] = state_;
|
||||
for (int i = 0; i < kStride; ++i) {
|
||||
uint64_t row = num_dfa_table[uint8_t(*buf)];
|
||||
prev[i + 1] = row >> (prev[i] & 63);
|
||||
if ((prev[i + 1] & 63) == 0) {
|
||||
state = prev[i];
|
||||
return buf;
|
||||
}
|
||||
++buf;
|
||||
}
|
||||
state_ = prev[kStride];
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
uint64_t state = 6;
|
||||
};
|
||||
|
||||
typedef PRESERVE_NONE WeaselJsonStatus (*Continuation)(struct Parser3 *,
|
||||
char *buf, char *bufEnd);
|
||||
|
||||
@@ -353,8 +43,6 @@ enum Symbol : uint8_t {
|
||||
T_L,
|
||||
T_S,
|
||||
T_COLON,
|
||||
T_UTF8_CONTINUATION_BYTE,
|
||||
T_UTF8_LAST_CONTINUATION_BYTE,
|
||||
T_HEX,
|
||||
T_HEX2,
|
||||
T_HEX3,
|
||||
@@ -388,7 +76,6 @@ struct Parser3 {
|
||||
if (done || len > 0) {
|
||||
callbacks->on_string_data(userdata, dataBegin, len, done);
|
||||
}
|
||||
dataBegin = writeBuf;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool empty() const { return stackPtr == stack(); }
|
||||
@@ -434,19 +121,36 @@ struct Parser3 {
|
||||
int const stackSize;
|
||||
bool complete;
|
||||
NumDfa numDfa;
|
||||
Utf8Dfa strDfa;
|
||||
};
|
||||
|
||||
// Advance `buf` past bytes for which tables.whitespace[] is set.
//
// Returns WeaselJson_OK with `buf` pointing at the first non-whitespace byte,
// or WeaselJson_AGAIN with buf == bufEnd if the input ran out first.
inline PRESERVE_NONE WeaselJsonStatus skipWhitespace(char *&buf, char *bufEnd) {
  constexpr int kStride = 4;
  for (;;) {
    if (bufEnd - buf < kStride) [[unlikely]] {
      // Fewer than kStride bytes remain: finish with a bounds-checked walk.
      for (; buf != bufEnd; ++buf) {
        if (!tables.whitespace[uint8_t(*buf)]) {
          return WeaselJson_OK;
        }
      }
      return WeaselJson_AGAIN;
    }
    // At least kStride bytes are available, so no per-byte bounds check is
    // needed inside this inner loop.
    for (int remaining = kStride; remaining > 0; --remaining) {
      if (!tables.whitespace[uint8_t(*buf)]) {
        return WeaselJson_OK;
      }
      ++buf;
    }
  }
}
|
||||
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_whitespace(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
if (bufEnd - buf == 0) {
|
||||
if (buf == bufEnd) {
|
||||
self->pop();
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
}
|
||||
while (tables.whitespace[uint8_t(*buf)]) {
|
||||
++buf;
|
||||
if (buf == bufEnd) {
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
if (auto s = skipWhitespace(buf, bufEnd)) {
|
||||
return s;
|
||||
}
|
||||
self->pop();
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
@@ -467,14 +171,83 @@ inline PRESERVE_NONE WeaselJsonStatus n_number(Parser3 *self, char *buf,
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
}
|
||||
|
||||
// Advance buf until double quote, backslash, invalid utf8, or codepoint <
|
||||
// 0x20
|
||||
template <class V>
|
||||
inline PRESERVE_NONE WeaselJsonStatus scan_string_impl(Parser3 *self,
|
||||
char *&buf,
|
||||
char *bufEnd) {
|
||||
const auto before = buf;
|
||||
|
||||
// Advance buf past characters that transition the accept state to itself
|
||||
if (self->strDfa.accept()) {
|
||||
for (;;) {
|
||||
if (bufEnd - buf < V::lanes) [[unlikely]] {
|
||||
break;
|
||||
}
|
||||
auto v = V{(int8_t *)buf};
|
||||
int normal =
|
||||
(v != V::splat('"') & v != V::splat('\\') & v >= V::splat(0x20))
|
||||
.count_leading_nonzero_lanes();
|
||||
buf += normal;
|
||||
if (normal < V::lanes) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
buf = (char *)self->strDfa.scan(buf, bufEnd);
|
||||
|
||||
int len = buf - before;
|
||||
if (self->writeBuf != before) {
|
||||
memmove(self->writeBuf, before, len);
|
||||
}
|
||||
self->writeBuf += len;
|
||||
|
||||
if (buf == bufEnd) {
|
||||
self->flushString(false);
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
|
||||
if (!self->strDfa.accept()) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
return WeaselJson_OK;
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
constexpr int kLanes = 32;
|
||||
template WeaselJsonStatus
|
||||
scan_string_impl<simd<int8_t, kLanes, sse::Simd_x86_SSE>>(Parser3 *, char *&,
|
||||
char *);
|
||||
|
||||
template __attribute__((target("avx2"))) WeaselJsonStatus
|
||||
scan_string_impl<simd<int8_t, kLanes, sse::Simd_x86_AVX2>>(Parser3 *, char *&,
|
||||
char *);
|
||||
|
||||
__attribute__((target("default"))) inline PRESERVE_NONE WeaselJsonStatus
|
||||
scan_string(Parser3 *self, char *&buf, char *bufEnd) {
|
||||
MUSTTAIL return scan_string_impl<simd<int8_t, kLanes, sse::Simd_x86_SSE>>(
|
||||
self, buf, bufEnd);
|
||||
}
|
||||
|
||||
__attribute__((target("avx2"))) inline PRESERVE_NONE WeaselJsonStatus
|
||||
scan_string(Parser3 *self, char *&buf, char *bufEnd) {
|
||||
MUSTTAIL return scan_string_impl<simd<int8_t, kLanes, sse::Simd_x86_AVX2>>(
|
||||
self, buf, bufEnd);
|
||||
}
|
||||
#else
|
||||
inline PRESERVE_NONE WeaselJsonStatus scan_string(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
MUSTTAIL return scan_string_impl<simd<int8_t, 32>>(self, buf, bufEnd);
|
||||
}
|
||||
#endif
|
||||
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
assert(bufEnd - buf != 0);
|
||||
while (tables.whitespace[uint8_t(*buf)]) {
|
||||
++buf;
|
||||
if (buf == bufEnd) {
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
if (auto s = skipWhitespace(buf, bufEnd)) {
|
||||
return s;
|
||||
}
|
||||
switch (*buf) {
|
||||
case '{':
|
||||
@@ -497,8 +270,9 @@ inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self, char *buf,
|
||||
++buf;
|
||||
self->dataBegin = self->writeBuf = buf;
|
||||
self->pop();
|
||||
if (auto s = self->push({N_STRING2})) {
|
||||
return s;
|
||||
self->strDfa.reset();
|
||||
if (auto s2 = self->push({N_STRING2})) {
|
||||
return s2;
|
||||
}
|
||||
break;
|
||||
case '0':
|
||||
@@ -515,19 +289,10 @@ inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self, char *buf,
|
||||
self->dataBegin = buf;
|
||||
self->pop();
|
||||
self->numDfa.reset();
|
||||
buf = (char *)self->numDfa.scan(buf, bufEnd);
|
||||
if (buf == bufEnd) {
|
||||
self->flushNumber(false, buf);
|
||||
if (auto s = self->push({N_NUMBER})) {
|
||||
return s;
|
||||
}
|
||||
return WeaselJson_AGAIN;
|
||||
if (auto s2 = self->push({N_NUMBER})) {
|
||||
return s2;
|
||||
}
|
||||
if (!self->numDfa.accept()) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
self->flushNumber(true, buf);
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
break;
|
||||
case 't':
|
||||
++buf;
|
||||
self->pop();
|
||||
@@ -585,11 +350,8 @@ inline PRESERVE_NONE WeaselJsonStatus n_value(Parser3 *self, char *buf,
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_object2(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
assert(bufEnd - buf != 0);
|
||||
while (tables.whitespace[uint8_t(*buf)]) {
|
||||
++buf;
|
||||
if (buf == bufEnd) {
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
if (auto s = skipWhitespace(buf, bufEnd)) {
|
||||
return s;
|
||||
}
|
||||
switch (*buf) {
|
||||
case '}':
|
||||
@@ -601,6 +363,7 @@ inline PRESERVE_NONE WeaselJsonStatus n_object2(Parser3 *self, char *buf,
|
||||
++buf;
|
||||
self->dataBegin = self->writeBuf = buf;
|
||||
self->pop();
|
||||
self->strDfa.reset();
|
||||
if (auto s = self->push({N_STRING2, T_COLON, N_VALUE, N_OBJECT3})) {
|
||||
return s;
|
||||
}
|
||||
@@ -613,11 +376,8 @@ inline PRESERVE_NONE WeaselJsonStatus n_object2(Parser3 *self, char *buf,
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_object3(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
assert(bufEnd - buf != 0);
|
||||
while (tables.whitespace[uint8_t(*buf)]) {
|
||||
++buf;
|
||||
if (buf == bufEnd) {
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
if (auto s = skipWhitespace(buf, bufEnd)) {
|
||||
return s;
|
||||
}
|
||||
switch (*buf) {
|
||||
case '}':
|
||||
@@ -640,11 +400,8 @@ inline PRESERVE_NONE WeaselJsonStatus n_object3(Parser3 *self, char *buf,
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_array2(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
assert(bufEnd - buf != 0);
|
||||
while (tables.whitespace[uint8_t(*buf)]) {
|
||||
++buf;
|
||||
if (buf == bufEnd) {
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
if (auto s = skipWhitespace(buf, bufEnd)) {
|
||||
return s;
|
||||
}
|
||||
switch (*buf) {
|
||||
case ']':
|
||||
@@ -664,11 +421,8 @@ inline PRESERVE_NONE WeaselJsonStatus n_array2(Parser3 *self, char *buf,
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_array3(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
assert(bufEnd - buf != 0);
|
||||
while (tables.whitespace[uint8_t(*buf)]) {
|
||||
++buf;
|
||||
if (buf == bufEnd) {
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
if (auto s = skipWhitespace(buf, bufEnd)) {
|
||||
return s;
|
||||
}
|
||||
switch (*buf) {
|
||||
case ']':
|
||||
@@ -691,11 +445,8 @@ inline PRESERVE_NONE WeaselJsonStatus n_array3(Parser3 *self, char *buf,
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_string(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
assert(bufEnd - buf != 0);
|
||||
while (tables.whitespace[uint8_t(*buf)]) {
|
||||
++buf;
|
||||
if (buf == bufEnd) {
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
if (auto s = skipWhitespace(buf, bufEnd)) {
|
||||
return s;
|
||||
}
|
||||
if (*buf != '"') [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
@@ -703,128 +454,39 @@ inline PRESERVE_NONE WeaselJsonStatus n_string(Parser3 *self, char *buf,
|
||||
++buf;
|
||||
self->dataBegin = self->writeBuf = buf;
|
||||
self->pop();
|
||||
self->strDfa.reset();
|
||||
if (auto s = self->push({N_STRING2})) {
|
||||
return s;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
}
|
||||
|
||||
template <class V>
|
||||
PRESERVE_NONE WeaselJsonStatus n_string2_impl(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
const auto before = buf;
|
||||
|
||||
// Advance buf to the first "non-normal" character
|
||||
for (;;) {
|
||||
if (bufEnd - buf < V::lanes) [[unlikely]] {
|
||||
while (buf != bufEnd &&
|
||||
tables.stringByteMeaning[uint8_t(*buf)] == Tables::NORMAL) {
|
||||
++buf;
|
||||
}
|
||||
break;
|
||||
}
|
||||
auto v = V{(int8_t *)buf};
|
||||
int normal =
|
||||
(v != V::splat('"') & v != V::splat('\\') & v >= V::splat(0x20))
|
||||
.count_leading_nonzero_lanes();
|
||||
buf += normal;
|
||||
if (normal < V::lanes) {
|
||||
break;
|
||||
}
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_string2(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
if (auto s = scan_string(self, buf, bufEnd)) {
|
||||
return s;
|
||||
}
|
||||
|
||||
int len = buf - before;
|
||||
memmove(self->writeBuf, before, len);
|
||||
self->writeBuf += len;
|
||||
|
||||
if (buf == bufEnd) {
|
||||
self->flushString(false);
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
|
||||
switch (tables.stringByteMeaning[uint8_t(*buf)]) {
|
||||
case Tables::NORMAL:
|
||||
__builtin_unreachable();
|
||||
case Tables::DUBQUOTE:
|
||||
switch (*buf) {
|
||||
case '"':
|
||||
self->flushString(true);
|
||||
++buf;
|
||||
self->pop();
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
case Tables::BACKSLASH:
|
||||
case '\\':
|
||||
++buf;
|
||||
self->pop();
|
||||
if (auto s = self->push({N_STRING_FOLLOWING_ESCAPE})) {
|
||||
return s;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
case Tables::TWO_BYTE_UTF8:
|
||||
// two byte utf-8 encoding
|
||||
self->utf8Codepoint = *buf & 0b00011111;
|
||||
self->minCodepoint = 0x80;
|
||||
*self->writeBuf++ = *buf++;
|
||||
self->pop();
|
||||
if (auto s = self->push({T_UTF8_LAST_CONTINUATION_BYTE, N_STRING2})) {
|
||||
return s;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
case Tables::THREE_BYTE_UTF8:
|
||||
// three byte utf-8 encoding
|
||||
self->utf8Codepoint = *buf & 0b00001111;
|
||||
self->minCodepoint = 0x800;
|
||||
*self->writeBuf++ = *buf++;
|
||||
self->pop();
|
||||
if (auto s = self->push({T_UTF8_CONTINUATION_BYTE,
|
||||
T_UTF8_LAST_CONTINUATION_BYTE, N_STRING2})) {
|
||||
return s;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
case Tables::FOUR_BYTE_UTF8:
|
||||
// four byte utf-8 encoding
|
||||
self->utf8Codepoint = *buf & 0b00000111;
|
||||
self->minCodepoint = 0x10000;
|
||||
*self->writeBuf++ = *buf++;
|
||||
self->pop();
|
||||
if (auto s = self->push({T_UTF8_CONTINUATION_BYTE, T_UTF8_CONTINUATION_BYTE,
|
||||
T_UTF8_LAST_CONTINUATION_BYTE, N_STRING2})) {
|
||||
return s;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
case Tables::CONTINUATION_BYTE:
|
||||
case Tables::INVALID:
|
||||
[[unlikely]] return WeaselJson_REJECT;
|
||||
default:
|
||||
__builtin_unreachable();
|
||||
[[unlikely]] return WeaselJson_REJECT;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
template WeaselJsonStatus
|
||||
n_string2_impl<simd<int8_t, 64, sse::Simd_x86_SSE>>(Parser3 *, char *, char *);
|
||||
|
||||
template __attribute__((target("avx2"))) WeaselJsonStatus
|
||||
n_string2_impl<simd<int8_t, 64, sse::Simd_x86_AVX2>>(Parser3 *, char *, char *);
|
||||
|
||||
__attribute__((target("default"))) inline PRESERVE_NONE WeaselJsonStatus
|
||||
n_string2(Parser3 *self, char *buf, char *bufEnd) {
|
||||
MUSTTAIL return n_string2_impl<simd<int8_t, 64, sse::Simd_x86_SSE>>(self, buf,
|
||||
bufEnd);
|
||||
}
|
||||
|
||||
__attribute__((target("avx2"))) inline PRESERVE_NONE WeaselJsonStatus
|
||||
n_string2(Parser3 *self, char *buf, char *bufEnd) {
|
||||
MUSTTAIL return n_string2_impl<simd<int8_t, 64, sse::Simd_x86_AVX2>>(
|
||||
self, buf, bufEnd);
|
||||
}
|
||||
#else
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_string2(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
MUSTTAIL return n_string2_impl<simd<int8_t, 32>>(self, buf, bufEnd);
|
||||
}
|
||||
#endif
|
||||
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_string_following_escape(Parser3 *self,
|
||||
char *buf,
|
||||
char *bufEnd) {
|
||||
assert(self->strDfa.accept());
|
||||
switch (*buf) {
|
||||
case '"':
|
||||
case '\\':
|
||||
@@ -836,15 +498,12 @@ inline PRESERVE_NONE WeaselJsonStatus n_string_following_escape(Parser3 *self,
|
||||
case 't':
|
||||
*self->writeBuf++ = tables.unescape[uint8_t(*buf++)];
|
||||
self->pop();
|
||||
if (auto s = self->push({N_STRING2})) {
|
||||
return s;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
case 'u':
|
||||
++buf;
|
||||
self->utf8Codepoint = 0;
|
||||
self->pop();
|
||||
if (auto s = self->push({T_HEX, T_HEX, T_HEX, T_HEX2, N_STRING2})) {
|
||||
if (auto s = self->push({T_HEX, T_HEX, T_HEX, T_HEX2})) {
|
||||
return s;
|
||||
}
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
@@ -853,74 +512,14 @@ inline PRESERVE_NONE WeaselJsonStatus n_string_following_escape(Parser3 *self,
|
||||
}
|
||||
}
|
||||
|
||||
// Handles one UTF-8 continuation byte that is not the last byte of its
// sequence. Rejects the input unless *buf is classified as CONTINUATION_BYTE
// in tables.stringByteMeaning; otherwise accumulates its payload bits into
// self->utf8Codepoint, copies the byte to the write buffer, pops this symbol
// and continues with the next symbol on the parser stack.
inline PRESERVE_NONE WeaselJsonStatus t_utf8_continuation_byte(Parser3 *self,
|
||||
char *buf,
|
||||
char *bufEnd) {
|
||||
if (tables.stringByteMeaning[uint8_t(*buf)] != Tables::CONTINUATION_BYTE)
|
||||
[[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
// Shift in the low six bits carried by this continuation byte.
self->utf8Codepoint <<= 6;
|
||||
self->utf8Codepoint |= *buf & 0b00111111;
|
||||
// Copy the byte through unchanged and advance both cursors.
*self->writeBuf++ = *buf++;
|
||||
self->pop();
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
}
|
||||
|
||||
// Handles the final continuation byte of a multi-byte UTF-8 sequence. After
// folding the byte's six payload bits into self->utf8Codepoint, the completed
// codepoint is validated; on success the byte is copied to the write buffer,
// this symbol is popped and parsing continues.
inline PRESERVE_NONE WeaselJsonStatus
|
||||
t_utf8_last_continuation_byte(Parser3 *self, char *buf, char *bufEnd) {
|
||||
if (tables.stringByteMeaning[uint8_t(*buf)] != Tables::CONTINUATION_BYTE)
|
||||
[[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
self->utf8Codepoint <<= 6;
|
||||
self->utf8Codepoint |= *buf & 0b00111111;
|
||||
// Reject codepoints below self->minCodepoint (set when the lead byte was
// seen), codepoints above 0x10ffff, and the range 0xd800-0xdfff.
if (self->utf8Codepoint < self->minCodepoint ||
|
||||
self->utf8Codepoint > 0x10ffff ||
|
||||
(0xd800 <= self->utf8Codepoint && self->utf8Codepoint <= 0xdfff))
|
||||
[[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
// TODO tell valgrind utf8Codepoint and minCodepoint are uninitialized
|
||||
*self->writeBuf++ = *buf++;
|
||||
self->pop();
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
}
|
||||
|
||||
// Terminal symbol for a single decimal digit '0'..'9'. On a match the byte is
// consumed, the symbol is popped and parsing continues; any other byte
// rejects the input.
inline PRESERVE_NONE WeaselJsonStatus t_digit(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
if ('0' <= *buf && *buf <= '9') {
|
||||
++buf;
|
||||
self->pop();
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
} else [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
}
|
||||
|
||||
// Terminal symbol for a single non-zero decimal digit '1'..'9'. On a match
// the byte is consumed, the symbol is popped and parsing continues; any other
// byte (including '0') rejects the input.
inline PRESERVE_NONE WeaselJsonStatus t_onenine(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
if ('1' <= *buf && *buf <= '9') {
|
||||
++buf;
|
||||
self->pop();
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
} else [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
}
|
||||
|
||||
inline PRESERVE_NONE WeaselJsonStatus t_hex(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
self->utf8Codepoint <<= 4;
|
||||
if (('0' <= *buf && *buf <= '9')) {
|
||||
self->utf8Codepoint |= *buf - '0';
|
||||
} else if ('a' <= *buf && *buf <= 'f') {
|
||||
self->utf8Codepoint |= 10 + *buf - 'a';
|
||||
} else if ('A' <= *buf && *buf <= 'F') {
|
||||
self->utf8Codepoint |= 10 + *buf - 'A';
|
||||
} else [[unlikely]] {
|
||||
auto hexVal = tables.hex[uint8_t(*buf)];
|
||||
if (hexVal < 0) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
self->utf8Codepoint <<= 4;
|
||||
self->utf8Codepoint |= hexVal;
|
||||
++buf;
|
||||
self->pop();
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
@@ -928,16 +527,12 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex(Parser3 *self, char *buf,
|
||||
|
||||
inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
self->utf8Codepoint <<= 4;
|
||||
if (('0' <= *buf && *buf <= '9')) {
|
||||
self->utf8Codepoint |= *buf - '0';
|
||||
} else if ('a' <= *buf && *buf <= 'f') {
|
||||
self->utf8Codepoint |= 10 + *buf - 'a';
|
||||
} else if ('A' <= *buf && *buf <= 'F') {
|
||||
self->utf8Codepoint |= 10 + *buf - 'A';
|
||||
} else [[unlikely]] {
|
||||
auto hexVal = tables.hex[uint8_t(*buf)];
|
||||
if (hexVal < 0) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
self->utf8Codepoint <<= 4;
|
||||
self->utf8Codepoint |= hexVal;
|
||||
++buf;
|
||||
|
||||
// Write codepoint in utf-8 if there's room in the user provided buffer. If
|
||||
@@ -996,16 +591,12 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self, char *buf,
|
||||
|
||||
inline PRESERVE_NONE WeaselJsonStatus t_hex3(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
self->utf8Codepoint <<= 4;
|
||||
if (('0' <= *buf && *buf <= '9')) {
|
||||
self->utf8Codepoint |= *buf - '0';
|
||||
} else if ('a' <= *buf && *buf <= 'f') {
|
||||
self->utf8Codepoint |= 10 + *buf - 'a';
|
||||
} else if ('A' <= *buf && *buf <= 'F') {
|
||||
self->utf8Codepoint |= 10 + *buf - 'A';
|
||||
} else [[unlikely]] {
|
||||
auto hexVal = tables.hex[uint8_t(*buf)];
|
||||
if (hexVal < 0) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
self->utf8Codepoint <<= 4;
|
||||
self->utf8Codepoint |= hexVal;
|
||||
++buf;
|
||||
|
||||
if (!(0xdc00 <= self->utf8Codepoint && self->utf8Codepoint <= 0xdfff))
|
||||
@@ -1087,11 +678,8 @@ inline PRESERVE_NONE WeaselJsonStatus singleChar(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
if constexpr (kSkipWhitespace) {
|
||||
assert(bufEnd - buf != 0);
|
||||
while (tables.whitespace[uint8_t(*buf)]) {
|
||||
++buf;
|
||||
if (buf == bufEnd) {
|
||||
return WeaselJson_AGAIN;
|
||||
}
|
||||
if (auto s = skipWhitespace(buf, bufEnd)) {
|
||||
return s;
|
||||
}
|
||||
}
|
||||
if (*buf == kChar) {
|
||||
@@ -1105,19 +693,12 @@ inline PRESERVE_NONE WeaselJsonStatus singleChar(Parser3 *self, char *buf,
|
||||
|
||||
inline PRESERVE_NONE WeaselJsonStatus t_eof(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
if (bufEnd - buf > 0) [[unlikely]] {
|
||||
if (buf != bufEnd) [[unlikely]] {
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
return self->complete ? WeaselJson_OK : WeaselJson_AGAIN;
|
||||
}
|
||||
|
||||
// Symbol that marks the end of a number: pops itself, flushes the number via
// self->flushNumber and continues parsing. It consumes no input; buf is
// passed on unchanged.
inline PRESERVE_NONE WeaselJsonStatus t_end_number(Parser3 *self, char *buf,
|
||||
char *bufEnd) {
|
||||
self->pop();
|
||||
// NOTE(review): `true` presumably marks the number as complete and `buf` its
// end position -- confirm against flushNumber's definition.
self->flushNumber(true, buf);
|
||||
MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd);
|
||||
}
|
||||
|
||||
constexpr inline struct ContinuationTable {
|
||||
constexpr ContinuationTable() {
|
||||
// Defaults
|
||||
@@ -1147,9 +728,6 @@ constexpr inline struct ContinuationTable {
|
||||
continuations[T_L] = singleChar<'l'>;
|
||||
continuations[T_S] = singleChar<'s'>;
|
||||
continuations[T_COLON] = singleChar<':', true>;
|
||||
continuations[T_UTF8_CONTINUATION_BYTE] = t_utf8_continuation_byte;
|
||||
continuations[T_UTF8_LAST_CONTINUATION_BYTE] =
|
||||
t_utf8_last_continuation_byte;
|
||||
continuations[T_HEX] = t_hex;
|
||||
continuations[T_HEX2] = t_hex2;
|
||||
continuations[T_HEX3] = t_hex3;
|
||||
@@ -1176,7 +754,6 @@ constexpr inline struct ContinuationTable {
|
||||
symbolNames[T_L] = "singleChar<'l'>";
|
||||
symbolNames[T_S] = "singleChar<'s'>";
|
||||
symbolNames[T_COLON] = "singleChar<':'>";
|
||||
symbolNames[T_UTF8_CONTINUATION_BYTE] = "t_utf8_continuation_byte";
|
||||
symbolNames[T_HEX] = "t_hex";
|
||||
symbolNames[T_HEX2] = "t_hex2";
|
||||
symbolNames[T_HEX3] = "t_hex3";
|
||||
@@ -1201,8 +778,6 @@ inline PRESERVE_NONE WeaselJsonStatus Parser3::keepGoing(Parser3 *self,
|
||||
switch (self->top()) {
|
||||
case N_STRING2:
|
||||
case N_STRING_FOLLOWING_ESCAPE:
|
||||
case T_UTF8_CONTINUATION_BYTE:
|
||||
case T_UTF8_LAST_CONTINUATION_BYTE:
|
||||
case T_HEX:
|
||||
case T_HEX2:
|
||||
case T_HEX3:
|
||||
@@ -1240,6 +815,7 @@ inline PRESERVE_NONE WeaselJsonStatus Parser3::keepGoing(Parser3 *self,
|
||||
return WeaselJson_REJECT;
|
||||
}
|
||||
}
|
||||
// printf("%s\n", symbolTables.symbolNames[self->top()]);
|
||||
MUSTTAIL return symbolTables.continuations[self->top()](self, buf, bufEnd);
|
||||
}
|
||||
|
||||
|
||||
680
src/tables.h
680
src/tables.h
@@ -1,50 +1,14 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
constexpr inline struct Tables {
|
||||
enum StringByteMeaning {
|
||||
INVALID,
|
||||
NORMAL,
|
||||
DUBQUOTE,
|
||||
BACKSLASH,
|
||||
TWO_BYTE_UTF8,
|
||||
THREE_BYTE_UTF8,
|
||||
FOUR_BYTE_UTF8,
|
||||
CONTINUATION_BYTE,
|
||||
};
|
||||
|
||||
constexpr Tables() {
|
||||
whitespace[' '] = true;
|
||||
whitespace['\n'] = true;
|
||||
whitespace['\r'] = true;
|
||||
whitespace['\t'] = true;
|
||||
|
||||
for (int i = 0; i < 256; ++i) {
|
||||
if ((i & 0b11000000) == 0b10000000) {
|
||||
stringByteMeaning[i] = CONTINUATION_BYTE;
|
||||
}
|
||||
if ((i & 0b11100000) == 0b11000000) {
|
||||
stringByteMeaning[i] = TWO_BYTE_UTF8;
|
||||
}
|
||||
if ((i & 0b11110000) == 0b11100000) {
|
||||
stringByteMeaning[i] = THREE_BYTE_UTF8;
|
||||
}
|
||||
if ((i & 0b11111000) == 0b11110000) {
|
||||
stringByteMeaning[i] = FOUR_BYTE_UTF8;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0x20; i < 128; ++i) {
|
||||
stringByteMeaning[i] = NORMAL;
|
||||
}
|
||||
stringByteMeaning['"'] = DUBQUOTE;
|
||||
stringByteMeaning['\\'] = BACKSLASH;
|
||||
|
||||
stringByteMeaning[0xc0] = INVALID;
|
||||
stringByteMeaning[0xc1] = INVALID;
|
||||
for (int i = 0xF5; i < 0x100; ++i) {
|
||||
stringByteMeaning[i] = INVALID;
|
||||
}
|
||||
|
||||
unescape['n'] = '\n';
|
||||
unescape['r'] = '\r';
|
||||
unescape['t'] = '\t';
|
||||
@@ -53,8 +17,646 @@ constexpr inline struct Tables {
|
||||
unescape['f'] = '\f';
|
||||
unescape['\\'] = '\\';
|
||||
unescape['/'] = '/';
|
||||
for (int i = 0; i < 256; ++i) {
|
||||
hex[i] = -1;
|
||||
}
|
||||
for (int i = '0'; i <= '9'; ++i) {
|
||||
hex[i] = i - '0';
|
||||
}
|
||||
for (int i = 'a'; i <= 'f'; ++i) {
|
||||
hex[i] = 10 + i - 'a';
|
||||
}
|
||||
for (int i = 'A'; i <= 'F'; ++i) {
|
||||
hex[i] = 10 + i - 'A';
|
||||
}
|
||||
}
|
||||
bool whitespace[256]{};
|
||||
StringByteMeaning stringByteMeaning[256]{};
|
||||
char unescape[256]{};
|
||||
int8_t hex[256]{};
|
||||
} tables;
|
||||
|
||||
// See https://gist.github.com/pervognsen/218ea17743e1442e59bb60d29b1aa725 for
|
||||
// an explanation of this cycle/byte dfa implementation.
|
||||
//
|
||||
// Recognizes json number syntax. As a regex:
|
||||
// -?([0-9]|[1-9][0-9]*)(\.[0-9]+)?((e|E)(-|\+)?[0-9]+)?
|
||||
struct NumDfa {
|
||||
constexpr static uint64_t table[256] = {
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x36000ull,
|
||||
0x0ull,
|
||||
0x36600ull,
|
||||
0x12480000000000ull,
|
||||
0x0ull,
|
||||
0x780aa47b091ec00ull,
|
||||
0x780aa47aa91ea80ull,
|
||||
0x780aa47aa91ea80ull,
|
||||
0x780aa47aa91ea80ull,
|
||||
0x780aa47aa91ea80ull,
|
||||
0x780aa47aa91ea80ull,
|
||||
0x780aa47aa91ea80ull,
|
||||
0x780aa47aa91ea80ull,
|
||||
0x780aa47aa91ea80ull,
|
||||
0x780aa47aa91ea80ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0xc30c000000000ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0xc30c000000000ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
};
|
||||
// Restore this dfa to its start state
|
||||
void reset() { state = 6; }
|
||||
// Return true if this dfa is in an accept state. You probably want to call
|
||||
// scan until the match ends first.
|
||||
bool accept() const {
|
||||
return (state & 63) == 30 || (state & 63) == 36 || (state & 63) == 48 ||
|
||||
(state & 63) == 42;
|
||||
}
|
||||
// clang-format off
|
||||
#ifdef __x86_64__
|
||||
__attribute__((target_clones("default", "bmi2")))
|
||||
#endif
|
||||
// Return value either points to the first byte which does not match, or bufEnd.
|
||||
// Leaves the dfa in the last state of the match.
|
||||
// Transition rule used below: table[byte] is a 64-bit row, the current state
// is used as a bit offset into that row, and the 6 bits found there are the
// next state. A next state of 0 means the byte has no transition.
const char *scan(const char *buf, const char *bufEnd) {
|
||||
// clang-format on
|
||||
auto state_ = state;
|
||||
for (;;) {
|
||||
constexpr int kStride = 16;
|
||||
// Fewer than kStride bytes remain: finish one byte at a time, checking the
// end of the buffer before every read, then store the state and return.
if (bufEnd - buf < kStride) [[unlikely]] {
|
||||
while (buf != bufEnd) {
|
||||
uint64_t row = table[uint8_t(*buf)];
|
||||
// Keep the current state so a byte without a transition does not clobber it.
auto prev = state_;
|
||||
state_ = (row >> (state_ & 63)) & 63;
|
||||
if (state_ == 0) {
|
||||
state_ = prev;
|
||||
break;
|
||||
}
|
||||
++buf;
|
||||
}
|
||||
state = state_;
|
||||
return buf;
|
||||
}
|
||||
// At least kStride bytes are available, so this block reads them without a
// per-byte end-of-buffer check. prev[i] is the state before byte i. Entries
// keep the low 8 bits of the shifted row; only the low 6 bits are the state,
// which is why uses are masked with & 63 (here and in accept()).
uint8_t prev[kStride + 1];
|
||||
prev[0] = state_;
|
||||
for (int i = 0; i < kStride; ++i) {
|
||||
uint64_t row = table[uint8_t(*buf)];
|
||||
prev[i + 1] = row >> (prev[i] & 63);
|
||||
if ((prev[i + 1] & 63) == 0) {
|
||||
// Byte i has no transition: keep the state from before it and stop there.
state = prev[i];
|
||||
return buf;
|
||||
}
|
||||
++buf;
|
||||
}
|
||||
state_ = prev[kStride];
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
uint64_t state = 6;
|
||||
};
|
||||
|
||||
// Recognizes sequences of valid utf8 characters except control characters
|
||||
// 0x00-0x1f, double quote, and backslash
|
||||
struct Utf8Dfa {
|
||||
constexpr static uint64_t table[256] = {
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x0ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x0ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x30000000000000ull,
|
||||
0x18630780780ull,
|
||||
0x18630780780ull,
|
||||
0x18630780780ull,
|
||||
0x18630780780ull,
|
||||
0x18630780780ull,
|
||||
0x18630780780ull,
|
||||
0x18630780780ull,
|
||||
0x18630780780ull,
|
||||
0x18630780780ull,
|
||||
0x18630780780ull,
|
||||
0x18630780780ull,
|
||||
0x18630780780ull,
|
||||
0x18630780780ull,
|
||||
0x18630780780ull,
|
||||
0x18630780780ull,
|
||||
0x18630780780ull,
|
||||
0x1863001e780ull,
|
||||
0x1863001e780ull,
|
||||
0x1863001e780ull,
|
||||
0x1863001e780ull,
|
||||
0x1863001e780ull,
|
||||
0x1863001e780ull,
|
||||
0x1863001e780ull,
|
||||
0x1863001e780ull,
|
||||
0x1863001e780ull,
|
||||
0x1863001e780ull,
|
||||
0x1863001e780ull,
|
||||
0x1863001e780ull,
|
||||
0x1863001e780ull,
|
||||
0x1863001e780ull,
|
||||
0x1863001e780ull,
|
||||
0x1863001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x60063001e780ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x18000000000000ull,
|
||||
0x18000000000000ull,
|
||||
0x18000000000000ull,
|
||||
0x18000000000000ull,
|
||||
0x18000000000000ull,
|
||||
0x18000000000000ull,
|
||||
0x18000000000000ull,
|
||||
0x18000000000000ull,
|
||||
0x18000000000000ull,
|
||||
0x18000000000000ull,
|
||||
0x18000000000000ull,
|
||||
0x18000000000000ull,
|
||||
0x18000000000000ull,
|
||||
0x18000000000000ull,
|
||||
0x18000000000000ull,
|
||||
0x18000000000000ull,
|
||||
0x18000000000000ull,
|
||||
0x18000000000000ull,
|
||||
0x18000000000000ull,
|
||||
0x18000000000000ull,
|
||||
0x18000000000000ull,
|
||||
0x18000000000000ull,
|
||||
0x18000000000000ull,
|
||||
0x18000000000000ull,
|
||||
0x18000000000000ull,
|
||||
0x18000000000000ull,
|
||||
0x18000000000000ull,
|
||||
0x18000000000000ull,
|
||||
0x18000000000000ull,
|
||||
0x18000000000000ull,
|
||||
0x2a000000000000ull,
|
||||
0x1e000000000000ull,
|
||||
0x1e000000000000ull,
|
||||
0x1e000000000000ull,
|
||||
0x1e000000000000ull,
|
||||
0x1e000000000000ull,
|
||||
0x1e000000000000ull,
|
||||
0x1e000000000000ull,
|
||||
0x1e000000000000ull,
|
||||
0x1e000000000000ull,
|
||||
0x1e000000000000ull,
|
||||
0x1e000000000000ull,
|
||||
0x1e000000000000ull,
|
||||
0x24000000000000ull,
|
||||
0x1e000000000000ull,
|
||||
0x1e000000000000ull,
|
||||
0xc000000000000ull,
|
||||
0x6000000000000ull,
|
||||
0x6000000000000ull,
|
||||
0x6000000000000ull,
|
||||
0x12000000000000ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
0x0ull,
|
||||
};
|
||||
// Restore this dfa to its start state
|
||||
void reset() { state = 48; }
|
||||
// Return true if this dfa is in an accept state. You probably want to call
|
||||
// scan until the match ends first.
|
||||
bool accept() const { return (state & 63) == 48; }
|
||||
// clang-format off
|
||||
#ifdef __x86_64__
|
||||
__attribute__((target_clones("default", "bmi2")))
|
||||
#endif
|
||||
// Return value either points to the first byte which does not match, or bufEnd.
|
||||
// Leaves the dfa in the last state of the match.
|
||||
// Transition rule used below: table[byte] is a 64-bit row, the current state
// is used as a bit offset into that row, and the 6 bits found there are the
// next state. A next state of 0 means the byte has no transition.
const char *scan(const char *buf, const char *bufEnd) {
|
||||
// clang-format on
|
||||
auto state_ = state;
|
||||
for (;;) {
|
||||
constexpr int kStride = 16;
|
||||
// Fewer than kStride bytes remain: finish one byte at a time, checking the
// end of the buffer before every read, then store the state and return.
if (bufEnd - buf < kStride) [[unlikely]] {
|
||||
while (buf != bufEnd) {
|
||||
uint64_t row = table[uint8_t(*buf)];
|
||||
// Keep the current state so a byte without a transition does not clobber it.
auto prev = state_;
|
||||
state_ = (row >> (state_ & 63)) & 63;
|
||||
if (state_ == 0) {
|
||||
state_ = prev;
|
||||
break;
|
||||
}
|
||||
++buf;
|
||||
}
|
||||
state = state_;
|
||||
return buf;
|
||||
}
|
||||
// At least kStride bytes are available, so this block reads them without a
// per-byte end-of-buffer check. prev[i] is the state before byte i. Entries
// keep the low 8 bits of the shifted row; only the low 6 bits are the state,
// which is why uses are masked with & 63 (here and in accept()).
uint8_t prev[kStride + 1];
|
||||
prev[0] = state_;
|
||||
for (int i = 0; i < kStride; ++i) {
|
||||
uint64_t row = table[uint8_t(*buf)];
|
||||
prev[i + 1] = row >> (prev[i] & 63);
|
||||
if ((prev[i + 1] & 63) == 0) {
|
||||
// Byte i has no transition: keep the state from before it and stop there.
state = prev[i];
|
||||
return buf;
|
||||
}
|
||||
++buf;
|
||||
}
|
||||
state_ = prev[kStride];
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
uint64_t state = 48;
|
||||
};
|
||||
|
||||
181
src/test.cpp
181
src/test.cpp
@@ -309,9 +309,16 @@ TEST_CASE("bench5") {
|
||||
}
|
||||
|
||||
TEST_CASE("num dfa") {
|
||||
parser3::NumDfa dfa;
|
||||
std::string match = "-1231279127389127389127398127389712893791287389217327482"
|
||||
"374.0e69010101010101010101010101010101";
|
||||
NumDfa dfa;
|
||||
std::string match =
|
||||
"111111111111111111111111111111111111111111111111111111111111111111111111"
|
||||
"111111111111111111111111111111111111111111111111111111111111111111111111"
|
||||
"111111111111111111111111111111111111111111111111111111111111111111111111"
|
||||
"111111111111111111111111111111111111111111111111111111111111111111111111"
|
||||
"111111111111111111111111111111111111111111111111111111111111111111111111"
|
||||
"111111111111111111111111111111111111111111111111111111111111111111111111"
|
||||
"111111111111111111111111111111111111111111111111111111111111111111111111"
|
||||
"11111111";
|
||||
auto *buf = dfa.scan(match.data(), match.data() + match.size());
|
||||
CHECK(buf == match.data() + match.size());
|
||||
CHECK(dfa.accept());
|
||||
@@ -325,3 +332,171 @@ TEST_CASE("num dfa") {
|
||||
dfa.scan(match.data(), match.data() + match.size()));
|
||||
});
|
||||
}
|
||||
|
||||
const char *utf8str =
|
||||
"💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩"
|
||||
"💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩"
|
||||
"💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩";
|
||||
|
||||
// Checks that Utf8Dfa consumes the whole of utf8str and ends in an accept
// state, then benchmarks Utf8Dfa::scan against simdjson::validate_utf8 on the
// same input, reporting results per byte.
TEST_CASE("utf8 dfa") {
|
||||
Utf8Dfa dfa;
|
||||
std::string match = utf8str;
|
||||
auto *buf = dfa.scan(match.data(), match.data() + match.size());
|
||||
// The scan must stop at the end of the input, i.e. every byte matched.
CHECK(buf == match.data() + match.size());
|
||||
CHECK(dfa.accept());
|
||||
|
||||
ankerl::nanobench::Bench bench;
|
||||
bench.batch(match.size());
|
||||
bench.unit("byte");
|
||||
bench.run("utf8 dfa", [&]() {
|
||||
// Start each iteration from the dfa's start state.
dfa.reset();
|
||||
bench.doNotOptimizeAway(
|
||||
dfa.scan(match.data(), match.data() + match.size()));
|
||||
});
|
||||
bench.run("simdjson utf8", [&]() {
|
||||
bench.doNotOptimizeAway(
|
||||
simdjson::validate_utf8(match.data(), match.size()));
|
||||
});
|
||||
}
|
||||
|
||||
// Benchmarks input structures that the implementation takes special care to
|
||||
// handle efficiently
|
||||
TEST_CASE("bench input types") {
|
||||
auto bench = [](std::string name, std::string json) {
|
||||
auto c = noopCallbacks();
|
||||
ankerl::nanobench::Bench bench;
|
||||
bench.batch(json.size());
|
||||
bench.unit("byte");
|
||||
bench.relative(true);
|
||||
|
||||
bench.run("simdjson dom " + name, [&]() {
|
||||
simdjson::padded_string my_padded_data(json.data(), json.size());
|
||||
simdjson::dom::parser parser;
|
||||
auto doc = parser.parse(my_padded_data);
|
||||
bench.doNotOptimizeAway(doc);
|
||||
});
|
||||
|
||||
auto *parser = WeaselJsonParser_create(1024, &c, nullptr);
|
||||
bench.run("parser3 " + name, [&]() {
|
||||
auto copy = json;
|
||||
WeaselJsonParser_reset(parser);
|
||||
if (WeaselJsonParser_parse(parser, copy.data(), copy.size()) !=
|
||||
WeaselJson_AGAIN) {
|
||||
abort();
|
||||
}
|
||||
if (WeaselJsonParser_parse(parser, nullptr, 0) != WeaselJson_OK) {
|
||||
abort();
|
||||
}
|
||||
});
|
||||
WeaselJsonParser_destroy(parser);
|
||||
};
|
||||
|
||||
bench("numbers", "[-123456789.000000000000000123456789e+12, "
|
||||
"-123456789.000000000000000123456789E+12, "
|
||||
"-123456789.000000000000000123456789e-12, "
|
||||
"-123456789.000000000000000123456789E-12, "
|
||||
"-123456789.000000000000000123456789e+12, "
|
||||
"-123456789.000000000000000123456789E+12, "
|
||||
"-123456789.000000000000000123456789e-12, "
|
||||
"-123456789.000000000000000123456789E-12, "
|
||||
"-123456789.000000000000000123456789e+12, "
|
||||
"-123456789.000000000000000123456789E+12, "
|
||||
"-123456789.000000000000000123456789e-12, "
|
||||
"-123456789.000000000000000123456789E-12, "
|
||||
"-123456789.000000000000000123456789e+12, "
|
||||
"-123456789.000000000000000123456789E+12, "
|
||||
"-123456789.000000000000000123456789e-12, "
|
||||
"-123456789.000000000000000123456789E-12, "
|
||||
"-123456789.000000000000000123456789e+12]");
|
||||
bench("ascii",
|
||||
"\"Donec lobortis eleifend condimentum. Cras dictum dolor lacinia "
|
||||
"lectus vehicula rutrum. Maecenas quis nisi nunc. Nam tristique "
|
||||
"feugiat est vitae mollis. Maecenas quis nisi nunc.\"");
|
||||
bench("utf-8", std::string("\"") + utf8str + "\"");
|
||||
bench("normal escapes",
|
||||
R"(
|
||||
["\n\r\t\"\b\f\\\/",
|
||||
"\n\r\t\"\b\f\\\/",
|
||||
"\n\r\t\"\b\f\\\/",
|
||||
"\n\r\t\"\b\f\\\/",
|
||||
"\n\r\t\"\b\f\\\/",
|
||||
"\n\r\t\"\b\f\\\/",
|
||||
"\n\r\t\"\b\f\\\/",
|
||||
"\n\r\t\"\b\f\\\/",
|
||||
"\n\r\t\"\b\f\\\/",
|
||||
"\n\r\t\"\b\f\\\/",
|
||||
"\n\r\t\"\b\f\\\/",
|
||||
"\n\r\t\"\b\f\\\/",
|
||||
"\n\r\t\"\b\f\\\/",
|
||||
"\n\r\t\"\b\f\\\/",
|
||||
"\n\r\t\"\b\f\\\/"]
|
||||
)");
|
||||
bench("unicode escapes",
|
||||
R"(
|
||||
["\uabcd\u1234\ud801\udc37",
|
||||
"\uabcd\u1234\ud801\udc37",
|
||||
"\uabcd\u1234\ud801\udc37",
|
||||
"\uabcd\u1234\ud801\udc37",
|
||||
"\uabcd\u1234\ud801\udc37",
|
||||
"\uabcd\u1234\ud801\udc37",
|
||||
"\uabcd\u1234\ud801\udc37",
|
||||
"\uabcd\u1234\ud801\udc37",
|
||||
"\uabcd\u1234\ud801\udc37",
|
||||
"\uabcd\u1234\ud801\udc37",
|
||||
"\uabcd\u1234\ud801\udc37",
|
||||
"\uabcd\u1234\ud801\udc37",
|
||||
"\uabcd\u1234\ud801\udc37",
|
||||
"\uabcd\u1234\ud801\udc37",
|
||||
"\uabcd\u1234\ud801\udc37"]
|
||||
)");
|
||||
bench("structural",
|
||||
R"(
|
||||
[
|
||||
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
|
||||
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
|
||||
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
|
||||
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
|
||||
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
|
||||
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
|
||||
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
|
||||
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
|
||||
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
|
||||
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
|
||||
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
|
||||
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
|
||||
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
|
||||
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
|
||||
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]},
|
||||
{"": [{"": [[], [], [], [], [], [[[[[[[]]]]]]], {"": ""}]}]}
|
||||
]
|
||||
)");
|
||||
bench("whitespace", R"(
|
||||
[
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0
|
||||
]
|
||||
)");
|
||||
bench("literals", R"([
|
||||
true, false, null,
|
||||
true, false, null,
|
||||
true, false, null,
|
||||
true, false, null,
|
||||
true, false, null,
|
||||
true, false, null,
|
||||
true, false, null
|
||||
]
|
||||
)");
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user