Use new simd library for scanning string literals
This commit is contained in:
@@ -1,6 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#include <bit>
|
||||
#include <cassert>
|
||||
#include <cctype>
|
||||
#include <cstdint>
|
||||
@@ -9,15 +8,9 @@
|
||||
#include <initializer_list>
|
||||
#include <tuple>
|
||||
|
||||
#ifdef __x86_64__
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
#ifdef __aarch64__
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
#include "musttail.h"
|
||||
#include "preserve_none.h"
|
||||
#include "simd.h"
|
||||
#include "tables.h"
|
||||
#include "weaseljson.h"
|
||||
|
||||
@@ -409,64 +402,26 @@ inline PRESERVE_NONE WeaselJsonStatus n_string(Parser3 *self) {
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_string2(Parser3 *self) {
|
||||
const auto before = self->buf;
|
||||
|
||||
// Advance self->buf to the first "non-normal" character
|
||||
#ifdef __x86_64__
|
||||
// Advance self->buf to the first "non-normal" character
|
||||
for (;;) {
|
||||
if (self->bufEnd - self->buf < 16) [[unlikely]] {
|
||||
constexpr int kStride = 64;
|
||||
if (self->bufEnd - self->buf < kStride) [[unlikely]] {
|
||||
while (self->buf != self->bufEnd &&
|
||||
tables.stringByteMeaning[uint8_t(*self->buf)] == Tables::NORMAL) {
|
||||
++self->buf;
|
||||
}
|
||||
break;
|
||||
}
|
||||
__m128i x;
|
||||
memcpy(&x, self->buf, 16);
|
||||
const uint32_t dubquote =
|
||||
_mm_movemask_epi8(_mm_cmpeq_epi8(_mm_set1_epi8('"'), x));
|
||||
const uint32_t backslash =
|
||||
_mm_movemask_epi8(_mm_cmpeq_epi8(_mm_set1_epi8('\\'), x));
|
||||
const uint32_t control_or_negative =
|
||||
_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_set1_epi8(0x20), x));
|
||||
const uint32_t non_normal = dubquote | backslash | control_or_negative;
|
||||
if (non_normal) {
|
||||
self->buf += std::countr_zero(non_normal);
|
||||
using V = simd<int8_t, kStride>;
|
||||
auto v = V{(int8_t *)self->buf};
|
||||
int normal =
|
||||
(v != V::splat('"') & v != V::splat('\\') & v >= V::splat(0x20))
|
||||
.count_leading_nonzero_lanes();
|
||||
self->buf += normal;
|
||||
if (normal < kStride) {
|
||||
break;
|
||||
}
|
||||
self->buf += 16;
|
||||
}
|
||||
#elif defined(__aarch64__)
|
||||
for (;;) {
|
||||
if (self->bufEnd - self->buf < 16) [[unlikely]] {
|
||||
while (self->buf != self->bufEnd &&
|
||||
tables.stringByteMeaning[uint8_t(*self->buf)] == Tables::NORMAL) {
|
||||
++self->buf;
|
||||
}
|
||||
break;
|
||||
}
|
||||
int8x16_t x;
|
||||
memcpy(&x, self->buf, 16);
|
||||
const auto dubquote = vreinterpretq_s8_u8(vceqq_s8(vdupq_n_s8('"'), x));
|
||||
const auto backslash = vreinterpretq_s8_u8(vceqq_s8(vdupq_n_s8('\\'), x));
|
||||
const auto control_or_negative =
|
||||
vreinterpretq_s8_u8(vcgtq_s8(vdupq_n_s8(0x20), x));
|
||||
const auto non_normal = vget_lane_u64(
|
||||
vreinterpret_u64_u8(vshrn_n_u16(
|
||||
vreinterpretq_u16_s8(
|
||||
vorrq_s8(vorrq_s8(dubquote, backslash), control_or_negative)),
|
||||
4)),
|
||||
0);
|
||||
if (non_normal) {
|
||||
self->buf += std::countr_zero(non_normal) / 4;
|
||||
break;
|
||||
}
|
||||
self->buf += 16;
|
||||
}
|
||||
#else
|
||||
while (self->buf != self->bufEnd &&
|
||||
tables.stringByteMeaning[uint8_t(*self->buf)] == Tables::NORMAL) {
|
||||
++self->buf;
|
||||
}
|
||||
#endif
|
||||
|
||||
int len = self->buf - before;
|
||||
memmove(self->writeBuf, before, len);
|
||||
|
||||
1266
src/simd.h
Normal file
1266
src/simd.h
Normal file
File diff suppressed because it is too large
Load Diff
32
src/test.cpp
32
src/test.cpp
@@ -2,6 +2,7 @@
|
||||
#include <cctype>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
|
||||
#include <limits>
|
||||
#include <string>
|
||||
@@ -119,32 +120,11 @@
|
||||
|
||||
namespace {
|
||||
|
||||
const std::string json = R"({
|
||||
"a number": 12345,
|
||||
"true": true,
|
||||
"false": false,
|
||||
"null": null,
|
||||
"glossary": {
|
||||
"title": "example glossary",
|
||||
"GlossDiv": {
|
||||
"title": "S",
|
||||
"GlossList": {
|
||||
"GlossEntry": {
|
||||
"ID": "SGML",
|
||||
"SortAs": "SGML",
|
||||
"GlossTerm": "Standard Generalized Markup Language",
|
||||
"Acronym": "SGML",
|
||||
"Abbrev": "ISO 8879:1986",
|
||||
"GlossDef": {
|
||||
"para": "A meta-markup language, used to create markup languages such as DocBook.",
|
||||
"GlossSeeAlso": ["GML", "XML"]
|
||||
},
|
||||
"GlossSee": "markup"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
})";
|
||||
// JSON document read once, during static initialization, from the file
// "test.json" (the relative path is resolved against the process's current
// working directory).
//
// If the file cannot be opened, a diagnostic is written to stderr and `json`
// is left empty, so a missing fixture is reported rather than silently
// producing an empty string.
const std::string json = []() {
  std::ifstream infile{"test.json"};
  if (!infile) {
    std::fprintf(stderr,
                 "warning: could not open test.json; using empty input\n");
    return std::string{};
  }
  // Slurp the whole stream; a default-constructed istreambuf_iterator is the
  // end-of-stream sentinel.
  return std::string{std::istreambuf_iterator<char>(infile),
                     std::istreambuf_iterator<char>()};
}();
|
||||
|
||||
void testStreaming(std::string const &json) {
|
||||
SerializeState streaming;
|
||||
|
||||
Reference in New Issue
Block a user