Simd string2

This commit is contained in:
2025-05-22 15:36:46 -04:00
parent ad17336997
commit ef19dae3f6

View File

@@ -1,5 +1,6 @@
#pragma once #pragma once
#include <bit>
#include <cassert> #include <cassert>
#include <cctype> #include <cctype>
#include <cstdint> #include <cstdint>
@@ -10,6 +11,10 @@
#include <tuple> #include <tuple>
#include <utility> #include <utility>
#ifdef __x86_64__
#include <immintrin.h>
#endif
#include "musttail.h" #include "musttail.h"
#include "tables.h" #include "tables.h"
#include "weaseljson.h" #include "weaseljson.h"
@@ -397,25 +402,47 @@ inline WeaselJsonStatus n_string(Parser3 *self) {
} }
inline WeaselJsonStatus n_string2(Parser3 *self) { inline WeaselJsonStatus n_string2(Parser3 *self) {
auto commit = [self, before = self->buf]() { const auto before = self->buf;
int len = self->buf - before;
if (self->writeBuf != before) { // Advance self->buf to the first "non-normal" character
memmove(self->writeBuf, before, len); #ifdef __x86_64__
for (;;) {
if (self->bufEnd - self->buf < 16) [[unlikely]] {
while (self->buf != self->bufEnd &&
tables.stringByteMeaning[uint8_t(*self->buf)] == Tables::NORMAL) {
++self->buf;
}
break;
} }
self->writeBuf += len; __m128 x;
}; memcpy(&x, self->buf, 16);
begin: const uint32_t dubquote =
auto meaning = tables.stringByteMeaning[uint8_t(*self->buf)]; _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_set1_epi8('"'), x));
if (meaning == Tables::NORMAL) { const uint32_t backslash =
++self->buf; _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_set1_epi8('\\'), x));
if (self->buf == self->bufEnd) { const uint32_t control_or_negative =
commit(); _mm_movemask_epi8(_mm_cmpgt_epi8(_mm_set1_epi8(0x20), x));
MUSTTAIL return Parser3::keepGoing(self); const uint32_t non_normal = dubquote | backslash | control_or_negative;
if (non_normal) {
self->buf += std::countr_zero(non_normal);
break;
} }
goto begin; self->buf += 16;
} }
commit(); #else
switch (meaning) { #error "port me"
#endif
int len = self->buf - before;
memmove(self->writeBuf, before, len);
self->writeBuf += len;
if (self->buf == self->bufEnd) {
self->flushString();
return WeaselJson_AGAIN;
}
switch (tables.stringByteMeaning[uint8_t(*self->buf)]) {
case Tables::NORMAL: case Tables::NORMAL:
__builtin_unreachable(); __builtin_unreachable();
case Tables::DUBQUOTE: case Tables::DUBQUOTE: