Simd string2

This commit is contained in:
2025-05-22 15:36:46 -04:00
parent ad17336997
commit ef19dae3f6

View File

@@ -1,5 +1,6 @@
#pragma once
#include <bit>
#include <cassert>
#include <cctype>
#include <cstdint>
@@ -10,6 +11,10 @@
#include <tuple>
#include <utility>
#ifdef __x86_64__
#include <immintrin.h>
#endif
#include "musttail.h"
#include "tables.h"
#include "weaseljson.h"
@@ -397,25 +402,47 @@ inline WeaselJsonStatus n_string(Parser3 *self) {
}
inline WeaselJsonStatus n_string2(Parser3 *self) {
auto commit = [self, before = self->buf]() {
int len = self->buf - before;
if (self->writeBuf != before) {
memmove(self->writeBuf, before, len);
}
self->writeBuf += len;
};
begin:
auto meaning = tables.stringByteMeaning[uint8_t(*self->buf)];
if (meaning == Tables::NORMAL) {
const auto before = self->buf;
// Advance self->buf to the first "non-normal" character
#ifdef __x86_64__
for (;;) {
if (self->bufEnd - self->buf < 16) [[unlikely]] {
while (self->buf != self->bufEnd &&
tables.stringByteMeaning[uint8_t(*self->buf)] == Tables::NORMAL) {
++self->buf;
}
break;
}
__m128 x;
memcpy(&x, self->buf, 16);
const uint32_t dubquote =
_mm_movemask_epi8(_mm_cmpeq_epi8(_mm_set1_epi8('"'), x));
const uint32_t backslash =
_mm_movemask_epi8(_mm_cmpeq_epi8(_mm_set1_epi8('\\'), x));
const uint32_t control_or_negative =
_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_set1_epi8(0x20), x));
const uint32_t non_normal = dubquote | backslash | control_or_negative;
if (non_normal) {
self->buf += std::countr_zero(non_normal);
break;
}
self->buf += 16;
}
#else
#error "port me"
#endif
int len = self->buf - before;
memmove(self->writeBuf, before, len);
self->writeBuf += len;
if (self->buf == self->bufEnd) {
commit();
MUSTTAIL return Parser3::keepGoing(self);
self->flushString();
return WeaselJson_AGAIN;
}
goto begin;
}
commit();
switch (meaning) {
switch (tables.stringByteMeaning[uint8_t(*self->buf)]) {
case Tables::NORMAL:
__builtin_unreachable();
case Tables::DUBQUOTE: