diff --git a/src/parser3.h b/src/parser3.h index 95c77e8..b27dba3 100644 --- a/src/parser3.h +++ b/src/parser3.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -10,6 +11,10 @@ #include #include +#ifdef __x86_64__ +#include +#endif + #include "musttail.h" #include "tables.h" #include "weaseljson.h" @@ -397,25 +402,47 @@ inline WeaselJsonStatus n_string(Parser3 *self) { } inline WeaselJsonStatus n_string2(Parser3 *self) { - auto commit = [self, before = self->buf]() { - int len = self->buf - before; - if (self->writeBuf != before) { - memmove(self->writeBuf, before, len); + const auto before = self->buf; + +// Advance self->buf to the first "non-normal" character +#ifdef __x86_64__ + for (;;) { + if (self->bufEnd - self->buf < 16) [[unlikely]] { + while (self->buf != self->bufEnd && + tables.stringByteMeaning[uint8_t(*self->buf)] == Tables::NORMAL) { + ++self->buf; + } + break; } - self->writeBuf += len; - }; -begin: - auto meaning = tables.stringByteMeaning[uint8_t(*self->buf)]; - if (meaning == Tables::NORMAL) { - ++self->buf; - if (self->buf == self->bufEnd) { - commit(); - MUSTTAIL return Parser3::keepGoing(self); + __m128 x; + memcpy(&x, self->buf, 16); + const uint32_t dubquote = + _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_set1_epi8('"'), x)); + const uint32_t backslash = + _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_set1_epi8('\\'), x)); + const uint32_t control_or_negative = + _mm_movemask_epi8(_mm_cmpgt_epi8(_mm_set1_epi8(0x20), x)); + const uint32_t non_normal = dubquote | backslash | control_or_negative; + if (non_normal) { + self->buf += std::countr_zero(non_normal); + break; } - goto begin; + self->buf += 16; } - commit(); - switch (meaning) { +#else +#error "port me" +#endif + + int len = self->buf - before; + memmove(self->writeBuf, before, len); + self->writeBuf += len; + + if (self->buf == self->bufEnd) { + self->flushString(); + return WeaselJson_AGAIN; + } + + switch (tables.stringByteMeaning[uint8_t(*self->buf)]) { case Tables::NORMAL: __builtin_unreachable(); case Tables::DUBQUOTE: