diff --git a/src/test.cpp b/src/test.cpp index 26a9ea3..d05298c 100644 --- a/src/test.cpp +++ b/src/test.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include #include @@ -158,59 +157,18 @@ const char *symbolNames[] = { }; int leadingWhitespaceCount(const char *buf, int len) { - // Based on - // http://0x80.pl/articles/simd-byte-lookup.html#special-case-1-small-sets - constexpr uint8_t charSet[] = {' ', '\t', '\n', '\r'}; constexpr static struct Table { constexpr Table() { - static_assert(sizeof(charSet) < 8); - uint8_t bitElement[sizeof(charSet)]{}; - for (int i = 0; i < int(sizeof(charSet)); ++i) { - bitElement[i] = 1 << i; - } - for (int i = 0; i < int(sizeof(charSet)); ++i) { - uint8_t c = charSet[i]; - int low = c & 0xf; - int high = c >> 4; - lowNibbleTable[low] |= bitElement[i]; - highNibbleTable[high] |= bitElement[i]; + for (int i = 0; i < 256; ++i) { + notWhitespace[i] = !(i == ' ' || i == '\n' || i == '\t' || i == '\r'); } } - // lowNibbleTable[i] is the set of chars with i as its low nibble - alignas(16) uint8_t lowNibbleTable[16]{}; - // highNibbleTable[i] is the set of chars with i as its high nibble - alignas(16) uint8_t highNibbleTable[16]{}; + alignas(16) bool notWhitespace[256]{}; } table; int i = 0; - for (; i + 16 <= len; i += 16) { - __m128i copy; - memcpy(&copy, &buf[i], sizeof(copy)); - const __m128i input = _mm_loadu_si128((const __m128i *)&copy); - const __m128i lower_nibbles = _mm_and_si128(input, _mm_set1_epi8(0x0f)); - const __m128i higher_nibbles = - _mm_and_si128(_mm_srli_epi16(input, 4), _mm_set1_epi8(0x0f)); - const __m128i lo_translated = _mm_shuffle_epi8( - _mm_load_si128((const __m128i *)table.lowNibbleTable), lower_nibbles); - const __m128i hi_translated = _mm_shuffle_epi8( - _mm_load_si128((const __m128i *)table.highNibbleTable), higher_nibbles); - const __m128i intersection = _mm_and_si128(lo_translated, hi_translated); - uint32_t notInCharSet = - _mm_movemask_epi8(_mm_cmpeq_epi8(intersection, _mm_setzero_si128())); - if 
(notInCharSet != 0) { - return i + __builtin_ctz(notInCharSet); - } - } - for (; i < len; ++i) { - bool any = false; - for (auto c : charSet) { - if (buf[i] == c) { - any = true; - break; - } - } - if (!any) { + if (table.notWhitespace[buf[i]]) { break; } } @@ -242,10 +200,7 @@ private: int len() const { return bufEnd - buf; } // Helpers - void maybeSkipWs() { - int leadingWs = leadingWhitespaceCount(buf, len()); - buf += leadingWs; - } + void maybeSkipWs() { buf += leadingWhitespaceCount(buf, len()); } bool parseLiteral(const char *literal) { const int litLen = strlen(literal); if (len() < litLen) { @@ -481,11 +436,7 @@ struct Parser2 { private: // Helpers - void maybeSkipWs() { - int leadingWs = leadingWhitespaceCount(buf, len()); - // printf("ws: %d\n", leadingWs); - buf += leadingWs; - } + void maybeSkipWs() { buf += leadingWhitespaceCount(buf, len()); } bool parseLiteral(const char *literal) { const int litLen = strlen(literal); if (len() < litLen) { @@ -717,13 +668,6 @@ Callbacks printCallbacks() { } // namespace -TEST_CASE("leadingWhitespaceCount") { - { - const char *s = " \r\t\n x "; - CHECK(leadingWhitespaceCount(s, strlen(s)) == 5); - } -} - TEST_CASE("parser1") { Callbacks c = printCallbacks(); auto copy = json;