From efb0e52a0aa64820de7c053ae866c1b352833c8d Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Thu, 27 Jun 2024 22:21:41 -0700 Subject: [PATCH] SIMD implementation of scan16 for x86 Closes #29 --- ConflictSet.cpp | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/ConflictSet.cpp b/ConflictSet.cpp index f84dc06..af35ec3 100644 --- a/ConflictSet.cpp +++ b/ConflictSet.cpp @@ -1713,9 +1713,10 @@ downLeftSpine: bool scan16(const int64_t *vs, const uint8_t *is, int begin, int end, int64_t readVersion) { + assert(end - begin < 256); + #ifdef HAS_ARM_NEON - assert(end - begin < 256); uint8x16_t indices; memcpy(&indices, is, 16); // 0xff for each in bounds @@ -1727,7 +1728,21 @@ bool scan16(const int64_t *vs, const uint8_t *is, int begin, int end, uint64_t compared = 0; for (int i = 0; i < 16; ++i) { - compared |= (uint64_t(vs[i] > readVersion) << (i << 2)); + compared |= uint64_t(vs[i] > readVersion) << (i << 2); + } + return !(compared & mask); + +#elif defined(HAS_AVX) + + __m128i indices; + memcpy(&indices, is, 16); + indices = _mm_sub_epi8(indices, _mm_set1_epi8(begin)); + uint32_t mask = ~_mm_movemask_epi8(_mm_cmpeq_epi8( + indices, _mm_max_epu8(indices, _mm_set1_epi8(end - begin)))); + + uint32_t compared = 0; + for (int i = 0; i < 16; ++i) { + compared |= (vs[i] > readVersion) << i; } return !(compared & mask); @@ -1739,7 +1754,7 @@ bool scan16(const int64_t *vs, const uint8_t *is, int begin, int end, uint32_t compared = 0; for (int i = 0; i < 16; ++i) { - compared |= ((vs[i] > readVersion) << i); + compared |= (vs[i] > readVersion) << i; } uint32_t mask = 0; for (int i = 0; i < 16; ++i) {