diff --git a/ConflictSet.cpp b/ConflictSet.cpp index f84dc06..af35ec3 100644 --- a/ConflictSet.cpp +++ b/ConflictSet.cpp @@ -1713,9 +1713,10 @@ downLeftSpine: bool scan16(const int64_t *vs, const uint8_t *is, int begin, int end, int64_t readVersion) { + assert(end - begin < 256); + #ifdef HAS_ARM_NEON - assert(end - begin < 256); uint8x16_t indices; memcpy(&indices, is, 16); // 0xff for each in bounds @@ -1727,7 +1728,21 @@ bool scan16(const int64_t *vs, const uint8_t *is, int begin, int end, uint64_t compared = 0; for (int i = 0; i < 16; ++i) { - compared |= (uint64_t(vs[i] > readVersion) << (i << 2)); + compared |= uint64_t(vs[i] > readVersion) << (i << 2); + } + return !(compared & mask); + +#elif defined(HAS_AVX) + + __m128i indices; + memcpy(&indices, is, 16); + indices = _mm_sub_epi8(indices, _mm_set1_epi8(begin)); + uint32_t mask = ~_mm_movemask_epi8(_mm_cmpeq_epi8( + indices, _mm_max_epu8(indices, _mm_set1_epi8(end - begin)))); + + uint32_t compared = 0; + for (int i = 0; i < 16; ++i) { + compared |= (vs[i] > readVersion) << i; } return !(compared & mask); @@ -1739,7 +1754,7 @@ bool scan16(const int64_t *vs, const uint8_t *is, int begin, int end, uint32_t compared = 0; for (int i = 0; i < 16; ++i) { - compared |= ((vs[i] > readVersion) << i); + compared |= (vs[i] > readVersion) << i; } uint32_t mask = 0; for (int i = 0; i < 16; ++i) {