From f762add4d69444df0c355f70f04ef015d88f821d Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Fri, 28 Jun 2024 17:28:59 -0700 Subject: [PATCH] Write vectorized 32-bit compare by hand for arm in scan16 --- ConflictSet.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/ConflictSet.cpp b/ConflictSet.cpp index 0725870..57cbbcf 100644 --- a/ConflictSet.cpp +++ b/ConflictSet.cpp @@ -1776,10 +1776,32 @@ bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end, uint64_t mask = vget_lane_u64( vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(results), 4)), 0); +#if INTERNAL_VERSION_32_BIT + uint32x4_t w4[4]; + memcpy(w4, vs, sizeof(w4)); + uint32_t rv; + memcpy(&rv, &readVersion, sizeof(rv)); + + int32x4_t z; + memset(&z, 0, sizeof(z)); + + uint16x4_t conflicting[4]; + for (int i = 0; i < 4; ++i) { + conflicting[i] = vmovn_u32(vcgtq_s32(vsubq_u32(w4[i], vdupq_n_u32(rv)), z)); + } + auto combined = + vcombine_u8(vmovn_u16(vcombine_u16(conflicting[0], conflicting[1])), + vmovn_u16(vcombine_u16(conflicting[2], conflicting[3]))); + + uint64_t compared = vget_lane_u64( + vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(combined), 4)), 0); +#else uint64_t compared = 0; for (int i = 0; i < 16; ++i) { compared |= uint64_t(vs[i] > readVersion) << (i << 2); } +#endif + return !(compared & mask); #elif defined(HAS_AVX)