From 8016d44c040936d736a81cae58ae93d83cde3588 Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Thu, 22 Feb 2024 14:49:04 -0800 Subject: [PATCH] Use simple loop for longestCommonPrefixPartialKey The benchmark shows the simple loop is faster than the previous implementation. --- ConflictSet.cpp | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/ConflictSet.cpp b/ConflictSet.cpp index ec635c9..1997bd8 100644 --- a/ConflictSet.cpp +++ b/ConflictSet.cpp @@ -720,42 +720,13 @@ bytes: int longestCommonPrefixPartialKey(const uint8_t *ap, const uint8_t *bp, int cl) { - if (cl > Node::kPartialKeyMaxLen) { - __builtin_unreachable(); // GCOVR_EXCL_LINE - } - return longestCommonPrefix(ap, bp, cl); -#if 0 -static_assert(Node::kPartialKeyMaxLen == 16); - // SOMEDAY: use masked loads (requires avx-512/sve2) -#if defined(HAS_AVX) - __m128i a; - memcpy(&a, ap, cl); - __m128i b; - memcpy(&b, bp, cl); - const auto compared = _mm_cmpeq_epi8(a, b); - int mask = (1 << cl) - 1; - auto c = = _mm_movemask_epi8(compared) & mask; - return std::countr_zero(~c); -#elif defined(HAS_ARM_NEON) - uint8x16_t a; - memcpy(&a, ap, cl); - uint8x16_t b; - memcpy(&b, bp, cl); - uint16x8_t results = vreinterpretq_u16_u8(vceqq_u8(a, b)); - uint64_t mask = cl == 16 ? uint64_t(-1) : (uint64_t(1) << (cl * 4)) - 1; - uint64_t bitfield = - vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0) & mask; - return std::countr_zero(~bitfield) >> 2; -#else int i = 0; - for (; i < 16; ++i) { + for (; i < cl; ++i) { if (*ap++ != *bp++) { break; } } return i; -#endif -#endif } // Performs a physical search for remaining