Use simple loop for longestCommonPrefixPartialKey

The benchmark shows that the simple loop is faster.
This commit is contained in:
2024-02-22 14:49:04 -08:00
parent 505c060a28
commit 8016d44c04

View File

@@ -720,42 +720,13 @@ bytes:
// Returns the number of leading bytes that are equal between the buffers
// `ap` and `bp`, examining at most `cl` bytes of each.
//
// Parameters:
//   ap, bp - buffers to compare; each must be readable for at least `cl` bytes.
//   cl     - number of bytes to compare. Callers pass partial-key lengths, so
//            this is expected to be at most Node::kPartialKeyMaxLen. A value
//            of zero (or a negative value) yields 0.
//
// Returns: the index of the first mismatching byte, or `cl` if the first `cl`
// bytes are identical.
//
// A plain byte loop is used deliberately: it benchmarked faster than the
// previous implementation, and it keeps this function free of
// platform-specific SIMD paths.
int longestCommonPrefixPartialKey(const uint8_t *ap, const uint8_t *bp,
                                  int cl) {
  int i = 0;
  for (; i < cl; ++i) {
    if (ap[i] != bp[i]) {
      break;
    }
  }
  return i;
}
// Performs a physical search for remaining