From 8edac4d81167f4988ee2d92c5368be5466a076ca Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Tue, 30 Jan 2024 13:19:20 -0800 Subject: [PATCH] Use c++20 std::count{l,r}_zero --- ConflictSet.cpp | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/ConflictSet.cpp b/ConflictSet.cpp index 11a6f07..b535d8e 100644 --- a/ConflictSet.cpp +++ b/ConflictSet.cpp @@ -2,6 +2,7 @@ #include "Internal.h" #include +#include #include #include #include @@ -113,7 +114,7 @@ int getNodeIndex(Node16 *self, uint8_t index) { // Find the index of the first '1' in the bitfield by counting the leading // zeros. - return __builtin_ctz(bitfield); + return std::countr_zero(bitfield); #elif defined(HAS_ARM_NEON) // Based on // https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon @@ -131,7 +132,7 @@ int getNodeIndex(Node16 *self, uint8_t index) { vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0) & mask; if (bitfield == 0) return -1; - return __builtin_ctzll(bitfield) / 4; + return std::countr_zero(bitfield) / 4; #else for (int i = 0; i < self->numChildren; ++i) { if (self->index[i] == index) { @@ -151,7 +152,7 @@ int firstNonNeg1(const int8_t x[16]) { uint32_t bitfield = _mm_movemask_epi8(results) ^ 0xffff; if (bitfield == 0) return -1; - return __builtin_ctz(bitfield); + return std::countr_zero(bitfield); } int lastNonNeg1(const int8_t x[16]) { @@ -162,7 +163,7 @@ int lastNonNeg1(const int8_t x[16]) { uint32_t bitfield = _mm_movemask_epi8(results) ^ 0xffff; if (bitfield == 0) return -1; - return 31 - __builtin_clz(bitfield); + return 31 - std::countl_zero(bitfield); } #endif @@ -175,7 +176,7 @@ int firstNonNeg1(const int8_t x[16]) { ~vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0); if (bitfield == 0) return -1; - return __builtin_ctzll(bitfield) / 4; + return std::countr_zero(bitfield) / 4; } int lastNonNeg1(const int8_t x[16]) { @@ -186,7 +187,7 @@ int lastNonNeg1(const int8_t x[16]) { ~vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0); if (bitfield == 0) return -1; - return 15 - __builtin_clzll(bitfield) / 4; + return 15 - std::countl_zero(bitfield) / 4; } #endif @@ -262,7 +263,7 @@ int getChildGeq(Node *self, int child) { __m128i results = _mm_cmpeq_epi8(key_vec, _mm_min_epu8(key_vec, indices)); int mask = (1 << self16->numChildren) - 1; int bitfield = _mm_movemask_epi8(results) & mask; - int result = bitfield == 0 ? -1 : self16->index[__builtin_ctz(bitfield)]; + int result = bitfield == 0 ? -1 : self16->index[std::countr_zero(bitfield)]; assert(result == [&]() -> int { for (int i = 0; i < self16->numChildren; ++i) { if (self16->index[i] >= child) { @@ -287,7 +288,7 @@ int getChildGeq(Node *self, int child) { 0) & mask; int simd = - bitfield == 0 ? -1 : self16->index[__builtin_ctzll(bitfield) / 4]; + bitfield == 0 ? -1 : self16->index[std::countr_zero(bitfield) / 4]; assert(simd == [&]() -> int { for (int i = 0; i < self->numChildren; ++i) { if (self16->index[i] >= child) { @@ -366,7 +367,7 @@ int getChildLeq(Node *self, int child) { int mask = (1 << self16->numChildren) - 1; int bitfield = _mm_movemask_epi8(results) & mask; int result = - bitfield == 0 ? -1 : self16->index[31 - __builtin_clz(bitfield)]; + bitfield == 0 ? -1 : self16->index[31 - std::countl_zero(bitfield)]; assert(result == [&]() -> int { for (int i = self16->numChildren - 1; i >= 0; --i) { if (self16->index[i] <= child) { @@ -391,7 +392,7 @@ int getChildLeq(Node *self, int child) { 0) & mask; int simd = - bitfield == 0 ? -1 : self16->index[15 - __builtin_clzll(bitfield) / 4]; + bitfield == 0 ? -1 : self16->index[15 - std::countl_zero(bitfield) / 4]; assert(simd == [&]() -> int { for (int i = self->numChildren - 1; i >= 0; --i) { if (self16->index[i] <= child) {