Use c++20 std::count{l,r}_zero

This commit is contained in:
2024-01-30 13:19:20 -08:00
parent 3c9c5bd9fb
commit 8edac4d811

View File

@@ -2,6 +2,7 @@
#include "Internal.h"
#include <algorithm>
#include <bit>
#include <cassert>
#include <cstdint>
#include <cstring>
@@ -113,7 +114,7 @@ int getNodeIndex(Node16 *self, uint8_t index) {
// Find the index of the first (lowest) '1' in the bitfield by counting the
// trailing zeros.
return __builtin_ctz(bitfield);
return std::countr_zero(bitfield);
#elif defined(HAS_ARM_NEON)
// Based on
// https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
@@ -131,7 +132,7 @@ int getNodeIndex(Node16 *self, uint8_t index) {
vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0) & mask;
if (bitfield == 0)
return -1;
return __builtin_ctzll(bitfield) / 4;
return std::countr_zero(bitfield) / 4;
#else
for (int i = 0; i < self->numChildren; ++i) {
if (self->index[i] == index) {
@@ -151,7 +152,7 @@ int firstNonNeg1(const int8_t x[16]) {
uint32_t bitfield = _mm_movemask_epi8(results) ^ 0xffff;
if (bitfield == 0)
return -1;
return __builtin_ctz(bitfield);
return std::countr_zero(bitfield);
}
int lastNonNeg1(const int8_t x[16]) {
@@ -162,7 +163,7 @@ int lastNonNeg1(const int8_t x[16]) {
uint32_t bitfield = _mm_movemask_epi8(results) ^ 0xffff;
if (bitfield == 0)
return -1;
return 31 - __builtin_clz(bitfield);
return 31 - std::countl_zero(bitfield);
}
#endif
@@ -175,7 +176,7 @@ int firstNonNeg1(const int8_t x[16]) {
~vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0);
if (bitfield == 0)
return -1;
return __builtin_ctzll(bitfield) / 4;
return std::countr_zero(bitfield) / 4;
}
int lastNonNeg1(const int8_t x[16]) {
@@ -186,7 +187,7 @@ int lastNonNeg1(const int8_t x[16]) {
~vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0);
if (bitfield == 0)
return -1;
return 15 - __builtin_clzll(bitfield) / 4;
return 15 - std::countl_zero(bitfield) / 4;
}
#endif
@@ -262,7 +263,7 @@ int getChildGeq(Node *self, int child) {
__m128i results = _mm_cmpeq_epi8(key_vec, _mm_min_epu8(key_vec, indices));
int mask = (1 << self16->numChildren) - 1;
int bitfield = _mm_movemask_epi8(results) & mask;
int result = bitfield == 0 ? -1 : self16->index[__builtin_ctz(bitfield)];
int result = bitfield == 0 ? -1 : self16->index[std::countr_zero(bitfield)];
assert(result == [&]() -> int {
for (int i = 0; i < self16->numChildren; ++i) {
if (self16->index[i] >= child) {
@@ -287,7 +288,7 @@ int getChildGeq(Node *self, int child) {
0) &
mask;
int simd =
bitfield == 0 ? -1 : self16->index[__builtin_ctzll(bitfield) / 4];
bitfield == 0 ? -1 : self16->index[std::countr_zero(bitfield) / 4];
assert(simd == [&]() -> int {
for (int i = 0; i < self->numChildren; ++i) {
if (self16->index[i] >= child) {
@@ -366,7 +367,7 @@ int getChildLeq(Node *self, int child) {
int mask = (1 << self16->numChildren) - 1;
int bitfield = _mm_movemask_epi8(results) & mask;
int result =
bitfield == 0 ? -1 : self16->index[31 - __builtin_clz(bitfield)];
bitfield == 0 ? -1 : self16->index[31 - std::countl_zero(bitfield)];
assert(result == [&]() -> int {
for (int i = self16->numChildren - 1; i >= 0; --i) {
if (self16->index[i] <= child) {
@@ -391,7 +392,7 @@ int getChildLeq(Node *self, int child) {
0) &
mask;
int simd =
bitfield == 0 ? -1 : self16->index[15 - __builtin_clzll(bitfield) / 4];
bitfield == 0 ? -1 : self16->index[15 - std::countl_zero(bitfield) / 4];
assert(simd == [&]() -> int {
for (int i = self->numChildren - 1; i >= 0; --i) {
if (self16->index[i] <= child) {