Use C++20 std::countl_zero and std::countr_zero
This commit is contained in:
@@ -2,6 +2,7 @@
|
|||||||
#include "Internal.h"
|
#include "Internal.h"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <bit>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
@@ -113,7 +114,7 @@ int getNodeIndex(Node16 *self, uint8_t index) {
|
|||||||
|
|
||||||
// Find the index of the first '1' in the bitfield by counting the trailing
|
// Find the index of the first '1' in the bitfield by counting the trailing
|
||||||
// zeros.
|
// zeros.
|
||||||
return __builtin_ctz(bitfield);
|
return std::countr_zero(bitfield);
|
||||||
#elif defined(HAS_ARM_NEON)
|
#elif defined(HAS_ARM_NEON)
|
||||||
// Based on
|
// Based on
|
||||||
// https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
|
// https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
|
||||||
@@ -131,7 +132,7 @@ int getNodeIndex(Node16 *self, uint8_t index) {
|
|||||||
vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0) & mask;
|
vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0) & mask;
|
||||||
if (bitfield == 0)
|
if (bitfield == 0)
|
||||||
return -1;
|
return -1;
|
||||||
return __builtin_ctzll(bitfield) / 4;
|
return std::countr_zero(bitfield) / 4;
|
||||||
#else
|
#else
|
||||||
for (int i = 0; i < self->numChildren; ++i) {
|
for (int i = 0; i < self->numChildren; ++i) {
|
||||||
if (self->index[i] == index) {
|
if (self->index[i] == index) {
|
||||||
@@ -151,7 +152,7 @@ int firstNonNeg1(const int8_t x[16]) {
|
|||||||
uint32_t bitfield = _mm_movemask_epi8(results) ^ 0xffff;
|
uint32_t bitfield = _mm_movemask_epi8(results) ^ 0xffff;
|
||||||
if (bitfield == 0)
|
if (bitfield == 0)
|
||||||
return -1;
|
return -1;
|
||||||
return __builtin_ctz(bitfield);
|
return std::countr_zero(bitfield);
|
||||||
}
|
}
|
||||||
|
|
||||||
int lastNonNeg1(const int8_t x[16]) {
|
int lastNonNeg1(const int8_t x[16]) {
|
||||||
@@ -162,7 +163,7 @@ int lastNonNeg1(const int8_t x[16]) {
|
|||||||
uint32_t bitfield = _mm_movemask_epi8(results) ^ 0xffff;
|
uint32_t bitfield = _mm_movemask_epi8(results) ^ 0xffff;
|
||||||
if (bitfield == 0)
|
if (bitfield == 0)
|
||||||
return -1;
|
return -1;
|
||||||
return 31 - __builtin_clz(bitfield);
|
return 31 - std::countl_zero(bitfield);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -175,7 +176,7 @@ int firstNonNeg1(const int8_t x[16]) {
|
|||||||
~vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0);
|
~vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0);
|
||||||
if (bitfield == 0)
|
if (bitfield == 0)
|
||||||
return -1;
|
return -1;
|
||||||
return __builtin_ctzll(bitfield) / 4;
|
return std::countr_zero(bitfield) / 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
int lastNonNeg1(const int8_t x[16]) {
|
int lastNonNeg1(const int8_t x[16]) {
|
||||||
@@ -186,7 +187,7 @@ int lastNonNeg1(const int8_t x[16]) {
|
|||||||
~vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0);
|
~vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0);
|
||||||
if (bitfield == 0)
|
if (bitfield == 0)
|
||||||
return -1;
|
return -1;
|
||||||
return 15 - __builtin_clzll(bitfield) / 4;
|
return 15 - std::countl_zero(bitfield) / 4;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -262,7 +263,7 @@ int getChildGeq(Node *self, int child) {
|
|||||||
__m128i results = _mm_cmpeq_epi8(key_vec, _mm_min_epu8(key_vec, indices));
|
__m128i results = _mm_cmpeq_epi8(key_vec, _mm_min_epu8(key_vec, indices));
|
||||||
int mask = (1 << self16->numChildren) - 1;
|
int mask = (1 << self16->numChildren) - 1;
|
||||||
int bitfield = _mm_movemask_epi8(results) & mask;
|
int bitfield = _mm_movemask_epi8(results) & mask;
|
||||||
int result = bitfield == 0 ? -1 : self16->index[__builtin_ctz(bitfield)];
|
int result = bitfield == 0 ? -1 : self16->index[std::countr_zero(bitfield)];
|
||||||
assert(result == [&]() -> int {
|
assert(result == [&]() -> int {
|
||||||
for (int i = 0; i < self16->numChildren; ++i) {
|
for (int i = 0; i < self16->numChildren; ++i) {
|
||||||
if (self16->index[i] >= child) {
|
if (self16->index[i] >= child) {
|
||||||
@@ -287,7 +288,7 @@ int getChildGeq(Node *self, int child) {
|
|||||||
0) &
|
0) &
|
||||||
mask;
|
mask;
|
||||||
int simd =
|
int simd =
|
||||||
bitfield == 0 ? -1 : self16->index[__builtin_ctzll(bitfield) / 4];
|
bitfield == 0 ? -1 : self16->index[std::countr_zero(bitfield) / 4];
|
||||||
assert(simd == [&]() -> int {
|
assert(simd == [&]() -> int {
|
||||||
for (int i = 0; i < self->numChildren; ++i) {
|
for (int i = 0; i < self->numChildren; ++i) {
|
||||||
if (self16->index[i] >= child) {
|
if (self16->index[i] >= child) {
|
||||||
@@ -366,7 +367,7 @@ int getChildLeq(Node *self, int child) {
|
|||||||
int mask = (1 << self16->numChildren) - 1;
|
int mask = (1 << self16->numChildren) - 1;
|
||||||
int bitfield = _mm_movemask_epi8(results) & mask;
|
int bitfield = _mm_movemask_epi8(results) & mask;
|
||||||
int result =
|
int result =
|
||||||
bitfield == 0 ? -1 : self16->index[31 - __builtin_clz(bitfield)];
|
bitfield == 0 ? -1 : self16->index[31 - std::countl_zero(bitfield)];
|
||||||
assert(result == [&]() -> int {
|
assert(result == [&]() -> int {
|
||||||
for (int i = self16->numChildren - 1; i >= 0; --i) {
|
for (int i = self16->numChildren - 1; i >= 0; --i) {
|
||||||
if (self16->index[i] <= child) {
|
if (self16->index[i] <= child) {
|
||||||
@@ -391,7 +392,7 @@ int getChildLeq(Node *self, int child) {
|
|||||||
0) &
|
0) &
|
||||||
mask;
|
mask;
|
||||||
int simd =
|
int simd =
|
||||||
bitfield == 0 ? -1 : self16->index[15 - __builtin_clzll(bitfield) / 4];
|
bitfield == 0 ? -1 : self16->index[15 - std::countl_zero(bitfield) / 4];
|
||||||
assert(simd == [&]() -> int {
|
assert(simd == [&]() -> int {
|
||||||
for (int i = self->numChildren - 1; i >= 0; --i) {
|
for (int i = self->numChildren - 1; i >= 0; --i) {
|
||||||
if (self16->index[i] <= child) {
|
if (self16->index[i] <= child) {
|
||||||
|
Reference in New Issue
Block a user