Implement x86 getChildLeq/getChildGeq

This commit is contained in:
2024-01-25 14:57:03 -08:00
parent b15bec6b38
commit 4d71cf5b75

View File

@@ -702,7 +702,22 @@ int getChildGeq(Node *self, int child) {
} else if (self->type == Type::Node16) {
auto *self16 = static_cast<Node16 *>(self);
#ifdef HAS_AVX
// TODO
// Vectorized search of a Node16 for the first stored key byte that is >= child.
// Yields that key byte, or -1 when no live slot qualifies.
// NOTE(review): _mm_set1_epi8 keeps only the low 8 bits of `child`, while the
// scalar cross-check below compares the full int. Assumes 0 <= child <= 255 at
// this point — confirm against the guards earlier in the function.
__m128i key_vec = _mm_set1_epi8(child);
__m128i indices;
// Copy the node's key bytes into a vector value.
// Assumes sizeof(self16->index) == sizeof(__m128i) (16 bytes) — TODO confirm
// against the Node16 definition.
memcpy(&indices, self16->index, sizeof(self16->index));
// _mm_min_epu8 is an unsigned per-byte minimum, so min(key, idx) == key holds
// exactly when key <= idx. The equality compare turns those lanes into 0xFF.
__m128i results = _mm_cmpeq_epi8(key_vec, _mm_min_epu8(key_vec, indices));
// Low numChildren bits set: only the first numChildren lanes are considered.
int mask = (1 << self16->numChildren) - 1;
// One bit per lane; bit i is set when lane i matched and i < numChildren.
int bitfield = _mm_movemask_epi8(results) & mask;
// Lowest set bit selects the first matching slot. The bitfield == 0 test also
// keeps __builtin_ctz away from a zero argument.
// NOTE(review): the first match is the smallest qualifying key only if index[]
// is kept in ascending order — presumably so; verify against the insert path.
int result = bitfield == 0 ? -1 : self16->index[__builtin_ctz(bitfield)];
// Debug-build cross-check against a plain linear scan over the live slots.
assert(result == [&]() -> int {
for (int i = 0; i < self16->numChildren; ++i) {
if (self16->index[i] >= child) {
return self16->index[i];
}
}
return -1;
}());
return result;
#elif defined(HAS_ARM_NEON)
uint8x16_t indices;
memcpy(&indices, self16->index, sizeof(self16->index));
@@ -790,7 +805,23 @@ int getChildLeq(Node *self, int child) {
} else if (self->type == Type::Node16) {
auto *self16 = static_cast<Node16 *>(self);
#ifdef HAS_AVX
// TODO
// Vectorized search of a Node16 for the last stored key byte that is <= child.
// Yields that key byte, or -1 when no live slot qualifies.
// NOTE(review): _mm_set1_epi8 keeps only the low 8 bits of `child`, while the
// scalar cross-check below compares the full int. Assumes 0 <= child <= 255 at
// this point — confirm against the guards earlier in the function.
__m128i key_vec = _mm_set1_epi8(child);
__m128i indices;
// Copy the node's key bytes into a vector value.
// Assumes sizeof(self16->index) == sizeof(__m128i) (16 bytes) — TODO confirm
// against the Node16 definition.
memcpy(&indices, self16->index, sizeof(self16->index));
// _mm_max_epu8 is an unsigned per-byte maximum, so max(key, idx) == key holds
// exactly when idx <= key. The equality compare turns those lanes into 0xFF.
__m128i results = _mm_cmpeq_epi8(key_vec, _mm_max_epu8(key_vec, indices));
// Low numChildren bits set: only the first numChildren lanes are considered.
int mask = (1 << self16->numChildren) - 1;
// One bit per lane; bit i is set when lane i matched and i < numChildren.
int bitfield = _mm_movemask_epi8(results) & mask;
// 31 - clz gives the position of the highest set bit, i.e. the last matching
// slot (this arithmetic assumes a 32-bit unsigned int). The bitfield == 0 test
// also keeps __builtin_clz away from a zero argument.
// NOTE(review): the last match is the largest qualifying key only if index[]
// is kept in ascending order — presumably so; verify against the insert path.
int result =
bitfield == 0 ? -1 : self16->index[31 - __builtin_clz(bitfield)];
// Debug-build cross-check against a reverse linear scan over the live slots.
assert(result == [&]() -> int {
for (int i = self16->numChildren - 1; i >= 0; --i) {
if (self16->index[i] <= child) {
return self16->index[i];
}
}
return -1;
}());
return result;
#elif defined(HAS_ARM_NEON)
uint8x16_t indices;
memcpy(&indices, self16->index, sizeof(self16->index));
@@ -1540,4 +1571,4 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
}
#endif
// GCOVR_EXCL_STOP
// GCOVR_EXCL_STOP