Implement x86 getChildLeq/getChildGeq
This commit is contained in:
@@ -702,7 +702,22 @@ int getChildGeq(Node *self, int child) {
|
||||
} else if (self->type == Type::Node16) {
|
||||
auto *self16 = static_cast<Node16 *>(self);
|
||||
#ifdef HAS_AVX
|
||||
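// min_epu8(key, idx) == key marks the lanes where idx >= key (unsigned compare);
// the lowest set bit among the first numChildren lanes picks the first such slot.
// NOTE(review): _mm_set1_epi8 keeps only the low 8 bits of child - confirm callers pass 0..255.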
|
||||
__m128i key_vec = _mm_set1_epi8(child);
|
||||
__m128i indices;
|
||||
memcpy(&indices, self16->index, sizeof(self16->index));
|
||||
__m128i results = _mm_cmpeq_epi8(key_vec, _mm_min_epu8(key_vec, indices));
|
||||
int mask = (1 << self16->numChildren) - 1;
|
||||
int bitfield = _mm_movemask_epi8(results) & mask;
|
||||
int result = bitfield == 0 ? -1 : self16->index[__builtin_ctz(bitfield)];
|
||||
assert(result == [&]() -> int {
|
||||
for (int i = 0; i < self16->numChildren; ++i) {
|
||||
if (self16->index[i] >= child) {
|
||||
return self16->index[i];
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}());
|
||||
return result;
|
||||
#elif defined(HAS_ARM_NEON)
|
||||
uint8x16_t indices;
|
||||
memcpy(&indices, self16->index, sizeof(self16->index));
|
||||
@@ -790,7 +805,23 @@ int getChildLeq(Node *self, int child) {
|
||||
} else if (self->type == Type::Node16) {
|
||||
auto *self16 = static_cast<Node16 *>(self);
|
||||
#ifdef HAS_AVX
|
||||
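// max_epu8(key, idx) == key marks the lanes where idx <= key (unsigned compare);
// the highest set bit among the first numChildren lanes picks the last such slot.
// NOTE(review): _mm_set1_epi8 keeps only the low 8 bits of child - confirm callers pass 0..255.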
|
||||
__m128i key_vec = _mm_set1_epi8(child);
|
||||
__m128i indices;
|
||||
memcpy(&indices, self16->index, sizeof(self16->index));
|
||||
__m128i results = _mm_cmpeq_epi8(key_vec, _mm_max_epu8(key_vec, indices));
|
||||
int mask = (1 << self16->numChildren) - 1;
|
||||
int bitfield = _mm_movemask_epi8(results) & mask;
|
||||
int result =
|
||||
bitfield == 0 ? -1 : self16->index[31 - __builtin_clz(bitfield)];
|
||||
assert(result == [&]() -> int {
|
||||
for (int i = self16->numChildren - 1; i >= 0; --i) {
|
||||
if (self16->index[i] <= child) {
|
||||
return self16->index[i];
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}());
|
||||
return result;
|
||||
#elif defined(HAS_ARM_NEON)
|
||||
uint8x16_t indices;
|
||||
memcpy(&indices, self16->index, sizeof(self16->index));
|
||||
@@ -1540,4 +1571,4 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
||||
}
|
||||
#endif
|
||||
|
||||
// GCOVR_EXCL_STOP
|
||||
// GCOVR_EXCL_STOP
|
||||
|
Reference in New Issue
Block a user