Implement x86 SIMD paths for Node16 in getChildLeq/getChildGeq
This commit is contained in:
@@ -702,7 +702,22 @@ int getChildGeq(Node *self, int child) {
|
|||||||
} else if (self->type == Type::Node16) {
|
} else if (self->type == Type::Node16) {
|
||||||
auto *self16 = static_cast<Node16 *>(self);
|
auto *self16 = static_cast<Node16 *>(self);
|
||||||
#ifdef HAS_AVX
|
#ifdef HAS_AVX
|
||||||
// TODO
|
__m128i key_vec = _mm_set1_epi8(child);
|
||||||
|
__m128i indices;
|
||||||
|
memcpy(&indices, self16->index, sizeof(self16->index));
|
||||||
|
__m128i results = _mm_cmpeq_epi8(key_vec, _mm_min_epu8(key_vec, indices));
|
||||||
|
int mask = (1 << self16->numChildren) - 1;
|
||||||
|
int bitfield = _mm_movemask_epi8(results) & mask;
|
||||||
|
int result = bitfield == 0 ? -1 : self16->index[__builtin_ctz(bitfield)];
|
||||||
|
assert(result == [&]() -> int {
|
||||||
|
for (int i = 0; i < self16->numChildren; ++i) {
|
||||||
|
if (self16->index[i] >= child) {
|
||||||
|
return self16->index[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}());
|
||||||
|
return result;
|
||||||
#elif defined(HAS_ARM_NEON)
|
#elif defined(HAS_ARM_NEON)
|
||||||
uint8x16_t indices;
|
uint8x16_t indices;
|
||||||
memcpy(&indices, self16->index, sizeof(self16->index));
|
memcpy(&indices, self16->index, sizeof(self16->index));
|
||||||
@@ -790,7 +805,23 @@ int getChildLeq(Node *self, int child) {
|
|||||||
} else if (self->type == Type::Node16) {
|
} else if (self->type == Type::Node16) {
|
||||||
auto *self16 = static_cast<Node16 *>(self);
|
auto *self16 = static_cast<Node16 *>(self);
|
||||||
#ifdef HAS_AVX
|
#ifdef HAS_AVX
|
||||||
// TODO
|
__m128i key_vec = _mm_set1_epi8(child);
|
||||||
|
__m128i indices;
|
||||||
|
memcpy(&indices, self16->index, sizeof(self16->index));
|
||||||
|
__m128i results = _mm_cmpeq_epi8(key_vec, _mm_max_epu8(key_vec, indices));
|
||||||
|
int mask = (1 << self16->numChildren) - 1;
|
||||||
|
int bitfield = _mm_movemask_epi8(results) & mask;
|
||||||
|
int result =
|
||||||
|
bitfield == 0 ? -1 : self16->index[31 - __builtin_clz(bitfield)];
|
||||||
|
assert(result == [&]() -> int {
|
||||||
|
for (int i = self16->numChildren - 1; i >= 0; --i) {
|
||||||
|
if (self16->index[i] <= child) {
|
||||||
|
return self16->index[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}());
|
||||||
|
return result;
|
||||||
#elif defined(HAS_ARM_NEON)
|
#elif defined(HAS_ARM_NEON)
|
||||||
uint8x16_t indices;
|
uint8x16_t indices;
|
||||||
memcpy(&indices, self16->index, sizeof(self16->index));
|
memcpy(&indices, self16->index, sizeof(self16->index));
|
||||||
|
Reference in New Issue
Block a user