diff --git a/ConflictSet.cpp b/ConflictSet.cpp index a0c0a7b..c5e8053 100644 --- a/ConflictSet.cpp +++ b/ConflictSet.cpp @@ -702,7 +702,22 @@ int getChildGeq(Node *self, int child) { } else if (self->type == Type::Node16) { auto *self16 = static_cast(self); #ifdef HAS_AVX -// TODO + __m128i key_vec = _mm_set1_epi8(child); + __m128i indices; + memcpy(&indices, self16->index, sizeof(self16->index)); + __m128i results = _mm_cmpeq_epi8(key_vec, _mm_min_epu8(key_vec, indices)); + int mask = (1 << self16->numChildren) - 1; + int bitfield = _mm_movemask_epi8(results) & mask; + int result = bitfield == 0 ? -1 : self16->index[__builtin_ctz(bitfield)]; + assert(result == [&]() -> int { + for (int i = 0; i < self16->numChildren; ++i) { + if (self16->index[i] >= child) { + return self16->index[i]; + } + } + return -1; + }()); + return result; #elif defined(HAS_ARM_NEON) uint8x16_t indices; memcpy(&indices, self16->index, sizeof(self16->index)); @@ -790,7 +805,23 @@ int getChildLeq(Node *self, int child) { } else if (self->type == Type::Node16) { auto *self16 = static_cast(self); #ifdef HAS_AVX -// TODO + __m128i key_vec = _mm_set1_epi8(child); + __m128i indices; + memcpy(&indices, self16->index, sizeof(self16->index)); + __m128i results = _mm_cmpeq_epi8(key_vec, _mm_max_epu8(key_vec, indices)); + int mask = (1 << self16->numChildren) - 1; + int bitfield = _mm_movemask_epi8(results) & mask; + int result = + bitfield == 0 ? -1 : self16->index[31 - __builtin_clz(bitfield)]; + assert(result == [&]() -> int { + for (int i = self16->numChildren - 1; i >= 0; --i) { + if (self16->index[i] <= child) { + return self16->index[i]; + } + } + return -1; + }()); + return result; #elif defined(HAS_ARM_NEON) uint8x16_t indices; memcpy(&indices, self16->index, sizeof(self16->index)); @@ -1540,4 +1571,4 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { } #endif -// GCOVR_EXCL_STOP \ No newline at end of file +// GCOVR_EXCL_STOP