From d1a6b293e9afe087948f396fba99719265cb6f7e Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Thu, 7 Mar 2024 12:41:34 -0800 Subject: [PATCH] Revert "Add getChildNodeGeq, use in nextLogical" This reverts commit 53bc36f62830a16aef2b51cd5ec0d2bb2c02ef0e. Apparently this used more instructions. Not sure I understand. --- ConflictSet.cpp | 58 +++---------------------------------------------- 1 file changed, 3 insertions(+), 55 deletions(-) diff --git a/ConflictSet.cpp b/ConflictSet.cpp index eff4935..0c4d81b 100644 --- a/ConflictSet.cpp +++ b/ConflictSet.cpp @@ -484,58 +484,6 @@ int getChildGeq(Node *self, int child) { return -1; } -Node *getChildNodeGeq(Node *self, int child) { - if (child > 255) { - return nullptr; - } - if (self->type <= Type::Node16) { - auto *self16 = static_cast(self); -#ifdef HAS_AVX - __m128i key_vec = _mm_set1_epi8(child); - __m128i indices; - memcpy(&indices, self16->index, sizeof(self16->index)); - __m128i results = _mm_cmpeq_epi8(key_vec, _mm_min_epu8(key_vec, indices)); - int mask = (1 << self16->numChildren) - 1; - uint32_t bitfield = _mm_movemask_epi8(results) & mask; - return bitfield == 0 ? nullptr - : self16->children[std::countr_zero(bitfield)].child; -#elif defined(HAS_ARM_NEON) - uint8x16_t indices; - memcpy(&indices, self16->index, sizeof(self16->index)); - // 0xff for each leq - auto results = vcleq_u8(vdupq_n_u8(child), indices); - uint64_t mask = self->numChildren == 16 - ? uint64_t(-1) - : (uint64_t(1) << (self->numChildren * 4)) - 1; - // 0xf for each 0xff (within mask) - uint64_t bitfield = - vget_lane_u64( - vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(results), 4)), - 0) & - mask; - return bitfield == 0 - ? nullptr - : self16->children[std::countr_zero(bitfield) / 4].child; -#else - for (int i = 0; i < self->numChildren; ++i) { - if (self16->index[i] >= child) { - return self16->children[i].child; - } - } - return nullptr; -#endif - } else if (self->type == Type::Node48) { - auto *self48 = static_cast(self); - int c = self48->bitSet.firstSetGeq(child); - return c >= 0 ? self48->children[self48->index[c]].child : nullptr; - } else { - assert(self->type == Type::Node256); - auto *self256 = static_cast(self); - int c = self256->bitSet.firstSetGeq(child); - return c >= 0 ? self256->children[c].child : nullptr; - } -} - void setChildrenParents(Node4 *n) { for (int i = 0; i < n->numChildren; ++i) { n->children[i].child->parent = n; @@ -750,9 +698,9 @@ void eraseChild(Node *self, uint8_t index, NodeAllocators *allocators) { Node *nextPhysical(Node *node) { int index = -1; for (;;) { - Node *nextChild = getChildNodeGeq(node, index + 1); - if (nextChild != nullptr) { - return nextChild; + auto nextChild = getChildGeq(node, index + 1); + if (nextChild >= 0) { + return getChildExists(node, nextChild); } index = node->parentsIndex; node = node->parent;