From 3a5db2d2ac5ed2e1b184bdd5554ed8fa53a19e21 Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Thu, 22 Feb 2024 12:31:10 -0800 Subject: [PATCH] Share some Node4/16 and Node48/256 implementations This cuts down on the number of instructions (confirmed with cachegrind). Also avoid initializing some memory unnecessarily. --- ConflictSet.cpp | 137 ++++++++++++++++++------------------------------ 1 file changed, 50 insertions(+), 87 deletions(-) diff --git a/ConflictSet.cpp b/ConflictSet.cpp index e796476..eb95808 100644 --- a/ConflictSet.cpp +++ b/ConflictSet.cpp @@ -21,6 +21,7 @@ limitations under the License. #include #include #include +#include #include #include #include @@ -89,45 +90,6 @@ private: void *freeList = nullptr; }; -enum class Type : int8_t { - Node4, - Node16, - Node48, - Node256, - Invalid, -}; -struct Node { - /* begin section that's copied to the next node */ - Node *parent = nullptr; - // The max write version over all keys that start with the search path up to - // this point - int64_t maxVersion; - Entry entry; - int16_t numChildren = 0; - bool entryPresent = false; - uint8_t parentsIndex = 0; - constexpr static auto kPartialKeyMaxLen = 26; - uint8_t partialKey[kPartialKeyMaxLen]; - int8_t partialKeyLen = 0; - /* end section that's copied to the next node */ - - Type type = Type::Invalid; -}; - -struct Node4 : Node { - // Sorted - uint8_t index[4] = {}; - Node *children[4] = {}; - Node4() { this->type = Type::Node4; } -}; - -struct Node16 : Node { - // Sorted - uint8_t index[16] = {}; - Node *children[16] = {}; - Node16() { this->type = Type::Node16; } -}; - struct BitSet { bool test(int i) const { assert(0 <= i); @@ -185,9 +147,49 @@ private: __uint128_t hi = 0; }; +enum class Type : int8_t { + Node4, + Node16, + Node48, + Node256, + Invalid, +}; +struct Node { + /* begin section that's copied to the next node */ + Node *parent = nullptr; + // The max write version over all keys that start with the search path up to + // this point + int64_t 
maxVersion; + Entry entry; + int16_t numChildren = 0; + bool entryPresent = false; + uint8_t parentsIndex = 0; + constexpr static auto kPartialKeyMaxLen = 26; + uint8_t partialKey[kPartialKeyMaxLen]; + int8_t partialKeyLen = 0; + /* end section that's copied to the next node */ + + Type type = Type::Invalid; +}; + +struct Node4 : Node { + // Sorted + uint8_t index[16]; // 16 so that we can use the same simd index search + // implementation for Node4 as Node16 + Node *children[4]; + Node4() { this->type = Type::Node4; } +}; + +struct Node16 : Node { + // Sorted + uint8_t index[16]; + Node *children[16]; + Node16() { this->type = Type::Node16; } +}; + struct Node48 : Node { BitSet bitSet; - Node *children[48] = {}; + Node *children[48]; int8_t nextFree = 0; int8_t index[256]; Node48() { @@ -209,15 +211,6 @@ struct NodeAllocators { BoundedFreeListAllocator node256; }; -int getNodeIndex(Node4 *self, uint8_t index) { - for (int i = 0; i < self->numChildren; ++i) { - if (self->index[i] == index) { - return i; - } - } - return -1; -} - int getNodeIndex(Node16 *self, uint8_t index) { #ifdef HAS_AVX // Based on https://www.the-paper-trail.org/post/art-paper-notes/ @@ -278,10 +271,7 @@ int getNodeIndex(Node16 *self, uint8_t index) { // Precondition - an entry for index must exist in the node Node *&getChildExists(Node *self, uint8_t index) { - if (self->type == Type::Node4) { - auto *self4 = static_cast(self); - return self4->children[getNodeIndex(self4, index)]; - } else if (self->type == Type::Node16) { + if (self->type <= Type::Node16) { auto *self16 = static_cast(self); return self16->children[getNodeIndex(self16, index)]; } else if (self->type == Type::Node48) { @@ -299,17 +289,7 @@ int getChildGeq(Node *self, int child) { if (child > 255) { return -1; } - if (self->type == Type::Node4) { - auto *self4 = static_cast(self); - for (int i = 0; i < self->numChildren; ++i) { - if (i > 0) { - assert(self4->index[i - 1] < self4->index[i]); - } - if (self4->index[i] >= child) { 
- return self4->index[i]; - } - } - } else if (self->type == Type::Node16) { + if (self->type <= Type::Node16) { auto *self16 = static_cast<Node16 *>(self); #ifdef HAS_AVX __m128i key_vec = _mm_set1_epi8(child); @@ -363,12 +343,10 @@ int getChildGeq(Node *self, int child) { } } #endif - } else if (self->type == Type::Node48) { + } else { + static_assert(offsetof(Node48, bitSet) == offsetof(Node256, bitSet)); auto *self48 = static_cast<Node48 *>(self); return self48->bitSet.firstSetGeq(child); - } else { - auto *self256 = static_cast<Node256 *>(self); - return self256->bitSet.firstSetGeq(child); } return -1; } @@ -386,7 +364,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index, if (self->type == Type::Node4) { auto *self4 = static_cast<Node4 *>(self); { - int i = getNodeIndex(self4, index); + int i = getNodeIndex((Node16 *)self4, index); if (i >= 0) { return self4->children[i]; } @@ -517,15 +495,7 @@ void eraseChild(Node *self, uint8_t index, NodeAllocators *allocators) { __builtin_unreachable(); // GCOVR_EXCL_LINE } - if (self->type == Type::Node4) { - auto *self4 = static_cast<Node4 *>(self); - int nodeIndex = getNodeIndex(self4, index); - memmove(self4->index + nodeIndex, self4->index + nodeIndex + 1, - sizeof(self4->index[0]) * (self->numChildren - (nodeIndex + 1))); - memmove(self4->children + nodeIndex, self4->children + nodeIndex + 1, - sizeof(self4->children[0]) * // NOLINT - (self->numChildren - (nodeIndex + 1))); - } else if (self->type == Type::Node16) { + if (self->type <= Type::Node16) { auto *self16 = static_cast<Node16 *>(self); int nodeIndex = getNodeIndex(self16, index); memmove(self16->index + nodeIndex, self16->index + nodeIndex + 1, @@ -735,15 +705,8 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) { } } switch (n->type) { - case Type::Node4: { - auto *self = static_cast<Node4 *>(n); - for (int i = 0; i < self->numChildren && self->index[i] < end; ++i) { - if (begin < self->index[i]) { - result = std::max(result, self->children[i]->maxVersion); - } - } - break; - } + case Type::Node4: +
[[fallthrough]]; case Type::Node16: { auto *self = static_cast<Node16 *>(n); for (int i = 0; i < self->numChildren && self->index[i] < end; ++i) {