Share some Node4/16 and Node48/256 implementations

Sharing the implementations reduces the number of instructions executed
(confirmed with cachegrind). This commit also stops initializing some memory
unnecessarily.
This commit is contained in:
2024-02-22 12:31:10 -08:00
parent bd5d0259d9
commit 3a5db2d2ac

View File

@@ -21,6 +21,7 @@ limitations under the License.
#include <bit> #include <bit>
#include <cassert> #include <cassert>
#include <compare> #include <compare>
#include <cstddef>
#include <cstdint> #include <cstdint>
#include <cstring> #include <cstring>
#include <inttypes.h> #include <inttypes.h>
@@ -89,45 +90,6 @@ private:
void *freeList = nullptr; void *freeList = nullptr;
}; };
// Tag stored in Node::type that records which concrete node layout a Node
// actually is. The underlying type is int8_t.
enum class Type : int8_t {
Node4,
Node16,
Node48,
Node256,
// Default value of Node::type until a derived constructor overwrites it.
Invalid,
};
// Common header that the concrete node layouts (Node4, Node16, Node48) derive
// from. The `type` tag tells which derived layout a given Node* points at.
struct Node {
/* begin section that's copied to the next node */
Node *parent = nullptr;
// The max write version over all keys that start with the search path up to
// this point
// NOTE: has no default member initializer, so it is indeterminate until
// assigned.
int64_t maxVersion;
Entry entry;
// Number of valid slots; the per-layout index/children arrays are scanned
// only up to this count.
int16_t numChildren = 0;
// Presumably true when `entry` holds a value - TODO confirm against callers.
bool entryPresent = false;
// Presumably the index byte under which this node is stored in `parent` -
// TODO confirm against callers.
uint8_t parentsIndex = 0;
// Capacity in bytes of the inline partialKey buffer.
constexpr static auto kPartialKeyMaxLen = 26;
uint8_t partialKey[kPartialKeyMaxLen];
// Presumably the number of bytes of partialKey in use - TODO confirm.
int8_t partialKeyLen = 0;
/* end section that's copied to the next node */
// Left as Invalid here; derived constructors set the real tag.
Type type = Type::Invalid;
};
// Node layout with room for up to 4 children. index[i] is the key byte for
// children[i]; only the first numChildren slots are meaningful.
struct Node4 : Node {
// Sorted
// (index is kept in strictly ascending order; both arrays start zeroed)
uint8_t index[4] = {};
Node *children[4] = {};
// Tags the node so code holding a Node* can recover the concrete layout.
Node4() { this->type = Type::Node4; }
};
// Node layout with room for up to 16 children. index[i] is the key byte for
// children[i]; only the first numChildren slots are meaningful.
struct Node16 : Node {
// Sorted
// (both arrays start zeroed)
uint8_t index[16] = {};
Node *children[16] = {};
// Tags the node so code holding a Node* can recover the concrete layout.
Node16() { this->type = Type::Node16; }
};
struct BitSet { struct BitSet {
bool test(int i) const { bool test(int i) const {
assert(0 <= i); assert(0 <= i);
@@ -185,9 +147,49 @@ private:
__uint128_t hi = 0; __uint128_t hi = 0;
}; };
// Tag stored in Node::type that records which concrete node layout a Node
// actually is. The underlying type is int8_t.
// Enumerator order is significant: callers test `type <= Type::Node16` to
// handle Node4 and Node16 through one code path, so Node4 and Node16 must stay
// the two smallest values.
enum class Type : int8_t {
Node4,
Node16,
Node48,
Node256,
// Default value of Node::type until a derived constructor overwrites it.
Invalid,
};
// Common header that the concrete node layouts (Node4, Node16, Node48) derive
// from. The `type` tag tells which derived layout a given Node* points at.
struct Node {
/* begin section that's copied to the next node */
Node *parent = nullptr;
// The max write version over all keys that start with the search path up to
// this point
// NOTE: has no default member initializer, so it is indeterminate until
// assigned.
int64_t maxVersion;
Entry entry;
// Number of valid slots; the per-layout index/children arrays are scanned
// only up to this count.
int16_t numChildren = 0;
// Presumably true when `entry` holds a value - TODO confirm against callers.
bool entryPresent = false;
// Presumably the index byte under which this node is stored in `parent` -
// TODO confirm against callers.
uint8_t parentsIndex = 0;
// Capacity in bytes of the inline partialKey buffer.
constexpr static auto kPartialKeyMaxLen = 26;
uint8_t partialKey[kPartialKeyMaxLen];
// Presumably the number of bytes of partialKey in use - TODO confirm.
int8_t partialKeyLen = 0;
/* end section that's copied to the next node */
// Left as Invalid here; derived constructors set the real tag.
Type type = Type::Invalid;
};
// Node layout with room for up to 4 children. index[i] is the key byte for
// children[i]; only the first numChildren slots are meaningful.
// Neither array has a default initializer, so slots at or beyond numChildren
// hold indeterminate values.
// Layout note: callers reach a Node4 through a Node16 pointer when
// `type <= Type::Node16`, so `index` must stay the same size as
// Node16::index to keep `children` at the same offset in both structs.
struct Node4 : Node {
// Sorted
uint8_t index[16]; // 16 so that we can use the same simd index search
// implementation for Node4 as Node16
Node *children[4];
// Tags the node so code holding a Node* can recover the concrete layout.
Node4() { this->type = Type::Node4; }
};
// Node layout with room for up to 16 children. index[i] is the key byte for
// children[i]; only the first numChildren slots are meaningful.
// Neither array has a default initializer, so slots at or beyond numChildren
// hold indeterminate values.
struct Node16 : Node {
// Sorted
uint8_t index[16];
Node *children[16];
// Tags the node so code holding a Node* can recover the concrete layout.
Node16() { this->type = Type::Node16; }
};
struct Node48 : Node { struct Node48 : Node {
BitSet bitSet; BitSet bitSet;
Node *children[48] = {}; Node *children[48];
int8_t nextFree = 0; int8_t nextFree = 0;
int8_t index[256]; int8_t index[256];
Node48() { Node48() {
@@ -209,15 +211,6 @@ struct NodeAllocators {
BoundedFreeListAllocator<Node256> node256; BoundedFreeListAllocator<Node256> node256;
}; };
// Linear search for the key byte `index` among the first numChildren entries
// of self->index.
// Returns the slot position i (so that self->index[i] == index), or -1 if no
// entry matches.
int getNodeIndex(Node4 *self, uint8_t index) {
for (int i = 0; i < self->numChildren; ++i) {
if (self->index[i] == index) {
return i;
}
}
// No slot holds this key byte.
return -1;
}
int getNodeIndex(Node16 *self, uint8_t index) { int getNodeIndex(Node16 *self, uint8_t index) {
#ifdef HAS_AVX #ifdef HAS_AVX
// Based on https://www.the-paper-trail.org/post/art-paper-notes/ // Based on https://www.the-paper-trail.org/post/art-paper-notes/
@@ -278,10 +271,7 @@ int getNodeIndex(Node16 *self, uint8_t index) {
// Precondition - an entry for index must exist in the node // Precondition - an entry for index must exist in the node
Node *&getChildExists(Node *self, uint8_t index) { Node *&getChildExists(Node *self, uint8_t index) {
if (self->type == Type::Node4) { if (self->type <= Type::Node16) {
auto *self4 = static_cast<Node4 *>(self);
return self4->children[getNodeIndex(self4, index)];
} else if (self->type == Type::Node16) {
auto *self16 = static_cast<Node16 *>(self); auto *self16 = static_cast<Node16 *>(self);
return self16->children[getNodeIndex(self16, index)]; return self16->children[getNodeIndex(self16, index)];
} else if (self->type == Type::Node48) { } else if (self->type == Type::Node48) {
@@ -299,17 +289,7 @@ int getChildGeq(Node *self, int child) {
if (child > 255) { if (child > 255) {
return -1; return -1;
} }
if (self->type == Type::Node4) { if (self->type <= Type::Node16) {
auto *self4 = static_cast<Node4 *>(self);
for (int i = 0; i < self->numChildren; ++i) {
if (i > 0) {
assert(self4->index[i - 1] < self4->index[i]);
}
if (self4->index[i] >= child) {
return self4->index[i];
}
}
} else if (self->type == Type::Node16) {
auto *self16 = static_cast<Node16 *>(self); auto *self16 = static_cast<Node16 *>(self);
#ifdef HAS_AVX #ifdef HAS_AVX
__m128i key_vec = _mm_set1_epi8(child); __m128i key_vec = _mm_set1_epi8(child);
@@ -363,12 +343,10 @@ int getChildGeq(Node *self, int child) {
} }
} }
#endif #endif
} else if (self->type == Type::Node48) { } else {
static_assert(offsetof(Node48, bitSet) == offsetof(Node256, bitSet));
auto *self48 = static_cast<Node48 *>(self); auto *self48 = static_cast<Node48 *>(self);
return self48->bitSet.firstSetGeq(child); return self48->bitSet.firstSetGeq(child);
} else {
auto *self256 = static_cast<Node256 *>(self);
return self256->bitSet.firstSetGeq(child);
} }
return -1; return -1;
} }
@@ -386,7 +364,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
if (self->type == Type::Node4) { if (self->type == Type::Node4) {
auto *self4 = static_cast<Node4 *>(self); auto *self4 = static_cast<Node4 *>(self);
{ {
int i = getNodeIndex(self4, index); int i = getNodeIndex((Node16 *)self4, index);
if (i >= 0) { if (i >= 0) {
return self4->children[i]; return self4->children[i];
} }
@@ -517,15 +495,7 @@ void eraseChild(Node *self, uint8_t index, NodeAllocators *allocators) {
__builtin_unreachable(); // GCOVR_EXCL_LINE __builtin_unreachable(); // GCOVR_EXCL_LINE
} }
if (self->type == Type::Node4) { if (self->type <= Type::Node16) {
auto *self4 = static_cast<Node4 *>(self);
int nodeIndex = getNodeIndex(self4, index);
memmove(self4->index + nodeIndex, self4->index + nodeIndex + 1,
sizeof(self4->index[0]) * (self->numChildren - (nodeIndex + 1)));
memmove(self4->children + nodeIndex, self4->children + nodeIndex + 1,
sizeof(self4->children[0]) * // NOLINT
(self->numChildren - (nodeIndex + 1)));
} else if (self->type == Type::Node16) {
auto *self16 = static_cast<Node16 *>(self); auto *self16 = static_cast<Node16 *>(self);
int nodeIndex = getNodeIndex(self16, index); int nodeIndex = getNodeIndex(self16, index);
memmove(self16->index + nodeIndex, self16->index + nodeIndex + 1, memmove(self16->index + nodeIndex, self16->index + nodeIndex + 1,
@@ -735,15 +705,8 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
} }
} }
switch (n->type) { switch (n->type) {
case Type::Node4: { case Type::Node4:
auto *self = static_cast<Node4 *>(n); [[fallthrough]];
for (int i = 0; i < self->numChildren && self->index[i] < end; ++i) {
if (begin < self->index[i]) {
result = std::max(result, self->children[i]->maxVersion);
}
}
break;
}
case Type::Node16: { case Type::Node16: {
auto *self = static_cast<Node16 *>(n); auto *self = static_cast<Node16 *>(n);
for (int i = 0; i < self->numChildren && self->index[i] < end; ++i) { for (int i = 0; i < self->numChildren && self->index[i] < end; ++i) {