Share some Node4/16 and Node48/256 implementations

This cuts down on the number of instructions executed (confirmed with
cachegrind). It also avoids initializing some memory unnecessarily.
This commit is contained in:
2024-02-22 12:31:10 -08:00
parent bd5d0259d9
commit 3a5db2d2ac

View File

@@ -21,6 +21,7 @@ limitations under the License.
#include <bit>
#include <cassert>
#include <compare>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <inttypes.h>
@@ -89,45 +90,6 @@ private:
void *freeList = nullptr;
};
// Discriminator stored in Node::type that says which concrete node struct a
// Node actually is.
//
// The enumerators are declared in ascending order of the size in their name.
// Code in this file relies on that order by comparing values with `<=`
// (e.g. `self->type <= Type::Node16` selects both Node4 and Node16), so the
// declaration order must not be changed.
enum class Type : int8_t {
Node4,
Node16,
Node48,
Node256,
// Default value of Node::type until a derived-struct constructor overwrites it.
Invalid,
};
// Common header shared by every node variant (Node4, Node16, ...). The
// variants derive from this struct and record which one they are in `type`.
//
// Member order matters: the members between the begin/end markers below form
// one section that is copied as a unit to a replacement node.
struct Node {
/* begin section that's copied to the next node */
// Parent node; nullptr by default.
Node *parent = nullptr;
// The max write version over all keys that start with the search path up to
// this point
// NOTE: has no default initializer, so it must be assigned before it is read.
int64_t maxVersion;
// NOTE(review): presumably only meaningful while entryPresent is true --
// confirm against the definition of Entry and its users.
Entry entry;
// Number of children currently stored; used as the loop bound when scanning
// a node's index/children arrays.
int16_t numChildren = 0;
bool entryPresent = false;
// NOTE(review): presumably the key byte under which this node is stored in
// its parent -- confirm against the code that links children to parents.
uint8_t parentsIndex = 0;
// Capacity, in bytes, of the inline partialKey buffer.
constexpr static auto kPartialKeyMaxLen = 26;
// Not initialized by default. NOTE(review): presumably only the first
// partialKeyLen bytes are valid -- confirm.
uint8_t partialKey[kPartialKeyMaxLen];
int8_t partialKeyLen = 0;
/* end section that's copied to the next node */
// Concrete node kind; stays Type::Invalid until a derived constructor sets it.
Type type = Type::Invalid;
};
// Node variant with room for up to 4 children.
// index[i] holds the key byte that selects children[i]; lookups scan only the
// first numChildren slots. Both arrays are zero-initialized.
struct Node4 : Node {
// Sorted
uint8_t index[4] = {};
Node *children[4] = {};
// Tags the shared header so code holding a Node* can recover the real type.
Node4() { this->type = Type::Node4; }
};
// Node variant with room for up to 16 children.
// index[i] holds the key byte that selects children[i]. Both arrays are
// zero-initialized.
struct Node16 : Node {
// Sorted
uint8_t index[16] = {};
Node *children[16] = {};
// Tags the shared header so code holding a Node* can recover the real type.
Node16() { this->type = Type::Node16; }
};
struct BitSet {
bool test(int i) const {
assert(0 <= i);
@@ -185,9 +147,49 @@ private:
__uint128_t hi = 0;
};
// Discriminator stored in Node::type that says which concrete node struct a
// Node actually is.
//
// The enumerators are declared in ascending order of the size in their name.
// Code in this file relies on that order by comparing values with `<=`
// (e.g. `self->type <= Type::Node16` selects both Node4 and Node16), so the
// declaration order must not be changed.
enum class Type : int8_t {
Node4,
Node16,
Node48,
Node256,
// Default value of Node::type until a derived-struct constructor overwrites it.
Invalid,
};
// Common header shared by every node variant (Node4, Node16, ...). The
// variants derive from this struct and record which one they are in `type`.
//
// Member order matters: the members between the begin/end markers below form
// one section that is copied as a unit to a replacement node.
struct Node {
/* begin section that's copied to the next node */
// Parent node; nullptr by default.
Node *parent = nullptr;
// The max write version over all keys that start with the search path up to
// this point
// NOTE: has no default initializer, so it must be assigned before it is read.
int64_t maxVersion;
// NOTE(review): presumably only meaningful while entryPresent is true --
// confirm against the definition of Entry and its users.
Entry entry;
// Number of children currently stored; used as the loop bound when scanning
// a node's index/children arrays.
int16_t numChildren = 0;
bool entryPresent = false;
// NOTE(review): presumably the key byte under which this node is stored in
// its parent -- confirm against the code that links children to parents.
uint8_t parentsIndex = 0;
// Capacity, in bytes, of the inline partialKey buffer.
constexpr static auto kPartialKeyMaxLen = 26;
// Not initialized by default. NOTE(review): presumably only the first
// partialKeyLen bytes are valid -- confirm.
uint8_t partialKey[kPartialKeyMaxLen];
int8_t partialKeyLen = 0;
/* end section that's copied to the next node */
// Concrete node kind; stays Type::Invalid until a derived constructor sets it.
Type type = Type::Invalid;
};
struct Node4 : Node {
// Sorted
uint8_t index[16]; // 16 so that we can use the same simd index search
// implementation for Node4 as Node16
Node *children[4];
Node4() { this->type = Type::Node4; }
};
struct Node16 : Node {
// Sorted
uint8_t index[16];
Node *children[16];
Node16() { this->type = Type::Node16; }
};
struct Node48 : Node {
BitSet bitSet;
Node *children[48] = {};
Node *children[48];
int8_t nextFree = 0;
int8_t index[256];
Node48() {
@@ -209,15 +211,6 @@ struct NodeAllocators {
BoundedFreeListAllocator<Node256> node256;
};
// Finds the slot of a Node4 whose key byte equals `index`.
//
// Scans the first numChildren entries of self->index from the front and
// returns the position of the first match, or -1 when no entry matches.
int getNodeIndex(Node4 *self, uint8_t index) {
  const int count = self->numChildren;
  int slot = 0;
  // Advance past every occupied slot holding a different key byte.
  while (slot < count && self->index[slot] != index) {
    ++slot;
  }
  return slot < count ? slot : -1;
}
int getNodeIndex(Node16 *self, uint8_t index) {
#ifdef HAS_AVX
// Based on https://www.the-paper-trail.org/post/art-paper-notes/
@@ -278,10 +271,7 @@ int getNodeIndex(Node16 *self, uint8_t index) {
// Precondition - an entry for index must exist in the node
Node *&getChildExists(Node *self, uint8_t index) {
if (self->type == Type::Node4) {
auto *self4 = static_cast<Node4 *>(self);
return self4->children[getNodeIndex(self4, index)];
} else if (self->type == Type::Node16) {
if (self->type <= Type::Node16) {
auto *self16 = static_cast<Node16 *>(self);
return self16->children[getNodeIndex(self16, index)];
} else if (self->type == Type::Node48) {
@@ -299,17 +289,7 @@ int getChildGeq(Node *self, int child) {
if (child > 255) {
return -1;
}
if (self->type == Type::Node4) {
auto *self4 = static_cast<Node4 *>(self);
for (int i = 0; i < self->numChildren; ++i) {
if (i > 0) {
assert(self4->index[i - 1] < self4->index[i]);
}
if (self4->index[i] >= child) {
return self4->index[i];
}
}
} else if (self->type == Type::Node16) {
if (self->type <= Type::Node16) {
auto *self16 = static_cast<Node16 *>(self);
#ifdef HAS_AVX
__m128i key_vec = _mm_set1_epi8(child);
@@ -363,12 +343,10 @@ int getChildGeq(Node *self, int child) {
}
}
#endif
} else if (self->type == Type::Node48) {
} else {
static_assert(offsetof(Node48, bitSet) == offsetof(Node256, bitSet));
auto *self48 = static_cast<Node48 *>(self);
return self48->bitSet.firstSetGeq(child);
} else {
auto *self256 = static_cast<Node256 *>(self);
return self256->bitSet.firstSetGeq(child);
}
return -1;
}
@@ -386,7 +364,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
if (self->type == Type::Node4) {
auto *self4 = static_cast<Node4 *>(self);
{
int i = getNodeIndex(self4, index);
int i = getNodeIndex((Node16 *)self4, index);
if (i >= 0) {
return self4->children[i];
}
@@ -517,15 +495,7 @@ void eraseChild(Node *self, uint8_t index, NodeAllocators *allocators) {
__builtin_unreachable(); // GCOVR_EXCL_LINE
}
if (self->type == Type::Node4) {
auto *self4 = static_cast<Node4 *>(self);
int nodeIndex = getNodeIndex(self4, index);
memmove(self4->index + nodeIndex, self4->index + nodeIndex + 1,
sizeof(self4->index[0]) * (self->numChildren - (nodeIndex + 1)));
memmove(self4->children + nodeIndex, self4->children + nodeIndex + 1,
sizeof(self4->children[0]) * // NOLINT
(self->numChildren - (nodeIndex + 1)));
} else if (self->type == Type::Node16) {
if (self->type <= Type::Node16) {
auto *self16 = static_cast<Node16 *>(self);
int nodeIndex = getNodeIndex(self16, index);
memmove(self16->index + nodeIndex, self16->index + nodeIndex + 1,
@@ -735,15 +705,8 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
}
}
switch (n->type) {
case Type::Node4: {
auto *self = static_cast<Node4 *>(n);
for (int i = 0; i < self->numChildren && self->index[i] < end; ++i) {
if (begin < self->index[i]) {
result = std::max(result, self->children[i]->maxVersion);
}
}
break;
}
case Type::Node4:
[[fallthrough]];
case Type::Node16: {
auto *self = static_cast<Node16 *>(n);
for (int i = 0; i < self->numChildren && self->index[i] < end; ++i) {