Share some Node4/16 and Node48/256 implementations
This cuts down on the number of instructions executed (confirmed with cachegrind). It also avoids initializing some memory unnecessarily.
This commit is contained in:
137
ConflictSet.cpp
137
ConflictSet.cpp
@@ -21,6 +21,7 @@ limitations under the License.
|
||||
#include <bit>
|
||||
#include <cassert>
|
||||
#include <compare>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <inttypes.h>
|
||||
@@ -89,45 +90,6 @@ private:
|
||||
void *freeList = nullptr;
|
||||
};
|
||||
|
||||
enum class Type : int8_t {
|
||||
Node4,
|
||||
Node16,
|
||||
Node48,
|
||||
Node256,
|
||||
Invalid,
|
||||
};
|
||||
struct Node {
|
||||
/* begin section that's copied to the next node */
|
||||
Node *parent = nullptr;
|
||||
// The max write version over all keys that start with the search path up to
|
||||
// this point
|
||||
int64_t maxVersion;
|
||||
Entry entry;
|
||||
int16_t numChildren = 0;
|
||||
bool entryPresent = false;
|
||||
uint8_t parentsIndex = 0;
|
||||
constexpr static auto kPartialKeyMaxLen = 26;
|
||||
uint8_t partialKey[kPartialKeyMaxLen];
|
||||
int8_t partialKeyLen = 0;
|
||||
/* end section that's copied to the next node */
|
||||
|
||||
Type type = Type::Invalid;
|
||||
};
|
||||
|
||||
struct Node4 : Node {
|
||||
// Sorted
|
||||
uint8_t index[4] = {};
|
||||
Node *children[4] = {};
|
||||
Node4() { this->type = Type::Node4; }
|
||||
};
|
||||
|
||||
struct Node16 : Node {
|
||||
// Sorted
|
||||
uint8_t index[16] = {};
|
||||
Node *children[16] = {};
|
||||
Node16() { this->type = Type::Node16; }
|
||||
};
|
||||
|
||||
struct BitSet {
|
||||
bool test(int i) const {
|
||||
assert(0 <= i);
|
||||
@@ -185,9 +147,49 @@ private:
|
||||
__uint128_t hi = 0;
|
||||
};
|
||||
|
||||
// Discriminator stored in Node::type that identifies which concrete node
// layout a Node* actually points at.
// The declaration order is significant: code in this file tests
// `type <= Type::Node16` to handle Node4 and Node16 in a single branch, so
// Node4 and Node16 must remain the two smallest enumerators.
enum class Type : int8_t {
|
||||
Node4,
|
||||
Node16,
|
||||
Node48,
|
||||
Node256,
|
||||
// Default value of Node::type; the Node4/Node16 constructors overwrite it.
Invalid,
|
||||
};
|
||||
// Fields common to every node layout. Node4, Node16 and Node48 derive from
// this struct; `type` records which derived layout an instance really is.
struct Node {
|
||||
/* begin section that's copied to the next node */
|
||||
Node *parent = nullptr;
|
||||
// The max write version over all keys that start with the search path up to
|
||||
// this point
|
||||
// NOTE(review): no default member initializer here; presumably whoever
|
||||
// creates the node assigns it before it is read - confirm.
int64_t maxVersion;
|
||||
// `Entry` is declared outside this view.
Entry entry;
|
||||
// Loops over a node's index[] array in this file are bounded by numChildren.
int16_t numChildren = 0;
|
||||
// Presumably true when `entry` holds a meaningful value - confirm.
bool entryPresent = false;
|
||||
// Presumably the index byte under which `parent` stores this node - confirm.
uint8_t parentsIndex = 0;
|
||||
constexpr static auto kPartialKeyMaxLen = 26;
|
||||
// No initializer; presumably only the first partialKeyLen bytes are
// meaningful - confirm.
uint8_t partialKey[kPartialKeyMaxLen];
|
||||
int8_t partialKeyLen = 0;
|
||||
/* end section that's copied to the next node */
|
||||
|
||||
// Stays Type::Invalid unless a derived constructor assigns a real type.
Type type = Type::Invalid;
|
||||
};
|
||||
|
||||
// Node layout with room for at most 4 child pointers.
// Code in this file accesses a Node4 through a Node16* (a cast taken whenever
// `type <= Type::Node16`), which relies on `index` and `children` sitting at
// the same offsets in both structs.
// NOTE(review): a static_assert on offsetof(Node4, children) ==
// offsetof(Node16, children) would pin that assumption down, in the same way
// the file already asserts the Node48/Node256 bitSet offsets.
struct Node4 : Node {
|
||||
// Sorted
|
||||
// Neither `index` nor `children` has an initializer, so their contents are
// indeterminate after construction; presumably only the first numChildren
// slots are ever meaningful - confirm.
uint8_t index[16]; // 16 so that we can use the same simd index search
|
||||
// implementation for Node4 as Node16
|
||||
Node *children[4];
|
||||
// Tags the node so that code holding a plain Node* can identify the layout.
Node4() { this->type = Type::Node4; }
|
||||
};
|
||||
|
||||
// Node layout with room for at most 16 child pointers.
// Code in this file also casts Node4 instances to Node16* (whenever
// `type <= Type::Node16`), so the member order and the size of `index` here
// have to stay in step with Node4.
struct Node16 : Node {
|
||||
// Sorted
|
||||
// Neither array has an initializer; presumably only the first numChildren
// slots are meaningful - confirm.
uint8_t index[16];
|
||||
Node *children[16];
|
||||
// Tags the node so that code holding a plain Node* can identify the layout.
Node16() { this->type = Type::Node16; }
|
||||
};
|
||||
|
||||
struct Node48 : Node {
|
||||
BitSet bitSet;
|
||||
Node *children[48] = {};
|
||||
Node *children[48];
|
||||
int8_t nextFree = 0;
|
||||
int8_t index[256];
|
||||
Node48() {
|
||||
@@ -209,15 +211,6 @@ struct NodeAllocators {
|
||||
BoundedFreeListAllocator<Node256> node256;
|
||||
};
|
||||
|
||||
int getNodeIndex(Node4 *self, uint8_t index) {
|
||||
for (int i = 0; i < self->numChildren; ++i) {
|
||||
if (self->index[i] == index) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
int getNodeIndex(Node16 *self, uint8_t index) {
|
||||
#ifdef HAS_AVX
|
||||
// Based on https://www.the-paper-trail.org/post/art-paper-notes/
|
||||
@@ -278,10 +271,7 @@ int getNodeIndex(Node16 *self, uint8_t index) {
|
||||
|
||||
// Precondition - an entry for index must exist in the node
|
||||
Node *&getChildExists(Node *self, uint8_t index) {
|
||||
if (self->type == Type::Node4) {
|
||||
auto *self4 = static_cast<Node4 *>(self);
|
||||
return self4->children[getNodeIndex(self4, index)];
|
||||
} else if (self->type == Type::Node16) {
|
||||
if (self->type <= Type::Node16) {
|
||||
auto *self16 = static_cast<Node16 *>(self);
|
||||
return self16->children[getNodeIndex(self16, index)];
|
||||
} else if (self->type == Type::Node48) {
|
||||
@@ -299,17 +289,7 @@ int getChildGeq(Node *self, int child) {
|
||||
if (child > 255) {
|
||||
return -1;
|
||||
}
|
||||
if (self->type == Type::Node4) {
|
||||
auto *self4 = static_cast<Node4 *>(self);
|
||||
for (int i = 0; i < self->numChildren; ++i) {
|
||||
if (i > 0) {
|
||||
assert(self4->index[i - 1] < self4->index[i]);
|
||||
}
|
||||
if (self4->index[i] >= child) {
|
||||
return self4->index[i];
|
||||
}
|
||||
}
|
||||
} else if (self->type == Type::Node16) {
|
||||
if (self->type <= Type::Node16) {
|
||||
auto *self16 = static_cast<Node16 *>(self);
|
||||
#ifdef HAS_AVX
|
||||
__m128i key_vec = _mm_set1_epi8(child);
|
||||
@@ -363,12 +343,10 @@ int getChildGeq(Node *self, int child) {
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} else if (self->type == Type::Node48) {
|
||||
} else {
|
||||
static_assert(offsetof(Node48, bitSet) == offsetof(Node256, bitSet));
|
||||
auto *self48 = static_cast<Node48 *>(self);
|
||||
return self48->bitSet.firstSetGeq(child);
|
||||
} else {
|
||||
auto *self256 = static_cast<Node256 *>(self);
|
||||
return self256->bitSet.firstSetGeq(child);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
@@ -386,7 +364,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
|
||||
if (self->type == Type::Node4) {
|
||||
auto *self4 = static_cast<Node4 *>(self);
|
||||
{
|
||||
int i = getNodeIndex(self4, index);
|
||||
int i = getNodeIndex((Node16 *)self4, index);
|
||||
if (i >= 0) {
|
||||
return self4->children[i];
|
||||
}
|
||||
@@ -517,15 +495,7 @@ void eraseChild(Node *self, uint8_t index, NodeAllocators *allocators) {
|
||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||
}
|
||||
|
||||
if (self->type == Type::Node4) {
|
||||
auto *self4 = static_cast<Node4 *>(self);
|
||||
int nodeIndex = getNodeIndex(self4, index);
|
||||
memmove(self4->index + nodeIndex, self4->index + nodeIndex + 1,
|
||||
sizeof(self4->index[0]) * (self->numChildren - (nodeIndex + 1)));
|
||||
memmove(self4->children + nodeIndex, self4->children + nodeIndex + 1,
|
||||
sizeof(self4->children[0]) * // NOLINT
|
||||
(self->numChildren - (nodeIndex + 1)));
|
||||
} else if (self->type == Type::Node16) {
|
||||
if (self->type <= Type::Node16) {
|
||||
auto *self16 = static_cast<Node16 *>(self);
|
||||
int nodeIndex = getNodeIndex(self16, index);
|
||||
memmove(self16->index + nodeIndex, self16->index + nodeIndex + 1,
|
||||
@@ -735,15 +705,8 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
|
||||
}
|
||||
}
|
||||
switch (n->type) {
|
||||
case Type::Node4: {
|
||||
auto *self = static_cast<Node4 *>(n);
|
||||
for (int i = 0; i < self->numChildren && self->index[i] < end; ++i) {
|
||||
if (begin < self->index[i]) {
|
||||
result = std::max(result, self->children[i]->maxVersion);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Type::Node4:
|
||||
[[fallthrough]];
|
||||
case Type::Node16: {
|
||||
auto *self = static_cast<Node16 *>(n);
|
||||
for (int i = 0; i < self->numChildren && self->index[i] < end; ++i) {
|
||||
|
Reference in New Issue
Block a user