4 Commits

Author SHA1 Message Date
26bd8b94cc Lower kBytesPerKey to 144 by changing Node4 to Node3
All checks were successful
Tests / Release [gcc] total: 932, passed: 932
GNU C Compiler (gcc)
|Total|New|Outstanding|Fixed|Trend|
|:-:|:-:|:-:|:-:|:-:|
|5|0|5|0|:zzz:|
Tests / Release [gcc,aarch64] total: 931, passed: 931
Tests / Coverage total: 930, passed: 930
weaselab/conflict-set/pipeline/head This commit looks good
2024-03-13 12:29:01 -07:00
55eaef5b1d Remove 16 bytes from Node0 2024-03-13 10:59:25 -07:00
797e6b4a3e Use switch for type dispatch throughout 2024-03-13 10:59:19 -07:00
ee86b5289b Rearrange induction inequalities
This looks nicer IMO
2024-03-13 07:31:24 -07:00

View File

@@ -176,7 +176,7 @@ int BitSet::firstSetGeq(int i) const {
enum class Type : int8_t { enum class Type : int8_t {
Node0, Node0,
Node4, Node3,
Node16, Node16,
Node48, Node48,
Node256, Node256,
@@ -211,26 +211,23 @@ struct Child {
}; };
struct Node0 : Node { struct Node0 : Node {
// Sorted
uint8_t index[16]; // 16 so that we can use the same simd index search
// implementation as Node16
Node0() { this->type = Type::Node0; } Node0() { this->type = Type::Node0; }
uint8_t *partialKey() { return (uint8_t *)(this + 1); } uint8_t *partialKey() { return (uint8_t *)(this + 1); }
}; };
struct Node4 : Node { struct Node3 : Node {
// Sorted constexpr static auto kMaxNodes = 3;
uint8_t index[16]; // 16 so that we can use the same simd index search uint8_t index[kMaxNodes];
// implementation as Node16 Child children[kMaxNodes];
Child children[4]; Node3() { this->type = Type::Node3; }
Node4() { this->type = Type::Node4; }
uint8_t *partialKey() { return (uint8_t *)(this + 1); } uint8_t *partialKey() { return (uint8_t *)(this + 1); }
}; };
struct Node16 : Node { struct Node16 : Node {
// Sorted // Sorted
uint8_t index[16]; constexpr static auto kMaxNodes = 16;
Child children[16]; uint8_t index[kMaxNodes];
Child children[kMaxNodes];
Node16() { this->type = Type::Node16; } Node16() { this->type = Type::Node16; }
uint8_t *partialKey() { return (uint8_t *)(this + 1); } uint8_t *partialKey() { return (uint8_t *)(this + 1); }
}; };
@@ -265,22 +262,33 @@ std::string getSearchPathPrintable(Node *n);
// Bound memory usage following the analysis in the ART paper // Bound memory usage following the analysis in the ART paper
constexpr int kBytesPerKey = 176; // Each node with an entry present gets a budget of kBytesPerKey. Node0 always
constexpr int kMinSavingsPerNode = 120; // has an entry present.
constexpr int kMinChildrenNode4 = 2; constexpr int kBytesPerKey = 144;
constexpr int kMinChildrenNode16 = 5; // Induction hypothesis is that each node's surplus is >= kMinNodeSurplus
constexpr int kMinNodeSurplus = 104;
constexpr int kMinChildrenNode3 = 2;
constexpr int kMinChildrenNode16 = 4;
constexpr int kMinChildrenNode48 = 17; constexpr int kMinChildrenNode48 = 17;
constexpr int kMinChildrenNode256 = 49; constexpr int kMinChildrenNode256 = 49;
static_assert(sizeof(Node256) + kMinSavingsPerNode <= constexpr int kNode256Surplus =
kMinChildrenNode256 * kMinSavingsPerNode); kMinChildrenNode256 * kMinNodeSurplus - sizeof(Node256);
static_assert(sizeof(Node48) + kMinSavingsPerNode <= static_assert(kNode256Surplus >= kMinNodeSurplus);
kMinChildrenNode48 * kMinSavingsPerNode);
static_assert(sizeof(Node16) + kMinSavingsPerNode <= constexpr int kNode48Surplus =
kMinChildrenNode16 * kMinSavingsPerNode); kMinChildrenNode48 * kMinNodeSurplus - sizeof(Node48);
static_assert(sizeof(Node4) + kMinSavingsPerNode <= static_assert(kNode48Surplus >= kMinNodeSurplus);
kMinChildrenNode4 * kMinSavingsPerNode);
static_assert(sizeof(Node0) + kMinSavingsPerNode <= kBytesPerKey); constexpr int kNode16Surplus =
kMinChildrenNode16 * kMinNodeSurplus - sizeof(Node16);
static_assert(kNode16Surplus >= kMinNodeSurplus);
constexpr int kNode3Surplus =
kMinChildrenNode3 * kMinNodeSurplus - sizeof(Node3);
static_assert(kNode3Surplus >= kMinNodeSurplus);
static_assert(kBytesPerKey - sizeof(Node0) >= kMinNodeSurplus);
// setOldestVersion will additionally try to maintain this property: // setOldestVersion will additionally try to maintain this property:
// `max(children, 1) * length >= capacity` // `max(children, 1) * length >= capacity`
@@ -359,8 +367,8 @@ uint8_t *Node::partialKey() {
switch (type) { switch (type) {
case Type::Node0: case Type::Node0:
return ((Node0 *)this)->partialKey(); return ((Node0 *)this)->partialKey();
case Type::Node4: case Type::Node3:
return ((Node4 *)this)->partialKey(); return ((Node3 *)this)->partialKey();
case Type::Node16: case Type::Node16:
return ((Node16 *)this)->partialKey(); return ((Node16 *)this)->partialKey();
case Type::Node48: case Type::Node48:
@@ -373,13 +381,14 @@ uint8_t *Node::partialKey() {
struct NodeAllocators { struct NodeAllocators {
BoundedFreeListAllocator<Node0> node0; BoundedFreeListAllocator<Node0> node0;
BoundedFreeListAllocator<Node4> node4; BoundedFreeListAllocator<Node3> node3;
BoundedFreeListAllocator<Node16> node16; BoundedFreeListAllocator<Node16> node16;
BoundedFreeListAllocator<Node48> node48; BoundedFreeListAllocator<Node48> node48;
BoundedFreeListAllocator<Node256> node256; BoundedFreeListAllocator<Node256> node256;
}; };
int getNodeIndex(Node16 *self, uint8_t index) { template <class NodeT> int getNodeIndex(NodeT *self, uint8_t index) {
static_assert(std::is_same_v<NodeT, Node3> || std::is_same_v<NodeT, Node16>);
#ifdef HAS_AVX #ifdef HAS_AVX
// Based on https://www.the-paper-trail.org/post/art-paper-notes/ // Based on https://www.the-paper-trail.org/post/art-paper-notes/
@@ -391,7 +400,7 @@ int getNodeIndex(Node16 *self, uint8_t index) {
// keys aren't valid, we'll mask the results to only consider the valid ones // keys aren't valid, we'll mask the results to only consider the valid ones
// below. // below.
__m128i indices; __m128i indices;
memcpy(&indices, self->index, sizeof(self->index)); memcpy(&indices, self->index, NodeT::kMaxNodes);
__m128i results = _mm_cmpeq_epi8(key_vec, indices); __m128i results = _mm_cmpeq_epi8(key_vec, indices);
// Build a mask to select only the first node->num_children values from the // Build a mask to select only the first node->num_children values from the
@@ -414,10 +423,12 @@ int getNodeIndex(Node16 *self, uint8_t index) {
// https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon // https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
uint8x16_t indices; uint8x16_t indices;
memcpy(&indices, self->index, sizeof(self->index)); memcpy(&indices, self->index, NodeT::kMaxNodes);
// 0xff for each match // 0xff for each match
uint16x8_t results = uint16x8_t results =
vreinterpretq_u16_u8(vceqq_u8(vdupq_n_u8(index), indices)); vreinterpretq_u16_u8(vceqq_u8(vdupq_n_u8(index), indices));
static_assert(NodeT::kMaxNodes <= 16);
assume(self->numChildren <= NodeT::kMaxNodes);
uint64_t mask = self->numChildren == 16 uint64_t mask = self->numChildren == 16
? uint64_t(-1) ? uint64_t(-1)
: (uint64_t(1) << (self->numChildren * 4)) - 1; : (uint64_t(1) << (self->numChildren * 4)) - 1;
@@ -439,36 +450,28 @@ int getNodeIndex(Node16 *self, uint8_t index) {
// Precondition - an entry for index must exist in the node // Precondition - an entry for index must exist in the node
Node *&getChildExists(Node *self, uint8_t index) { Node *&getChildExists(Node *self, uint8_t index) {
if (self->type <= Type::Node16) { switch (self->type) {
case Type::Node0:
__builtin_unreachable(); // GCOVR_EXCL_LINE
case Type::Node3: {
auto *self3 = static_cast<Node3 *>(self);
return self3->children[getNodeIndex(self3, index)].child;
}
case Type::Node16: {
auto *self16 = static_cast<Node16 *>(self); auto *self16 = static_cast<Node16 *>(self);
return self16->children[getNodeIndex(self16, index)].child; return self16->children[getNodeIndex(self16, index)].child;
} else if (self->type == Type::Node48) { }
case Type::Node48: {
auto *self48 = static_cast<Node48 *>(self); auto *self48 = static_cast<Node48 *>(self);
assert(self48->bitSet.test(index)); assert(self48->bitSet.test(index));
return self48->children[self48->index[index]].child; return self48->children[self48->index[index]].child;
} else { }
case Type::Node256: {
auto *self256 = static_cast<Node256 *>(self); auto *self256 = static_cast<Node256 *>(self);
assert(self256->bitSet.test(index)); assert(self256->bitSet.test(index));
return self256->children[index].child; return self256->children[index].child;
} }
__builtin_unreachable(); // GCOVR_EXCL_LINE
}
// Precondition - an entry for index must exist in the node
int64_t getChildMaxVersion(Node *self, uint8_t index) {
if (self->type <= Type::Node16) {
auto *self16 = static_cast<Node16 *>(self);
return self16->children[getNodeIndex(self16, index)].childMaxVersion;
} else if (self->type == Type::Node48) {
auto *self48 = static_cast<Node48 *>(self);
assert(self48->bitSet.test(index));
return self48->children[self48->index[index]].childMaxVersion;
} else {
auto *self256 = static_cast<Node256 *>(self);
assert(self256->bitSet.test(index));
return self256->children[index].childMaxVersion;
} }
__builtin_unreachable(); // GCOVR_EXCL_LINE
} }
// Precondition - an entry for index must exist in the node // Precondition - an entry for index must exist in the node
@@ -477,44 +480,45 @@ int64_t &maxVersion(Node *n, ConflictSet::Impl *);
Node *&getInTree(Node *n, ConflictSet::Impl *); Node *&getInTree(Node *n, ConflictSet::Impl *);
Node *getChild(Node *self, uint8_t index) { Node *getChild(Node *self, uint8_t index) {
if (self->type <= Type::Node16) { switch (self->type) {
case Type::Node0:
return nullptr;
case Type::Node3: {
auto *self3 = static_cast<Node3 *>(self);
int i = getNodeIndex(self3, index);
return i < 0 ? nullptr : self3->children[i].child;
}
case Type::Node16: {
auto *self16 = static_cast<Node16 *>(self); auto *self16 = static_cast<Node16 *>(self);
int i = getNodeIndex(self16, index); int i = getNodeIndex(self16, index);
if (i >= 0) { return i < 0 ? nullptr : self16->children[i].child;
return self16->children[i].child;
} }
return nullptr; case Type::Node48: {
} else if (self->type == Type::Node48) {
auto *self48 = static_cast<Node48 *>(self); auto *self48 = static_cast<Node48 *>(self);
int secondIndex = self48->index[index]; int i = self48->index[index];
if (secondIndex >= 0) { return i < 0 ? nullptr : self48->children[i].child;
return self48->children[secondIndex].child;
} }
return nullptr; case Type::Node256: {
} else {
auto *self256 = static_cast<Node256 *>(self); auto *self256 = static_cast<Node256 *>(self);
return self256->children[index].child; return self256->children[index].child;
} }
}
} }
int getChildGeq(Node *self, int child) { template <class NodeT> int getChildGeqSimd(NodeT *self, int child) {
if (child > 255) { static_assert(std::is_same_v<NodeT, Node3> || std::is_same_v<NodeT, Node16>);
return -1;
}
if (self->type <= Type::Node16) {
auto *self16 = static_cast<Node16 *>(self);
#ifdef HAS_AVX #ifdef HAS_AVX
__m128i key_vec = _mm_set1_epi8(child); __m128i key_vec = _mm_set1_epi8(child);
__m128i indices; __m128i indices;
memcpy(&indices, self16->index, sizeof(self16->index)); memcpy(&indices, self->index, NodeT::kMaxNodes);
__m128i results = _mm_cmpeq_epi8(key_vec, _mm_min_epu8(key_vec, indices)); __m128i results = _mm_cmpeq_epi8(key_vec, _mm_min_epu8(key_vec, indices));
int mask = (1 << self16->numChildren) - 1; int mask = (1 << self->numChildren) - 1;
uint32_t bitfield = _mm_movemask_epi8(results) & mask; uint32_t bitfield = _mm_movemask_epi8(results) & mask;
int result = bitfield == 0 ? -1 : self16->index[std::countr_zero(bitfield)]; int result = bitfield == 0 ? -1 : self->index[std::countr_zero(bitfield)];
assert(result == [&]() -> int { assert(result == [&]() -> int {
for (int i = 0; i < self16->numChildren; ++i) { for (int i = 0; i < self->numChildren; ++i) {
if (self16->index[i] >= child) { if (self->index[i] >= child) {
return self16->index[i]; return self->index[i];
} }
} }
return -1; return -1;
@@ -522,9 +526,11 @@ int getChildGeq(Node *self, int child) {
return result; return result;
#elif defined(HAS_ARM_NEON) #elif defined(HAS_ARM_NEON)
uint8x16_t indices; uint8x16_t indices;
memcpy(&indices, self16->index, sizeof(self16->index)); memcpy(&indices, self->index, sizeof(self->index));
// 0xff for each leq // 0xff for each leq
auto results = vcleq_u8(vdupq_n_u8(child), indices); auto results = vcleq_u8(vdupq_n_u8(child), indices);
static_assert(NodeT::kMaxNodes <= 16);
assume(self->numChildren <= NodeT::kMaxNodes);
uint64_t mask = self->numChildren == 16 uint64_t mask = self->numChildren == 16
? uint64_t(-1) ? uint64_t(-1)
: (uint64_t(1) << (self->numChildren * 4)) - 1; : (uint64_t(1) << (self->numChildren * 4)) - 1;
@@ -534,12 +540,11 @@ int getChildGeq(Node *self, int child) {
vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(results), 4)), vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(results), 4)),
0) & 0) &
mask; mask;
int simd = int simd = bitfield == 0 ? -1 : self->index[std::countr_zero(bitfield) / 4];
bitfield == 0 ? -1 : self16->index[std::countr_zero(bitfield) / 4];
assert(simd == [&]() -> int { assert(simd == [&]() -> int {
for (int i = 0; i < self->numChildren; ++i) { for (int i = 0; i < self->numChildren; ++i) {
if (self16->index[i] >= child) { if (self->index[i] >= child) {
return self16->index[i]; return self->index[i];
} }
} }
return -1; return -1;
@@ -548,22 +553,38 @@ int getChildGeq(Node *self, int child) {
#else #else
for (int i = 0; i < self->numChildren; ++i) { for (int i = 0; i < self->numChildren; ++i) {
if (i > 0) { if (i > 0) {
assert(self16->index[i - 1] < self16->index[i]); assert(self->index[i - 1] < self->index[i]);
} }
if (self16->index[i] >= child) { if (self->index[i] >= child) {
return self16->index[i]; return self->index[i];
} }
} }
return -1;
#endif #endif
} else { }
int getChildGeq(Node *self, int child) {
if (child > 255) {
return -1;
}
switch (self->type) {
case Type::Node0:
return -1;
case Type::Node3:
return getChildGeqSimd(static_cast<Node3 *>(self), child);
case Type::Node16:
return getChildGeqSimd(static_cast<Node16 *>(self), child);
case Type::Node48:
[[fallthrough]];
case Type::Node256: {
static_assert(offsetof(Node48, bitSet) == offsetof(Node256, bitSet)); static_assert(offsetof(Node48, bitSet) == offsetof(Node256, bitSet));
auto *self48 = static_cast<Node48 *>(self); auto *self48 = static_cast<Node48 *>(self);
return self48->bitSet.firstSetGeq(child); return self48->bitSet.firstSetGeq(child);
} }
return -1; }
} }
void setChildrenParents(Node4 *n) { void setChildrenParents(Node3 *n) {
for (int i = 0; i < n->numChildren; ++i) { for (int i = 0; i < n->numChildren; ++i) {
n->children[i].child->parent = n; n->children[i].child->parent = n;
} }
@@ -591,66 +612,95 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
NodeAllocators *allocators) { NodeAllocators *allocators) {
// Fast path for if it exists already // Fast path for if it exists already
if (self->type <= Type::Node16) { switch (self->type) {
case Type::Node0:
break;
case Type::Node3: {
auto *self3 = static_cast<Node3 *>(self);
int i = getNodeIndex(self3, index);
if (i >= 0) {
return self3->children[i].child;
}
} break;
case Type::Node16: {
auto *self16 = static_cast<Node16 *>(self); auto *self16 = static_cast<Node16 *>(self);
int i = getNodeIndex(self16, index); int i = getNodeIndex(self16, index);
if (i >= 0) { if (i >= 0) {
return self16->children[i].child; return self16->children[i].child;
} }
} else if (self->type == Type::Node48) { } break;
case Type::Node48: {
auto *self48 = static_cast<Node48 *>(self); auto *self48 = static_cast<Node48 *>(self);
int secondIndex = self48->index[index]; int secondIndex = self48->index[index];
if (secondIndex >= 0) { if (secondIndex >= 0) {
return self48->children[secondIndex].child; return self48->children[secondIndex].child;
} }
} else { } break;
case Type::Node256: {
auto *self256 = static_cast<Node256 *>(self); auto *self256 = static_cast<Node256 *>(self);
if (auto &result = self256->children[index].child; result != nullptr) { if (auto &result = self256->children[index].child; result != nullptr) {
return result; return result;
} }
} break;
} }
if (self->type == Type::Node0) { switch (self->type) {
case Type::Node0: {
auto *self0 = static_cast<Node0 *>(self); auto *self0 = static_cast<Node0 *>(self);
auto *newSelf = allocators->node4.allocate(self->partialKeyLen); auto *newSelf = allocators->node3.allocate(self->partialKeyLen);
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin, memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
kNodeCopySize); kNodeCopySize);
memcpy(newSelf->partialKey(), self0->partialKey(), self->partialKeyLen); memcpy(newSelf->partialKey(), self0->partialKey(), self->partialKeyLen);
allocators->node0.release(self0); allocators->node0.release(self0);
self = newSelf; self = newSelf;
goto insert16; goto insert3;
}
} else if (self->type == Type::Node4) { case Type::Node3: {
auto *self4 = static_cast<Node4 *>(self); if (self->numChildren == Node3::kMaxNodes) {
auto *self3 = static_cast<Node3 *>(self);
if (self->numChildren == 4) {
auto *newSelf = allocators->node16.allocate(self->partialKeyLen); auto *newSelf = allocators->node16.allocate(self->partialKeyLen);
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin, memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
kNodeCopySize); kNodeCopySize);
memcpy(newSelf->partialKey(), self4->partialKey(), self->partialKeyLen); memcpy(newSelf->partialKey(), self3->partialKey(), self->partialKeyLen);
// TODO replace with memcpy? // TODO replace with memcpy?
for (int i = 0; i < 4; ++i) { for (int i = 0; i < Node3::kMaxNodes; ++i) {
newSelf->index[i] = self4->index[i]; newSelf->index[i] = self3->index[i];
newSelf->children[i] = self4->children[i]; newSelf->children[i] = self3->children[i];
} }
allocators->node4.release(self4); allocators->node3.release(self3);
setChildrenParents(newSelf); setChildrenParents(newSelf);
self = newSelf; self = newSelf;
goto insert16;
} }
goto insert16; insert3:
auto *self3 = static_cast<Node3 *>(self);
} else if (self->type == Type::Node16) { ++self->numChildren;
int i = 0;
if (self->numChildren == 16) { for (; i < self->numChildren - 1; ++i) {
if (int(self3->index[i]) > int(index)) {
memmove(self3->index + i + 1, self3->index + i,
self->numChildren - (i + 1));
memmove(self3->children + i + 1, self3->children + i,
(self->numChildren - (i + 1)) * sizeof(Child));
break;
}
}
self3->index[i] = index;
auto &result = self3->children[i].child;
result = nullptr;
return result;
}
case Type::Node16: {
if (self->numChildren == Node16::kMaxNodes) {
auto *self16 = static_cast<Node16 *>(self); auto *self16 = static_cast<Node16 *>(self);
auto *newSelf = allocators->node48.allocate(self->partialKeyLen); auto *newSelf = allocators->node48.allocate(self->partialKeyLen);
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin, memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
kNodeCopySize); kNodeCopySize);
memcpy(newSelf->partialKey(), self16->partialKey(), self->partialKeyLen); memcpy(newSelf->partialKey(), self16->partialKey(), self->partialKeyLen);
newSelf->nextFree = 16; newSelf->nextFree = Node16::kMaxNodes;
int i = 0; int i = 0;
for (auto x : self16->index) { for (auto x : self16->index) {
newSelf->bitSet.set(x); newSelf->bitSet.set(x);
@@ -658,7 +708,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
newSelf->index[x] = i; newSelf->index[x] = i;
++i; ++i;
} }
assert(i == 16); assert(i == Node16::kMaxNodes);
allocators->node16.release(self16); allocators->node16.release(self16);
setChildrenParents(newSelf); setChildrenParents(newSelf);
self = newSelf; self = newSelf;
@@ -667,6 +717,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
insert16: insert16:
auto *self16 = static_cast<Node16 *>(self); auto *self16 = static_cast<Node16 *>(self);
assert(self->type == Type::Node16);
++self->numChildren; ++self->numChildren;
int i = 0; int i = 0;
@@ -683,7 +734,8 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
auto &result = self16->children[i].child; auto &result = self16->children[i].child;
result = nullptr; result = nullptr;
return result; return result;
} else if (self->type == Type::Node48) { }
case Type::Node48: {
if (self->numChildren == 48) { if (self->numChildren == 48) {
auto *self48 = static_cast<Node48 *>(self); auto *self48 = static_cast<Node48 *>(self);
@@ -713,14 +765,15 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
auto &result = self48->children[nextFree].child; auto &result = self48->children[nextFree].child;
result = nullptr; result = nullptr;
return result; return result;
} else { }
assert(self->type == Type::Node256); case Type::Node256: {
insert256: insert256:
auto *self256 = static_cast<Node256 *>(self); auto *self256 = static_cast<Node256 *>(self);
++self->numChildren; ++self->numChildren;
self256->bitSet.set(index); self256->bitSet.set(index);
return self256->children[index].child; return self256->children[index].child;
} }
}
} }
Node *nextPhysical(Node *node) { Node *nextPhysical(Node *node) {
@@ -765,23 +818,23 @@ void makeCapacityAtLeast(Node *&self, int capacity, NodeAllocators *allocators,
} }
self = newSelf; self = newSelf;
} break; } break;
case Type::Node4: { case Type::Node3: {
auto *self4 = (Node4 *)self; auto *self3 = (Node3 *)self;
auto *newSelf = allocators->node4.allocate(capacity); auto *newSelf = allocators->node3.allocate(capacity);
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin, memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
kNodeCopySize); kNodeCopySize);
memcpy(newSelf->partialKey(), self4->partialKey(), self->partialKeyLen); memcpy(newSelf->partialKey(), self3->partialKey(), self->partialKeyLen);
// TODO replace with memcpy? // TODO replace with memcpy?
for (int i = 0; i < 4; ++i) { for (int i = 0; i < Node3::kMaxNodes; ++i) {
newSelf->index[i] = self4->index[i]; newSelf->index[i] = self3->index[i];
newSelf->children[i] = self4->children[i]; newSelf->children[i] = self3->children[i];
} }
getInTree(self, impl) = newSelf; getInTree(self, impl) = newSelf;
setChildrenParents(newSelf); setChildrenParents(newSelf);
if constexpr (kUseFreeList) { if constexpr (kUseFreeList) {
allocators->node4.release(self4); allocators->node3.release(self3);
} else { } else {
free(self4); free(self3);
} }
self = newSelf; self = newSelf;
} break; } break;
@@ -792,7 +845,7 @@ void makeCapacityAtLeast(Node *&self, int capacity, NodeAllocators *allocators,
kNodeCopySize); kNodeCopySize);
memcpy(newSelf->partialKey(), self16->partialKey(), self->partialKeyLen); memcpy(newSelf->partialKey(), self16->partialKey(), self->partialKeyLen);
// TODO replace with memcpy? // TODO replace with memcpy?
for (int i = 0; i < 16; ++i) { for (int i = 0; i < Node16::kMaxNodes; ++i) {
newSelf->index[i] = self16->index[i]; newSelf->index[i] = self16->index[i];
newSelf->children[i] = self16->children[i]; newSelf->children[i] = self16->children[i];
} }
@@ -875,20 +928,20 @@ void maybeDownsize(Node *self, NodeAllocators *allocators,
switch (self->type) { switch (self->type) {
case Type::Node0: case Type::Node0:
__builtin_unreachable(); // GCOVR_EXCL_LINE __builtin_unreachable(); // GCOVR_EXCL_LINE
case Type::Node4: { case Type::Node3: {
auto *self4 = (Node4 *)self; auto *self3 = (Node3 *)self;
if (self->numChildren == 0) { if (self->numChildren == 0) {
auto *newSelf = allocators->node0.allocate(self->partialKeyLen); auto *newSelf = allocators->node0.allocate(self->partialKeyLen);
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin, memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
kNodeCopySize); kNodeCopySize);
memcpy(newSelf->partialKey(), self4->partialKey(), self->partialKeyLen); memcpy(newSelf->partialKey(), self3->partialKey(), self->partialKeyLen);
getInTree(self, impl) = newSelf; getInTree(self, impl) = newSelf;
allocators->node4.release(self4); allocators->node3.release(self3);
} else if (self->numChildren == 1) { } else if (self->numChildren == 1) {
if (!self->entryPresent) { if (!self->entryPresent) {
auto *child = self4->children[0].child; auto *child = self3->children[0].child;
int minCapacity = self4->partialKeyLen + 1 + child->partialKeyLen; int minCapacity = self3->partialKeyLen + 1 + child->partialKeyLen;
if (minCapacity > child->partialKeyCapacity) { if (minCapacity > child->partialKeyCapacity) {
const bool update = child == dontInvalidate; const bool update = child == dontInvalidate;
@@ -909,11 +962,11 @@ void maybeDownsize(Node *self, NodeAllocators *allocators,
int64_t childMaxVersion = maxVersion(child, impl); int64_t childMaxVersion = maxVersion(child, impl);
// Construct new partial key for child // Construct new partial key for child
memmove(child->partialKey() + self4->partialKeyLen + 1, memmove(child->partialKey() + self3->partialKeyLen + 1,
child->partialKey(), child->partialKeyLen); child->partialKey(), child->partialKeyLen);
memcpy(child->partialKey(), self4->partialKey(), self->partialKeyLen); memcpy(child->partialKey(), self3->partialKey(), self->partialKeyLen);
child->partialKey()[self4->partialKeyLen] = self4->index[0]; child->partialKey()[self3->partialKeyLen] = self3->index[0];
child->partialKeyLen += 1 + self4->partialKeyLen; child->partialKeyLen += 1 + self3->partialKeyLen;
child->parent = self->parent; child->parent = self->parent;
child->parentsIndex = self->parentsIndex; child->parentsIndex = self->parentsIndex;
@@ -923,19 +976,19 @@ void maybeDownsize(Node *self, NodeAllocators *allocators,
maxVersion(child, impl) = childMaxVersion; maxVersion(child, impl) = childMaxVersion;
getInTree(self, impl) = child; getInTree(self, impl) = child;
allocators->node4.release(self4); allocators->node3.release(self3);
} }
} }
} break; } break;
case Type::Node16: case Type::Node16:
if (self->numChildren + int(self->entryPresent) < kMinChildrenNode16) { if (self->numChildren + int(self->entryPresent) < kMinChildrenNode16) {
auto *self16 = (Node16 *)self; auto *self16 = (Node16 *)self;
auto *newSelf = allocators->node4.allocate(self->partialKeyLen); auto *newSelf = allocators->node3.allocate(self->partialKeyLen);
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin, memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
kNodeCopySize); kNodeCopySize);
memcpy(newSelf->partialKey(), self16->partialKey(), self->partialKeyLen); memcpy(newSelf->partialKey(), self16->partialKey(), self->partialKeyLen);
// TODO replace with memcpy? // TODO replace with memcpy?
for (int i = 0; i < 4; ++i) { for (int i = 0; i < Node3::kMaxNodes; ++i) {
newSelf->index[i] = self16->index[i]; newSelf->index[i] = self16->index[i];
newSelf->children[i] = self16->children[i]; newSelf->children[i] = self16->children[i];
} }
@@ -957,7 +1010,7 @@ void maybeDownsize(Node *self, NodeAllocators *allocators,
[&](int c) { [&](int c) {
// Suppress a false positive -Waggressive-loop-optimizations warning // Suppress a false positive -Waggressive-loop-optimizations warning
// in gcc. `assume` doesn't work for some reason. // in gcc. `assume` doesn't work for some reason.
if (!(i < 16)) { if (!(i < Node16::kMaxNodes)) {
__builtin_unreachable(); // GCOVR_EXCL_LINE __builtin_unreachable(); // GCOVR_EXCL_LINE
} }
newSelf->index[i] = c; newSelf->index[i] = c;
@@ -1026,8 +1079,8 @@ Node *erase(Node *self, NodeAllocators *allocators, ConflictSet::Impl *impl,
case Type::Node0: case Type::Node0:
allocators->node0.release((Node0 *)self); allocators->node0.release((Node0 *)self);
break; break;
case Type::Node4: case Type::Node3:
allocators->node4.release((Node4 *)self); allocators->node3.release((Node3 *)self);
break; break;
case Type::Node16: case Type::Node16:
allocators->node16.release((Node16 *)self); allocators->node16.release((Node16 *)self);
@@ -1040,7 +1093,21 @@ Node *erase(Node *self, NodeAllocators *allocators, ConflictSet::Impl *impl,
break; break;
} }
if (parent->type <= Type::Node16) { switch (parent->type) {
case Type::Node0:
__builtin_unreachable(); // GCOVR_EXCL_LINE
case Type::Node3: {
auto *parent3 = static_cast<Node3 *>(parent);
int nodeIndex = getNodeIndex(parent3, parentsIndex);
assert(nodeIndex >= 0);
memmove(parent3->index + nodeIndex, parent3->index + nodeIndex + 1,
sizeof(parent3->index[0]) *
(parent->numChildren - (nodeIndex + 1)));
memmove(parent3->children + nodeIndex, parent3->children + nodeIndex + 1,
sizeof(parent3->children[0]) *
(parent->numChildren - (nodeIndex + 1)));
} break;
case Type::Node16: {
auto *parent16 = static_cast<Node16 *>(parent); auto *parent16 = static_cast<Node16 *>(parent);
int nodeIndex = getNodeIndex(parent16, parentsIndex); int nodeIndex = getNodeIndex(parent16, parentsIndex);
assert(nodeIndex >= 0); assert(nodeIndex >= 0);
@@ -1050,7 +1117,8 @@ Node *erase(Node *self, NodeAllocators *allocators, ConflictSet::Impl *impl,
memmove(parent16->children + nodeIndex, parent16->children + nodeIndex + 1, memmove(parent16->children + nodeIndex, parent16->children + nodeIndex + 1,
sizeof(parent16->children[0]) * sizeof(parent16->children[0]) *
(parent->numChildren - (nodeIndex + 1))); (parent->numChildren - (nodeIndex + 1)));
} else if (parent->type == Type::Node48) { } break;
case Type::Node48: {
auto *parent48 = static_cast<Node48 *>(parent); auto *parent48 = static_cast<Node48 *>(parent);
parent48->bitSet.reset(parentsIndex); parent48->bitSet.reset(parentsIndex);
int8_t toRemoveChildrenIndex = int8_t toRemoveChildrenIndex =
@@ -1064,11 +1132,14 @@ Node *erase(Node *self, NodeAllocators *allocators, ConflictSet::Impl *impl,
parent48->index[parent48->children[toRemoveChildrenIndex] parent48->index[parent48->children[toRemoveChildrenIndex]
.child->parentsIndex] = toRemoveChildrenIndex; .child->parentsIndex] = toRemoveChildrenIndex;
} }
} else { } break;
case Type::Node256: {
auto *parent256 = static_cast<Node256 *>(parent); auto *parent256 = static_cast<Node256 *>(parent);
parent256->bitSet.reset(parentsIndex); parent256->bitSet.reset(parentsIndex);
parent256->children[parentsIndex].child = nullptr; parent256->children[parentsIndex].child = nullptr;
} break;
} }
--parent->numChildren; --parent->numChildren;
if (parent->numChildren == 0 && !parent->entryPresent && if (parent->numChildren == 0 && !parent->entryPresent &&
parent->parent != nullptr) { parent->parent != nullptr) {
@@ -1357,7 +1428,8 @@ downLeftSpine:
} }
// Return the max version among all keys starting with the search path of n + // Return the max version among all keys starting with the search path of n +
// [child], where child in (begin, end) // [child], where child in (begin, end). Does not account for the range version
// of firstGt(searchpath(n) + [end - 1])
int64_t maxBetweenExclusive(Node *n, int begin, int end) { int64_t maxBetweenExclusive(Node *n, int begin, int end) {
assume(-1 <= begin); assume(-1 <= begin);
assume(begin <= 256); assume(begin <= 256);
@@ -1379,9 +1451,16 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
} }
switch (n->type) { switch (n->type) {
case Type::Node0: case Type::Node0:
[[fallthrough]]; // We would have returned above, after not finding a child
case Type::Node4: __builtin_unreachable(); // GCOVR_EXCL_LINE
[[fallthrough]]; case Type::Node3: {
auto *self = static_cast<Node3 *>(n);
for (int i = 0; i < self->numChildren && self->index[i] < end; ++i) {
if (begin <= self->index[i]) {
result = std::max(result, self->children[i].childMaxVersion);
}
}
} break;
case Type::Node16: { case Type::Node16: {
auto *self = static_cast<Node16 *>(n); auto *self = static_cast<Node16 *>(n);
for (int i = 0; i < self->numChildren && self->index[i] < end; ++i) { for (int i = 0; i < self->numChildren && self->index[i] < end; ++i) {
@@ -1389,8 +1468,7 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
result = std::max(result, self->children[i].childMaxVersion); result = std::max(result, self->children[i].childMaxVersion);
} }
} }
break; } break;
}
case Type::Node48: { case Type::Node48: {
auto *self = static_cast<Node48 *>(n); auto *self = static_cast<Node48 *>(n);
self->bitSet.forEachInRange( self->bitSet.forEachInRange(
@@ -1879,7 +1957,7 @@ template <bool kBegin>
int64_t oldMaxVersion = maxVersion(old, impl); int64_t oldMaxVersion = maxVersion(old, impl);
// *self will have one child // *self will have one child
*self = allocators->node4.allocate(partialKeyIndex); *self = allocators->node3.allocate(partialKeyIndex);
memcpy((char *)*self + kNodeCopyBegin, (char *)old + kNodeCopyBegin, memcpy((char *)*self + kNodeCopyBegin, (char *)old + kNodeCopyBegin,
kNodeCopySize); kNodeCopySize);
@@ -2239,19 +2317,30 @@ int64_t &maxVersion(Node *n, ConflictSet::Impl *impl) {
if (n == nullptr) { if (n == nullptr) {
return impl->rootMaxVersion; return impl->rootMaxVersion;
} }
if (n->type <= Type::Node16) { switch (n->type) {
case Type::Node0:
__builtin_unreachable(); // GCOVR_EXCL_LINE
case Type::Node3: {
auto *n3 = static_cast<Node3 *>(n);
int i = getNodeIndex(n3, index);
return n3->children[i].childMaxVersion;
}
case Type::Node16: {
auto *n16 = static_cast<Node16 *>(n); auto *n16 = static_cast<Node16 *>(n);
int i = getNodeIndex(n16, index); int i = getNodeIndex(n16, index);
return n16->children[i].childMaxVersion; return n16->children[i].childMaxVersion;
} else if (n->type == Type::Node48) { }
case Type::Node48: {
auto *n48 = static_cast<Node48 *>(n); auto *n48 = static_cast<Node48 *>(n);
assert(n48->bitSet.test(index)); assert(n48->bitSet.test(index));
return n48->children[n48->index[index]].childMaxVersion; return n48->children[n48->index[index]].childMaxVersion;
} else { }
case Type::Node256: {
auto *n256 = static_cast<Node256 *>(n); auto *n256 = static_cast<Node256 *>(n);
assert(n256->bitSet.test(index)); assert(n256->bitSet.test(index));
return n256->children[index].childMaxVersion; return n256->children[index].childMaxVersion;
} }
}
} }
Node *&getInTree(Node *n, ConflictSet::Impl *impl) { Node *&getInTree(Node *n, ConflictSet::Impl *impl) {
@@ -2287,23 +2376,6 @@ ConflictSet::~ConflictSet() {
} }
} }
#if SHOW_MEMORY
// Diagnostic helper, compiled only when SHOW_MEMORY is enabled and exported
// with default visibility. Writes to stderr the high-water mark, in bytes, of
// each per-node-type allocator owned by the given conflict set.
__attribute__((visibility("default"))) void showMemory(const ConflictSet &cs) {
  // Recover the implementation pointer by copying the first pointer-sized
  // bytes of the handle (assumes `impl` is ConflictSet's leading member --
  // TODO confirm against the class definition).
  ConflictSet::Impl *state;
  memcpy(&state, &cs, sizeof(state)); // NOLINT

  auto &pools = state->allocators;
  // One line per node type, smallest to largest.
  fprintf(stderr, "Max Node0 memory usage: %" PRId64 "\n",
          pools.node0.highWaterMarkBytes());
  fprintf(stderr, "Max Node4 memory usage: %" PRId64 "\n",
          pools.node4.highWaterMarkBytes());
  fprintf(stderr, "Max Node16 memory usage: %" PRId64 "\n",
          pools.node16.highWaterMarkBytes());
  fprintf(stderr, "Max Node48 memory usage: %" PRId64 "\n",
          pools.node48.highWaterMarkBytes());
  fprintf(stderr, "Max Node256 memory usage: %" PRId64 "\n",
          pools.node256.highWaterMarkBytes());
}
#endif
ConflictSet::ConflictSet(ConflictSet &&other) noexcept ConflictSet::ConflictSet(ConflictSet &&other) noexcept
: impl(std::exchange(other.impl, nullptr)) {} : impl(std::exchange(other.impl, nullptr)) {}
@@ -2494,16 +2566,6 @@ Iterator firstGeq(Node *n, std::string_view key) {
expected = std::max(expected, borrowed.n->entry.rangeVersion); expected = std::max(expected, borrowed.n->entry.rangeVersion);
} }
} }
if (node->parent != nullptr &&
getChildMaxVersion(node->parent, node->parentsIndex) !=
maxVersion(node, impl)) {
fprintf(stderr,
"%s has max version %" PRId64
" . But parent has child max version %" PRId64 "\n",
getSearchPathPrintable(node).c_str(), maxVersion(node, impl),
getChildMaxVersion(node->parent, node->parentsIndex));
success = false;
}
if (maxVersion(node, impl) > oldestVersion && if (maxVersion(node, impl) > oldestVersion &&
maxVersion(node, impl) != expected) { maxVersion(node, impl) != expected) {
fprintf(stderr, "%s has max version %" PRId64 " . Expected %" PRId64 "\n", fprintf(stderr, "%s has max version %" PRId64 " . Expected %" PRId64 "\n",
@@ -2536,8 +2598,8 @@ Iterator firstGeq(Node *n, std::string_view key) {
case Type::Node0: case Type::Node0:
minNumChildren = 0; minNumChildren = 0;
break; break;
case Type::Node4: case Type::Node3:
minNumChildren = kMinChildrenNode4; minNumChildren = kMinChildrenNode3;
break; break;
case Type::Node16: case Type::Node16:
minNumChildren = kMinChildrenNode16; minNumChildren = kMinChildrenNode16;