Lower kBytesPerKey to 144 by changing Node4 to Node3
All checks were successful
Tests / Release [gcc] total: 932, passed: 932
GNU C Compiler (gcc) |Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|5|0|5|0|:zzz:
Tests / Release [gcc,aarch64] total: 931, passed: 931
Tests / Coverage total: 930, passed: 930
weaselab/conflict-set/pipeline/head This commit looks good
All checks were successful
Tests / Release [gcc] total: 932, passed: 932
GNU C Compiler (gcc) |Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|5|0|5|0|:zzz:
Tests / Release [gcc,aarch64] total: 931, passed: 931
Tests / Coverage total: 930, passed: 930
weaselab/conflict-set/pipeline/head This commit looks good
This commit is contained in:
305
ConflictSet.cpp
305
ConflictSet.cpp
@@ -176,7 +176,7 @@ int BitSet::firstSetGeq(int i) const {
|
|||||||
|
|
||||||
enum class Type : int8_t {
|
enum class Type : int8_t {
|
||||||
Node0,
|
Node0,
|
||||||
Node4,
|
Node3,
|
||||||
Node16,
|
Node16,
|
||||||
Node48,
|
Node48,
|
||||||
Node256,
|
Node256,
|
||||||
@@ -215,19 +215,19 @@ struct Node0 : Node {
|
|||||||
uint8_t *partialKey() { return (uint8_t *)(this + 1); }
|
uint8_t *partialKey() { return (uint8_t *)(this + 1); }
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Node4 : Node {
|
struct Node3 : Node {
|
||||||
// Sorted
|
constexpr static auto kMaxNodes = 3;
|
||||||
uint8_t index[16]; // 16 so that we can use the same simd index search
|
uint8_t index[kMaxNodes];
|
||||||
// implementation as Node16
|
Child children[kMaxNodes];
|
||||||
Child children[4];
|
Node3() { this->type = Type::Node3; }
|
||||||
Node4() { this->type = Type::Node4; }
|
|
||||||
uint8_t *partialKey() { return (uint8_t *)(this + 1); }
|
uint8_t *partialKey() { return (uint8_t *)(this + 1); }
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Node16 : Node {
|
struct Node16 : Node {
|
||||||
// Sorted
|
// Sorted
|
||||||
uint8_t index[16];
|
constexpr static auto kMaxNodes = 16;
|
||||||
Child children[16];
|
uint8_t index[kMaxNodes];
|
||||||
|
Child children[kMaxNodes];
|
||||||
Node16() { this->type = Type::Node16; }
|
Node16() { this->type = Type::Node16; }
|
||||||
uint8_t *partialKey() { return (uint8_t *)(this + 1); }
|
uint8_t *partialKey() { return (uint8_t *)(this + 1); }
|
||||||
};
|
};
|
||||||
@@ -262,22 +262,33 @@ std::string getSearchPathPrintable(Node *n);
|
|||||||
|
|
||||||
// Bound memory usage following the analysis in the ART paper
|
// Bound memory usage following the analysis in the ART paper
|
||||||
|
|
||||||
constexpr int kBytesPerKey = 176;
|
// Each node with an entry present gets a budget of kBytesPerKey. Node0 always
|
||||||
constexpr int kMinSavingsPerNode = 120;
|
// has an entry present.
|
||||||
constexpr int kMinChildrenNode4 = 2;
|
constexpr int kBytesPerKey = 144;
|
||||||
constexpr int kMinChildrenNode16 = 5;
|
// Induction hypothesis is that each node's surplus is >= kMinNodeSurplus
|
||||||
|
constexpr int kMinNodeSurplus = 104;
|
||||||
|
constexpr int kMinChildrenNode3 = 2;
|
||||||
|
constexpr int kMinChildrenNode16 = 4;
|
||||||
constexpr int kMinChildrenNode48 = 17;
|
constexpr int kMinChildrenNode48 = 17;
|
||||||
constexpr int kMinChildrenNode256 = 49;
|
constexpr int kMinChildrenNode256 = 49;
|
||||||
|
|
||||||
static_assert(kMinChildrenNode256 * kMinSavingsPerNode - sizeof(Node256) >=
|
constexpr int kNode256Surplus =
|
||||||
kMinSavingsPerNode);
|
kMinChildrenNode256 * kMinNodeSurplus - sizeof(Node256);
|
||||||
static_assert(kMinChildrenNode48 * kMinSavingsPerNode - sizeof(Node48) >=
|
static_assert(kNode256Surplus >= kMinNodeSurplus);
|
||||||
kMinSavingsPerNode);
|
|
||||||
static_assert(kMinChildrenNode16 * kMinSavingsPerNode - sizeof(Node16) >=
|
constexpr int kNode48Surplus =
|
||||||
kMinSavingsPerNode);
|
kMinChildrenNode48 * kMinNodeSurplus - sizeof(Node48);
|
||||||
static_assert(kMinChildrenNode4 * kMinSavingsPerNode - sizeof(Node4) >=
|
static_assert(kNode48Surplus >= kMinNodeSurplus);
|
||||||
kMinSavingsPerNode);
|
|
||||||
static_assert(kBytesPerKey - sizeof(Node0) >= kMinSavingsPerNode);
|
constexpr int kNode16Surplus =
|
||||||
|
kMinChildrenNode16 * kMinNodeSurplus - sizeof(Node16);
|
||||||
|
static_assert(kNode16Surplus >= kMinNodeSurplus);
|
||||||
|
|
||||||
|
constexpr int kNode3Surplus =
|
||||||
|
kMinChildrenNode3 * kMinNodeSurplus - sizeof(Node3);
|
||||||
|
static_assert(kNode3Surplus >= kMinNodeSurplus);
|
||||||
|
|
||||||
|
static_assert(kBytesPerKey - sizeof(Node0) >= kMinNodeSurplus);
|
||||||
|
|
||||||
// setOldestVersion will additionally try to maintain this property:
|
// setOldestVersion will additionally try to maintain this property:
|
||||||
// `max(children, 1) * length >= capacity`
|
// `max(children, 1) * length >= capacity`
|
||||||
@@ -356,8 +367,8 @@ uint8_t *Node::partialKey() {
|
|||||||
switch (type) {
|
switch (type) {
|
||||||
case Type::Node0:
|
case Type::Node0:
|
||||||
return ((Node0 *)this)->partialKey();
|
return ((Node0 *)this)->partialKey();
|
||||||
case Type::Node4:
|
case Type::Node3:
|
||||||
return ((Node4 *)this)->partialKey();
|
return ((Node3 *)this)->partialKey();
|
||||||
case Type::Node16:
|
case Type::Node16:
|
||||||
return ((Node16 *)this)->partialKey();
|
return ((Node16 *)this)->partialKey();
|
||||||
case Type::Node48:
|
case Type::Node48:
|
||||||
@@ -370,13 +381,14 @@ uint8_t *Node::partialKey() {
|
|||||||
|
|
||||||
struct NodeAllocators {
|
struct NodeAllocators {
|
||||||
BoundedFreeListAllocator<Node0> node0;
|
BoundedFreeListAllocator<Node0> node0;
|
||||||
BoundedFreeListAllocator<Node4> node4;
|
BoundedFreeListAllocator<Node3> node3;
|
||||||
BoundedFreeListAllocator<Node16> node16;
|
BoundedFreeListAllocator<Node16> node16;
|
||||||
BoundedFreeListAllocator<Node48> node48;
|
BoundedFreeListAllocator<Node48> node48;
|
||||||
BoundedFreeListAllocator<Node256> node256;
|
BoundedFreeListAllocator<Node256> node256;
|
||||||
};
|
};
|
||||||
|
|
||||||
int getNodeIndex(Node16 *self, uint8_t index) {
|
template <class NodeT> int getNodeIndex(NodeT *self, uint8_t index) {
|
||||||
|
static_assert(std::is_same_v<NodeT, Node3> || std::is_same_v<NodeT, Node16>);
|
||||||
#ifdef HAS_AVX
|
#ifdef HAS_AVX
|
||||||
// Based on https://www.the-paper-trail.org/post/art-paper-notes/
|
// Based on https://www.the-paper-trail.org/post/art-paper-notes/
|
||||||
|
|
||||||
@@ -388,7 +400,7 @@ int getNodeIndex(Node16 *self, uint8_t index) {
|
|||||||
// keys aren't valid, we'll mask the results to only consider the valid ones
|
// keys aren't valid, we'll mask the results to only consider the valid ones
|
||||||
// below.
|
// below.
|
||||||
__m128i indices;
|
__m128i indices;
|
||||||
memcpy(&indices, self->index, sizeof(self->index));
|
memcpy(&indices, self->index, NodeT::kMaxNodes);
|
||||||
__m128i results = _mm_cmpeq_epi8(key_vec, indices);
|
__m128i results = _mm_cmpeq_epi8(key_vec, indices);
|
||||||
|
|
||||||
// Build a mask to select only the first node->num_children values from the
|
// Build a mask to select only the first node->num_children values from the
|
||||||
@@ -411,10 +423,12 @@ int getNodeIndex(Node16 *self, uint8_t index) {
|
|||||||
// https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
|
// https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
|
||||||
|
|
||||||
uint8x16_t indices;
|
uint8x16_t indices;
|
||||||
memcpy(&indices, self->index, sizeof(self->index));
|
memcpy(&indices, self->index, NodeT::kMaxNodes);
|
||||||
// 0xff for each match
|
// 0xff for each match
|
||||||
uint16x8_t results =
|
uint16x8_t results =
|
||||||
vreinterpretq_u16_u8(vceqq_u8(vdupq_n_u8(index), indices));
|
vreinterpretq_u16_u8(vceqq_u8(vdupq_n_u8(index), indices));
|
||||||
|
static_assert(NodeT::kMaxNodes <= 16);
|
||||||
|
assume(self->numChildren <= NodeT::kMaxNodes);
|
||||||
uint64_t mask = self->numChildren == 16
|
uint64_t mask = self->numChildren == 16
|
||||||
? uint64_t(-1)
|
? uint64_t(-1)
|
||||||
: (uint64_t(1) << (self->numChildren * 4)) - 1;
|
: (uint64_t(1) << (self->numChildren * 4)) - 1;
|
||||||
@@ -439,8 +453,10 @@ Node *&getChildExists(Node *self, uint8_t index) {
|
|||||||
switch (self->type) {
|
switch (self->type) {
|
||||||
case Type::Node0:
|
case Type::Node0:
|
||||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||||
case Type::Node4:
|
case Type::Node3: {
|
||||||
[[fallthrough]];
|
auto *self3 = static_cast<Node3 *>(self);
|
||||||
|
return self3->children[getNodeIndex(self3, index)].child;
|
||||||
|
}
|
||||||
case Type::Node16: {
|
case Type::Node16: {
|
||||||
auto *self16 = static_cast<Node16 *>(self);
|
auto *self16 = static_cast<Node16 *>(self);
|
||||||
return self16->children[getNodeIndex(self16, index)].child;
|
return self16->children[getNodeIndex(self16, index)].child;
|
||||||
@@ -467,8 +483,11 @@ Node *getChild(Node *self, uint8_t index) {
|
|||||||
switch (self->type) {
|
switch (self->type) {
|
||||||
case Type::Node0:
|
case Type::Node0:
|
||||||
return nullptr;
|
return nullptr;
|
||||||
case Type::Node4:
|
case Type::Node3: {
|
||||||
[[fallthrough]];
|
auto *self3 = static_cast<Node3 *>(self);
|
||||||
|
int i = getNodeIndex(self3, index);
|
||||||
|
return i < 0 ? nullptr : self3->children[i].child;
|
||||||
|
}
|
||||||
case Type::Node16: {
|
case Type::Node16: {
|
||||||
auto *self16 = static_cast<Node16 *>(self);
|
auto *self16 = static_cast<Node16 *>(self);
|
||||||
int i = getNodeIndex(self16, index);
|
int i = getNodeIndex(self16, index);
|
||||||
@@ -486,29 +505,20 @@ Node *getChild(Node *self, uint8_t index) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int getChildGeq(Node *self, int child) {
|
template <class NodeT> int getChildGeqSimd(NodeT *self, int child) {
|
||||||
if (child > 255) {
|
static_assert(std::is_same_v<NodeT, Node3> || std::is_same_v<NodeT, Node16>);
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
switch (self->type) {
|
|
||||||
case Type::Node0:
|
|
||||||
return -1;
|
|
||||||
case Type::Node4:
|
|
||||||
[[fallthrough]];
|
|
||||||
case Type::Node16: {
|
|
||||||
auto *self16 = static_cast<Node16 *>(self);
|
|
||||||
#ifdef HAS_AVX
|
#ifdef HAS_AVX
|
||||||
__m128i key_vec = _mm_set1_epi8(child);
|
__m128i key_vec = _mm_set1_epi8(child);
|
||||||
__m128i indices;
|
__m128i indices;
|
||||||
memcpy(&indices, self16->index, sizeof(self16->index));
|
memcpy(&indices, self->index, NodeT::kMaxNodes);
|
||||||
__m128i results = _mm_cmpeq_epi8(key_vec, _mm_min_epu8(key_vec, indices));
|
__m128i results = _mm_cmpeq_epi8(key_vec, _mm_min_epu8(key_vec, indices));
|
||||||
int mask = (1 << self16->numChildren) - 1;
|
int mask = (1 << self->numChildren) - 1;
|
||||||
uint32_t bitfield = _mm_movemask_epi8(results) & mask;
|
uint32_t bitfield = _mm_movemask_epi8(results) & mask;
|
||||||
int result = bitfield == 0 ? -1 : self16->index[std::countr_zero(bitfield)];
|
int result = bitfield == 0 ? -1 : self->index[std::countr_zero(bitfield)];
|
||||||
assert(result == [&]() -> int {
|
assert(result == [&]() -> int {
|
||||||
for (int i = 0; i < self16->numChildren; ++i) {
|
for (int i = 0; i < self->numChildren; ++i) {
|
||||||
if (self16->index[i] >= child) {
|
if (self->index[i] >= child) {
|
||||||
return self16->index[i];
|
return self->index[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return -1;
|
return -1;
|
||||||
@@ -516,9 +526,11 @@ int getChildGeq(Node *self, int child) {
|
|||||||
return result;
|
return result;
|
||||||
#elif defined(HAS_ARM_NEON)
|
#elif defined(HAS_ARM_NEON)
|
||||||
uint8x16_t indices;
|
uint8x16_t indices;
|
||||||
memcpy(&indices, self16->index, sizeof(self16->index));
|
memcpy(&indices, self->index, sizeof(self->index));
|
||||||
// 0xff for each leq
|
// 0xff for each leq
|
||||||
auto results = vcleq_u8(vdupq_n_u8(child), indices);
|
auto results = vcleq_u8(vdupq_n_u8(child), indices);
|
||||||
|
static_assert(NodeT::kMaxNodes <= 16);
|
||||||
|
assume(self->numChildren <= NodeT::kMaxNodes);
|
||||||
uint64_t mask = self->numChildren == 16
|
uint64_t mask = self->numChildren == 16
|
||||||
? uint64_t(-1)
|
? uint64_t(-1)
|
||||||
: (uint64_t(1) << (self->numChildren * 4)) - 1;
|
: (uint64_t(1) << (self->numChildren * 4)) - 1;
|
||||||
@@ -528,12 +540,11 @@ int getChildGeq(Node *self, int child) {
|
|||||||
vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(results), 4)),
|
vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(results), 4)),
|
||||||
0) &
|
0) &
|
||||||
mask;
|
mask;
|
||||||
int simd =
|
int simd = bitfield == 0 ? -1 : self->index[std::countr_zero(bitfield) / 4];
|
||||||
bitfield == 0 ? -1 : self16->index[std::countr_zero(bitfield) / 4];
|
|
||||||
assert(simd == [&]() -> int {
|
assert(simd == [&]() -> int {
|
||||||
for (int i = 0; i < self->numChildren; ++i) {
|
for (int i = 0; i < self->numChildren; ++i) {
|
||||||
if (self16->index[i] >= child) {
|
if (self->index[i] >= child) {
|
||||||
return self16->index[i];
|
return self->index[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return -1;
|
return -1;
|
||||||
@@ -542,15 +553,27 @@ int getChildGeq(Node *self, int child) {
|
|||||||
#else
|
#else
|
||||||
for (int i = 0; i < self->numChildren; ++i) {
|
for (int i = 0; i < self->numChildren; ++i) {
|
||||||
if (i > 0) {
|
if (i > 0) {
|
||||||
assert(self16->index[i - 1] < self16->index[i]);
|
assert(self->index[i - 1] < self->index[i]);
|
||||||
}
|
}
|
||||||
if (self16->index[i] >= child) {
|
if (self->index[i] >= child) {
|
||||||
return self16->index[i];
|
return self->index[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return -1;
|
return -1;
|
||||||
#endif
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
int getChildGeq(Node *self, int child) {
|
||||||
|
if (child > 255) {
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
|
switch (self->type) {
|
||||||
|
case Type::Node0:
|
||||||
|
return -1;
|
||||||
|
case Type::Node3:
|
||||||
|
return getChildGeqSimd(static_cast<Node3 *>(self), child);
|
||||||
|
case Type::Node16:
|
||||||
|
return getChildGeqSimd(static_cast<Node16 *>(self), child);
|
||||||
case Type::Node48:
|
case Type::Node48:
|
||||||
[[fallthrough]];
|
[[fallthrough]];
|
||||||
case Type::Node256: {
|
case Type::Node256: {
|
||||||
@@ -561,7 +584,7 @@ int getChildGeq(Node *self, int child) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void setChildrenParents(Node4 *n) {
|
void setChildrenParents(Node3 *n) {
|
||||||
for (int i = 0; i < n->numChildren; ++i) {
|
for (int i = 0; i < n->numChildren; ++i) {
|
||||||
n->children[i].child->parent = n;
|
n->children[i].child->parent = n;
|
||||||
}
|
}
|
||||||
@@ -592,8 +615,13 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
|
|||||||
switch (self->type) {
|
switch (self->type) {
|
||||||
case Type::Node0:
|
case Type::Node0:
|
||||||
break;
|
break;
|
||||||
case Type::Node4:
|
case Type::Node3: {
|
||||||
[[fallthrough]];
|
auto *self3 = static_cast<Node3 *>(self);
|
||||||
|
int i = getNodeIndex(self3, index);
|
||||||
|
if (i >= 0) {
|
||||||
|
return self3->children[i].child;
|
||||||
|
}
|
||||||
|
} break;
|
||||||
case Type::Node16: {
|
case Type::Node16: {
|
||||||
auto *self16 = static_cast<Node16 *>(self);
|
auto *self16 = static_cast<Node16 *>(self);
|
||||||
int i = getNodeIndex(self16, index);
|
int i = getNodeIndex(self16, index);
|
||||||
@@ -620,43 +648,59 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
|
|||||||
case Type::Node0: {
|
case Type::Node0: {
|
||||||
auto *self0 = static_cast<Node0 *>(self);
|
auto *self0 = static_cast<Node0 *>(self);
|
||||||
|
|
||||||
auto *newSelf = allocators->node4.allocate(self->partialKeyLen);
|
auto *newSelf = allocators->node3.allocate(self->partialKeyLen);
|
||||||
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
|
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
|
||||||
kNodeCopySize);
|
kNodeCopySize);
|
||||||
memcpy(newSelf->partialKey(), self0->partialKey(), self->partialKeyLen);
|
memcpy(newSelf->partialKey(), self0->partialKey(), self->partialKeyLen);
|
||||||
allocators->node0.release(self0);
|
allocators->node0.release(self0);
|
||||||
self = newSelf;
|
self = newSelf;
|
||||||
|
|
||||||
goto insert16;
|
goto insert3;
|
||||||
}
|
}
|
||||||
case Type::Node4: {
|
case Type::Node3: {
|
||||||
auto *self4 = static_cast<Node4 *>(self);
|
if (self->numChildren == Node3::kMaxNodes) {
|
||||||
|
auto *self3 = static_cast<Node3 *>(self);
|
||||||
if (self->numChildren == 4) {
|
|
||||||
auto *newSelf = allocators->node16.allocate(self->partialKeyLen);
|
auto *newSelf = allocators->node16.allocate(self->partialKeyLen);
|
||||||
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
|
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
|
||||||
kNodeCopySize);
|
kNodeCopySize);
|
||||||
memcpy(newSelf->partialKey(), self4->partialKey(), self->partialKeyLen);
|
memcpy(newSelf->partialKey(), self3->partialKey(), self->partialKeyLen);
|
||||||
// TODO replace with memcpy?
|
// TODO replace with memcpy?
|
||||||
for (int i = 0; i < 4; ++i) {
|
for (int i = 0; i < Node3::kMaxNodes; ++i) {
|
||||||
newSelf->index[i] = self4->index[i];
|
newSelf->index[i] = self3->index[i];
|
||||||
newSelf->children[i] = self4->children[i];
|
newSelf->children[i] = self3->children[i];
|
||||||
}
|
}
|
||||||
allocators->node4.release(self4);
|
allocators->node3.release(self3);
|
||||||
setChildrenParents(newSelf);
|
setChildrenParents(newSelf);
|
||||||
self = newSelf;
|
self = newSelf;
|
||||||
}
|
|
||||||
|
|
||||||
goto insert16;
|
goto insert16;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
insert3:
|
||||||
|
auto *self3 = static_cast<Node3 *>(self);
|
||||||
|
++self->numChildren;
|
||||||
|
int i = 0;
|
||||||
|
for (; i < self->numChildren - 1; ++i) {
|
||||||
|
if (int(self3->index[i]) > int(index)) {
|
||||||
|
memmove(self3->index + i + 1, self3->index + i,
|
||||||
|
self->numChildren - (i + 1));
|
||||||
|
memmove(self3->children + i + 1, self3->children + i,
|
||||||
|
(self->numChildren - (i + 1)) * sizeof(Child));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
self3->index[i] = index;
|
||||||
|
auto &result = self3->children[i].child;
|
||||||
|
result = nullptr;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
case Type::Node16: {
|
case Type::Node16: {
|
||||||
if (self->numChildren == 16) {
|
if (self->numChildren == Node16::kMaxNodes) {
|
||||||
auto *self16 = static_cast<Node16 *>(self);
|
auto *self16 = static_cast<Node16 *>(self);
|
||||||
auto *newSelf = allocators->node48.allocate(self->partialKeyLen);
|
auto *newSelf = allocators->node48.allocate(self->partialKeyLen);
|
||||||
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
|
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
|
||||||
kNodeCopySize);
|
kNodeCopySize);
|
||||||
memcpy(newSelf->partialKey(), self16->partialKey(), self->partialKeyLen);
|
memcpy(newSelf->partialKey(), self16->partialKey(), self->partialKeyLen);
|
||||||
newSelf->nextFree = 16;
|
newSelf->nextFree = Node16::kMaxNodes;
|
||||||
int i = 0;
|
int i = 0;
|
||||||
for (auto x : self16->index) {
|
for (auto x : self16->index) {
|
||||||
newSelf->bitSet.set(x);
|
newSelf->bitSet.set(x);
|
||||||
@@ -664,7 +708,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
|
|||||||
newSelf->index[x] = i;
|
newSelf->index[x] = i;
|
||||||
++i;
|
++i;
|
||||||
}
|
}
|
||||||
assert(i == 16);
|
assert(i == Node16::kMaxNodes);
|
||||||
allocators->node16.release(self16);
|
allocators->node16.release(self16);
|
||||||
setChildrenParents(newSelf);
|
setChildrenParents(newSelf);
|
||||||
self = newSelf;
|
self = newSelf;
|
||||||
@@ -673,6 +717,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
|
|||||||
|
|
||||||
insert16:
|
insert16:
|
||||||
auto *self16 = static_cast<Node16 *>(self);
|
auto *self16 = static_cast<Node16 *>(self);
|
||||||
|
assert(self->type == Type::Node16);
|
||||||
|
|
||||||
++self->numChildren;
|
++self->numChildren;
|
||||||
int i = 0;
|
int i = 0;
|
||||||
@@ -773,23 +818,23 @@ void makeCapacityAtLeast(Node *&self, int capacity, NodeAllocators *allocators,
|
|||||||
}
|
}
|
||||||
self = newSelf;
|
self = newSelf;
|
||||||
} break;
|
} break;
|
||||||
case Type::Node4: {
|
case Type::Node3: {
|
||||||
auto *self4 = (Node4 *)self;
|
auto *self3 = (Node3 *)self;
|
||||||
auto *newSelf = allocators->node4.allocate(capacity);
|
auto *newSelf = allocators->node3.allocate(capacity);
|
||||||
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
|
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
|
||||||
kNodeCopySize);
|
kNodeCopySize);
|
||||||
memcpy(newSelf->partialKey(), self4->partialKey(), self->partialKeyLen);
|
memcpy(newSelf->partialKey(), self3->partialKey(), self->partialKeyLen);
|
||||||
// TODO replace with memcpy?
|
// TODO replace with memcpy?
|
||||||
for (int i = 0; i < 4; ++i) {
|
for (int i = 0; i < Node3::kMaxNodes; ++i) {
|
||||||
newSelf->index[i] = self4->index[i];
|
newSelf->index[i] = self3->index[i];
|
||||||
newSelf->children[i] = self4->children[i];
|
newSelf->children[i] = self3->children[i];
|
||||||
}
|
}
|
||||||
getInTree(self, impl) = newSelf;
|
getInTree(self, impl) = newSelf;
|
||||||
setChildrenParents(newSelf);
|
setChildrenParents(newSelf);
|
||||||
if constexpr (kUseFreeList) {
|
if constexpr (kUseFreeList) {
|
||||||
allocators->node4.release(self4);
|
allocators->node3.release(self3);
|
||||||
} else {
|
} else {
|
||||||
free(self4);
|
free(self3);
|
||||||
}
|
}
|
||||||
self = newSelf;
|
self = newSelf;
|
||||||
} break;
|
} break;
|
||||||
@@ -800,7 +845,7 @@ void makeCapacityAtLeast(Node *&self, int capacity, NodeAllocators *allocators,
|
|||||||
kNodeCopySize);
|
kNodeCopySize);
|
||||||
memcpy(newSelf->partialKey(), self16->partialKey(), self->partialKeyLen);
|
memcpy(newSelf->partialKey(), self16->partialKey(), self->partialKeyLen);
|
||||||
// TODO replace with memcpy?
|
// TODO replace with memcpy?
|
||||||
for (int i = 0; i < 16; ++i) {
|
for (int i = 0; i < Node16::kMaxNodes; ++i) {
|
||||||
newSelf->index[i] = self16->index[i];
|
newSelf->index[i] = self16->index[i];
|
||||||
newSelf->children[i] = self16->children[i];
|
newSelf->children[i] = self16->children[i];
|
||||||
}
|
}
|
||||||
@@ -883,20 +928,20 @@ void maybeDownsize(Node *self, NodeAllocators *allocators,
|
|||||||
switch (self->type) {
|
switch (self->type) {
|
||||||
case Type::Node0:
|
case Type::Node0:
|
||||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||||
case Type::Node4: {
|
case Type::Node3: {
|
||||||
auto *self4 = (Node4 *)self;
|
auto *self3 = (Node3 *)self;
|
||||||
if (self->numChildren == 0) {
|
if (self->numChildren == 0) {
|
||||||
auto *newSelf = allocators->node0.allocate(self->partialKeyLen);
|
auto *newSelf = allocators->node0.allocate(self->partialKeyLen);
|
||||||
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
|
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
|
||||||
kNodeCopySize);
|
kNodeCopySize);
|
||||||
memcpy(newSelf->partialKey(), self4->partialKey(), self->partialKeyLen);
|
memcpy(newSelf->partialKey(), self3->partialKey(), self->partialKeyLen);
|
||||||
|
|
||||||
getInTree(self, impl) = newSelf;
|
getInTree(self, impl) = newSelf;
|
||||||
allocators->node4.release(self4);
|
allocators->node3.release(self3);
|
||||||
} else if (self->numChildren == 1) {
|
} else if (self->numChildren == 1) {
|
||||||
if (!self->entryPresent) {
|
if (!self->entryPresent) {
|
||||||
auto *child = self4->children[0].child;
|
auto *child = self3->children[0].child;
|
||||||
int minCapacity = self4->partialKeyLen + 1 + child->partialKeyLen;
|
int minCapacity = self3->partialKeyLen + 1 + child->partialKeyLen;
|
||||||
|
|
||||||
if (minCapacity > child->partialKeyCapacity) {
|
if (minCapacity > child->partialKeyCapacity) {
|
||||||
const bool update = child == dontInvalidate;
|
const bool update = child == dontInvalidate;
|
||||||
@@ -917,11 +962,11 @@ void maybeDownsize(Node *self, NodeAllocators *allocators,
|
|||||||
int64_t childMaxVersion = maxVersion(child, impl);
|
int64_t childMaxVersion = maxVersion(child, impl);
|
||||||
|
|
||||||
// Construct new partial key for child
|
// Construct new partial key for child
|
||||||
memmove(child->partialKey() + self4->partialKeyLen + 1,
|
memmove(child->partialKey() + self3->partialKeyLen + 1,
|
||||||
child->partialKey(), child->partialKeyLen);
|
child->partialKey(), child->partialKeyLen);
|
||||||
memcpy(child->partialKey(), self4->partialKey(), self->partialKeyLen);
|
memcpy(child->partialKey(), self3->partialKey(), self->partialKeyLen);
|
||||||
child->partialKey()[self4->partialKeyLen] = self4->index[0];
|
child->partialKey()[self3->partialKeyLen] = self3->index[0];
|
||||||
child->partialKeyLen += 1 + self4->partialKeyLen;
|
child->partialKeyLen += 1 + self3->partialKeyLen;
|
||||||
|
|
||||||
child->parent = self->parent;
|
child->parent = self->parent;
|
||||||
child->parentsIndex = self->parentsIndex;
|
child->parentsIndex = self->parentsIndex;
|
||||||
@@ -931,19 +976,19 @@ void maybeDownsize(Node *self, NodeAllocators *allocators,
|
|||||||
maxVersion(child, impl) = childMaxVersion;
|
maxVersion(child, impl) = childMaxVersion;
|
||||||
|
|
||||||
getInTree(self, impl) = child;
|
getInTree(self, impl) = child;
|
||||||
allocators->node4.release(self4);
|
allocators->node3.release(self3);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
case Type::Node16:
|
case Type::Node16:
|
||||||
if (self->numChildren + int(self->entryPresent) < kMinChildrenNode16) {
|
if (self->numChildren + int(self->entryPresent) < kMinChildrenNode16) {
|
||||||
auto *self16 = (Node16 *)self;
|
auto *self16 = (Node16 *)self;
|
||||||
auto *newSelf = allocators->node4.allocate(self->partialKeyLen);
|
auto *newSelf = allocators->node3.allocate(self->partialKeyLen);
|
||||||
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
|
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
|
||||||
kNodeCopySize);
|
kNodeCopySize);
|
||||||
memcpy(newSelf->partialKey(), self16->partialKey(), self->partialKeyLen);
|
memcpy(newSelf->partialKey(), self16->partialKey(), self->partialKeyLen);
|
||||||
// TODO replace with memcpy?
|
// TODO replace with memcpy?
|
||||||
for (int i = 0; i < 4; ++i) {
|
for (int i = 0; i < Node3::kMaxNodes; ++i) {
|
||||||
newSelf->index[i] = self16->index[i];
|
newSelf->index[i] = self16->index[i];
|
||||||
newSelf->children[i] = self16->children[i];
|
newSelf->children[i] = self16->children[i];
|
||||||
}
|
}
|
||||||
@@ -965,7 +1010,7 @@ void maybeDownsize(Node *self, NodeAllocators *allocators,
|
|||||||
[&](int c) {
|
[&](int c) {
|
||||||
// Suppress a false positive -Waggressive-loop-optimizations warning
|
// Suppress a false positive -Waggressive-loop-optimizations warning
|
||||||
// in gcc. `assume` doesn't work for some reason.
|
// in gcc. `assume` doesn't work for some reason.
|
||||||
if (!(i < 16)) {
|
if (!(i < Node16::kMaxNodes)) {
|
||||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||||
}
|
}
|
||||||
newSelf->index[i] = c;
|
newSelf->index[i] = c;
|
||||||
@@ -1034,8 +1079,8 @@ Node *erase(Node *self, NodeAllocators *allocators, ConflictSet::Impl *impl,
|
|||||||
case Type::Node0:
|
case Type::Node0:
|
||||||
allocators->node0.release((Node0 *)self);
|
allocators->node0.release((Node0 *)self);
|
||||||
break;
|
break;
|
||||||
case Type::Node4:
|
case Type::Node3:
|
||||||
allocators->node4.release((Node4 *)self);
|
allocators->node3.release((Node3 *)self);
|
||||||
break;
|
break;
|
||||||
case Type::Node16:
|
case Type::Node16:
|
||||||
allocators->node16.release((Node16 *)self);
|
allocators->node16.release((Node16 *)self);
|
||||||
@@ -1051,8 +1096,17 @@ Node *erase(Node *self, NodeAllocators *allocators, ConflictSet::Impl *impl,
|
|||||||
switch (parent->type) {
|
switch (parent->type) {
|
||||||
case Type::Node0:
|
case Type::Node0:
|
||||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||||
case Type::Node4:
|
case Type::Node3: {
|
||||||
[[fallthrough]];
|
auto *parent3 = static_cast<Node3 *>(parent);
|
||||||
|
int nodeIndex = getNodeIndex(parent3, parentsIndex);
|
||||||
|
assert(nodeIndex >= 0);
|
||||||
|
memmove(parent3->index + nodeIndex, parent3->index + nodeIndex + 1,
|
||||||
|
sizeof(parent3->index[0]) *
|
||||||
|
(parent->numChildren - (nodeIndex + 1)));
|
||||||
|
memmove(parent3->children + nodeIndex, parent3->children + nodeIndex + 1,
|
||||||
|
sizeof(parent3->children[0]) *
|
||||||
|
(parent->numChildren - (nodeIndex + 1)));
|
||||||
|
} break;
|
||||||
case Type::Node16: {
|
case Type::Node16: {
|
||||||
auto *parent16 = static_cast<Node16 *>(parent);
|
auto *parent16 = static_cast<Node16 *>(parent);
|
||||||
int nodeIndex = getNodeIndex(parent16, parentsIndex);
|
int nodeIndex = getNodeIndex(parent16, parentsIndex);
|
||||||
@@ -1399,8 +1453,14 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
|
|||||||
case Type::Node0:
|
case Type::Node0:
|
||||||
// We would have returned above, after not finding a child
|
// We would have returned above, after not finding a child
|
||||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||||
case Type::Node4:
|
case Type::Node3: {
|
||||||
[[fallthrough]];
|
auto *self = static_cast<Node3 *>(n);
|
||||||
|
for (int i = 0; i < self->numChildren && self->index[i] < end; ++i) {
|
||||||
|
if (begin <= self->index[i]) {
|
||||||
|
result = std::max(result, self->children[i].childMaxVersion);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} break;
|
||||||
case Type::Node16: {
|
case Type::Node16: {
|
||||||
auto *self = static_cast<Node16 *>(n);
|
auto *self = static_cast<Node16 *>(n);
|
||||||
for (int i = 0; i < self->numChildren && self->index[i] < end; ++i) {
|
for (int i = 0; i < self->numChildren && self->index[i] < end; ++i) {
|
||||||
@@ -1408,8 +1468,7 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
|
|||||||
result = std::max(result, self->children[i].childMaxVersion);
|
result = std::max(result, self->children[i].childMaxVersion);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
} break;
|
||||||
}
|
|
||||||
case Type::Node48: {
|
case Type::Node48: {
|
||||||
auto *self = static_cast<Node48 *>(n);
|
auto *self = static_cast<Node48 *>(n);
|
||||||
self->bitSet.forEachInRange(
|
self->bitSet.forEachInRange(
|
||||||
@@ -1898,7 +1957,7 @@ template <bool kBegin>
|
|||||||
int64_t oldMaxVersion = maxVersion(old, impl);
|
int64_t oldMaxVersion = maxVersion(old, impl);
|
||||||
|
|
||||||
// *self will have one child
|
// *self will have one child
|
||||||
*self = allocators->node4.allocate(partialKeyIndex);
|
*self = allocators->node3.allocate(partialKeyIndex);
|
||||||
|
|
||||||
memcpy((char *)*self + kNodeCopyBegin, (char *)old + kNodeCopyBegin,
|
memcpy((char *)*self + kNodeCopyBegin, (char *)old + kNodeCopyBegin,
|
||||||
kNodeCopySize);
|
kNodeCopySize);
|
||||||
@@ -2261,8 +2320,11 @@ int64_t &maxVersion(Node *n, ConflictSet::Impl *impl) {
|
|||||||
switch (n->type) {
|
switch (n->type) {
|
||||||
case Type::Node0:
|
case Type::Node0:
|
||||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||||
case Type::Node4:
|
case Type::Node3: {
|
||||||
[[fallthrough]];
|
auto *n3 = static_cast<Node3 *>(n);
|
||||||
|
int i = getNodeIndex(n3, index);
|
||||||
|
return n3->children[i].childMaxVersion;
|
||||||
|
}
|
||||||
case Type::Node16: {
|
case Type::Node16: {
|
||||||
auto *n16 = static_cast<Node16 *>(n);
|
auto *n16 = static_cast<Node16 *>(n);
|
||||||
int i = getNodeIndex(n16, index);
|
int i = getNodeIndex(n16, index);
|
||||||
@@ -2314,23 +2376,6 @@ ConflictSet::~ConflictSet() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if SHOW_MEMORY
|
|
||||||
__attribute__((visibility("default"))) void showMemory(const ConflictSet &cs) {
|
|
||||||
ConflictSet::Impl *impl;
|
|
||||||
memcpy(&impl, &cs, sizeof(impl)); // NOLINT
|
|
||||||
fprintf(stderr, "Max Node0 memory usage: %" PRId64 "\n",
|
|
||||||
impl->allocators.node0.highWaterMarkBytes());
|
|
||||||
fprintf(stderr, "Max Node4 memory usage: %" PRId64 "\n",
|
|
||||||
impl->allocators.node4.highWaterMarkBytes());
|
|
||||||
fprintf(stderr, "Max Node16 memory usage: %" PRId64 "\n",
|
|
||||||
impl->allocators.node16.highWaterMarkBytes());
|
|
||||||
fprintf(stderr, "Max Node48 memory usage: %" PRId64 "\n",
|
|
||||||
impl->allocators.node48.highWaterMarkBytes());
|
|
||||||
fprintf(stderr, "Max Node256 memory usage: %" PRId64 "\n",
|
|
||||||
impl->allocators.node256.highWaterMarkBytes());
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
ConflictSet::ConflictSet(ConflictSet &&other) noexcept
|
ConflictSet::ConflictSet(ConflictSet &&other) noexcept
|
||||||
: impl(std::exchange(other.impl, nullptr)) {}
|
: impl(std::exchange(other.impl, nullptr)) {}
|
||||||
|
|
||||||
@@ -2553,8 +2598,8 @@ Iterator firstGeq(Node *n, std::string_view key) {
|
|||||||
case Type::Node0:
|
case Type::Node0:
|
||||||
minNumChildren = 0;
|
minNumChildren = 0;
|
||||||
break;
|
break;
|
||||||
case Type::Node4:
|
case Type::Node3:
|
||||||
minNumChildren = kMinChildrenNode4;
|
minNumChildren = kMinChildrenNode3;
|
||||||
break;
|
break;
|
||||||
case Type::Node16:
|
case Type::Node16:
|
||||||
minNumChildren = kMinChildrenNode16;
|
minNumChildren = kMinChildrenNode16;
|
||||||
|
Reference in New Issue
Block a user