7 Commits

Author SHA1 Message Date
bcbae026b2 Update README
Some checks failed
Tests / 64 bit versions total: 8220, passed: 8220
Tests / Debug total: 8218, failed: 28, passed: 8190
Tests / SIMD fallback total: 8220, passed: 8220
weaselab/conflict-set/pipeline/head There was a failure building this commit
2024-11-20 14:53:57 -08:00
e125b599b5 Remove freeList, min/max capacity tracking
The freelist doesn't seem to get a good hit rate. Policies other than
capacity = minCapacity did not improve the rate we were resizing nodes,
but did increase memory usage, so get rid of that too. Add a
nodes_resized_total counter.
2024-11-20 14:45:56 -08:00
3f4d3b685a More valgrind annotations 2024-11-20 13:36:30 -08:00
4198b8b090 Some prep for leafs-in-parents 2024-11-20 12:20:11 -08:00
8757d2387c Call prefetch within TaggedNodePointer::getType
Instead of at every call site
2024-11-20 12:07:32 -08:00
4a22b95d53 Remove state machine transitions "from" Node0
Those aren't used
2024-11-20 11:46:45 -08:00
03d6c7e471 Allocate maxCapacity instead of minCapacity 2024-11-19 16:13:57 -08:00
2 changed files with 119 additions and 211 deletions

View File

@@ -219,6 +219,8 @@ template <class T> struct NodeAllocator;
struct TaggedNodePointer { struct TaggedNodePointer {
TaggedNodePointer() = default; TaggedNodePointer() = default;
operator struct Node *() { return (struct Node *)withoutType(); } operator struct Node *() { return (struct Node *)withoutType(); }
// In preparation for when we might store leafs directly in parents
struct Node *asNodeUnsafe() { return (struct Node *)withoutType(); }
operator struct Node0 *() { operator struct Node0 *() {
assert(getType() == Type_Node0); assert(getType() == Type_Node0);
return (struct Node0 *)withoutType(); return (struct Node0 *)withoutType();
@@ -262,6 +264,11 @@ struct TaggedNodePointer {
TaggedNodePointer &operator=(const TaggedNodePointer &) = default; TaggedNodePointer &operator=(const TaggedNodePointer &) = default;
/*implicit*/ TaggedNodePointer(Node *n); /*implicit*/ TaggedNodePointer(Node *n);
void prefetch() {
// __builtin_prefetch is ok even if argument isn't addressable
__builtin_prefetch((void *)withoutType());
}
private: private:
TaggedNodePointer(struct Node *p, Type t) : p((uintptr_t)p) { TaggedNodePointer(struct Node *p, Type t) : p((uintptr_t)p) {
assert((this->p & 7) == 0); assert((this->p & 7) == 0);
@@ -313,6 +320,7 @@ TaggedNodePointer::TaggedNodePointer(Node *n)
Type TaggedNodePointer::getType() { Type TaggedNodePointer::getType() {
assert(p != 0); assert(p != 0);
prefetch();
return Type(p & uintptr_t(7)); return Type(p & uintptr_t(7));
} }
@@ -694,27 +702,13 @@ constexpr int getMaxCapacity(Node *self) {
self->partialKeyLen); self->partialKeyLen);
} }
#ifdef __APPLE__
// Disabling the free list altogether is faster on my mac m1
constexpr int64_t kMaxFreeListBytes = 0;
#else
constexpr int64_t kMaxFreeListBytes = 1 << 20;
#endif
// Maintains a free list up to kMaxFreeListBytes. If the top element of the list
// doesn't meet the capacity constraints, it's freed and a new node is allocated
// with the minimum capacity. The hope is that "unfit" nodes don't get stuck in
// the free list.
template <class T> struct NodeAllocator { template <class T> struct NodeAllocator {
static_assert(std::derived_from<T, Node>); static_assert(std::derived_from<T, Node>);
static_assert(std::is_trivial_v<T>); static_assert(std::is_trivial_v<T>);
T *allocate(int minCapacity, int maxCapacity) { T *allocate(int capacity) {
assert(minCapacity <= maxCapacity); T *result = allocate_helper(capacity);
assert(freeListSize >= 0);
assert(freeListSize <= kMaxFreeListBytes);
T *result = allocate_helper(minCapacity, maxCapacity);
result->endOfRange = false; result->endOfRange = false;
result->releaseDeferred = false; result->releaseDeferred = false;
if constexpr (!std::is_same_v<T, Node0>) { if constexpr (!std::is_same_v<T, Node0>) {
@@ -734,47 +728,19 @@ template <class T> struct NodeAllocator {
} }
void release(T *p) { void release(T *p) {
assume(p->partialKeyCapacity >= 0); removeNode(p);
assume(freeListSize >= 0); return safe_free(p, sizeof(T) + p->partialKeyCapacity);
if (freeListSize + sizeof(T) + p->partialKeyCapacity > kMaxFreeListBytes) {
removeNode(p);
return safe_free(p, sizeof(T) + p->partialKeyCapacity);
}
p->parent = freeList;
freeList = p;
freeListSize += sizeof(T) + p->partialKeyCapacity;
VALGRIND_MAKE_MEM_NOACCESS(p, sizeof(T) + p->partialKeyCapacity);
} }
void deferRelease(T *p, Node *forwardTo) { void deferRelease(T *p, Node *forwardTo) {
p->releaseDeferred = true; p->releaseDeferred = true;
p->forwardTo = forwardTo; p->forwardTo = forwardTo;
if (freeListSize + sizeof(T) + p->partialKeyCapacity > kMaxFreeListBytes) { p->parent = deferredList;
p->parent = deferredListOverflow; deferredList = p;
deferredListOverflow = p;
} else {
if (deferredList == nullptr) {
deferredListFront = p;
}
p->parent = deferredList;
deferredList = p;
freeListSize += sizeof(T) + p->partialKeyCapacity;
}
} }
void releaseDeferred() { void releaseDeferred() {
if (deferredList != nullptr) { for (T *n = std::exchange(deferredList, nullptr); n != nullptr;) {
deferredListFront->parent = freeList;
#ifndef NVALGRIND
for (auto *iter = deferredList; iter != freeList;) {
auto *tmp = iter;
iter = (T *)iter->parent;
VALGRIND_MAKE_MEM_NOACCESS(tmp, sizeof(T) + tmp->partialKeyCapacity);
}
#endif
freeList = std::exchange(deferredList, nullptr);
}
for (T *n = std::exchange(deferredListOverflow, nullptr); n != nullptr;) {
auto *tmp = n; auto *tmp = n;
n = (T *)n->parent; n = (T *)n->parent;
release(tmp); release(tmp);
@@ -788,53 +754,15 @@ template <class T> struct NodeAllocator {
NodeAllocator(NodeAllocator &&) = delete; NodeAllocator(NodeAllocator &&) = delete;
NodeAllocator &operator=(NodeAllocator &&) = delete; NodeAllocator &operator=(NodeAllocator &&) = delete;
~NodeAllocator() { ~NodeAllocator() { assert(deferredList == nullptr); }
assert(deferredList == nullptr);
assert(deferredListOverflow == nullptr);
for (T *iter = freeList; iter != nullptr;) {
VALGRIND_MAKE_MEM_DEFINED(iter, sizeof(T));
auto *tmp = iter;
iter = (T *)iter->parent;
removeNode(tmp);
safe_free(tmp, sizeof(T) + tmp->partialKeyCapacity);
}
}
private: private:
int64_t freeListSize = 0;
T *freeList = nullptr;
T *deferredList = nullptr; T *deferredList = nullptr;
// Used to concatenate deferredList to freeList
T *deferredListFront;
T *deferredListOverflow = nullptr;
T *allocate_helper(int minCapacity, int maxCapacity) { T *allocate_helper(int capacity) {
if (freeList != nullptr) { auto *result = (T *)safe_malloc(sizeof(T) + capacity);
VALGRIND_MAKE_MEM_DEFINED(freeList, sizeof(T));
freeListSize -= sizeof(T) + freeList->partialKeyCapacity;
assume(freeList->partialKeyCapacity >= 0);
assume(minCapacity >= 0);
assume(minCapacity <= maxCapacity);
if (freeList->partialKeyCapacity >= minCapacity &&
freeList->partialKeyCapacity <= maxCapacity) {
auto *result = freeList;
freeList = (T *)freeList->parent;
VALGRIND_MAKE_MEM_UNDEFINED(result,
sizeof(T) + result->partialKeyCapacity);
VALGRIND_MAKE_MEM_DEFINED(&result->partialKeyCapacity,
sizeof(result->partialKeyCapacity));
VALGRIND_MAKE_MEM_DEFINED(&result->type, sizeof(result->type));
return result;
} else {
auto *p = freeList;
freeList = (T *)p->parent;
removeNode(p);
safe_free(p, sizeof(T) + p->partialKeyCapacity);
}
}
auto *result = (T *)safe_malloc(sizeof(T) + minCapacity);
result->type = T::kType; result->type = T::kType;
result->partialKeyCapacity = minCapacity; result->partialKeyCapacity = capacity;
addNode(result); addNode(result);
return result; return result;
} }
@@ -890,6 +818,7 @@ struct WriteContext {
int64_t point_writes; int64_t point_writes;
int64_t range_writes; int64_t range_writes;
int64_t write_bytes; int64_t write_bytes;
int64_t nodes_resized;
} accum; } accum;
#if USE_64_BIT #if USE_64_BIT
@@ -902,19 +831,19 @@ struct WriteContext {
WriteContext() { memset(&accum, 0, sizeof(accum)); } WriteContext() { memset(&accum, 0, sizeof(accum)); }
template <class T> T *allocate(int minCapacity, int maxCapacity) { template <class T> T *allocate(int capacity) {
static_assert(!std::is_same_v<T, Node>); static_assert(!std::is_same_v<T, Node>);
++accum.nodes_allocated; ++accum.nodes_allocated;
if constexpr (std::is_same_v<T, Node0>) { if constexpr (std::is_same_v<T, Node0>) {
return node0.allocate(minCapacity, maxCapacity); return node0.allocate(capacity);
} else if constexpr (std::is_same_v<T, Node3>) { } else if constexpr (std::is_same_v<T, Node3>) {
return node3.allocate(minCapacity, maxCapacity); return node3.allocate(capacity);
} else if constexpr (std::is_same_v<T, Node16>) { } else if constexpr (std::is_same_v<T, Node16>) {
return node16.allocate(minCapacity, maxCapacity); return node16.allocate(capacity);
} else if constexpr (std::is_same_v<T, Node48>) { } else if constexpr (std::is_same_v<T, Node48>) {
return node48.allocate(minCapacity, maxCapacity); return node48.allocate(capacity);
} else if constexpr (std::is_same_v<T, Node256>) { } else if constexpr (std::is_same_v<T, Node256>) {
return node256.allocate(minCapacity, maxCapacity); return node256.allocate(capacity);
} }
} }
template <class T> void release(T *c) { template <class T> void release(T *c) {
@@ -1415,12 +1344,12 @@ TaggedNodePointer getChildGeq(Node *self, int child) {
TaggedNodePointer getFirstChild(Node0 *) { return nullptr; } TaggedNodePointer getFirstChild(Node0 *) { return nullptr; }
TaggedNodePointer getFirstChild(Node3 *self) { TaggedNodePointer getFirstChild(Node3 *self) {
// Improves scan performance // Improves scan performance
__builtin_prefetch(self->children[1]); self->children[1].prefetch();
return self->children[0]; return self->children[0];
} }
TaggedNodePointer getFirstChild(Node16 *self) { TaggedNodePointer getFirstChild(Node16 *self) {
// Improves scan performance // Improves scan performance
__builtin_prefetch(self->children[1]); self->children[1].prefetch();
return self->children[0]; return self->children[0];
} }
TaggedNodePointer getFirstChild(Node48 *self) { TaggedNodePointer getFirstChild(Node48 *self) {
@@ -1463,14 +1392,14 @@ void consumePartialKeyFull(TaggedNodePointer &self, TrivialSpan &key,
int partialKeyIndex = int partialKeyIndex =
longestCommonPrefix(self->partialKey(), key.data(), commonLen); longestCommonPrefix(self->partialKey(), key.data(), commonLen);
if (partialKeyIndex < self->partialKeyLen) { if (partialKeyIndex < self->partialKeyLen) {
Node *old = self; // Safety: self has a partial key so it can't be a leaf
Node *old = self.asNodeUnsafe();
// Since root cannot have a partial key // Since root cannot have a partial key
assert(old->parent != nullptr); assert(old->parent != nullptr);
InternalVersionT oldMaxVersion = exchangeMaxVersion(old, writeVersion); InternalVersionT oldMaxVersion = exchangeMaxVersion(old, writeVersion);
// *self will have one child (old) // *self will have one child (old)
auto *newSelf = writeContext->allocate<Node3>( auto *newSelf = writeContext->allocate<Node3>(partialKeyIndex);
partialKeyIndex, getMaxCapacity(1, 0, partialKeyIndex));
newSelf->parent = old->parent; newSelf->parent = old->parent;
newSelf->parentsIndex = old->parentsIndex; newSelf->parentsIndex = old->parentsIndex;
@@ -1491,6 +1420,8 @@ void consumePartialKeyFull(TaggedNodePointer &self, TrivialSpan &key,
memmove(old->partialKey(), old->partialKey() + partialKeyIndex + 1, memmove(old->partialKey(), old->partialKey() + partialKeyIndex + 1,
old->partialKeyLen - (partialKeyIndex + 1)); old->partialKeyLen - (partialKeyIndex + 1));
old->partialKeyLen -= partialKeyIndex + 1; old->partialKeyLen -= partialKeyIndex + 1;
VALGRIND_MAKE_MEM_UNDEFINED(old->partialKey() + old->partialKeyLen,
partialKeyIndex + 1);
// Maintain memory capacity invariant // Maintain memory capacity invariant
maybeDecreaseCapacity(old, writeContext, impl); maybeDecreaseCapacity(old, writeContext, impl);
@@ -1572,8 +1503,7 @@ TaggedNodePointer &getOrCreateChild(TaggedNodePointer &self, TrivialSpan &key,
__builtin_unreachable(); // GCOVR_EXCL_LINE __builtin_unreachable(); // GCOVR_EXCL_LINE
} }
auto *newChild = writeContext->allocate<Node0>( auto *newChild = writeContext->allocate<Node0>(key.size());
key.size(), getMaxCapacity(0, 1, key.size()));
newChild->numChildren = 0; newChild->numChildren = 0;
newChild->entryPresent = false; // Will be set to true by the caller newChild->entryPresent = false; // Will be set to true by the caller
newChild->partialKeyLen = key.size(); newChild->partialKeyLen = key.size();
@@ -1585,8 +1515,7 @@ TaggedNodePointer &getOrCreateChild(TaggedNodePointer &self, TrivialSpan &key,
case Type_Node0: { case Type_Node0: {
auto *self0 = static_cast<Node0 *>(self); auto *self0 = static_cast<Node0 *>(self);
auto *newSelf = writeContext->allocate<Node3>( auto *newSelf = writeContext->allocate<Node3>(self->partialKeyLen);
self->partialKeyLen, getMaxCapacity(1, 1, self->partialKeyLen));
newSelf->copyChildrenAndKeyFrom(*self0); newSelf->copyChildrenAndKeyFrom(*self0);
writeContext->deferRelease(self0, newSelf); writeContext->deferRelease(self0, newSelf);
self = newSelf; self = newSelf;
@@ -1596,9 +1525,7 @@ TaggedNodePointer &getOrCreateChild(TaggedNodePointer &self, TrivialSpan &key,
case Type_Node3: { case Type_Node3: {
if (self->numChildren == Node3::kMaxNodes) { if (self->numChildren == Node3::kMaxNodes) {
auto *self3 = static_cast<Node3 *>(self); auto *self3 = static_cast<Node3 *>(self);
auto *newSelf = writeContext->allocate<Node16>( auto *newSelf = writeContext->allocate<Node16>(self->partialKeyLen);
self->partialKeyLen,
getMaxCapacity(4, self->entryPresent, self->partialKeyLen));
newSelf->copyChildrenAndKeyFrom(*self3); newSelf->copyChildrenAndKeyFrom(*self3);
writeContext->deferRelease(self3, newSelf); writeContext->deferRelease(self3, newSelf);
self = newSelf; self = newSelf;
@@ -1621,15 +1548,14 @@ TaggedNodePointer &getOrCreateChild(TaggedNodePointer &self, TrivialSpan &key,
self3->childMaxVersion[i + 1] = newMaxVersion; self3->childMaxVersion[i + 1] = newMaxVersion;
result = newChild; result = newChild;
++self->numChildren; ++self->numChildren;
newChild->parent = self; // Safety: self has a child so it can't be a leaf
newChild->parent = self.asNodeUnsafe();
return result; return result;
} }
case Type_Node16: { case Type_Node16: {
if (self->numChildren == Node16::kMaxNodes) { if (self->numChildren == Node16::kMaxNodes) {
auto *self16 = static_cast<Node16 *>(self); auto *self16 = static_cast<Node16 *>(self);
auto *newSelf = writeContext->allocate<Node48>( auto *newSelf = writeContext->allocate<Node48>(self->partialKeyLen);
self->partialKeyLen,
getMaxCapacity(17, self->entryPresent, self->partialKeyLen));
newSelf->copyChildrenAndKeyFrom(*self16); newSelf->copyChildrenAndKeyFrom(*self16);
writeContext->deferRelease(self16, newSelf); writeContext->deferRelease(self16, newSelf);
self = newSelf; self = newSelf;
@@ -1653,16 +1579,15 @@ TaggedNodePointer &getOrCreateChild(TaggedNodePointer &self, TrivialSpan &key,
self16->childMaxVersion[i + 1] = newMaxVersion; self16->childMaxVersion[i + 1] = newMaxVersion;
result = newChild; result = newChild;
++self->numChildren; ++self->numChildren;
newChild->parent = self; // Safety: self has a child so it can't be a leaf
newChild->parent = self.asNodeUnsafe();
return result; return result;
} }
case Type_Node48: { case Type_Node48: {
if (self->numChildren == 48) { if (self->numChildren == 48) {
auto *self48 = static_cast<Node48 *>(self); auto *self48 = static_cast<Node48 *>(self);
auto *newSelf = writeContext->allocate<Node256>( auto *newSelf = writeContext->allocate<Node256>(self->partialKeyLen);
self->partialKeyLen,
getMaxCapacity(49, self->entryPresent, self->partialKeyLen));
newSelf->copyChildrenAndKeyFrom(*self48); newSelf->copyChildrenAndKeyFrom(*self48);
writeContext->deferRelease(self48, newSelf); writeContext->deferRelease(self48, newSelf);
self = newSelf; self = newSelf;
@@ -1680,7 +1605,8 @@ TaggedNodePointer &getOrCreateChild(TaggedNodePointer &self, TrivialSpan &key,
self48->maxOfMax[nextFree >> Node48::kMaxOfMaxShift] = std::max( self48->maxOfMax[nextFree >> Node48::kMaxOfMaxShift] = std::max(
newMaxVersion, self48->maxOfMax[nextFree >> Node48::kMaxOfMaxShift]); newMaxVersion, self48->maxOfMax[nextFree >> Node48::kMaxOfMaxShift]);
result = newChild; result = newChild;
newChild->parent = self; // Safety: self has a child so it can't be a leaf
newChild->parent = self.asNodeUnsafe();
return result; return result;
} }
case Type_Node256: { case Type_Node256: {
@@ -1694,7 +1620,8 @@ TaggedNodePointer &getOrCreateChild(TaggedNodePointer &self, TrivialSpan &key,
self256->maxOfMax[index >> Node256::kMaxOfMaxShift] = std::max( self256->maxOfMax[index >> Node256::kMaxOfMaxShift] = std::max(
newMaxVersion, self256->maxOfMax[index >> Node256::kMaxOfMaxShift]); newMaxVersion, self256->maxOfMax[index >> Node256::kMaxOfMaxShift]);
result = newChild; result = newChild;
newChild->parent = self; // Safety: self has a child so it can't be a leaf
newChild->parent = self.asNodeUnsafe();
return result; return result;
} }
default: // GCOVR_EXCL_LINE default: // GCOVR_EXCL_LINE
@@ -1744,13 +1671,13 @@ downLeftSpine:
return node; return node;
} }
void freeAndMakeCapacityBetween(Node *&self, int minCapacity, int maxCapacity, void freeAndMakeCapacity(Node *&self, int capacity, WriteContext *writeContext,
WriteContext *writeContext, ConflictSet::Impl *impl) {
ConflictSet::Impl *impl) { ++writeContext->accum.nodes_resized;
switch (self->getType()) { switch (self->getType()) {
case Type_Node0: { case Type_Node0: {
auto *self0 = (Node0 *)self; auto *self0 = (Node0 *)self;
auto *newSelf = writeContext->allocate<Node0>(minCapacity, maxCapacity); auto *newSelf = writeContext->allocate<Node0>(capacity);
newSelf->copyChildrenAndKeyFrom(*self0); newSelf->copyChildrenAndKeyFrom(*self0);
getInTree(self, impl) = newSelf; getInTree(self, impl) = newSelf;
writeContext->deferRelease(self0, newSelf); writeContext->deferRelease(self0, newSelf);
@@ -1758,7 +1685,7 @@ void freeAndMakeCapacityBetween(Node *&self, int minCapacity, int maxCapacity,
} break; } break;
case Type_Node3: { case Type_Node3: {
auto *self3 = (Node3 *)self; auto *self3 = (Node3 *)self;
auto *newSelf = writeContext->allocate<Node3>(minCapacity, maxCapacity); auto *newSelf = writeContext->allocate<Node3>(capacity);
newSelf->copyChildrenAndKeyFrom(*self3); newSelf->copyChildrenAndKeyFrom(*self3);
getInTree(self, impl) = newSelf; getInTree(self, impl) = newSelf;
writeContext->deferRelease(self3, newSelf); writeContext->deferRelease(self3, newSelf);
@@ -1766,7 +1693,7 @@ void freeAndMakeCapacityBetween(Node *&self, int minCapacity, int maxCapacity,
} break; } break;
case Type_Node16: { case Type_Node16: {
auto *self16 = (Node16 *)self; auto *self16 = (Node16 *)self;
auto *newSelf = writeContext->allocate<Node16>(minCapacity, maxCapacity); auto *newSelf = writeContext->allocate<Node16>(capacity);
newSelf->copyChildrenAndKeyFrom(*self16); newSelf->copyChildrenAndKeyFrom(*self16);
getInTree(self, impl) = newSelf; getInTree(self, impl) = newSelf;
writeContext->deferRelease(self16, newSelf); writeContext->deferRelease(self16, newSelf);
@@ -1774,7 +1701,7 @@ void freeAndMakeCapacityBetween(Node *&self, int minCapacity, int maxCapacity,
} break; } break;
case Type_Node48: { case Type_Node48: {
auto *self48 = (Node48 *)self; auto *self48 = (Node48 *)self;
auto *newSelf = writeContext->allocate<Node48>(minCapacity, maxCapacity); auto *newSelf = writeContext->allocate<Node48>(capacity);
newSelf->copyChildrenAndKeyFrom(*self48); newSelf->copyChildrenAndKeyFrom(*self48);
getInTree(self, impl) = newSelf; getInTree(self, impl) = newSelf;
writeContext->deferRelease(self48, newSelf); writeContext->deferRelease(self48, newSelf);
@@ -1782,7 +1709,7 @@ void freeAndMakeCapacityBetween(Node *&self, int minCapacity, int maxCapacity,
} break; } break;
case Type_Node256: { case Type_Node256: {
auto *self256 = (Node256 *)self; auto *self256 = (Node256 *)self;
auto *newSelf = writeContext->allocate<Node256>(minCapacity, maxCapacity); auto *newSelf = writeContext->allocate<Node256>(capacity);
newSelf->copyChildrenAndKeyFrom(*self256); newSelf->copyChildrenAndKeyFrom(*self256);
getInTree(self, impl) = newSelf; getInTree(self, impl) = newSelf;
writeContext->deferRelease(self256, newSelf); writeContext->deferRelease(self256, newSelf);
@@ -1807,8 +1734,7 @@ void maybeDecreaseCapacity(Node *&self, WriteContext *writeContext,
if (self->getCapacity() <= maxCapacity) { if (self->getCapacity() <= maxCapacity) {
return; return;
} }
freeAndMakeCapacityBetween(self, self->partialKeyLen, maxCapacity, freeAndMakeCapacity(self, self->partialKeyLen, writeContext, impl);
writeContext, impl);
} }
#if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__) #if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__)
@@ -1882,12 +1808,9 @@ void mergeWithChild(TaggedNodePointer &self, WriteContext *writeContext,
assert(!self3->entryPresent); assert(!self3->entryPresent);
Node *child = self3->children[0]; Node *child = self3->children[0];
const int minCapacity = self3->partialKeyLen + 1 + child->partialKeyLen; const int minCapacity = self3->partialKeyLen + 1 + child->partialKeyLen;
const int maxCapacity =
getMaxCapacity(child->numChildren, child->entryPresent, minCapacity);
if (minCapacity > child->getCapacity()) { if (minCapacity > child->getCapacity()) {
freeAndMakeCapacityBetween(child, minCapacity, maxCapacity, writeContext, freeAndMakeCapacity(child, minCapacity, writeContext, impl);
impl);
} }
// Merge partial key with child // Merge partial key with child
@@ -1914,7 +1837,7 @@ void mergeWithChild(TaggedNodePointer &self, WriteContext *writeContext,
setMaxVersion(child, std::max(childMaxVersion, writeContext->zero)); setMaxVersion(child, std::max(childMaxVersion, writeContext->zero));
self = child; self = child;
writeContext->deferRelease(self3, self); writeContext->deferRelease(self3, child);
} }
bool needsDownsize(Node *n) { bool needsDownsize(Node *n) {
@@ -1926,8 +1849,7 @@ bool needsDownsize(Node *n) {
void downsize(Node3 *self, WriteContext *writeContext, void downsize(Node3 *self, WriteContext *writeContext,
ConflictSet::Impl *impl) { ConflictSet::Impl *impl) {
if (self->numChildren == 0) { if (self->numChildren == 0) {
auto *newSelf = writeContext->allocate<Node0>( auto *newSelf = writeContext->allocate<Node0>(self->partialKeyLen);
self->partialKeyLen, getMaxCapacity(0, 1, self->partialKeyLen));
newSelf->copyChildrenAndKeyFrom(*self); newSelf->copyChildrenAndKeyFrom(*self);
getInTree(self, impl) = newSelf; getInTree(self, impl) = newSelf;
writeContext->deferRelease(self, newSelf); writeContext->deferRelease(self, newSelf);
@@ -1940,9 +1862,7 @@ void downsize(Node3 *self, WriteContext *writeContext,
void downsize(Node16 *self, WriteContext *writeContext, void downsize(Node16 *self, WriteContext *writeContext,
ConflictSet::Impl *impl) { ConflictSet::Impl *impl) {
assert(self->numChildren + int(self->entryPresent) < kMinChildrenNode16); assert(self->numChildren + int(self->entryPresent) < kMinChildrenNode16);
auto *newSelf = writeContext->allocate<Node3>( auto *newSelf = writeContext->allocate<Node3>(self->partialKeyLen);
self->partialKeyLen,
getMaxCapacity(kMinChildrenNode16 - 1, 0, self->partialKeyLen));
newSelf->copyChildrenAndKeyFrom(*self); newSelf->copyChildrenAndKeyFrom(*self);
getInTree(self, impl) = newSelf; getInTree(self, impl) = newSelf;
writeContext->deferRelease(self, newSelf); writeContext->deferRelease(self, newSelf);
@@ -1951,9 +1871,7 @@ void downsize(Node16 *self, WriteContext *writeContext,
void downsize(Node48 *self, WriteContext *writeContext, void downsize(Node48 *self, WriteContext *writeContext,
ConflictSet::Impl *impl) { ConflictSet::Impl *impl) {
assert(self->numChildren + int(self->entryPresent) < kMinChildrenNode48); assert(self->numChildren + int(self->entryPresent) < kMinChildrenNode48);
auto *newSelf = writeContext->allocate<Node16>( auto *newSelf = writeContext->allocate<Node16>(self->partialKeyLen);
self->partialKeyLen,
getMaxCapacity(kMinChildrenNode48 - 1, 0, self->partialKeyLen));
newSelf->copyChildrenAndKeyFrom(*self); newSelf->copyChildrenAndKeyFrom(*self);
getInTree(self, impl) = newSelf; getInTree(self, impl) = newSelf;
writeContext->deferRelease(self, newSelf); writeContext->deferRelease(self, newSelf);
@@ -1963,9 +1881,7 @@ void downsize(Node256 *self, WriteContext *writeContext,
ConflictSet::Impl *impl) { ConflictSet::Impl *impl) {
assert(self->numChildren + int(self->entryPresent) < kMinChildrenNode256); assert(self->numChildren + int(self->entryPresent) < kMinChildrenNode256);
auto *self256 = (Node256 *)self; auto *self256 = (Node256 *)self;
auto *newSelf = writeContext->allocate<Node48>( auto *newSelf = writeContext->allocate<Node48>(self->partialKeyLen);
self->partialKeyLen,
getMaxCapacity(kMinChildrenNode256 - 1, 0, self->partialKeyLen));
newSelf->copyChildrenAndKeyFrom(*self256); newSelf->copyChildrenAndKeyFrom(*self256);
getInTree(self, impl) = newSelf; getInTree(self, impl) = newSelf;
writeContext->deferRelease(self256, newSelf); writeContext->deferRelease(self256, newSelf);
@@ -2047,6 +1963,12 @@ Node *erase(Node *self, WriteContext *writeContext, ConflictSet::Impl *impl,
parent3->children[i] = parent3->children[i + 1]; parent3->children[i] = parent3->children[i + 1];
parent3->childMaxVersion[i] = parent3->childMaxVersion[i + 1]; parent3->childMaxVersion[i] = parent3->childMaxVersion[i + 1];
} }
VALGRIND_MAKE_MEM_UNDEFINED(parent3->index + parent3->numChildren,
sizeof(parent3->index[0]));
VALGRIND_MAKE_MEM_UNDEFINED(parent3->children + parent3->numChildren,
sizeof(parent3->children[0]));
VALGRIND_MAKE_MEM_UNDEFINED(parent3->childMaxVersion + parent3->numChildren,
sizeof(parent3->childMaxVersion[0]));
if (needsDownsize(parent3)) { if (needsDownsize(parent3)) {
downsize(parent3, writeContext, impl); downsize(parent3, writeContext, impl);
@@ -2062,6 +1984,13 @@ Node *erase(Node *self, WriteContext *writeContext, ConflictSet::Impl *impl,
parent16->children[i] = parent16->children[i + 1]; parent16->children[i] = parent16->children[i + 1];
parent16->childMaxVersion[i] = parent16->childMaxVersion[i + 1]; parent16->childMaxVersion[i] = parent16->childMaxVersion[i + 1];
} }
VALGRIND_MAKE_MEM_UNDEFINED(parent16->index + parent16->numChildren,
sizeof(parent16->index[0]));
VALGRIND_MAKE_MEM_UNDEFINED(parent16->children + parent16->numChildren,
sizeof(parent16->children[0]));
VALGRIND_MAKE_MEM_UNDEFINED(parent16->childMaxVersion +
parent16->numChildren,
sizeof(parent16->childMaxVersion[0]));
if (needsDownsize(parent16)) { if (needsDownsize(parent16)) {
downsize(parent16, writeContext, impl); downsize(parent16, writeContext, impl);
@@ -2090,6 +2019,8 @@ Node *erase(Node *self, WriteContext *writeContext, ConflictSet::Impl *impl,
parent48->reverseIndex[toRemoveChildrenIndex] = parentIndex; parent48->reverseIndex[toRemoveChildrenIndex] = parentIndex;
} }
parent48->childMaxVersion[lastChildrenIndex] = writeContext->zero; parent48->childMaxVersion[lastChildrenIndex] = writeContext->zero;
VALGRIND_MAKE_MEM_UNDEFINED(parent48->children + lastChildrenIndex,
sizeof(parent48->children[0]));
if (needsDownsize(parent48)) { if (needsDownsize(parent48)) {
downsize(parent48, writeContext, impl); downsize(parent48, writeContext, impl);
@@ -3246,7 +3177,6 @@ PRESERVE_NONE void down_left_spine(Job *job, Context *context) {
} }
auto child = getFirstChild(n); auto child = getFirstChild(n);
job->n = child; job->n = child;
__builtin_prefetch(job->n);
job->continuation = downLeftSpineTable[child.getType()]; job->continuation = downLeftSpineTable[child.getType()];
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} }
@@ -3277,7 +3207,6 @@ void begin(Job *job, Context *context) {
if (c != nullptr) { if (c != nullptr) {
job->n = c; job->n = c;
job->continuation = downLeftSpineTable[c.getType()]; job->continuation = downLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} else { } else {
// The root never has a next sibling // The root never has a next sibling
@@ -3287,7 +3216,6 @@ void begin(Job *job, Context *context) {
} }
job->continuation = iterTable[taggedChild.getType()]; job->continuation = iterTable[taggedChild.getType()];
job->n = child; job->n = child;
__builtin_prefetch(child);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} }
@@ -3312,7 +3240,6 @@ template <class NodeT> void iter(Job *job, Context *context) {
MUSTTAIL return complete(job, context); MUSTTAIL return complete(job, context);
} }
job->continuation = downLeftSpineTable[s.getType()]; job->continuation = downLeftSpineTable[s.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} }
} }
@@ -3342,7 +3269,6 @@ template <class NodeT> void iter(Job *job, Context *context) {
auto c = getFirstChild(n); auto c = getFirstChild(n);
job->n = c; job->n = c;
job->continuation = downLeftSpineTable[c.getType()]; job->continuation = downLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} }
@@ -3354,7 +3280,6 @@ template <class NodeT> void iter(Job *job, Context *context) {
if (c != nullptr) { if (c != nullptr) {
job->n = c; job->n = c;
job->continuation = downLeftSpineTable[c.getType()]; job->continuation = downLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} else { } else {
auto c = nextSibling(job->n); auto c = nextSibling(job->n);
@@ -3364,13 +3289,11 @@ template <class NodeT> void iter(Job *job, Context *context) {
MUSTTAIL return complete(job, context); MUSTTAIL return complete(job, context);
} }
job->continuation = downLeftSpineTable[c->getType()]; job->continuation = downLeftSpineTable[c->getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} }
} }
job->continuation = iterTable[taggedChild.getType()]; job->continuation = iterTable[taggedChild.getType()];
job->n = child; job->n = child;
__builtin_prefetch(child);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} }
@@ -3399,7 +3322,6 @@ void begin(Job *job, Context *context) {
if (c != nullptr) { if (c != nullptr) {
job->n = c; job->n = c;
job->continuation = downLeftSpineTable[c.getType()]; job->continuation = downLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} else { } else {
// The root never has a next sibling // The root never has a next sibling
@@ -3409,7 +3331,6 @@ void begin(Job *job, Context *context) {
} }
job->continuation = iterTable[taggedChild.getType()]; job->continuation = iterTable[taggedChild.getType()];
job->n = child; job->n = child;
__builtin_prefetch(child);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} }
@@ -3434,7 +3355,6 @@ template <class NodeT> void iter(Job *job, Context *context) {
MUSTTAIL return complete(job, context); MUSTTAIL return complete(job, context);
} }
job->continuation = downLeftSpineTable[c.getType()]; job->continuation = downLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} }
} }
@@ -3474,7 +3394,6 @@ template <class NodeT> void iter(Job *job, Context *context) {
if (c != nullptr) { if (c != nullptr) {
job->n = c; job->n = c;
job->continuation = downLeftSpineTable[c.getType()]; job->continuation = downLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} else { } else {
auto c = nextSibling(job->n); auto c = nextSibling(job->n);
@@ -3484,13 +3403,11 @@ template <class NodeT> void iter(Job *job, Context *context) {
MUSTTAIL return complete(job, context); MUSTTAIL return complete(job, context);
} }
job->continuation = downLeftSpineTable[c.getType()]; job->continuation = downLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} }
} }
job->continuation = iterTable[taggedChild.getType()]; job->continuation = iterTable[taggedChild.getType()];
job->n = child; job->n = child;
__builtin_prefetch(child);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} }
@@ -3566,7 +3483,6 @@ PRESERVE_NONE void begin(Job *job, Context *context) {
} }
job->continuation = commonPrefixIterTable[c.getType()]; job->continuation = commonPrefixIterTable[c.getType()];
__builtin_prefetch(job->child);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} }
@@ -3610,7 +3526,6 @@ template <class NodeT> void common_prefix_iter(Job *job, Context *context) {
} }
job->continuation = commonPrefixIterTable[c.getType()]; job->continuation = commonPrefixIterTable[c.getType()];
__builtin_prefetch(job->child);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} }
@@ -3659,7 +3574,6 @@ PRESERVE_NONE void done_common_prefix_iter(Job *job, Context *context) {
if (c != nullptr) { if (c != nullptr) {
job->n = c; job->n = c;
job->continuation = downLeftSpineTable[c.getType()]; job->continuation = downLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} else { } else {
auto c = nextSibling(job->n); auto c = nextSibling(job->n);
@@ -3674,7 +3588,6 @@ PRESERVE_NONE void done_common_prefix_iter(Job *job, Context *context) {
job->n = child; job->n = child;
job->continuation = rightSideIterTable[c.getType()]; job->continuation = rightSideIterTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} }
@@ -3698,7 +3611,6 @@ PRESERVE_NONE void done_common_prefix_iter(Job *job, Context *context) {
if (c != nullptr) { if (c != nullptr) {
job->n = c; job->n = c;
job->continuation = leftSideDownLeftSpineTable[c.getType()]; job->continuation = leftSideDownLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} else { } else {
auto c = nextSibling(job->n); auto c = nextSibling(job->n);
@@ -3707,14 +3619,12 @@ PRESERVE_NONE void done_common_prefix_iter(Job *job, Context *context) {
MUSTTAIL return done_left_side_iter(job, context); MUSTTAIL return done_left_side_iter(job, context);
} }
job->continuation = leftSideDownLeftSpineTable[c.getType()]; job->continuation = leftSideDownLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} }
} }
job->n = child; job->n = child;
job->continuation = leftSideIterTable[c.getType()]; job->continuation = leftSideIterTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} }
@@ -3755,7 +3665,6 @@ PRESERVE_NONE void left_side_iter(Job *job, Context *context) {
MUSTTAIL return done_left_side_iter(job, context); MUSTTAIL return done_left_side_iter(job, context);
} }
job->continuation = leftSideDownLeftSpineTable[c.getType()]; job->continuation = leftSideDownLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} }
} }
@@ -3809,14 +3718,12 @@ PRESERVE_NONE void left_side_iter(Job *job, Context *context) {
MUSTTAIL return done_left_side_iter(job, context); MUSTTAIL return done_left_side_iter(job, context);
} }
job->continuation = leftSideDownLeftSpineTable[c.getType()]; job->continuation = leftSideDownLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} }
} }
job->n = child; job->n = child;
job->continuation = leftSideIterTable[c.getType()]; job->continuation = leftSideIterTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} }
@@ -3847,7 +3754,6 @@ PRESERVE_NONE void done_left_side_iter(Job *job, Context *context) {
job->n = child; job->n = child;
job->continuation = rightSideIterTable[c.getType()]; job->continuation = rightSideIterTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} }
@@ -3866,7 +3772,6 @@ void left_side_down_left_spine(Job *job, Context *context) {
auto c = getFirstChild(n); auto c = getFirstChild(n);
job->n = c; job->n = c;
job->continuation = leftSideDownLeftSpineTable[c.getType()]; job->continuation = leftSideDownLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} }
@@ -3905,7 +3810,6 @@ PRESERVE_NONE void right_side_iter(Job *job, Context *context) {
MUSTTAIL return complete(job, context); MUSTTAIL return complete(job, context);
} }
job->continuation = downLeftSpineTable[c.getType()]; job->continuation = downLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} }
} }
@@ -3947,7 +3851,6 @@ PRESERVE_NONE void right_side_iter(Job *job, Context *context) {
if (c != nullptr) { if (c != nullptr) {
job->n = c; job->n = c;
job->continuation = downLeftSpineTable[c.getType()]; job->continuation = downLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} else { } else {
auto c = nextSibling(job->n); auto c = nextSibling(job->n);
@@ -3957,14 +3860,12 @@ PRESERVE_NONE void right_side_iter(Job *job, Context *context) {
MUSTTAIL return complete(job, context); MUSTTAIL return complete(job, context);
} }
job->continuation = downLeftSpineTable[c.getType()]; job->continuation = downLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} }
} }
job->n = child; job->n = child;
job->continuation = rightSideIterTable[c.getType()]; job->continuation = rightSideIterTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} }
@@ -4133,8 +4034,10 @@ template <class NodeTFrom> struct PointIterTable {
}; };
static constexpr Continuation const *pointIterTable[] = { static constexpr Continuation const *pointIterTable[] = {
PointIterTable<Node0>::table, PointIterTable<Node3>::table, nullptr,
PointIterTable<Node16>::table, PointIterTable<Node48>::table, PointIterTable<Node3>::table,
PointIterTable<Node16>::table,
PointIterTable<Node48>::table,
PointIterTable<Node256>::table, PointIterTable<Node256>::table,
}; };
@@ -4184,7 +4087,6 @@ void pointIter(Job *job, Context *context) {
} }
job->continuation = PointIterTable<NodeTTo>::table[job->child.getType()]; job->continuation = PointIterTable<NodeTTo>::table[job->child.getType()];
__builtin_prefetch(job->child);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} }
@@ -4203,8 +4105,10 @@ template <class NodeTFrom> struct PrefixIterTable {
}; };
static constexpr Continuation const *prefixIterTable[] = { static constexpr Continuation const *prefixIterTable[] = {
PrefixIterTable<Node0>::table, PrefixIterTable<Node3>::table, nullptr,
PrefixIterTable<Node16>::table, PrefixIterTable<Node48>::table, PrefixIterTable<Node3>::table,
PrefixIterTable<Node16>::table,
PrefixIterTable<Node48>::table,
PrefixIterTable<Node256>::table, PrefixIterTable<Node256>::table,
}; };
@@ -4216,8 +4120,10 @@ template <class NodeTFrom> struct BeginIterTable {
}; };
static constexpr Continuation const *beginIterTable[] = { static constexpr Continuation const *beginIterTable[] = {
BeginIterTable<Node0>::table, BeginIterTable<Node3>::table, nullptr,
BeginIterTable<Node16>::table, BeginIterTable<Node48>::table, BeginIterTable<Node3>::table,
BeginIterTable<Node16>::table,
BeginIterTable<Node48>::table,
BeginIterTable<Node256>::table, BeginIterTable<Node256>::table,
}; };
@@ -4229,8 +4135,10 @@ template <class NodeTFrom> struct EndIterTable {
}; };
static constexpr Continuation const *endIterTable[] = { static constexpr Continuation const *endIterTable[] = {
EndIterTable<Node0>::table, EndIterTable<Node3>::table, nullptr,
EndIterTable<Node16>::table, EndIterTable<Node48>::table, EndIterTable<Node3>::table,
EndIterTable<Node16>::table,
EndIterTable<Node48>::table,
EndIterTable<Node256>::table, EndIterTable<Node256>::table,
}; };
@@ -4278,7 +4186,6 @@ void prefixIter(Job *job, Context *context) {
goto gotoEndIter; goto gotoEndIter;
} else { } else {
job->continuation = BeginIterTable<NodeTTo>::table[job->child.getType()]; job->continuation = BeginIterTable<NodeTTo>::table[job->child.getType()];
__builtin_prefetch(job->child);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} }
} }
@@ -4290,7 +4197,6 @@ void prefixIter(Job *job, Context *context) {
} }
job->continuation = PrefixIterTable<NodeTTo>::table[job->child.getType()]; job->continuation = PrefixIterTable<NodeTTo>::table[job->child.getType()];
__builtin_prefetch(job->child);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
noNodeOnSearchPath: { noNodeOnSearchPath: {
@@ -4317,7 +4223,6 @@ gotoEndIter:
MUSTTAIL return complete(job, context); MUSTTAIL return complete(job, context);
} else { } else {
job->continuation = EndIterTable<NodeTTo>::table[job->child.getType()]; job->continuation = EndIterTable<NodeTTo>::table[job->child.getType()];
__builtin_prefetch(job->child);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} }
} }
@@ -4365,7 +4270,6 @@ void beginIter(Job *job, Context *context) {
} }
job->continuation = BeginIterTable<NodeTTo>::table[job->child.getType()]; job->continuation = BeginIterTable<NodeTTo>::table[job->child.getType()];
__builtin_prefetch(job->child);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
gotoEndIter: gotoEndIter:
@@ -4432,7 +4336,6 @@ void endIter(Job *job, Context *context) {
} }
job->continuation = EndIterTable<NodeTTo>::table[job->child.getType()]; job->continuation = EndIterTable<NodeTTo>::table[job->child.getType()];
__builtin_prefetch(job->child);
MUSTTAIL return keepGoing(job, context); MUSTTAIL return keepGoing(job, context);
} }
@@ -5265,6 +5168,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
range_writes_total.add(writeContext.accum.range_writes); range_writes_total.add(writeContext.accum.range_writes);
nodes_allocated_total.add(writeContext.accum.nodes_allocated); nodes_allocated_total.add(writeContext.accum.nodes_allocated);
nodes_released_total.add(writeContext.accum.nodes_released); nodes_released_total.add(writeContext.accum.nodes_released);
nodes_resized_total.add(writeContext.accum.nodes_resized);
entries_inserted_total.add(writeContext.accum.entries_inserted); entries_inserted_total.add(writeContext.accum.entries_inserted);
entries_erased_total.add(writeContext.accum.entries_erased); entries_erased_total.add(writeContext.accum.entries_erased);
insert_iterations_total.add(writeContext.accum.insert_iterations); insert_iterations_total.add(writeContext.accum.insert_iterations);
@@ -5393,6 +5297,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
std::exchange(writeContext.accum.nodes_allocated, 0)); std::exchange(writeContext.accum.nodes_allocated, 0));
nodes_released_total.add( nodes_released_total.add(
std::exchange(writeContext.accum.nodes_released, 0)); std::exchange(writeContext.accum.nodes_released, 0));
nodes_resized_total.add(std::exchange(writeContext.accum.nodes_resized, 0));
entries_inserted_total.add( entries_inserted_total.add(
std::exchange(writeContext.accum.entries_inserted, 0)); std::exchange(writeContext.accum.entries_inserted, 0));
entries_erased_total.add( entries_erased_total.add(
@@ -5418,7 +5323,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
keyUpdates = 10; keyUpdates = 10;
// Insert "" // Insert ""
root = writeContext.allocate<Node0>(0, 0); root = writeContext.allocate<Node0>(0);
root->numChildren = 0; root->numChildren = 0;
root->parent = nullptr; root->parent = nullptr;
root->entryPresent = false; root->entryPresent = false;
@@ -5511,6 +5416,9 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
"The total number of physical tree nodes allocated"); "The total number of physical tree nodes allocated");
COUNTER(nodes_released_total, COUNTER(nodes_released_total,
"The total number of physical tree nodes released"); "The total number of physical tree nodes released");
COUNTER(nodes_resized_total,
"The total number of physical tree nodes that have been resized to "
"account for partial key capacity changes");
COUNTER(insert_iterations_total, COUNTER(insert_iterations_total,
"The total number of iterations of the main loop for insertion. " "The total number of iterations of the main loop for insertion. "
"Includes searches where the entry already existed, and so insertion " "Includes searches where the entry already existed, and so insertion "

View File

@@ -19,30 +19,30 @@ InstalledDir: /usr/lib/llvm-20/bin
| ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark | ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark
|--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:----------
| 169.16 | 5,911,582.44 | 0.0% | 3,014.03 | 855.12 | 3.525 | 504.59 | 0.0% | 2.02 | `point reads` | 175.88 | 5,685,575.81 | 0.0% | 3,014.03 | 883.13 | 3.413 | 504.59 | 0.0% | 2.10 | `point reads`
| 167.17 | 5,981,796.19 | 0.0% | 2,954.16 | 845.14 | 3.495 | 490.17 | 0.0% | 2.00 | `prefix reads` | 173.35 | 5,768,718.52 | 0.0% | 2,954.16 | 869.59 | 3.397 | 490.17 | 0.0% | 2.07 | `prefix reads`
| 250.44 | 3,992,954.35 | 0.1% | 3,592.41 | 1,265.18 | 2.839 | 629.31 | 0.0% | 2.99 | `range reads` | 251.74 | 3,972,274.68 | 0.1% | 3,592.41 | 1,265.71 | 2.838 | 629.31 | 0.0% | 3.01 | `range reads`
| 467.10 | 2,140,846.36 | 0.0% | 4,450.57 | 2,488.36 | 1.789 | 707.92 | 2.1% | 5.62 | `point writes` | 472.13 | 2,118,079.83 | 0.2% | 4,450.57 | 2,377.84 | 1.872 | 707.92 | 2.3% | 5.60 | `point writes`
| 465.18 | 2,149,723.11 | 0.2% | 4,410.22 | 2,474.92 | 1.782 | 694.74 | 2.1% | 5.55 | `prefix writes` | 471.34 | 2,121,609.91 | 0.0% | 4,410.22 | 2,372.55 | 1.859 | 694.74 | 2.3% | 5.61 | `prefix writes`
| 297.45 | 3,361,954.05 | 0.1% | 2,315.38 | 1,581.64 | 1.464 | 396.69 | 3.3% | 3.57 | `range writes` | 306.69 | 3,260,650.68 | 0.0% | 2,315.38 | 1,550.33 | 1.493 | 396.69 | 3.3% | 3.68 | `range writes`
| 476.56 | 2,098,370.82 | 1.0% | 6,999.33 | 2,492.26 | 2.808 | 1,251.74 | 1.3% | 0.06 | `monotonic increasing point writes` | 502.70 | 1,989,277.15 | 0.9% | 6,999.33 | 2,527.47 | 2.769 | 1,251.74 | 1.3% | 0.06 | `monotonic increasing point writes`
| 129,455.00 | 7,724.69 | 1.0% | 807,446.67 | 698,559.40 | 1.156 | 144,584.60 | 0.8% | 0.01 | `worst case for radix tree` | 138,097.67 | 7,241.25 | 0.7% | 807,445.67 | 699,899.00 | 1.154 | 144,584.50 | 0.8% | 0.01 | `worst case for radix tree`
| 44.67 | 22,384,996.63 | 0.5% | 902.00 | 235.18 | 3.835 | 132.00 | 0.0% | 0.01 | `create and destroy` | 46.29 | 21,605,126.00 | 1.0% | 902.00 | 230.73 | 3.909 | 132.00 | 0.0% | 0.01 | `create and destroy`
## Radix tree (this implementation) ## Radix tree (this implementation)
| ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark | ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark
|--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:----------
| 14.11 | 70,857,435.19 | 0.1% | 247.13 | 71.03 | 3.479 | 32.64 | 0.8% | 0.17 | `point reads` | 14.04 | 71,216,855.01 | 0.1% | 245.98 | 70.38 | 3.495 | 31.49 | 0.6% | 0.17 | `point reads`
| 15.63 | 63,997,306.79 | 0.0% | 299.99 | 78.59 | 3.817 | 42.50 | 0.4% | 0.19 | `prefix reads` | 14.57 | 68,630,592.89 | 0.1% | 297.83 | 72.93 | 4.084 | 40.34 | 0.5% | 0.17 | `prefix reads`
| 36.24 | 27,590,266.59 | 0.1% | 782.70 | 182.21 | 4.296 | 106.65 | 0.2% | 0.43 | `range reads` | 35.28 | 28,347,588.39 | 0.2% | 783.70 | 176.57 | 4.438 | 107.65 | 0.2% | 0.42 | `range reads`
| 22.72 | 44,004,627.40 | 0.1% | 376.04 | 114.33 | 3.289 | 49.97 | 0.8% | 0.27 | `point writes` | 20.64 | 48,453,280.08 | 0.0% | 378.97 | 103.31 | 3.668 | 49.92 | 0.7% | 0.25 | `point writes`
| 40.83 | 24,494,110.04 | 0.0% | 666.07 | 205.35 | 3.244 | 101.33 | 0.3% | 0.49 | `prefix writes` | 39.14 | 25,551,096.17 | 0.0% | 672.00 | 196.08 | 3.427 | 101.28 | 0.3% | 0.47 | `prefix writes`
| 43.45 | 23,016,324.00 | 0.0% | 732.33 | 218.41 | 3.353 | 111.64 | 0.1% | 0.53 | `range writes` | 39.88 | 25,075,121.85 | 0.0% | 738.26 | 199.71 | 3.697 | 111.59 | 0.1% | 0.49 | `range writes`
| 81.46 | 12,276,650.63 | 3.6% | 1,458.85 | 411.52 | 3.545 | 280.42 | 0.1% | 0.01 | `monotonic increasing point writes` | 79.36 | 12,601,312.65 | 0.5% | 1,447.65 | 398.77 | 3.630 | 275.61 | 0.1% | 0.01 | `monotonic increasing point writes`
| 314,217.00 | 3,182.51 | 1.2% | 4,043,063.50 | 1,593,715.00 | 2.537 | 714,828.00 | 0.1% | 0.01 | `worst case for radix tree` | 307,037.33 | 3,256.93 | 0.3% | 4,043,060.50 | 1,549,199.33 | 2.610 | 714,828.00 | 0.1% | 0.01 | `worst case for radix tree`
| 106.79 | 9,364,602.60 | 0.5% | 2,046.00 | 539.75 | 3.791 | 329.00 | 0.0% | 0.01 | `create and destroy` | 97.00 | 10,309,246.47 | 0.2% | 1,986.00 | 487.06 | 4.078 | 315.00 | 0.0% | 0.01 | `create and destroy`
# "Real data" test # "Real data" test
@@ -51,13 +51,13 @@ Point queries only. Gc ratio is the ratio of time spent doing garbage collection
## skip list ## skip list
``` ```
Check: 4.62434 seconds, 364.633 MB/s, Add: 3.90399 seconds, 147.371 MB/s, Gc ratio: 33.6898%, Peak idle memory: 5.61007e+06 Check: 4.66163 seconds, 361.716 MB/s, Add: 3.97534 seconds, 144.726 MB/s, Gc ratio: 33.3998%, Peak idle memory: 5.61007e+06
``` ```
## radix tree ## radix tree
``` ```
Check: 0.956689 seconds, 1762.52 MB/s, Add: 1.35744 seconds, 423.84 MB/s, Gc ratio: 35.0946%, Peak idle memory: 2.32922e+06 Check: 1.01776 seconds, 1656.77 MB/s, Add: 1.2718 seconds, 452.38 MB/s, Gc ratio: 38.626%, Peak idle memory: 2.05667e+06
``` ```
## hash table ## hash table
@@ -65,6 +65,6 @@ Check: 0.956689 seconds, 1762.52 MB/s, Add: 1.35744 seconds, 423.84 MB/s, Gc rat
(The hash table implementation doesn't work on range queries, and its purpose is to provide an idea of how fast point queries can be) (The hash table implementation doesn't work on range queries, and its purpose is to provide an idea of how fast point queries can be)
``` ```
Check: 0.799863 seconds, 2108.09 MB/s, Add: 0.667736 seconds, 861.621 MB/s, Gc ratio: 35.0666%, Peak idle memory: 0 Check: 0.859188 seconds, 1962.54 MB/s, Add: 0.714174 seconds, 805.596 MB/s, Gc ratio: 34.9552%, Peak idle memory: 0
``` ```