7 Commits

SHA1 Message Date
bcbae026b2 Update README
Some checks failed
Tests / 64 bit versions total: 8220, passed: 8220
Tests / Debug total: 8218, failed: 28, passed: 8190
Tests / SIMD fallback total: 8220, passed: 8220
weaselab/conflict-set/pipeline/head There was a failure building this commit
2024-11-20 14:53:57 -08:00
e125b599b5 Remove freeList, min/max capacity tracking
The free list doesn't seem to get a good hit rate. Policies other than
capacity = minCapacity did not improve the rate at which we were resizing
nodes, but did increase memory usage, so get rid of capacity tracking too.
Add a nodes_resized_total counter.
2024-11-20 14:45:56 -08:00
3f4d3b685a More valgrind annotations 2024-11-20 13:36:30 -08:00
4198b8b090 Some prep for leaves-in-parents 2024-11-20 12:20:11 -08:00
8757d2387c Call prefetch within TaggedNodePointer::getType
Instead of at every call site
2024-11-20 12:07:32 -08:00
4a22b95d53 Remove state machine transitions "from" Node0
Those aren't used
2024-11-20 11:46:45 -08:00
03d6c7e471 Allocate maxCapacity instead of minCapacity 2024-11-19 16:13:57 -08:00
2 changed files with 119 additions and 211 deletions


@@ -219,6 +219,8 @@ template <class T> struct NodeAllocator;
struct TaggedNodePointer {
TaggedNodePointer() = default;
operator struct Node *() { return (struct Node *)withoutType(); }
// In preparation for when we might store leaves directly in parents
struct Node *asNodeUnsafe() { return (struct Node *)withoutType(); }
operator struct Node0 *() {
assert(getType() == Type_Node0);
return (struct Node0 *)withoutType();
@@ -262,6 +264,11 @@ struct TaggedNodePointer {
TaggedNodePointer &operator=(const TaggedNodePointer &) = default;
/*implicit*/ TaggedNodePointer(Node *n);
void prefetch() {
// __builtin_prefetch is safe even if the argument isn't a valid address
__builtin_prefetch((void *)withoutType());
}
private:
TaggedNodePointer(struct Node *p, Type t) : p((uintptr_t)p) {
assert((this->p & 7) == 0);
@@ -313,6 +320,7 @@ TaggedNodePointer::TaggedNodePointer(Node *n)
Type TaggedNodePointer::getType() {
assert(p != 0);
prefetch();
return Type(p & uintptr_t(7));
}
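
A note on the encoding these hunks rely on: nodes are at least 8-byte aligned, so the low 3 bits of a node pointer are free to hold the node's type, and getType now doubles as a prefetch of the node itself (per commit 8757d2387c, instead of prefetching at every call site). A minimal sketch of the scheme, with a simplified Type enum and an opaque Node standing in for the real definitions:

```
#include <cassert>
#include <cstdint>

enum Type : uintptr_t { Type_Node0, Type_Node3, Type_Node16, Type_Node48, Type_Node256 };
struct Node; // opaque in this sketch

struct TaggedPtrSketch {
  uintptr_t p;
  TaggedPtrSketch(Node *n, Type t) : p((uintptr_t)n | (uintptr_t)t) {
    assert(((uintptr_t)n & 7) == 0); // 8-byte alignment leaves 3 tag bits free
  }
  Node *withoutType() const { return (Node *)(p & ~uintptr_t(7)); }
  Type getType() const {
    // Piggyback a prefetch on every type dispatch so the node's first cache
    // line is warm by the time the caller dereferences it
    __builtin_prefetch((void *)(p & ~uintptr_t(7)));
    return Type(p & uintptr_t(7));
  }
};
```
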
@@ -694,27 +702,13 @@ constexpr int getMaxCapacity(Node *self) {
self->partialKeyLen);
}
#ifdef __APPLE__
// Disabling the free list altogether is faster on my M1 Mac
constexpr int64_t kMaxFreeListBytes = 0;
#else
constexpr int64_t kMaxFreeListBytes = 1 << 20;
#endif
// Maintains a free list up to kMaxFreeListBytes. If the top element of the list
// doesn't meet the capacity constraints, it's freed and a new node is allocated
// with the minimum capacity. The hope is that "unfit" nodes don't get stuck in
// the free list.
template <class T> struct NodeAllocator {
static_assert(std::derived_from<T, Node>);
static_assert(std::is_trivial_v<T>);
T *allocate(int minCapacity, int maxCapacity) {
assert(minCapacity <= maxCapacity);
assert(freeListSize >= 0);
assert(freeListSize <= kMaxFreeListBytes);
T *result = allocate_helper(minCapacity, maxCapacity);
T *allocate(int capacity) {
T *result = allocate_helper(capacity);
result->endOfRange = false;
result->releaseDeferred = false;
if constexpr (!std::is_same_v<T, Node0>) {
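
For reference, a compact sketch of the bounded free-list policy this commit deletes (described by the removed comment above): reuse the head node if its capacity fits the request, otherwise free it so "unfit" nodes can't linger. The FLNode struct is illustrative; parent and partialKeyCapacity follow the diff:

```
#include <cstdint>
#include <cstdlib>

struct FLNode {             // stand-in for the diff's node header
  FLNode *parent;           // doubles as the free-list link, as in the diff
  int partialKeyCapacity;
};

struct BoundedFreeList {
  FLNode *head = nullptr;
  int64_t bytes = 0;
  static constexpr int64_t kMaxBytes = 1 << 20; // kMaxFreeListBytes

  FLNode *tryTake(int minCapacity, int maxCapacity) {
    if (head == nullptr)
      return nullptr;
    FLNode *top = head;
    head = top->parent;
    bytes -= sizeof(FLNode) + top->partialKeyCapacity;
    if (top->partialKeyCapacity >= minCapacity &&
        top->partialKeyCapacity <= maxCapacity)
      return top;   // fits: reuse it
    free(top);      // unfit: drop it rather than let it get stuck in the list
    return nullptr; // caller falls back to a fresh minCapacity allocation
  }

  bool tryPut(FLNode *p) {
    if (bytes + sizeof(FLNode) + p->partialKeyCapacity > kMaxBytes)
      return false; // over budget: caller frees immediately instead
    p->parent = head;
    head = p;
    bytes += sizeof(FLNode) + p->partialKeyCapacity;
    return true;
  }
};
```

Per the commit message, the hit rate never justified this bookkeeping, so the new code allocates and frees directly.
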
@@ -734,47 +728,19 @@ template <class T> struct NodeAllocator {
}
void release(T *p) {
assume(p->partialKeyCapacity >= 0);
assume(freeListSize >= 0);
if (freeListSize + sizeof(T) + p->partialKeyCapacity > kMaxFreeListBytes) {
removeNode(p);
return safe_free(p, sizeof(T) + p->partialKeyCapacity);
}
p->parent = freeList;
freeList = p;
freeListSize += sizeof(T) + p->partialKeyCapacity;
VALGRIND_MAKE_MEM_NOACCESS(p, sizeof(T) + p->partialKeyCapacity);
removeNode(p);
return safe_free(p, sizeof(T) + p->partialKeyCapacity);
}
void deferRelease(T *p, Node *forwardTo) {
p->releaseDeferred = true;
p->forwardTo = forwardTo;
if (freeListSize + sizeof(T) + p->partialKeyCapacity > kMaxFreeListBytes) {
p->parent = deferredListOverflow;
deferredListOverflow = p;
} else {
if (deferredList == nullptr) {
deferredListFront = p;
}
p->parent = deferredList;
deferredList = p;
freeListSize += sizeof(T) + p->partialKeyCapacity;
}
p->parent = deferredList;
deferredList = p;
}
void releaseDeferred() {
if (deferredList != nullptr) {
deferredListFront->parent = freeList;
#ifndef NVALGRIND
for (auto *iter = deferredList; iter != freeList;) {
auto *tmp = iter;
iter = (T *)iter->parent;
VALGRIND_MAKE_MEM_NOACCESS(tmp, sizeof(T) + tmp->partialKeyCapacity);
}
#endif
freeList = std::exchange(deferredList, nullptr);
}
for (T *n = std::exchange(deferredListOverflow, nullptr); n != nullptr;) {
for (T *n = std::exchange(deferredList, nullptr); n != nullptr;) {
auto *tmp = n;
n = (T *)n->parent;
release(tmp);
@@ -788,53 +754,15 @@ template <class T> struct NodeAllocator {
NodeAllocator(NodeAllocator &&) = delete;
NodeAllocator &operator=(NodeAllocator &&) = delete;
~NodeAllocator() {
assert(deferredList == nullptr);
assert(deferredListOverflow == nullptr);
for (T *iter = freeList; iter != nullptr;) {
VALGRIND_MAKE_MEM_DEFINED(iter, sizeof(T));
auto *tmp = iter;
iter = (T *)iter->parent;
removeNode(tmp);
safe_free(tmp, sizeof(T) + tmp->partialKeyCapacity);
}
}
~NodeAllocator() { assert(deferredList == nullptr); }
private:
int64_t freeListSize = 0;
T *freeList = nullptr;
T *deferredList = nullptr;
// Used to concatenate deferredList to freeList
T *deferredListFront;
T *deferredListOverflow = nullptr;
T *allocate_helper(int minCapacity, int maxCapacity) {
if (freeList != nullptr) {
VALGRIND_MAKE_MEM_DEFINED(freeList, sizeof(T));
freeListSize -= sizeof(T) + freeList->partialKeyCapacity;
assume(freeList->partialKeyCapacity >= 0);
assume(minCapacity >= 0);
assume(minCapacity <= maxCapacity);
if (freeList->partialKeyCapacity >= minCapacity &&
freeList->partialKeyCapacity <= maxCapacity) {
auto *result = freeList;
freeList = (T *)freeList->parent;
VALGRIND_MAKE_MEM_UNDEFINED(result,
sizeof(T) + result->partialKeyCapacity);
VALGRIND_MAKE_MEM_DEFINED(&result->partialKeyCapacity,
sizeof(result->partialKeyCapacity));
VALGRIND_MAKE_MEM_DEFINED(&result->type, sizeof(result->type));
return result;
} else {
auto *p = freeList;
freeList = (T *)p->parent;
removeNode(p);
safe_free(p, sizeof(T) + p->partialKeyCapacity);
}
}
auto *result = (T *)safe_malloc(sizeof(T) + minCapacity);
T *allocate_helper(int capacity) {
auto *result = (T *)safe_malloc(sizeof(T) + capacity);
result->type = T::kType;
result->partialKeyCapacity = minCapacity;
result->partialKeyCapacity = capacity;
addNode(result);
return result;
}
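
After the change, the allocator reduces to a malloc/free pair plus one deferred list; a minimal sketch of the surviving shape, assuming safe_malloc/safe_free are checked wrappers around malloc/free and that forwardTo records a released node's replacement (presumably so in-flight traversals can follow it):

```
#include <cstdlib>
#include <utility>

struct DNode {              // stand-in for the diff's node header
  DNode *parent;            // doubles as the deferred-list link
  DNode *forwardTo;
  bool releaseDeferred;
  int partialKeyCapacity;
};

struct SimpleAllocator {
  DNode *deferredList = nullptr;

  DNode *allocate(int capacity) {
    // Exact-capacity allocation; there is no free list to consult anymore
    DNode *n = (DNode *)malloc(sizeof(DNode) + capacity);
    n->partialKeyCapacity = capacity;
    n->releaseDeferred = false;
    return n;
  }

  void release(DNode *p) { free(p); }

  // Mark p as superseded by forwardTo; actually free it later, once no
  // traversal can still be holding it
  void deferRelease(DNode *p, DNode *forwardTo) {
    p->releaseDeferred = true;
    p->forwardTo = forwardTo;
    p->parent = deferredList;
    deferredList = p;
  }

  void releaseDeferred() {
    for (DNode *n = std::exchange(deferredList, nullptr); n != nullptr;) {
      DNode *tmp = n;
      n = n->parent;
      release(tmp);
    }
  }
};
```
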
@@ -890,6 +818,7 @@ struct WriteContext {
int64_t point_writes;
int64_t range_writes;
int64_t write_bytes;
int64_t nodes_resized;
} accum;
#if USE_64_BIT
@@ -902,19 +831,19 @@ struct WriteContext {
WriteContext() { memset(&accum, 0, sizeof(accum)); }
template <class T> T *allocate(int minCapacity, int maxCapacity) {
template <class T> T *allocate(int capacity) {
static_assert(!std::is_same_v<T, Node>);
++accum.nodes_allocated;
if constexpr (std::is_same_v<T, Node0>) {
return node0.allocate(minCapacity, maxCapacity);
return node0.allocate(capacity);
} else if constexpr (std::is_same_v<T, Node3>) {
return node3.allocate(minCapacity, maxCapacity);
return node3.allocate(capacity);
} else if constexpr (std::is_same_v<T, Node16>) {
return node16.allocate(minCapacity, maxCapacity);
return node16.allocate(capacity);
} else if constexpr (std::is_same_v<T, Node48>) {
return node48.allocate(minCapacity, maxCapacity);
return node48.allocate(capacity);
} else if constexpr (std::is_same_v<T, Node256>) {
return node256.allocate(minCapacity, maxCapacity);
return node256.allocate(capacity);
}
}
template <class T> void release(T *c) {
@@ -1415,12 +1344,12 @@ TaggedNodePointer getChildGeq(Node *self, int child) {
TaggedNodePointer getFirstChild(Node0 *) { return nullptr; }
TaggedNodePointer getFirstChild(Node3 *self) {
// Improves scan performance
__builtin_prefetch(self->children[1]);
self->children[1].prefetch();
return self->children[0];
}
TaggedNodePointer getFirstChild(Node16 *self) {
// Improves scan performance
__builtin_prefetch(self->children[1]);
self->children[1].prefetch();
return self->children[0];
}
TaggedNodePointer getFirstChild(Node48 *self) {
@@ -1463,14 +1392,14 @@ void consumePartialKeyFull(TaggedNodePointer &self, TrivialSpan &key,
int partialKeyIndex =
longestCommonPrefix(self->partialKey(), key.data(), commonLen);
if (partialKeyIndex < self->partialKeyLen) {
Node *old = self;
// Safety: self has a partial key so it can't be a leaf
Node *old = self.asNodeUnsafe();
// Since the root cannot have a partial key, old cannot be the root
assert(old->parent != nullptr);
InternalVersionT oldMaxVersion = exchangeMaxVersion(old, writeVersion);
// *self will have one child (old)
auto *newSelf = writeContext->allocate<Node3>(
partialKeyIndex, getMaxCapacity(1, 0, partialKeyIndex));
auto *newSelf = writeContext->allocate<Node3>(partialKeyIndex);
newSelf->parent = old->parent;
newSelf->parentsIndex = old->parentsIndex;
@@ -1491,6 +1420,8 @@ void consumePartialKeyFull(TaggedNodePointer &self, TrivialSpan &key,
memmove(old->partialKey(), old->partialKey() + partialKeyIndex + 1,
old->partialKeyLen - (partialKeyIndex + 1));
old->partialKeyLen -= partialKeyIndex + 1;
VALGRIND_MAKE_MEM_UNDEFINED(old->partialKey() + old->partialKeyLen,
partialKeyIndex + 1);
// Maintain memory capacity invariant
maybeDecreaseCapacity(old, writeContext, impl);
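
The hunk above is the radix tree's path-compression split: when a key diverges partway through a node's partial key, a new Node3 takes over the shared prefix (and the node's place in the tree), while the old node keeps the suffix past the diverging byte, which becomes its index in the new parent. A schematic of the byte bookkeeping with a simplified fixed-size node; child wiring and version tracking are omitted:

```
#include <cstring>

struct PKNode {
  PKNode *parent;
  int partialKeyLen;
  unsigned char partialKey[32]; // inline and fixed-size for this sketch only
};

// key and node->partialKey agree on exactly the first `common` bytes;
// split node so `prefix` holds those bytes and node keeps the remainder
void splitPartialKey(PKNode *&node, int common, PKNode *prefix) {
  memcpy(prefix->partialKey, node->partialKey, common);
  prefix->partialKeyLen = common;
  prefix->parent = node->parent; // prefix takes node's place in the tree
  // The byte at index `common` becomes node's index within prefix's
  // children, so node's remaining partial key starts one past it
  memmove(node->partialKey, node->partialKey + common + 1,
          node->partialKeyLen - (common + 1));
  node->partialKeyLen -= common + 1;
  node->parent = prefix;
  node = prefix;
}
```

The VALGRIND_MAKE_MEM_UNDEFINED call added in the hunk then poisons the tail bytes the memmove vacated, so stale reads of them are caught under Memcheck.
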
@@ -1572,8 +1503,7 @@ TaggedNodePointer &getOrCreateChild(TaggedNodePointer &self, TrivialSpan &key,
__builtin_unreachable(); // GCOVR_EXCL_LINE
}
auto *newChild = writeContext->allocate<Node0>(
key.size(), getMaxCapacity(0, 1, key.size()));
auto *newChild = writeContext->allocate<Node0>(key.size());
newChild->numChildren = 0;
newChild->entryPresent = false; // Will be set to true by the caller
newChild->partialKeyLen = key.size();
@@ -1585,8 +1515,7 @@ TaggedNodePointer &getOrCreateChild(TaggedNodePointer &self, TrivialSpan &key,
case Type_Node0: {
auto *self0 = static_cast<Node0 *>(self);
auto *newSelf = writeContext->allocate<Node3>(
self->partialKeyLen, getMaxCapacity(1, 1, self->partialKeyLen));
auto *newSelf = writeContext->allocate<Node3>(self->partialKeyLen);
newSelf->copyChildrenAndKeyFrom(*self0);
writeContext->deferRelease(self0, newSelf);
self = newSelf;
@@ -1596,9 +1525,7 @@ TaggedNodePointer &getOrCreateChild(TaggedNodePointer &self, TrivialSpan &key,
case Type_Node3: {
if (self->numChildren == Node3::kMaxNodes) {
auto *self3 = static_cast<Node3 *>(self);
auto *newSelf = writeContext->allocate<Node16>(
self->partialKeyLen,
getMaxCapacity(4, self->entryPresent, self->partialKeyLen));
auto *newSelf = writeContext->allocate<Node16>(self->partialKeyLen);
newSelf->copyChildrenAndKeyFrom(*self3);
writeContext->deferRelease(self3, newSelf);
self = newSelf;
@@ -1621,15 +1548,14 @@ TaggedNodePointer &getOrCreateChild(TaggedNodePointer &self, TrivialSpan &key,
self3->childMaxVersion[i + 1] = newMaxVersion;
result = newChild;
++self->numChildren;
newChild->parent = self;
// Safety: self has a child so it can't be a leaf
newChild->parent = self.asNodeUnsafe();
return result;
}
case Type_Node16: {
if (self->numChildren == Node16::kMaxNodes) {
auto *self16 = static_cast<Node16 *>(self);
auto *newSelf = writeContext->allocate<Node48>(
self->partialKeyLen,
getMaxCapacity(17, self->entryPresent, self->partialKeyLen));
auto *newSelf = writeContext->allocate<Node48>(self->partialKeyLen);
newSelf->copyChildrenAndKeyFrom(*self16);
writeContext->deferRelease(self16, newSelf);
self = newSelf;
@@ -1653,16 +1579,15 @@ TaggedNodePointer &getOrCreateChild(TaggedNodePointer &self, TrivialSpan &key,
self16->childMaxVersion[i + 1] = newMaxVersion;
result = newChild;
++self->numChildren;
newChild->parent = self;
// Safety: self has a child so it can't be a leaf
newChild->parent = self.asNodeUnsafe();
return result;
}
case Type_Node48: {
if (self->numChildren == 48) {
auto *self48 = static_cast<Node48 *>(self);
auto *newSelf = writeContext->allocate<Node256>(
self->partialKeyLen,
getMaxCapacity(49, self->entryPresent, self->partialKeyLen));
auto *newSelf = writeContext->allocate<Node256>(self->partialKeyLen);
newSelf->copyChildrenAndKeyFrom(*self48);
writeContext->deferRelease(self48, newSelf);
self = newSelf;
@@ -1680,7 +1605,8 @@ TaggedNodePointer &getOrCreateChild(TaggedNodePointer &self, TrivialSpan &key,
self48->maxOfMax[nextFree >> Node48::kMaxOfMaxShift] = std::max(
newMaxVersion, self48->maxOfMax[nextFree >> Node48::kMaxOfMaxShift]);
result = newChild;
newChild->parent = self;
// Safety: self has a child so it can't be a leaf
newChild->parent = self.asNodeUnsafe();
return result;
}
case Type_Node256: {
@@ -1694,7 +1620,8 @@ TaggedNodePointer &getOrCreateChild(TaggedNodePointer &self, TrivialSpan &key,
self256->maxOfMax[index >> Node256::kMaxOfMaxShift] = std::max(
newMaxVersion, self256->maxOfMax[index >> Node256::kMaxOfMaxShift]);
result = newChild;
newChild->parent = self;
// Safety: self has a child so it can't be a leaf
newChild->parent = self.asNodeUnsafe();
return result;
}
default: // GCOVR_EXCL_LINE
@@ -1744,13 +1671,13 @@ downLeftSpine:
return node;
}
void freeAndMakeCapacityBetween(Node *&self, int minCapacity, int maxCapacity,
WriteContext *writeContext,
ConflictSet::Impl *impl) {
void freeAndMakeCapacity(Node *&self, int capacity, WriteContext *writeContext,
ConflictSet::Impl *impl) {
++writeContext->accum.nodes_resized;
switch (self->getType()) {
case Type_Node0: {
auto *self0 = (Node0 *)self;
auto *newSelf = writeContext->allocate<Node0>(minCapacity, maxCapacity);
auto *newSelf = writeContext->allocate<Node0>(capacity);
newSelf->copyChildrenAndKeyFrom(*self0);
getInTree(self, impl) = newSelf;
writeContext->deferRelease(self0, newSelf);
@@ -1758,7 +1685,7 @@ void freeAndMakeCapacityBetween(Node *&self, int minCapacity, int maxCapacity,
} break;
case Type_Node3: {
auto *self3 = (Node3 *)self;
auto *newSelf = writeContext->allocate<Node3>(minCapacity, maxCapacity);
auto *newSelf = writeContext->allocate<Node3>(capacity);
newSelf->copyChildrenAndKeyFrom(*self3);
getInTree(self, impl) = newSelf;
writeContext->deferRelease(self3, newSelf);
@@ -1766,7 +1693,7 @@ void freeAndMakeCapacityBetween(Node *&self, int minCapacity, int maxCapacity,
} break;
case Type_Node16: {
auto *self16 = (Node16 *)self;
auto *newSelf = writeContext->allocate<Node16>(minCapacity, maxCapacity);
auto *newSelf = writeContext->allocate<Node16>(capacity);
newSelf->copyChildrenAndKeyFrom(*self16);
getInTree(self, impl) = newSelf;
writeContext->deferRelease(self16, newSelf);
@@ -1774,7 +1701,7 @@ void freeAndMakeCapacityBetween(Node *&self, int minCapacity, int maxCapacity,
} break;
case Type_Node48: {
auto *self48 = (Node48 *)self;
auto *newSelf = writeContext->allocate<Node48>(minCapacity, maxCapacity);
auto *newSelf = writeContext->allocate<Node48>(capacity);
newSelf->copyChildrenAndKeyFrom(*self48);
getInTree(self, impl) = newSelf;
writeContext->deferRelease(self48, newSelf);
@@ -1782,7 +1709,7 @@ void freeAndMakeCapacityBetween(Node *&self, int minCapacity, int maxCapacity,
} break;
case Type_Node256: {
auto *self256 = (Node256 *)self;
auto *newSelf = writeContext->allocate<Node256>(minCapacity, maxCapacity);
auto *newSelf = writeContext->allocate<Node256>(capacity);
newSelf->copyChildrenAndKeyFrom(*self256);
getInTree(self, impl) = newSelf;
writeContext->deferRelease(self256, newSelf);
@@ -1807,8 +1734,7 @@ void maybeDecreaseCapacity(Node *&self, WriteContext *writeContext,
if (self->getCapacity() <= maxCapacity) {
return;
}
freeAndMakeCapacityBetween(self, self->partialKeyLen, maxCapacity,
writeContext, impl);
freeAndMakeCapacity(self, self->partialKeyLen, writeContext, impl);
}
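
maybeDecreaseCapacity enforces the memory invariant referenced above: a node whose allocated partial-key capacity exceeds what its current contents warrant gets rebuilt at exactly partialKeyLen. A sketch under a hypothetical slack policy (the real bound comes from getMaxCapacity, which this diff doesn't show):

```
#include <cstdlib>
#include <cstring>

struct CapNode {
  int partialKeyLen;
  int partialKeyCapacity; // key bytes follow the header in the real layout
};

// Hypothetical stand-in for the diff's getMaxCapacity bound
int maxAllowedCapacity(int partialKeyLen) { return 2 * partialKeyLen + 16; }

void maybeShrink(CapNode *&self) {
  if (self->partialKeyCapacity <= maxAllowedCapacity(self->partialKeyLen))
    return; // within bounds; keep the node as-is
  // Oversized: rebuild at the exact length (freeAndMakeCapacity in the
  // diff), which is what the new nodes_resized counter tallies
  CapNode *smaller = (CapNode *)malloc(sizeof(CapNode) + self->partialKeyLen);
  memcpy(smaller, self, sizeof(CapNode) + self->partialKeyLen);
  smaller->partialKeyCapacity = self->partialKeyLen;
  free(self); // the real code defer-releases and repairs the tree links
  self = smaller;
}
```
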
#if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__)
@@ -1882,12 +1808,9 @@ void mergeWithChild(TaggedNodePointer &self, WriteContext *writeContext,
assert(!self3->entryPresent);
Node *child = self3->children[0];
const int minCapacity = self3->partialKeyLen + 1 + child->partialKeyLen;
const int maxCapacity =
getMaxCapacity(child->numChildren, child->entryPresent, minCapacity);
if (minCapacity > child->getCapacity()) {
freeAndMakeCapacityBetween(child, minCapacity, maxCapacity, writeContext,
impl);
freeAndMakeCapacity(child, minCapacity, writeContext, impl);
}
// Merge partial key with child
@@ -1914,7 +1837,7 @@ void mergeWithChild(TaggedNodePointer &self, WriteContext *writeContext,
setMaxVersion(child, std::max(childMaxVersion, writeContext->zero));
self = child;
writeContext->deferRelease(self3, self);
writeContext->deferRelease(self3, child);
}
bool needsDownsize(Node *n) {
@@ -1926,8 +1849,7 @@ bool needsDownsize(Node *n) {
void downsize(Node3 *self, WriteContext *writeContext,
ConflictSet::Impl *impl) {
if (self->numChildren == 0) {
auto *newSelf = writeContext->allocate<Node0>(
self->partialKeyLen, getMaxCapacity(0, 1, self->partialKeyLen));
auto *newSelf = writeContext->allocate<Node0>(self->partialKeyLen);
newSelf->copyChildrenAndKeyFrom(*self);
getInTree(self, impl) = newSelf;
writeContext->deferRelease(self, newSelf);
@@ -1940,9 +1862,7 @@ void downsize(Node3 *self, WriteContext *writeContext,
void downsize(Node16 *self, WriteContext *writeContext,
ConflictSet::Impl *impl) {
assert(self->numChildren + int(self->entryPresent) < kMinChildrenNode16);
auto *newSelf = writeContext->allocate<Node3>(
self->partialKeyLen,
getMaxCapacity(kMinChildrenNode16 - 1, 0, self->partialKeyLen));
auto *newSelf = writeContext->allocate<Node3>(self->partialKeyLen);
newSelf->copyChildrenAndKeyFrom(*self);
getInTree(self, impl) = newSelf;
writeContext->deferRelease(self, newSelf);
@@ -1951,9 +1871,7 @@ void downsize(Node16 *self, WriteContext *writeContext,
void downsize(Node48 *self, WriteContext *writeContext,
ConflictSet::Impl *impl) {
assert(self->numChildren + int(self->entryPresent) < kMinChildrenNode48);
auto *newSelf = writeContext->allocate<Node16>(
self->partialKeyLen,
getMaxCapacity(kMinChildrenNode48 - 1, 0, self->partialKeyLen));
auto *newSelf = writeContext->allocate<Node16>(self->partialKeyLen);
newSelf->copyChildrenAndKeyFrom(*self);
getInTree(self, impl) = newSelf;
writeContext->deferRelease(self, newSelf);
@@ -1963,9 +1881,7 @@ void downsize(Node256 *self, WriteContext *writeContext,
ConflictSet::Impl *impl) {
assert(self->numChildren + int(self->entryPresent) < kMinChildrenNode256);
auto *self256 = (Node256 *)self;
auto *newSelf = writeContext->allocate<Node48>(
self->partialKeyLen,
getMaxCapacity(kMinChildrenNode256 - 1, 0, self->partialKeyLen));
auto *newSelf = writeContext->allocate<Node48>(self->partialKeyLen);
newSelf->copyChildrenAndKeyFrom(*self256);
getInTree(self, impl) = newSelf;
writeContext->deferRelease(self256, newSelf);
@@ -2047,6 +1963,12 @@ Node *erase(Node *self, WriteContext *writeContext, ConflictSet::Impl *impl,
parent3->children[i] = parent3->children[i + 1];
parent3->childMaxVersion[i] = parent3->childMaxVersion[i + 1];
}
VALGRIND_MAKE_MEM_UNDEFINED(parent3->index + parent3->numChildren,
sizeof(parent3->index[0]));
VALGRIND_MAKE_MEM_UNDEFINED(parent3->children + parent3->numChildren,
sizeof(parent3->children[0]));
VALGRIND_MAKE_MEM_UNDEFINED(parent3->childMaxVersion + parent3->numChildren,
sizeof(parent3->childMaxVersion[0]));
if (needsDownsize(parent3)) {
downsize(parent3, writeContext, impl);
@@ -2062,6 +1984,13 @@ Node *erase(Node *self, WriteContext *writeContext, ConflictSet::Impl *impl,
parent16->children[i] = parent16->children[i + 1];
parent16->childMaxVersion[i] = parent16->childMaxVersion[i + 1];
}
VALGRIND_MAKE_MEM_UNDEFINED(parent16->index + parent16->numChildren,
sizeof(parent16->index[0]));
VALGRIND_MAKE_MEM_UNDEFINED(parent16->children + parent16->numChildren,
sizeof(parent16->children[0]));
VALGRIND_MAKE_MEM_UNDEFINED(parent16->childMaxVersion +
parent16->numChildren,
sizeof(parent16->childMaxVersion[0]));
if (needsDownsize(parent16)) {
downsize(parent16, writeContext, impl);
@@ -2090,6 +2019,8 @@ Node *erase(Node *self, WriteContext *writeContext, ConflictSet::Impl *impl,
parent48->reverseIndex[toRemoveChildrenIndex] = parentIndex;
}
parent48->childMaxVersion[lastChildrenIndex] = writeContext->zero;
VALGRIND_MAKE_MEM_UNDEFINED(parent48->children + lastChildrenIndex,
sizeof(parent48->children[0]));
if (needsDownsize(parent48)) {
downsize(parent48, writeContext, impl);
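
The VALGRIND_MAKE_MEM_UNDEFINED calls added throughout erase use Valgrind's client-request macros from <valgrind/memcheck.h>: after an element is removed from a packed array, the vacated slot is poisoned so Memcheck reports any later read of it. A minimal sketch of the pattern, with the macro stubbed out when NVALGRIND is defined:

```
#ifndef NVALGRIND
#include <valgrind/memcheck.h>
#else
#define VALGRIND_MAKE_MEM_UNDEFINED(addr, len) ((void)0)
#endif

// Remove values[i] from a packed array, then poison the stale tail slot so
// Memcheck flags any code that still reads it
void removeAt(int *values, int &count, int i) {
  for (int j = i; j < count - 1; ++j)
    values[j] = values[j + 1];
  --count;
  VALGRIND_MAKE_MEM_UNDEFINED(values + count, sizeof(values[0]));
}
```

Outside of Valgrind these requests are cheap no-ops, so the annotations can stay in release builds.
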
@@ -3246,7 +3177,6 @@ PRESERVE_NONE void down_left_spine(Job *job, Context *context) {
}
auto child = getFirstChild(n);
job->n = child;
__builtin_prefetch(job->n);
job->continuation = downLeftSpineTable[child.getType()];
MUSTTAIL return keepGoing(job, context);
}
@@ -3277,7 +3207,6 @@ void begin(Job *job, Context *context) {
if (c != nullptr) {
job->n = c;
job->continuation = downLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context);
} else {
// The root never has a next sibling
@@ -3287,7 +3216,6 @@ void begin(Job *job, Context *context) {
}
job->continuation = iterTable[taggedChild.getType()];
job->n = child;
__builtin_prefetch(child);
MUSTTAIL return keepGoing(job, context);
}
@@ -3312,7 +3240,6 @@ template <class NodeT> void iter(Job *job, Context *context) {
MUSTTAIL return complete(job, context);
}
job->continuation = downLeftSpineTable[s.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context);
}
}
@@ -3342,7 +3269,6 @@ template <class NodeT> void iter(Job *job, Context *context) {
auto c = getFirstChild(n);
job->n = c;
job->continuation = downLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context);
}
@@ -3354,7 +3280,6 @@ template <class NodeT> void iter(Job *job, Context *context) {
if (c != nullptr) {
job->n = c;
job->continuation = downLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context);
} else {
auto c = nextSibling(job->n);
@@ -3364,13 +3289,11 @@ template <class NodeT> void iter(Job *job, Context *context) {
MUSTTAIL return complete(job, context);
}
job->continuation = downLeftSpineTable[c->getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context);
}
}
job->continuation = iterTable[taggedChild.getType()];
job->n = child;
__builtin_prefetch(child);
MUSTTAIL return keepGoing(job, context);
}
@@ -3399,7 +3322,6 @@ void begin(Job *job, Context *context) {
if (c != nullptr) {
job->n = c;
job->continuation = downLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context);
} else {
// The root never has a next sibling
@@ -3409,7 +3331,6 @@ void begin(Job *job, Context *context) {
}
job->continuation = iterTable[taggedChild.getType()];
job->n = child;
__builtin_prefetch(child);
MUSTTAIL return keepGoing(job, context);
}
@@ -3434,7 +3355,6 @@ template <class NodeT> void iter(Job *job, Context *context) {
MUSTTAIL return complete(job, context);
}
job->continuation = downLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context);
}
}
@@ -3474,7 +3394,6 @@ template <class NodeT> void iter(Job *job, Context *context) {
if (c != nullptr) {
job->n = c;
job->continuation = downLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context);
} else {
auto c = nextSibling(job->n);
@@ -3484,13 +3403,11 @@ template <class NodeT> void iter(Job *job, Context *context) {
MUSTTAIL return complete(job, context);
}
job->continuation = downLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context);
}
}
job->continuation = iterTable[taggedChild.getType()];
job->n = child;
__builtin_prefetch(child);
MUSTTAIL return keepGoing(job, context);
}
@@ -3566,7 +3483,6 @@ PRESERVE_NONE void begin(Job *job, Context *context) {
}
job->continuation = commonPrefixIterTable[c.getType()];
__builtin_prefetch(job->child);
MUSTTAIL return keepGoing(job, context);
}
@@ -3610,7 +3526,6 @@ template <class NodeT> void common_prefix_iter(Job *job, Context *context) {
}
job->continuation = commonPrefixIterTable[c.getType()];
__builtin_prefetch(job->child);
MUSTTAIL return keepGoing(job, context);
}
@@ -3659,7 +3574,6 @@ PRESERVE_NONE void done_common_prefix_iter(Job *job, Context *context) {
if (c != nullptr) {
job->n = c;
job->continuation = downLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context);
} else {
auto c = nextSibling(job->n);
@@ -3674,7 +3588,6 @@ PRESERVE_NONE void done_common_prefix_iter(Job *job, Context *context) {
job->n = child;
job->continuation = rightSideIterTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context);
}
@@ -3698,7 +3611,6 @@ PRESERVE_NONE void done_common_prefix_iter(Job *job, Context *context) {
if (c != nullptr) {
job->n = c;
job->continuation = leftSideDownLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context);
} else {
auto c = nextSibling(job->n);
@@ -3707,14 +3619,12 @@ PRESERVE_NONE void done_common_prefix_iter(Job *job, Context *context) {
MUSTTAIL return done_left_side_iter(job, context);
}
job->continuation = leftSideDownLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context);
}
}
job->n = child;
job->continuation = leftSideIterTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context);
}
@@ -3755,7 +3665,6 @@ PRESERVE_NONE void left_side_iter(Job *job, Context *context) {
MUSTTAIL return done_left_side_iter(job, context);
}
job->continuation = leftSideDownLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context);
}
}
@@ -3809,14 +3718,12 @@ PRESERVE_NONE void left_side_iter(Job *job, Context *context) {
MUSTTAIL return done_left_side_iter(job, context);
}
job->continuation = leftSideDownLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context);
}
}
job->n = child;
job->continuation = leftSideIterTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context);
}
@@ -3847,7 +3754,6 @@ PRESERVE_NONE void done_left_side_iter(Job *job, Context *context) {
job->n = child;
job->continuation = rightSideIterTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context);
}
@@ -3866,7 +3772,6 @@ void left_side_down_left_spine(Job *job, Context *context) {
auto c = getFirstChild(n);
job->n = c;
job->continuation = leftSideDownLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context);
}
@@ -3905,7 +3810,6 @@ PRESERVE_NONE void right_side_iter(Job *job, Context *context) {
MUSTTAIL return complete(job, context);
}
job->continuation = downLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context);
}
}
@@ -3947,7 +3851,6 @@ PRESERVE_NONE void right_side_iter(Job *job, Context *context) {
if (c != nullptr) {
job->n = c;
job->continuation = downLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context);
} else {
auto c = nextSibling(job->n);
@@ -3957,14 +3860,12 @@ PRESERVE_NONE void right_side_iter(Job *job, Context *context) {
MUSTTAIL return complete(job, context);
}
job->continuation = downLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context);
}
}
job->n = child;
job->continuation = rightSideIterTable[c.getType()];
__builtin_prefetch(job->n);
MUSTTAIL return keepGoing(job, context);
}
@@ -4133,8 +4034,10 @@ template <class NodeTFrom> struct PointIterTable {
};
static constexpr Continuation const *pointIterTable[] = {
PointIterTable<Node0>::table, PointIterTable<Node3>::table,
PointIterTable<Node16>::table, PointIterTable<Node48>::table,
nullptr,
PointIterTable<Node3>::table,
PointIterTable<Node16>::table,
PointIterTable<Node48>::table,
PointIterTable<Node256>::table,
};
@@ -4184,7 +4087,6 @@ void pointIter(Job *job, Context *context) {
}
job->continuation = PointIterTable<NodeTTo>::table[job->child.getType()];
__builtin_prefetch(job->child);
MUSTTAIL return keepGoing(job, context);
}
@@ -4203,8 +4105,10 @@ template <class NodeTFrom> struct PrefixIterTable {
};
static constexpr Continuation const *prefixIterTable[] = {
PrefixIterTable<Node0>::table, PrefixIterTable<Node3>::table,
PrefixIterTable<Node16>::table, PrefixIterTable<Node48>::table,
nullptr,
PrefixIterTable<Node3>::table,
PrefixIterTable<Node16>::table,
PrefixIterTable<Node48>::table,
PrefixIterTable<Node256>::table,
};
@@ -4216,8 +4120,10 @@ template <class NodeTFrom> struct BeginIterTable {
};
static constexpr Continuation const *beginIterTable[] = {
BeginIterTable<Node0>::table, BeginIterTable<Node3>::table,
BeginIterTable<Node16>::table, BeginIterTable<Node48>::table,
nullptr,
BeginIterTable<Node3>::table,
BeginIterTable<Node16>::table,
BeginIterTable<Node48>::table,
BeginIterTable<Node256>::table,
};
@@ -4229,8 +4135,10 @@ template <class NodeTFrom> struct EndIterTable {
};
static constexpr Continuation const *endIterTable[] = {
EndIterTable<Node0>::table, EndIterTable<Node3>::table,
EndIterTable<Node16>::table, EndIterTable<Node48>::table,
nullptr,
EndIterTable<Node3>::table,
EndIterTable<Node16>::table,
EndIterTable<Node48>::table,
EndIterTable<Node256>::table,
};
@@ -4278,7 +4186,6 @@ void prefixIter(Job *job, Context *context) {
goto gotoEndIter;
} else {
job->continuation = BeginIterTable<NodeTTo>::table[job->child.getType()];
__builtin_prefetch(job->child);
MUSTTAIL return keepGoing(job, context);
}
}
@@ -4290,7 +4197,6 @@ void prefixIter(Job *job, Context *context) {
}
job->continuation = PrefixIterTable<NodeTTo>::table[job->child.getType()];
__builtin_prefetch(job->child);
MUSTTAIL return keepGoing(job, context);
noNodeOnSearchPath: {
@@ -4317,7 +4223,6 @@ gotoEndIter:
MUSTTAIL return complete(job, context);
} else {
job->continuation = EndIterTable<NodeTTo>::table[job->child.getType()];
__builtin_prefetch(job->child);
MUSTTAIL return keepGoing(job, context);
}
}
@@ -4365,7 +4270,6 @@ void beginIter(Job *job, Context *context) {
}
job->continuation = BeginIterTable<NodeTTo>::table[job->child.getType()];
__builtin_prefetch(job->child);
MUSTTAIL return keepGoing(job, context);
gotoEndIter:
@@ -4432,7 +4336,6 @@ void endIter(Job *job, Context *context) {
}
job->continuation = EndIterTable<NodeTTo>::table[job->child.getType()];
__builtin_prefetch(job->child);
MUSTTAIL return keepGoing(job, context);
}
@@ -5265,6 +5168,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
range_writes_total.add(writeContext.accum.range_writes);
nodes_allocated_total.add(writeContext.accum.nodes_allocated);
nodes_released_total.add(writeContext.accum.nodes_released);
nodes_resized_total.add(writeContext.accum.nodes_resized);
entries_inserted_total.add(writeContext.accum.entries_inserted);
entries_erased_total.add(writeContext.accum.entries_erased);
insert_iterations_total.add(writeContext.accum.insert_iterations);
@@ -5393,6 +5297,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
std::exchange(writeContext.accum.nodes_allocated, 0));
nodes_released_total.add(
std::exchange(writeContext.accum.nodes_released, 0));
nodes_resized_total.add(std::exchange(writeContext.accum.nodes_resized, 0));
entries_inserted_total.add(
std::exchange(writeContext.accum.entries_inserted, 0));
entries_erased_total.add(
@@ -5418,7 +5323,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
keyUpdates = 10;
// Insert ""
root = writeContext.allocate<Node0>(0, 0);
root = writeContext.allocate<Node0>(0);
root->numChildren = 0;
root->parent = nullptr;
root->entryPresent = false;
@@ -5511,6 +5416,9 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
"The total number of physical tree nodes allocated");
COUNTER(nodes_released_total,
"The total number of physical tree nodes released");
COUNTER(nodes_resized_total,
"The total number of physical tree nodes that have been resized to "
"account for partial key capacity changes");
COUNTER(insert_iterations_total,
"The total number of iterations of the main loop for insertion. "
"Includes searches where the entry already existed, and so insertion "


@@ -19,30 +19,30 @@ InstalledDir: /usr/lib/llvm-20/bin
| ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark
|--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:----------
| 169.16 | 5,911,582.44 | 0.0% | 3,014.03 | 855.12 | 3.525 | 504.59 | 0.0% | 2.02 | `point reads`
| 167.17 | 5,981,796.19 | 0.0% | 2,954.16 | 845.14 | 3.495 | 490.17 | 0.0% | 2.00 | `prefix reads`
| 250.44 | 3,992,954.35 | 0.1% | 3,592.41 | 1,265.18 | 2.839 | 629.31 | 0.0% | 2.99 | `range reads`
| 467.10 | 2,140,846.36 | 0.0% | 4,450.57 | 2,488.36 | 1.789 | 707.92 | 2.1% | 5.62 | `point writes`
| 465.18 | 2,149,723.11 | 0.2% | 4,410.22 | 2,474.92 | 1.782 | 694.74 | 2.1% | 5.55 | `prefix writes`
| 297.45 | 3,361,954.05 | 0.1% | 2,315.38 | 1,581.64 | 1.464 | 396.69 | 3.3% | 3.57 | `range writes`
| 476.56 | 2,098,370.82 | 1.0% | 6,999.33 | 2,492.26 | 2.808 | 1,251.74 | 1.3% | 0.06 | `monotonic increasing point writes`
| 129,455.00 | 7,724.69 | 1.0% | 807,446.67 | 698,559.40 | 1.156 | 144,584.60 | 0.8% | 0.01 | `worst case for radix tree`
| 44.67 | 22,384,996.63 | 0.5% | 902.00 | 235.18 | 3.835 | 132.00 | 0.0% | 0.01 | `create and destroy`
| 175.88 | 5,685,575.81 | 0.0% | 3,014.03 | 883.13 | 3.413 | 504.59 | 0.0% | 2.10 | `point reads`
| 173.35 | 5,768,718.52 | 0.0% | 2,954.16 | 869.59 | 3.397 | 490.17 | 0.0% | 2.07 | `prefix reads`
| 251.74 | 3,972,274.68 | 0.1% | 3,592.41 | 1,265.71 | 2.838 | 629.31 | 0.0% | 3.01 | `range reads`
| 472.13 | 2,118,079.83 | 0.2% | 4,450.57 | 2,377.84 | 1.872 | 707.92 | 2.3% | 5.60 | `point writes`
| 471.34 | 2,121,609.91 | 0.0% | 4,410.22 | 2,372.55 | 1.859 | 694.74 | 2.3% | 5.61 | `prefix writes`
| 306.69 | 3,260,650.68 | 0.0% | 2,315.38 | 1,550.33 | 1.493 | 396.69 | 3.3% | 3.68 | `range writes`
| 502.70 | 1,989,277.15 | 0.9% | 6,999.33 | 2,527.47 | 2.769 | 1,251.74 | 1.3% | 0.06 | `monotonic increasing point writes`
| 138,097.67 | 7,241.25 | 0.7% | 807,445.67 | 699,899.00 | 1.154 | 144,584.50 | 0.8% | 0.01 | `worst case for radix tree`
| 46.29 | 21,605,126.00 | 1.0% | 902.00 | 230.73 | 3.909 | 132.00 | 0.0% | 0.01 | `create and destroy`
## Radix tree (this implementation)
| ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark
|--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:----------
| 14.11 | 70,857,435.19 | 0.1% | 247.13 | 71.03 | 3.479 | 32.64 | 0.8% | 0.17 | `point reads`
| 15.63 | 63,997,306.79 | 0.0% | 299.99 | 78.59 | 3.817 | 42.50 | 0.4% | 0.19 | `prefix reads`
| 36.24 | 27,590,266.59 | 0.1% | 782.70 | 182.21 | 4.296 | 106.65 | 0.2% | 0.43 | `range reads`
| 22.72 | 44,004,627.40 | 0.1% | 376.04 | 114.33 | 3.289 | 49.97 | 0.8% | 0.27 | `point writes`
| 40.83 | 24,494,110.04 | 0.0% | 666.07 | 205.35 | 3.244 | 101.33 | 0.3% | 0.49 | `prefix writes`
| 43.45 | 23,016,324.00 | 0.0% | 732.33 | 218.41 | 3.353 | 111.64 | 0.1% | 0.53 | `range writes`
| 81.46 | 12,276,650.63 | 3.6% | 1,458.85 | 411.52 | 3.545 | 280.42 | 0.1% | 0.01 | `monotonic increasing point writes`
| 314,217.00 | 3,182.51 | 1.2% | 4,043,063.50 | 1,593,715.00 | 2.537 | 714,828.00 | 0.1% | 0.01 | `worst case for radix tree`
| 106.79 | 9,364,602.60 | 0.5% | 2,046.00 | 539.75 | 3.791 | 329.00 | 0.0% | 0.01 | `create and destroy`
| 14.04 | 71,216,855.01 | 0.1% | 245.98 | 70.38 | 3.495 | 31.49 | 0.6% | 0.17 | `point reads`
| 14.57 | 68,630,592.89 | 0.1% | 297.83 | 72.93 | 4.084 | 40.34 | 0.5% | 0.17 | `prefix reads`
| 35.28 | 28,347,588.39 | 0.2% | 783.70 | 176.57 | 4.438 | 107.65 | 0.2% | 0.42 | `range reads`
| 20.64 | 48,453,280.08 | 0.0% | 378.97 | 103.31 | 3.668 | 49.92 | 0.7% | 0.25 | `point writes`
| 39.14 | 25,551,096.17 | 0.0% | 672.00 | 196.08 | 3.427 | 101.28 | 0.3% | 0.47 | `prefix writes`
| 39.88 | 25,075,121.85 | 0.0% | 738.26 | 199.71 | 3.697 | 111.59 | 0.1% | 0.49 | `range writes`
| 79.36 | 12,601,312.65 | 0.5% | 1,447.65 | 398.77 | 3.630 | 275.61 | 0.1% | 0.01 | `monotonic increasing point writes`
| 307,037.33 | 3,256.93 | 0.3% | 4,043,060.50 | 1,549,199.33 | 2.610 | 714,828.00 | 0.1% | 0.01 | `worst case for radix tree`
| 97.00 | 10,309,246.47 | 0.2% | 1,986.00 | 487.06 | 4.078 | 315.00 | 0.0% | 0.01 | `create and destroy`
# "Real data" test
@@ -51,13 +51,13 @@ Point queries only. Gc ratio is the ratio of time spent doing garbage collection
## skip list
```
Check: 4.62434 seconds, 364.633 MB/s, Add: 3.90399 seconds, 147.371 MB/s, Gc ratio: 33.6898%, Peak idle memory: 5.61007e+06
Check: 4.66163 seconds, 361.716 MB/s, Add: 3.97534 seconds, 144.726 MB/s, Gc ratio: 33.3998%, Peak idle memory: 5.61007e+06
```
## radix tree
```
Check: 0.956689 seconds, 1762.52 MB/s, Add: 1.35744 seconds, 423.84 MB/s, Gc ratio: 35.0946%, Peak idle memory: 2.32922e+06
Check: 1.01776 seconds, 1656.77 MB/s, Add: 1.2718 seconds, 452.38 MB/s, Gc ratio: 38.626%, Peak idle memory: 2.05667e+06
```
## hash table
@@ -65,6 +65,6 @@ Check: 0.956689 seconds, 1762.52 MB/s, Add: 1.35744 seconds, 423.84 MB/s, Gc rat
(The hash table implementation doesn't support range queries; its purpose is to give an idea of how fast point queries can be)
```
Check: 0.799863 seconds, 2108.09 MB/s, Add: 0.667736 seconds, 861.621 MB/s, Gc ratio: 35.0666%, Peak idle memory: 0
Check: 0.859188 seconds, 1962.54 MB/s, Add: 0.714174 seconds, 805.596 MB/s, Gc ratio: 34.9552%, Peak idle memory: 0
```