Remove freeList, min/max capacity tracking
The free list doesn't seem to get a good hit rate, so remove it. Policies other than capacity = minCapacity did not reduce the rate at which we were resizing nodes, but did increase memory usage, so get rid of the min/max capacity tracking too. Add a nodes_resized_total counter.
This commit is contained in:
179
ConflictSet.cpp
179
ConflictSet.cpp
@@ -702,27 +702,13 @@ constexpr int getMaxCapacity(Node *self) {
|
||||
self->partialKeyLen);
|
||||
}
|
||||
|
||||
#ifdef __APPLE__
|
||||
// Disabling the free list altogether is faster on my mac m1
|
||||
constexpr int64_t kMaxFreeListBytes = 0;
|
||||
#else
|
||||
constexpr int64_t kMaxFreeListBytes = 1 << 20;
|
||||
#endif
|
||||
|
||||
// Maintains a free list up to kMaxFreeListBytes. If the top element of the list
|
||||
// doesn't meet the capacity constraints, it's freed and a new node is allocated
|
||||
// with the minimum capacity. The hope is that "unfit" nodes don't get stuck in
|
||||
// the free list.
|
||||
template <class T> struct NodeAllocator {
|
||||
|
||||
static_assert(std::derived_from<T, Node>);
|
||||
static_assert(std::is_trivial_v<T>);
|
||||
|
||||
T *allocate(int minCapacity, int maxCapacity) {
|
||||
assert(minCapacity <= maxCapacity);
|
||||
assert(freeListSize >= 0);
|
||||
assert(freeListSize <= kMaxFreeListBytes);
|
||||
T *result = allocate_helper(minCapacity, maxCapacity);
|
||||
T *allocate(int capacity) {
|
||||
T *result = allocate_helper(capacity);
|
||||
result->endOfRange = false;
|
||||
result->releaseDeferred = false;
|
||||
if constexpr (!std::is_same_v<T, Node0>) {
|
||||
@@ -742,47 +728,19 @@ template <class T> struct NodeAllocator {
|
||||
}
|
||||
|
||||
void release(T *p) {
|
||||
assume(p->partialKeyCapacity >= 0);
|
||||
assume(freeListSize >= 0);
|
||||
if (freeListSize + sizeof(T) + p->partialKeyCapacity > kMaxFreeListBytes) {
|
||||
removeNode(p);
|
||||
return safe_free(p, sizeof(T) + p->partialKeyCapacity);
|
||||
}
|
||||
p->parent = freeList;
|
||||
freeList = p;
|
||||
freeListSize += sizeof(T) + p->partialKeyCapacity;
|
||||
VALGRIND_MAKE_MEM_NOACCESS(p, sizeof(T) + p->partialKeyCapacity);
|
||||
removeNode(p);
|
||||
return safe_free(p, sizeof(T) + p->partialKeyCapacity);
|
||||
}
|
||||
|
||||
void deferRelease(T *p, Node *forwardTo) {
|
||||
p->releaseDeferred = true;
|
||||
p->forwardTo = forwardTo;
|
||||
if (freeListSize + sizeof(T) + p->partialKeyCapacity > kMaxFreeListBytes) {
|
||||
p->parent = deferredListOverflow;
|
||||
deferredListOverflow = p;
|
||||
} else {
|
||||
if (deferredList == nullptr) {
|
||||
deferredListFront = p;
|
||||
}
|
||||
p->parent = deferredList;
|
||||
deferredList = p;
|
||||
freeListSize += sizeof(T) + p->partialKeyCapacity;
|
||||
}
|
||||
p->parent = deferredList;
|
||||
deferredList = p;
|
||||
}
|
||||
|
||||
void releaseDeferred() {
|
||||
if (deferredList != nullptr) {
|
||||
deferredListFront->parent = freeList;
|
||||
#ifndef NVALGRIND
|
||||
for (auto *iter = deferredList; iter != freeList;) {
|
||||
auto *tmp = iter;
|
||||
iter = (T *)iter->parent;
|
||||
VALGRIND_MAKE_MEM_NOACCESS(tmp, sizeof(T) + tmp->partialKeyCapacity);
|
||||
}
|
||||
#endif
|
||||
freeList = std::exchange(deferredList, nullptr);
|
||||
}
|
||||
for (T *n = std::exchange(deferredListOverflow, nullptr); n != nullptr;) {
|
||||
for (T *n = std::exchange(deferredList, nullptr); n != nullptr;) {
|
||||
auto *tmp = n;
|
||||
n = (T *)n->parent;
|
||||
release(tmp);
|
||||
@@ -796,51 +754,12 @@ template <class T> struct NodeAllocator {
|
||||
NodeAllocator(NodeAllocator &&) = delete;
|
||||
NodeAllocator &operator=(NodeAllocator &&) = delete;
|
||||
|
||||
~NodeAllocator() {
|
||||
assert(deferredList == nullptr);
|
||||
assert(deferredListOverflow == nullptr);
|
||||
for (T *iter = freeList; iter != nullptr;) {
|
||||
VALGRIND_MAKE_MEM_DEFINED(iter, sizeof(T));
|
||||
auto *tmp = iter;
|
||||
iter = (T *)iter->parent;
|
||||
removeNode(tmp);
|
||||
safe_free(tmp, sizeof(T) + tmp->partialKeyCapacity);
|
||||
}
|
||||
}
|
||||
~NodeAllocator() { assert(deferredList == nullptr); }
|
||||
|
||||
private:
|
||||
int64_t freeListSize = 0;
|
||||
T *freeList = nullptr;
|
||||
T *deferredList = nullptr;
|
||||
// Used to concatenate deferredList to freeList
|
||||
T *deferredListFront;
|
||||
T *deferredListOverflow = nullptr;
|
||||
|
||||
T *allocate_helper(int minCapacity, int maxCapacity) {
|
||||
if (freeList != nullptr) {
|
||||
VALGRIND_MAKE_MEM_DEFINED(freeList, sizeof(T));
|
||||
freeListSize -= sizeof(T) + freeList->partialKeyCapacity;
|
||||
assume(freeList->partialKeyCapacity >= 0);
|
||||
assume(minCapacity >= 0);
|
||||
assume(minCapacity <= maxCapacity);
|
||||
if (freeList->partialKeyCapacity >= minCapacity &&
|
||||
freeList->partialKeyCapacity <= maxCapacity) {
|
||||
auto *result = freeList;
|
||||
freeList = (T *)freeList->parent;
|
||||
VALGRIND_MAKE_MEM_UNDEFINED(result,
|
||||
sizeof(T) + result->partialKeyCapacity);
|
||||
VALGRIND_MAKE_MEM_DEFINED(&result->partialKeyCapacity,
|
||||
sizeof(result->partialKeyCapacity));
|
||||
VALGRIND_MAKE_MEM_DEFINED(&result->type, sizeof(result->type));
|
||||
return result;
|
||||
} else {
|
||||
auto *p = freeList;
|
||||
freeList = (T *)p->parent;
|
||||
removeNode(p);
|
||||
safe_free(p, sizeof(T) + p->partialKeyCapacity);
|
||||
}
|
||||
}
|
||||
int capacity = maxCapacity;
|
||||
T *allocate_helper(int capacity) {
|
||||
auto *result = (T *)safe_malloc(sizeof(T) + capacity);
|
||||
result->type = T::kType;
|
||||
result->partialKeyCapacity = capacity;
|
||||
@@ -899,6 +818,7 @@ struct WriteContext {
|
||||
int64_t point_writes;
|
||||
int64_t range_writes;
|
||||
int64_t write_bytes;
|
||||
int64_t nodes_resized;
|
||||
} accum;
|
||||
|
||||
#if USE_64_BIT
|
||||
@@ -911,19 +831,19 @@ struct WriteContext {
|
||||
|
||||
WriteContext() { memset(&accum, 0, sizeof(accum)); }
|
||||
|
||||
template <class T> T *allocate(int minCapacity, int maxCapacity) {
|
||||
template <class T> T *allocate(int capacity) {
|
||||
static_assert(!std::is_same_v<T, Node>);
|
||||
++accum.nodes_allocated;
|
||||
if constexpr (std::is_same_v<T, Node0>) {
|
||||
return node0.allocate(minCapacity, maxCapacity);
|
||||
return node0.allocate(capacity);
|
||||
} else if constexpr (std::is_same_v<T, Node3>) {
|
||||
return node3.allocate(minCapacity, maxCapacity);
|
||||
return node3.allocate(capacity);
|
||||
} else if constexpr (std::is_same_v<T, Node16>) {
|
||||
return node16.allocate(minCapacity, maxCapacity);
|
||||
return node16.allocate(capacity);
|
||||
} else if constexpr (std::is_same_v<T, Node48>) {
|
||||
return node48.allocate(minCapacity, maxCapacity);
|
||||
return node48.allocate(capacity);
|
||||
} else if constexpr (std::is_same_v<T, Node256>) {
|
||||
return node256.allocate(minCapacity, maxCapacity);
|
||||
return node256.allocate(capacity);
|
||||
}
|
||||
}
|
||||
template <class T> void release(T *c) {
|
||||
@@ -1479,8 +1399,7 @@ void consumePartialKeyFull(TaggedNodePointer &self, TrivialSpan &key,
|
||||
InternalVersionT oldMaxVersion = exchangeMaxVersion(old, writeVersion);
|
||||
|
||||
// *self will have one child (old)
|
||||
auto *newSelf = writeContext->allocate<Node3>(
|
||||
partialKeyIndex, getMaxCapacity(1, 0, partialKeyIndex));
|
||||
auto *newSelf = writeContext->allocate<Node3>(partialKeyIndex);
|
||||
|
||||
newSelf->parent = old->parent;
|
||||
newSelf->parentsIndex = old->parentsIndex;
|
||||
@@ -1584,8 +1503,7 @@ TaggedNodePointer &getOrCreateChild(TaggedNodePointer &self, TrivialSpan &key,
|
||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||
}
|
||||
|
||||
auto *newChild = writeContext->allocate<Node0>(
|
||||
key.size(), getMaxCapacity(0, 1, key.size()));
|
||||
auto *newChild = writeContext->allocate<Node0>(key.size());
|
||||
newChild->numChildren = 0;
|
||||
newChild->entryPresent = false; // Will be set to true by the caller
|
||||
newChild->partialKeyLen = key.size();
|
||||
@@ -1597,8 +1515,7 @@ TaggedNodePointer &getOrCreateChild(TaggedNodePointer &self, TrivialSpan &key,
|
||||
case Type_Node0: {
|
||||
auto *self0 = static_cast<Node0 *>(self);
|
||||
|
||||
auto *newSelf = writeContext->allocate<Node3>(
|
||||
self->partialKeyLen, getMaxCapacity(1, 1, self->partialKeyLen));
|
||||
auto *newSelf = writeContext->allocate<Node3>(self->partialKeyLen);
|
||||
newSelf->copyChildrenAndKeyFrom(*self0);
|
||||
writeContext->deferRelease(self0, newSelf);
|
||||
self = newSelf;
|
||||
@@ -1608,9 +1525,7 @@ TaggedNodePointer &getOrCreateChild(TaggedNodePointer &self, TrivialSpan &key,
|
||||
case Type_Node3: {
|
||||
if (self->numChildren == Node3::kMaxNodes) {
|
||||
auto *self3 = static_cast<Node3 *>(self);
|
||||
auto *newSelf = writeContext->allocate<Node16>(
|
||||
self->partialKeyLen,
|
||||
getMaxCapacity(4, self->entryPresent, self->partialKeyLen));
|
||||
auto *newSelf = writeContext->allocate<Node16>(self->partialKeyLen);
|
||||
newSelf->copyChildrenAndKeyFrom(*self3);
|
||||
writeContext->deferRelease(self3, newSelf);
|
||||
self = newSelf;
|
||||
@@ -1640,9 +1555,7 @@ TaggedNodePointer &getOrCreateChild(TaggedNodePointer &self, TrivialSpan &key,
|
||||
case Type_Node16: {
|
||||
if (self->numChildren == Node16::kMaxNodes) {
|
||||
auto *self16 = static_cast<Node16 *>(self);
|
||||
auto *newSelf = writeContext->allocate<Node48>(
|
||||
self->partialKeyLen,
|
||||
getMaxCapacity(17, self->entryPresent, self->partialKeyLen));
|
||||
auto *newSelf = writeContext->allocate<Node48>(self->partialKeyLen);
|
||||
newSelf->copyChildrenAndKeyFrom(*self16);
|
||||
writeContext->deferRelease(self16, newSelf);
|
||||
self = newSelf;
|
||||
@@ -1674,9 +1587,7 @@ TaggedNodePointer &getOrCreateChild(TaggedNodePointer &self, TrivialSpan &key,
|
||||
|
||||
if (self->numChildren == 48) {
|
||||
auto *self48 = static_cast<Node48 *>(self);
|
||||
auto *newSelf = writeContext->allocate<Node256>(
|
||||
self->partialKeyLen,
|
||||
getMaxCapacity(49, self->entryPresent, self->partialKeyLen));
|
||||
auto *newSelf = writeContext->allocate<Node256>(self->partialKeyLen);
|
||||
newSelf->copyChildrenAndKeyFrom(*self48);
|
||||
writeContext->deferRelease(self48, newSelf);
|
||||
self = newSelf;
|
||||
@@ -1760,13 +1671,13 @@ downLeftSpine:
|
||||
return node;
|
||||
}
|
||||
|
||||
void freeAndMakeCapacityBetween(Node *&self, int minCapacity, int maxCapacity,
|
||||
WriteContext *writeContext,
|
||||
ConflictSet::Impl *impl) {
|
||||
void freeAndMakeCapacity(Node *&self, int capacity, WriteContext *writeContext,
|
||||
ConflictSet::Impl *impl) {
|
||||
++writeContext->accum.nodes_resized;
|
||||
switch (self->getType()) {
|
||||
case Type_Node0: {
|
||||
auto *self0 = (Node0 *)self;
|
||||
auto *newSelf = writeContext->allocate<Node0>(minCapacity, maxCapacity);
|
||||
auto *newSelf = writeContext->allocate<Node0>(capacity);
|
||||
newSelf->copyChildrenAndKeyFrom(*self0);
|
||||
getInTree(self, impl) = newSelf;
|
||||
writeContext->deferRelease(self0, newSelf);
|
||||
@@ -1774,7 +1685,7 @@ void freeAndMakeCapacityBetween(Node *&self, int minCapacity, int maxCapacity,
|
||||
} break;
|
||||
case Type_Node3: {
|
||||
auto *self3 = (Node3 *)self;
|
||||
auto *newSelf = writeContext->allocate<Node3>(minCapacity, maxCapacity);
|
||||
auto *newSelf = writeContext->allocate<Node3>(capacity);
|
||||
newSelf->copyChildrenAndKeyFrom(*self3);
|
||||
getInTree(self, impl) = newSelf;
|
||||
writeContext->deferRelease(self3, newSelf);
|
||||
@@ -1782,7 +1693,7 @@ void freeAndMakeCapacityBetween(Node *&self, int minCapacity, int maxCapacity,
|
||||
} break;
|
||||
case Type_Node16: {
|
||||
auto *self16 = (Node16 *)self;
|
||||
auto *newSelf = writeContext->allocate<Node16>(minCapacity, maxCapacity);
|
||||
auto *newSelf = writeContext->allocate<Node16>(capacity);
|
||||
newSelf->copyChildrenAndKeyFrom(*self16);
|
||||
getInTree(self, impl) = newSelf;
|
||||
writeContext->deferRelease(self16, newSelf);
|
||||
@@ -1790,7 +1701,7 @@ void freeAndMakeCapacityBetween(Node *&self, int minCapacity, int maxCapacity,
|
||||
} break;
|
||||
case Type_Node48: {
|
||||
auto *self48 = (Node48 *)self;
|
||||
auto *newSelf = writeContext->allocate<Node48>(minCapacity, maxCapacity);
|
||||
auto *newSelf = writeContext->allocate<Node48>(capacity);
|
||||
newSelf->copyChildrenAndKeyFrom(*self48);
|
||||
getInTree(self, impl) = newSelf;
|
||||
writeContext->deferRelease(self48, newSelf);
|
||||
@@ -1798,7 +1709,7 @@ void freeAndMakeCapacityBetween(Node *&self, int minCapacity, int maxCapacity,
|
||||
} break;
|
||||
case Type_Node256: {
|
||||
auto *self256 = (Node256 *)self;
|
||||
auto *newSelf = writeContext->allocate<Node256>(minCapacity, maxCapacity);
|
||||
auto *newSelf = writeContext->allocate<Node256>(capacity);
|
||||
newSelf->copyChildrenAndKeyFrom(*self256);
|
||||
getInTree(self, impl) = newSelf;
|
||||
writeContext->deferRelease(self256, newSelf);
|
||||
@@ -1823,8 +1734,7 @@ void maybeDecreaseCapacity(Node *&self, WriteContext *writeContext,
|
||||
if (self->getCapacity() <= maxCapacity) {
|
||||
return;
|
||||
}
|
||||
freeAndMakeCapacityBetween(self, self->partialKeyLen, maxCapacity,
|
||||
writeContext, impl);
|
||||
freeAndMakeCapacity(self, self->partialKeyLen, writeContext, impl);
|
||||
}
|
||||
|
||||
#if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__)
|
||||
@@ -1898,12 +1808,9 @@ void mergeWithChild(TaggedNodePointer &self, WriteContext *writeContext,
|
||||
assert(!self3->entryPresent);
|
||||
Node *child = self3->children[0];
|
||||
const int minCapacity = self3->partialKeyLen + 1 + child->partialKeyLen;
|
||||
const int maxCapacity =
|
||||
getMaxCapacity(child->numChildren, child->entryPresent, minCapacity);
|
||||
|
||||
if (minCapacity > child->getCapacity()) {
|
||||
freeAndMakeCapacityBetween(child, minCapacity, maxCapacity, writeContext,
|
||||
impl);
|
||||
freeAndMakeCapacity(child, minCapacity, writeContext, impl);
|
||||
}
|
||||
|
||||
// Merge partial key with child
|
||||
@@ -1942,8 +1849,7 @@ bool needsDownsize(Node *n) {
|
||||
void downsize(Node3 *self, WriteContext *writeContext,
|
||||
ConflictSet::Impl *impl) {
|
||||
if (self->numChildren == 0) {
|
||||
auto *newSelf = writeContext->allocate<Node0>(
|
||||
self->partialKeyLen, getMaxCapacity(0, 1, self->partialKeyLen));
|
||||
auto *newSelf = writeContext->allocate<Node0>(self->partialKeyLen);
|
||||
newSelf->copyChildrenAndKeyFrom(*self);
|
||||
getInTree(self, impl) = newSelf;
|
||||
writeContext->deferRelease(self, newSelf);
|
||||
@@ -1956,9 +1862,7 @@ void downsize(Node3 *self, WriteContext *writeContext,
|
||||
void downsize(Node16 *self, WriteContext *writeContext,
|
||||
ConflictSet::Impl *impl) {
|
||||
assert(self->numChildren + int(self->entryPresent) < kMinChildrenNode16);
|
||||
auto *newSelf = writeContext->allocate<Node3>(
|
||||
self->partialKeyLen,
|
||||
getMaxCapacity(kMinChildrenNode16 - 1, 0, self->partialKeyLen));
|
||||
auto *newSelf = writeContext->allocate<Node3>(self->partialKeyLen);
|
||||
newSelf->copyChildrenAndKeyFrom(*self);
|
||||
getInTree(self, impl) = newSelf;
|
||||
writeContext->deferRelease(self, newSelf);
|
||||
@@ -1967,9 +1871,7 @@ void downsize(Node16 *self, WriteContext *writeContext,
|
||||
void downsize(Node48 *self, WriteContext *writeContext,
|
||||
ConflictSet::Impl *impl) {
|
||||
assert(self->numChildren + int(self->entryPresent) < kMinChildrenNode48);
|
||||
auto *newSelf = writeContext->allocate<Node16>(
|
||||
self->partialKeyLen,
|
||||
getMaxCapacity(kMinChildrenNode48 - 1, 0, self->partialKeyLen));
|
||||
auto *newSelf = writeContext->allocate<Node16>(self->partialKeyLen);
|
||||
newSelf->copyChildrenAndKeyFrom(*self);
|
||||
getInTree(self, impl) = newSelf;
|
||||
writeContext->deferRelease(self, newSelf);
|
||||
@@ -1979,9 +1881,7 @@ void downsize(Node256 *self, WriteContext *writeContext,
|
||||
ConflictSet::Impl *impl) {
|
||||
assert(self->numChildren + int(self->entryPresent) < kMinChildrenNode256);
|
||||
auto *self256 = (Node256 *)self;
|
||||
auto *newSelf = writeContext->allocate<Node48>(
|
||||
self->partialKeyLen,
|
||||
getMaxCapacity(kMinChildrenNode256 - 1, 0, self->partialKeyLen));
|
||||
auto *newSelf = writeContext->allocate<Node48>(self->partialKeyLen);
|
||||
newSelf->copyChildrenAndKeyFrom(*self256);
|
||||
getInTree(self, impl) = newSelf;
|
||||
writeContext->deferRelease(self256, newSelf);
|
||||
@@ -5268,6 +5168,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
||||
range_writes_total.add(writeContext.accum.range_writes);
|
||||
nodes_allocated_total.add(writeContext.accum.nodes_allocated);
|
||||
nodes_released_total.add(writeContext.accum.nodes_released);
|
||||
nodes_resized_total.add(writeContext.accum.nodes_resized);
|
||||
entries_inserted_total.add(writeContext.accum.entries_inserted);
|
||||
entries_erased_total.add(writeContext.accum.entries_erased);
|
||||
insert_iterations_total.add(writeContext.accum.insert_iterations);
|
||||
@@ -5396,6 +5297,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
||||
std::exchange(writeContext.accum.nodes_allocated, 0));
|
||||
nodes_released_total.add(
|
||||
std::exchange(writeContext.accum.nodes_released, 0));
|
||||
nodes_resized_total.add(std::exchange(writeContext.accum.nodes_resized, 0));
|
||||
entries_inserted_total.add(
|
||||
std::exchange(writeContext.accum.entries_inserted, 0));
|
||||
entries_erased_total.add(
|
||||
@@ -5421,7 +5323,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
||||
keyUpdates = 10;
|
||||
|
||||
// Insert ""
|
||||
root = writeContext.allocate<Node0>(0, 0);
|
||||
root = writeContext.allocate<Node0>(0);
|
||||
root->numChildren = 0;
|
||||
root->parent = nullptr;
|
||||
root->entryPresent = false;
|
||||
@@ -5514,6 +5416,9 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
||||
"The total number of physical tree nodes allocated");
|
||||
COUNTER(nodes_released_total,
|
||||
"The total number of physical tree nodes released");
|
||||
COUNTER(nodes_resized_total,
|
||||
"The total number of physical tree nodes that have been resized to "
|
||||
"account for partial key capacity changes");
|
||||
COUNTER(insert_iterations_total,
|
||||
"The total number of iterations of the main loop for insertion. "
|
||||
"Includes searches where the entry already existed, and so insertion "
|
||||
|
||||
Reference in New Issue
Block a user