diff --git a/ConflictSet.cpp b/ConflictSet.cpp index d993464..8640ab4 100644 --- a/ConflictSet.cpp +++ b/ConflictSet.cpp @@ -702,27 +702,13 @@ constexpr int getMaxCapacity(Node *self) { self->partialKeyLen); } -#ifdef __APPLE__ -// Disabling the free list altogether is faster on my mac m1 -constexpr int64_t kMaxFreeListBytes = 0; -#else -constexpr int64_t kMaxFreeListBytes = 1 << 20; -#endif - -// Maintains a free list up to kMaxFreeListBytes. If the top element of the list -// doesn't meet the capacity constraints, it's freed and a new node is allocated -// with the minimum capacity. The hope is that "unfit" nodes don't get stuck in -// the free list. template struct NodeAllocator { static_assert(std::derived_from); static_assert(std::is_trivial_v); - T *allocate(int minCapacity, int maxCapacity) { - assert(minCapacity <= maxCapacity); - assert(freeListSize >= 0); - assert(freeListSize <= kMaxFreeListBytes); - T *result = allocate_helper(minCapacity, maxCapacity); + T *allocate(int capacity) { + T *result = allocate_helper(capacity); result->endOfRange = false; result->releaseDeferred = false; if constexpr (!std::is_same_v) { @@ -742,47 +728,19 @@ template struct NodeAllocator { } void release(T *p) { - assume(p->partialKeyCapacity >= 0); - assume(freeListSize >= 0); - if (freeListSize + sizeof(T) + p->partialKeyCapacity > kMaxFreeListBytes) { - removeNode(p); - return safe_free(p, sizeof(T) + p->partialKeyCapacity); - } - p->parent = freeList; - freeList = p; - freeListSize += sizeof(T) + p->partialKeyCapacity; - VALGRIND_MAKE_MEM_NOACCESS(p, sizeof(T) + p->partialKeyCapacity); + removeNode(p); + return safe_free(p, sizeof(T) + p->partialKeyCapacity); } void deferRelease(T *p, Node *forwardTo) { p->releaseDeferred = true; p->forwardTo = forwardTo; - if (freeListSize + sizeof(T) + p->partialKeyCapacity > kMaxFreeListBytes) { - p->parent = deferredListOverflow; - deferredListOverflow = p; - } else { - if (deferredList == nullptr) { - deferredListFront = p; - } - p->parent = deferredList; - deferredList = p; - freeListSize += sizeof(T) + p->partialKeyCapacity; - } + p->parent = deferredList; + deferredList = p; } void releaseDeferred() { - if (deferredList != nullptr) { - deferredListFront->parent = freeList; -#ifndef NVALGRIND - for (auto *iter = deferredList; iter != freeList;) { - auto *tmp = iter; - iter = (T *)iter->parent; - VALGRIND_MAKE_MEM_NOACCESS(tmp, sizeof(T) + tmp->partialKeyCapacity); - } -#endif - freeList = std::exchange(deferredList, nullptr); - } - for (T *n = std::exchange(deferredListOverflow, nullptr); n != nullptr;) { + for (T *n = std::exchange(deferredList, nullptr); n != nullptr;) { auto *tmp = n; n = (T *)n->parent; release(tmp); @@ -796,51 +754,12 @@ template struct NodeAllocator { NodeAllocator(NodeAllocator &&) = delete; NodeAllocator &operator=(NodeAllocator &&) = delete; - ~NodeAllocator() { - assert(deferredList == nullptr); - assert(deferredListOverflow == nullptr); - for (T *iter = freeList; iter != nullptr;) { - VALGRIND_MAKE_MEM_DEFINED(iter, sizeof(T)); - auto *tmp = iter; - iter = (T *)iter->parent; - removeNode(tmp); - safe_free(tmp, sizeof(T) + tmp->partialKeyCapacity); - } - } + ~NodeAllocator() { assert(deferredList == nullptr); } private: - int64_t freeListSize = 0; - T *freeList = nullptr; T *deferredList = nullptr; - // Used to concatenate deferredList to freeList - T *deferredListFront; - T *deferredListOverflow = nullptr; - T *allocate_helper(int minCapacity, int maxCapacity) { - if (freeList != nullptr) { - VALGRIND_MAKE_MEM_DEFINED(freeList, sizeof(T)); - freeListSize -= sizeof(T) + freeList->partialKeyCapacity; - assume(freeList->partialKeyCapacity >= 0); - assume(minCapacity >= 0); - assume(minCapacity <= maxCapacity); - if (freeList->partialKeyCapacity >= minCapacity && - freeList->partialKeyCapacity <= maxCapacity) { - auto *result = freeList; - freeList = (T *)freeList->parent; - VALGRIND_MAKE_MEM_UNDEFINED(result, - sizeof(T) + result->partialKeyCapacity); - VALGRIND_MAKE_MEM_DEFINED(&result->partialKeyCapacity, - sizeof(result->partialKeyCapacity)); - VALGRIND_MAKE_MEM_DEFINED(&result->type, sizeof(result->type)); - return result; - } else { - auto *p = freeList; - freeList = (T *)p->parent; - removeNode(p); - safe_free(p, sizeof(T) + p->partialKeyCapacity); - } - } - int capacity = maxCapacity; + T *allocate_helper(int capacity) { auto *result = (T *)safe_malloc(sizeof(T) + capacity); result->type = T::kType; result->partialKeyCapacity = capacity; @@ -899,6 +818,7 @@ struct WriteContext { int64_t point_writes; int64_t range_writes; int64_t write_bytes; + int64_t nodes_resized; } accum; #if USE_64_BIT @@ -911,19 +831,19 @@ struct WriteContext { WriteContext() { memset(&accum, 0, sizeof(accum)); } - template T *allocate(int minCapacity, int maxCapacity) { + template T *allocate(int capacity) { static_assert(!std::is_same_v); ++accum.nodes_allocated; if constexpr (std::is_same_v) { - return node0.allocate(minCapacity, maxCapacity); + return node0.allocate(capacity); } else if constexpr (std::is_same_v) { - return node3.allocate(minCapacity, maxCapacity); + return node3.allocate(capacity); } else if constexpr (std::is_same_v) { - return node16.allocate(minCapacity, maxCapacity); + return node16.allocate(capacity); } else if constexpr (std::is_same_v) { - return node48.allocate(minCapacity, maxCapacity); + return node48.allocate(capacity); } else if constexpr (std::is_same_v) { - return node256.allocate(minCapacity, maxCapacity); + return node256.allocate(capacity); } } template void release(T *c) { @@ -1479,8 +1399,7 @@ void consumePartialKeyFull(TaggedNodePointer &self, TrivialSpan &key, InternalVersionT oldMaxVersion = exchangeMaxVersion(old, writeVersion); // *self will have one child (old) - auto *newSelf = writeContext->allocate( - partialKeyIndex, getMaxCapacity(1, 0, partialKeyIndex)); + auto *newSelf = writeContext->allocate(partialKeyIndex); newSelf->parent = old->parent; newSelf->parentsIndex = old->parentsIndex; @@ -1584,8 +1503,7 @@ TaggedNodePointer &getOrCreateChild(TaggedNodePointer &self, TrivialSpan &key, __builtin_unreachable(); // GCOVR_EXCL_LINE } - auto *newChild = writeContext->allocate( - key.size(), getMaxCapacity(0, 1, key.size())); + auto *newChild = writeContext->allocate(key.size()); newChild->numChildren = 0; newChild->entryPresent = false; // Will be set to true by the caller newChild->partialKeyLen = key.size(); @@ -1597,8 +1515,7 @@ TaggedNodePointer &getOrCreateChild(TaggedNodePointer &self, TrivialSpan &key, case Type_Node0: { auto *self0 = static_cast(self); - auto *newSelf = writeContext->allocate( - self->partialKeyLen, getMaxCapacity(1, 1, self->partialKeyLen)); + auto *newSelf = writeContext->allocate(self->partialKeyLen); newSelf->copyChildrenAndKeyFrom(*self0); writeContext->deferRelease(self0, newSelf); self = newSelf; @@ -1608,9 +1525,7 @@ TaggedNodePointer &getOrCreateChild(TaggedNodePointer &self, TrivialSpan &key, case Type_Node3: { if (self->numChildren == Node3::kMaxNodes) { auto *self3 = static_cast(self); - auto *newSelf = writeContext->allocate( - self->partialKeyLen, - getMaxCapacity(4, self->entryPresent, self->partialKeyLen)); + auto *newSelf = writeContext->allocate(self->partialKeyLen); newSelf->copyChildrenAndKeyFrom(*self3); writeContext->deferRelease(self3, newSelf); self = newSelf; @@ -1640,9 +1555,7 @@ TaggedNodePointer &getOrCreateChild(TaggedNodePointer &self, TrivialSpan &key, case Type_Node16: { if (self->numChildren == Node16::kMaxNodes) { auto *self16 = static_cast(self); - auto *newSelf = writeContext->allocate( - self->partialKeyLen, - getMaxCapacity(17, self->entryPresent, self->partialKeyLen)); + auto *newSelf = writeContext->allocate(self->partialKeyLen); newSelf->copyChildrenAndKeyFrom(*self16); writeContext->deferRelease(self16, newSelf); self = newSelf; @@ -1674,9 +1587,7 @@ TaggedNodePointer &getOrCreateChild(TaggedNodePointer &self, TrivialSpan &key, if (self->numChildren == 48) { auto *self48 = static_cast(self); - auto *newSelf = writeContext->allocate( - self->partialKeyLen, - getMaxCapacity(49, self->entryPresent, self->partialKeyLen)); + auto *newSelf = writeContext->allocate(self->partialKeyLen); newSelf->copyChildrenAndKeyFrom(*self48); writeContext->deferRelease(self48, newSelf); self = newSelf; @@ -1760,13 +1671,13 @@ downLeftSpine: return node; } -void freeAndMakeCapacityBetween(Node *&self, int minCapacity, int maxCapacity, - WriteContext *writeContext, - ConflictSet::Impl *impl) { +void freeAndMakeCapacity(Node *&self, int capacity, WriteContext *writeContext, + ConflictSet::Impl *impl) { + ++writeContext->accum.nodes_resized; switch (self->getType()) { case Type_Node0: { auto *self0 = (Node0 *)self; - auto *newSelf = writeContext->allocate(minCapacity, maxCapacity); + auto *newSelf = writeContext->allocate(capacity); newSelf->copyChildrenAndKeyFrom(*self0); getInTree(self, impl) = newSelf; writeContext->deferRelease(self0, newSelf); @@ -1774,7 +1685,7 @@ void freeAndMakeCapacityBetween(Node *&self, int minCapacity, int maxCapacity, } break; case Type_Node3: { auto *self3 = (Node3 *)self; - auto *newSelf = writeContext->allocate(minCapacity, maxCapacity); + auto *newSelf = writeContext->allocate(capacity); newSelf->copyChildrenAndKeyFrom(*self3); getInTree(self, impl) = newSelf; writeContext->deferRelease(self3, newSelf); @@ -1782,7 +1693,7 @@ void freeAndMakeCapacityBetween(Node *&self, int minCapacity, int maxCapacity, } break; case Type_Node16: { auto *self16 = (Node16 *)self; - auto *newSelf = writeContext->allocate(minCapacity, maxCapacity); + auto *newSelf = writeContext->allocate(capacity); newSelf->copyChildrenAndKeyFrom(*self16); getInTree(self, impl) = newSelf; writeContext->deferRelease(self16, newSelf); @@ -1790,7 +1701,7 @@ void freeAndMakeCapacityBetween(Node *&self, int minCapacity, int maxCapacity, } break; case Type_Node48: { auto *self48 = (Node48 *)self; - auto *newSelf = writeContext->allocate(minCapacity, maxCapacity); + auto *newSelf = writeContext->allocate(capacity); newSelf->copyChildrenAndKeyFrom(*self48); getInTree(self, impl) = newSelf; writeContext->deferRelease(self48, newSelf); @@ -1798,7 +1709,7 @@ void freeAndMakeCapacityBetween(Node *&self, int minCapacity, int maxCapacity, } break; case Type_Node256: { auto *self256 = (Node256 *)self; - auto *newSelf = writeContext->allocate(minCapacity, maxCapacity); + auto *newSelf = writeContext->allocate(capacity); newSelf->copyChildrenAndKeyFrom(*self256); getInTree(self, impl) = newSelf; writeContext->deferRelease(self256, newSelf); @@ -1823,8 +1734,7 @@ void maybeDecreaseCapacity(Node *&self, WriteContext *writeContext, if (self->getCapacity() <= maxCapacity) { return; } - freeAndMakeCapacityBetween(self, self->partialKeyLen, maxCapacity, - writeContext, impl); + freeAndMakeCapacity(self, self->partialKeyLen, writeContext, impl); } #if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__) @@ -1898,12 +1808,9 @@ void mergeWithChild(TaggedNodePointer &self, WriteContext *writeContext, assert(!self3->entryPresent); Node *child = self3->children[0]; const int minCapacity = self3->partialKeyLen + 1 + child->partialKeyLen; - const int maxCapacity = - getMaxCapacity(child->numChildren, child->entryPresent, minCapacity); if (minCapacity > child->getCapacity()) { - freeAndMakeCapacityBetween(child, minCapacity, maxCapacity, writeContext, - impl); + freeAndMakeCapacity(child, minCapacity, writeContext, impl); } // Merge partial key with child @@ -1942,8 +1849,7 @@ bool needsDownsize(Node *n) { void downsize(Node3 *self, WriteContext *writeContext, ConflictSet::Impl *impl) { if (self->numChildren == 0) { - auto *newSelf = writeContext->allocate( - self->partialKeyLen, getMaxCapacity(0, 1, self->partialKeyLen)); + auto *newSelf = writeContext->allocate(self->partialKeyLen); newSelf->copyChildrenAndKeyFrom(*self); getInTree(self, impl) = newSelf; writeContext->deferRelease(self, newSelf); @@ -1956,9 +1862,7 @@ void downsize(Node3 *self, WriteContext *writeContext, void downsize(Node16 *self, WriteContext *writeContext, ConflictSet::Impl *impl) { assert(self->numChildren + int(self->entryPresent) < kMinChildrenNode16); - auto *newSelf = writeContext->allocate( - self->partialKeyLen, - getMaxCapacity(kMinChildrenNode16 - 1, 0, self->partialKeyLen)); + auto *newSelf = writeContext->allocate(self->partialKeyLen); newSelf->copyChildrenAndKeyFrom(*self); getInTree(self, impl) = newSelf; writeContext->deferRelease(self, newSelf); @@ -1967,9 +1871,7 @@ void downsize(Node16 *self, WriteContext *writeContext, void downsize(Node48 *self, WriteContext *writeContext, ConflictSet::Impl *impl) { assert(self->numChildren + int(self->entryPresent) < kMinChildrenNode48); - auto *newSelf = writeContext->allocate( - self->partialKeyLen, - getMaxCapacity(kMinChildrenNode48 - 1, 0, self->partialKeyLen)); + auto *newSelf = writeContext->allocate(self->partialKeyLen); newSelf->copyChildrenAndKeyFrom(*self); getInTree(self, impl) = newSelf; writeContext->deferRelease(self, newSelf); @@ -1979,9 +1881,7 @@ void downsize(Node256 *self, WriteContext *writeContext, ConflictSet::Impl *impl) { assert(self->numChildren + int(self->entryPresent) < kMinChildrenNode256); auto *self256 = (Node256 *)self; - auto *newSelf = writeContext->allocate( - self->partialKeyLen, - getMaxCapacity(kMinChildrenNode256 - 1, 0, self->partialKeyLen)); + auto *newSelf = writeContext->allocate(self->partialKeyLen); newSelf->copyChildrenAndKeyFrom(*self256); getInTree(self, impl) = newSelf; writeContext->deferRelease(self256, newSelf); @@ -5268,6 +5168,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl { range_writes_total.add(writeContext.accum.range_writes); nodes_allocated_total.add(writeContext.accum.nodes_allocated); nodes_released_total.add(writeContext.accum.nodes_released); + nodes_resized_total.add(writeContext.accum.nodes_resized); entries_inserted_total.add(writeContext.accum.entries_inserted); entries_erased_total.add(writeContext.accum.entries_erased); insert_iterations_total.add(writeContext.accum.insert_iterations); @@ -5396,6 +5297,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl { std::exchange(writeContext.accum.nodes_allocated, 0)); nodes_released_total.add( std::exchange(writeContext.accum.nodes_released, 0)); + nodes_resized_total.add(std::exchange(writeContext.accum.nodes_resized, 0)); entries_inserted_total.add( std::exchange(writeContext.accum.entries_inserted, 0)); entries_erased_total.add( @@ -5421,7 +5323,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl { keyUpdates = 10; // Insert "" - root = writeContext.allocate(0, 0); + root = writeContext.allocate(0); root->numChildren = 0; root->parent = nullptr; root->entryPresent = false; @@ -5514,6 +5416,9 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl { "The total number of physical tree nodes allocated"); COUNTER(nodes_released_total, "The total number of physical tree nodes released"); + COUNTER(nodes_resized_total, + "The total number of physical tree nodes that have been resized to " + "account for partial key capacity changes"); COUNTER(insert_iterations_total, "The total number of iterations of the main loop for insertion. " "Includes searches where the entry already existed, and so insertion "