diff --git a/ConflictSet.cpp b/ConflictSet.cpp index 745ce4e..3f57c71 100644 --- a/ConflictSet.cpp +++ b/ConflictSet.cpp @@ -46,65 +46,6 @@ struct Entry { int64_t rangeVersion; }; -template -struct BoundedFreeListAllocator { - static_assert(sizeof(T) >= sizeof(void *)); - - T *allocate() { -#if SHOW_MEMORY - ++liveAllocations; - maxLiveAllocations = std::max(maxLiveAllocations, liveAllocations); -#endif - if (freeListSize == 0) { - assert(freeList == nullptr); - return new (safe_malloc(sizeof(T))) T; - } - assert(freeList != nullptr); - void *buffer = freeList; - VALGRIND_MAKE_MEM_DEFINED(freeList, sizeof(freeList)); - memcpy(&freeList, freeList, sizeof(freeList)); - --freeListSize; - VALGRIND_MAKE_MEM_UNDEFINED(buffer, sizeof(T)); - return new (buffer) T; - } - - void release(T *p) { -#if SHOW_MEMORY - --liveAllocations; -#endif - p->~T(); - if (freeListSize == kMaxFreeListSize) { - return free(p); - } - memcpy((void *)p, &freeList, sizeof(freeList)); - freeList = p; - ++freeListSize; - VALGRIND_MAKE_MEM_NOACCESS(p, sizeof(T)); - } - - ~BoundedFreeListAllocator() { - for (void *iter = freeList; iter != nullptr;) { - VALGRIND_MAKE_MEM_DEFINED(iter, sizeof(iter)); - auto *tmp = iter; - memcpy(&iter, iter, sizeof(void *)); - free(tmp); - } - } - -#if SHOW_MEMORY - int64_t highWaterMarkBytes() const { return maxLiveAllocations * sizeof(T); } -#endif - -private: - static constexpr int kMaxFreeListSize = kMemoryBound / sizeof(T); - int freeListSize = 0; - void *freeList = nullptr; -#if SHOW_MEMORY - int64_t maxLiveAllocations = 0; - int64_t liveAllocations = 0; -#endif -}; - struct BitSet { bool test(int i) const; void set(int i); @@ -304,13 +245,9 @@ struct Node256 : Node { } }; -struct NodeAllocators { - BoundedFreeListAllocator node0; - BoundedFreeListAllocator node4; - BoundedFreeListAllocator node16; - BoundedFreeListAllocator node48; - BoundedFreeListAllocator node256; -}; +template NodeT *newNode() { + return new (safe_malloc(sizeof(NodeT))) NodeT; +} int getNodeIndex(Node16 *self, uint8_t index) { #ifdef HAS_AVX @@ -518,8 +455,7 @@ void setChildrenParents(Node256 *n) { // Caller is responsible for assigning a non-null pointer to the returned // reference if null -Node *&getOrCreateChild(Node *&self, uint8_t index, - NodeAllocators *allocators) { +Node *&getOrCreateChild(Node *&self, uint8_t index) { // Fast path for if it exists already if (self->type <= Type::Node16) { @@ -544,10 +480,10 @@ Node *&getOrCreateChild(Node *&self, uint8_t index, if (self->type == Type::Node0) { auto *self0 = static_cast(self); - auto *newSelf = allocators->node4.allocate(); + auto *newSelf = newNode(); memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin, kNodeCopySize); - allocators->node0.release(self0); + free(self0); self = newSelf; goto insert16; @@ -556,12 +492,12 @@ Node *&getOrCreateChild(Node *&self, uint8_t index, auto *self4 = static_cast(self); if (self->numChildren == 4) { - auto *newSelf = allocators->node16.allocate(); + auto *newSelf = newNode(); memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin, kNodeCopySize); memcpy((void *)newSelf->index, (void *)self4->index, sizeof(self4->index) + sizeof(self4->children)); - allocators->node4.release(self4); + free(self4); setChildrenParents(newSelf); self = newSelf; } @@ -572,7 +508,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index, if (self->numChildren == 16) { auto *self16 = static_cast(self); - auto *newSelf = allocators->node48.allocate(); + auto *newSelf = newNode(); memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin, kNodeCopySize); newSelf->nextFree = 16; @@ -584,7 +520,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index, ++i; } assert(i == 16); - allocators->node16.release(self16); + free(self16); setChildrenParents(newSelf); self = newSelf; goto insert48; @@ -612,7 +548,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index, if (self->numChildren == 48) { auto *self48 = static_cast(self); - auto *newSelf = allocators->node256.allocate(); + auto *newSelf = newNode(); memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin, kNodeCopySize); newSelf->bitSet = self48->bitSet; @@ -621,7 +557,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index, newSelf->children[i] = self48->children[self48->index[i]]; }, 0, 256); - allocators->node48.release(self48); + free(self48); setChildrenParents(newSelf); self = newSelf; goto insert256; @@ -648,25 +584,9 @@ Node *&getOrCreateChild(Node *&self, uint8_t index, } // Precondition - an entry for index must exist in the node -void eraseChild(Node *self, uint8_t index, NodeAllocators *allocators) { +void eraseChild(Node *self, uint8_t index) { auto *child = getChildExists(self, index); - switch (child->type) { - case Type::Node0: - allocators->node0.release((Node0 *)child); - break; - case Type::Node4: - allocators->node4.release((Node4 *)child); - break; - case Type::Node16: - allocators->node16.release((Node16 *)child); - break; - case Type::Node48: - allocators->node48.release((Node48 *)child); - break; - case Type::Node256: - allocators->node256.release((Node256 *)child); - break; - } + free(child); if (self->type <= Type::Node16) { auto *self16 = static_cast(self); @@ -698,7 +618,7 @@ void eraseChild(Node *self, uint8_t index, NodeAllocators *allocators) { --self->numChildren; if (self->numChildren == 0 && !self->entryPresent && self->parent != nullptr) { - eraseChild(self->parent, self->parentsIndex, allocators); + eraseChild(self->parent, self->parentsIndex); } } @@ -1526,8 +1446,7 @@ bool checkRangeRead(Node *n, std::span begin, // a postcondition. template [[nodiscard]] Node *insert(Node **self, std::span key, - int64_t writeVersion, NodeAllocators *allocators, - ConflictSet::Impl *impl) { + int64_t writeVersion, ConflictSet::Impl *impl) { for (;;) { @@ -1545,7 +1464,7 @@ template auto *old = *self; int64_t oldMaxVersion = maxVersion(old, impl); - *self = allocators->node0.allocate(); + *self = newNode(); memcpy((char *)*self + sizeof(Node::type), (char *)old + sizeof(Node::type), @@ -1554,8 +1473,7 @@ template (*self)->entryPresent = false; (*self)->numChildren = 0; - getOrCreateChild(*self, old->partialKey[partialKeyIndex], allocators) = - old; + getOrCreateChild(*self, old->partialKey[partialKeyIndex]) = old; old->parent = *self; old->parentsIndex = old->partialKey[partialKeyIndex]; maxVersion(old, impl) = oldMaxVersion; @@ -1597,9 +1515,9 @@ template m = writeVersion; } - auto &child = getOrCreateChild(*self, key.front(), allocators); + auto &child = getOrCreateChild(*self, key.front()); if (!child) { - child = allocators->node0.allocate(); + child = newNode(); child->parent = *self; child->parentsIndex = key.front(); maxVersion(child, impl) = @@ -1631,8 +1549,8 @@ void destroyTree(Node *root) { void addPointWrite(Node *&root, int64_t oldestVersion, std::span key, int64_t writeVersion, - NodeAllocators *allocators, ConflictSet::Impl *impl) { - auto *n = insert(&root, key, writeVersion, allocators, impl); + ConflictSet::Impl *impl) { + auto *n = insert(&root, key, writeVersion, impl); if (!n->entryPresent) { auto *p = nextLogical(n); n->entryPresent = true; @@ -1648,15 +1566,13 @@ void addPointWrite(Node *&root, int64_t oldestVersion, void addWriteRange(Node *&root, int64_t oldestVersion, std::span begin, std::span end, - int64_t writeVersion, NodeAllocators *allocators, - ConflictSet::Impl *impl) { + int64_t writeVersion, ConflictSet::Impl *impl) { int lcp = longestCommonPrefix(begin.data(), end.data(), std::min(begin.size(), end.size())); if (lcp == int(begin.size()) && end.size() == begin.size() + 1 && end.back() == 0) { - return addPointWrite(root, oldestVersion, begin, writeVersion, allocators, - impl); + return addPointWrite(root, oldestVersion, begin, writeVersion, impl); } auto remaining = begin.subspan(0, lcp); @@ -1695,8 +1611,7 @@ void addWriteRange(Node *&root, int64_t oldestVersion, begin = begin.subspan(consumed, begin.size() - consumed); end = end.subspan(consumed, end.size() - consumed); - auto *beginNode = - insert(useAsRoot, begin, writeVersion, allocators, impl); + auto *beginNode = insert(useAsRoot, begin, writeVersion, impl); const bool insertedBegin = !std::exchange(beginNode->entryPresent, true); @@ -1713,7 +1628,7 @@ void addWriteRange(Node *&root, int64_t oldestVersion, assert(writeVersion >= beginNode->entry.pointVersion); beginNode->entry.pointVersion = writeVersion; - auto *endNode = insert(useAsRoot, end, writeVersion, allocators, impl); + auto *endNode = insert(useAsRoot, end, writeVersion, impl); const bool insertedEnd = !std::exchange(endNode->entryPresent, true); @@ -1728,7 +1643,7 @@ void addWriteRange(Node *&root, int64_t oldestVersion, if (insertedEnd) { // beginNode may have been invalidated - beginNode = insert(useAsRoot, begin, writeVersion, allocators, impl); + beginNode = insert(useAsRoot, begin, writeVersion, impl); assert(beginNode->entryPresent); } @@ -1737,7 +1652,7 @@ void addWriteRange(Node *&root, int64_t oldestVersion, beginNode = nextLogical(beginNode); old->entryPresent = false; if (old->numChildren == 0 && old->parent != nullptr) { - eraseChild(old->parent, old->parentsIndex, allocators); + eraseChild(old->parent, old->parentsIndex); } } } @@ -1831,12 +1746,10 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl { auto end = std::span(w.end.p, w.end.len); if (w.end.len > 0) { keyUpdates += 3; - addWriteRange(root, oldestVersion, begin, end, writeVersion, - &allocators, this); + addWriteRange(root, oldestVersion, begin, end, writeVersion, this); } else { keyUpdates += 2; - addPointWrite(root, oldestVersion, begin, writeVersion, &allocators, - this); + addPointWrite(root, oldestVersion, begin, writeVersion, this); } } } @@ -1869,7 +1782,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl { assert(n->entry.rangeVersion <= oldestVersion); prev->entryPresent = false; if (prev->numChildren == 0 && prev->parent != nullptr) { - eraseChild(prev->parent, prev->parentsIndex, &allocators); + eraseChild(prev->parent, prev->parentsIndex); } } @@ -1881,7 +1794,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl { explicit Impl(int64_t oldestVersion) : oldestVersion(oldestVersion) { // Insert "" - root = allocators.node0.allocate(); + root = newNode(); rootMaxVersion = oldestVersion; root->entry.pointVersion = oldestVersion; root->entry.rangeVersion = oldestVersion; @@ -1889,8 +1802,6 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl { } ~Impl() { destroyTree(root); } - NodeAllocators allocators; - Arena removalKeyArena; std::span removalKey; int64_t keyUpdates = 0; @@ -1950,23 +1861,6 @@ ConflictSet::~ConflictSet() { } } -#if SHOW_MEMORY -__attribute__((visibility("default"))) void showMemory(const ConflictSet &cs) { - ConflictSet::Impl *impl; - memcpy(&impl, &cs, sizeof(impl)); // NOLINT - fprintf(stderr, "Max Node0 memory usage: %" PRId64 "\n", - impl->allocators.node0.highWaterMarkBytes()); - fprintf(stderr, "Max Node4 memory usage: %" PRId64 "\n", - impl->allocators.node4.highWaterMarkBytes()); - fprintf(stderr, "Max Node16 memory usage: %" PRId64 "\n", - impl->allocators.node16.highWaterMarkBytes()); - fprintf(stderr, "Max Node48 memory usage: %" PRId64 "\n", - impl->allocators.node48.highWaterMarkBytes()); - fprintf(stderr, "Max Node256 memory usage: %" PRId64 "\n", - impl->allocators.node256.highWaterMarkBytes()); -} -#endif - ConflictSet::ConflictSet(ConflictSet &&other) noexcept : impl(std::exchange(other.impl, nullptr)) {} @@ -2242,8 +2136,7 @@ int main(void) { ankerl::nanobench::Bench bench; ConflictSet::Impl cs{0}; for (int j = 0; j < 256; ++j) { - getOrCreateChild(cs.root, j, &cs.allocators) = - cs.allocators.node0.allocate(); + getOrCreateChild(cs.root, j) = newNode(); if (j % 10 == 0) { bench.run("MaxExclusive " + std::to_string(j), [&]() { bench.doNotOptimizeAway(maxBetweenExclusive(cs.root, 0, 256));