diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2f9c120..5f3fcc1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,4 +15,12 @@ repos: description: disallow checking in DEBUG_VERBOSE=1 entry: '^#define DEBUG_VERBOSE 1$' language: pygrep + types: [c++] + - repo: local + hooks: + - id: debug verbose check + name: disallow checking in SHOW_MEMORY=1 + description: disallow checking in SHOW_MEMORY=1 + entry: '^#define SHOW_MEMORY 1$' + language: pygrep types: [c++] \ No newline at end of file diff --git a/ConflictSet.cpp b/ConflictSet.cpp index 05b9353..ead210e 100644 --- a/ConflictSet.cpp +++ b/ConflictSet.cpp @@ -51,6 +51,10 @@ struct BoundedFreeListAllocator { static_assert(sizeof(T) >= sizeof(void *)); T *allocate() { +#if SHOW_MEMORY + ++liveAllocations; + maxLiveAllocations = std::max(maxLiveAllocations, liveAllocations); +#endif if (freeListSize == 0) { assert(freeList == nullptr); return new (safe_malloc(sizeof(T))) T; @@ -65,6 +69,9 @@ struct BoundedFreeListAllocator { } void release(T *p) { +#if SHOW_MEMORY + --liveAllocations; +#endif p->~T(); if (freeListSize == kMaxFreeListSize) { return free(p); @@ -84,10 +91,18 @@ struct BoundedFreeListAllocator { } } +#if SHOW_MEMORY + int64_t highWaterMarkBytes() const { return maxLiveAllocations * sizeof(T); } +#endif + private: static constexpr int kMaxFreeListSize = kMemoryBound / sizeof(T); int freeListSize = 0; void *freeList = nullptr; +#if SHOW_MEMORY + int64_t maxLiveAllocations = 0; + int64_t liveAllocations = 0; +#endif }; struct BitSet { @@ -205,6 +220,7 @@ int BitSet::firstSetGeq(int i) const { } enum class Type : int8_t { + Node1, Node4, Node16, Node48, @@ -233,10 +249,18 @@ struct Child { Node *child; }; +struct Node1 : Node { + // Sorted + uint8_t index[16]; // 16 so that we can use the same simd index search + // implementation as Node16 + Child children[1]; + Node1() { this->type = Type::Node1; } +}; + struct Node4 : Node { // Sorted uint8_t index[16]; // 16 so that we can use the same simd index search - // implementation for Node4 as Node16 + // implementation as Node16 Child children[4]; Node4() { this->type = Type::Node4; } }; @@ -271,6 +295,7 @@ struct Node256 : Node { }; struct NodeAllocators { + BoundedFreeListAllocator node1; BoundedFreeListAllocator node4; BoundedFreeListAllocator node16; BoundedFreeListAllocator node48; @@ -459,6 +484,12 @@ int getChildGeq(Node *self, int child) { return -1; } +void setChildrenParents(Node4 *n) { + for (int i = 0; i < n->numChildren; ++i) { + n->children[i].child->parent = n; + } +} + void setChildrenParents(Node16 *n) { for (int i = 0; i < n->numChildren; ++i) { n->children[i].child->parent = n; @@ -500,7 +531,23 @@ Node *&getOrCreateChild(Node *&self, uint8_t index, } } - if (self->type == Type::Node4) { + if (self->type == Type::Node1) { + auto *self1 = static_cast(self); + + if (self->numChildren == 1) { + auto *newSelf = allocators->node4.allocate(); + memcpy((void *)newSelf, self, sizeof(Node1)); + newSelf->type = Type::Node4; + allocators->node1.release(self1); + setChildrenParents(newSelf); + self = newSelf; + } else { + assert(self->numChildren == 0); + } + + goto insert16; + + } else if (self->type == Type::Node4) { auto *self4 = static_cast(self); if (self->numChildren == 4) { @@ -582,6 +629,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index, result = nullptr; return result; } else { + assert(self->type == Type::Node256); insert256: auto *self256 = static_cast(self); ++self->numChildren; @@ -594,6 +642,9 @@ Node *&getOrCreateChild(Node *&self, uint8_t index, void eraseChild(Node *self, uint8_t index, NodeAllocators *allocators) { auto *child = getChildExists(self, index); switch (child->type) { + case Type::Node1: + allocators->node1.release((Node1 *)child); + break; case Type::Node4: allocators->node4.release((Node4 *)child); break; @@ -979,6 +1030,8 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) { } } switch (n->type) { + case Type::Node1: + [[fallthrough]]; case Type::Node4: [[fallthrough]]; case Type::Node16: { @@ -1480,7 +1533,7 @@ template auto *old = *self; int64_t oldMaxVersion = maxVersion(old, impl); - *self = allocators->node4.allocate(); + *self = allocators->node1.allocate(); memcpy((void *)*self, old, offsetof(Node, type)); (*self)->partialKeyLen = partialKeyIndex; @@ -1526,7 +1579,7 @@ template auto &child = getOrCreateChild(*self, key.front(), allocators); if (!child) { - child = allocators->node4.allocate(); + child = allocators->node1.allocate(); child->parent = *self; child->parentsIndex = key.front(); maxVersion(child, impl) = @@ -1846,13 +1899,27 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl { explicit Impl(int64_t oldestVersion) : oldestVersion(oldestVersion) { // Insert "" - root = allocators.node4.allocate(); + root = allocators.node1.allocate(); rootMaxVersion = oldestVersion; root->entry.pointVersion = oldestVersion; root->entry.rangeVersion = oldestVersion; root->entryPresent = true; } - ~Impl() { destroyTree(root); } + ~Impl() { + destroyTree(root); +#if SHOW_MEMORY + fprintf(stderr, "Max Node1 memory usage: %" PRId64 "\n", + allocators.node1.highWaterMarkBytes()); + fprintf(stderr, "Max Node4 memory usage: %" PRId64 "\n", + allocators.node4.highWaterMarkBytes()); + fprintf(stderr, "Max Node16 memory usage: %" PRId64 "\n", + allocators.node16.highWaterMarkBytes()); + fprintf(stderr, "Max Node48 memory usage: %" PRId64 "\n", + allocators.node48.highWaterMarkBytes()); + fprintf(stderr, "Max Node256 memory usage: %" PRId64 "\n", + allocators.node256.highWaterMarkBytes()); +#endif + } NodeAllocators allocators; @@ -2182,7 +2249,7 @@ int main(void) { ConflictSet::Impl cs{0}; for (int j = 0; j < 256; ++j) { getOrCreateChild(cs.root, j, &cs.allocators) = - cs.allocators.node4.allocate(); + cs.allocators.node1.allocate(); if (j % 10 == 0) { bench.run("MaxExclusive " + std::to_string(j), [&]() { bench.doNotOptimizeAway(maxBetweenExclusive(cs.root, 0, 256)); diff --git a/Internal.h b/Internal.h index c7ce7e9..65a4376 100644 --- a/Internal.h +++ b/Internal.h @@ -21,6 +21,7 @@ #include #define DEBUG_VERBOSE 0 +#define SHOW_MEMORY 0 [[nodiscard]] inline auto operator<=>(const std::span &lhs,