diff --git a/ConflictSet.cpp b/ConflictSet.cpp index 3989160..8cfd87d 100644 --- a/ConflictSet.cpp +++ b/ConflictSet.cpp @@ -220,7 +220,7 @@ int BitSet::firstSetGeq(int i) const { } enum class Type : int8_t { - Node1, + Node0, Node4, Node16, Node48, @@ -249,6 +249,9 @@ struct Node { /* end section that's copied to the next node */ }; +constexpr int kNodeCopyBegin = offsetof(Node, entryPresent); +constexpr int kNodeCopySize = sizeof(Node) - kNodeCopyBegin; + static_assert(offsetof(Node, entry) == offsetof(Node, partialKey) + kPartialKeyMaxLenEntryPresent); static_assert(std::is_trivial_v); @@ -258,12 +261,11 @@ struct Child { Node *child; }; -struct Node1 : Node { +struct Node0 : Node { // Sorted uint8_t index[16]; // 16 so that we can use the same simd index search // implementation as Node16 - Child children[1]; - Node1() { this->type = Type::Node1; } + Node0() { this->type = Type::Node0; } }; struct Node4 : Node { @@ -304,7 +306,7 @@ struct Node256 : Node { }; struct NodeAllocators { - BoundedFreeListAllocator node1; + BoundedFreeListAllocator node0; BoundedFreeListAllocator node4; BoundedFreeListAllocator node16; BoundedFreeListAllocator node48; @@ -540,19 +542,14 @@ Node *&getOrCreateChild(Node *&self, uint8_t index, } } - if (self->type == Type::Node1) { - auto *self1 = static_cast(self); + if (self->type == Type::Node0) { + auto *self0 = static_cast(self); - if (self->numChildren == 1) { - auto *newSelf = allocators->node4.allocate(); - memcpy((void *)newSelf, self, sizeof(Node1)); - newSelf->type = Type::Node4; - allocators->node1.release(self1); - setChildrenParents(newSelf); - self = newSelf; - } else { - assert(self->numChildren == 0); - } + auto *newSelf = allocators->node4.allocate(); + memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin, + kNodeCopySize); + allocators->node0.release(self0); + self = newSelf; goto insert16; @@ -561,8 +558,10 @@ Node *&getOrCreateChild(Node *&self, uint8_t index, if (self->numChildren == 4) { auto *newSelf = allocators->node16.allocate(); - memcpy((void *)newSelf, self, sizeof(Node4)); - newSelf->type = Type::Node16; + memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin, + kNodeCopySize); + memcpy((void *)newSelf->index, (void *)self4->index, + sizeof(self4->index) + sizeof(self4->children)); allocators->node4.release(self4); setChildrenParents(newSelf); self = newSelf; @@ -575,9 +574,8 @@ Node *&getOrCreateChild(Node *&self, uint8_t index, if (self->numChildren == 16) { auto *self16 = static_cast(self); auto *newSelf = allocators->node48.allocate(); - memcpy((char *)newSelf + sizeof(Node::type), - (char *)self + sizeof(Node::type), - sizeof(Node) - sizeof(Node::type)); + memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin, + kNodeCopySize); newSelf->nextFree = 16; int i = 0; for (auto x : self16->index) { @@ -616,9 +614,8 @@ Node *&getOrCreateChild(Node *&self, uint8_t index, if (self->numChildren == 48) { auto *self48 = static_cast(self); auto *newSelf = allocators->node256.allocate(); - memcpy((char *)newSelf + sizeof(Node::type), - (char *)self + sizeof(Node::type), - sizeof(Node) - sizeof(Node::type)); + memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin, + kNodeCopySize); newSelf->bitSet = self48->bitSet; newSelf->bitSet.forEachInRange( [&](int i) { @@ -655,8 +652,8 @@ Node *&getOrCreateChild(Node *&self, uint8_t index, void eraseChild(Node *self, uint8_t index, NodeAllocators *allocators) { auto *child = getChildExists(self, index); switch (child->type) { - case Type::Node1: - allocators->node1.release((Node1 *)child); + case Type::Node0: + allocators->node0.release((Node0 *)child); break; case Type::Node4: allocators->node4.release((Node4 *)child); @@ -1045,7 +1042,7 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) { } } switch (n->type) { - case Type::Node1: + case Type::Node0: [[fallthrough]]; case Type::Node4: [[fallthrough]]; @@ -1553,7 +1550,7 @@ template auto *old = *self; int64_t oldMaxVersion = maxVersion(old, impl); - *self = allocators->node1.allocate(); + *self = allocators->node0.allocate(); memcpy((char *)*self + sizeof(Node::type), (char *)old + sizeof(Node::type), @@ -1607,7 +1604,7 @@ template auto &child = getOrCreateChild(*self, key.front(), allocators); if (!child) { - child = allocators->node1.allocate(); + child = allocators->node0.allocate(); child->parent = *self; child->parentsIndex = key.front(); maxVersion(child, impl) = @@ -1889,7 +1886,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl { explicit Impl(int64_t oldestVersion) : oldestVersion(oldestVersion) { // Insert "" - root = allocators.node1.allocate(); + root = allocators.node0.allocate(); rootMaxVersion = oldestVersion; root->entry.pointVersion = oldestVersion; root->entry.rangeVersion = oldestVersion; @@ -1962,8 +1959,8 @@ ConflictSet::~ConflictSet() { __attribute__((visibility("default"))) void showMemory(const ConflictSet &cs) { ConflictSet::Impl *impl; memcpy(&impl, &cs, sizeof(impl)); // NOLINT - fprintf(stderr, "Max Node1 memory usage: %" PRId64 "\n", - impl->allocators.node1.highWaterMarkBytes()); + fprintf(stderr, "Max Node0 memory usage: %" PRId64 "\n", + impl->allocators.node0.highWaterMarkBytes()); fprintf(stderr, "Max Node4 memory usage: %" PRId64 "\n", impl->allocators.node4.highWaterMarkBytes()); fprintf(stderr, "Max Node16 memory usage: %" PRId64 "\n", @@ -2251,7 +2248,7 @@ int main(void) { ConflictSet::Impl cs{0}; for (int j = 0; j < 256; ++j) { getOrCreateChild(cs.root, j, &cs.allocators) = - cs.allocators.node1.allocate(); + cs.allocators.node0.allocate(); if (j % 10 == 0) { bench.run("MaxExclusive " + std::to_string(j), [&]() { bench.doNotOptimizeAway(maxBetweenExclusive(cs.root, 0, 256));