diff --git a/ConflictSet.cpp b/ConflictSet.cpp index 7c5820a..b0def54 100644 --- a/ConflictSet.cpp +++ b/ConflictSet.cpp @@ -261,17 +261,20 @@ struct Node256 : Node { // Bound memory usage following the analysis in the ART paper -constexpr int kBytesPerKey = 121; -constexpr int kMinChildrenNode4 = 1; +constexpr int kBytesPerKey = 120; +constexpr int kMinChildrenNode4 = 2; constexpr int kMinChildrenNode16 = 5; constexpr int kMinChildrenNode48 = 17; constexpr int kMinChildrenNode256 = 49; -static_assert(sizeof(Node256) < kMinChildrenNode256 * kBytesPerKey); -static_assert(sizeof(Node48) < kMinChildrenNode48 * kBytesPerKey); -static_assert(sizeof(Node16) < kMinChildrenNode16 * kBytesPerKey); -static_assert(sizeof(Node4) < kMinChildrenNode4 * kBytesPerKey); -static_assert(sizeof(Node0) < kBytesPerKey); +static_assert(sizeof(Node256) + kBytesPerKey <= + kMinChildrenNode256 * kBytesPerKey); +static_assert(sizeof(Node48) + kBytesPerKey <= + kMinChildrenNode48 * kBytesPerKey); +static_assert(sizeof(Node16) + kBytesPerKey <= + kMinChildrenNode16 * kBytesPerKey); +static_assert(sizeof(Node4) + kBytesPerKey <= kMinChildrenNode4 * kBytesPerKey); +static_assert(sizeof(Node0) <= kBytesPerKey); // setOldestVersion will additionally try to maintain this property: // `max(children, 1) * length >= capacity` @@ -736,21 +739,14 @@ Node *nextLogical(Node *node) { return node; } -// Fix larger-than-desired capacities. Does not return nodes to freelists, -// since that wouldn't actually reclaim the memory used for partial key -// capacity. -void maybeDecreaseCapacity(Node *&self, NodeAllocators *allocators, - ConflictSet::Impl *impl) { - const int maxCapacity = - std::max(self->numChildren, 1) * self->partialKeyLen; - if (self->partialKeyCapacity <= maxCapacity) { - return; - } - +// Invalidates `self`, replacing it with a node whose partial key capacity is +// at least `capacity`. Does not return nodes to freelists. 
+void makeCapacityAtLeast(Node *&self, int capacity, NodeAllocators *allocators, + ConflictSet::Impl *impl) { switch (self->type) { case Type::Node0: { auto *self0 = (Node0 *)self; - auto *newSelf = allocators->node0.allocate(maxCapacity); + auto *newSelf = allocators->node0.allocate(capacity); memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin, kNodeCopySize); memcpy(newSelf->partialKey(), self0->partialKey(), self->partialKeyLen); @@ -760,7 +756,7 @@ void maybeDecreaseCapacity(Node *&self, NodeAllocators *allocators, } break; case Type::Node4: { auto *self4 = (Node4 *)self; - auto *newSelf = allocators->node4.allocate(maxCapacity); + auto *newSelf = allocators->node4.allocate(capacity); memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin, kNodeCopySize); memcpy(newSelf->partialKey(), self4->partialKey(), self->partialKeyLen); @@ -776,7 +772,7 @@ void maybeDecreaseCapacity(Node *&self, NodeAllocators *allocators, } break; case Type::Node16: { auto *self16 = (Node16 *)self; - auto *newSelf = allocators->node16.allocate(maxCapacity); + auto *newSelf = allocators->node16.allocate(capacity); memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin, kNodeCopySize); memcpy(newSelf->partialKey(), self16->partialKey(), self->partialKeyLen); @@ -792,7 +788,7 @@ void maybeDecreaseCapacity(Node *&self, NodeAllocators *allocators, } break; case Type::Node48: { auto *self48 = (Node48 *)self; - auto *newSelf = allocators->node48.allocate(maxCapacity); + auto *newSelf = allocators->node48.allocate(capacity); memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin, kNodeCopySize); memcpy(newSelf->partialKey(), self48->partialKey(), self->partialKeyLen); @@ -812,7 +808,7 @@ void maybeDecreaseCapacity(Node *&self, NodeAllocators *allocators, } break; case Type::Node256: { auto *self256 = (Node256 *)self; - auto *newSelf = allocators->node256.allocate(maxCapacity); + auto *newSelf = 
allocators->node256.allocate(capacity); memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin, kNodeCopySize); memcpy(newSelf->partialKey(), self256->partialKey(), self->partialKeyLen); @@ -827,6 +823,19 @@ void maybeDecreaseCapacity(Node *&self, NodeAllocators *allocators, } } +// Fix larger-than-desired capacities. Does not return nodes to freelists, +// since that wouldn't actually reclaim the memory used for partial key +// capacity. +void maybeDecreaseCapacity(Node *&self, NodeAllocators *allocators, + ConflictSet::Impl *impl) { + const int maxCapacity = + std::max(self->numChildren, 1) * self->partialKeyLen; + if (self->partialKeyCapacity <= maxCapacity) { + return; + } + makeCapacityAtLeast(self, maxCapacity, allocators, impl); +} + // TODO fuse into erase child so we don't need to repeat branches on type void maybeDownsize(Node *self, NodeAllocators *allocators, ConflictSet::Impl *impl, Node *&dontInvalidate) { @@ -885,7 +894,7 @@ void maybeDownsize(Node *self, NodeAllocators *allocators, } } break; case Type::Node16: - if (self->numChildren < kMinChildrenNode16) { + if (self->numChildren + int(self->entryPresent) < kMinChildrenNode16) { auto *self16 = (Node16 *)self; auto *newSelf = allocators->node4.allocate(self->partialKeyLen); memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin, @@ -902,7 +911,7 @@ void maybeDownsize(Node *self, NodeAllocators *allocators, } break; case Type::Node48: - if (self->numChildren < kMinChildrenNode48) { + if (self->numChildren + int(self->entryPresent) < kMinChildrenNode48) { auto *self48 = (Node48 *)self; auto *newSelf = allocators->node16.allocate(self->partialKeyLen); memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin, @@ -915,7 +924,7 @@ void maybeDownsize(Node *self, NodeAllocators *allocators, // Suppress a false positive -Waggressive-loop-optimizations warning // in gcc. `assume` doesn't work for some reason. 
if (!(i < 16)) { - __builtin_unreachable(); + __builtin_unreachable(); // GCOVR_EXCL_LINE } newSelf->index[i] = c; newSelf->children[i] = self48->children[self48->index[c]]; @@ -929,7 +938,7 @@ void maybeDownsize(Node *self, NodeAllocators *allocators, } break; case Type::Node256: - if (self->numChildren < kMinChildrenNode256) { + if (self->numChildren + int(self->entryPresent) < kMinChildrenNode256) { auto *self256 = (Node256 *)self; auto *newSelf = allocators->node48.allocate(self->partialKeyLen); memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin, @@ -2500,7 +2509,7 @@ Iterator firstGeq(Node *n, std::string_view key) { minNumChildren = kMinChildrenNode256; break; } - if (node->numChildren < minNumChildren) { + if (node->numChildren + int(node->entryPresent) < minNumChildren) { fprintf(stderr, "%s has %d children, which is less than the minimum required %d\n", getSearchPathPrintable(node).c_str(), node->numChildren,