Maintain capacity invariant strictly

This commit is contained in:
2024-11-04 13:43:02 -08:00
parent 891100e649
commit 4eaad39294

View File

@@ -210,7 +210,7 @@ enum Type : int8_t {
Type_Node256, Type_Node256,
}; };
template <class T> struct BoundedFreeListAllocator; template <class T> struct NodeAllocator;
struct TaggedNodePointer { struct TaggedNodePointer {
TaggedNodePointer() = default; TaggedNodePointer() = default;
@@ -297,9 +297,9 @@ struct Node {
} }
private: private:
template <class T> friend struct BoundedFreeListAllocator; template <class T> friend struct NodeAllocator;
// These are publicly readable, but should only be written by // These are publicly readable, but should only be written by
// BoundedFreeListAllocator // NodeAllocator
Type type; Type type;
int32_t partialKeyCapacity; int32_t partialKeyCapacity;
}; };
@@ -644,7 +644,7 @@ constexpr int kMinNodeSurplus = 104;
constexpr int kBytesPerKey = 112; constexpr int kBytesPerKey = 112;
constexpr int kMinNodeSurplus = 80; constexpr int kMinNodeSurplus = 80;
#endif #endif
// Cound the entry itself as a child // Count the entry itself as a child
constexpr int kMinChildrenNode0 = 1; constexpr int kMinChildrenNode0 = 1;
constexpr int kMinChildrenNode3 = 2; constexpr int kMinChildrenNode3 = 2;
constexpr int kMinChildrenNode16 = 4; constexpr int kMinChildrenNode16 = 4;
@@ -669,48 +669,22 @@ static_assert(kNode3Surplus >= kMinNodeSurplus);
static_assert(kBytesPerKey - sizeof(Node0) >= kMinNodeSurplus); static_assert(kBytesPerKey - sizeof(Node0) >= kMinNodeSurplus);
// setOldestVersion will additionally try to maintain this property: // We'll additionally maintain this property:
// `(children + entryPresent) * length >= capacity` // `(children + entryPresent) * length >= capacity`
// //
// Which should give us the budget to pay for the key bytes. (children + // Which should give us the budget to pay for the key bytes. (children +
// entryPresent) is a lower bound on how many keys these bytes are a prefix of // entryPresent) is a lower bound on how many keys these bytes are a prefix of
constexpr int64_t kFreeListMaxMemory = 1 << 20; // For now it's pretty much just a wrapper around malloc/free with some
// application-specific initialization. Maintaining a free list doesn't work
template <class T> struct BoundedFreeListAllocator { // that well since partial capacities mean the nodes have different sizes. If we
// come up with something better later we can implement it here.
template <class T> struct NodeAllocator {
static_assert(sizeof(T) >= sizeof(void *)); static_assert(sizeof(T) >= sizeof(void *));
static_assert(std::derived_from<T, Node>); static_assert(std::derived_from<T, Node>);
static_assert(std::is_trivial_v<T>); static_assert(std::is_trivial_v<T>);
T *allocate_helper(int partialKeyCapacity) {
if (freeList != nullptr) {
T *n = (T *)freeList;
VALGRIND_MAKE_MEM_DEFINED(freeList, sizeof(freeList));
memcpy(&freeList, freeList, sizeof(freeList));
VALGRIND_MAKE_MEM_UNDEFINED(n, sizeof(T));
VALGRIND_MAKE_MEM_DEFINED(&n->partialKeyCapacity,
sizeof(n->partialKeyCapacity));
VALGRIND_MAKE_MEM_DEFINED(&n->type, sizeof(n->type));
assert(n->type == T::kType);
VALGRIND_MAKE_MEM_UNDEFINED(n + 1, n->partialKeyCapacity);
freeListBytes -= sizeof(T) + n->partialKeyCapacity;
if (n->partialKeyCapacity >= partialKeyCapacity) {
return n;
} else {
// The intent is to filter out too-small nodes in the freelist
removeNode(n);
safe_free(n, sizeof(T) + n->partialKeyCapacity);
}
}
auto *result = (T *)safe_malloc(sizeof(T) + partialKeyCapacity);
result->type = T::kType;
result->partialKeyCapacity = partialKeyCapacity;
addNode(result);
return result;
}
T *allocate(int partialKeyCapacity) { T *allocate(int partialKeyCapacity) {
T *result = allocate_helper(partialKeyCapacity); T *result = allocate_helper(partialKeyCapacity);
result->endOfRange = false; result->endOfRange = false;
@@ -732,37 +706,27 @@ template <class T> struct BoundedFreeListAllocator {
} }
void release(T *p) { void release(T *p) {
if (freeListBytes >= kFreeListMaxMemory) {
removeNode(p); removeNode(p);
return safe_free(p, sizeof(T) + p->partialKeyCapacity); return safe_free(p, sizeof(T) + p->partialKeyCapacity);
} }
memcpy((void *)p, &freeList, sizeof(freeList));
freeList = p;
freeListBytes += sizeof(T) + p->partialKeyCapacity;
VALGRIND_MAKE_MEM_NOACCESS(freeList, sizeof(T) + p->partialKeyCapacity);
}
BoundedFreeListAllocator() = default; NodeAllocator() = default;
BoundedFreeListAllocator(const BoundedFreeListAllocator &) = delete; NodeAllocator(const NodeAllocator &) = delete;
BoundedFreeListAllocator & NodeAllocator &operator=(const NodeAllocator &) = delete;
operator=(const BoundedFreeListAllocator &) = delete; NodeAllocator(NodeAllocator &&) = delete;
BoundedFreeListAllocator(BoundedFreeListAllocator &&) = delete; NodeAllocator &operator=(NodeAllocator &&) = delete;
BoundedFreeListAllocator &operator=(BoundedFreeListAllocator &&) = delete;
~BoundedFreeListAllocator() { ~NodeAllocator() {}
for (void *iter = freeList; iter != nullptr;) {
VALGRIND_MAKE_MEM_DEFINED(iter, sizeof(Node));
auto *tmp = (T *)iter;
memcpy(&iter, iter, sizeof(void *));
removeNode((tmp));
safe_free(tmp, sizeof(T) + tmp->partialKeyCapacity);
}
}
private: private:
int64_t freeListBytes = 0; T *allocate_helper(int partialKeyCapacity) {
void *freeList = nullptr; auto *result = (T *)safe_malloc(sizeof(T) + partialKeyCapacity);
result->type = T::kType;
result->partialKeyCapacity = partialKeyCapacity;
addNode(result);
return result;
}
}; };
uint8_t *Node::partialKey() { uint8_t *Node::partialKey() {
@@ -896,11 +860,11 @@ struct WriteContext {
private: private:
Node *deferredList = nullptr; Node *deferredList = nullptr;
BoundedFreeListAllocator<Node0> node0; NodeAllocator<Node0> node0;
BoundedFreeListAllocator<Node3> node3; NodeAllocator<Node3> node3;
BoundedFreeListAllocator<Node16> node16; NodeAllocator<Node16> node16;
BoundedFreeListAllocator<Node48> node48; NodeAllocator<Node48> node48;
BoundedFreeListAllocator<Node256> node256; NodeAllocator<Node256> node256;
}; };
int getNodeIndex(Node3 *self, uint8_t index) { int getNodeIndex(Node3 *self, uint8_t index) {
@@ -1177,7 +1141,8 @@ void setMaxVersion(Node *n, InternalVersionT newMax) {
} }
} }
TaggedNodePointer &getInTree(Node *n, ConflictSet::Impl *); // If impl is nullptr, then n->parent must not be nullptr
TaggedNodePointer &getInTree(Node *n, ConflictSet::Impl *impl);
TaggedNodePointer getChild(Node0 *, uint8_t) { return nullptr; } TaggedNodePointer getChild(Node0 *, uint8_t) { return nullptr; }
TaggedNodePointer getChild(Node3 *self, uint8_t index) { TaggedNodePointer getChild(Node3 *self, uint8_t index) {
@@ -1430,6 +1395,9 @@ TaggedNodePointer getFirstChildExists(Node *self) {
// GCOVR_EXCL_STOP // GCOVR_EXCL_STOP
} }
// self must not be the root
void maybeDecreaseCapacity(Node *&self, WriteContext *writeContext);
void consumePartialKeyFull(TaggedNodePointer &self, TrivialSpan &key, void consumePartialKeyFull(TaggedNodePointer &self, TrivialSpan &key,
InternalVersionT writeVersion, InternalVersionT writeVersion,
WriteContext *writeContext) { WriteContext *writeContext) {
@@ -1466,9 +1434,8 @@ void consumePartialKeyFull(TaggedNodePointer &self, TrivialSpan &key,
old->partialKeyLen - (partialKeyIndex + 1)); old->partialKeyLen - (partialKeyIndex + 1));
old->partialKeyLen -= partialKeyIndex + 1; old->partialKeyLen -= partialKeyIndex + 1;
// We would consider decreasing capacity here, but we can't invalidate // Maintain memory capacity invariant
// old since it's not on the search path. setOldestVersion will clean it maybeDecreaseCapacity(old, writeContext);
// up.
} }
key = key.subspan(partialKeyIndex, key.size() - partialKeyIndex); key = key.subspan(partialKeyIndex, key.size() - partialKeyIndex);
} }
@@ -1707,76 +1674,47 @@ downLeftSpine:
return node; return node;
} }
// Invalidates `self`, replacing it with a node of at least capacity.
// Does not return nodes to freelists when kUseFreeList is false.
void freeAndMakeCapacityAtLeast(Node *&self, int capacity, void freeAndMakeCapacityAtLeast(Node *&self, int capacity,
WriteContext *writeContext, WriteContext *writeContext) {
ConflictSet::Impl *impl,
const bool kUseFreeList) {
switch (self->getType()) { switch (self->getType()) {
case Type_Node0: { case Type_Node0: {
auto *self0 = (Node0 *)self; auto *self0 = (Node0 *)self;
auto *newSelf = writeContext->allocate<Node0>(capacity); auto *newSelf = writeContext->allocate<Node0>(capacity);
newSelf->copyChildrenAndKeyFrom(*self0); newSelf->copyChildrenAndKeyFrom(*self0);
getInTree(self, impl) = newSelf; getInTree(self, nullptr) = newSelf;
if (kUseFreeList) {
writeContext->deferRelease(self0, newSelf); writeContext->deferRelease(self0, newSelf);
} else {
removeNode(self0);
safe_free(self0, self0->size());
}
self = newSelf; self = newSelf;
} break; } break;
case Type_Node3: { case Type_Node3: {
auto *self3 = (Node3 *)self; auto *self3 = (Node3 *)self;
auto *newSelf = writeContext->allocate<Node3>(capacity); auto *newSelf = writeContext->allocate<Node3>(capacity);
newSelf->copyChildrenAndKeyFrom(*self3); newSelf->copyChildrenAndKeyFrom(*self3);
getInTree(self, impl) = newSelf; getInTree(self, nullptr) = newSelf;
if (kUseFreeList) {
writeContext->deferRelease(self3, newSelf); writeContext->deferRelease(self3, newSelf);
} else {
removeNode(self3);
safe_free(self3, self3->size());
}
self = newSelf; self = newSelf;
} break; } break;
case Type_Node16: { case Type_Node16: {
auto *self16 = (Node16 *)self; auto *self16 = (Node16 *)self;
auto *newSelf = writeContext->allocate<Node16>(capacity); auto *newSelf = writeContext->allocate<Node16>(capacity);
newSelf->copyChildrenAndKeyFrom(*self16); newSelf->copyChildrenAndKeyFrom(*self16);
getInTree(self, impl) = newSelf; getInTree(self, nullptr) = newSelf;
if (kUseFreeList) {
writeContext->deferRelease(self16, newSelf); writeContext->deferRelease(self16, newSelf);
} else {
removeNode(self16);
safe_free(self16, self16->size());
}
self = newSelf; self = newSelf;
} break; } break;
case Type_Node48: { case Type_Node48: {
auto *self48 = (Node48 *)self; auto *self48 = (Node48 *)self;
auto *newSelf = writeContext->allocate<Node48>(capacity); auto *newSelf = writeContext->allocate<Node48>(capacity);
newSelf->copyChildrenAndKeyFrom(*self48); newSelf->copyChildrenAndKeyFrom(*self48);
getInTree(self, impl) = newSelf; getInTree(self, nullptr) = newSelf;
if (kUseFreeList) {
writeContext->deferRelease(self48, newSelf); writeContext->deferRelease(self48, newSelf);
} else {
removeNode(self48);
safe_free(self48, self48->size());
}
self = newSelf; self = newSelf;
} break; } break;
case Type_Node256: { case Type_Node256: {
auto *self256 = (Node256 *)self; auto *self256 = (Node256 *)self;
auto *newSelf = writeContext->allocate<Node256>(capacity); auto *newSelf = writeContext->allocate<Node256>(capacity);
newSelf->copyChildrenAndKeyFrom(*self256); newSelf->copyChildrenAndKeyFrom(*self256);
getInTree(self, impl) = newSelf; getInTree(self, nullptr) = newSelf;
if (kUseFreeList) {
writeContext->deferRelease(self256, newSelf); writeContext->deferRelease(self256, newSelf);
} else {
removeNode(self256);
safe_free(self256, self256->size());
}
self = newSelf; self = newSelf;
} break; } break;
default: // GCOVR_EXCL_LINE default: // GCOVR_EXCL_LINE
@@ -1784,11 +1722,8 @@ void freeAndMakeCapacityAtLeast(Node *&self, int capacity,
} }
} }
// Fix larger-than-desired capacities. Does not return nodes to freelists, // Fix larger-than-desired capacities. self must not be the root
// since that wouldn't actually reclaim the memory used for partial key void maybeDecreaseCapacity(Node *&self, WriteContext *writeContext) {
// capacity.
void maybeDecreaseCapacity(Node *&self, WriteContext *writeContext,
ConflictSet::Impl *impl) {
const int maxCapacity = const int maxCapacity =
(self->numChildren + int(self->entryPresent)) * (self->partialKeyLen + 1); (self->numChildren + int(self->entryPresent)) * (self->partialKeyLen + 1);
@@ -1800,7 +1735,7 @@ void maybeDecreaseCapacity(Node *&self, WriteContext *writeContext,
if (self->getCapacity() <= maxCapacity) { if (self->getCapacity() <= maxCapacity) {
return; return;
} }
freeAndMakeCapacityAtLeast(self, maxCapacity, writeContext, impl, false); freeAndMakeCapacityAtLeast(self, maxCapacity, writeContext);
} }
#if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__) #if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__)
@@ -1870,13 +1805,13 @@ void rezero(Node *n, InternalVersionT z) {
#endif #endif
void mergeWithChild(TaggedNodePointer &self, WriteContext *writeContext, void mergeWithChild(TaggedNodePointer &self, WriteContext *writeContext,
ConflictSet::Impl *impl, Node3 *self3) { Node3 *self3) {
assert(!self3->entryPresent); assert(!self3->entryPresent);
Node *child = self3->children[0]; Node *child = self3->children[0];
int minCapacity = self3->partialKeyLen + 1 + child->partialKeyLen; int minCapacity = self3->partialKeyLen + 1 + child->partialKeyLen;
if (minCapacity > child->getCapacity()) { if (minCapacity > child->getCapacity()) {
freeAndMakeCapacityAtLeast(child, minCapacity, writeContext, impl, true); freeAndMakeCapacityAtLeast(child, minCapacity, writeContext);
} }
// Merge partial key with child // Merge partial key with child
@@ -1921,7 +1856,7 @@ void downsize(Node3 *self, WriteContext *writeContext,
writeContext->deferRelease(self, newSelf); writeContext->deferRelease(self, newSelf);
} else { } else {
assert(self->numChildren == 1 && !self->entryPresent); assert(self->numChildren == 1 && !self->entryPresent);
mergeWithChild(getInTree(self, impl), writeContext, impl, self); mergeWithChild(getInTree(self, impl), writeContext, self);
} }
} }
@@ -2001,6 +1936,10 @@ Node *erase(Node *self, WriteContext *writeContext, ConflictSet::Impl *impl,
if (needsDownsize(self)) { if (needsDownsize(self)) {
downsize(self, writeContext, impl); downsize(self, writeContext, impl);
} }
while (self->releaseDeferred) {
self = self->forwardTo;
}
maybeDecreaseCapacity(self, writeContext);
if (result != nullptr) { if (result != nullptr) {
while (result->releaseDeferred) { while (result->releaseDeferred) {
result = result->forwardTo; result = result->forwardTo;
@@ -2088,6 +2027,11 @@ Node *erase(Node *self, WriteContext *writeContext, ConflictSet::Impl *impl,
__builtin_unreachable(); // GCOVR_EXCL_LINE __builtin_unreachable(); // GCOVR_EXCL_LINE
} }
while (parent->releaseDeferred) {
parent = parent->forwardTo;
}
maybeDecreaseCapacity(parent, writeContext);
if (result != nullptr) { if (result != nullptr) {
while (result->releaseDeferred) { while (result->releaseDeferred) {
result = result->forwardTo; result = result->forwardTo;
@@ -5271,7 +5215,6 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
assert(n->entry.rangeVersion <= oldestVersion); assert(n->entry.rangeVersion <= oldestVersion);
n = erase(n, &writeContext, this, /*logical*/ false); n = erase(n, &writeContext, this, /*logical*/ false);
} else { } else {
maybeDecreaseCapacity(n, &writeContext, this);
n = nextPhysical(n); n = nextPhysical(n);
} }
} }
@@ -5923,7 +5866,16 @@ checkMaxVersion(Node *root, Node *node, InternalVersionT oldestVersion,
int(node->entryPresent), minNumChildren); int(node->entryPresent), minNumChildren);
success = false; success = false;
} }
// TODO check that the max capacity property eventually holds
const int maxCapacity =
(node->numChildren + int(node->entryPresent)) * (node->partialKeyLen + 1);
if (node->getCapacity() > maxCapacity) {
fprintf(stderr, "%s has d capacity %d, which is more than the allowed %d\n",
getSearchPathPrintable(node).c_str(), node->getCapacity(),
maxCapacity);
success = false;
}
for (auto child = getChildGeq(node, 0); child != nullptr; for (auto child = getChildGeq(node, 0); child != nullptr;
child = getChildGeq(node, child->parentsIndex + 1)) { child = getChildGeq(node, child->parentsIndex + 1)) {
checkMemoryBoundInvariants(child, success); checkMemoryBoundInvariants(child, success);