Bring back custom allocator

This commit is contained in:
2024-03-08 14:43:18 -08:00
parent 0c8cb8faa5
commit 3fb8bf7c3b

View File

@@ -244,13 +244,73 @@ struct Node256 : Node {
uint8_t *partialKey() { return (uint8_t *)(this + 1); } uint8_t *partialKey() { return (uint8_t *)(this + 1); }
}; };
template <class NodeT> NodeT *newNode(int partialKeyCapacity) { // Bounds memory usage in free list, but does not account for memory for partial
auto *result = new (safe_malloc(sizeof(NodeT) + partialKeyCapacity)) NodeT; // keys.
#ifndef NDEBUG template <class T, size_t kMemoryBound = (1 << 20)>
result->partialKeyCapacity = partialKeyCapacity; struct BoundedFreeListAllocator {
// Free-list allocator for a single node type T. Released blocks are chained
// through their first sizeof(void *) bytes (hence the size static_assert) and
// are offered for reuse by allocate() before it falls back to safe_malloc.
// At most kMemoryBound / sizeof(T) blocks are cached; releases beyond that go
// straight to free().
static_assert(sizeof(T) >= sizeof(void *));
static_assert(std::derived_from<T, Node>);
// Returns a default-constructed T. A fresh block is sized
// sizeof(T) + partialKeyCapacity. Only the head of the free list is examined
// for reuse; if it is rejected it stays on the list and a new block is
// malloc'ed instead.
T *allocate(int partialKeyCapacity) {
#if SHOW_MEMORY
// Track live and peak allocation counts for highWaterMarkBytes().
++liveAllocations;
maxLiveAllocations = std::max(maxLiveAllocations, liveAllocations);
#endif #endif
return result; if (freeList != nullptr) {
} T *n = (T *)freeList;
// Cached blocks are marked NOACCESS; re-enable them before inspecting.
VALGRIND_MAKE_MEM_DEFINED(n, sizeof(T));
// NOTE(review): reuse is decided from the partialKeyLen still stored in the
// released block, not from the capacity the block was malloc'ed with. This
// relies on partialKeyLen surviving ~T() and the link written by release()
// -- confirm against Node's layout.
if (n->partialKeyLen >= partialKeyCapacity) {
// Pop the head: the next pointer is stored in the block's first bytes.
memcpy(&freeList, freeList, sizeof(freeList));
--freeListSize;
VALGRIND_MAKE_MEM_UNDEFINED(n, sizeof(T));
// NOTE(review): unlike the malloc path below, this path does not assign
// partialKeyCapacity in !NDEBUG builds -- confirm the debug-only field is
// still meaningful after placement new.
return new (n) T;
}
// Head rejected: hide it again and fall through to a fresh allocation.
VALGRIND_MAKE_MEM_NOACCESS(n, sizeof(T));
}
auto *result = new (safe_malloc(sizeof(T) + partialKeyCapacity)) T;
#ifndef NDEBUG
result->partialKeyCapacity = partialKeyCapacity;
#endif
return result;
}
// Destroys *p, then either caches its block at the head of the free list or,
// once kMaxFreeListSize blocks are already cached, frees it.
void release(T *p) {
#if SHOW_MEMORY
--liveAllocations;
#endif
p->~T();
if (freeListSize == kMaxFreeListSize) {
return free(p);
}
// Store the current head in the block's first bytes, then make this block
// the new head.
memcpy((void *)p, &freeList, sizeof(freeList));
freeList = p;
++freeListSize;
VALGRIND_MAKE_MEM_NOACCESS(freeList, sizeof(T));
}
// Frees every block still cached on the free list.
~BoundedFreeListAllocator() {
for (void *iter = freeList; iter != nullptr;) {
// Only the link bytes are read, so only those are re-enabled.
VALGRIND_MAKE_MEM_DEFINED(iter, sizeof(iter));
auto *tmp = iter;
memcpy(&iter, iter, sizeof(void *));
free(tmp);
}
}
#if SHOW_MEMORY
// Peak number of simultaneously live allocations times sizeof(T); bytes
// reserved for partial keys are not included.
int64_t highWaterMarkBytes() const { return maxLiveAllocations * sizeof(T); }
#endif
private:
// Maximum number of cached blocks, derived from the kMemoryBound byte budget.
static constexpr int kMaxFreeListSize = kMemoryBound / sizeof(T);
// Number of blocks currently cached on the free list.
int freeListSize = 0;
// Head of the singly linked list of cached blocks; nullptr when empty.
void *freeList = nullptr;
#if SHOW_MEMORY
int64_t maxLiveAllocations = 0;
int64_t liveAllocations = 0;
#endif
};
uint8_t *Node::partialKey() { uint8_t *Node::partialKey() {
switch (type) { switch (type) {
@@ -267,6 +327,14 @@ uint8_t *Node::partialKey() {
} }
} }
// Bundles one BoundedFreeListAllocator per node type (Node0, Node4, Node16,
// Node48, Node256), each using the allocator's default kMemoryBound.
// A pointer to this struct is threaded through the tree-mutating functions so
// that nodes are allocated from and released to the allocator matching their
// concrete type.
struct NodeAllocators {
BoundedFreeListAllocator<Node0> node0;
BoundedFreeListAllocator<Node4> node4;
BoundedFreeListAllocator<Node16> node16;
BoundedFreeListAllocator<Node48> node48;
BoundedFreeListAllocator<Node256> node256;
};
int getNodeIndex(Node16 *self, uint8_t index) { int getNodeIndex(Node16 *self, uint8_t index) {
#ifdef HAS_AVX #ifdef HAS_AVX
// Based on https://www.the-paper-trail.org/post/art-paper-notes/ // Based on https://www.the-paper-trail.org/post/art-paper-notes/
@@ -473,7 +541,8 @@ void setChildrenParents(Node256 *n) {
// Caller is responsible for assigning a non-null pointer to the returned // Caller is responsible for assigning a non-null pointer to the returned
// reference if null // reference if null
Node *&getOrCreateChild(Node *&self, uint8_t index) { Node *&getOrCreateChild(Node *&self, uint8_t index,
NodeAllocators *allocators) {
// Fast path for if it exists already // Fast path for if it exists already
if (self->type <= Type::Node16) { if (self->type <= Type::Node16) {
@@ -498,11 +567,11 @@ Node *&getOrCreateChild(Node *&self, uint8_t index) {
if (self->type == Type::Node0) { if (self->type == Type::Node0) {
auto *self0 = static_cast<Node0 *>(self); auto *self0 = static_cast<Node0 *>(self);
auto *newSelf = newNode<Node4>(self->partialKeyLen); auto *newSelf = allocators->node4.allocate(self->partialKeyLen);
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin, memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
kNodeCopySize); kNodeCopySize);
memcpy(newSelf->partialKey(), self0->partialKey(), self->partialKeyLen); memcpy(newSelf->partialKey(), self0->partialKey(), self->partialKeyLen);
free(self0); allocators->node0.release(self0);
self = newSelf; self = newSelf;
goto insert16; goto insert16;
@@ -511,7 +580,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index) {
auto *self4 = static_cast<Node4 *>(self); auto *self4 = static_cast<Node4 *>(self);
if (self->numChildren == 4) { if (self->numChildren == 4) {
auto *newSelf = newNode<Node16>(self->partialKeyLen); auto *newSelf = allocators->node16.allocate(self->partialKeyLen);
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin, memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
kNodeCopySize); kNodeCopySize);
memcpy(newSelf->partialKey(), self4->partialKey(), self->partialKeyLen); memcpy(newSelf->partialKey(), self4->partialKey(), self->partialKeyLen);
@@ -520,7 +589,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index) {
newSelf->index[i] = self4->index[i]; newSelf->index[i] = self4->index[i];
newSelf->children[i] = self4->children[i]; newSelf->children[i] = self4->children[i];
} }
free(self4); allocators->node4.release(self4);
setChildrenParents(newSelf); setChildrenParents(newSelf);
self = newSelf; self = newSelf;
} }
@@ -531,7 +600,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index) {
if (self->numChildren == 16) { if (self->numChildren == 16) {
auto *self16 = static_cast<Node16 *>(self); auto *self16 = static_cast<Node16 *>(self);
auto *newSelf = newNode<Node48>(self->partialKeyLen); auto *newSelf = allocators->node48.allocate(self->partialKeyLen);
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin, memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
kNodeCopySize); kNodeCopySize);
memcpy(newSelf->partialKey(), self16->partialKey(), self->partialKeyLen); memcpy(newSelf->partialKey(), self16->partialKey(), self->partialKeyLen);
@@ -544,7 +613,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index) {
++i; ++i;
} }
assert(i == 16); assert(i == 16);
free(self16); allocators->node16.release(self16);
setChildrenParents(newSelf); setChildrenParents(newSelf);
self = newSelf; self = newSelf;
goto insert48; goto insert48;
@@ -572,7 +641,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index) {
if (self->numChildren == 48) { if (self->numChildren == 48) {
auto *self48 = static_cast<Node48 *>(self); auto *self48 = static_cast<Node48 *>(self);
auto *newSelf = newNode<Node256>(self->partialKeyLen); auto *newSelf = allocators->node256.allocate(self->partialKeyLen);
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin, memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
kNodeCopySize); kNodeCopySize);
memcpy(newSelf->partialKey(), self48->partialKey(), self->partialKeyLen); memcpy(newSelf->partialKey(), self48->partialKey(), self->partialKeyLen);
@@ -582,7 +651,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index) {
newSelf->children[i] = self48->children[self48->index[i]]; newSelf->children[i] = self48->children[self48->index[i]];
}, },
0, 256); 0, 256);
free(self48); allocators->node48.release(self48);
setChildrenParents(newSelf); setChildrenParents(newSelf);
self = newSelf; self = newSelf;
goto insert256; goto insert256;
@@ -609,9 +678,25 @@ Node *&getOrCreateChild(Node *&self, uint8_t index) {
} }
// Precondition - an entry for index must exist in the node // Precondition - an entry for index must exist in the node
void eraseChild(Node *self, uint8_t index) { void eraseChild(Node *self, uint8_t index, NodeAllocators *allocators) {
auto *child = getChildExists(self, index); auto *child = getChildExists(self, index);
free(child); switch (child->type) {
case Type::Node0:
allocators->node0.release((Node0 *)child);
break;
case Type::Node4:
allocators->node4.release((Node4 *)child);
break;
case Type::Node16:
allocators->node16.release((Node16 *)child);
break;
case Type::Node48:
allocators->node48.release((Node48 *)child);
break;
case Type::Node256:
allocators->node256.release((Node256 *)child);
break;
}
if (self->type <= Type::Node16) { if (self->type <= Type::Node16) {
auto *self16 = static_cast<Node16 *>(self); auto *self16 = static_cast<Node16 *>(self);
@@ -643,7 +728,7 @@ void eraseChild(Node *self, uint8_t index) {
--self->numChildren; --self->numChildren;
if (self->numChildren == 0 && !self->entryPresent && if (self->numChildren == 0 && !self->entryPresent &&
self->parent != nullptr) { self->parent != nullptr) {
eraseChild(self->parent, self->parentsIndex); eraseChild(self->parent, self->parentsIndex, allocators);
} }
} }
@@ -1464,7 +1549,8 @@ bool checkRangeRead(Node *n, std::span<const uint8_t> begin,
// a postcondition. // a postcondition.
template <bool kBegin> template <bool kBegin>
[[nodiscard]] Node *insert(Node **self, std::span<const uint8_t> key, [[nodiscard]] Node *insert(Node **self, std::span<const uint8_t> key,
int64_t writeVersion, ConflictSet::Impl *impl) { int64_t writeVersion, NodeAllocators *allocators,
ConflictSet::Impl *impl) {
for (;;) { for (;;) {
@@ -1477,7 +1563,7 @@ template <bool kBegin>
auto *old = *self; auto *old = *self;
int64_t oldMaxVersion = maxVersion(old, impl); int64_t oldMaxVersion = maxVersion(old, impl);
*self = newNode<Node4>(partialKeyIndex); *self = allocators->node4.allocate(partialKeyIndex);
memcpy((char *)*self + kNodeCopyBegin, (char *)old + kNodeCopyBegin, memcpy((char *)*self + kNodeCopyBegin, (char *)old + kNodeCopyBegin,
kNodeCopySize); kNodeCopySize);
@@ -1487,7 +1573,8 @@ template <bool kBegin>
memcpy((*self)->partialKey(), old->partialKey(), memcpy((*self)->partialKey(), old->partialKey(),
(*self)->partialKeyLen); (*self)->partialKeyLen);
getOrCreateChild(*self, old->partialKey()[partialKeyIndex]) = old; getOrCreateChild(*self, old->partialKey()[partialKeyIndex],
allocators) = old;
old->parent = *self; old->parent = *self;
old->parentsIndex = old->partialKey()[partialKeyIndex]; old->parentsIndex = old->partialKey()[partialKeyIndex];
maxVersion(old, impl) = oldMaxVersion; maxVersion(old, impl) = oldMaxVersion;
@@ -1501,7 +1588,7 @@ template <bool kBegin>
} else { } else {
// Consider adding a partial key // Consider adding a partial key
if ((*self)->numChildren == 0 && !(*self)->entryPresent) { if ((*self)->numChildren == 0 && !(*self)->entryPresent) {
assert((*self)->partialKeyCapacity == int(key.size())); assert((*self)->partialKeyCapacity >= int(key.size()));
(*self)->partialKeyLen = key.size(); (*self)->partialKeyLen = key.size();
memcpy((*self)->partialKey(), key.data(), (*self)->partialKeyLen); memcpy((*self)->partialKey(), key.data(), (*self)->partialKeyLen);
key = key.subspan((*self)->partialKeyLen, key = key.subspan((*self)->partialKeyLen,
@@ -1525,9 +1612,9 @@ template <bool kBegin>
m = writeVersion; m = writeVersion;
} }
auto &child = getOrCreateChild(*self, key.front()); auto &child = getOrCreateChild(*self, key.front(), allocators);
if (!child) { if (!child) {
child = newNode<Node0>(key.size() - 1); child = allocators->node0.allocate(key.size() - 1);
child->parent = *self; child->parent = *self;
child->parentsIndex = key.front(); child->parentsIndex = key.front();
maxVersion(child, impl) = maxVersion(child, impl) =
@@ -1559,8 +1646,8 @@ void destroyTree(Node *root) {
void addPointWrite(Node *&root, int64_t oldestVersion, void addPointWrite(Node *&root, int64_t oldestVersion,
std::span<const uint8_t> key, int64_t writeVersion, std::span<const uint8_t> key, int64_t writeVersion,
ConflictSet::Impl *impl) { NodeAllocators *allocators, ConflictSet::Impl *impl) {
auto *n = insert<true>(&root, key, writeVersion, impl); auto *n = insert<true>(&root, key, writeVersion, allocators, impl);
if (!n->entryPresent) { if (!n->entryPresent) {
auto *p = nextLogical(n); auto *p = nextLogical(n);
n->entryPresent = true; n->entryPresent = true;
@@ -1576,13 +1663,15 @@ void addPointWrite(Node *&root, int64_t oldestVersion,
void addWriteRange(Node *&root, int64_t oldestVersion, void addWriteRange(Node *&root, int64_t oldestVersion,
std::span<const uint8_t> begin, std::span<const uint8_t> end, std::span<const uint8_t> begin, std::span<const uint8_t> end,
int64_t writeVersion, ConflictSet::Impl *impl) { int64_t writeVersion, NodeAllocators *allocators,
ConflictSet::Impl *impl) {
int lcp = longestCommonPrefix(begin.data(), end.data(), int lcp = longestCommonPrefix(begin.data(), end.data(),
std::min(begin.size(), end.size())); std::min(begin.size(), end.size()));
if (lcp == int(begin.size()) && end.size() == begin.size() + 1 && if (lcp == int(begin.size()) && end.size() == begin.size() + 1 &&
end.back() == 0) { end.back() == 0) {
return addPointWrite(root, oldestVersion, begin, writeVersion, impl); return addPointWrite(root, oldestVersion, begin, writeVersion, allocators,
impl);
} }
auto remaining = begin.subspan(0, lcp); auto remaining = begin.subspan(0, lcp);
@@ -1621,7 +1710,8 @@ void addWriteRange(Node *&root, int64_t oldestVersion,
begin = begin.subspan(consumed, begin.size() - consumed); begin = begin.subspan(consumed, begin.size() - consumed);
end = end.subspan(consumed, end.size() - consumed); end = end.subspan(consumed, end.size() - consumed);
auto *beginNode = insert<true>(useAsRoot, begin, writeVersion, impl); auto *beginNode =
insert<true>(useAsRoot, begin, writeVersion, allocators, impl);
const bool insertedBegin = !beginNode->entryPresent; const bool insertedBegin = !beginNode->entryPresent;
beginNode->entryPresent = true; beginNode->entryPresent = true;
@@ -1639,7 +1729,7 @@ void addWriteRange(Node *&root, int64_t oldestVersion,
assert(writeVersion >= beginNode->entry.pointVersion); assert(writeVersion >= beginNode->entry.pointVersion);
beginNode->entry.pointVersion = writeVersion; beginNode->entry.pointVersion = writeVersion;
auto *endNode = insert<false>(useAsRoot, end, writeVersion, impl); auto *endNode = insert<false>(useAsRoot, end, writeVersion, allocators, impl);
const bool insertedEnd = !endNode->entryPresent; const bool insertedEnd = !endNode->entryPresent;
endNode->entryPresent = true; endNode->entryPresent = true;
@@ -1655,7 +1745,7 @@ void addWriteRange(Node *&root, int64_t oldestVersion,
if (insertedEnd) { if (insertedEnd) {
// beginNode may have been invalidated // beginNode may have been invalidated
beginNode = insert<true>(useAsRoot, begin, writeVersion, impl); beginNode = insert<true>(useAsRoot, begin, writeVersion, allocators, impl);
assert(beginNode->entryPresent); assert(beginNode->entryPresent);
} }
@@ -1664,7 +1754,7 @@ void addWriteRange(Node *&root, int64_t oldestVersion,
beginNode = nextLogical(beginNode); beginNode = nextLogical(beginNode);
old->entryPresent = false; old->entryPresent = false;
if (old->numChildren == 0 && old->parent != nullptr) { if (old->numChildren == 0 && old->parent != nullptr) {
eraseChild(old->parent, old->parentsIndex); eraseChild(old->parent, old->parentsIndex, allocators);
} }
} }
} }
@@ -1758,10 +1848,12 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
auto end = std::span<const uint8_t>(w.end.p, w.end.len); auto end = std::span<const uint8_t>(w.end.p, w.end.len);
if (w.end.len > 0) { if (w.end.len > 0) {
keyUpdates += 3; keyUpdates += 3;
addWriteRange(root, oldestVersion, begin, end, writeVersion, this); addWriteRange(root, oldestVersion, begin, end, writeVersion,
&allocators, this);
} else { } else {
keyUpdates += 2; keyUpdates += 2;
addPointWrite(root, oldestVersion, begin, writeVersion, this); addPointWrite(root, oldestVersion, begin, writeVersion, &allocators,
this);
} }
} }
} }
@@ -1794,7 +1886,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
assert(n->entry.rangeVersion <= oldestVersion); assert(n->entry.rangeVersion <= oldestVersion);
prev->entryPresent = false; prev->entryPresent = false;
if (prev->numChildren == 0 && prev->parent != nullptr) { if (prev->numChildren == 0 && prev->parent != nullptr) {
eraseChild(prev->parent, prev->parentsIndex); eraseChild(prev->parent, prev->parentsIndex, &allocators);
} }
} }
@@ -1806,7 +1898,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
explicit Impl(int64_t oldestVersion) : oldestVersion(oldestVersion) { explicit Impl(int64_t oldestVersion) : oldestVersion(oldestVersion) {
// Insert "" // Insert ""
root = newNode<Node4>(0); root = allocators.node0.allocate(0);
rootMaxVersion = oldestVersion; rootMaxVersion = oldestVersion;
root->entry.pointVersion = oldestVersion; root->entry.pointVersion = oldestVersion;
root->entry.rangeVersion = oldestVersion; root->entry.rangeVersion = oldestVersion;
@@ -1814,6 +1906,8 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
} }
~Impl() { destroyTree(root); } ~Impl() { destroyTree(root); }
NodeAllocators allocators;
Arena removalKeyArena; Arena removalKeyArena;
std::span<const uint8_t> removalKey; std::span<const uint8_t> removalKey;
int64_t keyUpdates = 0; int64_t keyUpdates = 0;
@@ -1873,6 +1967,23 @@ ConflictSet::~ConflictSet() {
} }
} }
#if SHOW_MEMORY
// Prints, to stderr, the high-water mark in bytes reported by each per-type
// node allocator of `cs`. Only compiled when SHOW_MEMORY is enabled.
//
// The Impl pointer is read out of `cs` with memcpy, so this assumes the Impl
// pointer is stored at the very start of ConflictSet.
//
// A moved-from ConflictSet holds a null Impl pointer (the move constructor
// exchanges it with nullptr); in that case there is nothing to report and the
// function returns without printing.
__attribute__((visibility("default"))) void showMemory(const ConflictSet &cs) {
ConflictSet::Impl *impl;
memcpy(&impl, &cs, sizeof(impl)); // NOLINT
if (impl == nullptr) {
return;
}
const NodeAllocators &allocators = impl->allocators;
fprintf(stderr, "Max Node0 memory usage: %" PRId64 "\n",
allocators.node0.highWaterMarkBytes());
fprintf(stderr, "Max Node4 memory usage: %" PRId64 "\n",
allocators.node4.highWaterMarkBytes());
fprintf(stderr, "Max Node16 memory usage: %" PRId64 "\n",
allocators.node16.highWaterMarkBytes());
fprintf(stderr, "Max Node48 memory usage: %" PRId64 "\n",
allocators.node48.highWaterMarkBytes());
fprintf(stderr, "Max Node256 memory usage: %" PRId64 "\n",
allocators.node256.highWaterMarkBytes());
}
#endif
ConflictSet::ConflictSet(ConflictSet &&other) noexcept ConflictSet::ConflictSet(ConflictSet &&other) noexcept
: impl(std::exchange(other.impl, nullptr)) {} : impl(std::exchange(other.impl, nullptr)) {}
@@ -2148,7 +2259,8 @@ int main(void) {
ankerl::nanobench::Bench bench; ankerl::nanobench::Bench bench;
ConflictSet::Impl cs{0}; ConflictSet::Impl cs{0};
for (int j = 0; j < 256; ++j) { for (int j = 0; j < 256; ++j) {
getOrCreateChild(cs.root, j) = newNode<Node0>(0); getOrCreateChild(cs.root, j, &cs.allocators) =
cs.allocators.node0.allocate(0);
if (j % 10 == 0) { if (j % 10 == 0) {
bench.run("MaxExclusive " + std::to_string(j), [&]() { bench.run("MaxExclusive " + std::to_string(j), [&]() {
bench.doNotOptimizeAway(maxBetweenExclusive(cs.root, 0, 256)); bench.doNotOptimizeAway(maxBetweenExclusive(cs.root, 0, 256));