Bring back custom allocator
This commit is contained in:
178
ConflictSet.cpp
178
ConflictSet.cpp
@@ -244,14 +244,74 @@ struct Node256 : Node {
|
||||
uint8_t *partialKey() { return (uint8_t *)(this + 1); }
|
||||
};
|
||||
|
||||
template <class NodeT> NodeT *newNode(int partialKeyCapacity) {
|
||||
auto *result = new (safe_malloc(sizeof(NodeT) + partialKeyCapacity)) NodeT;
|
||||
// Bounds memory usage in free list, but does not account for memory for partial
|
||||
// keys.
|
||||
template <class T, size_t kMemoryBound = (1 << 20)>
|
||||
struct BoundedFreeListAllocator {
|
||||
static_assert(sizeof(T) >= sizeof(void *));
|
||||
static_assert(std::derived_from<T, Node>);
|
||||
|
||||
T *allocate(int partialKeyCapacity) {
|
||||
#if SHOW_MEMORY
|
||||
++liveAllocations;
|
||||
maxLiveAllocations = std::max(maxLiveAllocations, liveAllocations);
|
||||
#endif
|
||||
if (freeList != nullptr) {
|
||||
T *n = (T *)freeList;
|
||||
VALGRIND_MAKE_MEM_DEFINED(n, sizeof(T));
|
||||
if (n->partialKeyLen >= partialKeyCapacity) {
|
||||
memcpy(&freeList, freeList, sizeof(freeList));
|
||||
--freeListSize;
|
||||
VALGRIND_MAKE_MEM_UNDEFINED(n, sizeof(T));
|
||||
return new (n) T;
|
||||
}
|
||||
VALGRIND_MAKE_MEM_NOACCESS(n, sizeof(T));
|
||||
}
|
||||
|
||||
auto *result = new (safe_malloc(sizeof(T) + partialKeyCapacity)) T;
|
||||
#ifndef NDEBUG
|
||||
result->partialKeyCapacity = partialKeyCapacity;
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
|
||||
void release(T *p) {
|
||||
#if SHOW_MEMORY
|
||||
--liveAllocations;
|
||||
#endif
|
||||
p->~T();
|
||||
if (freeListSize == kMaxFreeListSize) {
|
||||
return free(p);
|
||||
}
|
||||
memcpy((void *)p, &freeList, sizeof(freeList));
|
||||
freeList = p;
|
||||
++freeListSize;
|
||||
VALGRIND_MAKE_MEM_NOACCESS(freeList, sizeof(T));
|
||||
}
|
||||
|
||||
~BoundedFreeListAllocator() {
|
||||
for (void *iter = freeList; iter != nullptr;) {
|
||||
VALGRIND_MAKE_MEM_DEFINED(iter, sizeof(iter));
|
||||
auto *tmp = iter;
|
||||
memcpy(&iter, iter, sizeof(void *));
|
||||
free(tmp);
|
||||
}
|
||||
}
|
||||
|
||||
#if SHOW_MEMORY
|
||||
int64_t highWaterMarkBytes() const { return maxLiveAllocations * sizeof(T); }
|
||||
#endif
|
||||
|
||||
private:
|
||||
static constexpr int kMaxFreeListSize = kMemoryBound / sizeof(T);
|
||||
int freeListSize = 0;
|
||||
void *freeList = nullptr;
|
||||
#if SHOW_MEMORY
|
||||
int64_t maxLiveAllocations = 0;
|
||||
int64_t liveAllocations = 0;
|
||||
#endif
|
||||
};
|
||||
|
||||
uint8_t *Node::partialKey() {
|
||||
switch (type) {
|
||||
case Type::Node0:
|
||||
@@ -267,6 +327,14 @@ uint8_t *Node::partialKey() {
|
||||
}
|
||||
}
|
||||
|
||||
struct NodeAllocators {
|
||||
BoundedFreeListAllocator<Node0> node0;
|
||||
BoundedFreeListAllocator<Node4> node4;
|
||||
BoundedFreeListAllocator<Node16> node16;
|
||||
BoundedFreeListAllocator<Node48> node48;
|
||||
BoundedFreeListAllocator<Node256> node256;
|
||||
};
|
||||
|
||||
int getNodeIndex(Node16 *self, uint8_t index) {
|
||||
#ifdef HAS_AVX
|
||||
// Based on https://www.the-paper-trail.org/post/art-paper-notes/
|
||||
@@ -473,7 +541,8 @@ void setChildrenParents(Node256 *n) {
|
||||
|
||||
// Caller is responsible for assigning a non-null pointer to the returned
|
||||
// reference if null
|
||||
Node *&getOrCreateChild(Node *&self, uint8_t index) {
|
||||
Node *&getOrCreateChild(Node *&self, uint8_t index,
|
||||
NodeAllocators *allocators) {
|
||||
|
||||
// Fast path for if it exists already
|
||||
if (self->type <= Type::Node16) {
|
||||
@@ -498,11 +567,11 @@ Node *&getOrCreateChild(Node *&self, uint8_t index) {
|
||||
if (self->type == Type::Node0) {
|
||||
auto *self0 = static_cast<Node0 *>(self);
|
||||
|
||||
auto *newSelf = newNode<Node4>(self->partialKeyLen);
|
||||
auto *newSelf = allocators->node4.allocate(self->partialKeyLen);
|
||||
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
|
||||
kNodeCopySize);
|
||||
memcpy(newSelf->partialKey(), self0->partialKey(), self->partialKeyLen);
|
||||
free(self0);
|
||||
allocators->node0.release(self0);
|
||||
self = newSelf;
|
||||
|
||||
goto insert16;
|
||||
@@ -511,7 +580,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index) {
|
||||
auto *self4 = static_cast<Node4 *>(self);
|
||||
|
||||
if (self->numChildren == 4) {
|
||||
auto *newSelf = newNode<Node16>(self->partialKeyLen);
|
||||
auto *newSelf = allocators->node16.allocate(self->partialKeyLen);
|
||||
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
|
||||
kNodeCopySize);
|
||||
memcpy(newSelf->partialKey(), self4->partialKey(), self->partialKeyLen);
|
||||
@@ -520,7 +589,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index) {
|
||||
newSelf->index[i] = self4->index[i];
|
||||
newSelf->children[i] = self4->children[i];
|
||||
}
|
||||
free(self4);
|
||||
allocators->node4.release(self4);
|
||||
setChildrenParents(newSelf);
|
||||
self = newSelf;
|
||||
}
|
||||
@@ -531,7 +600,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index) {
|
||||
|
||||
if (self->numChildren == 16) {
|
||||
auto *self16 = static_cast<Node16 *>(self);
|
||||
auto *newSelf = newNode<Node48>(self->partialKeyLen);
|
||||
auto *newSelf = allocators->node48.allocate(self->partialKeyLen);
|
||||
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
|
||||
kNodeCopySize);
|
||||
memcpy(newSelf->partialKey(), self16->partialKey(), self->partialKeyLen);
|
||||
@@ -544,7 +613,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index) {
|
||||
++i;
|
||||
}
|
||||
assert(i == 16);
|
||||
free(self16);
|
||||
allocators->node16.release(self16);
|
||||
setChildrenParents(newSelf);
|
||||
self = newSelf;
|
||||
goto insert48;
|
||||
@@ -572,7 +641,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index) {
|
||||
|
||||
if (self->numChildren == 48) {
|
||||
auto *self48 = static_cast<Node48 *>(self);
|
||||
auto *newSelf = newNode<Node256>(self->partialKeyLen);
|
||||
auto *newSelf = allocators->node256.allocate(self->partialKeyLen);
|
||||
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
|
||||
kNodeCopySize);
|
||||
memcpy(newSelf->partialKey(), self48->partialKey(), self->partialKeyLen);
|
||||
@@ -582,7 +651,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index) {
|
||||
newSelf->children[i] = self48->children[self48->index[i]];
|
||||
},
|
||||
0, 256);
|
||||
free(self48);
|
||||
allocators->node48.release(self48);
|
||||
setChildrenParents(newSelf);
|
||||
self = newSelf;
|
||||
goto insert256;
|
||||
@@ -609,9 +678,25 @@ Node *&getOrCreateChild(Node *&self, uint8_t index) {
|
||||
}
|
||||
|
||||
// Precondition - an entry for index must exist in the node
|
||||
void eraseChild(Node *self, uint8_t index) {
|
||||
void eraseChild(Node *self, uint8_t index, NodeAllocators *allocators) {
|
||||
auto *child = getChildExists(self, index);
|
||||
free(child);
|
||||
switch (child->type) {
|
||||
case Type::Node0:
|
||||
allocators->node0.release((Node0 *)child);
|
||||
break;
|
||||
case Type::Node4:
|
||||
allocators->node4.release((Node4 *)child);
|
||||
break;
|
||||
case Type::Node16:
|
||||
allocators->node16.release((Node16 *)child);
|
||||
break;
|
||||
case Type::Node48:
|
||||
allocators->node48.release((Node48 *)child);
|
||||
break;
|
||||
case Type::Node256:
|
||||
allocators->node256.release((Node256 *)child);
|
||||
break;
|
||||
}
|
||||
|
||||
if (self->type <= Type::Node16) {
|
||||
auto *self16 = static_cast<Node16 *>(self);
|
||||
@@ -643,7 +728,7 @@ void eraseChild(Node *self, uint8_t index) {
|
||||
--self->numChildren;
|
||||
if (self->numChildren == 0 && !self->entryPresent &&
|
||||
self->parent != nullptr) {
|
||||
eraseChild(self->parent, self->parentsIndex);
|
||||
eraseChild(self->parent, self->parentsIndex, allocators);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1464,7 +1549,8 @@ bool checkRangeRead(Node *n, std::span<const uint8_t> begin,
|
||||
// a postcondition.
|
||||
template <bool kBegin>
|
||||
[[nodiscard]] Node *insert(Node **self, std::span<const uint8_t> key,
|
||||
int64_t writeVersion, ConflictSet::Impl *impl) {
|
||||
int64_t writeVersion, NodeAllocators *allocators,
|
||||
ConflictSet::Impl *impl) {
|
||||
|
||||
for (;;) {
|
||||
|
||||
@@ -1477,7 +1563,7 @@ template <bool kBegin>
|
||||
auto *old = *self;
|
||||
int64_t oldMaxVersion = maxVersion(old, impl);
|
||||
|
||||
*self = newNode<Node4>(partialKeyIndex);
|
||||
*self = allocators->node4.allocate(partialKeyIndex);
|
||||
|
||||
memcpy((char *)*self + kNodeCopyBegin, (char *)old + kNodeCopyBegin,
|
||||
kNodeCopySize);
|
||||
@@ -1487,7 +1573,8 @@ template <bool kBegin>
|
||||
memcpy((*self)->partialKey(), old->partialKey(),
|
||||
(*self)->partialKeyLen);
|
||||
|
||||
getOrCreateChild(*self, old->partialKey()[partialKeyIndex]) = old;
|
||||
getOrCreateChild(*self, old->partialKey()[partialKeyIndex],
|
||||
allocators) = old;
|
||||
old->parent = *self;
|
||||
old->parentsIndex = old->partialKey()[partialKeyIndex];
|
||||
maxVersion(old, impl) = oldMaxVersion;
|
||||
@@ -1501,7 +1588,7 @@ template <bool kBegin>
|
||||
} else {
|
||||
// Consider adding a partial key
|
||||
if ((*self)->numChildren == 0 && !(*self)->entryPresent) {
|
||||
assert((*self)->partialKeyCapacity == int(key.size()));
|
||||
assert((*self)->partialKeyCapacity >= int(key.size()));
|
||||
(*self)->partialKeyLen = key.size();
|
||||
memcpy((*self)->partialKey(), key.data(), (*self)->partialKeyLen);
|
||||
key = key.subspan((*self)->partialKeyLen,
|
||||
@@ -1525,9 +1612,9 @@ template <bool kBegin>
|
||||
m = writeVersion;
|
||||
}
|
||||
|
||||
auto &child = getOrCreateChild(*self, key.front());
|
||||
auto &child = getOrCreateChild(*self, key.front(), allocators);
|
||||
if (!child) {
|
||||
child = newNode<Node0>(key.size() - 1);
|
||||
child = allocators->node0.allocate(key.size() - 1);
|
||||
child->parent = *self;
|
||||
child->parentsIndex = key.front();
|
||||
maxVersion(child, impl) =
|
||||
@@ -1559,8 +1646,8 @@ void destroyTree(Node *root) {
|
||||
|
||||
void addPointWrite(Node *&root, int64_t oldestVersion,
|
||||
std::span<const uint8_t> key, int64_t writeVersion,
|
||||
ConflictSet::Impl *impl) {
|
||||
auto *n = insert<true>(&root, key, writeVersion, impl);
|
||||
NodeAllocators *allocators, ConflictSet::Impl *impl) {
|
||||
auto *n = insert<true>(&root, key, writeVersion, allocators, impl);
|
||||
if (!n->entryPresent) {
|
||||
auto *p = nextLogical(n);
|
||||
n->entryPresent = true;
|
||||
@@ -1576,13 +1663,15 @@ void addPointWrite(Node *&root, int64_t oldestVersion,
|
||||
|
||||
void addWriteRange(Node *&root, int64_t oldestVersion,
|
||||
std::span<const uint8_t> begin, std::span<const uint8_t> end,
|
||||
int64_t writeVersion, ConflictSet::Impl *impl) {
|
||||
int64_t writeVersion, NodeAllocators *allocators,
|
||||
ConflictSet::Impl *impl) {
|
||||
|
||||
int lcp = longestCommonPrefix(begin.data(), end.data(),
|
||||
std::min(begin.size(), end.size()));
|
||||
if (lcp == int(begin.size()) && end.size() == begin.size() + 1 &&
|
||||
end.back() == 0) {
|
||||
return addPointWrite(root, oldestVersion, begin, writeVersion, impl);
|
||||
return addPointWrite(root, oldestVersion, begin, writeVersion, allocators,
|
||||
impl);
|
||||
}
|
||||
auto remaining = begin.subspan(0, lcp);
|
||||
|
||||
@@ -1621,7 +1710,8 @@ void addWriteRange(Node *&root, int64_t oldestVersion,
|
||||
begin = begin.subspan(consumed, begin.size() - consumed);
|
||||
end = end.subspan(consumed, end.size() - consumed);
|
||||
|
||||
auto *beginNode = insert<true>(useAsRoot, begin, writeVersion, impl);
|
||||
auto *beginNode =
|
||||
insert<true>(useAsRoot, begin, writeVersion, allocators, impl);
|
||||
|
||||
const bool insertedBegin = !beginNode->entryPresent;
|
||||
beginNode->entryPresent = true;
|
||||
@@ -1639,7 +1729,7 @@ void addWriteRange(Node *&root, int64_t oldestVersion,
|
||||
assert(writeVersion >= beginNode->entry.pointVersion);
|
||||
beginNode->entry.pointVersion = writeVersion;
|
||||
|
||||
auto *endNode = insert<false>(useAsRoot, end, writeVersion, impl);
|
||||
auto *endNode = insert<false>(useAsRoot, end, writeVersion, allocators, impl);
|
||||
|
||||
const bool insertedEnd = !endNode->entryPresent;
|
||||
endNode->entryPresent = true;
|
||||
@@ -1655,7 +1745,7 @@ void addWriteRange(Node *&root, int64_t oldestVersion,
|
||||
|
||||
if (insertedEnd) {
|
||||
// beginNode may have been invalidated
|
||||
beginNode = insert<true>(useAsRoot, begin, writeVersion, impl);
|
||||
beginNode = insert<true>(useAsRoot, begin, writeVersion, allocators, impl);
|
||||
assert(beginNode->entryPresent);
|
||||
}
|
||||
|
||||
@@ -1664,7 +1754,7 @@ void addWriteRange(Node *&root, int64_t oldestVersion,
|
||||
beginNode = nextLogical(beginNode);
|
||||
old->entryPresent = false;
|
||||
if (old->numChildren == 0 && old->parent != nullptr) {
|
||||
eraseChild(old->parent, old->parentsIndex);
|
||||
eraseChild(old->parent, old->parentsIndex, allocators);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1758,10 +1848,12 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
||||
auto end = std::span<const uint8_t>(w.end.p, w.end.len);
|
||||
if (w.end.len > 0) {
|
||||
keyUpdates += 3;
|
||||
addWriteRange(root, oldestVersion, begin, end, writeVersion, this);
|
||||
addWriteRange(root, oldestVersion, begin, end, writeVersion,
|
||||
&allocators, this);
|
||||
} else {
|
||||
keyUpdates += 2;
|
||||
addPointWrite(root, oldestVersion, begin, writeVersion, this);
|
||||
addPointWrite(root, oldestVersion, begin, writeVersion, &allocators,
|
||||
this);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1794,7 +1886,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
||||
assert(n->entry.rangeVersion <= oldestVersion);
|
||||
prev->entryPresent = false;
|
||||
if (prev->numChildren == 0 && prev->parent != nullptr) {
|
||||
eraseChild(prev->parent, prev->parentsIndex);
|
||||
eraseChild(prev->parent, prev->parentsIndex, &allocators);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1806,7 +1898,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
||||
|
||||
explicit Impl(int64_t oldestVersion) : oldestVersion(oldestVersion) {
|
||||
// Insert ""
|
||||
root = newNode<Node4>(0);
|
||||
root = allocators.node0.allocate(0);
|
||||
rootMaxVersion = oldestVersion;
|
||||
root->entry.pointVersion = oldestVersion;
|
||||
root->entry.rangeVersion = oldestVersion;
|
||||
@@ -1814,6 +1906,8 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
||||
}
|
||||
~Impl() { destroyTree(root); }
|
||||
|
||||
NodeAllocators allocators;
|
||||
|
||||
Arena removalKeyArena;
|
||||
std::span<const uint8_t> removalKey;
|
||||
int64_t keyUpdates = 0;
|
||||
@@ -1873,6 +1967,23 @@ ConflictSet::~ConflictSet() {
|
||||
}
|
||||
}
|
||||
|
||||
#if SHOW_MEMORY
|
||||
// Writes the high-water mark, in bytes, of each per-node-type allocator of
// `cs` to stderr, one line per node type.
__attribute__((visibility("default"))) void showMemory(const ConflictSet &cs) {
  // The Impl pointer is read from the leading bytes of the ConflictSet object.
  ConflictSet::Impl *impl;
  memcpy(&impl, &cs, sizeof(impl)); // NOLINT

  const auto &pools = impl->allocators;
  const int64_t node0Bytes = pools.node0.highWaterMarkBytes();
  const int64_t node4Bytes = pools.node4.highWaterMarkBytes();
  const int64_t node16Bytes = pools.node16.highWaterMarkBytes();
  const int64_t node48Bytes = pools.node48.highWaterMarkBytes();
  const int64_t node256Bytes = pools.node256.highWaterMarkBytes();

  fprintf(stderr, "Max Node0 memory usage: %" PRId64 "\n", node0Bytes);
  fprintf(stderr, "Max Node4 memory usage: %" PRId64 "\n", node4Bytes);
  fprintf(stderr, "Max Node16 memory usage: %" PRId64 "\n", node16Bytes);
  fprintf(stderr, "Max Node48 memory usage: %" PRId64 "\n", node48Bytes);
  fprintf(stderr, "Max Node256 memory usage: %" PRId64 "\n", node256Bytes);
}
|
||||
#endif
|
||||
|
||||
ConflictSet::ConflictSet(ConflictSet &&other) noexcept
|
||||
: impl(std::exchange(other.impl, nullptr)) {}
|
||||
|
||||
@@ -2148,7 +2259,8 @@ int main(void) {
|
||||
ankerl::nanobench::Bench bench;
|
||||
ConflictSet::Impl cs{0};
|
||||
for (int j = 0; j < 256; ++j) {
|
||||
getOrCreateChild(cs.root, j) = newNode<Node0>(0);
|
||||
getOrCreateChild(cs.root, j, &cs.allocators) =
|
||||
cs.allocators.node0.allocate(0);
|
||||
if (j % 10 == 0) {
|
||||
bench.run("MaxExclusive " + std::to_string(j), [&]() {
|
||||
bench.doNotOptimizeAway(maxBetweenExclusive(cs.root, 0, 256));
|
||||
|
Reference in New Issue
Block a user