Remove custom allocator

To prepare for variable-size partial keys.
This commit is contained in:
2024-03-08 13:02:33 -08:00
parent 8802d17acd
commit 782abc70d6

View File

@@ -46,65 +46,6 @@ struct Entry {
int64_t rangeVersion;
};
// Allocator for objects of a single type T that recycles released objects
// through an intrusive, singly linked free list.
//
// The free list caches at most kMaxFreeListSize (= kMemoryBound / sizeof(T))
// buffers; once that many are cached, further released buffers are handed
// straight back to free(). The link to the next cached buffer is stored in the
// first sizeof(void *) bytes of each cached buffer, which is why T must be at
// least as large as a pointer.
//
// The VALGRIND_* macros are presumably the Valgrind memcheck client requests
// (or no-op stand-ins) - confirm against the header that defines them.
template <class T, size_t kMemoryBound = (1 << 20)>
struct BoundedFreeListAllocator {
  static_assert(sizeof(T) >= sizeof(void *));

  BoundedFreeListAllocator() = default;

  // This type owns the raw buffers on its free list and free()s them in its
  // destructor. An implicit copy would duplicate the `freeList` pointer and
  // free every cached buffer twice, so copying is disabled. (Declaring the
  // copy operations also keeps the implicit move operations suppressed.)
  BoundedFreeListAllocator(const BoundedFreeListAllocator &) = delete;
  BoundedFreeListAllocator &
  operator=(const BoundedFreeListAllocator &) = delete;

  // Returns a pointer to a default-initialized T. Reuses the most recently
  // released buffer if the free list is non-empty; otherwise obtains
  // sizeof(T) bytes from safe_malloc.
  T *allocate() {
#if SHOW_MEMORY
    ++liveAllocations;
    maxLiveAllocations = std::max(maxLiveAllocations, liveAllocations);
#endif
    if (freeListSize == 0) {
      assert(freeList == nullptr);
      return new (safe_malloc(sizeof(T))) T;
    }
    assert(freeList != nullptr);
    void *buffer = freeList;
    // release() marked the whole buffer NOACCESS; re-expose the link bytes
    // before reading the next pointer out of them.
    VALGRIND_MAKE_MEM_DEFINED(freeList, sizeof(freeList));
    // Pop the head: the first bytes of the head buffer hold the next pointer.
    memcpy(&freeList, freeList, sizeof(freeList));
    --freeListSize;
    // The recycled buffer still holds stale bytes; mark them undefined before
    // a new T is constructed in place.
    VALGRIND_MAKE_MEM_UNDEFINED(buffer, sizeof(T));
    return new (buffer) T;
  }

  // Destroys *p and either caches its storage on the free list or, if the
  // free list already holds kMaxFreeListSize buffers, returns it to free().
  // p is expected to have come from allocate() on this allocator.
  void release(T *p) {
#if SHOW_MEMORY
    --liveAllocations;
#endif
    p->~T();
    if (freeListSize == kMaxFreeListSize) {
      return free(p);
    }
    // Push onto the head: store the current head pointer in the first bytes
    // of the released buffer.
    memcpy((void *)p, &freeList, sizeof(freeList));
    freeList = p;
    ++freeListSize;
    VALGRIND_MAKE_MEM_NOACCESS(p, sizeof(T));
  }

  // Frees every buffer still cached on the free list. Objects that were
  // allocated but never released are not touched here.
  ~BoundedFreeListAllocator() {
    for (void *iter = freeList; iter != nullptr;) {
      VALGRIND_MAKE_MEM_DEFINED(iter, sizeof(iter));
      auto *tmp = iter;
      // Read the next pointer before the buffer that holds it is freed.
      memcpy(&iter, iter, sizeof(void *));
      free(tmp);
    }
  }

#if SHOW_MEMORY
  // Largest number of simultaneously live allocations seen so far, in bytes.
  int64_t highWaterMarkBytes() const { return maxLiveAllocations * sizeof(T); }
#endif

private:
  // Maximum number of buffers kept on the free list.
  static constexpr int kMaxFreeListSize = kMemoryBound / sizeof(T);
  // Number of buffers currently on the free list.
  int freeListSize = 0;
  // Head of the intrusive free list, or nullptr when it is empty.
  void *freeList = nullptr;
#if SHOW_MEMORY
  int64_t maxLiveAllocations = 0;
  int64_t liveAllocations = 0;
#endif
};
struct BitSet {
bool test(int i) const;
void set(int i);
@@ -304,13 +245,9 @@ struct Node256 : Node {
}
};
// Bundles one BoundedFreeListAllocator per node type (Node0, Node4, Node16,
// Node48, Node256), so each node type is allocated and released through its
// own allocator instance.
struct NodeAllocators {
BoundedFreeListAllocator<Node0> node0;
BoundedFreeListAllocator<Node4> node4;
BoundedFreeListAllocator<Node16> node16;
BoundedFreeListAllocator<Node48> node48;
BoundedFreeListAllocator<Node256> node256;
};
// Creates a NodeT in storage obtained from safe_malloc(sizeof(NodeT)) and
// returns a pointer to it. The object is default-initialized via placement
// new; no constructor arguments are passed.
template <class NodeT> NodeT *newNode() {
  void *const storage = safe_malloc(sizeof(NodeT));
  return new (storage) NodeT;
}
int getNodeIndex(Node16 *self, uint8_t index) {
#ifdef HAS_AVX
@@ -518,8 +455,7 @@ void setChildrenParents(Node256 *n) {
// Caller is responsible for assigning a non-null pointer to the returned
// reference if null
Node *&getOrCreateChild(Node *&self, uint8_t index,
NodeAllocators *allocators) {
Node *&getOrCreateChild(Node *&self, uint8_t index) {
// Fast path for if it exists already
if (self->type <= Type::Node16) {
@@ -544,10 +480,10 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
if (self->type == Type::Node0) {
auto *self0 = static_cast<Node0 *>(self);
auto *newSelf = allocators->node4.allocate();
auto *newSelf = newNode<Node4>();
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
kNodeCopySize);
allocators->node0.release(self0);
free(self0);
self = newSelf;
goto insert16;
@@ -556,12 +492,12 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
auto *self4 = static_cast<Node4 *>(self);
if (self->numChildren == 4) {
auto *newSelf = allocators->node16.allocate();
auto *newSelf = newNode<Node16>();
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
kNodeCopySize);
memcpy((void *)newSelf->index, (void *)self4->index,
sizeof(self4->index) + sizeof(self4->children));
allocators->node4.release(self4);
free(self4);
setChildrenParents(newSelf);
self = newSelf;
}
@@ -572,7 +508,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
if (self->numChildren == 16) {
auto *self16 = static_cast<Node16 *>(self);
auto *newSelf = allocators->node48.allocate();
auto *newSelf = newNode<Node48>();
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
kNodeCopySize);
newSelf->nextFree = 16;
@@ -584,7 +520,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
++i;
}
assert(i == 16);
allocators->node16.release(self16);
free(self16);
setChildrenParents(newSelf);
self = newSelf;
goto insert48;
@@ -612,7 +548,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
if (self->numChildren == 48) {
auto *self48 = static_cast<Node48 *>(self);
auto *newSelf = allocators->node256.allocate();
auto *newSelf = newNode<Node256>();
memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
kNodeCopySize);
newSelf->bitSet = self48->bitSet;
@@ -621,7 +557,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
newSelf->children[i] = self48->children[self48->index[i]];
},
0, 256);
allocators->node48.release(self48);
free(self48);
setChildrenParents(newSelf);
self = newSelf;
goto insert256;
@@ -648,25 +584,9 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
}
// Precondition - an entry for index must exist in the node
void eraseChild(Node *self, uint8_t index, NodeAllocators *allocators) {
void eraseChild(Node *self, uint8_t index) {
auto *child = getChildExists(self, index);
switch (child->type) {
case Type::Node0:
allocators->node0.release((Node0 *)child);
break;
case Type::Node4:
allocators->node4.release((Node4 *)child);
break;
case Type::Node16:
allocators->node16.release((Node16 *)child);
break;
case Type::Node48:
allocators->node48.release((Node48 *)child);
break;
case Type::Node256:
allocators->node256.release((Node256 *)child);
break;
}
free(child);
if (self->type <= Type::Node16) {
auto *self16 = static_cast<Node16 *>(self);
@@ -698,7 +618,7 @@ void eraseChild(Node *self, uint8_t index, NodeAllocators *allocators) {
--self->numChildren;
if (self->numChildren == 0 && !self->entryPresent &&
self->parent != nullptr) {
eraseChild(self->parent, self->parentsIndex, allocators);
eraseChild(self->parent, self->parentsIndex);
}
}
@@ -1526,8 +1446,7 @@ bool checkRangeRead(Node *n, std::span<const uint8_t> begin,
// a postcondition.
template <bool kBegin>
[[nodiscard]] Node *insert(Node **self, std::span<const uint8_t> key,
int64_t writeVersion, NodeAllocators *allocators,
ConflictSet::Impl *impl) {
int64_t writeVersion, ConflictSet::Impl *impl) {
for (;;) {
@@ -1545,7 +1464,7 @@ template <bool kBegin>
auto *old = *self;
int64_t oldMaxVersion = maxVersion(old, impl);
*self = allocators->node0.allocate();
*self = newNode<Node4>();
memcpy((char *)*self + sizeof(Node::type),
(char *)old + sizeof(Node::type),
@@ -1554,8 +1473,7 @@ template <bool kBegin>
(*self)->entryPresent = false;
(*self)->numChildren = 0;
getOrCreateChild(*self, old->partialKey[partialKeyIndex], allocators) =
old;
getOrCreateChild(*self, old->partialKey[partialKeyIndex]) = old;
old->parent = *self;
old->parentsIndex = old->partialKey[partialKeyIndex];
maxVersion(old, impl) = oldMaxVersion;
@@ -1597,9 +1515,9 @@ template <bool kBegin>
m = writeVersion;
}
auto &child = getOrCreateChild(*self, key.front(), allocators);
auto &child = getOrCreateChild(*self, key.front());
if (!child) {
child = allocators->node0.allocate();
child = newNode<Node0>();
child->parent = *self;
child->parentsIndex = key.front();
maxVersion(child, impl) =
@@ -1631,8 +1549,8 @@ void destroyTree(Node *root) {
void addPointWrite(Node *&root, int64_t oldestVersion,
std::span<const uint8_t> key, int64_t writeVersion,
NodeAllocators *allocators, ConflictSet::Impl *impl) {
auto *n = insert<true>(&root, key, writeVersion, allocators, impl);
ConflictSet::Impl *impl) {
auto *n = insert<true>(&root, key, writeVersion, impl);
if (!n->entryPresent) {
auto *p = nextLogical(n);
n->entryPresent = true;
@@ -1648,15 +1566,13 @@ void addPointWrite(Node *&root, int64_t oldestVersion,
void addWriteRange(Node *&root, int64_t oldestVersion,
std::span<const uint8_t> begin, std::span<const uint8_t> end,
int64_t writeVersion, NodeAllocators *allocators,
ConflictSet::Impl *impl) {
int64_t writeVersion, ConflictSet::Impl *impl) {
int lcp = longestCommonPrefix(begin.data(), end.data(),
std::min(begin.size(), end.size()));
if (lcp == int(begin.size()) && end.size() == begin.size() + 1 &&
end.back() == 0) {
return addPointWrite(root, oldestVersion, begin, writeVersion, allocators,
impl);
return addPointWrite(root, oldestVersion, begin, writeVersion, impl);
}
auto remaining = begin.subspan(0, lcp);
@@ -1695,8 +1611,7 @@ void addWriteRange(Node *&root, int64_t oldestVersion,
begin = begin.subspan(consumed, begin.size() - consumed);
end = end.subspan(consumed, end.size() - consumed);
auto *beginNode =
insert<true>(useAsRoot, begin, writeVersion, allocators, impl);
auto *beginNode = insert<true>(useAsRoot, begin, writeVersion, impl);
const bool insertedBegin = !std::exchange(beginNode->entryPresent, true);
@@ -1713,7 +1628,7 @@ void addWriteRange(Node *&root, int64_t oldestVersion,
assert(writeVersion >= beginNode->entry.pointVersion);
beginNode->entry.pointVersion = writeVersion;
auto *endNode = insert<false>(useAsRoot, end, writeVersion, allocators, impl);
auto *endNode = insert<false>(useAsRoot, end, writeVersion, impl);
const bool insertedEnd = !std::exchange(endNode->entryPresent, true);
@@ -1728,7 +1643,7 @@ void addWriteRange(Node *&root, int64_t oldestVersion,
if (insertedEnd) {
// beginNode may have been invalidated
beginNode = insert<true>(useAsRoot, begin, writeVersion, allocators, impl);
beginNode = insert<true>(useAsRoot, begin, writeVersion, impl);
assert(beginNode->entryPresent);
}
@@ -1737,7 +1652,7 @@ void addWriteRange(Node *&root, int64_t oldestVersion,
beginNode = nextLogical(beginNode);
old->entryPresent = false;
if (old->numChildren == 0 && old->parent != nullptr) {
eraseChild(old->parent, old->parentsIndex, allocators);
eraseChild(old->parent, old->parentsIndex);
}
}
}
@@ -1831,12 +1746,10 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
auto end = std::span<const uint8_t>(w.end.p, w.end.len);
if (w.end.len > 0) {
keyUpdates += 3;
addWriteRange(root, oldestVersion, begin, end, writeVersion,
&allocators, this);
addWriteRange(root, oldestVersion, begin, end, writeVersion, this);
} else {
keyUpdates += 2;
addPointWrite(root, oldestVersion, begin, writeVersion, &allocators,
this);
addPointWrite(root, oldestVersion, begin, writeVersion, this);
}
}
}
@@ -1869,7 +1782,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
assert(n->entry.rangeVersion <= oldestVersion);
prev->entryPresent = false;
if (prev->numChildren == 0 && prev->parent != nullptr) {
eraseChild(prev->parent, prev->parentsIndex, &allocators);
eraseChild(prev->parent, prev->parentsIndex);
}
}
@@ -1881,7 +1794,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
explicit Impl(int64_t oldestVersion) : oldestVersion(oldestVersion) {
// Insert ""
root = allocators.node0.allocate();
root = newNode<Node4>();
rootMaxVersion = oldestVersion;
root->entry.pointVersion = oldestVersion;
root->entry.rangeVersion = oldestVersion;
@@ -1889,8 +1802,6 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
}
// Hands the tree rooted at `root` to destroyTree when the Impl is destroyed.
~Impl() { destroyTree(root); }
NodeAllocators allocators;
Arena removalKeyArena;
std::span<const uint8_t> removalKey;
int64_t keyUpdates = 0;
@@ -1950,23 +1861,6 @@ ConflictSet::~ConflictSet() {
}
}
#if SHOW_MEMORY
// Writes the high-water-mark memory usage of each per-node-type allocator
// (Node0, Node4, Node16, Node48, Node256) to stderr, one line per node type.
__attribute__((visibility("default"))) void showMemory(const ConflictSet &cs) {
  // The leading bytes of the ConflictSet object are copied out and used as
  // its Impl pointer.
  ConflictSet::Impl *impl = nullptr;
  memcpy(&impl, &cs, sizeof(impl)); // NOLINT

  const auto &nodeAllocators = impl->allocators;
  const int64_t node0Bytes = nodeAllocators.node0.highWaterMarkBytes();
  const int64_t node4Bytes = nodeAllocators.node4.highWaterMarkBytes();
  const int64_t node16Bytes = nodeAllocators.node16.highWaterMarkBytes();
  const int64_t node48Bytes = nodeAllocators.node48.highWaterMarkBytes();
  const int64_t node256Bytes = nodeAllocators.node256.highWaterMarkBytes();

  fprintf(stderr, "Max Node0 memory usage: %" PRId64 "\n", node0Bytes);
  fprintf(stderr, "Max Node4 memory usage: %" PRId64 "\n", node4Bytes);
  fprintf(stderr, "Max Node16 memory usage: %" PRId64 "\n", node16Bytes);
  fprintf(stderr, "Max Node48 memory usage: %" PRId64 "\n", node48Bytes);
  fprintf(stderr, "Max Node256 memory usage: %" PRId64 "\n", node256Bytes);
}
#endif
ConflictSet::ConflictSet(ConflictSet &&other) noexcept
: impl(std::exchange(other.impl, nullptr)) {}
@@ -2242,8 +2136,7 @@ int main(void) {
ankerl::nanobench::Bench bench;
ConflictSet::Impl cs{0};
for (int j = 0; j < 256; ++j) {
getOrCreateChild(cs.root, j, &cs.allocators) =
cs.allocators.node0.allocate();
getOrCreateChild(cs.root, j) = newNode<Node0>();
if (j % 10 == 0) {
bench.run("MaxExclusive " + std::to_string(j), [&]() {
bench.doNotOptimizeAway(maxBetweenExclusive(cs.root, 0, 256));