@@ -51,6 +51,10 @@ struct BoundedFreeListAllocator {
static_assert(sizeof(T) >= sizeof(void *));
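// The free list presumably links released objects through their own storage
// (roughly: *(void **)p = freeList; freeList = p), which is why T must be at
// least pointer-sized.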

T *allocate() {
#if SHOW_MEMORY
++liveAllocations;
maxLiveAllocations = std::max(maxLiveAllocations, liveAllocations);
#endif
if (freeListSize == 0) {
assert(freeList == nullptr);
return new (safe_malloc(sizeof(T))) T;
@@ -65,6 +69,9 @@ struct BoundedFreeListAllocator {
}

void release(T *p) {
#if SHOW_MEMORY
--liveAllocations;
#endif
p->~T();
if (freeListSize == kMaxFreeListSize) {
return free(p);
@@ -84,10 +91,18 @@ struct BoundedFreeListAllocator {
}
}

#if SHOW_MEMORY
int64_t highWaterMarkBytes() const { return maxLiveAllocations * sizeof(T); }
#endif

private:
static constexpr int kMaxFreeListSize = kMemoryBound / sizeof(T);
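// With kMaxFreeListSize = kMemoryBound / sizeof(T), each allocator retains at
// most about kMemoryBound bytes of freed nodes before release() falls back to
// free().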
int freeListSize = 0;
void *freeList = nullptr;
#if SHOW_MEMORY
int64_t maxLiveAllocations = 0;
int64_t liveAllocations = 0;
#endif
};

struct BitSet {
@@ -116,6 +131,11 @@ struct BitSet {
// Check begin partial word
if (begin & 63) {
uint64_t word = words[begin >> 6] & (uint64_t(-1) << (begin & 63));
if (std::popcount(word) + (begin & 63) == 64) {
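// Fast path: every bit from begin up to the next word boundary is set, so
// the indices can be enumerated without scanning the word bit by bit.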
while (begin & 63) {
f(begin++);
}
} else {
while (word) {
uint64_t temp = word & -word;
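// word & -word isolates the lowest set bit of word.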
int index = (begin & ~63) + std::countr_zero(word);
@@ -125,6 +145,7 @@ struct BitSet {
begin &= ~63;
begin += 64;
}
}

// Check inner, full words
while (begin != (end & ~63)) {
@@ -147,6 +168,11 @@ struct BitSet {
if (end & 63) {
// Check end partial word
uint64_t word = words[end >> 6] & ~(uint64_t(-1) << (end & 63));
if (std::popcount(word) == (end & 63)) {
while (begin < end) {
f(begin++);
}
} else {
while (word) {
uint64_t temp = word & -word;
int index = begin + std::countr_zero(word);
@@ -155,6 +181,7 @@ struct BitSet {
}
}
}
}

private:
uint64_t words[4] = {};
@@ -193,6 +220,7 @@ int BitSet::firstSetGeq(int i) const {
}

enum class Type : int8_t {
Node1,
Node4,
Node16,
Node48,
@@ -221,10 +249,18 @@ struct Child {
Node *child;
};

struct Node1 : Node {
// Sorted
uint8_t index[16]; // 16 so that we can use the same simd index search
// implementation as Node16
Child children[1];
Node1() { this->type = Type::Node1; }
};

struct Node4 : Node {
// Sorted
uint8_t index[16]; // 16 so that we can use the same simd index search
// implementation for Node4 as Node16
// implementation as Node16
Child children[4];
Node4() { this->type = Type::Node4; }
};
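// Node1, Node4, and Node16 all carry a 16-byte index so a single SIMD lookup
// can serve every size. A sketch of what such a shared search could look like
// (illustrative only, not the code in this file; SSE2 intrinsics):
//   __m128i keys = _mm_loadu_si128((const __m128i *)n->index);
//   __m128i hits = _mm_cmpeq_epi8(keys, _mm_set1_epi8((char)c));
//   int mask = _mm_movemask_epi8(hits) & ((1 << n->numChildren) - 1);
//   int slot = mask ? std::countr_zero((unsigned)mask) : -1;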
@@ -259,6 +295,7 @@ struct Node256 : Node {
};

struct NodeAllocators {
BoundedFreeListAllocator<Node1> node1;
BoundedFreeListAllocator<Node4> node4;
BoundedFreeListAllocator<Node16> node16;
BoundedFreeListAllocator<Node48> node48;
@@ -334,6 +371,7 @@ Node *&getChildExists(Node *self, uint8_t index) {
return self48->children[self48->index[index]].child;
} else {
auto *self256 = static_cast<Node256 *>(self);
assert(self256->bitSet.test(index));
return self256->children[index].child;
}
__builtin_unreachable(); // GCOVR_EXCL_LINE
@@ -350,6 +388,7 @@ int64_t getChildMaxVersion(Node *self, uint8_t index) {
return self48->children[self48->index[index]].childMaxVersion;
} else {
auto *self256 = static_cast<Node256 *>(self);
assert(self256->bitSet.test(index));
return self256->children[index].childMaxVersion;
}
__builtin_unreachable(); // GCOVR_EXCL_LINE
@@ -445,6 +484,12 @@ int getChildGeq(Node *self, int child) {
return -1;
}

void setChildrenParents(Node4 *n) {
for (int i = 0; i < n->numChildren; ++i) {
n->children[i].child->parent = n;
}
}

void setChildrenParents(Node16 *n) {
for (int i = 0; i < n->numChildren; ++i) {
n->children[i].child->parent = n;
@@ -452,21 +497,13 @@ void setChildrenParents(Node16 *n) {
}

void setChildrenParents(Node48 *n) {
for (int i = 0; i < 256; ++i) {
int c = n->index[i];
if (c != -1) {
n->children[c].child->parent = n;
}
}
n->bitSet.forEachInRange(
[&](int i) { n->children[n->index[i]].child->parent = n; }, 0, 256);
}

void setChildrenParents(Node256 *n) {
for (int i = 0; i < 256; ++i) {
auto *child = n->children[i].child;
if (child != nullptr) {
child->parent = n;
}
}
n->bitSet.forEachInRange([&](int i) { n->children[i].child->parent = n; }, 0,
256);
}
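// The forEachInRange forms visit only the children recorded in the occupancy
// bitSet, while the plain loops above them scan all 256 slots.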

// Caller is responsible for assigning a non-null pointer to the returned
@@ -494,7 +531,23 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
}
}

if (self->type == Type::Node4) {
if (self->type == Type::Node1) {
auto *self1 = static_cast<Node1 *>(self);

if (self->numChildren == 1) {
auto *newSelf = allocators->node4.allocate();
memcpy((void *)newSelf, self, sizeof(Node1));
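// Copying sizeof(Node1) bytes carries over the Node header, the 16-byte
// index, and the single child slot; the type is corrected and the child's
// parent pointer is repointed just below.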
newSelf->type = Type::Node4;
allocators->node1.release(self1);
setChildrenParents(newSelf);
self = newSelf;
} else {
assert(self->numChildren == 0);
}

goto insert16;

} else if (self->type == Type::Node4) {
auto *self4 = static_cast<Node4 *>(self);

if (self->numChildren == 4) {
@@ -554,12 +607,11 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
auto *newSelf = allocators->node256.allocate();
memcpy((void *)newSelf, self, offsetof(Node, type));
newSelf->bitSet = self48->bitSet;
for (int i = 0; i < 256; ++i) {
int c = self48->index[i];
if (c >= 0) {
newSelf->children[i] = self48->children[c];
}
}
newSelf->bitSet.forEachInRange(
[&](int i) {
newSelf->children[i] = self48->children[self48->index[i]];
},
0, 256);
allocators->node48.release(self48);
setChildrenParents(newSelf);
self = newSelf;
@@ -577,6 +629,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
result = nullptr;
return result;
} else {
assert(self->type == Type::Node256);
insert256:
auto *self256 = static_cast<Node256 *>(self);
++self->numChildren;
@@ -589,6 +642,9 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
void eraseChild(Node *self, uint8_t index, NodeAllocators *allocators) {
auto *child = getChildExists(self, index);
switch (child->type) {
case Type::Node1:
allocators->node1.release((Node1 *)child);
break;
case Type::Node4:
allocators->node4.release((Node4 *)child);
break;
@@ -974,6 +1030,8 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
}
}
switch (n->type) {
case Type::Node1:
[[fallthrough]];
case Type::Node4:
[[fallthrough]];
case Type::Node16: {
@@ -1475,7 +1533,7 @@ template <bool kBegin>
auto *old = *self;
int64_t oldMaxVersion = maxVersion(old, impl);

*self = allocators->node4.allocate();
*self = allocators->node1.allocate();

memcpy((void *)*self, old, offsetof(Node, type));
(*self)->partialKeyLen = partialKeyIndex;
@@ -1521,7 +1579,7 @@ template <bool kBegin>

auto &child = getOrCreateChild(*self, key.front(), allocators);
if (!child) {
child = allocators->node4.allocate();
child = allocators->node1.allocate();
child->parent = *self;
child->parentsIndex = key.front();
maxVersion(child, impl) =
@@ -1793,11 +1851,11 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
auto begin = std::span<const uint8_t>(w.begin.p, w.begin.len);
auto end = std::span<const uint8_t>(w.end.p, w.end.len);
if (w.end.len > 0) {
keyUpdates += 2;
keyUpdates += 3;
addWriteRange(root, oldestVersion, begin, end, w.writeVersion,
&allocators, this);
} else {
keyUpdates += 1;
keyUpdates += 2;
addPointWrite(root, oldestVersion, begin, w.writeVersion, &allocators,
this);
}
@@ -1812,7 +1870,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
Node *prev = firstGeq(root, removalKey).n;
// There's no way to erase removalKey without introducing a key after it
assert(prev != nullptr);
while (keyUpdates-- > 0) {
for (; keyUpdates > 0; --keyUpdates) {
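// When it runs to completion, this form leaves keyUpdates at zero, where the
// while form above post-decrements it to -1.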
Node *n = nextLogical(prev);
if (n == nullptr) {
removalKey = {};
@@ -1841,7 +1899,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {

explicit Impl(int64_t oldestVersion) : oldestVersion(oldestVersion) {
// Insert ""
root = allocators.node4.allocate();
root = allocators.node1.allocate();
rootMaxVersion = oldestVersion;
root->entry.pointVersion = oldestVersion;
root->entry.rangeVersion = oldestVersion;
@@ -1877,6 +1935,7 @@ int64_t &maxVersion(Node *n, ConflictSet::Impl *impl) {
return n48->children[n48->index[index]].childMaxVersion;
} else {
auto *n256 = static_cast<Node256 *>(n);
assert(n256->bitSet.test(index));
return n256->children[index].childMaxVersion;
}
}
@@ -1908,6 +1967,23 @@ ConflictSet::~ConflictSet() {
}
}

#if SHOW_MEMORY
__attribute__((visibility("default"))) void showMemory(const ConflictSet &cs) {
ConflictSet::Impl *impl;
memcpy(&impl, &cs, sizeof(impl)); // NOLINT
fprintf(stderr, "Max Node1 memory usage: %" PRId64 "\n",
impl->allocators.node1.highWaterMarkBytes());
fprintf(stderr, "Max Node4 memory usage: %" PRId64 "\n",
impl->allocators.node4.highWaterMarkBytes());
fprintf(stderr, "Max Node16 memory usage: %" PRId64 "\n",
impl->allocators.node16.highWaterMarkBytes());
fprintf(stderr, "Max Node48 memory usage: %" PRId64 "\n",
impl->allocators.node48.highWaterMarkBytes());
fprintf(stderr, "Max Node256 memory usage: %" PRId64 "\n",
impl->allocators.node256.highWaterMarkBytes());
}
#endif

ConflictSet::ConflictSet(ConflictSet &&other) noexcept
: impl(std::exchange(other.impl, nullptr)) {}
@@ -2176,7 +2252,7 @@ int main(void) {

ConflictSet::Impl cs{0};
for (int j = 0; j < 256; ++j) {
getOrCreateChild(cs.root, j, &cs.allocators) =
cs.allocators.node4.allocate();
cs.allocators.node1.allocate();
if (j % 10 == 0) {
bench.run("MaxExclusive " + std::to_string(j), [&]() {
bench.doNotOptimizeAway(maxBetweenExclusive(cs.root, 0, 256));