7 Commits

Author SHA1 Message Date
3f45535f89 Fix setOldestVersion
All checks were successful
Tests / Release [gcc] total: 471, passed: 471
GNU C Compiler (gcc)

|Total|New|Outstanding|Fixed|Trend|
|:-:|:-:|:-:|:-:|:-:|
|0|0|0|0|:clap:|

Reference build: <a href="https://jenkins.weaselab.dev/job/weaselab/job/conflict-set/job/main/40//gcc">weaselab » conflict-set » main #40</a>
Tests / Coverage total: 469, passed: 469
weaselab/conflict-set/pipeline/head This commit looks good
We were not accumulating keyUpdates strictly faster than we were
spending them for a monotonically increasing workload.
2024-02-26 13:51:03 -08:00
2b0dbabb5c Add public showMemory symbol (when SHOW_MEMORY=1) 2024-02-26 13:48:20 -08:00
be7f643f14 Add Node1 and SHOW_MEMORY 2024-02-26 12:25:06 -08:00
6a08bdd40e Add bitSet asserts for Node256 paths 2024-02-26 10:58:16 -08:00
333ac74d91 Use forEachInRange for overfill for Node48 2024-02-26 10:55:50 -08:00
17ac9b38fb Use forEachInRange in setChildrenParents 2024-02-26 10:37:28 -08:00
172dd40648 Check for full words on boundaries 2024-02-26 10:36:45 -08:00
4 changed files with 128 additions and 39 deletions

View File

@@ -15,4 +15,12 @@ repos:
description: disallow checking in DEBUG_VERBOSE=1 description: disallow checking in DEBUG_VERBOSE=1
entry: '^#define DEBUG_VERBOSE 1$' entry: '^#define DEBUG_VERBOSE 1$'
language: pygrep language: pygrep
types: [c++]
# Reject commits that leave the SHOW_MEMORY debug switch turned on.
- repo: local
hooks:
# The id names what this hook actually checks (SHOW_MEMORY), so it can be
# selected or skipped on its own.
- id: show memory check
name: disallow checking in SHOW_MEMORY=1
description: disallow checking in SHOW_MEMORY=1
entry: '^#define SHOW_MEMORY 1$'
language: pygrep
types: [c++] types: [c++]

View File

@@ -4,6 +4,10 @@
#include <cstring> #include <cstring>
#include <string> #include <string>
#if SHOW_MEMORY
// Forward declaration of the debug hook that the ConflictSet implementation
// exports only when SHOW_MEMORY is enabled. It prints the high-water-mark
// memory usage of each node type to stderr.
void showMemory(const ConflictSet &cs);
#endif
#define ANKERL_NANOBENCH_IMPLEMENT #define ANKERL_NANOBENCH_IMPLEMENT
#include "third_party/nanobench.h" #include "third_party/nanobench.h"

View File

@@ -51,6 +51,10 @@ struct BoundedFreeListAllocator {
static_assert(sizeof(T) >= sizeof(void *)); static_assert(sizeof(T) >= sizeof(void *));
T *allocate() { T *allocate() {
#if SHOW_MEMORY
++liveAllocations;
maxLiveAllocations = std::max(maxLiveAllocations, liveAllocations);
#endif
if (freeListSize == 0) { if (freeListSize == 0) {
assert(freeList == nullptr); assert(freeList == nullptr);
return new (safe_malloc(sizeof(T))) T; return new (safe_malloc(sizeof(T))) T;
@@ -65,6 +69,9 @@ struct BoundedFreeListAllocator {
} }
void release(T *p) { void release(T *p) {
#if SHOW_MEMORY
--liveAllocations;
#endif
p->~T(); p->~T();
if (freeListSize == kMaxFreeListSize) { if (freeListSize == kMaxFreeListSize) {
return free(p); return free(p);
@@ -84,10 +91,18 @@ struct BoundedFreeListAllocator {
} }
} }
#if SHOW_MEMORY
int64_t highWaterMarkBytes() const { return maxLiveAllocations * sizeof(T); }
#endif
private: private:
static constexpr int kMaxFreeListSize = kMemoryBound / sizeof(T); static constexpr int kMaxFreeListSize = kMemoryBound / sizeof(T);
int freeListSize = 0; int freeListSize = 0;
void *freeList = nullptr; void *freeList = nullptr;
#if SHOW_MEMORY
int64_t maxLiveAllocations = 0;
int64_t liveAllocations = 0;
#endif
}; };
struct BitSet { struct BitSet {
@@ -116,14 +131,20 @@ struct BitSet {
// Check begin partial word // Check begin partial word
if (begin & 63) { if (begin & 63) {
uint64_t word = words[begin >> 6] & (uint64_t(-1) << (begin & 63)); uint64_t word = words[begin >> 6] & (uint64_t(-1) << (begin & 63));
while (word) { if (std::popcount(word) + (begin & 63) == 64) {
uint64_t temp = word & -word; while (begin & 63) {
int index = (begin & ~63) + std::countr_zero(word); f(begin++);
f(index); }
word ^= temp; } else {
while (word) {
uint64_t temp = word & -word;
int index = (begin & ~63) + std::countr_zero(word);
f(index);
word ^= temp;
}
begin &= ~63;
begin += 64;
} }
begin &= ~63;
begin += 64;
} }
// Check inner, full words // Check inner, full words
@@ -147,11 +168,17 @@ struct BitSet {
if (end & 63) { if (end & 63) {
// Check end partial word // Check end partial word
uint64_t word = words[end >> 6] & ~(uint64_t(-1) << (end & 63)); uint64_t word = words[end >> 6] & ~(uint64_t(-1) << (end & 63));
while (word) { if (std::popcount(word) == (end & 63)) {
uint64_t temp = word & -word; while (begin < end) {
int index = begin + std::countr_zero(word); f(begin++);
f(index); }
word ^= temp; } else {
while (word) {
uint64_t temp = word & -word;
int index = begin + std::countr_zero(word);
f(index);
word ^= temp;
}
} }
} }
} }
@@ -193,6 +220,7 @@ int BitSet::firstSetGeq(int i) const {
} }
enum class Type : int8_t { enum class Type : int8_t {
Node1,
Node4, Node4,
Node16, Node16,
Node48, Node48,
@@ -221,10 +249,18 @@ struct Child {
Node *child; Node *child;
}; };
// Smallest node variant: room for a single child. The members are declared in
// the same order as Node4's (index[16], then the children array) because
// getOrCreateChild grows a full Node1 by memcpy'ing sizeof(Node1) bytes into a
// freshly allocated Node4 -- keep the two layouts in sync.
struct Node1 : Node {
// Sorted
uint8_t index[16]; // 16 so that we can use the same simd index search
// implementation as Node16
Child children[1];
// Tag the node so type-dispatching code can downcast it to Node1.
Node1() { this->type = Type::Node1; }
};
struct Node4 : Node { struct Node4 : Node {
// Sorted // Sorted
uint8_t index[16]; // 16 so that we can use the same simd index search uint8_t index[16]; // 16 so that we can use the same simd index search
// implementation for Node4 as Node16 // implementation as Node16
Child children[4]; Child children[4];
Node4() { this->type = Type::Node4; } Node4() { this->type = Type::Node4; }
}; };
@@ -259,6 +295,7 @@ struct Node256 : Node {
}; };
struct NodeAllocators { struct NodeAllocators {
BoundedFreeListAllocator<Node1> node1;
BoundedFreeListAllocator<Node4> node4; BoundedFreeListAllocator<Node4> node4;
BoundedFreeListAllocator<Node16> node16; BoundedFreeListAllocator<Node16> node16;
BoundedFreeListAllocator<Node48> node48; BoundedFreeListAllocator<Node48> node48;
@@ -334,6 +371,7 @@ Node *&getChildExists(Node *self, uint8_t index) {
return self48->children[self48->index[index]].child; return self48->children[self48->index[index]].child;
} else { } else {
auto *self256 = static_cast<Node256 *>(self); auto *self256 = static_cast<Node256 *>(self);
assert(self256->bitSet.test(index));
return self256->children[index].child; return self256->children[index].child;
} }
__builtin_unreachable(); // GCOVR_EXCL_LINE __builtin_unreachable(); // GCOVR_EXCL_LINE
@@ -350,6 +388,7 @@ int64_t getChildMaxVersion(Node *self, uint8_t index) {
return self48->children[self48->index[index]].childMaxVersion; return self48->children[self48->index[index]].childMaxVersion;
} else { } else {
auto *self256 = static_cast<Node256 *>(self); auto *self256 = static_cast<Node256 *>(self);
assert(self256->bitSet.test(index));
return self256->children[index].childMaxVersion; return self256->children[index].childMaxVersion;
} }
__builtin_unreachable(); // GCOVR_EXCL_LINE __builtin_unreachable(); // GCOVR_EXCL_LINE
@@ -445,6 +484,12 @@ int getChildGeq(Node *self, int child) {
return -1; return -1;
} }
// Re-point the `parent` link of every child stored in `n` back at `n`.
// Used after a node's contents have been copied into a new allocation.
void setChildrenParents(Node4 *n) {
  int slot = 0;
  while (slot < n->numChildren) {
    Child &entry = n->children[slot];
    entry.child->parent = n;
    ++slot;
  }
}
void setChildrenParents(Node16 *n) { void setChildrenParents(Node16 *n) {
for (int i = 0; i < n->numChildren; ++i) { for (int i = 0; i < n->numChildren; ++i) {
n->children[i].child->parent = n; n->children[i].child->parent = n;
@@ -452,21 +497,13 @@ void setChildrenParents(Node16 *n) {
} }
void setChildrenParents(Node48 *n) { void setChildrenParents(Node48 *n) {
for (int i = 0; i < 256; ++i) { n->bitSet.forEachInRange(
int c = n->index[i]; [&](int i) { n->children[n->index[i]].child->parent = n; }, 0, 256);
if (c != -1) {
n->children[c].child->parent = n;
}
}
} }
void setChildrenParents(Node256 *n) { void setChildrenParents(Node256 *n) {
for (int i = 0; i < 256; ++i) { n->bitSet.forEachInRange([&](int i) { n->children[i].child->parent = n; }, 0,
auto *child = n->children[i].child; 256);
if (child != nullptr) {
child->parent = n;
}
}
} }
// Caller is responsible for assigning a non-null pointer to the returned // Caller is responsible for assigning a non-null pointer to the returned
@@ -494,7 +531,23 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
} }
} }
if (self->type == Type::Node4) { if (self->type == Type::Node1) {
auto *self1 = static_cast<Node1 *>(self);
if (self->numChildren == 1) {
auto *newSelf = allocators->node4.allocate();
memcpy((void *)newSelf, self, sizeof(Node1));
newSelf->type = Type::Node4;
allocators->node1.release(self1);
setChildrenParents(newSelf);
self = newSelf;
} else {
assert(self->numChildren == 0);
}
goto insert16;
} else if (self->type == Type::Node4) {
auto *self4 = static_cast<Node4 *>(self); auto *self4 = static_cast<Node4 *>(self);
if (self->numChildren == 4) { if (self->numChildren == 4) {
@@ -554,12 +607,11 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
auto *newSelf = allocators->node256.allocate(); auto *newSelf = allocators->node256.allocate();
memcpy((void *)newSelf, self, offsetof(Node, type)); memcpy((void *)newSelf, self, offsetof(Node, type));
newSelf->bitSet = self48->bitSet; newSelf->bitSet = self48->bitSet;
for (int i = 0; i < 256; ++i) { newSelf->bitSet.forEachInRange(
int c = self48->index[i]; [&](int i) {
if (c >= 0) { newSelf->children[i] = self48->children[self48->index[i]];
newSelf->children[i] = self48->children[c]; },
} 0, 256);
}
allocators->node48.release(self48); allocators->node48.release(self48);
setChildrenParents(newSelf); setChildrenParents(newSelf);
self = newSelf; self = newSelf;
@@ -577,6 +629,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
result = nullptr; result = nullptr;
return result; return result;
} else { } else {
assert(self->type == Type::Node256);
insert256: insert256:
auto *self256 = static_cast<Node256 *>(self); auto *self256 = static_cast<Node256 *>(self);
++self->numChildren; ++self->numChildren;
@@ -589,6 +642,9 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
void eraseChild(Node *self, uint8_t index, NodeAllocators *allocators) { void eraseChild(Node *self, uint8_t index, NodeAllocators *allocators) {
auto *child = getChildExists(self, index); auto *child = getChildExists(self, index);
switch (child->type) { switch (child->type) {
case Type::Node1:
allocators->node1.release((Node1 *)child);
break;
case Type::Node4: case Type::Node4:
allocators->node4.release((Node4 *)child); allocators->node4.release((Node4 *)child);
break; break;
@@ -974,6 +1030,8 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
} }
} }
switch (n->type) { switch (n->type) {
case Type::Node1:
[[fallthrough]];
case Type::Node4: case Type::Node4:
[[fallthrough]]; [[fallthrough]];
case Type::Node16: { case Type::Node16: {
@@ -1475,7 +1533,7 @@ template <bool kBegin>
auto *old = *self; auto *old = *self;
int64_t oldMaxVersion = maxVersion(old, impl); int64_t oldMaxVersion = maxVersion(old, impl);
*self = allocators->node4.allocate(); *self = allocators->node1.allocate();
memcpy((void *)*self, old, offsetof(Node, type)); memcpy((void *)*self, old, offsetof(Node, type));
(*self)->partialKeyLen = partialKeyIndex; (*self)->partialKeyLen = partialKeyIndex;
@@ -1521,7 +1579,7 @@ template <bool kBegin>
auto &child = getOrCreateChild(*self, key.front(), allocators); auto &child = getOrCreateChild(*self, key.front(), allocators);
if (!child) { if (!child) {
child = allocators->node4.allocate(); child = allocators->node1.allocate();
child->parent = *self; child->parent = *self;
child->parentsIndex = key.front(); child->parentsIndex = key.front();
maxVersion(child, impl) = maxVersion(child, impl) =
@@ -1793,11 +1851,11 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
auto begin = std::span<const uint8_t>(w.begin.p, w.begin.len); auto begin = std::span<const uint8_t>(w.begin.p, w.begin.len);
auto end = std::span<const uint8_t>(w.end.p, w.end.len); auto end = std::span<const uint8_t>(w.end.p, w.end.len);
if (w.end.len > 0) { if (w.end.len > 0) {
keyUpdates += 2; keyUpdates += 3;
addWriteRange(root, oldestVersion, begin, end, w.writeVersion, addWriteRange(root, oldestVersion, begin, end, w.writeVersion,
&allocators, this); &allocators, this);
} else { } else {
keyUpdates += 1; keyUpdates += 2;
addPointWrite(root, oldestVersion, begin, w.writeVersion, &allocators, addPointWrite(root, oldestVersion, begin, w.writeVersion, &allocators,
this); this);
} }
@@ -1812,7 +1870,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
Node *prev = firstGeq(root, removalKey).n; Node *prev = firstGeq(root, removalKey).n;
// There's no way to erase removalKey without introducing a key after it // There's no way to erase removalKey without introducing a key after it
assert(prev != nullptr); assert(prev != nullptr);
while (keyUpdates-- > 0) { for (; keyUpdates > 0; --keyUpdates) {
Node *n = nextLogical(prev); Node *n = nextLogical(prev);
if (n == nullptr) { if (n == nullptr) {
removalKey = {}; removalKey = {};
@@ -1841,7 +1899,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
explicit Impl(int64_t oldestVersion) : oldestVersion(oldestVersion) { explicit Impl(int64_t oldestVersion) : oldestVersion(oldestVersion) {
// Insert "" // Insert ""
root = allocators.node4.allocate(); root = allocators.node1.allocate();
rootMaxVersion = oldestVersion; rootMaxVersion = oldestVersion;
root->entry.pointVersion = oldestVersion; root->entry.pointVersion = oldestVersion;
root->entry.rangeVersion = oldestVersion; root->entry.rangeVersion = oldestVersion;
@@ -1877,6 +1935,7 @@ int64_t &maxVersion(Node *n, ConflictSet::Impl *impl) {
return n48->children[n48->index[index]].childMaxVersion; return n48->children[n48->index[index]].childMaxVersion;
} else { } else {
auto *n256 = static_cast<Node256 *>(n); auto *n256 = static_cast<Node256 *>(n);
assert(n256->bitSet.test(index));
return n256->children[index].childMaxVersion; return n256->children[index].childMaxVersion;
} }
} }
@@ -1908,6 +1967,23 @@ ConflictSet::~ConflictSet() {
} }
} }
#if SHOW_MEMORY
// Debug-only exported entry point: reports, on stderr, the high-water-mark
// byte count recorded by each per-node-type allocator of `cs`.
__attribute__((visibility("default"))) void showMemory(const ConflictSet &cs) {
  // Recover the Impl pointer by copying the leading pointer-sized bytes of the
  // ConflictSet object.
  // NOTE(review): presumably `impl` is ConflictSet's first data member --
  // confirm against the class definition.
  ConflictSet::Impl *impl;
  memcpy(&impl, &cs, sizeof(impl)); // NOLINT

  const auto &pools = impl->allocators;
  FILE *const out = stderr;

  fprintf(out, "Max Node1 memory usage: %" PRId64 "\n",
          pools.node1.highWaterMarkBytes());
  fprintf(out, "Max Node4 memory usage: %" PRId64 "\n",
          pools.node4.highWaterMarkBytes());
  fprintf(out, "Max Node16 memory usage: %" PRId64 "\n",
          pools.node16.highWaterMarkBytes());
  fprintf(out, "Max Node48 memory usage: %" PRId64 "\n",
          pools.node48.highWaterMarkBytes());
  fprintf(out, "Max Node256 memory usage: %" PRId64 "\n",
          pools.node256.highWaterMarkBytes());
}
#endif
ConflictSet::ConflictSet(ConflictSet &&other) noexcept ConflictSet::ConflictSet(ConflictSet &&other) noexcept
: impl(std::exchange(other.impl, nullptr)) {} : impl(std::exchange(other.impl, nullptr)) {}
@@ -2176,7 +2252,7 @@ int main(void) {
ConflictSet::Impl cs{0}; ConflictSet::Impl cs{0};
for (int j = 0; j < 256; ++j) { for (int j = 0; j < 256; ++j) {
getOrCreateChild(cs.root, j, &cs.allocators) = getOrCreateChild(cs.root, j, &cs.allocators) =
cs.allocators.node4.allocate(); cs.allocators.node1.allocate();
if (j % 10 == 0) { if (j % 10 == 0) {
bench.run("MaxExclusive " + std::to_string(j), [&]() { bench.run("MaxExclusive " + std::to_string(j), [&]() {
bench.doNotOptimizeAway(maxBetweenExclusive(cs.root, 0, 256)); bench.doNotOptimizeAway(maxBetweenExclusive(cs.root, 0, 256));

View File

@@ -21,6 +21,7 @@
#include <callgrind.h> #include <callgrind.h>
#define DEBUG_VERBOSE 0 #define DEBUG_VERBOSE 0
#define SHOW_MEMORY 0
[[nodiscard]] inline auto [[nodiscard]] inline auto
operator<=>(const std::span<const uint8_t> &lhs, operator<=>(const std::span<const uint8_t> &lhs,