Track initializedness of Node memory more precisely
By not initializing Node members with dummy default values. This has performance and code-size benefits, and improves debugging when running under valgrind. Unfortunately it also makes it easier to write code that uses uninitialized memory, so if valgrind coverage has gaps, some uses of uninitialized memory could sneak through. We plan to keep valgrind coverage good, so I think that's acceptable. If writing correct code becomes too tedious, we can go back to initializing Node fields with dummy default values.
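For context, the allocator below leans on valgrind's memcheck client requests to track initializedness by hand: memory parked on the free list is marked NOACCESS, and memory handed back out is marked UNDEFINED so that any read-before-write is reported. A minimal sketch of that pattern (not code from this commit; only the VALGRIND_* macros from valgrind/memcheck.h are real):

    #include <valgrind/memcheck.h>
    #include <cstddef>
    #include <cstring>

    struct FreeList {
      void *head = nullptr;

      void push(void *p, std::size_t size) {
        // Thread the next pointer through the freed node's own bytes...
        memcpy(p, &head, sizeof(head));
        head = p;
        // ...then poison the node so any touch of freed memory is an error.
        VALGRIND_MAKE_MEM_NOACCESS(p, size);
      }

      void *pop(std::size_t size) {
        if (head == nullptr)
          return nullptr;
        void *p = head;
        // Un-poison just the embedded next pointer so it can be read back.
        VALGRIND_MAKE_MEM_DEFINED(p, sizeof(head));
        memcpy(&head, p, sizeof(head));
        // The node is now allocated-but-uninitialized: addressable, but
        // reading it before writing it is an error memcheck will report.
        VALGRIND_MAKE_MEM_UNDEFINED(p, size);
        return p;
      }
    };

The macros expand to no-ops when the program is not running under valgrind, so the annotations cost nothing in production.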
ConflictSet.cpp
@@ -152,8 +152,14 @@ struct BitSet {
     }
   }
 
+  void init() {
+    for (auto &w : words) {
+      w = 0;
+    }
+  }
+
 private:
-  uint64_t words[4] = {};
+  uint64_t words[4];
 };
 
 bool BitSet::test(int i) const {
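Since words[] is no longer zeroed in-class, a BitSet coming out of the allocator starts with indeterminate contents and its owner must zero it explicitly. A usage sketch (names from this diff; the allocation context is assumed):

    BitSet bs;  // words[] is now uninitialized
    bs.init();  // must zero before the first test() / forEachInRange()

This is why the Node16-to-Node48 growth path below gains a newSelf->bitSet.init() call.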
@@ -196,28 +202,38 @@ enum Type {
   Type_Node256,
 };
 
+template <class T> struct BoundedFreeListAllocator;
+
 struct Node {
 
-  Node *parent = nullptr;
+  /* begin section that's copied to the next node */
+  Node *parent;
   Entry entry;
-  int32_t partialKeyLen = 0;
-  int16_t numChildren = 0;
-  bool entryPresent = false;
-  uint8_t parentsIndex = 0;
+  int32_t partialKeyLen;
+  int16_t numChildren;
+  bool entryPresent;
+  uint8_t parentsIndex;
+  /* end section that's copied to the next node */
 
-  Type type;
-
-  // Leaving this uninitialized is intentional and necessary for correctness.
-  // Basically it needs to be preserved when going to the free list and back.
-  int32_t partialKeyCapacity;
+  // These survive the free list round trip unscathed, and are only written by
+  // BoundedFreeListAllocator
 
   uint8_t *partialKey();
 
+  Type getType() const { return type; }
+  int32_t getCapacity() const { return partialKeyCapacity; }
+
+private:
+  template <class T> friend struct BoundedFreeListAllocator;
+  // These are publically readable, but should only be written by
+  // BoundedFreeListAllocator
+  Type type;
+  int32_t partialKeyCapacity;
 };
 
-constexpr int kNodeCopyBegin = offsetof(Node, parent);
-constexpr int kNodeCopySize = offsetof(Node, type) - kNodeCopyBegin;
+constexpr int kNodeCopyBegin = offsetof(Node, entry);
+constexpr int kNodeCopySize =
+    offsetof(Node, parentsIndex) + sizeof(Node::parentsIndex) - kNodeCopyBegin;
 
 struct Child {
   int64_t childMaxVersion;
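Because type and partialKeyCapacity are now private, the copy window is expressed in terms of the public members: it starts at entry and ends just past parentsIndex, and parent sits outside it. A hedged sketch of the migration pattern this enables (simplified; growToNode16, allocators->node16, and the fix-up details are assumptions, not code from this commit):

    Node16 *growToNode16(Node3 *self, NodeAllocators *allocators) {
      Node16 *newSelf = allocators->node16.allocate(self->partialKeyLen);
      // type and partialKeyCapacity were already written by the allocator.
      memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
             kNodeCopySize);
      memcpy(newSelf->partialKey(), self->partialKey(), self->partialKeyLen);
      // parent precedes the copy window, so it is fixed up explicitly.
      newSelf->parent = self->parent;
      for (int i = 0; i < self->numChildren; ++i) {
        newSelf->index[i] = self->index[i];
        newSelf->children[i] = self->children[i];
      }
      return newSelf;
    }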
@@ -225,49 +241,41 @@ struct Child {
 };
 
 struct Node0 : Node {
-  Node0() { this->type = Type_Node0; }
+  constexpr static auto kType = Type_Node0;
   uint8_t *partialKey() { return (uint8_t *)(this + 1); }
 };
 
 struct Node3 : Node {
   constexpr static auto kMaxNodes = 3;
+  constexpr static auto kType = Type_Node3;
   // Sorted
   uint8_t index[kMaxNodes];
   Child children[kMaxNodes];
-  Node3() { this->type = Type_Node3; }
   uint8_t *partialKey() { return (uint8_t *)(this + 1); }
 };
 
 struct Node16 : Node {
+  constexpr static auto kType = Type_Node16;
   constexpr static auto kMaxNodes = 16;
   // Sorted
   uint8_t index[kMaxNodes];
   Child children[kMaxNodes];
-  Node16() { this->type = Type_Node16; }
   uint8_t *partialKey() { return (uint8_t *)(this + 1); }
 };
 
 struct Node48 : Node {
+  constexpr static auto kType = Type_Node48;
   BitSet bitSet;
   Child children[48];
-  int8_t nextFree = 0;
+  int8_t nextFree;
   int8_t index[256];
-  Node48() {
-    memset(index, -1, 256);
-    this->type = Type_Node48;
-  }
   uint8_t *partialKey() { return (uint8_t *)(this + 1); }
 };
 
 struct Node256 : Node {
+  constexpr static auto kType = Type_Node256;
   BitSet bitSet;
   Child children[256];
-  Node256() {
-    this->type = Type_Node256;
-    for (int i = 0; i < 256; ++i) {
-      children[i].child = nullptr;
-    }
-  }
   uint8_t *partialKey() { return (uint8_t *)(this + 1); }
 };
 
@@ -311,22 +319,27 @@ static_assert(kBytesPerKey - sizeof(Node0) >= kMinNodeSurplus);
 // Which should give us the budget to pay for the key bytes. (children +
 // entryPresent) is a lower bound on how many keys these bytes are a prefix of
 
-template <class T, int64_t kMemoryBound = (1 << 20)>
-struct BoundedFreeListAllocator {
+constexpr int64_t kFreeListMaxMemory = 1 << 20;
+
+template <class T> struct BoundedFreeListAllocator {
   static_assert(sizeof(T) >= sizeof(void *));
+  static_assert(std::derived_from<T, Node>);
+  static_assert(std::is_trivial_v<T>);
 
   T *allocate(int partialKeyCapacity) {
     if (freeList != nullptr) {
       T *n = (T *)freeList;
-      VALGRIND_MAKE_MEM_DEFINED(freeList, sizeof(freeList));
-      memcpy(&freeList, freeList, sizeof(freeList));
-      VALGRIND_MAKE_MEM_UNDEFINED(n, sizeof(T));
-      VALGRIND_MAKE_MEM_DEFINED(&n->partialKeyCapacity,
-                                sizeof(n->partialKeyCapacity));
+      VALGRIND_MAKE_MEM_DEFINED(freeList, sizeof(freeList));
+      memcpy(&freeList, freeList, sizeof(freeList));
+      VALGRIND_MAKE_MEM_DEFINED(&n->type, sizeof(n->type));
+      assert(n->type == T::kType);
       VALGRIND_MAKE_MEM_UNDEFINED(n + 1, n->partialKeyCapacity);
       freeListBytes -= sizeof(T) + n->partialKeyCapacity;
       if (n->partialKeyCapacity >= partialKeyCapacity) {
-        return new (n) T;
+        return n;
       } else {
         // The intent is to filter out too-small nodes in the freelist
         removeNode(n);
@@ -334,7 +347,8 @@ struct BoundedFreeListAllocator {
       }
     }
 
-    auto *result = new (safe_malloc(sizeof(T) + partialKeyCapacity)) T;
+    auto *result = (T *)safe_malloc(sizeof(T) + partialKeyCapacity);
+    result->type = T::kType;
     result->partialKeyCapacity = partialKeyCapacity;
     addNode(result);
     return result;
@@ -342,14 +356,14 @@ struct BoundedFreeListAllocator {
 
   void release(T *p) {
     static_assert(std::is_trivially_destructible_v<T>);
-    if (freeListBytes >= kMemoryBound) {
+    if (freeListBytes >= kFreeListMaxMemory) {
       removeNode(p);
       return safe_free(p);
     }
     memcpy((void *)p, &freeList, sizeof(freeList));
     freeList = p;
     freeListBytes += sizeof(T) + p->partialKeyCapacity;
-    VALGRIND_MAKE_MEM_NOACCESS(freeList, sizeof(T));
+    VALGRIND_MAKE_MEM_NOACCESS(freeList, sizeof(T) + p->partialKeyCapacity);
   }
 
   ~BoundedFreeListAllocator() {
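Dropping the default member initializers is also what lets allocate() return n instead of new (n) T: T is now trivial, so placement new would not write anything anyway, and the explicit result->type = T::kType takes over the one store the old constructors did. A self-contained sketch of the difference (not from this codebase):

    #include <new>

    struct Old { int x = 0; };  // default member initializer: placement new stores 0
    struct New { int x; };      // trivial: placement new performs no writes

    void demo() {
      alignas(Old) unsigned char buf[sizeof(Old)];
      Old *o = new (buf) Old;  // o->x == 0: the dummy store this commit removes
      o->~Old();
      New *n = new (buf) New;  // n->x is indeterminate; reading it before a
                               // write is the bug class valgrind now catches
      n->x = 42;               // write before read, as callers now must
    }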
@@ -455,7 +469,7 @@ template <class NodeT> int getNodeIndex(NodeT *self, uint8_t index) {
 
 // Precondition - an entry for index must exist in the node
 Node *&getChildExists(Node *self, uint8_t index) {
-  switch (self->type) {
+  switch (self->getType()) {
   case Type_Node0:
     __builtin_unreachable(); // GCOVR_EXCL_LINE
   case Type_Node3: {
@@ -487,7 +501,7 @@ int64_t &maxVersion(Node *n, ConflictSet::Impl *);
 Node *&getInTree(Node *n, ConflictSet::Impl *);
 
 Node *getChild(Node *self, uint8_t index) {
-  switch (self->type) {
+  switch (self->getType()) {
   case Type_Node0:
     return nullptr;
   case Type_Node3: {
@@ -576,7 +590,7 @@ int getChildGeq(Node *self, int child) {
   if (child > 255) {
     return -1;
   }
-  switch (self->type) {
+  switch (self->getType()) {
   case Type_Node0:
     return -1;
   case Type_Node3:
@@ -623,7 +637,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
                         NodeAllocators *allocators) {
 
   // Fast path for if it exists already
-  switch (self->type) {
+  switch (self->getType()) {
   case Type_Node0:
     break;
   case Type_Node3: {
@@ -657,7 +671,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
     __builtin_unreachable(); // GCOVR_EXCL_LINE
   }
 
-  switch (self->type) {
+  switch (self->getType()) {
   case Type_Node0: {
     auto *self0 = static_cast<Node0 *>(self);
 
@@ -710,9 +724,11 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
     if (self->numChildren == Node16::kMaxNodes) {
       auto *self16 = static_cast<Node16 *>(self);
       auto *newSelf = allocators->node48.allocate(self->partialKeyLen);
+      memset(newSelf->index, -1, sizeof(newSelf->index));
       memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
              kNodeCopySize);
       memcpy(newSelf->partialKey(), self16->partialKey(), self->partialKeyLen);
+      newSelf->bitSet.init();
       newSelf->nextFree = Node16::kMaxNodes;
       int i = 0;
       for (auto x : self16->index) {
@@ -730,7 +746,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
 
 insert16:
   auto *self16 = static_cast<Node16 *>(self);
-  assert(self->type == Type_Node16);
+  assert(self->getType() == Type_Node16);
 
   ++self->numChildren;
   int i = 0;
@@ -753,6 +769,9 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
     if (self->numChildren == 48) {
       auto *self48 = static_cast<Node48 *>(self);
       auto *newSelf = allocators->node256.allocate(self->partialKeyLen);
+      for (int i = 0; i < 256; ++i) {
+        newSelf->children[i].child = nullptr;
+      }
       memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
              kNodeCopySize);
       memcpy(newSelf->partialKey(), self48->partialKey(), self->partialKeyLen);
@@ -819,7 +838,7 @@ template <bool kUseFreeList>
 void freeAndMakeCapacityAtLeast(Node *&self, int capacity,
                                 NodeAllocators *allocators,
                                 ConflictSet::Impl *impl) {
-  switch (self->type) {
+  switch (self->getType()) {
   case Type_Node0: {
     auto *self0 = (Node0 *)self;
     auto *newSelf = allocators->node0.allocate(capacity);
@@ -884,6 +903,8 @@ void freeAndMakeCapacityAtLeast(Node *&self, int capacity,
            kNodeCopySize);
     memcpy(newSelf->partialKey(), self48->partialKey(), self->partialKeyLen);
     newSelf->bitSet = self48->bitSet;
+    // TODO check codegen here?
+    newSelf->nextFree = 0;
     newSelf->bitSet.forEachInRange(
         [&](int c) {
           int index = newSelf->nextFree;
@@ -933,7 +954,7 @@ void maybeDecreaseCapacity(Node *&self, NodeAllocators *allocators,
                            ConflictSet::Impl *impl) {
   const int maxCapacity =
       (self->numChildren + int(self->entryPresent)) * self->partialKeyLen;
-  if (self->partialKeyCapacity <= maxCapacity) {
+  if (self->getCapacity() <= maxCapacity) {
     return;
   }
   freeAndMakeCapacityAtLeast</*kUseFreeList*/ false>(self, maxCapacity,
@@ -948,7 +969,7 @@ void maybeDownsize(Node *self, NodeAllocators *allocators,
   fprintf(stderr, "maybeDownsize: %s\n", getSearchPathPrintable(self).c_str());
 #endif
 
-  switch (self->type) {
+  switch (self->getType()) {
   case Type_Node0:
     __builtin_unreachable(); // GCOVR_EXCL_LINE
   case Type_Node3: {
@@ -966,7 +987,7 @@ void maybeDownsize(Node *self, NodeAllocators *allocators,
     auto *child = self3->children[0].child;
     int minCapacity = self3->partialKeyLen + 1 + child->partialKeyLen;
 
-    if (minCapacity > child->partialKeyCapacity) {
+    if (minCapacity > child->getCapacity()) {
       const bool update = child == dontInvalidate;
       freeAndMakeCapacityAtLeast</*kUseFreeList*/ true>(child, minCapacity,
                                                         allocators, impl);
@@ -1011,6 +1032,7 @@ void maybeDownsize(Node *self, NodeAllocators *allocators,
            kNodeCopySize);
     memcpy(newSelf->partialKey(), self16->partialKey(), self->partialKeyLen);
     // TODO replace with memcpy?
+    static_assert(Node3::kMaxNodes == kMinChildrenNode16 - 1);
     for (int i = 0; i < Node3::kMaxNodes; ++i) {
       newSelf->index[i] = self16->index[i];
       newSelf->children[i] = self16->children[i];
@@ -1028,6 +1050,7 @@ void maybeDownsize(Node *self, NodeAllocators *allocators,
            kNodeCopySize);
     memcpy(newSelf->partialKey(), self48->partialKey(), self->partialKeyLen);
 
+    static_assert(Node16::kMaxNodes == kMinChildrenNode48 - 1);
     int i = 0;
     self48->bitSet.forEachInRange(
         [&](int c) {
@@ -1051,10 +1074,12 @@ void maybeDownsize(Node *self, NodeAllocators *allocators,
   if (self->numChildren + int(self->entryPresent) < kMinChildrenNode256) {
     auto *self256 = (Node256 *)self;
     auto *newSelf = allocators->node48.allocate(self->partialKeyLen);
+    memset(newSelf->index, -1, sizeof(newSelf->index));
     memcpy((char *)newSelf + kNodeCopyBegin, (char *)self + kNodeCopyBegin,
            kNodeCopySize);
     memcpy(newSelf->partialKey(), self256->partialKey(), self->partialKeyLen);
 
+    newSelf->nextFree = 0;
     newSelf->bitSet = self256->bitSet;
     newSelf->bitSet.forEachInRange(
         [&](int c) {
@@ -1103,7 +1128,7 @@ Node *erase(Node *self, NodeAllocators *allocators, ConflictSet::Impl *impl,
     return result;
   }
 
-  switch (self->type) {
+  switch (self->getType()) {
   case Type_Node0:
     allocators->node0.release((Node0 *)self);
     break;
@@ -1123,7 +1148,7 @@ Node *erase(Node *self, NodeAllocators *allocators, ConflictSet::Impl *impl,
     __builtin_unreachable(); // GCOVR_EXCL_LINE
   }
 
-  switch (parent->type) {
+  switch (parent->getType()) {
   case Type_Node0:
     __builtin_unreachable(); // GCOVR_EXCL_LINE
   case Type_Node3: {
@@ -1481,7 +1506,7 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
       return result;
     }
   }
-  switch (n->type) {
+  switch (n->getType()) {
   case Type_Node0:
     // We would have returned above, after not finding a child
     __builtin_unreachable(); // GCOVR_EXCL_LINE
@@ -2027,7 +2052,7 @@ template <bool kBegin>
   } else {
     // Consider adding a partial key
     if ((*self)->numChildren == 0 && !(*self)->entryPresent) {
-      assert((*self)->partialKeyCapacity >= int(key.size()));
+      assert((*self)->getCapacity() >= int(key.size()));
       (*self)->partialKeyLen = key.size();
       memcpy((*self)->partialKey(), key.data(), (*self)->partialKeyLen);
       key = key.subspan((*self)->partialKeyLen,
@@ -2054,6 +2079,9 @@ template <bool kBegin>
     auto &child = getOrCreateChild(*self, key.front(), allocators);
     if (!child) {
       child = allocators->node0.allocate(key.size() - 1);
+      child->numChildren = 0;
+      child->entryPresent = false;
+      child->partialKeyLen = 0;
       child->parent = *self;
       child->parentsIndex = key.front();
       maxVersion(child, impl) =
@@ -2348,12 +2376,17 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
   explicit Impl(int64_t oldestVersion) : oldestVersion(oldestVersion) {
     // Insert ""
     root = allocators.node0.allocate(0);
+    root->numChildren = 0;
     root->parent = nullptr;
     rootMaxVersion = oldestVersion;
-    root->entry.pointVersion = oldestVersion;
-    root->entry.rangeVersion = oldestVersion;
+    root->entryPresent = false;
+    root->partialKeyLen = 0;
 
     addKey(root);
 
     root->entryPresent = true;
+    root->entry.pointVersion = oldestVersion;
+    root->entry.rangeVersion = oldestVersion;
   }
   ~Impl() { destroyTree(root); }
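Call sites now carry the initialization the constructors used to do, as in the two hunks above. If that bookkeeping ever gets too tedious, one option (hypothetical, not in this commit) is a helper that packages the post-allocate contract:

    // Hypothetical convenience wrapper; the field list is taken from this diff.
    Node0 *allocateNode0(NodeAllocators *allocators, int capacity, Node *parent,
                         uint8_t parentsIndex) {
      Node0 *n = allocators->node0.allocate(capacity);
      n->numChildren = 0;
      n->entryPresent = false;
      n->partialKeyLen = 0;
      n->parent = parent;
      n->parentsIndex = parentsIndex;
      return n;
    }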
@@ -2375,7 +2408,7 @@ int64_t &maxVersion(Node *n, ConflictSet::Impl *impl) {
   if (n == nullptr) {
     return impl->rootMaxVersion;
   }
-  switch (n->type) {
+  switch (n->getType()) {
   case Type_Node0:
     __builtin_unreachable(); // GCOVR_EXCL_LINE
   case Type_Node3: {
@@ -2654,7 +2687,7 @@ Iterator firstGeq(Node *n, std::string_view key) {
 
 [[maybe_unused]] void checkMemoryBoundInvariants(Node *node, bool &success) {
   int minNumChildren;
-  switch (node->type) {
+  switch (node->getType()) {
   case Type_Node0:
     minNumChildren = 0;
     break;
@@ -2718,7 +2751,7 @@ int64_t keyBytes = 0;
 int64_t peakKeyBytes = 0;
 
 int64_t getNodeSize(struct Node *n) {
-  switch (n->type) {
+  switch (n->getType()) {
   case Type_Node0:
     return sizeof(Node0);
   case Type_Node3:
@@ -2750,7 +2783,7 @@ int64_t getSearchPathLength(Node *n) {
 
 void addNode(Node *n) {
   nodeBytes += getNodeSize(n);
-  partialCapacityBytes += n->partialKeyCapacity;
+  partialCapacityBytes += n->getCapacity();
   if (nodeBytes > peakNodeBytes) {
     peakNodeBytes = nodeBytes;
   }
@@ -2761,7 +2794,7 @@ void addNode(Node *n) {
 
 void removeNode(Node *n) {
   nodeBytes -= getNodeSize(n);
-  partialCapacityBytes -= n->partialKeyCapacity;
+  partialCapacityBytes -= n->getCapacity();
 }
 
 void addKey(Node *n) {
@@ -2824,25 +2857,7 @@ void printTree() {
   debugPrintDot(stdout, cs.root, &cs);
 }
 
-#define ANKERL_NANOBENCH_IMPLEMENT
-#include "third_party/nanobench.h"
-
-int main(void) {
-  printTree();
-  return 0;
-  ankerl::nanobench::Bench bench;
-  ConflictSet::Impl cs{0};
-  for (int j = 0; j < 256; ++j) {
-    getOrCreateChild(cs.root, j, &cs.allocators) =
-        cs.allocators.node0.allocate(0);
-    if (j % 10 == 0) {
-      bench.run("MaxExclusive " + std::to_string(j), [&]() {
-        bench.doNotOptimizeAway(maxBetweenExclusive(cs.root, 0, 256));
-      });
-    }
-  }
-  return 0;
-}
+int main(void) { printTree(); }
 #endif
 
 #ifdef ENABLE_FUZZ