Files
conflict-set/ConflictSet.cpp
Andrew Noyes 6222b74787
Some checks failed
Tests / Clang total: 1096, failed: 2, passed: 1094
Tests / Release [gcc] total: 1096, failed: 2, passed: 1094
Tests / Release [gcc,aarch64] total: 824, passed: 824
Tests / Coverage total: 823, passed: 823
weaselab/conflict-set/pipeline/head There was a failure building this commit
Fix tests
Add the new symbol, and update the valgrind client request so that
Node::partialKeyCapacity is defined.
2024-03-20 12:21:59 -07:00

3016 lines
88 KiB
C++

/*
Copyright 2024 Andrew Noyes
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "ConflictSet.h"
#include "Internal.h"
#include <algorithm>
#include <bit>
#include <cassert>
#include <compare>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <inttypes.h>
#include <limits>
#include <span>
#include <string>
#include <string_view>
#include <utility>
#ifdef HAS_AVX
#include <immintrin.h>
#elif defined(HAS_ARM_NEON)
#include <arm_neon.h>
#endif
#include <memcheck.h>
// Use assert for checking potentially complex properties during tests.
// Use assume to hint simple properties to the optimizer.
// TODO use the c++23 version when that's available
#ifdef NDEBUG
#if __has_builtin(__builtin_assume)
#define assume(e) __builtin_assume(e)
#else
#define assume(e) \
if (!(e)) \
__builtin_unreachable()
#endif
#else
#define assume assert
#endif
// Memory-accounting hooks. When SHOW_MEMORY is enabled they are defined
// elsewhere to track live nodes/keys; otherwise they compile away to no-ops.
#if SHOW_MEMORY
void addNode(struct Node *);
void removeNode(struct Node *);
void addKey(struct Node *);
void removeKey(struct Node *);
#else
constexpr void addNode(struct Node *) {}
constexpr void removeNode(struct Node *) {}
constexpr void addKey(struct Node *) {}
constexpr void removeKey(struct Node *) {}
#endif
// ==================== BEGIN IMPLEMENTATION ====================
// Per-key version information stored at a node.
// NOTE(review): presumably pointVersion is the version of the last point
// write at this key and rangeVersion the version of the last range write
// covering it — confirm against the write path (not visible in this chunk).
struct Entry {
  int64_t pointVersion;
  int64_t rangeVersion;
};
// Fixed-size 256-bit set, used by the larger node types to record which
// child byte values are present.
struct BitSet {
  bool test(int i) const;
  void set(int i);
  void reset(int i);
  int firstSetGeq(int i) const;
  // Calls `f` with the index of each bit set in [begin, end)
  template <class F> void forEachInRange(F f, int begin, int end) const {
    // See section 3.1 in https://arxiv.org/pdf/1709.07821.pdf for details about
    // this approach
    if ((begin >> 6) == (end >> 6)) {
      // begin and end fall in the same 64-bit word: mask off both ends and
      // extract set bits with the lowest-set-bit trick.
      uint64_t word = words[begin >> 6] & (uint64_t(-1) << (begin & 63)) &
                      ~(uint64_t(-1) << (end & 63));
      while (word) {
        uint64_t temp = word & -word;
        int index = (begin & ~63) + std::countr_zero(word);
        f(index);
        word ^= temp;
      }
      return;
    }
    // Check begin partial word
    if (begin & 63) {
      uint64_t word = words[begin >> 6] & (uint64_t(-1) << (begin & 63));
      if (std::popcount(word) + (begin & 63) == 64) {
        // Every bit from begin to the end of this word is set: call f on the
        // dense run directly, which also advances begin to the word boundary.
        while (begin & 63) {
          f(begin++);
        }
      } else {
        while (word) {
          uint64_t temp = word & -word;
          int index = (begin & ~63) + std::countr_zero(word);
          f(index);
          word ^= temp;
        }
        // Round begin up to the next word boundary.
        begin &= ~63;
        begin += 64;
      }
    }
    // Check inner, full words
    while (begin != (end & ~63)) {
      uint64_t word = words[begin >> 6];
      if (word == uint64_t(-1)) {
        // Fully-set word: skip the bit-extraction loop.
        for (int i = 0; i < 64; ++i) {
          f(begin + i);
        }
      } else {
        while (word) {
          uint64_t temp = word & -word;
          int index = begin + std::countr_zero(word);
          f(index);
          word ^= temp;
        }
      }
      begin += 64;
    }
    if (end & 63) {
      // Check end partial word
      uint64_t word = words[end >> 6] & ~(uint64_t(-1) << (end & 63));
      if (std::popcount(word) == (end & 63)) {
        // Every bit below end is set: dense run.
        while (begin < end) {
          f(begin++);
        }
      } else {
        while (word) {
          uint64_t temp = word & -word;
          int index = begin + std::countr_zero(word);
          f(index);
          word ^= temp;
        }
      }
    }
  }
  // Zero all bits. (Node memory is recycled uninitialized, so this is done
  // explicitly rather than in a constructor.)
  void init() {
    for (auto &w : words) {
      w = 0;
    }
  }

private:
  uint64_t words[4];
};
// Returns whether bit `i` (0 <= i < 256) is set.
bool BitSet::test(int i) const {
  assert(0 <= i);
  assert(i < 256);
  return (words[i >> 6] >> (i & 63)) & 1;
}
// Sets bit `i` (0 <= i < 256).
void BitSet::set(int i) {
  assert(0 <= i);
  assert(i < 256);
  const uint64_t bit = uint64_t(1) << (i & 63);
  words[i >> 6] |= bit;
}
// Clears bit `i` (0 <= i < 256).
void BitSet::reset(int i) {
  assert(0 <= i);
  assert(i < 256);
  const uint64_t bit = uint64_t(1) << (i & 63);
  words[i >> 6] &= ~bit;
}
// Returns the index of the first set bit >= i, or -1 if there is none.
// i may be >= 256, in which case no word is examined.
int BitSet::firstSetGeq(int i) const {
  assume(0 <= i);
  const int firstWord = i >> 6;
  for (int w = firstWord; w < 4; ++w) {
    // Only the first examined word is masked below bit (i & 63); later words
    // are searched in full.
    uint64_t masked = words[w];
    if (w == firstWord) {
      masked &= uint64_t(-1) << (i & 63);
    }
    if (masked != 0) {
      return (w << 6) + std::countr_zero(masked);
    }
  }
  return -1;
}
// Discriminator for the concrete node layout; the number names the maximum
// child count of that layout (Node0 is a leaf-only node).
enum Type {
  Type_Node0,
  Type_Node3,
  Type_Node16,
  Type_Node48,
  Type_Node256,
};
template <class T> struct BoundedFreeListAllocator;
// Common header shared by every node type. The concrete layout
// (Node0/Node3/Node16/Node48/Node256) follows this header in memory, and the
// partial-key bytes are stored immediately after the concrete struct.
struct Node {
  /* begin section that's copied to the next node */
  Entry entry;
  Node *parent;
  // Number of valid bytes in partialKey().
  int32_t partialKeyLen;
  int16_t numChildren;
  // Whether `entry` holds a live value for this node's key.
  bool entryPresent;
  // The child-slot byte under which `parent` points at this node.
  uint8_t parentsIndex;
  /* end section that's copied to the next node */
  uint8_t *partialKey();
  size_t size() const;
  Type getType() const { return type; }
  int32_t getCapacity() const { return partialKeyCapacity; }

private:
  template <class T> friend struct BoundedFreeListAllocator;
  // These are publically readable, but should only be written by
  // BoundedFreeListAllocator
  Type type;
  int32_t partialKeyCapacity;
};
// Byte range of the Node header that copyChildrenAndKeyFrom transfers in a
// single memcpy: [entry, parentsIndex] inclusive. type and partialKeyCapacity
// are deliberately excluded — they belong to the allocation, not the logical
// node.
constexpr int kNodeCopyBegin = offsetof(Node, entry);
constexpr int kNodeCopySize =
    offsetof(Node, parentsIndex) + sizeof(Node::parentsIndex) - kNodeCopyBegin;
// A child pointer paired with the max version cached for that subtree.
// (Max versions live in the parent; see maxVersion's use in maybeDownsize.)
struct Child {
  int64_t childMaxVersion;
  Node *child;
};
// copyChildrenAndKeyFrom is responsible for copying all
// public members of Node, copying the partial key, logically copying the
// children (converting representation if necessary), and updating all the
// children's parent pointers. The caller must then insert the new node into the
// tree.
// Leaf node: no children, always carries an entry.
struct Node0 : Node {
  constexpr static auto kType = Type_Node0;
  // Partial-key bytes live directly after the struct.
  uint8_t *partialKey() { return (uint8_t *)(this + 1); }
  void copyChildrenAndKeyFrom(const Node0 &other);
  void copyChildrenAndKeyFrom(const struct Node3 &other);
  size_t size() const { return sizeof(Node0) + getCapacity(); }
};
// Smallest interior node: up to 3 children, kept in sorted order by key byte.
struct Node3 : Node {
  constexpr static auto kMaxNodes = 3;
  constexpr static auto kType = Type_Node3;
  // Sorted
  uint8_t index[kMaxNodes];
  // children[i] corresponds to key byte index[i].
  Child children[kMaxNodes];
  uint8_t *partialKey() { return (uint8_t *)(this + 1); }
  void copyChildrenAndKeyFrom(const Node0 &other);
  void copyChildrenAndKeyFrom(const Node3 &other);
  void copyChildrenAndKeyFrom(const struct Node16 &other);
  size_t size() const { return sizeof(Node3) + getCapacity(); }
};
// Interior node with up to 16 children; index stays sorted so SIMD search
// (see getNodeIndex/getChildGeqSimd) can scan it in one shot.
struct Node16 : Node {
  constexpr static auto kType = Type_Node16;
  constexpr static auto kMaxNodes = 16;
  // Sorted
  uint8_t index[kMaxNodes];
  Child children[kMaxNodes];
  uint8_t *partialKey() { return (uint8_t *)(this + 1); }
  void copyChildrenAndKeyFrom(const Node3 &other);
  void copyChildrenAndKeyFrom(const Node16 &other);
  void copyChildrenAndKeyFrom(const struct Node48 &other);
  size_t size() const { return sizeof(Node16) + getCapacity(); }
};
// Interior node with up to 48 children. Lookup is indirect: index[byte] holds
// the slot in children[] (-1 when absent); bitSet mirrors which bytes exist
// so ordered iteration is cheap.
struct Node48 : Node {
  constexpr static auto kType = Type_Node48;
  constexpr static auto kMaxNodes = 48;
  BitSet bitSet;
  // Next unused slot in children[]; slots are handed out densely on insert.
  int8_t nextFree;
  int8_t index[256];
  Child children[kMaxNodes];
  uint8_t *partialKey() { return (uint8_t *)(this + 1); }
  void copyChildrenAndKeyFrom(const Node16 &other);
  void copyChildrenAndKeyFrom(const Node48 &other);
  void copyChildrenAndKeyFrom(const struct Node256 &other);
  size_t size() const { return sizeof(Node48) + getCapacity(); }
};
// Largest interior node: direct array of 256 child slots, with bitSet
// mirroring occupancy for fast ordered scans.
struct Node256 : Node {
  constexpr static auto kType = Type_Node256;
  BitSet bitSet;
  Child children[256];
  uint8_t *partialKey() { return (uint8_t *)(this + 1); }
  void copyChildrenAndKeyFrom(const Node48 &other);
  void copyChildrenAndKeyFrom(const Node256 &other);
  size_t size() const { return sizeof(Node256) + getCapacity(); }
};
// Node0 <- Node0: header fields plus partial key; a leaf has no children to
// fix up. `&other + 1` is where the source's partial key is stored.
inline void Node0::copyChildrenAndKeyFrom(const Node0 &other) {
  memcpy((char *)this + kNodeCopyBegin, (char *)&other + kNodeCopyBegin,
         kNodeCopySize);
  memcpy(partialKey(), &other + 1, partialKeyLen);
}
// Node0 <- Node3 (shrink): only header and partial key survive; the caller
// guarantees the Node3 has no children left.
inline void Node0::copyChildrenAndKeyFrom(const Node3 &other) {
  memcpy((char *)this + kNodeCopyBegin, (char *)&other + kNodeCopyBegin,
         kNodeCopySize);
  memcpy(partialKey(), &other + 1, partialKeyLen);
}
// Node3 <- Node0 (grow): header and partial key; no children exist yet.
inline void Node3::copyChildrenAndKeyFrom(const Node0 &other) {
  memcpy((char *)this + kNodeCopyBegin, (char *)&other + kNodeCopyBegin,
         kNodeCopySize);
  memcpy(partialKey(), &other + 1, partialKeyLen);
}
// Node3 <- Node3 (e.g. capacity change): copy header, then the whole
// index/children tail in one memcpy, then re-point the children's parent.
inline void Node3::copyChildrenAndKeyFrom(const Node3 &other) {
  memcpy((char *)this + kNodeCopyBegin, (char *)&other + kNodeCopyBegin,
         kNodeCopySize);
  // sizeof(*this) - sizeof(Node) covers exactly index[] and children[].
  memcpy(index, other.index, sizeof(*this) - sizeof(Node));
  memcpy(partialKey(), &other + 1, partialKeyLen);
  for (int i = 0; i < numChildren; ++i) {
    assert(children[i].child->parent == &other);
    children[i].child->parent = this;
  }
}
// Node3 <- Node16 (shrink): copy the first kMaxNodes (= 3) slots of the
// sorted arrays; caller guarantees numChildren fits in a Node3.
inline void Node3::copyChildrenAndKeyFrom(const Node16 &other) {
  memcpy((char *)this + kNodeCopyBegin, (char *)&other + kNodeCopyBegin,
         kNodeCopySize);
  memcpy(index, other.index, kMaxNodes);
  memcpy(children, other.children, kMaxNodes * sizeof(Child));
  memcpy(partialKey(), &other + 1, partialKeyLen);
  for (int i = 0; i < numChildren; ++i) {
    assert(children[i].child->parent == &other);
    children[i].child->parent = this;
  }
}
// Node16 <- Node3 (grow): only happens when the Node3 is full, hence the
// assert; the three sorted slots carry over unchanged.
inline void Node16::copyChildrenAndKeyFrom(const Node3 &other) {
  memcpy((char *)this + kNodeCopyBegin, (char *)&other + kNodeCopyBegin,
         kNodeCopySize);
  memcpy(index, other.index, Node3::kMaxNodes);
  memcpy(children, other.children, Node3::kMaxNodes * sizeof(Child));
  memcpy(partialKey(), &other + 1, partialKeyLen);
  assert(numChildren == Node3::kMaxNodes);
  for (int i = 0; i < Node3::kMaxNodes; ++i) {
    assert(children[i].child->parent == &other);
    children[i].child->parent = this;
  }
}
// Node16 <- Node16 (e.g. capacity change): whole index array, then only the
// occupied children slots, re-pointing parents as we go.
inline void Node16::copyChildrenAndKeyFrom(const Node16 &other) {
  memcpy((char *)this + kNodeCopyBegin, (char *)&other + kNodeCopyBegin,
         kNodeCopySize);
  memcpy(index, other.index, sizeof(index));
  for (int i = 0; i < numChildren; ++i) {
    children[i] = other.children[i];
    assert(children[i].child->parent == &other);
    children[i].child->parent = this;
  }
  memcpy(partialKey(), &other + 1, partialKeyLen);
}
// Node16 <- Node48 (shrink): walk the source's bitset in ascending key-byte
// order, packing the (indirectly stored) children into the dense sorted
// arrays of the Node16.
inline void Node16::copyChildrenAndKeyFrom(const Node48 &other) {
  memcpy((char *)this + kNodeCopyBegin, (char *)&other + kNodeCopyBegin,
         kNodeCopySize);
  int i = 0;
  other.bitSet.forEachInRange(
      [&](int c) {
        // Suppress a false positive -Waggressive-loop-optimizations warning
        // in gcc
        assume(i < Node16::kMaxNodes);
        index[i] = c;
        children[i] = other.children[other.index[c]];
        assert(children[i].child->parent == &other);
        children[i].child->parent = this;
        ++i;
      },
      0, 256);
  memcpy(partialKey(), &other + 1, partialKeyLen);
}
// Node48 <- Node16 (grow): only happens when the Node16 is full. Builds the
// byte->slot indirection and bitset; children keep their slot numbers.
inline void Node48::copyChildrenAndKeyFrom(const Node16 &other) {
  memcpy((char *)this + kNodeCopyBegin, (char *)&other + kNodeCopyBegin,
         kNodeCopySize);
  assert(numChildren == Node16::kMaxNodes);
  // -1 marks "no child for this byte".
  memset(index, -1, sizeof(index));
  memcpy(partialKey(), &other + 1, partialKeyLen);
  bitSet.init();
  nextFree = Node16::kMaxNodes;
  int i = 0;
  for (auto x : other.index) {
    bitSet.set(x);
    index[x] = i;
    children[i] = other.children[i];
    assert(children[i].child->parent == &other);
    children[i].child->parent = this;
    ++i;
  }
}
// Node48 <- Node48 (e.g. capacity change): bulk-copy bitSet, nextFree and
// the 256-byte indirection table in one memcpy, then the occupied slots.
inline void Node48::copyChildrenAndKeyFrom(const Node48 &other) {
  memcpy((char *)this + kNodeCopyBegin, (char *)&other + kNodeCopyBegin,
         kNodeCopySize);
  // Everything between the Node header and children[]: bitSet, nextFree,
  // index[256].
  memcpy(&bitSet, &other.bitSet,
         sizeof(*this) - sizeof(Node) - sizeof(children));
  for (int i = 0; i < numChildren; ++i) {
    children[i] = other.children[i];
    assert(children[i].child->parent == &other);
    children[i].child->parent = this;
  }
  memcpy(partialKey(), &other + 1, partialKeyLen);
}
// Node48 <- Node256 (shrink): repack the sparse direct-mapped children into
// dense slots, in ascending key-byte order.
inline void Node48::copyChildrenAndKeyFrom(const Node256 &other) {
  memcpy((char *)this + kNodeCopyBegin, (char *)&other + kNodeCopyBegin,
         kNodeCopySize);
  memset(index, -1, sizeof(index));
  nextFree = other.numChildren;
  bitSet = other.bitSet;
  int i = 0;
  bitSet.forEachInRange(
      [&](int c) {
        // Suppress a false positive -Waggressive-loop-optimizations warning
        // in gcc.
        assume(i < Node48::kMaxNodes);
        index[c] = i;
        children[i] = other.children[c];
        assert(children[i].child->parent == &other);
        children[i].child->parent = this;
        ++i;
      },
      0, 256);
  memcpy(partialKey(), &other + 1, partialKeyLen);
}
// Node256 <- Node48 (grow): spread the densely-stored children out into the
// direct-mapped array. Unused slots are zeroed so getChild's null check works.
inline void Node256::copyChildrenAndKeyFrom(const Node48 &other) {
  memcpy((char *)this + kNodeCopyBegin, (char *)&other + kNodeCopyBegin,
         kNodeCopySize);
  memset(children, 0, sizeof(children));
  bitSet = other.bitSet;
  bitSet.forEachInRange(
      [&](int c) {
        children[c] = other.children[other.index[c]];
        assert(children[c].child->parent == &other);
        children[c].child->parent = this;
      },
      0, 256);
  memcpy(partialKey(), &other + 1, partialKeyLen);
}
// Node256 <- Node256 (e.g. capacity change): same slot layout; copy the
// occupied slots and re-point parents.
inline void Node256::copyChildrenAndKeyFrom(const Node256 &other) {
  memcpy((char *)this + kNodeCopyBegin, (char *)&other + kNodeCopyBegin,
         kNodeCopySize);
  memset(children, 0, sizeof(children));
  bitSet = other.bitSet;
  bitSet.forEachInRange(
      [&](int c) {
        children[c] = other.children[c];
        assert(children[c].child->parent == &other);
        children[c].child->parent = this;
      },
      0, 256);
  memcpy(partialKey(), &other + 1, partialKeyLen);
}
namespace {
// Debugging helpers (defined later in the file): reconstruct the key prefix
// leading to `n` from the root.
std::string getSearchPathPrintable(Node *n);
std::string getSearchPath(Node *n);
} // namespace
// Bound memory usage following the analysis in the ART paper
// Each node with an entry present gets a budget of kBytesPerKey. Node0 always
// has an entry present.
constexpr int kBytesPerKey = 144;
// Induction hypothesis is that each node's surplus is >= kMinNodeSurplus
constexpr int kMinNodeSurplus = 104;
// Minimum occupancy (children + entryPresent) per node type; below these a
// node is shrunk to the next-smaller type (see maybeDownsize).
constexpr int kMinChildrenNode3 = 2;
constexpr int kMinChildrenNode16 = 4;
constexpr int kMinChildrenNode48 = 17;
constexpr int kMinChildrenNode256 = 49;
// Each static_assert below checks that a minimally-occupied node of the
// given type still satisfies the surplus induction hypothesis.
constexpr int kNode256Surplus =
    kMinChildrenNode256 * kMinNodeSurplus - sizeof(Node256);
static_assert(kNode256Surplus >= kMinNodeSurplus);
constexpr int kNode48Surplus =
    kMinChildrenNode48 * kMinNodeSurplus - sizeof(Node48);
static_assert(kNode48Surplus >= kMinNodeSurplus);
constexpr int kNode16Surplus =
    kMinChildrenNode16 * kMinNodeSurplus - sizeof(Node16);
static_assert(kNode16Surplus >= kMinNodeSurplus);
constexpr int kNode3Surplus =
    kMinChildrenNode3 * kMinNodeSurplus - sizeof(Node3);
static_assert(kNode3Surplus >= kMinNodeSurplus);
static_assert(kBytesPerKey - sizeof(Node0) >= kMinNodeSurplus);
// setOldestVersion will additionally try to maintain this property:
// `(children + entryPresent) * length >= capacity`
//
// Which should give us the budget to pay for the key bytes. (children +
// entryPresent) is a lower bound on how many keys these bytes are a prefix of
constexpr int64_t kFreeListMaxMemory = 1 << 20;
// Allocator for one concrete node type T. Freed nodes are recycled through an
// intrusive singly-linked freelist: the "next" pointer is stored in the
// node's own first bytes. freeListBytes bounds the memory parked per
// allocator. The VALGRIND_* client requests keep memcheck's shadow state
// honest: parked nodes are NOACCESS, and a node handed back out is UNDEFINED
// except for type/partialKeyCapacity, which survive recycling.
template <class T> struct BoundedFreeListAllocator {
  static_assert(sizeof(T) >= sizeof(void *));
  static_assert(std::derived_from<T, Node>);
  static_assert(std::is_trivial_v<T>);

  T *allocate(int partialKeyCapacity) {
    if (freeList != nullptr) {
      T *n = (T *)freeList;
      // Open just enough of the head node to read the embedded next pointer.
      VALGRIND_MAKE_MEM_DEFINED(freeList, sizeof(freeList));
      memcpy(&freeList, freeList, sizeof(freeList));
      VALGRIND_MAKE_MEM_UNDEFINED(n, sizeof(T));
      // type and partialKeyCapacity were written at safe_malloc time and are
      // still valid; re-mark them defined after the blanket UNDEFINED above.
      VALGRIND_MAKE_MEM_DEFINED(&n->partialKeyCapacity,
                                sizeof(n->partialKeyCapacity));
      VALGRIND_MAKE_MEM_DEFINED(&n->type, sizeof(n->type));
      assert(n->type == T::kType);
      VALGRIND_MAKE_MEM_UNDEFINED(n + 1, n->partialKeyCapacity);
      freeListBytes -= sizeof(T) + n->partialKeyCapacity;
      if (n->partialKeyCapacity >= partialKeyCapacity) {
        return n;
      } else {
        // The intent is to filter out too-small nodes in the freelist
        removeNode(n);
        safe_free(n, sizeof(T) + n->partialKeyCapacity);
      }
    }
    auto *result = (T *)safe_malloc(sizeof(T) + partialKeyCapacity);
    result->type = T::kType;
    result->partialKeyCapacity = partialKeyCapacity;
    addNode(result);
    return result;
  }

  void release(T *p) {
    static_assert(std::is_trivially_destructible_v<T>);
    if (freeListBytes >= kFreeListMaxMemory) {
      // Freelist budget exhausted: actually return the memory.
      removeNode(p);
      return safe_free(p, sizeof(T) + p->partialKeyCapacity);
    }
    // Push: store the current head inside the node, then park it NOACCESS.
    memcpy((void *)p, &freeList, sizeof(freeList));
    freeList = p;
    freeListBytes += sizeof(T) + p->partialKeyCapacity;
    VALGRIND_MAKE_MEM_NOACCESS(freeList, sizeof(T) + p->partialKeyCapacity);
  }

  ~BoundedFreeListAllocator() {
    for (void *iter = freeList; iter != nullptr;) {
      // Re-open the parked node so we can read its next pointer and size.
      VALGRIND_MAKE_MEM_DEFINED(iter, sizeof(Node));
      auto *tmp = (T *)iter;
      memcpy(&iter, iter, sizeof(void *));
      removeNode((tmp));
      safe_free(tmp, sizeof(T) + tmp->partialKeyCapacity);
    }
  }

private:
  int64_t freeListBytes = 0;
  void *freeList = nullptr;
};
uint8_t *Node::partialKey() {
switch (type) {
case Type_Node0:
return ((Node0 *)this)->partialKey();
case Type_Node3:
return ((Node3 *)this)->partialKey();
case Type_Node16:
return ((Node16 *)this)->partialKey();
case Type_Node48:
return ((Node48 *)this)->partialKey();
case Type_Node256:
return ((Node256 *)this)->partialKey();
default: // GCOVR_EXCL_LINE
__builtin_unreachable(); // GCOVR_EXCL_LINE
}
}
size_t Node::size() const {
switch (type) {
case Type_Node0:
return ((Node0 *)this)->size();
case Type_Node3:
return ((Node3 *)this)->size();
case Type_Node16:
return ((Node16 *)this)->size();
case Type_Node48:
return ((Node48 *)this)->size();
case Type_Node256:
return ((Node256 *)this)->size();
default: // GCOVR_EXCL_LINE
__builtin_unreachable(); // GCOVR_EXCL_LINE
}
}
// One bounded freelist per concrete node type.
struct NodeAllocators {
  BoundedFreeListAllocator<Node0> node0;
  BoundedFreeListAllocator<Node3> node3;
  BoundedFreeListAllocator<Node16> node16;
  BoundedFreeListAllocator<Node48> node48;
  BoundedFreeListAllocator<Node256> node256;
};
// Returns the slot in self->index that equals `index`, or -1 if not present.
// Node3 uses a plain scan; Node16 uses SSE/NEON where available.
template <class NodeT> int getNodeIndex(NodeT *self, uint8_t index) {
  static_assert(std::is_same_v<NodeT, Node3> || std::is_same_v<NodeT, Node16>);
  // cachegrind says the plain loop is fewer instructions and more mis-predicted
  // branches. Microbenchmark says plain loop is faster. It's written in this
  // weird "generic" way though in case someday we can use the simd
  // implementation easily if we want.
  if constexpr (std::is_same_v<NodeT, Node3>) {
    Node3 *n = (Node3 *)self;
    for (int i = 0; i < n->numChildren; ++i) {
      if (n->index[i] == index) {
        return i;
      }
    }
    return -1;
  }
#ifdef HAS_AVX
  // Based on https://www.the-paper-trail.org/post/art-paper-notes/
  // key_vec is 16 repeated copies of the searched-for byte, one for every
  // possible position in child_keys that needs to be searched.
  __m128i key_vec = _mm_set1_epi8(index);
  // Compare all child_keys to 'index' in parallel. Don't worry if some of the
  // keys aren't valid, we'll mask the results to only consider the valid ones
  // below.
  __m128i indices;
  memcpy(&indices, self->index, NodeT::kMaxNodes);
  __m128i results = _mm_cmpeq_epi8(key_vec, indices);
  // Build a mask to select only the first node->num_children values from the
  // comparison (because the other values are meaningless)
  uint32_t mask = (1 << self->numChildren) - 1;
  // Change the results of the comparison into a bitfield, masking off any
  // invalid comparisons.
  uint32_t bitfield = _mm_movemask_epi8(results) & mask;
  // No match if there are no '1's in the bitfield.
  if (bitfield == 0)
    return -1;
  // Find the index of the first '1' in the bitfield by counting the trailing
  // zeros.
  return std::countr_zero(bitfield);
#elif defined(HAS_ARM_NEON)
  // Based on
  // https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
  uint8x16_t indices;
  memcpy(&indices, self->index, NodeT::kMaxNodes);
  // 0xff for each match
  uint16x8_t results =
      vreinterpretq_u16_u8(vceqq_u8(vdupq_n_u8(index), indices));
  static_assert(NodeT::kMaxNodes <= 16);
  assume(self->numChildren <= NodeT::kMaxNodes);
  // 4 bits per lane after the narrowing shift below, hence numChildren * 4.
  uint64_t mask = self->numChildren == 16
                      ? uint64_t(-1)
                      : (uint64_t(1) << (self->numChildren * 4)) - 1;
  // 0xf for each match in valid range
  uint64_t bitfield =
      vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0) & mask;
  if (bitfield == 0)
    return -1;
  return std::countr_zero(bitfield) / 4;
#else
  for (int i = 0; i < self->numChildren; ++i) {
    if (self->index[i] == index) {
      return i;
    }
  }
  return -1;
#endif
}
// Precondition - an entry for index must exist in the node
// Returns a reference to the child pointer slot so callers may overwrite it.
Node *&getChildExists(Node *self, uint8_t index) {
  switch (self->getType()) {
  case Type_Node0:           // GCOVR_EXCL_LINE
    __builtin_unreachable(); // GCOVR_EXCL_LINE
  case Type_Node3: {
    auto *self3 = static_cast<Node3 *>(self);
    return self3->children[getNodeIndex(self3, index)].child;
  }
  case Type_Node16: {
    auto *self16 = static_cast<Node16 *>(self);
    return self16->children[getNodeIndex(self16, index)].child;
  }
  case Type_Node48: {
    auto *self48 = static_cast<Node48 *>(self);
    assert(self48->bitSet.test(index));
    return self48->children[self48->index[index]].child;
  }
  case Type_Node256: {
    auto *self256 = static_cast<Node256 *>(self);
    assert(self256->bitSet.test(index));
    return self256->children[index].child;
  }
  default:                   // GCOVR_EXCL_LINE
    __builtin_unreachable(); // GCOVR_EXCL_LINE
  }
}
// Accessors for state stored outside the node itself (max versions live in
// the parent's Child slot, and getInTree yields the pointer that refers to
// `n`); defined after ConflictSet::Impl is declared.
int64_t &maxVersion(Node *n, ConflictSet::Impl *);
Node *&getInTree(Node *n, ConflictSet::Impl *);
// Returns the child stored under key byte `index`, or nullptr if absent.
Node *getChild(Node *self, uint8_t index) {
  switch (self->getType()) {
  case Type_Node0:
    // Leaves have no children.
    return nullptr;
  case Type_Node3: {
    auto *n = static_cast<Node3 *>(self);
    const int slot = getNodeIndex(n, index);
    if (slot < 0) {
      return nullptr;
    }
    return n->children[slot].child;
  }
  case Type_Node16: {
    auto *n = static_cast<Node16 *>(self);
    const int slot = getNodeIndex(n, index);
    if (slot < 0) {
      return nullptr;
    }
    return n->children[slot].child;
  }
  case Type_Node48: {
    auto *n = static_cast<Node48 *>(self);
    // index[] holds -1 for absent bytes.
    const int slot = n->index[index];
    if (slot < 0) {
      return nullptr;
    }
    return n->children[slot].child;
  }
  case Type_Node256:
    // Direct-mapped; empty slots hold nullptr.
    return static_cast<Node256 *>(self)->children[index].child;
  default:                   // GCOVR_EXCL_LINE
    __builtin_unreachable(); // GCOVR_EXCL_LINE
  }
}
// Returns the smallest key byte >= child present in self, or -1 if none.
// Node3 uses a plain scan over the sorted index; Node16 uses SSE/NEON where
// available, with the plain loop kept as an assert-time oracle.
template <class NodeT> int getChildGeqSimd(NodeT *self, int child) {
  static_assert(std::is_same_v<NodeT, Node3> || std::is_same_v<NodeT, Node16>);
  // cachegrind says the plain loop is fewer instructions and more mis-predicted
  // branches. Microbenchmark says plain loop is faster. It's written in this
  // weird "generic" way though in case someday we can use the simd
  // implementation easily if we want.
  if constexpr (std::is_same_v<NodeT, Node3>) {
    Node3 *n = (Node3 *)self;
    for (int i = 0; i < n->numChildren; ++i) {
      if (n->index[i] >= child) {
        return n->index[i];
      }
    }
    return -1;
  }
#ifdef HAS_AVX
  __m128i key_vec = _mm_set1_epi8(child);
  __m128i indices;
  memcpy(&indices, self->index, NodeT::kMaxNodes);
  // index[i] >= child  <=>  child == min(child, index[i]) (unsigned).
  __m128i results = _mm_cmpeq_epi8(key_vec, _mm_min_epu8(key_vec, indices));
  int mask = (1 << self->numChildren) - 1;
  uint32_t bitfield = _mm_movemask_epi8(results) & mask;
  // The index array is sorted, so the first matching slot holds the answer.
  int result = bitfield == 0 ? -1 : self->index[std::countr_zero(bitfield)];
  assert(result == [&]() -> int {
    for (int i = 0; i < self->numChildren; ++i) {
      if (self->index[i] >= child) {
        return self->index[i];
      }
    }
    return -1;
  }());
  return result;
#elif defined(HAS_ARM_NEON)
  uint8x16_t indices;
  memcpy(&indices, self->index, sizeof(self->index));
  // 0xff for each leq
  auto results = vcleq_u8(vdupq_n_u8(child), indices);
  static_assert(NodeT::kMaxNodes <= 16);
  assume(self->numChildren <= NodeT::kMaxNodes);
  // 4 bits per lane after the narrowing shift below.
  uint64_t mask = self->numChildren == 16
                      ? uint64_t(-1)
                      : (uint64_t(1) << (self->numChildren * 4)) - 1;
  // 0xf for each 0xff (within mask)
  uint64_t bitfield =
      vget_lane_u64(
          vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(results), 4)),
          0) &
      mask;
  int simd = bitfield == 0 ? -1 : self->index[std::countr_zero(bitfield) / 4];
  assert(simd == [&]() -> int {
    for (int i = 0; i < self->numChildren; ++i) {
      if (self->index[i] >= child) {
        return self->index[i];
      }
    }
    return -1;
  }());
  return simd;
#else
  for (int i = 0; i < self->numChildren; ++i) {
    if (i > 0) {
      assert(self->index[i - 1] < self->index[i]);
    }
    if (self->index[i] >= child) {
      return self->index[i];
    }
  }
  return -1;
#endif
}
// Returns the smallest key byte >= child present in self, or -1 if none.
// `child` is an int so callers can pass 256 to mean "past the last byte".
int getChildGeq(Node *self, int child) {
  if (child > 255) {
    return -1;
  }
  switch (self->getType()) {
  case Type_Node0:
    return -1;
  case Type_Node3:
    return getChildGeqSimd(static_cast<Node3 *>(self), child);
  case Type_Node16:
    return getChildGeqSimd(static_cast<Node16 *>(self), child);
  case Type_Node48:
    [[fallthrough]];
  case Type_Node256: {
    // Both large node types keep their bitSet at the same offset, so one
    // code path serves both.
    static_assert(offsetof(Node48, bitSet) == offsetof(Node256, bitSet));
    auto *self48 = static_cast<Node48 *>(self);
    return self48->bitSet.firstSetGeq(child);
  }
  default:                   // GCOVR_EXCL_LINE
    __builtin_unreachable(); // GCOVR_EXCL_LINE
  }
}
// Caller is responsible for assigning a non-null pointer to the returned
// reference if null
Node *&getOrCreateChild(Node *&self, uint8_t index,
                        NodeAllocators *allocators) {
  // Fast path for if it exists already
  switch (self->getType()) {
  case Type_Node0:
    break;
  case Type_Node3: {
    auto *self3 = static_cast<Node3 *>(self);
    int i = getNodeIndex(self3, index);
    if (i >= 0) {
      return self3->children[i].child;
    }
  } break;
  case Type_Node16: {
    auto *self16 = static_cast<Node16 *>(self);
    int i = getNodeIndex(self16, index);
    if (i >= 0) {
      return self16->children[i].child;
    }
  } break;
  case Type_Node48: {
    auto *self48 = static_cast<Node48 *>(self);
    int secondIndex = self48->index[index];
    if (secondIndex >= 0) {
      return self48->children[secondIndex].child;
    }
  } break;
  case Type_Node256: {
    auto *self256 = static_cast<Node256 *>(self);
    if (auto &result = self256->children[index].child; result != nullptr) {
      return result;
    }
  } break;
  default:                   // GCOVR_EXCL_LINE
    __builtin_unreachable(); // GCOVR_EXCL_LINE
  }
  // Slow path: insert a new slot. If the node is full it's first grown to the
  // next size class, and the goto jumps into that class's insertion code.
  switch (self->getType()) {
  case Type_Node0: {
    // Node0 has no child slots at all; grow to Node3 unconditionally.
    auto *self0 = static_cast<Node0 *>(self);
    auto *newSelf = allocators->node3.allocate(self->partialKeyLen);
    newSelf->copyChildrenAndKeyFrom(*self0);
    allocators->node0.release(self0);
    self = newSelf;
    goto insert3;
  }
  case Type_Node3: {
    if (self->numChildren == Node3::kMaxNodes) {
      auto *self3 = static_cast<Node3 *>(self);
      auto *newSelf = allocators->node16.allocate(self->partialKeyLen);
      newSelf->copyChildrenAndKeyFrom(*self3);
      allocators->node3.release(self3);
      self = newSelf;
      goto insert16;
    }
  insert3:
    // Shift the sorted index/children arrays right at the insertion point.
    auto *self3 = static_cast<Node3 *>(self);
    ++self->numChildren;
    int i = 0;
    for (; i < self->numChildren - 1; ++i) {
      if (int(self3->index[i]) > int(index)) {
        memmove(self3->index + i + 1, self3->index + i,
                self->numChildren - (i + 1));
        memmove(self3->children + i + 1, self3->children + i,
                (self->numChildren - (i + 1)) * sizeof(Child));
        break;
      }
    }
    self3->index[i] = index;
    auto &result = self3->children[i].child;
    result = nullptr;
    return result;
  }
  case Type_Node16: {
    if (self->numChildren == Node16::kMaxNodes) {
      auto *self16 = static_cast<Node16 *>(self);
      auto *newSelf = allocators->node48.allocate(self->partialKeyLen);
      newSelf->copyChildrenAndKeyFrom(*self16);
      allocators->node16.release(self16);
      self = newSelf;
      goto insert48;
    }
  insert16:
    // Same sorted-insert as Node3, with the larger arrays.
    auto *self16 = static_cast<Node16 *>(self);
    assert(self->getType() == Type_Node16);
    ++self->numChildren;
    int i = 0;
    for (; i < int(self->numChildren) - 1; ++i) {
      if (int(self16->index[i]) > int(index)) {
        memmove(self16->index + i + 1, self16->index + i,
                self->numChildren - (i + 1));
        memmove(self16->children + i + 1, self16->children + i,
                (self->numChildren - (i + 1)) * sizeof(Child));
        break;
      }
    }
    self16->index[i] = index;
    auto &result = self16->children[i].child;
    result = nullptr;
    return result;
  }
  case Type_Node48: {
    if (self->numChildren == 48) {
      auto *self48 = static_cast<Node48 *>(self);
      auto *newSelf = allocators->node256.allocate(self->partialKeyLen);
      newSelf->copyChildrenAndKeyFrom(*self48);
      allocators->node48.release(self48);
      self = newSelf;
      goto insert256;
    }
  insert48:
    // Children are stored densely; hand out the next free slot and record
    // the byte -> slot mapping.
    auto *self48 = static_cast<Node48 *>(self);
    self48->bitSet.set(index);
    ++self->numChildren;
    assert(self48->nextFree < 48);
    int nextFree = self48->nextFree++;
    self48->index[index] = nextFree;
    auto &result = self48->children[nextFree].child;
    result = nullptr;
    return result;
  }
  case Type_Node256: {
  insert256:
    // Direct-mapped: the slot already exists and holds nullptr.
    auto *self256 = static_cast<Node256 *>(self);
    ++self->numChildren;
    self256->bitSet.set(index);
    return self256->children[index].child;
  }
  default:                   // GCOVR_EXCL_LINE
    __builtin_unreachable(); // GCOVR_EXCL_LINE
  }
}
// Returns the next node in depth-first preorder over the tree, or nullptr at
// the end: descend to the smallest child if one exists, otherwise climb
// toward the root looking for the next sibling.
Node *nextPhysical(Node *node) {
  int index = -1;
  for (;;) {
    auto nextChild = getChildGeq(node, index + 1);
    if (nextChild >= 0) {
      return getChildExists(node, nextChild);
    }
    // No child follows: resume the search from this node's slot in its
    // parent.
    index = node->parentsIndex;
    node = node->parent;
    if (node == nullptr) {
      return nullptr;
    }
  }
}
// Returns the next node (in depth-first order) that actually carries an
// entry, skipping purely-structural nodes; nullptr at the end.
Node *nextLogical(Node *node) {
  Node *n = nextPhysical(node);
  while (n != nullptr && !n->entryPresent) {
    n = nextPhysical(n);
  }
  return n;
}
// Invalidates `self`, replacing it with a node of the same type whose partial
// key capacity is at least `capacity`. Does not return nodes to freelists
// when kUseFreeList is false (returning them would keep the oversized
// allocation alive).
void freeAndMakeCapacityAtLeast(Node *&self, int capacity,
                                NodeAllocators *allocators,
                                ConflictSet::Impl *impl,
                                const bool kUseFreeList) {
  // The replacement sequence is identical for every node type; only the
  // concrete type and its allocator differ.
  auto replaceWith = [&](auto *old, auto &allocator) {
    auto *newSelf = allocator.allocate(capacity);
    newSelf->copyChildrenAndKeyFrom(*old);
    // Splice the replacement into whatever slot pointed at the old node.
    getInTree(self, impl) = newSelf;
    if (kUseFreeList) {
      allocator.release(old);
    } else {
      removeNode(old);
      safe_free(old, old->size());
    }
    self = newSelf;
  };
  switch (self->getType()) {
  case Type_Node0:
    replaceWith(static_cast<Node0 *>(self), allocators->node0);
    break;
  case Type_Node3:
    replaceWith(static_cast<Node3 *>(self), allocators->node3);
    break;
  case Type_Node16:
    replaceWith(static_cast<Node16 *>(self), allocators->node16);
    break;
  case Type_Node48:
    replaceWith(static_cast<Node48 *>(self), allocators->node48);
    break;
  case Type_Node256:
    replaceWith(static_cast<Node256 *>(self), allocators->node256);
    break;
  default:                   // GCOVR_EXCL_LINE
    __builtin_unreachable(); // GCOVR_EXCL_LINE
  }
}
// Fix larger-than-desired capacities. Does not return nodes to freelists,
// since that wouldn't actually reclaim the memory used for partial key
// capacity.
void maybeDecreaseCapacity(Node *&self, NodeAllocators *allocators,
                           ConflictSet::Impl *impl) {
  // Budget: (children + entryPresent) * (partialKeyLen + 1) — see the
  // setOldestVersion invariant documented with kFreeListMaxMemory.
  const int keyCount = self->numChildren + int(self->entryPresent);
  const int maxCapacity = keyCount * (self->partialKeyLen + 1);
  if (self->getCapacity() > maxCapacity) {
    freeAndMakeCapacityAtLeast(self, maxCapacity, allocators, impl, false);
  }
}
// If self's occupancy has dropped below the minimum for its node type,
// replace it with the next-smaller type. A Node3 with exactly one child and
// no entry is instead merged into that child (path compression). If the
// reallocation would invalidate the node `dontInvalidate` points at, that
// pointer is updated to the replacement.
void maybeDownsize(Node *self, NodeAllocators *allocators,
                   ConflictSet::Impl *impl, Node *&dontInvalidate) {
#if DEBUG_VERBOSE && !defined(NDEBUG)
  fprintf(stderr, "maybeDownsize: %s\n", getSearchPathPrintable(self).c_str());
#endif
  switch (self->getType()) {
  case Type_Node0:           // GCOVR_EXCL_LINE
    __builtin_unreachable(); // GCOVR_EXCL_LINE
  case Type_Node3: {
    auto *self3 = (Node3 *)self;
    if (self->numChildren == 0) {
      auto *newSelf = allocators->node0.allocate(self->partialKeyLen);
      newSelf->copyChildrenAndKeyFrom(*self3);
      getInTree(self, impl) = newSelf;
      allocators->node3.release(self3);
    } else if (self->numChildren == 1 && !self->entryPresent) {
      auto *child = self3->children[0].child;
      // The child's partial key grows by our partial key plus one index
      // byte; ensure it has room first.
      int minCapacity = self3->partialKeyLen + 1 + child->partialKeyLen;
      if (minCapacity > child->getCapacity()) {
        const bool update = child == dontInvalidate;
        freeAndMakeCapacityAtLeast(child, minCapacity, allocators, impl, true);
        if (update) {
          dontInvalidate = child;
        }
      }
      // Merge partial key with child
#if DEBUG_VERBOSE && !defined(NDEBUG)
      fprintf(stderr, "Merge %s into %s\n",
              getSearchPathPrintable(self).c_str(),
              getSearchPathPrintable(child).c_str());
#endif
      int64_t childMaxVersion = maxVersion(child, impl);
      // Construct new partial key for child: [self's key][index byte][child's
      // key], shifting the child's existing bytes right first.
      memmove(child->partialKey() + self3->partialKeyLen + 1,
              child->partialKey(), child->partialKeyLen);
      memcpy(child->partialKey(), self3->partialKey(), self->partialKeyLen);
      child->partialKey()[self3->partialKeyLen] = self3->index[0];
      child->partialKeyLen += 1 + self3->partialKeyLen;
      child->parent = self->parent;
      child->parentsIndex = self->parentsIndex;
      // Max versions are stored in the parent, so we need to update it now
      // that we have a new parent.
      maxVersion(child, impl) = childMaxVersion;
      getInTree(self, impl) = child;
      allocators->node3.release(self3);
    }
  } break;
  case Type_Node16:
    if (self->numChildren + int(self->entryPresent) < kMinChildrenNode16) {
      auto *self16 = (Node16 *)self;
      auto *newSelf = allocators->node3.allocate(self->partialKeyLen);
      newSelf->copyChildrenAndKeyFrom(*self16);
      getInTree(self, impl) = newSelf;
      allocators->node16.release(self16);
    }
    break;
  case Type_Node48:
    if (self->numChildren + int(self->entryPresent) < kMinChildrenNode48) {
      auto *self48 = (Node48 *)self;
      auto *newSelf = allocators->node16.allocate(self->partialKeyLen);
      newSelf->copyChildrenAndKeyFrom(*self48);
      getInTree(self, impl) = newSelf;
      allocators->node48.release(self48);
    }
    break;
  case Type_Node256:
    if (self->numChildren + int(self->entryPresent) < kMinChildrenNode256) {
      auto *self256 = (Node256 *)self;
      auto *newSelf = allocators->node48.allocate(self->partialKeyLen);
      newSelf->copyChildrenAndKeyFrom(*self256);
      getInTree(self, impl) = newSelf;
      allocators->node256.release(self256);
    }
    break;
  default:                   // GCOVR_EXCL_LINE
    __builtin_unreachable(); // GCOVR_EXCL_LINE
  }
}
// Removes self's entry from the tree, physically removing self as well when
// it has no children, and shrinking/merging affected nodes.
//
// Precondition: self is not the root. May invalidate nodes along the search
// path to self. May invalidate children of self->parent. Returns a pointer to
// the node after self. If erase invalidates the pointee of `dontInvalidate`, it
// will update it to its new pointee as well.
Node *erase(Node *self, NodeAllocators *allocators, ConflictSet::Impl *impl,
            Node *&dontInvalidate) {
  assert(self->parent != nullptr);
#if DEBUG_VERBOSE && !defined(NDEBUG)
  fprintf(stderr, "Erase: %s\n", getSearchPathPrintable(self).c_str());
#endif
  Node *parent = self->parent;
  uint8_t parentsIndex = self->parentsIndex;
  // Capture the logical successor before mutating the tree; this is the
  // return value.
  auto *result = nextLogical(self);
  removeKey(self);
  self->entryPresent = false;
  if (self->numChildren != 0) {
    // self still has children so it stays as an interior node; just try to
    // shrink it. maybeDownsize may reallocate result's node, so track whether
    // dontInvalidate needs to follow it.
    const bool update = result == dontInvalidate;
    maybeDownsize(self, allocators, impl, result);
    if (update) {
      dontInvalidate = result;
    }
    return result;
  }
  // No children and no entry: free self and unlink it from its parent.
  assert(self->getType() == Type_Node0);
  allocators->node0.release((Node0 *)self);
  switch (parent->getType()) {
  case Type_Node0: // GCOVR_EXCL_LINE
    __builtin_unreachable(); // GCOVR_EXCL_LINE
  case Type_Node3: {
    // Shift the sorted index/children arrays left over the removed slot.
    auto *parent3 = static_cast<Node3 *>(parent);
    int nodeIndex = getNodeIndex(parent3, parentsIndex);
    assert(nodeIndex >= 0);
    memmove(parent3->index + nodeIndex, parent3->index + nodeIndex + 1,
            sizeof(parent3->index[0]) *
                (parent->numChildren - (nodeIndex + 1)));
    memmove(parent3->children + nodeIndex, parent3->children + nodeIndex + 1,
            sizeof(parent3->children[0]) *
                (parent->numChildren - (nodeIndex + 1)));
    --parent->numChildren;
    assert(parent->numChildren > 0 || parent->entryPresent);
  } break;
  case Type_Node16: {
    // Same slot-removal scheme as Node3.
    auto *parent16 = static_cast<Node16 *>(parent);
    int nodeIndex = getNodeIndex(parent16, parentsIndex);
    assert(nodeIndex >= 0);
    memmove(parent16->index + nodeIndex, parent16->index + nodeIndex + 1,
            sizeof(parent16->index[0]) *
                (parent->numChildren - (nodeIndex + 1)));
    memmove(parent16->children + nodeIndex, parent16->children + nodeIndex + 1,
            sizeof(parent16->children[0]) *
                (parent->numChildren - (nodeIndex + 1)));
    --parent->numChildren;
    // By kMinChildrenNode16
    assert(parent->numChildren > 0);
  } break;
  case Type_Node48: {
    // Node48 keeps children densely packed: move the last child into the
    // vacated slot and fix up the indirection table.
    auto *parent48 = static_cast<Node48 *>(parent);
    parent48->bitSet.reset(parentsIndex);
    int8_t toRemoveChildrenIndex =
        std::exchange(parent48->index[parentsIndex], -1);
    int8_t lastChildrenIndex = --parent48->nextFree;
    assert(toRemoveChildrenIndex >= 0);
    assert(lastChildrenIndex >= 0);
    if (toRemoveChildrenIndex != lastChildrenIndex) {
      parent48->children[toRemoveChildrenIndex] =
          parent48->children[lastChildrenIndex];
      parent48->index[parent48->children[toRemoveChildrenIndex]
                          .child->parentsIndex] = toRemoveChildrenIndex;
    }
    --parent->numChildren;
    // By kMinChildrenNode48
    assert(parent->numChildren > 0);
  } break;
  case Type_Node256: {
    // Direct-indexed: just clear the slot and the bit.
    auto *parent256 = static_cast<Node256 *>(parent);
    parent256->bitSet.reset(parentsIndex);
    parent256->children[parentsIndex].child = nullptr;
    --parent->numChildren;
    // By kMinChildrenNode256
    assert(parent->numChildren > 0);
  } break;
  default: // GCOVR_EXCL_LINE
    __builtin_unreachable(); // GCOVR_EXCL_LINE
  }
  // The parent lost a child; it may now qualify for a smaller node type.
  const bool update = result == dontInvalidate;
  maybeDownsize(parent, allocators, impl, result);
  if (update) {
    dontInvalidate = result;
  }
  return result;
}
// Result of a tree search (see firstGeq): `n` is the found node (or nullptr
// when no entry is >= the search key), and `cmp` is 0 when the node's key
// equals the search key exactly, 1 when it is strictly greater.
struct Iterator {
  Node *n;
  int cmp;
};
// Climb from `node` toward the root until some ancestor's parent has a child
// with a larger index byte; return that child. Returns nullptr when `node`'s
// subtree already holds the greatest keys in the tree.
Node *nextSibling(Node *node) {
  while (node->parent != nullptr) {
    int candidate = getChildGeq(node->parent, node->parentsIndex + 1);
    if (candidate >= 0) {
      return getChildExists(node->parent, candidate);
    }
    node = node->parent;
  }
  return nullptr;
}
#if defined(HAS_AVX) || defined(HAS_ARM_NEON)
// With SIMD available, each stride compares 64 bytes (four 16-byte vectors).
constexpr int kStride = 64;
#else
// Scalar fallback: compare 16 bytes per stride (via memcmp in compareStride).
constexpr int kStride = 16;
#endif
// Strides handled per iteration of longestCommonPrefix's outermost loop.
constexpr int kUnrollFactor = 4;
// Returns true iff the kStride bytes at `ap` equal the kStride bytes at `bp`.
bool compareStride(const uint8_t *ap, const uint8_t *bp) {
#if defined(HAS_ARM_NEON)
  static_assert(kStride == 64);
  uint8x16_t x[4]; // GCOVR_EXCL_LINE
  // Per-byte equality (0xff on match) for each 16-byte lane.
  for (int i = 0; i < 4; ++i) {
    x[i] = vceqq_u8(vld1q_u8(ap + i * 16), vld1q_u8(bp + i * 16));
  }
  auto results = vreinterpretq_u16_u8(
      vandq_u8(vandq_u8(x[0], x[1]), vandq_u8(x[2], x[3])));
  // Narrowing shift packs the 16 comparison bytes into a 64-bit mask; all
  // bits set means every byte matched in all four lanes.
  bool eq = vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0) ==
            uint64_t(-1);
#elif defined(HAS_AVX)
  static_assert(kStride == 64);
  __m128i x[4]; // GCOVR_EXCL_LINE
  for (int i = 0; i < 4; ++i) {
    x[i] = _mm_cmpeq_epi8(_mm_loadu_si128((__m128i *)(ap + i * 16)),
                          _mm_loadu_si128((__m128i *)(bp + i * 16)));
  }
  // AND the four comparison vectors; movemask is all ones only if every byte
  // matched in every lane.
  auto eq =
      _mm_movemask_epi8(_mm_and_si128(_mm_and_si128(x[0], x[1]),
                                      _mm_and_si128(x[2], x[3]))) == 0xffff;
#else
  // Hope it gets vectorized
  auto eq = memcmp(ap, bp, kStride) == 0;
#endif
  return eq;
}
// Returns the index of the first byte where ap and bp differ.
// Precondition: ap[:kStride] != bp[:kStride]
int firstNeqStride(const uint8_t *ap, const uint8_t *bp) {
#if defined(HAS_AVX)
  static_assert(kStride == 64);
  uint64_t c[kStride / 16]; // GCOVR_EXCL_LINE
  for (int i = 0; i < kStride; i += 16) {
    const auto a = _mm_loadu_si128((__m128i *)(ap + i));
    const auto b = _mm_loadu_si128((__m128i *)(bp + i));
    const auto compared = _mm_cmpeq_epi8(a, b);
    // One bit per byte: set where the bytes matched.
    c[i / 16] = _mm_movemask_epi8(compared) & 0xffff;
  }
  // Combine the four 16-bit match masks into 64 bits; the first zero (i.e.
  // first set bit of the complement) is the first mismatch.
  return std::countr_zero(~(c[0] | c[1] << 16 | c[2] << 32 | c[3] << 48));
#elif defined(HAS_ARM_NEON)
  static_assert(kStride == 64);
  for (int i = 0; i < kStride; i += 16) {
    // 0xff for each match
    uint16x8_t results =
        vreinterpretq_u16_u8(vceqq_u8(vld1q_u8(ap + i), vld1q_u8(bp + i)));
    // 0xf for each mismatch
    uint64_t bitfield =
        ~vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0);
    if (bitfield) {
      // Each byte contributes 4 bits to the mask, hence the >> 2.
      return i + (std::countr_zero(bitfield) >> 2);
    }
  }
  __builtin_unreachable(); // GCOVR_EXCL_LINE
#else
  // Scalar fallback. Only the first kStride - 1 bytes are checked: by the
  // precondition, if none of them mismatch, the mismatch must be at index
  // kStride - 1, which is i's value when the loop falls through.
  int i = 0;
  for (; i < kStride - 1; ++i) {
    if (*ap++ != *bp++) {
      break;
    }
  }
  return i;
#endif
}
// Returns the length of the longest common prefix of ap[:cl] and bp[:cl].
// Works through progressively smaller granularities: kStride * kUnrollFactor,
// then kStride, then 8-byte words, then single bytes.
int longestCommonPrefix(const uint8_t *ap, const uint8_t *bp, int cl) {
  assume(cl >= 0);
  int i = 0;
  int end;
  // Tiny inputs go straight to the byte loop.
  if (cl < 8) {
    goto bytes;
  }
  // kStride * kUnrollFactor at a time
  end = cl & ~(kStride * kUnrollFactor - 1);
  while (i < end) {
    for (int j = 0; j < kUnrollFactor; ++j) {
      if (!compareStride(ap, bp)) {
        return i + firstNeqStride(ap, bp);
      }
      i += kStride;
      ap += kStride;
      bp += kStride;
    }
  }
  // kStride at a time
  end = cl & ~(kStride - 1);
  while (i < end) {
    if (!compareStride(ap, bp)) {
      return i + firstNeqStride(ap, bp);
    }
    i += kStride;
    ap += kStride;
    bp += kStride;
  }
  // word at a time
  end = cl & ~(sizeof(uint64_t) - 1);
  while (i < end) {
    uint64_t a; // GCOVR_EXCL_LINE
    uint64_t b; // GCOVR_EXCL_LINE
    memcpy(&a, ap, 8);
    memcpy(&b, bp, 8);
    const auto mismatched = a ^ b;
    if (mismatched) {
      // countr_zero / 8 locates the first differing byte (little-endian).
      return i + std::countr_zero(mismatched) / 8;
    }
    i += 8;
    ap += 8;
    bp += 8;
  }
bytes:
  // byte at a time
  while (i < cl) {
    if (*ap != *bp) {
      break;
    }
    ++ap;
    ++bp;
    ++i;
  }
  return i;
}
// Performs a physical search for `remaining`, one child edge per step() call.
// step() returns true when the search can go no further: the key is fully
// consumed, no child matches the next byte, or a child's partial key diverges
// from the key.
struct SearchStepWise {
  Node *n;
  std::span<const uint8_t> remaining;
  // Leaves members uninitialized; caller must assign before use.
  SearchStepWise() {}
  SearchStepWise(Node *n, std::span<const uint8_t> remaining)
      : n(n), remaining(remaining) {
    assert(n->partialKeyLen == 0);
  }
  bool step() {
    if (remaining.size() == 0) {
      return true;
    }
    auto *child = getChild(n, remaining[0]);
    if (child == nullptr) {
      return true;
    }
    // Compare the child's partial key against the key bytes after the edge
    // byte; stop unless the entire partial key matches.
    int cl = std::min<int>(child->partialKeyLen, remaining.size() - 1);
    int i = longestCommonPrefix(child->partialKey(), remaining.data() + 1, cl);
    if (i != child->partialKeyLen) {
      return true;
    }
    // Descend, consuming the edge byte plus the child's partial key.
    n = child;
    remaining =
        remaining.subspan(1 + child->partialKeyLen,
                          remaining.size() - (1 + child->partialKeyLen));
    return false;
  }
};
// Returns true iff reading `key` at `readVersion` does not conflict.
// Logically this is the same as performing firstGeq and then checking against
// point or range version according to cmp, but this version short circuits as
// soon as it can prove that there's no conflict.
bool checkPointRead(Node *n, const std::span<const uint8_t> key,
                    int64_t readVersion, ConflictSet::Impl *impl) {
#if DEBUG_VERBOSE && !defined(NDEBUG)
  fprintf(stderr, "Check point read: %s\n", printable(key).c_str());
#endif
  auto remaining = key;
  for (;;) {
    // If nothing in this subtree was written after readVersion, no conflict.
    if (maxVersion(n, impl) <= readVersion) {
      return true;
    }
    if (remaining.size() == 0) {
      // Exact match: check its point version directly.
      if (n->entryPresent) {
        return n->entry.pointVersion <= readVersion;
      }
      // No entry here; the relevant version is the range version of the
      // first entry after the key.
      int c = getChildGeq(n, 0);
      assert(c >= 0);
      n = getChildExists(n, c);
      goto downLeftSpine;
    }
    auto *child = getChild(n, remaining[0]);
    if (child == nullptr) {
      // No child for the next byte: the first key greater than `key` is in
      // the next-larger child, or in the next sibling subtree.
      int c = getChildGeq(n, remaining[0]);
      if (c >= 0) {
        n = getChildExists(n, c);
        goto downLeftSpine;
      } else {
        n = nextSibling(n);
        if (n == nullptr) {
          return true;
        }
        goto downLeftSpine;
      }
    }
    n = child;
    remaining = remaining.subspan(1, remaining.size() - 1);
    if (n->partialKeyLen > 0) {
      int commonLen = std::min<int>(n->partialKeyLen, remaining.size());
      int i = longestCommonPrefix(n->partialKey(), remaining.data(), commonLen);
      if (i < commonLen) {
        // Partial key diverges: n's subtree is entirely greater or entirely
        // less than the key.
        auto c = n->partialKey()[i] <=> remaining[i];
        if (c > 0) {
          goto downLeftSpine;
        } else {
          n = nextSibling(n);
          if (n == nullptr) {
            return true;
          }
          goto downLeftSpine;
        }
      }
      if (commonLen == n->partialKeyLen) {
        // partial key matches
        remaining = remaining.subspan(commonLen, remaining.size() - commonLen);
      } else if (n->partialKeyLen > int(remaining.size())) {
        // n is the first physical node greater than remaining, and there's no
        // eq node
        goto downLeftSpine;
      }
    }
  }
downLeftSpine:
  // n's subtree is strictly after the key: the first entry in it carries the
  // range version covering the key.
  for (;;) {
    if (n->entryPresent) {
      return n->entry.rangeVersion <= readVersion;
    }
    int c = getChildGeq(n, 0);
    assert(c >= 0);
    n = getChildExists(n, c);
  }
}
// Return the max version among all keys starting with the search path of n +
// [child], where child in (begin, end). Does not account for the range version
// of firstGt(searchpath(n) + [end - 1])
int64_t maxBetweenExclusive(Node *n, int begin, int end) {
  assume(-1 <= begin);
  assume(begin <= 256);
  assume(-1 <= end);
  assume(end <= 256);
  assume(begin < end);
  int64_t result = std::numeric_limits<int64_t>::lowest();
  {
    // The first child in range also contributes its entry's rangeVersion
    // (which covers the gap before it). If no child is in range, the max is
    // the identity (lowest).
    int c = getChildGeq(n, begin + 1);
    if (c >= 0 && c < end) {
      auto *child = getChildExists(n, c);
      if (child->entryPresent) {
        result = std::max(result, child->entry.rangeVersion);
      }
      begin = c;
    } else {
      return result;
    }
  }
  // Accumulate the per-child max versions stored in the parent's slots.
  switch (n->getType()) {
  case Type_Node0: // GCOVR_EXCL_LINE
    // We would have returned above, after not finding a child
    __builtin_unreachable(); // GCOVR_EXCL_LINE
  case Type_Node3: {
    auto *self = static_cast<Node3 *>(n);
    for (int i = 0; i < self->numChildren && self->index[i] < end; ++i) {
      if (begin <= self->index[i]) {
        result = std::max(result, self->children[i].childMaxVersion);
      }
    }
  } break;
  case Type_Node16: {
    auto *self = static_cast<Node16 *>(n);
    for (int i = 0; i < self->numChildren && self->index[i] < end; ++i) {
      if (begin <= self->index[i]) {
        result = std::max(result, self->children[i].childMaxVersion);
      }
    }
  } break;
  case Type_Node48: {
    auto *self = static_cast<Node48 *>(n);
    self->bitSet.forEachInRange(
        [&](int i) {
          result =
              std::max(result, self->children[self->index[i]].childMaxVersion);
        },
        begin, end);
    break;
  }
  case Type_Node256: {
    auto *self = static_cast<Node256 *>(n);
    self->bitSet.forEachInRange(
        [&](int i) {
          result = std::max(result, self->children[i].childMaxVersion);
        },
        begin, end);
    break;
  }
  default: // GCOVR_EXCL_LINE
    __builtin_unreachable(); // GCOVR_EXCL_LINE
  }
#if DEBUG_VERBOSE && !defined(NDEBUG)
  fprintf(stderr, "At `%s', max version in (%02x, %02x) is %" PRId64 "\n",
          getSearchPathPrintable(n).c_str(), begin, end, result);
#endif
  return result;
}
// Reconstruct the full key bytes for `n` by climbing to the root, collecting
// bytes in reverse order (each node's partial key back-to-front, then the
// edge byte into its parent), and flipping the buffer once at the end.
Vector<uint8_t> getSearchPath(Arena &arena, Node *node) {
  assert(node != nullptr);
  auto path = vector<uint8_t>(arena);
  while (true) {
    // Append this node's partial key back-to-front.
    for (int j = node->partialKeyLen; j-- > 0;) {
      path.push_back(node->partialKey()[j]);
    }
    if (node->parent == nullptr) {
      break;
    }
    path.push_back(node->parentsIndex);
    node = node->parent;
  }
  std::reverse(path.begin(), path.end());
  return path;
} // GCOVR_EXCL_LINE
// Return true if the max version among all keys that start with key + [child],
// where begin < child < end, is <= readVersion.
//
// Precondition: transitively, no child of n has a search path that's a longer
// prefix of key than n
bool checkRangeStartsWith(Node *n, std::span<const uint8_t> key, int begin,
                          int end, int64_t readVersion,
                          ConflictSet::Impl *impl) {
#if DEBUG_VERBOSE && !defined(NDEBUG)
  fprintf(stderr, "%s(%02x,%02x)*\n", printable(key).c_str(), begin, end);
#endif
  auto remaining = key;
  if (remaining.size() == 0) {
    // n's search path is exactly `key`: check the children in (begin, end).
    return maxBetweenExclusive(n, begin, end) <= readVersion;
  }
  auto *child = getChild(n, remaining[0]);
  if (child == nullptr) {
    // No node below `key` exists; the covering range version lives in the
    // first entry after `key`.
    int c = getChildGeq(n, remaining[0]);
    if (c >= 0) {
      n = getChildExists(n, c);
      goto downLeftSpine;
    } else {
      n = nextSibling(n);
      if (n == nullptr) {
        return true;
      }
      goto downLeftSpine;
    }
  }
  n = child;
  remaining = remaining.subspan(1, remaining.size() - 1);
  // By the precondition there is no deeper node matching `key`, so the child
  // must have a partial key extending past `remaining`.
  assert(n->partialKeyLen > 0);
  {
    int commonLen = std::min<int>(n->partialKeyLen, remaining.size());
    int i = longestCommonPrefix(n->partialKey(), remaining.data(), commonLen);
    if (i < commonLen) {
      auto c = n->partialKey()[i] <=> remaining[i];
      if (c > 0) {
        goto downLeftSpine;
      } else {
        n = nextSibling(n);
        if (n == nullptr) {
          return true;
        }
        goto downLeftSpine;
      }
    }
    assert(n->partialKeyLen > int(remaining.size()));
    // n's subtree starts with key + [partialKey byte]; it is in range iff
    // that byte lies strictly between begin and end.
    if (begin < n->partialKey()[remaining.size()] &&
        n->partialKey()[remaining.size()] < end) {
      if (n->entryPresent && n->entry.rangeVersion > readVersion) {
        return false;
      }
      return maxVersion(n, impl) <= readVersion;
    }
    return true;
  }
  __builtin_unreachable(); // GCOVR_EXCL_LINE
downLeftSpine:
  // The first entry in n's subtree carries the range version covering the
  // queried span.
  for (;;) {
    if (n->entryPresent) {
      return n->entry.rangeVersion <= readVersion;
    }
    int c = getChildGeq(n, 0);
    assert(c >= 0);
    n = getChildExists(n, c);
  }
}
// Return true if the max version among all keys that start with key[:prefixLen]
// that are >= key is <= readVersion.
//
// Implemented as a resumable state machine: call step() until it returns
// true, then read `ok` for the answer. This allows the caller to interleave
// the left- and right-side checks of a range read.
struct CheckRangeLeftSide {
  CheckRangeLeftSide(Node *n, std::span<const uint8_t> key, int prefixLen,
                     int64_t readVersion, ConflictSet::Impl *impl)
      : n(n), remaining(key), prefixLen(prefixLen), readVersion(readVersion),
        impl(impl) {
#if DEBUG_VERBOSE && !defined(NDEBUG)
    fprintf(stderr, "Check range left side from %s for keys starting with %s\n",
            printable(key).c_str(),
            printable(key.subspan(0, prefixLen)).c_str());
#endif
  }
  // Current node in the traversal.
  Node *n;
  // Unconsumed suffix of the search key.
  std::span<const uint8_t> remaining;
  // Only keys sharing key[:prefixLen] are in scope for the check.
  int prefixLen;
  int64_t readVersion;
  ConflictSet::Impl *impl;
  // Number of key bytes consumed so far (n's search path length).
  int searchPathLen = 0;
  // Valid after step() returns true: true means no conflict.
  bool ok;
  enum Phase { Search, DownLeftSpine } phase = Search;
  // Advance one node; returns true when `ok` has been decided.
  bool step() {
    switch (phase) {
    case Search: {
      // Nothing in this subtree is newer than readVersion: done, no conflict.
      if (maxVersion(n, impl) <= readVersion) {
        ok = true;
        return true;
      }
      if (remaining.size() == 0) {
        assert(searchPathLen >= prefixLen);
        ok = maxVersion(n, impl) <= readVersion;
        return true;
      }
      // Once past the shared prefix, all children >= remaining[0] are in
      // range and must be checked.
      if (searchPathLen >= prefixLen) {
        if (maxBetweenExclusive(n, remaining[0], 256) > readVersion) {
          ok = false;
          return true;
        }
      }
      auto *child = getChild(n, remaining[0]);
      if (child == nullptr) {
        int c = getChildGeq(n, remaining[0]);
        if (c >= 0) {
          if (searchPathLen < prefixLen) {
            n = getChildExists(n, c);
            return downLeftSpine();
          }
          n = getChildExists(n, c);
          ok = maxVersion(n, impl) <= readVersion;
          return true;
        } else {
          n = nextSibling(n);
          if (n == nullptr) {
            ok = true;
            return true;
          }
          return downLeftSpine();
        }
      }
      n = child;
      remaining = remaining.subspan(1, remaining.size() - 1);
      ++searchPathLen;
      if (n->partialKeyLen > 0) {
        int commonLen = std::min<int>(n->partialKeyLen, remaining.size());
        int i =
            longestCommonPrefix(n->partialKey(), remaining.data(), commonLen);
        searchPathLen += i;
        if (i < commonLen) {
          // Partial key diverges from the search key.
          auto c = n->partialKey()[i] <=> remaining[i];
          if (c > 0) {
            if (searchPathLen < prefixLen) {
              return downLeftSpine();
            }
            if (n->entryPresent && n->entry.rangeVersion > readVersion) {
              ok = false;
              return true;
            }
            ok = maxVersion(n, impl) <= readVersion;
            return true;
          } else {
            n = nextSibling(n);
            if (n == nullptr) {
              ok = true;
              return true;
            }
            return downLeftSpine();
          }
        }
        if (commonLen == n->partialKeyLen) {
          // partial key matches
          remaining =
              remaining.subspan(commonLen, remaining.size() - commonLen);
        } else if (n->partialKeyLen > int(remaining.size())) {
          // Partial key extends past the key: n's subtree is entirely > key.
          assert(searchPathLen >= prefixLen);
          if (n->entryPresent && n->entry.rangeVersion > readVersion) {
            ok = false;
            return true;
          }
          ok = maxVersion(n, impl) <= readVersion;
          return true;
        }
      }
      break;
    }
    case DownLeftSpine: {
      // Descend to the first entry after the key; its rangeVersion covers
      // the boundary.
      if (n->entryPresent) {
        ok = n->entry.rangeVersion <= readVersion;
        return true;
      }
      int c = getChildGeq(n, 0);
      assert(c >= 0);
      n = getChildExists(n, c);
    } break;
    default: // GCOVR_EXCL_LINE
      __builtin_unreachable(); // GCOVR_EXCL_LINE
    }
    return false;
  }
  bool downLeftSpine() {
    phase = DownLeftSpine;
    return false;
  }
};
// Return true if the max version among all keys that start with key[:prefixLen]
// that are < key is <= readVersion.
//
// Like CheckRangeLeftSide, a resumable state machine: call step() until it
// returns true, then read `ok`.
struct CheckRangeRightSide {
  CheckRangeRightSide(Node *n, std::span<const uint8_t> key, int prefixLen,
                      int64_t readVersion, ConflictSet::Impl *impl)
      : n(n), key(key), remaining(key), prefixLen(prefixLen),
        readVersion(readVersion), impl(impl) {
#if DEBUG_VERBOSE && !defined(NDEBUG)
    fprintf(stderr, "Check range right side to %s for keys starting with %s\n",
            printable(key).c_str(),
            printable(key.subspan(0, prefixLen)).c_str());
#endif
  }
  // Current node in the traversal.
  Node *n;
  // Full (exclusive) end key, kept for assertions.
  std::span<const uint8_t> key;
  // Unconsumed suffix of the end key.
  std::span<const uint8_t> remaining;
  // Only keys sharing key[:prefixLen] are in scope for the check.
  int prefixLen;
  int64_t readVersion;
  ConflictSet::Impl *impl;
  // Number of key bytes consumed so far (n's search path length).
  int searchPathLen = 0;
  // Valid after step() returns true: true means no conflict.
  bool ok;
  enum Phase { Search, DownLeftSpine } phase = Search;
  // Advance one node; returns true when `ok` has been decided.
  bool step() {
    switch (phase) {
    case Search: {
#if DEBUG_VERBOSE && !defined(NDEBUG)
      fprintf(
          stderr,
          "Search path: %s, searchPathLen: %d, prefixLen: %d, remaining: %s\n",
          getSearchPathPrintable(n).c_str(), searchPathLen, prefixLen,
          printable(remaining).c_str());
#endif
      assert(searchPathLen <= int(key.size()));
      if (remaining.size() == 0) {
        return downLeftSpine();
      }
      if (searchPathLen >= prefixLen) {
        // n's own key is < key and shares the prefix: its point version and
        // the children strictly below remaining[0] are all in range.
        if (n->entryPresent && n->entry.pointVersion > readVersion) {
          ok = false;
          return true;
        }
        if (maxBetweenExclusive(n, -1, remaining[0]) > readVersion) {
          ok = false;
          return true;
        }
      }
      if (searchPathLen > prefixLen && n->entryPresent &&
          n->entry.rangeVersion > readVersion) {
        ok = false;
        return true;
      }
      auto *child = getChild(n, remaining[0]);
      if (child == nullptr) {
        int c = getChildGeq(n, remaining[0]);
        if (c >= 0) {
          n = getChildExists(n, c);
          return downLeftSpine();
        } else {
          // Dead end below the key: scan back up, checking subtrees in range.
          return backtrack();
        }
      }
      n = child;
      remaining = remaining.subspan(1, remaining.size() - 1);
      ++searchPathLen;
      if (n->partialKeyLen > 0) {
        int commonLen = std::min<int>(n->partialKeyLen, remaining.size());
        int i =
            longestCommonPrefix(n->partialKey(), remaining.data(), commonLen);
        searchPathLen += i;
        if (i < commonLen) {
          // Partial key diverges from the end key.
          ++searchPathLen;
          auto c = n->partialKey()[i] <=> remaining[i];
          if (c > 0) {
            return downLeftSpine();
          } else {
            if (searchPathLen > prefixLen && n->entryPresent &&
                n->entry.rangeVersion > readVersion) {
              ok = false;
              return true;
            }
            return backtrack();
          }
        }
        if (commonLen == n->partialKeyLen) {
          // partial key matches
          remaining =
              remaining.subspan(commonLen, remaining.size() - commonLen);
        } else if (n->partialKeyLen > int(remaining.size())) {
          return downLeftSpine();
        }
      }
    } break;
    case DownLeftSpine: {
      // First entry at/after the end key: its rangeVersion covers the tail
      // of the queried range.
      if (n->entryPresent) {
        ok = n->entry.rangeVersion <= readVersion;
        return true;
      }
      int c = getChildGeq(n, 0);
      assert(c >= 0);
      n = getChildExists(n, c);
    } break;
    default: // GCOVR_EXCL_LINE
      __builtin_unreachable(); // GCOVR_EXCL_LINE
    }
    return false;
  }
  // Walk back toward the root looking for the next subtree after n; any node
  // whose search path is strictly within the prefix must pass the version
  // check along the way.
  bool backtrack() {
    for (;;) {
      if (searchPathLen > prefixLen && maxVersion(n, impl) > readVersion) {
        ok = false;
        return true;
      }
      if (n->parent == nullptr) {
        ok = true;
        return true;
      }
      auto next = getChildGeq(n->parent, n->parentsIndex + 1);
      if (next < 0) {
        searchPathLen -= 1 + n->partialKeyLen;
        n = n->parent;
      } else {
        searchPathLen -= n->partialKeyLen;
        n = getChildExists(n->parent, next);
        searchPathLen += n->partialKeyLen;
        return downLeftSpine();
      }
    }
  }
  bool downLeftSpine() {
    phase = DownLeftSpine;
    assert(n != nullptr);
    return false;
  }
};
// Returns true iff reading [begin, end) at readVersion does not conflict.
// Strategy: skip the common prefix of begin and end physically, check the
// byte range that separates them (checkRangeStartsWith), then check the two
// sides (>= begin within the prefix, < end within the prefix) with
// interleaved state machines.
bool checkRangeRead(Node *n, std::span<const uint8_t> begin,
                    std::span<const uint8_t> end, int64_t readVersion,
                    ConflictSet::Impl *impl) {
  int lcp = longestCommonPrefix(begin.data(), end.data(),
                                std::min(begin.size(), end.size()));
  // [k, k + "\x00") contains exactly the key k: degrade to a point read.
  if (lcp == int(begin.size()) && end.size() == begin.size() + 1 &&
      end.back() == 0) {
    return checkPointRead(n, begin, readVersion, impl);
  }
  // Walk down the common prefix as far as the physical structure allows.
  SearchStepWise search{n, begin.subspan(0, lcp)};
  Arena arena;
  for (;;) {
    assert(getSearchPath(arena, search.n) <=>
               begin.subspan(0, lcp - search.remaining.size()) ==
           0);
    // Early out: nothing under this prefix is newer than readVersion.
    if (maxVersion(search.n, impl) <= readVersion) {
      return true;
    }
    if (search.step()) {
      break;
    }
  }
  assert(getSearchPath(arena, search.n) <=>
             begin.subspan(0, lcp - search.remaining.size()) ==
         0);
  // Re-root the remaining checks at the deepest node reached.
  const int consumed = lcp - search.remaining.size();
  assume(consumed >= 0);
  begin = begin.subspan(consumed, int(begin.size()) - consumed);
  end = end.subspan(consumed, int(end.size()) - consumed);
  n = search.n;
  lcp -= consumed;
  if (lcp == int(begin.size())) {
    // begin is a prefix of end: only the right side needs checking.
    CheckRangeRightSide checkRangeRightSide{n, end, lcp, readVersion, impl};
    while (!checkRangeRightSide.step())
      ;
    return checkRangeRightSide.ok;
  }
  // Check keys of the form prefix + [b], begin[lcp] < b < end[lcp].
  if (!checkRangeStartsWith(n, begin.subspan(0, lcp), begin[lcp], end[lcp],
                            readVersion, impl)) {
    return false;
  }
  // Run both side checks in lockstep; drain whichever finishes second.
  CheckRangeLeftSide checkRangeLeftSide{n, begin, lcp + 1, readVersion, impl};
  CheckRangeRightSide checkRangeRightSide{n, end, lcp + 1, readVersion, impl};
  for (;;) {
    bool leftDone = checkRangeLeftSide.step();
    bool rightDone = checkRangeRightSide.step();
    if (!leftDone && !rightDone) {
      continue;
    }
    if (leftDone && rightDone) {
      break;
    } else if (leftDone) {
      while (!checkRangeRightSide.step())
        ;
      break;
    } else {
      assert(rightDone);
      while (!checkRangeLeftSide.step())
        ;
    }
    break;
  }
  return checkRangeLeftSide.ok & checkRangeRightSide.ok;
}
// Returns a pointer to the newly inserted node. Caller must set
// `entryPresent`, `entry` fields and `maxVersion` on the result. The search
// path of the result's parent will have `maxVersion` at least `writeVersion` as
// a postcondition. Nodes along the search path to `key` may be invalidated.
//
// kBegin selects where maxVersion is bumped relative to the key-exhaustion
// check: for a range's begin key the final node's slot is bumped too; for an
// end key it is not (the end key itself is exclusive).
template <bool kBegin>
[[nodiscard]] Node *insert(Node **self, std::span<const uint8_t> key,
                           int64_t writeVersion, NodeAllocators *allocators,
                           ConflictSet::Impl *impl) {
  for (;;) {
    if ((*self)->partialKeyLen > 0) {
      // Handle an existing partial key
      int commonLen = std::min<int>((*self)->partialKeyLen, key.size());
      int partialKeyIndex =
          longestCommonPrefix((*self)->partialKey(), key.data(), commonLen);
      if (partialKeyIndex < (*self)->partialKeyLen) {
        // Key diverges inside the partial key: split this node, inserting a
        // new interior Node3 holding the shared prefix, with the old node as
        // its single child.
        auto *old = *self;
        int64_t oldMaxVersion = maxVersion(old, impl);
        // *self will have one child
        *self = allocators->node3.allocate(partialKeyIndex);
        memcpy((char *)*self + kNodeCopyBegin, (char *)old + kNodeCopyBegin,
               kNodeCopySize);
        (*self)->partialKeyLen = partialKeyIndex;
        // Not necessary to call removeKey here, since this node is "synthetic"
        (*self)->entryPresent = false;
        (*self)->numChildren = 0;
        memcpy((*self)->partialKey(), old->partialKey(),
               (*self)->partialKeyLen);
        getOrCreateChild(*self, old->partialKey()[partialKeyIndex],
                         allocators) = old;
        old->parent = *self;
        old->parentsIndex = old->partialKey()[partialKeyIndex];
        maxVersion(old, impl) = oldMaxVersion;
        // Trim the consumed prefix + edge byte off the old node's partial key.
        memmove(old->partialKey(), old->partialKey() + partialKeyIndex + 1,
                old->partialKeyLen - (partialKeyIndex + 1));
        old->partialKeyLen -= partialKeyIndex + 1;
        // We would consider decreasing capacity here, but we can't invalidate
        // old since it's not on the search path. setOldestVersion will clean it
        // up.
      }
      key = key.subspan(partialKeyIndex, key.size() - partialKeyIndex);
    } else {
      // Consider adding a partial key
      if ((*self)->numChildren == 0 && !(*self)->entryPresent) {
        // Fresh leaf: absorb the whole remaining key as its partial key.
        assert((*self)->getCapacity() >= int(key.size()));
        (*self)->partialKeyLen = key.size();
        memcpy((*self)->partialKey(), key.data(), (*self)->partialKeyLen);
        key = key.subspan((*self)->partialKeyLen,
                          key.size() - (*self)->partialKeyLen);
      }
    }
    if constexpr (kBegin) {
      auto &m = maxVersion(*self, impl);
      assert(writeVersion >= m);
      m = writeVersion;
    }
    if (key.size() == 0) {
      return *self;
    }
    if constexpr (!kBegin) {
      // End keys bump maxVersion only for strict prefixes of the key.
      auto &m = maxVersion(*self, impl);
      assert(writeVersion >= m);
      m = writeVersion;
    }
    // Descend, creating the child if needed.
    auto &child = getOrCreateChild(*self, key.front(), allocators);
    if (!child) {
      child = allocators->node0.allocate(key.size() - 1);
      child->numChildren = 0;
      child->entryPresent = false;
      child->partialKeyLen = 0;
      child->parent = *self;
      child->parentsIndex = key.front();
      maxVersion(child, impl) =
          kBegin ? writeVersion : std::numeric_limits<int64_t>::lowest();
    }
    self = &child;
    key = key.subspan(1, key.size() - 1);
  }
}
// Free every node in the tree rooted at `root`, using an explicit stack
// instead of recursion.
void destroyTree(Node *root) {
  Arena arena;
  auto stack = vector<Node *>(arena);
  stack.push_back(root);
#if SHOW_MEMORY
  for (auto *iter = root; iter != nullptr; iter = nextPhysical(iter)) {
    removeNode(iter);
    removeKey(iter);
  }
#endif
  while (stack.size() != 0) {
    auto *node = stack.back();
    stack.pop_back();
    // Push each child before freeing the parent.
    int c = getChildGeq(node, 0);
    while (c >= 0) {
      auto *child = getChildExists(node, c);
      assert(child != nullptr);
      stack.push_back(child);
      c = getChildGeq(node, c + 1);
    }
    safe_free(node, node->size());
  }
}
// Record a write of `key` at `writeVersion`. Creates the entry if absent,
// initializing its rangeVersion from the successor's rangeVersion (the range
// covering the gap where the key lands), falling back to oldestVersion when
// there is no successor.
void addPointWrite(Node *&root, int64_t oldestVersion,
                   std::span<const uint8_t> key, int64_t writeVersion,
                   NodeAllocators *allocators, ConflictSet::Impl *impl) {
  auto *n = insert<true>(&root, key, writeVersion, allocators, impl);
  if (!n->entryPresent) {
    // Look up the successor before marking the entry present.
    auto *p = nextLogical(n);
    addKey(n);
    n->entryPresent = true;
    n->entry.pointVersion = writeVersion;
    maxVersion(n, impl) = writeVersion;
    n->entry.rangeVersion =
        p != nullptr ? p->entry.rangeVersion : oldestVersion;
  } else {
    assert(writeVersion >= n->entry.pointVersion);
    n->entry.pointVersion = writeVersion;
  }
}
// Record a write of [begin, end) at `writeVersion`: materialize entries at
// both endpoints, set the end entry's rangeVersion to writeVersion so it
// covers the interior, and erase all entries strictly between them.
void addWriteRange(Node *&root, int64_t oldestVersion,
                   std::span<const uint8_t> begin, std::span<const uint8_t> end,
                   int64_t writeVersion, NodeAllocators *allocators,
                   ConflictSet::Impl *impl) {
  int lcp = longestCommonPrefix(begin.data(), end.data(),
                                std::min(begin.size(), end.size()));
  // [k, k + "\x00") contains exactly the key k: degrade to a point write.
  if (lcp == int(begin.size()) && end.size() == begin.size() + 1 &&
      end.back() == 0) {
    return addPointWrite(root, oldestVersion, begin, writeVersion, allocators,
                         impl);
  }
  const bool beginIsPrefix = lcp == int(begin.size());
  // Physically walk down the common prefix, bumping maxVersion along the way,
  // so both endpoint insertions can start from the deepest shared node.
  auto remaining = begin.subspan(0, lcp);
  auto *n = root;
  for (;;) {
    if (int(remaining.size()) <= n->partialKeyLen) {
      break;
    }
    int i = longestCommonPrefix(n->partialKey(), remaining.data(),
                                n->partialKeyLen);
    if (i != n->partialKeyLen) {
      break;
    }
    auto *child = getChild(n, remaining[n->partialKeyLen]);
    if (child == nullptr) {
      break;
    }
    auto &m = maxVersion(n, impl);
    assert(writeVersion >= m);
    m = writeVersion;
    remaining = remaining.subspan(n->partialKeyLen + 1,
                                  remaining.size() - (n->partialKeyLen + 1));
    n = child;
  }
  Node **useAsRoot = &getInTree(n, impl);
  int consumed = lcp - remaining.size();
  begin = begin.subspan(consumed, begin.size() - consumed);
  end = end.subspan(consumed, end.size() - consumed);
  auto *beginNode =
      insert<true>(useAsRoot, begin, writeVersion, allocators, impl);
  const bool insertedBegin = !beginNode->entryPresent;
  addKey(beginNode);
  beginNode->entryPresent = true;
  if (insertedBegin) {
    // New entry: inherit the covering rangeVersion from the successor.
    auto *p = nextLogical(beginNode);
    beginNode->entry.rangeVersion =
        p != nullptr ? p->entry.rangeVersion : oldestVersion;
    beginNode->entry.pointVersion = writeVersion;
    maxVersion(beginNode, impl) = writeVersion;
  }
  auto &m = maxVersion(beginNode, impl);
  assert(writeVersion >= m);
  m = writeVersion;
  assert(writeVersion >= beginNode->entry.pointVersion);
  beginNode->entry.pointVersion = writeVersion;
  auto *endNode = insert<false>(useAsRoot, end, writeVersion, allocators, impl);
  const bool insertedEnd = !endNode->entryPresent;
  addKey(endNode);
  endNode->entryPresent = true;
  if (insertedEnd) {
    // The end key itself is exclusive: its pointVersion is whatever range
    // previously covered it, not writeVersion.
    auto *p = nextLogical(endNode);
    endNode->entry.pointVersion =
        p != nullptr ? p->entry.rangeVersion : oldestVersion;
    auto &m = maxVersion(endNode, impl);
    m = std::max(m, endNode->entry.pointVersion);
  }
  endNode->entry.rangeVersion = writeVersion;
  if (beginIsPrefix && insertedEnd) {
    // beginNode may have been invalidated when inserting end. TODO can we do
    // better?
    beginNode = insert<true>(useAsRoot, begin, writeVersion, allocators, impl);
    assert(beginNode->entryPresent);
  }
  // Entries strictly inside the range are now redundant; erase them.
  for (beginNode = nextLogical(beginNode); beginNode != endNode;
       beginNode = erase(beginNode, allocators, impl, endNode)) {
  }
}
// Return an Iterator at the first entry whose key is >= `key`: cmp == 0 when
// the entry's key equals `key` exactly, cmp == 1 when it is strictly greater.
// Returns {nullptr, 1} when no entry is >= `key`.
Iterator firstGeq(Node *n, const std::span<const uint8_t> key) {
  auto remaining = key;
  for (;;) {
    if (remaining.size() == 0) {
      // Key fully consumed: exact match if this node holds an entry,
      // otherwise the answer is the first entry in this subtree.
      if (n->entryPresent) {
        return {n, 0};
      }
      int c = getChildGeq(n, 0);
      assert(c >= 0);
      n = getChildExists(n, c);
      goto downLeftSpine;
    }
    auto *child = getChild(n, remaining[0]);
    if (child == nullptr) {
      // No child for the next byte: descend into the next-larger child, or
      // climb to the next sibling subtree.
      int c = getChildGeq(n, remaining[0]);
      if (c >= 0) {
        n = getChildExists(n, c);
        goto downLeftSpine;
      } else {
        n = nextSibling(n);
        if (n == nullptr) {
          return {nullptr, 1};
        }
        goto downLeftSpine;
      }
    }
    n = child;
    remaining = remaining.subspan(1, remaining.size() - 1);
    if (n->partialKeyLen > 0) {
      int commonLen = std::min<int>(n->partialKeyLen, remaining.size());
      int i = longestCommonPrefix(n->partialKey(), remaining.data(), commonLen);
      if (i < commonLen) {
        auto c = n->partialKey()[i] <=> remaining[i];
        if (c > 0) {
          // n's subtree is entirely greater than the key.
          goto downLeftSpine;
        } else {
          // n's subtree is entirely less than the key; go to the next
          // sibling subtree.
          n = nextSibling(n);
          // Fix: nextSibling returns nullptr when n's subtree holds the
          // greatest keys in the tree (the analogous branch above, and
          // checkPointRead's equivalent branch, already handle this).
          // Without this check, downLeftSpine dereferences nullptr.
          if (n == nullptr) {
            return {nullptr, 1};
          }
          goto downLeftSpine;
        }
      }
      if (commonLen == n->partialKeyLen) {
        // partial key matches
        remaining = remaining.subspan(commonLen, remaining.size() - commonLen);
      } else if (n->partialKeyLen > int(remaining.size())) {
        // n is the first physical node greater than remaining, and there's no
        // eq node
        goto downLeftSpine;
      }
    }
  }
downLeftSpine:
  // Descend to the smallest entry within n's subtree.
  for (;;) {
    if (n->entryPresent) {
      return {n, 1};
    }
    int c = getChildGeq(n, 0);
    assert(c >= 0);
    n = getChildExists(n, c);
  }
}
// Implementation state for ConflictSet: the radix tree root, allocators, the
// oldest trackable version, and bookkeeping for incremental garbage
// collection in setOldestVersion.
struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
  // Classify each read as TooOld (read version below oldestVersion), Commit
  // (no conflicting write), or Conflict.
  void check(const ReadRange *reads, Result *result, int count) {
    for (int i = 0; i < count; ++i) {
      const auto &r = reads[i];
      auto begin = std::span<const uint8_t>(r.begin.p, r.begin.len);
      auto end = std::span<const uint8_t>(r.end.p, r.end.len);
      result[i] =
          reads[i].readVersion < oldestVersion ? TooOld
          : (end.size() > 0
                 ? checkRangeRead(root, begin, end, reads[i].readVersion, this)
                 : checkPointRead(root, begin, reads[i].readVersion, this))
              ? Commit
              : Conflict;
    }
  }
  // Record writes at writeVersion. An empty end means a point write.
  // keyUpdates accumulates a budget that setOldestVersion spends on cleanup.
  void addWrites(const WriteRange *writes, int count, int64_t writeVersion) {
    for (int i = 0; i < count; ++i) {
      const auto &w = writes[i];
      auto begin = std::span<const uint8_t>(w.begin.p, w.begin.len);
      auto end = std::span<const uint8_t>(w.end.p, w.end.len);
      if (w.end.len > 0) {
        keyUpdates += 3;
        addWriteRange(root, oldestVersion, begin, end, writeVersion,
                      &allocators, this);
      } else {
        keyUpdates += 2;
        addPointWrite(root, oldestVersion, begin, writeVersion, &allocators,
                      this);
      }
    }
  }
  // Advance the oldest version and perform up to keyUpdates steps of
  // incremental cleanup, resuming from removalKey: entries no newer than
  // oldestVersion are erased (any read they'd conflict with gets TooOld
  // anyway), other nodes get their capacity trimmed.
  void setOldestVersion(int64_t oldestVersion) {
    if (oldestVersion <= this->oldestVersion) {
      return;
    }
    this->oldestVersion = oldestVersion;
#ifdef NDEBUG
    // This is here for performance reasons, since we want to amortize the cost
    // of storing the search path as a string. In tests, we want to exercise the
    // rest of the code often.
    if (keyUpdates < 100) {
      return;
    }
#endif
    Node *n = firstGeq(root, removalKey).n;
    // There's no way to erase removalKey without introducing a key after it
    assert(n != nullptr);
    // Don't erase the root
    if (n == root) {
      n = nextPhysical(n);
    }
    for (; keyUpdates > 0 && n != nullptr; --keyUpdates) {
      if (n->entryPresent && std::max(n->entry.pointVersion,
                                      n->entry.rangeVersion) <= oldestVersion) {
        // Any transaction n would have prevented from committing is
        // going to fail with TooOld anyway.
        // There's no way to insert a range such that range version of the right
        // node is greater than the point version of the left node
        assert(n->entry.rangeVersion <= oldestVersion);
        Node *dummy = nullptr;
        n = erase(n, &allocators, this, dummy);
      } else {
        maybeDecreaseCapacity(n, &allocators, this);
        n = nextPhysical(n);
      }
    }
    // Remember where cleanup stopped so the next call can resume there.
    if (n == nullptr) {
      removalKey = {};
    } else {
      removalKeyArena = Arena();
      removalKey = getSearchPath(removalKeyArena, n);
    }
  }
  explicit Impl(int64_t oldestVersion) : oldestVersion(oldestVersion) {
    // Insert ""
    root = allocators.node0.allocate(0);
    root->numChildren = 0;
    root->parent = nullptr;
    rootMaxVersion = oldestVersion;
    root->entryPresent = false;
    root->partialKeyLen = 0;
    addKey(root);
    root->entryPresent = true;
    root->entry.pointVersion = oldestVersion;
    root->entry.rangeVersion = oldestVersion;
  }
  ~Impl() { destroyTree(root); }
  NodeAllocators allocators;
  // Arena backing removalKey's bytes.
  Arena removalKeyArena;
  // Search path where the previous setOldestVersion pass stopped.
  std::span<const uint8_t> removalKey;
  // Cleanup budget; incremented by addWrites, spent by setOldestVersion.
  int64_t keyUpdates = 10;
  Node *root;
  // maxVersion storage for the root (other nodes store it in their parent).
  int64_t rootMaxVersion;
  int64_t oldestVersion;
  // NOTE(review): not referenced in the code visible here — confirm usage.
  int64_t totalBytes = 0;
};
// Returns a mutable reference to the cached max version for n's subtree. The
// value is stored in n's parent's child slot, except for the root (which has
// no parent) whose value lives on the Impl.
// Precondition - an entry for index must exist in the node
int64_t &maxVersion(Node *n, ConflictSet::Impl *impl) {
  int index = n->parentsIndex;
  n = n->parent;
  if (n == nullptr) {
    return impl->rootMaxVersion;
  }
  switch (n->getType()) {
  case Type_Node0:           // GCOVR_EXCL_LINE
    __builtin_unreachable(); // GCOVR_EXCL_LINE
  case Type_Node3: {
    auto *n3 = static_cast<Node3 *>(n);
    // Locate index's slot within the node's child array.
    int i = getNodeIndex(n3, index);
    return n3->children[i].childMaxVersion;
  }
  case Type_Node16: {
    auto *n16 = static_cast<Node16 *>(n);
    int i = getNodeIndex(n16, index);
    return n16->children[i].childMaxVersion;
  }
  case Type_Node48: {
    auto *n48 = static_cast<Node48 *>(n);
    assert(n48->bitSet.test(index));
    // Node48 maps the key byte through an indirection table.
    return n48->children[n48->index[index]].childMaxVersion;
  }
  case Type_Node256: {
    auto *n256 = static_cast<Node256 *>(n);
    assert(n256->bitSet.test(index));
    // Node256 is indexed directly by the key byte.
    return n256->children[index].childMaxVersion;
  }
  default:                   // GCOVR_EXCL_LINE
    __builtin_unreachable(); // GCOVR_EXCL_LINE
  }
}
// Returns a reference to the pointer slot through which n is reachable:
// either the Impl's root slot, or its parent's child slot.
Node *&getInTree(Node *n, ConflictSet::Impl *impl) {
  if (n->parent == nullptr) {
    return impl->root;
  }
  return getChildExists(n->parent, n->parentsIndex);
}
// ==================== END IMPLEMENTATION ====================
// GCOVR_EXCL_START
// Thin forwarding wrapper; reads allocate nothing, so no byte accounting.
void ConflictSet::check(const ReadRange *reads, Result *results,
                        int count) const {
  impl->check(reads, results, count);
}
void ConflictSet::addWrites(const WriteRange *writes, int count,
                            int64_t writeVersion) {
  // Reset the allocation delta so the bytes allocated by this call can be
  // folded into totalBytes afterwards.
  mallocBytesDelta = 0;
  impl->addWrites(writes, count, writeVersion);
  impl->totalBytes += mallocBytesDelta;
#if SHOW_MEMORY
  // Cross-check incremental accounting against the global allocator tally.
  if (impl->totalBytes != mallocBytes) {
    abort();
  }
#endif
}
void ConflictSet::setOldestVersion(int64_t oldestVersion) {
  // Garbage collection may free or shrink nodes; capture the delta so
  // totalBytes stays accurate.
  mallocBytesDelta = 0;
  impl->setOldestVersion(oldestVersion);
  impl->totalBytes += mallocBytesDelta;
#if SHOW_MEMORY
  // Cross-check incremental accounting against the global allocator tally.
  if (impl->totalBytes != mallocBytes) {
    abort();
  }
#endif
}
// Total bytes currently attributed to this conflict set's allocations.
int64_t ConflictSet::getBytes() const {
  return impl->totalBytes;
}
// The comma expression zeroes mallocBytesDelta before Impl allocates, so the
// delta observed afterwards covers exactly this Impl's own allocations.
ConflictSet::ConflictSet(int64_t oldestVersion)
    : impl((mallocBytesDelta = 0,
            new (safe_malloc(sizeof(Impl))) Impl{oldestVersion})) {
  impl->totalBytes += mallocBytesDelta;
}
// A moved-from ConflictSet holds a null impl, in which case there is nothing
// to release.
ConflictSet::~ConflictSet() {
  if (impl == nullptr) {
    return;
  }
  impl->~Impl();
  safe_free(impl, sizeof(*impl));
}
// Move construction transfers ownership; the source is left with a null impl,
// which the destructor treats as "nothing to free".
ConflictSet::ConflictSet(ConflictSet &&other) noexcept
    : impl(std::exchange(other.impl, nullptr)) {}
// Move assignment. Destroys the currently-held impl before taking other's;
// the previous implementation overwrote the pointer without destroying it,
// leaking the old Impl and its tree. Self-move leaves *this unchanged.
ConflictSet &ConflictSet::operator=(ConflictSet &&other) noexcept {
  if (this != &other) {
    if (impl != nullptr) {
      impl->~Impl();
      safe_free(impl, sizeof(*impl));
    }
    impl = std::exchange(other.impl, nullptr);
  }
  return *this;
}
// Plain-C spellings of the nested ConflictSet types, used by the extern "C"
// ABI wrappers.
using ConflictSet_Result = ConflictSet::Result;
using ConflictSet_Key = ConflictSet::Key;
using ConflictSet_ReadRange = ConflictSet::ReadRange;
using ConflictSet_WriteRange = ConflictSet::WriteRange;
extern "C" {
__attribute__((__visibility__("default"))) void
ConflictSet_check(void *cs, const ConflictSet_ReadRange *reads,
ConflictSet_Result *results, int count) {
((ConflictSet::Impl *)cs)->check(reads, results, count);
}
__attribute__((__visibility__("default"))) void
ConflictSet_addWrites(void *cs, const ConflictSet_WriteRange *writes, int count,
int64_t writeVersion) {
((ConflictSet::Impl *)cs)->addWrites(writes, count, writeVersion);
}
__attribute__((__visibility__("default"))) void
ConflictSet_setOldestVersion(void *cs, int64_t oldestVersion) {
((ConflictSet::Impl *)cs)->setOldestVersion(oldestVersion);
}
__attribute__((__visibility__("default"))) void *
ConflictSet_create(int64_t oldestVersion) {
return new (safe_malloc(sizeof(ConflictSet::Impl)))
ConflictSet::Impl{oldestVersion};
}
__attribute__((__visibility__("default"))) void ConflictSet_destroy(void *cs) {
using Impl = ConflictSet::Impl;
((Impl *)cs)->~Impl();
safe_free(cs, sizeof(Impl));
}
__attribute__((__visibility__("default"))) int64_t
ConflictSet_getBytes(void *cs) {
using Impl = ConflictSet::Impl;
return ((Impl *)cs)->totalBytes;
}
}
namespace {
// Reconstructs a node's full key by walking parent pointers, then renders it
// in printable form. Returns "<end>" for the null node.
std::string getSearchPathPrintable(Node *n) {
  if (n == nullptr) {
    return "<end>";
  }
  // Collect bytes leaf-to-root (each node contributes its partial key
  // reversed, plus the edge byte to its parent), then reverse once.
  std::string path;
  for (;;) {
    for (int i = n->partialKeyLen - 1; i >= 0; --i) {
      path.push_back(char(n->partialKey()[i]));
    }
    if (n->parent == nullptr) {
      break;
    }
    path.push_back(char(n->parentsIndex));
    n = n->parent;
  }
  std::reverse(path.begin(), path.end());
  if (path.empty()) {
    return std::string();
  }
  return printable(std::string_view(path.data(), path.size())); // NOLINT
}
// Renders a node's one-byte edge label (absent for the root) followed by its
// partial key, in printable form. Returns "<end>" for the null node.
// Fix: removed an unused local `Arena arena;` that served no purpose here.
std::string getPartialKeyPrintable(Node *n) {
  if (n == nullptr) {
    return "<end>";
  }
  // The root has no parent edge, so it contributes no leading byte.
  auto result = std::string((const char *)&n->parentsIndex,
                            n->parent == nullptr ? 0 : 1) +
                std::string((const char *)n->partialKey(), n->partialKeyLen);
  return printable(result); // NOLINT
}
// Returns the first key strictly greater than every key having `str` as a
// prefix: `str` with its trailing 0xff bytes dropped and the last remaining
// byte incremented. Sets ok=false (returning "") when str is empty or all
// 0xff bytes, for which no such key exists.
// Fixes: replaced C-style reference casts with value conversions, and avoided
// the unsigned wraparound of `str.size() - 1` on an empty string by computing
// the start index in signed arithmetic.
std::string strinc(std::string_view str, bool &ok) {
  // Scan backwards for the last byte that can be incremented.
  int index;
  for (index = int(str.size()) - 1; index >= 0; index--)
    if (uint8_t(str[index]) != 255)
      break;
  // Must not be called with a string that consists only of zero or more '\xff'
  // bytes.
  if (index < 0) {
    ok = false;
    return {};
  }
  ok = true;
  auto r = std::string(str.substr(0, index + 1));
  r.back() = char(uint8_t(r.back()) + 1);
  return r;
}
// Materializes the arena-backed search path of n as an owned std::string.
std::string getSearchPath(Node *n) {
  assert(n != nullptr);
  Arena arena;
  auto path = getSearchPath(arena, n);
  return {(const char *)path.data(), path.size()};
}
// Emits the tree rooted at `node` as a Graphviz digraph with fixed node
// positions (pos="x,y!"), labeling each node with its max version and, when
// an entry is present, its point/range versions.
[[maybe_unused]] void debugPrintDot(FILE *file, Node *node,
                                    ConflictSet::Impl *impl) {
  // Grid spacing between printed nodes, in Graphviz position units.
  constexpr int kSeparation = 3;
  struct DebugDotPrinter {
    explicit DebugDotPrinter(FILE *file, ConflictSet::Impl *impl)
        : file(file), impl(impl) {}
    // Depth-first print; x advances monotonically left-to-right, y decreases
    // with depth so the layout resembles a tree.
    void print(Node *n, int y = 0) {
      assert(n != nullptr);
      if (n->entryPresent) {
        fprintf(file,
                "  k_%p [label=\"m=%" PRId64 " p=%" PRId64 " r=%" PRId64
                "\n%s\", pos=\"%d,%d!\"];\n",
                (void *)n, maxVersion(n, impl), n->entry.pointVersion,
                n->entry.rangeVersion, getPartialKeyPrintable(n).c_str(), x, y);
      } else {
        fprintf(file, "  k_%p [label=\"m=%" PRId64 "\n%s\", pos=\"%d,%d!\"];\n",
                (void *)n, maxVersion(n, impl),
                getPartialKeyPrintable(n).c_str(), x, y);
      }
      x += kSeparation;
      for (int child = getChildGeq(n, 0); child >= 0;
           child = getChildGeq(n, child + 1)) {
        auto *c = getChildExists(n, child);
        fprintf(file, "  k_%p -> k_%p;\n", (void *)n, (void *)c);
        print(c, y - kSeparation);
      }
    }
    int x = 0;
    FILE *file;
    ConflictSet::Impl *impl;
  };
  fprintf(file, "digraph ConflictSet {\n");
  fprintf(file, "  node [shape = box];\n");
  assert(node != nullptr);
  DebugDotPrinter printer{file, impl};
  printer.print(node);
  fprintf(file, "}\n");
}
// Recursively verifies that every reachable child points back at its actual
// parent, reporting mismatches to stderr and clearing `success`.
void checkParentPointers(Node *node, bool &success) {
  for (int idx = getChildGeq(node, 0); idx >= 0;
       idx = getChildGeq(node, idx + 1)) {
    Node *child = getChildExists(node, idx);
    if (child->parent != node) {
      fprintf(stderr, "%s child %d has parent pointer %p. Expected %p\n",
              getSearchPathPrintable(node).c_str(), idx, (void *)child->parent,
              (void *)node);
      success = false;
    }
    checkParentPointers(child, success);
  }
}
// Adapts a string_view key to the span-based firstGeq overload.
Iterator firstGeq(Node *n, std::string_view key) {
  auto bytes = std::span<const uint8_t>(
      reinterpret_cast<const uint8_t *>(key.data()), key.size());
  return firstGeq(n, bytes);
}
// Recomputes the expected cached max version for `node`'s subtree from first
// principles and compares it against maxVersion(node). Returns the expected
// value so parents can aggregate it. Versions at or below oldestVersion are
// exempt from the comparison (cleanup may have already discarded them).
[[maybe_unused]] int64_t checkMaxVersion(Node *root, Node *node,
                                         int64_t oldestVersion, bool &success,
                                         ConflictSet::Impl *impl) {
  int64_t expected = std::numeric_limits<int64_t>::lowest();
  if (node->entryPresent) {
    expected = std::max(expected, node->entry.pointVersion);
  }
  for (int i = getChildGeq(node, 0); i >= 0; i = getChildGeq(node, i + 1)) {
    auto *child = getChildExists(node, i);
    expected = std::max(
        expected, checkMaxVersion(root, child, oldestVersion, success, impl));
    if (child->entryPresent) {
      expected = std::max(expected, child->entry.rangeVersion);
    }
  }
  // Also fold in the range version of the first key past this subtree.
  // NOTE(review): this takes the search path of `root`, not `node` — `root`
  // is constant across the recursion, so every level computes the same
  // successor. Confirm whether `node` was intended here.
  auto key = getSearchPath(root);
  bool ok;
  auto inc = strinc(key, ok);
  if (ok) {
    auto borrowed = firstGeq(root, inc);
    if (borrowed.n != nullptr) {
      expected = std::max(expected, borrowed.n->entry.rangeVersion);
    }
  }
  if (maxVersion(node, impl) > oldestVersion &&
      maxVersion(node, impl) != expected) {
    fprintf(stderr, "%s has max version %" PRId64 " . Expected %" PRId64 "\n",
            getSearchPathPrintable(node).c_str(), maxVersion(node, impl),
            expected);
    success = false;
  }
  return expected;
}
// Returns the number of entries reachable from `node`, and reports any child
// subtree containing no entries (such a subtree should not exist in a
// well-formed tree). Fix: removed an unused local `Arena arena;` from the
// error branch.
[[maybe_unused]] int64_t checkEntriesExist(Node *node, bool &success) {
  int64_t total = node->entryPresent;
  for (int i = getChildGeq(node, 0); i >= 0; i = getChildGeq(node, i + 1)) {
    auto *child = getChildExists(node, i);
    int64_t e = checkEntriesExist(child, success);
    total += e;
    if (e == 0) {
      fprintf(stderr, "%s has child %02x with no reachable entries\n",
              getSearchPathPrintable(node).c_str(), i);
      success = false;
    }
  }
  return total;
}
// Verifies that every node holds at least the minimum number of children plus
// entries required for its node type, recursing over the whole subtree.
[[maybe_unused]] void checkMemoryBoundInvariants(Node *node, bool &success) {
  int requiredMin;
  switch (node->getType()) {
  case Type_Node0:
    requiredMin = 0;
    break;
  case Type_Node3:
    requiredMin = kMinChildrenNode3;
    break;
  case Type_Node16:
    requiredMin = kMinChildrenNode16;
    break;
  case Type_Node48:
    requiredMin = kMinChildrenNode48;
    break;
  case Type_Node256:
    requiredMin = kMinChildrenNode256;
    break;
  default:
    abort();
  }
  const int occupancy = node->numChildren + int(node->entryPresent);
  if (occupancy < requiredMin) {
    fprintf(stderr,
            "%s has %d children + %d entries, which is less than the minimum "
            "required %d\n",
            getSearchPathPrintable(node).c_str(), node->numChildren,
            int(node->entryPresent), requiredMin);
    success = false;
  }
  // TODO check that the max capacity property eventually holds
  for (int c = getChildGeq(node, 0); c >= 0; c = getChildGeq(node, c + 1)) {
    checkMemoryBoundInvariants(getChildExists(node, c), success);
  }
}
// Runs every structural invariant check over the tree; each checker clears
// `ok` on failure (after printing a diagnostic). Returns true iff all passed.
bool checkCorrectness(Node *node, int64_t oldestVersion,
                      ConflictSet::Impl *impl) {
  bool ok = true;
  checkParentPointers(node, ok);
  checkMaxVersion(node, node, oldestVersion, ok, impl);
  checkEntriesExist(node, ok);
  checkMemoryBoundInvariants(node, ok);
  return ok;
}
} // namespace
namespace std {
// Provide a definition for this libstdc++ internal symbol so linking succeeds
// without pulling in the exception machinery.
// NOTE(review): assumes length_error conditions are impossible by
// construction in this library — calls collapse to unreachable.
void __throw_length_error(const char *) { __builtin_unreachable(); }
} // namespace std
#if SHOW_MEMORY
// Running totals and high-water marks maintained by addNode/removeNode and
// addKey/removeKey (compiled only under SHOW_MEMORY).
int64_t nodeBytes = 0;
int64_t peakNodeBytes = 0;
int64_t partialCapacityBytes = 0;
int64_t peakPartialCapacityBytes = 0;
int64_t totalKeys = 0;
int64_t peakKeys = 0;
int64_t keyBytes = 0;
int64_t peakKeyBytes = 0;
// Maps a node's runtime type to the size of its concrete struct.
int64_t getNodeSize(struct Node *n) {
  const auto type = n->getType();
  if (type == Type_Node0)
    return sizeof(Node0);
  if (type == Type_Node3)
    return sizeof(Node3);
  if (type == Type_Node16)
    return sizeof(Node16);
  if (type == Type_Node48)
    return sizeof(Node48);
  if (type == Type_Node256)
    return sizeof(Node256);
  abort();
}
// Length of n's full key: the partial keys of every node on the path to the
// root, plus one byte for each parent edge.
int64_t getSearchPathLength(Node *n) {
  assert(n != nullptr);
  int64_t length = n->partialKeyLen;
  while (n->parent != nullptr) {
    n = n->parent;
    length += 1 + n->partialKeyLen;
  }
  return length;
}
// Accounts for a newly allocated node's struct size and partial-key capacity,
// updating the corresponding high-water marks.
void addNode(Node *n) {
  nodeBytes += getNodeSize(n);
  partialCapacityBytes += n->getCapacity();
  peakNodeBytes = std::max(peakNodeBytes, nodeBytes);
  peakPartialCapacityBytes =
      std::max(peakPartialCapacityBytes, partialCapacityBytes);
}
// Undoes the accounting performed in addNode for a node being freed.
void removeNode(Node *n) {
  partialCapacityBytes -= n->getCapacity();
  nodeBytes -= getNodeSize(n);
}
// Accounts for a key being added at n. Called before entryPresent is set;
// nodes that already hold an entry are skipped.
void addKey(Node *n) {
  if (n->entryPresent) {
    return;
  }
  ++totalKeys;
  keyBytes += getSearchPathLength(n);
  peakKeys = std::max(peakKeys, totalKeys);
  peakKeyBytes = std::max(peakKeyBytes, keyBytes);
}
// Undoes the accounting performed in addKey; only nodes that actually hold an
// entry contribute to the key stats.
void removeKey(Node *n) {
  if (!n->entryPresent) {
    return;
  }
  --totalKeys;
  keyBytes -= getSearchPathLength(n);
}
// Prints current and peak memory statistics when the program exits (via the
// destructor of the global `peakPrinter` instance below).
struct __attribute__((visibility("default"))) PeakPrinter {
  ~PeakPrinter() {
    printf("malloc bytes: %g\n", double(mallocBytes));
    printf("Peak malloc bytes: %g\n", double(peakMallocBytes));
    printf("Node bytes: %g\n", double(nodeBytes));
    printf("Peak node bytes: %g\n", double(peakNodeBytes));
    printf("Expected worst case node bytes: %g\n",
           double(peakKeys * kBytesPerKey));
    printf("Key bytes: %g\n", double(keyBytes));
    printf("Peak key bytes: %g (not sharing common prefixes)\n",
           double(peakKeyBytes));
    printf("Partial key capacity bytes: %g\n", double(partialCapacityBytes));
    printf("Peak partial key capacity bytes: %g\n",
           double(peakPartialCapacityBytes));
  }
} peakPrinter;
#endif
#ifdef ENABLE_MAIN
// Builds a small example tree ("and".."ant" range plus three point writes)
// and dumps it in Graphviz format to stdout.
void printTree() {
  int64_t writeVersion = 0;
  ConflictSet::Impl cs{writeVersion};
  // NOTE(review): refImpl and arena appear unused below — confirm whether
  // they are needed (e.g. for side effects) or are leftovers.
  ReferenceImpl refImpl{writeVersion};
  Arena arena;
  ConflictSet::WriteRange write;
  write.begin = "and"_s;
  write.end = "ant"_s;
  cs.addWrites(&write, 1, ++writeVersion);
  // An empty end key denotes a point write.
  write.begin = "any"_s;
  write.end = ""_s;
  cs.addWrites(&write, 1, ++writeVersion);
  write.begin = "are"_s;
  write.end = ""_s;
  cs.addWrites(&write, 1, ++writeVersion);
  write.begin = "art"_s;
  write.end = ""_s;
  cs.addWrites(&write, 1, ++writeVersion);
  debugPrintDot(stdout, cs.root, &cs);
}
// Entry point for the ENABLE_MAIN debug build: print the example tree.
int main(void) { printTree(); }
#endif
#ifdef ENABLE_FUZZ
// libFuzzer entry point: replays the fuzz input through the test driver and
// validates the tree's invariants after every step, dumping the tree in
// Graphviz format before aborting on any failure.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  TestDriver<ConflictSet::Impl> driver{data, size};
  for (;;) {
    bool done = driver.next();
    if (!driver.ok) {
      debugPrintDot(stdout, driver.cs.root, &driver.cs);
      fflush(stdout);
      abort();
    }
#if DEBUG_VERBOSE && !defined(NDEBUG)
    fprintf(stderr, "Check correctness\n");
#endif
    bool success =
        checkCorrectness(driver.cs.root, driver.cs.oldestVersion, &driver.cs);
    if (!success) {
      debugPrintDot(stdout, driver.cs.root, &driver.cs);
      fflush(stdout);
      abort();
    }
    if (done) {
      break;
    }
  }
  return 0;
}
#endif
// GCOVR_EXCL_STOP