From 8a6973e2cd647ed69c0b9b9e7a7f2ba72c3745f0 Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Wed, 17 Jan 2024 12:26:41 -0800 Subject: [PATCH] Implement all but check and addWrites --- ConflictSet.cpp | 117 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 115 insertions(+), 2 deletions(-) diff --git a/ConflictSet.cpp b/ConflictSet.cpp index 94e25d6..8c7eef5 100644 --- a/ConflictSet.cpp +++ b/ConflictSet.cpp @@ -1,14 +1,127 @@ #include "ConflictSet.h" +#include +#include #include +namespace { +// A node in the tree representing write conflict history. This tree maintains +// several invariants: + +// 1. BST invariant: all keys in the tree rooted at the left child of a node +// compare less than that node's key, and all keys in the tree rooted at the +// right child of a node compare greater than that node's key. +// 2. Heap invariant: the priority of a node is greater than all the priorities +// of its children (transitively) +// 3. Max invariant: `maxVersion` is the max among all values of `pointVersion` +// and `beyondVersion` for this node and its children (transitively) +// 4. The lowest key (an empty byte sequence) is always physically present in +// the tree so that "last less than or equal" queries are always well-defined. + +// Logically, the contents of the tree represent a "range map" where all of the +// infinitely many points in the key space are associated with a writeVersion. +// If a point is physically present in the tree, then its writeVersion is its +// node's `pointVersion`. Otherwise, its writeVersion is the `rangeVersion` of +// the node with the last key less than point. +struct Node { + // See "Max invariant" above + int64_t maxVersion; + // The write version of the point in the key space represented by this node's + // key + int64_t pointVersion; + // The write version of the range immediately after this node's key, until + // just before the next key in the tree. I.e. (this key, next key) + int64_t rangeVersion; + // child[0] is the left child or nullptr. child[1] is the right child or + // nullptr + Node *child[2]; + // The parent of this node in the tree, or nullptr if this node is the root + Node *parent; + // As a treap, this tree satisfies the heap invariant on each node's priority + uint32_t priority; + // The length of this node's key + int len; + // The contents of this node's key + // uint8_t[len]; + + auto operator<=>(const Node &other) const { + const int minLen = std::min(len, other.len); + const int c = memcmp(this + 1, &other + 1, minLen); + return c != 0 ? c <=> 0 : len <=> other.len; + } + auto operator<=>(std::string_view other) const { + const int minLen = std::min(len, other.size()); + const int c = memcmp(this + 1, other.data(), minLen); + return c != 0 ? c <=> 0 : len <=> int(other.size()); + } +}; + +// TODO: use a better prng. This is technically vulnerable to a +// denial-of-service attack that can make conflict-checking linear in the +// number of nodes in the tree. +thread_local uint32_t gSeed = 1013904223L; +uint32_t fastRand() { + auto result = gSeed; + gSeed = gSeed * 1664525L + 1013904223L; + return result; +} + +// Note: `rangeVersion` is left uninitialized. +Node *createNode(std::string_view key, Node *parent, int64_t pointVersion) { + assert(key.size() <= std::numeric_limits::max()); + Node *result = (Node *)malloc(sizeof(Node) + key.size()); + result->maxVersion = pointVersion; + result->pointVersion = pointVersion; + result->child[0] = nullptr; + result->child[1] = nullptr; + result->parent = parent; + result->priority = fastRand(); + result->len = key.size(); + memcpy(result + 1, key.data(), key.size()); + return result; +} + +void destroyNode(Node *node) { + assert(node->child[0] == nullptr); + assert(node->child[1] == nullptr); + free(node); +} +} // namespace + struct ConflictSet::Impl { - explicit Impl(int64_t oldestVersion) noexcept {} + Node *root; + int64_t oldestVersion; + explicit Impl(int64_t oldestVersion) noexcept + : root(createNode("", nullptr, oldestVersion)), + oldestVersion(oldestVersion) { + root->rangeVersion = oldestVersion; + } void check(const ReadRange *reads, Result *results, int count) const {} void addWrites(const WriteRange *writes, int count) {} - void setOldestVersion(int64_t oldestVersion) {} + void setOldestVersion(int64_t oldestVersion) { + assert(oldestVersion > this->oldestVersion); + this->oldestVersion = oldestVersion; + } + + ~Impl() { + std::vector toFree; + if (root != nullptr) { + toFree.push_back(root); + } + while (toFree.size() > 0) { + Node *n = toFree.back(); + toFree.pop_back(); + for (int i = 0; i < 2; ++i) { + auto *c = std::exchange(n->child[i], nullptr); + if (c != nullptr) { + toFree.push_back(c); + } + } + destroyNode(n); + } + } }; void ConflictSet::check(const ReadRange *reads, Result *results,