conflict-set/ConflictSet.cpp

#include "ConflictSet.h"

#include <algorithm>
#include <cassert>
#include <compare>
#include <cstring>
#include <map>
#include <random>
#include <set>
#include <span>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

// ==================== BEGIN ARENA IMPL ====================

/// Group allocations with similar lifetimes to amortize the cost of malloc/free
///
/// Memory is handed out by the `operator new(..., Arena &)` overloads declared
/// below and is only released when the arena is destroyed or move-assigned
/// over. Arenas are movable but not copyable.
struct Arena {
  /// `initialSize` is the number of bytes to reserve up front. When it is
  /// <= 0 nothing is allocated until the first allocation request.
  explicit Arena(int initialSize = 0);
  /// O(log n) in the number of allocations
  ~Arena();
  /// Header of one malloc'd block; defined later in this file.
  struct ArenaImpl;
  Arena(const Arena &) = delete;
  Arena &operator=(const Arena &) = delete;
  /// Takes over `other`'s blocks and leaves `other` empty.
  Arena(Arena &&other) noexcept;
  Arena &operator=(Arena &&other) noexcept;

private:
  // Most recently allocated block, or nullptr if nothing has been allocated
  // yet. Older blocks are reachable through ArenaImpl::prev.
  ArenaImpl *impl = nullptr;
  // The allocation function needs direct access to `impl`.
  friend void *operator new(size_t size, std::align_val_t align, Arena &arena);
};

// Placement-delete counterparts of the arena allocation functions below. They
// are intentionally no-ops: arena memory is only released when the Arena's
// blocks are freed. The language calls the matching placement delete if a
// constructor throws inside a placement-new expression.
inline void operator delete(void *, std::align_val_t, Arena &) {}
/// Allocate `size` bytes aligned to `align` from `arena`. Defined below.
void *operator new(size_t size, std::align_val_t align, Arena &arena);
// The `Arena *` overloads are deleted so that passing a pointer instead of a
// reference is rejected at compile time.
void *operator new(size_t size, std::align_val_t align, Arena *arena) = delete;

inline void operator delete(void *, Arena &) {}
/// Allocate `size` bytes from `arena` with the strictest fundamental alignment.
inline void *operator new(size_t size, Arena &arena) {
  return operator new(size, std::align_val_t(alignof(std::max_align_t)), arena);
}
// Without this deleted overload, `new (&arena) T` would select the standard
// non-allocating placement new (which takes a void *) and construct the object
// on top of the Arena itself.
inline void *operator new(size_t size, Arena *arena) = delete;

// Array forms: forward to the corresponding scalar forms.
inline void operator delete[](void *, Arena &) {}
inline void *operator new[](size_t size, Arena &arena) {
  return operator new(size, arena);
}
inline void *operator new[](size_t size, Arena *arena) = delete;

inline void operator delete[](void *, std::align_val_t, Arena &) {}
inline void *operator new[](size_t size, std::align_val_t align, Arena &arena) {
  return operator new(size, align, arena);
}
inline void *operator new[](size_t size, std::align_val_t align,
                            Arena *arena) = delete;

/// Round the pointer `t` up to the next address that is a multiple of `align`
/// (returns `t` itself if it is already aligned). `align` must be a power of
/// two.
template <class T> T *align_up(T *t, size_t align) {
  const auto address = uintptr_t(t);
  const auto mask = uintptr_t(align) - 1;
  // Number of bytes to skip to reach the next multiple of `align`
  const auto padding = ((address + mask) & ~mask) - address;
  return reinterpret_cast<T *>(reinterpret_cast<char *>(t) + padding);
}

/// Round `unaligned` up to the next multiple of `align` (unchanged if it is
/// already a multiple). `align` must be a power of two.
constexpr inline int align_up(uint32_t unaligned, uint32_t align) {
  const uint32_t mask = align - 1;
  return (unaligned + mask) & ~mask;
}

/// Returns the smallest power of two >= x. Both 0 and 1 map to 1.
/// `x` must not exceed 2^31: for larger values the shift count below would be
/// 32 and the result would not fit in a uint32_t anyway.
constexpr inline uint32_t nextPowerOfTwo(uint32_t x) {
  if (x <= 1) {
    return 1;
  }
  // Number of bits needed to represent x - 1
  const int bits = 32 - __builtin_clz(x - 1);
  return 1 << bits;
}

/// \private
/// Header placed at the start of each malloc'd arena block. The block's usable
/// bytes follow the header immediately in memory (see `begin`).
struct Arena::ArenaImpl {
  // The block that was current before this one was allocated, or nullptr
  Arena::ArenaImpl *prev;
  // Number of usable bytes that follow this header
  int capacity;
  // Number of those bytes that have already been handed out
  int used;
  // Address of the first usable byte, i.e. the byte just past this header
  uint8_t *begin() { return reinterpret_cast<uint8_t *>(this + 1); }
};

// Pin the header layout. NOTE(review): presumably the 16-byte size is what
// keeps begin() 16-byte aligned when the block itself is - confirm.
static_assert(sizeof(Arena::ArenaImpl) == 16);
static_assert(alignof(Arena::ArenaImpl) == 8);

/// Create an arena. If `initialSize` is positive, eagerly allocate one block
/// with at least that many usable bytes (header plus payload rounded up to a
/// multiple of 16); otherwise the arena starts without any block.
Arena::Arena(int initialSize) : impl(nullptr) {
  if (initialSize <= 0) {
    return;
  }
  const auto allocationSize = align_up(initialSize + sizeof(ArenaImpl), 16);
  // NOTE: the result of malloc is not checked.
  auto *block = (Arena::ArenaImpl *)malloc(allocationSize);
  block->prev = nullptr;
  block->capacity = allocationSize - sizeof(ArenaImpl);
  block->used = 0;
  impl = block;
}

namespace {
/// Free `impl` and every older block reachable from it through `prev`.
/// Accepts nullptr (nothing to free).
void onDestroy(Arena::ArenaImpl *impl) {
  for (Arena::ArenaImpl *older = nullptr; impl != nullptr; impl = older) {
    // Read the link before the block that holds it is released
    older = impl->prev;
    free(impl);
  }
}
} // namespace

/// Move-construct: take ownership of `other`'s blocks and leave `other` empty
/// (its `impl` becomes nullptr).
Arena::Arena(Arena &&other) noexcept
    : impl(std::exchange(other.impl, nullptr)) {}
/// Move-assign: free every block this arena currently owns, then take
/// ownership of `other`'s blocks and leave `other` empty.
/// Self-assignment is a no-op.
Arena &Arena::operator=(Arena &&other) noexcept {
  // Without this guard a self-move would free our blocks and then re-adopt the
  // (now dangling) pointer, leading to a double free in the destructor.
  if (this != &other) {
    onDestroy(impl);
    impl = std::exchange(other.impl, nullptr);
  }
  return *this;
}

/// Free every block owned by this arena. All memory previously handed out by
/// the arena becomes invalid.
Arena::~Arena() { onDestroy(impl); }

/// Allocate `size` bytes aligned to `align` from `arena`.
///
/// The request is served from the arena's current (most recent) block. If
/// there is no block yet, or the current one cannot fit the request, a new
/// block is malloc'd and becomes the current block; whatever space was left in
/// the previous block is no longer used. The result of malloc is not checked.
void *operator new(size_t size, std::align_val_t align, Arena &arena) {
  // Worst-case number of bytes needed, including padding to reach `align`
  int64_t aligned_size = size + size_t(align) - 1;
  if (arena.impl == nullptr ||
      (arena.impl->capacity - arena.impl->used) < aligned_size) {
    // New block size: header plus the larger of the request and twice the
    // previous block's capacity, rounded up to a multiple of 16. Note that the
    // arithmetic is done in `int`.
    auto allocationSize = align_up(
        sizeof(Arena::ArenaImpl) +
            std::max<int>(aligned_size,
                          (arena.impl ? std::max<int>(sizeof(Arena::ArenaImpl),
                                                      arena.impl->capacity * 2)
                                      : 0)),
        16);
    auto *impl = (Arena::ArenaImpl *)malloc(allocationSize);
    // Push the new block on the front of the block list
    impl->prev = arena.impl;
    impl->capacity = allocationSize - sizeof(Arena::ArenaImpl);
    impl->used = 0;
    arena.impl = impl;
  }
  // First suitably aligned address at or after the current fill position
  auto *result =
      align_up(arena.impl->begin() + arena.impl->used, size_t(align));
  // Bytes consumed by this allocation: alignment padding plus `size`
  auto usedDelta = (result - arena.impl->begin()) + size - arena.impl->used;
  arena.impl->used += usedDelta;
  return result;
}

/// STL-friendly allocator using an arena
template <class T> struct ArenaAlloc {
  typedef T value_type;

  ArenaAlloc() = delete;
  explicit ArenaAlloc(Arena *arena) : arena(arena) {}

  Arena *arena;

  template <class U> constexpr ArenaAlloc(const ArenaAlloc<U> &other) noexcept {
    arena = other.arena;
  }

  [[nodiscard]] T *allocate(size_t n) {
    if (n > 0xfffffffffffffffful / sizeof(T)) { // NOLINT
      __builtin_unreachable();
    }

    return static_cast<T *>((void *)new (std::align_val_t(alignof(T)), *arena)
                                uint8_t[n * sizeof(T)]); // NOLINT
  }

  void deallocate(T *, size_t) noexcept {}

private:
};

/// Two arena allocators compare equal exactly when they use the same arena,
/// regardless of their value types.
template <class T, class U>
bool operator==(const ArenaAlloc<T> &lhs, const ArenaAlloc<U> &rhs) {
  const Arena *const left = lhs.arena;
  const Arena *const right = rhs.arena;
  return left == right;
}

/// Negation of operator==: true when the allocators use different arenas.
template <class T, class U>
bool operator!=(const ArenaAlloc<T> &lhs, const ArenaAlloc<U> &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

// ==================== END ARENA IMPL ====================

// ==================== BEGIN RANDOM IMPL ====================

/// Small deterministic pseudo random number generator (PCG32) that keeps one
/// output precomputed in `next_`.
///
/// NOTE(review): a default-constructed Random has `state == 0` and `inc == 0`,
/// so the state never changes and every draw is 0. Use the seeding constructor
/// when actual randomness is needed.
struct Random {
  // *Really* minimal PCG32 code / (c) 2014 M.E. O'Neill / pcg-random.org
  // Licensed under Apache License 2.0 (NO WARRANTY, etc. see website)
  //
  // Modified - mostly c -> c++
  Random() = default;

  /// Seed the generator. `initState` selects the starting point and `initSeq`
  /// selects the stream.
  Random(uint64_t initState, uint64_t initSeq) {
    pcg32_srandom_r(initState, initSeq);
    // Prime the one-element lookahead buffer
    next_ = pcg32_random_r();
  }

  /// Draws from a uniform distribution of uint32_t's
  uint32_t next() {
    // Hand out the buffered value and immediately compute its successor
    return std::exchange(next_, pcg32_random_r());
  }

  /// Draws from a uniform distribution of [0, s). From
  /// https://arxiv.org/pdf/1805.10941.pdf
  uint32_t bounded(uint32_t s) {
    assert(s != 0);
    uint64_t product = uint64_t(next()) * uint64_t(s);
    if (uint32_t(product) < s) {
      // Reject draws whose low half falls below 2^32 mod s to remove bias
      const uint32_t threshold = -s % s;
      while (uint32_t(product) < threshold) {
        product = uint64_t(next()) * uint64_t(s);
      }
    }
    return uint32_t(product >> 32);
  }

  /// Fill `bytes` with `size` random bytes
  void randomBytes(uint8_t *bytes, int size);

  /// Fill `bytes` with `size` random hex bytes
  void randomHex(uint8_t *bytes, int size);

  /// Returns a T whose object representation is filled with random bytes
  template <class T, class = std::enable_if_t<std::is_trivially_copyable_v<T>>>
  T randT() {
    T value;
    randomBytes((uint8_t *)&value, sizeof(T));
    return value;
  }

private:
  // One PCG32 step: advance `state` and return 32 output bits derived from the
  // state as it was before the step.
  uint32_t pcg32_random_r() {
    const uint64_t previous = state;
    // Advance internal state
    state = previous * 6364136223846793005ULL + inc;
    // Calculate output function (XSH RR), uses old state for max ILP
    const uint32_t xorShifted = ((previous >> 18u) ^ previous) >> 27u;
    const uint32_t rot = previous >> 59u;
    const uint32_t low = xorShifted >> rot;
    const uint32_t high = xorShifted << ((-rot) & 31);
    return low | high;
  }

  // Seed the rng.  Specified in two parts, state initializer and a
  // sequence selection constant (a.k.a. stream id)
  void pcg32_srandom_r(uint64_t initstate, uint64_t initSeq) {
    state = 0U;
    inc = (initSeq << 1u) | 1u;
    pcg32_random_r();
    state += initstate;
    pcg32_random_r();
  }
  // The value the next call to next() will return
  uint32_t next_{};
  // RNG state.  All values are possible.
  uint64_t state{};
  // Controls which RNG sequence (stream) is selected. Must *always* be odd.
  uint64_t inc{};
};

/// Randomly permute the elements of `x` in place, drawing indices from `rand`.
/// Walks from the last element down to the second, swapping each with an
/// element chosen from the positions at or before it.
template <class Container> void shuffle(Random &rand, Container &x) {
  using std::swap;
  int i = x.size() - 1;
  while (i > 0) {
    const int j = rand.bounded(i + 1);
    if (j != i) {
      swap(x[i], x[j]);
    }
    --i;
  }
}

/// Fill `bytes[0, size)` with random bytes, drawing one 32-bit value per four
/// bytes. The final draw supplies the last one to four bytes.
void Random::randomBytes(uint8_t *bytes, int size) {
  int offset = 0;
  while (offset + 4 < size) {
    const uint32_t word = next();
    memcpy(bytes + offset, &word, 4);
    offset += 4;
  }
  if (offset < size) {
    const uint32_t word = next();
    memcpy(bytes + offset, &word, size - offset);
  }
}

/// Fill `bytes[0, size)` with random lowercase hex digits. Each 32-bit draw
/// supplies up to eight digits, lowest nibble first. One draw is always made
/// for the tail, even when no digits remain to be written.
void Random::randomHex(uint8_t *bytes, int size) {
  const char *const digits = "0123456789abcdef";
  int i = 0;
  while (i + 8 < size) {
    uint32_t r = next();
    for (int nibble = 0; nibble < 8; ++nibble) {
      bytes[i++] = digits[r & 0b1111];
      r >>= 4;
    }
  }
  for (uint32_t r = next(); i < size; r >>= 4) {
    bytes[i++] = digits[r & 0b1111];
  }
}

// ==================== END RANDOM IMPL ====================

// ==================== BEGIN ARBITRARY IMPL ====================

/// Think of `Arbitrary` as an attacker-controlled random number generator.
/// Usually you want your random number generator to be fair, so that you can
/// sensibly analyze probabilities. E.g. The analysis that shows that quicksort
/// is expected O(n log n) with a random pivot relies on the random pivot being
/// selected uniformly from a fair distribution.
///
/// Other times you want your randomness to be diabolically unfair, like when
/// looking for bugs and fuzzing. The random-number-like interface is still
/// convenient here, but you can potentially get much better coverage by
/// allowing the possibility of e.g. flipping heads 100 times in a row.
///
/// Every draw consumes bytes from the front of the supplied `bytecode`.
/// When it runs out of entropy, it always returns 0.
struct Arbitrary {
  Arbitrary() = default;

  /// `bytecode` is viewed, not copied; it must outlive this object.
  explicit Arbitrary(std::span<const uint8_t> bytecode) : bytecode(bytecode) {}

  /// Draws an arbitrary uint32_t
  uint32_t next() { return consume<4>(); }

  /// Draws an arbitrary element from [0, s)
  uint32_t bounded(uint32_t s);

  /// Fill `bytes` with `size` arbitrary bytes
  void randomBytes(uint8_t *bytes, int size) {
    // Copy as much as the remaining entropy allows, zero-fill the rest
    const int available = std::min<int>(size, bytecode.size());
    if (available > 0) {
      memcpy(bytes, bytecode.data(), available);
    }
    bytecode = bytecode.subspan(available);
    memset(bytes + available, 0, size - available);
  }

  /// Fill `bytes` with `size` random hex bytes
  void randomHex(uint8_t *bytes, int size) {
    const char *const digits = "0123456789abcdef";
    int i = 0;
    while (i < size) {
      // One consumed byte yields up to two digits, low nibble first
      const uint8_t arbitrary = consume<1>();
      bytes[i++] = digits[arbitrary & 0xf];
      if (i < size) {
        bytes[i++] = digits[(arbitrary >> 4) & 0xf];
      }
    }
  }

  /// Returns a T whose object representation is filled with arbitrary bytes
  template <class T, class = std::enable_if_t<std::is_trivially_copyable_v<T>>>
  T randT() {
    T value;
    randomBytes((uint8_t *)&value, sizeof(T));
    return value;
  }

  /// True while unconsumed bytes remain
  bool hasEntropy() const { return bytecode.size() != 0; }

private:
  // Remove and return the first remaining byte, or return 0 if none are left
  uint8_t consumeByte() {
    if (bytecode.empty()) {
      return 0;
    }
    const uint8_t head = bytecode.front();
    bytecode = bytecode.subspan(1);
    return head;
  }

  // Consume `kBytes` bytes and assemble them into an integer, first byte most
  // significant
  template <int kBytes> uint32_t consume() {
    static_assert(kBytes <= 4);
    uint32_t result = 0;
    for (int remaining = kBytes; remaining > 0; --remaining) {
      result = (result << 8) | consumeByte();
    }
    return result;
  }

  // The entropy that has not been consumed yet
  std::span<const uint8_t> bytecode;
};

/// Process-wide Arbitrary instance. Default-constructed, so it has no entropy
/// until something assigns to it.
inline Arbitrary gArbitrary;

/// Declared here, defined elsewhere.
/// NOTE(review): presumably installs `data[0, size)` as the entropy of
/// `gArbitrary` - confirm against the definition.
void initFuzz(const uint8_t *data, size_t size);

/// Draws an arbitrary element from [0, s). `s` must not be 0.
///
/// Consumes only as many bytes of entropy as are needed to represent `s - 1`
/// (none at all when `s == 1`) and reduces the value modulo `s`. The result is
/// therefore not uniformly distributed, which is fine for an
/// attacker-controlled generator.
uint32_t Arbitrary::bounded(uint32_t s) {
  // Same precondition as Random::bounded; s == 0 would be a modulo by zero.
  assert(s != 0);
  if (s == 1) {
    return 0;
  }
  // Number of bits needed to represent s - 1 (between 1 and 32 here)
  const int bits = 32 - __builtin_clz(s - 1);
  // Round up to whole bytes
  switch ((bits + 7) / 8) {
  case 1:
    return consume<1>() % s;
  case 2:
    return consume<2>() % s;
  case 3:
    return consume<3>() % s;
  default:
    return consume<4>() % s;
  }
}

// ==================== END ARBITRARY IMPL ====================

#define SHOW_PRIORITY 0
#define DEBUG 0

using Key = ConflictSet::Key;

/// Lexicographic byte-wise ordering of keys: compare the common prefix, and if
/// it is equal the shorter key orders first.
static auto operator<=>(const Key &lhs, const Key &rhs) {
  const int minLen = std::min(lhs.len, rhs.len);
  // An empty key may carry a null pointer (this file builds Key{nullptr, 0}),
  // and passing a null pointer to memcmp is undefined even for a length of 0.
  const int c = minLen > 0 ? memcmp(lhs.p, rhs.p, minLen) : 0;
  return c != 0 ? c <=> 0 : lhs.len <=> rhs.len;
}

namespace {
// A node in the tree representing write conflict history. This tree maintains
// several invariants:

// 1. BST invariant: all keys in the tree rooted at the left child of a node
// compare less than that node's key, and all keys in the tree rooted at the
// right child of a node compare greater than that node's key.
// 2. Heap invariant: the priority of a node is >= all the priorities
// of its children (transitively)
// 3. Max invariant: `maxVersion` is the max among all values of `pointVersion`
// and `rangeVersion` for this node and its children (transitively)
// 4. The lowest key (an empty byte sequence) is always physically present in
// the tree so that "last less than or equal" queries are always well-defined.

// Logically, the contents of the tree represent a "range map" where all of the
// infinitely many points in the key space are associated with a writeVersion.
// If a point is physically present in the tree, then its writeVersion is its
// node's `pointVersion`. Otherwise, its writeVersion is the `rangeVersion` of
// the node with the last key less than point.
// One treap node. The key bytes are stored inline, immediately after the
// struct (hence the `this + 1` / `node + 1` idiom used throughout this file).
struct Node {
  // See "Max invariant" above
  int64_t maxVersion;
  // The write version of the point in the key space represented by this node's
  // key
  int64_t pointVersion;
  // The write version of the range immediately after this node's key, until
  // just before the next key in the tree. I.e. (this key, next key)
  int64_t rangeVersion;
  // child[0] is the left child or nullptr. child[1] is the right child or
  // nullptr
  Node *child[2];
  // The parent of this node in the tree, or nullptr if this node is the root
  Node *parent;
  // As a treap, this tree satisfies the heap invariant on each node's priority
  uint32_t priority;
  // The length of this node's key
  int len;
  // The contents of this node's key
  // uint8_t[len];

  // Lexicographic byte-wise comparison of this node's key with `other`: the
  // common prefix decides, and on a tie the shorter key orders first.
  auto operator<=>(const ConflictSet::Key &other) const {
    const int minLen = std::min<int>(len, other.len);
    // `other.p` may be null for an empty key, and passing a null pointer to
    // memcmp is undefined even for a length of 0.
    const int c = minLen > 0 ? memcmp(this + 1, other.p, minLen) : 0;
    return c != 0 ? c <=> 0 : len <=> other.len;
  }
};

// Allocate (with malloc) and initialize a childless node holding a copy of
// `key`, attached to `parent` and given a random priority drawn from `rand`.
// `maxVersion` and `pointVersion` are both set to `pointVersion`.
// Note: `rangeVersion` is left uninitialized.
// The node must eventually be released with destroyNode.
Node *createNode(const Key &key, Node *parent, int64_t pointVersion,
                 Random &rand) {
  assert(key.len <= std::numeric_limits<int>::max());
  Node *result = (Node *)malloc(sizeof(Node) + key.len);
  result->maxVersion = pointVersion;
  result->pointVersion = pointVersion;
  result->child[0] = nullptr;
  result->child[1] = nullptr;
  result->parent = parent;
  result->priority = rand.next();
#if SHOW_PRIORITY
  result->priority &= 0xff;
#endif
  result->len = key.len;
  // The root is created from Key{nullptr, 0}; passing a null source to memcpy
  // is undefined even when the length is 0, so skip the copy for empty keys.
  if (key.len > 0) {
    memcpy(result + 1, key.p, key.len);
  }
  return result;
}

// Release a node obtained from createNode. The node must already be detached
// from its children (both child pointers null); children are not freed here.
void destroyNode(Node *node) {
  assert(node->child[0] == nullptr);
  assert(node->child[1] == nullptr);
  free(node);
}

// Result of a "last less than or equal" query (see lastLeqMulti).
struct Iterator {
  // The node with the greatest key <= the queried key
  Node *node;
  // 0 if `node`'s key equals the queried key, -1 if it is strictly less
  int cmp;
};

// Repeatedly sweep over `remaining`, calling step() once on each element per
// sweep, until every element's step() has returned true. An element is dropped
// as soon as its step() returns true, by swapping it with the last element and
// shrinking the span, so `remaining` is permuted as a side effect.
// If `stepLimit` is non-negative, return after that many calls to step() in
// total. NOTE(review): a negative limit is decremented on every step, so it
// would eventually overflow on an extremely long run.
template <class Stepwise>
void runInterleaved(std::span<Stepwise> remaining, int stepLimit = -1) {
  while (!remaining.empty()) {
    int i = 0;
    while (i < int(remaining.size())) {
      if (stepLimit-- == 0) {
        return;
      }
      if (!remaining[i].step()) {
        ++i;
        continue;
      }
      // Finished: move it out of the active prefix. `i` is left unchanged so
      // the element swapped into its place is stepped next.
      const int last = int(remaining.size()) - 1;
      if (i != last) {
        using std::swap;
        swap(remaining[i], remaining[last]);
      }
      remaining = remaining.first(last);
    }
  }
}

// Run each element of `remaining` to completion, one after another: call
// step() on it until it returns true, then move on to the next element.
// If `stepLimit` is non-negative, return after that many calls to step() in
// total.
template <class Stepwise>
void runSequential(std::span<Stepwise> remaining, int stepLimit = -1) {
  for (auto &r : remaining) {
    do {
      if (stepLimit-- == 0) {
        return;
      }
    } while (!r.step());
  }
}

// Resumable search for the node with the greatest key <= `*key`. Each call to
// step() examines one node on the search path.
struct StepwiseLastLeq {
  // Next node to examine; nullptr once the search has fallen off the tree
  Node *current;
  // Best candidate found so far (greatest key seen that is <= *key)
  Node *result;
  // The key being searched for (not owned)
  const Key *key;
  // 0 if `result` matches the key exactly, -1 otherwise
  int resultC = -1;
  // Caller-supplied tag, used to route the answer back to its query
  int index;
  // Outcome of the most recent comparison `*current <=> *key`
  std::strong_ordering c = std::strong_ordering::equal;

  // Leaves current, result, key and index uninitialized
  StepwiseLastLeq() {}
  StepwiseLastLeq(Node *current, Node *result, const Key &key, int index)
      : current(current), result(result), key(&key), index(index) {}

  // Advance the search by one node. Returns true once the search is finished.
  bool step() {
    if (current == nullptr) {
      return true;
    }
    c = *current <=> *key;
    if (c == 0) {
      // Exact match
      result = current;
      resultC = 0;
      return true;
    }
    const bool nodeIsLess = c < 0;
    if (nodeIsLess) {
      // This node is a candidate; anything better lies to its right
      result = current;
    }
    current = current->child[nodeIsLess];
    return false;
  }
};

// For every key in `keys` (which must be sorted), find the node in the tree
// rooted at `root` with the greatest key <= that key, and store it in
// `results` at the same position as the key. `results` must have room for
// keys.size() entries. Scratch memory is taken from `arena`.
void lastLeqMulti(Arena &arena, Node *root, std::span<Key> keys,
                  Iterator *results) {
  assert(std::is_sorted(keys.begin(), keys.end()));

  if (keys.size() == 0) {
    return;
  }

  auto *stepwiseLastLeqs = new (arena) StepwiseLastLeq[keys.size()];

  // Descend until queries for front and back diverge
  // While the smallest and the largest key take the same turn at a node, so do
  // all keys in between, so this shared prefix of the search path is walked
  // only once.
  Node *current = root;
  Node *resultP = nullptr;
  auto stepwiseFront = StepwiseLastLeq(current, resultP, keys.front(), -1);
  auto stepwiseBack = StepwiseLastLeq(current, resultP, keys.back(), -1);
  for (;;) {
    bool done1 = stepwiseFront.step();
    bool done2 = stepwiseBack.step();
    if (!done1 && !done2 && stepwiseFront.c == stepwiseBack.c) {
      assert(stepwiseFront.current == stepwiseBack.current);
      assert(stepwiseFront.result == stepwiseBack.result);
      current = stepwiseFront.current;
      resultP = stepwiseFront.result;
    } else {
      break;
    }
  }

  // Start every query from the point where the shared path ended, remembering
  // each query's original position in `index`
  int index = 0;
  {
    auto iter = stepwiseLastLeqs;
    for (const auto &k : keys) {
      *iter++ = StepwiseLastLeq(current, resultP, k, index++);
    }
  }
  auto stepwiseSpan = std::span<StepwiseLastLeq>(stepwiseLastLeqs, keys.size());
  runInterleaved(stepwiseSpan);
  // runInterleaved permutes the array, so route results back by `index`
  for (const auto &stepwise : stepwiseSpan) {
    results[stepwise.index] = Iterator{stepwise.result, stepwise.resultC};
  }
}

// In-order neighbor of `n`: the node with the next greater key when `dir` is
// true, the next smaller key when `dir` is false. Returns nullptr if `n` is
// already the last (respectively first) node.
[[maybe_unused]] Node *next(Node *n, bool dir) {
  if (Node *descendant = n->child[dir]) {
    // The neighbor is the extreme node of the `dir` subtree on the side
    // facing `n`
    while (descendant->child[!dir]) {
      descendant = descendant->child[!dir];
    }
    return descendant;
  }
  // Otherwise climb until we leave a subtree through its `!dir` side; the
  // node we arrive at is the neighbor (or nullptr if we climb past the root)
  Node *ancestor = n->parent;
  while (ancestor && n == ancestor->child[dir]) {
    n = ancestor;
    ancestor = n->parent;
  }
  return ancestor;
}

// Follow `child[dir]` links from `n` as far as possible and return the last
// node reached: the greatest key in the subtree when `dir` is true, the least
// when `dir` is false. Returns nullptr if `n` is null.
[[maybe_unused]] Node *extrema(Node *n, bool dir) {
  Node *last = nullptr;
  for (; n != nullptr; n = n->child[dir]) {
    last = n;
  }
  return last;
}

// Write the tree rooted at `node` to `file` in Graphviz dot format. Keys are
// printed as raw bytes, so the output is only well-formed for keys made of
// characters that are valid in dot identifiers.
[[maybe_unused]] void debugPrintDot(FILE *file, Node *node) {

  // Emits the edges of the tree. Missing children of internal nodes are drawn
  // as edges to numbered placeholder nodes null0, null1, ...
  struct DebugDotPrinter {

    explicit DebugDotPrinter(FILE *file) : file(file) {}

    void print(Node *node) {
      // Leaves have no edges to print
      if (node->child[0] == nullptr && node->child[1] == nullptr) {
        return;
      }
      for (int i = 0; i < 2; ++i) {
        if (node->child[i] != nullptr) {
          fprintf(file, " k_%.*s -> k_%.*s;\n", node->len,
                  (const char *)(node + 1), node->child[i]->len,
                  (const char *)(node->child[i] + 1));
          print(node->child[i]);
        } else {
          fprintf(file, " k_%.*s -> null%d;\n", node->len,
                  (const char *)(node + 1), id);
          ++id;
        }
      }
    }
    // Number of placeholder nodes emitted so far
    int id = 0;
    FILE *file;
  };

  fprintf(file, "digraph ConflictSet {\n");
  if (node != nullptr) {
    DebugDotPrinter printer{file};
    fprintf(file, "\n");
    printer.print(node);
    fprintf(file, "\n");
    // One labelled vertex per node, in key order. The label shows the key,
    // optionally the priority, and the max/point/range versions (truncated to
    // int).
    for (auto iter = extrema(node, false); iter != nullptr;
         iter = next(iter, true)) {
      fprintf(file,
              " k_%.*s [label=\"k=\\\"%.*s\\\"\\n"
#if SHOW_PRIORITY
              "p=%u\\n"
#endif
              "m=%d\\nv=%d r=%d\"];\n",
              iter->len, (const char *)(iter + 1), iter->len,
              (const char *)(iter + 1),
#if SHOW_PRIORITY
              iter->priority,
#endif
              int(iter->maxVersion), int(iter->pointVersion),
              int(iter->rangeVersion));
    }
    // Declare the placeholder vertices used for missing children
    for (int i = 0; i < printer.id; ++i) {
      fprintf(file, " null%d [shape=point];\n", i);
    }
  }
  fprintf(file, "}\n");
}

// Append a textual rendering of the logical range map represented by the tree
// rooted at `node` to `result`, one "<key> -> <version>\n" line per boundary,
// in key order. Every key byte is rendered as "x" followed by two lowercase
// hex digits.
//
// Each node contributes a line for its own key with its `pointVersion`. If its
// `rangeVersion` differs, a second line for the immediate successor key
// (key + a zero byte) is emitted with the `rangeVersion` - unless that
// successor key is physically present as the next node, in which case that
// node's own line already describes it.
[[maybe_unused]] void printLogical(std::string &result, Node *node) {
  for (auto iter = extrema(node, false); iter != nullptr;) {
    auto *next = ::next(iter, true);
    std::string key;
    for (uint8_t c : std::string_view((const char *)(iter + 1), iter->len)) {
      key += "x";
      key += "0123456789abcdef"[c / 16];
      key += "0123456789abcdef"[c % 16];
    }
    result += key + " -> " + std::to_string(iter->pointVersion) + "\n";
    if (iter->pointVersion != iter->rangeVersion) {
      const std::string successor =
          std::string((const char *)(iter + 1), iter->len) +
          std::string("\x00", 1);
      // Compare against the *next* node's full key. (Viewing it with
      // iter->len, one byte too short, could never match the successor.)
      if (next == nullptr ||
          std::string_view((const char *)(next + 1), next->len) != successor) {
        result += key + "x00 -> " + std::to_string(iter->rangeVersion) + "\n";
      }
    }
    iter = next;
  }
}

// Build a 4-byte key holding `n` as zero-padded lowercase hex digits. The
// bytes are allocated from `arena`. `n` must be in [0, 0xffff] to fit.
[[maybe_unused]] Key toKey(Arena &arena, int n) {
  constexpr int kMaxLength = 4;
  const char *const digits = "0123456789abcdef";
  uint8_t *buffer = new (arena) uint8_t[kMaxLength];
  memset(buffer, '0', kMaxLength);
  // Write digits from least significant to most significant, right to left
  int pos = kMaxLength;
  do {
    buffer[--pos] = digits[n % 16];
    n /= 16;
  } while (n != 0);
  return Key{buffer, kMaxLength};
}

// Build the 5-byte key that immediately follows toKey(arena, n) in key order:
// the same four hex digits followed by a single zero byte. The bytes are
// allocated from `arena`. `n` must be in [0, 0xffff] to fit.
[[maybe_unused]] Key toKeyAfter(Arena &arena, int n) {
  constexpr int kMaxLength = 4;
  const char *const digits = "0123456789abcdef";
  uint8_t *buffer = new (arena) uint8_t[kMaxLength + 1];
  memset(buffer, '0', kMaxLength);
  buffer[kMaxLength] = 0;
  // Write digits from least significant to most significant, right to left
  int pos = kMaxLength;
  do {
    buffer[--pos] = digits[n % 16];
    n /= 16;
  } while (n != 0);
  return Key{buffer, kMaxLength + 1};
}

// Recompute `maxVersion` of `n` from its own point/range versions and its
// children's `maxVersion`, then do the same for each ancestor in turn. Stops
// at the first node whose stored value is already correct, or after the root.
// TODO interleave this? Will require careful analysis for correctness, and the
// performance gains may not be worth it.
void updateMaxVersion(Node *n) {
  do {
    int64_t recomputed = std::max(n->pointVersion, n->rangeVersion);
    for (Node *child : n->child) {
      if (child != nullptr) {
        recomputed = std::max(recomputed, child->maxVersion);
      }
    }
    if (n->maxVersion == recomputed) {
      return;
    }
    n->maxVersion = recomputed;
    n = n->parent;
  } while (n != nullptr);
}

// Tree rotation around `*node`. `node` is the address of the pointer that
// links `*node` into the tree (its parent's child slot, or the root pointer).
// The child on the `!dir` side of `*node` takes its place and `*node` becomes
// that child's `dir` child. Parent pointers and `maxVersion` are fixed up.
// `*node` must have a child on the `!dir` side.
void rotate(Node **node, bool dir) {
  // diagram shown for dir == true
  /*    n
       /
      l
       \
        lr
  */
  assert(node != nullptr);
  Node *n = *node;
  assert(n != nullptr);
  Node *parent = n->parent;
  Node *l = n->child[!dir];
  assert(l != nullptr);
  Node *lr = l->child[dir];
  // lr moves from under l to under n
  n->child[!dir] = lr;
  if (lr) {
    lr->parent = n;
  }
  // n becomes l's child, and l takes n's old place in the tree
  l->child[dir] = n;
  n->parent = l;
  l->parent = parent;
  *node = l;
  /*    l
         \
          n
         /
        lr
  */
  // n is now below l, so refresh it first
  updateMaxVersion(n);
  updateMaxVersion(l);
}

// Verify, for the whole subtree rooted at `node`, that each child's `parent`
// pointer refers back to the node it hangs off. Every violation is reported on
// stderr and clears `success`; `success` is never set to true here.
[[maybe_unused]] void checkParentPointers(Node *node, bool &success) {
  for (int i = 0; i < 2; ++i) {
    Node *child = node->child[i];
    if (child == nullptr) {
      continue;
    }
    if (child->parent != node) {
      fprintf(stderr, "%.*s child %d has parent pointer %p. Expected %p\n",
              node->len, (const char *)(node + 1), i, (void *)child->parent,
              (void *)node);
      success = false;
    }
    checkParentPointers(child, success);
  }
}

// Verify the max invariant for the whole subtree rooted at `node`. Returns the
// max version the subtree should have (computed from point/range versions, not
// from the stored `maxVersion` fields). Every node whose stored `maxVersion`
// disagrees is reported on stderr and clears `success`.
[[maybe_unused]] int64_t checkMaxVersion(Node *node, bool &success) {
  int64_t expected = std::max(node->pointVersion, node->rangeVersion);
  for (Node *child : node->child) {
    if (child == nullptr) {
      continue;
    }
    expected = std::max(expected, checkMaxVersion(child, success));
  }
  if (node->maxVersion != expected) {
    fprintf(stderr, "%.*s has max version %d. Expected %d\n", node->len,
            (const char *)(node + 1), int(node->maxVersion), int(expected));
    success = false;
  }
  return expected;
}

// Validate the tree rooted at `node` and compare its logical contents with
// `refImpl`. Checks the heap, max and parent-pointer invariants (reported on
// stderr), asserts that an in-order walk yields sorted keys, and compares the
// output of printLogical with `refImpl.printLogical`. Returns true if nothing
// was reported.
template <class ReferenceImpl>
bool checkCorrectness(Node *node, ReferenceImpl &refImpl) {
  bool success = true;
  Arena arena;
  std::vector<std::string_view, ArenaAlloc<std::string_view>> keys{
      ArenaAlloc<std::string_view>(&arena)};
  // In-order walk: collect the keys and check the heap invariant on the way
  for (Node *iter = extrema(node, false); iter != nullptr;
       iter = next(iter, true)) {
    keys.push_back(std::string_view((char *)(iter + 1), iter->len));
    for (Node *child : iter->child) {
      if (child == nullptr || iter->priority >= child->priority) {
        continue;
      }
      fprintf(stderr, "%.*s has priority < its child %.*s\n", iter->len,
              (const char *)(iter + 1), child->len, (const char *)(child + 1));
      success = false;
    }
  }
  // Check bst invariant
  assert(std::is_sorted(keys.begin(), keys.end()));

  checkMaxVersion(node, success);
  checkParentPointers(node, success);

  std::string actual;
  std::string expected;
  printLogical(actual, node);
  refImpl.printLogical(expected);
  if (actual != expected) {
    fprintf(stderr,
            "Logical map not equal to reference logical map.\n\nActual:\n"
            "%s\nExpected:\n%s\n",
            actual.c_str(), expected.c_str());
    success = false;
  }

  return success;
}

} // namespace

/// Implementation of ConflictSet: a treap of Nodes recording write versions,
/// plus the random source used for treap priorities. Owns every node reachable
/// from `root`.
struct __attribute__((__visibility__("hidden"))) ConflictSet::Impl {
  // Source of node priorities and of the insertion-order shuffle
  Random rand;
  // Root of the treap. Never null after construction: the empty key is always
  // present.
  Node *root;
  // Reads with a read version below this are answered with TooOld
  int64_t oldestVersion;

  /// Create a tree that contains only the empty key, with both its point and
  /// range version set to `oldestVersion`. The low 32 bits of `seed` seed the
  /// generator state and the high 32 bits select its stream.
  explicit Impl(int64_t oldestVersion, uint64_t seed) noexcept
      : rand{seed & 0xfffffffful, seed >> 32},
        root(createNode({nullptr, 0}, nullptr, oldestVersion, rand)),
        oldestVersion(oldestVersion) {
    root->rangeVersion = oldestVersion;
  }

  /// Fill `results[i]` for each of the `count` entries of `reads`: TooOld if
  /// the read version is below `oldestVersion`, Conflict if the key was
  /// written at a version greater than the read version, Commit otherwise.
  /// Only point reads (empty `end`) are supported so far.
  void check(const ReadRange *reads, Result *results, int count) const {
    // Reads that are too old are answered immediately; count the rest
    int searchCount = 0;
    for (int i = 0; i < count; ++i) {
      if (reads[i].readVersion >= oldestVersion) {
        ++searchCount;
      } else {
        results[i] = ConflictSet::TooOld;
      }
    }
    Arena arena;
    auto *iters = new (arena) Iterator[searchCount];
    auto *begins = new (arena) Key[searchCount];
    // Compact the keys of the remaining reads into `begins`
    int j = 0;
    for (int i = 0; i < count; ++i) {
      if (reads[i].readVersion >= oldestVersion) {
        begins[j++] = reads[i].begin;
      }
    }
    lastLeqMulti(arena, root, std::span<Key>(begins, searchCount), iters);
    // TODO check non-singleton reads lol
    // `i` indexes reads/results, `j` indexes the compacted begins/iters
    j = 0;
    for (int i = 0; i < count; ++i) {
      if (reads[i].readVersion >= oldestVersion) {
        assert(reads[i].end.len == 0);
        // Must use `j` here: `iters` only has `searchCount` entries, so
        // indexing it with `i` reads out of bounds once any read is too old.
        assert(iters[j].node != nullptr);
        // An exact hit is governed by the node's point version; otherwise the
        // key lies in the range after the node
        if ((iters[j].cmp == 0
                 ? iters[j].node->pointVersion
                 : iters[j].node->rangeVersion) > reads[i].readVersion) {
          results[i] = ConflictSet::Conflict;
        } else {
          results[i] = ConflictSet::Commit;
        }
        ++j;
      }
    }
  }

  /// Resumable point insertion. Each step() descends one level; the last step
  /// either updates an existing node or links in a new leaf.
  struct StepwiseInsert {
    // After this phase, the heap invariant may be violated for *current and
    // (*current)->parent.
    // Address of the child slot (or root pointer) currently being examined
    Node **current;
    // The node that owns `*current`, or nullptr while at the root pointer
    Node *parent;
    // The key being inserted (not owned)
    const Key *key;
    int64_t writeVersion;
    Random *rand;

    // Leaves all members uninitialized
    StepwiseInsert() {}
    StepwiseInsert(Node **root, const Key &key, int64_t writeVersion,
                   Random *rand)
        : current(root), parent(nullptr), key(&key), writeVersion(writeVersion),
          rand(rand) {}
    // Returns true once the key's node exists and carries `writeVersion` as
    // its point version. `*current` then refers to that node.
    bool step() {
#if DEBUG
      fprintf(stderr, "Step insert of %.*s. At node: %.*s\n", key->len, key->p,
              (*current) ? (*current)->len : 7,
              (*current) ? (const char *)((*current) + 1) : "nullptr");
#endif
      if (*current == nullptr) {
        auto *newNode = createNode(*key, parent, writeVersion, *rand);
        *current = newNode;
        // We could interleave the iteration in ::next, but we'd need a careful
        // analysis for correctness and it's unlikely to be worthwhile.
        auto *prev = ::next(newNode, false);
        // The empty key always exists. If *key is empty then we won't reach here.
        assert(prev != nullptr);
        assert(prev->rangeVersion <= writeVersion);
        // The new point splits its predecessor's range; the part after the new
        // key keeps the predecessor's range version
        newNode->rangeVersion = prev->rangeVersion;
        return true;
      } else {
        // This is the key optimization - setting the max version on the way
        // down the search path so we only have to do one traversal.
        (*current)->maxVersion = std::max((*current)->maxVersion, writeVersion);
        auto c = *key <=> **current;
        if (c == 0) {
          (*current)->pointVersion = writeVersion;
          return true;
        }
        parent = *current;
        current = &((*current)->child[c > 0]);
      }
      return false;
    }
  };

  /// Record `count` point writes (empty `end`) from `writes`, then restore the
  /// treap's heap invariant with rotations.
  void addWrites(const WriteRange *writes, int count) {
    Arena arena;
    auto stepwiseInserts =
        std::span<StepwiseInsert>(new (arena) StepwiseInsert[count], count);
    for (int i = 0; i < count; ++i) {
      // TODO handle non-singleton writes lol
      assert(writes[i].end.len == 0);

      stepwiseInserts[i] =
          StepwiseInsert{&root, writes[i].begin, writes[i].writeVersion, &rand};
    }

    // TODO Descend until queries for front and back diverge

    // Mitigate potential n^2 behavior of insertion (imagine if all inserts
    // shared the same search path in the pre-existing tree) by shuffling the
    // insertion order. Not sure how this interacts with interleaved insertion
    // but it's probably fine. There's a hand-wavy symmetry argument.
    shuffle(rand, stepwiseInserts);

    runInterleaved(stepwiseInserts);

    // Every touched node may now violate the heap invariant with respect to
    // its parent; queue them all for checking.
    std::vector<Node *, ArenaAlloc<Node *>> workList{
        ArenaAlloc<Node *>(&arena)};
    workList.reserve(count);
    for (int i = 0; i < count; ++i) {
      Node *node = *stepwiseInserts[i].current;
      assert(node != nullptr);
      workList.push_back(*stepwiseInserts[i].current);
    }

    while (!workList.empty()) {
      Node *n = workList.back();
      workList.pop_back();
#if DEBUG
      fprintf(stderr, "\tcheck heap invariant %.*s\n", n->len,
              (const char *)(n + 1));
#endif
      if (n->parent == nullptr) {
        continue;
      }
      // Which side of its parent `n` hangs off
      const bool dir = n == n->parent->child[1];
      assert(dir || n == n->parent->child[0]);
      // p is the address of the pointer to n->parent in the tree
      Node **p = n->parent->parent == nullptr
                     ? &root
                     : &n->parent->parent
                            ->child[n->parent->parent->child[1] == n->parent];
      assert(*p == n->parent);
      if (n->parent->priority < n->priority) {
#if DEBUG
        fprintf(stderr, "\trotate %.*s %s\n", n->len, (const char *)(n + 1),
                !dir ? "right" : "left");
#endif
        // Lift n above its parent. n (now at *p) must be re-checked against
        // its new parent, and the subtree handed over to the old parent must
        // be re-checked against it.
        rotate(p, !dir);
        workList.push_back(*p);
        assert((*p)->child[!dir] != nullptr);
        auto *lr = (*p)->child[!dir]->child[dir];
        if (lr != nullptr) {
          workList.push_back(lr);
        }
      }
    }
  }

  /// Raise the version below which reads are reported as TooOld. The new value
  /// must be greater than the current one.
  void setOldestVersion(int64_t oldestVersion) {
    assert(oldestVersion > this->oldestVersion);
    this->oldestVersion = oldestVersion;
  }

  /// Free every node in the tree, using an explicit stack instead of
  /// recursion.
  ~Impl() {
    Arena arena;
    std::vector<Node *, ArenaAlloc<Node *>> toFree{ArenaAlloc<Node *>(&arena)};
    if (root != nullptr) {
      toFree.push_back(root);
    }
    while (toFree.size() > 0) {
      Node *n = toFree.back();
      toFree.pop_back();
      for (int i = 0; i < 2; ++i) {
        // Detach the children first: destroyNode requires a childless node
        auto *c = std::exchange(n->child[i], nullptr);
        if (c != nullptr) {
          toFree.push_back(c);
        }
      }
      destroyNode(n);
    }
  }
};

/// Forwards to Impl::check.
void ConflictSet::check(const ReadRange *reads, Result *results,
                        int count) const {
  impl->check(reads, results, count);
}

/// Forwards to Impl::addWrites with the `count` write ranges in `writes`.
void ConflictSet::addWrites(const WriteRange *writes, int count) {
  impl->addWrites(writes, count);
}

/// Forwards to Impl::setOldestVersion.
void ConflictSet::setOldestVersion(int64_t oldestVersion) {
  impl->setOldestVersion(oldestVersion);
}

/// Allocates raw storage for the implementation with malloc and constructs an
/// Impl in it from `oldestVersion` and `seed`. The destructor releases it with
/// an explicit destructor call followed by free.
ConflictSet::ConflictSet(int64_t oldestVersion, uint64_t seed)
    : impl(nullptr) {
  void *storage = malloc(sizeof(Impl));
  impl = new (storage) Impl{oldestVersion, seed};
}

/// Tears down the owned Impl, if any. A null `impl` (e.g. after being moved
/// from) is a no-op. The Impl was placement-constructed in malloc'd storage,
/// so it is destroyed explicitly and the storage is then freed.
ConflictSet::~ConflictSet() {
  if (impl == nullptr) {
    return;
  }
  impl->~Impl();
  free(impl);
}

/// Move constructor: takes over `other`'s Impl and leaves `other` holding
/// null, so that its destructor does nothing.
ConflictSet::ConflictSet(ConflictSet &&other) noexcept : impl(other.impl) {
  other.impl = nullptr;
}

/// Move assignment: releases the Impl currently owned by `*this` (if any),
/// then takes over `other`'s Impl and leaves `other` holding null.
///
/// Previously the old Impl pointer was simply overwritten, leaking it and
/// every node it owned. Self-assignment is a no-op.
ConflictSet &ConflictSet::operator=(ConflictSet &&other) noexcept {
  if (this != &other) {
    if (impl != nullptr) {
      // Mirror ~ConflictSet: the Impl lives in malloc'd storage, so destroy it
      // explicitly and then free the storage.
      impl->~Impl();
      free(impl);
    }
    impl = std::exchange(other.impl, nullptr);
  }
  return *this;
}

// Unqualified aliases for the nested ConflictSet types. The extern "C" entry
// points below use them in their signatures.
// NOTE(review): presumably these names mirror the ones in a C-facing header -
// confirm.
using ConflictSet_Result = ConflictSet::Result;
using ConflictSet_Key = ConflictSet::Key;
using ConflictSet_ReadRange = ConflictSet::ReadRange;
using ConflictSet_WriteRange = ConflictSet::WriteRange;

extern "C" {
__attribute__((__visibility__("default"))) void
ConflictSet_check(void *cs, const ConflictSet_ReadRange *reads,
                  ConflictSet_Result *results, int count) {
  ((ConflictSet::Impl *)cs)->check(reads, results, count);
}
__attribute__((__visibility__("default"))) void
ConflictSet_addWrites(void *cs, const ConflictSet_WriteRange *writes,
                      int count) {
  ((ConflictSet::Impl *)cs)->addWrites(writes, count);
}
__attribute__((__visibility__("default"))) void
ConflictSet_setOldestVersion(void *cs, int64_t oldestVersion) {
  ((ConflictSet::Impl *)cs)->setOldestVersion(oldestVersion);
}
/// C entry point that creates a conflict set and returns it as an opaque
/// handle. The handle must be released with ConflictSet_destroy.
///
/// Returns nullptr if the storage for the implementation cannot be allocated.
/// Previously the malloc result was passed straight to placement-new, which
/// is undefined behavior when malloc returns null.
__attribute__((__visibility__("default"))) void *
ConflictSet_create(int64_t oldestVersion, uint64_t seed) {
  void *storage = malloc(sizeof(ConflictSet::Impl));
  if (storage == nullptr) {
    return nullptr;
  }
  return new (storage) ConflictSet::Impl{oldestVersion, seed};
}
__attribute__((__visibility__("default"))) void ConflictSet_destroy(void *cs) {
  using Impl = ConflictSet::Impl;
  ((Impl *)cs)->~Impl();
  free(cs);
}
}

namespace std {
// Supplies a definition of the standard library's internal length-error hook
// that never throws. The body is __builtin_unreachable(), so actually reaching
// this function is undefined behavior.
// NOTE(review): presumably defined here so the library's containers do not
// pull in the runtime's exception-throwing support - confirm against the
// build flags.
void __throw_length_error(const char *) { __builtin_unreachable(); }
} // namespace std

namespace {
struct ReferenceImpl {
  explicit ReferenceImpl(int64_t oldestVersion) : oldestVersion(oldestVersion) {
    writeVersionMap[""] = oldestVersion;
  }
  void check(const ConflictSet::ReadRange *reads, ConflictSet::Result *results,
             int count) const {
    for (int i = 0; i < count; ++i) {
      if (reads[i].readVersion < oldestVersion) {
        results[i] = ConflictSet::TooOld;
        continue;
      }
      auto begin =
          std::string((const char *)reads[i].begin.p, reads[i].begin.len);
      auto end =
          reads[i].end.len == 0
              ? begin + std::string("\x00", 1)
              : std::string((const char *)reads[i].end.p, reads[i].end.len);
      int64_t maxVersion = oldestVersion;
      for (auto iter = --writeVersionMap.upper_bound(begin),
                endIter = writeVersionMap.lower_bound(end);
           iter != endIter; ++iter) {
        maxVersion = std::max(maxVersion, iter->second);
      }
      results[i] = maxVersion > reads[i].readVersion ? ConflictSet::Conflict
                                                     : ConflictSet::Commit;
    }
  }
  void addWrites(const ConflictSet::WriteRange *writes, int count) {
    for (int i = 0; i < count; ++i) {
      auto begin =
          std::string((const char *)writes[i].begin.p, writes[i].begin.len);
      auto end =
          writes[i].end.len == 0
              ? begin + std::string("\x00", 1)
              : std::string((const char *)writes[i].end.p, writes[i].end.len);
      auto writeVersion = writes[i].writeVersion;
      auto prevVersion = (--writeVersionMap.upper_bound(end))->second;
      for (auto iter = writeVersionMap.lower_bound(begin),
                endIter = writeVersionMap.lower_bound(end);
           iter != endIter;) {
        iter = writeVersionMap.erase(iter);
      }
      writeVersionMap[begin] = writeVersion;
      writeVersionMap[end] = prevVersion;
    }
  }

  void setOldestVersion(int64_t oldestVersion) {
    this->oldestVersion = oldestVersion;
  }

  void printLogical(std::string &result) {
    for (const auto &[k, v] : writeVersionMap) {
      std::string key;
      for (uint8_t c : k) {
        key += "x";
        key += "0123456789abcdef"[c / 16];
        key += "0123456789abcdef"[c % 16];
      }
      result += key + " -> " + std::to_string(v) + "\n";
    }
  }

  int64_t oldestVersion;
  std::map<std::string, int64_t> writeVersionMap;
};
} // namespace

#ifdef ENABLE_TESTS
/// Smoke test: applies kNumKeys single-key writes, each at its own increasing
/// version, to both the real implementation and the reference model, dumps the
/// resulting tree via debugPrintDot, and exits 0 only if checkCorrectness
/// reports success.
int main(void) {
  constexpr int kNumKeys = 10;
  int64_t writeVersion = 0;
  ConflictSet::Impl cs{writeVersion, 0};
  ReferenceImpl refImpl{writeVersion};
  Arena arena;
  auto *batch = new (arena) ConflictSet::WriteRange[kNumKeys];
  for (int k = 0; k < kNumKeys; ++k) {
    auto &w = batch[k];
    w.begin = toKey(arena, k);
    w.end = toKeyAfter(arena, k);
    // The end key is assigned and then its length is zeroed. ReferenceImpl
    // treats a zero-length end as a write to the single key `begin`.
    w.end.len = 0;
    w.writeVersion = ++writeVersion;
  }
  cs.addWrites(batch, kNumKeys);
  refImpl.addWrites(batch, kNumKeys);
  debugPrintDot(stdout, cs.root);
  const bool ok = checkCorrectness(cs.root, refImpl);
  if (ok) {
    return 0;
  }
  return 1;
}
#endif

#ifdef ENABLE_FUZZ
/// Fuzz entry point: drives ConflictSet::Impl and ReferenceImpl with the same
/// sequence of writes and reads derived from the fuzzer input, and aborts on
/// the first disagreement.
///
/// @param data fuzzer-provided bytes, used as the entropy source (gArbitrary)
/// @param size number of bytes in `data`
/// @return always 0; failures are reported by calling abort()
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  // TODO call setOldestVersion, and check range writes/reads
  gArbitrary = Arbitrary{{data, size}};
  // The first two values drawn from the input parameterize the fallback
  // random generator.
  uint64_t state = gArbitrary.next();
  uint64_t seq = gArbitrary.next();
  auto rand = Random{state, seq};

  int64_t writeVersion = 0;
  ConflictSet::Impl cs{writeVersion, rand.next()};
  ReferenceImpl refImpl{writeVersion};

  // Each iteration performs one batch of writes followed by one batch of
  // reads. The arena holding the batch data is recreated every iteration.
  while (gArbitrary.hasEntropy()) {
    Arena arena;
    {
      int numWrites = gArbitrary.bounded(10);
      // Every write in this batch shares one new version.
      int64_t v = ++writeVersion;
      auto *writes = new (arena) ConflictSet::WriteRange[numWrites];
      // A std::set makes the chosen keys distinct and yields them in
      // ascending integer order.
      std::set<int, std::less<int>, ArenaAlloc<int>> keys{
          ArenaAlloc<int>(&arena)};
      while (int(keys.size()) < numWrites) {
        // Once the fuzzer input is exhausted, keys come from `rand` instead.
        // NOTE(review): presumably so this loop still terminates when
        // gArbitrary has no entropy left - confirm bounded()'s behavior then.
        keys.insert(gArbitrary.hasEntropy() ? gArbitrary.bounded(100)
                                            : rand.bounded(100));
      }
      auto iter = keys.begin();
      for (int i = 0; i < numWrites; ++i) {
        writes[i].begin = toKey(arena, *iter++);
        // Zero-length end: ReferenceImpl treats this as a single-key write.
        writes[i].end.len = 0;
        writes[i].writeVersion = v;
      }
      cs.addWrites(writes, numWrites);
      refImpl.addWrites(writes, numWrites);
    }
    // Compare the tree against the reference model after every write batch.
    bool success = checkCorrectness(cs.root, refImpl);
    if (!success) {
      abort();
    }
    {
      int numReads = gArbitrary.bounded(10);
      // All reads in this batch share one read version, at or below the
      // latest write version.
      int64_t v = writeVersion - gArbitrary.bounded(10);
      auto *reads = new (arena) ConflictSet::ReadRange[numReads];
      std::set<int, std::less<int>, ArenaAlloc<int>> keys{
          ArenaAlloc<int>(&arena)};
      while (int(keys.size()) < numReads) {
        keys.insert(gArbitrary.hasEntropy() ? gArbitrary.bounded(100)
                                            : rand.bounded(100));
      }
      auto iter = keys.begin();
      for (int i = 0; i < numReads; ++i) {
        reads[i].begin = toKey(arena, *iter++);
        // Zero-length end: ReferenceImpl treats this as a single-key read.
        reads[i].end.len = 0;
        reads[i].readVersion = v;
      }
      // results1 comes from the implementation under test, results2 from the
      // reference model.
      auto *results1 = new (arena) ConflictSet::Result[numReads];
      auto *results2 = new (arena) ConflictSet::Result[numReads];
      cs.check(reads, results1, numReads);
      refImpl.check(reads, results2, numReads);
      for (int i = 0; i < numReads; ++i) {
        if (results1[i] != results2[i]) {
          // Report the first mismatch along with the reference model's
          // contents, then fail the fuzz run.
          fprintf(stderr,
                  "Expected %d, got %d for read of %.*s at version %d\n",
                  results2[i], results1[i], reads[i].begin.len,
                  reads[i].begin.p, int(reads[i].readVersion));
          std::string referenceLogicalMap;
          refImpl.printLogical(referenceLogicalMap);
          fprintf(stderr, "Logical map:\n\n%s\n", referenceLogicalMap.c_str());
          abort();
        }
      }
    }
  }
  return 0;
}
#endif