Files
conflict-set/Internal.h
2024-02-01 17:01:47 -08:00

569 lines
17 KiB
C++

#pragma once
#include "ConflictSet.h"
#include <algorithm>
#include <bit>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <inttypes.h>
#include <limits>
#include <map>
#include <new>
#include <set>
#include <span>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
#include <vector>
#define DEBUG_VERBOSE 0
// This header contains code that we want to reuse outside of ConflictSet.cpp or
// want to exclude from coverage since it's only testing related.
// GCOVR_EXCL_START
/// malloc that never returns nullptr: the process is aborted if the
/// allocation fails.
///
/// A zero-byte request is rounded up to one byte. malloc(0) is allowed to
/// return nullptr, and that must not be mistaken for running out of memory.
///
/// @param s number of bytes requested
/// @return a non-null pointer that the caller releases with free()
__attribute__((always_inline)) inline void *safe_malloc(size_t s) {
  void *const p = malloc(s != 0 ? s : 1);
  if (p == nullptr) {
    abort();
  }
  return p;
}
// ==================== BEGIN ARENA IMPL ====================
/// Group allocations with similar lifetimes to amortize the cost of malloc/free
///
/// An Arena is move-only. Memory handed out through the `new (arena) ...`
/// overloads is never released individually; it is freed when the Arena is
/// destroyed or move-assigned over.
struct Arena {
  struct ArenaImpl;

  /// Most recently allocated chunk, or nullptr if nothing has been allocated
  ArenaImpl *impl = nullptr;

  /// If `initialSize` > 0, eagerly allocates a first chunk with room for at
  /// least that many bytes
  explicit Arena(int initialSize = 0);

  Arena(Arena &&other) noexcept;
  Arena &operator=(Arena &&other) noexcept;

  Arena(const Arena &) = delete;
  Arena &operator=(const Arena &) = delete;

  /// O(log n) in the number of allocations
  ~Arena();
};
// Placement allocation functions that draw memory from an Arena, used as
// `new (arena) T` or `new (std::align_val_t(a), arena) T`.
//
// Every overload taking an `Arena *` is deleted, so passing a pointer instead
// of a reference is rejected at compile time. The matching operator delete
// overloads are no-ops: nothing is returned to the arena per allocation.

// ---- scalar, explicit alignment (defined after ArenaImpl) ----
inline void *operator new(size_t size, std::align_val_t align, Arena &arena);
void *operator new(size_t size, std::align_val_t align, Arena *arena) = delete;
[[maybe_unused]] inline void operator delete(void *, std::align_val_t,
                                             Arena &) {}

// ---- scalar, default alignment (alignof(std::max_align_t)) ----
inline void *operator new(size_t size, Arena &arena) {
  constexpr auto kDefaultAlign = std::align_val_t(alignof(std::max_align_t));
  return operator new(size, kDefaultAlign, arena);
}
inline void *operator new(size_t size, Arena *arena) = delete;
[[maybe_unused]] inline void operator delete(void *, Arena &) {}

// ---- array, default alignment ----
inline void *operator new[](size_t size, Arena &arena) {
  return operator new(size, arena);
}
inline void *operator new[](size_t size, Arena *arena) = delete;
[[maybe_unused]] inline void operator delete[](void *, Arena &) {}

// ---- array, explicit alignment ----
inline void *operator new[](size_t size, std::align_val_t align, Arena &arena) {
  return operator new(size, align, arena);
}
inline void *operator new[](size_t size, std::align_val_t align,
                            Arena *arena) = delete;
[[maybe_unused]] inline void operator delete[](void *, std::align_val_t,
                                               Arena &) {}
/// Rounds the pointer `t` up to the next address that is a multiple of
/// `align`. Returns `t` unchanged if it is already aligned.
///
/// align must be a power of two
template <class T> T *align_up(T *t, size_t align) {
  const auto address = reinterpret_cast<uintptr_t>(t);
  // Number of bytes (in [0, align)) between `t` and the aligned address.
  const uintptr_t padding = ((address + align - 1) & ~(align - 1)) - address;
  // Add only the padding. Adding the whole aligned address and subtracting
  // the original one afterwards would form an out-of-bounds intermediate
  // pointer.
  return reinterpret_cast<T *>(reinterpret_cast<char *>(t) + padding);
}
/// Rounds `unaligned` up to the next multiple of `align`.
///
/// align must be a power of two
constexpr inline int align_up(uint32_t unaligned, uint32_t align) {
  const uint32_t mask = align - 1;
  return (unaligned + mask) & ~mask;
}
/// Returns the smallest power of two >= x (1 for x == 0).
///
/// Precondition: x <= 2^31. Larger values have no power of two that fits in
/// a uint32_t; this is checked with an assert.
[[maybe_unused]] constexpr inline uint32_t nextPowerOfTwo(uint32_t x) {
  if (x <= 1) {
    return 1;
  }
  assert(x <= (uint32_t{1} << 31));
  // Shift an unsigned 1 so that producing 2^31 never goes through a signed
  // intermediate value.
  return uint32_t{1} << (32 - std::countl_zero(x - 1));
}
/// Header placed at the front of every chunk an Arena obtains from malloc.
/// The chunk's usable bytes start directly after this header.
struct Arena::ArenaImpl {
  /// The chunk that was current before this one, or nullptr
  Arena::ArenaImpl *prev;
  /// Number of usable bytes following the header
  int capacity;
  /// Number of usable bytes already handed out
  int used;
  /// First usable byte of this chunk
  uint8_t *begin() {
    return reinterpret_cast<uint8_t *>(this) + sizeof(ArenaImpl);
  }
};
// The header must keep the bytes that follow it 16-byte aligned.
static_assert(sizeof(Arena::ArenaImpl) == 16);
static_assert(alignof(Arena::ArenaImpl) == 8);
/// Creates an arena. When `initialSize` > 0 a first chunk with room for at
/// least `initialSize` bytes is allocated up front; otherwise the arena
/// starts without any chunk.
inline Arena::Arena(int initialSize) : impl(nullptr) {
  if (initialSize <= 0) {
    return;
  }
  // Header plus payload, rounded up to a multiple of 16 bytes.
  const auto chunkBytes = align_up(initialSize + sizeof(ArenaImpl), 16);
  auto *chunk = static_cast<Arena::ArenaImpl *>(safe_malloc(chunkBytes));
  chunk->prev = nullptr;
  chunk->capacity = chunkBytes - sizeof(ArenaImpl);
  chunk->used = 0;
  impl = chunk;
}
/// Frees `impl` and every chunk reachable from it through `prev`.
/// Passing nullptr is a no-op.
inline void onDestroy(Arena::ArenaImpl *impl) {
  for (Arena::ArenaImpl *next = nullptr; impl != nullptr; impl = next) {
    // Read the link before the chunk holding it is released.
    next = impl->prev;
    free(impl);
  }
}
/// Takes over `other`'s chunks and leaves `other` without any.
[[maybe_unused]] inline Arena::Arena(Arena &&other) noexcept
    : impl(std::exchange(other.impl, nullptr)) {}

/// Frees the chunks currently owned by this arena, then takes over
/// `other`'s chunks and leaves `other` without any.
///
/// Self-move-assignment is a no-op. Without the guard the chunks would be
/// freed and the dangling pointer re-installed in `impl`.
[[maybe_unused]] inline Arena &Arena::operator=(Arena &&other) noexcept {
  if (this != &other) {
    onDestroy(impl);
    impl = std::exchange(other.impl, nullptr);
  }
  return *this;
}

/// Frees every chunk owned by this arena.
inline Arena::~Arena() { onDestroy(impl); }
inline void *operator new(size_t size, std::align_val_t align, Arena &arena) {
int64_t aligned_size = size + size_t(align) - 1;
if (arena.impl == nullptr ||
(arena.impl->capacity - arena.impl->used) < aligned_size) {
auto allocationSize = align_up(
sizeof(Arena::ArenaImpl) +
std::max<int>(aligned_size,
(arena.impl ? std::max<int>(sizeof(Arena::ArenaImpl),
arena.impl->capacity * 2)
: 0)),
16);
auto *impl = (Arena::ArenaImpl *)safe_malloc(allocationSize);
impl->prev = arena.impl;
impl->capacity = allocationSize - sizeof(Arena::ArenaImpl);
impl->used = 0;
arena.impl = impl;
}
auto *result =
align_up(arena.impl->begin() + arena.impl->used, size_t(align));
auto usedDelta = (result - arena.impl->begin()) + size - arena.impl->used;
arena.impl->used += usedDelta;
return result;
}
/// STL-friendly allocator using an arena
template <class T> struct ArenaAlloc {
typedef T value_type;
ArenaAlloc() = delete;
explicit ArenaAlloc(Arena *arena) : arena(arena) {}
Arena *arena;
template <class U> constexpr ArenaAlloc(const ArenaAlloc<U> &other) noexcept {
arena = other.arena;
}
[[nodiscard]] T *allocate(size_t n) {
if (n > 0xfffffffffffffffful / sizeof(T)) { // NOLINT
__builtin_unreachable();
}
return static_cast<T *>((void *)new (std::align_val_t(alignof(T)), *arena)
uint8_t[n * sizeof(T)]); // NOLINT
}
void deallocate(T *, size_t) noexcept {}
};
/// std::vector whose storage is allocated from an Arena
template <class T> using Vector = std::vector<T, ArenaAlloc<T>>;

/// Returns an empty Vector<T> that allocates from `arena`
template <class T> auto vector(Arena &arena) {
  ArenaAlloc<T> allocator(&arena);
  return Vector<T>(allocator);
}
/// std::set (ordered by std::less<T>) whose nodes are allocated from an Arena
template <class T> using Set = std::set<T, std::less<T>, ArenaAlloc<T>>;

/// Returns an empty Set<T> that allocates from `arena`
template <class T> auto set(Arena &arena) {
  ArenaAlloc<T> allocator(&arena);
  return Set<T>(allocator);
}
/// Two arena allocators compare equal exactly when they share an arena
template <class T, class U>
bool operator==(const ArenaAlloc<T> &lhs, const ArenaAlloc<U> &rhs) {
  const bool sameArena = lhs.arena == rhs.arena;
  return sameArena;
}

/// Negation of operator==
template <class T, class U>
bool operator!=(const ArenaAlloc<T> &lhs, const ArenaAlloc<U> &rhs) {
  return lhs.arena != rhs.arena;
}
// ==================== END ARENA IMPL ====================
// ==================== BEGIN ARBITRARY IMPL ====================
/// Think of `Arbitrary` as an attacker-controlled random number generator.
/// Usually you want your random number generator to be fair, so that you can
/// sensibly analyze probabilities. E.g. The analysis that shows that quicksort
/// is expected O(n log n) with a random pivot relies on the random pivot being
/// selected uniformly from a fair distribution.
///
/// Other times you want your randomness to be diabolically unfair, like when
/// looking for bugs and fuzzing. The random-number-like interface is still
/// convenient here, but you can potentially get much better coverage by
/// allowing the possibility of e.g. flipping heads 100 times in a row.
///
/// When it runs out of entropy, it always returns 0.
struct Arbitrary {
  Arbitrary() = default;
  explicit Arbitrary(std::span<const uint8_t> bytecode) : bytecode(bytecode) {}

  /// Draws an arbitrary uint32_t
  uint32_t next() { return consume<4>(); }

  /// Draws an arbitrary element from [0, s)
  uint32_t bounded(uint32_t s);

  /// Fill `bytes` with `size` arbitrary bytes. Bytes that cannot be taken
  /// from the remaining input are set to 0.
  void randomBytes(uint8_t *bytes, int size) {
    const int available = std::min<int>(size, bytecode.size());
    if (available > 0) {
      memcpy(bytes, bytecode.data(), available);
    }
    bytecode = bytecode.subspan(available);
    memset(bytes + available, 0, size - available);
  }

  /// Fill `bytes` with `size` random hex bytes. One input byte yields up to
  /// two hex digits, low nibble first.
  void randomHex(uint8_t *bytes, int size) {
    static constexpr char kHexDigits[] = "0123456789abcdef";
    int written = 0;
    while (written < size) {
      const uint8_t source = consume<1>();
      bytes[written++] = kHexDigits[source & 0xf];
      if (written < size) {
        bytes[written++] = kHexDigits[(source >> 4) & 0xf];
      }
    }
  }

  /// Draws an arbitrary value of the trivially copyable type T by filling
  /// its bytes with randomBytes
  template <class T, class = std::enable_if_t<std::is_trivially_copyable_v<T>>>
  T randT() {
    T t;
    randomBytes((uint8_t *)&t, sizeof(T));
    return t;
  }

  /// True while at least one unread input byte remains
  bool hasEntropy() const { return bytecode.size() != 0; }

private:
  /// Removes and returns the next input byte, or returns 0 if none is left
  uint8_t consumeByte() {
    if (bytecode.empty()) {
      return 0;
    }
    const uint8_t front = bytecode.front();
    bytecode = bytecode.subspan(1);
    return front;
  }

  /// Reads kBytes input bytes as a big-endian integer
  template <int kBytes> uint32_t consume() {
    static_assert(kBytes <= 4);
    uint32_t value = 0;
    for (int remaining = kBytes; remaining > 0; --remaining) {
      value = (value << 8) | consumeByte();
    }
    return value;
  }

  /// Input bytes that have not been consumed yet
  std::span<const uint8_t> bytecode;
};
/// Draws an arbitrary element from [0, s).
///
/// Consumes the smallest whole number of input bytes (1 to 4) able to
/// represent `s - 1` and reduces the value modulo `s`. For `s == 1` no input
/// is consumed and 0 is returned. `s == 0` (an empty range) also returns 0
/// without consuming input, rather than dividing by zero.
inline uint32_t Arbitrary::bounded(uint32_t s) {
  if (s <= 1) {
    return 0;
  }
  // Number of bits needed to represent the largest result, s - 1 (1..32).
  const int bits = 32 - std::countl_zero(s - 1);
  if (bits <= 8) {
    return consume<1>() % s;
  }
  if (bits <= 16) {
    return consume<2>() % s;
  }
  if (bits <= 24) {
    return consume<3>() % s;
  }
  return consume<4>() % s;
}
// ==================== END ARBITRARY IMPL ====================
// ==================== BEGIN UTILITIES IMPL ====================
// Round-robins over `remaining`, calling Stepwise::step on each element in
// turn until every element's step has returned true. An element is dropped
// from the rotation as soon as its step returns true.
// Applies a permutation to `remaining` as a side effect.
template <class Stepwise> void runInterleaved(std::span<Stepwise> remaining) {
  while (!remaining.empty()) {
    size_t idx = 0;
    while (idx < remaining.size()) {
      if (!remaining[idx].step()) {
        ++idx;
        continue;
      }
      // Finished: move it to the back and shrink the active range. The
      // element swapped into `idx` is stepped next.
      const size_t last = remaining.size() - 1;
      if (idx != last) {
        using std::swap;
        swap(remaining[idx], remaining[last]);
      }
      remaining = remaining.first(last);
    }
  }
}
// Runs each element of `remaining` to completion, one after another: step is
// called repeatedly on an element until it returns true, then the next
// element is started.
template <class Stepwise> void runSequential(std::span<Stepwise> remaining) {
  for (auto &task : remaining) {
    bool finished = false;
    do {
      finished = task.step();
    } while (!finished);
  }
}
/// Straightforward std::map-based model of a conflict set.
///
/// `writeVersionMap` maps a key k to the version that applies to every key in
/// [k, next key in the map). The empty key is inserted by the constructor, so
/// every key has an entry at or before it.
struct ReferenceImpl {
  explicit ReferenceImpl(int64_t oldestVersion) : oldestVersion(oldestVersion) {
    writeVersionMap[""] = oldestVersion;
  }

  /// For each of the `count` reads, stores into `results[i]`:
  ///  - TooOld   if the read version is below `oldestVersion`
  ///  - Conflict if any part of the read range has a version greater than
  ///             the read version
  ///  - Commit   otherwise
  void check(const ConflictSet::ReadRange *reads, ConflictSet::Result *results,
             int count) const {
    for (int i = 0; i < count; ++i) {
      const auto &read = reads[i];
      if (read.readVersion < oldestVersion) {
        results[i] = ConflictSet::TooOld;
        continue;
      }
      const auto [begin, end] = bounds(read);
      // Start at the entry covering `begin` and walk every entry that starts
      // before `end`.
      auto entry = writeVersionMap.upper_bound(begin);
      --entry;
      const auto stop = writeVersionMap.lower_bound(end);
      int64_t newest = oldestVersion;
      while (entry != stop) {
        newest = std::max(newest, entry->second);
        ++entry;
      }
      if (newest > read.readVersion) {
        results[i] = ConflictSet::Conflict;
      } else {
        results[i] = ConflictSet::Commit;
      }
    }
  }

  /// Records each of the `count` writes: keys in [begin, end) get the
  /// write's version, and keys from `end` onwards keep the version they had.
  void addWrites(const ConflictSet::WriteRange *writes, int count) {
    for (int i = 0; i < count; ++i) {
      const auto &write = writes[i];
      const auto [begin, end] = bounds(write);
      // Version in effect at `end` before this write; restored below.
      auto atEnd = writeVersionMap.upper_bound(end);
      --atEnd;
      const auto versionAtEnd = atEnd->second;
      // Drop every boundary inside [begin, end).
      writeVersionMap.erase(writeVersionMap.lower_bound(begin),
                            writeVersionMap.lower_bound(end));
      writeVersionMap[begin] = write.writeVersion;
      writeVersionMap[end] = versionAtEnd;
    }
  }

  void setOldestVersion(int64_t oldestVersion) {
    this->oldestVersion = oldestVersion;
  }

  /// Appends one line per map entry to `result`, formatted as
  /// "<key> -> <version>\n" with every key byte written as 'x' followed by
  /// two lowercase hex digits.
  void printLogical(std::string &result) {
    static constexpr char kHexDigits[] = "0123456789abcdef";
    for (const auto &entry : writeVersionMap) {
      for (const uint8_t c : entry.first) {
        result += "x";
        result += kHexDigits[c / 16];
        result += kHexDigits[c % 16];
      }
      result += " -> ";
      result += std::to_string(entry.second);
      result += "\n";
    }
  }

  int64_t oldestVersion;
  std::map<std::string, int64_t> writeVersionMap;

private:
  /// Copies the bounds of a read or write range into owning strings. A range
  /// whose end has length 0 denotes the single key `begin`; its exclusive end
  /// is `begin` followed by one zero byte.
  template <class Range>
  static std::pair<std::string, std::string> bounds(const Range &range) {
    std::string first(reinterpret_cast<const char *>(range.begin.p),
                      range.begin.len);
    std::string last =
        range.end.len == 0
            ? first + std::string("\x00", 1)
            : std::string(reinterpret_cast<const char *>(range.end.p),
                          range.end.len);
    return {std::move(first), std::move(last)};
  }
};
using Key = ConflictSet::Key;
/// Returns a key of sizeof(int) bytes holding a byte-for-byte copy of `n`.
/// The bytes are allocated from `arena`.
[[maybe_unused]] static Key toKey(Arena &arena, int n) {
  constexpr int kLen = sizeof(n);
  auto *bytes = new (arena) uint8_t[kLen];
  memcpy(bytes, &n, kLen);
  return Key{bytes, kLen};
}
/// Returns a key of sizeof(int) + 1 bytes: a byte-for-byte copy of `n`
/// followed by a single zero byte. The bytes are allocated from `arena`.
[[maybe_unused]] static Key toKeyAfter(Arena &arena, int n) {
  constexpr int kPrefixLen = sizeof(n);
  constexpr int kLen = kPrefixLen + 1;
  auto *bytes = new (arena) uint8_t[kLen];
  memcpy(bytes, &n, kPrefixLen);
  bytes[kPrefixLen] = 0;
  return Key{bytes, kLen};
}
/// Renders every byte of `key` as 'x' followed by two lowercase hex digits,
/// e.g. the bytes {0x00, 0xff} become "x00xff".
inline std::string printable(std::string_view key) {
  static constexpr char kHexDigits[] = "0123456789abcdef";
  std::string text;
  text.reserve(key.size() * 3);
  for (const char ch : key) {
    const auto byte = static_cast<uint8_t>(ch);
    text += "x";
    text += kHexDigits[byte >> 4];
    text += kHexDigits[byte & 0xf];
  }
  return text;
}
/// Renders the `key.len` bytes starting at `key.p` in the same format as
/// printable(std::string_view).
inline std::string printable(const Key &key) {
  const auto *bytes = reinterpret_cast<const char *>(key.p);
  return printable(std::string_view(bytes, key.len));
}
namespace {
// Differential test driver: uses the bytes passed to the constructor to
// generate batches of writes and point reads, applies them to both a
// `ConflictSetImpl` and a `ReferenceImpl`, and compares the results of their
// `check` calls.
template <class ConflictSetImpl> struct TestDriver {
// TODO call setOldestVersion, and check range writes/reads
// Source of every generated decision; reads from the caller's buffer.
Arbitrary arbitrary;
explicit TestDriver(const uint8_t *data, size_t size)
: arbitrary({data, size}) {}
// Version given to the most recent batch of writes; incremented once per
// call to next().
int64_t writeVersion = 0;
// Implementation under test and the reference it is compared against. Both
// are constructed with the initial writeVersion.
ConflictSetImpl cs{writeVersion};
ReferenceImpl refImpl{writeVersion};
// Generated key lengths are drawn from [0, kMaxKeyLen).
constexpr static auto kMaxKeyLen = 24;
// Set to false when the two implementations disagree on a read.
bool ok = true;
// Human-readable name of `r` for diagnostics. Aborts if `r` is none of the
// handled enumerators.
static const char *resultToStr(ConflictSet::Result r) {
switch (r) {
case ConflictSet::Commit:
return "commit";
case ConflictSet::Conflict:
return "conflict";
case ConflictSet::TooOld:
return "too old";
}
abort();
}
// Call until it returns true, for "done". Check internal invariants etc
// between calls to next.
//
// Each call generates one batch of writes followed by one batch of reads.
// Returns true when the input runs out of entropy or when the two
// implementations disagree (in which case `ok` is also set to false), and
// false otherwise.
bool next() {
if (!arbitrary.hasEntropy()) {
return true;
}
// Holds every allocation made during this call.
Arena arena;
{
// ---- Writes: up to 9 distinct keys, all written at a new version ----
int numWriteKeys = arbitrary.bounded(10);
int64_t v = ++writeVersion;
auto *writes = new (arena) ConflictSet::WriteRange[numWriteKeys];
// The set deduplicates the keys and keeps them sorted.
auto keys = set<std::string_view>(arena);
while (int(keys.size()) < numWriteKeys) {
if (!arbitrary.hasEntropy()) {
return true;
}
int keyLen = arbitrary.bounded(kMaxKeyLen);
auto *begin = new (arena) uint8_t[keyLen];
arbitrary.randomBytes(begin, keyLen);
keys.insert(std::string_view((const char *)begin, keyLen));
}
auto iter = keys.begin();
int numWrites = 0;
// Walk the sorted keys. Each key starts a write; if another key is available
// and the next arbitrary draw is nonzero, that following key becomes the
// write's end (a range write), otherwise end.len = 0 (a point write).
for (int i = 0; i < numWriteKeys; ++i, ++numWrites) {
writes[numWrites].begin.p = (const uint8_t *)iter->data();
writes[numWrites].begin.len = iter->size();
++iter;
if (i + 1 < numWriteKeys && arbitrary.bounded(2)) {
++i;
writes[numWrites].end.p = (const uint8_t *)iter->data();
writes[numWrites].end.len = iter->size();
++iter;
} else {
writes[numWrites].end.len = 0;
}
writes[numWrites].writeVersion = v;
#if DEBUG_VERBOSE && !defined(NDEBUG)
if (writes[numWrites].end.len == 0) {
fprintf(stderr, "Write: {%s} -> %d\n",
printable(writes[numWrites].begin).c_str(),
int(writes[numWrites].writeVersion));
} else {
fprintf(stderr, "Write: [%s, %s) -> %d\n",
printable(writes[numWrites].begin).c_str(),
printable(writes[numWrites].end).c_str(),
int(writes[numWrites].writeVersion));
}
#endif
}
// Every generated key was used as either a begin or an end.
assert(iter == keys.end());
// Apply the same writes to both implementations.
cs.addWrites(writes, numWrites);
refImpl.addWrites(writes, numWrites);
}
{
// ---- Reads: up to 9 distinct point reads at a single read version ----
int numReads = arbitrary.bounded(10);
// Read at most 9 versions behind the latest write, clamped at 0.
int64_t v = std::max<int64_t>(writeVersion - arbitrary.bounded(10), 0);
auto *reads = new (arena) ConflictSet::ReadRange[numReads];
auto keys = set<std::string_view>(arena);
while (int(keys.size()) < numReads) {
if (!arbitrary.hasEntropy()) {
return true;
}
int keyLen = arbitrary.bounded(kMaxKeyLen);
auto *begin = new (arena) uint8_t[keyLen];
arbitrary.randomBytes(begin, keyLen);
keys.insert(std::string_view((const char *)begin, keyLen));
}
auto iter = keys.begin();
for (int i = 0; i < numReads; ++i) {
reads[i].begin.p = (const uint8_t *)iter->data();
reads[i].begin.len = iter->size();
++iter;
// end.len = 0: only point reads are generated.
reads[i].end.len = 0;
reads[i].readVersion = v;
#if DEBUG_VERBOSE && !defined(NDEBUG)
fprintf(stderr, "Read: {%s} at %d\n", printable(reads[i].begin).c_str(),
int(reads[i].readVersion));
#endif
}
assert(iter == keys.end());
// results1 comes from the implementation under test, results2 from the
// reference.
auto *results1 = new (arena) ConflictSet::Result[numReads];
auto *results2 = new (arena) ConflictSet::Result[numReads];
cs.check(reads, results1, numReads);
refImpl.check(reads, results2, numReads);
for (int i = 0; i < numReads; ++i) {
if (results1[i] != results2[i]) {
// Report the first disagreement and stop.
fprintf(stderr,
"Expected %s, got %s for read of %s at version %" PRId64 "\n",
resultToStr(results2[i]), resultToStr(results1[i]),
printable(reads[i].begin).c_str(), reads[i].readVersion);
ok = false;
return true;
}
}
}
return false;
}
};
} // namespace
// GCOVR_EXCL_STOP