Prepare for fuzzing
This commit is contained in:
335
ConflictSet.cpp
335
ConflictSet.cpp
@@ -12,17 +12,6 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#define SHOW_PRIORITY 0
|
||||
#define DEBUG 0
|
||||
|
||||
using Key = ConflictSet::Key;
|
||||
|
||||
/// Three-way comparison of two keys: the bytes of the shared prefix are
/// compared with `memcmp`, and if they match the key lengths decide the order.
static auto operator<=>(const Key &lhs, const Key &rhs) {
  const int sharedLen = std::min(lhs.len, rhs.len);
  if (const int diff = memcmp(lhs.p, rhs.p, sharedLen); diff != 0) {
    return diff <=> 0;
  }
  return lhs.len <=> rhs.len;
}
|
||||
|
||||
// ==================== BEGIN ARENA IMPL ====================
|
||||
|
||||
/// Group allocations with similar lifetimes to amortize the cost of malloc/free
|
||||
@@ -189,6 +178,280 @@ bool operator!=(const ArenaAlloc<T> &lhs, const ArenaAlloc<U> &rhs) {
|
||||
|
||||
// ==================== END ARENA IMPL ====================
|
||||
|
||||
// ==================== BEGIN RANDOM IMPL ====================
|
||||
|
||||
struct Random {
  // *Really* minimal PCG32 code / (c) 2014 M.E. O'Neill / pcg-random.org
  // Licensed under Apache License 2.0 (NO WARRANTY, etc. see website)
  //
  // Modified - mostly c -> c++

  /// Constructs an unseeded generator (state 0, stream increment 1). The
  /// increment defaults to 1 rather than 0 so that the "must be odd" invariant
  /// holds; with an increment of 0 the state never leaves 0 and every draw
  /// would be 0.
  Random() = default;

  /// Seeds the generator from a state initializer and a stream id, then
  /// discards one draw so that the first value handed out by `next()` is the
  /// first real PCG32 output.
  Random(uint64_t initState, uint64_t initSeq) {
    pcg32_srandom_r(initState, initSeq);
    next();
  }

  /// Draws from a uniform distribution of uint32_t's
  uint32_t next() {
    // The generator runs one value ahead: hand out the buffered value and
    // compute the following one.
    const uint32_t result = next_;
    next_ = pcg32_random_r();
    return result;
  }

  /// Draws from a uniform distribution of [0, s). From
  /// https://arxiv.org/pdf/1805.10941.pdf
  ///
  /// `s` must be non-zero.
  uint32_t bounded(uint32_t s) {
    assert(s != 0);
    uint32_t x = next();
    uint64_t m = uint64_t(x) * uint64_t(s);
    uint32_t l = uint32_t(m);
    if (l < s) {
      // 2^32 mod s, computed in 32-bit unsigned arithmetic. Draws whose low
      // word falls below this threshold are rejected.
      const uint32_t t = -s % s;
      while (l < t) {
        x = next();
        m = uint64_t(x) * uint64_t(s);
        l = uint32_t(m);
      }
    }
    return uint32_t(m >> 32);
  }

  /// Fill `bytes` with `size` random bytes
  void randomBytes(uint8_t *bytes, int size);

  /// Fill `bytes` with `size` random hex bytes
  void randomHex(uint8_t *bytes, int size);

  /// Returns a `T` whose object representation is filled with random bytes.
  template <class T, class = std::enable_if_t<std::is_trivially_copyable_v<T>>>
  T randT() {
    T t;
    randomBytes(reinterpret_cast<uint8_t *>(&t), sizeof(T));
    return t;
  }

private:
  /// Advances the state by one step and returns the 32-bit output derived from
  /// the previous state.
  uint32_t pcg32_random_r() {
    const uint64_t oldState = state;
    // Advance internal state
    state = oldState * 6364136223846793005ULL + inc;
    // Calculate output function (XSH RR), uses old state for max ILP
    const uint32_t xorShifted = ((oldState >> 18u) ^ oldState) >> 27u;
    const uint32_t rot = oldState >> 59u;
    return (xorShifted >> rot) | (xorShifted << ((-rot) & 31));
  }

  // Seed the rng. Specified in two parts, state initializer and a
  // sequence selection constant (a.k.a. stream id)
  void pcg32_srandom_r(uint64_t initstate, uint64_t initSeq) {
    state = 0U;
    inc = (initSeq << 1u) | 1u;
    pcg32_random_r();
    state += initstate;
    pcg32_random_r();
  }

  // Value that the next call to `next()` will return.
  uint32_t next_{};
  // RNG state. All values are possible.
  uint64_t state{};
  // Controls which RNG sequence (stream) is selected. Must *always* be odd.
  uint64_t inc{1};
};
|
||||
|
||||
// Per-thread generator (`thread_local`), constructed with state initializer 0
// and stream id 0. `initFuzz` replaces the calling thread's instance with one
// seeded from the fuzz input.
// TODO provide a way to seed this
|
||||
thread_local inline Random gRandom{0, 0};
|
||||
|
||||
template <class Container> void shuffle(Container &x) {
|
||||
using std::swap;
|
||||
for (int i = x.size() - 1; i > 0; --i) {
|
||||
int j = gRandom.bounded(i + 1);
|
||||
if (i != j) {
|
||||
swap(x[i], x[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Random::randomBytes(uint8_t *bytes, int size) {
|
||||
int i = 0;
|
||||
for (; i + 4 < size; i += 4) {
|
||||
uint32_t random = next();
|
||||
memcpy(bytes + i, &random, 4);
|
||||
}
|
||||
if (i < size) {
|
||||
uint32_t random = next();
|
||||
memcpy(bytes + i, &random, size - i);
|
||||
}
|
||||
}
|
||||
|
||||
void Random::randomHex(uint8_t *bytes, int size) {
|
||||
int i = 0;
|
||||
while (i + 8 < size) {
|
||||
uint32_t r = next();
|
||||
bytes[i++] = "0123456789abcdef"[r & 0b1111];
|
||||
r >>= 4;
|
||||
bytes[i++] = "0123456789abcdef"[r & 0b1111];
|
||||
r >>= 4;
|
||||
bytes[i++] = "0123456789abcdef"[r & 0b1111];
|
||||
r >>= 4;
|
||||
bytes[i++] = "0123456789abcdef"[r & 0b1111];
|
||||
r >>= 4;
|
||||
bytes[i++] = "0123456789abcdef"[r & 0b1111];
|
||||
r >>= 4;
|
||||
bytes[i++] = "0123456789abcdef"[r & 0b1111];
|
||||
r >>= 4;
|
||||
bytes[i++] = "0123456789abcdef"[r & 0b1111];
|
||||
r >>= 4;
|
||||
bytes[i++] = "0123456789abcdef"[r & 0b1111];
|
||||
}
|
||||
uint32_t r = next();
|
||||
while (i < size) {
|
||||
bytes[i++] = "0123456789abcdef"[r & 0b1111];
|
||||
r >>= 4;
|
||||
}
|
||||
}
|
||||
|
||||
// ==================== END RANDOM IMPL ====================
|
||||
|
||||
// ==================== BEGIN ARBITRARY IMPL ====================
|
||||
|
||||
/// Think of `Arbitrary` as an attacker-controlled random number generator.
|
||||
/// Usually you want your random number generator to be fair, so that you can
|
||||
/// sensibly analyze probabilities. E.g. The analysis that shows that quicksort
|
||||
/// is expected O(n log n) with a random pivot relies on the random pivot being
|
||||
/// selected uniformly from a fair distribution.
|
||||
///
|
||||
/// Other times you want your randomness to be diabolically unfair, like when
|
||||
/// looking for bugs and fuzzing. The random-number-like interface is still
|
||||
/// convenient here, but you can potentially get much better coverage by
|
||||
/// allowing the possibility of e.g. flipping heads 100 times in a row.
|
||||
///
|
||||
/// When it runs out of entropy, it always returns 0.
|
||||
struct Arbitrary {
|
||||
Arbitrary() = default;
|
||||
|
||||
explicit Arbitrary(std::span<const uint8_t> bytecode) : bytecode(bytecode) {}
|
||||
|
||||
/// Draws an arbitrary uint32_t
|
||||
uint32_t next() { return consume<4>(); }
|
||||
|
||||
/// Draws an arbitrary element from [0, s)
|
||||
uint32_t bounded(uint32_t s);
|
||||
|
||||
/// Fill `bytes` with `size` arbitrary bytes
|
||||
void randomBytes(uint8_t *bytes, int size) {
|
||||
int toFill = std::min<int>(size, bytecode.size());
|
||||
if (toFill > 0) {
|
||||
memcpy(bytes, bytecode.data(), toFill);
|
||||
}
|
||||
bytecode = bytecode.subspan(toFill, bytecode.size() - toFill);
|
||||
memset(bytes + toFill, 0, size - toFill);
|
||||
}
|
||||
|
||||
/// Fill `bytes` with `size` random hex bytes
|
||||
void randomHex(uint8_t *bytes, int size) {
|
||||
for (int i = 0; i < size;) {
|
||||
uint8_t arbitrary = consume<1>();
|
||||
bytes[i++] = "0123456789abcdef"[arbitrary & 0xf];
|
||||
arbitrary >>= 4;
|
||||
if (i < size) {
|
||||
bytes[i++] = "0123456789abcdef"[arbitrary & 0xf];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class T, class = std::enable_if_t<std::is_trivially_copyable_v<T>>>
|
||||
T randT() {
|
||||
T t;
|
||||
randomBytes((uint8_t *)&t, sizeof(T));
|
||||
return t;
|
||||
}
|
||||
|
||||
bool hasEntropy() const { return bytecode.size() != 0; }
|
||||
|
||||
private:
|
||||
uint8_t consumeByte() {
|
||||
if (bytecode.size() == 0) {
|
||||
return 0;
|
||||
}
|
||||
auto result = bytecode[0];
|
||||
bytecode = bytecode.subspan(1, bytecode.size() - 1);
|
||||
return result;
|
||||
}
|
||||
|
||||
template <int kBytes> uint32_t consume() {
|
||||
uint32_t result = 0;
|
||||
static_assert(kBytes <= 4);
|
||||
for (int i = 0; i < kBytes; ++i) {
|
||||
result <<= 8;
|
||||
result |= consumeByte();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
std::span<const uint8_t> bytecode;
|
||||
};
|
||||
|
||||
/// Global source of fuzzer-controlled values. Default-constructed, so it has
/// no input and every draw yields 0 until `initFuzz` installs a buffer.
inline Arbitrary gArbitrary;
|
||||
|
||||
/// Points `gArbitrary` at `data[0, size)` and reseeds `gRandom` from the first
/// two values drawn from it.
void initFuzz(const uint8_t *data, size_t size);
|
||||
|
||||
uint32_t Arbitrary::bounded(uint32_t s) {
|
||||
if (s == 1) {
|
||||
return 0;
|
||||
}
|
||||
switch (32 - __builtin_clz(s - 1)) {
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
case 4:
|
||||
case 5:
|
||||
case 6:
|
||||
case 7:
|
||||
case 8:
|
||||
return consume<1>() % s;
|
||||
case 9:
|
||||
case 10:
|
||||
case 11:
|
||||
case 12:
|
||||
case 13:
|
||||
case 14:
|
||||
case 15:
|
||||
case 16:
|
||||
return consume<2>() % s;
|
||||
case 17:
|
||||
case 18:
|
||||
case 19:
|
||||
case 20:
|
||||
case 21:
|
||||
case 22:
|
||||
case 23:
|
||||
case 24:
|
||||
return consume<3>() % s;
|
||||
default:
|
||||
return consume<4>() % s;
|
||||
}
|
||||
}
|
||||
|
||||
/// Installs `data[0, size)` as the input of `gArbitrary`, then reseeds the
/// calling thread's `gRandom` from it. The state initializer is drawn first
/// and the stream id second; each is a single 32-bit draw widened to 64 bits.
void initFuzz(const uint8_t *data, size_t size) {
  gArbitrary = Arbitrary{std::span<const uint8_t>(data, size)};
  const uint64_t seedState = gArbitrary.next();
  const uint64_t seedStream = gArbitrary.next();
  gRandom = Random{seedState, seedStream};
}
|
||||
|
||||
// ==================== END ARBITRARY IMPL ====================
|
||||
|
||||
#define SHOW_PRIORITY 0
|
||||
#define DEBUG 0
|
||||
|
||||
using Key = ConflictSet::Key;
|
||||
|
||||
/// Three-way comparison of two keys: the bytes of the shared prefix are
/// compared with `memcmp`, and if they match the key lengths decide the order.
static auto operator<=>(const Key &lhs, const Key &rhs) {
  const int sharedLen = std::min(lhs.len, rhs.len);
  if (const int diff = memcmp(lhs.p, rhs.p, sharedLen); diff != 0) {
    return diff <=> 0;
  }
  return lhs.len <=> rhs.len;
}
|
||||
|
||||
namespace {
|
||||
// A node in the tree representing write conflict history. This tree maintains
|
||||
// several invariants:
|
||||
@@ -236,16 +499,6 @@ struct Node {
|
||||
}
|
||||
};
|
||||
|
||||
// TODO: use a better prng. This is technically vulnerable to a
// denial-of-service attack that can make conflict-checking linear in the
// number of nodes in the tree.
thread_local uint32_t gSeed = 1013904223L;

/// Returns the current per-thread seed and replaces it with the next value of
/// the linear congruential sequence `seed * 1664525 + 1013904223` (truncated
/// to 32 bits).
uint32_t fastRand() {
  return std::exchange(gSeed, gSeed * 1664525L + 1013904223L);
}
|
||||
|
||||
// Note: `rangeVersion` is left uninitialized.
|
||||
Node *createNode(const Key &key, Node *parent, int64_t pointVersion) {
|
||||
assert(key.len <= std::numeric_limits<int>::max());
|
||||
@@ -255,7 +508,7 @@ Node *createNode(const Key &key, Node *parent, int64_t pointVersion) {
|
||||
result->child[0] = nullptr;
|
||||
result->child[1] = nullptr;
|
||||
result->parent = parent;
|
||||
result->priority = fastRand();
|
||||
result->priority = gRandom.next();
|
||||
#if SHOW_PRIORITY
|
||||
result->priority &= 0xff;
|
||||
#endif
|
||||
@@ -601,7 +854,8 @@ int64_t checkMaxVersion(Node *node, bool &success) {
|
||||
return expected;
|
||||
}
|
||||
|
||||
bool checkInvariants(Node *node) {
|
||||
template <class ReferenceImpl>
|
||||
bool checkCorrectness(Node *node, ReferenceImpl &refImpl) {
|
||||
bool success = true;
|
||||
// Check bst invariant
|
||||
Arena arena;
|
||||
@@ -626,6 +880,18 @@ bool checkInvariants(Node *node) {
|
||||
checkMaxVersion(node, success);
|
||||
checkParentPointers(node, success);
|
||||
|
||||
std::string logicalMap;
|
||||
std::string referenceLogicalMap;
|
||||
printLogical(logicalMap, node);
|
||||
refImpl.printLogical(referenceLogicalMap);
|
||||
if (logicalMap != referenceLogicalMap) {
|
||||
fprintf(stderr,
|
||||
"Logical map not equal to reference logical map.\n\nActual:\n"
|
||||
"%s\nExpected:\n%s\n",
|
||||
logicalMap.c_str(), referenceLogicalMap.c_str());
|
||||
success = false;
|
||||
}
|
||||
|
||||
return success;
|
||||
}
|
||||
|
||||
@@ -706,7 +972,8 @@ struct __attribute__((__visibility__("hidden"))) ConflictSet::Impl {
|
||||
|
||||
void addWrites(const WriteRange *writes, int count) {
|
||||
Arena arena;
|
||||
auto *stepwiseInserts = new (arena) StepwiseInsert[count];
|
||||
auto stepwiseInserts =
|
||||
std::span<StepwiseInsert>(new (arena) StepwiseInsert[count], count);
|
||||
for (int i = 0; i < count; ++i) {
|
||||
// TODO handle non-singleton writes lol
|
||||
assert(writes[i].end.len == 0);
|
||||
@@ -720,11 +987,9 @@ struct __attribute__((__visibility__("hidden"))) ConflictSet::Impl {
|
||||
// Mitigate potential n^2 behavior of insertion by shuffling the insertion
|
||||
// order. Not sure how this interacts with interleaved insertion but it's
|
||||
// probably fine.
|
||||
// TODO better/faster RNG?
|
||||
std::mt19937 g(fastRand());
|
||||
std::shuffle(stepwiseInserts, stepwiseInserts + count, g);
|
||||
shuffle(stepwiseInserts);
|
||||
|
||||
runInterleaved(std::span<StepwiseInsert>(stepwiseInserts, count));
|
||||
runInterleaved(stepwiseInserts);
|
||||
|
||||
std::vector<Node *, ArenaAlloc<Node *>> workList{
|
||||
ArenaAlloc<Node *>(&arena)};
|
||||
@@ -929,7 +1194,6 @@ struct ReferenceImpl {
|
||||
} // namespace
|
||||
|
||||
#ifdef ENABLE_TESTS
|
||||
|
||||
int main(void) {
|
||||
int64_t writeVersion = 0;
|
||||
ConflictSet::Impl cs{writeVersion};
|
||||
@@ -946,18 +1210,7 @@ int main(void) {
|
||||
cs.addWrites(write, kNumKeys);
|
||||
refImpl.addWrites(write, kNumKeys);
|
||||
debugPrintDot(stdout, cs.root);
|
||||
bool success = checkInvariants(cs.root);
|
||||
std::string logicalMap;
|
||||
std::string referenceLogicalMap;
|
||||
printLogical(logicalMap, cs.root);
|
||||
refImpl.printLogical(referenceLogicalMap);
|
||||
if (logicalMap != referenceLogicalMap) {
|
||||
fprintf(stderr,
|
||||
"Logical map not equal to reference logical map.\n\nActual:\n"
|
||||
"%s\nExpected:\n%s\n",
|
||||
logicalMap.c_str(), referenceLogicalMap.c_str());
|
||||
success = false;
|
||||
}
|
||||
bool success = checkCorrectness(cs.root, refImpl);
|
||||
return success ? 0 : 1;
|
||||
}
|
||||
#endif
|
Reference in New Issue
Block a user