Prepare for fuzzing

This commit is contained in:
2024-01-19 14:50:36 -08:00
parent ffa60c9b4f
commit a2a55c9717
2 changed files with 295 additions and 42 deletions

View File

@@ -12,17 +12,6 @@
#include <utility>
#include <vector>
#define SHOW_PRIORITY 0
#define DEBUG 0
using Key = ConflictSet::Key;
/// Three-way comparison of two keys: byte-wise over the shared prefix first,
/// and by length when the shared prefix is identical (so a key that is a
/// prefix of another orders before it).
static auto operator<=>(const Key &lhs, const Key &rhs) {
  const int sharedLen = std::min(lhs.len, rhs.len);
  const int prefixCmp = memcmp(lhs.p, rhs.p, sharedLen);
  const bool prefixesEqual = prefixCmp == 0;
  return prefixesEqual ? lhs.len <=> rhs.len : prefixCmp <=> 0;
}
// ==================== BEGIN ARENA IMPL ====================
/// Group allocations with similar lifetimes to amortize the cost of malloc/free
@@ -189,6 +178,280 @@ bool operator!=(const ArenaAlloc<T> &lhs, const ArenaAlloc<U> &rhs) {
// ==================== END ARENA IMPL ====================
// ==================== BEGIN RANDOM IMPL ====================
/// Pseudo-random number generator based on PCG32 (XSH RR output function).
///
/// The generator always keeps one output computed ahead of time in `next_`;
/// `next()` hands that value out and then refills it.
struct Random {
  // *Really* minimal PCG32 code / (c) 2014 M.E. O'Neill / pcg-random.org
  // Licensed under Apache License 2.0 (NO WARRANTY, etc. see website)
  //
  // Modified - mostly c -> c++

  /// Constructs an unseeded generator. `inc` defaults to 1 so that the "must
  /// be odd" invariant holds and the state actually advances; prefer the
  /// seeding constructor when well-mixed output is needed from the first draw.
  Random() = default;

  /// Seeds the generator.
  /// @param initState state initializer
  /// @param initSeq sequence selection constant (a.k.a. stream id)
  Random(uint64_t initState, uint64_t initSeq) {
    pcg32_srandom_r(initState, initSeq);
    // Prime `next_` so that the first value handed out is a real draw rather
    // than the zero-initialized placeholder.
    next();
  }

  /// Draws from a uniform distribution of uint32_t's
  uint32_t next() {
    auto result = next_;
    next_ = pcg32_random_r();
    return result;
  }

  /// Draws from a uniform distribution of [0, s). From
  /// https://arxiv.org/pdf/1805.10941.pdf
  ///
  /// `s` must be non-zero.
  uint32_t bounded(uint32_t s) {
    assert(s != 0);
    uint32_t x = next();
    auto m = uint64_t(x) * uint64_t(s);
    auto l = uint32_t(m);
    if (l < s) {
      // t == 2^32 mod s. Draws whose low word falls below t are rejected.
      uint32_t t = -s % s;
      while (l < t) {
        x = next();
        m = uint64_t(x) * uint64_t(s);
        l = uint32_t(m);
      }
    }
    uint32_t result = m >> 32;
    return result;
  }

  /// Fill `bytes` with `size` random bytes
  void randomBytes(uint8_t *bytes, int size);

  /// Fill `bytes` with `size` random hex bytes
  void randomHex(uint8_t *bytes, int size);

  /// Returns a `T` whose object representation is filled with random bytes.
  template <class T, class = std::enable_if_t<std::is_trivially_copyable_v<T>>>
  T randT() {
    T t;
    randomBytes((uint8_t *)&t, sizeof(T));
    return t;
  }

private:
  /// Advances the state by one step and returns the output derived from the
  /// previous state.
  uint32_t pcg32_random_r() {
    uint64_t oldState = state;
    // Advance internal state
    state = oldState * 6364136223846793005ULL + inc;
    // Calculate output function (XSH RR), uses old state for max ILP
    uint32_t xorShifted = ((oldState >> 18u) ^ oldState) >> 27u;
    uint32_t rot = oldState >> 59u;
    return (xorShifted >> rot) | (xorShifted << ((-rot) & 31));
  }

  // Seed the rng. Specified in two parts, state initializer and a
  // sequence selection constant (a.k.a. stream id)
  void pcg32_srandom_r(uint64_t initstate, uint64_t initSeq) {
    state = 0U;
    inc = (initSeq << 1u) | 1u;
    pcg32_random_r();
    state += initstate;
    pcg32_random_r();
  }

  // The value the next call to `next()` will return.
  uint32_t next_{};
  // RNG state. All values are possible.
  uint64_t state{};
  // Controls which RNG sequence (stream) is selected. Must *always* be odd.
  // Defaults to 1 (not 0) so that a default-constructed generator does not
  // get stuck at state 0 and return 0 forever.
  uint64_t inc{1};
};
// TODO provide a way to seed this
/// Per-thread generator, constructed with the fixed seed (state 0, sequence
/// 0), so every thread starts from the same sequence unless its instance is
/// reassigned.
thread_local inline Random gRandom{0, 0};
/// Randomly permutes the elements of `x` in place, drawing indices from
/// `gRandom`. Walks from the last position down to position 1, exchanging each
/// position with a randomly chosen position at or before it.
template <class Container> void shuffle(Container &x) {
  using std::swap;
  int last = x.size() - 1;
  while (last > 0) {
    const int pick = gRandom.bounded(last + 1);
    if (pick != last) {
      swap(x[last], x[pick]);
    }
    --last;
  }
}
/// Fills `bytes[0, size)` with random bytes, drawing one 32-bit value per
/// group of up to four bytes. Does nothing when `size` is not positive.
void Random::randomBytes(uint8_t *bytes, int size) {
  uint8_t *cursor = bytes;
  int remaining = size;
  // Copy whole words while strictly more than four bytes are left.
  for (; remaining > 4; remaining -= 4, cursor += 4) {
    const uint32_t word = next();
    memcpy(cursor, &word, 4);
  }
  // The final one to four bytes come from one more draw.
  if (remaining > 0) {
    const uint32_t word = next();
    memcpy(cursor, &word, remaining);
  }
}
/// Fills `bytes[0, size)` with random lowercase hex digits. Each 32-bit draw
/// supplies up to eight digits, taken from the low nibble upwards. One draw is
/// always made for the tail, even when no bytes remain to be written.
void Random::randomHex(uint8_t *bytes, int size) {
  static constexpr char kHexDigits[] = "0123456789abcdef";
  int pos = 0;
  // Full groups of eight digits while strictly more than eight are left.
  while (size - pos > 8) {
    uint32_t word = next();
    for (int nibble = 0; nibble < 8; ++nibble, word >>= 4) {
      bytes[pos++] = kHexDigits[word & 0b1111];
    }
  }
  // Remaining (at most eight) digits come from a single extra draw.
  uint32_t word = next();
  for (; pos < size; ++pos, word >>= 4) {
    bytes[pos] = kHexDigits[word & 0b1111];
  }
}
// ==================== END RANDOM IMPL ====================
// ==================== BEGIN ARBITRARY IMPL ====================
/// Think of `Arbitrary` as an attacker-controlled random number generator.
/// Usually you want your random number generator to be fair, so that you can
/// sensibly analyze probabilities. E.g. The analysis that shows that quicksort
/// is expected O(n log n) with a random pivot relies on the random pivot being
/// selected uniformly from a fair distribution.
///
/// Other times you want your randomness to be diabolically unfair, like when
/// looking for bugs and fuzzing. The random-number-like interface is still
/// convenient here, but you can potentially get much better coverage by
/// allowing the possibility of e.g. flipping heads 100 times in a row.
///
/// When it runs out of entropy, it always returns 0.
///
/// The instance only views the bytes it was constructed from; it does not copy
/// them, so the underlying buffer must outlive all draws.
struct Arbitrary {
  /// Constructs a source with no entropy: every draw yields 0.
  Arbitrary() = default;

  /// Constructs a source that hands out `bytecode` front to back.
  explicit Arbitrary(std::span<const uint8_t> bytecode) : bytecode(bytecode) {}

  /// Draws an arbitrary uint32_t
  uint32_t next() { return consume<4>(); }

  /// Draws an arbitrary element from [0, s)
  uint32_t bounded(uint32_t s);

  /// Fill `bytes` with `size` arbitrary bytes. Bytes beyond the remaining
  /// entropy are set to 0. Does nothing when `size` is not positive.
  void randomBytes(uint8_t *bytes, int size) {
    if (size <= 0) {
      return;
    }
    // Work in size_t so a large remaining input is never narrowed to int.
    const size_t wanted = static_cast<size_t>(size);
    const size_t toFill = std::min(wanted, bytecode.size());
    // Skip the copy when nothing is left: data() may be null for an empty span.
    if (toFill > 0) {
      memcpy(bytes, bytecode.data(), toFill);
    }
    bytecode = bytecode.subspan(toFill);
    memset(bytes + toFill, 0, wanted - toFill);
  }

  /// Fill `bytes` with `size` random hex bytes. Each consumed input byte
  /// yields up to two digits, low nibble first.
  void randomHex(uint8_t *bytes, int size) {
    for (int i = 0; i < size;) {
      uint8_t arbitrary = consume<1>();
      bytes[i++] = "0123456789abcdef"[arbitrary & 0xf];
      arbitrary >>= 4;
      if (i < size) {
        bytes[i++] = "0123456789abcdef"[arbitrary & 0xf];
      }
    }
  }

  /// Returns a `T` whose object representation is filled with arbitrary bytes.
  template <class T, class = std::enable_if_t<std::is_trivially_copyable_v<T>>>
  T randT() {
    T t;
    randomBytes((uint8_t *)&t, sizeof(T));
    return t;
  }

  /// True while at least one unconsumed input byte remains.
  bool hasEntropy() const { return bytecode.size() != 0; }

private:
  /// Pops and returns the next input byte, or 0 when the input is exhausted.
  uint8_t consumeByte() {
    if (bytecode.size() == 0) {
      return 0;
    }
    auto result = bytecode[0];
    bytecode = bytecode.subspan(1, bytecode.size() - 1);
    return result;
  }

  /// Consumes `kBytes` input bytes and assembles them into an integer, with
  /// the first consumed byte ending up most significant.
  template <int kBytes> uint32_t consume() {
    uint32_t result = 0;
    static_assert(kBytes <= 4);
    for (int i = 0; i < kBytes; ++i) {
      result <<= 8;
      result |= consumeByte();
    }
    return result;
  }

  // Unconsumed remainder of the input.
  std::span<const uint8_t> bytecode;
};
/// Global arbitrary-data source. Default-constructed, so it holds no input
/// bytes until it is reassigned.
inline Arbitrary gArbitrary;
/// Points `gArbitrary` at the `size` bytes starting at `data` and reseeds
/// `gRandom` from the first values drawn from it.
void initFuzz(const uint8_t *data, size_t size);
/// Draws an arbitrary element from [0, s). `s` must be non-zero.
///
/// Consumes only as many input bytes (1 to 4) as are needed to represent
/// `s - 1`, and none at all when `s == 1`. The consumed value is reduced
/// modulo `s`, so the result is not uniformly distributed in general.
uint32_t Arbitrary::bounded(uint32_t s) {
  // Guard the modulo below: s == 0 would otherwise divide by zero.
  assert(s != 0);
  if (s == 1) {
    return 0;
  }
  // Number of significant bits in the largest possible result, s - 1. The
  // s == 1 case is handled above, so __builtin_clz never sees 0.
  const int bitsNeeded = 32 - __builtin_clz(s - 1);
  // Round up to whole bytes; consume<> needs a compile-time byte count.
  switch ((bitsNeeded + 7) / 8) {
  case 1:
    return consume<1>() % s;
  case 2:
    return consume<2>() % s;
  case 3:
    return consume<3>() % s;
  default:
    return consume<4>() % s;
  }
}
/// Installs `data[0, size)` as the input of `gArbitrary`, then reseeds
/// `gRandom` using two 32-bit values drawn from it: the first as the state
/// initializer, the second as the sequence selector.
void initFuzz(const uint8_t *data, size_t size) {
  gArbitrary = Arbitrary{std::span<const uint8_t>(data, size)};
  // Draw order matters: state first, then sequence.
  const uint64_t seedState = gArbitrary.next();
  const uint64_t seedSeq = gArbitrary.next();
  gRandom = Random{seedState, seedSeq};
}
// ==================== END ARBITRARY IMPL ====================
#define SHOW_PRIORITY 0
#define DEBUG 0
using Key = ConflictSet::Key;
/// Three-way comparison of two keys: byte-wise over the shared prefix first,
/// and by length when the shared prefix is identical (so a key that is a
/// prefix of another orders before it).
static auto operator<=>(const Key &lhs, const Key &rhs) {
  const int sharedLen = std::min(lhs.len, rhs.len);
  const int prefixCmp = memcmp(lhs.p, rhs.p, sharedLen);
  const bool prefixesEqual = prefixCmp == 0;
  return prefixesEqual ? lhs.len <=> rhs.len : prefixCmp <=> 0;
}
namespace {
// A node in the tree representing write conflict history. This tree maintains
// several invariants:
@@ -236,16 +499,6 @@ struct Node {
}
};
// TODO: use a better prng. This is technically vulnerable to a
// denial-of-service attack that can make conflict-checking linear in the
// number of nodes in the tree.
thread_local uint32_t gSeed = 1013904223L;

/// Returns the current per-thread seed and advances it by one step of the
/// recurrence seed * 1664525 + 1013904223 (truncated to 32 bits).
uint32_t fastRand() {
  return std::exchange(gSeed,
                       static_cast<uint32_t>(gSeed * 1664525L + 1013904223L));
}
// Note: `rangeVersion` is left uninitialized.
Node *createNode(const Key &key, Node *parent, int64_t pointVersion) {
assert(key.len <= std::numeric_limits<int>::max());
@@ -255,7 +508,7 @@ Node *createNode(const Key &key, Node *parent, int64_t pointVersion) {
result->child[0] = nullptr;
result->child[1] = nullptr;
result->parent = parent;
result->priority = fastRand();
result->priority = gRandom.next();
#if SHOW_PRIORITY
result->priority &= 0xff;
#endif
@@ -601,7 +854,8 @@ int64_t checkMaxVersion(Node *node, bool &success) {
return expected;
}
bool checkInvariants(Node *node) {
template <class ReferenceImpl>
bool checkCorrectness(Node *node, ReferenceImpl &refImpl) {
bool success = true;
// Check bst invariant
Arena arena;
@@ -626,6 +880,18 @@ bool checkInvariants(Node *node) {
checkMaxVersion(node, success);
checkParentPointers(node, success);
std::string logicalMap;
std::string referenceLogicalMap;
printLogical(logicalMap, node);
refImpl.printLogical(referenceLogicalMap);
if (logicalMap != referenceLogicalMap) {
fprintf(stderr,
"Logical map not equal to reference logical map.\n\nActual:\n"
"%s\nExpected:\n%s\n",
logicalMap.c_str(), referenceLogicalMap.c_str());
success = false;
}
return success;
}
@@ -706,7 +972,8 @@ struct __attribute__((__visibility__("hidden"))) ConflictSet::Impl {
void addWrites(const WriteRange *writes, int count) {
Arena arena;
auto *stepwiseInserts = new (arena) StepwiseInsert[count];
auto stepwiseInserts =
std::span<StepwiseInsert>(new (arena) StepwiseInsert[count], count);
for (int i = 0; i < count; ++i) {
// TODO handle non-singleton writes lol
assert(writes[i].end.len == 0);
@@ -720,11 +987,9 @@ struct __attribute__((__visibility__("hidden"))) ConflictSet::Impl {
// Mitigate potential n^2 behavior of insertion by shuffling the insertion
// order. Not sure how this interacts with interleaved insertion but it's
// probably fine.
// TODO better/faster RNG?
std::mt19937 g(fastRand());
std::shuffle(stepwiseInserts, stepwiseInserts + count, g);
shuffle(stepwiseInserts);
runInterleaved(std::span<StepwiseInsert>(stepwiseInserts, count));
runInterleaved(stepwiseInserts);
std::vector<Node *, ArenaAlloc<Node *>> workList{
ArenaAlloc<Node *>(&arena)};
@@ -929,7 +1194,6 @@ struct ReferenceImpl {
} // namespace
#ifdef ENABLE_TESTS
int main(void) {
int64_t writeVersion = 0;
ConflictSet::Impl cs{writeVersion};
@@ -946,18 +1210,7 @@ int main(void) {
cs.addWrites(write, kNumKeys);
refImpl.addWrites(write, kNumKeys);
debugPrintDot(stdout, cs.root);
bool success = checkInvariants(cs.root);
std::string logicalMap;
std::string referenceLogicalMap;
printLogical(logicalMap, cs.root);
refImpl.printLogical(referenceLogicalMap);
if (logicalMap != referenceLogicalMap) {
fprintf(stderr,
"Logical map not equal to reference logical map.\n\nActual:\n"
"%s\nExpected:\n%s\n",
logicalMap.c_str(), referenceLogicalMap.c_str());
success = false;
}
bool success = checkCorrectness(cs.root, refImpl);
return success ? 0 : 1;
}
#endif