From a2a55c9717df1a7ff5d419f47c6ec003706432bb Mon Sep 17 00:00:00 2001
From: Andrew Noyes
Date: Fri, 19 Jan 2024 14:50:36 -0800
Subject: [PATCH] Prepare for fuzzing

---
 CMakeLists.txt  |   2 +-
 ConflictSet.cpp | 335 ++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 295 insertions(+), 42 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index bc5dee7..12e6cd6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -19,7 +19,7 @@ target_compile_definitions(conflict_set_test PRIVATE ENABLE_TESTS)
 # keep asserts for test
 target_compile_options(conflict_set_test PRIVATE -UNDEBUG)
 # Only emit compile warnings for test
-target_compile_options(conflict_set_test PRIVATE -Wall -Wextra -Wpedantic -Wunreachable-code -Werror)
+target_compile_options(conflict_set_test PRIVATE -Wall -Wextra -Wpedantic -Wunreachable-code)
 
 include(CTest)
 add_test(NAME conflict_set_test COMMAND conflict_set_test)

diff --git a/ConflictSet.cpp b/ConflictSet.cpp
index 728a69f..f902695 100644
--- a/ConflictSet.cpp
+++ b/ConflictSet.cpp
@@ -12,17 +12,6 @@
 #include
 #include
 
-#define SHOW_PRIORITY 0
-#define DEBUG 0
-
-using Key = ConflictSet::Key;
-
-static auto operator<=>(const Key &lhs, const Key &rhs) {
-  const int minLen = std::min(lhs.len, rhs.len);
-  const int c = memcmp(lhs.p, rhs.p, minLen);
-  return c != 0 ? c <=> 0 : lhs.len <=> rhs.len;
-}
-
 // ==================== BEGIN ARENA IMPL ====================
 
 /// Group allocations with similar lifetimes to amortize the cost of malloc/free
@@ -189,6 +178,280 @@ bool operator!=(const ArenaAlloc &lhs, const ArenaAlloc &rhs) {
 
 // ==================== END ARENA IMPL ====================
 
+// ==================== BEGIN RANDOM IMPL ====================
+
+struct Random {
+  // *Really* minimal PCG32 code / (c) 2014 M.E. O'Neill / pcg-random.org
+  // Licensed under Apache License 2.0 (NO WARRANTY, etc. see website)
+  //
+  // Modified - mostly c -> c++
+  Random() = default;
+
+  Random(uint64_t initState, uint64_t initSeq) {
+    pcg32_srandom_r(initState, initSeq);
+    next();
+  }
+
+  /// Draws from a uniform distribution of uint32_t's
+  uint32_t next() {
+    auto result = next_;
+    next_ = pcg32_random_r();
+    return result;
+  }
+
+  /// Draws from a uniform distribution over [0, s). From
+  /// https://arxiv.org/pdf/1805.10941.pdf
+  uint32_t bounded(uint32_t s) {
+    assert(s != 0);
+    uint32_t x = next();
+    auto m = uint64_t(x) * uint64_t(s);
+    auto l = uint32_t(m);
+    if (l < s) {
+      uint32_t t = -s % s;
+      while (l < t) {
+        x = next();
+        m = uint64_t(x) * uint64_t(s);
+        l = uint32_t(m);
+      }
+    }
+    uint32_t result = m >> 32;
+    return result;
+  }
+
+  /// Fill `bytes` with `size` random bytes
+  void randomBytes(uint8_t *bytes, int size);
+
+  /// Fill `bytes` with `size` random hex bytes
+  void randomHex(uint8_t *bytes, int size);
+
+  template <class T, class = std::enable_if_t<std::is_trivially_copyable_v<T>>>
+  T randT() {
+    T t;
+    randomBytes((uint8_t *)&t, sizeof(T));
+    return t;
+  }
+
+private:
+  uint32_t pcg32_random_r() {
+    uint64_t oldState = state;
+    // Advance internal state
+    state = oldState * 6364136223846793005ULL + inc;
+    // Calculate output function (XSH RR), uses old state for max ILP
+    uint32_t xorShifted = ((oldState >> 18u) ^ oldState) >> 27u;
+    uint32_t rot = oldState >> 59u;
+    return (xorShifted >> rot) | (xorShifted << ((-rot) & 31));
+  }
+
+  // Seed the rng. Specified in two parts: a state initializer and a
+  // sequence selection constant (a.k.a. stream id)
+  void pcg32_srandom_r(uint64_t initState, uint64_t initSeq) {
+    state = 0U;
+    inc = (initSeq << 1u) | 1u;
+    pcg32_random_r();
+    state += initState;
+    pcg32_random_r();
+  }
+
+  uint32_t next_{};
+  // RNG state. All values are possible.
+  uint64_t state{};
+  // Controls which RNG sequence (stream) is selected. Must *always* be odd.
+  uint64_t inc{};
+};
+
+// TODO provide a way to seed this
+thread_local inline Random gRandom{0, 0};
+
+template <class Container> void shuffle(Container &x) {
+  using std::swap;
+  for (int i = x.size() - 1; i > 0; --i) {
+    int j = gRandom.bounded(i + 1);
+    if (i != j) {
+      swap(x[i], x[j]);
+    }
+  }
+}
+
+void Random::randomBytes(uint8_t *bytes, int size) {
+  int i = 0;
+  for (; i + 4 < size; i += 4) {
+    uint32_t random = next();
+    memcpy(bytes + i, &random, 4);
+  }
+  if (i < size) {
+    uint32_t random = next();
+    memcpy(bytes + i, &random, size - i);
+  }
+}
+
+void Random::randomHex(uint8_t *bytes, int size) {
+  int i = 0;
+  while (i + 8 < size) {
+    // One 32-bit draw yields eight hex digits.
+    uint32_t r = next();
+    bytes[i++] = "0123456789abcdef"[r & 0b1111];
+    r >>= 4;
+    bytes[i++] = "0123456789abcdef"[r & 0b1111];
+    r >>= 4;
+    bytes[i++] = "0123456789abcdef"[r & 0b1111];
+    r >>= 4;
+    bytes[i++] = "0123456789abcdef"[r & 0b1111];
+    r >>= 4;
+    bytes[i++] = "0123456789abcdef"[r & 0b1111];
+    r >>= 4;
+    bytes[i++] = "0123456789abcdef"[r & 0b1111];
+    r >>= 4;
+    bytes[i++] = "0123456789abcdef"[r & 0b1111];
+    r >>= 4;
+    bytes[i++] = "0123456789abcdef"[r & 0b1111];
+  }
+  uint32_t r = next();
+  while (i < size) {
+    bytes[i++] = "0123456789abcdef"[r & 0b1111];
+    r >>= 4;
+  }
+}
+
+// ==================== END RANDOM IMPL ====================
+
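Aside: `bounded` above is Lemire's multiply-then-reject method from the cited paper. A minimal standalone sketch of the same technique, with a hypothetical `nextU32` standing in for any uniform 32-bit source, to make the rejection step explicit:

    #include <cassert>
    #include <cstdint>

    // Sketch, not part of the patch: Lemire's bounded sampling as used by
    // Random::bounded. `nextU32` is a hypothetical uniform 32-bit source.
    inline uint32_t boundedSketch(uint32_t (*nextU32)(), uint32_t s) {
      assert(s != 0);
      uint64_t m = uint64_t(nextU32()) * uint64_t(s);
      if (uint32_t(m) < s) {
        // Unsigned wraparound makes -s equal 2^32 - s, so t == 2^32 mod s:
        // the count of "extra" low values that would bias the result.
        uint32_t t = -s % s;
        while (uint32_t(m) < t) {
          m = uint64_t(nextU32()) * uint64_t(s);
        }
      }
      return uint32_t(m >> 32); // high word is uniform over [0, s)
    }

The rejection loop runs with probability less than s / 2^32, so for small bounds it almost never draws a second value.
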
+// ==================== BEGIN ARBITRARY IMPL ====================
+
+/// Think of `Arbitrary` as an attacker-controlled random number generator.
+/// Usually you want your random number generator to be fair, so that you can
+/// sensibly analyze probabilities. E.g. the analysis showing that quicksort
+/// is expected O(n log n) with a random pivot relies on the pivot being
+/// drawn uniformly from a fair distribution.
+///
+/// Other times you want your randomness to be diabolically unfair, like when
+/// looking for bugs by fuzzing. The random-number-like interface is still
+/// convenient here, but you can potentially get much better coverage by
+/// allowing the possibility of e.g. flipping heads 100 times in a row.
+///
+/// When it runs out of entropy, it always returns 0.
+struct Arbitrary {
+  Arbitrary() = default;
+
+  explicit Arbitrary(std::span<const uint8_t> bytecode) : bytecode(bytecode) {}
+
+  /// Draws an arbitrary uint32_t
+  uint32_t next() { return consume<4>(); }
+
+  /// Draws an arbitrary element from [0, s)
+  uint32_t bounded(uint32_t s);
+
+  /// Fill `bytes` with `size` arbitrary bytes
+  void randomBytes(uint8_t *bytes, int size) {
+    int toFill = std::min<int>(size, bytecode.size());
+    if (toFill > 0) {
+      memcpy(bytes, bytecode.data(), toFill);
+    }
+    bytecode = bytecode.subspan(toFill, bytecode.size() - toFill);
+    memset(bytes + toFill, 0, size - toFill);
+  }
+
+  /// Fill `bytes` with `size` arbitrary hex bytes
+  void randomHex(uint8_t *bytes, int size) {
+    for (int i = 0; i < size;) {
+      uint8_t arbitrary = consume<1>();
+      bytes[i++] = "0123456789abcdef"[arbitrary & 0xf];
+      arbitrary >>= 4;
+      if (i < size) {
+        bytes[i++] = "0123456789abcdef"[arbitrary & 0xf];
+      }
+    }
+  }
+
+  template <class T, class = std::enable_if_t<std::is_trivially_copyable_v<T>>>
+  T randT() {
+    T t;
+    randomBytes((uint8_t *)&t, sizeof(T));
+    return t;
+  }
+
+  bool hasEntropy() const { return bytecode.size() != 0; }
+
+private:
+  uint8_t consumeByte() {
+    if (bytecode.size() == 0) {
+      return 0;
+    }
+    auto result = bytecode[0];
+    bytecode = bytecode.subspan(1, bytecode.size() - 1);
+    return result;
+  }
+
+  /// Consumes `kBytes` bytes of entropy and assembles them into a uint32_t
+  template <int kBytes> uint32_t consume() {
+    static_assert(kBytes <= 4);
+    uint32_t result = 0;
+    for (int i = 0; i < kBytes; ++i) {
+      result <<= 8;
+      result |= consumeByte();
+    }
+    return result;
+  }
+
+  std::span<const uint8_t> bytecode;
+};
+
+inline Arbitrary gArbitrary;
+
+void initFuzz(const uint8_t *data, size_t size);
+
+uint32_t Arbitrary::bounded(uint32_t s) {
+  if (s == 1) {
+    return 0;
+  }
+  // Consume only as many input bytes as the bit width of s - 1 requires,
+  // so small bounds don't burn through the fuzz input.
+  switch (32 - __builtin_clz(s - 1)) {
+  case 1:
+  case 2:
+  case 3:
+  case 4:
+  case 5:
+  case 6:
+  case 7:
+  case 8:
+    return consume<1>() % s;
+  case 9:
+  case 10:
+  case 11:
+  case 12:
+  case 13:
+  case 14:
+  case 15:
+  case 16:
+    return consume<2>() % s;
+  case 17:
+  case 18:
+  case 19:
+  case 20:
+  case 21:
+  case 22:
+  case 23:
+  case 24:
+    return consume<3>() % s;
+  default:
+    return consume<4>() % s;
+  }
+}
+
+void initFuzz(const uint8_t *data, size_t size) {
+  gArbitrary = Arbitrary{{data, size}};
+  uint64_t state = gArbitrary.next();
+  uint64_t seq = gArbitrary.next();
+  gRandom = Random{state, seq};
+}
+
+// ==================== END ARBITRARY IMPL ====================
+
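Aside: a sketch of how these globals are meant to be driven. This assumes a libFuzzer-style entry point; the actual harness is not part of this patch, and the key-generation loop is purely illustrative:

    #include <cstddef>
    #include <cstdint>

    // Hypothetical fuzz entry point (not part of this patch). All downstream
    // "randomness" now flows from the fuzz input: structural decisions come
    // from gArbitrary, while anything that only needs fairness (e.g. treap
    // priorities) comes from gRandom, which initFuzz seeds from the first
    // eight input bytes.
    extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
      initFuzz(data, size);
      while (gArbitrary.hasEntropy()) {
        uint8_t key[16];
        int keyLen = int(gArbitrary.bounded(sizeof(key) + 1));
        gArbitrary.randomHex(key, keyLen);
        // ... apply an operation built from `key` to the code under test ...
      }
      return 0;
    }

Because an exhausted Arbitrary returns zeros, a truncated input still decodes to a valid (if boring) operation sequence rather than an error path.
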
+#define SHOW_PRIORITY 0
+#define DEBUG 0
+
+using Key = ConflictSet::Key;
+
+static auto operator<=>(const Key &lhs, const Key &rhs) {
+  const int minLen = std::min(lhs.len, rhs.len);
+  const int c = memcmp(lhs.p, rhs.p, minLen);
+  return c != 0 ? c <=> 0 : lhs.len <=> rhs.len;
+}
+
 namespace {
 // A node in the tree representing write conflict history. This tree maintains
 // several invariants:
@@ -236,16 +499,6 @@ struct Node {
   }
 };
 
-// TODO: use a better prng. This is technically vulnerable to a
-// denial-of-service attack that can make conflict-checking linear in the
-// number of nodes in the tree.
-thread_local uint32_t gSeed = 1013904223L;
-uint32_t fastRand() {
-  auto result = gSeed;
-  gSeed = gSeed * 1664525L + 1013904223L;
-  return result;
-}
-
 // Note: `rangeVersion` is left uninitialized.
 Node *createNode(const Key &key, Node *parent, int64_t pointVersion) {
   assert(key.len <= std::numeric_limits::max());
@@ -255,7 +508,7 @@ Node *createNode(const Key &key, Node *parent, int64_t pointVersion) {
   result->child[0] = nullptr;
   result->child[1] = nullptr;
   result->parent = parent;
-  result->priority = fastRand();
+  result->priority = gRandom.next();
 #if SHOW_PRIORITY
   result->priority &= 0xff;
 #endif
@@ -601,7 +854,8 @@ int64_t checkMaxVersion(Node *node, bool &success) {
   return expected;
 }
 
-bool checkInvariants(Node *node) {
+template <class ReferenceImpl>
+bool checkCorrectness(Node *node, ReferenceImpl &refImpl) {
   bool success = true;
   // Check bst invariant
   Arena arena;
@@ -626,6 +880,18 @@
 
   checkMaxVersion(node, success);
   checkParentPointers(node, success);
+  std::string logicalMap;
+  std::string referenceLogicalMap;
+  printLogical(logicalMap, node);
+  refImpl.printLogical(referenceLogicalMap);
+  if (logicalMap != referenceLogicalMap) {
+    fprintf(stderr,
+            "Logical map not equal to reference logical map.\n\nActual:\n"
+            "%s\nExpected:\n%s\n",
+            logicalMap.c_str(), referenceLogicalMap.c_str());
+    success = false;
+  }
+
   return success;
 }
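Aside: `checkCorrectness` is a template so the tree can be diffed against any model that exposes `printLogical`. The concrete `ReferenceImpl` lives further down in this file; purely to illustrate the shape the template expects (hypothetical names and map format throughout):

    #include <cstdint>
    #include <map>
    #include <string>

    // Hypothetical stand-in with the interface checkCorrectness needs: a
    // trivially-correct store that renders the same "logical map" text the
    // tree renders, so any byte of divergence flags a bug in the tree.
    struct MapModelSketch {
      std::map<std::string, int64_t> pointVersions;

      void printLogical(std::string &out) const {
        for (const auto &[key, version] : pointVersions) {
          out += key;
          out += " -> ";
          out += std::to_string(version);
          out += '\n';
        }
      }
    };
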
@@ -706,7 +972,8 @@ struct __attribute__((__visibility__("hidden"))) ConflictSet::Impl {
 
   void addWrites(const WriteRange *writes, int count) {
     Arena arena;
-    auto *stepwiseInserts = new (arena) StepwiseInsert[count];
+    auto stepwiseInserts =
+        std::span(new (arena) StepwiseInsert[count], count);
     for (int i = 0; i < count; ++i) {
       // TODO handle non-singleton writes lol
       assert(writes[i].end.len == 0);
@@ -720,11 +987,9 @@
     // Mitigate potential n^2 behavior of insertion by shuffling the insertion
     // order. Not sure how this interacts with interleaved insertion, but it's
    // probably fine.
-    // TODO better/faster RNG?
-    std::mt19937 g(fastRand());
-    std::shuffle(stepwiseInserts, stepwiseInserts + count, g);
+    shuffle(stepwiseInserts);
 
-    runInterleaved(std::span(stepwiseInserts, count));
+    runInterleaved(stepwiseInserts);
 
     std::vector<Node *, ArenaAlloc<Node *>> workList{
         ArenaAlloc<Node *>(&arena)};
@@ -929,7 +1194,6 @@ struct ReferenceImpl {
 }
 } // namespace
 #ifdef ENABLE_TESTS
-
 int main(void) {
   int64_t writeVersion = 0;
   ConflictSet::Impl cs{writeVersion};
@@ -946,18 +1210,7 @@ int main(void) {
   cs.addWrites(write, kNumKeys);
   refImpl.addWrites(write, kNumKeys);
   debugPrintDot(stdout, cs.root);
-  bool success = checkInvariants(cs.root);
-  std::string logicalMap;
-  std::string referenceLogicalMap;
-  printLogical(logicalMap, cs.root);
-  refImpl.printLogical(referenceLogicalMap);
-  if (logicalMap != referenceLogicalMap) {
-    fprintf(stderr,
-            "Logical map not equal to reference logical map.\n\nActual:\n"
-            "%s\nExpected:\n%s\n",
-            logicalMap.c_str(), referenceLogicalMap.c_str());
-    success = false;
-  }
+  bool success = checkCorrectness(cs.root, refImpl);
   return success ? 0 : 1;
 }
 #endif
\ No newline at end of file
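Aside: the new `shuffle` replaces the `std::mt19937` + `fastRand` combination in `addWrites`, and because it draws from `gRandom` the shuffled order is reproducible for a given fuzz input. A small usage sketch (hypothetical data):

    #include <span>

    // shuffle() works on anything indexable with a size(), including the
    // std::span that addWrites now builds over its StepwiseInsert batch.
    void shuffleSketch() {
      int batch[8] = {1, 2, 3, 4, 5, 6, 7, 8}; // e.g. keys arriving sorted
      std::span<int> view{batch};
      shuffle(view); // Fisher-Yates, j drawn uniformly from [0, i] via gRandom
    }

That reproducibility is the point of routing everything through initFuzz: a crashing input replays the same insertion order, shuffle included.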