diff --git a/VersionedMap.cpp b/VersionedMap.cpp index 046d841..58b028f 100644 --- a/VersionedMap.cpp +++ b/VersionedMap.cpp @@ -8,8 +8,6 @@ #include #include -#include - void *mmapSafe(void *addr, size_t len, int prot, int flags, int fd, off_t offset) { void *result = mmap(addr, len, prot, flags, fd, offset); @@ -111,53 +109,30 @@ constexpr uint32_t kUpsizeBytes = 1 << 20; constexpr uint32_t kUpsizeNodes = kUpsizeBytes / sizeof(Node); static_assert(kUpsizeNodes * sizeof(Node) == kUpsizeBytes); -struct BitSetUnorderedSet { - explicit BitSetUnorderedSet(uint32_t size) : s() {} +struct BitSet { + explicit BitSet(uint32_t size) : words((uint64_t *)malloc(size / 64 + 64)) {} - bool test(uint32_t i) const { return s.find(i) != s.end(); } + bool test(uint32_t i) const { + return words[i >> 6] & (uint64_t(1) << (i & 63)); + } // Returns former value bool set(uint32_t i) { - auto [it, inserted] = s.insert(i); + const auto prev = words[i >> 6]; + const auto mask = uint64_t(1) << (i & 63); + words[i >> 6] |= mask; max_ = std::max(i, max_); - return !inserted; + return prev & mask; } // Returns 0 if set is empty uint32_t max() const { return max_; } template - void iterateAbsentBackwards(F f, uint32_t begin, uint32_t end) const { - for (uint32_t i = end - 1; i >= begin; --i) { - if (!test(i)) { - f(i); - } - } - } - -private: - uint32_t max_ = 0; - std::unordered_set s; -}; - -struct BitSetR { - explicit BitSetR(uint32_t size) : s(bitset_create_with_capacity(size)) {} - - bool test(uint32_t i) const { return bitset_get(s, i); } - - // Returns former value - bool set(uint32_t i) { - max_ = std::max(i, max_); - auto result = test(i); - bitset_set(s, i); - return result; - } - - // Returns 0 if set is empty - uint32_t max() const { return max_; } - - template - void iterateAbsentBackwards(F f, uint32_t begin, uint32_t end) const { + void iterateAbsentApproxBackwards(F f, uint32_t begin, uint32_t end) const { + // TODO can this be improved? We can do something with a word at a time + // instead of a bit at a time. The first attempt at doing so benchmarked as + // slower. assert(begin != 0); for (uint32_t i = end - 1; i >= begin; --i) { if (!test(i)) { @@ -166,11 +141,11 @@ struct BitSetR { } } - ~BitSetR() { bitset_free(s); } + ~BitSet() { free(words); } private: uint32_t max_ = 0; - bitset_t *s; + uint64_t *const words; }; struct MemManager { @@ -212,10 +187,8 @@ struct MemManager { } void gc(const uint32_t *roots, int numRoots, int64_t oldestVersion) { - // TODO better bitset? - // Calculate reachable set - BitSetR reachable{next}; + BitSet reachable{next}; uint32_t stack[1000]; // Much more than bound imposed by max height of tree int stackIndex = 0; auto tryPush = [&](uint32_t p) { @@ -272,7 +245,7 @@ struct MemManager { // Rebuild free list and delref entries freeList = 0; - reachable.iterateAbsentBackwards( + reachable.iterateAbsentApproxBackwards( [&](uint32_t i) { if (base[i].entry != nullptr) { base[i].entry->delref(); @@ -298,6 +271,7 @@ struct VersionedMap::Impl {}; int main() { ankerl::nanobench::Bench bench; + bench.minEpochIterations(5000); weaselab::MemManager mm; bench.run("allocate", [&]() { auto x = mm.allocate();