diff --git a/Bench.cpp b/Bench.cpp index d0afa78..80a38f4 100644 --- a/Bench.cpp +++ b/Bench.cpp @@ -63,6 +63,7 @@ void bulkFirstGeq() { constexpr int kNumQueries = 100; ankerl::nanobench::Bench bench; bench.batch(kNumQueries); + bench.minEpochIterations(kNumQueries * 5); // Initialize `versionedMap` weaselab::VersionedMap versionedMap{0}; diff --git a/README.md b/README.md index 085d10e..f11c14d 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ feature | weaselab | fdb | |-|-|-| |based on|node-copying treap|node-copying treap| +|branch-free `child` function|✅|❌| |bytes per node (worst-case)|64 + kv len + 1\*|96 + kv len| |cheaper iteration*|❌|✅| |efficient bulk queries|✅|❌| @@ -21,27 +22,28 @@ feature | weaselab | fdb | # benchmarks ## weaselab - | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- -| 1,537.31 | 650,485.22 | 2.0% | 0.21 | `monotonically increasing` -| 1.25 | 798,963,102.97 | 0.1% | 0.01 | `*iter` -| 38.71 | 25,831,420.46 | 3.2% | 0.01 | `++iter` -| 37.31 | 26,801,664.98 | 0.3% | 0.01 | `--iter` -| 133.17 | 7,508,978.37 | 2.8% | 0.02 | `begin` -| 129.51 | 7,721,547.35 | 0.4% | 0.02 | `begin (firstGeq)` -| 54.40 | 18,381,173.00 | 1.2% | 0.01 | `end` -| 245.34 | 4,075,961.09 | 1.1% | 0.01 | `bulkFirstGeq` +| 1,538.88 | 649,824.01 | 2.7% | 0.22 | `monotonically increasing` +| 1.25 | 798,888,023.37 | 0.0% | 0.01 | `*iter` +| 32.10 | 31,153,434.23 | 0.7% | 0.01 | `++iter` +| 31.08 | 32,178,831.30 | 0.3% | 0.01 | `--iter` +| 139.19 | 7,184,433.93 | 2.2% | 0.02 | `begin` +| 176.43 | 5,667,847.59 | 0.3% | 0.02 | `begin (firstGeq)` +| 54.56 | 18,329,336.92 | 0.4% | 0.01 | `end` +| 159.37 | 6,274,892.80 | 0.1% | 0.10 | `bulkFirstGeq` +| 153.55 | 6,512,408.74 | 0.1% | 0.09 | `bulkFirstGeq (latest version)` ## fdb | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- -| 2,244.65 | 445,503.99 | 1.3% | 0.29 | `monotonically 
increasing` -| 2.06 | 486,311,738.92 | 0.3% | 0.01 | `*iter` -| 12.53 | 79,826,977.13 | 0.4% | 0.01 | `++iter` -| 8.86 | 112,927,293.80 | 1.3% | 0.01 | `--iter` -| 89.27 | 11,201,505.02 | 0.7% | 0.01 | `begin` -| 144.38 | 6,926,060.07 | 2.9% | 0.02 | `begin (firstGeq)` -| 71.84 | 13,920,283.88 | 0.4% | 0.01 | `end` -| 412.10 | 2,426,597.75 | 0.2% | 0.01 | `bulkFirstGeq` +| 2,236.83 | 447,061.26 | 1.0% | 0.29 | `monotonically increasing` +| 2.11 | 473,370,596.42 | 0.9% | 0.01 | `*iter` +| 12.07 | 82,858,977.78 | 1.2% | 0.01 | `++iter` +| 8.54 | 117,158,071.80 | 0.9% | 0.01 | `--iter` +| 94.26 | 10,608,588.66 | 1.1% | 0.01 | `begin` +| 159.01 | 6,288,940.61 | 0.4% | 0.02 | `begin (firstGeq)` +| 73.47 | 13,610,276.41 | 0.9% | 0.01 | `end` +| 422.53 | 2,366,723.10 | 0.1% | 0.25 | `bulkFirstGeq` +| 365.13 | 2,738,758.86 | 0.1% | 0.22 | `bulkFirstGeq (latest version)` diff --git a/VersionedMap.cpp b/VersionedMap.cpp index 0559fd3..6ccb4f8 100644 --- a/VersionedMap.cpp +++ b/VersionedMap.cpp @@ -150,15 +150,27 @@ struct Entry { } }; +struct UpdateInfo { + UpdateInfo() : version(kVersionIfNotUpdated) {} + int64_t version; + constexpr static int64_t kVersionIfNotUpdated = 0x7fffffffffffffff; + bool updated() const { return version != UpdateInfo::kVersionIfNotUpdated; } + bool updated(int64_t at) const { return version <= at; } +}; + +static_assert(std::atomic<UpdateInfo>::is_always_lock_free); + struct Node { union { - int64_t updateVersion; + std::atomic<UpdateInfo> updateInfo; uint32_t nextFree; }; Entry *entry; - uint32_t pointer[3]; - bool replacedPointer; - std::atomic<bool> updated; + // [left/right, older/newer]. Logically this is only 1 aux pointer since we + // only store one updateInfo, but this encoding lets us write a branch-free + // `child` function, which really helps with the effective ILP of the bulk + // firstGeq function. + uint32_t pointer[2][2]; }; // Limit mmap to 32 GiB so valgrind doesn't complain. 
@@ -294,24 +306,21 @@ struct MemManager { while (stackIndex > 0) { uint32_t p = stack[--stackIndex]; auto &node = base[p]; - if (node.updated.load(std::memory_order_relaxed)) { - if (node.pointer[!node.replacedPointer] != 0) { - tryPush(p, node.pointer[!node.replacedPointer]); + auto updateInfo = node.updateInfo.load(std::memory_order_relaxed); + if (updateInfo.updated()) { + if (node.pointer[0][1] != 0) { + tryPush(p, node.pointer[0][1]); } - if (oldestVersion < node.updateVersion) { - if (node.pointer[node.replacedPointer] != 0) { - tryPush(p, node.pointer[node.replacedPointer]); - } + if (node.pointer[1][1] != 0) { + tryPush(p, node.pointer[1][1]); } - if (node.pointer[2] != 0) { - tryPush(p, node.pointer[2]); + } + if (!updateInfo.updated(oldestVersion)) { + if (node.pointer[0][0] != 0) { + tryPush(p, node.pointer[0][0]); } - } else { - if (node.pointer[0] != 0) { - tryPush(p, node.pointer[0]); - } - if (node.pointer[1] != 0) { - tryPush(p, node.pointer[1]); + if (node.pointer[1][0] != 0) { + tryPush(p, node.pointer[1][0]); } } } @@ -498,12 +507,9 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl { kOrder == std::memory_order_relaxed); auto &n = mm.base[node]; uint32_t result; - if (n.updated.load(kOrder) && n.updateVersion <= at && - which == n.replacedPointer) { - result = n.pointer[2]; - } else { - result = n.pointer[which]; - } + assert(at < UpdateInfo::kVersionIfNotUpdated); + auto updateInfo = n.updateInfo.load(kOrder); + result = n.pointer[which][updateInfo.updated(at)]; assert(result == 0 || result >= kMinAddressable); #ifndef NDEBUG if (result != 0) { @@ -521,7 +527,10 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl { return node; } auto &n = mm.base[node]; - const bool updated = n.updated.load(std::memory_order_relaxed); + + auto updateInfo = n.updateInfo.load(std::memory_order_relaxed); + + const bool updated = updateInfo.updated(); auto doCopy = [&]() { uint32_t copy = mm.allocate(); @@ -532,30 +541,25 @@ 
struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl { #endif auto &c = mm.base[copy]; c.entry = n.entry->addref(); - c.pointer[which] = child; - c.pointer[!which] = + c.pointer[which][0] = child; + c.pointer[!which][0] = this->child(node, !which, latestVersion); - c.updated.store(false, std::memory_order_relaxed); - c.updateVersion = version; + c.updateInfo.store(UpdateInfo{}, std::memory_order_relaxed); assert(copy == 0 || copy >= kMinAddressable); return copy; }; - if (n.updateVersion == version) { - // The reason these aren't data races is that concurrent readers are - // reading < `version` - if (updated && n.replacedPointer != which) { - auto result = doCopy(); - // We can't update n.replacedPointer without introducing a data race - // (unless we packed it into the atomic?) so we copy. pointer[2] becomes - // unreachable, but need to tell the garbage collector. - n.pointer[2] = 0; - return result; - } else if (updated) { - n.pointer[2] = child; - } else { - n.pointer[which] = child; - } + if (n.entry->pointVersion == version || n.entry->rangeVersion == version) { + // This node is not yet published to concurrent readers + n.pointer[which][0] = child; + assert(node == 0 || node >= kMinAddressable); + return node; + } + + if (updateInfo.version == version) { + // Not a data race since concurrent readers are reading at a version < + // `updateInfo.version` + n.pointer[which][1] = child; assert(node == 0 || node >= kMinAddressable); return node; } @@ -564,10 +568,10 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl { // We already used this node's in-place update return doCopy(); } else { - n.updateVersion = version; - n.pointer[2] = child; - n.replacedPointer = which; - n.updated.store(true, std::memory_order_release); // Must be last + n.pointer[which][1] = child; + n.pointer[!which][1] = n.pointer[!which][0]; + updateInfo.version = version; + n.updateInfo.store(updateInfo, std::memory_order_release); // Must be last 
assert(node == 0 || node >= kMinAddressable); return node; } @@ -660,10 +664,10 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl { inserted ? gRandom.next() : mm.base[finger.backNode()].entry->priority); if (!inserted) { auto &n = mm.base[node]; - n.pointer[0] = child(finger.backNode(), false, - latestVersion); - n.pointer[1] = child(finger.backNode(), true, - latestVersion); + n.pointer[0][0] = child(finger.backNode(), + false, latestVersion); + n.pointer[1][0] = child(finger.backNode(), + true, latestVersion); } finger.backNodeRef() = node; uint32_t oldSize = finger.searchPathSize(); @@ -775,10 +779,9 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl { uint32_t priority) { auto result = mm.allocate(); auto &node = mm.base[result]; - node.updateVersion = version; - node.pointer[0] = 0; - node.pointer[1] = 0; - node.updated.store(false, std::memory_order_relaxed); + node.pointer[0][0] = 0; + node.pointer[1][0] = 0; + node.updateInfo.store(UpdateInfo{}, std::memory_order_relaxed); node.entry = Entry::make(version, rangeVersion, key, keyLen, val, valLen, priority); return result;