Branch-free child function
This commit is contained in:
@@ -63,6 +63,7 @@ void bulkFirstGeq() {
|
||||
constexpr int kNumQueries = 100;
|
||||
ankerl::nanobench::Bench bench;
|
||||
bench.batch(kNumQueries);
|
||||
bench.minEpochIterations(kNumQueries * 5);
|
||||
|
||||
// Initialize `versionedMap`
|
||||
weaselab::VersionedMap versionedMap{0};
|
||||
|
36
README.md
36
README.md
@@ -1,6 +1,7 @@
|
||||
feature | weaselab | fdb |
|
||||
|-|-|-|
|
||||
|based on|node-copying treap|node-copying treap|
|
||||
|branch-free `child` function|✅|❌|
|
||||
|bytes per node (worst-case)|64 + kv len + 1\*|96 + kv len|
|
||||
|cheaper iteration*|❌|✅|
|
||||
|efficient bulk queries|✅|❌|
|
||||
@@ -21,27 +22,28 @@ feature | weaselab | fdb |
|
||||
# benchmarks
|
||||
|
||||
## weaselab
|
||||
|
||||
| ns/op | op/s | err% | total | benchmark
|
||||
|--------------------:|--------------------:|--------:|----------:|:----------
|
||||
| 1,537.31 | 650,485.22 | 2.0% | 0.21 | `monotonically increasing`
|
||||
| 1.25 | 798,963,102.97 | 0.1% | 0.01 | `*iter`
|
||||
| 38.71 | 25,831,420.46 | 3.2% | 0.01 | `++iter`
|
||||
| 37.31 | 26,801,664.98 | 0.3% | 0.01 | `--iter`
|
||||
| 133.17 | 7,508,978.37 | 2.8% | 0.02 | `begin`
|
||||
| 129.51 | 7,721,547.35 | 0.4% | 0.02 | `begin (firstGeq)`
|
||||
| 54.40 | 18,381,173.00 | 1.2% | 0.01 | `end`
|
||||
| 245.34 | 4,075,961.09 | 1.1% | 0.01 | `bulkFirstGeq`
|
||||
| 1,538.88 | 649,824.01 | 2.7% | 0.22 | `monotonically increasing`
|
||||
| 1.25 | 798,888,023.37 | 0.0% | 0.01 | `*iter`
|
||||
| 32.10 | 31,153,434.23 | 0.7% | 0.01 | `++iter`
|
||||
| 31.08 | 32,178,831.30 | 0.3% | 0.01 | `--iter`
|
||||
| 139.19 | 7,184,433.93 | 2.2% | 0.02 | `begin`
|
||||
| 176.43 | 5,667,847.59 | 0.3% | 0.02 | `begin (firstGeq)`
|
||||
| 54.56 | 18,329,336.92 | 0.4% | 0.01 | `end`
|
||||
| 159.37 | 6,274,892.80 | 0.1% | 0.10 | `bulkFirstGeq`
|
||||
| 153.55 | 6,512,408.74 | 0.1% | 0.09 | `bulkFirstGeq (latest version)`
|
||||
|
||||
## fdb
|
||||
|
||||
| ns/op | op/s | err% | total | benchmark
|
||||
|--------------------:|--------------------:|--------:|----------:|:----------
|
||||
| 2,244.65 | 445,503.99 | 1.3% | 0.29 | `monotonically increasing`
|
||||
| 2.06 | 486,311,738.92 | 0.3% | 0.01 | `*iter`
|
||||
| 12.53 | 79,826,977.13 | 0.4% | 0.01 | `++iter`
|
||||
| 8.86 | 112,927,293.80 | 1.3% | 0.01 | `--iter`
|
||||
| 89.27 | 11,201,505.02 | 0.7% | 0.01 | `begin`
|
||||
| 144.38 | 6,926,060.07 | 2.9% | 0.02 | `begin (firstGeq)`
|
||||
| 71.84 | 13,920,283.88 | 0.4% | 0.01 | `end`
|
||||
| 412.10 | 2,426,597.75 | 0.2% | 0.01 | `bulkFirstGeq`
|
||||
| 2,236.83 | 447,061.26 | 1.0% | 0.29 | `monotonically increasing`
|
||||
| 2.11 | 473,370,596.42 | 0.9% | 0.01 | `*iter`
|
||||
| 12.07 | 82,858,977.78 | 1.2% | 0.01 | `++iter`
|
||||
| 8.54 | 117,158,071.80 | 0.9% | 0.01 | `--iter`
|
||||
| 94.26 | 10,608,588.66 | 1.1% | 0.01 | `begin`
|
||||
| 159.01 | 6,288,940.61 | 0.4% | 0.02 | `begin (firstGeq)`
|
||||
| 73.47 | 13,610,276.41 | 0.9% | 0.01 | `end`
|
||||
| 422.53 | 2,366,723.10 | 0.1% | 0.25 | `bulkFirstGeq`
|
||||
| 365.13 | 2,738,758.86 | 0.1% | 0.22 | `bulkFirstGeq (latest version)`
|
||||
|
117
VersionedMap.cpp
117
VersionedMap.cpp
@@ -150,15 +150,27 @@ struct Entry {
|
||||
}
|
||||
};
|
||||
|
||||
// Records the version at which a node received its update. A
// default-constructed UpdateInfo holds the sentinel `kVersionIfNotUpdated`,
// meaning "no update recorded".
struct UpdateInfo {
|
||||
// Start out in the "not updated" state by storing the sentinel version.
UpdateInfo() : version(kVersionIfNotUpdated) {}
|
||||
// Version of the recorded update, or `kVersionIfNotUpdated` if there is none.
int64_t version;
|
||||
// Sentinel: the largest int64_t value, so `version <= at` is false for every
// `at` strictly below it.
constexpr static int64_t kVersionIfNotUpdated = 0x7fffffffffffffff;
|
||||
// True iff an update has been recorded (version is not the sentinel).
bool updated() const { return version != UpdateInfo::kVersionIfNotUpdated; }
|
||||
// True iff the recorded update version is <= `at`. With the sentinel stored
// this is false for any `at` below `kVersionIfNotUpdated`.
bool updated(int64_t at) const { return version <= at; }
|
||||
};
|
||||
|
||||
static_assert(std::atomic<UpdateInfo>::is_always_lock_free);
|
||||
|
||||
struct Node {
|
||||
union {
|
||||
int64_t updateVersion;
|
||||
std::atomic<UpdateInfo> updateInfo;
|
||||
uint32_t nextFree;
|
||||
};
|
||||
Entry *entry;
|
||||
uint32_t pointer[3];
|
||||
bool replacedPointer;
|
||||
std::atomic<bool> updated;
|
||||
// [left/right, older/newer]. Logically this is only 1 aux pointer since we
|
||||
// only store one updateInfo, but this encoding lets us write a branch-free
|
||||
// `child` function, which really helps with the effective ILP of the bulk
|
||||
// firstGeq function.
|
||||
uint32_t pointer[2][2];
|
||||
};
|
||||
|
||||
// Limit mmap to 32 GiB so valgrind doesn't complain.
|
||||
@@ -294,24 +306,21 @@ struct MemManager {
|
||||
while (stackIndex > 0) {
|
||||
uint32_t p = stack[--stackIndex];
|
||||
auto &node = base[p];
|
||||
if (node.updated.load(std::memory_order_relaxed)) {
|
||||
if (node.pointer[!node.replacedPointer] != 0) {
|
||||
tryPush(p, node.pointer[!node.replacedPointer]);
|
||||
auto updateInfo = node.updateInfo.load(std::memory_order_relaxed);
|
||||
if (updateInfo.updated()) {
|
||||
if (node.pointer[0][1] != 0) {
|
||||
tryPush(p, node.pointer[0][1]);
|
||||
}
|
||||
if (oldestVersion < node.updateVersion) {
|
||||
if (node.pointer[node.replacedPointer] != 0) {
|
||||
tryPush(p, node.pointer[node.replacedPointer]);
|
||||
}
|
||||
if (node.pointer[1][1] != 0) {
|
||||
tryPush(p, node.pointer[1][1]);
|
||||
}
|
||||
if (node.pointer[2] != 0) {
|
||||
tryPush(p, node.pointer[2]);
|
||||
}
|
||||
if (!updateInfo.updated(oldestVersion)) {
|
||||
if (node.pointer[0][0] != 0) {
|
||||
tryPush(p, node.pointer[0][0]);
|
||||
}
|
||||
} else {
|
||||
if (node.pointer[0] != 0) {
|
||||
tryPush(p, node.pointer[0]);
|
||||
}
|
||||
if (node.pointer[1] != 0) {
|
||||
tryPush(p, node.pointer[1]);
|
||||
if (node.pointer[1][0] != 0) {
|
||||
tryPush(p, node.pointer[1][0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -498,12 +507,9 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl {
|
||||
kOrder == std::memory_order_relaxed);
|
||||
auto &n = mm.base[node];
|
||||
uint32_t result;
|
||||
if (n.updated.load(kOrder) && n.updateVersion <= at &&
|
||||
which == n.replacedPointer) {
|
||||
result = n.pointer[2];
|
||||
} else {
|
||||
result = n.pointer[which];
|
||||
}
|
||||
assert(at < UpdateInfo::kVersionIfNotUpdated);
|
||||
auto updateInfo = n.updateInfo.load(kOrder);
|
||||
result = n.pointer[which][updateInfo.updated(at)];
|
||||
assert(result == 0 || result >= kMinAddressable);
|
||||
#ifndef NDEBUG
|
||||
if (result != 0) {
|
||||
@@ -521,7 +527,10 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl {
|
||||
return node;
|
||||
}
|
||||
auto &n = mm.base[node];
|
||||
const bool updated = n.updated.load(std::memory_order_relaxed);
|
||||
|
||||
auto updateInfo = n.updateInfo.load(std::memory_order_relaxed);
|
||||
|
||||
const bool updated = updateInfo.updated();
|
||||
|
||||
auto doCopy = [&]() {
|
||||
uint32_t copy = mm.allocate();
|
||||
@@ -532,30 +541,25 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl {
|
||||
#endif
|
||||
auto &c = mm.base[copy];
|
||||
c.entry = n.entry->addref();
|
||||
c.pointer[which] = child;
|
||||
c.pointer[!which] =
|
||||
c.pointer[which][0] = child;
|
||||
c.pointer[!which][0] =
|
||||
this->child<std::memory_order_relaxed>(node, !which, latestVersion);
|
||||
c.updated.store(false, std::memory_order_relaxed);
|
||||
c.updateVersion = version;
|
||||
c.updateInfo.store(UpdateInfo{}, std::memory_order_relaxed);
|
||||
assert(copy == 0 || copy >= kMinAddressable);
|
||||
return copy;
|
||||
};
|
||||
|
||||
if (n.updateVersion == version) {
|
||||
// The reason these aren't data races is that concurrent readers are
|
||||
// reading < `version`
|
||||
if (updated && n.replacedPointer != which) {
|
||||
auto result = doCopy();
|
||||
// We can't update n.replacedPointer without introducing a data race
|
||||
// (unless we packed it into the atomic?) so we copy. pointer[2] becomes
|
||||
// unreachable, but need to tell the garbage collector.
|
||||
n.pointer[2] = 0;
|
||||
return result;
|
||||
} else if (updated) {
|
||||
n.pointer[2] = child;
|
||||
} else {
|
||||
n.pointer[which] = child;
|
||||
}
|
||||
if (n.entry->pointVersion == version || n.entry->rangeVersion == version) {
|
||||
// This node is not yet published to concurrent readers
|
||||
n.pointer[which][0] = child;
|
||||
assert(node == 0 || node >= kMinAddressable);
|
||||
return node;
|
||||
}
|
||||
|
||||
if (updateInfo.version == version) {
|
||||
// Not a data race since concurrent readers are reading at a version <
|
||||
// `updateInfo.version`
|
||||
n.pointer[which][1] = child;
|
||||
assert(node == 0 || node >= kMinAddressable);
|
||||
return node;
|
||||
}
|
||||
@@ -564,10 +568,10 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl {
|
||||
// We already used this node's in-place update
|
||||
return doCopy();
|
||||
} else {
|
||||
n.updateVersion = version;
|
||||
n.pointer[2] = child;
|
||||
n.replacedPointer = which;
|
||||
n.updated.store(true, std::memory_order_release); // Must be last
|
||||
n.pointer[which][1] = child;
|
||||
n.pointer[!which][1] = n.pointer[!which][0];
|
||||
updateInfo.version = version;
|
||||
n.updateInfo.store(updateInfo, std::memory_order_release); // Must be last
|
||||
assert(node == 0 || node >= kMinAddressable);
|
||||
return node;
|
||||
}
|
||||
@@ -660,10 +664,10 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl {
|
||||
inserted ? gRandom.next() : mm.base[finger.backNode()].entry->priority);
|
||||
if (!inserted) {
|
||||
auto &n = mm.base[node];
|
||||
n.pointer[0] = child<std::memory_order_relaxed>(finger.backNode(), false,
|
||||
latestVersion);
|
||||
n.pointer[1] = child<std::memory_order_relaxed>(finger.backNode(), true,
|
||||
latestVersion);
|
||||
n.pointer[0][0] = child<std::memory_order_relaxed>(finger.backNode(),
|
||||
false, latestVersion);
|
||||
n.pointer[1][0] = child<std::memory_order_relaxed>(finger.backNode(),
|
||||
true, latestVersion);
|
||||
}
|
||||
finger.backNodeRef() = node;
|
||||
uint32_t oldSize = finger.searchPathSize();
|
||||
@@ -775,10 +779,9 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl {
|
||||
uint32_t priority) {
|
||||
auto result = mm.allocate();
|
||||
auto &node = mm.base[result];
|
||||
node.updateVersion = version;
|
||||
node.pointer[0] = 0;
|
||||
node.pointer[1] = 0;
|
||||
node.updated.store(false, std::memory_order_relaxed);
|
||||
node.pointer[0][0] = 0;
|
||||
node.pointer[1][0] = 0;
|
||||
node.updateInfo.store(UpdateInfo{}, std::memory_order_relaxed);
|
||||
node.entry =
|
||||
Entry::make(version, rangeVersion, key, keyLen, val, valLen, priority);
|
||||
return result;
|
||||
|
Reference in New Issue
Block a user