Branch-free child function

This commit is contained in:
2024-06-04 13:59:26 -07:00
parent 118071e3e9
commit 13f6279970
3 changed files with 80 additions and 74 deletions

View File

@@ -63,6 +63,7 @@ void bulkFirstGeq() {
constexpr int kNumQueries = 100;
ankerl::nanobench::Bench bench;
bench.batch(kNumQueries);
bench.minEpochIterations(kNumQueries * 5);
// Initialize `versionedMap`
weaselab::VersionedMap versionedMap{0};

View File

@@ -1,6 +1,7 @@
feature | weaselab | fdb |
|-|-|-|
|based on|node-copying treap|node-copying treap|
|branch-free `child` function|✅|❌|
|bytes per node (worst-case)|64 + kv len + 1\*|96 + kv len|
|cheaper iteration*|❌|✅|
|efficient bulk queries|✅|❌|
@@ -21,27 +22,28 @@ feature | weaselab | fdb |
# benchmarks
## weaselab
| ns/op | op/s | err% | total | benchmark
|--------------------:|--------------------:|--------:|----------:|:----------
| 1,537.31 | 650,485.22 | 2.0% | 0.21 | `monotonically increasing`
| 1.25 | 798,963,102.97 | 0.1% | 0.01 | `*iter`
| 38.71 | 25,831,420.46 | 3.2% | 0.01 | `++iter`
| 37.31 | 26,801,664.98 | 0.3% | 0.01 | `--iter`
| 133.17 | 7,508,978.37 | 2.8% | 0.02 | `begin`
| 129.51 | 7,721,547.35 | 0.4% | 0.02 | `begin (firstGeq)`
| 54.40 | 18,381,173.00 | 1.2% | 0.01 | `end`
| 245.34 | 4,075,961.09 | 1.1% | 0.01 | `bulkFirstGeq`
| 1,538.88 | 649,824.01 | 2.7% | 0.22 | `monotonically increasing`
| 1.25 | 798,888,023.37 | 0.0% | 0.01 | `*iter`
| 32.10 | 31,153,434.23 | 0.7% | 0.01 | `++iter`
| 31.08 | 32,178,831.30 | 0.3% | 0.01 | `--iter`
| 139.19 | 7,184,433.93 | 2.2% | 0.02 | `begin`
| 176.43 | 5,667,847.59 | 0.3% | 0.02 | `begin (firstGeq)`
| 54.56 | 18,329,336.92 | 0.4% | 0.01 | `end`
| 159.37 | 6,274,892.80 | 0.1% | 0.10 | `bulkFirstGeq`
| 153.55 | 6,512,408.74 | 0.1% | 0.09 | `bulkFirstGeq (latest version)`
## fdb
| ns/op | op/s | err% | total | benchmark
|--------------------:|--------------------:|--------:|----------:|:----------
| 2,244.65 | 445,503.99 | 1.3% | 0.29 | `monotonically increasing`
| 2.06 | 486,311,738.92 | 0.3% | 0.01 | `*iter`
| 12.53 | 79,826,977.13 | 0.4% | 0.01 | `++iter`
| 8.86 | 112,927,293.80 | 1.3% | 0.01 | `--iter`
| 89.27 | 11,201,505.02 | 0.7% | 0.01 | `begin`
| 144.38 | 6,926,060.07 | 2.9% | 0.02 | `begin (firstGeq)`
| 71.84 | 13,920,283.88 | 0.4% | 0.01 | `end`
| 412.10 | 2,426,597.75 | 0.2% | 0.01 | `bulkFirstGeq`
| 2,236.83 | 447,061.26 | 1.0% | 0.29 | `monotonically increasing`
| 2.11 | 473,370,596.42 | 0.9% | 0.01 | `*iter`
| 12.07 | 82,858,977.78 | 1.2% | 0.01 | `++iter`
| 8.54 | 117,158,071.80 | 0.9% | 0.01 | `--iter`
| 94.26 | 10,608,588.66 | 1.1% | 0.01 | `begin`
| 159.01 | 6,288,940.61 | 0.4% | 0.02 | `begin (firstGeq)`
| 73.47 | 13,610,276.41 | 0.9% | 0.01 | `end`
| 422.53 | 2,366,723.10 | 0.1% | 0.25 | `bulkFirstGeq`
| 365.13 | 2,738,758.86 | 0.1% | 0.22 | `bulkFirstGeq (latest version)`

View File

@@ -150,15 +150,27 @@ struct Entry {
}
};
// Records the version at which a node received its in-place update, or a
// sentinel if it has not been updated. Kept to a single 64-bit field so it can
// be stored in a lock-free std::atomic (checked by the static_assert below).
struct UpdateInfo {
  // Sentinel meaning "never updated": the largest int64_t value.
  constexpr static int64_t kVersionIfNotUpdated = 0x7fffffffffffffff;

  // A default-constructed UpdateInfo is in the "never updated" state.
  UpdateInfo() : version(kVersionIfNotUpdated) {}

  // True once a real (non-sentinel) version has been recorded.
  bool updated() const { return !(version == kVersionIfNotUpdated); }

  // True if the recorded update is visible when reading at version `at`.
  // For a never-updated value this is false for every `at` strictly below the
  // sentinel.
  bool updated(int64_t at) const { return at >= version; }

  int64_t version;
};
static_assert(std::atomic<UpdateInfo>::is_always_lock_free);
struct Node {
union {
int64_t updateVersion;
std::atomic<UpdateInfo> updateInfo;
uint32_t nextFree;
};
Entry *entry;
uint32_t pointer[3];
bool replacedPointer;
std::atomic<bool> updated;
// [left/right, older/newer]. Logically this is only 1 aux pointer since we
// only store one updateInfo, but this encoding lets us write a branch-free
// `child` function, which really helps with the effective ILP of the bulk
// firstGeq function.
// The second index is chosen by `updateInfo.updated(at)` for a read at
// version `at`: slot 0 holds the child as of before the in-place update, and
// slot 1 holds the child from the update's version onward.
uint32_t pointer[2][2];
};
// Limit mmap to 32 GiB so valgrind doesn't complain.
@@ -294,24 +306,21 @@ struct MemManager {
while (stackIndex > 0) {
uint32_t p = stack[--stackIndex];
auto &node = base[p];
if (node.updated.load(std::memory_order_relaxed)) {
if (node.pointer[!node.replacedPointer] != 0) {
tryPush(p, node.pointer[!node.replacedPointer]);
auto updateInfo = node.updateInfo.load(std::memory_order_relaxed);
if (updateInfo.updated()) {
if (node.pointer[0][1] != 0) {
tryPush(p, node.pointer[0][1]);
}
if (oldestVersion < node.updateVersion) {
if (node.pointer[node.replacedPointer] != 0) {
tryPush(p, node.pointer[node.replacedPointer]);
}
if (node.pointer[1][1] != 0) {
tryPush(p, node.pointer[1][1]);
}
if (node.pointer[2] != 0) {
tryPush(p, node.pointer[2]);
}
if (!updateInfo.updated(oldestVersion)) {
if (node.pointer[0][0] != 0) {
tryPush(p, node.pointer[0][0]);
}
} else {
if (node.pointer[0] != 0) {
tryPush(p, node.pointer[0]);
}
if (node.pointer[1] != 0) {
tryPush(p, node.pointer[1]);
if (node.pointer[1][0] != 0) {
tryPush(p, node.pointer[1][0]);
}
}
}
@@ -498,12 +507,9 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl {
kOrder == std::memory_order_relaxed);
auto &n = mm.base[node];
uint32_t result;
if (n.updated.load(kOrder) && n.updateVersion <= at &&
which == n.replacedPointer) {
result = n.pointer[2];
} else {
result = n.pointer[which];
}
assert(at < UpdateInfo::kVersionIfNotUpdated);
auto updateInfo = n.updateInfo.load(kOrder);
result = n.pointer[which][updateInfo.updated(at)];
assert(result == 0 || result >= kMinAddressable);
#ifndef NDEBUG
if (result != 0) {
@@ -521,7 +527,10 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl {
return node;
}
auto &n = mm.base[node];
const bool updated = n.updated.load(std::memory_order_relaxed);
auto updateInfo = n.updateInfo.load(std::memory_order_relaxed);
const bool updated = updateInfo.updated();
auto doCopy = [&]() {
uint32_t copy = mm.allocate();
@@ -532,30 +541,25 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl {
#endif
auto &c = mm.base[copy];
c.entry = n.entry->addref();
c.pointer[which] = child;
c.pointer[!which] =
c.pointer[which][0] = child;
c.pointer[!which][0] =
this->child<std::memory_order_relaxed>(node, !which, latestVersion);
c.updated.store(false, std::memory_order_relaxed);
c.updateVersion = version;
c.updateInfo.store(UpdateInfo{}, std::memory_order_relaxed);
assert(copy == 0 || copy >= kMinAddressable);
return copy;
};
if (n.updateVersion == version) {
// The reason these aren't data races is that concurrent readers are
// reading < `version`
if (updated && n.replacedPointer != which) {
auto result = doCopy();
// We can't update n.replacedPointer without introducing a data race
// (unless we packed it into the atomic?) so we copy. pointer[2] becomes
// unreachable, but need to tell the garbage collector.
n.pointer[2] = 0;
return result;
} else if (updated) {
n.pointer[2] = child;
} else {
n.pointer[which] = child;
}
if (n.entry->pointVersion == version || n.entry->rangeVersion == version) {
// This node is not yet published to concurrent readers
n.pointer[which][0] = child;
assert(node == 0 || node >= kMinAddressable);
return node;
}
if (updateInfo.version == version) {
// Not a data race since concurrent readers are reading at a version <
// `updateInfo.version`
n.pointer[which][1] = child;
assert(node == 0 || node >= kMinAddressable);
return node;
}
@@ -564,10 +568,10 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl {
// We already used this node's in-place update
return doCopy();
} else {
n.updateVersion = version;
n.pointer[2] = child;
n.replacedPointer = which;
n.updated.store(true, std::memory_order_release); // Must be last
n.pointer[which][1] = child;
n.pointer[!which][1] = n.pointer[!which][0];
updateInfo.version = version;
n.updateInfo.store(updateInfo, std::memory_order_release); // Must be last
assert(node == 0 || node >= kMinAddressable);
return node;
}
@@ -660,10 +664,10 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl {
inserted ? gRandom.next() : mm.base[finger.backNode()].entry->priority);
if (!inserted) {
auto &n = mm.base[node];
n.pointer[0] = child<std::memory_order_relaxed>(finger.backNode(), false,
latestVersion);
n.pointer[1] = child<std::memory_order_relaxed>(finger.backNode(), true,
latestVersion);
n.pointer[0][0] = child<std::memory_order_relaxed>(finger.backNode(),
false, latestVersion);
n.pointer[1][0] = child<std::memory_order_relaxed>(finger.backNode(),
true, latestVersion);
}
finger.backNodeRef() = node;
uint32_t oldSize = finger.searchPathSize();
@@ -775,10 +779,9 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl {
uint32_t priority) {
auto result = mm.allocate();
auto &node = mm.base[result];
node.updateVersion = version;
node.pointer[0] = 0;
node.pointer[1] = 0;
node.updated.store(false, std::memory_order_relaxed);
node.pointer[0][0] = 0;
node.pointer[1][0] = 0;
node.updateInfo.store(UpdateInfo{}, std::memory_order_relaxed);
node.entry =
Entry::make(version, rangeVersion, key, keyLen, val, valLen, priority);
return result;