Branch-free child function
This commit is contained in:
@@ -63,6 +63,7 @@ void bulkFirstGeq() {
|
|||||||
constexpr int kNumQueries = 100;
|
constexpr int kNumQueries = 100;
|
||||||
ankerl::nanobench::Bench bench;
|
ankerl::nanobench::Bench bench;
|
||||||
bench.batch(kNumQueries);
|
bench.batch(kNumQueries);
|
||||||
|
bench.minEpochIterations(kNumQueries * 5);
|
||||||
|
|
||||||
// Initialize `versionedMap`
|
// Initialize `versionedMap`
|
||||||
weaselab::VersionedMap versionedMap{0};
|
weaselab::VersionedMap versionedMap{0};
|
||||||
|
36
README.md
36
README.md
@@ -1,6 +1,7 @@
|
|||||||
feature | weaselab | fdb |
|
feature | weaselab | fdb |
|
||||||
|-|-|-|
|
|-|-|-|
|
||||||
|based on|node-copying treap|node-copying treap|
|
|based on|node-copying treap|node-copying treap|
|
||||||
|
|branch-free `child` function|✅|❌|
|
||||||
|bytes per node (worst-case)|64 + kv len + 1\*|96 + kv len|
|
|bytes per node (worst-case)|64 + kv len + 1\*|96 + kv len|
|
||||||
|cheaper iteration*|❌|✅|
|
|cheaper iteration*|❌|✅|
|
||||||
|efficient bulk queries|✅|❌|
|
|efficient bulk queries|✅|❌|
|
||||||
@@ -21,27 +22,28 @@ feature | weaselab | fdb |
|
|||||||
# benchmarks
|
# benchmarks
|
||||||
|
|
||||||
## weaselab
|
## weaselab
|
||||||
|
|
||||||
| ns/op | op/s | err% | total | benchmark
|
| ns/op | op/s | err% | total | benchmark
|
||||||
|--------------------:|--------------------:|--------:|----------:|:----------
|
|--------------------:|--------------------:|--------:|----------:|:----------
|
||||||
| 1,537.31 | 650,485.22 | 2.0% | 0.21 | `monotonically increasing`
|
| 1,538.88 | 649,824.01 | 2.7% | 0.22 | `monotonically increasing`
|
||||||
| 1.25 | 798,963,102.97 | 0.1% | 0.01 | `*iter`
|
| 1.25 | 798,888,023.37 | 0.0% | 0.01 | `*iter`
|
||||||
| 38.71 | 25,831,420.46 | 3.2% | 0.01 | `++iter`
|
| 32.10 | 31,153,434.23 | 0.7% | 0.01 | `++iter`
|
||||||
| 37.31 | 26,801,664.98 | 0.3% | 0.01 | `--iter`
|
| 31.08 | 32,178,831.30 | 0.3% | 0.01 | `--iter`
|
||||||
| 133.17 | 7,508,978.37 | 2.8% | 0.02 | `begin`
|
| 139.19 | 7,184,433.93 | 2.2% | 0.02 | `begin`
|
||||||
| 129.51 | 7,721,547.35 | 0.4% | 0.02 | `begin (firstGeq)`
|
| 176.43 | 5,667,847.59 | 0.3% | 0.02 | `begin (firstGeq)`
|
||||||
| 54.40 | 18,381,173.00 | 1.2% | 0.01 | `end`
|
| 54.56 | 18,329,336.92 | 0.4% | 0.01 | `end`
|
||||||
| 245.34 | 4,075,961.09 | 1.1% | 0.01 | `bulkFirstGeq`
|
| 159.37 | 6,274,892.80 | 0.1% | 0.10 | `bulkFirstGeq`
|
||||||
|
| 153.55 | 6,512,408.74 | 0.1% | 0.09 | `bulkFirstGeq (latest version)`
|
||||||
|
|
||||||
## fdb
|
## fdb
|
||||||
|
|
||||||
| ns/op | op/s | err% | total | benchmark
|
| ns/op | op/s | err% | total | benchmark
|
||||||
|--------------------:|--------------------:|--------:|----------:|:----------
|
|--------------------:|--------------------:|--------:|----------:|:----------
|
||||||
| 2,244.65 | 445,503.99 | 1.3% | 0.29 | `monotonically increasing`
|
| 2,236.83 | 447,061.26 | 1.0% | 0.29 | `monotonically increasing`
|
||||||
| 2.06 | 486,311,738.92 | 0.3% | 0.01 | `*iter`
|
| 2.11 | 473,370,596.42 | 0.9% | 0.01 | `*iter`
|
||||||
| 12.53 | 79,826,977.13 | 0.4% | 0.01 | `++iter`
|
| 12.07 | 82,858,977.78 | 1.2% | 0.01 | `++iter`
|
||||||
| 8.86 | 112,927,293.80 | 1.3% | 0.01 | `--iter`
|
| 8.54 | 117,158,071.80 | 0.9% | 0.01 | `--iter`
|
||||||
| 89.27 | 11,201,505.02 | 0.7% | 0.01 | `begin`
|
| 94.26 | 10,608,588.66 | 1.1% | 0.01 | `begin`
|
||||||
| 144.38 | 6,926,060.07 | 2.9% | 0.02 | `begin (firstGeq)`
|
| 159.01 | 6,288,940.61 | 0.4% | 0.02 | `begin (firstGeq)`
|
||||||
| 71.84 | 13,920,283.88 | 0.4% | 0.01 | `end`
|
| 73.47 | 13,610,276.41 | 0.9% | 0.01 | `end`
|
||||||
| 412.10 | 2,426,597.75 | 0.2% | 0.01 | `bulkFirstGeq`
|
| 422.53 | 2,366,723.10 | 0.1% | 0.25 | `bulkFirstGeq`
|
||||||
|
| 365.13 | 2,738,758.86 | 0.1% | 0.22 | `bulkFirstGeq (latest version)`
|
||||||
|
113
VersionedMap.cpp
113
VersionedMap.cpp
@@ -150,15 +150,27 @@ struct Entry {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Holds the version associated with an update. A default-constructed value
// carries the sentinel `kVersionIfNotUpdated`, which `updated()` reports as
// "not updated".
struct UpdateInfo {
|
||||||
|
// Start in the "not updated" state by storing the sentinel version.
UpdateInfo() : version(kVersionIfNotUpdated) {}
|
||||||
|
// Version recorded for the update, or `kVersionIfNotUpdated` if none.
int64_t version;
|
||||||
|
// Sentinel: the largest int64_t value. Because it compares greater than any
// smaller version, `updated(at)` is false for every `at` below it.
constexpr static int64_t kVersionIfNotUpdated = 0x7fffffffffffffff;
|
||||||
|
// True iff `version` is not the sentinel.
bool updated() const { return version != UpdateInfo::kVersionIfNotUpdated; }
|
||||||
|
// True iff `version <= at`. With the sentinel stored this is only true when
// `at` equals `kVersionIfNotUpdated` itself.
bool updated(int64_t at) const { return version <= at; }
|
||||||
|
};
|
||||||
|
|
||||||
|
static_assert(std::atomic<UpdateInfo>::is_always_lock_free);
|
||||||
|
|
||||||
struct Node {
|
struct Node {
|
||||||
union {
|
union {
|
||||||
int64_t updateVersion;
|
std::atomic<UpdateInfo> updateInfo;
|
||||||
uint32_t nextFree;
|
uint32_t nextFree;
|
||||||
};
|
};
|
||||||
Entry *entry;
|
Entry *entry;
|
||||||
uint32_t pointer[3];
|
// [left/right, older/newer]. Logically this is only 1 aux pointer since we
|
||||||
bool replacedPointer;
|
// only store one updateInfo, but this encoding lets us write a branch-free
|
||||||
std::atomic<bool> updated;
|
// `child` function, which really helps with the effective ILP of the bulk
|
||||||
|
// firstGeq function.
|
||||||
|
uint32_t pointer[2][2];
|
||||||
};
|
};
|
||||||
|
|
||||||
// Limit mmap to 32 GiB so valgrind doesn't complain.
|
// Limit mmap to 32 GiB so valgrind doesn't complain.
|
||||||
@@ -294,24 +306,21 @@ struct MemManager {
|
|||||||
while (stackIndex > 0) {
|
while (stackIndex > 0) {
|
||||||
uint32_t p = stack[--stackIndex];
|
uint32_t p = stack[--stackIndex];
|
||||||
auto &node = base[p];
|
auto &node = base[p];
|
||||||
if (node.updated.load(std::memory_order_relaxed)) {
|
auto updateInfo = node.updateInfo.load(std::memory_order_relaxed);
|
||||||
if (node.pointer[!node.replacedPointer] != 0) {
|
if (updateInfo.updated()) {
|
||||||
tryPush(p, node.pointer[!node.replacedPointer]);
|
if (node.pointer[0][1] != 0) {
|
||||||
|
tryPush(p, node.pointer[0][1]);
|
||||||
}
|
}
|
||||||
if (oldestVersion < node.updateVersion) {
|
if (node.pointer[1][1] != 0) {
|
||||||
if (node.pointer[node.replacedPointer] != 0) {
|
tryPush(p, node.pointer[1][1]);
|
||||||
tryPush(p, node.pointer[node.replacedPointer]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (node.pointer[2] != 0) {
|
if (!updateInfo.updated(oldestVersion)) {
|
||||||
tryPush(p, node.pointer[2]);
|
if (node.pointer[0][0] != 0) {
|
||||||
|
tryPush(p, node.pointer[0][0]);
|
||||||
}
|
}
|
||||||
} else {
|
if (node.pointer[1][0] != 0) {
|
||||||
if (node.pointer[0] != 0) {
|
tryPush(p, node.pointer[1][0]);
|
||||||
tryPush(p, node.pointer[0]);
|
|
||||||
}
|
|
||||||
if (node.pointer[1] != 0) {
|
|
||||||
tryPush(p, node.pointer[1]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -498,12 +507,9 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl {
|
|||||||
kOrder == std::memory_order_relaxed);
|
kOrder == std::memory_order_relaxed);
|
||||||
auto &n = mm.base[node];
|
auto &n = mm.base[node];
|
||||||
uint32_t result;
|
uint32_t result;
|
||||||
if (n.updated.load(kOrder) && n.updateVersion <= at &&
|
assert(at < UpdateInfo::kVersionIfNotUpdated);
|
||||||
which == n.replacedPointer) {
|
auto updateInfo = n.updateInfo.load(kOrder);
|
||||||
result = n.pointer[2];
|
result = n.pointer[which][updateInfo.updated(at)];
|
||||||
} else {
|
|
||||||
result = n.pointer[which];
|
|
||||||
}
|
|
||||||
assert(result == 0 || result >= kMinAddressable);
|
assert(result == 0 || result >= kMinAddressable);
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
if (result != 0) {
|
if (result != 0) {
|
||||||
@@ -521,7 +527,10 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl {
|
|||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
auto &n = mm.base[node];
|
auto &n = mm.base[node];
|
||||||
const bool updated = n.updated.load(std::memory_order_relaxed);
|
|
||||||
|
auto updateInfo = n.updateInfo.load(std::memory_order_relaxed);
|
||||||
|
|
||||||
|
const bool updated = updateInfo.updated();
|
||||||
|
|
||||||
auto doCopy = [&]() {
|
auto doCopy = [&]() {
|
||||||
uint32_t copy = mm.allocate();
|
uint32_t copy = mm.allocate();
|
||||||
@@ -532,30 +541,25 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl {
|
|||||||
#endif
|
#endif
|
||||||
auto &c = mm.base[copy];
|
auto &c = mm.base[copy];
|
||||||
c.entry = n.entry->addref();
|
c.entry = n.entry->addref();
|
||||||
c.pointer[which] = child;
|
c.pointer[which][0] = child;
|
||||||
c.pointer[!which] =
|
c.pointer[!which][0] =
|
||||||
this->child<std::memory_order_relaxed>(node, !which, latestVersion);
|
this->child<std::memory_order_relaxed>(node, !which, latestVersion);
|
||||||
c.updated.store(false, std::memory_order_relaxed);
|
c.updateInfo.store(UpdateInfo{}, std::memory_order_relaxed);
|
||||||
c.updateVersion = version;
|
|
||||||
assert(copy == 0 || copy >= kMinAddressable);
|
assert(copy == 0 || copy >= kMinAddressable);
|
||||||
return copy;
|
return copy;
|
||||||
};
|
};
|
||||||
|
|
||||||
if (n.updateVersion == version) {
|
if (n.entry->pointVersion == version || n.entry->rangeVersion == version) {
|
||||||
// The reason these aren't data races is that concurrent readers are
|
// This node is not yet published to concurrent readers
|
||||||
// reading < `version`
|
n.pointer[which][0] = child;
|
||||||
if (updated && n.replacedPointer != which) {
|
assert(node == 0 || node >= kMinAddressable);
|
||||||
auto result = doCopy();
|
return node;
|
||||||
// We can't update n.replacedPointer without introducing a data race
|
|
||||||
// (unless we packed it into the atomic?) so we copy. pointer[2] becomes
|
|
||||||
// unreachable, but need to tell the garbage collector.
|
|
||||||
n.pointer[2] = 0;
|
|
||||||
return result;
|
|
||||||
} else if (updated) {
|
|
||||||
n.pointer[2] = child;
|
|
||||||
} else {
|
|
||||||
n.pointer[which] = child;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (updateInfo.version == version) {
|
||||||
|
// Not a data race since concurrent readers are reading at a version <
|
||||||
|
// `updateInfo.version`
|
||||||
|
n.pointer[which][1] = child;
|
||||||
assert(node == 0 || node >= kMinAddressable);
|
assert(node == 0 || node >= kMinAddressable);
|
||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
@@ -564,10 +568,10 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl {
|
|||||||
// We already used this node's in-place update
|
// We already used this node's in-place update
|
||||||
return doCopy();
|
return doCopy();
|
||||||
} else {
|
} else {
|
||||||
n.updateVersion = version;
|
n.pointer[which][1] = child;
|
||||||
n.pointer[2] = child;
|
n.pointer[!which][1] = n.pointer[!which][0];
|
||||||
n.replacedPointer = which;
|
updateInfo.version = version;
|
||||||
n.updated.store(true, std::memory_order_release); // Must be last
|
n.updateInfo.store(updateInfo, std::memory_order_release); // Must be last
|
||||||
assert(node == 0 || node >= kMinAddressable);
|
assert(node == 0 || node >= kMinAddressable);
|
||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
@@ -660,10 +664,10 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl {
|
|||||||
inserted ? gRandom.next() : mm.base[finger.backNode()].entry->priority);
|
inserted ? gRandom.next() : mm.base[finger.backNode()].entry->priority);
|
||||||
if (!inserted) {
|
if (!inserted) {
|
||||||
auto &n = mm.base[node];
|
auto &n = mm.base[node];
|
||||||
n.pointer[0] = child<std::memory_order_relaxed>(finger.backNode(), false,
|
n.pointer[0][0] = child<std::memory_order_relaxed>(finger.backNode(),
|
||||||
latestVersion);
|
false, latestVersion);
|
||||||
n.pointer[1] = child<std::memory_order_relaxed>(finger.backNode(), true,
|
n.pointer[1][0] = child<std::memory_order_relaxed>(finger.backNode(),
|
||||||
latestVersion);
|
true, latestVersion);
|
||||||
}
|
}
|
||||||
finger.backNodeRef() = node;
|
finger.backNodeRef() = node;
|
||||||
uint32_t oldSize = finger.searchPathSize();
|
uint32_t oldSize = finger.searchPathSize();
|
||||||
@@ -775,10 +779,9 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl {
|
|||||||
uint32_t priority) {
|
uint32_t priority) {
|
||||||
auto result = mm.allocate();
|
auto result = mm.allocate();
|
||||||
auto &node = mm.base[result];
|
auto &node = mm.base[result];
|
||||||
node.updateVersion = version;
|
node.pointer[0][0] = 0;
|
||||||
node.pointer[0] = 0;
|
node.pointer[1][0] = 0;
|
||||||
node.pointer[1] = 0;
|
node.updateInfo.store(UpdateInfo{}, std::memory_order_relaxed);
|
||||||
node.updated.store(false, std::memory_order_relaxed);
|
|
||||||
node.entry =
|
node.entry =
|
||||||
Entry::make(version, rangeVersion, key, keyLen, val, valLen, priority);
|
Entry::make(version, rangeVersion, key, keyLen, val, valLen, priority);
|
||||||
return result;
|
return result;
|
||||||
|
Reference in New Issue
Block a user