Branch-free child function

This commit is contained in:
2024-06-04 13:59:26 -07:00
parent 118071e3e9
commit 13f6279970
3 changed files with 80 additions and 74 deletions

View File

@@ -150,15 +150,27 @@ struct Entry {
}
};
// Version stamp describing whether (and as of which version) a node's
// in-place update applies. A default-constructed value holds the sentinel
// `kVersionIfNotUpdated`, which `updated()` reports as "not updated".
struct UpdateInfo {
// Sentinel for "no update recorded": the largest int64_t value.
constexpr static int64_t kVersionIfNotUpdated = 0x7fffffffffffffff;
int64_t version;
// Starts out in the "not updated" state.
UpdateInfo() : version(kVersionIfNotUpdated) {}
// True once `version` holds anything other than the sentinel.
bool updated() const { return kVersionIfNotUpdated != version; }
// True when the stored version is less than or equal to `at`.
bool updated(int64_t at) const { return !(at < version); }
};
static_assert(std::atomic<UpdateInfo>::is_always_lock_free);
// Node record holding an `Entry` pointer plus child indices.
// NOTE(review): this listing appears to interleave the pre- and post-change
// lines of a diff with the +/- markers stripped (both `pointer[3]` and
// `pointer[2][2]` are declared below, as are `updated` and `updateInfo`), so
// the struct is not compilable exactly as shown -- confirm against the real
// file before editing.
struct Node {
union {
int64_t updateVersion;
std::atomic<UpdateInfo> updateInfo;
// presumably a link used while the node is unallocated -- TODO confirm
uint32_t nextFree;
};
Entry *entry;
uint32_t pointer[3];
bool replacedPointer;
std::atomic<bool> updated;
// [left/right, older/newer]. Logically this is only 1 aux pointer since we
// only store one updateInfo, but this encoding lets us write a branch-free
// `child` function, which really helps with the effective ILP of the bulk
// firstGeq function.
uint32_t pointer[2][2];
};
// Limit mmap to 32 GiB so valgrind doesn't complain.
@@ -294,24 +306,21 @@ struct MemManager {
while (stackIndex > 0) {
uint32_t p = stack[--stackIndex];
auto &node = base[p];
if (node.updated.load(std::memory_order_relaxed)) {
if (node.pointer[!node.replacedPointer] != 0) {
tryPush(p, node.pointer[!node.replacedPointer]);
auto updateInfo = node.updateInfo.load(std::memory_order_relaxed);
if (updateInfo.updated()) {
if (node.pointer[0][1] != 0) {
tryPush(p, node.pointer[0][1]);
}
if (oldestVersion < node.updateVersion) {
if (node.pointer[node.replacedPointer] != 0) {
tryPush(p, node.pointer[node.replacedPointer]);
}
if (node.pointer[1][1] != 0) {
tryPush(p, node.pointer[1][1]);
}
if (node.pointer[2] != 0) {
tryPush(p, node.pointer[2]);
}
if (!updateInfo.updated(oldestVersion)) {
if (node.pointer[0][0] != 0) {
tryPush(p, node.pointer[0][0]);
}
} else {
if (node.pointer[0] != 0) {
tryPush(p, node.pointer[0]);
}
if (node.pointer[1] != 0) {
tryPush(p, node.pointer[1]);
if (node.pointer[1][0] != 0) {
tryPush(p, node.pointer[1][0]);
}
}
}
@@ -498,12 +507,9 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl {
kOrder == std::memory_order_relaxed);
auto &n = mm.base[node];
uint32_t result;
if (n.updated.load(kOrder) && n.updateVersion <= at &&
which == n.replacedPointer) {
result = n.pointer[2];
} else {
result = n.pointer[which];
}
assert(at < UpdateInfo::kVersionIfNotUpdated);
auto updateInfo = n.updateInfo.load(kOrder);
result = n.pointer[which][updateInfo.updated(at)];
assert(result == 0 || result >= kMinAddressable);
#ifndef NDEBUG
if (result != 0) {
@@ -521,7 +527,10 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl {
return node;
}
auto &n = mm.base[node];
const bool updated = n.updated.load(std::memory_order_relaxed);
auto updateInfo = n.updateInfo.load(std::memory_order_relaxed);
const bool updated = updateInfo.updated();
auto doCopy = [&]() {
uint32_t copy = mm.allocate();
@@ -532,30 +541,25 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl {
#endif
auto &c = mm.base[copy];
c.entry = n.entry->addref();
c.pointer[which] = child;
c.pointer[!which] =
c.pointer[which][0] = child;
c.pointer[!which][0] =
this->child<std::memory_order_relaxed>(node, !which, latestVersion);
c.updated.store(false, std::memory_order_relaxed);
c.updateVersion = version;
c.updateInfo.store(UpdateInfo{}, std::memory_order_relaxed);
assert(copy == 0 || copy >= kMinAddressable);
return copy;
};
if (n.updateVersion == version) {
// The reason these aren't data races is that concurrent readers are
// reading < `version`
if (updated && n.replacedPointer != which) {
auto result = doCopy();
// We can't update n.replacedPointer without introducing a data race
// (unless we packed it into the atomic?) so we copy. pointer[2] becomes
// unreachable, but need to tell the garbage collector.
n.pointer[2] = 0;
return result;
} else if (updated) {
n.pointer[2] = child;
} else {
n.pointer[which] = child;
}
if (n.entry->pointVersion == version || n.entry->rangeVersion == version) {
// This node is not yet published to concurrent readers
n.pointer[which][0] = child;
assert(node == 0 || node >= kMinAddressable);
return node;
}
if (updateInfo.version == version) {
// Not a data race since concurrent readers are reading at a version <
// `updateInfo.version`
n.pointer[which][1] = child;
assert(node == 0 || node >= kMinAddressable);
return node;
}
@@ -564,10 +568,10 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl {
// We already used this node's in-place update
return doCopy();
} else {
n.updateVersion = version;
n.pointer[2] = child;
n.replacedPointer = which;
n.updated.store(true, std::memory_order_release); // Must be last
n.pointer[which][1] = child;
n.pointer[!which][1] = n.pointer[!which][0];
updateInfo.version = version;
n.updateInfo.store(updateInfo, std::memory_order_release); // Must be last
assert(node == 0 || node >= kMinAddressable);
return node;
}
@@ -660,10 +664,10 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl {
inserted ? gRandom.next() : mm.base[finger.backNode()].entry->priority);
if (!inserted) {
auto &n = mm.base[node];
n.pointer[0] = child<std::memory_order_relaxed>(finger.backNode(), false,
latestVersion);
n.pointer[1] = child<std::memory_order_relaxed>(finger.backNode(), true,
latestVersion);
n.pointer[0][0] = child<std::memory_order_relaxed>(finger.backNode(),
false, latestVersion);
n.pointer[1][0] = child<std::memory_order_relaxed>(finger.backNode(),
true, latestVersion);
}
finger.backNodeRef() = node;
uint32_t oldSize = finger.searchPathSize();
@@ -775,10 +779,9 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl {
uint32_t priority) {
auto result = mm.allocate();
auto &node = mm.base[result];
node.updateVersion = version;
node.pointer[0] = 0;
node.pointer[1] = 0;
node.updated.store(false, std::memory_order_relaxed);
node.pointer[0][0] = 0;
node.pointer[1][0] = 0;
node.updateInfo.store(UpdateInfo{}, std::memory_order_relaxed);
node.entry =
Entry::make(version, rangeVersion, key, keyLen, val, valLen, priority);
return result;