Avoid some TLS lookups for InternalVersionT::zero

This commit is contained in:
2024-08-02 13:54:53 -07:00
parent c8d9dc034d
commit bad9d7ced8

View File

@@ -395,8 +395,9 @@ inline void Node48::copyChildrenAndKeyFrom(const Node16 &other) {
assert(numChildren == Node16::kMaxNodes); assert(numChildren == Node16::kMaxNodes);
memset(index, -1, sizeof(index)); memset(index, -1, sizeof(index));
memset(children, 0, sizeof(children)); memset(children, 0, sizeof(children));
const auto z = InternalVersionT::zero;
for (auto &v : childMaxVersion) { for (auto &v : childMaxVersion) {
v = InternalVersionT::zero; v = z;
} }
memcpy(partialKey(), &other + 1, partialKeyLen); memcpy(partialKey(), &other + 1, partialKeyLen);
bitSet.init(); bitSet.init();
@@ -423,8 +424,9 @@ inline void Node48::copyChildrenAndKeyFrom(const Node48 &other) {
nextFree = other.nextFree; nextFree = other.nextFree;
memcpy(index, other.index, sizeof(index)); memcpy(index, other.index, sizeof(index));
memset(children, 0, sizeof(children)); memset(children, 0, sizeof(children));
const auto z = InternalVersionT::zero;
for (auto &v : childMaxVersion) { for (auto &v : childMaxVersion) {
v = InternalVersionT::zero; v = z;
} }
for (int i = 0; i < numChildren; ++i) { for (int i = 0; i < numChildren; ++i) {
children[i] = other.children[i]; children[i] = other.children[i];
@@ -442,8 +444,9 @@ inline void Node48::copyChildrenAndKeyFrom(const Node256 &other) {
kNodeCopySize); kNodeCopySize);
memset(index, -1, sizeof(index)); memset(index, -1, sizeof(index));
memset(children, 0, sizeof(children)); memset(children, 0, sizeof(children));
const auto z = InternalVersionT::zero;
for (auto &v : childMaxVersion) { for (auto &v : childMaxVersion) {
v = InternalVersionT::zero; v = z;
} }
nextFree = other.numChildren; nextFree = other.numChildren;
bitSet = other.bitSet; bitSet = other.bitSet;
@@ -470,11 +473,12 @@ inline void Node256::copyChildrenAndKeyFrom(const Node48 &other) {
kNodeCopySize); kNodeCopySize);
bitSet = other.bitSet; bitSet = other.bitSet;
memset(children, 0, sizeof(children)); memset(children, 0, sizeof(children));
const auto z = InternalVersionT::zero;
for (auto &v : childMaxVersion) { for (auto &v : childMaxVersion) {
v = InternalVersionT::zero; v = z;
} }
for (auto &v : maxOfMax) { for (auto &v : maxOfMax) {
v = InternalVersionT::zero; v = z;
} }
bitSet.forEachSet([&](int c) { bitSet.forEachSet([&](int c) {
children[c] = other.children[other.index[c]]; children[c] = other.children[other.index[c]];
@@ -491,8 +495,9 @@ inline void Node256::copyChildrenAndKeyFrom(const Node256 &other) {
memcpy((char *)this + kNodeCopyBegin, (char *)&other + kNodeCopyBegin, memcpy((char *)this + kNodeCopyBegin, (char *)&other + kNodeCopyBegin,
kNodeCopySize); kNodeCopySize);
memset(children, 0, sizeof(children)); memset(children, 0, sizeof(children));
const auto z = InternalVersionT::zero;
for (auto &v : childMaxVersion) { for (auto &v : childMaxVersion) {
v = InternalVersionT::zero; v = z;
} }
bitSet = other.bitSet; bitSet = other.bitSet;
bitSet.forEachSet([&](int c) { bitSet.forEachSet([&](int c) {
@@ -619,13 +624,15 @@ template <class T> struct BoundedFreeListAllocator {
T *result = allocate_helper(partialKeyCapacity); T *result = allocate_helper(partialKeyCapacity);
if constexpr (!std::is_same_v<T, Node0>) { if constexpr (!std::is_same_v<T, Node0>) {
memset(result->children, 0, sizeof(result->children)); memset(result->children, 0, sizeof(result->children));
const auto z = InternalVersionT::zero;
for (auto &v : result->childMaxVersion) { for (auto &v : result->childMaxVersion) {
v = InternalVersionT::zero; v = z;
} }
} }
if constexpr (std::is_same_v<T, Node48> || std::is_same_v<T, Node256>) { if constexpr (std::is_same_v<T, Node48> || std::is_same_v<T, Node256>) {
const auto z = InternalVersionT::zero;
for (auto &v : result->maxOfMax) { for (auto &v : result->maxOfMax) {
v = InternalVersionT::zero; v = z;
} }
} }
return result; return result;
@@ -730,6 +737,10 @@ struct WriteContext {
int64_t write_bytes; int64_t write_bytes;
} accum; } accum;
// Cache a copy of InternalVersionT::zero, so we don't need to do the TLS
// lookup as often.
InternalVersionT zero;
// Zero-fills `accum`; the constructor body does not assign `zero`.
// NOTE(review): the visible call sites assign `tls.zero` together with
// InternalVersionT::zero -- confirm nothing reads `zero` before one of
// those assignments has run.
WriteContext() { memset(&accum, 0, sizeof(accum)); } WriteContext() { memset(&accum, 0, sizeof(accum)); }
template <class T> T *allocate(int c) { template <class T> T *allocate(int c) {
@@ -1417,7 +1428,7 @@ void maybeDownsize(Node *self, WriteContext *tls, ConflictSet::Impl *impl,
// that we have a new parent. // that we have a new parent.
setMaxVersion(child, impl, childMaxVersion); setMaxVersion(child, impl, childMaxVersion);
if (child->parent) { if (child->parent) {
rezero(child->parent, InternalVersionT::zero); rezero(child->parent, tls->zero);
} }
getInTree(self, impl) = child; getInTree(self, impl) = child;
@@ -1553,7 +1564,7 @@ Node *erase(Node *self, WriteContext *tls, ConflictSet::Impl *impl,
parent48->index[parentIndex] = toRemoveChildrenIndex; parent48->index[parentIndex] = toRemoveChildrenIndex;
parent48->reverseIndex[toRemoveChildrenIndex] = parentIndex; parent48->reverseIndex[toRemoveChildrenIndex] = parentIndex;
} }
parent48->childMaxVersion[lastChildrenIndex] = InternalVersionT::zero; parent48->childMaxVersion[lastChildrenIndex] = tls->zero;
--parent->numChildren; --parent->numChildren;
@@ -2894,8 +2905,7 @@ template <bool kBegin>
child->partialKeyLen = 0; child->partialKeyLen = 0;
child->parent = *self; child->parent = *self;
child->parentsIndex = key.front(); child->parentsIndex = key.front();
setMaxVersion(child, impl, setMaxVersion(child, impl, kBegin ? writeVersion : tls->zero);
kBegin ? writeVersion : InternalVersionT::zero);
} }
self = &child; self = &child;
@@ -2943,8 +2953,7 @@ void addPointWrite(Node *&root, std::span<const uint8_t> key,
n->entry.pointVersion = writeVersion; n->entry.pointVersion = writeVersion;
setMaxVersion(n, impl, writeVersion); setMaxVersion(n, impl, writeVersion);
n->entry.rangeVersion = n->entry.rangeVersion =
p == nullptr ? InternalVersionT::zero p == nullptr ? tls->zero : std::max(p->entry.rangeVersion, tls->zero);
: std::max(p->entry.rangeVersion, InternalVersionT::zero);
} else { } else {
assert(writeVersion >= n->entry.pointVersion); assert(writeVersion >= n->entry.pointVersion);
n->entry.pointVersion = writeVersion; n->entry.pointVersion = writeVersion;
@@ -3008,8 +3017,7 @@ void addWriteRange(Node *&root, std::span<const uint8_t> begin,
++tls->accum.entries_inserted; ++tls->accum.entries_inserted;
auto *p = nextLogical(beginNode); auto *p = nextLogical(beginNode);
beginNode->entry.rangeVersion = beginNode->entry.rangeVersion =
p == nullptr ? InternalVersionT::zero p == nullptr ? tls->zero : std::max(p->entry.rangeVersion, tls->zero);
: std::max(p->entry.rangeVersion, InternalVersionT::zero);
beginNode->entry.pointVersion = writeVersion; beginNode->entry.pointVersion = writeVersion;
assert(maxVersion(beginNode, impl) <= writeVersion); assert(maxVersion(beginNode, impl) <= writeVersion);
setMaxVersion(beginNode, impl, writeVersion); setMaxVersion(beginNode, impl, writeVersion);
@@ -3029,8 +3037,7 @@ void addWriteRange(Node *&root, std::span<const uint8_t> begin,
++tls->accum.entries_inserted; ++tls->accum.entries_inserted;
auto *p = nextLogical(endNode); auto *p = nextLogical(endNode);
endNode->entry.pointVersion = endNode->entry.pointVersion =
p == nullptr ? InternalVersionT::zero p == nullptr ? tls->zero : std::max(p->entry.rangeVersion, tls->zero);
: std::max(p->entry.rangeVersion, InternalVersionT::zero);
auto m = maxVersion(endNode, impl); auto m = maxVersion(endNode, impl);
setMaxVersion(endNode, impl, setMaxVersion(endNode, impl,
std::max<InternalVersionT>(m, endNode->entry.pointVersion)); std::max<InternalVersionT>(m, endNode->entry.pointVersion));
@@ -3151,7 +3158,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
// There could be other conflict sets in the same thread. We need // There could be other conflict sets in the same thread. We need
// InternalVersionT::zero to be correct for this conflict set for the // InternalVersionT::zero to be correct for this conflict set for the
// lifetime of the current call frame. // lifetime of the current call frame.
InternalVersionT::zero = oldestVersion; InternalVersionT::zero = tls.zero = oldestVersion;
assert(writeVersion >= newestVersionFullPrecision); assert(writeVersion >= newestVersionFullPrecision);
@@ -3262,7 +3269,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
InternalVersionT oldestVersion{o}; InternalVersionT oldestVersion{o};
this->oldestVersionFullPrecision = o; this->oldestVersionFullPrecision = o;
this->oldestVersion = oldestVersion; this->oldestVersion = oldestVersion;
InternalVersionT::zero = oldestVersion; InternalVersionT::zero = tls.zero = oldestVersion;
#ifdef NDEBUG #ifdef NDEBUG
// This is here for performance reasons, since we want to amortize the cost // This is here for performance reasons, since we want to amortize the cost
// of storing the search path as a string. In tests, we want to exercise the // of storing the search path as a string. In tests, we want to exercise the
@@ -3312,7 +3319,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
root->entry.pointVersion = this->oldestVersion; root->entry.pointVersion = this->oldestVersion;
root->entry.rangeVersion = this->oldestVersion; root->entry.rangeVersion = this->oldestVersion;
InternalVersionT::zero = this->oldestVersion; InternalVersionT::zero = tls.zero = this->oldestVersion;
// Intentionally not resetting totalBytes // Intentionally not resetting totalBytes
} }