From bad9d7ced86adf26fcb4172fb00bd81917450975 Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Fri, 2 Aug 2024 13:54:53 -0700 Subject: [PATCH] Avoid some tls lookups for InternalVersionT::zero --- ConflictSet.cpp | 49 ++++++++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/ConflictSet.cpp b/ConflictSet.cpp index 7c1e889..d28e568 100644 --- a/ConflictSet.cpp +++ b/ConflictSet.cpp @@ -395,8 +395,9 @@ inline void Node48::copyChildrenAndKeyFrom(const Node16 &other) { assert(numChildren == Node16::kMaxNodes); memset(index, -1, sizeof(index)); memset(children, 0, sizeof(children)); + const auto z = InternalVersionT::zero; for (auto &v : childMaxVersion) { - v = InternalVersionT::zero; + v = z; } memcpy(partialKey(), &other + 1, partialKeyLen); bitSet.init(); @@ -423,8 +424,9 @@ inline void Node48::copyChildrenAndKeyFrom(const Node48 &other) { nextFree = other.nextFree; memcpy(index, other.index, sizeof(index)); memset(children, 0, sizeof(children)); + const auto z = InternalVersionT::zero; for (auto &v : childMaxVersion) { - v = InternalVersionT::zero; + v = z; } for (int i = 0; i < numChildren; ++i) { children[i] = other.children[i]; @@ -442,8 +444,9 @@ inline void Node48::copyChildrenAndKeyFrom(const Node256 &other) { kNodeCopySize); memset(index, -1, sizeof(index)); memset(children, 0, sizeof(children)); + const auto z = InternalVersionT::zero; for (auto &v : childMaxVersion) { - v = InternalVersionT::zero; + v = z; } nextFree = other.numChildren; bitSet = other.bitSet; @@ -470,11 +473,12 @@ inline void Node256::copyChildrenAndKeyFrom(const Node48 &other) { kNodeCopySize); bitSet = other.bitSet; memset(children, 0, sizeof(children)); + const auto z = InternalVersionT::zero; for (auto &v : childMaxVersion) { - v = InternalVersionT::zero; + v = z; } for (auto &v : maxOfMax) { - v = InternalVersionT::zero; + v = z; } bitSet.forEachSet([&](int c) { children[c] = other.children[other.index[c]]; @@ -491,8 +495,9 @@ inline void Node256::copyChildrenAndKeyFrom(const Node256 &other) { memcpy((char *)this + kNodeCopyBegin, (char *)&other + kNodeCopyBegin, kNodeCopySize); memset(children, 0, sizeof(children)); + const auto z = InternalVersionT::zero; for (auto &v : childMaxVersion) { - v = InternalVersionT::zero; + v = z; } bitSet = other.bitSet; bitSet.forEachSet([&](int c) { @@ -619,13 +624,15 @@ template struct BoundedFreeListAllocator { T *result = allocate_helper(partialKeyCapacity); if constexpr (!std::is_same_v) { memset(result->children, 0, sizeof(result->children)); + const auto z = InternalVersionT::zero; for (auto &v : result->childMaxVersion) { - v = InternalVersionT::zero; + v = z; } } if constexpr (std::is_same_v || std::is_same_v) { + const auto z = InternalVersionT::zero; for (auto &v : result->maxOfMax) { - v = InternalVersionT::zero; + v = z; } } return result; @@ -730,6 +737,10 @@ struct WriteContext { int64_t write_bytes; } accum; + // Cache a copy of InternalVersionT::zero, so we don't need to do the TLS + // lookup as often. + InternalVersionT zero; + WriteContext() { memset(&accum, 0, sizeof(accum)); } template T *allocate(int c) { @@ -1417,7 +1428,7 @@ void maybeDownsize(Node *self, WriteContext *tls, ConflictSet::Impl *impl, // that we have a new parent. setMaxVersion(child, impl, childMaxVersion); if (child->parent) { - rezero(child->parent, InternalVersionT::zero); + rezero(child->parent, tls->zero); } getInTree(self, impl) = child; @@ -1553,7 +1564,7 @@ Node *erase(Node *self, WriteContext *tls, ConflictSet::Impl *impl, parent48->index[parentIndex] = toRemoveChildrenIndex; parent48->reverseIndex[toRemoveChildrenIndex] = parentIndex; } - parent48->childMaxVersion[lastChildrenIndex] = InternalVersionT::zero; + parent48->childMaxVersion[lastChildrenIndex] = tls->zero; --parent->numChildren; @@ -2894,8 +2905,7 @@ template child->partialKeyLen = 0; child->parent = *self; child->parentsIndex = key.front(); - setMaxVersion(child, impl, - kBegin ? writeVersion : InternalVersionT::zero); + setMaxVersion(child, impl, kBegin ? writeVersion : tls->zero); } self = &child; @@ -2943,8 +2953,7 @@ void addPointWrite(Node *&root, std::span key, n->entry.pointVersion = writeVersion; setMaxVersion(n, impl, writeVersion); n->entry.rangeVersion = - p == nullptr ? InternalVersionT::zero - : std::max(p->entry.rangeVersion, InternalVersionT::zero); + p == nullptr ? tls->zero : std::max(p->entry.rangeVersion, tls->zero); } else { assert(writeVersion >= n->entry.pointVersion); n->entry.pointVersion = writeVersion; @@ -3008,8 +3017,7 @@ void addWriteRange(Node *&root, std::span begin, ++tls->accum.entries_inserted; auto *p = nextLogical(beginNode); beginNode->entry.rangeVersion = - p == nullptr ? InternalVersionT::zero - : std::max(p->entry.rangeVersion, InternalVersionT::zero); + p == nullptr ? tls->zero : std::max(p->entry.rangeVersion, tls->zero); beginNode->entry.pointVersion = writeVersion; assert(maxVersion(beginNode, impl) <= writeVersion); setMaxVersion(beginNode, impl, writeVersion); @@ -3029,8 +3037,7 @@ void addWriteRange(Node *&root, std::span begin, ++tls->accum.entries_inserted; auto *p = nextLogical(endNode); endNode->entry.pointVersion = - p == nullptr ? InternalVersionT::zero - : std::max(p->entry.rangeVersion, InternalVersionT::zero); + p == nullptr ? tls->zero : std::max(p->entry.rangeVersion, tls->zero); auto m = maxVersion(endNode, impl); setMaxVersion(endNode, impl, std::max(m, endNode->entry.pointVersion)); @@ -3151,7 +3158,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl { // There could be other conflict sets in the same thread. We need // InternalVersionT::zero to be correct for this conflict set for the // lifetime of the current call frame. - InternalVersionT::zero = oldestVersion; + InternalVersionT::zero = tls.zero = oldestVersion; assert(writeVersion >= newestVersionFullPrecision); @@ -3262,7 +3269,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl { InternalVersionT oldestVersion{o}; this->oldestVersionFullPrecision = o; this->oldestVersion = oldestVersion; - InternalVersionT::zero = oldestVersion; + InternalVersionT::zero = tls.zero = oldestVersion; #ifdef NDEBUG // This is here for performance reasons, since we want to amortize the cost // of storing the search path as a string. In tests, we want to exercise the @@ -3312,7 +3319,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl { root->entry.pointVersion = this->oldestVersion; root->entry.rangeVersion = this->oldestVersion; - InternalVersionT::zero = this->oldestVersion; + InternalVersionT::zero = tls.zero = this->oldestVersion; // Intentionally not resetting totalBytes }