Avoid some TLS lookups for InternalVersionT::zero
This commit is contained in:
@@ -395,8 +395,9 @@ inline void Node48::copyChildrenAndKeyFrom(const Node16 &other) {
|
||||
assert(numChildren == Node16::kMaxNodes);
|
||||
memset(index, -1, sizeof(index));
|
||||
memset(children, 0, sizeof(children));
|
||||
const auto z = InternalVersionT::zero;
|
||||
for (auto &v : childMaxVersion) {
|
||||
v = InternalVersionT::zero;
|
||||
v = z;
|
||||
}
|
||||
memcpy(partialKey(), &other + 1, partialKeyLen);
|
||||
bitSet.init();
|
||||
@@ -423,8 +424,9 @@ inline void Node48::copyChildrenAndKeyFrom(const Node48 &other) {
|
||||
nextFree = other.nextFree;
|
||||
memcpy(index, other.index, sizeof(index));
|
||||
memset(children, 0, sizeof(children));
|
||||
const auto z = InternalVersionT::zero;
|
||||
for (auto &v : childMaxVersion) {
|
||||
v = InternalVersionT::zero;
|
||||
v = z;
|
||||
}
|
||||
for (int i = 0; i < numChildren; ++i) {
|
||||
children[i] = other.children[i];
|
||||
@@ -442,8 +444,9 @@ inline void Node48::copyChildrenAndKeyFrom(const Node256 &other) {
|
||||
kNodeCopySize);
|
||||
memset(index, -1, sizeof(index));
|
||||
memset(children, 0, sizeof(children));
|
||||
const auto z = InternalVersionT::zero;
|
||||
for (auto &v : childMaxVersion) {
|
||||
v = InternalVersionT::zero;
|
||||
v = z;
|
||||
}
|
||||
nextFree = other.numChildren;
|
||||
bitSet = other.bitSet;
|
||||
@@ -470,11 +473,12 @@ inline void Node256::copyChildrenAndKeyFrom(const Node48 &other) {
|
||||
kNodeCopySize);
|
||||
bitSet = other.bitSet;
|
||||
memset(children, 0, sizeof(children));
|
||||
const auto z = InternalVersionT::zero;
|
||||
for (auto &v : childMaxVersion) {
|
||||
v = InternalVersionT::zero;
|
||||
v = z;
|
||||
}
|
||||
for (auto &v : maxOfMax) {
|
||||
v = InternalVersionT::zero;
|
||||
v = z;
|
||||
}
|
||||
bitSet.forEachSet([&](int c) {
|
||||
children[c] = other.children[other.index[c]];
|
||||
@@ -491,8 +495,9 @@ inline void Node256::copyChildrenAndKeyFrom(const Node256 &other) {
|
||||
memcpy((char *)this + kNodeCopyBegin, (char *)&other + kNodeCopyBegin,
|
||||
kNodeCopySize);
|
||||
memset(children, 0, sizeof(children));
|
||||
const auto z = InternalVersionT::zero;
|
||||
for (auto &v : childMaxVersion) {
|
||||
v = InternalVersionT::zero;
|
||||
v = z;
|
||||
}
|
||||
bitSet = other.bitSet;
|
||||
bitSet.forEachSet([&](int c) {
|
||||
@@ -619,13 +624,15 @@ template <class T> struct BoundedFreeListAllocator {
|
||||
T *result = allocate_helper(partialKeyCapacity);
|
||||
if constexpr (!std::is_same_v<T, Node0>) {
|
||||
memset(result->children, 0, sizeof(result->children));
|
||||
const auto z = InternalVersionT::zero;
|
||||
for (auto &v : result->childMaxVersion) {
|
||||
v = InternalVersionT::zero;
|
||||
v = z;
|
||||
}
|
||||
}
|
||||
if constexpr (std::is_same_v<T, Node48> || std::is_same_v<T, Node256>) {
|
||||
const auto z = InternalVersionT::zero;
|
||||
for (auto &v : result->maxOfMax) {
|
||||
v = InternalVersionT::zero;
|
||||
v = z;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
@@ -730,6 +737,10 @@ struct WriteContext {
|
||||
int64_t write_bytes;
|
||||
} accum;
|
||||
|
||||
// Cache a copy of InternalVersionT::zero, so we don't need to do the TLS
|
||||
// lookup as often.
|
||||
InternalVersionT zero;
|
||||
|
||||
WriteContext() {
  // Start with every accumulated counter cleared to zero bytes.
  memset(&accum, 0, sizeof accum);
}
|
||||
|
||||
template <class T> T *allocate(int c) {
|
||||
@@ -1417,7 +1428,7 @@ void maybeDownsize(Node *self, WriteContext *tls, ConflictSet::Impl *impl,
|
||||
// that we have a new parent.
|
||||
setMaxVersion(child, impl, childMaxVersion);
|
||||
if (child->parent) {
|
||||
rezero(child->parent, InternalVersionT::zero);
|
||||
rezero(child->parent, tls->zero);
|
||||
}
|
||||
|
||||
getInTree(self, impl) = child;
|
||||
@@ -1553,7 +1564,7 @@ Node *erase(Node *self, WriteContext *tls, ConflictSet::Impl *impl,
|
||||
parent48->index[parentIndex] = toRemoveChildrenIndex;
|
||||
parent48->reverseIndex[toRemoveChildrenIndex] = parentIndex;
|
||||
}
|
||||
parent48->childMaxVersion[lastChildrenIndex] = InternalVersionT::zero;
|
||||
parent48->childMaxVersion[lastChildrenIndex] = tls->zero;
|
||||
|
||||
--parent->numChildren;
|
||||
|
||||
@@ -2894,8 +2905,7 @@ template <bool kBegin>
|
||||
child->partialKeyLen = 0;
|
||||
child->parent = *self;
|
||||
child->parentsIndex = key.front();
|
||||
setMaxVersion(child, impl,
|
||||
kBegin ? writeVersion : InternalVersionT::zero);
|
||||
setMaxVersion(child, impl, kBegin ? writeVersion : tls->zero);
|
||||
}
|
||||
|
||||
self = &child;
|
||||
@@ -2943,8 +2953,7 @@ void addPointWrite(Node *&root, std::span<const uint8_t> key,
|
||||
n->entry.pointVersion = writeVersion;
|
||||
setMaxVersion(n, impl, writeVersion);
|
||||
n->entry.rangeVersion =
|
||||
p == nullptr ? InternalVersionT::zero
|
||||
: std::max(p->entry.rangeVersion, InternalVersionT::zero);
|
||||
p == nullptr ? tls->zero : std::max(p->entry.rangeVersion, tls->zero);
|
||||
} else {
|
||||
assert(writeVersion >= n->entry.pointVersion);
|
||||
n->entry.pointVersion = writeVersion;
|
||||
@@ -3008,8 +3017,7 @@ void addWriteRange(Node *&root, std::span<const uint8_t> begin,
|
||||
++tls->accum.entries_inserted;
|
||||
auto *p = nextLogical(beginNode);
|
||||
beginNode->entry.rangeVersion =
|
||||
p == nullptr ? InternalVersionT::zero
|
||||
: std::max(p->entry.rangeVersion, InternalVersionT::zero);
|
||||
p == nullptr ? tls->zero : std::max(p->entry.rangeVersion, tls->zero);
|
||||
beginNode->entry.pointVersion = writeVersion;
|
||||
assert(maxVersion(beginNode, impl) <= writeVersion);
|
||||
setMaxVersion(beginNode, impl, writeVersion);
|
||||
@@ -3029,8 +3037,7 @@ void addWriteRange(Node *&root, std::span<const uint8_t> begin,
|
||||
++tls->accum.entries_inserted;
|
||||
auto *p = nextLogical(endNode);
|
||||
endNode->entry.pointVersion =
|
||||
p == nullptr ? InternalVersionT::zero
|
||||
: std::max(p->entry.rangeVersion, InternalVersionT::zero);
|
||||
p == nullptr ? tls->zero : std::max(p->entry.rangeVersion, tls->zero);
|
||||
auto m = maxVersion(endNode, impl);
|
||||
setMaxVersion(endNode, impl,
|
||||
std::max<InternalVersionT>(m, endNode->entry.pointVersion));
|
||||
@@ -3151,7 +3158,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
||||
// There could be other conflict sets in the same thread. We need
|
||||
// InternalVersionT::zero to be correct for this conflict set for the
|
||||
// lifetime of the current call frame.
|
||||
InternalVersionT::zero = oldestVersion;
|
||||
InternalVersionT::zero = tls.zero = oldestVersion;
|
||||
|
||||
assert(writeVersion >= newestVersionFullPrecision);
|
||||
|
||||
@@ -3262,7 +3269,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
||||
InternalVersionT oldestVersion{o};
|
||||
this->oldestVersionFullPrecision = o;
|
||||
this->oldestVersion = oldestVersion;
|
||||
InternalVersionT::zero = oldestVersion;
|
||||
InternalVersionT::zero = tls.zero = oldestVersion;
|
||||
#ifdef NDEBUG
|
||||
// This is here for performance reasons, since we want to amortize the cost
|
||||
// of storing the search path as a string. In tests, we want to exercise the
|
||||
@@ -3312,7 +3319,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
||||
root->entry.pointVersion = this->oldestVersion;
|
||||
root->entry.rangeVersion = this->oldestVersion;
|
||||
|
||||
InternalVersionT::zero = this->oldestVersion;
|
||||
InternalVersionT::zero = tls.zero = this->oldestVersion;
|
||||
|
||||
// Intentionally not resetting totalBytes
|
||||
}
|
||||
|
Reference in New Issue
Block a user