Fix issue with thread_local performance

This commit is contained in:
2024-07-17 17:53:12 -07:00
parent b7d54d44e1
commit 640c1ca9dd

View File

@@ -587,9 +587,6 @@ struct Counter : private Metric {
} }
}; };
thread_local double nodes_allocated_accum = 0;
thread_local double nodes_released_accum = 0;
template <class T> struct BoundedFreeListAllocator { template <class T> struct BoundedFreeListAllocator {
static_assert(sizeof(T) >= sizeof(void *)); static_assert(sizeof(T) >= sizeof(void *));
@@ -625,7 +622,6 @@ template <class T> struct BoundedFreeListAllocator {
} }
T *allocate(int partialKeyCapacity) { T *allocate(int partialKeyCapacity) {
++nodes_allocated_accum;
T *result = allocate_helper(partialKeyCapacity); T *result = allocate_helper(partialKeyCapacity);
if constexpr (!std::is_same_v<T, Node0>) { if constexpr (!std::is_same_v<T, Node0>) {
memset(result->children, 0, sizeof(result->children)); memset(result->children, 0, sizeof(result->children));
@@ -642,7 +638,6 @@ template <class T> struct BoundedFreeListAllocator {
} }
void release(T *p) { void release(T *p) {
++nodes_released_accum;
if (freeListBytes >= kFreeListMaxMemory) { if (freeListBytes >= kFreeListMaxMemory) {
removeNode(p); removeNode(p);
return safe_free(p, sizeof(T) + p->partialKeyCapacity); return safe_free(p, sizeof(T) + p->partialKeyCapacity);
@@ -710,7 +705,60 @@ size_t Node::size() const {
} }
} }
struct NodeAllocators { // A type that's plumbed along the check call tree. Lifetime ends after each
// check call.
struct ReadContext {
  // Checks started, by kind. Bumped on entry to checkPointRead and
  // checkPrefixRead, and in checkRangeReadImpl once the point and prefix
  // special cases have been ruled out.
  double point_read_accum{0.0};
  double prefix_read_accum{0.0};
  double range_read_accum{0.0};

  // Checks that returned true early because the max version of the node being
  // visited was already <= the read version.
  double point_read_short_circuit_accum{0.0};
  double prefix_read_short_circuit_accum{0.0};
  double range_read_short_circuit_accum{0.0};

  // Loop steps taken while walking the tree, by kind of check.
  double point_read_iterations_accum{0.0};
  double prefix_read_iterations_accum{0.0};
  double range_read_iterations_accum{0.0};

  // Calls to checkMaxBetweenExclusive.
  double range_read_node_scan_accum{0.0};
};
// A type that's plumbed along the non-const call tree. Same lifetime as
// ConflictSet::Impl
struct WriteContext {
double entries_erased_accum = 0;
double insert_iterations_accum = 0;
double entries_inserted_accum = 0;
double nodes_allocated_accum = 0;
double nodes_released_accum = 0;
// Allocates a node of concrete type T by forwarding `c` (the partial key
// capacity) to the allocator member for that node type, and counts the
// allocation in nodes_allocated_accum.
//
// T must be one of Node0, Node3, Node16, Node48 or Node256. Any other type is
// rejected at compile time; without the check every branch below would be
// discarded and this non-void function would flow off its end.
template <class T> T *allocate(int c) {
  static_assert(std::is_same_v<T, Node0> || std::is_same_v<T, Node3> ||
                    std::is_same_v<T, Node16> || std::is_same_v<T, Node48> ||
                    std::is_same_v<T, Node256>,
                "allocate<T>: T must be Node0, Node3, Node16, Node48 or "
                "Node256");
  ++nodes_allocated_accum;
  if constexpr (std::is_same_v<T, Node0>) {
    return node0.allocate(c);
  } else if constexpr (std::is_same_v<T, Node3>) {
    return node3.allocate(c);
  } else if constexpr (std::is_same_v<T, Node16>) {
    return node16.allocate(c);
  } else if constexpr (std::is_same_v<T, Node48>) {
    return node48.allocate(c);
  } else {
    // Only Node256 can reach here, guaranteed by the static_assert above.
    return node256.allocate(c);
  }
}
// Hands node `c` back to the allocator member for its concrete type T, and
// counts the release in nodes_released_accum.
//
// T must be one of Node0, Node3, Node16, Node48 or Node256, so callers have to
// downcast before releasing. The base Node type and any other type are
// rejected at compile time; previously only Node was rejected, and any other
// type would have bumped the counter without releasing anything.
template <class T> void release(T *c) {
  static_assert(std::is_same_v<T, Node0> || std::is_same_v<T, Node3> ||
                    std::is_same_v<T, Node16> || std::is_same_v<T, Node48> ||
                    std::is_same_v<T, Node256>,
                "release<T>: T must be Node0, Node3, Node16, Node48 or "
                "Node256 (not the base Node type)");
  ++nodes_released_accum;
  if constexpr (std::is_same_v<T, Node0>) {
    return node0.release(c);
  } else if constexpr (std::is_same_v<T, Node3>) {
    return node3.release(c);
  } else if constexpr (std::is_same_v<T, Node16>) {
    return node16.release(c);
  } else if constexpr (std::is_same_v<T, Node48>) {
    return node48.release(c);
  } else {
    // Only Node256 can reach here, guaranteed by the static_assert above.
    return node256.release(c);
  }
}
private:
BoundedFreeListAllocator<Node0> node0; BoundedFreeListAllocator<Node0> node0;
BoundedFreeListAllocator<Node3> node3; BoundedFreeListAllocator<Node3> node3;
BoundedFreeListAllocator<Node16> node16; BoundedFreeListAllocator<Node16> node16;
@@ -954,8 +1002,7 @@ int getChildGeq(Node *self, int child) {
// Caller is responsible for assigning a non-null pointer to the returned // Caller is responsible for assigning a non-null pointer to the returned
// reference if null // reference if null
Node *&getOrCreateChild(Node *&self, uint8_t index, Node *&getOrCreateChild(Node *&self, uint8_t index, WriteContext *tls) {
NodeAllocators *allocators) {
// Fast path for if it exists already // Fast path for if it exists already
switch (self->getType()) { switch (self->getType()) {
@@ -996,9 +1043,9 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
case Type_Node0: { case Type_Node0: {
auto *self0 = static_cast<Node0 *>(self); auto *self0 = static_cast<Node0 *>(self);
auto *newSelf = allocators->node3.allocate(self->partialKeyLen); auto *newSelf = tls->allocate<Node3>(self->partialKeyLen);
newSelf->copyChildrenAndKeyFrom(*self0); newSelf->copyChildrenAndKeyFrom(*self0);
allocators->node0.release(self0); tls->release(self0);
self = newSelf; self = newSelf;
goto insert3; goto insert3;
@@ -1006,9 +1053,9 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
case Type_Node3: { case Type_Node3: {
if (self->numChildren == Node3::kMaxNodes) { if (self->numChildren == Node3::kMaxNodes) {
auto *self3 = static_cast<Node3 *>(self); auto *self3 = static_cast<Node3 *>(self);
auto *newSelf = allocators->node16.allocate(self->partialKeyLen); auto *newSelf = tls->allocate<Node16>(self->partialKeyLen);
newSelf->copyChildrenAndKeyFrom(*self3); newSelf->copyChildrenAndKeyFrom(*self3);
allocators->node3.release(self3); tls->release(self3);
self = newSelf; self = newSelf;
goto insert16; goto insert16;
} }
@@ -1038,9 +1085,9 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
case Type_Node16: { case Type_Node16: {
if (self->numChildren == Node16::kMaxNodes) { if (self->numChildren == Node16::kMaxNodes) {
auto *self16 = static_cast<Node16 *>(self); auto *self16 = static_cast<Node16 *>(self);
auto *newSelf = allocators->node48.allocate(self->partialKeyLen); auto *newSelf = tls->allocate<Node48>(self->partialKeyLen);
newSelf->copyChildrenAndKeyFrom(*self16); newSelf->copyChildrenAndKeyFrom(*self16);
allocators->node16.release(self16); tls->release(self16);
self = newSelf; self = newSelf;
goto insert48; goto insert48;
} }
@@ -1116,9 +1163,9 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
if (self->numChildren == 48) { if (self->numChildren == 48) {
auto *self48 = static_cast<Node48 *>(self); auto *self48 = static_cast<Node48 *>(self);
auto *newSelf = allocators->node256.allocate(self->partialKeyLen); auto *newSelf = tls->allocate<Node256>(self->partialKeyLen);
newSelf->copyChildrenAndKeyFrom(*self48); newSelf->copyChildrenAndKeyFrom(*self48);
allocators->node48.release(self48); tls->release(self48);
self = newSelf; self = newSelf;
goto insert256; goto insert256;
} }
@@ -1172,18 +1219,17 @@ Node *nextLogical(Node *node) {
// Invalidates `self`, replacing it with a node of at least capacity. // Invalidates `self`, replacing it with a node of at least capacity.
// Does not return nodes to freelists when kUseFreeList is false. // Does not return nodes to freelists when kUseFreeList is false.
void freeAndMakeCapacityAtLeast(Node *&self, int capacity, void freeAndMakeCapacityAtLeast(Node *&self, int capacity, WriteContext *tls,
NodeAllocators *allocators,
ConflictSet::Impl *impl, ConflictSet::Impl *impl,
const bool kUseFreeList) { const bool kUseFreeList) {
switch (self->getType()) { switch (self->getType()) {
case Type_Node0: { case Type_Node0: {
auto *self0 = (Node0 *)self; auto *self0 = (Node0 *)self;
auto *newSelf = allocators->node0.allocate(capacity); auto *newSelf = tls->allocate<Node0>(capacity);
newSelf->copyChildrenAndKeyFrom(*self0); newSelf->copyChildrenAndKeyFrom(*self0);
getInTree(self, impl) = newSelf; getInTree(self, impl) = newSelf;
if (kUseFreeList) { if (kUseFreeList) {
allocators->node0.release(self0); tls->release(self0);
} else { } else {
removeNode(self0); removeNode(self0);
safe_free(self0, self0->size()); safe_free(self0, self0->size());
@@ -1192,11 +1238,11 @@ void freeAndMakeCapacityAtLeast(Node *&self, int capacity,
} break; } break;
case Type_Node3: { case Type_Node3: {
auto *self3 = (Node3 *)self; auto *self3 = (Node3 *)self;
auto *newSelf = allocators->node3.allocate(capacity); auto *newSelf = tls->allocate<Node3>(capacity);
newSelf->copyChildrenAndKeyFrom(*self3); newSelf->copyChildrenAndKeyFrom(*self3);
getInTree(self, impl) = newSelf; getInTree(self, impl) = newSelf;
if (kUseFreeList) { if (kUseFreeList) {
allocators->node3.release(self3); tls->release(self3);
} else { } else {
removeNode(self3); removeNode(self3);
safe_free(self3, self3->size()); safe_free(self3, self3->size());
@@ -1205,11 +1251,11 @@ void freeAndMakeCapacityAtLeast(Node *&self, int capacity,
} break; } break;
case Type_Node16: { case Type_Node16: {
auto *self16 = (Node16 *)self; auto *self16 = (Node16 *)self;
auto *newSelf = allocators->node16.allocate(capacity); auto *newSelf = tls->allocate<Node16>(capacity);
newSelf->copyChildrenAndKeyFrom(*self16); newSelf->copyChildrenAndKeyFrom(*self16);
getInTree(self, impl) = newSelf; getInTree(self, impl) = newSelf;
if (kUseFreeList) { if (kUseFreeList) {
allocators->node16.release(self16); tls->release(self16);
} else { } else {
removeNode(self16); removeNode(self16);
safe_free(self16, self16->size()); safe_free(self16, self16->size());
@@ -1218,11 +1264,11 @@ void freeAndMakeCapacityAtLeast(Node *&self, int capacity,
} break; } break;
case Type_Node48: { case Type_Node48: {
auto *self48 = (Node48 *)self; auto *self48 = (Node48 *)self;
auto *newSelf = allocators->node48.allocate(capacity); auto *newSelf = tls->allocate<Node48>(capacity);
newSelf->copyChildrenAndKeyFrom(*self48); newSelf->copyChildrenAndKeyFrom(*self48);
getInTree(self, impl) = newSelf; getInTree(self, impl) = newSelf;
if (kUseFreeList) { if (kUseFreeList) {
allocators->node48.release(self48); tls->release(self48);
} else { } else {
removeNode(self48); removeNode(self48);
safe_free(self48, self48->size()); safe_free(self48, self48->size());
@@ -1231,11 +1277,11 @@ void freeAndMakeCapacityAtLeast(Node *&self, int capacity,
} break; } break;
case Type_Node256: { case Type_Node256: {
auto *self256 = (Node256 *)self; auto *self256 = (Node256 *)self;
auto *newSelf = allocators->node256.allocate(capacity); auto *newSelf = tls->allocate<Node256>(capacity);
newSelf->copyChildrenAndKeyFrom(*self256); newSelf->copyChildrenAndKeyFrom(*self256);
getInTree(self, impl) = newSelf; getInTree(self, impl) = newSelf;
if (kUseFreeList) { if (kUseFreeList) {
allocators->node256.release(self256); tls->release(self256);
} else { } else {
removeNode(self256); removeNode(self256);
safe_free(self256, self256->size()); safe_free(self256, self256->size());
@@ -1250,7 +1296,7 @@ void freeAndMakeCapacityAtLeast(Node *&self, int capacity,
// Fix larger-than-desired capacities. Does not return nodes to freelists, // Fix larger-than-desired capacities. Does not return nodes to freelists,
// since that wouldn't actually reclaim the memory used for partial key // since that wouldn't actually reclaim the memory used for partial key
// capacity. // capacity.
void maybeDecreaseCapacity(Node *&self, NodeAllocators *allocators, void maybeDecreaseCapacity(Node *&self, WriteContext *tls,
ConflictSet::Impl *impl) { ConflictSet::Impl *impl) {
const int maxCapacity = const int maxCapacity =
@@ -1263,7 +1309,7 @@ void maybeDecreaseCapacity(Node *&self, NodeAllocators *allocators,
if (self->getCapacity() <= maxCapacity) { if (self->getCapacity() <= maxCapacity) {
return; return;
} }
freeAndMakeCapacityAtLeast(self, maxCapacity, allocators, impl, false); freeAndMakeCapacityAtLeast(self, maxCapacity, tls, impl, false);
} }
void rezero(Node *n, InternalVersionT z) { void rezero(Node *n, InternalVersionT z) {
@@ -1313,8 +1359,8 @@ void rezero(Node *n, InternalVersionT z) {
} }
} }
void maybeDownsize(Node *self, NodeAllocators *allocators, void maybeDownsize(Node *self, WriteContext *tls, ConflictSet::Impl *impl,
ConflictSet::Impl *impl, Node *&dontInvalidate) { Node *&dontInvalidate) {
#if DEBUG_VERBOSE && !defined(NDEBUG) #if DEBUG_VERBOSE && !defined(NDEBUG)
fprintf(stderr, "maybeDownsize: %s\n", getSearchPathPrintable(self).c_str()); fprintf(stderr, "maybeDownsize: %s\n", getSearchPathPrintable(self).c_str());
@@ -1326,17 +1372,17 @@ void maybeDownsize(Node *self, NodeAllocators *allocators,
case Type_Node3: { case Type_Node3: {
auto *self3 = (Node3 *)self; auto *self3 = (Node3 *)self;
if (self->numChildren == 0) { if (self->numChildren == 0) {
auto *newSelf = allocators->node0.allocate(self->partialKeyLen); auto *newSelf = tls->allocate<Node0>(self->partialKeyLen);
newSelf->copyChildrenAndKeyFrom(*self3); newSelf->copyChildrenAndKeyFrom(*self3);
getInTree(self, impl) = newSelf; getInTree(self, impl) = newSelf;
allocators->node3.release(self3); tls->release(self3);
} else if (self->numChildren == 1 && !self->entryPresent) { } else if (self->numChildren == 1 && !self->entryPresent) {
auto *child = self3->children[0]; auto *child = self3->children[0];
int minCapacity = self3->partialKeyLen + 1 + child->partialKeyLen; int minCapacity = self3->partialKeyLen + 1 + child->partialKeyLen;
if (minCapacity > child->getCapacity()) { if (minCapacity > child->getCapacity()) {
const bool update = child == dontInvalidate; const bool update = child == dontInvalidate;
freeAndMakeCapacityAtLeast(child, minCapacity, allocators, impl, true); freeAndMakeCapacityAtLeast(child, minCapacity, tls, impl, true);
if (update) { if (update) {
dontInvalidate = child; dontInvalidate = child;
} }
@@ -1369,34 +1415,34 @@ void maybeDownsize(Node *self, NodeAllocators *allocators,
} }
getInTree(self, impl) = child; getInTree(self, impl) = child;
allocators->node3.release(self3); tls->release(self3);
} }
} break; } break;
case Type_Node16: case Type_Node16:
if (self->numChildren + int(self->entryPresent) < kMinChildrenNode16) { if (self->numChildren + int(self->entryPresent) < kMinChildrenNode16) {
auto *self16 = (Node16 *)self; auto *self16 = (Node16 *)self;
auto *newSelf = allocators->node3.allocate(self->partialKeyLen); auto *newSelf = tls->allocate<Node3>(self->partialKeyLen);
newSelf->copyChildrenAndKeyFrom(*self16); newSelf->copyChildrenAndKeyFrom(*self16);
getInTree(self, impl) = newSelf; getInTree(self, impl) = newSelf;
allocators->node16.release(self16); tls->release(self16);
} }
break; break;
case Type_Node48: case Type_Node48:
if (self->numChildren + int(self->entryPresent) < kMinChildrenNode48) { if (self->numChildren + int(self->entryPresent) < kMinChildrenNode48) {
auto *self48 = (Node48 *)self; auto *self48 = (Node48 *)self;
auto *newSelf = allocators->node16.allocate(self->partialKeyLen); auto *newSelf = tls->allocate<Node16>(self->partialKeyLen);
newSelf->copyChildrenAndKeyFrom(*self48); newSelf->copyChildrenAndKeyFrom(*self48);
getInTree(self, impl) = newSelf; getInTree(self, impl) = newSelf;
allocators->node48.release(self48); tls->release(self48);
} }
break; break;
case Type_Node256: case Type_Node256:
if (self->numChildren + int(self->entryPresent) < kMinChildrenNode256) { if (self->numChildren + int(self->entryPresent) < kMinChildrenNode256) {
auto *self256 = (Node256 *)self; auto *self256 = (Node256 *)self;
auto *newSelf = allocators->node48.allocate(self->partialKeyLen); auto *newSelf = tls->allocate<Node48>(self->partialKeyLen);
newSelf->copyChildrenAndKeyFrom(*self256); newSelf->copyChildrenAndKeyFrom(*self256);
getInTree(self, impl) = newSelf; getInTree(self, impl) = newSelf;
allocators->node256.release(self256); tls->release(self256);
} }
break; break;
default: // GCOVR_EXCL_LINE default: // GCOVR_EXCL_LINE
@@ -1404,15 +1450,13 @@ void maybeDownsize(Node *self, NodeAllocators *allocators,
} }
} }
thread_local double entries_erased_accum;
// Precondition: self is not the root. May invalidate nodes along the search // Precondition: self is not the root. May invalidate nodes along the search
// path to self. May invalidate children of self->parent. Returns a pointer to // path to self. May invalidate children of self->parent. Returns a pointer to
// the node after self. If erase invalidates the pointee of `dontInvalidate`, it // the node after self. If erase invalidates the pointee of `dontInvalidate`, it
// will update it to its new pointee as well. Precondition: `self->entryPresent` // will update it to its new pointee as well. Precondition: `self->entryPresent`
Node *erase(Node *self, NodeAllocators *allocators, ConflictSet::Impl *impl, Node *erase(Node *self, WriteContext *tls, ConflictSet::Impl *impl,
bool logical, Node *&dontInvalidate) { bool logical, Node *&dontInvalidate) {
++entries_erased_accum; ++tls->entries_erased_accum;
assert(self->parent != nullptr); assert(self->parent != nullptr);
#if DEBUG_VERBOSE && !defined(NDEBUG) #if DEBUG_VERBOSE && !defined(NDEBUG)
@@ -1430,7 +1474,7 @@ Node *erase(Node *self, NodeAllocators *allocators, ConflictSet::Impl *impl,
if (self->numChildren != 0) { if (self->numChildren != 0) {
const bool update = result == dontInvalidate; const bool update = result == dontInvalidate;
maybeDownsize(self, allocators, impl, result); maybeDownsize(self, tls, impl, result);
if (update) { if (update) {
dontInvalidate = result; dontInvalidate = result;
} }
@@ -1438,7 +1482,7 @@ Node *erase(Node *self, NodeAllocators *allocators, ConflictSet::Impl *impl,
} }
assert(self->getType() == Type_Node0); assert(self->getType() == Type_Node0);
allocators->node0.release((Node0 *)self); tls->release((Node0 *)self);
switch (parent->getType()) { switch (parent->getType()) {
case Type_Node0: // GCOVR_EXCL_LINE case Type_Node0: // GCOVR_EXCL_LINE
@@ -1526,7 +1570,7 @@ Node *erase(Node *self, NodeAllocators *allocators, ConflictSet::Impl *impl,
} }
const bool update = result == dontInvalidate; const bool update = result == dontInvalidate;
maybeDownsize(parent, allocators, impl, result); maybeDownsize(parent, tls, impl, result);
if (update) { if (update) {
dontInvalidate = result; dontInvalidate = result;
} }
@@ -1721,30 +1765,20 @@ struct SearchStepWise {
} }
}; };
thread_local double point_read_accum = 0;
thread_local double prefix_read_accum = 0;
thread_local double range_read_accum = 0;
thread_local double point_read_short_circuit_accum = 0;
thread_local double prefix_read_short_circuit_accum = 0;
thread_local double range_read_short_circuit_accum = 0;
thread_local double point_read_iterations_accum = 0;
thread_local double prefix_read_iterations_accum = 0;
thread_local double range_read_iterations_accum = 0;
thread_local double range_read_node_scan_accum = 0;
// Logically this is the same as performing firstGeq and then checking against // Logically this is the same as performing firstGeq and then checking against
// point or range version according to cmp, but this version short circuits as // point or range version according to cmp, but this version short circuits as
// soon as it can prove that there's no conflict. // soon as it can prove that there's no conflict.
bool checkPointRead(Node *n, const std::span<const uint8_t> key, bool checkPointRead(Node *n, const std::span<const uint8_t> key,
InternalVersionT readVersion, ConflictSet::Impl *impl) { InternalVersionT readVersion, ConflictSet::Impl *impl,
++point_read_accum; ReadContext *tls) {
++tls->point_read_accum;
#if DEBUG_VERBOSE && !defined(NDEBUG) #if DEBUG_VERBOSE && !defined(NDEBUG)
fprintf(stderr, "Check point read: %s\n", printable(key).c_str()); fprintf(stderr, "Check point read: %s\n", printable(key).c_str());
#endif #endif
auto remaining = key; auto remaining = key;
for (;; ++point_read_iterations_accum) { for (;; ++tls->point_read_iterations_accum) {
if (maxVersion(n, impl) <= readVersion) { if (maxVersion(n, impl) <= readVersion) {
++point_read_short_circuit_accum; ++tls->point_read_short_circuit_accum;
return true; return true;
} }
if (remaining.size() == 0) { if (remaining.size() == 0) {
@@ -1815,20 +1849,21 @@ downLeftSpine:
// max version or range version if this prefix doesn't exist, but this version // max version or range version if this prefix doesn't exist, but this version
// short circuits as soon as it can prove that there's no conflict. // short circuits as soon as it can prove that there's no conflict.
bool checkPrefixRead(Node *n, const std::span<const uint8_t> key, bool checkPrefixRead(Node *n, const std::span<const uint8_t> key,
InternalVersionT readVersion, ConflictSet::Impl *impl) { InternalVersionT readVersion, ConflictSet::Impl *impl,
++prefix_read_accum; ReadContext *tls) {
++tls->prefix_read_accum;
#if DEBUG_VERBOSE && !defined(NDEBUG) #if DEBUG_VERBOSE && !defined(NDEBUG)
fprintf(stderr, "Check prefix read: %s\n", printable(key).c_str()); fprintf(stderr, "Check prefix read: %s\n", printable(key).c_str());
#endif #endif
auto remaining = key; auto remaining = key;
for (;; ++prefix_read_iterations_accum) { for (;; ++tls->prefix_read_iterations_accum) {
auto m = maxVersion(n, impl); auto m = maxVersion(n, impl);
if (remaining.size() == 0) { if (remaining.size() == 0) {
return m <= readVersion; return m <= readVersion;
} }
if (m <= readVersion) { if (m <= readVersion) {
++prefix_read_short_circuit_accum; ++tls->prefix_read_short_circuit_accum;
return true; return true;
} }
@@ -2058,8 +2093,8 @@ scan16(const InternalVersionT *vs, int begin, int end,
// account for the range version of firstGt(searchpath(n) + [end - 1]) // account for the range version of firstGt(searchpath(n) + [end - 1])
template <bool kAVX512> template <bool kAVX512>
bool checkMaxBetweenExclusive(Node *n, int begin, int end, bool checkMaxBetweenExclusive(Node *n, int begin, int end,
InternalVersionT readVersion) { InternalVersionT readVersion, ReadContext *tls) {
++range_read_node_scan_accum; ++tls->range_read_node_scan_accum;
assume(-1 <= begin); assume(-1 <= begin);
assume(begin <= 256); assume(begin <= 256);
assume(-1 <= end); assume(-1 <= end);
@@ -2328,13 +2363,13 @@ Vector<uint8_t> getSearchPath(Arena &arena, Node *n) {
template <bool kAVX512> template <bool kAVX512>
bool checkRangeStartsWith(Node *n, std::span<const uint8_t> key, int begin, bool checkRangeStartsWith(Node *n, std::span<const uint8_t> key, int begin,
int end, InternalVersionT readVersion, int end, InternalVersionT readVersion,
ConflictSet::Impl *impl) { ConflictSet::Impl *impl, ReadContext *tls) {
#if DEBUG_VERBOSE && !defined(NDEBUG) #if DEBUG_VERBOSE && !defined(NDEBUG)
fprintf(stderr, "%s(%02x,%02x)*\n", printable(key).c_str(), begin, end); fprintf(stderr, "%s(%02x,%02x)*\n", printable(key).c_str(), begin, end);
#endif #endif
auto remaining = key; auto remaining = key;
if (remaining.size() == 0) { if (remaining.size() == 0) {
return checkMaxBetweenExclusive<kAVX512>(n, begin, end, readVersion); return checkMaxBetweenExclusive<kAVX512>(n, begin, end, readVersion, tls);
} }
auto *child = getChild(n, remaining[0]); auto *child = getChild(n, remaining[0]);
@@ -2400,9 +2435,10 @@ namespace {
// that are >= key is <= readVersion // that are >= key is <= readVersion
template <bool kAVX512> struct CheckRangeLeftSide { template <bool kAVX512> struct CheckRangeLeftSide {
CheckRangeLeftSide(Node *n, std::span<const uint8_t> key, int prefixLen, CheckRangeLeftSide(Node *n, std::span<const uint8_t> key, int prefixLen,
InternalVersionT readVersion, ConflictSet::Impl *impl) InternalVersionT readVersion, ConflictSet::Impl *impl,
ReadContext *tls)
: n(n), remaining(key), prefixLen(prefixLen), readVersion(readVersion), : n(n), remaining(key), prefixLen(prefixLen), readVersion(readVersion),
impl(impl) { impl(impl), tls(tls) {
#if DEBUG_VERBOSE && !defined(NDEBUG) #if DEBUG_VERBOSE && !defined(NDEBUG)
fprintf(stderr, "Check range left side from %s for keys starting with %s\n", fprintf(stderr, "Check range left side from %s for keys starting with %s\n",
printable(key).c_str(), printable(key).c_str(),
@@ -2415,6 +2451,7 @@ template <bool kAVX512> struct CheckRangeLeftSide {
int prefixLen; int prefixLen;
InternalVersionT readVersion; InternalVersionT readVersion;
ConflictSet::Impl *impl; ConflictSet::Impl *impl;
ReadContext *tls;
int searchPathLen = 0; int searchPathLen = 0;
bool ok; bool ok;
@@ -2430,8 +2467,8 @@ template <bool kAVX512> struct CheckRangeLeftSide {
} }
if (searchPathLen >= prefixLen) { if (searchPathLen >= prefixLen) {
if (!checkMaxBetweenExclusive<kAVX512>(n, remaining[0], 256, if (!checkMaxBetweenExclusive<kAVX512>(n, remaining[0], 256, readVersion,
readVersion)) { tls)) {
ok = false; ok = false;
return true; return true;
} }
@@ -2520,9 +2557,10 @@ template <bool kAVX512> struct CheckRangeLeftSide {
// that are < key is <= readVersion // that are < key is <= readVersion
template <bool kAVX512> struct CheckRangeRightSide { template <bool kAVX512> struct CheckRangeRightSide {
CheckRangeRightSide(Node *n, std::span<const uint8_t> key, int prefixLen, CheckRangeRightSide(Node *n, std::span<const uint8_t> key, int prefixLen,
InternalVersionT readVersion, ConflictSet::Impl *impl) InternalVersionT readVersion, ConflictSet::Impl *impl,
ReadContext *tls)
: n(n), key(key), remaining(key), prefixLen(prefixLen), : n(n), key(key), remaining(key), prefixLen(prefixLen),
readVersion(readVersion), impl(impl) { readVersion(readVersion), impl(impl), tls(tls) {
#if DEBUG_VERBOSE && !defined(NDEBUG) #if DEBUG_VERBOSE && !defined(NDEBUG)
fprintf(stderr, "Check range right side to %s for keys starting with %s\n", fprintf(stderr, "Check range right side to %s for keys starting with %s\n",
printable(key).c_str(), printable(key).c_str(),
@@ -2536,6 +2574,7 @@ template <bool kAVX512> struct CheckRangeRightSide {
int prefixLen; int prefixLen;
InternalVersionT readVersion; InternalVersionT readVersion;
ConflictSet::Impl *impl; ConflictSet::Impl *impl;
ReadContext *tls;
int searchPathLen = 0; int searchPathLen = 0;
bool ok; bool ok;
@@ -2560,8 +2599,8 @@ template <bool kAVX512> struct CheckRangeRightSide {
return true; return true;
} }
if (!checkMaxBetweenExclusive<kAVX512>(n, -1, remaining[0], if (!checkMaxBetweenExclusive<kAVX512>(n, -1, remaining[0], readVersion,
readVersion)) { tls)) {
ok = false; ok = false;
return true; return true;
} }
@@ -2656,28 +2695,29 @@ template <bool kAVX512> struct CheckRangeRightSide {
template <bool kAVX512> template <bool kAVX512>
bool checkRangeReadImpl(Node *n, std::span<const uint8_t> begin, bool checkRangeReadImpl(Node *n, std::span<const uint8_t> begin,
std::span<const uint8_t> end, std::span<const uint8_t> end,
InternalVersionT readVersion, ConflictSet::Impl *impl) { InternalVersionT readVersion, ConflictSet::Impl *impl,
ReadContext *tls) {
int lcp = longestCommonPrefix(begin.data(), end.data(), int lcp = longestCommonPrefix(begin.data(), end.data(),
std::min(begin.size(), end.size())); std::min(begin.size(), end.size()));
if (lcp == int(begin.size()) && end.size() == begin.size() + 1 && if (lcp == int(begin.size()) && end.size() == begin.size() + 1 &&
end.back() == 0) { end.back() == 0) {
return checkPointRead(n, begin, readVersion, impl); return checkPointRead(n, begin, readVersion, impl, tls);
} }
if (lcp == int(begin.size() - 1) && end.size() == begin.size() && if (lcp == int(begin.size() - 1) && end.size() == begin.size() &&
int(begin.back()) + 1 == int(end.back())) { int(begin.back()) + 1 == int(end.back())) {
return checkPrefixRead(n, begin, readVersion, impl); return checkPrefixRead(n, begin, readVersion, impl, tls);
} }
++range_read_accum; ++tls->range_read_accum;
SearchStepWise search{n, begin.subspan(0, lcp)}; SearchStepWise search{n, begin.subspan(0, lcp)};
Arena arena; Arena arena;
for (;; ++range_read_iterations_accum) { for (;; ++tls->range_read_iterations_accum) {
assert(getSearchPath(arena, search.n) <=> assert(getSearchPath(arena, search.n) <=>
begin.subspan(0, lcp - search.remaining.size()) == begin.subspan(0, lcp - search.remaining.size()) ==
0); 0);
if (maxVersion(search.n, impl) <= readVersion) { if (maxVersion(search.n, impl) <= readVersion) {
++range_read_short_circuit_accum; ++tls->range_read_short_circuit_accum;
return true; return true;
} }
if (search.step()) { if (search.step()) {
@@ -2697,41 +2737,41 @@ bool checkRangeReadImpl(Node *n, std::span<const uint8_t> begin,
lcp -= consumed; lcp -= consumed;
if (lcp == int(begin.size())) { if (lcp == int(begin.size())) {
CheckRangeRightSide<kAVX512> checkRangeRightSide{n, end, lcp, readVersion, CheckRangeRightSide<kAVX512> checkRangeRightSide{n, end, lcp,
impl}; readVersion, impl, tls};
while (!checkRangeRightSide.step()) while (!checkRangeRightSide.step())
; ;
return checkRangeRightSide.ok; return checkRangeRightSide.ok;
} }
if (!checkRangeStartsWith<kAVX512>(n, begin.subspan(0, lcp), begin[lcp], if (!checkRangeStartsWith<kAVX512>(n, begin.subspan(0, lcp), begin[lcp],
end[lcp], readVersion, impl)) { end[lcp], readVersion, impl, tls)) {
return false; return false;
} }
CheckRangeLeftSide<kAVX512> checkRangeLeftSide{n, begin, lcp + 1, readVersion, CheckRangeLeftSide<kAVX512> checkRangeLeftSide{n, begin, lcp + 1,
impl}; readVersion, impl, tls};
CheckRangeRightSide<kAVX512> checkRangeRightSide{n, end, lcp + 1, readVersion, CheckRangeRightSide<kAVX512> checkRangeRightSide{n, end, lcp + 1,
impl}; readVersion, impl, tls};
for (;;) { for (;;) {
bool leftDone = checkRangeLeftSide.step(); bool leftDone = checkRangeLeftSide.step();
bool rightDone = checkRangeRightSide.step(); bool rightDone = checkRangeRightSide.step();
if (!leftDone && !rightDone) { if (!leftDone && !rightDone) {
range_read_iterations_accum += 2; tls->range_read_iterations_accum += 2;
continue; continue;
} }
if (leftDone && rightDone) { if (leftDone && rightDone) {
break; break;
} else if (leftDone) { } else if (leftDone) {
while (!checkRangeRightSide.step()) { while (!checkRangeRightSide.step()) {
++range_read_iterations_accum; ++tls->range_read_iterations_accum;
} }
break; break;
} else { } else {
assert(rightDone); assert(rightDone);
while (!checkRangeLeftSide.step()) { while (!checkRangeLeftSide.step()) {
++range_read_iterations_accum; ++tls->range_read_iterations_accum;
} }
} }
break; break;
@@ -2770,8 +2810,8 @@ checkRangeReadImpl<true>(Node *n, std::span<const uint8_t> begin,
#if defined(__SANITIZE_THREAD__) || !defined(__x86_64__) #if defined(__SANITIZE_THREAD__) || !defined(__x86_64__)
bool checkRangeRead(Node *n, std::span<const uint8_t> begin, bool checkRangeRead(Node *n, std::span<const uint8_t> begin,
std::span<const uint8_t> end, InternalVersionT readVersion, std::span<const uint8_t> end, InternalVersionT readVersion,
ConflictSet::Impl *impl) { ConflictSet::Impl *impl, ReadContext *tls) {
return checkRangeReadImpl<false>(n, begin, end, readVersion, impl); return checkRangeReadImpl<false>(n, begin, end, readVersion, impl, tls);
} }
#else #else
__attribute__((target("default"))) bool __attribute__((target("default"))) bool
@@ -2788,18 +2828,16 @@ checkRangeRead(Node *n, std::span<const uint8_t> begin,
} }
#endif #endif
thread_local double insert_iterations_accum;
// Returns a pointer to the newly inserted node. Caller must set // Returns a pointer to the newly inserted node. Caller must set
// `entryPresent`, `entry` fields and `maxVersion` on the result. The search // `entryPresent`, `entry` fields and `maxVersion` on the result. The search
// path of the result's parent will have `maxVersion` at least `writeVersion` as // path of the result's parent will have `maxVersion` at least `writeVersion` as
// a postcondition. Nodes along the search path to `key` may be invalidated. // a postcondition. Nodes along the search path to `key` may be invalidated.
template <bool kBegin> template <bool kBegin>
[[nodiscard]] Node * [[nodiscard]] Node *insert(Node **self, std::span<const uint8_t> key,
insert(Node **self, std::span<const uint8_t> key, InternalVersionT writeVersion, InternalVersionT writeVersion, WriteContext *tls,
NodeAllocators *allocators, ConflictSet::Impl *impl) { ConflictSet::Impl *impl) {
for (;; ++insert_iterations_accum) { for (;; ++tls->insert_iterations_accum) {
if ((*self)->partialKeyLen > 0) { if ((*self)->partialKeyLen > 0) {
// Handle an existing partial key // Handle an existing partial key
@@ -2811,7 +2849,7 @@ insert(Node **self, std::span<const uint8_t> key, InternalVersionT writeVersion,
InternalVersionT oldMaxVersion = maxVersion(old, impl); InternalVersionT oldMaxVersion = maxVersion(old, impl);
// *self will have one child // *self will have one child
*self = allocators->node3.allocate(partialKeyIndex); *self = tls->allocate<Node3>(partialKeyIndex);
memcpy((char *)*self + kNodeCopyBegin, (char *)old + kNodeCopyBegin, memcpy((char *)*self + kNodeCopyBegin, (char *)old + kNodeCopyBegin,
kNodeCopySize); kNodeCopySize);
@@ -2824,8 +2862,7 @@ insert(Node **self, std::span<const uint8_t> key, InternalVersionT writeVersion,
memcpy((*self)->partialKey(), old->partialKey(), memcpy((*self)->partialKey(), old->partialKey(),
(*self)->partialKeyLen); (*self)->partialKeyLen);
getOrCreateChild(*self, old->partialKey()[partialKeyIndex], getOrCreateChild(*self, old->partialKey()[partialKeyIndex], tls) = old;
allocators) = old;
old->parent = *self; old->parent = *self;
old->parentsIndex = old->partialKey()[partialKeyIndex]; old->parentsIndex = old->partialKey()[partialKeyIndex];
setMaxVersion(old, impl, oldMaxVersion); setMaxVersion(old, impl, oldMaxVersion);
@@ -2865,9 +2902,9 @@ insert(Node **self, std::span<const uint8_t> key, InternalVersionT writeVersion,
setMaxVersion(*self, impl, writeVersion); setMaxVersion(*self, impl, writeVersion);
} }
auto &child = getOrCreateChild(*self, key.front(), allocators); auto &child = getOrCreateChild(*self, key.front(), tls);
if (!child) { if (!child) {
child = allocators->node0.allocate(key.size() - 1); child = tls->allocate<Node0>(key.size() - 1);
child->numChildren = 0; child->numChildren = 0;
child->entryPresent = false; child->entryPresent = false;
child->partialKeyLen = 0; child->partialKeyLen = 0;
@@ -2908,14 +2945,12 @@ void destroyTree(Node *root) {
} }
} }
thread_local double entries_inserted_accum;
void addPointWrite(Node *&root, std::span<const uint8_t> key, void addPointWrite(Node *&root, std::span<const uint8_t> key,
InternalVersionT writeVersion, NodeAllocators *allocators, InternalVersionT writeVersion, WriteContext *tls,
ConflictSet::Impl *impl) { ConflictSet::Impl *impl) {
auto *n = insert<true>(&root, key, writeVersion, allocators, impl); auto *n = insert<true>(&root, key, writeVersion, tls, impl);
if (!n->entryPresent) { if (!n->entryPresent) {
++entries_inserted_accum; ++tls->entries_inserted_accum;
auto *p = nextLogical(n); auto *p = nextLogical(n);
addKey(n); addKey(n);
@@ -2934,13 +2969,13 @@ void addPointWrite(Node *&root, std::span<const uint8_t> key,
void addWriteRange(Node *&root, std::span<const uint8_t> begin, void addWriteRange(Node *&root, std::span<const uint8_t> begin,
std::span<const uint8_t> end, InternalVersionT writeVersion, std::span<const uint8_t> end, InternalVersionT writeVersion,
NodeAllocators *allocators, ConflictSet::Impl *impl) { WriteContext *tls, ConflictSet::Impl *impl) {
int lcp = longestCommonPrefix(begin.data(), end.data(), int lcp = longestCommonPrefix(begin.data(), end.data(),
std::min(begin.size(), end.size())); std::min(begin.size(), end.size()));
if (lcp == int(begin.size()) && end.size() == begin.size() + 1 && if (lcp == int(begin.size()) && end.size() == begin.size() + 1 &&
end.back() == 0) { end.back() == 0) {
return addPointWrite(root, begin, writeVersion, allocators, impl); return addPointWrite(root, begin, writeVersion, tls, impl);
} }
const bool beginIsPrefix = lcp == int(begin.size()); const bool beginIsPrefix = lcp == int(begin.size());
auto remaining = begin.subspan(0, lcp); auto remaining = begin.subspan(0, lcp);
@@ -2977,8 +3012,7 @@ void addWriteRange(Node *&root, std::span<const uint8_t> begin,
begin = begin.subspan(consumed, begin.size() - consumed); begin = begin.subspan(consumed, begin.size() - consumed);
end = end.subspan(consumed, end.size() - consumed); end = end.subspan(consumed, end.size() - consumed);
auto *beginNode = auto *beginNode = insert<true>(useAsRoot, begin, writeVersion, tls, impl);
insert<true>(useAsRoot, begin, writeVersion, allocators, impl);
const bool insertedBegin = !beginNode->entryPresent; const bool insertedBegin = !beginNode->entryPresent;
@@ -2986,7 +3020,7 @@ void addWriteRange(Node *&root, std::span<const uint8_t> begin,
beginNode->entryPresent = true; beginNode->entryPresent = true;
if (insertedBegin) { if (insertedBegin) {
++entries_inserted_accum; ++tls->entries_inserted_accum;
auto *p = nextLogical(beginNode); auto *p = nextLogical(beginNode);
beginNode->entry.rangeVersion = beginNode->entry.rangeVersion =
p == nullptr ? InternalVersionT::zero p == nullptr ? InternalVersionT::zero
@@ -2999,7 +3033,7 @@ void addWriteRange(Node *&root, std::span<const uint8_t> begin,
assert(writeVersion >= beginNode->entry.pointVersion); assert(writeVersion >= beginNode->entry.pointVersion);
beginNode->entry.pointVersion = writeVersion; beginNode->entry.pointVersion = writeVersion;
auto *endNode = insert<false>(useAsRoot, end, writeVersion, allocators, impl); auto *endNode = insert<false>(useAsRoot, end, writeVersion, tls, impl);
const bool insertedEnd = !endNode->entryPresent; const bool insertedEnd = !endNode->entryPresent;
@@ -3007,7 +3041,7 @@ void addWriteRange(Node *&root, std::span<const uint8_t> begin,
endNode->entryPresent = true; endNode->entryPresent = true;
if (insertedEnd) { if (insertedEnd) {
++entries_inserted_accum; ++tls->entries_inserted_accum;
auto *p = nextLogical(endNode); auto *p = nextLogical(endNode);
endNode->entry.pointVersion = endNode->entry.pointVersion =
p == nullptr ? InternalVersionT::zero p == nullptr ? InternalVersionT::zero
@@ -3021,13 +3055,12 @@ void addWriteRange(Node *&root, std::span<const uint8_t> begin,
if (beginIsPrefix && insertedEnd) { if (beginIsPrefix && insertedEnd) {
// beginNode may have been invalidated when inserting end. TODO can we do // beginNode may have been invalidated when inserting end. TODO can we do
// better? // better?
beginNode = insert<true>(useAsRoot, begin, writeVersion, allocators, impl); beginNode = insert<true>(useAsRoot, begin, writeVersion, tls, impl);
assert(beginNode->entryPresent); assert(beginNode->entryPresent);
} }
for (beginNode = nextLogical(beginNode); beginNode != endNode; for (beginNode = nextLogical(beginNode); beginNode != endNode;
beginNode = beginNode = erase(beginNode, tls, impl, /*logical*/ true, endNode)) {
erase(beginNode, allocators, impl, /*logical*/ true, endNode)) {
} }
} }
@@ -3093,6 +3126,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
timespec ts_begin; timespec ts_begin;
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts_begin); clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts_begin);
#endif #endif
ReadContext tls;
int commits_accum = 0; int commits_accum = 0;
int conflicts_accum = 0; int conflicts_accum = 0;
int too_olds_accum = 0; int too_olds_accum = 0;
@@ -3108,32 +3142,34 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
reads[i].readVersion < oldestVersionFullPrecision ? TooOld reads[i].readVersion < oldestVersionFullPrecision ? TooOld
: (end.size() > 0 : (end.size() > 0
? checkRangeRead(root, begin, end, ? checkRangeRead(root, begin, end,
InternalVersionT(reads[i].readVersion), this) InternalVersionT(reads[i].readVersion), this,
&tls)
: checkPointRead(root, begin, : checkPointRead(root, begin,
InternalVersionT(reads[i].readVersion), this)) InternalVersionT(reads[i].readVersion), this,
&tls))
? Commit ? Commit
: Conflict; : Conflict;
commits_accum += result[i] == Commit; commits_accum += result[i] == Commit;
conflicts_accum += result[i] == Conflict; conflicts_accum += result[i] == Conflict;
too_olds_accum += result[i] == TooOld; too_olds_accum += result[i] == TooOld;
} }
point_read_total.add(std::exchange(point_read_accum, 0)); point_read_total.add(std::exchange(tls.point_read_accum, 0));
prefix_read_total.add(std::exchange(prefix_read_accum, 0)); prefix_read_total.add(std::exchange(tls.prefix_read_accum, 0));
range_read_total.add(std::exchange(range_read_accum, 0)); range_read_total.add(std::exchange(tls.range_read_accum, 0));
range_read_node_scan_total.add( range_read_node_scan_total.add(
std::exchange(range_read_node_scan_accum, 0)); std::exchange(tls.range_read_node_scan_accum, 0));
point_read_short_circuit_total.add( point_read_short_circuit_total.add(
std::exchange(point_read_short_circuit_accum, 0)); std::exchange(tls.point_read_short_circuit_accum, 0));
prefix_read_short_circuit_total.add( prefix_read_short_circuit_total.add(
std::exchange(prefix_read_short_circuit_accum, 0)); std::exchange(tls.prefix_read_short_circuit_accum, 0));
range_read_short_circuit_total.add( range_read_short_circuit_total.add(
std::exchange(range_read_short_circuit_accum, 0)); std::exchange(tls.range_read_short_circuit_accum, 0));
point_read_iterations_total.add( point_read_iterations_total.add(
std::exchange(point_read_iterations_accum, 0)); std::exchange(tls.point_read_iterations_accum, 0));
prefix_read_iterations_total.add( prefix_read_iterations_total.add(
std::exchange(prefix_read_iterations_accum, 0)); std::exchange(tls.prefix_read_iterations_accum, 0));
range_read_iterations_total.add( range_read_iterations_total.add(
std::exchange(range_read_iterations_accum, 0)); std::exchange(tls.range_read_iterations_accum, 0));
commits_total.add(commits_accum); commits_total.add(commits_accum);
conflicts_total.add(conflicts_accum); conflicts_total.add(conflicts_accum);
too_olds_total.add(too_olds_accum); too_olds_total.add(too_olds_accum);
@@ -3173,21 +3209,20 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
auto end = std::span<const uint8_t>(w.end.p, w.end.len); auto end = std::span<const uint8_t>(w.end.p, w.end.len);
if (w.end.len > 0) { if (w.end.len > 0) {
keyUpdates += 3; keyUpdates += 3;
addWriteRange(root, begin, end, InternalVersionT(writeVersion), addWriteRange(root, begin, end, InternalVersionT(writeVersion), &tls,
&allocators, this); this);
} else { } else {
keyUpdates += 2; keyUpdates += 2;
addPointWrite(root, begin, InternalVersionT(writeVersion), &allocators, addPointWrite(root, begin, InternalVersionT(writeVersion), &tls, this);
this);
} }
} }
memory_bytes.set(totalBytes); memory_bytes.set(totalBytes);
nodes_allocated_total.add(std::exchange(nodes_allocated_accum, 0)); nodes_allocated_total.add(std::exchange(tls.nodes_allocated_accum, 0));
nodes_released_total.add(std::exchange(nodes_released_accum, 0)); nodes_released_total.add(std::exchange(tls.nodes_released_accum, 0));
entries_inserted_total.add(std::exchange(entries_inserted_accum, 0)); entries_inserted_total.add(std::exchange(tls.entries_inserted_accum, 0));
entries_erased_total.add(std::exchange(entries_erased_accum, 0)); entries_erased_total.add(std::exchange(tls.entries_erased_accum, 0));
insert_iterations_total.add(std::exchange(insert_iterations_accum, 0)); insert_iterations_total.add(std::exchange(tls.insert_iterations_accum, 0));
write_bytes_total.add(write_byte_accum); write_bytes_total.add(write_byte_accum);
} }
@@ -3219,9 +3254,9 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
// node is greater than the point version of the left node // node is greater than the point version of the left node
assert(n->entry.rangeVersion <= oldestVersion); assert(n->entry.rangeVersion <= oldestVersion);
Node *dummy = nullptr; Node *dummy = nullptr;
n = erase(n, &allocators, this, /*logical*/ false, dummy); n = erase(n, &tls, this, /*logical*/ false, dummy);
} else { } else {
maybeDecreaseCapacity(n, &allocators, this); maybeDecreaseCapacity(n, &tls, this);
n = nextPhysical(n); n = nextPhysical(n);
} }
} }
@@ -3262,10 +3297,10 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
keyUpdates = gcScanStep(keyUpdates); keyUpdates = gcScanStep(keyUpdates);
memory_bytes.set(totalBytes); memory_bytes.set(totalBytes);
nodes_allocated_total.add(std::exchange(nodes_allocated_accum, 0)); nodes_allocated_total.add(std::exchange(tls.nodes_allocated_accum, 0));
nodes_released_total.add(std::exchange(nodes_released_accum, 0)); nodes_released_total.add(std::exchange(tls.nodes_released_accum, 0));
entries_inserted_total.add(std::exchange(entries_inserted_accum, 0)); entries_inserted_total.add(std::exchange(tls.entries_inserted_accum, 0));
entries_erased_total.add(std::exchange(entries_erased_accum, 0)); entries_erased_total.add(std::exchange(tls.entries_erased_accum, 0));
oldest_version.set(oldestVersionFullPrecision); oldest_version.set(oldestVersionFullPrecision);
} }
@@ -3278,15 +3313,15 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
oldest_version.set(oldestVersionFullPrecision); oldest_version.set(oldestVersionFullPrecision);
newest_version.set(newestVersionFullPrecision); newest_version.set(newestVersionFullPrecision);
allocators.~NodeAllocators(); tls.~WriteContext();
new (&allocators) NodeAllocators(); new (&tls) WriteContext();
removalKeyArena = Arena{}; removalKeyArena = Arena{};
removalKey = {}; removalKey = {};
keyUpdates = 10; keyUpdates = 10;
// Insert "" // Insert ""
root = allocators.node0.allocate(0); root = tls.allocate<Node0>(0);
root->numChildren = 0; root->numChildren = 0;
root->parent = nullptr; root->parent = nullptr;
rootMaxVersion = this->oldestVersion; rootMaxVersion = this->oldestVersion;
@@ -3313,7 +3348,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
safe_free(metrics, metricsCount * sizeof(metrics[0])); safe_free(metrics, metricsCount * sizeof(metrics[0]));
} }
NodeAllocators allocators; WriteContext tls;
Arena removalKeyArena; Arena removalKeyArena;
std::span<const uint8_t> removalKey; std::span<const uint8_t> removalKey;