diff --git a/ConflictSet.cpp b/ConflictSet.cpp index ba6417c..44c40f7 100644 --- a/ConflictSet.cpp +++ b/ConflictSet.cpp @@ -587,9 +587,6 @@ struct Counter : private Metric { } }; -thread_local double nodes_allocated_accum = 0; -thread_local double nodes_released_accum = 0; - template struct BoundedFreeListAllocator { static_assert(sizeof(T) >= sizeof(void *)); @@ -625,7 +622,6 @@ template struct BoundedFreeListAllocator { } T *allocate(int partialKeyCapacity) { - ++nodes_allocated_accum; T *result = allocate_helper(partialKeyCapacity); if constexpr (!std::is_same_v) { memset(result->children, 0, sizeof(result->children)); @@ -642,7 +638,6 @@ template struct BoundedFreeListAllocator { } void release(T *p) { - ++nodes_released_accum; if (freeListBytes >= kFreeListMaxMemory) { removeNode(p); return safe_free(p, sizeof(T) + p->partialKeyCapacity); @@ -710,7 +705,60 @@ size_t Node::size() const { } } -struct NodeAllocators { +// A type that's plumbed along the check call tree. Lifetime ends after each +// check call. +struct ReadContext { + double point_read_accum = 0; + double prefix_read_accum = 0; + double range_read_accum = 0; + double point_read_short_circuit_accum = 0; + double prefix_read_short_circuit_accum = 0; + double range_read_short_circuit_accum = 0; + double point_read_iterations_accum = 0; + double prefix_read_iterations_accum = 0; + double range_read_iterations_accum = 0; + double range_read_node_scan_accum = 0; +}; + +// A type that's plumbed along the non-const call tree. Same lifetime as +// ConflictSet::Impl +struct WriteContext { + double entries_erased_accum = 0; + double insert_iterations_accum = 0; + double entries_inserted_accum = 0; + double nodes_allocated_accum = 0; + double nodes_released_accum = 0; + template T *allocate(int c) { + ++nodes_allocated_accum; + if constexpr (std::is_same_v) { + return node0.allocate(c); + } else if constexpr (std::is_same_v) { + return node3.allocate(c); + } else if constexpr (std::is_same_v) { + return node16.allocate(c); + } else if constexpr (std::is_same_v) { + return node48.allocate(c); + } else if constexpr (std::is_same_v) { + return node256.allocate(c); + } + } + template void release(T *c) { + static_assert(!std::is_same_v); + ++nodes_released_accum; + if constexpr (std::is_same_v) { + return node0.release(c); + } else if constexpr (std::is_same_v) { + return node3.release(c); + } else if constexpr (std::is_same_v) { + return node16.release(c); + } else if constexpr (std::is_same_v) { + return node48.release(c); + } else if constexpr (std::is_same_v) { + return node256.release(c); + } + } + +private: BoundedFreeListAllocator node0; BoundedFreeListAllocator node3; BoundedFreeListAllocator node16; @@ -954,8 +1002,7 @@ int getChildGeq(Node *self, int child) { // Caller is responsible for assigning a non-null pointer to the returned // reference if null -Node *&getOrCreateChild(Node *&self, uint8_t index, - NodeAllocators *allocators) { +Node *&getOrCreateChild(Node *&self, uint8_t index, WriteContext *tls) { // Fast path for if it exists already switch (self->getType()) { @@ -996,9 +1043,9 @@ Node *&getOrCreateChild(Node *&self, uint8_t index, case Type_Node0: { auto *self0 = static_cast(self); - auto *newSelf = allocators->node3.allocate(self->partialKeyLen); + auto *newSelf = tls->allocate(self->partialKeyLen); newSelf->copyChildrenAndKeyFrom(*self0); - allocators->node0.release(self0); + tls->release(self0); self = newSelf; goto insert3; @@ -1006,9 +1053,9 @@ Node *&getOrCreateChild(Node *&self, uint8_t index, case Type_Node3: { if (self->numChildren == Node3::kMaxNodes) { auto *self3 = static_cast(self); - auto *newSelf = allocators->node16.allocate(self->partialKeyLen); + auto *newSelf = tls->allocate(self->partialKeyLen); newSelf->copyChildrenAndKeyFrom(*self3); - allocators->node3.release(self3); + tls->release(self3); self = newSelf; goto insert16; } @@ -1038,9 +1085,9 @@ Node *&getOrCreateChild(Node *&self, uint8_t index, case Type_Node16: { if (self->numChildren == Node16::kMaxNodes) { auto *self16 = static_cast(self); - auto *newSelf = allocators->node48.allocate(self->partialKeyLen); + auto *newSelf = tls->allocate(self->partialKeyLen); newSelf->copyChildrenAndKeyFrom(*self16); - allocators->node16.release(self16); + tls->release(self16); self = newSelf; goto insert48; } @@ -1116,9 +1163,9 @@ Node *&getOrCreateChild(Node *&self, uint8_t index, if (self->numChildren == 48) { auto *self48 = static_cast(self); - auto *newSelf = allocators->node256.allocate(self->partialKeyLen); + auto *newSelf = tls->allocate(self->partialKeyLen); newSelf->copyChildrenAndKeyFrom(*self48); - allocators->node48.release(self48); + tls->release(self48); self = newSelf; goto insert256; } @@ -1172,18 +1219,17 @@ Node *nextLogical(Node *node) { // Invalidates `self`, replacing it with a node of at least capacity. // Does not return nodes to freelists when kUseFreeList is false. -void freeAndMakeCapacityAtLeast(Node *&self, int capacity, - NodeAllocators *allocators, +void freeAndMakeCapacityAtLeast(Node *&self, int capacity, WriteContext *tls, ConflictSet::Impl *impl, const bool kUseFreeList) { switch (self->getType()) { case Type_Node0: { auto *self0 = (Node0 *)self; - auto *newSelf = allocators->node0.allocate(capacity); + auto *newSelf = tls->allocate(capacity); newSelf->copyChildrenAndKeyFrom(*self0); getInTree(self, impl) = newSelf; if (kUseFreeList) { - allocators->node0.release(self0); + tls->release(self0); } else { removeNode(self0); safe_free(self0, self0->size()); @@ -1192,11 +1238,11 @@ void freeAndMakeCapacityAtLeast(Node *&self, int capacity, } break; case Type_Node3: { auto *self3 = (Node3 *)self; - auto *newSelf = allocators->node3.allocate(capacity); + auto *newSelf = tls->allocate(capacity); newSelf->copyChildrenAndKeyFrom(*self3); getInTree(self, impl) = newSelf; if (kUseFreeList) { - allocators->node3.release(self3); + tls->release(self3); } else { removeNode(self3); safe_free(self3, self3->size()); @@ -1205,11 +1251,11 @@ void freeAndMakeCapacityAtLeast(Node *&self, int capacity, } break; case Type_Node16: { auto *self16 = (Node16 *)self; - auto *newSelf = allocators->node16.allocate(capacity); + auto *newSelf = tls->allocate(capacity); newSelf->copyChildrenAndKeyFrom(*self16); getInTree(self, impl) = newSelf; if (kUseFreeList) { - allocators->node16.release(self16); + tls->release(self16); } else { removeNode(self16); safe_free(self16, self16->size()); @@ -1218,11 +1264,11 @@ void freeAndMakeCapacityAtLeast(Node *&self, int capacity, } break; case Type_Node48: { auto *self48 = (Node48 *)self; - auto *newSelf = allocators->node48.allocate(capacity); + auto *newSelf = tls->allocate(capacity); newSelf->copyChildrenAndKeyFrom(*self48); getInTree(self, impl) = newSelf; if (kUseFreeList) { - allocators->node48.release(self48); + tls->release(self48); } else { removeNode(self48); safe_free(self48, self48->size()); @@ -1231,11 +1277,11 @@ void freeAndMakeCapacityAtLeast(Node *&self, int capacity, } break; case Type_Node256: { auto *self256 = (Node256 *)self; - auto *newSelf = allocators->node256.allocate(capacity); + auto *newSelf = tls->allocate(capacity); newSelf->copyChildrenAndKeyFrom(*self256); getInTree(self, impl) = newSelf; if (kUseFreeList) { - allocators->node256.release(self256); + tls->release(self256); } else { removeNode(self256); safe_free(self256, self256->size()); @@ -1250,7 +1296,7 @@ void freeAndMakeCapacityAtLeast(Node *&self, int capacity, // Fix larger-than-desired capacities. Does not return nodes to freelists, // since that wouldn't actually reclaim the memory used for partial key // capacity. -void maybeDecreaseCapacity(Node *&self, NodeAllocators *allocators, +void maybeDecreaseCapacity(Node *&self, WriteContext *tls, ConflictSet::Impl *impl) { const int maxCapacity = @@ -1263,7 +1309,7 @@ void maybeDecreaseCapacity(Node *&self, NodeAllocators *allocators, if (self->getCapacity() <= maxCapacity) { return; } - freeAndMakeCapacityAtLeast(self, maxCapacity, allocators, impl, false); + freeAndMakeCapacityAtLeast(self, maxCapacity, tls, impl, false); } void rezero(Node *n, InternalVersionT z) { @@ -1313,8 +1359,8 @@ void rezero(Node *n, InternalVersionT z) { } } -void maybeDownsize(Node *self, NodeAllocators *allocators, - ConflictSet::Impl *impl, Node *&dontInvalidate) { +void maybeDownsize(Node *self, WriteContext *tls, ConflictSet::Impl *impl, + Node *&dontInvalidate) { #if DEBUG_VERBOSE && !defined(NDEBUG) fprintf(stderr, "maybeDownsize: %s\n", getSearchPathPrintable(self).c_str()); @@ -1326,17 +1372,17 @@ void maybeDownsize(Node *self, NodeAllocators *allocators, case Type_Node3: { auto *self3 = (Node3 *)self; if (self->numChildren == 0) { - auto *newSelf = allocators->node0.allocate(self->partialKeyLen); + auto *newSelf = tls->allocate(self->partialKeyLen); newSelf->copyChildrenAndKeyFrom(*self3); getInTree(self, impl) = newSelf; - allocators->node3.release(self3); + tls->release(self3); } else if (self->numChildren == 1 && !self->entryPresent) { auto *child = self3->children[0]; int minCapacity = self3->partialKeyLen + 1 + child->partialKeyLen; if (minCapacity > child->getCapacity()) { const bool update = child == dontInvalidate; - freeAndMakeCapacityAtLeast(child, minCapacity, allocators, impl, true); + freeAndMakeCapacityAtLeast(child, minCapacity, tls, impl, true); if (update) { dontInvalidate = child; } @@ -1369,34 +1415,34 @@ void maybeDownsize(Node *self, NodeAllocators *allocators, } getInTree(self, impl) = child; - allocators->node3.release(self3); + tls->release(self3); } } break; case Type_Node16: if (self->numChildren + int(self->entryPresent) < kMinChildrenNode16) { auto *self16 = (Node16 *)self; - auto *newSelf = allocators->node3.allocate(self->partialKeyLen); + auto *newSelf = tls->allocate(self->partialKeyLen); newSelf->copyChildrenAndKeyFrom(*self16); getInTree(self, impl) = newSelf; - allocators->node16.release(self16); + tls->release(self16); } break; case Type_Node48: if (self->numChildren + int(self->entryPresent) < kMinChildrenNode48) { auto *self48 = (Node48 *)self; - auto *newSelf = allocators->node16.allocate(self->partialKeyLen); + auto *newSelf = tls->allocate(self->partialKeyLen); newSelf->copyChildrenAndKeyFrom(*self48); getInTree(self, impl) = newSelf; - allocators->node48.release(self48); + tls->release(self48); } break; case Type_Node256: if (self->numChildren + int(self->entryPresent) < kMinChildrenNode256) { auto *self256 = (Node256 *)self; - auto *newSelf = allocators->node48.allocate(self->partialKeyLen); + auto *newSelf = tls->allocate(self->partialKeyLen); newSelf->copyChildrenAndKeyFrom(*self256); getInTree(self, impl) = newSelf; - allocators->node256.release(self256); + tls->release(self256); } break; default: // GCOVR_EXCL_LINE @@ -1404,15 +1450,13 @@ void maybeDownsize(Node *self, NodeAllocators *allocators, } } -thread_local double entries_erased_accum; - // Precondition: self is not the root. May invalidate nodes along the search // path to self. May invalidate children of self->parent. Returns a pointer to // the node after self. If erase invalidates the pointee of `dontInvalidate`, it // will update it to its new pointee as well. Precondition: `self->entryPresent` -Node *erase(Node *self, NodeAllocators *allocators, ConflictSet::Impl *impl, +Node *erase(Node *self, WriteContext *tls, ConflictSet::Impl *impl, bool logical, Node *&dontInvalidate) { - ++entries_erased_accum; + ++tls->entries_erased_accum; assert(self->parent != nullptr); #if DEBUG_VERBOSE && !defined(NDEBUG) @@ -1430,7 +1474,7 @@ Node *erase(Node *self, NodeAllocators *allocators, ConflictSet::Impl *impl, if (self->numChildren != 0) { const bool update = result == dontInvalidate; - maybeDownsize(self, allocators, impl, result); + maybeDownsize(self, tls, impl, result); if (update) { dontInvalidate = result; } @@ -1438,7 +1482,7 @@ Node *erase(Node *self, NodeAllocators *allocators, ConflictSet::Impl *impl, } assert(self->getType() == Type_Node0); - allocators->node0.release((Node0 *)self); + tls->release((Node0 *)self); switch (parent->getType()) { case Type_Node0: // GCOVR_EXCL_LINE @@ -1526,7 +1570,7 @@ Node *erase(Node *self, NodeAllocators *allocators, ConflictSet::Impl *impl, } const bool update = result == dontInvalidate; - maybeDownsize(parent, allocators, impl, result); + maybeDownsize(parent, tls, impl, result); if (update) { dontInvalidate = result; } @@ -1721,30 +1765,20 @@ struct SearchStepWise { } }; -thread_local double point_read_accum = 0; -thread_local double prefix_read_accum = 0; -thread_local double range_read_accum = 0; -thread_local double point_read_short_circuit_accum = 0; -thread_local double prefix_read_short_circuit_accum = 0; -thread_local double range_read_short_circuit_accum = 0; -thread_local double point_read_iterations_accum = 0; -thread_local double prefix_read_iterations_accum = 0; -thread_local double range_read_iterations_accum = 0; -thread_local double range_read_node_scan_accum = 0; - // Logically this is the same as performing firstGeq and then checking against // point or range version according to cmp, but this version short circuits as // soon as it can prove that there's no conflict. bool checkPointRead(Node *n, const std::span key, - InternalVersionT readVersion, ConflictSet::Impl *impl) { - ++point_read_accum; + InternalVersionT readVersion, ConflictSet::Impl *impl, + ReadContext *tls) { + ++tls->point_read_accum; #if DEBUG_VERBOSE && !defined(NDEBUG) fprintf(stderr, "Check point read: %s\n", printable(key).c_str()); #endif auto remaining = key; - for (;; ++point_read_iterations_accum) { + for (;; ++tls->point_read_iterations_accum) { if (maxVersion(n, impl) <= readVersion) { - ++point_read_short_circuit_accum; + ++tls->point_read_short_circuit_accum; return true; } if (remaining.size() == 0) { @@ -1815,20 +1849,21 @@ downLeftSpine: // max version or range version if this prefix doesn't exist, but this version // short circuits as soon as it can prove that there's no conflict. bool checkPrefixRead(Node *n, const std::span key, - InternalVersionT readVersion, ConflictSet::Impl *impl) { - ++prefix_read_accum; + InternalVersionT readVersion, ConflictSet::Impl *impl, + ReadContext *tls) { + ++tls->prefix_read_accum; #if DEBUG_VERBOSE && !defined(NDEBUG) fprintf(stderr, "Check prefix read: %s\n", printable(key).c_str()); #endif auto remaining = key; - for (;; ++prefix_read_iterations_accum) { + for (;; ++tls->prefix_read_iterations_accum) { auto m = maxVersion(n, impl); if (remaining.size() == 0) { return m <= readVersion; } if (m <= readVersion) { - ++prefix_read_short_circuit_accum; + ++tls->prefix_read_short_circuit_accum; return true; } @@ -2058,8 +2093,8 @@ scan16(const InternalVersionT *vs, int begin, int end, // account for the range version of firstGt(searchpath(n) + [end - 1]) template bool checkMaxBetweenExclusive(Node *n, int begin, int end, - InternalVersionT readVersion) { - ++range_read_node_scan_accum; + InternalVersionT readVersion, ReadContext *tls) { + ++tls->range_read_node_scan_accum; assume(-1 <= begin); assume(begin <= 256); assume(-1 <= end); @@ -2328,13 +2363,13 @@ Vector getSearchPath(Arena &arena, Node *n) { template bool checkRangeStartsWith(Node *n, std::span key, int begin, int end, InternalVersionT readVersion, - ConflictSet::Impl *impl) { + ConflictSet::Impl *impl, ReadContext *tls) { #if DEBUG_VERBOSE && !defined(NDEBUG) fprintf(stderr, "%s(%02x,%02x)*\n", printable(key).c_str(), begin, end); #endif auto remaining = key; if (remaining.size() == 0) { - return checkMaxBetweenExclusive(n, begin, end, readVersion); + return checkMaxBetweenExclusive(n, begin, end, readVersion, tls); } auto *child = getChild(n, remaining[0]); @@ -2400,9 +2435,10 @@ namespace { // that are >= key is <= readVersion template struct CheckRangeLeftSide { CheckRangeLeftSide(Node *n, std::span key, int prefixLen, - InternalVersionT readVersion, ConflictSet::Impl *impl) + InternalVersionT readVersion, ConflictSet::Impl *impl, + ReadContext *tls) : n(n), remaining(key), prefixLen(prefixLen), readVersion(readVersion), - impl(impl) { + impl(impl), tls(tls) { #if DEBUG_VERBOSE && !defined(NDEBUG) fprintf(stderr, "Check range left side from %s for keys starting with %s\n", printable(key).c_str(), @@ -2415,6 +2451,7 @@ template struct CheckRangeLeftSide { int prefixLen; InternalVersionT readVersion; ConflictSet::Impl *impl; + ReadContext *tls; int searchPathLen = 0; bool ok; @@ -2430,8 +2467,8 @@ template struct CheckRangeLeftSide { } if (searchPathLen >= prefixLen) { - if (!checkMaxBetweenExclusive(n, remaining[0], 256, - readVersion)) { + if (!checkMaxBetweenExclusive(n, remaining[0], 256, readVersion, + tls)) { ok = false; return true; } @@ -2520,9 +2557,10 @@ template struct CheckRangeLeftSide { // that are < key is <= readVersion template struct CheckRangeRightSide { CheckRangeRightSide(Node *n, std::span key, int prefixLen, - InternalVersionT readVersion, ConflictSet::Impl *impl) + InternalVersionT readVersion, ConflictSet::Impl *impl, + ReadContext *tls) : n(n), key(key), remaining(key), prefixLen(prefixLen), - readVersion(readVersion), impl(impl) { + readVersion(readVersion), impl(impl), tls(tls) { #if DEBUG_VERBOSE && !defined(NDEBUG) fprintf(stderr, "Check range right side to %s for keys starting with %s\n", printable(key).c_str(), @@ -2536,6 +2574,7 @@ template struct CheckRangeRightSide { int prefixLen; InternalVersionT readVersion; ConflictSet::Impl *impl; + ReadContext *tls; int searchPathLen = 0; bool ok; @@ -2560,8 +2599,8 @@ template struct CheckRangeRightSide { return true; } - if (!checkMaxBetweenExclusive(n, -1, remaining[0], - readVersion)) { + if (!checkMaxBetweenExclusive(n, -1, remaining[0], readVersion, + tls)) { ok = false; return true; } @@ -2656,28 +2695,29 @@ template struct CheckRangeRightSide { template bool checkRangeReadImpl(Node *n, std::span begin, std::span end, - InternalVersionT readVersion, ConflictSet::Impl *impl) { + InternalVersionT readVersion, ConflictSet::Impl *impl, + ReadContext *tls) { int lcp = longestCommonPrefix(begin.data(), end.data(), std::min(begin.size(), end.size())); if (lcp == int(begin.size()) && end.size() == begin.size() + 1 && end.back() == 0) { - return checkPointRead(n, begin, readVersion, impl); + return checkPointRead(n, begin, readVersion, impl, tls); } if (lcp == int(begin.size() - 1) && end.size() == begin.size() && int(begin.back()) + 1 == int(end.back())) { - return checkPrefixRead(n, begin, readVersion, impl); + return checkPrefixRead(n, begin, readVersion, impl, tls); } - ++range_read_accum; + ++tls->range_read_accum; SearchStepWise search{n, begin.subspan(0, lcp)}; Arena arena; - for (;; ++range_read_iterations_accum) { + for (;; ++tls->range_read_iterations_accum) { assert(getSearchPath(arena, search.n) <=> begin.subspan(0, lcp - search.remaining.size()) == 0); if (maxVersion(search.n, impl) <= readVersion) { - ++range_read_short_circuit_accum; + ++tls->range_read_short_circuit_accum; return true; } if (search.step()) { @@ -2697,41 +2737,41 @@ bool checkRangeReadImpl(Node *n, std::span begin, lcp -= consumed; if (lcp == int(begin.size())) { - CheckRangeRightSide checkRangeRightSide{n, end, lcp, readVersion, - impl}; + CheckRangeRightSide checkRangeRightSide{n, end, lcp, + readVersion, impl, tls}; while (!checkRangeRightSide.step()) ; return checkRangeRightSide.ok; } if (!checkRangeStartsWith(n, begin.subspan(0, lcp), begin[lcp], - end[lcp], readVersion, impl)) { + end[lcp], readVersion, impl, tls)) { return false; } - CheckRangeLeftSide checkRangeLeftSide{n, begin, lcp + 1, readVersion, - impl}; - CheckRangeRightSide checkRangeRightSide{n, end, lcp + 1, readVersion, - impl}; + CheckRangeLeftSide checkRangeLeftSide{n, begin, lcp + 1, + readVersion, impl, tls}; + CheckRangeRightSide checkRangeRightSide{n, end, lcp + 1, + readVersion, impl, tls}; for (;;) { bool leftDone = checkRangeLeftSide.step(); bool rightDone = checkRangeRightSide.step(); if (!leftDone && !rightDone) { - range_read_iterations_accum += 2; + tls->range_read_iterations_accum += 2; continue; } if (leftDone && rightDone) { break; } else if (leftDone) { while (!checkRangeRightSide.step()) { - ++range_read_iterations_accum; + ++tls->range_read_iterations_accum; } break; } else { assert(rightDone); while (!checkRangeLeftSide.step()) { - ++range_read_iterations_accum; + ++tls->range_read_iterations_accum; } } break; @@ -2770,8 +2810,8 @@ checkRangeReadImpl(Node *n, std::span begin, #if defined(__SANITIZE_THREAD__) || !defined(__x86_64__) bool checkRangeRead(Node *n, std::span begin, std::span end, InternalVersionT readVersion, - ConflictSet::Impl *impl) { - return checkRangeReadImpl(n, begin, end, readVersion, impl); + ConflictSet::Impl *impl, ReadContext *tls) { + return checkRangeReadImpl(n, begin, end, readVersion, impl, tls); } #else __attribute__((target("default"))) bool @@ -2788,18 +2828,16 @@ checkRangeRead(Node *n, std::span begin, } #endif -thread_local double insert_iterations_accum; - // Returns a pointer to the newly inserted node. Caller must set // `entryPresent`, `entry` fields and `maxVersion` on the result. The search // path of the result's parent will have `maxVersion` at least `writeVersion` as // a postcondition. Nodes along the search path to `key` may be invalidated. template -[[nodiscard]] Node * -insert(Node **self, std::span key, InternalVersionT writeVersion, - NodeAllocators *allocators, ConflictSet::Impl *impl) { +[[nodiscard]] Node *insert(Node **self, std::span key, + InternalVersionT writeVersion, WriteContext *tls, + ConflictSet::Impl *impl) { - for (;; ++insert_iterations_accum) { + for (;; ++tls->insert_iterations_accum) { if ((*self)->partialKeyLen > 0) { // Handle an existing partial key @@ -2811,7 +2849,7 @@ insert(Node **self, std::span key, InternalVersionT writeVersion, InternalVersionT oldMaxVersion = maxVersion(old, impl); // *self will have one child - *self = allocators->node3.allocate(partialKeyIndex); + *self = tls->allocate(partialKeyIndex); memcpy((char *)*self + kNodeCopyBegin, (char *)old + kNodeCopyBegin, kNodeCopySize); @@ -2824,8 +2862,7 @@ insert(Node **self, std::span key, InternalVersionT writeVersion, memcpy((*self)->partialKey(), old->partialKey(), (*self)->partialKeyLen); - getOrCreateChild(*self, old->partialKey()[partialKeyIndex], - allocators) = old; + getOrCreateChild(*self, old->partialKey()[partialKeyIndex], tls) = old; old->parent = *self; old->parentsIndex = old->partialKey()[partialKeyIndex]; setMaxVersion(old, impl, oldMaxVersion); @@ -2865,9 +2902,9 @@ insert(Node **self, std::span key, InternalVersionT writeVersion, setMaxVersion(*self, impl, writeVersion); } - auto &child = getOrCreateChild(*self, key.front(), allocators); + auto &child = getOrCreateChild(*self, key.front(), tls); if (!child) { - child = allocators->node0.allocate(key.size() - 1); + child = tls->allocate(key.size() - 1); child->numChildren = 0; child->entryPresent = false; child->partialKeyLen = 0; @@ -2908,14 +2945,12 @@ void destroyTree(Node *root) { } } -thread_local double entries_inserted_accum; - void addPointWrite(Node *&root, std::span key, - InternalVersionT writeVersion, NodeAllocators *allocators, + InternalVersionT writeVersion, WriteContext *tls, ConflictSet::Impl *impl) { - auto *n = insert(&root, key, writeVersion, allocators, impl); + auto *n = insert(&root, key, writeVersion, tls, impl); if (!n->entryPresent) { - ++entries_inserted_accum; + ++tls->entries_inserted_accum; auto *p = nextLogical(n); addKey(n); @@ -2934,13 +2969,13 @@ void addPointWrite(Node *&root, std::span key, void addWriteRange(Node *&root, std::span begin, std::span end, InternalVersionT writeVersion, - NodeAllocators *allocators, ConflictSet::Impl *impl) { + WriteContext *tls, ConflictSet::Impl *impl) { int lcp = longestCommonPrefix(begin.data(), end.data(), std::min(begin.size(), end.size())); if (lcp == int(begin.size()) && end.size() == begin.size() + 1 && end.back() == 0) { - return addPointWrite(root, begin, writeVersion, allocators, impl); + return addPointWrite(root, begin, writeVersion, tls, impl); } const bool beginIsPrefix = lcp == int(begin.size()); auto remaining = begin.subspan(0, lcp); @@ -2977,8 +3012,7 @@ void addWriteRange(Node *&root, std::span begin, begin = begin.subspan(consumed, begin.size() - consumed); end = end.subspan(consumed, end.size() - consumed); - auto *beginNode = - insert(useAsRoot, begin, writeVersion, allocators, impl); + auto *beginNode = insert(useAsRoot, begin, writeVersion, tls, impl); const bool insertedBegin = !beginNode->entryPresent; @@ -2986,7 +3020,7 @@ void addWriteRange(Node *&root, std::span begin, beginNode->entryPresent = true; if (insertedBegin) { - ++entries_inserted_accum; + ++tls->entries_inserted_accum; auto *p = nextLogical(beginNode); beginNode->entry.rangeVersion = p == nullptr ? InternalVersionT::zero @@ -2999,7 +3033,7 @@ void addWriteRange(Node *&root, std::span begin, assert(writeVersion >= beginNode->entry.pointVersion); beginNode->entry.pointVersion = writeVersion; - auto *endNode = insert(useAsRoot, end, writeVersion, allocators, impl); + auto *endNode = insert(useAsRoot, end, writeVersion, tls, impl); const bool insertedEnd = !endNode->entryPresent; @@ -3007,7 +3041,7 @@ void addWriteRange(Node *&root, std::span begin, endNode->entryPresent = true; if (insertedEnd) { - ++entries_inserted_accum; + ++tls->entries_inserted_accum; auto *p = nextLogical(endNode); endNode->entry.pointVersion = p == nullptr ? InternalVersionT::zero @@ -3021,13 +3055,12 @@ void addWriteRange(Node *&root, std::span begin, if (beginIsPrefix && insertedEnd) { // beginNode may have been invalidated when inserting end. TODO can we do // better? - beginNode = insert(useAsRoot, begin, writeVersion, allocators, impl); + beginNode = insert(useAsRoot, begin, writeVersion, tls, impl); assert(beginNode->entryPresent); } for (beginNode = nextLogical(beginNode); beginNode != endNode; - beginNode = - erase(beginNode, allocators, impl, /*logical*/ true, endNode)) { + beginNode = erase(beginNode, tls, impl, /*logical*/ true, endNode)) { } } @@ -3093,6 +3126,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl { timespec ts_begin; clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts_begin); #endif + ReadContext tls; int commits_accum = 0; int conflicts_accum = 0; int too_olds_accum = 0; @@ -3108,32 +3142,34 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl { reads[i].readVersion < oldestVersionFullPrecision ? TooOld : (end.size() > 0 ? checkRangeRead(root, begin, end, - InternalVersionT(reads[i].readVersion), this) + InternalVersionT(reads[i].readVersion), this, + &tls) : checkPointRead(root, begin, - InternalVersionT(reads[i].readVersion), this)) + InternalVersionT(reads[i].readVersion), this, + &tls)) ? Commit : Conflict; commits_accum += result[i] == Commit; conflicts_accum += result[i] == Conflict; too_olds_accum += result[i] == TooOld; } - point_read_total.add(std::exchange(point_read_accum, 0)); - prefix_read_total.add(std::exchange(prefix_read_accum, 0)); - range_read_total.add(std::exchange(range_read_accum, 0)); + point_read_total.add(std::exchange(tls.point_read_accum, 0)); + prefix_read_total.add(std::exchange(tls.prefix_read_accum, 0)); + range_read_total.add(std::exchange(tls.range_read_accum, 0)); range_read_node_scan_total.add( - std::exchange(range_read_node_scan_accum, 0)); + std::exchange(tls.range_read_node_scan_accum, 0)); point_read_short_circuit_total.add( - std::exchange(point_read_short_circuit_accum, 0)); + std::exchange(tls.point_read_short_circuit_accum, 0)); prefix_read_short_circuit_total.add( - std::exchange(prefix_read_short_circuit_accum, 0)); + std::exchange(tls.prefix_read_short_circuit_accum, 0)); range_read_short_circuit_total.add( - std::exchange(range_read_short_circuit_accum, 0)); + std::exchange(tls.range_read_short_circuit_accum, 0)); point_read_iterations_total.add( - std::exchange(point_read_iterations_accum, 0)); + std::exchange(tls.point_read_iterations_accum, 0)); prefix_read_iterations_total.add( - std::exchange(prefix_read_iterations_accum, 0)); + std::exchange(tls.prefix_read_iterations_accum, 0)); range_read_iterations_total.add( - std::exchange(range_read_iterations_accum, 0)); + std::exchange(tls.range_read_iterations_accum, 0)); commits_total.add(commits_accum); conflicts_total.add(conflicts_accum); too_olds_total.add(too_olds_accum); @@ -3173,21 +3209,20 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl { auto end = std::span(w.end.p, w.end.len); if (w.end.len > 0) { keyUpdates += 3; - addWriteRange(root, begin, end, InternalVersionT(writeVersion), - &allocators, this); + addWriteRange(root, begin, end, InternalVersionT(writeVersion), &tls, + this); } else { keyUpdates += 2; - addPointWrite(root, begin, InternalVersionT(writeVersion), &allocators, - this); + addPointWrite(root, begin, InternalVersionT(writeVersion), &tls, this); } } memory_bytes.set(totalBytes); - nodes_allocated_total.add(std::exchange(nodes_allocated_accum, 0)); - nodes_released_total.add(std::exchange(nodes_released_accum, 0)); - entries_inserted_total.add(std::exchange(entries_inserted_accum, 0)); - entries_erased_total.add(std::exchange(entries_erased_accum, 0)); - insert_iterations_total.add(std::exchange(insert_iterations_accum, 0)); + nodes_allocated_total.add(std::exchange(tls.nodes_allocated_accum, 0)); + nodes_released_total.add(std::exchange(tls.nodes_released_accum, 0)); + entries_inserted_total.add(std::exchange(tls.entries_inserted_accum, 0)); + entries_erased_total.add(std::exchange(tls.entries_erased_accum, 0)); + insert_iterations_total.add(std::exchange(tls.insert_iterations_accum, 0)); write_bytes_total.add(write_byte_accum); } @@ -3219,9 +3254,9 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl { // node is greater than the point version of the left node assert(n->entry.rangeVersion <= oldestVersion); Node *dummy = nullptr; - n = erase(n, &allocators, this, /*logical*/ false, dummy); + n = erase(n, &tls, this, /*logical*/ false, dummy); } else { - maybeDecreaseCapacity(n, &allocators, this); + maybeDecreaseCapacity(n, &tls, this); n = nextPhysical(n); } } @@ -3262,10 +3297,10 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl { keyUpdates = gcScanStep(keyUpdates); memory_bytes.set(totalBytes); - nodes_allocated_total.add(std::exchange(nodes_allocated_accum, 0)); - nodes_released_total.add(std::exchange(nodes_released_accum, 0)); - entries_inserted_total.add(std::exchange(entries_inserted_accum, 0)); - entries_erased_total.add(std::exchange(entries_erased_accum, 0)); + nodes_allocated_total.add(std::exchange(tls.nodes_allocated_accum, 0)); + nodes_released_total.add(std::exchange(tls.nodes_released_accum, 0)); + entries_inserted_total.add(std::exchange(tls.entries_inserted_accum, 0)); + entries_erased_total.add(std::exchange(tls.entries_erased_accum, 0)); oldest_version.set(oldestVersionFullPrecision); } @@ -3278,15 +3313,15 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl { oldest_version.set(oldestVersionFullPrecision); newest_version.set(newestVersionFullPrecision); - allocators.~NodeAllocators(); - new (&allocators) NodeAllocators(); + tls.~WriteContext(); + new (&tls) WriteContext(); removalKeyArena = Arena{}; removalKey = {}; keyUpdates = 10; // Insert "" - root = allocators.node0.allocate(0); + root = tls.allocate(0); root->numChildren = 0; root->parent = nullptr; rootMaxVersion = this->oldestVersion; @@ -3313,7 +3348,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl { safe_free(metrics, metricsCount * sizeof(metrics[0])); } - NodeAllocators allocators; + WriteContext tls; Arena removalKeyArena; std::span removalKey;