diff --git a/ConflictSet.cpp b/ConflictSet.cpp
index ba6417c..44c40f7 100644
--- a/ConflictSet.cpp
+++ b/ConflictSet.cpp
@@ -587,9 +587,6 @@ struct Counter : private Metric {
   }
 };
 
-thread_local double nodes_allocated_accum = 0;
-thread_local double nodes_released_accum = 0;
-
 template <class T> struct BoundedFreeListAllocator {
 
   static_assert(sizeof(T) >= sizeof(void *));
@@ -625,7 +622,6 @@ template <class T> struct BoundedFreeListAllocator {
   }
 
   T *allocate(int partialKeyCapacity) {
-    ++nodes_allocated_accum;
     T *result = allocate_helper(partialKeyCapacity);
     if constexpr (!std::is_same_v<T, Node0>) {
       memset(result->children, 0, sizeof(result->children));
@@ -642,7 +638,6 @@ template <class T> struct BoundedFreeListAllocator {
   }
 
   void release(T *p) {
-    ++nodes_released_accum;
     if (freeListBytes >= kFreeListMaxMemory) {
       removeNode(p);
       return safe_free(p, sizeof(T) + p->partialKeyCapacity);
@@ -710,7 +705,60 @@ size_t Node::size() const {
   }
 }
 
-struct NodeAllocators {
+// Per-check metric accumulators, plumbed along the check call tree. Lifetime
+// ends after each check call.
+struct ReadContext {
+  double point_read_accum = 0;  // checkPointRead calls
+  double prefix_read_accum = 0;  // checkPrefixRead calls
+  double range_read_accum = 0;  // range reads not handled as point/prefix
+  double point_read_short_circuit_accum = 0;  // exits: maxVersion <= read
+  double prefix_read_short_circuit_accum = 0;  // exits: maxVersion <= read
+  double range_read_short_circuit_accum = 0;  // exits: maxVersion <= read
+  double point_read_iterations_accum = 0;  // checkPointRead loop iterations
+  double prefix_read_iterations_accum = 0;  // checkPrefixRead loop iterations
+  double range_read_iterations_accum = 0;  // search + left/right side steps
+  double range_read_node_scan_accum = 0;  // checkMaxBetweenExclusive calls
+};
+
+// A type that's plumbed along the non-const call tree. Same lifetime as
+// ConflictSet::Impl
+struct WriteContext {
+  double entries_erased_accum = 0;
+  double insert_iterations_accum = 0;
+  double entries_inserted_accum = 0;
+  double nodes_allocated_accum = 0;
+  double nodes_released_accum = 0;
+  template <class T> T *allocate(int c) {
+    ++nodes_allocated_accum;
+    if constexpr (std::is_same_v<T, Node0>) {
+      return node0.allocate(c);
+    } else if constexpr (std::is_same_v<T, Node3>) {
+      return node3.allocate(c);
+    } else if constexpr (std::is_same_v<T, Node16>) {
+      return node16.allocate(c);
+    } else if constexpr (std::is_same_v<T, Node48>) {
+      return node48.allocate(c);
+    } else if constexpr (std::is_same_v<T, Node256>) {
+      return node256.allocate(c);
+    }
+  }
+  template <class T> void release(T *c) {
+    static_assert(!std::is_same_v<T, Node>);
+    ++nodes_released_accum;
+    if constexpr (std::is_same_v<T, Node0>) {
+      return node0.release(c);
+    } else if constexpr (std::is_same_v<T, Node3>) {
+      return node3.release(c);
+    } else if constexpr (std::is_same_v<T, Node16>) {
+      return node16.release(c);
+    } else if constexpr (std::is_same_v<T, Node48>) {
+      return node48.release(c);
+    } else if constexpr (std::is_same_v<T, Node256>) {
+      return node256.release(c);
+    }
+  }
+
+private:
   BoundedFreeListAllocator<Node0> node0;
   BoundedFreeListAllocator<Node3> node3;
   BoundedFreeListAllocator<Node16> node16;
@@ -954,8 +1002,7 @@ int getChildGeq(Node *self, int child) {
 
 // Caller is responsible for assigning a non-null pointer to the returned
 // reference if null
-Node *&getOrCreateChild(Node *&self, uint8_t index,
-                        NodeAllocators *allocators) {
+Node *&getOrCreateChild(Node *&self, uint8_t index, WriteContext *tls) {
 
   // Fast path for if it exists already
   switch (self->getType()) {
@@ -996,9 +1043,9 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
   case Type_Node0: {
     auto *self0 = static_cast<Node0 *>(self);
 
-    auto *newSelf = allocators->node3.allocate(self->partialKeyLen);
+    auto *newSelf = tls->allocate<Node3>(self->partialKeyLen);
     newSelf->copyChildrenAndKeyFrom(*self0);
-    allocators->node0.release(self0);
+    tls->release(self0);
     self = newSelf;
 
     goto insert3;
@@ -1006,9 +1053,9 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
   case Type_Node3: {
     if (self->numChildren == Node3::kMaxNodes) {
       auto *self3 = static_cast<Node3 *>(self);
-      auto *newSelf = allocators->node16.allocate(self->partialKeyLen);
+      auto *newSelf = tls->allocate<Node16>(self->partialKeyLen);
       newSelf->copyChildrenAndKeyFrom(*self3);
-      allocators->node3.release(self3);
+      tls->release(self3);
       self = newSelf;
       goto insert16;
     }
@@ -1038,9 +1085,9 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
   case Type_Node16: {
     if (self->numChildren == Node16::kMaxNodes) {
       auto *self16 = static_cast<Node16 *>(self);
-      auto *newSelf = allocators->node48.allocate(self->partialKeyLen);
+      auto *newSelf = tls->allocate<Node48>(self->partialKeyLen);
       newSelf->copyChildrenAndKeyFrom(*self16);
-      allocators->node16.release(self16);
+      tls->release(self16);
       self = newSelf;
       goto insert48;
     }
@@ -1116,9 +1163,9 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
 
     if (self->numChildren == 48) {
       auto *self48 = static_cast<Node48 *>(self);
-      auto *newSelf = allocators->node256.allocate(self->partialKeyLen);
+      auto *newSelf = tls->allocate<Node256>(self->partialKeyLen);
       newSelf->copyChildrenAndKeyFrom(*self48);
-      allocators->node48.release(self48);
+      tls->release(self48);
       self = newSelf;
       goto insert256;
     }
@@ -1172,18 +1219,17 @@ Node *nextLogical(Node *node) {
 
 // Invalidates `self`, replacing it with a node of at least capacity.
 // Does not return nodes to freelists when kUseFreeList is false.
-void freeAndMakeCapacityAtLeast(Node *&self, int capacity,
-                                NodeAllocators *allocators,
+void freeAndMakeCapacityAtLeast(Node *&self, int capacity, WriteContext *tls,
                                 ConflictSet::Impl *impl,
                                 const bool kUseFreeList) {
   switch (self->getType()) {
   case Type_Node0: {
     auto *self0 = (Node0 *)self;
-    auto *newSelf = allocators->node0.allocate(capacity);
+    auto *newSelf = tls->allocate<Node0>(capacity);
     newSelf->copyChildrenAndKeyFrom(*self0);
     getInTree(self, impl) = newSelf;
     if (kUseFreeList) {
-      allocators->node0.release(self0);
+      tls->release(self0);
     } else {
       removeNode(self0);
       safe_free(self0, self0->size());
@@ -1192,11 +1238,11 @@ void freeAndMakeCapacityAtLeast(Node *&self, int capacity,
   } break;
   case Type_Node3: {
     auto *self3 = (Node3 *)self;
-    auto *newSelf = allocators->node3.allocate(capacity);
+    auto *newSelf = tls->allocate<Node3>(capacity);
     newSelf->copyChildrenAndKeyFrom(*self3);
     getInTree(self, impl) = newSelf;
     if (kUseFreeList) {
-      allocators->node3.release(self3);
+      tls->release(self3);
     } else {
       removeNode(self3);
       safe_free(self3, self3->size());
@@ -1205,11 +1251,11 @@ void freeAndMakeCapacityAtLeast(Node *&self, int capacity,
   } break;
   case Type_Node16: {
     auto *self16 = (Node16 *)self;
-    auto *newSelf = allocators->node16.allocate(capacity);
+    auto *newSelf = tls->allocate<Node16>(capacity);
     newSelf->copyChildrenAndKeyFrom(*self16);
     getInTree(self, impl) = newSelf;
     if (kUseFreeList) {
-      allocators->node16.release(self16);
+      tls->release(self16);
     } else {
       removeNode(self16);
       safe_free(self16, self16->size());
@@ -1218,11 +1264,11 @@ void freeAndMakeCapacityAtLeast(Node *&self, int capacity,
   } break;
   case Type_Node48: {
     auto *self48 = (Node48 *)self;
-    auto *newSelf = allocators->node48.allocate(capacity);
+    auto *newSelf = tls->allocate<Node48>(capacity);
     newSelf->copyChildrenAndKeyFrom(*self48);
     getInTree(self, impl) = newSelf;
     if (kUseFreeList) {
-      allocators->node48.release(self48);
+      tls->release(self48);
     } else {
       removeNode(self48);
       safe_free(self48, self48->size());
@@ -1231,11 +1277,11 @@ void freeAndMakeCapacityAtLeast(Node *&self, int capacity,
   } break;
   case Type_Node256: {
     auto *self256 = (Node256 *)self;
-    auto *newSelf = allocators->node256.allocate(capacity);
+    auto *newSelf = tls->allocate<Node256>(capacity);
     newSelf->copyChildrenAndKeyFrom(*self256);
     getInTree(self, impl) = newSelf;
     if (kUseFreeList) {
-      allocators->node256.release(self256);
+      tls->release(self256);
     } else {
       removeNode(self256);
       safe_free(self256, self256->size());
@@ -1250,7 +1296,7 @@ void freeAndMakeCapacityAtLeast(Node *&self, int capacity,
 // Fix larger-than-desired capacities. Does not return nodes to freelists,
 // since that wouldn't actually reclaim the memory used for partial key
 // capacity.
-void maybeDecreaseCapacity(Node *&self, NodeAllocators *allocators,
+void maybeDecreaseCapacity(Node *&self, WriteContext *tls,
                            ConflictSet::Impl *impl) {
 
   const int maxCapacity =
@@ -1263,7 +1309,7 @@ void maybeDecreaseCapacity(Node *&self, NodeAllocators *allocators,
   if (self->getCapacity() <= maxCapacity) {
     return;
   }
-  freeAndMakeCapacityAtLeast(self, maxCapacity, allocators, impl, false);
+  freeAndMakeCapacityAtLeast(self, maxCapacity, tls, impl, false);
 }
 
 void rezero(Node *n, InternalVersionT z) {
@@ -1313,8 +1359,8 @@ void rezero(Node *n, InternalVersionT z) {
   }
 }
 
-void maybeDownsize(Node *self, NodeAllocators *allocators,
-                   ConflictSet::Impl *impl, Node *&dontInvalidate) {
+void maybeDownsize(Node *self, WriteContext *tls, ConflictSet::Impl *impl,
+                   Node *&dontInvalidate) {
 
 #if DEBUG_VERBOSE && !defined(NDEBUG)
   fprintf(stderr, "maybeDownsize: %s\n", getSearchPathPrintable(self).c_str());
@@ -1326,17 +1372,17 @@ void maybeDownsize(Node *self, NodeAllocators *allocators,
   case Type_Node3: {
     auto *self3 = (Node3 *)self;
     if (self->numChildren == 0) {
-      auto *newSelf = allocators->node0.allocate(self->partialKeyLen);
+      auto *newSelf = tls->allocate<Node0>(self->partialKeyLen);
       newSelf->copyChildrenAndKeyFrom(*self3);
       getInTree(self, impl) = newSelf;
-      allocators->node3.release(self3);
+      tls->release(self3);
     } else if (self->numChildren == 1 && !self->entryPresent) {
       auto *child = self3->children[0];
       int minCapacity = self3->partialKeyLen + 1 + child->partialKeyLen;
 
       if (minCapacity > child->getCapacity()) {
         const bool update = child == dontInvalidate;
-        freeAndMakeCapacityAtLeast(child, minCapacity, allocators, impl, true);
+        freeAndMakeCapacityAtLeast(child, minCapacity, tls, impl, true);
         if (update) {
           dontInvalidate = child;
         }
@@ -1369,34 +1415,34 @@ void maybeDownsize(Node *self, NodeAllocators *allocators,
       }
 
       getInTree(self, impl) = child;
-      allocators->node3.release(self3);
+      tls->release(self3);
     }
   } break;
   case Type_Node16:
     if (self->numChildren + int(self->entryPresent) < kMinChildrenNode16) {
       auto *self16 = (Node16 *)self;
-      auto *newSelf = allocators->node3.allocate(self->partialKeyLen);
+      auto *newSelf = tls->allocate<Node3>(self->partialKeyLen);
       newSelf->copyChildrenAndKeyFrom(*self16);
       getInTree(self, impl) = newSelf;
-      allocators->node16.release(self16);
+      tls->release(self16);
     }
     break;
   case Type_Node48:
     if (self->numChildren + int(self->entryPresent) < kMinChildrenNode48) {
       auto *self48 = (Node48 *)self;
-      auto *newSelf = allocators->node16.allocate(self->partialKeyLen);
+      auto *newSelf = tls->allocate<Node16>(self->partialKeyLen);
       newSelf->copyChildrenAndKeyFrom(*self48);
       getInTree(self, impl) = newSelf;
-      allocators->node48.release(self48);
+      tls->release(self48);
     }
     break;
   case Type_Node256:
     if (self->numChildren + int(self->entryPresent) < kMinChildrenNode256) {
       auto *self256 = (Node256 *)self;
-      auto *newSelf = allocators->node48.allocate(self->partialKeyLen);
+      auto *newSelf = tls->allocate<Node48>(self->partialKeyLen);
       newSelf->copyChildrenAndKeyFrom(*self256);
       getInTree(self, impl) = newSelf;
-      allocators->node256.release(self256);
+      tls->release(self256);
     }
     break;
   default:                   // GCOVR_EXCL_LINE
@@ -1404,15 +1450,13 @@ void maybeDownsize(Node *self, NodeAllocators *allocators,
   }
 }
 
-thread_local double entries_erased_accum;
-
 // Precondition: self is not the root. May invalidate nodes along the search
 // path to self. May invalidate children of self->parent. Returns a pointer to
 // the node after self. If erase invalidates the pointee of `dontInvalidate`, it
 // will update it to its new pointee as well. Precondition: `self->entryPresent`
-Node *erase(Node *self, NodeAllocators *allocators, ConflictSet::Impl *impl,
+Node *erase(Node *self, WriteContext *tls, ConflictSet::Impl *impl,
             bool logical, Node *&dontInvalidate) {
-  ++entries_erased_accum;
+  ++tls->entries_erased_accum;
   assert(self->parent != nullptr);
 
 #if DEBUG_VERBOSE && !defined(NDEBUG)
@@ -1430,7 +1474,7 @@ Node *erase(Node *self, NodeAllocators *allocators, ConflictSet::Impl *impl,
 
   if (self->numChildren != 0) {
     const bool update = result == dontInvalidate;
-    maybeDownsize(self, allocators, impl, result);
+    maybeDownsize(self, tls, impl, result);
     if (update) {
       dontInvalidate = result;
     }
@@ -1438,7 +1482,7 @@ Node *erase(Node *self, NodeAllocators *allocators, ConflictSet::Impl *impl,
   }
 
   assert(self->getType() == Type_Node0);
-  allocators->node0.release((Node0 *)self);
+  tls->release((Node0 *)self);
 
   switch (parent->getType()) {
   case Type_Node0:           // GCOVR_EXCL_LINE
@@ -1526,7 +1570,7 @@ Node *erase(Node *self, NodeAllocators *allocators, ConflictSet::Impl *impl,
   }
 
   const bool update = result == dontInvalidate;
-  maybeDownsize(parent, allocators, impl, result);
+  maybeDownsize(parent, tls, impl, result);
   if (update) {
     dontInvalidate = result;
   }
@@ -1721,30 +1765,20 @@ struct SearchStepWise {
   }
 };
 
-thread_local double point_read_accum = 0;
-thread_local double prefix_read_accum = 0;
-thread_local double range_read_accum = 0;
-thread_local double point_read_short_circuit_accum = 0;
-thread_local double prefix_read_short_circuit_accum = 0;
-thread_local double range_read_short_circuit_accum = 0;
-thread_local double point_read_iterations_accum = 0;
-thread_local double prefix_read_iterations_accum = 0;
-thread_local double range_read_iterations_accum = 0;
-thread_local double range_read_node_scan_accum = 0;
-
 // Logically this is the same as performing firstGeq and then checking against
 // point or range version according to cmp, but this version short circuits as
 // soon as it can prove that there's no conflict.
 bool checkPointRead(Node *n, const std::span<const uint8_t> key,
-                    InternalVersionT readVersion, ConflictSet::Impl *impl) {
-  ++point_read_accum;
+                    InternalVersionT readVersion, ConflictSet::Impl *impl,
+                    ReadContext *tls) {
+  ++tls->point_read_accum;
 #if DEBUG_VERBOSE && !defined(NDEBUG)
   fprintf(stderr, "Check point read: %s\n", printable(key).c_str());
 #endif
   auto remaining = key;
-  for (;; ++point_read_iterations_accum) {
+  for (;; ++tls->point_read_iterations_accum) {
     if (maxVersion(n, impl) <= readVersion) {
-      ++point_read_short_circuit_accum;
+      ++tls->point_read_short_circuit_accum;
       return true;
     }
     if (remaining.size() == 0) {
@@ -1815,20 +1849,21 @@ downLeftSpine:
 // max version or range version if this prefix doesn't exist, but this version
 // short circuits as soon as it can prove that there's no conflict.
 bool checkPrefixRead(Node *n, const std::span<const uint8_t> key,
-                     InternalVersionT readVersion, ConflictSet::Impl *impl) {
-  ++prefix_read_accum;
+                     InternalVersionT readVersion, ConflictSet::Impl *impl,
+                     ReadContext *tls) {
+  ++tls->prefix_read_accum;
 #if DEBUG_VERBOSE && !defined(NDEBUG)
   fprintf(stderr, "Check prefix read: %s\n", printable(key).c_str());
 #endif
   auto remaining = key;
-  for (;; ++prefix_read_iterations_accum) {
+  for (;; ++tls->prefix_read_iterations_accum) {
     auto m = maxVersion(n, impl);
     if (remaining.size() == 0) {
       return m <= readVersion;
     }
 
     if (m <= readVersion) {
-      ++prefix_read_short_circuit_accum;
+      ++tls->prefix_read_short_circuit_accum;
       return true;
     }
 
@@ -2058,8 +2093,8 @@ scan16(const InternalVersionT *vs, int begin, int end,
 // account for the range version of firstGt(searchpath(n) + [end - 1])
 template <bool kAVX512>
 bool checkMaxBetweenExclusive(Node *n, int begin, int end,
-                              InternalVersionT readVersion) {
-  ++range_read_node_scan_accum;
+                              InternalVersionT readVersion, ReadContext *tls) {
+  ++tls->range_read_node_scan_accum;
   assume(-1 <= begin);
   assume(begin <= 256);
   assume(-1 <= end);
@@ -2328,13 +2363,13 @@ Vector<uint8_t> getSearchPath(Arena &arena, Node *n) {
 template <bool kAVX512>
 bool checkRangeStartsWith(Node *n, std::span<const uint8_t> key, int begin,
                           int end, InternalVersionT readVersion,
-                          ConflictSet::Impl *impl) {
+                          ConflictSet::Impl *impl, ReadContext *tls) {
 #if DEBUG_VERBOSE && !defined(NDEBUG)
   fprintf(stderr, "%s(%02x,%02x)*\n", printable(key).c_str(), begin, end);
 #endif
   auto remaining = key;
   if (remaining.size() == 0) {
-    return checkMaxBetweenExclusive<kAVX512>(n, begin, end, readVersion);
+    return checkMaxBetweenExclusive<kAVX512>(n, begin, end, readVersion, tls);
   }
 
   auto *child = getChild(n, remaining[0]);
@@ -2400,9 +2435,10 @@ namespace {
 // that are >= key is <= readVersion
 template <bool kAVX512> struct CheckRangeLeftSide {
   CheckRangeLeftSide(Node *n, std::span<const uint8_t> key, int prefixLen,
-                     InternalVersionT readVersion, ConflictSet::Impl *impl)
+                     InternalVersionT readVersion, ConflictSet::Impl *impl,
+                     ReadContext *tls)
       : n(n), remaining(key), prefixLen(prefixLen), readVersion(readVersion),
-        impl(impl) {
+        impl(impl), tls(tls) {
 #if DEBUG_VERBOSE && !defined(NDEBUG)
     fprintf(stderr, "Check range left side from %s for keys starting with %s\n",
             printable(key).c_str(),
@@ -2415,6 +2451,7 @@ template <bool kAVX512> struct CheckRangeLeftSide {
   int prefixLen;
   InternalVersionT readVersion;
   ConflictSet::Impl *impl;
+  ReadContext *tls;
   int searchPathLen = 0;
   bool ok;
 
@@ -2430,8 +2467,8 @@ template <bool kAVX512> struct CheckRangeLeftSide {
     }
 
     if (searchPathLen >= prefixLen) {
-      if (!checkMaxBetweenExclusive<kAVX512>(n, remaining[0], 256,
-                                             readVersion)) {
+      if (!checkMaxBetweenExclusive<kAVX512>(n, remaining[0], 256, readVersion,
+                                             tls)) {
         ok = false;
         return true;
       }
@@ -2520,9 +2557,10 @@ template <bool kAVX512> struct CheckRangeLeftSide {
 // that are < key is <= readVersion
 template <bool kAVX512> struct CheckRangeRightSide {
   CheckRangeRightSide(Node *n, std::span<const uint8_t> key, int prefixLen,
-                      InternalVersionT readVersion, ConflictSet::Impl *impl)
+                      InternalVersionT readVersion, ConflictSet::Impl *impl,
+                      ReadContext *tls)
       : n(n), key(key), remaining(key), prefixLen(prefixLen),
-        readVersion(readVersion), impl(impl) {
+        readVersion(readVersion), impl(impl), tls(tls) {
 #if DEBUG_VERBOSE && !defined(NDEBUG)
     fprintf(stderr, "Check range right side to %s for keys starting with %s\n",
             printable(key).c_str(),
@@ -2536,6 +2574,7 @@ template <bool kAVX512> struct CheckRangeRightSide {
   int prefixLen;
   InternalVersionT readVersion;
   ConflictSet::Impl *impl;
+  ReadContext *tls;
   int searchPathLen = 0;
   bool ok;
 
@@ -2560,8 +2599,8 @@ template <bool kAVX512> struct CheckRangeRightSide {
         return true;
       }
 
-      if (!checkMaxBetweenExclusive<kAVX512>(n, -1, remaining[0],
-                                             readVersion)) {
+      if (!checkMaxBetweenExclusive<kAVX512>(n, -1, remaining[0], readVersion,
+                                             tls)) {
         ok = false;
         return true;
       }
@@ -2656,28 +2695,29 @@ template <bool kAVX512> struct CheckRangeRightSide {
 template <bool kAVX512>
 bool checkRangeReadImpl(Node *n, std::span<const uint8_t> begin,
                         std::span<const uint8_t> end,
-                        InternalVersionT readVersion, ConflictSet::Impl *impl) {
+                        InternalVersionT readVersion, ConflictSet::Impl *impl,
+                        ReadContext *tls) {
   int lcp = longestCommonPrefix(begin.data(), end.data(),
                                 std::min(begin.size(), end.size()));
   if (lcp == int(begin.size()) && end.size() == begin.size() + 1 &&
       end.back() == 0) {
-    return checkPointRead(n, begin, readVersion, impl);
+    return checkPointRead(n, begin, readVersion, impl, tls);
   }
   if (lcp == int(begin.size() - 1) && end.size() == begin.size() &&
       int(begin.back()) + 1 == int(end.back())) {
-    return checkPrefixRead(n, begin, readVersion, impl);
+    return checkPrefixRead(n, begin, readVersion, impl, tls);
   }
 
-  ++range_read_accum;
+  ++tls->range_read_accum;
 
   SearchStepWise search{n, begin.subspan(0, lcp)};
   Arena arena;
-  for (;; ++range_read_iterations_accum) {
+  for (;; ++tls->range_read_iterations_accum) {
     assert(getSearchPath(arena, search.n) <=>
                begin.subspan(0, lcp - search.remaining.size()) ==
            0);
     if (maxVersion(search.n, impl) <= readVersion) {
-      ++range_read_short_circuit_accum;
+      ++tls->range_read_short_circuit_accum;
       return true;
     }
     if (search.step()) {
@@ -2697,41 +2737,41 @@ bool checkRangeReadImpl(Node *n, std::span<const uint8_t> begin,
   lcp -= consumed;
 
   if (lcp == int(begin.size())) {
-    CheckRangeRightSide<kAVX512> checkRangeRightSide{n, end, lcp, readVersion,
-                                                     impl};
+    CheckRangeRightSide<kAVX512> checkRangeRightSide{n,           end,  lcp,
+                                                     readVersion, impl, tls};
     while (!checkRangeRightSide.step())
       ;
     return checkRangeRightSide.ok;
   }
 
   if (!checkRangeStartsWith<kAVX512>(n, begin.subspan(0, lcp), begin[lcp],
-                                     end[lcp], readVersion, impl)) {
+                                     end[lcp], readVersion, impl, tls)) {
     return false;
   }
 
-  CheckRangeLeftSide<kAVX512> checkRangeLeftSide{n, begin, lcp + 1, readVersion,
-                                                 impl};
-  CheckRangeRightSide<kAVX512> checkRangeRightSide{n, end, lcp + 1, readVersion,
-                                                   impl};
+  CheckRangeLeftSide<kAVX512> checkRangeLeftSide{n,           begin, lcp + 1,
+                                                 readVersion, impl,  tls};
+  CheckRangeRightSide<kAVX512> checkRangeRightSide{n,           end,  lcp + 1,
+                                                   readVersion, impl, tls};
 
   for (;;) {
     bool leftDone = checkRangeLeftSide.step();
     bool rightDone = checkRangeRightSide.step();
     if (!leftDone && !rightDone) {
-      range_read_iterations_accum += 2;
+      tls->range_read_iterations_accum += 2;
       continue;
     }
     if (leftDone && rightDone) {
       break;
     } else if (leftDone) {
       while (!checkRangeRightSide.step()) {
-        ++range_read_iterations_accum;
+        ++tls->range_read_iterations_accum;
       }
       break;
     } else {
       assert(rightDone);
       while (!checkRangeLeftSide.step()) {
-        ++range_read_iterations_accum;
+        ++tls->range_read_iterations_accum;
       }
     }
     break;
@@ -2770,8 +2810,8 @@ checkRangeReadImpl<true>(Node *n, std::span<const uint8_t> begin,
 #if defined(__SANITIZE_THREAD__) || !defined(__x86_64__)
 bool checkRangeRead(Node *n, std::span<const uint8_t> begin,
                     std::span<const uint8_t> end, InternalVersionT readVersion,
-                    ConflictSet::Impl *impl) {
-  return checkRangeReadImpl<false>(n, begin, end, readVersion, impl);
+                    ConflictSet::Impl *impl, ReadContext *tls) {
+  return checkRangeReadImpl<false>(n, begin, end, readVersion, impl, tls);
 }
 #else
 __attribute__((target("default"))) bool
@@ -2788,18 +2828,16 @@ checkRangeRead(Node *n, std::span<const uint8_t> begin,
 }
 #endif
 
-thread_local double insert_iterations_accum;
-
 // Returns a pointer to the newly inserted node.  Caller must set
 // `entryPresent`, `entry` fields and `maxVersion` on the result.  The search
 // path of the result's parent will have `maxVersion` at least `writeVersion` as
 // a postcondition. Nodes along the search path to `key` may be invalidated.
 template <bool kBegin>
-[[nodiscard]] Node *
-insert(Node **self, std::span<const uint8_t> key, InternalVersionT writeVersion,
-       NodeAllocators *allocators, ConflictSet::Impl *impl) {
+[[nodiscard]] Node *insert(Node **self, std::span<const uint8_t> key,
+                           InternalVersionT writeVersion, WriteContext *tls,
+                           ConflictSet::Impl *impl) {
 
-  for (;; ++insert_iterations_accum) {
+  for (;; ++tls->insert_iterations_accum) {
 
     if ((*self)->partialKeyLen > 0) {
       // Handle an existing partial key
@@ -2811,7 +2849,7 @@ insert(Node **self, std::span<const uint8_t> key, InternalVersionT writeVersion,
         InternalVersionT oldMaxVersion = maxVersion(old, impl);
 
         // *self will have one child
-        *self = allocators->node3.allocate(partialKeyIndex);
+        *self = tls->allocate<Node3>(partialKeyIndex);
 
         memcpy((char *)*self + kNodeCopyBegin, (char *)old + kNodeCopyBegin,
                kNodeCopySize);
@@ -2824,8 +2862,7 @@ insert(Node **self, std::span<const uint8_t> key, InternalVersionT writeVersion,
         memcpy((*self)->partialKey(), old->partialKey(),
                (*self)->partialKeyLen);
 
-        getOrCreateChild(*self, old->partialKey()[partialKeyIndex],
-                         allocators) = old;
+        getOrCreateChild(*self, old->partialKey()[partialKeyIndex], tls) = old;
         old->parent = *self;
         old->parentsIndex = old->partialKey()[partialKeyIndex];
         setMaxVersion(old, impl, oldMaxVersion);
@@ -2865,9 +2902,9 @@ insert(Node **self, std::span<const uint8_t> key, InternalVersionT writeVersion,
       setMaxVersion(*self, impl, writeVersion);
     }
 
-    auto &child = getOrCreateChild(*self, key.front(), allocators);
+    auto &child = getOrCreateChild(*self, key.front(), tls);
     if (!child) {
-      child = allocators->node0.allocate(key.size() - 1);
+      child = tls->allocate<Node0>(key.size() - 1);
       child->numChildren = 0;
       child->entryPresent = false;
       child->partialKeyLen = 0;
@@ -2908,14 +2945,12 @@ void destroyTree(Node *root) {
   }
 }
 
-thread_local double entries_inserted_accum;
-
 void addPointWrite(Node *&root, std::span<const uint8_t> key,
-                   InternalVersionT writeVersion, NodeAllocators *allocators,
+                   InternalVersionT writeVersion, WriteContext *tls,
                    ConflictSet::Impl *impl) {
-  auto *n = insert<true>(&root, key, writeVersion, allocators, impl);
+  auto *n = insert<true>(&root, key, writeVersion, tls, impl);
   if (!n->entryPresent) {
-    ++entries_inserted_accum;
+    ++tls->entries_inserted_accum;
     auto *p = nextLogical(n);
 
     addKey(n);
@@ -2934,13 +2969,13 @@ void addPointWrite(Node *&root, std::span<const uint8_t> key,
 
 void addWriteRange(Node *&root, std::span<const uint8_t> begin,
                    std::span<const uint8_t> end, InternalVersionT writeVersion,
-                   NodeAllocators *allocators, ConflictSet::Impl *impl) {
+                   WriteContext *tls, ConflictSet::Impl *impl) {
 
   int lcp = longestCommonPrefix(begin.data(), end.data(),
                                 std::min(begin.size(), end.size()));
   if (lcp == int(begin.size()) && end.size() == begin.size() + 1 &&
       end.back() == 0) {
-    return addPointWrite(root, begin, writeVersion, allocators, impl);
+    return addPointWrite(root, begin, writeVersion, tls, impl);
   }
   const bool beginIsPrefix = lcp == int(begin.size());
   auto remaining = begin.subspan(0, lcp);
@@ -2977,8 +3012,7 @@ void addWriteRange(Node *&root, std::span<const uint8_t> begin,
   begin = begin.subspan(consumed, begin.size() - consumed);
   end = end.subspan(consumed, end.size() - consumed);
 
-  auto *beginNode =
-      insert<true>(useAsRoot, begin, writeVersion, allocators, impl);
+  auto *beginNode = insert<true>(useAsRoot, begin, writeVersion, tls, impl);
 
   const bool insertedBegin = !beginNode->entryPresent;
 
@@ -2986,7 +3020,7 @@ void addWriteRange(Node *&root, std::span<const uint8_t> begin,
   beginNode->entryPresent = true;
 
   if (insertedBegin) {
-    ++entries_inserted_accum;
+    ++tls->entries_inserted_accum;
     auto *p = nextLogical(beginNode);
     beginNode->entry.rangeVersion =
         p == nullptr ? InternalVersionT::zero
@@ -2999,7 +3033,7 @@ void addWriteRange(Node *&root, std::span<const uint8_t> begin,
   assert(writeVersion >= beginNode->entry.pointVersion);
   beginNode->entry.pointVersion = writeVersion;
 
-  auto *endNode = insert<false>(useAsRoot, end, writeVersion, allocators, impl);
+  auto *endNode = insert<false>(useAsRoot, end, writeVersion, tls, impl);
 
   const bool insertedEnd = !endNode->entryPresent;
 
@@ -3007,7 +3041,7 @@ void addWriteRange(Node *&root, std::span<const uint8_t> begin,
   endNode->entryPresent = true;
 
   if (insertedEnd) {
-    ++entries_inserted_accum;
+    ++tls->entries_inserted_accum;
     auto *p = nextLogical(endNode);
     endNode->entry.pointVersion =
         p == nullptr ? InternalVersionT::zero
@@ -3021,13 +3055,12 @@ void addWriteRange(Node *&root, std::span<const uint8_t> begin,
   if (beginIsPrefix && insertedEnd) {
     // beginNode may have been invalidated when inserting end. TODO can we do
     // better?
-    beginNode = insert<true>(useAsRoot, begin, writeVersion, allocators, impl);
+    beginNode = insert<true>(useAsRoot, begin, writeVersion, tls, impl);
     assert(beginNode->entryPresent);
   }
 
   for (beginNode = nextLogical(beginNode); beginNode != endNode;
-       beginNode =
-           erase(beginNode, allocators, impl, /*logical*/ true, endNode)) {
+       beginNode = erase(beginNode, tls, impl, /*logical*/ true, endNode)) {
   }
 }
 
@@ -3093,6 +3126,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
     timespec ts_begin;
     clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts_begin);
 #endif
+    ReadContext tls;
     int commits_accum = 0;
     int conflicts_accum = 0;
     int too_olds_accum = 0;
@@ -3108,32 +3142,34 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
           reads[i].readVersion < oldestVersionFullPrecision ? TooOld
           : (end.size() > 0
                  ? checkRangeRead(root, begin, end,
-                                  InternalVersionT(reads[i].readVersion), this)
+                                  InternalVersionT(reads[i].readVersion), this,
+                                  &tls)
                  : checkPointRead(root, begin,
-                                  InternalVersionT(reads[i].readVersion), this))
+                                  InternalVersionT(reads[i].readVersion), this,
+                                  &tls))
               ? Commit
               : Conflict;
       commits_accum += result[i] == Commit;
       conflicts_accum += result[i] == Conflict;
       too_olds_accum += result[i] == TooOld;
     }
-    point_read_total.add(std::exchange(point_read_accum, 0));
-    prefix_read_total.add(std::exchange(prefix_read_accum, 0));
-    range_read_total.add(std::exchange(range_read_accum, 0));
+    point_read_total.add(std::exchange(tls.point_read_accum, 0));
+    prefix_read_total.add(std::exchange(tls.prefix_read_accum, 0));
+    range_read_total.add(std::exchange(tls.range_read_accum, 0));
     range_read_node_scan_total.add(
-        std::exchange(range_read_node_scan_accum, 0));
+        std::exchange(tls.range_read_node_scan_accum, 0));
     point_read_short_circuit_total.add(
-        std::exchange(point_read_short_circuit_accum, 0));
+        std::exchange(tls.point_read_short_circuit_accum, 0));
     prefix_read_short_circuit_total.add(
-        std::exchange(prefix_read_short_circuit_accum, 0));
+        std::exchange(tls.prefix_read_short_circuit_accum, 0));
     range_read_short_circuit_total.add(
-        std::exchange(range_read_short_circuit_accum, 0));
+        std::exchange(tls.range_read_short_circuit_accum, 0));
     point_read_iterations_total.add(
-        std::exchange(point_read_iterations_accum, 0));
+        std::exchange(tls.point_read_iterations_accum, 0));
     prefix_read_iterations_total.add(
-        std::exchange(prefix_read_iterations_accum, 0));
+        std::exchange(tls.prefix_read_iterations_accum, 0));
     range_read_iterations_total.add(
-        std::exchange(range_read_iterations_accum, 0));
+        std::exchange(tls.range_read_iterations_accum, 0));
     commits_total.add(commits_accum);
     conflicts_total.add(conflicts_accum);
     too_olds_total.add(too_olds_accum);
@@ -3173,21 +3209,20 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
       auto end = std::span<const uint8_t>(w.end.p, w.end.len);
       if (w.end.len > 0) {
         keyUpdates += 3;
-        addWriteRange(root, begin, end, InternalVersionT(writeVersion),
-                      &allocators, this);
+        addWriteRange(root, begin, end, InternalVersionT(writeVersion), &tls,
+                      this);
       } else {
         keyUpdates += 2;
-        addPointWrite(root, begin, InternalVersionT(writeVersion), &allocators,
-                      this);
+        addPointWrite(root, begin, InternalVersionT(writeVersion), &tls, this);
       }
     }
 
     memory_bytes.set(totalBytes);
-    nodes_allocated_total.add(std::exchange(nodes_allocated_accum, 0));
-    nodes_released_total.add(std::exchange(nodes_released_accum, 0));
-    entries_inserted_total.add(std::exchange(entries_inserted_accum, 0));
-    entries_erased_total.add(std::exchange(entries_erased_accum, 0));
-    insert_iterations_total.add(std::exchange(insert_iterations_accum, 0));
+    nodes_allocated_total.add(std::exchange(tls.nodes_allocated_accum, 0));
+    nodes_released_total.add(std::exchange(tls.nodes_released_accum, 0));
+    entries_inserted_total.add(std::exchange(tls.entries_inserted_accum, 0));
+    entries_erased_total.add(std::exchange(tls.entries_erased_accum, 0));
+    insert_iterations_total.add(std::exchange(tls.insert_iterations_accum, 0));
     write_bytes_total.add(write_byte_accum);
   }
 
@@ -3219,9 +3254,9 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
         // node is greater than the point version of the left node
         assert(n->entry.rangeVersion <= oldestVersion);
         Node *dummy = nullptr;
-        n = erase(n, &allocators, this, /*logical*/ false, dummy);
+        n = erase(n, &tls, this, /*logical*/ false, dummy);
       } else {
-        maybeDecreaseCapacity(n, &allocators, this);
+        maybeDecreaseCapacity(n, &tls, this);
         n = nextPhysical(n);
       }
     }
@@ -3262,10 +3297,10 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
     keyUpdates = gcScanStep(keyUpdates);
 
     memory_bytes.set(totalBytes);
-    nodes_allocated_total.add(std::exchange(nodes_allocated_accum, 0));
-    nodes_released_total.add(std::exchange(nodes_released_accum, 0));
-    entries_inserted_total.add(std::exchange(entries_inserted_accum, 0));
-    entries_erased_total.add(std::exchange(entries_erased_accum, 0));
+    nodes_allocated_total.add(std::exchange(tls.nodes_allocated_accum, 0));
+    nodes_released_total.add(std::exchange(tls.nodes_released_accum, 0));
+    entries_inserted_total.add(std::exchange(tls.entries_inserted_accum, 0));
+    entries_erased_total.add(std::exchange(tls.entries_erased_accum, 0));
     oldest_version.set(oldestVersionFullPrecision);
   }
 
@@ -3278,15 +3313,15 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
     oldest_version.set(oldestVersionFullPrecision);
     newest_version.set(newestVersionFullPrecision);
 
-    allocators.~NodeAllocators();
-    new (&allocators) NodeAllocators();
+    tls.~WriteContext();
+    new (&tls) WriteContext();
 
     removalKeyArena = Arena{};
     removalKey = {};
     keyUpdates = 10;
 
     // Insert ""
-    root = allocators.node0.allocate(0);
+    root = tls.allocate<Node0>(0);
     root->numChildren = 0;
     root->parent = nullptr;
     rootMaxVersion = this->oldestVersion;
@@ -3313,7 +3348,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
     safe_free(metrics, metricsCount * sizeof(metrics[0]));
   }
 
-  NodeAllocators allocators;
+  WriteContext tls;
 
   Arena removalKeyArena;
   std::span<const uint8_t> removalKey;