Reordering seems to improve codegen

WIP
Use preserve_none and put continuation array in CheckAll
2024-09-23 15:28:51 -07:00 · 2024-09-23 15:11:48 -07:00 · 2024-09-23 14:53:16 -07:00 · 2024-09-23 13:32:56 -07:00 · 2024-09-23 12:52:30 -07:00 · 2024-09-21 14:28:15 -07:00
98 changed files with 643 additions and 91 deletions
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -24,6 +24,14 @@ repos:
        entry: "^#define SHOW_MEMORY 1$"
        language: pygrep
        types: [c++]
+  - repo: local
+    hooks:
+      - id: sim cache misses check
+        name: disallow checking in SIM_CACHE_MISSES=1
+        description: disallow checking in SIM_CACHE_MISSES=1
+        entry: "^#define SIM_CACHE_MISSES 1$"
+        language: pygrep
+        types: [c++]
  - repo: https://github.com/shellcheck-py/shellcheck-py
    rev: a23f6b85d0fdd5bb9d564e2579e678033debbdff # frozen: v0.10.0.1
    hooks:
--- a/Bench.cpp
+++ b/Bench.cpp
@@ -7,7 +7,6 @@
 void showMemory(const ConflictSet &cs);
 #endif

-#define ANKERL_NANOBENCH_IMPLEMENT
 #include "third_party/nanobench.h"

 constexpr int kNumKeys = 1000000;
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -72,12 +72,6 @@ else()
  add_link_options(-Wl,--gc-sections)
 endif()

-if(EMSCRIPTEN)
-  # https://github.com/emscripten-core/emscripten/issues/15377#issuecomment-1285167486
-  add_link_options(-lnodefs.js -lnoderawfs.js)
-  add_link_options(-s ALLOW_MEMORY_GROWTH)
-endif()
-
 if(NOT USE_SIMD_FALLBACK)
  cmake_push_check_state()
  list(APPEND CMAKE_REQUIRED_FLAGS -mavx)
@@ -144,6 +138,8 @@ include(CTest)
 # disable tests if this is being used through e.g. FetchContent
 if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND BUILD_TESTING)

+  add_library(nanobench ${CMAKE_CURRENT_SOURCE_DIR}/nanobench.cpp)
+
  set(TEST_FLAGS -Wall -Wextra -Wunreachable-code -Wpedantic -UNDEBUG)

  # corpus tests, which are tests curated by libfuzzer. The goal is to get broad
@@ -191,6 +187,7 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND BUILD_TESTING)
  target_include_directories(conflict_set_main
                             PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include)
  target_compile_definitions(conflict_set_main PRIVATE ENABLE_MAIN)
+  target_link_libraries(conflict_set_main PRIVATE nanobench)

  if(NOT APPLE)
    # libfuzzer target, to generate/manage corpus
@@ -336,7 +333,7 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND BUILD_TESTING)

  # bench
  add_executable(conflict_set_bench Bench.cpp)
-  target_link_libraries(conflict_set_bench PRIVATE ${PROJECT_NAME})
+  target_link_libraries(conflict_set_bench PRIVATE ${PROJECT_NAME} nanobench)
  set_target_properties(conflict_set_bench PROPERTIES SKIP_BUILD_RPATH ON)
  add_executable(real_data_bench RealDataBench.cpp)
  target_link_libraries(real_data_bench PRIVATE ${PROJECT_NAME})
@@ -351,6 +348,14 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND BUILD_TESTING)
  add_executable(server_bench ServerBench.cpp)
  target_link_libraries(server_bench PRIVATE ${PROJECT_NAME})
  set_target_properties(server_bench PROPERTIES SKIP_BUILD_RPATH ON)
+
+  add_executable(interleaving_test InterleavingTest.cpp)
+  # work around lack of musttail for gcc; genex (not CMAKE_BUILD_TYPE) so that
+  # multi-config generators get the flags in their Debug configuration too
+  target_compile_options(
+    interleaving_test
+    PRIVATE "$<$<AND:$<CXX_COMPILER_ID:GNU>,$<CONFIG:Debug>>:-Og;-foptimize-sibling-calls>")
+  target_link_libraries(interleaving_test PRIVATE nanobench)
 endif()

 # packaging
--- a/ConflictSet.cpp
+++ b/ConflictSet.cpp
@@ -48,6 +48,17 @@ limitations under the License.
 #endif
 #endif

+#define SIM_CACHE_MISSES 0
+#if SIM_CACHE_MISSES
+constexpr void simCacheMiss(void *x) {
+  if (x) {
+    _mm_clflush(x);
+  }
+}
+#else
+constexpr void simCacheMiss(void *) {}
+#endif
+
 #include <memcheck.h>

 using namespace weaselab;
@@ -836,21 +847,29 @@ int getNodeIndex(Node16 *self, uint8_t index) {

 // Precondition - an entry for index must exist in the node
 Node *&getChildExists(Node3 *self, uint8_t index) {
-  return self->children[getNodeIndex(self, index)];
+  auto &result = self->children[getNodeIndex(self, index)];
+  simCacheMiss(result);
+  return result;
 }
 // Precondition - an entry for index must exist in the node
 Node *&getChildExists(Node16 *self, uint8_t index) {
-  return self->children[getNodeIndex(self, index)];
+  auto &result = self->children[getNodeIndex(self, index)];
+  simCacheMiss(result);
+  return result;
 }
 // Precondition - an entry for index must exist in the node
 Node *&getChildExists(Node48 *self, uint8_t index) {
  assert(self->bitSet.test(index));
-  return self->children[self->index[index]];
+  auto &result = self->children[self->index[index]];
+  simCacheMiss(result);
+  return result;
 }
 // Precondition - an entry for index must exist in the node
 Node *&getChildExists(Node256 *self, uint8_t index) {
  assert(self->bitSet.test(index));
-  return self->children[index];
+  auto &result = self->children[index];
+  simCacheMiss(result);
+  return result;
 }

 // Precondition - an entry for index must exist in the node
@@ -1030,6 +1049,7 @@ ChildAndMaxVersion getChildAndMaxVersion(Node3 *self, uint8_t index) {
  if (i < 0) {
    return {};
  }
+  simCacheMiss(self->children[i]);
  return {self->children[i], self->childMaxVersion[i]};
 }
 ChildAndMaxVersion getChildAndMaxVersion(Node16 *self, uint8_t index) {
@@ -1037,6 +1057,7 @@ ChildAndMaxVersion getChildAndMaxVersion(Node16 *self, uint8_t index) {
  if (i < 0) {
    return {};
  }
+  simCacheMiss(self->children[i]);
  return {self->children[i], self->childMaxVersion[i]};
 }
 ChildAndMaxVersion getChildAndMaxVersion(Node48 *self, uint8_t index) {
@@ -1044,9 +1065,11 @@ ChildAndMaxVersion getChildAndMaxVersion(Node48 *self, uint8_t index) {
  if (i < 0) {
    return {};
  }
+  simCacheMiss(self->children[i]);
  return {self->children[i], self->childMaxVersion[i]};
 }
 ChildAndMaxVersion getChildAndMaxVersion(Node256 *self, uint8_t index) {
+  simCacheMiss(self->children[index]);
  return {self->children[index], self->childMaxVersion[index]};
 }

@@ -1072,6 +1095,7 @@ Node *getChildGeq(Node0 *, int) { return nullptr; }
 Node *getChildGeq(Node3 *n, int child) {
  for (int i = 0; i < n->numChildren; ++i) {
    if (n->index[i] >= child) {
+      simCacheMiss(n->children[i]);
      return n->children[i];
    }
  }
@@ -1090,7 +1114,10 @@ Node *getChildGeq(Node16 *self, int child) {
  __m128i results = _mm_cmpeq_epi8(key_vec, _mm_min_epu8(key_vec, indices));
  int mask = (1 << self->numChildren) - 1;
  uint32_t bitfield = _mm_movemask_epi8(results) & mask;
-  return bitfield == 0 ? nullptr : self->children[std::countr_zero(bitfield)];
+  auto *result =
+      bitfield == 0 ? nullptr : self->children[std::countr_zero(bitfield)];
+  simCacheMiss(result);
+  return result;
 #elif defined(HAS_ARM_NEON)
  uint8x16_t indices;
  memcpy(&indices, self->index, sizeof(self->index));
@@ -1126,13 +1153,16 @@ Node *getChildGeq(Node48 *self, int child) {
  if (c < 0) {
    return nullptr;
  }
-  return self->children[self->index[c]];
+  auto *result = self->children[self->index[c]];
+  simCacheMiss(result);
+  return result;
 }
 Node *getChildGeq(Node256 *self, int child) {
  int c = self->bitSet.firstSetGeq(child);
  if (c < 0) {
    return nullptr;
  }
+  simCacheMiss(self->children[c]);
  return self->children[c];
 }

@@ -1156,20 +1186,26 @@ Node *getChildGeq(Node *self, int child) {
 // Precondition: self has a child
 Node *getFirstChildExists(Node3 *self) {
  assert(self->numChildren > 0);
+  simCacheMiss(self->children[0]);
  return self->children[0];
 }
 // Precondition: self has a child
 Node *getFirstChildExists(Node16 *self) {
  assert(self->numChildren > 0);
+  simCacheMiss(self->children[0]);
  return self->children[0];
 }
 // Precondition: self has a child
 Node *getFirstChildExists(Node48 *self) {
-  return self->children[self->index[self->bitSet.firstSetGeq(0)]];
+  auto *result = self->children[self->index[self->bitSet.firstSetGeq(0)]];
+  simCacheMiss(result);
+  return result;
 }
 // Precondition: self has a child
 Node *getFirstChildExists(Node256 *self) {
-  return self->children[self->bitSet.firstSetGeq(0)];
+  auto *result = self->children[self->bitSet.firstSetGeq(0)];
+  simCacheMiss(result);
+  return result;
 }

 // Precondition: self has a child
@@ -3009,34 +3045,288 @@ Node *firstGeqPhysical(Node *n, const std::span<const uint8_t> key) {
  }
 }

+#ifndef __has_attribute
+#define __has_attribute(x) 0
+#endif
+
+#if __has_attribute(musttail)
+#define MUSTTAIL __attribute__((musttail))
+#else
+#define MUSTTAIL
+#endif
+
+#if __has_attribute(preserve_none)
+#define CONTINUATION_CALLING_CONVENTION __attribute__((preserve_none))
+#else
+#define CONTINUATION_CALLING_CONVENTION
+#endif
+
+typedef CONTINUATION_CALLING_CONVENTION void (*continuation)(struct CheckAll *,
+                                                             int64_t prevJob,
+                                                             int64_t job,
+                                                             int64_t started,
+                                                             int64_t count);
+
+// State relevant to a particular query
+struct CheckJob {
+  void setResult(bool ok) {
+    *result = ok ? ConflictSet::Commit : ConflictSet::Conflict;
+  }
+
+  [[nodiscard]] continuation init(const ConflictSet::ReadRange *read,
+                                  ConflictSet::Result *result, Node *root,
+                                  int64_t oldestVersionFullPrecision,
+                                  ReadContext *tls);
+
+  Node *n;
+  ChildAndMaxVersion childAndVersion;
+  std::span<const uint8_t> begin;
+  InternalVersionT readVersion;
+  ConflictSet::Result *result;
+};
+
+// State relevant to all queries
+struct CheckAll {
+  constexpr static int kConcurrent = 32;
+  CheckJob inProgress[kConcurrent];
+  continuation next[kConcurrent];
+  int nextJob[kConcurrent];
+  Node *root;
+  int64_t oldestVersionFullPrecision;
+  ReadContext *tls;
+  const ConflictSet::ReadRange *queries;
+  ConflictSet::Result *results;
+};
+// Moves on to the next job in the cyclic list and runs its continuation.
+CONTINUATION_CALLING_CONVENTION void keepGoing(CheckAll *context,
+                                               int64_t prevJob, int64_t job,
+                                               int64_t started, int64_t count) {
+  prevJob = job;
+  job = context->nextJob[job];
+  MUSTTAIL return context->next[job](context, prevJob, job, started, count);
+}
+// Called when a job finishes: refills its slot or unlinks it from the list.
+CONTINUATION_CALLING_CONVENTION void complete(CheckAll *context,
+                                              int64_t prevJob, int64_t job,
+                                              int64_t started, int64_t count) {
+  if (started == count) { // every query has already been started
+    if (prevJob == job) { // the list has shrunk to this single job
+      return;
+    }
+    context->nextJob[prevJob] = context->nextJob[job];
+    job = prevJob;
+  } else {
+    int temp = started++;
+    context->next[job] = context->inProgress[job].init(
+        context->queries + temp, context->results + temp, context->root,
+        context->oldestVersionFullPrecision, context->tls);
+  }
+  MUSTTAIL return keepGoing(context, prevJob, job, started, count);
+}
+
+namespace check_point_read_state_machine {
+
+CONTINUATION_CALLING_CONVENTION void
+down_left_spine(struct CheckAll *, int64_t prevJob, int64_t job,
+                int64_t started, int64_t count);
+CONTINUATION_CALLING_CONVENTION void iter(struct CheckAll *, int64_t prevJob,
+                                          int64_t job, int64_t started,
+                                          int64_t count);
+CONTINUATION_CALLING_CONVENTION void begin(struct CheckAll *, int64_t prevJob,
+                                           int64_t job, int64_t started,
+                                           int64_t count);
+// First step of a point read: counts it, then finishes or picks the next step.
+void begin(struct CheckAll *context, int64_t prevJob, int64_t job,
+           int64_t started, int64_t count) {
+  ++context->tls->point_read_accum;
+  auto *j = context->inProgress + job; // needed by the debug print below
+#if DEBUG_VERBOSE && !defined(NDEBUG)
+  fprintf(stderr, "Check point read: %s\n", printable(j->begin).c_str());
+#endif
+  // Empty key: answer from this node's entry, else descend to its first child.
+  if (j->begin.size() == 0) {
+    if (j->n->entryPresent) {
+      j->setResult(j->n->entry.pointVersion <= j->readVersion);
+      MUSTTAIL return complete(context, prevJob, job, started, count);
+    }
+    j->n = getFirstChildExists(j->n);
+    context->next[job] = down_left_spine;
+    __builtin_prefetch(j->n);
+    MUSTTAIL return keepGoing(context, prevJob, job, started, count);
+  }
+  // Otherwise look up the child for the first key byte; iter handles it next.
+  j->childAndVersion = getChildAndMaxVersion(j->n, j->begin[0]);
+  context->next[job] = iter;
+  __builtin_prefetch(j->childAndVersion.child);
+  MUSTTAIL return keepGoing(context, prevJob, job, started, count);
+}
+// One step of a point read: handles the child fetched by the previous step.
+void iter(struct CheckAll *context, int64_t prevJob, int64_t job,
+          int64_t started, int64_t count) {
+  auto *j = context->inProgress + job;
+  if (j->childAndVersion.child == nullptr) { // no child for begin[0]
+    auto c = getChildGeq(j->n, j->begin[0]);
+    if (c != nullptr) {
+      j->n = c;
+      context->next[job] = down_left_spine;
+      __builtin_prefetch(j->n);
+      MUSTTAIL return keepGoing(context, prevJob, job, started, count);
+    } else {
+      j->n = nextSibling(j->n);
+      if (j->n == nullptr) {
+        j->setResult(true);
+        MUSTTAIL return complete(context, prevJob, job, started, count);
+      }
+      context->next[job] = down_left_spine;
+      __builtin_prefetch(j->n);
+      MUSTTAIL return keepGoing(context, prevJob, job, started, count);
+    }
+  }
+  // Descend into the child and consume the key byte that selected it.
+  j->n = j->childAndVersion.child;
+  j->begin = j->begin.subspan(1, j->begin.size() - 1);
+
+  if (j->n->partialKeyLen > 0) {
+    int commonLen = std::min<int>(j->n->partialKeyLen, j->begin.size());
+    int i = longestCommonPrefix(j->n->partialKey(), j->begin.data(), commonLen);
+    if (i < commonLen) {
+      auto c = j->n->partialKey()[i] <=> j->begin[i];
+      if (c > 0) {
+        context->next[job] = down_left_spine;
+        MUSTTAIL return down_left_spine(context, prevJob, job, started, count);
+      } else {
+        j->n = nextSibling(j->n);
+        if (j->n == nullptr) {
+          j->setResult(true);
+          MUSTTAIL return complete(context, prevJob, job, started, count);
+        }
+        context->next[job] = down_left_spine;
+        __builtin_prefetch(j->n);
+        MUSTTAIL return keepGoing(context, prevJob, job, started, count);
+      }
+    }
+    if (commonLen == j->n->partialKeyLen) {
+      // partial key matches
+      j->begin = j->begin.subspan(commonLen, j->begin.size() - commonLen);
+    } else if (j->n->partialKeyLen > int(j->begin.size())) {
+      // n is the first physical node greater than remaining, and there's no
+      // eq node
+      context->next[job] = down_left_spine;
+      MUSTTAIL return down_left_spine(context, prevJob, job, started, count);
+    }
+  }
+  // Child's recorded max version is not newer than the read: report ok.
+  if (j->childAndVersion.maxVersion <= j->readVersion) {
+    ++context->tls->point_read_short_circuit_accum;
+    j->setResult(true);
+    MUSTTAIL return complete(context, prevJob, job, started, count);
+  }
+
+  ++context->tls->point_read_iterations_accum;
+
+  if (j->begin.size() == 0) {
+    if (j->n->entryPresent) {
+      j->setResult(j->n->entry.pointVersion <= j->readVersion);
+      MUSTTAIL return complete(context, prevJob, job, started, count);
+    }
+    j->n = getFirstChildExists(j->n);
+    context->next[job] = down_left_spine;
+    __builtin_prefetch(j->n);
+    MUSTTAIL return keepGoing(context, prevJob, job, started, count);
+  }
+  // Otherwise fetch the child for the next key byte and yield to other jobs.
+  j->childAndVersion = getChildAndMaxVersion(j->n, j->begin[0]);
+  __builtin_prefetch(j->childAndVersion.child);
+  // context->next[job] is already iter
+  MUSTTAIL return keepGoing(context, prevJob, job, started, count);
+}
+// Steps to the first child until a node has an entry; checks its rangeVersion.
+void down_left_spine(struct CheckAll *context, int64_t prevJob, int64_t job,
+                     int64_t started, int64_t count) {
+  auto *j = context->inProgress + job;
+  if (j->n->entryPresent) {
+    j->setResult(j->n->entry.rangeVersion <= j->readVersion);
+    MUSTTAIL return complete(context, prevJob, job, started, count);
+  }
+  j->n = getFirstChildExists(j->n);
+  __builtin_prefetch(j->n);
+  // context->next[job] is already down_left_spine
+  MUSTTAIL return keepGoing(context, prevJob, job, started, count);
+}
+
+} // namespace check_point_read_state_machine
+
+continuation CheckJob::init(const ConflictSet::ReadRange *read,
+                            ConflictSet::Result *result, Node *root,
+                            int64_t oldestVersionFullPrecision,
+                            ReadContext *tls) {
+  auto begin = std::span<const uint8_t>(read->begin.p, read->begin.len);
+  auto end = std::span<const uint8_t>(read->end.p, read->end.len);
+  if (read->readVersion < oldestVersionFullPrecision) {
+    *result = ConflictSet::TooOld;
+    return complete;
+  } else if (end.size() == 0) {
+    this->begin = begin;
+    this->n = root;
+    this->readVersion = InternalVersionT(read->readVersion);
+    this->result = result;
+    return check_point_read_state_machine::begin;
+    // *result =
+    //     checkPointRead(root, begin, InternalVersionT(read->readVersion), tls)
+    //         ? ConflictSet::Commit
+    //         : ConflictSet::Conflict;
+    // return complete;
+  } else {
+    *result = checkRangeRead(root, begin, end,
+                             InternalVersionT(read->readVersion), tls)
+                  ? ConflictSet::Commit
+                  : ConflictSet::Conflict;
+    return complete;
+  }
+}
+
 struct __attribute__((visibility("hidden"))) ConflictSet::Impl {

  void check(const ReadRange *reads, Result *result, int count) {
+    assert(oldestVersionFullPrecision >=
+           newestVersionFullPrecision - kNominalVersionWindow);
+
+    if (count == 0) {
+      return;
+    }
+
    ReadContext tls;
    tls.impl = this;
    int64_t check_byte_accum = 0;
+
+    CheckAll context;
+    context.oldestVersionFullPrecision = oldestVersionFullPrecision;
+    context.queries = reads;
+    context.results = result;
+    context.root = root;
+    context.tls = &tls;
+
+    int64_t started = std::min(context.kConcurrent, count);
+    for (int i = 0; i < started; i++) {
+      context.next[i] = context.inProgress[i].init(
+          reads + i, result + i, root, oldestVersionFullPrecision, &tls);
+      context.nextJob[i] = i + 1;
+    }
+    context.nextJob[started - 1] = 0;
+    int prevJob = started - 1;
+    int job = 0;
+    context.next[job](&context, prevJob, job, started, count);
+
    for (int i = 0; i < count; ++i) {
      assert(reads[i].readVersion >= 0);
      assert(reads[i].readVersion <= newestVersionFullPrecision);
      const auto &r = reads[i];
      check_byte_accum += r.begin.len + r.end.len;
-      auto begin = std::span<const uint8_t>(r.begin.p, r.begin.len);
-      auto end = std::span<const uint8_t>(r.end.p, r.end.len);
-      assert(oldestVersionFullPrecision >=
-             newestVersionFullPrecision - kNominalVersionWindow);
-      result[i] =
-          reads[i].readVersion < oldestVersionFullPrecision ? TooOld
-          : (end.size() > 0
-                 ? checkRangeRead(root, begin, end,
-                                  InternalVersionT(reads[i].readVersion), &tls)
-                 : checkPointRead(root, begin,
-                                  InternalVersionT(reads[i].readVersion), &tls))
-              ? Commit
-              : Conflict;
      tls.commits_accum += result[i] == Commit;
      tls.conflicts_accum += result[i] == Conflict;
      tls.too_olds_accum += result[i] == TooOld;
    }
+
    point_read_total.add(tls.point_read_accum);
    prefix_read_total.add(tls.prefix_read_accum);
    range_read_total.add(tls.range_read_accum);
@@ -3923,7 +4213,6 @@ struct __attribute__((visibility("default"))) PeakPrinter {

 #ifdef ENABLE_MAIN

-#define ANKERL_NANOBENCH_IMPLEMENT
 #include "third_party/nanobench.h"

 template <int kN> void benchRezero() {
@@ -3979,6 +4268,17 @@ template <int kN> void benchScan2() {
  });
 }

+void benchLCP(int len) {
+  ankerl::nanobench::Bench bench;
+  std::vector<uint8_t> lhs(len);
+  std::vector<uint8_t> rhs(len);
+  bench.run("lcp " + std::to_string(len), [&]() {
+    bench.doNotOptimizeAway(lhs);
+    bench.doNotOptimizeAway(rhs);
+    bench.doNotOptimizeAway(longestCommonPrefix(lhs.data(), rhs.data(), len));
+  });
+}
+
 void printTree() {
  int64_t writeVersion = 0;
  ConflictSet::Impl cs{writeVersion};
@@ -4000,7 +4300,11 @@ void printTree() {
  debugPrintDot(stdout, cs.root, &cs);
 }

-int main(void) { benchScan1<16>(); }
+int main(void) {
+  for (int i = 0; i < 256; ++i) {
+    benchLCP(i);
+  }
+}
 #endif

 #ifdef ENABLE_FUZZ
--- a/InterleavingTest.cpp
+++ b/InterleavingTest.cpp
@@ -0,0 +1,256 @@
+#include <alloca.h>
+#include <cassert>
+#ifdef __x86_64__
+#include <immintrin.h>
+#endif
+
+#include "third_party/nanobench.h"
+
+struct Job {
+  int *input;
+  // Returned void* is a function pointer to the next continuation. We have to
+  // use void* because otherwise the type would be recursive.
+  typedef void *(*continuation)(Job *);
+  continuation next;
+};
+
+void *stepJob(Job *j) {
+  auto done = --(*j->input) == 0;
+#ifdef __x86_64__
+  _mm_clflush(j->input);
+#endif
+  return done ? nullptr : (void *)stepJob;
+}
+
+void sequential(Job **jobs, int count) {
+  for (int i = 0; i < count; ++i) {
+    do {
+      jobs[i]->next = (Job::continuation)jobs[i]->next(jobs[i]);
+    } while (jobs[i]->next);
+  }
+}
+
+void sequentialNoFuncPtr(Job **jobs, int count) {
+  for (int i = 0; i < count; ++i) {
+    while (stepJob(jobs[i]))
+      ;
+  }
+}
+
+void interleaveSwapping(Job **jobs, int remaining) {
+  int current = 0;
+  while (remaining > 0) {
+    auto next = (Job::continuation)jobs[current]->next(jobs[current]);
+    jobs[current]->next = next;
+    if (next == nullptr) {
+      jobs[current] = jobs[remaining - 1];
+      --remaining;
+    } else {
+      ++current;
+    }
+    if (current == remaining) {
+      current = 0;
+    }
+  }
+}
+
+void interleaveBoundedCyclicList(Job **jobs, int count) {
+  if (count == 0) {
+    return;
+  }
+
+  constexpr int kConcurrent = 32;
+  Job *inProgress[kConcurrent];
+  int nextJob[kConcurrent];
+
+  int started = std::min(kConcurrent, count);
+  for (int i = 0; i < started; i++) {
+    inProgress[i] = jobs[i];
+    nextJob[i] = i + 1;
+  }
+  nextJob[started - 1] = 0;
+
+  int prevJob = started - 1;
+  int job = 0;
+  for (;;) {
+    auto next = (Job::continuation)inProgress[job]->next(inProgress[job]);
+    inProgress[job]->next = next;
+    if (next == nullptr) {
+      if (started == count) {
+        if (prevJob == job)
+          break;
+        nextJob[prevJob] = nextJob[job];
+        job = prevJob;
+      } else {
+        int temp = started++;
+        inProgress[job] = jobs[temp];
+      }
+    }
+    prevJob = job;
+    job = nextJob[job];
+  }
+}
+
+#ifndef __has_attribute
+#define __has_attribute(x) 0
+#endif
+
+#if __has_attribute(musttail)
+#define MUSTTAIL __attribute__((musttail))
+#else
+#define MUSTTAIL
+#endif
+
+struct Context {
+  constexpr static int kConcurrent = 32;
+  Job **jobs;
+  Job *inProgress[kConcurrent];
+  void (*continuation[kConcurrent])(Context *, int64_t prevJob, int64_t job,
+                                    int64_t started, int64_t count);
+  int nextJob[kConcurrent];
+};
+
+void keepGoing(Context *context, int64_t prevJob, int64_t job, int64_t started,
+               int64_t count) {
+  prevJob = job;
+  job = context->nextJob[job];
+  MUSTTAIL return context->continuation[job](context, prevJob, job, started,
+                                             count);
+}
+
+void stepJobTailCall(Context *context, int64_t prevJob, int64_t job,
+                     int64_t started, int64_t count);
+
+void complete(Context *context, int64_t prevJob, int64_t job, int64_t started,
+              int64_t count) {
+  if (started == count) {
+    if (prevJob == job) {
+      return;
+    }
+    context->nextJob[prevJob] = context->nextJob[job];
+    job = prevJob;
+  } else {
+    context->inProgress[job] = context->jobs[started++];
+    context->continuation[job] = stepJobTailCall;
+  }
+  prevJob = job;
+  job = context->nextJob[job];
+  MUSTTAIL return context->continuation[job](context, prevJob, job, started,
+                                             count);
+}
+
+void stepJobTailCall(Context *context, int64_t prevJob, int64_t job,
+                     int64_t started, int64_t count) {
+  auto *j = context->inProgress[job];
+  auto done = --(*j->input) == 0;
+#ifdef __x86_64__
+  _mm_clflush(j->input);
+#endif
+  if (done) {
+    MUSTTAIL return complete(context, prevJob, job, started, count);
+  } else {
+    context->continuation[job] = stepJobTailCall;
+    MUSTTAIL return keepGoing(context, prevJob, job, started, count);
+  }
+}
+
+void useTailCalls(Job **jobs, int count) {
+  if (count == 0) {
+    return;
+  }
+  Context context;
+  context.jobs = jobs;
+  int64_t started = std::min(Context::kConcurrent, count);
+  for (int i = 0; i < started; i++) {
+    context.inProgress[i] = jobs[i];
+    context.nextJob[i] = i + 1;
+    context.continuation[i] = stepJobTailCall;
+  }
+  context.nextJob[started - 1] = 0;
+  int prevJob = started - 1;
+  int job = 0;
+  return context.continuation[job](&context, prevJob, job, started, count);
+}
+// Round-robin over all jobs at once using a cyclic linked list of indices.
+void interleaveCyclicList(Job **jobs, int count) {
+  if (count <= 0) return; // no jobs: nextJob[count - 1] would be out of bounds
+  auto *nextJob = (int *)alloca(sizeof(int) * count);
+  for (int i = 0; i < count - 1; ++i) {
+    nextJob[i] = i + 1;
+  }
+  nextJob[count - 1] = 0;
+  // prevJob trails job around the ring so a finished job can be unlinked.
+  int prevJob = count - 1;
+  int job = 0;
+  for (;;) {
+    auto next = (Job::continuation)jobs[job]->next(jobs[job]);
+    jobs[job]->next = next;
+    if (next == nullptr) {
+      if (prevJob == job)
+        break;
+      nextJob[prevJob] = nextJob[job];
+      job = prevJob;
+    }
+    prevJob = job;
+    job = nextJob[job];
+  }
+}
+
+int main() {
+  ankerl::nanobench::Bench bench;
+
+  constexpr int kNumJobs = 10000;
+  bench.relative(true);
+
+  Job jobs[kNumJobs];
+  Job jobsCopy[kNumJobs];
+  int iters = 0;
+  int originalInput[kNumJobs];
+  for (int i = 0; i < kNumJobs; ++i) {
+    originalInput[i] = rand() % 5 + 3;
+    jobs[i].input = new int{originalInput[i]};
+    jobs[i].next = stepJob;
+    iters += *jobs[i].input;
+  }
+  bench.batch(iters);
+
+  for (auto [scheduler, name] :
+       {std::make_pair(sequentialNoFuncPtr, "sequentialNoFuncPtr"),
+        std::make_pair(sequential, "sequential"),
+        std::make_pair(useTailCalls, "useTailCalls"),
+        std::make_pair(interleaveSwapping, "interleavingSwapping"),
+        std::make_pair(interleaveBoundedCyclicList,
+                       "interleaveBoundedCyclicList"),
+        std::make_pair(interleaveCyclicList, "interleaveCyclicList")}) {
+    for (int i = 0; i < kNumJobs; ++i) {
+      *jobs[i].input = originalInput[i];
+    }
+    memcpy(jobsCopy, jobs, sizeof(jobs));
+    Job *ps[kNumJobs];
+    for (int i = 0; i < kNumJobs; ++i) {
+      ps[i] = jobsCopy + i;
+    }
+    scheduler(ps, kNumJobs);
+    for (int i = 0; i < kNumJobs; ++i) {
+      if (*jobsCopy[i].input != 0) {
+        fprintf(stderr, "%s failed\n", name);
+        abort();
+      }
+    }
+
+    bench.run(name, [&]() {
+      for (int i = 0; i < kNumJobs; ++i) {
+        *jobs[i].input = originalInput[i];
+      }
+      memcpy(jobsCopy, jobs, sizeof(jobs));
+      Job *ps[kNumJobs];
+      for (int i = 0; i < kNumJobs; ++i) {
+        ps[i] = jobsCopy + i;
+      }
+      scheduler(ps, kNumJobs);
+    });
+  }
+  for (int i = 0; i < kNumJobs; ++i) {
+    delete jobs[i].input;
+  }
+}
--- a/ServerBench.cpp
+++ b/ServerBench.cpp
@@ -1,4 +1,5 @@
 #include <atomic>
+#include <cstdint>
 #include <errno.h>
 #include <netdb.h>
 #include <stdio.h>
@@ -21,78 +22,55 @@

 std::atomic<int64_t> transactions;

-constexpr int kBaseSearchDepth = 115;
 constexpr int kWindowSize = 10000000;

-std::string numToKey(int64_t num) {
+// Number of distinct 8-byte key prefixes that workload() writes and then
+// samples from for its reads.
+constexpr int kNumPrefixes = 250000;
+
+// Builds a key of sizeof(int64_t) + suffixLen bytes: `num` byte-swapped with
+// __builtin_bswap64 (i.e. big-endian order on a little-endian host), followed
+// by suffixLen zero bytes.
+std::string makeKey(int64_t num, int suffixLen) {
   std::string result;
-  result.resize(kBaseSearchDepth + sizeof(int64_t));
-  memset(result.data(), 0, kBaseSearchDepth);
+  result.resize(sizeof(int64_t) + suffixLen);
   int64_t be = __builtin_bswap64(num);
-  memcpy(result.data() + kBaseSearchDepth, &be, sizeof(int64_t));
+  memcpy(result.data(), &be, sizeof(int64_t));
+  memset(result.data() + sizeof(int64_t), 0, suffixLen);
   return result;
 }

 void workload(weaselab::ConflictSet *cs) {
   int64_t version = kWindowSize;
-  cs->addWrites(nullptr, 0, version);
+  // Populate phase 1: for every prefix i, add 50 writes (suffix lengths
+  // 0..49) at the initial version. end.len == 0 with end.p left unset
+  // presumably denotes a single-key write -- confirm against ConflictSet.h.
+  for (int i = 0; i < kNumPrefixes; ++i) {
+    for (int j = 0; j < 50; ++j) {
+      weaselab::ConflictSet::WriteRange wr;
+      auto k = makeKey(i, j);
+      wr.begin.p = (const uint8_t *)k.data();
+      wr.begin.len = k.size();
+      wr.end.len = 0;
+      cs->addWrites(&wr, 1, version);
+    }
+  }
+  // Populate phase 2: at the next version, add one more key per prefix with
+  // suffix length 50.
+  ++version;
+  for (int i = 0; i < kNumPrefixes; ++i) {
+    weaselab::ConflictSet::WriteRange wr;
+    auto k = makeKey(i, 50);
+    wr.begin.p = (const uint8_t *)k.data();
+    wr.begin.len = k.size();
+    wr.end.len = 0;
+    cs->addWrites(&wr, 1, version);
+  }
+
+  // Result buffer reused by every check() call in the loop below.
+  std::vector<weaselab::ConflictSet::Result> results(10);
   for (;; transactions.fetch_add(1, std::memory_order_relaxed)) {
-    // Reads
-    {
-      auto beginK = numToKey(version - kWindowSize);
-      auto endK = numToKey(version - 1);
-      auto pointRv = version - kWindowSize + rand() % kWindowSize + 1;
-      auto pointK = numToKey(pointRv);
-      weaselab::ConflictSet::ReadRange reads[] = {
-          {
-              {(const uint8_t *)pointK.data(), int(pointK.size())},
-              {nullptr, 0},
-              pointRv,
-          },
-          {
-              {(const uint8_t *)beginK.data(), int(beginK.size())},
-              {(const uint8_t *)endK.data(), int(endK.size())},
-              version - 2,
-          },
-      };
-      weaselab::ConflictSet::Result result[sizeof(reads) / sizeof(reads[0])];
-      cs->check(reads, result, sizeof(reads) / sizeof(reads[0]));
-      // for (int i = 0; i < sizeof(reads) / sizeof(reads[0]); ++i) {
-      //   if (result[i] != weaselab::ConflictSet::Commit) {
-      //     fprintf(stderr, "Unexpected conflict: [%s, %s) @ %" PRId64 "\n",
-      //             printable(reads[i].begin).c_str(),
-      //             printable(reads[i].end).c_str(), reads[i].readVersion);
-      //     abort();
-      //   }
-      // }
+    // Each iteration checks 10 reads on randomly chosen prefixes with suffix
+    // length 49, all at readVersion == version - 1. The loop never exits;
+    // `transactions` is bumped once per completed iteration.
+    std::vector<std::string> keys(10);
+    for (auto &k : keys) {
+      k = makeKey(rand() % kNumPrefixes, 49);
     }
-    // Writes
-    {
-      weaselab::ConflictSet::WriteRange w;
-      auto k = numToKey(version);
-      w.begin.p = (const uint8_t *)k.data();
-      w.end.len = 0;
-      if (version % (kWindowSize / 2) == 0) {
-        for (int l = 0; l <= k.size(); ++l) {
-          w.begin.len = l;
-          cs->addWrites(&w, 1, version);
-        }
-      } else {
-        w.begin.len = k.size();
-        cs->addWrites(&w, 1, version);
-        int64_t beginN = version - kWindowSize + rand() % kWindowSize;
-        auto b = numToKey(beginN);
-        auto e = numToKey(beginN + 1000);
-        w.begin.p = (const uint8_t *)b.data();
-        w.begin.len = b.size();
-        w.end.p = (const uint8_t *)e.data();
-        w.end.len = e.size();
-        cs->addWrites(&w, 1, version);
-      }
+    std::vector<weaselab::ConflictSet::ReadRange> reads(10);
+    for (int i = 0; i < reads.size(); ++i) {
+      reads[i].begin.p = (const uint8_t *)(keys[i].data());
+      reads[i].begin.len = keys[i].size();
+      reads[i].end.len = 0;
+      reads[i].readVersion = version - 1;
     }
-    // GC
-    cs->setOldestVersion(version - kWindowSize);
-    ++version;
+    cs->check(reads.data(), results.data(), 10);
   }
 }

--- a/corpus/008e3be49d62c3e7fa3785b882bdb65ee4b68977
+++ b/corpus/008e3be49d62c3e7fa3785b882bdb65ee4b68977
--- a/corpus/0292e5991e94d8861e8240171e7ce2c0a9742616
+++ b/corpus/0292e5991e94d8861e8240171e7ce2c0a9742616
--- a/corpus/0390280ec499c5f687428e700028c230e8ce944e
+++ b/corpus/0390280ec499c5f687428e700028c230e8ce944e
--- a/corpus/05a9e20eac1e7efb9b40fa032c35f76f87622210
+++ b/corpus/05a9e20eac1e7efb9b40fa032c35f76f87622210
--- a/corpus/076952738748aac93fca3e1e59b104547e4177ba
+++ b/corpus/076952738748aac93fca3e1e59b104547e4177ba
--- a/corpus/0b1ed19f50f707d5e805bcbed195bff0bcad8c7c
+++ b/corpus/0b1ed19f50f707d5e805bcbed195bff0bcad8c7c
--- a/corpus/0b287addaa1aa43c286023650cd407af888ea016
+++ b/corpus/0b287addaa1aa43c286023650cd407af888ea016
--- a/corpus/10970b15ecff0b0564ef8dc31c30680e5e4a4421
+++ b/corpus/10970b15ecff0b0564ef8dc31c30680e5e4a4421
--- a/corpus/1357a36bfcab4e48cf165b984e1840f3e8c6ce51
+++ b/corpus/1357a36bfcab4e48cf165b984e1840f3e8c6ce51
--- a/corpus/170a0ec4714b5ee34045037b38f038a8e3fa2035
+++ b/corpus/170a0ec4714b5ee34045037b38f038a8e3fa2035
--- a/corpus/175195f4753b812598e59f9d14742c1aeed67180
+++ b/corpus/175195f4753b812598e59f9d14742c1aeed67180
--- a/corpus/198314d10d86adbfb9d14a0a48cb01fd050cdd50
+++ b/corpus/198314d10d86adbfb9d14a0a48cb01fd050cdd50
--- a/corpus/1f1a703161be463bc598349de981d56fecff1f7a
+++ b/corpus/1f1a703161be463bc598349de981d56fecff1f7a
--- a/corpus/23098c203c4d176ab274847ed6131d1e72bb2f79
+++ b/corpus/23098c203c4d176ab274847ed6131d1e72bb2f79
--- a/corpus/23d4c4704607cea96775f90eec72ec444a53327e
+++ b/corpus/23d4c4704607cea96775f90eec72ec444a53327e
--- a/corpus/256d76a7b41e7ed6e7baca0a4968a872f670b40e
+++ b/corpus/256d76a7b41e7ed6e7baca0a4968a872f670b40e
--- a/corpus/26cb93458db8c96971c2dac90d1656309626057c
+++ b/corpus/26cb93458db8c96971c2dac90d1656309626057c
--- a/corpus/288d8bc1d39945470448074ce98e4cc92e95c4df
+++ b/corpus/288d8bc1d39945470448074ce98e4cc92e95c4df
--- a/corpus/2b2a3848abb733ce26310bce956cc299144e2c37
+++ b/corpus/2b2a3848abb733ce26310bce956cc299144e2c37
--- a/corpus/2cbf5f3c9d7c4fe38f2da48096d37d29fa75eec6
+++ b/corpus/2cbf5f3c9d7c4fe38f2da48096d37d29fa75eec6
--- a/corpus/2f850fa41006d53a3b4869295c319f077f585e27
+++ b/corpus/2f850fa41006d53a3b4869295c319f077f585e27
--- a/corpus/3246c5494cf81eb96a15a962352bf3f62cf44188
+++ b/corpus/3246c5494cf81eb96a15a962352bf3f62cf44188
--- a/corpus/32b1f5278c998e688bf7a44d89f54a2c183e75eb
+++ b/corpus/32b1f5278c998e688bf7a44d89f54a2c183e75eb
--- a/corpus/37636f0129daeb11db2665b746ec1d63650ddea7
+++ b/corpus/37636f0129daeb11db2665b746ec1d63650ddea7
--- a/corpus/3b6efa6b4581d3f75d307ecdd5aa53b4fa3fea90
+++ b/corpus/3b6efa6b4581d3f75d307ecdd5aa53b4fa3fea90
--- a/corpus/3feb7520632cdb454178f4fa5b63a5665d739e1c
+++ b/corpus/3feb7520632cdb454178f4fa5b63a5665d739e1c
--- a/corpus/4137466d9c418d90463f635ac6401ec69da598ab
+++ b/corpus/4137466d9c418d90463f635ac6401ec69da598ab
--- a/corpus/4206bbb85073ab30a5f04f17bcec0b6b09dec446
+++ b/corpus/4206bbb85073ab30a5f04f17bcec0b6b09dec446
--- a/corpus/4aee549dc867bd0fa32db38bd1680d66ef1a80e4
+++ b/corpus/4aee549dc867bd0fa32db38bd1680d66ef1a80e4
--- a/corpus/4e12dbd10635705a7230aa0f062e590264828cde
+++ b/corpus/4e12dbd10635705a7230aa0f062e590264828cde
--- a/corpus/4ee3990f6a97d901eaa69a32216a17f9f86e8c77
+++ b/corpus/4ee3990f6a97d901eaa69a32216a17f9f86e8c77
--- a/corpus/54952d3ba4a1dca49238f941dee26d26edc46db2
+++ b/corpus/54952d3ba4a1dca49238f941dee26d26edc46db2
--- a/corpus/57e1c08766ffade2c8221997d16b5e0ad7ecc93c
+++ b/corpus/57e1c08766ffade2c8221997d16b5e0ad7ecc93c
--- a/corpus/5acdc011bd62a40c8e018a2621545bf4e005a903
+++ b/corpus/5acdc011bd62a40c8e018a2621545bf4e005a903
--- a/corpus/63edba2479b48db7458149a7df2e2c224920877c
+++ b/corpus/63edba2479b48db7458149a7df2e2c224920877c
--- a/corpus/6769617662feb59ce9e326d3263cc75951e0a0bc
+++ b/corpus/6769617662feb59ce9e326d3263cc75951e0a0bc
--- a/corpus/688ce2c66897225949c811ce88c2fde511d69047
+++ b/corpus/688ce2c66897225949c811ce88c2fde511d69047
--- a/corpus/6b44ebb59ae80f72048e8c2c5572a7901166a461
+++ b/corpus/6b44ebb59ae80f72048e8c2c5572a7901166a461
--- a/corpus/6bea64428c692936d5017a7bfc9a9bbae4a7c646
+++ b/corpus/6bea64428c692936d5017a7bfc9a9bbae4a7c646
--- a/corpus/6bf94eb62dc0d1ac85f7529f3efbdea197c37c4a
+++ b/corpus/6bf94eb62dc0d1ac85f7529f3efbdea197c37c4a
--- a/corpus/6cc2abc6c19e461912bb07ae9d3031ebb1c6f2a7
+++ b/corpus/6cc2abc6c19e461912bb07ae9d3031ebb1c6f2a7
--- a/corpus/73894bc13e0bae69f58b228e148341143ebd5a8a
+++ b/corpus/73894bc13e0bae69f58b228e148341143ebd5a8a
--- a/corpus/74ebc76aa3859fb9db3b51af55713cbd845f99f0
+++ b/corpus/74ebc76aa3859fb9db3b51af55713cbd845f99f0
--- a/corpus/7754ffffb55baeb3e25d9c772d11e0efec95f751
+++ b/corpus/7754ffffb55baeb3e25d9c772d11e0efec95f751
--- a/corpus/7879186e12b1fa9339efa2aa37e80136df0e427c
+++ b/corpus/7879186e12b1fa9339efa2aa37e80136df0e427c
--- a/corpus/79e82b2aeffca8c33516ce8723ac5acfa85e2c61
+++ b/corpus/79e82b2aeffca8c33516ce8723ac5acfa85e2c61
--- a/corpus/7badef6c646baf6f1ac4aadedcb3842be1dd876c
+++ b/corpus/7badef6c646baf6f1ac4aadedcb3842be1dd876c
--- a/corpus/7e7b26c627325c68b5de1cb17d7e0bd35f6e2039
+++ b/corpus/7e7b26c627325c68b5de1cb17d7e0bd35f6e2039
--- a/corpus/7e7b3164f81316a25b103f3b391867692ea5cec0
+++ b/corpus/7e7b3164f81316a25b103f3b391867692ea5cec0
--- a/corpus/834f54738de969afb72c5355bc6bf226aee2bc46
+++ b/corpus/834f54738de969afb72c5355bc6bf226aee2bc46
--- a/corpus/841521614f7ecab8ba225275035ad413024dd552
+++ b/corpus/841521614f7ecab8ba225275035ad413024dd552
--- a/corpus/845ac7d3ef2e4e3844308ba4aa966cef34dae1c7
+++ b/corpus/845ac7d3ef2e4e3844308ba4aa966cef34dae1c7
--- a/corpus/86707e4f00fefde1e3fde7e5d3d66260658f0dbc
+++ b/corpus/86707e4f00fefde1e3fde7e5d3d66260658f0dbc
--- a/corpus/8c41ef3f7296bfb6205b075316ef446b481031fd
+++ b/corpus/8c41ef3f7296bfb6205b075316ef446b481031fd
--- a/corpus/9010fce51ca20e78fadcf10003ad80dec1a92b38
+++ b/corpus/9010fce51ca20e78fadcf10003ad80dec1a92b38
--- a/corpus/9a4c784fc270a66e635d42ca267ae9edeca2b948
+++ b/corpus/9a4c784fc270a66e635d42ca267ae9edeca2b948
--- a/corpus/9ae60c538e98c4569e1ed1744ee5be7cc8d02c51
+++ b/corpus/9ae60c538e98c4569e1ed1744ee5be7cc8d02c51
--- a/corpus/9be9e164978a0553b3a1cfe45c197ec1159ee7d0
+++ b/corpus/9be9e164978a0553b3a1cfe45c197ec1159ee7d0
--- a/corpus/a1a4d54aea9674d4b415cdae8626b6991e01ed1e
+++ b/corpus/a1a4d54aea9674d4b415cdae8626b6991e01ed1e
--- a/corpus/a4b24364f0a23987e8eeac1ef66e16e91766ff92
+++ b/corpus/a4b24364f0a23987e8eeac1ef66e16e91766ff92
--- a/corpus/a4f4b06d1c524d09ad4b9515353f0dcbc1e8ed92
+++ b/corpus/a4f4b06d1c524d09ad4b9515353f0dcbc1e8ed92
--- a/corpus/a8614158379e0b3f8eec546e17d3f73a9b8e81fe
+++ b/corpus/a8614158379e0b3f8eec546e17d3f73a9b8e81fe
--- a/corpus/aa26947b4c530eb1cd226950fe6202958dc9a755
+++ b/corpus/aa26947b4c530eb1cd226950fe6202958dc9a755
--- a/corpus/af593f241017f854b87f692ed318d7935d34510f
+++ b/corpus/af593f241017f854b87f692ed318d7935d34510f
--- a/corpus/b216fa66614fbd934e587e97f85f72ac7965f8ff
+++ b/corpus/b216fa66614fbd934e587e97f85f72ac7965f8ff
--- a/corpus/b5814790889fd3452daa4beed6f1167ee1986a17
+++ b/corpus/b5814790889fd3452daa4beed6f1167ee1986a17
--- a/corpus/b73d259e5934e4275aef3298f16aeaf63b294a84
+++ b/corpus/b73d259e5934e4275aef3298f16aeaf63b294a84
--- a/corpus/bc32def2213b5758b2e03413209c2f67e2768ca4
+++ b/corpus/bc32def2213b5758b2e03413209c2f67e2768ca4
--- a/corpus/be536e15193fbf20b4c6daf9da5543d4a5048ea0
+++ b/corpus/be536e15193fbf20b4c6daf9da5543d4a5048ea0
--- a/corpus/bf103ed62958ca20d91b5cc9774c1977b190f251
+++ b/corpus/bf103ed62958ca20d91b5cc9774c1977b190f251
--- a/corpus/c1cc87e1bdfb7e6a6d2e039bf7686c169a7d2bfb
+++ b/corpus/c1cc87e1bdfb7e6a6d2e039bf7686c169a7d2bfb
--- a/corpus/c49c4863fef47396b5d588a453275289c66e13cf
+++ b/corpus/c49c4863fef47396b5d588a453275289c66e13cf
--- a/corpus/c7937b0759b0319ef1eaa9e938bc2fae4a1bb9e3
+++ b/corpus/c7937b0759b0319ef1eaa9e938bc2fae4a1bb9e3
--- a/corpus/cc297c7df1c523973836d6eed05187664cf8b028
+++ b/corpus/cc297c7df1c523973836d6eed05187664cf8b028
--- a/corpus/cdf9b63169be156e3d3981f0046d63a833a8a9d4
+++ b/corpus/cdf9b63169be156e3d3981f0046d63a833a8a9d4
--- a/corpus/cfaba94477a30e01d857f9e63b26f04ade740205
+++ b/corpus/cfaba94477a30e01d857f9e63b26f04ade740205
--- a/corpus/cfd924c0ac6d2c4b8b091373cc110094c8ad47d1
+++ b/corpus/cfd924c0ac6d2c4b8b091373cc110094c8ad47d1
--- a/corpus/d12ac70ac49ad2d5b8fdc2cb1f800f9702c395bf
+++ b/corpus/d12ac70ac49ad2d5b8fdc2cb1f800f9702c395bf
--- a/corpus/d426172fde1e278c2c00cb6618ecb0fc18f246b6
+++ b/corpus/d426172fde1e278c2c00cb6618ecb0fc18f246b6
--- a/corpus/dba9de8358577750602a49cc36bfcdc1961f99f2
+++ b/corpus/dba9de8358577750602a49cc36bfcdc1961f99f2
--- a/corpus/dcbb39e697bfa20d9540b4cfe3560b90dafab2f8
+++ b/corpus/dcbb39e697bfa20d9540b4cfe3560b90dafab2f8
--- a/corpus/dce5be7f32f8303efb13a259c509d18df6baec4c
+++ b/corpus/dce5be7f32f8303efb13a259c509d18df6baec4c
--- a/corpus/dfcae858f0376f68bba82dd4285c36e045997c06
+++ b/corpus/dfcae858f0376f68bba82dd4285c36e045997c06
--- a/corpus/e9b37fd80338dbffa6b85349766cb4990c62a322
+++ b/corpus/e9b37fd80338dbffa6b85349766cb4990c62a322
--- a/corpus/ec9b099abb308a35df280f1f1d2ba997c479f6c5
+++ b/corpus/ec9b099abb308a35df280f1f1d2ba997c479f6c5
--- a/corpus/efce0eae3d927c293bfdf9b1cdfa29e706fb7dbb
+++ b/corpus/efce0eae3d927c293bfdf9b1cdfa29e706fb7dbb
--- a/corpus/f4e500fc111ba2e4713a8793fdde402c94cef933
+++ b/corpus/f4e500fc111ba2e4713a8793fdde402c94cef933
--- a/corpus/f8eef7202994e09cfe4efef3298be817f26f10f5
+++ b/corpus/f8eef7202994e09cfe4efef3298be817f26f10f5
--- a/corpus/fa56af5e7748c502a38a981d385d123700c15fbe
+++ b/corpus/fa56af5e7748c502a38a981d385d123700c15fbe
--- a/corpus/fc2a86450b65433b3604149b20006e5b4cffb20b
+++ b/corpus/fc2a86450b65433b3604149b20006e5b4cffb20b
--- a/corpus/fd98d4df19b922a79fab5cfaaf93d133f801d9a4
+++ b/corpus/fd98d4df19b922a79fab5cfaaf93d133f801d9a4
--- a/nanobench.cpp
+++ b/nanobench.cpp
@@ -0,0 +1,2 @@
+// Defining ANKERL_NANOBENCH_IMPLEMENT before including the header makes this
+// translation unit carry the nanobench implementation; presumably it is
+// built once as the `nanobench` library so benchmarks need only the header.
+#define ANKERL_NANOBENCH_IMPLEMENT
+#include "third_party/nanobench.h"
Author	SHA1	Message	Date
Andrew Noyes	ed67486077	Reordering seems to improve codegen Some checks failed Tests / Clang total: 3244, passed: 3244 Details Clang \|Total\|New\|Outstanding\|Fixed\|Trend \|:-:\|:-:\|:-:\|:-:\|:-: \|0\|0\|0\|0\|:clap: Details Tests / 64 bit versions total: 3244, passed: 3244 Details Tests / Debug total: 3242, passed: 3242 Details Tests / SIMD fallback total: 3244, passed: 3244 Details Tests / Release [gcc] total: 3244, passed: 3244 Details GNU C Compiler (gcc) \|Total\|New\|Outstanding\|Fixed\|Trend \|:-:\|:-:\|:-:\|:-:\|:-: \|0\|0\|0\|0\|:clap: Details Tests / Release [gcc,aarch64] total: 2419, passed: 2419 Details Tests / Coverage total: 2437, passed: 2437 Details Code Coverage #### Project Overview No changes detected, that affect the code coverage. * Line Coverage: 98.98% (1938/1958) * Branch Coverage: 68.67% (1497/2180) * Complexity Density: 0.00 * Lines of Code: 1958 #### Quality Gates Summary Output truncated. Details weaselab/conflict-set/pipeline/head There was a failure building this commit Details	2024-09-23 15:28:51 -07:00
Andrew Noyes	b376f6fdd5	WIP Some checks failed Tests / Clang total: 3244, passed: 3244 Details Clang \|Total\|New\|Outstanding\|Fixed\|Trend \|:-:\|:-:\|:-:\|:-:\|:-: \|0\|0\|0\|0\|:clap: Details Tests / 64 bit versions total: 3244, passed: 3244 Details Tests / Debug total: 3242, passed: 3242 Details Tests / SIMD fallback total: 3244, passed: 3244 Details Tests / Release [gcc] total: 3244, passed: 3244 Details GNU C Compiler (gcc) \|Total\|New\|Outstanding\|Fixed\|Trend \|:-:\|:-:\|:-:\|:-:\|:-: \|0\|0\|0\|0\|:clap: Details Tests / Release [gcc,aarch64] total: 2419, passed: 2419 Details Tests / Coverage total: 2437, passed: 2437 Details Code Coverage #### Project Overview No changes detected, that affect the code coverage. * Line Coverage: 98.98% (1938/1958) * Branch Coverage: 68.67% (1497/2180) * Complexity Density: 0.00 * Lines of Code: 1958 #### Quality Gates Summary Output truncated. Details weaselab/conflict-set/pipeline/head There was a failure building this commit Details	2024-09-23 15:11:48 -07:00
Andrew Noyes	6de63dd3fe	Use preserve_none and put continuation array in CheckAll	2024-09-23 14:53:16 -07:00
Andrew Noyes	3e5f13bf54	WIP - tests pass	2024-09-23 13:32:56 -07:00
Andrew Noyes	e7e1d1f7f5	Add tail-call based interleaving approach	2024-09-23 12:52:30 -07:00
Andrew Noyes	442658e983	Target ~1GB memory usage in server bench	2024-09-21 14:28:15 -07:00
Andrew Noyes	26f602215e	Accentuate cache misses for point reads in server_bench	2024-09-14 22:42:40 -07:00
Andrew Noyes	98236f81cb	Add missing __builtin_prefetch	2024-09-14 22:41:58 -07:00
Andrew Noyes	3593b72880	Disallow checking SIM_CACHE_MISSES=1	2024-09-10 22:23:37 -07:00
Andrew Noyes	814aac4ea7	Experiment with causing cache misses	2024-09-10 22:06:00 -07:00
Andrew Noyes	0550fa0016	Add "iter" state	2024-09-10 17:22:10 -07:00
Andrew Noyes	fe5cfb0336	Remove redundant cast	2024-09-10 17:06:45 -07:00
Andrew Noyes	82203515a0	check_point_read_state_machine::down_left_spine	2024-09-10 17:05:09 -07:00
Andrew Noyes	465372c734	Scaffolding to prepare for interleaving checks	2024-09-10 16:10:57 -07:00
Andrew Noyes	867136ff1b	Return pointer to next function	2024-09-09 21:59:49 -07:00
Andrew Noyes	4b8f7320d3	Call function pointer in job	2024-09-09 21:00:31 -07:00
Andrew Noyes	6628092384	Tinker with interleaveBoundedCyclicList	2024-09-09 20:25:40 -07:00
Andrew Noyes	a0a4f1afea	Only compile nanobench once	2024-09-09 20:10:55 -07:00
Andrew Noyes	ca479c03ce	Induce a cache miss in interleaving test	2024-09-09 17:55:35 -07:00
Andrew Noyes	0a2e133ab9	Add InterleavingTest to explore #5	2024-09-09 17:27:58 -07:00
Andrew Noyes	b0b31419b0	Remove vestigial emscripten from cmake All checks were successful Tests / Clang total: 3244, passed: 3244 Details Clang \|Total\|New\|Outstanding\|Fixed\|Trend \|:-:\|:-:\|:-:\|:-:\|:-: \|0\|0\|0\|0\|:clap: Details Tests / 64 bit versions total: 3244, passed: 3244 Details Tests / Debug total: 3242, passed: 3242 Details Tests / SIMD fallback total: 3244, passed: 3244 Details Tests / Release [gcc] total: 3244, passed: 3244 Details GNU C Compiler (gcc) \|Total\|New\|Outstanding\|Fixed\|Trend \|:-:\|:-:\|:-:\|:-:\|:-: \|0\|0\|0\|0\|:clap: Details Tests / Release [gcc,aarch64] total: 2419, passed: 2419 Details Tests / Coverage total: 2437, passed: 2437 Details Code Coverage #### Project Overview No changes detected, that affect the code coverage. * Line Coverage: 99.23% (1803/1817) * Branch Coverage: 68.36% (1426/2086) * Complexity Density: 0.00 * Lines of Code: 1817 #### Quality Gates Summary Output truncated. Details weaselab/conflict-set/pipeline/head This commit looks good Details	2024-09-06 20:29:59 -07:00
Andrew Noyes	5c0cc1edf5	Add longestCommonPrefix bench All checks were successful Tests / Clang total: 3244, passed: 3244 Details Clang \|Total\|New\|Outstanding\|Fixed\|Trend \|:-:\|:-:\|:-:\|:-:\|:-: \|0\|0\|0\|0\|:clap: Details Tests / 64 bit versions total: 3244, passed: 3244 Details Tests / Debug total: 3242, passed: 3242 Details Tests / SIMD fallback total: 3244, passed: 3244 Details Tests / Release [gcc] total: 3244, passed: 3244 Details GNU C Compiler (gcc) \|Total\|New\|Outstanding\|Fixed\|Trend \|:-:\|:-:\|:-:\|:-:\|:-: \|0\|0\|0\|0\|:clap: Details Tests / Release [gcc,aarch64] total: 2419, passed: 2419 Details Tests / Coverage total: 2437, passed: 2437 Details Code Coverage #### Project Overview No changes detected, that affect the code coverage. * Line Coverage: 99.23% (1803/1817) * Branch Coverage: 68.36% (1426/2086) * Complexity Density: 0.00 * Lines of Code: 1817 #### Quality Gates Summary Output truncated. Details weaselab/conflict-set/pipeline/head This commit looks good Details	2024-09-05 16:30:58 -07:00
Andrew Noyes	de47aa53b0	Add to corpus	2024-09-05 16:24:41 -07:00