10 Commits

Author SHA1 Message Date
b376f6fdd5 WIP
Some checks failed
Tests / Clang total: 3244, passed: 3244
Clang |Total|New|Outstanding|Fixed|Trend |:-:|:-:|:-:|:-:|:-: |0|0|0|0|:clap:
Tests / 64 bit versions total: 3244, passed: 3244
Tests / Debug total: 3242, passed: 3242
Tests / SIMD fallback total: 3244, passed: 3244
Tests / Release [gcc] total: 3244, passed: 3244
GNU C Compiler (gcc) |Total|New|Outstanding|Fixed|Trend |:-:|:-:|:-:|:-:|:-: |0|0|0|0|:clap:
Tests / Release [gcc,aarch64] total: 2419, passed: 2419
Tests / Coverage total: 2437, passed: 2437
Code Coverage #### Project Overview No changes detected, that affect the code coverage. * Line Coverage: 98.98% (1938/1958) * Branch Coverage: 68.67% (1497/2180) * Complexity Density: 0.00 * Lines of Code: 1958 #### Quality Gates Summary Output truncated.
weaselab/conflict-set/pipeline/head There was a failure building this commit
2024-09-23 15:11:48 -07:00
6de63dd3fe Use preserve_none and put continuation array in CheckAll 2024-09-23 14:53:16 -07:00
3e5f13bf54 WIP - tests pass 2024-09-23 13:32:56 -07:00
e7e1d1f7f5 Add tail-call based interleaving approach 2024-09-23 12:52:30 -07:00
442658e983 Target ~1GB memory usage in server bench 2024-09-21 14:28:15 -07:00
26f602215e Accentuate cache misses for point reads in server_bench 2024-09-14 22:42:40 -07:00
98236f81cb Add missing __builtin_prefetch 2024-09-14 22:41:58 -07:00
3593b72880 Disallow checking SIM_CACHE_MISSES=1 2024-09-10 22:23:37 -07:00
814aac4ea7 Experiment with causing cache misses 2024-09-10 22:06:00 -07:00
0550fa0016 Add "iter" state 2024-09-10 17:22:10 -07:00
5 changed files with 389 additions and 199 deletions

View File

@@ -24,6 +24,14 @@ repos:
entry: "^#define SHOW_MEMORY 1$" entry: "^#define SHOW_MEMORY 1$"
language: pygrep language: pygrep
types: [c++] types: [c++]
- repo: local
hooks:
- id: sim cache misses check
name: disallow checking in SIM_CACHE_MISSES=1
description: disallow checking in SIM_CACHE_MISSES=1
entry: "^#define SIM_CACHE_MISSES 1$"
language: pygrep
types: [c++]
- repo: https://github.com/shellcheck-py/shellcheck-py - repo: https://github.com/shellcheck-py/shellcheck-py
rev: a23f6b85d0fdd5bb9d564e2579e678033debbdff # frozen: v0.10.0.1 rev: a23f6b85d0fdd5bb9d564e2579e678033debbdff # frozen: v0.10.0.1
hooks: hooks:

View File

@@ -350,6 +350,11 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND BUILD_TESTING)
set_target_properties(server_bench PROPERTIES SKIP_BUILD_RPATH ON) set_target_properties(server_bench PROPERTIES SKIP_BUILD_RPATH ON)
add_executable(interleaving_test InterleavingTest.cpp) add_executable(interleaving_test InterleavingTest.cpp)
# work around lack of musttail for gcc
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_BUILD_TYPE STREQUAL "Debug")
target_compile_options(interleaving_test PRIVATE -Og
-foptimize-sibling-calls)
endif()
target_link_libraries(interleaving_test PRIVATE nanobench) target_link_libraries(interleaving_test PRIVATE nanobench)
endif() endif()

View File

@@ -48,6 +48,17 @@ limitations under the License.
#endif #endif
#endif #endif
#define SIM_CACHE_MISSES 0
#if SIM_CACHE_MISSES
constexpr void simCacheMiss(void *x) {
if (x) {
_mm_clflush(x);
}
}
#else
constexpr void simCacheMiss(void *) {}
#endif
#include <memcheck.h> #include <memcheck.h>
using namespace weaselab; using namespace weaselab;
@@ -836,21 +847,29 @@ int getNodeIndex(Node16 *self, uint8_t index) {
// Precondition - an entry for index must exist in the node // Precondition - an entry for index must exist in the node
Node *&getChildExists(Node3 *self, uint8_t index) { Node *&getChildExists(Node3 *self, uint8_t index) {
return self->children[getNodeIndex(self, index)]; auto &result = self->children[getNodeIndex(self, index)];
simCacheMiss(result);
return result;
} }
// Precondition - an entry for index must exist in the node // Precondition - an entry for index must exist in the node
Node *&getChildExists(Node16 *self, uint8_t index) { Node *&getChildExists(Node16 *self, uint8_t index) {
return self->children[getNodeIndex(self, index)]; auto &result = self->children[getNodeIndex(self, index)];
simCacheMiss(result);
return result;
} }
// Precondition - an entry for index must exist in the node // Precondition - an entry for index must exist in the node
Node *&getChildExists(Node48 *self, uint8_t index) { Node *&getChildExists(Node48 *self, uint8_t index) {
assert(self->bitSet.test(index)); assert(self->bitSet.test(index));
return self->children[self->index[index]]; auto &result = self->children[self->index[index]];
simCacheMiss(result);
return result;
} }
// Precondition - an entry for index must exist in the node // Precondition - an entry for index must exist in the node
Node *&getChildExists(Node256 *self, uint8_t index) { Node *&getChildExists(Node256 *self, uint8_t index) {
assert(self->bitSet.test(index)); assert(self->bitSet.test(index));
return self->children[index]; auto &result = self->children[index];
simCacheMiss(result);
return result;
} }
// Precondition - an entry for index must exist in the node // Precondition - an entry for index must exist in the node
@@ -1030,6 +1049,7 @@ ChildAndMaxVersion getChildAndMaxVersion(Node3 *self, uint8_t index) {
if (i < 0) { if (i < 0) {
return {}; return {};
} }
simCacheMiss(self->children[i]);
return {self->children[i], self->childMaxVersion[i]}; return {self->children[i], self->childMaxVersion[i]};
} }
ChildAndMaxVersion getChildAndMaxVersion(Node16 *self, uint8_t index) { ChildAndMaxVersion getChildAndMaxVersion(Node16 *self, uint8_t index) {
@@ -1037,6 +1057,7 @@ ChildAndMaxVersion getChildAndMaxVersion(Node16 *self, uint8_t index) {
if (i < 0) { if (i < 0) {
return {}; return {};
} }
simCacheMiss(self->children[i]);
return {self->children[i], self->childMaxVersion[i]}; return {self->children[i], self->childMaxVersion[i]};
} }
ChildAndMaxVersion getChildAndMaxVersion(Node48 *self, uint8_t index) { ChildAndMaxVersion getChildAndMaxVersion(Node48 *self, uint8_t index) {
@@ -1044,9 +1065,11 @@ ChildAndMaxVersion getChildAndMaxVersion(Node48 *self, uint8_t index) {
if (i < 0) { if (i < 0) {
return {}; return {};
} }
simCacheMiss(self->children[i]);
return {self->children[i], self->childMaxVersion[i]}; return {self->children[i], self->childMaxVersion[i]};
} }
ChildAndMaxVersion getChildAndMaxVersion(Node256 *self, uint8_t index) { ChildAndMaxVersion getChildAndMaxVersion(Node256 *self, uint8_t index) {
simCacheMiss(self->children[index]);
return {self->children[index], self->childMaxVersion[index]}; return {self->children[index], self->childMaxVersion[index]};
} }
@@ -1072,6 +1095,7 @@ Node *getChildGeq(Node0 *, int) { return nullptr; }
Node *getChildGeq(Node3 *n, int child) { Node *getChildGeq(Node3 *n, int child) {
for (int i = 0; i < n->numChildren; ++i) { for (int i = 0; i < n->numChildren; ++i) {
if (n->index[i] >= child) { if (n->index[i] >= child) {
simCacheMiss(n->children[i]);
return n->children[i]; return n->children[i];
} }
} }
@@ -1090,7 +1114,10 @@ Node *getChildGeq(Node16 *self, int child) {
__m128i results = _mm_cmpeq_epi8(key_vec, _mm_min_epu8(key_vec, indices)); __m128i results = _mm_cmpeq_epi8(key_vec, _mm_min_epu8(key_vec, indices));
int mask = (1 << self->numChildren) - 1; int mask = (1 << self->numChildren) - 1;
uint32_t bitfield = _mm_movemask_epi8(results) & mask; uint32_t bitfield = _mm_movemask_epi8(results) & mask;
return bitfield == 0 ? nullptr : self->children[std::countr_zero(bitfield)]; auto *result =
bitfield == 0 ? nullptr : self->children[std::countr_zero(bitfield)];
simCacheMiss(result);
return result;
#elif defined(HAS_ARM_NEON) #elif defined(HAS_ARM_NEON)
uint8x16_t indices; uint8x16_t indices;
memcpy(&indices, self->index, sizeof(self->index)); memcpy(&indices, self->index, sizeof(self->index));
@@ -1126,13 +1153,16 @@ Node *getChildGeq(Node48 *self, int child) {
if (c < 0) { if (c < 0) {
return nullptr; return nullptr;
} }
return self->children[self->index[c]]; auto *result = self->children[self->index[c]];
simCacheMiss(result);
return result;
} }
Node *getChildGeq(Node256 *self, int child) { Node *getChildGeq(Node256 *self, int child) {
int c = self->bitSet.firstSetGeq(child); int c = self->bitSet.firstSetGeq(child);
if (c < 0) { if (c < 0) {
return nullptr; return nullptr;
} }
simCacheMiss(self->children[c]);
return self->children[c]; return self->children[c];
} }
@@ -1156,20 +1186,26 @@ Node *getChildGeq(Node *self, int child) {
// Precondition: self has a child // Precondition: self has a child
Node *getFirstChildExists(Node3 *self) { Node *getFirstChildExists(Node3 *self) {
assert(self->numChildren > 0); assert(self->numChildren > 0);
simCacheMiss(self->children[0]);
return self->children[0]; return self->children[0];
} }
// Precondition: self has a child // Precondition: self has a child
Node *getFirstChildExists(Node16 *self) { Node *getFirstChildExists(Node16 *self) {
assert(self->numChildren > 0); assert(self->numChildren > 0);
simCacheMiss(self->children[0]);
return self->children[0]; return self->children[0];
} }
// Precondition: self has a child // Precondition: self has a child
Node *getFirstChildExists(Node48 *self) { Node *getFirstChildExists(Node48 *self) {
return self->children[self->index[self->bitSet.firstSetGeq(0)]]; auto *result = self->children[self->index[self->bitSet.firstSetGeq(0)]];
simCacheMiss(result);
return result;
} }
// Precondition: self has a child // Precondition: self has a child
Node *getFirstChildExists(Node256 *self) { Node *getFirstChildExists(Node256 *self) {
return self->children[self->bitSet.firstSetGeq(0)]; auto *result = self->children[self->bitSet.firstSetGeq(0)];
simCacheMiss(result);
return result;
} }
// Precondition: self has a child // Precondition: self has a child
@@ -3009,143 +3045,243 @@ Node *firstGeqPhysical(Node *n, const std::span<const uint8_t> key) {
} }
} }
struct CheckJob { #ifndef __has_attribute
Node *n; #define __has_attribute(x) 0
std::span<const uint8_t> begin; #endif
InternalVersionT readVersion;
ReadContext *tls;
ConflictSet::Result *result;
#if __has_attribute(musttail)
#define MUSTTAIL __attribute__((musttail))
#else
#define MUSTTAIL
#endif
#if __has_attribute(preserve_none)
#define CONTINUATION_CALLING_CONVENTION __attribute__((preserve_none))
#else
#define CONTINUATION_CALLING_CONVENTION
#endif
typedef CONTINUATION_CALLING_CONVENTION void (*continuation)(struct CheckAll *,
int64_t prevJob,
int64_t job,
int64_t started,
int64_t count);
// State relevant to a particular query
struct CheckJob {
void setResult(bool ok) { void setResult(bool ok) {
*result = ok ? ConflictSet::Commit : ConflictSet::Conflict; *result = ok ? ConflictSet::Commit : ConflictSet::Conflict;
} }
typedef void (*typeErasedContinuation)(void *); [[nodiscard]] continuation init(const ConflictSet::ReadRange *read,
ConflictSet::Result *result, Node *root,
int64_t oldestVersionFullPrecision,
ReadContext *tls);
// The type of a function that takes a CheckJob* and returns its own type Node *n;
struct continuation { ChildAndMaxVersion childAndVersion;
typedef continuation (*functionPtrType)(CheckJob *); std::span<const uint8_t> begin;
functionPtrType func; InternalVersionT readVersion;
continuation operator()(CheckJob *job) { return func(job); } ConflictSet::Result *result;
/*implicit*/ continuation(functionPtrType func) : func(func) {}
continuation() = default;
operator bool() { return func != nullptr; }
}; };
continuation next; // State relevant to all queries
void init(const ConflictSet::ReadRange *read, ConflictSet::Result *result, struct CheckAll {
Node *root, int64_t oldestVersionFullPrecision, ReadContext *tls); constexpr static int kConcurrent = 32;
const ConflictSet::ReadRange *queries;
ConflictSet::Result *results;
CheckJob inProgress[kConcurrent];
continuation next[kConcurrent];
int nextJob[kConcurrent];
Node *root;
int64_t oldestVersionFullPrecision;
ReadContext *tls;
}; };
CONTINUATION_CALLING_CONVENTION void keepGoing(CheckAll *context,
int64_t prevJob, int64_t job,
int64_t started, int64_t count) {
prevJob = job;
job = context->nextJob[job];
MUSTTAIL return context->next[job](context, prevJob, job, started, count);
}
CONTINUATION_CALLING_CONVENTION void complete(CheckAll *context,
int64_t prevJob, int64_t job,
int64_t started, int64_t count) {
if (started == count) {
if (prevJob == job) {
return;
}
context->nextJob[prevJob] = context->nextJob[job];
job = prevJob;
} else {
int temp = started++;
context->next[job] = context->inProgress[job].init(
context->queries + temp, context->results + temp, context->root,
context->oldestVersionFullPrecision, context->tls);
}
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
}
namespace check_point_read_state_machine { namespace check_point_read_state_machine {
CheckJob::continuation down_left_spine(CheckJob *job); CONTINUATION_CALLING_CONVENTION void
down_left_spine(struct CheckAll *, int64_t prevJob, int64_t job,
int64_t started, int64_t count);
CONTINUATION_CALLING_CONVENTION void iter(struct CheckAll *, int64_t prevJob,
int64_t job, int64_t started,
int64_t count);
CONTINUATION_CALLING_CONVENTION void begin(struct CheckAll *, int64_t prevJob,
int64_t job, int64_t started,
int64_t count);
// Logically this is the same as performing firstGeq and then checking against void begin(struct CheckAll *context, int64_t prevJob, int64_t job,
// point or range version according to cmp, but this version short circuits as int64_t started, int64_t count) {
// soon as it can prove that there's no conflict. ++context->tls->point_read_accum;
CheckJob::continuation begin(CheckJob *job) {
++job->tls->point_read_accum;
#if DEBUG_VERBOSE && !defined(NDEBUG) #if DEBUG_VERBOSE && !defined(NDEBUG)
fprintf(stderr, "Check point read: %s\n", printable(key).c_str()); fprintf(stderr, "Check point read: %s\n", printable(key).c_str());
#endif #endif
for (;; ++job->tls->point_read_iterations_accum) { auto *j = context->inProgress + job;
if (job->begin.size() == 0) {
if (job->n->entryPresent) { if (j->begin.size() == 0) {
job->setResult(job->n->entry.pointVersion <= job->readVersion); if (j->n->entryPresent) {
return nullptr; // Done j->setResult(j->n->entry.pointVersion <= j->readVersion);
MUSTTAIL return complete(context, prevJob, job, started, count);
} }
job->n = getFirstChildExists(job->n); j->n = getFirstChildExists(j->n);
return down_left_spine; context->next[job] = down_left_spine;
__builtin_prefetch(j->n);
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
} }
auto [child, maxV] = getChildAndMaxVersion(job->n, job->begin[0]); j->childAndVersion = getChildAndMaxVersion(j->n, j->begin[0]);
if (child == nullptr) { context->next[job] = iter;
auto c = getChildGeq(job->n, job->begin[0]); __builtin_prefetch(j->childAndVersion.child);
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
}
void iter(struct CheckAll *context, int64_t prevJob, int64_t job,
int64_t started, int64_t count) {
auto *j = context->inProgress + job;
if (j->childAndVersion.child == nullptr) {
auto c = getChildGeq(j->n, j->begin[0]);
if (c != nullptr) { if (c != nullptr) {
job->n = c; j->n = c;
return down_left_spine; context->next[job] = down_left_spine;
__builtin_prefetch(j->n);
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
} else { } else {
job->n = nextSibling(job->n); j->n = nextSibling(j->n);
if (job->n == nullptr) { if (j->n == nullptr) {
job->setResult(true); j->setResult(true);
return nullptr; // Done MUSTTAIL return complete(context, prevJob, job, started, count);
} }
return down_left_spine; context->next[job] = down_left_spine;
__builtin_prefetch(j->n);
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
} }
} }
job->n = child; j->n = j->childAndVersion.child;
job->begin = job->begin.subspan(1, job->begin.size() - 1); j->begin = j->begin.subspan(1, j->begin.size() - 1);
if (job->n->partialKeyLen > 0) { if (j->n->partialKeyLen > 0) {
int commonLen = std::min<int>(job->n->partialKeyLen, job->begin.size()); int commonLen = std::min<int>(j->n->partialKeyLen, j->begin.size());
int i = longestCommonPrefix(job->n->partialKey(), job->begin.data(), int i = longestCommonPrefix(j->n->partialKey(), j->begin.data(), commonLen);
commonLen);
if (i < commonLen) { if (i < commonLen) {
auto c = job->n->partialKey()[i] <=> job->begin[i]; auto c = j->n->partialKey()[i] <=> j->begin[i];
if (c > 0) { if (c > 0) {
return down_left_spine; context->next[job] = down_left_spine;
MUSTTAIL return down_left_spine(context, prevJob, job, started, count);
} else { } else {
job->n = nextSibling(job->n); j->n = nextSibling(j->n);
if (job->n == nullptr) { if (j->n == nullptr) {
job->setResult(true); j->setResult(true);
return nullptr; // Done MUSTTAIL return complete(context, prevJob, job, started, count);
} }
return down_left_spine; context->next[job] = down_left_spine;
__builtin_prefetch(j->n);
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
} }
} }
if (commonLen == job->n->partialKeyLen) { if (commonLen == j->n->partialKeyLen) {
// partial key matches // partial key matches
job->begin = j->begin = j->begin.subspan(commonLen, j->begin.size() - commonLen);
job->begin.subspan(commonLen, job->begin.size() - commonLen); } else if (j->n->partialKeyLen > int(j->begin.size())) {
} else if (job->n->partialKeyLen > int(job->begin.size())) {
// n is the first physical node greater than remaining, and there's no // n is the first physical node greater than remaining, and there's no
// eq node // eq node
return down_left_spine; context->next[job] = down_left_spine;
MUSTTAIL return down_left_spine(context, prevJob, job, started, count);
} }
} }
if (maxV <= job->readVersion) { if (j->childAndVersion.maxVersion <= j->readVersion) {
++job->tls->point_read_short_circuit_accum; ++context->tls->point_read_short_circuit_accum;
job->setResult(true); j->setResult(true);
return nullptr; // Done MUSTTAIL return complete(context, prevJob, job, started, count);
}
}
} }
CheckJob::continuation down_left_spine(CheckJob *job) { ++context->tls->point_read_iterations_accum;
if (job->n->entryPresent) {
job->setResult(job->n->entry.rangeVersion <= job->readVersion); if (j->begin.size() == 0) {
return nullptr; // Done if (j->n->entryPresent) {
j->setResult(j->n->entry.pointVersion <= j->readVersion);
MUSTTAIL return complete(context, prevJob, job, started, count);
} }
job->n = getFirstChildExists(job->n); j->n = getFirstChildExists(j->n);
return down_left_spine; context->next[job] = down_left_spine;
__builtin_prefetch(j->n);
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
}
j->childAndVersion = getChildAndMaxVersion(j->n, j->begin[0]);
__builtin_prefetch(j->childAndVersion.child);
// j->next is already iter
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
}
void down_left_spine(struct CheckAll *context, int64_t prevJob, int64_t job,
int64_t started, int64_t count) {
auto *j = context->inProgress + job;
if (j->n->entryPresent) {
j->setResult(j->n->entry.rangeVersion <= j->readVersion);
MUSTTAIL return complete(context, prevJob, job, started, count);
}
j->n = getFirstChildExists(j->n);
__builtin_prefetch(j->n);
// j->next is already down_left_spine
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
} }
} // namespace check_point_read_state_machine } // namespace check_point_read_state_machine
void CheckJob::init(const ConflictSet::ReadRange *read, continuation CheckJob::init(const ConflictSet::ReadRange *read,
ConflictSet::Result *result, Node *root, ConflictSet::Result *result, Node *root,
int64_t oldestVersionFullPrecision, ReadContext *tls) { int64_t oldestVersionFullPrecision,
ReadContext *tls) {
auto begin = std::span<const uint8_t>(read->begin.p, read->begin.len); auto begin = std::span<const uint8_t>(read->begin.p, read->begin.len);
auto end = std::span<const uint8_t>(read->end.p, read->end.len); auto end = std::span<const uint8_t>(read->end.p, read->end.len);
if (read->readVersion < oldestVersionFullPrecision) { if (read->readVersion < oldestVersionFullPrecision) {
*result = ConflictSet::TooOld; *result = ConflictSet::TooOld;
next = +[](CheckJob *) -> continuation { return nullptr; }; return complete;
} else if (end.size() == 0) { } else if (end.size() == 0) {
this->begin = begin; this->begin = begin;
this->n = root; this->n = root;
this->readVersion = InternalVersionT(read->readVersion); this->readVersion = InternalVersionT(read->readVersion);
this->result = result; this->result = result;
this->tls = tls; return check_point_read_state_machine::begin;
this->next = check_point_read_state_machine::begin; // *result =
// checkPointRead(root, begin, InternalVersionT(read->readVersion), tls)
// ? ConflictSet::Commit
// : ConflictSet::Conflict;
// return complete;
} else { } else {
*result = checkRangeRead(root, begin, end, *result = checkRangeRead(root, begin, end,
InternalVersionT(read->readVersion), tls) InternalVersionT(read->readVersion), tls)
? ConflictSet::Commit ? ConflictSet::Commit
: ConflictSet::Conflict; : ConflictSet::Conflict;
next = +[](CheckJob *) -> continuation { return nullptr; }; return complete;
} }
} }
@@ -3163,38 +3299,23 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
tls.impl = this; tls.impl = this;
int64_t check_byte_accum = 0; int64_t check_byte_accum = 0;
constexpr int kConcurrent = 32; CheckAll context;
CheckJob inProgress[kConcurrent]; context.oldestVersionFullPrecision = oldestVersionFullPrecision;
int nextJob[kConcurrent]; context.queries = reads;
context.results = result;
context.root = root;
context.tls = &tls;
int started = std::min(kConcurrent, count); int64_t started = std::min(context.kConcurrent, count);
for (int i = 0; i < started; i++) { for (int i = 0; i < started; i++) {
inProgress[i].init(reads + i, result + i, root, context.next[i] = context.inProgress[i].init(
oldestVersionFullPrecision, &tls); reads + i, result + i, root, oldestVersionFullPrecision, &tls);
nextJob[i] = i + 1; context.nextJob[i] = i + 1;
} }
nextJob[started - 1] = 0; context.nextJob[started - 1] = 0;
int prevJob = started - 1; int prevJob = started - 1;
int job = 0; int job = 0;
for (;;) { context.next[job](&context, prevJob, job, started, count);
auto next = inProgress[job].next(inProgress + job);
inProgress[job].next = next;
if (!next) {
if (started == count) {
if (prevJob == job)
break;
nextJob[prevJob] = nextJob[job];
job = prevJob;
} else {
int temp = started++;
inProgress[job].init(reads + temp, result + temp, root,
oldestVersionFullPrecision, &tls);
}
}
prevJob = job;
job = nextJob[job];
}
for (int i = 0; i < count; ++i) { for (int i = 0; i < count; ++i) {
assert(reads[i].readVersion >= 0); assert(reads[i].readVersion >= 0);

View File

@@ -22,9 +22,6 @@ void *stepJob(Job *j) {
return done ? nullptr : (void *)stepJob; return done ? nullptr : (void *)stepJob;
} }
// So we can look at the disassembly more easily
extern "C" {
void sequential(Job **jobs, int count) { void sequential(Job **jobs, int count) {
for (int i = 0; i < count; ++i) { for (int i = 0; i < count; ++i) {
do { do {
@@ -94,6 +91,87 @@ void interleaveBoundedCyclicList(Job **jobs, int count) {
} }
} }
#ifndef __has_attribute
#define __has_attribute(x) 0
#endif
#if __has_attribute(musttail)
#define MUSTTAIL __attribute__((musttail))
#else
#define MUSTTAIL
#endif
struct Context {
constexpr static int kConcurrent = 32;
Job **jobs;
Job *inProgress[kConcurrent];
void (*continuation[kConcurrent])(Context *, int64_t prevJob, int64_t job,
int64_t started, int64_t count);
int nextJob[kConcurrent];
};
void keepGoing(Context *context, int64_t prevJob, int64_t job, int64_t started,
int64_t count) {
prevJob = job;
job = context->nextJob[job];
MUSTTAIL return context->continuation[job](context, prevJob, job, started,
count);
}
void stepJobTailCall(Context *context, int64_t prevJob, int64_t job,
int64_t started, int64_t count);
void complete(Context *context, int64_t prevJob, int64_t job, int64_t started,
int64_t count) {
if (started == count) {
if (prevJob == job) {
return;
}
context->nextJob[prevJob] = context->nextJob[job];
job = prevJob;
} else {
context->inProgress[job] = context->jobs[started++];
context->continuation[job] = stepJobTailCall;
}
prevJob = job;
job = context->nextJob[job];
MUSTTAIL return context->continuation[job](context, prevJob, job, started,
count);
}
void stepJobTailCall(Context *context, int64_t prevJob, int64_t job,
int64_t started, int64_t count) {
auto *j = context->inProgress[job];
auto done = --(*j->input) == 0;
#ifdef __x86_64__
_mm_clflush(j->input);
#endif
if (done) {
MUSTTAIL return complete(context, prevJob, job, started, count);
} else {
context->continuation[job] = stepJobTailCall;
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
}
}
void useTailCalls(Job **jobs, int count) {
if (count == 0) {
return;
}
Context context;
context.jobs = jobs;
int64_t started = std::min(Context::kConcurrent, count);
for (int i = 0; i < started; i++) {
context.inProgress[i] = jobs[i];
context.nextJob[i] = i + 1;
context.continuation[i] = stepJobTailCall;
}
context.nextJob[started - 1] = 0;
int prevJob = started - 1;
int job = 0;
return context.continuation[job](&context, prevJob, job, started, count);
}
void interleaveCyclicList(Job **jobs, int count) { void interleaveCyclicList(Job **jobs, int count) {
auto *nextJob = (int *)alloca(sizeof(int) * count); auto *nextJob = (int *)alloca(sizeof(int) * count);
@@ -117,12 +195,11 @@ void interleaveCyclicList(Job **jobs, int count) {
job = nextJob[job]; job = nextJob[job];
} }
} }
}
int main() { int main() {
ankerl::nanobench::Bench bench; ankerl::nanobench::Bench bench;
constexpr int kNumJobs = 100; constexpr int kNumJobs = 10000;
bench.relative(true); bench.relative(true);
Job jobs[kNumJobs]; Job jobs[kNumJobs];
@@ -140,6 +217,7 @@ int main() {
for (auto [scheduler, name] : for (auto [scheduler, name] :
{std::make_pair(sequentialNoFuncPtr, "sequentialNoFuncPtr"), {std::make_pair(sequentialNoFuncPtr, "sequentialNoFuncPtr"),
std::make_pair(sequential, "sequential"), std::make_pair(sequential, "sequential"),
std::make_pair(useTailCalls, "useTailCalls"),
std::make_pair(interleaveSwapping, "interleavingSwapping"), std::make_pair(interleaveSwapping, "interleavingSwapping"),
std::make_pair(interleaveBoundedCyclicList, std::make_pair(interleaveBoundedCyclicList,
"interleaveBoundedCyclicList"), "interleaveBoundedCyclicList"),

View File

@@ -1,4 +1,5 @@
#include <atomic> #include <atomic>
#include <cstdint>
#include <errno.h> #include <errno.h>
#include <netdb.h> #include <netdb.h>
#include <stdio.h> #include <stdio.h>
@@ -21,78 +22,55 @@
std::atomic<int64_t> transactions; std::atomic<int64_t> transactions;
constexpr int kBaseSearchDepth = 115;
constexpr int kWindowSize = 10000000; constexpr int kWindowSize = 10000000;
std::string numToKey(int64_t num) { constexpr int kNumPrefixes = 250000;
std::string makeKey(int64_t num, int suffixLen) {
std::string result; std::string result;
result.resize(kBaseSearchDepth + sizeof(int64_t)); result.resize(sizeof(int64_t) + suffixLen);
memset(result.data(), 0, kBaseSearchDepth);
int64_t be = __builtin_bswap64(num); int64_t be = __builtin_bswap64(num);
memcpy(result.data() + kBaseSearchDepth, &be, sizeof(int64_t)); memcpy(result.data(), &be, sizeof(int64_t));
memset(result.data() + sizeof(int64_t), 0, suffixLen);
return result; return result;
} }
void workload(weaselab::ConflictSet *cs) { void workload(weaselab::ConflictSet *cs) {
int64_t version = kWindowSize; int64_t version = kWindowSize;
cs->addWrites(nullptr, 0, version); for (int i = 0; i < kNumPrefixes; ++i) {
for (;; transactions.fetch_add(1, std::memory_order_relaxed)) { for (int j = 0; j < 50; ++j) {
// Reads weaselab::ConflictSet::WriteRange wr;
{ auto k = makeKey(i, j);
auto beginK = numToKey(version - kWindowSize); wr.begin.p = (const uint8_t *)k.data();
auto endK = numToKey(version - 1); wr.begin.len = k.size();
auto pointRv = version - kWindowSize + rand() % kWindowSize + 1; wr.end.len = 0;
auto pointK = numToKey(pointRv); cs->addWrites(&wr, 1, version);
weaselab::ConflictSet::ReadRange reads[] = {
{
{(const uint8_t *)pointK.data(), int(pointK.size())},
{nullptr, 0},
pointRv,
},
{
{(const uint8_t *)beginK.data(), int(beginK.size())},
{(const uint8_t *)endK.data(), int(endK.size())},
version - 2,
},
};
weaselab::ConflictSet::Result result[sizeof(reads) / sizeof(reads[0])];
cs->check(reads, result, sizeof(reads) / sizeof(reads[0]));
// for (int i = 0; i < sizeof(reads) / sizeof(reads[0]); ++i) {
// if (result[i] != weaselab::ConflictSet::Commit) {
// fprintf(stderr, "Unexpected conflict: [%s, %s) @ %" PRId64 "\n",
// printable(reads[i].begin).c_str(),
// printable(reads[i].end).c_str(), reads[i].readVersion);
// abort();
// }
// }
}
// Writes
{
weaselab::ConflictSet::WriteRange w;
auto k = numToKey(version);
w.begin.p = (const uint8_t *)k.data();
w.end.len = 0;
if (version % (kWindowSize / 2) == 0) {
for (int l = 0; l <= k.size(); ++l) {
w.begin.len = l;
cs->addWrites(&w, 1, version);
}
} else {
w.begin.len = k.size();
cs->addWrites(&w, 1, version);
int64_t beginN = version - kWindowSize + rand() % kWindowSize;
auto b = numToKey(beginN);
auto e = numToKey(beginN + 1000);
w.begin.p = (const uint8_t *)b.data();
w.begin.len = b.size();
w.end.p = (const uint8_t *)e.data();
w.end.len = e.size();
cs->addWrites(&w, 1, version);
} }
} }
// GC
cs->setOldestVersion(version - kWindowSize);
++version; ++version;
for (int i = 0; i < kNumPrefixes; ++i) {
weaselab::ConflictSet::WriteRange wr;
auto k = makeKey(i, 50);
wr.begin.p = (const uint8_t *)k.data();
wr.begin.len = k.size();
wr.end.len = 0;
cs->addWrites(&wr, 1, version);
}
std::vector<weaselab::ConflictSet::Result> results(10);
for (;; transactions.fetch_add(1, std::memory_order_relaxed)) {
std::vector<std::string> keys(10);
for (auto &k : keys) {
k = makeKey(rand() % kNumPrefixes, 49);
}
std::vector<weaselab::ConflictSet::ReadRange> reads(10);
for (int i = 0; i < reads.size(); ++i) {
reads[i].begin.p = (const uint8_t *)(keys[i].data());
reads[i].begin.len = keys[i].size();
reads[i].end.len = 0;
reads[i].readVersion = version - 1;
}
cs->check(reads.data(), results.data(), 10);
} }
} }