Experiment with causing cache misses
This commit is contained in:
114
ConflictSet.cpp
114
ConflictSet.cpp
@@ -48,6 +48,17 @@ limitations under the License.
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define SIM_CACHE_MISSES 0
|
||||||
|
#if SIM_CACHE_MISSES
|
||||||
|
constexpr void simCacheMiss(void *x) {
|
||||||
|
if (x) {
|
||||||
|
_mm_clflush(x);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
constexpr void simCacheMiss(void *) {}
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <memcheck.h>
|
#include <memcheck.h>
|
||||||
|
|
||||||
using namespace weaselab;
|
using namespace weaselab;
|
||||||
@@ -836,21 +847,29 @@ int getNodeIndex(Node16 *self, uint8_t index) {
|
|||||||
|
|
||||||
// Precondition - an entry for index must exist in the node
|
// Precondition - an entry for index must exist in the node
|
||||||
Node *&getChildExists(Node3 *self, uint8_t index) {
|
Node *&getChildExists(Node3 *self, uint8_t index) {
|
||||||
return self->children[getNodeIndex(self, index)];
|
auto &result = self->children[getNodeIndex(self, index)];
|
||||||
|
simCacheMiss(result);
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
// Precondition - an entry for index must exist in the node
|
// Precondition - an entry for index must exist in the node
|
||||||
Node *&getChildExists(Node16 *self, uint8_t index) {
|
Node *&getChildExists(Node16 *self, uint8_t index) {
|
||||||
return self->children[getNodeIndex(self, index)];
|
auto &result = self->children[getNodeIndex(self, index)];
|
||||||
|
simCacheMiss(result);
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
// Precondition - an entry for index must exist in the node
|
// Precondition - an entry for index must exist in the node
|
||||||
Node *&getChildExists(Node48 *self, uint8_t index) {
|
Node *&getChildExists(Node48 *self, uint8_t index) {
|
||||||
assert(self->bitSet.test(index));
|
assert(self->bitSet.test(index));
|
||||||
return self->children[self->index[index]];
|
auto &result = self->children[self->index[index]];
|
||||||
|
simCacheMiss(result);
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
// Precondition - an entry for index must exist in the node
|
// Precondition - an entry for index must exist in the node
|
||||||
Node *&getChildExists(Node256 *self, uint8_t index) {
|
Node *&getChildExists(Node256 *self, uint8_t index) {
|
||||||
assert(self->bitSet.test(index));
|
assert(self->bitSet.test(index));
|
||||||
return self->children[index];
|
auto &result = self->children[index];
|
||||||
|
simCacheMiss(result);
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Precondition - an entry for index must exist in the node
|
// Precondition - an entry for index must exist in the node
|
||||||
@@ -1030,6 +1049,7 @@ ChildAndMaxVersion getChildAndMaxVersion(Node3 *self, uint8_t index) {
|
|||||||
if (i < 0) {
|
if (i < 0) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
simCacheMiss(self->children[i]);
|
||||||
return {self->children[i], self->childMaxVersion[i]};
|
return {self->children[i], self->childMaxVersion[i]};
|
||||||
}
|
}
|
||||||
ChildAndMaxVersion getChildAndMaxVersion(Node16 *self, uint8_t index) {
|
ChildAndMaxVersion getChildAndMaxVersion(Node16 *self, uint8_t index) {
|
||||||
@@ -1037,6 +1057,7 @@ ChildAndMaxVersion getChildAndMaxVersion(Node16 *self, uint8_t index) {
|
|||||||
if (i < 0) {
|
if (i < 0) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
simCacheMiss(self->children[i]);
|
||||||
return {self->children[i], self->childMaxVersion[i]};
|
return {self->children[i], self->childMaxVersion[i]};
|
||||||
}
|
}
|
||||||
ChildAndMaxVersion getChildAndMaxVersion(Node48 *self, uint8_t index) {
|
ChildAndMaxVersion getChildAndMaxVersion(Node48 *self, uint8_t index) {
|
||||||
@@ -1044,9 +1065,11 @@ ChildAndMaxVersion getChildAndMaxVersion(Node48 *self, uint8_t index) {
|
|||||||
if (i < 0) {
|
if (i < 0) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
simCacheMiss(self->children[i]);
|
||||||
return {self->children[i], self->childMaxVersion[i]};
|
return {self->children[i], self->childMaxVersion[i]};
|
||||||
}
|
}
|
||||||
ChildAndMaxVersion getChildAndMaxVersion(Node256 *self, uint8_t index) {
|
ChildAndMaxVersion getChildAndMaxVersion(Node256 *self, uint8_t index) {
|
||||||
|
simCacheMiss(self->children[index]);
|
||||||
return {self->children[index], self->childMaxVersion[index]};
|
return {self->children[index], self->childMaxVersion[index]};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1072,6 +1095,7 @@ Node *getChildGeq(Node0 *, int) { return nullptr; }
|
|||||||
Node *getChildGeq(Node3 *n, int child) {
|
Node *getChildGeq(Node3 *n, int child) {
|
||||||
for (int i = 0; i < n->numChildren; ++i) {
|
for (int i = 0; i < n->numChildren; ++i) {
|
||||||
if (n->index[i] >= child) {
|
if (n->index[i] >= child) {
|
||||||
|
simCacheMiss(n->children[i]);
|
||||||
return n->children[i];
|
return n->children[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1090,7 +1114,10 @@ Node *getChildGeq(Node16 *self, int child) {
|
|||||||
__m128i results = _mm_cmpeq_epi8(key_vec, _mm_min_epu8(key_vec, indices));
|
__m128i results = _mm_cmpeq_epi8(key_vec, _mm_min_epu8(key_vec, indices));
|
||||||
int mask = (1 << self->numChildren) - 1;
|
int mask = (1 << self->numChildren) - 1;
|
||||||
uint32_t bitfield = _mm_movemask_epi8(results) & mask;
|
uint32_t bitfield = _mm_movemask_epi8(results) & mask;
|
||||||
return bitfield == 0 ? nullptr : self->children[std::countr_zero(bitfield)];
|
auto *result =
|
||||||
|
bitfield == 0 ? nullptr : self->children[std::countr_zero(bitfield)];
|
||||||
|
simCacheMiss(result);
|
||||||
|
return result;
|
||||||
#elif defined(HAS_ARM_NEON)
|
#elif defined(HAS_ARM_NEON)
|
||||||
uint8x16_t indices;
|
uint8x16_t indices;
|
||||||
memcpy(&indices, self->index, sizeof(self->index));
|
memcpy(&indices, self->index, sizeof(self->index));
|
||||||
@@ -1126,13 +1153,16 @@ Node *getChildGeq(Node48 *self, int child) {
|
|||||||
if (c < 0) {
|
if (c < 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
return self->children[self->index[c]];
|
auto *result = self->children[self->index[c]];
|
||||||
|
simCacheMiss(result);
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
Node *getChildGeq(Node256 *self, int child) {
|
Node *getChildGeq(Node256 *self, int child) {
|
||||||
int c = self->bitSet.firstSetGeq(child);
|
int c = self->bitSet.firstSetGeq(child);
|
||||||
if (c < 0) {
|
if (c < 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
simCacheMiss(self->children[c]);
|
||||||
return self->children[c];
|
return self->children[c];
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1156,20 +1186,26 @@ Node *getChildGeq(Node *self, int child) {
|
|||||||
// Precondition: self has a child
|
// Precondition: self has a child
|
||||||
Node *getFirstChildExists(Node3 *self) {
|
Node *getFirstChildExists(Node3 *self) {
|
||||||
assert(self->numChildren > 0);
|
assert(self->numChildren > 0);
|
||||||
|
simCacheMiss(self->children[0]);
|
||||||
return self->children[0];
|
return self->children[0];
|
||||||
}
|
}
|
||||||
// Precondition: self has a child
|
// Precondition: self has a child
|
||||||
Node *getFirstChildExists(Node16 *self) {
|
Node *getFirstChildExists(Node16 *self) {
|
||||||
assert(self->numChildren > 0);
|
assert(self->numChildren > 0);
|
||||||
|
simCacheMiss(self->children[0]);
|
||||||
return self->children[0];
|
return self->children[0];
|
||||||
}
|
}
|
||||||
// Precondition: self has a child
|
// Precondition: self has a child
|
||||||
Node *getFirstChildExists(Node48 *self) {
|
Node *getFirstChildExists(Node48 *self) {
|
||||||
return self->children[self->index[self->bitSet.firstSetGeq(0)]];
|
auto *result = self->children[self->index[self->bitSet.firstSetGeq(0)]];
|
||||||
|
simCacheMiss(result);
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
// Precondition: self has a child
|
// Precondition: self has a child
|
||||||
Node *getFirstChildExists(Node256 *self) {
|
Node *getFirstChildExists(Node256 *self) {
|
||||||
return self->children[self->bitSet.firstSetGeq(0)];
|
auto *result = self->children[self->bitSet.firstSetGeq(0)];
|
||||||
|
simCacheMiss(result);
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Precondition: self has a child
|
// Precondition: self has a child
|
||||||
@@ -3010,18 +3046,10 @@ Node *firstGeqPhysical(Node *n, const std::span<const uint8_t> key) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct CheckJob {
|
struct CheckJob {
|
||||||
Node *n;
|
|
||||||
std::span<const uint8_t> begin;
|
|
||||||
InternalVersionT readVersion;
|
|
||||||
ReadContext *tls;
|
|
||||||
ConflictSet::Result *result;
|
|
||||||
|
|
||||||
void setResult(bool ok) {
|
void setResult(bool ok) {
|
||||||
*result = ok ? ConflictSet::Commit : ConflictSet::Conflict;
|
*result = ok ? ConflictSet::Commit : ConflictSet::Conflict;
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef void (*typeErasedContinuation)(void *);
|
|
||||||
|
|
||||||
// The type of a function that takes a CheckJob* and returns its own type
|
// The type of a function that takes a CheckJob* and returns its own type
|
||||||
struct continuation {
|
struct continuation {
|
||||||
typedef continuation (*functionPtrType)(CheckJob *);
|
typedef continuation (*functionPtrType)(CheckJob *);
|
||||||
@@ -3032,9 +3060,16 @@ struct CheckJob {
|
|||||||
operator bool() { return func != nullptr; }
|
operator bool() { return func != nullptr; }
|
||||||
};
|
};
|
||||||
|
|
||||||
continuation next;
|
|
||||||
void init(const ConflictSet::ReadRange *read, ConflictSet::Result *result,
|
void init(const ConflictSet::ReadRange *read, ConflictSet::Result *result,
|
||||||
Node *root, int64_t oldestVersionFullPrecision, ReadContext *tls);
|
Node *root, int64_t oldestVersionFullPrecision, ReadContext *tls);
|
||||||
|
|
||||||
|
continuation next;
|
||||||
|
Node *n;
|
||||||
|
ChildAndMaxVersion childAndVersion;
|
||||||
|
std::span<const uint8_t> begin;
|
||||||
|
InternalVersionT readVersion;
|
||||||
|
ReadContext *tls;
|
||||||
|
ConflictSet::Result *result;
|
||||||
};
|
};
|
||||||
|
|
||||||
namespace check_point_read_state_machine {
|
namespace check_point_read_state_machine {
|
||||||
@@ -3042,18 +3077,12 @@ namespace check_point_read_state_machine {
|
|||||||
CheckJob::continuation down_left_spine(CheckJob *job);
|
CheckJob::continuation down_left_spine(CheckJob *job);
|
||||||
CheckJob::continuation iter(CheckJob *job);
|
CheckJob::continuation iter(CheckJob *job);
|
||||||
|
|
||||||
// Logically this is the same as performing firstGeq and then checking against
|
|
||||||
// point or range version according to cmp, but this version short circuits as
|
|
||||||
// soon as it can prove that there's no conflict.
|
|
||||||
CheckJob::continuation begin(CheckJob *job) {
|
CheckJob::continuation begin(CheckJob *job) {
|
||||||
++job->tls->point_read_accum;
|
++job->tls->point_read_accum;
|
||||||
#if DEBUG_VERBOSE && !defined(NDEBUG)
|
#if DEBUG_VERBOSE && !defined(NDEBUG)
|
||||||
fprintf(stderr, "Check point read: %s\n", printable(key).c_str());
|
fprintf(stderr, "Check point read: %s\n", printable(key).c_str());
|
||||||
#endif
|
#endif
|
||||||
return iter(job);
|
|
||||||
}
|
|
||||||
|
|
||||||
CheckJob::continuation iter(CheckJob *job) {
|
|
||||||
if (job->begin.size() == 0) {
|
if (job->begin.size() == 0) {
|
||||||
if (job->n->entryPresent) {
|
if (job->n->entryPresent) {
|
||||||
job->setResult(job->n->entry.pointVersion <= job->readVersion);
|
job->setResult(job->n->entry.pointVersion <= job->readVersion);
|
||||||
@@ -3063,11 +3092,17 @@ CheckJob::continuation iter(CheckJob *job) {
|
|||||||
return down_left_spine;
|
return down_left_spine;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto [child, maxV] = getChildAndMaxVersion(job->n, job->begin[0]);
|
job->childAndVersion = getChildAndMaxVersion(job->n, job->begin[0]);
|
||||||
if (child == nullptr) {
|
__builtin_prefetch(job->childAndVersion.child);
|
||||||
|
return iter;
|
||||||
|
}
|
||||||
|
|
||||||
|
CheckJob::continuation iter(CheckJob *job) {
|
||||||
|
if (job->childAndVersion.child == nullptr) {
|
||||||
auto c = getChildGeq(job->n, job->begin[0]);
|
auto c = getChildGeq(job->n, job->begin[0]);
|
||||||
if (c != nullptr) {
|
if (c != nullptr) {
|
||||||
job->n = c;
|
job->n = c;
|
||||||
|
__builtin_prefetch(job->n);
|
||||||
return down_left_spine;
|
return down_left_spine;
|
||||||
} else {
|
} else {
|
||||||
job->n = nextSibling(job->n);
|
job->n = nextSibling(job->n);
|
||||||
@@ -3075,11 +3110,12 @@ CheckJob::continuation iter(CheckJob *job) {
|
|||||||
job->setResult(true);
|
job->setResult(true);
|
||||||
return nullptr; // Done
|
return nullptr; // Done
|
||||||
}
|
}
|
||||||
|
__builtin_prefetch(job->n);
|
||||||
return down_left_spine;
|
return down_left_spine;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
job->n = child;
|
job->n = job->childAndVersion.child;
|
||||||
job->begin = job->begin.subspan(1, job->begin.size() - 1);
|
job->begin = job->begin.subspan(1, job->begin.size() - 1);
|
||||||
|
|
||||||
if (job->n->partialKeyLen > 0) {
|
if (job->n->partialKeyLen > 0) {
|
||||||
@@ -3089,13 +3125,14 @@ CheckJob::continuation iter(CheckJob *job) {
|
|||||||
if (i < commonLen) {
|
if (i < commonLen) {
|
||||||
auto c = job->n->partialKey()[i] <=> job->begin[i];
|
auto c = job->n->partialKey()[i] <=> job->begin[i];
|
||||||
if (c > 0) {
|
if (c > 0) {
|
||||||
return down_left_spine;
|
return down_left_spine(job);
|
||||||
} else {
|
} else {
|
||||||
job->n = nextSibling(job->n);
|
job->n = nextSibling(job->n);
|
||||||
if (job->n == nullptr) {
|
if (job->n == nullptr) {
|
||||||
job->setResult(true);
|
job->setResult(true);
|
||||||
return nullptr; // Done
|
return nullptr; // Done
|
||||||
}
|
}
|
||||||
|
__builtin_prefetch(job->n);
|
||||||
return down_left_spine;
|
return down_left_spine;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -3105,17 +3142,30 @@ CheckJob::continuation iter(CheckJob *job) {
|
|||||||
} else if (job->n->partialKeyLen > int(job->begin.size())) {
|
} else if (job->n->partialKeyLen > int(job->begin.size())) {
|
||||||
// n is the first physical node greater than remaining, and there's no
|
// n is the first physical node greater than remaining, and there's no
|
||||||
// eq node
|
// eq node
|
||||||
return down_left_spine;
|
return down_left_spine(job);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (maxV <= job->readVersion) {
|
if (job->childAndVersion.maxVersion <= job->readVersion) {
|
||||||
++job->tls->point_read_short_circuit_accum;
|
++job->tls->point_read_short_circuit_accum;
|
||||||
job->setResult(true);
|
job->setResult(true);
|
||||||
return nullptr; // Done
|
return nullptr; // Done
|
||||||
}
|
}
|
||||||
|
|
||||||
++job->tls->point_read_iterations_accum;
|
++job->tls->point_read_iterations_accum;
|
||||||
|
|
||||||
|
if (job->begin.size() == 0) {
|
||||||
|
if (job->n->entryPresent) {
|
||||||
|
job->setResult(job->n->entry.pointVersion <= job->readVersion);
|
||||||
|
return nullptr; // Done
|
||||||
|
}
|
||||||
|
job->n = getFirstChildExists(job->n);
|
||||||
|
__builtin_prefetch(job->n);
|
||||||
|
return down_left_spine;
|
||||||
|
}
|
||||||
|
|
||||||
|
job->childAndVersion = getChildAndMaxVersion(job->n, job->begin[0]);
|
||||||
|
__builtin_prefetch(job->childAndVersion.child);
|
||||||
return iter;
|
return iter;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3125,6 +3175,7 @@ CheckJob::continuation down_left_spine(CheckJob *job) {
|
|||||||
return nullptr; // Done
|
return nullptr; // Done
|
||||||
}
|
}
|
||||||
job->n = getFirstChildExists(job->n);
|
job->n = getFirstChildExists(job->n);
|
||||||
|
__builtin_prefetch(job->n);
|
||||||
return down_left_spine;
|
return down_left_spine;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3145,6 +3196,11 @@ void CheckJob::init(const ConflictSet::ReadRange *read,
|
|||||||
this->result = result;
|
this->result = result;
|
||||||
this->tls = tls;
|
this->tls = tls;
|
||||||
this->next = check_point_read_state_machine::begin;
|
this->next = check_point_read_state_machine::begin;
|
||||||
|
// *result =
|
||||||
|
// checkPointRead(root, begin, InternalVersionT(read->readVersion), tls)
|
||||||
|
// ? ConflictSet::Commit
|
||||||
|
// : ConflictSet::Conflict;
|
||||||
|
// next = +[](CheckJob *) -> continuation { return nullptr; };
|
||||||
} else {
|
} else {
|
||||||
*result = checkRangeRead(root, begin, end,
|
*result = checkRangeRead(root, begin, end,
|
||||||
InternalVersionT(read->readVersion), tls)
|
InternalVersionT(read->readVersion), tls)
|
||||||
|
|||||||
Reference in New Issue
Block a user