|
|
|
|
@@ -87,22 +87,42 @@ constexpr int64_t kMaxCorrectVersionWindow =
|
|
|
|
|
std::numeric_limits<int32_t>::max();
|
|
|
|
|
static_assert(kNominalVersionWindow <= kMaxCorrectVersionWindow);
|
|
|
|
|
|
|
|
|
|
#ifndef USE_64_BIT
|
|
|
|
|
#define USE_64_BIT 0
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
struct InternalVersionT {
|
|
|
|
|
constexpr InternalVersionT() = default;
|
|
|
|
|
constexpr explicit InternalVersionT(int64_t value) : value(value) {}
|
|
|
|
|
constexpr int64_t toInt64() const { return value; } // GCOVR_EXCL_LINE
|
|
|
|
|
constexpr auto operator<=>(const InternalVersionT &rhs) const {
|
|
|
|
|
#if USE_64_BIT
|
|
|
|
|
return value <=> rhs.value;
|
|
|
|
|
#else
|
|
|
|
|
// Maintains ordering after overflow, as long as the full-precision versions
|
|
|
|
|
// are within `kMaxCorrectVersionWindow` of eachother.
|
|
|
|
|
return int32_t(value - rhs.value) <=> 0;
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
constexpr bool operator==(const InternalVersionT &) const = default;
|
|
|
|
|
#if USE_64_BIT
|
|
|
|
|
static const InternalVersionT zero;
|
|
|
|
|
#else
|
|
|
|
|
static thread_local InternalVersionT zero;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
#if USE_64_BIT
|
|
|
|
|
int64_t value;
|
|
|
|
|
#else
|
|
|
|
|
uint32_t value;
|
|
|
|
|
#endif
|
|
|
|
|
};
|
|
|
|
|
#if USE_64_BIT
|
|
|
|
|
const InternalVersionT InternalVersionT::zero{0};
|
|
|
|
|
#else
|
|
|
|
|
thread_local InternalVersionT InternalVersionT::zero;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
struct Entry {
|
|
|
|
|
InternalVersionT pointVersion;
|
|
|
|
|
@@ -518,8 +538,13 @@ std::string getSearchPath(Node *n);
|
|
|
|
|
// Each node with an entry present gets a budget of kBytesPerKey. Node0 always
|
|
|
|
|
// has an entry present.
|
|
|
|
|
// Induction hypothesis is that each node's surplus is >= kMinNodeSurplus
|
|
|
|
|
#if USE_64_BIT
|
|
|
|
|
constexpr int kBytesPerKey = 144;
|
|
|
|
|
constexpr int kMinNodeSurplus = 104;
|
|
|
|
|
#else
|
|
|
|
|
constexpr int kBytesPerKey = 112;
|
|
|
|
|
constexpr int kMinNodeSurplus = 80;
|
|
|
|
|
#endif
|
|
|
|
|
// Cound the entry itself as a child
|
|
|
|
|
constexpr int kMinChildrenNode0 = 1;
|
|
|
|
|
constexpr int kMinChildrenNode3 = 2;
|
|
|
|
|
@@ -724,9 +749,13 @@ struct WriteContext {
|
|
|
|
|
int64_t write_bytes;
|
|
|
|
|
} accum;
|
|
|
|
|
|
|
|
|
|
#if USE_64_BIT
|
|
|
|
|
static constexpr InternalVersionT zero{0};
|
|
|
|
|
#else
|
|
|
|
|
// Cache a copy of InternalVersionT::zero, so we don't need to do the TLS
|
|
|
|
|
// lookup as often.
|
|
|
|
|
InternalVersionT zero;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
WriteContext() { memset(&accum, 0, sizeof(accum)); }
|
|
|
|
|
|
|
|
|
|
@@ -1563,6 +1592,9 @@ void rezero16(InternalVersionT *vs, InternalVersionT zero) {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#if USE_64_BIT
|
|
|
|
|
void rezero(Node *, InternalVersionT) {}
|
|
|
|
|
#else
|
|
|
|
|
void rezero(Node *n, InternalVersionT z) {
|
|
|
|
|
#if DEBUG_VERBOSE && !defined(NDEBUG)
|
|
|
|
|
fprintf(stderr, "rezero to %" PRId64 ": %s\n", z.toInt64(),
|
|
|
|
|
@@ -1605,6 +1637,7 @@ void rezero(Node *n, InternalVersionT z) {
|
|
|
|
|
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
void mergeWithChild(Node *&self, WriteContext *tls, ConflictSet::Impl *impl,
|
|
|
|
|
Node *&dontInvalidate, Node3 *self3) {
|
|
|
|
|
@@ -1987,7 +2020,14 @@ downLeftSpine:
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#ifdef HAS_AVX
|
|
|
|
|
uint32_t compare16_32bit(const InternalVersionT *vs, InternalVersionT rv) {
|
|
|
|
|
uint32_t compare16(const InternalVersionT *vs, InternalVersionT rv) {
|
|
|
|
|
#if USE_64_BIT
|
|
|
|
|
uint32_t compared = 0;
|
|
|
|
|
for (int i = 0; i < 16; ++i) {
|
|
|
|
|
compared |= (vs[i] > rv) << i;
|
|
|
|
|
}
|
|
|
|
|
return compared;
|
|
|
|
|
#else
|
|
|
|
|
uint32_t compared = 0;
|
|
|
|
|
__m128i w[4]; // GCOVR_EXCL_LINE
|
|
|
|
|
memcpy(w, vs, sizeof(w));
|
|
|
|
|
@@ -2001,15 +2041,26 @@ uint32_t compare16_32bit(const InternalVersionT *vs, InternalVersionT rv) {
|
|
|
|
|
<< (i * 4);
|
|
|
|
|
}
|
|
|
|
|
return compared;
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
__attribute__((target("avx512f"))) uint32_t
|
|
|
|
|
compare16_32bit_avx512(const InternalVersionT *vs, InternalVersionT rv) {
|
|
|
|
|
compare16_avx512(const InternalVersionT *vs, InternalVersionT rv) {
|
|
|
|
|
#if USE_64_BIT
|
|
|
|
|
int64_t r;
|
|
|
|
|
memcpy(&r, &rv, sizeof(r));
|
|
|
|
|
uint32_t low =
|
|
|
|
|
_mm512_cmpgt_epi64_mask(_mm512_loadu_epi64(vs), _mm512_set1_epi64(r));
|
|
|
|
|
uint32_t high =
|
|
|
|
|
_mm512_cmpgt_epi64_mask(_mm512_loadu_epi64(vs + 8), _mm512_set1_epi64(r));
|
|
|
|
|
return low | (high << 8);
|
|
|
|
|
#else
|
|
|
|
|
uint32_t r;
|
|
|
|
|
memcpy(&r, &rv, sizeof(r));
|
|
|
|
|
return _mm512_cmpgt_epi32_mask(
|
|
|
|
|
_mm512_sub_epi32(_mm512_loadu_epi32(vs), _mm512_set1_epi32(r)),
|
|
|
|
|
_mm512_setzero_epi32());
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
@@ -2066,9 +2117,9 @@ bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end,
|
|
|
|
|
|
|
|
|
|
uint32_t compared = 0;
|
|
|
|
|
if constexpr (kAVX512) {
|
|
|
|
|
compared = compare16_32bit_avx512(vs, readVersion); // GCOVR_EXCL_LINE
|
|
|
|
|
compared = compare16_avx512(vs, readVersion);
|
|
|
|
|
} else {
|
|
|
|
|
compared = compare16_32bit(vs, readVersion); // GCOVR_EXCL_LINE
|
|
|
|
|
compared = compare16(vs, readVersion);
|
|
|
|
|
}
|
|
|
|
|
return !(compared & mask);
|
|
|
|
|
|
|
|
|
|
@@ -2127,9 +2178,9 @@ bool scan16(const InternalVersionT *vs, int begin, int end,
|
|
|
|
|
#elif defined(HAS_AVX)
|
|
|
|
|
uint32_t conflict;
|
|
|
|
|
if constexpr (kAVX512) {
|
|
|
|
|
conflict = compare16_32bit_avx512(vs, readVersion); // GCOVR_EXCL_LINE
|
|
|
|
|
conflict = compare16_avx512(vs, readVersion);
|
|
|
|
|
} else {
|
|
|
|
|
conflict = compare16_32bit(vs, readVersion); // GCOVR_EXCL_LINE
|
|
|
|
|
conflict = compare16(vs, readVersion);
|
|
|
|
|
}
|
|
|
|
|
conflict &= (1 << end) - 1;
|
|
|
|
|
conflict >>= begin;
|
|
|
|
|
@@ -2264,12 +2315,9 @@ bool checkMaxBetweenExclusiveImpl(Node *n, int begin, int end,
|
|
|
|
|
|
|
|
|
|
uint32_t compared = 0;
|
|
|
|
|
if constexpr (kAVX512) {
|
|
|
|
|
compared = // GCOVR_EXCL_LINE
|
|
|
|
|
compare16_32bit_avx512(self->childMaxVersion, // GCOVR_EXCL_LINE
|
|
|
|
|
readVersion); // GCOVR_EXCL_LINE
|
|
|
|
|
compared = compare16_avx512(self->childMaxVersion, readVersion);
|
|
|
|
|
} else {
|
|
|
|
|
compared = compare16_32bit(self->childMaxVersion,
|
|
|
|
|
readVersion); // GCOVR_EXCL_LINE
|
|
|
|
|
compared = compare16(self->childMaxVersion, readVersion);
|
|
|
|
|
}
|
|
|
|
|
return !(compared & mask) && firstRangeOk;
|
|
|
|
|
|
|
|
|
|
@@ -3137,10 +3185,12 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void addWrites(const WriteRange *writes, int count, int64_t writeVersion) {
|
|
|
|
|
#if !USE_64_BIT
|
|
|
|
|
// There could be other conflict sets in the same thread. We need
|
|
|
|
|
// InternalVersionT::zero to be correct for this conflict set for the
|
|
|
|
|
// lifetime of the current call frame.
|
|
|
|
|
InternalVersionT::zero = tls.zero = oldestVersion;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
assert(writeVersion >= newestVersionFullPrecision);
|
|
|
|
|
assert(tls.accum.entries_erased == 0);
|
|
|
|
|
@@ -3255,7 +3305,9 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
|
|
|
|
InternalVersionT oldestVersion{o};
|
|
|
|
|
this->oldestVersionFullPrecision = o;
|
|
|
|
|
this->oldestVersion = oldestVersion;
|
|
|
|
|
#if !USE_64_BIT
|
|
|
|
|
InternalVersionT::zero = tls.zero = oldestVersion;
|
|
|
|
|
#endif
|
|
|
|
|
#ifdef NDEBUG
|
|
|
|
|
// This is here for performance reasons, since we want to amortize the cost
|
|
|
|
|
// of storing the search path as a string. In tests, we want to exercise the
|
|
|
|
|
@@ -3304,7 +3356,9 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
|
|
|
|
root->entry.pointVersion = this->oldestVersion;
|
|
|
|
|
root->entry.rangeVersion = this->oldestVersion;
|
|
|
|
|
|
|
|
|
|
#if !USE_64_BIT
|
|
|
|
|
InternalVersionT::zero = tls.zero = this->oldestVersion;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
// Intentionally not resetting totalBytes
|
|
|
|
|
}
|
|
|
|
|
@@ -3751,6 +3805,9 @@ Node *firstGeq(Node *n, std::string_view key) {
|
|
|
|
|
n, std::span<const uint8_t>((const uint8_t *)key.data(), key.size()));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#if USE_64_BIT
|
|
|
|
|
void checkVersionsGeqOldestExtant(Node *, InternalVersionT) {}
|
|
|
|
|
#else
|
|
|
|
|
void checkVersionsGeqOldestExtant(Node *n,
|
|
|
|
|
InternalVersionT oldestExtantVersion) {
|
|
|
|
|
if (n->entryPresent) {
|
|
|
|
|
@@ -3794,6 +3851,7 @@ void checkVersionsGeqOldestExtant(Node *n,
|
|
|
|
|
abort();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
[[maybe_unused]] InternalVersionT
|
|
|
|
|
checkMaxVersion(Node *root, Node *node, InternalVersionT oldestVersion,
|
|
|
|
|
|