Apply function multi versioning higher in call stack to save branches
Some checks failed
Tests / Clang total: 1039, passed: 1039
Clang |Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|0|0|0|0|:clap:
Tests / SIMD fallback total: 1039, passed: 1039
Tests / 32-bit versions total: 1039, passed: 1039
Tests / Release [gcc] total: 1039, passed: 1039
GNU C Compiler (gcc) |Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|0|0|0|0|:clap:
Tests / Release [gcc,aarch64] total: 775, passed: 775
Tests / Coverage total: 780, passed: 780
weaselab/conflict-set/pipeline/head There was a failure building this commit
Some checks failed
Tests / Clang total: 1039, passed: 1039
Clang |Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|0|0|0|0|:clap:
Tests / SIMD fallback total: 1039, passed: 1039
Tests / 32-bit versions total: 1039, passed: 1039
Tests / Release [gcc] total: 1039, passed: 1039
GNU C Compiler (gcc) |Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|0|0|0|0|:clap:
Tests / Release [gcc,aarch64] total: 775, passed: 775
Tests / Coverage total: 780, passed: 780
weaselab/conflict-set/pipeline/head There was a failure building this commit
This commit is contained in:
100
ConflictSet.cpp
100
ConflictSet.cpp
@@ -1709,11 +1709,7 @@ downLeftSpine:
|
||||
}
|
||||
|
||||
#ifdef HAS_AVX
|
||||
#ifndef __SANITIZE_THREAD__
|
||||
__attribute__((target("default")))
|
||||
#endif
|
||||
uint32_t
|
||||
compare16_32bit(const InternalVersionT *vs, InternalVersionT rv) {
|
||||
uint32_t compare16_32bit(const InternalVersionT *vs, InternalVersionT rv) {
|
||||
uint32_t compared = 0;
|
||||
__m128i w[4];
|
||||
memcpy(w, vs, sizeof(w));
|
||||
@@ -1729,9 +1725,8 @@ compare16_32bit(const InternalVersionT *vs, InternalVersionT rv) {
|
||||
return compared;
|
||||
}
|
||||
|
||||
#ifndef __SANITIZE_THREAD__
|
||||
__attribute__((target("avx512f"))) uint32_t
|
||||
compare16_32bit(const InternalVersionT *vs, InternalVersionT rv) {
|
||||
compare16_32bit_avx512(const InternalVersionT *vs, InternalVersionT rv) {
|
||||
__m512i w;
|
||||
memcpy(&w, vs, sizeof(w));
|
||||
uint32_t r;
|
||||
@@ -1740,10 +1735,10 @@ compare16_32bit(const InternalVersionT *vs, InternalVersionT rv) {
|
||||
_mm512_setzero_epi32());
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Returns true if v[i] <= readVersion for all i such that begin <= is[i] < end
|
||||
// Preconditions: begin <= end, end - begin < 256
|
||||
template <bool kAVX512>
|
||||
bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end,
|
||||
InternalVersionT readVersion) {
|
||||
|
||||
@@ -1800,7 +1795,11 @@ bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end,
|
||||
|
||||
uint32_t compared = 0;
|
||||
#if INTERNAL_VERSION_32_BIT
|
||||
if constexpr (kAVX512) {
|
||||
compared = compare16_32bit_avx512(vs, readVersion);
|
||||
} else {
|
||||
compared = compare16_32bit(vs, readVersion);
|
||||
}
|
||||
#else
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
compared |= (vs[i] > readVersion) << i;
|
||||
@@ -1830,6 +1829,7 @@ bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end,
|
||||
// Returns true if v[i] <= readVersion for all i such that begin <= i < end
|
||||
//
|
||||
// always_inline So that we can optimize when begin or end is a constant.
|
||||
template <bool kAVX512>
|
||||
inline __attribute((always_inline)) bool scan16(const InternalVersionT *vs,
|
||||
int begin, int end,
|
||||
InternalVersionT readVersion) {
|
||||
@@ -1862,7 +1862,12 @@ inline __attribute((always_inline)) bool scan16(const InternalVersionT *vs,
|
||||
conflict >>= begin << 2;
|
||||
return !conflict;
|
||||
#elif INTERNAL_VERSION_32_BIT && defined(HAS_AVX)
|
||||
uint32_t conflict = compare16_32bit(vs, readVersion);
|
||||
uint32_t conflict;
|
||||
if constexpr (kAVX512) {
|
||||
conflict = compare16_32bit_avx512(vs, readVersion);
|
||||
} else {
|
||||
conflict = compare16_32bit(vs, readVersion);
|
||||
}
|
||||
conflict &= (1 << end) - 1;
|
||||
conflict >>= begin;
|
||||
return !conflict;
|
||||
@@ -1880,6 +1885,7 @@ inline __attribute((always_inline)) bool scan16(const InternalVersionT *vs,
|
||||
// Return whether or not the max version among all keys starting with the search
|
||||
// path of n + [child], where child in (begin, end) is <= readVersion. Does not
|
||||
// account for the range version of firstGt(searchpath(n) + [end - 1])
|
||||
template <bool kAVX512>
|
||||
bool checkMaxBetweenExclusive(Node *n, int begin, int end,
|
||||
InternalVersionT readVersion) {
|
||||
assume(-1 <= begin);
|
||||
@@ -1934,7 +1940,8 @@ bool checkMaxBetweenExclusive(Node *n, int begin, int end,
|
||||
case Type_Node16: {
|
||||
auto *self = static_cast<Node16 *>(n);
|
||||
|
||||
return scan16(self->childMaxVersion, self->index, begin, end, readVersion);
|
||||
return scan16<kAVX512>(self->childMaxVersion, self->index, begin, end,
|
||||
readVersion);
|
||||
}
|
||||
case Type_Node48: {
|
||||
auto *self = static_cast<Node48 *>(n);
|
||||
@@ -1942,9 +1949,10 @@ bool checkMaxBetweenExclusive(Node *n, int begin, int end,
|
||||
static_assert(Node48::kMaxOfMaxPageSize == 16);
|
||||
for (int i = 0; i < Node48::kMaxOfMaxTotalPages; ++i) {
|
||||
if (self->maxOfMax[i] > readVersion) {
|
||||
if (!scan16(self->childMaxVersion + (i << Node48::kMaxOfMaxShift),
|
||||
self->reverseIndex + (i << Node48::kMaxOfMaxShift), begin,
|
||||
end, readVersion)) {
|
||||
if (!scan16<kAVX512>(self->childMaxVersion +
|
||||
(i << Node48::kMaxOfMaxShift),
|
||||
self->reverseIndex + (i << Node48::kMaxOfMaxShift),
|
||||
begin, end, readVersion)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -1966,14 +1974,14 @@ bool checkMaxBetweenExclusive(Node *n, int begin, int end,
|
||||
}
|
||||
const int intraPageBegin = begin & (Node256::kMaxOfMaxPageSize - 1);
|
||||
const int intraPageEnd = end - (lastPage << Node256::kMaxOfMaxShift);
|
||||
return scan16(self->childMaxVersion +
|
||||
return scan16<kAVX512>(self->childMaxVersion +
|
||||
(firstPage << Node256::kMaxOfMaxShift),
|
||||
intraPageBegin, intraPageEnd, readVersion);
|
||||
}
|
||||
// Check the first page
|
||||
if (self->maxOfMax[firstPage] > readVersion) {
|
||||
const int intraPageBegin = begin & (Node256::kMaxOfMaxPageSize - 1);
|
||||
if (!scan16(self->childMaxVersion +
|
||||
if (!scan16<kAVX512>(self->childMaxVersion +
|
||||
(firstPage << Node256::kMaxOfMaxShift),
|
||||
intraPageBegin, 16, readVersion)) {
|
||||
return false;
|
||||
@@ -1982,7 +1990,8 @@ bool checkMaxBetweenExclusive(Node *n, int begin, int end,
|
||||
// Check the last page
|
||||
if (self->maxOfMax[lastPage] > readVersion) {
|
||||
const int intraPageEnd = end - (lastPage << Node256::kMaxOfMaxShift);
|
||||
if (!scan16(self->childMaxVersion + (lastPage << Node256::kMaxOfMaxShift),
|
||||
if (!scan16<kAVX512>(self->childMaxVersion +
|
||||
(lastPage << Node256::kMaxOfMaxShift),
|
||||
0, intraPageEnd, readVersion)) {
|
||||
return false;
|
||||
}
|
||||
@@ -1990,7 +1999,8 @@ bool checkMaxBetweenExclusive(Node *n, int begin, int end,
|
||||
// Check inner pages
|
||||
const int innerPageBegin = (begin >> Node256::kMaxOfMaxShift) + 1;
|
||||
const int innerPageEnd = (end - 1) >> Node256::kMaxOfMaxShift;
|
||||
return scan16(self->maxOfMax, innerPageBegin, innerPageEnd, readVersion);
|
||||
return scan16<kAVX512>(self->maxOfMax, innerPageBegin, innerPageEnd,
|
||||
readVersion);
|
||||
}
|
||||
default: // GCOVR_EXCL_LINE
|
||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||
@@ -2019,6 +2029,7 @@ Vector<uint8_t> getSearchPath(Arena &arena, Node *n) {
|
||||
//
|
||||
// Precondition: transitively, no child of n has a search path that's a longer
|
||||
// prefix of key than n
|
||||
template <bool kAVX512>
|
||||
bool checkRangeStartsWith(Node *n, std::span<const uint8_t> key, int begin,
|
||||
int end, InternalVersionT readVersion,
|
||||
ConflictSet::Impl *impl) {
|
||||
@@ -2027,7 +2038,7 @@ bool checkRangeStartsWith(Node *n, std::span<const uint8_t> key, int begin,
|
||||
#endif
|
||||
auto remaining = key;
|
||||
if (remaining.size() == 0) {
|
||||
return checkMaxBetweenExclusive(n, begin, end, readVersion);
|
||||
return checkMaxBetweenExclusive<kAVX512>(n, begin, end, readVersion);
|
||||
}
|
||||
|
||||
auto *child = getChild(n, remaining[0]);
|
||||
@@ -2088,9 +2099,10 @@ downLeftSpine:
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
// Return true if the max version among all keys that start with key[:prefixLen]
|
||||
// that are >= key is <= readVersion
|
||||
struct CheckRangeLeftSide {
|
||||
template <bool kAVX512> struct CheckRangeLeftSide {
|
||||
CheckRangeLeftSide(Node *n, std::span<const uint8_t> key, int prefixLen,
|
||||
InternalVersionT readVersion, ConflictSet::Impl *impl)
|
||||
: n(n), remaining(key), prefixLen(prefixLen), readVersion(readVersion),
|
||||
@@ -2122,7 +2134,8 @@ struct CheckRangeLeftSide {
|
||||
}
|
||||
|
||||
if (searchPathLen >= prefixLen) {
|
||||
if (!checkMaxBetweenExclusive(n, remaining[0], 256, readVersion)) {
|
||||
if (!checkMaxBetweenExclusive<kAVX512>(n, remaining[0], 256,
|
||||
readVersion)) {
|
||||
ok = false;
|
||||
return true;
|
||||
}
|
||||
@@ -2209,7 +2222,7 @@ struct CheckRangeLeftSide {
|
||||
|
||||
// Return true if the max version among all keys that start with key[:prefixLen]
|
||||
// that are < key is <= readVersion
|
||||
struct CheckRangeRightSide {
|
||||
template <bool kAVX512> struct CheckRangeRightSide {
|
||||
CheckRangeRightSide(Node *n, std::span<const uint8_t> key, int prefixLen,
|
||||
InternalVersionT readVersion, ConflictSet::Impl *impl)
|
||||
: n(n), key(key), remaining(key), prefixLen(prefixLen),
|
||||
@@ -2251,7 +2264,8 @@ struct CheckRangeRightSide {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!checkMaxBetweenExclusive(n, -1, remaining[0], readVersion)) {
|
||||
if (!checkMaxBetweenExclusive<kAVX512>(n, -1, remaining[0],
|
||||
readVersion)) {
|
||||
ok = false;
|
||||
return true;
|
||||
}
|
||||
@@ -2341,10 +2355,12 @@ struct CheckRangeRightSide {
|
||||
}
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
bool checkRangeRead(Node *n, std::span<const uint8_t> begin,
|
||||
std::span<const uint8_t> end, InternalVersionT readVersion,
|
||||
ConflictSet::Impl *impl) {
|
||||
template <bool kAVX512>
|
||||
bool checkRangeReadImpl(Node *n, std::span<const uint8_t> begin,
|
||||
std::span<const uint8_t> end,
|
||||
InternalVersionT readVersion, ConflictSet::Impl *impl) {
|
||||
int lcp = longestCommonPrefix(begin.data(), end.data(),
|
||||
std::min(begin.size(), end.size()));
|
||||
if (lcp == int(begin.size()) && end.size() == begin.size() + 1 &&
|
||||
@@ -2378,19 +2394,22 @@ bool checkRangeRead(Node *n, std::span<const uint8_t> begin,
|
||||
lcp -= consumed;
|
||||
|
||||
if (lcp == int(begin.size())) {
|
||||
CheckRangeRightSide checkRangeRightSide{n, end, lcp, readVersion, impl};
|
||||
CheckRangeRightSide<kAVX512> checkRangeRightSide{n, end, lcp, readVersion,
|
||||
impl};
|
||||
while (!checkRangeRightSide.step())
|
||||
;
|
||||
return checkRangeRightSide.ok;
|
||||
}
|
||||
|
||||
if (!checkRangeStartsWith(n, begin.subspan(0, lcp), begin[lcp], end[lcp],
|
||||
readVersion, impl)) {
|
||||
if (!checkRangeStartsWith<kAVX512>(n, begin.subspan(0, lcp), begin[lcp],
|
||||
end[lcp], readVersion, impl)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
CheckRangeLeftSide checkRangeLeftSide{n, begin, lcp + 1, readVersion, impl};
|
||||
CheckRangeRightSide checkRangeRightSide{n, end, lcp + 1, readVersion, impl};
|
||||
CheckRangeLeftSide<kAVX512> checkRangeLeftSide{n, begin, lcp + 1, readVersion,
|
||||
impl};
|
||||
CheckRangeRightSide<kAVX512> checkRangeRightSide{n, end, lcp + 1, readVersion,
|
||||
impl};
|
||||
|
||||
for (;;) {
|
||||
bool leftDone = checkRangeLeftSide.step();
|
||||
@@ -2415,6 +2434,27 @@ bool checkRangeRead(Node *n, std::span<const uint8_t> begin,
|
||||
return checkRangeLeftSide.ok & checkRangeRightSide.ok;
|
||||
}
|
||||
|
||||
#if defined(__SANITIZE_THREAD__) || !defined(__x86_64__)
|
||||
bool checkRangeRead(Node *n, std::span<const uint8_t> begin,
|
||||
std::span<const uint8_t> end, InternalVersionT readVersion,
|
||||
ConflictSet::Impl *impl) {
|
||||
return checkRangeReadImpl<false>(n, begin, end, readVersion, impl);
|
||||
}
|
||||
#else
|
||||
__attribute__((target("default"))) bool
|
||||
checkRangeRead(Node *n, std::span<const uint8_t> begin,
|
||||
std::span<const uint8_t> end, InternalVersionT readVersion,
|
||||
ConflictSet::Impl *impl) {
|
||||
return checkRangeReadImpl<false>(n, begin, end, readVersion, impl);
|
||||
}
|
||||
__attribute__((target("avx512f"))) bool
|
||||
checkRangeRead(Node *n, std::span<const uint8_t> begin,
|
||||
std::span<const uint8_t> end, InternalVersionT readVersion,
|
||||
ConflictSet::Impl *impl) {
|
||||
return checkRangeReadImpl<true>(n, begin, end, readVersion, impl);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Returns a pointer to the newly inserted node. Caller must set
|
||||
// `entryPresent`, `entry` fields and `maxVersion` on the result. The search
|
||||
// path of the result's parent will have `maxVersion` at least `writeVersion` as
|
||||
|
Reference in New Issue
Block a user