Apply function multi versioning higher in call stack to save branches
Some checks failed
Tests / Clang total: 1039, passed: 1039
Clang |Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|0|0|0|0|:clap:
Tests / SIMD fallback total: 1039, passed: 1039
Tests / 32-bit versions total: 1039, passed: 1039
Tests / Release [gcc] total: 1039, passed: 1039
GNU C Compiler (gcc) |Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|0|0|0|0|:clap:
Tests / Release [gcc,aarch64] total: 775, passed: 775
Tests / Coverage total: 780, passed: 780
weaselab/conflict-set/pipeline/head There was a failure building this commit
Some checks failed
Tests / Clang total: 1039, passed: 1039
Clang |Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|0|0|0|0|:clap:
Tests / SIMD fallback total: 1039, passed: 1039
Tests / 32-bit versions total: 1039, passed: 1039
Tests / Release [gcc] total: 1039, passed: 1039
GNU C Compiler (gcc) |Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|0|0|0|0|:clap:
Tests / Release [gcc,aarch64] total: 775, passed: 775
Tests / Coverage total: 780, passed: 780
weaselab/conflict-set/pipeline/head There was a failure building this commit
This commit is contained in:
100
ConflictSet.cpp
100
ConflictSet.cpp
@@ -1709,11 +1709,7 @@ downLeftSpine:
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAS_AVX
|
#ifdef HAS_AVX
|
||||||
#ifndef __SANITIZE_THREAD__
|
uint32_t compare16_32bit(const InternalVersionT *vs, InternalVersionT rv) {
|
||||||
__attribute__((target("default")))
|
|
||||||
#endif
|
|
||||||
uint32_t
|
|
||||||
compare16_32bit(const InternalVersionT *vs, InternalVersionT rv) {
|
|
||||||
uint32_t compared = 0;
|
uint32_t compared = 0;
|
||||||
__m128i w[4];
|
__m128i w[4];
|
||||||
memcpy(w, vs, sizeof(w));
|
memcpy(w, vs, sizeof(w));
|
||||||
@@ -1729,9 +1725,8 @@ compare16_32bit(const InternalVersionT *vs, InternalVersionT rv) {
|
|||||||
return compared;
|
return compared;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef __SANITIZE_THREAD__
|
|
||||||
__attribute__((target("avx512f"))) uint32_t
|
__attribute__((target("avx512f"))) uint32_t
|
||||||
compare16_32bit(const InternalVersionT *vs, InternalVersionT rv) {
|
compare16_32bit_avx512(const InternalVersionT *vs, InternalVersionT rv) {
|
||||||
__m512i w;
|
__m512i w;
|
||||||
memcpy(&w, vs, sizeof(w));
|
memcpy(&w, vs, sizeof(w));
|
||||||
uint32_t r;
|
uint32_t r;
|
||||||
@@ -1740,10 +1735,10 @@ compare16_32bit(const InternalVersionT *vs, InternalVersionT rv) {
|
|||||||
_mm512_setzero_epi32());
|
_mm512_setzero_epi32());
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#endif
|
|
||||||
|
|
||||||
// Returns true if v[i] <= readVersion for all i such that begin <= is[i] < end
|
// Returns true if v[i] <= readVersion for all i such that begin <= is[i] < end
|
||||||
// Preconditions: begin <= end, end - begin < 256
|
// Preconditions: begin <= end, end - begin < 256
|
||||||
|
template <bool kAVX512>
|
||||||
bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end,
|
bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end,
|
||||||
InternalVersionT readVersion) {
|
InternalVersionT readVersion) {
|
||||||
|
|
||||||
@@ -1800,7 +1795,11 @@ bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end,
|
|||||||
|
|
||||||
uint32_t compared = 0;
|
uint32_t compared = 0;
|
||||||
#if INTERNAL_VERSION_32_BIT
|
#if INTERNAL_VERSION_32_BIT
|
||||||
|
if constexpr (kAVX512) {
|
||||||
|
compared = compare16_32bit_avx512(vs, readVersion);
|
||||||
|
} else {
|
||||||
compared = compare16_32bit(vs, readVersion);
|
compared = compare16_32bit(vs, readVersion);
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
for (int i = 0; i < 16; ++i) {
|
for (int i = 0; i < 16; ++i) {
|
||||||
compared |= (vs[i] > readVersion) << i;
|
compared |= (vs[i] > readVersion) << i;
|
||||||
@@ -1830,6 +1829,7 @@ bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end,
|
|||||||
// Returns true if v[i] <= readVersion for all i such that begin <= i < end
|
// Returns true if v[i] <= readVersion for all i such that begin <= i < end
|
||||||
//
|
//
|
||||||
// always_inline So that we can optimize when begin or end is a constant.
|
// always_inline So that we can optimize when begin or end is a constant.
|
||||||
|
template <bool kAVX512>
|
||||||
inline __attribute((always_inline)) bool scan16(const InternalVersionT *vs,
|
inline __attribute((always_inline)) bool scan16(const InternalVersionT *vs,
|
||||||
int begin, int end,
|
int begin, int end,
|
||||||
InternalVersionT readVersion) {
|
InternalVersionT readVersion) {
|
||||||
@@ -1862,7 +1862,12 @@ inline __attribute((always_inline)) bool scan16(const InternalVersionT *vs,
|
|||||||
conflict >>= begin << 2;
|
conflict >>= begin << 2;
|
||||||
return !conflict;
|
return !conflict;
|
||||||
#elif INTERNAL_VERSION_32_BIT && defined(HAS_AVX)
|
#elif INTERNAL_VERSION_32_BIT && defined(HAS_AVX)
|
||||||
uint32_t conflict = compare16_32bit(vs, readVersion);
|
uint32_t conflict;
|
||||||
|
if constexpr (kAVX512) {
|
||||||
|
conflict = compare16_32bit_avx512(vs, readVersion);
|
||||||
|
} else {
|
||||||
|
conflict = compare16_32bit(vs, readVersion);
|
||||||
|
}
|
||||||
conflict &= (1 << end) - 1;
|
conflict &= (1 << end) - 1;
|
||||||
conflict >>= begin;
|
conflict >>= begin;
|
||||||
return !conflict;
|
return !conflict;
|
||||||
@@ -1880,6 +1885,7 @@ inline __attribute((always_inline)) bool scan16(const InternalVersionT *vs,
|
|||||||
// Return whether or not the max version among all keys starting with the search
|
// Return whether or not the max version among all keys starting with the search
|
||||||
// path of n + [child], where child in (begin, end) is <= readVersion. Does not
|
// path of n + [child], where child in (begin, end) is <= readVersion. Does not
|
||||||
// account for the range version of firstGt(searchpath(n) + [end - 1])
|
// account for the range version of firstGt(searchpath(n) + [end - 1])
|
||||||
|
template <bool kAVX512>
|
||||||
bool checkMaxBetweenExclusive(Node *n, int begin, int end,
|
bool checkMaxBetweenExclusive(Node *n, int begin, int end,
|
||||||
InternalVersionT readVersion) {
|
InternalVersionT readVersion) {
|
||||||
assume(-1 <= begin);
|
assume(-1 <= begin);
|
||||||
@@ -1934,7 +1940,8 @@ bool checkMaxBetweenExclusive(Node *n, int begin, int end,
|
|||||||
case Type_Node16: {
|
case Type_Node16: {
|
||||||
auto *self = static_cast<Node16 *>(n);
|
auto *self = static_cast<Node16 *>(n);
|
||||||
|
|
||||||
return scan16(self->childMaxVersion, self->index, begin, end, readVersion);
|
return scan16<kAVX512>(self->childMaxVersion, self->index, begin, end,
|
||||||
|
readVersion);
|
||||||
}
|
}
|
||||||
case Type_Node48: {
|
case Type_Node48: {
|
||||||
auto *self = static_cast<Node48 *>(n);
|
auto *self = static_cast<Node48 *>(n);
|
||||||
@@ -1942,9 +1949,10 @@ bool checkMaxBetweenExclusive(Node *n, int begin, int end,
|
|||||||
static_assert(Node48::kMaxOfMaxPageSize == 16);
|
static_assert(Node48::kMaxOfMaxPageSize == 16);
|
||||||
for (int i = 0; i < Node48::kMaxOfMaxTotalPages; ++i) {
|
for (int i = 0; i < Node48::kMaxOfMaxTotalPages; ++i) {
|
||||||
if (self->maxOfMax[i] > readVersion) {
|
if (self->maxOfMax[i] > readVersion) {
|
||||||
if (!scan16(self->childMaxVersion + (i << Node48::kMaxOfMaxShift),
|
if (!scan16<kAVX512>(self->childMaxVersion +
|
||||||
self->reverseIndex + (i << Node48::kMaxOfMaxShift), begin,
|
(i << Node48::kMaxOfMaxShift),
|
||||||
end, readVersion)) {
|
self->reverseIndex + (i << Node48::kMaxOfMaxShift),
|
||||||
|
begin, end, readVersion)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1966,14 +1974,14 @@ bool checkMaxBetweenExclusive(Node *n, int begin, int end,
|
|||||||
}
|
}
|
||||||
const int intraPageBegin = begin & (Node256::kMaxOfMaxPageSize - 1);
|
const int intraPageBegin = begin & (Node256::kMaxOfMaxPageSize - 1);
|
||||||
const int intraPageEnd = end - (lastPage << Node256::kMaxOfMaxShift);
|
const int intraPageEnd = end - (lastPage << Node256::kMaxOfMaxShift);
|
||||||
return scan16(self->childMaxVersion +
|
return scan16<kAVX512>(self->childMaxVersion +
|
||||||
(firstPage << Node256::kMaxOfMaxShift),
|
(firstPage << Node256::kMaxOfMaxShift),
|
||||||
intraPageBegin, intraPageEnd, readVersion);
|
intraPageBegin, intraPageEnd, readVersion);
|
||||||
}
|
}
|
||||||
// Check the first page
|
// Check the first page
|
||||||
if (self->maxOfMax[firstPage] > readVersion) {
|
if (self->maxOfMax[firstPage] > readVersion) {
|
||||||
const int intraPageBegin = begin & (Node256::kMaxOfMaxPageSize - 1);
|
const int intraPageBegin = begin & (Node256::kMaxOfMaxPageSize - 1);
|
||||||
if (!scan16(self->childMaxVersion +
|
if (!scan16<kAVX512>(self->childMaxVersion +
|
||||||
(firstPage << Node256::kMaxOfMaxShift),
|
(firstPage << Node256::kMaxOfMaxShift),
|
||||||
intraPageBegin, 16, readVersion)) {
|
intraPageBegin, 16, readVersion)) {
|
||||||
return false;
|
return false;
|
||||||
@@ -1982,7 +1990,8 @@ bool checkMaxBetweenExclusive(Node *n, int begin, int end,
|
|||||||
// Check the last page
|
// Check the last page
|
||||||
if (self->maxOfMax[lastPage] > readVersion) {
|
if (self->maxOfMax[lastPage] > readVersion) {
|
||||||
const int intraPageEnd = end - (lastPage << Node256::kMaxOfMaxShift);
|
const int intraPageEnd = end - (lastPage << Node256::kMaxOfMaxShift);
|
||||||
if (!scan16(self->childMaxVersion + (lastPage << Node256::kMaxOfMaxShift),
|
if (!scan16<kAVX512>(self->childMaxVersion +
|
||||||
|
(lastPage << Node256::kMaxOfMaxShift),
|
||||||
0, intraPageEnd, readVersion)) {
|
0, intraPageEnd, readVersion)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -1990,7 +1999,8 @@ bool checkMaxBetweenExclusive(Node *n, int begin, int end,
|
|||||||
// Check inner pages
|
// Check inner pages
|
||||||
const int innerPageBegin = (begin >> Node256::kMaxOfMaxShift) + 1;
|
const int innerPageBegin = (begin >> Node256::kMaxOfMaxShift) + 1;
|
||||||
const int innerPageEnd = (end - 1) >> Node256::kMaxOfMaxShift;
|
const int innerPageEnd = (end - 1) >> Node256::kMaxOfMaxShift;
|
||||||
return scan16(self->maxOfMax, innerPageBegin, innerPageEnd, readVersion);
|
return scan16<kAVX512>(self->maxOfMax, innerPageBegin, innerPageEnd,
|
||||||
|
readVersion);
|
||||||
}
|
}
|
||||||
default: // GCOVR_EXCL_LINE
|
default: // GCOVR_EXCL_LINE
|
||||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||||
@@ -2019,6 +2029,7 @@ Vector<uint8_t> getSearchPath(Arena &arena, Node *n) {
|
|||||||
//
|
//
|
||||||
// Precondition: transitively, no child of n has a search path that's a longer
|
// Precondition: transitively, no child of n has a search path that's a longer
|
||||||
// prefix of key than n
|
// prefix of key than n
|
||||||
|
template <bool kAVX512>
|
||||||
bool checkRangeStartsWith(Node *n, std::span<const uint8_t> key, int begin,
|
bool checkRangeStartsWith(Node *n, std::span<const uint8_t> key, int begin,
|
||||||
int end, InternalVersionT readVersion,
|
int end, InternalVersionT readVersion,
|
||||||
ConflictSet::Impl *impl) {
|
ConflictSet::Impl *impl) {
|
||||||
@@ -2027,7 +2038,7 @@ bool checkRangeStartsWith(Node *n, std::span<const uint8_t> key, int begin,
|
|||||||
#endif
|
#endif
|
||||||
auto remaining = key;
|
auto remaining = key;
|
||||||
if (remaining.size() == 0) {
|
if (remaining.size() == 0) {
|
||||||
return checkMaxBetweenExclusive(n, begin, end, readVersion);
|
return checkMaxBetweenExclusive<kAVX512>(n, begin, end, readVersion);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto *child = getChild(n, remaining[0]);
|
auto *child = getChild(n, remaining[0]);
|
||||||
@@ -2088,9 +2099,10 @@ downLeftSpine:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
// Return true if the max version among all keys that start with key[:prefixLen]
|
// Return true if the max version among all keys that start with key[:prefixLen]
|
||||||
// that are >= key is <= readVersion
|
// that are >= key is <= readVersion
|
||||||
struct CheckRangeLeftSide {
|
template <bool kAVX512> struct CheckRangeLeftSide {
|
||||||
CheckRangeLeftSide(Node *n, std::span<const uint8_t> key, int prefixLen,
|
CheckRangeLeftSide(Node *n, std::span<const uint8_t> key, int prefixLen,
|
||||||
InternalVersionT readVersion, ConflictSet::Impl *impl)
|
InternalVersionT readVersion, ConflictSet::Impl *impl)
|
||||||
: n(n), remaining(key), prefixLen(prefixLen), readVersion(readVersion),
|
: n(n), remaining(key), prefixLen(prefixLen), readVersion(readVersion),
|
||||||
@@ -2122,7 +2134,8 @@ struct CheckRangeLeftSide {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (searchPathLen >= prefixLen) {
|
if (searchPathLen >= prefixLen) {
|
||||||
if (!checkMaxBetweenExclusive(n, remaining[0], 256, readVersion)) {
|
if (!checkMaxBetweenExclusive<kAVX512>(n, remaining[0], 256,
|
||||||
|
readVersion)) {
|
||||||
ok = false;
|
ok = false;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -2209,7 +2222,7 @@ struct CheckRangeLeftSide {
|
|||||||
|
|
||||||
// Return true if the max version among all keys that start with key[:prefixLen]
|
// Return true if the max version among all keys that start with key[:prefixLen]
|
||||||
// that are < key is <= readVersion
|
// that are < key is <= readVersion
|
||||||
struct CheckRangeRightSide {
|
template <bool kAVX512> struct CheckRangeRightSide {
|
||||||
CheckRangeRightSide(Node *n, std::span<const uint8_t> key, int prefixLen,
|
CheckRangeRightSide(Node *n, std::span<const uint8_t> key, int prefixLen,
|
||||||
InternalVersionT readVersion, ConflictSet::Impl *impl)
|
InternalVersionT readVersion, ConflictSet::Impl *impl)
|
||||||
: n(n), key(key), remaining(key), prefixLen(prefixLen),
|
: n(n), key(key), remaining(key), prefixLen(prefixLen),
|
||||||
@@ -2251,7 +2264,8 @@ struct CheckRangeRightSide {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!checkMaxBetweenExclusive(n, -1, remaining[0], readVersion)) {
|
if (!checkMaxBetweenExclusive<kAVX512>(n, -1, remaining[0],
|
||||||
|
readVersion)) {
|
||||||
ok = false;
|
ok = false;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -2341,10 +2355,12 @@ struct CheckRangeRightSide {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
} // namespace
|
||||||
|
|
||||||
bool checkRangeRead(Node *n, std::span<const uint8_t> begin,
|
template <bool kAVX512>
|
||||||
std::span<const uint8_t> end, InternalVersionT readVersion,
|
bool checkRangeReadImpl(Node *n, std::span<const uint8_t> begin,
|
||||||
ConflictSet::Impl *impl) {
|
std::span<const uint8_t> end,
|
||||||
|
InternalVersionT readVersion, ConflictSet::Impl *impl) {
|
||||||
int lcp = longestCommonPrefix(begin.data(), end.data(),
|
int lcp = longestCommonPrefix(begin.data(), end.data(),
|
||||||
std::min(begin.size(), end.size()));
|
std::min(begin.size(), end.size()));
|
||||||
if (lcp == int(begin.size()) && end.size() == begin.size() + 1 &&
|
if (lcp == int(begin.size()) && end.size() == begin.size() + 1 &&
|
||||||
@@ -2378,19 +2394,22 @@ bool checkRangeRead(Node *n, std::span<const uint8_t> begin,
|
|||||||
lcp -= consumed;
|
lcp -= consumed;
|
||||||
|
|
||||||
if (lcp == int(begin.size())) {
|
if (lcp == int(begin.size())) {
|
||||||
CheckRangeRightSide checkRangeRightSide{n, end, lcp, readVersion, impl};
|
CheckRangeRightSide<kAVX512> checkRangeRightSide{n, end, lcp, readVersion,
|
||||||
|
impl};
|
||||||
while (!checkRangeRightSide.step())
|
while (!checkRangeRightSide.step())
|
||||||
;
|
;
|
||||||
return checkRangeRightSide.ok;
|
return checkRangeRightSide.ok;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!checkRangeStartsWith(n, begin.subspan(0, lcp), begin[lcp], end[lcp],
|
if (!checkRangeStartsWith<kAVX512>(n, begin.subspan(0, lcp), begin[lcp],
|
||||||
readVersion, impl)) {
|
end[lcp], readVersion, impl)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
CheckRangeLeftSide checkRangeLeftSide{n, begin, lcp + 1, readVersion, impl};
|
CheckRangeLeftSide<kAVX512> checkRangeLeftSide{n, begin, lcp + 1, readVersion,
|
||||||
CheckRangeRightSide checkRangeRightSide{n, end, lcp + 1, readVersion, impl};
|
impl};
|
||||||
|
CheckRangeRightSide<kAVX512> checkRangeRightSide{n, end, lcp + 1, readVersion,
|
||||||
|
impl};
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
bool leftDone = checkRangeLeftSide.step();
|
bool leftDone = checkRangeLeftSide.step();
|
||||||
@@ -2415,6 +2434,27 @@ bool checkRangeRead(Node *n, std::span<const uint8_t> begin,
|
|||||||
return checkRangeLeftSide.ok & checkRangeRightSide.ok;
|
return checkRangeLeftSide.ok & checkRangeRightSide.ok;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(__SANITIZE_THREAD__) || !defined(__x86_64__)
|
||||||
|
bool checkRangeRead(Node *n, std::span<const uint8_t> begin,
|
||||||
|
std::span<const uint8_t> end, InternalVersionT readVersion,
|
||||||
|
ConflictSet::Impl *impl) {
|
||||||
|
return checkRangeReadImpl<false>(n, begin, end, readVersion, impl);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
__attribute__((target("default"))) bool
|
||||||
|
checkRangeRead(Node *n, std::span<const uint8_t> begin,
|
||||||
|
std::span<const uint8_t> end, InternalVersionT readVersion,
|
||||||
|
ConflictSet::Impl *impl) {
|
||||||
|
return checkRangeReadImpl<false>(n, begin, end, readVersion, impl);
|
||||||
|
}
|
||||||
|
__attribute__((target("avx512f"))) bool
|
||||||
|
checkRangeRead(Node *n, std::span<const uint8_t> begin,
|
||||||
|
std::span<const uint8_t> end, InternalVersionT readVersion,
|
||||||
|
ConflictSet::Impl *impl) {
|
||||||
|
return checkRangeReadImpl<true>(n, begin, end, readVersion, impl);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// Returns a pointer to the newly inserted node. Caller must set
|
// Returns a pointer to the newly inserted node. Caller must set
|
||||||
// `entryPresent`, `entry` fields and `maxVersion` on the result. The search
|
// `entryPresent`, `entry` fields and `maxVersion` on the result. The search
|
||||||
// path of the result's parent will have `maxVersion` at least `writeVersion` as
|
// path of the result's parent will have `maxVersion` at least `writeVersion` as
|
||||||
|
Reference in New Issue
Block a user