Specific node overloads for checkMaxBetweenExclusive

This commit is contained in:
2024-10-14 16:54:44 -07:00
parent 824037bf32
commit 11e8717da8

View File

@@ -2147,7 +2147,14 @@ bool scan16(const InternalVersionT *vs, int begin, int end,
// path of n + [child], where child in (begin, end) is <= readVersion. Does not // path of n + [child], where child in (begin, end) is <= readVersion. Does not
// account for the range version of firstGt(searchpath(n) + [end - 1]) // account for the range version of firstGt(searchpath(n) + [end - 1])
template <bool kAVX512> template <bool kAVX512>
bool checkMaxBetweenExclusiveImpl(Node *n, int begin, int end, bool checkMaxBetweenExclusiveImpl(Node0 *, int, int, InternalVersionT,
ReadContext *tls) {
++tls->range_read_node_scan_accum;
return true;
}
template <bool kAVX512>
bool checkMaxBetweenExclusiveImpl(Node3 *n, int begin, int end,
InternalVersionT readVersion, InternalVersionT readVersion,
ReadContext *tls) { ReadContext *tls) {
++tls->range_read_node_scan_accum; ++tls->range_read_node_scan_accum;
@@ -2156,22 +2163,15 @@ bool checkMaxBetweenExclusiveImpl(Node *n, int begin, int end,
assume(-1 <= end); assume(-1 <= end);
assume(end <= 256); assume(end <= 256);
assume(begin < end); assume(begin < end);
assert(!(begin == -1 && end == 256)); assert(!(begin == -1 && end == 256));
switch (n->getType()) {
case Type_Node0:
return true;
case Type_Node3: {
auto *self = static_cast<Node3 *>(n); auto *self = static_cast<Node3 *>(n);
++begin; ++begin;
const unsigned shiftUpperBound = end - begin; const unsigned shiftUpperBound = end - begin;
const unsigned shiftAmount = begin; const unsigned shiftAmount = begin;
auto inBounds = [&](unsigned c) { auto inBounds = [&](unsigned c) { return c - shiftAmount < shiftUpperBound; };
return c - shiftAmount < shiftUpperBound;
};
uint32_t mask = 0; uint32_t mask = 0;
for (int i = 0; i < Node3::kMaxNodes; ++i) { for (int i = 0; i < Node3::kMaxNodes; ++i) {
@@ -2191,7 +2191,19 @@ bool checkMaxBetweenExclusiveImpl(Node *n, int begin, int end,
return !(compared & mask) && firstRangeOk; return !(compared & mask) && firstRangeOk;
} }
case Type_Node16: {
template <bool kAVX512>
bool checkMaxBetweenExclusiveImpl(Node16 *n, int begin, int end,
InternalVersionT readVersion,
ReadContext *tls) {
++tls->range_read_node_scan_accum;
assume(-1 <= begin);
assume(begin <= 256);
assume(-1 <= end);
assume(end <= 256);
assume(begin < end);
assert(!(begin == -1 && end == 256));
auto *self = static_cast<Node16 *>(n); auto *self = static_cast<Node16 *>(n);
++begin; ++begin;
@@ -2231,8 +2243,8 @@ bool checkMaxBetweenExclusiveImpl(Node *n, int begin, int end,
uint16x4_t conflicting[4]; uint16x4_t conflicting[4];
for (int i = 0; i < 4; ++i) { for (int i = 0; i < 4; ++i) {
conflicting[i] = vmovn_u32( conflicting[i] =
vcgtq_s32(vreinterpretq_s32_u32(vsubq_u32(w4[i], rvVec)), z)); vmovn_u32(vcgtq_s32(vreinterpretq_s32_u32(vsubq_u32(w4[i], rvVec)), z));
} }
auto combined = auto combined =
vcombine_u8(vmovn_u16(vcombine_u16(conflicting[0], conflicting[1])), vcombine_u8(vmovn_u16(vcombine_u16(conflicting[0], conflicting[1])),
@@ -2249,8 +2261,7 @@ bool checkMaxBetweenExclusiveImpl(Node *n, int begin, int end,
memcpy(&indices, self->index, 16); memcpy(&indices, self->index, 16);
indices = _mm_sub_epi8(indices, _mm_set1_epi8(begin)); indices = _mm_sub_epi8(indices, _mm_set1_epi8(begin));
uint32_t mask = uint32_t mask =
0xffff & 0xffff & ~_mm_movemask_epi8(_mm_cmpeq_epi8(
~_mm_movemask_epi8(_mm_cmpeq_epi8(
indices, _mm_max_epu8(indices, _mm_set1_epi8(end - begin)))); indices, _mm_max_epu8(indices, _mm_set1_epi8(end - begin))));
mask &= (1 << self->numChildren) - 1; mask &= (1 << self->numChildren) - 1;
if (!mask) { if (!mask) {
@@ -2272,9 +2283,7 @@ bool checkMaxBetweenExclusiveImpl(Node *n, int begin, int end,
const unsigned shiftUpperBound = end - begin; const unsigned shiftUpperBound = end - begin;
const unsigned shiftAmount = begin; const unsigned shiftAmount = begin;
auto inBounds = [&](unsigned c) { auto inBounds = [&](unsigned c) { return c - shiftAmount < shiftUpperBound; };
return c - shiftAmount < shiftUpperBound;
};
uint32_t mask = 0; uint32_t mask = 0;
for (int i = 0; i < 16; ++i) { for (int i = 0; i < 16; ++i) {
@@ -2295,7 +2304,19 @@ bool checkMaxBetweenExclusiveImpl(Node *n, int begin, int end,
#endif #endif
} }
case Type_Node48: {
template <bool kAVX512>
bool checkMaxBetweenExclusiveImpl(Node48 *n, int begin, int end,
InternalVersionT readVersion,
ReadContext *tls) {
++tls->range_read_node_scan_accum;
assume(-1 <= begin);
assume(begin <= 256);
assume(-1 <= end);
assume(end <= 256);
assume(begin < end);
assert(!(begin == -1 && end == 256));
auto *self = static_cast<Node48 *>(n); auto *self = static_cast<Node48 *>(n);
{ {
@@ -2328,7 +2349,19 @@ bool checkMaxBetweenExclusiveImpl(Node *n, int begin, int end,
} }
return true; return true;
} }
case Type_Node256: {
template <bool kAVX512>
bool checkMaxBetweenExclusiveImpl(Node256 *n, int begin, int end,
InternalVersionT readVersion,
ReadContext *tls) {
++tls->range_read_node_scan_accum;
assume(-1 <= begin);
assume(begin <= 256);
assume(-1 <= end);
assume(end <= 256);
assume(begin < end);
assert(!(begin == -1 && end == 256));
static_assert(Node256::kMaxOfMaxTotalPages == 16); static_assert(Node256::kMaxOfMaxTotalPages == 16);
auto *self = static_cast<Node256 *>(n); auto *self = static_cast<Node256 *>(n);
@@ -2380,8 +2413,32 @@ bool checkMaxBetweenExclusiveImpl(Node *n, int begin, int end,
} }
} }
// Check inner pages // Check inner pages
return scan16<kAVX512>(self->maxOfMax, firstPage + 1, lastPage, return scan16<kAVX512>(self->maxOfMax, firstPage + 1, lastPage, readVersion);
readVersion); }
template <bool kAVX512>
bool checkMaxBetweenExclusiveImpl(Node *n, int begin, int end,
InternalVersionT readVersion,
ReadContext *tls) {
switch (n->getType()) {
case Type_Node0:
return checkMaxBetweenExclusiveImpl<kAVX512>(static_cast<Node0 *>(n), begin,
end, readVersion, tls);
case Type_Node3: {
return checkMaxBetweenExclusiveImpl<kAVX512>(static_cast<Node3 *>(n), begin,
end, readVersion, tls);
}
case Type_Node16: {
return checkMaxBetweenExclusiveImpl<kAVX512>(static_cast<Node16 *>(n),
begin, end, readVersion, tls);
}
case Type_Node48: {
return checkMaxBetweenExclusiveImpl<kAVX512>(static_cast<Node48 *>(n),
begin, end, readVersion, tls);
}
case Type_Node256: {
return checkMaxBetweenExclusiveImpl<kAVX512>(static_cast<Node256 *>(n),
begin, end, readVersion, tls);
} }
default: // GCOVR_EXCL_LINE default: // GCOVR_EXCL_LINE
__builtin_unreachable(); // GCOVR_EXCL_LINE __builtin_unreachable(); // GCOVR_EXCL_LINE
@@ -2402,6 +2459,58 @@ bool checkMaxBetweenExclusive(Node *n, int begin, int end,
return checkMaxBetweenExclusiveImpl<false>(n, begin, end, readVersion, tls); return checkMaxBetweenExclusiveImpl<false>(n, begin, end, readVersion, tls);
} }
// Node0-typed overload of checkMaxBetweenExclusive. The node type is known
// statically here, so this forwards directly to the Node0 overload of
// checkMaxBetweenExclusiveImpl with kAVX512 = false. All arguments are passed
// through unchanged and the callee's result is returned as-is.
bool checkMaxBetweenExclusive(Node0 *n, int begin, int end,
InternalVersionT readVersion, ReadContext *tls) {
return checkMaxBetweenExclusiveImpl<false>(n, begin, end, readVersion, tls);
}
// Node3-typed overload of checkMaxBetweenExclusive. Forwards directly to the
// Node3 overload of checkMaxBetweenExclusiveImpl with kAVX512 = false. All
// arguments are passed through unchanged and the callee's result is returned
// as-is.
bool checkMaxBetweenExclusive(Node3 *n, int begin, int end,
InternalVersionT readVersion, ReadContext *tls) {
return checkMaxBetweenExclusiveImpl<false>(n, begin, end, readVersion, tls);
}
// Node16-typed overload of checkMaxBetweenExclusive.
//
// When HAS_AVX is defined and __SANITIZE_THREAD__ is not, two versions with the
// same signature are emitted: one compiled with target("avx512f") that
// forwards to checkMaxBetweenExclusiveImpl<true>, and one tagged
// target("default") that forwards to checkMaxBetweenExclusiveImpl<false>.
// NOTE(review): presumably the toolchain selects between the two based on the
// target attributes -- confirm against the compiler documentation.
//
// Otherwise only the plain (kAVX512 = false) version below is compiled.
#if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__)
__attribute__((target("avx512f"))) bool
checkMaxBetweenExclusive(Node16 *n, int begin, int end,
InternalVersionT readVersion, ReadContext *tls) {
return checkMaxBetweenExclusiveImpl<true>(n, begin, end, readVersion, tls);
}
// The attribute below is inside the #if so that it only decorates the
// following definition when the avx512f version above also exists.
__attribute__((target("default")))
#endif
bool checkMaxBetweenExclusive(Node16 *n, int begin, int end,
InternalVersionT readVersion, ReadContext *tls) {
return checkMaxBetweenExclusiveImpl<false>(n, begin, end, readVersion, tls);
}
// Node48-typed overload of checkMaxBetweenExclusive.
//
// When HAS_AVX is defined and __SANITIZE_THREAD__ is not, two versions with the
// same signature are emitted: one compiled with target("avx512f") that
// forwards to checkMaxBetweenExclusiveImpl<true>, and one tagged
// target("default") that forwards to checkMaxBetweenExclusiveImpl<false>.
// NOTE(review): presumably the toolchain selects between the two based on the
// target attributes -- confirm against the compiler documentation.
//
// Otherwise only the plain (kAVX512 = false) version below is compiled.
#if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__)
__attribute__((target("avx512f"))) bool
checkMaxBetweenExclusive(Node48 *n, int begin, int end,
InternalVersionT readVersion, ReadContext *tls) {
return checkMaxBetweenExclusiveImpl<true>(n, begin, end, readVersion, tls);
}
// The attribute below is inside the #if so that it only decorates the
// following definition when the avx512f version above also exists.
__attribute__((target("default")))
#endif
bool checkMaxBetweenExclusive(Node48 *n, int begin, int end,
InternalVersionT readVersion, ReadContext *tls) {
return checkMaxBetweenExclusiveImpl<false>(n, begin, end, readVersion, tls);
}
// Node256-typed overload of checkMaxBetweenExclusive.
//
// When HAS_AVX is defined and __SANITIZE_THREAD__ is not, two versions with the
// same signature are emitted: one compiled with target("avx512f") that
// forwards to checkMaxBetweenExclusiveImpl<true>, and one tagged
// target("default") that forwards to checkMaxBetweenExclusiveImpl<false>.
// NOTE(review): presumably the toolchain selects between the two based on the
// target attributes -- confirm against the compiler documentation.
//
// Otherwise only the plain (kAVX512 = false) version below is compiled.
#if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__)
__attribute__((target("avx512f"))) bool
checkMaxBetweenExclusive(Node256 *n, int begin, int end,
InternalVersionT readVersion, ReadContext *tls) {
return checkMaxBetweenExclusiveImpl<true>(n, begin, end, readVersion, tls);
}
// The attribute below is inside the #if so that it only decorates the
// following definition when the avx512f version above also exists.
__attribute__((target("default")))
#endif
bool checkMaxBetweenExclusive(Node256 *n, int begin, int end,
InternalVersionT readVersion, ReadContext *tls) {
return checkMaxBetweenExclusiveImpl<false>(n, begin, end, readVersion, tls);
}
Vector<uint8_t> getSearchPath(Arena &arena, Node *n) { Vector<uint8_t> getSearchPath(Arena &arena, Node *n) {
assert(n != nullptr); assert(n != nullptr);
auto result = vector<uint8_t>(arena); auto result = vector<uint8_t>(arena);
@@ -2424,7 +2533,8 @@ Vector<uint8_t> getSearchPath(Arena &arena, Node *n) {
// //
// Precondition: transitively, no child of n has a search path that's a longer // Precondition: transitively, no child of n has a search path that's a longer
// prefix of key than n // prefix of key than n
bool checkRangeStartsWith(Node *n, std::span<const uint8_t> key, int begin, template <class NodeT>
bool checkRangeStartsWith(NodeT *n, std::span<const uint8_t> key, int begin,
int end, InternalVersionT readVersion, int end, InternalVersionT readVersion,
ReadContext *tls) { ReadContext *tls) {
#if DEBUG_VERBOSE && !defined(NDEBUG) #if DEBUG_VERBOSE && !defined(NDEBUG)
@@ -2501,6 +2611,15 @@ scan16<true>(const InternalVersionT *vs, int begin, int end,
template __attribute__((target("avx512f"))) bool template __attribute__((target("avx512f"))) bool
checkMaxBetweenExclusiveImpl<true>(Node *n, int begin, int end, checkMaxBetweenExclusiveImpl<true>(Node *n, int begin, int end,
InternalVersionT readVersion, ReadContext *); InternalVersionT readVersion, ReadContext *);
// Explicit instantiations of the kAVX512 = true overloads of
// checkMaxBetweenExclusiveImpl for the Node16, Node48 and Node256 node types,
// each compiled with target("avx512f"). These are the instantiations that the
// avx512f-targeted checkMaxBetweenExclusive overloads for those node types
// call.
template __attribute__((target("avx512f"))) bool
checkMaxBetweenExclusiveImpl<true>(Node16 *n, int begin, int end,
InternalVersionT readVersion, ReadContext *);
template __attribute__((target("avx512f"))) bool
checkMaxBetweenExclusiveImpl<true>(Node48 *n, int begin, int end,
InternalVersionT readVersion, ReadContext *);
template __attribute__((target("avx512f"))) bool
checkMaxBetweenExclusiveImpl<true>(Node256 *n, int begin, int end,
InternalVersionT readVersion, ReadContext *);
#endif #endif
// Returns a pointer the pointer to the newly inserted node in the tree. Caller // Returns a pointer the pointer to the newly inserted node in the tree. Caller
@@ -3207,14 +3326,16 @@ PRESERVE_NONE void begin(CheckJob *job, CheckContext *context) {
job->remaining = job->begin.subspan(0, job->lcp); job->remaining = job->begin.subspan(0, job->lcp);
if (job->remaining.size() == 0) { if (job->remaining.size() == 0) {
MUSTTAIL return done_common_prefix_iter(job, context); job->continuation = done_common_prefix_iter;
MUSTTAIL return job->continuation(job, context);
} }
auto [c, maxV] = getChildAndMaxVersion(job->n, job->remaining[0]); auto [c, maxV] = getChildAndMaxVersion(job->n, job->remaining[0]);
job->maxV = maxV; job->maxV = maxV;
job->child = c; job->child = c;
if (job->child == nullptr) { if (job->child == nullptr) {
MUSTTAIL return done_common_prefix_iter(job, context); job->continuation = done_common_prefix_iter;
MUSTTAIL return job->continuation(job, context);
} }
job->continuation = commonPrefixIterTable[c.getType()]; job->continuation = commonPrefixIterTable[c.getType()];