Specific node overloads for checkMaxBetweenExclusive
This commit is contained in:
509
ConflictSet.cpp
509
ConflictSet.cpp
@@ -2147,7 +2147,14 @@ bool scan16(const InternalVersionT *vs, int begin, int end,
|
||||
// path of n + [child], where child in (begin, end) is <= readVersion. Does not
|
||||
// account for the range version of firstGt(searchpath(n) + [end - 1])
|
||||
template <bool kAVX512>
|
||||
bool checkMaxBetweenExclusiveImpl(Node *n, int begin, int end,
|
||||
bool checkMaxBetweenExclusiveImpl(Node0 *, int, int, InternalVersionT,
|
||||
ReadContext *tls) {
|
||||
++tls->range_read_node_scan_accum;
|
||||
return true;
|
||||
}
|
||||
|
||||
template <bool kAVX512>
|
||||
bool checkMaxBetweenExclusiveImpl(Node3 *n, int begin, int end,
|
||||
InternalVersionT readVersion,
|
||||
ReadContext *tls) {
|
||||
++tls->range_read_node_scan_accum;
|
||||
@@ -2156,232 +2163,282 @@ bool checkMaxBetweenExclusiveImpl(Node *n, int begin, int end,
|
||||
assume(-1 <= end);
|
||||
assume(end <= 256);
|
||||
assume(begin < end);
|
||||
|
||||
assert(!(begin == -1 && end == 256));
|
||||
|
||||
switch (n->getType()) {
|
||||
case Type_Node0:
|
||||
return true;
|
||||
case Type_Node3: {
|
||||
auto *self = static_cast<Node3 *>(n);
|
||||
auto *self = static_cast<Node3 *>(n);
|
||||
|
||||
++begin;
|
||||
++begin;
|
||||
|
||||
const unsigned shiftUpperBound = end - begin;
|
||||
const unsigned shiftAmount = begin;
|
||||
auto inBounds = [&](unsigned c) {
|
||||
return c - shiftAmount < shiftUpperBound;
|
||||
};
|
||||
const unsigned shiftUpperBound = end - begin;
|
||||
const unsigned shiftAmount = begin;
|
||||
auto inBounds = [&](unsigned c) { return c - shiftAmount < shiftUpperBound; };
|
||||
|
||||
uint32_t mask = 0;
|
||||
for (int i = 0; i < Node3::kMaxNodes; ++i) {
|
||||
mask |= inBounds(self->index[i]) << i;
|
||||
}
|
||||
mask &= (1 << self->numChildren) - 1;
|
||||
if (!mask) {
|
||||
return true;
|
||||
}
|
||||
Node *child = self->children[std::countr_zero(mask)];
|
||||
const bool firstRangeOk =
|
||||
!child->entryPresent || child->entry.rangeVersion <= readVersion;
|
||||
uint32_t compared = 0;
|
||||
for (int i = 0; i < Node3::kMaxNodes; ++i) {
|
||||
compared |= (self->childMaxVersion[i] > readVersion) << i;
|
||||
}
|
||||
|
||||
return !(compared & mask) && firstRangeOk;
|
||||
uint32_t mask = 0;
|
||||
for (int i = 0; i < Node3::kMaxNodes; ++i) {
|
||||
mask |= inBounds(self->index[i]) << i;
|
||||
}
|
||||
mask &= (1 << self->numChildren) - 1;
|
||||
if (!mask) {
|
||||
return true;
|
||||
}
|
||||
Node *child = self->children[std::countr_zero(mask)];
|
||||
const bool firstRangeOk =
|
||||
!child->entryPresent || child->entry.rangeVersion <= readVersion;
|
||||
uint32_t compared = 0;
|
||||
for (int i = 0; i < Node3::kMaxNodes; ++i) {
|
||||
compared |= (self->childMaxVersion[i] > readVersion) << i;
|
||||
}
|
||||
case Type_Node16: {
|
||||
auto *self = static_cast<Node16 *>(n);
|
||||
|
||||
++begin;
|
||||
return !(compared & mask) && firstRangeOk;
|
||||
}
|
||||
|
||||
assert(begin <= end);
|
||||
assert(end - begin < 256);
|
||||
template <bool kAVX512>
|
||||
bool checkMaxBetweenExclusiveImpl(Node16 *n, int begin, int end,
|
||||
InternalVersionT readVersion,
|
||||
ReadContext *tls) {
|
||||
++tls->range_read_node_scan_accum;
|
||||
assume(-1 <= begin);
|
||||
assume(begin <= 256);
|
||||
assume(-1 <= end);
|
||||
assume(end <= 256);
|
||||
assume(begin < end);
|
||||
assert(!(begin == -1 && end == 256));
|
||||
|
||||
auto *self = static_cast<Node16 *>(n);
|
||||
|
||||
++begin;
|
||||
|
||||
assert(begin <= end);
|
||||
assert(end - begin < 256);
|
||||
|
||||
#ifdef HAS_ARM_NEON
|
||||
|
||||
uint8x16_t indices;
|
||||
memcpy(&indices, self->index, 16);
|
||||
// 0xff for each in bounds
|
||||
auto results =
|
||||
vcltq_u8(vsubq_u8(indices, vdupq_n_u8(begin)), vdupq_n_u8(end - begin));
|
||||
// 0xf for each 0xff
|
||||
uint64_t mask = vget_lane_u64(
|
||||
vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(results), 4)), 0);
|
||||
uint8x16_t indices;
|
||||
memcpy(&indices, self->index, 16);
|
||||
// 0xff for each in bounds
|
||||
auto results =
|
||||
vcltq_u8(vsubq_u8(indices, vdupq_n_u8(begin)), vdupq_n_u8(end - begin));
|
||||
// 0xf for each 0xff
|
||||
uint64_t mask = vget_lane_u64(
|
||||
vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(results), 4)), 0);
|
||||
|
||||
mask &= self->numChildren == 16
|
||||
? uint64_t(-1)
|
||||
: (uint64_t(1) << (self->numChildren << 2)) - 1;
|
||||
if (!mask) {
|
||||
return true;
|
||||
}
|
||||
Node *child = self->children[std::countr_zero(mask) >> 2];
|
||||
const bool firstRangeOk =
|
||||
!child->entryPresent || child->entry.rangeVersion <= readVersion;
|
||||
mask &= self->numChildren == 16
|
||||
? uint64_t(-1)
|
||||
: (uint64_t(1) << (self->numChildren << 2)) - 1;
|
||||
if (!mask) {
|
||||
return true;
|
||||
}
|
||||
Node *child = self->children[std::countr_zero(mask) >> 2];
|
||||
const bool firstRangeOk =
|
||||
!child->entryPresent || child->entry.rangeVersion <= readVersion;
|
||||
|
||||
uint32x4_t w4[4];
|
||||
memcpy(w4, self->childMaxVersion, sizeof(w4));
|
||||
uint32_t rv;
|
||||
memcpy(&rv, &readVersion, sizeof(rv));
|
||||
const auto rvVec = vdupq_n_u32(rv);
|
||||
uint32x4_t w4[4];
|
||||
memcpy(w4, self->childMaxVersion, sizeof(w4));
|
||||
uint32_t rv;
|
||||
memcpy(&rv, &readVersion, sizeof(rv));
|
||||
const auto rvVec = vdupq_n_u32(rv);
|
||||
|
||||
int32x4_t z;
|
||||
memset(&z, 0, sizeof(z));
|
||||
int32x4_t z;
|
||||
memset(&z, 0, sizeof(z));
|
||||
|
||||
uint16x4_t conflicting[4];
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
conflicting[i] = vmovn_u32(
|
||||
vcgtq_s32(vreinterpretq_s32_u32(vsubq_u32(w4[i], rvVec)), z));
|
||||
}
|
||||
auto combined =
|
||||
vcombine_u8(vmovn_u16(vcombine_u16(conflicting[0], conflicting[1])),
|
||||
vmovn_u16(vcombine_u16(conflicting[2], conflicting[3])));
|
||||
uint16x4_t conflicting[4];
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
conflicting[i] =
|
||||
vmovn_u32(vcgtq_s32(vreinterpretq_s32_u32(vsubq_u32(w4[i], rvVec)), z));
|
||||
}
|
||||
auto combined =
|
||||
vcombine_u8(vmovn_u16(vcombine_u16(conflicting[0], conflicting[1])),
|
||||
vmovn_u16(vcombine_u16(conflicting[2], conflicting[3])));
|
||||
|
||||
uint64_t compared = vget_lane_u64(
|
||||
vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(combined), 4)), 0);
|
||||
uint64_t compared = vget_lane_u64(
|
||||
vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(combined), 4)), 0);
|
||||
|
||||
return !(compared & mask) && firstRangeOk;
|
||||
return !(compared & mask) && firstRangeOk;
|
||||
|
||||
#elif defined(HAS_AVX)
|
||||
|
||||
__m128i indices;
|
||||
memcpy(&indices, self->index, 16);
|
||||
indices = _mm_sub_epi8(indices, _mm_set1_epi8(begin));
|
||||
uint32_t mask =
|
||||
0xffff &
|
||||
~_mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
indices, _mm_max_epu8(indices, _mm_set1_epi8(end - begin))));
|
||||
mask &= (1 << self->numChildren) - 1;
|
||||
if (!mask) {
|
||||
return true;
|
||||
}
|
||||
Node *child = self->children[std::countr_zero(mask)];
|
||||
const bool firstRangeOk =
|
||||
!child->entryPresent || child->entry.rangeVersion <= readVersion;
|
||||
__m128i indices;
|
||||
memcpy(&indices, self->index, 16);
|
||||
indices = _mm_sub_epi8(indices, _mm_set1_epi8(begin));
|
||||
uint32_t mask =
|
||||
0xffff & ~_mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
indices, _mm_max_epu8(indices, _mm_set1_epi8(end - begin))));
|
||||
mask &= (1 << self->numChildren) - 1;
|
||||
if (!mask) {
|
||||
return true;
|
||||
}
|
||||
Node *child = self->children[std::countr_zero(mask)];
|
||||
const bool firstRangeOk =
|
||||
!child->entryPresent || child->entry.rangeVersion <= readVersion;
|
||||
|
||||
uint32_t compared = 0;
|
||||
if constexpr (kAVX512) {
|
||||
compared = compare16_avx512(self->childMaxVersion, readVersion);
|
||||
} else {
|
||||
compared = compare16(self->childMaxVersion, readVersion);
|
||||
}
|
||||
return !(compared & mask) && firstRangeOk;
|
||||
uint32_t compared = 0;
|
||||
if constexpr (kAVX512) {
|
||||
compared = compare16_avx512(self->childMaxVersion, readVersion);
|
||||
} else {
|
||||
compared = compare16(self->childMaxVersion, readVersion);
|
||||
}
|
||||
return !(compared & mask) && firstRangeOk;
|
||||
|
||||
#else
|
||||
|
||||
const unsigned shiftUpperBound = end - begin;
|
||||
const unsigned shiftAmount = begin;
|
||||
auto inBounds = [&](unsigned c) {
|
||||
return c - shiftAmount < shiftUpperBound;
|
||||
};
|
||||
const unsigned shiftUpperBound = end - begin;
|
||||
const unsigned shiftAmount = begin;
|
||||
auto inBounds = [&](unsigned c) { return c - shiftAmount < shiftUpperBound; };
|
||||
|
||||
uint32_t mask = 0;
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
mask |= inBounds(self->index[i]) << i;
|
||||
}
|
||||
mask &= (1 << self->numChildren) - 1;
|
||||
if (!mask) {
|
||||
return true;
|
||||
}
|
||||
Node *child = self->children[std::countr_zero(mask)];
|
||||
const bool firstRangeOk =
|
||||
!child->entryPresent || child->entry.rangeVersion <= readVersion;
|
||||
uint32_t compared = 0;
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
compared |= (self->childMaxVersion[i] > readVersion) << i;
|
||||
}
|
||||
return !(compared & mask) && firstRangeOk;
|
||||
|
||||
#endif
|
||||
uint32_t mask = 0;
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
mask |= inBounds(self->index[i]) << i;
|
||||
}
|
||||
case Type_Node48: {
|
||||
auto *self = static_cast<Node48 *>(n);
|
||||
|
||||
{
|
||||
int c = self->bitSet.firstSetGeq(begin + 1);
|
||||
if (c >= 0 && c < end) {
|
||||
Node *child = self->children[self->index[c]];
|
||||
if (child->entryPresent && child->entry.rangeVersion > readVersion) {
|
||||
return false;
|
||||
}
|
||||
begin = c;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
// [begin, end) is now the half-open interval of children we're interested
|
||||
// in.
|
||||
assert(begin < end);
|
||||
}
|
||||
|
||||
// Check all pages
|
||||
static_assert(Node48::kMaxOfMaxPageSize == 16);
|
||||
for (int i = 0; i < Node48::kMaxOfMaxTotalPages; ++i) {
|
||||
if (self->maxOfMax[i] > readVersion) {
|
||||
if (!scan16<kAVX512>(self->childMaxVersion +
|
||||
(i << Node48::kMaxOfMaxShift),
|
||||
self->reverseIndex + (i << Node48::kMaxOfMaxShift),
|
||||
begin, end, readVersion)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
mask &= (1 << self->numChildren) - 1;
|
||||
if (!mask) {
|
||||
return true;
|
||||
}
|
||||
case Type_Node256: {
|
||||
static_assert(Node256::kMaxOfMaxTotalPages == 16);
|
||||
auto *self = static_cast<Node256 *>(n);
|
||||
Node *child = self->children[std::countr_zero(mask)];
|
||||
const bool firstRangeOk =
|
||||
!child->entryPresent || child->entry.rangeVersion <= readVersion;
|
||||
uint32_t compared = 0;
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
compared |= (self->childMaxVersion[i] > readVersion) << i;
|
||||
}
|
||||
return !(compared & mask) && firstRangeOk;
|
||||
|
||||
{
|
||||
int c = self->bitSet.firstSetGeq(begin + 1);
|
||||
if (c >= 0 && c < end) {
|
||||
Node *child = self->children[c];
|
||||
if (child->entryPresent && child->entry.rangeVersion > readVersion) {
|
||||
return false;
|
||||
}
|
||||
begin = c;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
// [begin, end) is now the half-open interval of children we're interested
|
||||
// in.
|
||||
assert(begin < end);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
const int firstPage = begin >> Node256::kMaxOfMaxShift;
|
||||
const int lastPage = (end - 1) >> Node256::kMaxOfMaxShift;
|
||||
// Check the only page if there's only one
|
||||
if (firstPage == lastPage) {
|
||||
if (self->maxOfMax[firstPage] <= readVersion) {
|
||||
return true;
|
||||
template <bool kAVX512>
|
||||
bool checkMaxBetweenExclusiveImpl(Node48 *n, int begin, int end,
|
||||
InternalVersionT readVersion,
|
||||
ReadContext *tls) {
|
||||
++tls->range_read_node_scan_accum;
|
||||
assume(-1 <= begin);
|
||||
assume(begin <= 256);
|
||||
assume(-1 <= end);
|
||||
assume(end <= 256);
|
||||
assume(begin < end);
|
||||
assert(!(begin == -1 && end == 256));
|
||||
|
||||
auto *self = static_cast<Node48 *>(n);
|
||||
|
||||
{
|
||||
int c = self->bitSet.firstSetGeq(begin + 1);
|
||||
if (c >= 0 && c < end) {
|
||||
Node *child = self->children[self->index[c]];
|
||||
if (child->entryPresent && child->entry.rangeVersion > readVersion) {
|
||||
return false;
|
||||
}
|
||||
const int intraPageBegin = begin & (Node256::kMaxOfMaxPageSize - 1);
|
||||
const int intraPageEnd = end - (lastPage << Node256::kMaxOfMaxShift);
|
||||
return scan16<kAVX512>(self->childMaxVersion +
|
||||
(firstPage << Node256::kMaxOfMaxShift),
|
||||
intraPageBegin, intraPageEnd, readVersion);
|
||||
begin = c;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
// Check the first page
|
||||
if (self->maxOfMax[firstPage] > readVersion) {
|
||||
const int intraPageBegin = begin & (Node256::kMaxOfMaxPageSize - 1);
|
||||
// [begin, end) is now the half-open interval of children we're interested
|
||||
// in.
|
||||
assert(begin < end);
|
||||
}
|
||||
|
||||
// Check all pages
|
||||
static_assert(Node48::kMaxOfMaxPageSize == 16);
|
||||
for (int i = 0; i < Node48::kMaxOfMaxTotalPages; ++i) {
|
||||
if (self->maxOfMax[i] > readVersion) {
|
||||
if (!scan16<kAVX512>(self->childMaxVersion +
|
||||
(i << Node48::kMaxOfMaxShift),
|
||||
self->reverseIndex + (i << Node48::kMaxOfMaxShift),
|
||||
begin, end, readVersion)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <bool kAVX512>
|
||||
bool checkMaxBetweenExclusiveImpl(Node256 *n, int begin, int end,
|
||||
InternalVersionT readVersion,
|
||||
ReadContext *tls) {
|
||||
++tls->range_read_node_scan_accum;
|
||||
assume(-1 <= begin);
|
||||
assume(begin <= 256);
|
||||
assume(-1 <= end);
|
||||
assume(end <= 256);
|
||||
assume(begin < end);
|
||||
assert(!(begin == -1 && end == 256));
|
||||
|
||||
static_assert(Node256::kMaxOfMaxTotalPages == 16);
|
||||
auto *self = static_cast<Node256 *>(n);
|
||||
|
||||
{
|
||||
int c = self->bitSet.firstSetGeq(begin + 1);
|
||||
if (c >= 0 && c < end) {
|
||||
Node *child = self->children[c];
|
||||
if (child->entryPresent && child->entry.rangeVersion > readVersion) {
|
||||
return false;
|
||||
}
|
||||
begin = c;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
// [begin, end) is now the half-open interval of children we're interested
|
||||
// in.
|
||||
assert(begin < end);
|
||||
}
|
||||
|
||||
const int firstPage = begin >> Node256::kMaxOfMaxShift;
|
||||
const int lastPage = (end - 1) >> Node256::kMaxOfMaxShift;
|
||||
// Check the only page if there's only one
|
||||
if (firstPage == lastPage) {
|
||||
if (self->maxOfMax[firstPage] <= readVersion) {
|
||||
return true;
|
||||
}
|
||||
const int intraPageBegin = begin & (Node256::kMaxOfMaxPageSize - 1);
|
||||
const int intraPageEnd = end - (lastPage << Node256::kMaxOfMaxShift);
|
||||
return scan16<kAVX512>(self->childMaxVersion +
|
||||
(firstPage << Node256::kMaxOfMaxShift),
|
||||
intraPageBegin, 16, readVersion)) {
|
||||
return false;
|
||||
}
|
||||
intraPageBegin, intraPageEnd, readVersion);
|
||||
}
|
||||
// Check the first page
|
||||
if (self->maxOfMax[firstPage] > readVersion) {
|
||||
const int intraPageBegin = begin & (Node256::kMaxOfMaxPageSize - 1);
|
||||
if (!scan16<kAVX512>(self->childMaxVersion +
|
||||
(firstPage << Node256::kMaxOfMaxShift),
|
||||
intraPageBegin, 16, readVersion)) {
|
||||
return false;
|
||||
}
|
||||
// Check the last page
|
||||
if (self->maxOfMax[lastPage] > readVersion) {
|
||||
const int intraPageEnd = end - (lastPage << Node256::kMaxOfMaxShift);
|
||||
if (!scan16<kAVX512>(self->childMaxVersion +
|
||||
(lastPage << Node256::kMaxOfMaxShift),
|
||||
0, intraPageEnd, readVersion)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// Check the last page
|
||||
if (self->maxOfMax[lastPage] > readVersion) {
|
||||
const int intraPageEnd = end - (lastPage << Node256::kMaxOfMaxShift);
|
||||
if (!scan16<kAVX512>(self->childMaxVersion +
|
||||
(lastPage << Node256::kMaxOfMaxShift),
|
||||
0, intraPageEnd, readVersion)) {
|
||||
return false;
|
||||
}
|
||||
// Check inner pages
|
||||
return scan16<kAVX512>(self->maxOfMax, firstPage + 1, lastPage,
|
||||
readVersion);
|
||||
}
|
||||
// Check inner pages
|
||||
return scan16<kAVX512>(self->maxOfMax, firstPage + 1, lastPage, readVersion);
|
||||
}
|
||||
|
||||
template <bool kAVX512>
|
||||
bool checkMaxBetweenExclusiveImpl(Node *n, int begin, int end,
|
||||
InternalVersionT readVersion,
|
||||
ReadContext *tls) {
|
||||
switch (n->getType()) {
|
||||
case Type_Node0:
|
||||
return checkMaxBetweenExclusiveImpl<kAVX512>(static_cast<Node0 *>(n), begin,
|
||||
end, readVersion, tls);
|
||||
case Type_Node3: {
|
||||
return checkMaxBetweenExclusiveImpl<kAVX512>(static_cast<Node3 *>(n), begin,
|
||||
end, readVersion, tls);
|
||||
}
|
||||
case Type_Node16: {
|
||||
return checkMaxBetweenExclusiveImpl<kAVX512>(static_cast<Node16 *>(n),
|
||||
begin, end, readVersion, tls);
|
||||
}
|
||||
case Type_Node48: {
|
||||
return checkMaxBetweenExclusiveImpl<kAVX512>(static_cast<Node48 *>(n),
|
||||
begin, end, readVersion, tls);
|
||||
}
|
||||
case Type_Node256: {
|
||||
return checkMaxBetweenExclusiveImpl<kAVX512>(static_cast<Node256 *>(n),
|
||||
begin, end, readVersion, tls);
|
||||
}
|
||||
default: // GCOVR_EXCL_LINE
|
||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||
@@ -2402,6 +2459,58 @@ bool checkMaxBetweenExclusive(Node *n, int begin, int end,
|
||||
return checkMaxBetweenExclusiveImpl<false>(n, begin, end, readVersion, tls);
|
||||
}
|
||||
|
||||
// Node0-typed entry point for checkMaxBetweenExclusive. Because `n` already
// has its concrete type, overload resolution selects the Node0 overload of
// checkMaxBetweenExclusiveImpl directly instead of the generic Node* one that
// switches on n->getType(). Always uses the non-AVX512 instantiation.
bool checkMaxBetweenExclusive(Node0 *n, int begin, int end,
                              InternalVersionT readVersion, ReadContext *tls) {
  constexpr bool kUseAVX512 = false;
  return checkMaxBetweenExclusiveImpl<kUseAVX512>(n, begin, end, readVersion,
                                                  tls);
}
|
||||
|
||||
// Node3-typed entry point for checkMaxBetweenExclusive. Because `n` already
// has its concrete type, overload resolution selects the Node3 overload of
// checkMaxBetweenExclusiveImpl directly instead of the generic Node* one that
// switches on n->getType(). Always uses the non-AVX512 instantiation.
bool checkMaxBetweenExclusive(Node3 *n, int begin, int end,
                              InternalVersionT readVersion, ReadContext *tls) {
  constexpr bool kUseAVX512 = false;
  return checkMaxBetweenExclusiveImpl<kUseAVX512>(n, begin, end, readVersion,
                                                  tls);
}
|
||||
|
||||
#if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__)
|
||||
__attribute__((target("avx512f"))) bool
|
||||
checkMaxBetweenExclusive(Node16 *n, int begin, int end,
|
||||
InternalVersionT readVersion, ReadContext *tls) {
|
||||
return checkMaxBetweenExclusiveImpl<true>(n, begin, end, readVersion, tls);
|
||||
}
|
||||
__attribute__((target("default")))
|
||||
#endif
|
||||
|
||||
bool checkMaxBetweenExclusive(Node16 *n, int begin, int end,
|
||||
InternalVersionT readVersion, ReadContext *tls) {
|
||||
return checkMaxBetweenExclusiveImpl<false>(n, begin, end, readVersion, tls);
|
||||
}
|
||||
|
||||
#if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__)
|
||||
__attribute__((target("avx512f"))) bool
|
||||
checkMaxBetweenExclusive(Node48 *n, int begin, int end,
|
||||
InternalVersionT readVersion, ReadContext *tls) {
|
||||
return checkMaxBetweenExclusiveImpl<true>(n, begin, end, readVersion, tls);
|
||||
}
|
||||
__attribute__((target("default")))
|
||||
#endif
|
||||
|
||||
bool checkMaxBetweenExclusive(Node48 *n, int begin, int end,
|
||||
InternalVersionT readVersion, ReadContext *tls) {
|
||||
return checkMaxBetweenExclusiveImpl<false>(n, begin, end, readVersion, tls);
|
||||
}
|
||||
|
||||
#if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__)
|
||||
__attribute__((target("avx512f"))) bool
|
||||
checkMaxBetweenExclusive(Node256 *n, int begin, int end,
|
||||
InternalVersionT readVersion, ReadContext *tls) {
|
||||
return checkMaxBetweenExclusiveImpl<true>(n, begin, end, readVersion, tls);
|
||||
}
|
||||
__attribute__((target("default")))
|
||||
#endif
|
||||
|
||||
bool checkMaxBetweenExclusive(Node256 *n, int begin, int end,
|
||||
InternalVersionT readVersion, ReadContext *tls) {
|
||||
return checkMaxBetweenExclusiveImpl<false>(n, begin, end, readVersion, tls);
|
||||
}
|
||||
|
||||
Vector<uint8_t> getSearchPath(Arena &arena, Node *n) {
|
||||
assert(n != nullptr);
|
||||
auto result = vector<uint8_t>(arena);
|
||||
@@ -2424,7 +2533,8 @@ Vector<uint8_t> getSearchPath(Arena &arena, Node *n) {
|
||||
//
|
||||
// Precondition: transitively, no child of n has a search path that's a longer
|
||||
// prefix of key than n
|
||||
bool checkRangeStartsWith(Node *n, std::span<const uint8_t> key, int begin,
|
||||
template <class NodeT>
|
||||
bool checkRangeStartsWith(NodeT *n, std::span<const uint8_t> key, int begin,
|
||||
int end, InternalVersionT readVersion,
|
||||
ReadContext *tls) {
|
||||
#if DEBUG_VERBOSE && !defined(NDEBUG)
|
||||
@@ -2501,6 +2611,15 @@ scan16<true>(const InternalVersionT *vs, int begin, int end,
|
||||
template __attribute__((target("avx512f"))) bool
|
||||
checkMaxBetweenExclusiveImpl<true>(Node *n, int begin, int end,
|
||||
InternalVersionT readVersion, ReadContext *);
|
||||
template __attribute__((target("avx512f"))) bool
|
||||
checkMaxBetweenExclusiveImpl<true>(Node16 *n, int begin, int end,
|
||||
InternalVersionT readVersion, ReadContext *);
|
||||
template __attribute__((target("avx512f"))) bool
|
||||
checkMaxBetweenExclusiveImpl<true>(Node48 *n, int begin, int end,
|
||||
InternalVersionT readVersion, ReadContext *);
|
||||
template __attribute__((target("avx512f"))) bool
|
||||
checkMaxBetweenExclusiveImpl<true>(Node256 *n, int begin, int end,
|
||||
InternalVersionT readVersion, ReadContext *);
|
||||
#endif
|
||||
|
||||
// Returns a pointer to the pointer to the newly inserted node in the tree. Caller
|
||||
@@ -3207,14 +3326,16 @@ PRESERVE_NONE void begin(CheckJob *job, CheckContext *context) {
|
||||
job->remaining = job->begin.subspan(0, job->lcp);
|
||||
|
||||
if (job->remaining.size() == 0) {
|
||||
MUSTTAIL return done_common_prefix_iter(job, context);
|
||||
job->continuation = done_common_prefix_iter;
|
||||
MUSTTAIL return job->continuation(job, context);
|
||||
}
|
||||
|
||||
auto [c, maxV] = getChildAndMaxVersion(job->n, job->remaining[0]);
|
||||
job->maxV = maxV;
|
||||
job->child = c;
|
||||
if (job->child == nullptr) {
|
||||
MUSTTAIL return done_common_prefix_iter(job, context);
|
||||
job->continuation = done_common_prefix_iter;
|
||||
MUSTTAIL return job->continuation(job, context);
|
||||
}
|
||||
|
||||
job->continuation = commonPrefixIterTable[c.getType()];
|
||||
|
Reference in New Issue
Block a user