Specialize scan16 for Node16 in checkMaxBetweenExclusive
All checks were successful
Tests / Clang total: 1337, passed: 1337
Clang |Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|0|0|0|0|:clap:
Tests / SIMD fallback total: 1337, passed: 1337
Tests / 32-bit versions total: 1337, passed: 1337
Tests / Release [gcc] total: 1337, passed: 1337
GNU C Compiler (gcc) |Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|0|0|0|0|:clap:
Tests / Release [gcc,aarch64] total: 997, passed: 997
Tests / Coverage total: 1004, passed: 1004
Code Coverage #### Project Overview
No changes detected, that affect the code coverage.
* Line Coverage: 98.71% (1606/1627)
* Branch Coverage: 65.58% (1471/2243)
* Complexity Density: 0.00
* Lines of Code: 1627
#### Quality Gates Summary
Output truncated.
weaselab/conflict-set/pipeline/head This commit looks good
All checks were successful
Tests / Clang total: 1337, passed: 1337
Clang |Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|0|0|0|0|:clap:
Tests / SIMD fallback total: 1337, passed: 1337
Tests / 32-bit versions total: 1337, passed: 1337
Tests / Release [gcc] total: 1337, passed: 1337
GNU C Compiler (gcc) |Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|0|0|0|0|:clap:
Tests / Release [gcc,aarch64] total: 997, passed: 997
Tests / Coverage total: 1004, passed: 1004
Code Coverage #### Project Overview
No changes detected, that affect the code coverage.
* Line Coverage: 98.71% (1606/1627)
* Branch Coverage: 65.58% (1471/2243)
* Complexity Density: 0.00
* Lines of Code: 1627
#### Quality Gates Summary
Output truncated.
weaselab/conflict-set/pipeline/head This commit looks good
This commit is contained in:
111
ConflictSet.cpp
111
ConflictSet.cpp
@@ -2036,23 +2036,106 @@ bool checkMaxBetweenExclusive(Node *n, int begin, int end,
|
||||
case Type_Node16: {
|
||||
auto *self = static_cast<Node16 *>(n);
|
||||
|
||||
{
|
||||
int c = begin == 255 ? -1 : getChildGeqSimd(self, begin + 1);
|
||||
if (c >= 0 && c < end) {
|
||||
auto *child = self->children[getNodeIndex(self, c)];
|
||||
if (child->entryPresent) {
|
||||
if (!(child->entry.rangeVersion <= readVersion)) {
|
||||
return false;
|
||||
};
|
||||
}
|
||||
begin = c;
|
||||
} else {
|
||||
++begin;
|
||||
|
||||
assert(begin <= end);
|
||||
assert(end - begin < 256);
|
||||
|
||||
#ifdef HAS_ARM_NEON
|
||||
|
||||
uint8x16_t indices;
|
||||
memcpy(&indices, self->index, 16);
|
||||
// 0xff for each in bounds
|
||||
auto results =
|
||||
vcltq_u8(vsubq_u8(indices, vdupq_n_u8(begin)), vdupq_n_u8(end - begin));
|
||||
// 0xf for each 0xff
|
||||
uint64_t mask = vget_lane_u64(
|
||||
vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(results), 4)), 0);
|
||||
|
||||
mask &= self->numChildren == 16
|
||||
? uint64_t(-1)
|
||||
: (uint64_t(1) << (self->numChildren << 2)) - 1;
|
||||
if (!mask) {
|
||||
return true;
|
||||
}
|
||||
// [begin, end) is now the half-open interval of children we're interested
|
||||
// in.
|
||||
assert(begin < end);
|
||||
auto *child = self->children[__builtin_ctzll(mask) >> 2];
|
||||
const bool firstRangeOk =
|
||||
!child->entryPresent || child->entry.rangeVersion <= readVersion;
|
||||
|
||||
uint32x4_t w4[4];
|
||||
memcpy(w4, self->childMaxVersion, sizeof(w4));
|
||||
uint32_t rv;
|
||||
memcpy(&rv, &readVersion, sizeof(rv));
|
||||
const auto rvVec = vdupq_n_u32(rv);
|
||||
|
||||
int32x4_t z;
|
||||
memset(&z, 0, sizeof(z));
|
||||
|
||||
uint16x4_t conflicting[4];
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
conflicting[i] = vmovn_u32(
|
||||
vcgtq_s32(vreinterpretq_s32_u32(vsubq_u32(w4[i], rvVec)), z));
|
||||
}
|
||||
auto combined =
|
||||
vcombine_u8(vmovn_u16(vcombine_u16(conflicting[0], conflicting[1])),
|
||||
vmovn_u16(vcombine_u16(conflicting[2], conflicting[3])));
|
||||
|
||||
uint64_t compared = vget_lane_u64(
|
||||
vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(combined), 4)), 0);
|
||||
|
||||
return !(compared & mask) && firstRangeOk;
|
||||
|
||||
#elif defined(HAS_AVX)
|
||||
|
||||
__m128i indices;
|
||||
memcpy(&indices, self->index, 16);
|
||||
indices = _mm_sub_epi8(indices, _mm_set1_epi8(begin));
|
||||
uint32_t mask =
|
||||
0xffff &
|
||||
~_mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||
indices, _mm_max_epu8(indices, _mm_set1_epi8(end - begin))));
|
||||
mask &= (1 << self->numChildren) - 1;
|
||||
if (!mask) {
|
||||
return true;
|
||||
}
|
||||
auto *child = self->children[__builtin_ctz(mask)];
|
||||
const bool firstRangeOk =
|
||||
!child->entryPresent || child->entry.rangeVersion <= readVersion;
|
||||
|
||||
uint32_t compared = 0;
|
||||
if constexpr (kAVX512) {
|
||||
compared = compare16_32bit_avx512(self->childMaxVersion, readVersion);
|
||||
} else {
|
||||
compared = compare16_32bit(self->childMaxVersion, readVersion);
|
||||
}
|
||||
return !(compared & mask) && firstRangeOk;
|
||||
|
||||
#else
|
||||
|
||||
const unsigned shiftUpperBound = end - begin;
|
||||
const unsigned shiftAmount = begin;
|
||||
auto inBounds = [&](unsigned c) {
|
||||
return c - shiftAmount < shiftUpperBound;
|
||||
};
|
||||
|
||||
uint32_t mask = 0;
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
mask |= inBounds(self->index[i]) << i;
|
||||
}
|
||||
mask &= (1 << self->numChildren) - 1;
|
||||
if (!mask) {
|
||||
return true;
|
||||
}
|
||||
auto *child = self->children[__builtin_ctz(mask)];
|
||||
const bool firstRangeOk =
|
||||
!child->entryPresent || child->entry.rangeVersion <= readVersion;
|
||||
uint32_t compared = 0;
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
compared |= (self->childMaxVersion[i] > readVersion) << i;
|
||||
}
|
||||
return !(compared & mask) && firstRangeOk;
|
||||
|
||||
#endif
|
||||
|
||||
return scan16<kAVX512>(self->childMaxVersion, self->index, begin, end,
|
||||
readVersion);
|
||||
|
Reference in New Issue
Block a user