Write vectorized 32-bit compare by hand for ARM in scan16
Some checks failed
Tests / Clang total: 1039, passed: 1039
Clang

|Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|0|0|0|0|:clap:
Tests / SIMD fallback total: 1039, passed: 1039
Tests / Release [gcc] total: 1039, passed: 1039
GNU C Compiler (gcc)

|Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|0|0|0|0|:clap:
Tests / Release [gcc,aarch64] total: 775, passed: 775
Tests / Coverage total: 780, failed: 1, passed: 779
weaselab/conflict-set/pipeline/head: There was a failure building this commit.
Some checks failed
Tests / Clang total: 1039, passed: 1039
Clang

|Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|0|0|0|0|:clap:
Tests / SIMD fallback total: 1039, passed: 1039
Tests / Release [gcc] total: 1039, passed: 1039
GNU C Compiler (gcc)

|Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|0|0|0|0|:clap:
Tests / Release [gcc,aarch64] total: 775, passed: 775
Tests / Coverage total: 780, failed: 1, passed: 779
weaselab/conflict-set/pipeline/head: There was a failure building this commit.
This commit is contained in:
@@ -1776,10 +1776,32 @@ bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end,
|
|||||||
uint64_t mask = vget_lane_u64(
|
uint64_t mask = vget_lane_u64(
|
||||||
vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(results), 4)), 0);
|
vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(results), 4)), 0);
|
||||||
|
|
||||||
|
#if INTERNAL_VERSION_32_BIT
|
||||||
|
uint32x4_t w4[4];
|
||||||
|
memcpy(w4, vs, sizeof(w4));
|
||||||
|
uint32_t rv;
|
||||||
|
memcpy(&rv, &readVersion, sizeof(rv));
|
||||||
|
|
||||||
|
int32x4_t z;
|
||||||
|
memset(&z, 0, sizeof(z));
|
||||||
|
|
||||||
|
uint16x4_t conflicting[4];
|
||||||
|
for (int i = 0; i < 4; ++i) {
|
||||||
|
conflicting[i] = vmovn_u32(vcgtq_s32(vsubq_u32(w4[i], vdupq_n_u32(rv)), z));
|
||||||
|
}
|
||||||
|
auto combined =
|
||||||
|
vcombine_u8(vmovn_u16(vcombine_u16(conflicting[0], conflicting[1])),
|
||||||
|
vmovn_u16(vcombine_u16(conflicting[2], conflicting[3])));
|
||||||
|
|
||||||
|
uint64_t compared = vget_lane_u64(
|
||||||
|
vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(combined), 4)), 0);
|
||||||
|
#else
|
||||||
uint64_t compared = 0;
|
uint64_t compared = 0;
|
||||||
for (int i = 0; i < 16; ++i) {
|
for (int i = 0; i < 16; ++i) {
|
||||||
compared |= uint64_t(vs[i] > readVersion) << (i << 2);
|
compared |= uint64_t(vs[i] > readVersion) << (i << 2);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
return !(compared & mask);
|
return !(compared & mask);
|
||||||
|
|
||||||
#elif defined(HAS_AVX)
|
#elif defined(HAS_AVX)
|
||||||
|
Reference in New Issue
Block a user