Use signed compare
Some checks failed
Tests / Clang total: 3296, failed: 38, passed: 3258
Tests / 64 bit versions total: 3296, passed: 3296
Tests / Debug total: 3294, failed: 76, passed: 3218
Tests / SIMD fallback total: 3296, passed: 3296
Tests / Release [gcc] total: 3296, failed: 38, passed: 3258
Tests / Release [gcc,aarch64] total: 2458, passed: 2458
Tests / Coverage total: 2476, failed: 76, passed: 2400
weaselab/conflict-set/pipeline/head There was a failure building this commit
Some checks failed
Tests / Clang total: 3296, failed: 38, passed: 3258
Tests / 64 bit versions total: 3296, passed: 3296
Tests / Debug total: 3294, failed: 76, passed: 3218
Tests / SIMD fallback total: 3296, passed: 3296
Tests / Release [gcc] total: 3296, failed: 38, passed: 3258
Tests / Release [gcc,aarch64] total: 2458, passed: 2458
Tests / Coverage total: 2476, failed: 76, passed: 2400
weaselab/conflict-set/pipeline/head There was a failure building this commit
Unsigned compare is incorrect here
This commit is contained in:
@@ -2925,11 +2925,18 @@ horizontalMaxUpTo16(InternalVersionT *vs, InternalVersionT z, int len) {
   uint32_t zero;
   memcpy(&zero, &z, sizeof(zero));
   auto zeroVec = _mm512_set1_epi32(zero);
-  return InternalVersionT(
+  auto actual = InternalVersionT(
       zero +
-      _mm512_reduce_max_epu32(_mm512_sub_epi32(
+      _mm512_reduce_max_epi32(_mm512_sub_epi32(
           _mm512_mask_loadu_epi32(zeroVec, _mm512_int2mask((1 << len) - 1), vs),
           zeroVec)));
+  // Hope it gets vectorized
+  InternalVersionT max = vs[0];
+  for (int i = 1; i < len; ++i) {
+    max = std::max(vs[i], max);
+  }
+  assert(actual == max);
+  return max;
 #endif
 }
 __attribute__((target("default")))
@@ -2959,7 +2966,7 @@ horizontalMax16(InternalVersionT *vs, InternalVersionT z) {
   uint32_t zero;
   memcpy(&zero, &z, sizeof(zero));
   auto zeroVec = _mm512_set1_epi32(zero);
-  return InternalVersionT(zero + _mm512_reduce_max_epu32(_mm512_sub_epi32(
+  return InternalVersionT(zero + _mm512_reduce_max_epi32(_mm512_sub_epi32(
       _mm512_loadu_epi32(vs), zeroVec)));
 #endif
 }
Reference in New Issue
Block a user