From 52201fa4c77790a743245bb062815d350abe4ef3 Mon Sep 17 00:00:00 2001
From: Andrew Noyes
Date: Fri, 13 Sep 2024 22:55:22 -0700
Subject: [PATCH] Use signed compare

Unsigned compare is incorrect here
---
 ConflictSet.cpp | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/ConflictSet.cpp b/ConflictSet.cpp
index 3997c61..f0e4ccf 100644
--- a/ConflictSet.cpp
+++ b/ConflictSet.cpp
@@ -2925,11 +2925,18 @@ horizontalMaxUpTo16(InternalVersionT *vs, InternalVersionT z, int len) {
   uint32_t zero;
   memcpy(&zero, &z, sizeof(zero));
   auto zeroVec = _mm512_set1_epi32(zero);
-  return InternalVersionT(
+  auto actual = InternalVersionT(
       zero +
-      _mm512_reduce_max_epu32(_mm512_sub_epi32(
+      _mm512_reduce_max_epi32(_mm512_sub_epi32(
           _mm512_mask_loadu_epi32(zeroVec, _mm512_int2mask((1 << len) - 1), vs),
           zeroVec)));
+  // Hope it gets vectorized
+  InternalVersionT max = vs[0];
+  for (int i = 1; i < len; ++i) {
+    max = std::max(vs[i], max);
+  }
+  assert(actual == max);
+  return max;
 #endif
 }
 __attribute__((target("default")))
@@ -2959,7 +2966,7 @@ horizontalMax16(InternalVersionT *vs, InternalVersionT z) {
   uint32_t zero;
   memcpy(&zero, &z, sizeof(zero));
   auto zeroVec = _mm512_set1_epi32(zero);
-  return InternalVersionT(zero + _mm512_reduce_max_epu32(_mm512_sub_epi32(
+  return InternalVersionT(zero + _mm512_reduce_max_epi32(_mm512_sub_epi32(
                               _mm512_loadu_epi32(vs), zeroVec)));
 #endif
 }