diff --git a/ConflictSet.cpp b/ConflictSet.cpp index 3997c61..f0e4ccf 100644 --- a/ConflictSet.cpp +++ b/ConflictSet.cpp @@ -2925,11 +2925,18 @@ horizontalMaxUpTo16(InternalVersionT *vs, InternalVersionT z, int len) { uint32_t zero; memcpy(&zero, &z, sizeof(zero)); auto zeroVec = _mm512_set1_epi32(zero); - return InternalVersionT( + auto actual = InternalVersionT( zero + - _mm512_reduce_max_epu32(_mm512_sub_epi32( + _mm512_reduce_max_epi32(_mm512_sub_epi32( _mm512_mask_loadu_epi32(zeroVec, _mm512_int2mask((1 << len) - 1), vs), zeroVec))); + // Hope it gets vectorized + InternalVersionT max = vs[0]; + for (int i = 1; i < len; ++i) { + max = std::max(vs[i], max); + } + assert(actual == max); + return max; #endif } __attribute__((target("default"))) @@ -2959,7 +2966,7 @@ horizontalMax16(InternalVersionT *vs, InternalVersionT z) { uint32_t zero; memcpy(&zero, &z, sizeof(zero)); auto zeroVec = _mm512_set1_epi32(zero); - return InternalVersionT(zero + _mm512_reduce_max_epu32(_mm512_sub_epi32( + return InternalVersionT(zero + _mm512_reduce_max_epi32(_mm512_sub_epi32( _mm512_loadu_epi32(vs), zeroVec))); #endif }