Use _mm512_loadu_epi32 instead of memcpy

When built with gcc and --coverage, the memcpy was somehow compiled to a
vmovdqa64 instruction (an aligned move), which segfaulted on unaligned
memory. Hmm.
This commit is contained in:
2024-08-01 15:24:09 -07:00
parent b8edd92698
commit 3898cb596a

View File

@@ -1988,12 +1988,11 @@ uint32_t compare16_32bit(const InternalVersionT *vs, InternalVersionT rv) {
// AVX-512 variant of the 16-lane 32-bit comparison.
//
// Loads 64 bytes (sixteen 32-bit lanes) starting at `vs`, subtracts the
// first 32 bits of `rv` from every lane with wrapping arithmetic, and
// returns a mask in which bit i is set when the difference for lane i is
// greater than zero as a signed 32-bit integer.
//
// `vs` does not need to be 64-byte aligned.
// NOTE(review): assumes `vs` addresses at least 64 readable bytes and that
// InternalVersionT is at least 4 bytes wide -- confirm against its definition.
__attribute__((target("avx512f"))) uint32_t
compare16_32bit_avx512(const InternalVersionT *vs, InternalVersionT rv) {
  // Extract the leading 32 bits of rv without violating strict aliasing.
  uint32_t r;
  memcpy(&r, &rv, sizeof(r));

  // Use the explicit unaligned-load intrinsic. Copying into a __m512i with
  // memcpy has been observed to compile to an aligned move (vmovdqa64) under
  // gcc with --coverage, which faults when vs is not 64-byte aligned.
  const __m512i lanes = _mm512_loadu_epi32(vs);
  const __m512i diff = _mm512_sub_epi32(lanes, _mm512_set1_epi32(r));
  return _mm512_cmpgt_epi32_mask(diff, _mm512_setzero_epi32());
}
#endif