Use _mm512_loadu_epi32 instead of memcpy
The latter somehow generated a vmovdqa64 instruction that segfaulted on unaligned memory, with gcc and --coverage. Hmm.
This commit is contained in:
@@ -1988,12 +1988,11 @@ uint32_t compare16_32bit(const InternalVersionT *vs, InternalVersionT rv) {
|
||||
|
||||
__attribute__((target("avx512f"))) uint32_t
|
||||
compare16_32bit_avx512(const InternalVersionT *vs, InternalVersionT rv) {
|
||||
__m512i w;
|
||||
memcpy(&w, vs, sizeof(w));
|
||||
uint32_t r;
|
||||
memcpy(&r, &rv, sizeof(r));
|
||||
return _mm512_cmpgt_epi32_mask(_mm512_sub_epi32(w, _mm512_set1_epi32(r)),
|
||||
_mm512_setzero_epi32());
|
||||
return _mm512_cmpgt_epi32_mask(
|
||||
_mm512_sub_epi32(_mm512_loadu_epi32(vs), _mm512_set1_epi32(r)),
|
||||
_mm512_setzero_epi32());
|
||||
}
|
||||
#endif
|
||||
|
||||
|
Reference in New Issue
Block a user