Use _mm512_loadu_epi32 instead of memcpy
When built with gcc and --coverage, the memcpy somehow compiled to a vmovdqa64 (aligned move) instruction, which segfaulted on unaligned memory. The reason gcc chose an aligned move here is not yet understood.
This commit is contained in:
@@ -1988,11 +1988,10 @@ uint32_t compare16_32bit(const InternalVersionT *vs, InternalVersionT rv) {
|
|||||||
|
|
||||||
__attribute__((target("avx512f"))) uint32_t
|
__attribute__((target("avx512f"))) uint32_t
|
||||||
compare16_32bit_avx512(const InternalVersionT *vs, InternalVersionT rv) {
|
compare16_32bit_avx512(const InternalVersionT *vs, InternalVersionT rv) {
|
||||||
__m512i w;
|
|
||||||
memcpy(&w, vs, sizeof(w));
|
|
||||||
uint32_t r;
|
uint32_t r;
|
||||||
memcpy(&r, &rv, sizeof(r));
|
memcpy(&r, &rv, sizeof(r));
|
||||||
return _mm512_cmpgt_epi32_mask(_mm512_sub_epi32(w, _mm512_set1_epi32(r)),
|
return _mm512_cmpgt_epi32_mask(
|
||||||
|
_mm512_sub_epi32(_mm512_loadu_epi32(vs), _mm512_set1_epi32(r)),
|
||||||
_mm512_setzero_epi32());
|
_mm512_setzero_epi32());
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
Reference in New Issue
Block a user