Attempt at avx512f 32bit compare
Some checks failed
weaselab/conflict-set/pipeline/head There was a failure building this commit
Some checks failed
weaselab/conflict-set/pipeline/head There was a failure building this commit
This commit is contained in:
@@ -1766,6 +1766,30 @@ downLeftSpine:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef HAS_AVX
|
||||||
|
// Baseline (non-AVX-512) version of compare16_32bit.
//
// Reads 16 consecutive 32-bit values starting at `vs` and returns a 16-bit
// mask in the low bits of the result: bit i is set iff (vs[i] - rv), computed
// with 32-bit wraparound and then interpreted as a signed integer, is greater
// than zero. Note this is a subtraction-based comparison, not a plain
// `vs[i] > rv`.
__attribute__((target("default"))) uint32_t compare16_32bit(const uint32_t *vs,
                                                            uint32_t rv) {
  __m128i w[4];
  // Copy instead of cast-and-load so no particular alignment of vs is needed.
  memcpy(w, vs, sizeof(w));
  const __m128i rvVec = _mm_set1_epi32((int)rv);
  const __m128i zero = _mm_setzero_si128();
  uint32_t compared = 0;
  for (int i = 0; i < 4; ++i) {
    // Each 128-bit word holds 4 lanes; a lane is all-ones when the signed
    // difference is positive.
    const __m128i gt = _mm_cmpgt_epi32(_mm_sub_epi32(w[i], rvVec), zero);
    // movemask_ps collects the sign bit of each 32-bit lane into 4 bits; the
    // cast only reinterprets the register and emits no instruction.
    compared |= (uint32_t)_mm_movemask_ps(_mm_castsi128_ps(gt)) << (i * 4);
  }
  return compared;
}
|
||||||
|
|
||||||
|
// AVX-512F version of compare16_32bit.
//
// Reads 16 consecutive 32-bit values starting at `vs` and returns a 16-bit
// mask in the low bits of the result: bit i is set iff (vs[i] - rv), computed
// with 32-bit wraparound and then interpreted as a signed integer, is greater
// than zero.
__attribute__((target("avx512f"))) uint32_t compare16_32bit(const uint32_t *vs,
                                                            uint32_t rv) {
  __m512i w;
  // Copy instead of cast-and-load so no particular alignment of vs is needed.
  memcpy(&w, vs, sizeof(w));
  const __m512i diff = _mm512_sub_epi32(w, _mm512_set1_epi32((int)rv));
  // The compare yields one mask bit per 32-bit lane (a __mmask16), which
  // widens to the uint32_t return type.
  return _mm512_cmpgt_epi32_mask(diff, _mm512_setzero_si512());
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// Returns true if v[i] <= readVersion for all i such that begin <= is[i] < end
|
// Returns true if v[i] <= readVersion for all i such that begin <= is[i] < end
|
||||||
// Preconditions: begin <= end, end - begin < 256
|
// Preconditions: begin <= end, end - begin < 256
|
||||||
bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end,
|
bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end,
|
||||||
@@ -1824,17 +1848,7 @@ bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end,
|
|||||||
|
|
||||||
uint32_t compared = 0;
|
uint32_t compared = 0;
|
||||||
#if INTERNAL_VERSION_32_BIT
|
#if INTERNAL_VERSION_32_BIT
|
||||||
__m128i w4[4];
|
compared = compare16_32bit(vs, readVersion);
|
||||||
memcpy(w4, vs, sizeof(w4));
|
|
||||||
uint32_t rv;
|
|
||||||
memcpy(&rv, &readVersion, sizeof(rv));
|
|
||||||
const auto rvVec = _mm_set1_epi32(rv);
|
|
||||||
const auto zero = _mm_setzero_si128();
|
|
||||||
for (int i = 0; i < 4; ++i) {
|
|
||||||
compared |=
|
|
||||||
_mm_movemask_ps(_mm_cmpgt_epi32(_mm_sub_epi32(w4[i], rvVec), zero))
|
|
||||||
<< (i * 4);
|
|
||||||
}
|
|
||||||
#else
|
#else
|
||||||
for (int i = 0; i < 16; ++i) {
|
for (int i = 0; i < 16; ++i) {
|
||||||
compared |= (vs[i] > readVersion) << i;
|
compared |= (vs[i] > readVersion) << i;
|
||||||
|
Reference in New Issue
Block a user