From f5d021d6b62b9ad7fe2fa5d9da75f2f35e0c1403 Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Sat, 3 Aug 2024 14:22:50 -0700 Subject: [PATCH] Add multi-version rezero16 --- ConflictSet.cpp | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/ConflictSet.cpp b/ConflictSet.cpp index 02216ce..ebb7d93 100644 --- a/ConflictSet.cpp +++ b/ConflictSet.cpp @@ -1332,6 +1332,29 @@ void maybeDecreaseCapacity(Node *&self, WriteContext *tls, freeAndMakeCapacityAtLeast(self, maxCapacity, tls, impl, false); } +#if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__) +// This gets covered in local development. AVX-512 (target "avx512f") variant of rezero16: processes the 16 entries starting at vs as sixteen 32-bit lanes in a single 512-bit vector. NOTE(review): assumes sizeof(InternalVersionT) == 4, since only sizeof(uint32_t) bytes of zero are copied and vs is loaded as 32-bit lanes; confirm against the type's definition. +// GCOVR_EXCL_START +__attribute__((target("avx512f"))) void rezero16(InternalVersionT *vs, + InternalVersionT zero) { + uint32_t z; + memcpy(&z, &zero, sizeof(z)); // copy the raw bits of zero into an integer + const auto zvec = _mm512_set1_epi32(z); // broadcast those bits to all 16 lanes + auto m = _mm512_cmplt_epi32_mask( + _mm512_sub_epi32(_mm512_loadu_epi32(vs), zvec), _mm512_setzero_epi32()); // mask bit i is set when (vs[i] - z), computed with 32-bit wraparound, is negative as a signed value + _mm512_mask_storeu_epi32(vs, m, zvec); // overwrite only the masked lanes with z; the other lanes are left untouched +} +// GCOVR_EXCL_STOP + +__attribute__((target("default"))) +#endif + +void rezero16(InternalVersionT *vs, InternalVersionT zero) { // Scalar rezero16: when the #if block above is active this is the target("default") version, otherwise it is the only definition. + for (int i = 0; i < 16; ++i) { + vs[i] = std::max(vs[i], zero); // raise each of vs[0..15] to at least zero + } +} + void rezero(Node *n, InternalVersionT z) { #if DEBUG_VERBOSE && !defined(NDEBUG) fprintf(stderr, "rezero to %" PRId64 ": %s\n", z.toInt64(), @@ -1352,14 +1375,12 @@ void rezero(Node *n, InternalVersionT z) { } break; case Type_Node16: { auto *self = static_cast(n); - for (int i = 0; i < 16; ++i) { - self->childMaxVersion[i] = std::max(self->childMaxVersion[i], z); - } + rezero16(self->childMaxVersion, z); } break; case Type_Node48: { auto *self = static_cast(n); - for (int i = 0; i < 48; ++i) { - self->childMaxVersion[i] = std::max(self->childMaxVersion[i], z); + for (int i = 0; i < 48; i += 16) { + rezero16(self->childMaxVersion + i, z); } for (auto &m : self->maxOfMax) { m = std::max(m, z); @@ -1367,12 +1388,10 @@ void rezero(Node *n, InternalVersionT z) { } break; 
case Type_Node256: { auto *self = static_cast(n); - for (int i = 0; i < 256; ++i) { - self->childMaxVersion[i] = std::max(self->childMaxVersion[i], z); - } - for (auto &m : self->maxOfMax) { - m = std::max(m, z); + for (int i = 0; i < 256; i += 16) { + rezero16(self->childMaxVersion + i, z); } + rezero16(self->maxOfMax, z); } break; default: // GCOVR_EXCL_LINE __builtin_unreachable(); // GCOVR_EXCL_LINE