Add multi-version rezero16
All checks were successful
Tests / Clang total: 1479, passed: 1479
Clang |Total|New|Outstanding|Fixed|Trend |:-:|:-:|:-:|:-:|:-: |0|0|0|0|:clap:
Tests / Debug total: 1477, passed: 1477
Tests / SIMD fallback total: 1479, passed: 1479
Tests / Release [gcc] total: 1479, passed: 1479
GNU C Compiler (gcc) |Total|New|Outstanding|Fixed|Trend |:-:|:-:|:-:|:-:|:-: |0|0|0|0|:clap:
Tests / Release [gcc,aarch64] total: 1102, passed: 1102
Tests / Coverage total: 1111, passed: 1111
Code Coverage #### Project Overview No changes detected that affect the code coverage. * Line Coverage: 99.60% (1739/1746) * Branch Coverage: 64.95% (1492/2297) * Complexity Density: 0.00 * Lines of Code: 1746 #### Quality Gates Summary Output truncated.
weaselab/conflict-set/pipeline/head This commit looks good

This commit is contained in:
2024-08-03 14:22:50 -07:00
parent 1c41605b53
commit f5d021d6b6

View File

@@ -1332,6 +1332,29 @@ void maybeDecreaseCapacity(Node *&self, WriteContext *tls,
freeAndMakeCapacityAtLeast(self, maxCapacity, tls, impl, false);
}
#if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__)
// This gets covered in local development
// GCOVR_EXCL_START
// AVX-512 variant of rezero16: processes the 16 versions starting at `vs` with
// a single vector load / compare / masked store.
//
// Every lane whose (lane - zero) difference is negative when interpreted as a
// signed 32-bit integer is overwritten with `zero`; all other lanes are left
// untouched in memory.
//
// NOTE(review): this treats each InternalVersionT as one 32-bit lane and only
// copies sizeof(uint32_t) bytes out of `zero` -- presumably InternalVersionT
// is exactly 32 bits wide; confirm against its definition.
__attribute__((target("avx512f"))) void rezero16(InternalVersionT *vs,
                                                 InternalVersionT zero) {
  // Pull out the raw 32-bit pattern of `zero` so it can be broadcast.
  uint32_t zeroBits;
  memcpy(&zeroBits, &zero, sizeof(zeroBits));
  const auto zeroLanes = _mm512_set1_epi32(zeroBits);

  // Unaligned load of the 16 current versions.
  const auto current = _mm512_loadu_epi32(vs);

  // Lane-wise difference against `zero`; a negative result marks a lane that
  // has to be raised to `zero`.
  const auto delta = _mm512_sub_epi32(current, zeroLanes);
  const auto needsRezero =
      _mm512_cmplt_epi32_mask(delta, _mm512_setzero_epi32());

  // Write `zero` back only into the flagged lanes.
  _mm512_mask_storeu_epi32(vs, needsRezero, zeroLanes);
}
// GCOVR_EXCL_STOP
__attribute__((target("default")))
#endif
void rezero16(InternalVersionT *vs, InternalVersionT zero) {
  // Portable fallback: raise each of the 16 versions starting at `vs` to at
  // least `zero`, using InternalVersionT's own ordering via std::max.
  InternalVersionT *const end = vs + 16;
  for (InternalVersionT *cur = vs; cur != end; ++cur) {
    *cur = std::max(*cur, zero);
  }
}
void rezero(Node *n, InternalVersionT z) {
#if DEBUG_VERBOSE && !defined(NDEBUG)
fprintf(stderr, "rezero to %" PRId64 ": %s\n", z.toInt64(),
@@ -1352,14 +1375,12 @@ void rezero(Node *n, InternalVersionT z) {
} break;
case Type_Node16: {
auto *self = static_cast<Node16 *>(n);
for (int i = 0; i < 16; ++i) {
self->childMaxVersion[i] = std::max(self->childMaxVersion[i], z);
}
rezero16(self->childMaxVersion, z);
} break;
case Type_Node48: {
auto *self = static_cast<Node48 *>(n);
for (int i = 0; i < 48; ++i) {
self->childMaxVersion[i] = std::max(self->childMaxVersion[i], z);
for (int i = 0; i < 48; i += 16) {
rezero16(self->childMaxVersion + i, z);
}
for (auto &m : self->maxOfMax) {
m = std::max(m, z);
@@ -1367,12 +1388,10 @@ void rezero(Node *n, InternalVersionT z) {
} break;
case Type_Node256: {
auto *self = static_cast<Node256 *>(n);
for (int i = 0; i < 256; ++i) {
self->childMaxVersion[i] = std::max(self->childMaxVersion[i], z);
}
for (auto &m : self->maxOfMax) {
m = std::max(m, z);
for (int i = 0; i < 256; i += 16) {
rezero16(self->childMaxVersion + i, z);
}
rezero16(self->maxOfMax, z);
} break;
default: // GCOVR_EXCL_LINE
__builtin_unreachable(); // GCOVR_EXCL_LINE