Compare commits
2 Commits
84c4d0fcba
...
0814822d82
| Author | SHA1 | Date | |
|---|---|---|---|
| 0814822d82 | |||
| 41df2398e8 |
105
ConflictSet.cpp
105
ConflictSet.cpp
@@ -1604,7 +1604,6 @@ __attribute__((target("avx512f"))) void rezero16(InternalVersionT *vs,
|
||||
_mm512_sub_epi32(_mm512_loadu_epi32(vs), zvec), _mm512_setzero_epi32());
|
||||
_mm512_mask_storeu_epi32(vs, m, zvec);
|
||||
}
|
||||
|
||||
__attribute__((target("default")))
|
||||
#endif
|
||||
|
||||
@@ -2471,6 +2470,7 @@ checkMaxBetweenExclusive(Node *n, int begin, int end,
|
||||
}
|
||||
__attribute__((target("default")))
|
||||
#endif
|
||||
|
||||
bool checkMaxBetweenExclusive(Node *n, int begin, int end,
|
||||
InternalVersionT readVersion, ReadContext *tls) {
|
||||
return checkMaxBetweenExclusiveImpl<false>(n, begin, end, readVersion, tls);
|
||||
@@ -2910,6 +2910,71 @@ void addPointWrite(Node *&root, std::span<const uint8_t> key,
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__)
|
||||
// AVX-512 variant of horizontalMaxUpTo16. Reduces the first `len` entries of
// `vs` (len <= 16) to a single maximum. `z` supplies the base value that the
// 32-bit path subtracts before reducing; the 64-bit path does not read it.
__attribute__((target("avx512f"))) InternalVersionT
horizontalMaxUpTo16(InternalVersionT *vs, InternalVersionT z, int len) {
  assume(len <= 16);
#if USE_64_BIT
  // Hope it gets vectorized
  InternalVersionT best = vs[0];
  for (int idx = 1; idx < len; ++idx) {
    best = std::max(vs[idx], best);
  }
  return best;
#else
  // Copy the leading 32 bits of `z` out as a raw integer base.
  uint32_t base;
  memcpy(&base, &z, sizeof(base));
  const auto baseVec = _mm512_set1_epi32(base);
  // Only the low `len` lanes are read from `vs`; every other lane keeps
  // `base`, so it contributes a difference of 0 to the reduction below.
  const auto laneMask = _mm512_int2mask((1 << len) - 1);
  const auto lanes = _mm512_mask_loadu_epi32(baseVec, laneMask, vs);
  const auto deltas = _mm512_sub_epi32(lanes, baseVec);
  // Unsigned max of the per-lane distances from `base`, shifted back by `base`.
  return InternalVersionT(base + _mm512_reduce_max_epu32(deltas));
#endif
}
|
||||
__attribute__((target("default")))
|
||||
#endif
|
||||
|
||||
InternalVersionT
horizontalMaxUpTo16(InternalVersionT *vs, InternalVersionT, int len) {
  // Scalar implementation: scans the first `len` entries of `vs` (len <= 16)
  // and returns the largest. The unnamed second parameter is ignored here.
  assume(len <= 16);
  InternalVersionT best = vs[0];
  int idx = 1;
  while (idx < len) {
    best = std::max(vs[idx], best);
    ++idx;
  }
  return best;
}
|
||||
|
||||
#if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__)
|
||||
// AVX-512 variant of horizontalMax16. Reduces exactly 16 entries of `vs` to a
// single maximum. `z` supplies the base value that the 32-bit path subtracts
// before reducing; the 64-bit path does not read it.
__attribute__((target("avx512f"))) InternalVersionT
horizontalMax16(InternalVersionT *vs, InternalVersionT z) {
#if USE_64_BIT
  // Hope it gets vectorized
  InternalVersionT best = vs[0];
  for (int idx = 1; idx < 16; ++idx) {
    best = std::max(vs[idx], best);
  }
  return best;
#else
  // Copy the leading 32 bits of `z` out as a raw integer base.
  uint32_t base;
  memcpy(&base, &z, sizeof(base));
  const auto baseVec = _mm512_set1_epi32(base);
  // Load all 16 lanes and express each one as a distance from `base`.
  const auto lanes = _mm512_loadu_epi32(vs);
  const auto deltas = _mm512_sub_epi32(lanes, baseVec);
  // Unsigned max of the per-lane distances, shifted back by `base`.
  return InternalVersionT(base + _mm512_reduce_max_epu32(deltas));
#endif
}
|
||||
__attribute__((target("default")))
|
||||
#endif
|
||||
|
||||
InternalVersionT
horizontalMax16(InternalVersionT *vs, InternalVersionT) {
  // Scalar implementation: returns the largest of vs[0..15]. The unnamed
  // second parameter is ignored here.
  InternalVersionT best = vs[0];
  int idx = 1;
  while (idx < 16) {
    best = std::max(vs[idx], best);
    ++idx;
  }
  return best;
}
|
||||
|
||||
// Precondition: `node->entryPresent`, and node is not the root
|
||||
void fixupMaxVersion(Node *node, WriteContext *tls) {
|
||||
assert(node->parent);
|
||||
@@ -2921,15 +2986,13 @@ void fixupMaxVersion(Node *node, WriteContext *tls) {
|
||||
break;
|
||||
case Type_Node3: {
|
||||
auto *self3 = static_cast<Node3 *>(node);
|
||||
for (int i = 0; i < self3->numChildren; ++i) {
|
||||
max = std::max(self3->childMaxVersion[i], max);
|
||||
}
|
||||
max = std::max(max, horizontalMaxUpTo16(self3->childMaxVersion, tls->zero,
|
||||
self3->numChildren));
|
||||
} break;
|
||||
case Type_Node16: {
|
||||
auto *self16 = static_cast<Node16 *>(node);
|
||||
for (int i = 0; i < self16->numChildren; ++i) {
|
||||
max = std::max(self16->childMaxVersion[i], max);
|
||||
}
|
||||
max = std::max(max, horizontalMaxUpTo16(self16->childMaxVersion, tls->zero,
|
||||
self16->numChildren));
|
||||
} break;
|
||||
case Type_Node48: {
|
||||
auto *self48 = static_cast<Node48 *>(node);
|
||||
@@ -2939,9 +3002,7 @@ void fixupMaxVersion(Node *node, WriteContext *tls) {
|
||||
} break;
|
||||
case Type_Node256: {
|
||||
auto *self256 = static_cast<Node256 *>(node);
|
||||
for (auto v : self256->maxOfMax) {
|
||||
max = std::max(v, max);
|
||||
}
|
||||
max = std::max(max, horizontalMax16(self256->childMaxVersion, tls->zero));
|
||||
} break;
|
||||
default: // GCOVR_EXCL_LINE
|
||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||
@@ -4033,6 +4094,24 @@ template <int kN> void benchScan2() {
|
||||
});
|
||||
}
|
||||
|
||||
// Micro-benchmark for horizontalMax16 and horizontalMaxUpTo16 over a
// 16-element array filled with pseudo-random versions in [1000, 2000).
void benchHorizontal16() {
  ankerl::nanobench::Bench bench;
  InternalVersionT vs[16];
  for (auto &slot : vs) {
    slot = InternalVersionT(rand() % 1000 + 1000);
  }
#if !USE_64_BIT
  // Pick a base in [0, 1000), i.e. below every value stored in `vs`.
  InternalVersionT::zero = InternalVersionT(rand() % 1000);
#endif
  bench.run("horizontal16", [&]() {
    bench.doNotOptimizeAway(horizontalMax16(vs, InternalVersionT::zero));
  });
  // Prefix length for the bounded variant, drawn from [1, 15].
  int prefixLen = rand() % 15 + 1;
  bench.run("horizontalUpTo16", [&]() {
    bench.doNotOptimizeAway(
        horizontalMaxUpTo16(vs, InternalVersionT::zero, prefixLen));
  });
}
|
||||
|
||||
void benchLCP(int len) {
|
||||
ankerl::nanobench::Bench bench;
|
||||
std::vector<uint8_t> lhs(len);
|
||||
@@ -4065,11 +4144,7 @@ void printTree() {
|
||||
debugPrintDot(stdout, cs.root, &cs);
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
for (int i = 0; i < 256; ++i) {
|
||||
benchLCP(i);
|
||||
}
|
||||
}
|
||||
int main(void) {
  // Run the horizontal-max micro-benchmarks.
  benchHorizontal16();
}
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_FUZZ
|
||||
|
||||
BIN
corpus/1863492b4bfa4e57a2dd04457f45dd1adbc1b43b
Normal file
BIN
corpus/1863492b4bfa4e57a2dd04457f45dd1adbc1b43b
Normal file
Binary file not shown.
BIN
corpus/1b73191605897b8fb531d18ce6309d5bd5118268
Normal file
BIN
corpus/1b73191605897b8fb531d18ce6309d5bd5118268
Normal file
Binary file not shown.
BIN
corpus/290bc08207a6de4b9adceb42003d4aeb5758bfdd
Normal file
BIN
corpus/290bc08207a6de4b9adceb42003d4aeb5758bfdd
Normal file
Binary file not shown.
BIN
corpus/43950334272a0d818f646b118439717c19a99837
Normal file
BIN
corpus/43950334272a0d818f646b118439717c19a99837
Normal file
Binary file not shown.
BIN
corpus/4920f8c7afdef94d228a5683c8d31cebf40c27a8
Normal file
BIN
corpus/4920f8c7afdef94d228a5683c8d31cebf40c27a8
Normal file
Binary file not shown.
BIN
corpus/7e8f30ffd0b8f665d87159020d0c977bca72a972
Normal file
BIN
corpus/7e8f30ffd0b8f665d87159020d0c977bca72a972
Normal file
Binary file not shown.
BIN
corpus/904e5d8ff4df7175e5d97536d687bbfa2a8b6c50
Normal file
BIN
corpus/904e5d8ff4df7175e5d97536d687bbfa2a8b6c50
Normal file
Binary file not shown.
BIN
corpus/98060955d1d4b9ab23969fc8715d64d90b41b498
Normal file
BIN
corpus/98060955d1d4b9ab23969fc8715d64d90b41b498
Normal file
Binary file not shown.
BIN
corpus/b38d17b3b3f4689098f13451b448688e3e2e0d4d
Normal file
BIN
corpus/b38d17b3b3f4689098f13451b448688e3e2e0d4d
Normal file
Binary file not shown.
BIN
corpus/bef22e17c90d2b121dbd1f48ba9e09b3684f54e1
Normal file
BIN
corpus/bef22e17c90d2b121dbd1f48ba9e09b3684f54e1
Normal file
Binary file not shown.
BIN
corpus/c90e64707700c71cd8848ace4f34825766255457
Normal file
BIN
corpus/c90e64707700c71cd8848ace4f34825766255457
Normal file
Binary file not shown.
BIN
corpus/cc104b98c539e338887b8d9d3aac60561e7fbfff
Normal file
BIN
corpus/cc104b98c539e338887b8d9d3aac60561e7fbfff
Normal file
Binary file not shown.
BIN
corpus/e2aa45c8babd79c5ae6ec0b53757f1a23d8f1c62
Normal file
BIN
corpus/e2aa45c8babd79c5ae6ec0b53757f1a23d8f1c62
Normal file
Binary file not shown.
Reference in New Issue
Block a user