More SIMD for scanning Node256 with 32-bit versions
Some checks failed
Tests / Clang total: 1039, passed: 1039
Clang |Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|0|0|0|0|:clap:
Tests / SIMD fallback total: 1039, passed: 1039
Tests / 32-bit versions total: 1039, passed: 1039
Tests / Release [gcc] total: 1039, passed: 1039
GNU C Compiler (gcc) |Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|0|0|0|0|:clap:
Tests / Release [gcc,aarch64] total: 775, passed: 775
Tests / Coverage total: 780, failed: 1, passed: 779
weaselab/conflict-set/pipeline/head There was a failure building this commit
Some checks failed
Tests / Clang total: 1039, passed: 1039
Clang |Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|0|0|0|0|:clap:
Tests / SIMD fallback total: 1039, passed: 1039
Tests / 32-bit versions total: 1039, passed: 1039
Tests / Release [gcc] total: 1039, passed: 1039
GNU C Compiler (gcc) |Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|0|0|0|0|:clap:
Tests / Release [gcc,aarch64] total: 775, passed: 775
Tests / Coverage total: 780, failed: 1, passed: 779
weaselab/conflict-set/pipeline/head There was a failure building this commit
This commit is contained in:
105
ConflictSet.cpp
105
ConflictSet.cpp
@@ -72,7 +72,9 @@ constexpr void removeKey(struct Node *) {}
|
||||
|
||||
// ==================== BEGIN IMPLEMENTATION ====================
|
||||
|
||||
#ifndef INTERNAL_VERSION_32_BIT
|
||||
#define INTERNAL_VERSION_32_BIT 0
|
||||
#endif
|
||||
|
||||
#if INTERNAL_VERSION_32_BIT
|
||||
struct InternalVersionT {
|
||||
@@ -582,9 +584,14 @@ std::string getSearchPath(Node *n);
|
||||
|
||||
// Each node with an entry present gets a budget of kBytesPerKey. Node0 always
|
||||
// has an entry present.
|
||||
constexpr int kBytesPerKey = 144;
|
||||
// Induction hypothesis is that each node's surplus is >= kMinNodeSurplus
|
||||
#if INTERNAL_VERSION_32_BIT
|
||||
constexpr int kBytesPerKey = 112;
|
||||
constexpr int kMinNodeSurplus = 80;
|
||||
#else
|
||||
constexpr int kBytesPerKey = 144;
|
||||
constexpr int kMinNodeSurplus = 104;
|
||||
#endif
|
||||
constexpr int kMinChildrenNode3 = 2;
|
||||
constexpr int kMinChildrenNode16 = 4;
|
||||
constexpr int kMinChildrenNode48 = 17;
|
||||
@@ -1759,10 +1766,12 @@ downLeftSpine:
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true if all in-bounds vs are <= readVersion
|
||||
// Returns true if vs[i] <= readVersion for all i such that begin <= is[i] < end
|
||||
// Preconditions: begin <= end, end - begin < 256
|
||||
bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end,
|
||||
InternalVersionT readVersion) {
|
||||
|
||||
assert(begin <= end);
|
||||
assert(end - begin < 256);
|
||||
|
||||
#ifdef HAS_ARM_NEON
|
||||
@@ -1781,13 +1790,14 @@ bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end,
|
||||
memcpy(w4, vs, sizeof(w4));
|
||||
uint32_t rv;
|
||||
memcpy(&rv, &readVersion, sizeof(rv));
|
||||
const auto rvVec = vdupq_n_u32(rv);
|
||||
|
||||
int32x4_t z;
|
||||
memset(&z, 0, sizeof(z));
|
||||
|
||||
uint16x4_t conflicting[4];
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
conflicting[i] = vmovn_u32(vcgtq_s32(vsubq_u32(w4[i], vdupq_n_u32(rv)), z));
|
||||
conflicting[i] = vmovn_u32(vcgtq_s32(vsubq_u32(w4[i], rvVec), z));
|
||||
}
|
||||
auto combined =
|
||||
vcombine_u8(vmovn_u16(vcombine_u16(conflicting[0], conflicting[1])),
|
||||
@@ -1837,6 +1847,48 @@ bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end,
|
||||
#endif
|
||||
}
|
||||
|
||||
// Returns true if v[i] <= readVersion for all i such that begin <= i < end
|
||||
bool scan16(const InternalVersionT *vs, int begin, int end,
|
||||
InternalVersionT readVersion) {
|
||||
assert(0 <= begin && begin < 16);
|
||||
assert(0 <= end && end <= 16);
|
||||
assert(begin <= end);
|
||||
|
||||
#if INTERNAL_VERSION_32_BIT && defined(HAS_ARM_NEON)
|
||||
uint32x4_t w4[4];
|
||||
memcpy(w4, vs, sizeof(w4));
|
||||
uint32_t rv;
|
||||
memcpy(&rv, &readVersion, sizeof(rv));
|
||||
const auto rvVec = vdupq_n_u32(rv);
|
||||
|
||||
int32x4_t z;
|
||||
memset(&z, 0, sizeof(z));
|
||||
|
||||
uint16x4_t conflicting[4];
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
conflicting[i] = vmovn_u32(vcgtq_s32(vsubq_u32(w4[i], rvVec), z));
|
||||
}
|
||||
auto combined =
|
||||
vcombine_u8(vmovn_u16(vcombine_u16(conflicting[0], conflicting[1])),
|
||||
vmovn_u16(vcombine_u16(conflicting[2], conflicting[3])));
|
||||
|
||||
uint64_t conflict = vget_lane_u64(
|
||||
vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(combined), 4)), 0);
|
||||
|
||||
conflict &= end == 16 ? -1 : (uint64_t(1) << (end << 2)) - 1;
|
||||
conflict >>= begin << 2;
|
||||
return !conflict;
|
||||
#else
|
||||
uint64_t conflict = 0;
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
conflict |= (vs[i] > readVersion) << i;
|
||||
}
|
||||
conflict &= (1 << end) - 1;
|
||||
conflict >>= begin;
|
||||
return !conflict;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Return whether or not the max version among all keys starting with the search
|
||||
// path of n + [child], where child in (begin, end), is <= readVersion. Does not
|
||||
// account for the range version of firstGt(searchpath(n) + [end - 1])
|
||||
@@ -1907,68 +1959,45 @@ bool checkMaxBetweenExclusive(Node *n, int begin, int end,
|
||||
return true;
|
||||
}
|
||||
case Type_Node256: {
|
||||
static_assert(Node256::kMaxOfMaxTotalPages == 16);
|
||||
auto *self = static_cast<Node256 *>(n);
|
||||
if (end <= 0) {
|
||||
return true;
|
||||
}
|
||||
const int firstPage = begin >> Node256::kMaxOfMaxShift;
|
||||
const int lastPage = (end - 1) >> Node256::kMaxOfMaxShift;
|
||||
// Check the only page if there's only one
|
||||
if (firstPage == lastPage) {
|
||||
if (self->maxOfMax[firstPage] <= readVersion) {
|
||||
return true;
|
||||
}
|
||||
uint64_t conflict = 0;
|
||||
// Check all in page
|
||||
for (int i = 0; i < Node256::kMaxOfMaxPageSize; ++i) {
|
||||
conflict |=
|
||||
(self->childMaxVersion[(firstPage << Node256::kMaxOfMaxShift) + i] >
|
||||
readVersion)
|
||||
<< i;
|
||||
}
|
||||
// Mask away out of bounds
|
||||
const int intraPageBegin = begin & (Node256::kMaxOfMaxPageSize - 1);
|
||||
const int intraPageEnd = end - (lastPage << Node256::kMaxOfMaxShift);
|
||||
conflict &= (1 << intraPageEnd) - 1;
|
||||
conflict >>= intraPageBegin;
|
||||
return !conflict;
|
||||
return scan16(self->childMaxVersion +
|
||||
(firstPage << Node256::kMaxOfMaxShift),
|
||||
intraPageBegin, intraPageEnd, readVersion);
|
||||
}
|
||||
// Check the first page
|
||||
if (self->maxOfMax[firstPage] > readVersion) {
|
||||
uint64_t conflict = 0;
|
||||
for (int i = 0; i < Node256::kMaxOfMaxPageSize; ++i) {
|
||||
int j = (firstPage << Node256::kMaxOfMaxShift) + i;
|
||||
conflict |= (self->childMaxVersion[j] > readVersion) << i;
|
||||
}
|
||||
const int intraPageBegin = begin & (Node256::kMaxOfMaxPageSize - 1);
|
||||
conflict >>= intraPageBegin;
|
||||
if (conflict) {
|
||||
if (!scan16(self->childMaxVersion +
|
||||
(firstPage << Node256::kMaxOfMaxShift),
|
||||
intraPageBegin, 16, readVersion)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// Check the last page
|
||||
if (self->maxOfMax[lastPage] > readVersion) {
|
||||
uint64_t conflict = 0;
|
||||
for (int i = 0; i < Node256::kMaxOfMaxPageSize; ++i) {
|
||||
int j = (lastPage << Node256::kMaxOfMaxShift) + i;
|
||||
conflict |= (self->childMaxVersion[j] > readVersion) << i;
|
||||
}
|
||||
const int intraPageEnd = end - (lastPage << Node256::kMaxOfMaxShift);
|
||||
conflict &= (1 << intraPageEnd) - 1;
|
||||
if (conflict) {
|
||||
if (!scan16(self->childMaxVersion + (lastPage << Node256::kMaxOfMaxShift),
|
||||
0, intraPageEnd, readVersion)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
uint64_t conflict = 0;
|
||||
// Check all possible inner pages
|
||||
for (int i = 1; i < Node256::kMaxOfMaxTotalPages - 1; ++i) {
|
||||
conflict |= (self->maxOfMax[i] > readVersion) << i;
|
||||
}
|
||||
// Only keep inner pages
|
||||
// Check inner pages
|
||||
const int innerPageBegin = (begin >> Node256::kMaxOfMaxShift) + 1;
|
||||
const int innerPageEnd = (end - 1) >> Node256::kMaxOfMaxShift;
|
||||
conflict &= (1 << innerPageEnd) - 1;
|
||||
conflict >>= innerPageBegin;
|
||||
return !conflict;
|
||||
return scan16(self->maxOfMax, innerPageBegin, innerPageEnd, readVersion);
|
||||
}
|
||||
default: // GCOVR_EXCL_LINE
|
||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||
|
Reference in New Issue
Block a user