More SIMD for scanning Node256 with 32-bit versions
Some checks failed
Tests / Clang total: 1039, passed: 1039
Clang |Total|New|Outstanding|Fixed|Trend |:-:|:-:|:-:|:-:|:-: |0|0|0|0|:clap:
Tests / SIMD fallback total: 1039, passed: 1039
Tests / 32-bit versions total: 1039, passed: 1039
Tests / Release [gcc] total: 1039, passed: 1039
GNU C Compiler (gcc) |Total|New|Outstanding|Fixed|Trend |:-:|:-:|:-:|:-:|:-: |0|0|0|0|:clap:
Tests / Release [gcc,aarch64] total: 775, passed: 775
Tests / Coverage total: 780, failed: 1, passed: 779
weaselab/conflict-set/pipeline/head There was a failure building this commit

This commit is contained in:
2024-06-28 19:48:06 -07:00
parent f762add4d6
commit d3f4afa167
3 changed files with 86 additions and 38 deletions

View File

@@ -59,6 +59,10 @@ cmake_pop_check_state()
option(USE_SIMD_FALLBACK
"Use fallback implementations of functions that use SIMD" OFF)
# Opt-in switch for storing versions internally as 32 bits. The option name
# must not carry a trailing comma: CMake separates arguments by whitespace, so
# a comma would become part of the variable name and the option would no
# longer match the USE_32_BIT_VERSIONS variable tested later in this file.
option(
USE_32_BIT_VERSIONS
"Store 32 bit versions internally, and rely on versions never being different by more than 2e9"
OFF)
# This is encouraged according to
# https://valgrind.org/docs/manual/manual-core-adv.html#manual-core-adv.clientreq
@@ -103,6 +107,10 @@ if(NOT USE_SIMD_FALLBACK)
endif()
endif()
# When USE_32_BIT_VERSIONS is enabled, define INTERNAL_VERSION_32_BIT=1 for
# the compiled sources.
if(USE_32_BIT_VERSIONS)
add_compile_definitions(INTERNAL_VERSION_32_BIT=1)
endif()
set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "")
add_library(${PROJECT_NAME}-object OBJECT ConflictSet.cpp)

View File

@@ -72,7 +72,9 @@ constexpr void removeKey(struct Node *) {}
// ==================== BEGIN IMPLEMENTATION ====================
// Default INTERNAL_VERSION_32_BIT to 0 (disabled) when the build does not
// define it, so that it can always be tested with a plain #if.
#ifndef INTERNAL_VERSION_32_BIT
#define INTERNAL_VERSION_32_BIT 0
#endif
#if INTERNAL_VERSION_32_BIT
struct InternalVersionT {
@@ -582,9 +584,14 @@ std::string getSearchPath(Node *n);
// Each node with an entry present gets a budget of kBytesPerKey. Node0 always
// has an entry present.
constexpr int kBytesPerKey = 144;
// Induction hypothesis is that each node's surplus is >= kMinNodeSurplus
#if INTERNAL_VERSION_32_BIT
constexpr int kBytesPerKey = 112;
constexpr int kMinNodeSurplus = 80;
#else
constexpr int kBytesPerKey = 144;
constexpr int kMinNodeSurplus = 104;
#endif
constexpr int kMinChildrenNode3 = 2;
constexpr int kMinChildrenNode16 = 4;
constexpr int kMinChildrenNode48 = 17;
@@ -1759,10 +1766,12 @@ downLeftSpine:
}
}
// Returns true if all in-bounds vs are <= readVersion
// Returns true if vs[i] <= readVersion for all i such that begin <= is[i] < end
// Preconditions: begin <= end, end - begin < 256
bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end,
InternalVersionT readVersion) {
assert(begin <= end);
assert(end - begin < 256);
#ifdef HAS_ARM_NEON
@@ -1781,13 +1790,14 @@ bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end,
memcpy(w4, vs, sizeof(w4));
uint32_t rv;
memcpy(&rv, &readVersion, sizeof(rv));
const auto rvVec = vdupq_n_u32(rv);
int32x4_t z;
memset(&z, 0, sizeof(z));
uint16x4_t conflicting[4];
for (int i = 0; i < 4; ++i) {
conflicting[i] = vmovn_u32(vcgtq_s32(vsubq_u32(w4[i], vdupq_n_u32(rv)), z));
conflicting[i] = vmovn_u32(vcgtq_s32(vsubq_u32(w4[i], rvVec), z));
}
auto combined =
vcombine_u8(vmovn_u16(vcombine_u16(conflicting[0], conflicting[1])),
@@ -1837,6 +1847,48 @@ bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end,
#endif
}
// Returns true if vs[i] <= readVersion for every i with begin <= i < end.
// Preconditions: 0 <= begin < 16, 0 <= end <= 16, begin <= end.
// All 16 entries of vs are read regardless of [begin, end); entries outside
// the window are masked out of the result afterwards.
bool scan16(const InternalVersionT *vs, int begin, int end,
            InternalVersionT readVersion) {
  assert(0 <= begin && begin < 16);
  assert(0 <= end && end <= 16);
  assert(begin <= end);

#if INTERNAL_VERSION_32_BIT && defined(HAS_ARM_NEON)
  // Load the 16 versions as four vectors of four 32-bit lanes each.
  uint32x4_t quads[4];
  memcpy(quads, vs, sizeof(quads));
  uint32_t readBits;
  memcpy(&readBits, &readVersion, sizeof(readBits));
  const auto readVec = vdupq_n_u32(readBits);
  int32x4_t zero;
  memset(&zero, 0, sizeof(zero));

  // A lane is set when (vs[i] - readVersion), compared as signed, is > 0.
  uint16x4_t newer[4];
  for (int q = 0; q < 4; ++q) {
    newer[q] = vmovn_u32(vcgtq_s32(vsubq_u32(quads[q], readVec), zero));
  }

  // Narrow to one byte per entry, then to one nibble per entry, so that the
  // whole 16-entry result fits in a single 64-bit word (4 bits per entry).
  const auto lowBytes = vmovn_u16(vcombine_u16(newer[0], newer[1]));
  const auto highBytes = vmovn_u16(vcombine_u16(newer[2], newer[3]));
  const auto bytes = vcombine_u8(lowBytes, highBytes);
  const auto nibbles = vshrn_n_u16(vreinterpretq_u16_u8(bytes), 4);
  uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(nibbles), 0);

  // Drop entries at or past end. With end == 16 every nibble is in range (and
  // a shift by 64 would be invalid), so the mask is left untouched.
  if (end != 16) {
    mask &= (uint64_t(1) << (end << 2)) - 1;
  }
  // Drop entries before begin.
  mask >>= begin << 2;
  return mask == 0;
#else
  // Build a 16-bit mask with bit i set when vs[i] is newer than readVersion.
  uint64_t mask = 0;
  for (int idx = 0; idx < 16; ++idx) {
    mask |= static_cast<uint64_t>(vs[idx] > readVersion) << idx;
  }
  // Keep only the bits for entries in [begin, end).
  mask &= (uint64_t(1) << end) - 1;
  mask >>= begin;
  return mask == 0;
#endif
}
// Return whether or not the max version among all keys starting with the search
// path of n + [child], where child in (begin, end) is <= readVersion. Does not
// account for the range version of firstGt(searchpath(n) + [end - 1])
@@ -1907,68 +1959,45 @@ bool checkMaxBetweenExclusive(Node *n, int begin, int end,
return true;
}
case Type_Node256: {
static_assert(Node256::kMaxOfMaxTotalPages == 16);
auto *self = static_cast<Node256 *>(n);
if (end <= 0) {
return true;
}
const int firstPage = begin >> Node256::kMaxOfMaxShift;
const int lastPage = (end - 1) >> Node256::kMaxOfMaxShift;
// Check the only page if there's only one
if (firstPage == lastPage) {
if (self->maxOfMax[firstPage] <= readVersion) {
return true;
}
uint64_t conflict = 0;
// Check all in page
for (int i = 0; i < Node256::kMaxOfMaxPageSize; ++i) {
conflict |=
(self->childMaxVersion[(firstPage << Node256::kMaxOfMaxShift) + i] >
readVersion)
<< i;
}
// Mask away out of bounds
const int intraPageBegin = begin & (Node256::kMaxOfMaxPageSize - 1);
const int intraPageEnd = end - (lastPage << Node256::kMaxOfMaxShift);
conflict &= (1 << intraPageEnd) - 1;
conflict >>= intraPageBegin;
return !conflict;
return scan16(self->childMaxVersion +
(firstPage << Node256::kMaxOfMaxShift),
intraPageBegin, intraPageEnd, readVersion);
}
// Check the first page
if (self->maxOfMax[firstPage] > readVersion) {
uint64_t conflict = 0;
for (int i = 0; i < Node256::kMaxOfMaxPageSize; ++i) {
int j = (firstPage << Node256::kMaxOfMaxShift) + i;
conflict |= (self->childMaxVersion[j] > readVersion) << i;
}
const int intraPageBegin = begin & (Node256::kMaxOfMaxPageSize - 1);
conflict >>= intraPageBegin;
if (conflict) {
if (!scan16(self->childMaxVersion +
(firstPage << Node256::kMaxOfMaxShift),
intraPageBegin, 16, readVersion)) {
return false;
}
}
// Check the last page
if (self->maxOfMax[lastPage] > readVersion) {
uint64_t conflict = 0;
for (int i = 0; i < Node256::kMaxOfMaxPageSize; ++i) {
int j = (lastPage << Node256::kMaxOfMaxShift) + i;
conflict |= (self->childMaxVersion[j] > readVersion) << i;
}
const int intraPageEnd = end - (lastPage << Node256::kMaxOfMaxShift);
conflict &= (1 << intraPageEnd) - 1;
if (conflict) {
if (!scan16(self->childMaxVersion + (lastPage << Node256::kMaxOfMaxShift),
0, intraPageEnd, readVersion)) {
return false;
}
}
uint64_t conflict = 0;
// Check all possible inner pages
for (int i = 1; i < Node256::kMaxOfMaxTotalPages - 1; ++i) {
conflict |= (self->maxOfMax[i] > readVersion) << i;
}
// Only keep inner pages
// Check inner pages
const int innerPageBegin = (begin >> Node256::kMaxOfMaxShift) + 1;
const int innerPageEnd = (end - 1) >> Node256::kMaxOfMaxShift;
conflict &= (1 << innerPageEnd) - 1;
conflict >>= innerPageBegin;
return !conflict;
return scan16(self->maxOfMax, innerPageBegin, innerPageEnd, readVersion);
}
default: // GCOVR_EXCL_LINE
__builtin_unreachable(); // GCOVR_EXCL_LINE

11
Jenkinsfile vendored
View File

@@ -59,6 +59,17 @@ pipeline {
CleanBuildAndTest("-DUSE_SIMD_FALLBACK=ON")
}
}
// CI stage that runs CleanBuildAndTest with the USE_32_BIT_VERSIONS option
// turned on.
stage('32-bit versions') {
agent {
dockerfile {
args '-v /home/jenkins/ccache:/ccache'
reuseNode true
}
}
steps {
CleanBuildAndTest("-DUSE_32_BIT_VERSIONS=ON")
}
}
stage('Release [gcc]') {
agent {
dockerfile {