More SIMD for scanning Node256 with 32-bit versions
Some checks failed
Tests / Clang total: 1039, passed: 1039
Clang |Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|0|0|0|0|:clap:
Tests / SIMD fallback total: 1039, passed: 1039
Tests / 32-bit versions total: 1039, passed: 1039
Tests / Release [gcc] total: 1039, passed: 1039
GNU C Compiler (gcc) |Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|0|0|0|0|:clap:
Tests / Release [gcc,aarch64] total: 775, passed: 775
Tests / Coverage total: 780, failed: 1, passed: 779
weaselab/conflict-set/pipeline/head There was a failure building this commit
Some checks failed
Tests / Clang total: 1039, passed: 1039
Clang |Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|0|0|0|0|:clap:
Tests / SIMD fallback total: 1039, passed: 1039
Tests / 32-bit versions total: 1039, passed: 1039
Tests / Release [gcc] total: 1039, passed: 1039
GNU C Compiler (gcc) |Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|0|0|0|0|:clap:
Tests / Release [gcc,aarch64] total: 775, passed: 775
Tests / Coverage total: 780, failed: 1, passed: 779
weaselab/conflict-set/pipeline/head There was a failure building this commit
This commit is contained in:
@@ -59,6 +59,10 @@ cmake_pop_check_state()
|
|||||||
|
|
||||||
option(USE_SIMD_FALLBACK
|
option(USE_SIMD_FALLBACK
|
||||||
"Use fallback implementations of functions that use SIMD" OFF)
|
"Use fallback implementations of functions that use SIMD" OFF)
|
||||||
|
# Opt-in switch for storing versions internally as 32-bit values. When ON, the
# build adds the INTERNAL_VERSION_32_BIT=1 compile definition.
# The option name must not carry a trailing comma: CMake arguments are
# whitespace-separated, so a comma would become part of the variable name and
# the option would no longer match the if(USE_32_BIT_VERSIONS) check.
option(
  USE_32_BIT_VERSIONS
  "Store 32 bit versions internally, and rely on versions never being different by more than 2e9"
  OFF)
|
||||||
|
|
||||||
# This is encouraged according to
|
# This is encouraged according to
|
||||||
# https://valgrind.org/docs/manual/manual-core-adv.html#manual-core-adv.clientreq
|
# https://valgrind.org/docs/manual/manual-core-adv.html#manual-core-adv.clientreq
|
||||||
@@ -103,6 +107,10 @@ if(NOT USE_SIMD_FALLBACK)
|
|||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(USE_32_BIT_VERSIONS)
|
||||||
|
add_compile_definitions(INTERNAL_VERSION_32_BIT=1)
|
||||||
|
endif()
|
||||||
|
|
||||||
set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "")
|
set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "")
|
||||||
|
|
||||||
add_library(${PROJECT_NAME}-object OBJECT ConflictSet.cpp)
|
add_library(${PROJECT_NAME}-object OBJECT ConflictSet.cpp)
|
||||||
|
105
ConflictSet.cpp
105
ConflictSet.cpp
@@ -72,7 +72,9 @@ constexpr void removeKey(struct Node *) {}
|
|||||||
|
|
||||||
// ==================== BEGIN IMPLEMENTATION ====================
|
// ==================== BEGIN IMPLEMENTATION ====================
|
||||||
|
|
||||||
|
#ifndef INTERNAL_VERSION_32_BIT
|
||||||
#define INTERNAL_VERSION_32_BIT 0
|
#define INTERNAL_VERSION_32_BIT 0
|
||||||
|
#endif
|
||||||
|
|
||||||
#if INTERNAL_VERSION_32_BIT
|
#if INTERNAL_VERSION_32_BIT
|
||||||
struct InternalVersionT {
|
struct InternalVersionT {
|
||||||
@@ -582,9 +584,14 @@ std::string getSearchPath(Node *n);
|
|||||||
|
|
||||||
// Each node with an entry present gets a budget of kBytesPerKey. Node0 always
|
// Each node with an entry present gets a budget of kBytesPerKey. Node0 always
|
||||||
// has an entry present.
|
// has an entry present.
|
||||||
constexpr int kBytesPerKey = 144;
|
|
||||||
// Induction hypothesis is that each node's surplus is >= kMinNodeSurplus
|
// Induction hypothesis is that each node's surplus is >= kMinNodeSurplus
|
||||||
|
#if INTERNAL_VERSION_32_BIT
|
||||||
|
constexpr int kBytesPerKey = 112;
|
||||||
|
constexpr int kMinNodeSurplus = 80;
|
||||||
|
#else
|
||||||
|
constexpr int kBytesPerKey = 144;
|
||||||
constexpr int kMinNodeSurplus = 104;
|
constexpr int kMinNodeSurplus = 104;
|
||||||
|
#endif
|
||||||
constexpr int kMinChildrenNode3 = 2;
|
constexpr int kMinChildrenNode3 = 2;
|
||||||
constexpr int kMinChildrenNode16 = 4;
|
constexpr int kMinChildrenNode16 = 4;
|
||||||
constexpr int kMinChildrenNode48 = 17;
|
constexpr int kMinChildrenNode48 = 17;
|
||||||
@@ -1759,10 +1766,12 @@ downLeftSpine:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns true if all in-bounds vs are <= readVersion
|
// Returns true if vs[i] <= readVersion for all i such that begin <= is[i] < end
|
||||||
|
// Preconditions: begin <= end, end - begin < 256
|
||||||
bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end,
|
bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end,
|
||||||
InternalVersionT readVersion) {
|
InternalVersionT readVersion) {
|
||||||
|
|
||||||
|
assert(begin <= end);
|
||||||
assert(end - begin < 256);
|
assert(end - begin < 256);
|
||||||
|
|
||||||
#ifdef HAS_ARM_NEON
|
#ifdef HAS_ARM_NEON
|
||||||
@@ -1781,13 +1790,14 @@ bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end,
|
|||||||
memcpy(w4, vs, sizeof(w4));
|
memcpy(w4, vs, sizeof(w4));
|
||||||
uint32_t rv;
|
uint32_t rv;
|
||||||
memcpy(&rv, &readVersion, sizeof(rv));
|
memcpy(&rv, &readVersion, sizeof(rv));
|
||||||
|
const auto rvVec = vdupq_n_u32(rv);
|
||||||
|
|
||||||
int32x4_t z;
|
int32x4_t z;
|
||||||
memset(&z, 0, sizeof(z));
|
memset(&z, 0, sizeof(z));
|
||||||
|
|
||||||
uint16x4_t conflicting[4];
|
uint16x4_t conflicting[4];
|
||||||
for (int i = 0; i < 4; ++i) {
|
for (int i = 0; i < 4; ++i) {
|
||||||
conflicting[i] = vmovn_u32(vcgtq_s32(vsubq_u32(w4[i], vdupq_n_u32(rv)), z));
|
conflicting[i] = vmovn_u32(vcgtq_s32(vsubq_u32(w4[i], rvVec), z));
|
||||||
}
|
}
|
||||||
auto combined =
|
auto combined =
|
||||||
vcombine_u8(vmovn_u16(vcombine_u16(conflicting[0], conflicting[1])),
|
vcombine_u8(vmovn_u16(vcombine_u16(conflicting[0], conflicting[1])),
|
||||||
@@ -1837,6 +1847,48 @@ bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end,
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns true if v[i] <= readVersion for all i such that begin <= i < end
|
||||||
|
bool scan16(const InternalVersionT *vs, int begin, int end,
|
||||||
|
InternalVersionT readVersion) {
|
||||||
|
assert(0 <= begin && begin < 16);
|
||||||
|
assert(0 <= end && end <= 16);
|
||||||
|
assert(begin <= end);
|
||||||
|
|
||||||
|
#if INTERNAL_VERSION_32_BIT && defined(HAS_ARM_NEON)
|
||||||
|
uint32x4_t w4[4];
|
||||||
|
memcpy(w4, vs, sizeof(w4));
|
||||||
|
uint32_t rv;
|
||||||
|
memcpy(&rv, &readVersion, sizeof(rv));
|
||||||
|
const auto rvVec = vdupq_n_u32(rv);
|
||||||
|
|
||||||
|
int32x4_t z;
|
||||||
|
memset(&z, 0, sizeof(z));
|
||||||
|
|
||||||
|
uint16x4_t conflicting[4];
|
||||||
|
for (int i = 0; i < 4; ++i) {
|
||||||
|
conflicting[i] = vmovn_u32(vcgtq_s32(vsubq_u32(w4[i], rvVec), z));
|
||||||
|
}
|
||||||
|
auto combined =
|
||||||
|
vcombine_u8(vmovn_u16(vcombine_u16(conflicting[0], conflicting[1])),
|
||||||
|
vmovn_u16(vcombine_u16(conflicting[2], conflicting[3])));
|
||||||
|
|
||||||
|
uint64_t conflict = vget_lane_u64(
|
||||||
|
vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(combined), 4)), 0);
|
||||||
|
|
||||||
|
conflict &= end == 16 ? -1 : (uint64_t(1) << (end << 2)) - 1;
|
||||||
|
conflict >>= begin << 2;
|
||||||
|
return !conflict;
|
||||||
|
#else
|
||||||
|
uint64_t conflict = 0;
|
||||||
|
for (int i = 0; i < 16; ++i) {
|
||||||
|
conflict |= (vs[i] > readVersion) << i;
|
||||||
|
}
|
||||||
|
conflict &= (1 << end) - 1;
|
||||||
|
conflict >>= begin;
|
||||||
|
return !conflict;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
// Return whether or not the max version among all keys starting with the search
|
// Return whether or not the max version among all keys starting with the search
|
||||||
// path of n + [child], where child in (begin, end) is <= readVersion. Does not
|
// path of n + [child], where child in (begin, end) is <= readVersion. Does not
|
||||||
// account for the range version of firstGt(searchpath(n) + [end - 1])
|
// account for the range version of firstGt(searchpath(n) + [end - 1])
|
||||||
@@ -1907,68 +1959,45 @@ bool checkMaxBetweenExclusive(Node *n, int begin, int end,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
case Type_Node256: {
|
case Type_Node256: {
|
||||||
|
static_assert(Node256::kMaxOfMaxTotalPages == 16);
|
||||||
auto *self = static_cast<Node256 *>(n);
|
auto *self = static_cast<Node256 *>(n);
|
||||||
if (end <= 0) {
|
if (end <= 0) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
const int firstPage = begin >> Node256::kMaxOfMaxShift;
|
const int firstPage = begin >> Node256::kMaxOfMaxShift;
|
||||||
const int lastPage = (end - 1) >> Node256::kMaxOfMaxShift;
|
const int lastPage = (end - 1) >> Node256::kMaxOfMaxShift;
|
||||||
|
// Check the only page if there's only one
|
||||||
if (firstPage == lastPage) {
|
if (firstPage == lastPage) {
|
||||||
if (self->maxOfMax[firstPage] <= readVersion) {
|
if (self->maxOfMax[firstPage] <= readVersion) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
uint64_t conflict = 0;
|
|
||||||
// Check all in page
|
|
||||||
for (int i = 0; i < Node256::kMaxOfMaxPageSize; ++i) {
|
|
||||||
conflict |=
|
|
||||||
(self->childMaxVersion[(firstPage << Node256::kMaxOfMaxShift) + i] >
|
|
||||||
readVersion)
|
|
||||||
<< i;
|
|
||||||
}
|
|
||||||
// Mask away out of bounds
|
|
||||||
const int intraPageBegin = begin & (Node256::kMaxOfMaxPageSize - 1);
|
const int intraPageBegin = begin & (Node256::kMaxOfMaxPageSize - 1);
|
||||||
const int intraPageEnd = end - (lastPage << Node256::kMaxOfMaxShift);
|
const int intraPageEnd = end - (lastPage << Node256::kMaxOfMaxShift);
|
||||||
conflict &= (1 << intraPageEnd) - 1;
|
return scan16(self->childMaxVersion +
|
||||||
conflict >>= intraPageBegin;
|
(firstPage << Node256::kMaxOfMaxShift),
|
||||||
return !conflict;
|
intraPageBegin, intraPageEnd, readVersion);
|
||||||
}
|
}
|
||||||
// Check the first page
|
// Check the first page
|
||||||
if (self->maxOfMax[firstPage] > readVersion) {
|
if (self->maxOfMax[firstPage] > readVersion) {
|
||||||
uint64_t conflict = 0;
|
|
||||||
for (int i = 0; i < Node256::kMaxOfMaxPageSize; ++i) {
|
|
||||||
int j = (firstPage << Node256::kMaxOfMaxShift) + i;
|
|
||||||
conflict |= (self->childMaxVersion[j] > readVersion) << i;
|
|
||||||
}
|
|
||||||
const int intraPageBegin = begin & (Node256::kMaxOfMaxPageSize - 1);
|
const int intraPageBegin = begin & (Node256::kMaxOfMaxPageSize - 1);
|
||||||
conflict >>= intraPageBegin;
|
if (!scan16(self->childMaxVersion +
|
||||||
if (conflict) {
|
(firstPage << Node256::kMaxOfMaxShift),
|
||||||
|
intraPageBegin, 16, readVersion)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Check the last page
|
// Check the last page
|
||||||
if (self->maxOfMax[lastPage] > readVersion) {
|
if (self->maxOfMax[lastPage] > readVersion) {
|
||||||
uint64_t conflict = 0;
|
|
||||||
for (int i = 0; i < Node256::kMaxOfMaxPageSize; ++i) {
|
|
||||||
int j = (lastPage << Node256::kMaxOfMaxShift) + i;
|
|
||||||
conflict |= (self->childMaxVersion[j] > readVersion) << i;
|
|
||||||
}
|
|
||||||
const int intraPageEnd = end - (lastPage << Node256::kMaxOfMaxShift);
|
const int intraPageEnd = end - (lastPage << Node256::kMaxOfMaxShift);
|
||||||
conflict &= (1 << intraPageEnd) - 1;
|
if (!scan16(self->childMaxVersion + (lastPage << Node256::kMaxOfMaxShift),
|
||||||
if (conflict) {
|
0, intraPageEnd, readVersion)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
uint64_t conflict = 0;
|
// Check inner pages
|
||||||
// Check all possible inner pages
|
|
||||||
for (int i = 1; i < Node256::kMaxOfMaxTotalPages - 1; ++i) {
|
|
||||||
conflict |= (self->maxOfMax[i] > readVersion) << i;
|
|
||||||
}
|
|
||||||
// Only keep inner pages
|
|
||||||
const int innerPageBegin = (begin >> Node256::kMaxOfMaxShift) + 1;
|
const int innerPageBegin = (begin >> Node256::kMaxOfMaxShift) + 1;
|
||||||
const int innerPageEnd = (end - 1) >> Node256::kMaxOfMaxShift;
|
const int innerPageEnd = (end - 1) >> Node256::kMaxOfMaxShift;
|
||||||
conflict &= (1 << innerPageEnd) - 1;
|
return scan16(self->maxOfMax, innerPageBegin, innerPageEnd, readVersion);
|
||||||
conflict >>= innerPageBegin;
|
|
||||||
return !conflict;
|
|
||||||
}
|
}
|
||||||
default: // GCOVR_EXCL_LINE
|
default: // GCOVR_EXCL_LINE
|
||||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||||
|
11
Jenkinsfile
vendored
11
Jenkinsfile
vendored
@@ -59,6 +59,17 @@ pipeline {
|
|||||||
CleanBuildAndTest("-DUSE_SIMD_FALLBACK=ON")
|
CleanBuildAndTest("-DUSE_SIMD_FALLBACK=ON")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// CI stage that calls CleanBuildAndTest with -DUSE_32_BIT_VERSIONS=ON, using a Dockerfile-based agent that reuses the current node and mounts /home/jenkins/ccache at /ccache.
stage('32-bit versions') {
|
||||||
|
agent {
|
||||||
|
dockerfile {
|
||||||
|
args '-v /home/jenkins/ccache:/ccache'
|
||||||
|
reuseNode true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
steps {
|
||||||
|
CleanBuildAndTest("-DUSE_32_BIT_VERSIONS=ON")
|
||||||
|
}
|
||||||
|
}
|
||||||
stage('Release [gcc]') {
|
stage('Release [gcc]') {
|
||||||
agent {
|
agent {
|
||||||
dockerfile {
|
dockerfile {
|
||||||
|
Reference in New Issue
Block a user