Commit to 32-bit versions
This commit is contained in:
@@ -59,10 +59,6 @@ cmake_pop_check_state()
|
|||||||
|
|
||||||
option(USE_SIMD_FALLBACK
|
option(USE_SIMD_FALLBACK
|
||||||
"Use fallback implementations of functions that use SIMD" OFF)
|
"Use fallback implementations of functions that use SIMD" OFF)
|
||||||
option(
|
|
||||||
USE_32_BIT_VERSIONS
|
|
||||||
"Store 32 bit versions internally, and rely on versions never being different by more than 2e9"
|
|
||||||
OFF)
|
|
||||||
|
|
||||||
# This is encouraged according to
|
# This is encouraged according to
|
||||||
# https://valgrind.org/docs/manual/manual-core-adv.html#manual-core-adv.clientreq
|
# https://valgrind.org/docs/manual/manual-core-adv.html#manual-core-adv.clientreq
|
||||||
@@ -107,10 +103,6 @@ if(NOT USE_SIMD_FALLBACK)
|
|||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(USE_32_BIT_VERSIONS)
|
|
||||||
add_compile_definitions(INTERNAL_VERSION_32_BIT=1)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "")
|
set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "")
|
||||||
|
|
||||||
add_library(${PROJECT_NAME}-object OBJECT ConflictSet.cpp)
|
add_library(${PROJECT_NAME}-object OBJECT ConflictSet.cpp)
|
||||||
|
@@ -78,11 +78,6 @@ constexpr void removeKey(struct Node *) {}
|
|||||||
|
|
||||||
// ==================== BEGIN IMPLEMENTATION ====================
|
// ==================== BEGIN IMPLEMENTATION ====================
|
||||||
|
|
||||||
#ifndef INTERNAL_VERSION_32_BIT
|
|
||||||
#define INTERNAL_VERSION_32_BIT 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if INTERNAL_VERSION_32_BIT
|
|
||||||
struct InternalVersionT {
|
struct InternalVersionT {
|
||||||
constexpr InternalVersionT() = default;
|
constexpr InternalVersionT() = default;
|
||||||
constexpr explicit InternalVersionT(int64_t value) : value(value) {}
|
constexpr explicit InternalVersionT(int64_t value) : value(value) {}
|
||||||
@@ -99,20 +94,6 @@ private:
|
|||||||
uint32_t value;
|
uint32_t value;
|
||||||
};
|
};
|
||||||
thread_local InternalVersionT InternalVersionT::zero;
|
thread_local InternalVersionT InternalVersionT::zero;
|
||||||
#else
|
|
||||||
struct InternalVersionT {
|
|
||||||
constexpr InternalVersionT() = default;
|
|
||||||
constexpr explicit InternalVersionT(int64_t value) : value(value) {}
|
|
||||||
constexpr int64_t toInt64() const { return value; } // GCOVR_EXCL_LINE
|
|
||||||
constexpr auto operator<=>(const InternalVersionT &rhs) const = default;
|
|
||||||
constexpr bool operator==(const InternalVersionT &) const = default;
|
|
||||||
static const InternalVersionT zero;
|
|
||||||
|
|
||||||
private:
|
|
||||||
int64_t value;
|
|
||||||
};
|
|
||||||
const InternalVersionT InternalVersionT::zero{0};
|
|
||||||
#endif
|
|
||||||
|
|
||||||
struct Entry {
|
struct Entry {
|
||||||
InternalVersionT pointVersion;
|
InternalVersionT pointVersion;
|
||||||
@@ -527,13 +508,8 @@ std::string getSearchPath(Node *n);
|
|||||||
// Each node with an entry present gets a budget of kBytesPerKey. Node0 always
|
// Each node with an entry present gets a budget of kBytesPerKey. Node0 always
|
||||||
// has an entry present.
|
// has an entry present.
|
||||||
// Induction hypothesis is that each node's surplus is >= kMinNodeSurplus
|
// Induction hypothesis is that each node's surplus is >= kMinNodeSurplus
|
||||||
#if INTERNAL_VERSION_32_BIT
|
|
||||||
constexpr int kBytesPerKey = 112;
|
constexpr int kBytesPerKey = 112;
|
||||||
constexpr int kMinNodeSurplus = 80;
|
constexpr int kMinNodeSurplus = 80;
|
||||||
#else
|
|
||||||
constexpr int kBytesPerKey = 144;
|
|
||||||
constexpr int kMinNodeSurplus = 104;
|
|
||||||
#endif
|
|
||||||
constexpr int kMinChildrenNode3 = 2;
|
constexpr int kMinChildrenNode3 = 2;
|
||||||
constexpr int kMinChildrenNode16 = 4;
|
constexpr int kMinChildrenNode16 = 4;
|
||||||
constexpr int kMinChildrenNode48 = 17;
|
constexpr int kMinChildrenNode48 = 17;
|
||||||
@@ -1833,7 +1809,6 @@ bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end,
|
|||||||
uint64_t mask = vget_lane_u64(
|
uint64_t mask = vget_lane_u64(
|
||||||
vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(results), 4)), 0);
|
vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(results), 4)), 0);
|
||||||
|
|
||||||
#if INTERNAL_VERSION_32_BIT
|
|
||||||
uint32x4_t w4[4];
|
uint32x4_t w4[4];
|
||||||
memcpy(w4, vs, sizeof(w4));
|
memcpy(w4, vs, sizeof(w4));
|
||||||
uint32_t rv;
|
uint32_t rv;
|
||||||
@@ -1853,12 +1828,6 @@ bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end,
|
|||||||
|
|
||||||
uint64_t compared = vget_lane_u64(
|
uint64_t compared = vget_lane_u64(
|
||||||
vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(combined), 4)), 0);
|
vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(combined), 4)), 0);
|
||||||
#else
|
|
||||||
uint64_t compared = 0;
|
|
||||||
for (int i = 0; i < 16; ++i) {
|
|
||||||
compared |= uint64_t(vs[i] > readVersion) << (i << 2);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return !(compared & mask);
|
return !(compared & mask);
|
||||||
|
|
||||||
@@ -1871,17 +1840,11 @@ bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end,
|
|||||||
indices, _mm_max_epu8(indices, _mm_set1_epi8(end - begin))));
|
indices, _mm_max_epu8(indices, _mm_set1_epi8(end - begin))));
|
||||||
|
|
||||||
uint32_t compared = 0;
|
uint32_t compared = 0;
|
||||||
#if INTERNAL_VERSION_32_BIT
|
|
||||||
if constexpr (kAVX512) {
|
if constexpr (kAVX512) {
|
||||||
compared = compare16_32bit_avx512(vs, readVersion);
|
compared = compare16_32bit_avx512(vs, readVersion);
|
||||||
} else {
|
} else {
|
||||||
compared = compare16_32bit(vs, readVersion);
|
compared = compare16_32bit(vs, readVersion);
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
for (int i = 0; i < 16; ++i) {
|
|
||||||
compared |= (vs[i] > readVersion) << i;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
return !(compared & mask);
|
return !(compared & mask);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
@@ -1914,7 +1877,7 @@ scan16(const InternalVersionT *vs, int begin, int end,
|
|||||||
assert(0 <= end && end <= 16);
|
assert(0 <= end && end <= 16);
|
||||||
assert(begin <= end);
|
assert(begin <= end);
|
||||||
|
|
||||||
#if INTERNAL_VERSION_32_BIT && defined(HAS_ARM_NEON)
|
#if defined(HAS_ARM_NEON)
|
||||||
uint32x4_t w4[4];
|
uint32x4_t w4[4];
|
||||||
memcpy(w4, vs, sizeof(w4));
|
memcpy(w4, vs, sizeof(w4));
|
||||||
uint32_t rv;
|
uint32_t rv;
|
||||||
@@ -1938,7 +1901,7 @@ scan16(const InternalVersionT *vs, int begin, int end,
|
|||||||
conflict &= end == 16 ? -1 : (uint64_t(1) << (end << 2)) - 1;
|
conflict &= end == 16 ? -1 : (uint64_t(1) << (end << 2)) - 1;
|
||||||
conflict >>= begin << 2;
|
conflict >>= begin << 2;
|
||||||
return !conflict;
|
return !conflict;
|
||||||
#elif INTERNAL_VERSION_32_BIT && defined(HAS_AVX)
|
#elif defined(HAS_AVX)
|
||||||
uint32_t conflict;
|
uint32_t conflict;
|
||||||
if constexpr (kAVX512) {
|
if constexpr (kAVX512) {
|
||||||
conflict = compare16_32bit_avx512(vs, readVersion);
|
conflict = compare16_32bit_avx512(vs, readVersion);
|
||||||
@@ -2886,9 +2849,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
|||||||
void addWrites(const WriteRange *writes, int count, int64_t writeVersion) {
|
void addWrites(const WriteRange *writes, int count, int64_t writeVersion) {
|
||||||
assert(writeVersion >= newestVersionFullPrecision);
|
assert(writeVersion >= newestVersionFullPrecision);
|
||||||
newestVersionFullPrecision = writeVersion;
|
newestVersionFullPrecision = writeVersion;
|
||||||
#if INTERNAL_VERSION_32_BIT
|
|
||||||
InternalVersionT::zero = oldestVersion;
|
InternalVersionT::zero = oldestVersion;
|
||||||
#endif
|
|
||||||
for (int i = 0; i < count; ++i) {
|
for (int i = 0; i < count; ++i) {
|
||||||
const auto &w = writes[i];
|
const auto &w = writes[i];
|
||||||
auto begin = std::span<const uint8_t>(w.begin.p, w.begin.len);
|
auto begin = std::span<const uint8_t>(w.begin.p, w.begin.len);
|
||||||
@@ -2910,9 +2871,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
|||||||
assert(o >= oldestVersionFullPrecision);
|
assert(o >= oldestVersionFullPrecision);
|
||||||
this->oldestVersionFullPrecision = o;
|
this->oldestVersionFullPrecision = o;
|
||||||
this->oldestVersion = oldestVersion;
|
this->oldestVersion = oldestVersion;
|
||||||
#if INTERNAL_VERSION_32_BIT
|
|
||||||
InternalVersionT::zero = oldestVersion;
|
InternalVersionT::zero = oldestVersion;
|
||||||
#endif
|
|
||||||
#ifdef NDEBUG
|
#ifdef NDEBUG
|
||||||
// This is here for performance reasons, since we want to amortize the cost
|
// This is here for performance reasons, since we want to amortize the cost
|
||||||
// of storing the search path as a string. In tests, we want to exercise the
|
// of storing the search path as a string. In tests, we want to exercise the
|
||||||
|
Reference in New Issue
Block a user