From 7f86fdee668444eed4c2e0ba94ffa12ce8a61182 Mon Sep 17 00:00:00 2001
From: Andrew Noyes
Date: Wed, 21 Aug 2024 13:58:09 -0700
Subject: [PATCH] Test 64 bit versions

Keep 32 bit versions the default though
---
 ConflictSet.cpp | 80 ++++++++++++++++++++++++++++++++++++++++++-------
 Jenkinsfile     | 11 +++++++
 2 files changed, 80 insertions(+), 11 deletions(-)

diff --git a/ConflictSet.cpp b/ConflictSet.cpp
index 4aeb087..dc31cc8 100644
--- a/ConflictSet.cpp
+++ b/ConflictSet.cpp
@@ -87,22 +87,42 @@ constexpr int64_t kMaxCorrectVersionWindow =
     std::numeric_limits::max();
 static_assert(kNominalVersionWindow <= kMaxCorrectVersionWindow);
 
+#ifndef USE_64_BIT
+#define USE_64_BIT 0
+#endif
+
 struct InternalVersionT {
   constexpr InternalVersionT() = default;
   constexpr explicit InternalVersionT(int64_t value) : value(value) {}
   constexpr int64_t toInt64() const { return value; } // GCOVR_EXCL_LINE
   constexpr auto operator<=>(const InternalVersionT &rhs) const {
+#if USE_64_BIT
+    return value <=> rhs.value;
+#else
     // Maintains ordering after overflow, as long as the full-precision
     // versions are within `kMaxCorrectVersionWindow` of each other.
     return int32_t(value - rhs.value) <=> 0;
+#endif
   }
   constexpr bool operator==(const InternalVersionT &) const = default;
+#if USE_64_BIT
+  static const InternalVersionT zero;
+#else
   static thread_local InternalVersionT zero;
+#endif
 
 private:
+#if USE_64_BIT
+  int64_t value;
+#else
   uint32_t value;
+#endif
 };
+#if USE_64_BIT
+const InternalVersionT InternalVersionT::zero{0};
+#else
 thread_local InternalVersionT InternalVersionT::zero;
+#endif
 
 struct Entry {
   InternalVersionT pointVersion;
@@ -518,8 +538,13 @@ std::string getSearchPath(Node *n);
 // Each node with an entry present gets a budget of kBytesPerKey. Node0 always
 // has an entry present.
 // Induction hypothesis is that each node's surplus is >= kMinNodeSurplus
+#if USE_64_BIT
+constexpr int kBytesPerKey = 144;
+constexpr int kMinNodeSurplus = 104;
+#else
 constexpr int kBytesPerKey = 112;
 constexpr int kMinNodeSurplus = 80;
+#endif
 // Count the entry itself as a child
 constexpr int kMinChildrenNode0 = 1;
 constexpr int kMinChildrenNode3 = 2;
@@ -724,9 +749,13 @@ struct WriteContext {
     int64_t write_bytes;
   } accum;
 
+#if USE_64_BIT
+  static constexpr InternalVersionT zero{0};
+#else
   // Cache a copy of InternalVersionT::zero, so we don't need to do the TLS
   // lookup as often.
   InternalVersionT zero;
+#endif
 
   WriteContext() { memset(&accum, 0, sizeof(accum)); }
@@ -1563,6 +1592,9 @@ void rezero16(InternalVersionT *vs, InternalVersionT zero) {
   }
 }
 
+#if USE_64_BIT
+void rezero(Node *, InternalVersionT) {}
+#else
 void rezero(Node *n, InternalVersionT z) {
 #if DEBUG_VERBOSE && !defined(NDEBUG)
   fprintf(stderr, "rezero to %" PRId64 ": %s\n", z.toInt64(),
@@ -1605,6 +1637,7 @@ void rezero(Node *n, InternalVersionT z) {
     __builtin_unreachable(); // GCOVR_EXCL_LINE
   }
 }
+#endif
 
 void mergeWithChild(Node *&self, WriteContext *tls, ConflictSet::Impl *impl,
                     Node *&dontInvalidate, Node3 *self3) {
@@ -1987,7 +2020,14 @@ downLeftSpine:
 }
 
 #ifdef HAS_AVX
-uint32_t compare16_32bit(const InternalVersionT *vs, InternalVersionT rv) {
+uint32_t compare16(const InternalVersionT *vs, InternalVersionT rv) {
+#if USE_64_BIT
+  uint32_t compared = 0;
+  for (int i = 0; i < 16; ++i) {
+    compared |= (vs[i] > rv) << i;
+  }
+  return compared;
+#else
   uint32_t compared = 0;
   __m128i w[4]; // GCOVR_EXCL_LINE
   memcpy(w, vs, sizeof(w));
@@ -2001,15 +2041,26 @@ uint32_t compare16_32bit(const InternalVersionT *vs, InternalVersionT rv) {
         << (i * 4);
   }
   return compared;
+#endif
 }
 
 __attribute__((target("avx512f"))) uint32_t
-compare16_32bit_avx512(const InternalVersionT *vs, InternalVersionT rv) {
+compare16_avx512(const InternalVersionT *vs, InternalVersionT rv) {
+#if USE_64_BIT
+  int64_t r;
+  memcpy(&r, &rv, sizeof(r));
+  uint32_t low =
+      _mm512_cmpgt_epi64_mask(_mm512_loadu_epi64(vs), _mm512_set1_epi64(r));
+  uint32_t high =
+      _mm512_cmpgt_epi64_mask(_mm512_loadu_epi64(vs + 8), _mm512_set1_epi64(r));
+  return low | (high << 8);
+#else
   uint32_t r;
   memcpy(&r, &rv, sizeof(r));
   return _mm512_cmpgt_epi32_mask(
       _mm512_sub_epi32(_mm512_loadu_epi32(vs), _mm512_set1_epi32(r)),
       _mm512_setzero_epi32());
+#endif
 }
 #endif
 
@@ -2066,9 +2117,9 @@ bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end,
 
   uint32_t compared = 0;
   if constexpr (kAVX512) {
-    compared = compare16_32bit_avx512(vs, readVersion); // GCOVR_EXCL_LINE
+    compared = compare16_avx512(vs, readVersion);
   } else {
-    compared = compare16_32bit(vs, readVersion); // GCOVR_EXCL_LINE
+    compared = compare16(vs, readVersion);
   }
 
   return !(compared & mask);
@@ -2127,9 +2178,9 @@ bool scan16(const InternalVersionT *vs, int begin, int end,
 #elif defined(HAS_AVX)
   uint32_t conflict;
   if constexpr (kAVX512) {
-    conflict = compare16_32bit_avx512(vs, readVersion); // GCOVR_EXCL_LINE
+    conflict = compare16_avx512(vs, readVersion);
   } else {
-    conflict = compare16_32bit(vs, readVersion); // GCOVR_EXCL_LINE
+    conflict = compare16(vs, readVersion);
   }
   conflict &= (1 << end) - 1;
   conflict >>= begin;
@@ -2264,12 +2315,9 @@ bool checkMaxBetweenExclusiveImpl(Node *n, int begin, int end,
 
   uint32_t compared = 0;
   if constexpr (kAVX512) {
-    compared = // GCOVR_EXCL_LINE
-        compare16_32bit_avx512(self->childMaxVersion, // GCOVR_EXCL_LINE
-                               readVersion); // GCOVR_EXCL_LINE
+    compared = compare16_avx512(self->childMaxVersion, readVersion);
   } else {
-    compared = compare16_32bit(self->childMaxVersion,
-                               readVersion); // GCOVR_EXCL_LINE
+    compared = compare16(self->childMaxVersion, readVersion);
   }
 
   return !(compared & mask) && firstRangeOk;
@@ -3137,10 +3185,12 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
   }
 
   void addWrites(const WriteRange *writes, int count, int64_t writeVersion) {
+#if !USE_64_BIT
     // There could be other conflict sets in the same thread. We need
     // InternalVersionT::zero to be correct for this conflict set for the
     // lifetime of the current call frame.
     InternalVersionT::zero = tls.zero = oldestVersion;
+#endif
 
     assert(writeVersion >= newestVersionFullPrecision);
     assert(tls.accum.entries_erased == 0);
@@ -3255,7 +3305,9 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
     InternalVersionT oldestVersion{o};
     this->oldestVersionFullPrecision = o;
     this->oldestVersion = oldestVersion;
+#if !USE_64_BIT
     InternalVersionT::zero = tls.zero = oldestVersion;
+#endif
 #ifdef NDEBUG
     // This is here for performance reasons, since we want to amortize the cost
     // of storing the search path as a string. In tests, we want to exercise the
@@ -3304,7 +3356,9 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
     root->entry.pointVersion = this->oldestVersion;
     root->entry.rangeVersion = this->oldestVersion;
 
+#if !USE_64_BIT
     InternalVersionT::zero = tls.zero = this->oldestVersion;
+#endif
     // Intentionally not resetting totalBytes
   }
@@ -3751,6 +3805,9 @@ Node *firstGeq(Node *n, std::string_view key) {
       n, std::span((const uint8_t *)key.data(), key.size()));
 }
 
+#if USE_64_BIT
+void checkVersionsGeqOldestExtant(Node *, InternalVersionT) {}
+#else
 void checkVersionsGeqOldestExtant(Node *n,
                                   InternalVersionT oldestExtantVersion) {
   if (n->entryPresent) {
@@ -3794,6 +3851,7 @@ void checkVersionsGeqOldestExtant(Node *n,
     abort();
   }
 }
+#endif
 
 [[maybe_unused]] InternalVersionT
 checkMaxVersion(Node *root, Node *node, InternalVersionT oldestVersion,
diff --git a/Jenkinsfile b/Jenkinsfile
index 7fa807d..22f8f63 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -48,6 +48,17 @@ pipeline {
         recordIssues(tools: [clang()])
       }
     }
+    stage('64 bit versions') {
+      agent {
+        dockerfile {
+          args '-v /home/jenkins/ccache:/ccache'
+          reuseNode true
+        }
+      }
+      steps {
+        CleanBuildAndTest("-DCMAKE_CXX_FLAGS=-DUSE_64_BIT=1")
+      }
+    }
     stage('Debug') {
       agent {
         dockerfile {
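
As background for the 32-bit path the patch keeps as the default: `int32_t(value - rhs.value) <=> 0` orders versions correctly even after the stored uint32_t wraps, provided the full-precision versions stay within `kMaxCorrectVersionWindow` of each other. The following standalone sketch (not part of ConflictSet.cpp; the names here are illustrative only) demonstrates that property across a 2^32 overflow boundary. The 64-bit configuration avoids the window restriction entirely by comparing full int64_t values, which is consistent with its larger kBytesPerKey/kMinNodeSurplus budgets and its no-op rezero and checkVersionsGeqOldestExtant paths above.

    // Standalone sketch (illustrative only): comparing uint32_t-truncated
    // versions via the sign of their 32-bit difference matches the
    // full-precision ordering across an overflow boundary.
    #include <cassert>
    #include <cstdint>

    // Mirrors the 32-bit path's `int32_t(value - rhs.value) <=> 0`.
    int compareTruncated(uint32_t lhs, uint32_t rhs) {
      int32_t diff = int32_t(lhs - rhs);
      return diff < 0 ? -1 : diff > 0 ? 1 : 0;
    }

    int main() {
      // Two full-precision versions that straddle the 2^32 wrap point.
      int64_t a = (int64_t(1) << 32) - 5; // truncates to 0xFFFFFFFB
      int64_t b = (int64_t(1) << 32) + 5; // truncates to 0x00000005
      // Numerically uint32_t(a) > uint32_t(b), but the wrapped difference
      // still reports a < b, matching the full-precision order.
      assert(compareTruncated(uint32_t(a), uint32_t(b)) < 0);
      assert(compareTruncated(uint32_t(b), uint32_t(a)) > 0);
      assert(compareTruncated(uint32_t(a), uint32_t(a)) == 0);
      return 0;
    }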