Add an opt-in 32-bit internal version mode and a dense scan16 overload.

* CMakeLists.txt: new USE_32_BIT_VERSIONS option; when ON it defines
  INTERNAL_VERSION_32_BIT=1.
* ConflictSet.cpp: INTERNAL_VERSION_32_BIT becomes overridable, kBytesPerKey
  and kMinNodeSurplus get per-mode values, and the Type_Node256 case of
  checkMaxBetweenExclusive calls the new scan16(vs, begin, end, readVersion).
* Jenkinsfile: new '32-bit versions' stage building with
  -DUSE_32_BIT_VERSIONS=ON.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 221f969..0db101d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -59,6 +59,10 @@ cmake_pop_check_state()
 
 option(USE_SIMD_FALLBACK
        "Use fallback implementations of functions that use SIMD" OFF)
+option(
+  USE_32_BIT_VERSIONS
+  "Store 32 bit versions internally, and rely on versions never being different by more than 2e9"
+  OFF)
 
 # This is encouraged according to
 # https://valgrind.org/docs/manual/manual-core-adv.html#manual-core-adv.clientreq
@@ -103,6 +107,10 @@ if(NOT USE_SIMD_FALLBACK)
   endif()
 endif()
 
+if(USE_32_BIT_VERSIONS)
+  add_compile_definitions(INTERNAL_VERSION_32_BIT=1)
+endif()
+
 set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "")
 
 add_library(${PROJECT_NAME}-object OBJECT ConflictSet.cpp)
diff --git a/ConflictSet.cpp b/ConflictSet.cpp
index 57cbbcf..ecb250a 100644
--- a/ConflictSet.cpp
+++ b/ConflictSet.cpp
@@ -72,7 +72,9 @@ constexpr void removeKey(struct Node *) {}
 
 // ==================== BEGIN IMPLEMENTATION ====================
 
+#ifndef INTERNAL_VERSION_32_BIT
 #define INTERNAL_VERSION_32_BIT 0
+#endif
 
 #if INTERNAL_VERSION_32_BIT
 struct InternalVersionT {
@@ -582,9 +584,14 @@ std::string getSearchPath(Node *n);
 
 // Each node with an entry present gets a budget of kBytesPerKey. Node0 always
 // has an entry present.
-constexpr int kBytesPerKey = 144;
 // Induction hypothesis is that each node's surplus is >= kMinNodeSurplus
+#if INTERNAL_VERSION_32_BIT
+constexpr int kBytesPerKey = 112;
+constexpr int kMinNodeSurplus = 80;
+#else
+constexpr int kBytesPerKey = 144;
 constexpr int kMinNodeSurplus = 104;
+#endif
 constexpr int kMinChildrenNode3 = 2;
 constexpr int kMinChildrenNode16 = 4;
 constexpr int kMinChildrenNode48 = 17;
@@ -1759,10 +1766,12 @@ downLeftSpine:
   }
 }
 
-// Returns true if all in-bounds vs are <= readVersion
+// Returns true if vs[i] <= readVersion for all i such that begin <= is[i] < end
+// Preconditions: begin <= end, end - begin < 256
 bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end,
             InternalVersionT readVersion) {
 
+  assert(begin <= end);
   assert(end - begin < 256);
 
 #ifdef HAS_ARM_NEON
@@ -1781,13 +1790,14 @@ bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end,
   memcpy(w4, vs, sizeof(w4));
   uint32_t rv;
   memcpy(&rv, &readVersion, sizeof(rv));
+  const auto rvVec = vdupq_n_u32(rv);
 
   int32x4_t z;
   memset(&z, 0, sizeof(z));
 
   uint16x4_t conflicting[4];
   for (int i = 0; i < 4; ++i) {
-    conflicting[i] = vmovn_u32(vcgtq_s32(vsubq_u32(w4[i], vdupq_n_u32(rv)), z));
+    conflicting[i] = vmovn_u32(vcgtq_s32(vsubq_u32(w4[i], rvVec), z));
   }
   auto combined =
       vcombine_u8(vmovn_u16(vcombine_u16(conflicting[0], conflicting[1])),
@@ -1837,6 +1847,49 @@ bool scan16(const InternalVersionT *vs, const uint8_t *is, int begin, int end,
 #endif
 }
 
+// Returns true if vs[i] <= readVersion for all i such that begin <= i < end
+// Preconditions: 0 <= begin <= end <= 16, begin < 16, vs[0..15] are readable
+bool scan16(const InternalVersionT *vs, int begin, int end,
+            InternalVersionT readVersion) {
+  assert(0 <= begin && begin < 16);
+  assert(0 <= end && end <= 16);
+  assert(begin <= end);
+
+#if INTERNAL_VERSION_32_BIT && defined(HAS_ARM_NEON)
+  uint32x4_t w4[4];
+  memcpy(w4, vs, sizeof(w4));
+  uint32_t rv;
+  memcpy(&rv, &readVersion, sizeof(rv));
+  const auto rvVec = vdupq_n_u32(rv);
+
+  int32x4_t z;
+  memset(&z, 0, sizeof(z));
+
+  uint16x4_t conflicting[4];
+  for (int i = 0; i < 4; ++i) {
+    conflicting[i] = vmovn_u32(vcgtq_s32(vsubq_u32(w4[i], rvVec), z));
+  }
+  auto combined =
+      vcombine_u8(vmovn_u16(vcombine_u16(conflicting[0], conflicting[1])),
+                  vmovn_u16(vcombine_u16(conflicting[2], conflicting[3])));
+
+  uint64_t conflict = vget_lane_u64(
+      vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(combined), 4)), 0);
+
+  conflict &= end == 16 ? -1 : (uint64_t(1) << (end << 2)) - 1;
+  conflict >>= begin << 2;
+  return !conflict;
+#else
+  uint64_t conflict = 0;
+  for (int i = 0; i < 16; ++i) {
+    conflict |= (vs[i] > readVersion) << i;
+  }
+  conflict &= (1 << end) - 1;
+  conflict >>= begin;
+  return !conflict;
+#endif
+}
+
 // Return whether or not the max version among all keys starting with the search
 // path of n + [child], where child in (begin, end) is <= readVersion. Does not
 // account for the range version of firstGt(searchpath(n) + [end - 1])
@@ -1907,68 +1960,45 @@ bool checkMaxBetweenExclusive(Node *n, int begin, int end,
     return true;
   }
   case Type_Node256: {
+    static_assert(Node256::kMaxOfMaxTotalPages == 16);
     auto *self = static_cast<Node256 *>(n);
     if (end <= 0) {
       return true;
     }
     const int firstPage = begin >> Node256::kMaxOfMaxShift;
     const int lastPage = (end - 1) >> Node256::kMaxOfMaxShift;
+    // Check the only page if there's only one
     if (firstPage == lastPage) {
       if (self->maxOfMax[firstPage] <= readVersion) {
         return true;
       }
-      uint64_t conflict = 0;
-      // Check all in page
-      for (int i = 0; i < Node256::kMaxOfMaxPageSize; ++i) {
-        conflict |=
-            (self->childMaxVersion[(firstPage << Node256::kMaxOfMaxShift) + i] >
-             readVersion)
-            << i;
-      }
-      // Mask away out of bounds
       const int intraPageBegin = begin & (Node256::kMaxOfMaxPageSize - 1);
       const int intraPageEnd = end - (lastPage << Node256::kMaxOfMaxShift);
-      conflict &= (1 << intraPageEnd) - 1;
-      conflict >>= intraPageBegin;
-      return !conflict;
+      return scan16(self->childMaxVersion +
+                        (firstPage << Node256::kMaxOfMaxShift),
+                    intraPageBegin, intraPageEnd, readVersion);
     }
     // Check the first page
     if (self->maxOfMax[firstPage] > readVersion) {
-      uint64_t conflict = 0;
-      for (int i = 0; i < Node256::kMaxOfMaxPageSize; ++i) {
-        int j = (firstPage << Node256::kMaxOfMaxShift) + i;
-        conflict |= (self->childMaxVersion[j] > readVersion) << i;
-      }
       const int intraPageBegin = begin & (Node256::kMaxOfMaxPageSize - 1);
-      conflict >>= intraPageBegin;
-      if (conflict) {
+      if (!scan16(self->childMaxVersion +
+                      (firstPage << Node256::kMaxOfMaxShift),
+                  intraPageBegin, 16, readVersion)) {
         return false;
       }
     }
     // Check the last page
     if (self->maxOfMax[lastPage] > readVersion) {
-      uint64_t conflict = 0;
-      for (int i = 0; i < Node256::kMaxOfMaxPageSize; ++i) {
-        int j = (lastPage << Node256::kMaxOfMaxShift) + i;
-        conflict |= (self->childMaxVersion[j] > readVersion) << i;
-      }
       const int intraPageEnd = end - (lastPage << Node256::kMaxOfMaxShift);
-      conflict &= (1 << intraPageEnd) - 1;
-      if (conflict) {
+      if (!scan16(self->childMaxVersion + (lastPage << Node256::kMaxOfMaxShift),
+                  0, intraPageEnd, readVersion)) {
         return false;
       }
     }
-    uint64_t conflict = 0;
-    // Check all possible inner pages
-    for (int i = 1; i < Node256::kMaxOfMaxTotalPages - 1; ++i) {
-      conflict |= (self->maxOfMax[i] > readVersion) << i;
-    }
-    // Only keep inner pages
+    // Check inner pages
     const int innerPageBegin = (begin >> Node256::kMaxOfMaxShift) + 1;
     const int innerPageEnd = (end - 1) >> Node256::kMaxOfMaxShift;
-    conflict &= (1 << innerPageEnd) - 1;
-    conflict >>= innerPageBegin;
-    return !conflict;
+    return scan16(self->maxOfMax, innerPageBegin, innerPageEnd, readVersion);
   }
   default:                   // GCOVR_EXCL_LINE
     __builtin_unreachable(); // GCOVR_EXCL_LINE
diff --git a/Jenkinsfile b/Jenkinsfile
index 096b1bb..cd71ed6 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -59,6 +59,17 @@ pipeline {
                 CleanBuildAndTest("-DUSE_SIMD_FALLBACK=ON")
             }
         }
+        stage('32-bit versions') {
+            agent {
+                dockerfile {
+                    args '-v /home/jenkins/ccache:/ccache'
+                    reuseNode true
+                }
+            }
+            steps {
+                CleanBuildAndTest("-DUSE_32_BIT_VERSIONS=ON")
+            }
+        }
         stage('Release [gcc]') {
             agent {
                 dockerfile {