// Patch summary (explanatory preamble; everything from "diff --git" onward is
// the unmodified patch text).
//
// Target file: ConflictSet.cpp. Three changes are visible in the hunks below:
//
// 1. Node48::kMaxOfMaxPageSize goes from 8 to 16. kMaxOfMaxShift and
//    kMaxOfMaxTotalPages are derived from it in the same struct, so they change
//    with it.
//
// 2. A new free function scan16(vs, is, begin, end, readVersion) is added.
//    It always inspects exactly 16 entries of both `vs` and `is` and returns
//    true when no position i has vs[i] > readVersion while is[i] is in bounds.
//    In the portable path "in bounds" is the unsigned test
//    is[i] - begin < end - begin, i.e. begin <= is[i] < end when
//    0 <= begin <= end. The HAS_ARM_NEON path performs the same subtraction and
//    comparison on 8-bit lanes, packs each lane's result into 4 mask bits, and
//    asserts end - begin < 256.
//
// 3. checkMaxBetweenExclusive: the Type_Node16 case now delegates to scan16
//    with self->childMaxVersion and self->index instead of carrying its own
//    NEON/portable copies. The Type_Node48 case calls scan16 once per page
//    whose maxOfMax entry exceeds readVersion and returns false on the first
//    failing page (previously it accumulated into `result` over all pages).
//    A static_assert pins Node48::kMaxOfMaxPageSize to 16 so a page matches
//    scan16's fixed width.
//
// NOTE(review): the context comment describes the exclusive range (begin, end),
// while scan16 itself accepts is[i] == begin. Any adjustment of `begin`
// must happen in the caller; that code is not part of this patch -- confirm.
// NOTE(review): the NEON path passes `begin` and `end - begin` to
// vdupq_n_u8, so both are narrowed to 8 bits; the assert only covers
// end - begin. Confirm callers never pass values outside that range.
// NOTE(review): scan16 is declared without `static`/`inline`; if it is meant
// to be private to ConflictSet.cpp, confirm the intended linkage.
// NOTE(review): `static_cast(n)` appears without template arguments in the
// hunks below, and the patch text has lost its line breaks; both look like
// extraction damage -- verify against the original patch before applying.
diff --git a/ConflictSet.cpp b/ConflictSet.cpp index 6575860..5f6f5fb 100644 --- a/ConflictSet.cpp +++ b/ConflictSet.cpp @@ -293,7 +293,7 @@ struct Node48 : Node { Node *children[kMaxNodes]; int64_t childMaxVersion[kMaxNodes]; uint8_t reverseIndex[kMaxNodes]; - constexpr static int kMaxOfMaxPageSize = 8; + constexpr static int kMaxOfMaxPageSize = 16; constexpr static int kMaxOfMaxShift = std::countr_zero(uint32_t(kMaxOfMaxPageSize)); constexpr static int kMaxOfMaxTotalPages = kMaxNodes / kMaxOfMaxPageSize; @@ -1709,6 +1709,47 @@ downLeftSpine: } } +// Returns true if all in-bounds vs are <= readVersion +bool scan16(const int64_t *vs, const uint8_t *is, int begin, int end, + int64_t readVersion) { + +#ifdef HAS_ARM_NEON + + assert(end - begin < 256); + uint8x16_t indices; + memcpy(&indices, is, 16); + // 0xff for each in bounds + auto results = + vcltq_u8(vsubq_u8(indices, vdupq_n_u8(begin)), vdupq_n_u8(end - begin)); + // 0xf for each 0xff + uint64_t mask = vget_lane_u64( + vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(results), 4)), 0); + + uint64_t compared = 0; + for (int i = 0; i < 16; ++i) { + compared |= (uint64_t(vs[i] > readVersion) << (i << 2)); + } + return !(compared & mask); + +#else + + const unsigned shiftUpperBound = end - begin; + const unsigned shiftAmount = begin; + auto inBounds = [&](unsigned c) { return c - shiftAmount < shiftUpperBound; }; + + uint32_t compared = 0; + for (int i = 0; i < 16; ++i) { + compared |= ((vs[i] > readVersion) << i); + } + uint32_t mask = 0; + for (int i = 0; i < 16; ++i) { + mask |= inBounds(is[i]) << i; + } + return !(compared & mask); + +#endif +} + // Return whether or not the max version among all keys starting with the search // path of n + [child], where child in (begin, end) is <= readVersion. 
Does not // account for the range version of firstGt(searchpath(n) + [end - 1]) @@ -1759,49 +1800,23 @@ bool checkMaxBetweenExclusive(Node *n, int begin, int end, case Type_Node16: { auto *self = static_cast(n); -#ifdef HAS_ARM_NEON - assert(end - begin < 256); - uint8x16_t indices; - memcpy(&indices, self->index, sizeof(self->index)); - // 0xff for each in bounds - auto results = - vcltq_u8(vsubq_u8(indices, vdupq_n_u8(begin)), vdupq_n_u8(end - begin)); - // 0xf for each 0xff - uint64_t mask = vget_lane_u64( - vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(results), 4)), 0); + return scan16(self->childMaxVersion, self->index, begin, end, readVersion); - uint64_t compared = 0; - for (int i = 0; i < 16; ++i) { - compared |= - (uint64_t(self->childMaxVersion[i] > readVersion) << (i << 2)); - } - return !(compared & mask); -#else - uint32_t compared = 0; - for (int i = 0; i < 16; ++i) { - compared |= ((self->childMaxVersion[i] > readVersion) << i); - } - uint32_t mask = 0; - for (int i = 0; i < 16; ++i) { - mask |= inBounds(self->index[i]) << i; - } - return !(compared & mask); -#endif } break; case Type_Node48: { auto *self = static_cast(n); // Check all pages - bool result = true; + static_assert(Node48::kMaxOfMaxPageSize == 16); for (int i = 0; i < Node48::kMaxOfMaxTotalPages; ++i) { if (self->maxOfMax[i] > readVersion) { - for (int j = 0; j < Node48::kMaxOfMaxPageSize; ++j) { - int k = (i << Node48::kMaxOfMaxShift) + j; - result &= !((self->childMaxVersion[k] > readVersion) & - inBounds(self->reverseIndex[k])); + if (!scan16(self->childMaxVersion + (i << Node48::kMaxOfMaxShift), + self->reverseIndex + (i << Node48::kMaxOfMaxShift), begin, + end, readVersion)) { + return false; } } } - return result; + return true; } case Type_Node256: { auto *self = static_cast(n);