diff --git a/ConflictSet.cpp b/ConflictSet.cpp index 7deb0f8..6575860 100644 --- a/ConflictSet.cpp +++ b/ConflictSet.cpp @@ -1719,6 +1719,9 @@ bool checkMaxBetweenExclusive(Node *n, int begin, int end, assume(-1 <= end); assume(end <= 256); assume(begin < end); + + assert(!(begin == -1 && end == 256)); + { int c = getChildGeq(n, begin + 1); if (c >= 0 && c < end) { @@ -1755,17 +1758,35 @@ bool checkMaxBetweenExclusive(Node *n, int begin, int end, } break; case Type_Node16: { auto *self = static_cast(n); + +#ifdef HAS_ARM_NEON + assert(end - begin < 256); + uint8x16_t indices; + memcpy(&indices, self->index, sizeof(self->index)); + // 0xff for each in bounds + auto results = + vcltq_u8(vsubq_u8(indices, vdupq_n_u8(begin)), vdupq_n_u8(end - begin)); + // 0xf for each 0xff + uint64_t mask = vget_lane_u64( + vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(results), 4)), 0); + + uint64_t compared = 0; + for (int i = 0; i < 16; ++i) { + compared |= + (uint64_t(self->childMaxVersion[i] > readVersion) << (i << 2)); + } + return !(compared & mask); +#else uint32_t compared = 0; for (int i = 0; i < 16; ++i) { compared |= ((self->childMaxVersion[i] > readVersion) << i); } - uint32_t mask = 0; for (int i = 0; i < 16; ++i) { mask |= inBounds(self->index[i]) << i; } - return !(compared & mask); +#endif } break; case Type_Node48: { auto *self = static_cast(n);