diff --git a/CMakeLists.txt b/CMakeLists.txt index 05ece1f..a4dd4a4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,7 +50,7 @@ add_library(${PROJECT_NAME} SHARED ConflictSet.cpp) target_compile_options(${PROJECT_NAME} PRIVATE -fPIC -fno-exceptions -fvisibility=hidden) target_link_options(${PROJECT_NAME} PRIVATE $<$>: - -nodefaultlibs -lc>) + -nodefaultlibs -lc -lgcc_s>) if(NOT APPLE) target_link_options(${PROJECT_NAME} PRIVATE diff --git a/ConflictSet.cpp b/ConflictSet.cpp index 114d515..2f7a538 100644 --- a/ConflictSet.cpp +++ b/ConflictSet.cpp @@ -64,17 +64,7 @@ struct Node16 : Node { Node16() { this->type = Type::Node16; } }; -struct Node48 : Node { - int8_t nextFree = 0; - int8_t index[256]; - Node *children[48] = {}; - Node48() { - this->type = Type::Node48; - memset(index, -1, 256); - } -}; - -struct PointerSet { +struct BitSet { bool test(int i) const { assert(0 <= i); assert(i < 256); @@ -132,8 +122,19 @@ private: __uint128_t hi = 0; }; +struct Node48 : Node { + BitSet bitSet; + Node *children[48] = {}; + int8_t nextFree = 0; + int8_t index[256]; + Node48() { + this->type = Type::Node48; + memset(index, -1, 256); + } +}; + struct Node256 : Node { - PointerSet pointerSet; + BitSet bitSet; Node *children[256] = {}; Node256() { this->type = Type::Node256; } }; @@ -205,54 +206,6 @@ int getNodeIndex(Node16 *self, uint8_t index) { #endif } -#ifdef HAS_AVX -int firstNonNeg1(const int8_t x[16]) { - __m128i key_vec = _mm_set1_epi8(-1); - __m128i indices; - memcpy(&indices, x, 16); - __m128i results = _mm_cmpeq_epi8(key_vec, indices); - uint32_t bitfield = _mm_movemask_epi8(results) ^ 0xffff; - if (bitfield == 0) - return -1; - return std::countr_zero(bitfield); -} - -int lastNonNeg1(const int8_t x[16]) { - __m128i key_vec = _mm_set1_epi8(-1); - __m128i indices; - memcpy(&indices, x, 16); - __m128i results = _mm_cmpeq_epi8(key_vec, indices); - uint32_t bitfield = _mm_movemask_epi8(results) ^ 0xffff; - if (bitfield == 0) - return -1; - return 31 - std::countl_zero(bitfield); -} -#endif - -#ifdef HAS_ARM_NEON -int firstNonNeg1(const int8_t x[16]) { - uint8x16_t indices; - memcpy(&indices, x, 16); - uint16x8_t results = vreinterpretq_u16_u8(vceqq_u8(vdupq_n_u8(-1), indices)); - uint64_t bitfield = - ~vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0); - if (bitfield == 0) - return -1; - return std::countr_zero(bitfield) / 4; -} - -int lastNonNeg1(const int8_t x[16]) { - uint8x16_t indices; - memcpy(&indices, x, 16); - uint16x8_t results = vreinterpretq_u16_u8(vceqq_u8(vdupq_n_u8(-1), indices)); - uint64_t bitfield = - ~vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0); - if (bitfield == 0) - return -1; - return 15 - std::countl_zero(bitfield) / 4; -} -#endif - [[maybe_unused]] Node *getChild(Node *self, uint8_t index) { if (self->type == Type::Node4) { auto *self4 = static_cast(self); @@ -372,36 +325,10 @@ int getChildGeq(Node *self, int child) { #endif } else if (self->type == Type::Node48) { auto *self48 = static_cast(self); -#if defined(HAS_AVX) || defined(HAS_ARM_NEON) - int i = child; - for (; (i & 0xf) != 0; ++i) { - if (self48->index[i] >= 0) { - assert(self48->children[self48->index[i]] != nullptr); - return i; - } - } - for (; i < 256; i += 16) { - auto result = firstNonNeg1(self48->index + i); - if (result != -1) { - return i + result; - } - } -#else - for (int i = child; i < 256; ++i) { - if (self48->index[i] >= 0) { - assert(self48->children[self48->index[i]] != nullptr); - return i; - } - } -#endif + return self48->bitSet.firstSetGeq(child); } else { auto *self256 = static_cast(self); -#ifndef NDEBUG - for (int i = 0; i < 256; ++i) { - assert(self256->pointerSet.test(i) == (self256->children[i] != nullptr)); - } -#endif - return self256->pointerSet.firstSetGeq(child); + return self256->bitSet.firstSetGeq(child); } return -1; } @@ -475,44 +402,10 @@ int getChildLeq(Node *self, int child) { #endif } else if (self->type == Type::Node48) { auto *self48 = static_cast(self); -#if defined(HAS_AVX) || defined(HAS_ARM_NEON) - int i = child; - if (i < 0) { - return -1; - } - for (; (i & 0xf) != 0; --i) { - if (self48->index[i] >= 0) { - assert(self48->children[self48->index[i]] != nullptr); - return i; - } - } - if (self48->index[i] >= 0) { - assert(self48->children[self48->index[i]] != nullptr); - return i; - } - i -= 16; - for (; i >= 0; i -= 16) { - auto result = lastNonNeg1(self48->index + i); - if (result != -1) { - return i + result; - } - } -#else - for (int i = child; i >= 0; --i) { - if (self48->index[i] >= 0) { - assert(self48->children[self48->index[i]] != nullptr); - return i; - } - } -#endif + return self48->bitSet.lastSetLeq(child); } else { auto *self256 = static_cast(self); -#ifndef NDEBUG - for (int i = 0; i < 256; ++i) { - assert(self256->pointerSet.test(i) == (self256->children[i] != nullptr)); - } -#endif - return self256->pointerSet.lastSetLeq(child); + return self256->bitSet.lastSetLeq(child); } return -1; } @@ -574,6 +467,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index) { newSelf->nextFree = 16; int i = 0; for (auto x : self16->index) { + newSelf->bitSet.set(x); newSelf->children[i] = self16->children[i]; newSelf->index[x] = i; ++i; @@ -611,7 +505,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index) { memcpy((void *)newSelf, self, offsetof(Node, type)); for (int i = 0; i < 256; ++i) { if (self48->index[i] >= 0) { - newSelf->pointerSet.set(i); + newSelf->bitSet.set(i); newSelf->children[i] = self48->children[self48->index[i]]; } } @@ -620,6 +514,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index) { setChildrenParents(self); goto insert256; } else { + self48->bitSet.set(index); ++self->numChildren; assert(self48->nextFree < 48); self48->index[index] = self48->nextFree; @@ -632,7 +527,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index) { if (!self256->children[index]) { ++self->numChildren; } - self256->pointerSet.set(index); + self256->bitSet.set(index); return self256->children[index]; } } @@ -1174,8 +1069,8 @@ void printTree() { } int main(void) { - // bench(); - printTree(); + bench(); + // printTree(); return 0; } #endif diff --git a/test_symbols.sh b/test_symbols.sh index 386ad44..f3d7865 100755 --- a/test_symbols.sh +++ b/test_symbols.sh @@ -3,4 +3,4 @@ set -euo pipefail diff -u "$2" <(nm "$1" | grep " T " | cut -f3 -d " " | sort) -nm "$1" | grep " U " | (! grep -Pv 'abort|free|malloc|mem[a-z]*') +nm "$1" | grep " U " | (! grep -Pv 'abort|free|malloc|mem[a-z]*|__ashlti3')