Use bitset index for Node48 too
This commit is contained in:
151
ConflictSet.cpp
151
ConflictSet.cpp
@@ -64,17 +64,7 @@ struct Node16 : Node {
|
||||
Node16() { this->type = Type::Node16; }
|
||||
};
|
||||
|
||||
struct Node48 : Node {
|
||||
int8_t nextFree = 0;
|
||||
int8_t index[256];
|
||||
Node *children[48] = {};
|
||||
Node48() {
|
||||
this->type = Type::Node48;
|
||||
memset(index, -1, 256);
|
||||
}
|
||||
};
|
||||
|
||||
struct PointerSet {
|
||||
struct BitSet {
|
||||
bool test(int i) const {
|
||||
assert(0 <= i);
|
||||
assert(i < 256);
|
||||
@@ -132,8 +122,19 @@ private:
|
||||
__uint128_t hi = 0;
|
||||
};
|
||||
|
||||
struct Node48 : Node {
|
||||
BitSet bitSet;
|
||||
Node *children[48] = {};
|
||||
int8_t nextFree = 0;
|
||||
int8_t index[256];
|
||||
Node48() {
|
||||
this->type = Type::Node48;
|
||||
memset(index, -1, 256);
|
||||
}
|
||||
};
|
||||
|
||||
struct Node256 : Node {
|
||||
PointerSet pointerSet;
|
||||
BitSet bitSet;
|
||||
Node *children[256] = {};
|
||||
Node256() { this->type = Type::Node256; }
|
||||
};
|
||||
@@ -205,54 +206,6 @@ int getNodeIndex(Node16 *self, uint8_t index) {
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef HAS_AVX
|
||||
int firstNonNeg1(const int8_t x[16]) {
|
||||
__m128i key_vec = _mm_set1_epi8(-1);
|
||||
__m128i indices;
|
||||
memcpy(&indices, x, 16);
|
||||
__m128i results = _mm_cmpeq_epi8(key_vec, indices);
|
||||
uint32_t bitfield = _mm_movemask_epi8(results) ^ 0xffff;
|
||||
if (bitfield == 0)
|
||||
return -1;
|
||||
return std::countr_zero(bitfield);
|
||||
}
|
||||
|
||||
int lastNonNeg1(const int8_t x[16]) {
|
||||
__m128i key_vec = _mm_set1_epi8(-1);
|
||||
__m128i indices;
|
||||
memcpy(&indices, x, 16);
|
||||
__m128i results = _mm_cmpeq_epi8(key_vec, indices);
|
||||
uint32_t bitfield = _mm_movemask_epi8(results) ^ 0xffff;
|
||||
if (bitfield == 0)
|
||||
return -1;
|
||||
return 31 - std::countl_zero(bitfield);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef HAS_ARM_NEON
|
||||
int firstNonNeg1(const int8_t x[16]) {
|
||||
uint8x16_t indices;
|
||||
memcpy(&indices, x, 16);
|
||||
uint16x8_t results = vreinterpretq_u16_u8(vceqq_u8(vdupq_n_u8(-1), indices));
|
||||
uint64_t bitfield =
|
||||
~vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0);
|
||||
if (bitfield == 0)
|
||||
return -1;
|
||||
return std::countr_zero(bitfield) / 4;
|
||||
}
|
||||
|
||||
int lastNonNeg1(const int8_t x[16]) {
|
||||
uint8x16_t indices;
|
||||
memcpy(&indices, x, 16);
|
||||
uint16x8_t results = vreinterpretq_u16_u8(vceqq_u8(vdupq_n_u8(-1), indices));
|
||||
uint64_t bitfield =
|
||||
~vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0);
|
||||
if (bitfield == 0)
|
||||
return -1;
|
||||
return 15 - std::countl_zero(bitfield) / 4;
|
||||
}
|
||||
#endif
|
||||
|
||||
[[maybe_unused]] Node *getChild(Node *self, uint8_t index) {
|
||||
if (self->type == Type::Node4) {
|
||||
auto *self4 = static_cast<Node4 *>(self);
|
||||
@@ -372,36 +325,10 @@ int getChildGeq(Node *self, int child) {
|
||||
#endif
|
||||
} else if (self->type == Type::Node48) {
|
||||
auto *self48 = static_cast<Node48 *>(self);
|
||||
#if defined(HAS_AVX) || defined(HAS_ARM_NEON)
|
||||
int i = child;
|
||||
for (; (i & 0xf) != 0; ++i) {
|
||||
if (self48->index[i] >= 0) {
|
||||
assert(self48->children[self48->index[i]] != nullptr);
|
||||
return i;
|
||||
}
|
||||
}
|
||||
for (; i < 256; i += 16) {
|
||||
auto result = firstNonNeg1(self48->index + i);
|
||||
if (result != -1) {
|
||||
return i + result;
|
||||
}
|
||||
}
|
||||
#else
|
||||
for (int i = child; i < 256; ++i) {
|
||||
if (self48->index[i] >= 0) {
|
||||
assert(self48->children[self48->index[i]] != nullptr);
|
||||
return i;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return self48->bitSet.firstSetGeq(child);
|
||||
} else {
|
||||
auto *self256 = static_cast<Node256 *>(self);
|
||||
#ifndef NDEBUG
|
||||
for (int i = 0; i < 256; ++i) {
|
||||
assert(self256->pointerSet.test(i) == (self256->children[i] != nullptr));
|
||||
}
|
||||
#endif
|
||||
return self256->pointerSet.firstSetGeq(child);
|
||||
return self256->bitSet.firstSetGeq(child);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
@@ -475,44 +402,10 @@ int getChildLeq(Node *self, int child) {
|
||||
#endif
|
||||
} else if (self->type == Type::Node48) {
|
||||
auto *self48 = static_cast<Node48 *>(self);
|
||||
#if defined(HAS_AVX) || defined(HAS_ARM_NEON)
|
||||
int i = child;
|
||||
if (i < 0) {
|
||||
return -1;
|
||||
}
|
||||
for (; (i & 0xf) != 0; --i) {
|
||||
if (self48->index[i] >= 0) {
|
||||
assert(self48->children[self48->index[i]] != nullptr);
|
||||
return i;
|
||||
}
|
||||
}
|
||||
if (self48->index[i] >= 0) {
|
||||
assert(self48->children[self48->index[i]] != nullptr);
|
||||
return i;
|
||||
}
|
||||
i -= 16;
|
||||
for (; i >= 0; i -= 16) {
|
||||
auto result = lastNonNeg1(self48->index + i);
|
||||
if (result != -1) {
|
||||
return i + result;
|
||||
}
|
||||
}
|
||||
#else
|
||||
for (int i = child; i >= 0; --i) {
|
||||
if (self48->index[i] >= 0) {
|
||||
assert(self48->children[self48->index[i]] != nullptr);
|
||||
return i;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return self48->bitSet.lastSetLeq(child);
|
||||
} else {
|
||||
auto *self256 = static_cast<Node256 *>(self);
|
||||
#ifndef NDEBUG
|
||||
for (int i = 0; i < 256; ++i) {
|
||||
assert(self256->pointerSet.test(i) == (self256->children[i] != nullptr));
|
||||
}
|
||||
#endif
|
||||
return self256->pointerSet.lastSetLeq(child);
|
||||
return self256->bitSet.lastSetLeq(child);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
@@ -574,6 +467,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index) {
|
||||
newSelf->nextFree = 16;
|
||||
int i = 0;
|
||||
for (auto x : self16->index) {
|
||||
newSelf->bitSet.set(x);
|
||||
newSelf->children[i] = self16->children[i];
|
||||
newSelf->index[x] = i;
|
||||
++i;
|
||||
@@ -611,7 +505,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index) {
|
||||
memcpy((void *)newSelf, self, offsetof(Node, type));
|
||||
for (int i = 0; i < 256; ++i) {
|
||||
if (self48->index[i] >= 0) {
|
||||
newSelf->pointerSet.set(i);
|
||||
newSelf->bitSet.set(i);
|
||||
newSelf->children[i] = self48->children[self48->index[i]];
|
||||
}
|
||||
}
|
||||
@@ -620,6 +514,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index) {
|
||||
setChildrenParents(self);
|
||||
goto insert256;
|
||||
} else {
|
||||
self48->bitSet.set(index);
|
||||
++self->numChildren;
|
||||
assert(self48->nextFree < 48);
|
||||
self48->index[index] = self48->nextFree;
|
||||
@@ -632,7 +527,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index) {
|
||||
if (!self256->children[index]) {
|
||||
++self->numChildren;
|
||||
}
|
||||
self256->pointerSet.set(index);
|
||||
self256->bitSet.set(index);
|
||||
return self256->children[index];
|
||||
}
|
||||
}
|
||||
@@ -1174,8 +1069,8 @@ void printTree() {
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
// bench();
|
||||
printTree();
|
||||
bench();
|
||||
// printTree();
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
Reference in New Issue
Block a user