Compare commits
2 Commits
e7719b6e0b
...
9f5a68e2c0
Author | SHA1 | Date | |
---|---|---|---|
9f5a68e2c0 | |||
dfbb3ce5f1 |
@@ -374,10 +374,8 @@ inline void Node16::copyChildrenAndKeyFrom(const Node48 &other) {
|
|||||||
other.bitSet.forEachInRange(
|
other.bitSet.forEachInRange(
|
||||||
[&](int c) {
|
[&](int c) {
|
||||||
// Suppress a false positive -Waggressive-loop-optimizations warning
|
// Suppress a false positive -Waggressive-loop-optimizations warning
|
||||||
// in gcc. `assume` doesn't work for some reason.
|
// in gcc.
|
||||||
if (!(i < Node16::kMaxNodes)) {
|
assume(i < Node16::kMaxNodes);
|
||||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
|
||||||
}
|
|
||||||
index[i] = c;
|
index[i] = c;
|
||||||
children[i] = other.children[other.index[c]];
|
children[i] = other.children[other.index[c]];
|
||||||
assert(children[i].child->parent == &other);
|
assert(children[i].child->parent == &other);
|
||||||
@@ -430,10 +428,8 @@ inline void Node48::copyChildrenAndKeyFrom(const Node256 &other) {
|
|||||||
bitSet.forEachInRange(
|
bitSet.forEachInRange(
|
||||||
[&](int c) {
|
[&](int c) {
|
||||||
// Suppress a false positive -Waggressive-loop-optimizations warning
|
// Suppress a false positive -Waggressive-loop-optimizations warning
|
||||||
// in gcc. `assume` doesn't work for some reason.
|
// in gcc.
|
||||||
if (!(i < Node48::kMaxNodes)) {
|
assume(i < Node48::kMaxNodes);
|
||||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
|
||||||
}
|
|
||||||
index[c] = i;
|
index[c] = i;
|
||||||
children[i] = other.children[c];
|
children[i] = other.children[c];
|
||||||
assert(children[i].child->parent == &other);
|
assert(children[i].child->parent == &other);
|
||||||
@@ -603,6 +599,21 @@ struct NodeAllocators {
|
|||||||
|
|
||||||
template <class NodeT> int getNodeIndex(NodeT *self, uint8_t index) {
|
template <class NodeT> int getNodeIndex(NodeT *self, uint8_t index) {
|
||||||
static_assert(std::is_same_v<NodeT, Node3> || std::is_same_v<NodeT, Node16>);
|
static_assert(std::is_same_v<NodeT, Node3> || std::is_same_v<NodeT, Node16>);
|
||||||
|
|
||||||
|
// cachegrind says the plain loop executes fewer instructions but has more
|
||||||
|
// mis-predicted branches. Microbenchmark says the plain loop is faster. It's
|
||||||
|
// written in this weird "generic" way so that we can easily switch to the
|
||||||
|
// simd implementation someday if we want.
|
||||||
|
if constexpr (std::is_same_v<NodeT, Node3>) {
|
||||||
|
Node3 *n = (Node3 *)self;
|
||||||
|
for (int i = 0; i < n->numChildren; ++i) {
|
||||||
|
if (n->index[i] == index) {
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef HAS_AVX
|
#ifdef HAS_AVX
|
||||||
// Based on https://www.the-paper-trail.org/post/art-paper-notes/
|
// Based on https://www.the-paper-trail.org/post/art-paper-notes/
|
||||||
|
|
||||||
@@ -725,6 +736,21 @@ Node *getChild(Node *self, uint8_t index) {
|
|||||||
|
|
||||||
template <class NodeT> int getChildGeqSimd(NodeT *self, int child) {
|
template <class NodeT> int getChildGeqSimd(NodeT *self, int child) {
|
||||||
static_assert(std::is_same_v<NodeT, Node3> || std::is_same_v<NodeT, Node16>);
|
static_assert(std::is_same_v<NodeT, Node3> || std::is_same_v<NodeT, Node16>);
|
||||||
|
|
||||||
|
// cachegrind says the plain loop executes fewer instructions but has more
|
||||||
|
// mis-predicted branches. Microbenchmark says the plain loop is faster. It's
|
||||||
|
// written in this weird "generic" way so that we can easily switch to the
|
||||||
|
// simd implementation someday if we want.
|
||||||
|
if constexpr (std::is_same_v<NodeT, Node3>) {
|
||||||
|
Node3 *n = (Node3 *)self;
|
||||||
|
for (int i = 0; i < n->numChildren; ++i) {
|
||||||
|
if (n->index[i] >= child) {
|
||||||
|
return n->index[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef HAS_AVX
|
#ifdef HAS_AVX
|
||||||
__m128i key_vec = _mm_set1_epi8(child);
|
__m128i key_vec = _mm_set1_epi8(child);
|
||||||
__m128i indices;
|
__m128i indices;
|
||||||
|
Reference in New Issue
Block a user