Use plain loop for Node3
All checks were successful
Tests / Clang total: 932, passed: 932
Clang |Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|3|0|3|0|:zzz:
Tests / Release [gcc] total: 932, passed: 932
Tests / Release [gcc,aarch64] total: 931, passed: 931
Tests / Coverage total: 930, passed: 930
weaselab/conflict-set/pipeline/head This commit looks good
All checks were successful
Tests / Clang total: 932, passed: 932
Clang |Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|3|0|3|0|:zzz:
Tests / Release [gcc] total: 932, passed: 932
Tests / Release [gcc,aarch64] total: 931, passed: 931
Tests / Coverage total: 930, passed: 930
weaselab/conflict-set/pipeline/head This commit looks good
This commit is contained in:
@@ -599,6 +599,21 @@ struct NodeAllocators {
|
||||
|
||||
template <class NodeT> int getNodeIndex(NodeT *self, uint8_t index) {
|
||||
static_assert(std::is_same_v<NodeT, Node3> || std::is_same_v<NodeT, Node16>);
|
||||
|
||||
// cachegrind says the plain loop executes fewer instructions but has more
|
||||
// mispredicted branches. Microbenchmark says the plain loop is faster. It's
|
||||
// written in this weird "generic" way so that we can easily switch to the
|
||||
// simd implementation someday if we want.
|
||||
if constexpr (std::is_same_v<NodeT, Node3>) {
|
||||
Node3 *n = (Node3 *)self;
|
||||
for (int i = 0; i < n->numChildren; ++i) {
|
||||
if (n->index[i] == index) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
#ifdef HAS_AVX
|
||||
// Based on https://www.the-paper-trail.org/post/art-paper-notes/
|
||||
|
||||
@@ -721,6 +736,21 @@ Node *getChild(Node *self, uint8_t index) {
|
||||
|
||||
template <class NodeT> int getChildGeqSimd(NodeT *self, int child) {
|
||||
static_assert(std::is_same_v<NodeT, Node3> || std::is_same_v<NodeT, Node16>);
|
||||
|
||||
// cachegrind says the plain loop executes fewer instructions but has more
|
||||
// mispredicted branches. Microbenchmark says the plain loop is faster. It's
|
||||
// written in this weird "generic" way so that we can easily switch to the
|
||||
// simd implementation someday if we want.
|
||||
if constexpr (std::is_same_v<NodeT, Node3>) {
|
||||
Node3 *n = (Node3 *)self;
|
||||
for (int i = 0; i < n->numChildren; ++i) {
|
||||
if (n->index[i] >= child) {
|
||||
return n->index[i];
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
#ifdef HAS_AVX
|
||||
__m128i key_vec = _mm_set1_epi8(child);
|
||||
__m128i indices;
|
||||
|
Reference in New Issue
Block a user