Speed up sparse queries
All checks were successful
Tests / Release [gcc] total: 471, passed: 471
GNU C Compiler (gcc)

|Total|New|Outstanding|Fixed|Trend|
|:-:|:-:|:-:|:-:|:-:|
|0|0|0|0|:clap:|

Reference build: <a href="https://jenkins.weaselab.dev/job/weaselab/job/conflict-set/job/main/39//gcc">weaselab » conflict-set » main #39</a>
Tests / Coverage total: 469, passed: 469
weaselab/conflict-set/pipeline/head This commit looks good

This commit is contained in:
2024-02-25 23:08:01 -08:00
parent 9fcfc44dc3
commit c97c7eee8e

View File

@@ -41,8 +41,6 @@ limitations under the License.
// ==================== BEGIN IMPLEMENTATION ==================== // ==================== BEGIN IMPLEMENTATION ====================
constexpr int kSparseScanThreshold = 32;
struct Entry { struct Entry {
int64_t pointVersion; int64_t pointVersion;
int64_t rangeVersion; int64_t rangeVersion;
@@ -104,21 +102,19 @@ struct BitSet {
// this approach // this approach
if ((begin >> 6) == (end >> 6)) { if ((begin >> 6) == (end >> 6)) {
{ uint64_t word = words[begin >> 6] & (uint64_t(-1) << (begin & 63)) &
uint64_t word = words[begin >> 6] & (uint64_t(-1) << (begin & 63)) & ~(uint64_t(-1) << (end & 63));
~(uint64_t(-1) << (end & 63)); while (word) {
while (word) { uint64_t temp = word & -word;
uint64_t temp = word & -word; int index = (begin & ~63) + std::countr_zero(word);
int index = (begin & ~63) + std::countr_zero(word); f(index);
f(index); word ^= temp;
word ^= temp;
}
} }
return; return;
} }
// Check begin partial word // Check begin partial word
{ if (begin & 63) {
uint64_t word = words[begin >> 6] & (uint64_t(-1) << (begin & 63)); uint64_t word = words[begin >> 6] & (uint64_t(-1) << (begin & 63));
while (word) { while (word) {
uint64_t temp = word & -word; uint64_t temp = word & -word;
@@ -126,27 +122,34 @@ struct BitSet {
f(index); f(index);
word ^= temp; word ^= temp;
} }
begin &= ~63;
begin += 64;
} }
// Check inner, full words // Check inner, full words
begin += 64; while (begin != (end & ~63)) {
while ((begin >> 6) != (end >> 6)) {
uint64_t word = words[begin >> 6]; uint64_t word = words[begin >> 6];
while (word) { if (word == uint64_t(-1)) {
uint64_t temp = word & -word; for (int i = 0; i < 64; ++i) {
int index = (begin & ~63) + std::countr_zero(word); f(begin + i);
f(index); }
word ^= temp; } else {
while (word) {
uint64_t temp = word & -word;
int index = begin + std::countr_zero(word);
f(index);
word ^= temp;
}
} }
begin += 64; begin += 64;
} }
if (end < 256) { if (end & 63) {
// Check end partial word // Check end partial word
uint64_t word = words[end >> 6] & ~(uint64_t(-1) << (end & 63)); uint64_t word = words[end >> 6] & ~(uint64_t(-1) << (end & 63));
while (word) { while (word) {
uint64_t temp = word & -word; uint64_t temp = word & -word;
int index = (begin & ~63) + std::countr_zero(word); int index = begin + std::countr_zero(word);
f(index); f(index);
word ^= temp; word ^= temp;
} }
@@ -449,33 +452,19 @@ void setChildrenParents(Node16 *n) {
} }
void setChildrenParents(Node48 *n) { void setChildrenParents(Node48 *n) {
if (n->numChildren < kSparseScanThreshold) { for (int i = 0; i < 256; ++i) {
for (int i = n->bitSet.firstSetGeq(0); i >= 0; int c = n->index[i];
i = n->bitSet.firstSetGeq(i + 1)) { if (c != -1) {
n->children[n->index[i]].child->parent = n; n->children[c].child->parent = n;
}
} else {
for (int i = 0; i < 256; ++i) {
int c = n->index[i];
if (c != -1) {
n->children[c].child->parent = n;
}
} }
} }
} }
void setChildrenParents(Node256 *n) { void setChildrenParents(Node256 *n) {
if (n->numChildren < kSparseScanThreshold) { for (int i = 0; i < 256; ++i) {
for (int i = n->bitSet.firstSetGeq(0); i >= 0; auto *child = n->children[i].child;
i = n->bitSet.firstSetGeq(i + 1)) { if (child != nullptr) {
n->children[i].child->parent = n; child->parent = n;
}
} else {
for (int i = 0; i < 256; ++i) {
auto *child = n->children[i].child;
if (child != nullptr) {
child->parent = n;
}
} }
} }
} }
@@ -979,6 +968,9 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
if (child->entryPresent) { if (child->entryPresent) {
result = std::max(result, child->entry.rangeVersion); result = std::max(result, child->entry.rangeVersion);
} }
begin = c;
} else {
return result;
} }
} }
switch (n->type) { switch (n->type) {
@@ -987,7 +979,7 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
case Type::Node16: { case Type::Node16: {
auto *self = static_cast<Node16 *>(n); auto *self = static_cast<Node16 *>(n);
for (int i = 0; i < self->numChildren && self->index[i] < end; ++i) { for (int i = 0; i < self->numChildren && self->index[i] < end; ++i) {
if (begin < self->index[i]) { if (begin <= self->index[i]) {
result = std::max(result, self->children[i].childMaxVersion); result = std::max(result, self->children[i].childMaxVersion);
} }
} }
@@ -995,38 +987,21 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
} }
case Type::Node48: { case Type::Node48: {
auto *self = static_cast<Node48 *>(n); auto *self = static_cast<Node48 *>(n);
if (self->numChildren < kSparseScanThreshold) { self->bitSet.forEachInRange(
for (int i = self->bitSet.firstSetGeq(begin + 1); i < end && i >= 0; [&](int i) {
i = self->bitSet.firstSetGeq(i + 1)) {
if (self->index[i] != -1) {
result = result =
std::max(result, self->children[self->index[i]].childMaxVersion); std::max(result, self->children[self->index[i]].childMaxVersion);
} },
} begin, end);
} else {
for (int i = begin + 1; i < end; ++i) {
if (self->index[i] != -1) {
result =
std::max(result, self->children[self->index[i]].childMaxVersion);
}
}
}
break; break;
} }
case Type::Node256: { case Type::Node256: {
auto *self = static_cast<Node256 *>(n); auto *self = static_cast<Node256 *>(n);
if (self->numChildren < kSparseScanThreshold) { self->bitSet.forEachInRange(
for (int i = self->bitSet.firstSetGeq(begin + 1); i < end && i >= 0; [&](int i) {
i = self->bitSet.firstSetGeq(i + 1)) {
result = std::max(result, self->children[i].childMaxVersion);
}
} else {
for (int i = begin + 1; i < end; ++i) {
if (self->children[i].child != nullptr) {
result = std::max(result, self->children[i].childMaxVersion); result = std::max(result, self->children[i].childMaxVersion);
} },
} begin, end);
}
break; break;
} }
case Type::Invalid: case Type::Invalid:
@@ -2193,8 +2168,21 @@ void printTree() {
debugPrintDot(stdout, cs.root, &cs); debugPrintDot(stdout, cs.root, &cs);
} }
#define ANKERL_NANOBENCH_IMPLEMENT
#include "third_party/nanobench.h"
int main(void) { int main(void) {
printTree(); ankerl::nanobench::Bench bench;
ConflictSet::Impl cs{0};
for (int j = 0; j < 256; ++j) {
getOrCreateChild(cs.root, j, &cs.allocators) =
cs.allocators.node4.allocate();
if (j % 10 == 0) {
bench.run("MaxExclusive " + std::to_string(j), [&]() {
bench.doNotOptimizeAway(maxBetweenExclusive(cs.root, 0, 256));
});
}
}
return 0; return 0;
} }
#endif #endif