Speed up sparse queries
All checks were successful
Tests / Release [gcc] total: 471, passed: 471
GNU C Compiler (gcc)

|Total|New|Outstanding|Fixed|Trend|
|:-:|:-:|:-:|:-:|:-:|
|0|0|0|0|:clap:|

Reference build: <a href="https://jenkins.weaselab.dev/job/weaselab/job/conflict-set/job/main/39//gcc">weaselab » conflict-set » main #39</a>
Tests / Coverage total: 469, passed: 469
weaselab/conflict-set/pipeline/head This commit looks good

This commit is contained in:
2024-02-25 23:08:01 -08:00
parent 9fcfc44dc3
commit c97c7eee8e

View File

@@ -41,8 +41,6 @@ limitations under the License.
// ==================== BEGIN IMPLEMENTATION ====================
constexpr int kSparseScanThreshold = 32;
struct Entry {
int64_t pointVersion;
int64_t rangeVersion;
@@ -104,21 +102,19 @@ struct BitSet {
// this approach
if ((begin >> 6) == (end >> 6)) {
{
uint64_t word = words[begin >> 6] & (uint64_t(-1) << (begin & 63)) &
~(uint64_t(-1) << (end & 63));
while (word) {
uint64_t temp = word & -word;
int index = (begin & ~63) + std::countr_zero(word);
f(index);
word ^= temp;
}
uint64_t word = words[begin >> 6] & (uint64_t(-1) << (begin & 63)) &
~(uint64_t(-1) << (end & 63));
while (word) {
uint64_t temp = word & -word;
int index = (begin & ~63) + std::countr_zero(word);
f(index);
word ^= temp;
}
return;
}
// Check begin partial word
{
if (begin & 63) {
uint64_t word = words[begin >> 6] & (uint64_t(-1) << (begin & 63));
while (word) {
uint64_t temp = word & -word;
@@ -126,27 +122,34 @@ struct BitSet {
f(index);
word ^= temp;
}
begin &= ~63;
begin += 64;
}
// Check inner, full words
begin += 64;
while ((begin >> 6) != (end >> 6)) {
while (begin != (end & ~63)) {
uint64_t word = words[begin >> 6];
while (word) {
uint64_t temp = word & -word;
int index = (begin & ~63) + std::countr_zero(word);
f(index);
word ^= temp;
if (word == uint64_t(-1)) {
for (int i = 0; i < 64; ++i) {
f(begin + i);
}
} else {
while (word) {
uint64_t temp = word & -word;
int index = begin + std::countr_zero(word);
f(index);
word ^= temp;
}
}
begin += 64;
}
if (end < 256) {
if (end & 63) {
// Check end partial word
uint64_t word = words[end >> 6] & ~(uint64_t(-1) << (end & 63));
while (word) {
uint64_t temp = word & -word;
int index = (begin & ~63) + std::countr_zero(word);
int index = begin + std::countr_zero(word);
f(index);
word ^= temp;
}
@@ -449,33 +452,19 @@ void setChildrenParents(Node16 *n) {
}
// Make `n` the parent of every child it currently holds.
//
// Visits all 256 slots of `n->index`. A slot equal to -1 is skipped; any
// other value is used as the position of the child in `n->children`.
void setChildrenParents(Node48 *n) {
  for (int i = 0; i < 256; ++i) {
    int c = n->index[i];
    if (c != -1) {
      n->children[c].child->parent = n;
    }
  }
}
// Make `n` the parent of every child it currently holds.
//
// Visits all 256 entries of `n->children`; entries whose `child` pointer is
// null are skipped.
void setChildrenParents(Node256 *n) {
  for (int i = 0; i < 256; ++i) {
    auto *child = n->children[i].child;
    if (child != nullptr) {
      child->parent = n;
    }
  }
}
@@ -979,6 +968,9 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
if (child->entryPresent) {
result = std::max(result, child->entry.rangeVersion);
}
begin = c;
} else {
return result;
}
}
switch (n->type) {
@@ -987,7 +979,7 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
case Type::Node16: {
auto *self = static_cast<Node16 *>(n);
for (int i = 0; i < self->numChildren && self->index[i] < end; ++i) {
if (begin < self->index[i]) {
if (begin <= self->index[i]) {
result = std::max(result, self->children[i].childMaxVersion);
}
}
@@ -995,38 +987,21 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
}
case Type::Node48: {
auto *self = static_cast<Node48 *>(n);
if (self->numChildren < kSparseScanThreshold) {
for (int i = self->bitSet.firstSetGeq(begin + 1); i < end && i >= 0;
i = self->bitSet.firstSetGeq(i + 1)) {
if (self->index[i] != -1) {
self->bitSet.forEachInRange(
[&](int i) {
result =
std::max(result, self->children[self->index[i]].childMaxVersion);
}
}
} else {
for (int i = begin + 1; i < end; ++i) {
if (self->index[i] != -1) {
result =
std::max(result, self->children[self->index[i]].childMaxVersion);
}
}
}
},
begin, end);
break;
}
case Type::Node256: {
auto *self = static_cast<Node256 *>(n);
if (self->numChildren < kSparseScanThreshold) {
for (int i = self->bitSet.firstSetGeq(begin + 1); i < end && i >= 0;
i = self->bitSet.firstSetGeq(i + 1)) {
result = std::max(result, self->children[i].childMaxVersion);
}
} else {
for (int i = begin + 1; i < end; ++i) {
if (self->children[i].child != nullptr) {
self->bitSet.forEachInRange(
[&](int i) {
result = std::max(result, self->children[i].childMaxVersion);
}
}
}
},
begin, end);
break;
}
case Type::Invalid:
@@ -2193,8 +2168,21 @@ void printTree() {
debugPrintDot(stdout, cs.root, &cs);
}
#define ANKERL_NANOBENCH_IMPLEMENT
#include "third_party/nanobench.h"
// Entry point: calls printTree(), then benchmarks maxBetweenExclusive over
// the range (0, 256) on a root that gains one extra child per iteration.
// A measurement is taken on every tenth iteration and labelled with the
// loop counter.
int main(void) {
  printTree();

  ankerl::nanobench::Bench bench;
  ConflictSet::Impl cs{0};

  for (int childByte = 0; childByte < 256; ++childByte) {
    // Install a freshly allocated node4 in the root's slot for this byte.
    getOrCreateChild(cs.root, childByte, &cs.allocators) =
        cs.allocators.node4.allocate();

    // Only benchmark when the counter is a multiple of ten.
    if (childByte % 10 != 0) {
      continue;
    }

    const std::string label = "MaxExclusive " + std::to_string(childByte);
    bench.run(label, [&]() {
      bench.doNotOptimizeAway(maxBetweenExclusive(cs.root, 0, 256));
    });
  }

  return 0;
}
#endif