Speed up sparse queries
All checks were successful
Tests / Release [gcc] total: 471, passed: 471
GNU C Compiler (gcc)

|Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|0|0|0|0|:clap:
Reference build: <a href="https://jenkins.weaselab.dev/job/weaselab/job/conflict-set/job/main/39//gcc">weaselab » conflict-set » main #39</a>
Tests / Coverage total: 469, passed: 469
weaselab/conflict-set/pipeline/head This commit looks good
All checks were successful
Tests / Release [gcc] total: 471, passed: 471
GNU C Compiler (gcc)

|Total|New|Outstanding|Fixed|Trend
|:-:|:-:|:-:|:-:|:-:
|0|0|0|0|:clap:
Reference build: <a href="https://jenkins.weaselab.dev/job/weaselab/job/conflict-set/job/main/39//gcc">weaselab » conflict-set » main #39</a>
Tests / Coverage total: 469, passed: 469
weaselab/conflict-set/pipeline/head This commit looks good
This commit is contained in:
@@ -41,8 +41,6 @@ limitations under the License.
|
||||
|
||||
// ==================== BEGIN IMPLEMENTATION ====================
|
||||
|
||||
constexpr int kSparseScanThreshold = 32;
|
||||
|
||||
struct Entry {
|
||||
int64_t pointVersion;
|
||||
int64_t rangeVersion;
|
||||
@@ -104,7 +102,6 @@ struct BitSet {
|
||||
// this approach
|
||||
|
||||
if ((begin >> 6) == (end >> 6)) {
|
||||
{
|
||||
uint64_t word = words[begin >> 6] & (uint64_t(-1) << (begin & 63)) &
|
||||
~(uint64_t(-1) << (end & 63));
|
||||
while (word) {
|
||||
@@ -113,12 +110,11 @@ struct BitSet {
|
||||
f(index);
|
||||
word ^= temp;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Check begin partial word
|
||||
{
|
||||
if (begin & 63) {
|
||||
uint64_t word = words[begin >> 6] & (uint64_t(-1) << (begin & 63));
|
||||
while (word) {
|
||||
uint64_t temp = word & -word;
|
||||
@@ -126,27 +122,34 @@ struct BitSet {
|
||||
f(index);
|
||||
word ^= temp;
|
||||
}
|
||||
begin &= ~63;
|
||||
begin += 64;
|
||||
}
|
||||
|
||||
// Check inner, full words
|
||||
begin += 64;
|
||||
while ((begin >> 6) != (end >> 6)) {
|
||||
while (begin != (end & ~63)) {
|
||||
uint64_t word = words[begin >> 6];
|
||||
if (word == uint64_t(-1)) {
|
||||
for (int i = 0; i < 64; ++i) {
|
||||
f(begin + i);
|
||||
}
|
||||
} else {
|
||||
while (word) {
|
||||
uint64_t temp = word & -word;
|
||||
int index = (begin & ~63) + std::countr_zero(word);
|
||||
int index = begin + std::countr_zero(word);
|
||||
f(index);
|
||||
word ^= temp;
|
||||
}
|
||||
}
|
||||
begin += 64;
|
||||
}
|
||||
|
||||
if (end < 256) {
|
||||
if (end & 63) {
|
||||
// Check end partial word
|
||||
uint64_t word = words[end >> 6] & ~(uint64_t(-1) << (end & 63));
|
||||
while (word) {
|
||||
uint64_t temp = word & -word;
|
||||
int index = (begin & ~63) + std::countr_zero(word);
|
||||
int index = begin + std::countr_zero(word);
|
||||
f(index);
|
||||
word ^= temp;
|
||||
}
|
||||
@@ -449,12 +452,6 @@ void setChildrenParents(Node16 *n) {
|
||||
}
|
||||
|
||||
void setChildrenParents(Node48 *n) {
|
||||
if (n->numChildren < kSparseScanThreshold) {
|
||||
for (int i = n->bitSet.firstSetGeq(0); i >= 0;
|
||||
i = n->bitSet.firstSetGeq(i + 1)) {
|
||||
n->children[n->index[i]].child->parent = n;
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < 256; ++i) {
|
||||
int c = n->index[i];
|
||||
if (c != -1) {
|
||||
@@ -462,15 +459,8 @@ void setChildrenParents(Node48 *n) {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void setChildrenParents(Node256 *n) {
|
||||
if (n->numChildren < kSparseScanThreshold) {
|
||||
for (int i = n->bitSet.firstSetGeq(0); i >= 0;
|
||||
i = n->bitSet.firstSetGeq(i + 1)) {
|
||||
n->children[i].child->parent = n;
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < 256; ++i) {
|
||||
auto *child = n->children[i].child;
|
||||
if (child != nullptr) {
|
||||
@@ -478,7 +468,6 @@ void setChildrenParents(Node256 *n) {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Caller is responsible for assigning a non-null pointer to the returned
|
||||
// reference if null
|
||||
@@ -979,6 +968,9 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
|
||||
if (child->entryPresent) {
|
||||
result = std::max(result, child->entry.rangeVersion);
|
||||
}
|
||||
begin = c;
|
||||
} else {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
switch (n->type) {
|
||||
@@ -987,7 +979,7 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
|
||||
case Type::Node16: {
|
||||
auto *self = static_cast<Node16 *>(n);
|
||||
for (int i = 0; i < self->numChildren && self->index[i] < end; ++i) {
|
||||
if (begin < self->index[i]) {
|
||||
if (begin <= self->index[i]) {
|
||||
result = std::max(result, self->children[i].childMaxVersion);
|
||||
}
|
||||
}
|
||||
@@ -995,38 +987,21 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
|
||||
}
|
||||
case Type::Node48: {
|
||||
auto *self = static_cast<Node48 *>(n);
|
||||
if (self->numChildren < kSparseScanThreshold) {
|
||||
for (int i = self->bitSet.firstSetGeq(begin + 1); i < end && i >= 0;
|
||||
i = self->bitSet.firstSetGeq(i + 1)) {
|
||||
if (self->index[i] != -1) {
|
||||
self->bitSet.forEachInRange(
|
||||
[&](int i) {
|
||||
result =
|
||||
std::max(result, self->children[self->index[i]].childMaxVersion);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int i = begin + 1; i < end; ++i) {
|
||||
if (self->index[i] != -1) {
|
||||
result =
|
||||
std::max(result, self->children[self->index[i]].childMaxVersion);
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
begin, end);
|
||||
break;
|
||||
}
|
||||
case Type::Node256: {
|
||||
auto *self = static_cast<Node256 *>(n);
|
||||
if (self->numChildren < kSparseScanThreshold) {
|
||||
for (int i = self->bitSet.firstSetGeq(begin + 1); i < end && i >= 0;
|
||||
i = self->bitSet.firstSetGeq(i + 1)) {
|
||||
self->bitSet.forEachInRange(
|
||||
[&](int i) {
|
||||
result = std::max(result, self->children[i].childMaxVersion);
|
||||
}
|
||||
} else {
|
||||
for (int i = begin + 1; i < end; ++i) {
|
||||
if (self->children[i].child != nullptr) {
|
||||
result = std::max(result, self->children[i].childMaxVersion);
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
begin, end);
|
||||
break;
|
||||
}
|
||||
case Type::Invalid:
|
||||
@@ -2193,8 +2168,21 @@ void printTree() {
|
||||
debugPrintDot(stdout, cs.root, &cs);
|
||||
}
|
||||
|
||||
#define ANKERL_NANOBENCH_IMPLEMENT
|
||||
#include "third_party/nanobench.h"
|
||||
|
||||
// Debug/benchmark entry point: prints the tree via printTree(), then
// repeatedly assigns a freshly allocated node4 to child slot j of a local
// conflict set's root (j = 0..255) and, on every tenth j, benchmarks
// maxBetweenExclusive over the range (0, 256) on that root.
// Always returns 0.
int main(void) {
|
||||
printTree();
|
||||
ankerl::nanobench::Bench bench;
|
||||
// Local conflict set used only for the benchmark below.
ConflictSet::Impl cs{0};
|
||||
for (int j = 0; j < 256; ++j) {
|
||||
// getOrCreateChild returns an assignable reference for slot j; store a
// newly allocated node4 into it.
getOrCreateChild(cs.root, j, &cs.allocators) =
|
||||
cs.allocators.node4.allocate();
|
||||
// Sample only every tenth iteration; the benchmark name carries j.
if (j % 10 == 0) {
|
||||
bench.run("MaxExclusive " + std::to_string(j), [&]() {
|
||||
// NOTE(review): doNotOptimizeAway presumably keeps the compiler from
// eliding the measured call — confirm against nanobench docs.
bench.doNotOptimizeAway(maxBetweenExclusive(cs.root, 0, 256));
|
||||
});
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
Reference in New Issue
Block a user