Speed up sparse queries
All checks were successful
Tests / Release [gcc] total: 471, passed: 471
GNU C Compiler (gcc)

|Total|New|Outstanding|Fixed|Trend|
|:-:|:-:|:-:|:-:|:-:|
|0|0|0|0|:clap:|

Reference build: <a href="https://jenkins.weaselab.dev/job/weaselab/job/conflict-set/job/main/39//gcc">weaselab » conflict-set » main #39</a>
Tests / Coverage total: 469, passed: 469
weaselab/conflict-set/pipeline/head This commit looks good
All checks were successful
Tests / Release [gcc] total: 471, passed: 471
GNU C Compiler (gcc)

|Total|New|Outstanding|Fixed|Trend|
|:-:|:-:|:-:|:-:|:-:|
|0|0|0|0|:clap:|

Reference build: <a href="https://jenkins.weaselab.dev/job/weaselab/job/conflict-set/job/main/39//gcc">weaselab » conflict-set » main #39</a>
Tests / Coverage total: 469, passed: 469
weaselab/conflict-set/pipeline/head This commit looks good
This commit is contained in:
128
ConflictSet.cpp
128
ConflictSet.cpp
@@ -41,8 +41,6 @@ limitations under the License.
|
|||||||
|
|
||||||
// ==================== BEGIN IMPLEMENTATION ====================
|
// ==================== BEGIN IMPLEMENTATION ====================
|
||||||
|
|
||||||
constexpr int kSparseScanThreshold = 32;
|
|
||||||
|
|
||||||
struct Entry {
|
struct Entry {
|
||||||
int64_t pointVersion;
|
int64_t pointVersion;
|
||||||
int64_t rangeVersion;
|
int64_t rangeVersion;
|
||||||
@@ -104,21 +102,19 @@ struct BitSet {
|
|||||||
// this approach
|
// this approach
|
||||||
|
|
||||||
if ((begin >> 6) == (end >> 6)) {
|
if ((begin >> 6) == (end >> 6)) {
|
||||||
{
|
uint64_t word = words[begin >> 6] & (uint64_t(-1) << (begin & 63)) &
|
||||||
uint64_t word = words[begin >> 6] & (uint64_t(-1) << (begin & 63)) &
|
~(uint64_t(-1) << (end & 63));
|
||||||
~(uint64_t(-1) << (end & 63));
|
while (word) {
|
||||||
while (word) {
|
uint64_t temp = word & -word;
|
||||||
uint64_t temp = word & -word;
|
int index = (begin & ~63) + std::countr_zero(word);
|
||||||
int index = (begin & ~63) + std::countr_zero(word);
|
f(index);
|
||||||
f(index);
|
word ^= temp;
|
||||||
word ^= temp;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check begin partial word
|
// Check begin partial word
|
||||||
{
|
if (begin & 63) {
|
||||||
uint64_t word = words[begin >> 6] & (uint64_t(-1) << (begin & 63));
|
uint64_t word = words[begin >> 6] & (uint64_t(-1) << (begin & 63));
|
||||||
while (word) {
|
while (word) {
|
||||||
uint64_t temp = word & -word;
|
uint64_t temp = word & -word;
|
||||||
@@ -126,27 +122,34 @@ struct BitSet {
|
|||||||
f(index);
|
f(index);
|
||||||
word ^= temp;
|
word ^= temp;
|
||||||
}
|
}
|
||||||
|
begin &= ~63;
|
||||||
|
begin += 64;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check inner, full words
|
// Check inner, full words
|
||||||
begin += 64;
|
while (begin != (end & ~63)) {
|
||||||
while ((begin >> 6) != (end >> 6)) {
|
|
||||||
uint64_t word = words[begin >> 6];
|
uint64_t word = words[begin >> 6];
|
||||||
while (word) {
|
if (word == uint64_t(-1)) {
|
||||||
uint64_t temp = word & -word;
|
for (int i = 0; i < 64; ++i) {
|
||||||
int index = (begin & ~63) + std::countr_zero(word);
|
f(begin + i);
|
||||||
f(index);
|
}
|
||||||
word ^= temp;
|
} else {
|
||||||
|
while (word) {
|
||||||
|
uint64_t temp = word & -word;
|
||||||
|
int index = begin + std::countr_zero(word);
|
||||||
|
f(index);
|
||||||
|
word ^= temp;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
begin += 64;
|
begin += 64;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (end < 256) {
|
if (end & 63) {
|
||||||
// Check end partial word
|
// Check end partial word
|
||||||
uint64_t word = words[end >> 6] & ~(uint64_t(-1) << (end & 63));
|
uint64_t word = words[end >> 6] & ~(uint64_t(-1) << (end & 63));
|
||||||
while (word) {
|
while (word) {
|
||||||
uint64_t temp = word & -word;
|
uint64_t temp = word & -word;
|
||||||
int index = (begin & ~63) + std::countr_zero(word);
|
int index = begin + std::countr_zero(word);
|
||||||
f(index);
|
f(index);
|
||||||
word ^= temp;
|
word ^= temp;
|
||||||
}
|
}
|
||||||
@@ -449,33 +452,19 @@ void setChildrenParents(Node16 *n) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void setChildrenParents(Node48 *n) {
|
void setChildrenParents(Node48 *n) {
|
||||||
if (n->numChildren < kSparseScanThreshold) {
|
for (int i = 0; i < 256; ++i) {
|
||||||
for (int i = n->bitSet.firstSetGeq(0); i >= 0;
|
int c = n->index[i];
|
||||||
i = n->bitSet.firstSetGeq(i + 1)) {
|
if (c != -1) {
|
||||||
n->children[n->index[i]].child->parent = n;
|
n->children[c].child->parent = n;
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (int i = 0; i < 256; ++i) {
|
|
||||||
int c = n->index[i];
|
|
||||||
if (c != -1) {
|
|
||||||
n->children[c].child->parent = n;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void setChildrenParents(Node256 *n) {
|
void setChildrenParents(Node256 *n) {
|
||||||
if (n->numChildren < kSparseScanThreshold) {
|
for (int i = 0; i < 256; ++i) {
|
||||||
for (int i = n->bitSet.firstSetGeq(0); i >= 0;
|
auto *child = n->children[i].child;
|
||||||
i = n->bitSet.firstSetGeq(i + 1)) {
|
if (child != nullptr) {
|
||||||
n->children[i].child->parent = n;
|
child->parent = n;
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (int i = 0; i < 256; ++i) {
|
|
||||||
auto *child = n->children[i].child;
|
|
||||||
if (child != nullptr) {
|
|
||||||
child->parent = n;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -979,6 +968,9 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
|
|||||||
if (child->entryPresent) {
|
if (child->entryPresent) {
|
||||||
result = std::max(result, child->entry.rangeVersion);
|
result = std::max(result, child->entry.rangeVersion);
|
||||||
}
|
}
|
||||||
|
begin = c;
|
||||||
|
} else {
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
switch (n->type) {
|
switch (n->type) {
|
||||||
@@ -987,7 +979,7 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
|
|||||||
case Type::Node16: {
|
case Type::Node16: {
|
||||||
auto *self = static_cast<Node16 *>(n);
|
auto *self = static_cast<Node16 *>(n);
|
||||||
for (int i = 0; i < self->numChildren && self->index[i] < end; ++i) {
|
for (int i = 0; i < self->numChildren && self->index[i] < end; ++i) {
|
||||||
if (begin < self->index[i]) {
|
if (begin <= self->index[i]) {
|
||||||
result = std::max(result, self->children[i].childMaxVersion);
|
result = std::max(result, self->children[i].childMaxVersion);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -995,38 +987,21 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
|
|||||||
}
|
}
|
||||||
case Type::Node48: {
|
case Type::Node48: {
|
||||||
auto *self = static_cast<Node48 *>(n);
|
auto *self = static_cast<Node48 *>(n);
|
||||||
if (self->numChildren < kSparseScanThreshold) {
|
self->bitSet.forEachInRange(
|
||||||
for (int i = self->bitSet.firstSetGeq(begin + 1); i < end && i >= 0;
|
[&](int i) {
|
||||||
i = self->bitSet.firstSetGeq(i + 1)) {
|
|
||||||
if (self->index[i] != -1) {
|
|
||||||
result =
|
result =
|
||||||
std::max(result, self->children[self->index[i]].childMaxVersion);
|
std::max(result, self->children[self->index[i]].childMaxVersion);
|
||||||
}
|
},
|
||||||
}
|
begin, end);
|
||||||
} else {
|
|
||||||
for (int i = begin + 1; i < end; ++i) {
|
|
||||||
if (self->index[i] != -1) {
|
|
||||||
result =
|
|
||||||
std::max(result, self->children[self->index[i]].childMaxVersion);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Type::Node256: {
|
case Type::Node256: {
|
||||||
auto *self = static_cast<Node256 *>(n);
|
auto *self = static_cast<Node256 *>(n);
|
||||||
if (self->numChildren < kSparseScanThreshold) {
|
self->bitSet.forEachInRange(
|
||||||
for (int i = self->bitSet.firstSetGeq(begin + 1); i < end && i >= 0;
|
[&](int i) {
|
||||||
i = self->bitSet.firstSetGeq(i + 1)) {
|
|
||||||
result = std::max(result, self->children[i].childMaxVersion);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (int i = begin + 1; i < end; ++i) {
|
|
||||||
if (self->children[i].child != nullptr) {
|
|
||||||
result = std::max(result, self->children[i].childMaxVersion);
|
result = std::max(result, self->children[i].childMaxVersion);
|
||||||
}
|
},
|
||||||
}
|
begin, end);
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Type::Invalid:
|
case Type::Invalid:
|
||||||
@@ -2193,8 +2168,21 @@ void printTree() {
|
|||||||
debugPrintDot(stdout, cs.root, &cs);
|
debugPrintDot(stdout, cs.root, &cs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define ANKERL_NANOBENCH_IMPLEMENT
|
||||||
|
#include "third_party/nanobench.h"
|
||||||
|
|
||||||
int main(void) {
|
int main(void) {
|
||||||
printTree();
|
ankerl::nanobench::Bench bench;
|
||||||
|
ConflictSet::Impl cs{0};
|
||||||
|
for (int j = 0; j < 256; ++j) {
|
||||||
|
getOrCreateChild(cs.root, j, &cs.allocators) =
|
||||||
|
cs.allocators.node4.allocate();
|
||||||
|
if (j % 10 == 0) {
|
||||||
|
bench.run("MaxExclusive " + std::to_string(j), [&]() {
|
||||||
|
bench.doNotOptimizeAway(maxBetweenExclusive(cs.root, 0, 256));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
Reference in New Issue
Block a user