2 Commits

Author SHA1 Message Date
c97c7eee8e Speed up sparse queries
All checks were successful
Tests / Release [gcc] total: 471, passed: 471
GNU C Compiler (gcc) |Total|New|Outstanding|Fixed|Trend |:-:|:-:|:-:|:-:|:-: |0|0|0|0|:clap: Reference build: <a href="https://jenkins.weaselab.dev/job/weaselab/job/conflict-set/job/main/39//gcc">weaselab » conflict-set » main #39</a>
Tests / Coverage total: 469, passed: 469
weaselab/conflict-set/pipeline/head This commit looks good
2024-02-25 23:08:01 -08:00
9fcfc44dc3 Add forEachInRange 2024-02-25 21:04:39 -08:00

View File

@@ -41,8 +41,6 @@ limitations under the License.
// ==================== BEGIN IMPLEMENTATION ==================== // ==================== BEGIN IMPLEMENTATION ====================
constexpr int kSparseScanThreshold = 32;
struct Entry { struct Entry {
int64_t pointVersion; int64_t pointVersion;
int64_t rangeVersion; int64_t rangeVersion;
@@ -98,6 +96,66 @@ struct BitSet {
void reset(int i); void reset(int i);
int firstSetGeq(int i) const; int firstSetGeq(int i) const;
// Calls `f` with the index of each bit set in [begin, end)
template <class F> void forEachInRange(F f, int begin, int end) {
// See section 3.1 in https://arxiv.org/pdf/1709.07821.pdf for details about
// this approach
if ((begin >> 6) == (end >> 6)) {
uint64_t word = words[begin >> 6] & (uint64_t(-1) << (begin & 63)) &
~(uint64_t(-1) << (end & 63));
while (word) {
uint64_t temp = word & -word;
int index = (begin & ~63) + std::countr_zero(word);
f(index);
word ^= temp;
}
return;
}
// Check begin partial word
if (begin & 63) {
uint64_t word = words[begin >> 6] & (uint64_t(-1) << (begin & 63));
while (word) {
uint64_t temp = word & -word;
int index = (begin & ~63) + std::countr_zero(word);
f(index);
word ^= temp;
}
begin &= ~63;
begin += 64;
}
// Check inner, full words
while (begin != (end & ~63)) {
uint64_t word = words[begin >> 6];
if (word == uint64_t(-1)) {
for (int i = 0; i < 64; ++i) {
f(begin + i);
}
} else {
while (word) {
uint64_t temp = word & -word;
int index = begin + std::countr_zero(word);
f(index);
word ^= temp;
}
}
begin += 64;
}
if (end & 63) {
// Check end partial word
uint64_t word = words[end >> 6] & ~(uint64_t(-1) << (end & 63));
while (word) {
uint64_t temp = word & -word;
int index = begin + std::countr_zero(word);
f(index);
word ^= temp;
}
}
}
private: private:
uint64_t words[4] = {}; uint64_t words[4] = {};
}; };
@@ -394,33 +452,19 @@ void setChildrenParents(Node16 *n) {
} }
void setChildrenParents(Node48 *n) { void setChildrenParents(Node48 *n) {
if (n->numChildren < kSparseScanThreshold) { for (int i = 0; i < 256; ++i) {
for (int i = n->bitSet.firstSetGeq(0); i >= 0; int c = n->index[i];
i = n->bitSet.firstSetGeq(i + 1)) { if (c != -1) {
n->children[n->index[i]].child->parent = n; n->children[c].child->parent = n;
}
} else {
for (int i = 0; i < 256; ++i) {
int c = n->index[i];
if (c != -1) {
n->children[c].child->parent = n;
}
} }
} }
} }
void setChildrenParents(Node256 *n) { void setChildrenParents(Node256 *n) {
if (n->numChildren < kSparseScanThreshold) { for (int i = 0; i < 256; ++i) {
for (int i = n->bitSet.firstSetGeq(0); i >= 0; auto *child = n->children[i].child;
i = n->bitSet.firstSetGeq(i + 1)) { if (child != nullptr) {
n->children[i].child->parent = n; child->parent = n;
}
} else {
for (int i = 0; i < 256; ++i) {
auto *child = n->children[i].child;
if (child != nullptr) {
child->parent = n;
}
} }
} }
} }
@@ -924,6 +968,9 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
if (child->entryPresent) { if (child->entryPresent) {
result = std::max(result, child->entry.rangeVersion); result = std::max(result, child->entry.rangeVersion);
} }
begin = c;
} else {
return result;
} }
} }
switch (n->type) { switch (n->type) {
@@ -932,7 +979,7 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
case Type::Node16: { case Type::Node16: {
auto *self = static_cast<Node16 *>(n); auto *self = static_cast<Node16 *>(n);
for (int i = 0; i < self->numChildren && self->index[i] < end; ++i) { for (int i = 0; i < self->numChildren && self->index[i] < end; ++i) {
if (begin < self->index[i]) { if (begin <= self->index[i]) {
result = std::max(result, self->children[i].childMaxVersion); result = std::max(result, self->children[i].childMaxVersion);
} }
} }
@@ -940,38 +987,21 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
} }
case Type::Node48: { case Type::Node48: {
auto *self = static_cast<Node48 *>(n); auto *self = static_cast<Node48 *>(n);
if (self->numChildren < kSparseScanThreshold) { self->bitSet.forEachInRange(
for (int i = self->bitSet.firstSetGeq(begin + 1); i < end && i >= 0; [&](int i) {
i = self->bitSet.firstSetGeq(i + 1)) {
if (self->index[i] != -1) {
result = result =
std::max(result, self->children[self->index[i]].childMaxVersion); std::max(result, self->children[self->index[i]].childMaxVersion);
} },
} begin, end);
} else {
for (int i = begin + 1; i < end; ++i) {
if (self->index[i] != -1) {
result =
std::max(result, self->children[self->index[i]].childMaxVersion);
}
}
}
break; break;
} }
case Type::Node256: { case Type::Node256: {
auto *self = static_cast<Node256 *>(n); auto *self = static_cast<Node256 *>(n);
if (self->numChildren < kSparseScanThreshold) { self->bitSet.forEachInRange(
for (int i = self->bitSet.firstSetGeq(begin + 1); i < end && i >= 0; [&](int i) {
i = self->bitSet.firstSetGeq(i + 1)) {
result = std::max(result, self->children[i].childMaxVersion);
}
} else {
for (int i = begin + 1; i < end; ++i) {
if (self->children[i].child != nullptr) {
result = std::max(result, self->children[i].childMaxVersion); result = std::max(result, self->children[i].childMaxVersion);
} },
} begin, end);
}
break; break;
} }
case Type::Invalid: case Type::Invalid:
@@ -2138,8 +2168,21 @@ void printTree() {
debugPrintDot(stdout, cs.root, &cs); debugPrintDot(stdout, cs.root, &cs);
} }
#define ANKERL_NANOBENCH_IMPLEMENT
#include "third_party/nanobench.h"
int main(void) { int main(void) {
printTree(); ankerl::nanobench::Bench bench;
ConflictSet::Impl cs{0};
for (int j = 0; j < 256; ++j) {
getOrCreateChild(cs.root, j, &cs.allocators) =
cs.allocators.node4.allocate();
if (j % 10 == 0) {
bench.run("MaxExclusive " + std::to_string(j), [&]() {
bench.doNotOptimizeAway(maxBetweenExclusive(cs.root, 0, 256));
});
}
}
return 0; return 0;
} }
#endif #endif