diff --git a/ConflictSet.cpp b/ConflictSet.cpp index 2a8609d..012dd80 100644 --- a/ConflictSet.cpp +++ b/ConflictSet.cpp @@ -41,8 +41,6 @@ limitations under the License. // ==================== BEGIN IMPLEMENTATION ==================== -constexpr int kSparseScanThreshold = 32; - struct Entry { int64_t pointVersion; int64_t rangeVersion; @@ -104,21 +102,19 @@ struct BitSet { // this approach if ((begin >> 6) == (end >> 6)) { - { - uint64_t word = words[begin >> 6] & (uint64_t(-1) << (begin & 63)) & - ~(uint64_t(-1) << (end & 63)); - while (word) { - uint64_t temp = word & -word; - int index = (begin & ~63) + std::countr_zero(word); - f(index); - word ^= temp; - } + uint64_t word = words[begin >> 6] & (uint64_t(-1) << (begin & 63)) & + ~(uint64_t(-1) << (end & 63)); + while (word) { + uint64_t temp = word & -word; + int index = (begin & ~63) + std::countr_zero(word); + f(index); + word ^= temp; } return; } // Check begin partial word - { + if (begin & 63) { uint64_t word = words[begin >> 6] & (uint64_t(-1) << (begin & 63)); while (word) { uint64_t temp = word & -word; @@ -126,27 +122,34 @@ struct BitSet { f(index); word ^= temp; } + begin &= ~63; + begin += 64; } // Check inner, full words - begin += 64; - while ((begin >> 6) != (end >> 6)) { + while (begin != (end & ~63)) { uint64_t word = words[begin >> 6]; - while (word) { - uint64_t temp = word & -word; - int index = (begin & ~63) + std::countr_zero(word); - f(index); - word ^= temp; + if (word == uint64_t(-1)) { + for (int i = 0; i < 64; ++i) { + f(begin + i); + } + } else { + while (word) { + uint64_t temp = word & -word; + int index = begin + std::countr_zero(word); + f(index); + word ^= temp; + } } begin += 64; } - if (end < 256) { + if (end & 63) { // Check end partial word uint64_t word = words[end >> 6] & ~(uint64_t(-1) << (end & 63)); while (word) { uint64_t temp = word & -word; - int index = (begin & ~63) + std::countr_zero(word); + int index = begin + std::countr_zero(word); f(index); word ^= temp; } @@ -449,33 +452,19 @@ void setChildrenParents(Node16 *n) { } void setChildrenParents(Node48 *n) { - if (n->numChildren < kSparseScanThreshold) { - for (int i = n->bitSet.firstSetGeq(0); i >= 0; - i = n->bitSet.firstSetGeq(i + 1)) { - n->children[n->index[i]].child->parent = n; - } - } else { - for (int i = 0; i < 256; ++i) { - int c = n->index[i]; - if (c != -1) { - n->children[c].child->parent = n; - } + for (int i = 0; i < 256; ++i) { + int c = n->index[i]; + if (c != -1) { + n->children[c].child->parent = n; } } } void setChildrenParents(Node256 *n) { - if (n->numChildren < kSparseScanThreshold) { - for (int i = n->bitSet.firstSetGeq(0); i >= 0; - i = n->bitSet.firstSetGeq(i + 1)) { - n->children[i].child->parent = n; - } - } else { - for (int i = 0; i < 256; ++i) { - auto *child = n->children[i].child; - if (child != nullptr) { - child->parent = n; - } + for (int i = 0; i < 256; ++i) { + auto *child = n->children[i].child; + if (child != nullptr) { + child->parent = n; } } } @@ -979,6 +968,9 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) { if (child->entryPresent) { result = std::max(result, child->entry.rangeVersion); } + begin = c; + } else { + return result; } } switch (n->type) { @@ -987,7 +979,7 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) { case Type::Node16: { auto *self = static_cast(n); for (int i = 0; i < self->numChildren && self->index[i] < end; ++i) { - if (begin < self->index[i]) { + if (begin <= self->index[i]) { result = std::max(result, self->children[i].childMaxVersion); } } @@ -995,38 +987,21 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) { } case Type::Node48: { auto *self = static_cast(n); - if (self->numChildren < kSparseScanThreshold) { - for (int i = self->bitSet.firstSetGeq(begin + 1); i < end && i >= 0; - i = self->bitSet.firstSetGeq(i + 1)) { - if (self->index[i] != -1) { + self->bitSet.forEachInRange( + [&](int i) { result = std::max(result, self->children[self->index[i]].childMaxVersion); - } - } - } else { - for (int i = begin + 1; i < end; ++i) { - if (self->index[i] != -1) { - result = - std::max(result, self->children[self->index[i]].childMaxVersion); - } - } - } + }, + begin, end); break; } case Type::Node256: { auto *self = static_cast(n); - if (self->numChildren < kSparseScanThreshold) { - for (int i = self->bitSet.firstSetGeq(begin + 1); i < end && i >= 0; - i = self->bitSet.firstSetGeq(i + 1)) { - result = std::max(result, self->children[i].childMaxVersion); - } - } else { - for (int i = begin + 1; i < end; ++i) { - if (self->children[i].child != nullptr) { + self->bitSet.forEachInRange( + [&](int i) { result = std::max(result, self->children[i].childMaxVersion); - } - } - } + }, + begin, end); break; } case Type::Invalid: @@ -2193,8 +2168,21 @@ void printTree() { debugPrintDot(stdout, cs.root, &cs); } +#define ANKERL_NANOBENCH_IMPLEMENT +#include "third_party/nanobench.h" + int main(void) { - printTree(); + ankerl::nanobench::Bench bench; + ConflictSet::Impl cs{0}; + for (int j = 0; j < 256; ++j) { + getOrCreateChild(cs.root, j, &cs.allocators) = + cs.allocators.node4.allocate(); + if (j % 10 == 0) { + bench.run("MaxExclusive " + std::to_string(j), [&]() { + bench.doNotOptimizeAway(maxBetweenExclusive(cs.root, 0, 256)); + }); + } + } return 0; } #endif