Specialize setChildrenParents for each node type

Cachegrind reports that this change increases the instruction count. I'm not
sure yet why that happens.
This commit is contained in:
2024-02-23 13:09:42 -08:00
parent 7136b5a450
commit 14de4ee297

View File

@@ -41,6 +41,8 @@ limitations under the License.
// ==================== BEGIN IMPLEMENTATION ====================
// Child-count cutoff used when scanning Node48/Node256 nodes: nodes with fewer
// children than this are walked through their bitSet, while nodes at or above
// it are walked with a plain loop over all 256 slots.
constexpr int kSparseScanThreshold = 32;
struct Entry {
int64_t pointVersion;
int64_t rangeVersion;
@@ -372,9 +374,41 @@ int getChildGeq(Node *self, int child) {
return -1;
}
void setChildrenParents(Node *node) {
for (int i = getChildGeq(node, 0); i >= 0; i = getChildGeq(node, i + 1)) {
getChildExists(node, i)->parent = node;
// Re-parents the children of a Node16: each of the first `numChildren`
// entries of `n->children` gets its `parent` field set to `n`.
void setChildrenParents(Node16 *n) {
  int slot = 0;
  while (slot < n->numChildren) {
    n->children[slot]->parent = n;
    ++slot;
  }
}
// Re-parents the children of a Node48 so that each child's `parent` is `n`.
//
// Two scan strategies are used, selected by the number of children:
//   * at or above kSparseScanThreshold: visit all 256 entries of `index` and
//     skip the ones equal to -1;
//   * below kSparseScanThreshold: visit only the positions reported by
//     `bitSet.firstSetGeq`, stopping once it yields a negative value.
// In both cases `index[key]` selects the slot in `children` to update.
void setChildrenParents(Node48 *n) {
  if (n->numChildren >= kSparseScanThreshold) {
    // Dense node: a straight pass over every key.
    for (int key = 0; key < 256; ++key) {
      const int slot = n->index[key];
      if (slot == -1) {
        continue;
      }
      n->children[slot]->parent = n;
    }
    return;
  }

  // Sparse node: hop from one set bit to the next.
  int key = n->bitSet.firstSetGeq(0);
  while (key >= 0) {
    n->children[n->index[key]]->parent = n;
    key = n->bitSet.firstSetGeq(key + 1);
  }
}
// Re-parents the children of a Node256 so that each child's `parent` is `n`.
//
// Two scan strategies are used, selected by the number of children:
//   * at or above kSparseScanThreshold: visit all 256 entries of `children`
//     and skip the null ones;
//   * below kSparseScanThreshold: visit only the positions reported by
//     `bitSet.firstSetGeq`, stopping once it yields a negative value.
void setChildrenParents(Node256 *n) {
  if (n->numChildren >= kSparseScanThreshold) {
    // Dense node: a straight pass over every slot.
    for (int key = 0; key < 256; ++key) {
      auto *kid = n->children[key];
      if (kid == nullptr) {
        continue;
      }
      kid->parent = n;
    }
    return;
  }

  // Sparse node: hop from one set bit to the next.
  int key = n->bitSet.firstSetGeq(0);
  while (key >= 0) {
    n->children[key]->parent = n;
    key = n->bitSet.firstSetGeq(key + 1);
  }
}
@@ -411,8 +445,8 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
memcpy((void *)newSelf, self, sizeof(Node4));
newSelf->type = Type::Node16;
allocators->node4.release(self4);
setChildrenParents(newSelf);
self = newSelf;
setChildrenParents(self);
}
goto insert16;
@@ -433,8 +467,8 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
}
assert(i == 16);
allocators->node16.release(self16);
setChildrenParents(newSelf);
self = newSelf;
setChildrenParents(self);
goto insert48;
}
@@ -470,8 +504,8 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
}
}
allocators->node48.release(self48);
setChildrenParents(newSelf);
self = newSelf;
setChildrenParents(self);
goto insert256;
}
insert48:
@@ -869,7 +903,6 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
assert(end <= 256);
assert(begin < end);
int64_t result = std::numeric_limits<int64_t>::lowest();
constexpr int kSparseThreshold = 32;
{
int c = getChildGeq(n, begin + 1);
if (c >= 0 && c < end) {
@@ -893,7 +926,7 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
}
case Type::Node48: {
auto *self = static_cast<Node48 *>(n);
if (self->numChildren < kSparseThreshold) {
if (self->numChildren < kSparseScanThreshold) {
for (int i = self->bitSet.firstSetGeq(begin + 1); i < end && i >= 0;
i = self->bitSet.firstSetGeq(i + 1)) {
if (self->index[i] != -1) {
@@ -911,7 +944,7 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
}
case Type::Node256: {
auto *self = static_cast<Node256 *>(n);
if (self->numChildren < kSparseThreshold) {
if (self->numChildren < kSparseScanThreshold) {
for (int i = self->bitSet.firstSetGeq(begin + 1); i < end && i >= 0;
i = self->bitSet.firstSetGeq(i + 1)) {
result = std::max(result, self->children[i]->maxVersion);