Specialize setChildrenParents for each node type

cachegrind says this increases the instruction count somehow. I'm not
sure what's going on there.
This commit is contained in:
2024-02-23 13:09:42 -08:00
parent 7136b5a450
commit 14de4ee297

View File

@@ -41,6 +41,8 @@ limitations under the License.
// ==================== BEGIN IMPLEMENTATION ==================== // ==================== BEGIN IMPLEMENTATION ====================
// Child-count cutoff shared by setChildrenParents and maxBetweenExclusive:
// a Node48/Node256 with fewer children than this is walked via
// bitSet.firstSetGeq instead of the alternative code path.
constexpr int kSparseScanThreshold = 32;
struct Entry { struct Entry {
int64_t pointVersion; int64_t pointVersion;
int64_t rangeVersion; int64_t rangeVersion;
@@ -372,9 +374,41 @@ int getChildGeq(Node *self, int child) {
return -1; return -1;
} }
// Right-hand column: Node16 overload. Sets the parent pointer of each of the
// first n->numChildren entries of n->children to n.
// Left-hand column: the previous generic version, which visited children via
// getChildGeq / getChildExists and is replaced by the per-type overloads.
void setChildrenParents(Node *node) { void setChildrenParents(Node16 *n) {
for (int i = getChildGeq(node, 0); i >= 0; i = getChildGeq(node, i + 1)) { for (int i = 0; i < n->numChildren; ++i) {
getChildExists(node, i)->parent = node; n->children[i]->parent = n;
}
}
// Node48 overload: points the `parent` field of every child reachable through
// n->index back at `n`.
//
// Two traversal strategies are used, selected by n->numChildren:
//   * at or above kSparseScanThreshold, all 256 index slots are scanned and
//     entries equal to -1 are skipped (presumably -1 marks "no child" --
//     confirm against the Node48 definition);
//   * below it, only the keys reported by n->bitSet.firstSetGeq are visited.
void setChildrenParents(Node48 *n) {
  if (n->numChildren >= kSparseScanThreshold) {
    // Many children: a plain linear pass over every possible key.
    for (int key = 0; key < 256; ++key) {
      int slot = n->index[key];
      if (slot == -1) {
        continue;
      }
      n->children[slot]->parent = n;
    }
    return;
  }

  // Few children: hop from one set bit to the next; a negative result from
  // firstSetGeq ends the walk.
  int key = n->bitSet.firstSetGeq(0);
  while (key >= 0) {
    n->children[n->index[key]]->parent = n;
    key = n->bitSet.firstSetGeq(key + 1);
  }
}
// Node256 overload: sets the parent pointer of every child of n to n.
// Children are addressed directly by key in n->children (no index table).
// NOTE(review): the doubled closing braces on the last two lines come from the
// side-by-side diff rendering (old column's closing braces), not from the code.
void setChildrenParents(Node256 *n) {
// Fewer children than kSparseScanThreshold: visit only the keys reported by
// bitSet.firstSetGeq; the walk stops when it returns a negative value.
if (n->numChildren < kSparseScanThreshold) {
for (int i = n->bitSet.firstSetGeq(0); i >= 0;
i = n->bitSet.firstSetGeq(i + 1)) {
n->children[i]->parent = n;
}
} else {
// Otherwise scan all 256 slots and skip the null ones.
for (int i = 0; i < 256; ++i) {
auto *child = n->children[i];
if (child != nullptr) {
child->parent = n;
}
}
} }
} }
@@ -411,8 +445,8 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
memcpy((void *)newSelf, self, sizeof(Node4)); memcpy((void *)newSelf, self, sizeof(Node4));
newSelf->type = Type::Node16; newSelf->type = Type::Node16;
allocators->node4.release(self4); allocators->node4.release(self4);
setChildrenParents(newSelf);
self = newSelf; self = newSelf;
setChildrenParents(self);
} }
goto insert16; goto insert16;
@@ -433,8 +467,8 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
} }
assert(i == 16); assert(i == 16);
allocators->node16.release(self16); allocators->node16.release(self16);
setChildrenParents(newSelf);
self = newSelf; self = newSelf;
setChildrenParents(self);
goto insert48; goto insert48;
} }
@@ -470,8 +504,8 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
} }
} }
allocators->node48.release(self48); allocators->node48.release(self48);
setChildrenParents(newSelf);
self = newSelf; self = newSelf;
setChildrenParents(self);
goto insert256; goto insert256;
} }
insert48: insert48:
@@ -869,7 +903,6 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
assert(end <= 256); assert(end <= 256);
assert(begin < end); assert(begin < end);
int64_t result = std::numeric_limits<int64_t>::lowest(); int64_t result = std::numeric_limits<int64_t>::lowest();
constexpr int kSparseThreshold = 32;
{ {
int c = getChildGeq(n, begin + 1); int c = getChildGeq(n, begin + 1);
if (c >= 0 && c < end) { if (c >= 0 && c < end) {
@@ -893,7 +926,7 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
} }
case Type::Node48: { case Type::Node48: {
auto *self = static_cast<Node48 *>(n); auto *self = static_cast<Node48 *>(n);
if (self->numChildren < kSparseThreshold) { if (self->numChildren < kSparseScanThreshold) {
for (int i = self->bitSet.firstSetGeq(begin + 1); i < end && i >= 0; for (int i = self->bitSet.firstSetGeq(begin + 1); i < end && i >= 0;
i = self->bitSet.firstSetGeq(i + 1)) { i = self->bitSet.firstSetGeq(i + 1)) {
if (self->index[i] != -1) { if (self->index[i] != -1) {
@@ -911,7 +944,7 @@ int64_t maxBetweenExclusive(Node *n, int begin, int end) {
} }
case Type::Node256: { case Type::Node256: {
auto *self = static_cast<Node256 *>(n); auto *self = static_cast<Node256 *>(n);
if (self->numChildren < kSparseThreshold) { if (self->numChildren < kSparseScanThreshold) {
for (int i = self->bitSet.firstSetGeq(begin + 1); i < end && i >= 0; for (int i = self->bitSet.firstSetGeq(begin + 1); i < end && i >= 0;
i = self->bitSet.firstSetGeq(i + 1)) { i = self->bitSet.firstSetGeq(i + 1)) {
result = std::max(result, self->children[i]->maxVersion); result = std::max(result, self->children[i]->maxVersion);