Optimize getOrCreateChild

Add a fast path for when the child already exists

Avoid some unnecessary branches

Try to simplify the code a bit
This commit is contained in:
2024-02-23 12:34:28 -08:00
parent aaf0283f66
commit 116c79d3de

View File

@@ -382,11 +382,30 @@ void setChildrenParents(Node *node) {
// reference if null // reference if null
Node *&getOrCreateChild(Node *&self, uint8_t index, Node *&getOrCreateChild(Node *&self, uint8_t index,
NodeAllocators *allocators) { NodeAllocators *allocators) {
// Fast path for if it exists already
if (self->type <= Type::Node16) {
auto *self16 = static_cast<Node16 *>(self);
int i = getNodeIndex(self16, index);
if (i >= 0) {
return self16->children[i];
}
} else if (self->type == Type::Node48) {
auto *self48 = static_cast<Node48 *>(self);
int secondIndex = self48->index[index];
if (secondIndex >= 0) {
return self48->children[secondIndex];
}
} else {
auto *self256 = static_cast<Node256 *>(self);
if (auto &result = self256->children[index]; result != nullptr) {
return result;
}
}
if (self->type == Type::Node4) { if (self->type == Type::Node4) {
auto *self4 = static_cast<Node4 *>(self); auto *self4 = static_cast<Node4 *>(self);
if (int i = getNodeIndex((Node16 *)self4, index); i >= 0) {
return self4->children[i];
}
if (self->numChildren == 4) { if (self->numChildren == 4) {
auto *newSelf = allocators->node16.allocate(); auto *newSelf = allocators->node16.allocate();
memcpy((void *)newSelf, self, offsetof(Node, type)); memcpy((void *)newSelf, self, offsetof(Node, type));
@@ -395,32 +414,14 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
allocators->node4.release(self4); allocators->node4.release(self4);
self = newSelf; self = newSelf;
setChildrenParents(self); setChildrenParents(self);
goto insert16;
} else {
++self->numChildren;
for (int i = 0; i < int(self->numChildren) - 1; ++i) {
if (int(self4->index[i]) > int(index)) {
memmove(self4->index + i + 1, self4->index + i,
self->numChildren - (i + 1));
memmove(self4->children + i + 1, self4->children + i,
(self->numChildren - (i + 1)) * sizeof(void *));
self4->index[i] = index;
self4->children[i] = nullptr;
return self4->children[i];
}
}
self4->index[self->numChildren - 1] = index;
self4->children[self->numChildren - 1] = nullptr;
return self4->children[self->numChildren - 1];
} }
} else if (self->type == Type::Node16) {
insert16:
auto *self16 = static_cast<Node16 *>(self);
if (int i = getNodeIndex(self16, index); i >= 0) { goto insert16;
return self16->children[i];
} } else if (self->type == Type::Node16) {
if (self->numChildren == 16) { if (self->numChildren == 16) {
auto *self16 = static_cast<Node16 *>(self);
auto *newSelf = allocators->node48.allocate(); auto *newSelf = allocators->node48.allocate();
memcpy((void *)newSelf, self, offsetof(Node, type)); memcpy((void *)newSelf, self, offsetof(Node, type));
newSelf->nextFree = 16; newSelf->nextFree = 16;
@@ -436,62 +437,61 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
self = newSelf; self = newSelf;
setChildrenParents(self); setChildrenParents(self);
goto insert48; goto insert48;
} else {
++self->numChildren;
for (int i = 0; i < int(self->numChildren) - 1; ++i) {
if (int(self16->index[i]) > int(index)) {
memmove(self16->index + i + 1, self16->index + i,
self->numChildren - (i + 1));
memmove(self16->children + i + 1, self16->children + i,
(self->numChildren - (i + 1)) * sizeof(void *));
self16->index[i] = index;
self16->children[i] = nullptr;
return self16->children[i];
}
}
self16->index[self->numChildren - 1] = index;
self16->children[self->numChildren - 1] = nullptr;
return self16->children[self->numChildren - 1];
} }
} else if (self->type == Type::Node48) {
insert48:
auto *self48 = static_cast<Node48 *>(self);
if (int c = self48->index[index];
c >= 0) { insert16:
return self48->children[c]; auto *self16 = static_cast<Node16 *>(self);
++self->numChildren;
int i = 0;
for (; i < int(self->numChildren) - 1; ++i) {
if (int(self16->index[i]) > int(index)) {
memmove(self16->index + i + 1, self16->index + i,
self->numChildren - (i + 1));
memmove(self16->children + i + 1, self16->children + i,
(self->numChildren - (i + 1)) * sizeof(void *));
break;
}
} }
self16->index[i] = index;
auto &result = self16->children[i];
result = nullptr;
return result;
} else if (self->type == Type::Node48) {
if (self->numChildren == 48) { if (self->numChildren == 48) {
auto *self48 = static_cast<Node48 *>(self);
auto *newSelf = allocators->node256.allocate(); auto *newSelf = allocators->node256.allocate();
memcpy((void *)newSelf, self, offsetof(Node, type)); memcpy((void *)newSelf, self, offsetof(Node, type));
newSelf->bitSet = self48->bitSet;
for (int i = 0; i < 256; ++i) { for (int i = 0; i < 256; ++i) {
if (self48->bitSet.test(i)) { int c = self48->index[i];
newSelf->bitSet.set(i); if (c >= 0) {
newSelf->children[i] = self48->children[self48->index[i]]; newSelf->children[i] = self48->children[c];
} }
} }
allocators->node48.release(self48); allocators->node48.release(self48);
self = newSelf; self = newSelf;
setChildrenParents(self); setChildrenParents(self);
goto insert256; goto insert256;
} else {
self48->bitSet.set(index);
++self->numChildren;
assert(self48->nextFree < 48);
self48->index[index] = self48->nextFree;
self48->children[self48->nextFree] = nullptr;
return self48->children[self48->nextFree++];
} }
insert48:
auto *self48 = static_cast<Node48 *>(self);
self48->bitSet.set(index);
++self->numChildren;
assert(self48->nextFree < 48);
int nextFree = self48->nextFree++;
self48->index[index] = nextFree;
auto &result = self48->children[nextFree];
result = nullptr;
return result;
} else { } else {
insert256: insert256:
auto *self256 = static_cast<Node256 *>(self); auto *self256 = static_cast<Node256 *>(self);
auto *&result = self256->children[index];
if (result) {
return result;
}
++self->numChildren; ++self->numChildren;
self256->bitSet.set(index); self256->bitSet.set(index);
return result; return self256->children[index];
} }
} }