From 116c79d3deeb191601fac70f46f75678a3ced511 Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Fri, 23 Feb 2024 12:34:28 -0800 Subject: [PATCH] Optimize getOrCreateChild Add a fast path for when the child already exists. Avoid some unnecessary branches. Try to simplify the code somewhat. --- ConflictSet.cpp | 126 ++++++++++++++++++++++++------------------------ 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/ConflictSet.cpp b/ConflictSet.cpp index 9558e43..31247f8 100644 --- a/ConflictSet.cpp +++ b/ConflictSet.cpp @@ -382,11 +382,30 @@ void setChildrenParents(Node *node) { // reference if null Node *&getOrCreateChild(Node *&self, uint8_t index, NodeAllocators *allocators) { + + // Fast path for if it exists already + if (self->type <= Type::Node16) { + auto *self16 = static_cast(self); + int i = getNodeIndex(self16, index); + if (i >= 0) { + return self16->children[i]; + } + } else if (self->type == Type::Node48) { + auto *self48 = static_cast(self); + int secondIndex = self48->index[index]; + if (secondIndex >= 0) { + return self48->children[secondIndex]; + } + } else { + auto *self256 = static_cast(self); + if (auto &result = self256->children[index]; result != nullptr) { + return result; + } + } + if (self->type == Type::Node4) { auto *self4 = static_cast(self); - if (int i = getNodeIndex((Node16 *)self4, index); i >= 0) { - return self4->children[i]; - } + if (self->numChildren == 4) { auto *newSelf = allocators->node16.allocate(); memcpy((void *)newSelf, self, offsetof(Node, type)); @@ -395,32 +414,14 @@ Node *&getOrCreateChild(Node *&self, uint8_t index, allocators->node4.release(self4); self = newSelf; setChildrenParents(self); - goto insert16; - } else { - ++self->numChildren; - for (int i = 0; i < int(self->numChildren) - 1; ++i) { - if (int(self4->index[i]) > int(index)) { - memmove(self4->index + i + 1, self4->index + i, - self->numChildren - (i + 1)); - memmove(self4->children + i + 1, self4->children + i, - (self->numChildren - (i + 1)) * sizeof(void *)); - 
self4->index[i] = index; - self4->children[i] = nullptr; - return self4->children[i]; - } - } - self4->index[self->numChildren - 1] = index; - self4->children[self->numChildren - 1] = nullptr; - return self4->children[self->numChildren - 1]; } - } else if (self->type == Type::Node16) { - insert16: - auto *self16 = static_cast(self); - if (int i = getNodeIndex(self16, index); i >= 0) { - return self16->children[i]; - } + goto insert16; + + } else if (self->type == Type::Node16) { + if (self->numChildren == 16) { + auto *self16 = static_cast(self); auto *newSelf = allocators->node48.allocate(); memcpy((void *)newSelf, self, offsetof(Node, type)); newSelf->nextFree = 16; @@ -436,62 +437,61 @@ Node *&getOrCreateChild(Node *&self, uint8_t index, self = newSelf; setChildrenParents(self); goto insert48; - } else { - ++self->numChildren; - for (int i = 0; i < int(self->numChildren) - 1; ++i) { - if (int(self16->index[i]) > int(index)) { - memmove(self16->index + i + 1, self16->index + i, - self->numChildren - (i + 1)); - memmove(self16->children + i + 1, self16->children + i, - (self->numChildren - (i + 1)) * sizeof(void *)); - self16->index[i] = index; - self16->children[i] = nullptr; - return self16->children[i]; - } - } - self16->index[self->numChildren - 1] = index; - self16->children[self->numChildren - 1] = nullptr; - return self16->children[self->numChildren - 1]; } - } else if (self->type == Type::Node48) { - insert48: - auto *self48 = static_cast(self); - if (int c = self48->index[index]; - c >= 0) { - return self48->children[c]; + insert16: + auto *self16 = static_cast(self); + + ++self->numChildren; + int i = 0; + for (; i < int(self->numChildren) - 1; ++i) { + if (int(self16->index[i]) > int(index)) { + memmove(self16->index + i + 1, self16->index + i, + self->numChildren - (i + 1)); + memmove(self16->children + i + 1, self16->children + i, + (self->numChildren - (i + 1)) * sizeof(void *)); + break; + } } + self16->index[i] = index; + auto &result = 
self16->children[i]; + result = nullptr; + return result; + } else if (self->type == Type::Node48) { + if (self->numChildren == 48) { + auto *self48 = static_cast(self); auto *newSelf = allocators->node256.allocate(); memcpy((void *)newSelf, self, offsetof(Node, type)); + newSelf->bitSet = self48->bitSet; for (int i = 0; i < 256; ++i) { - if (self48->bitSet.test(i)) { - newSelf->bitSet.set(i); - newSelf->children[i] = self48->children[self48->index[i]]; + int c = self48->index[i]; + if (c >= 0) { + newSelf->children[i] = self48->children[c]; } } allocators->node48.release(self48); self = newSelf; setChildrenParents(self); goto insert256; - } else { - self48->bitSet.set(index); - ++self->numChildren; - assert(self48->nextFree < 48); - self48->index[index] = self48->nextFree; - self48->children[self48->nextFree] = nullptr; - return self48->children[self48->nextFree++]; } + insert48: + + auto *self48 = static_cast(self); + self48->bitSet.set(index); + ++self->numChildren; + assert(self48->nextFree < 48); + int nextFree = self48->nextFree++; + self48->index[index] = nextFree; + auto &result = self48->children[nextFree]; + result = nullptr; + return result; } else { insert256: auto *self256 = static_cast(self); - auto *&result = self256->children[index]; - if (result) { - return result; - } ++self->numChildren; self256->bitSet.set(index); - return result; + return self256->children[index]; } }