Use SoA (struct of arrays) instead of AoS (array of structs) for child and maxVersion

This commit is contained in:
2024-06-27 09:57:54 -07:00
parent f8bf1c6eb4
commit 9108ee209a

View File

@@ -237,7 +237,6 @@ constexpr int kNodeCopySize =
offsetof(Node, parentsIndex) + sizeof(Node::parentsIndex) - kNodeCopyBegin;
// Element type of the `children` arrays declared in Node3, Node16, Node48 and
// Node256.
// NOTE(review): this listing is a diff with its +/- markers stripped. The hunk
// header (-237,7 +237,6) shows one line removed in this region; presumably it
// is `childMaxVersion`, since the rest of the change reads the per-node
// `childMaxVersion[]` arrays instead of `children[i].childMaxVersion` --
// confirm against the real file.
struct Child {
// Version value that checkMaxBetweenExclusive compares against readVersion
// (pre-change field; see the note above).
int64_t childMaxVersion;
// Pointer to the child node; the copy routines re-point child->parent through it.
Node *child;
};
@@ -263,6 +262,7 @@ struct Node3 : Node {
// Sorted
uint8_t index[kMaxNodes];
Child children[kMaxNodes];
int64_t childMaxVersion[kMaxNodes];
uint8_t *partialKey() { return (uint8_t *)(this + 1); }
void copyChildrenAndKeyFrom(const Node0 &other);
@@ -278,6 +278,7 @@ struct Node16 : Node {
// Sorted
uint8_t index[kMaxNodes];
Child children[kMaxNodes];
int64_t childMaxVersion[kMaxNodes];
uint8_t *partialKey() { return (uint8_t *)(this + 1); }
void copyChildrenAndKeyFrom(const Node3 &other);
@@ -294,6 +295,7 @@ struct Node48 : Node {
int8_t nextFree;
int8_t index[256];
Child children[kMaxNodes];
int64_t childMaxVersion[kMaxNodes];
uint8_t reverseIndex[kMaxNodes];
constexpr static int kMaxOfMaxPageSize = 8;
constexpr static int kMaxOfMaxShift =
@@ -314,6 +316,7 @@ struct Node256 : Node {
constexpr static auto kType = Type_Node256;
BitSet bitSet;
Child children[256];
int64_t childMaxVersion[256];
constexpr static int kMaxOfMaxPageSize = 8;
constexpr static int kMaxOfMaxShift =
std::countr_zero(uint32_t(kMaxOfMaxPageSize));
@@ -359,7 +362,9 @@ inline void Node3::copyChildrenAndKeyFrom(const Node16 &other) {
memcpy((char *)this + kNodeCopyBegin, (char *)&other + kNodeCopyBegin,
kNodeCopySize);
memcpy(index, other.index, kMaxNodes);
memcpy(children, other.children, kMaxNodes * sizeof(Child));
memcpy(children, other.children, kMaxNodes * sizeof(children[0]));
memcpy(childMaxVersion, other.childMaxVersion,
kMaxNodes * sizeof(childMaxVersion[0]));
memcpy(partialKey(), &other + 1, partialKeyLen);
for (int i = 0; i < numChildren; ++i) {
assert(children[i].child->parent == &other);
@@ -371,7 +376,9 @@ inline void Node16::copyChildrenAndKeyFrom(const Node3 &other) {
memcpy((char *)this + kNodeCopyBegin, (char *)&other + kNodeCopyBegin,
kNodeCopySize);
memcpy(index, other.index, Node3::kMaxNodes);
memcpy(children, other.children, Node3::kMaxNodes * sizeof(Child));
memcpy(children, other.children, Node3::kMaxNodes * sizeof(children[0]));
memcpy(childMaxVersion, other.childMaxVersion,
Node3::kMaxNodes * sizeof(childMaxVersion[0]));
memcpy(partialKey(), &other + 1, partialKeyLen);
assert(numChildren == Node3::kMaxNodes);
for (int i = 0; i < Node3::kMaxNodes; ++i) {
@@ -386,6 +393,7 @@ inline void Node16::copyChildrenAndKeyFrom(const Node16 &other) {
memcpy(index, other.index, sizeof(index));
for (int i = 0; i < numChildren; ++i) {
children[i] = other.children[i];
childMaxVersion[i] = other.childMaxVersion[i];
assert(children[i].child->parent == &other);
children[i].child->parent = this;
}
@@ -403,6 +411,7 @@ inline void Node16::copyChildrenAndKeyFrom(const Node48 &other) {
assume(i < Node16::kMaxNodes);
index[i] = c;
children[i] = other.children[other.index[c]];
childMaxVersion[i] = other.childMaxVersion[other.index[c]];
assert(children[i].child->parent == &other);
children[i].child->parent = this;
++i;
@@ -417,6 +426,7 @@ inline void Node48::copyChildrenAndKeyFrom(const Node16 &other) {
assert(numChildren == Node16::kMaxNodes);
memset(index, -1, sizeof(index));
memset(children, 0, sizeof(children));
memset(childMaxVersion, 0, sizeof(childMaxVersion));
memcpy(partialKey(), &other + 1, partialKeyLen);
bitSet.init();
nextFree = Node16::kMaxNodes;
@@ -425,11 +435,12 @@ inline void Node48::copyChildrenAndKeyFrom(const Node16 &other) {
bitSet.set(x);
index[x] = i;
children[i] = other.children[i];
childMaxVersion[i] = other.childMaxVersion[i];
assert(children[i].child->parent == &other);
children[i].child->parent = this;
reverseIndex[i] = x;
maxOfMax[i >> Node48::kMaxOfMaxShift] = std::max(
maxOfMax[i >> Node48::kMaxOfMaxShift], children[i].childMaxVersion);
maxOfMax[i >> Node48::kMaxOfMaxShift] =
std::max(maxOfMax[i >> Node48::kMaxOfMaxShift], childMaxVersion[i]);
++i;
}
}
@@ -441,8 +452,10 @@ inline void Node48::copyChildrenAndKeyFrom(const Node48 &other) {
nextFree = other.nextFree;
memcpy(index, other.index, sizeof(index));
memset(children, 0, sizeof(children));
memset(childMaxVersion, 0, sizeof(childMaxVersion));
for (int i = 0; i < numChildren; ++i) {
children[i] = other.children[i];
childMaxVersion[i] = other.childMaxVersion[i];
assert(children[i].child->parent == &other);
children[i].child->parent = this;
}
@@ -456,6 +469,7 @@ inline void Node48::copyChildrenAndKeyFrom(const Node256 &other) {
kNodeCopySize);
memset(index, -1, sizeof(index));
memset(children, 0, sizeof(children));
memset(childMaxVersion, 0, sizeof(childMaxVersion));
nextFree = other.numChildren;
bitSet = other.bitSet;
int i = 0;
@@ -466,11 +480,12 @@ inline void Node48::copyChildrenAndKeyFrom(const Node256 &other) {
assume(i < Node48::kMaxNodes);
index[c] = i;
children[i] = other.children[c];
childMaxVersion[i] = other.childMaxVersion[c];
assert(children[i].child->parent == &other);
children[i].child->parent = this;
reverseIndex[i] = c;
maxOfMax[i >> Node48::kMaxOfMaxShift] = std::max(
maxOfMax[i >> Node48::kMaxOfMaxShift], children[i].childMaxVersion);
maxOfMax[i >> Node48::kMaxOfMaxShift] =
std::max(maxOfMax[i >> Node48::kMaxOfMaxShift], childMaxVersion[i]);
++i;
},
0, 256);
@@ -482,15 +497,16 @@ inline void Node256::copyChildrenAndKeyFrom(const Node48 &other) {
kNodeCopySize);
bitSet = other.bitSet;
memset(children, 0, sizeof(children));
memset(childMaxVersion, 0, sizeof(childMaxVersion));
memset(maxOfMax, 0, sizeof(maxOfMax));
bitSet.forEachInRange(
[&](int c) {
children[c] = other.children[other.index[c]];
childMaxVersion[c] = other.childMaxVersion[other.index[c]];
assert(children[c].child->parent == &other);
children[c].child->parent = this;
maxOfMax[c >> Node256::kMaxOfMaxShift] =
std::max(maxOfMax[c >> Node256::kMaxOfMaxShift],
children[c].childMaxVersion);
maxOfMax[c >> Node256::kMaxOfMaxShift] = std::max(
maxOfMax[c >> Node256::kMaxOfMaxShift], childMaxVersion[c]);
},
0, 256);
memcpy(partialKey(), &other + 1, partialKeyLen);
@@ -500,10 +516,12 @@ inline void Node256::copyChildrenAndKeyFrom(const Node256 &other) {
memcpy((char *)this + kNodeCopyBegin, (char *)&other + kNodeCopyBegin,
kNodeCopySize);
memset(children, 0, sizeof(children));
memset(childMaxVersion, 0, sizeof(childMaxVersion));
bitSet = other.bitSet;
bitSet.forEachInRange(
[&](int c) {
children[c] = other.children[c];
childMaxVersion[c] = other.childMaxVersion[c];
assert(children[c].child->parent == &other);
children[c].child->parent = this;
},
@@ -592,6 +610,7 @@ template <class T> struct BoundedFreeListAllocator {
T *result = allocate_helper(partialKeyCapacity);
if constexpr (!std::is_same_v<T, Node0>) {
memset(result->children, 0, sizeof(result->children));
memset(result->childMaxVersion, 0, sizeof(result->childMaxVersion));
}
if constexpr (std::is_same_v<T, Node48> || std::is_same_v<T, Node256>) {
memset(result->maxOfMax, 0, sizeof(result->maxOfMax));
@@ -971,7 +990,10 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
memmove(self3->index + i + 1, self3->index + i,
self->numChildren - (i + 1));
memmove(self3->children + i + 1, self3->children + i,
(self->numChildren - (i + 1)) * sizeof(Child));
(self->numChildren - (i + 1)) * sizeof(self3->children[0]));
memmove(self3->childMaxVersion + i + 1, self3->childMaxVersion + i,
(self->numChildren - (i + 1)) *
sizeof(self3->childMaxVersion[0]));
break;
}
}
@@ -1008,7 +1030,10 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
memmove(self16->index + i + 1, self16->index + i,
self->numChildren - (i + 1));
memmove(self16->children + i + 1, self16->children + i,
(self->numChildren - (i + 1)) * sizeof(Child));
(self->numChildren - (i + 1)) * sizeof(self16->children[0]));
memmove(self16->childMaxVersion + i + 1, self16->childMaxVersion + i,
(self->numChildren - (i + 1)) *
sizeof(self16->childMaxVersion[0]));
}
#elif defined(HAS_ARM_NEON)
uint8x16_t indices;
@@ -1028,7 +1053,10 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
memmove(self16->index + i + 1, self16->index + i,
self->numChildren - (i + 1));
memmove(self16->children + i + 1, self16->children + i,
(self->numChildren - (i + 1)) * sizeof(Child));
(self->numChildren - (i + 1)) * sizeof(self16->children[0]));
memmove(self16->childMaxVersion + i + 1, self16->childMaxVersion + i,
(self->numChildren - (i + 1)) *
sizeof(self16->childMaxVersion[0]));
}
#else
int i = 0;
@@ -1037,7 +1065,10 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
memmove(self16->index + i + 1, self16->index + i,
self->numChildren - (i + 1));
memmove(self16->children + i + 1, self16->children + i,
(self->numChildren - (i + 1)) * sizeof(Child));
(self->numChildren - (i + 1)) * sizeof(self16->children[0]));
memmove(self16->childMaxVersion + i + 1, self16->childMaxVersion + i,
(self->numChildren - (i + 1)) *
sizeof(self16->childMaxVersion[0]));
break;
}
}
@@ -1334,6 +1365,10 @@ Node *erase(Node *self, NodeAllocators *allocators, ConflictSet::Impl *impl,
memmove(parent3->children + nodeIndex, parent3->children + nodeIndex + 1,
sizeof(parent3->children[0]) *
(parent->numChildren - (nodeIndex + 1)));
memmove(parent3->childMaxVersion + nodeIndex,
parent3->childMaxVersion + nodeIndex + 1,
sizeof(parent3->childMaxVersion[0]) *
(parent->numChildren - (nodeIndex + 1)));
--parent->numChildren;
assert(parent->numChildren > 0 || parent->entryPresent);
@@ -1348,6 +1383,10 @@ Node *erase(Node *self, NodeAllocators *allocators, ConflictSet::Impl *impl,
memmove(parent16->children + nodeIndex, parent16->children + nodeIndex + 1,
sizeof(parent16->children[0]) *
(parent->numChildren - (nodeIndex + 1)));
memmove(parent16->childMaxVersion + nodeIndex,
parent16->childMaxVersion + nodeIndex + 1,
sizeof(parent16->childMaxVersion[0]) *
(parent->numChildren - (nodeIndex + 1)));
--parent->numChildren;
@@ -1365,16 +1404,18 @@ Node *erase(Node *self, NodeAllocators *allocators, ConflictSet::Impl *impl,
if (toRemoveChildrenIndex != lastChildrenIndex) {
parent48->children[toRemoveChildrenIndex] =
parent48->children[lastChildrenIndex];
parent48->childMaxVersion[toRemoveChildrenIndex] =
parent48->childMaxVersion[lastChildrenIndex];
parent48->maxOfMax[toRemoveChildrenIndex >> Node48::kMaxOfMaxShift] =
std::max(parent48->maxOfMax[toRemoveChildrenIndex >>
Node48::kMaxOfMaxShift],
parent48->children[toRemoveChildrenIndex].childMaxVersion);
parent48->childMaxVersion[toRemoveChildrenIndex]);
auto parentIndex =
parent48->children[toRemoveChildrenIndex].child->parentsIndex;
parent48->index[parentIndex] = toRemoveChildrenIndex;
parent48->reverseIndex[toRemoveChildrenIndex] = parentIndex;
}
parent48->children[lastChildrenIndex].childMaxVersion = 0;
parent48->childMaxVersion[lastChildrenIndex] = 0;
--parent->numChildren;
@@ -1708,7 +1749,7 @@ bool checkMaxBetweenExclusive(Node *n, int begin, int end,
auto *self = static_cast<Node3 *>(n);
bool result = true;
for (int i = 0; i < 3; ++i) {
result &= !((self->children[i].childMaxVersion > readVersion) &
result &= !((self->childMaxVersion[i] > readVersion) &
inBounds(self->index[i]));
}
return result;
@@ -1717,7 +1758,7 @@ bool checkMaxBetweenExclusive(Node *n, int begin, int end,
auto *self = static_cast<Node16 *>(n);
bool result = true;
for (int i = 0; i < 16; ++i) {
result &= !((self->children[i].childMaxVersion > readVersion) &
result &= !((self->childMaxVersion[i] > readVersion) &
inBounds(self->index[i]));
}
return result;
@@ -1730,7 +1771,7 @@ bool checkMaxBetweenExclusive(Node *n, int begin, int end,
if (self->maxOfMax[i] > readVersion) {
for (int j = 0; j < Node48::kMaxOfMaxPageSize; ++j) {
int k = (i << Node48::kMaxOfMaxShift) + j;
result &= !((self->children[k].childMaxVersion > readVersion) &
result &= !((self->childMaxVersion[k] > readVersion) &
inBounds(self->reverseIndex[k]));
}
}
@@ -1744,8 +1785,7 @@ bool checkMaxBetweenExclusive(Node *n, int begin, int end,
bool result = true;
for (int i = 0; i < Node256::kMaxOfMaxPageSize; ++i) {
int j = (begin & ~(Node256::kMaxOfMaxPageSize - 1)) + i;
result &=
!((self->children[j].childMaxVersion > readVersion) & inBounds(j));
result &= !((self->childMaxVersion[j] > readVersion) & inBounds(j));
}
if (!result) {
return result;
@@ -1757,8 +1797,7 @@ bool checkMaxBetweenExclusive(Node *n, int begin, int end,
bool result = true;
for (int i = 0; i < Node256::kMaxOfMaxPageSize; ++i) {
int j = ((end - 1) & ~(Node256::kMaxOfMaxPageSize - 1)) + i;
result &=
!((self->children[j].childMaxVersion > readVersion) & inBounds(j));
result &= !((self->childMaxVersion[j] > readVersion) & inBounds(j));
}
if (!result) {
return result;
@@ -2649,22 +2688,22 @@ int64_t maxVersion(Node *n, ConflictSet::Impl *impl) {
case Type_Node3: {
auto *n3 = static_cast<Node3 *>(n);
int i = getNodeIndex(n3, index);
return n3->children[i].childMaxVersion;
return n3->childMaxVersion[i];
}
case Type_Node16: {
auto *n16 = static_cast<Node16 *>(n);
int i = getNodeIndex(n16, index);
return n16->children[i].childMaxVersion;
return n16->childMaxVersion[i];
}
case Type_Node48: {
auto *n48 = static_cast<Node48 *>(n);
assert(n48->bitSet.test(index));
return n48->children[n48->index[index]].childMaxVersion;
return n48->childMaxVersion[n48->index[index]];
}
case Type_Node256: {
auto *n256 = static_cast<Node256 *>(n);
assert(n256->bitSet.test(index));
return n256->children[index].childMaxVersion;
return n256->childMaxVersion[index];
}
default: // GCOVR_EXCL_LINE
__builtin_unreachable(); // GCOVR_EXCL_LINE
@@ -2684,20 +2723,20 @@ void setMaxVersion(Node *n, ConflictSet::Impl *impl, int64_t newMax) {
case Type_Node3: {
auto *n3 = static_cast<Node3 *>(n);
int i = getNodeIndex(n3, index);
n3->children[i].childMaxVersion = newMax;
n3->childMaxVersion[i] = newMax;
return;
}
case Type_Node16: {
auto *n16 = static_cast<Node16 *>(n);
int i = getNodeIndex(n16, index);
n16->children[i].childMaxVersion = newMax;
n16->childMaxVersion[i] = newMax;
return;
}
case Type_Node48: {
auto *n48 = static_cast<Node48 *>(n);
assert(n48->bitSet.test(index));
int i = n48->index[index];
n48->children[i].childMaxVersion = newMax;
n48->childMaxVersion[i] = newMax;
n48->maxOfMax[i >> Node48::kMaxOfMaxShift] =
std::max(n48->maxOfMax[i >> Node48::kMaxOfMaxShift], newMax);
return;
@@ -2705,7 +2744,7 @@ void setMaxVersion(Node *n, ConflictSet::Impl *impl, int64_t newMax) {
case Type_Node256: {
auto *n256 = static_cast<Node256 *>(n);
assert(n256->bitSet.test(index));
n256->children[index].childMaxVersion = newMax;
n256->childMaxVersion[index] = newMax;
n256->maxOfMax[index >> Node256::kMaxOfMaxShift] =
std::max(n256->maxOfMax[index >> Node256::kMaxOfMaxShift], newMax);
return;