10 Commits

Author SHA1 Message Date
23c2a3e1c6 SIMD for eraseBetween (Node16)
Some checks failed
Tests / Clang total: 2688, passed: 2688
Clang
|Total|New|Outstanding|Fixed|Trend|
|:-:|:-:|:-:|:-:|:-:|
|0|0|0|0|:clap:|
Tests / Debug total: 2686, passed: 2686
weaselab/conflict-set/pipeline/head There was a failure building this commit
2024-08-14 18:12:46 -07:00
a64e792964 Remove unused function 2024-08-14 17:40:04 -07:00
5e362d5330 Add to corpus 2024-08-14 17:37:18 -07:00
cc526cb6ba Call eraseBetween on useAsRoot in addWriteRange 2024-08-14 17:08:55 -07:00
7e49888bec More eraseBetween optimizations 2024-08-14 16:40:29 -07:00
e64ebabced eraseBetween optimizations 2024-08-14 16:13:37 -07:00
1e34951a77 Fix use-of-uninit in eraseBetween (Node256) 2024-08-14 15:25:10 -07:00
baf64520d6 Have eraseBetween take in-tree node by reference 2024-08-14 15:04:11 -07:00
3499626127 Fix potential strict aliasing issues 2024-08-14 15:01:34 -07:00
b7f9084694 destroyTree -> eraseTree. Use freelist 2024-08-14 14:47:22 -07:00
107 changed files with 153 additions and 146 deletions

View File

@@ -195,7 +195,6 @@ struct Node {
/* end section that's copied to the next node */
uint8_t *partialKey();
size_t size() const;
Type getType() const { return type; }
int32_t getCapacity() const { return partialKeyCapacity; }
@@ -771,23 +770,6 @@ uint8_t *Node::partialKey() {
}
}
size_t Node::size() const {
switch (type) {
case Type_Node0:
return ((Node0 *)this)->size();
case Type_Node3:
return ((Node3 *)this)->size();
case Type_Node16:
return ((Node16 *)this)->size();
case Type_Node48:
return ((Node48 *)this)->size();
case Type_Node256:
return ((Node256 *)this)->size();
default: // GCOVR_EXCL_LINE
__builtin_unreachable(); // GCOVR_EXCL_LINE
}
}
// A type that's plumbed along the check call tree. Lifetime ends after each
// check call.
struct ReadContext {
@@ -1715,55 +1697,55 @@ void maybeDownsize(Node *self, WriteContext *tls, ConflictSet::Impl *impl,
}
}
// NOTE(review): this span is rendered diff text with the +/- markers stripped,
// so lines of destroyTree(root, accum) and of eraseTree(root, tls) appear
// interleaved (two signatures, one body). It is not compilable as written.
//
// Both variants free the subtree rooted at `root` iteratively: nodes are
// popped from an explicit stack, counted in the accumulator, their children
// are pushed, and the node itself is given back.
void destroyTree(Node *root, WriteContext::Accum *accum) {
void eraseTree(Node *root, WriteContext *tls) {
Arena arena;
// Work stack of nodes still to be freed, seeded with the root.
auto toFree = vector<Node *>(arena);
toFree.push_back(root);
#if SHOW_MEMORY
for (auto *iter = root; iter != nullptr; iter = nextPhysical(iter)) {
removeNode(iter);
removeKey(iter);
}
#endif
while (toFree.size() > 0) {
auto *n = toFree.back();
toFree.pop_back();
// Counter updates appear twice: once through the `accum` parameter
// (destroyTree signature) and once through `tls->accum` (eraseTree signature).
accum->entries_erased += n->entryPresent;
++accum->nodes_released;
tls->accum.entries_erased += n->entryPresent;
++tls->accum.nodes_released;
removeNode(n);
removeKey(n);
// Per node type: push the node's children onto the stack, then hand the node
// to tls->release.
switch (n->getType()) {
case Type_Node0: {
auto *n0 = static_cast<Node0 *>(n);
tls->release(n0);
} break;
case Type_Node3: {
auto *n3 = static_cast<Node3 *>(n);
toFree.append(std::span<Node *>(n3->children, n3->numChildren));
tls->release(n3);
} break;
case Type_Node16: {
auto *n16 = static_cast<Node16 *>(n);
toFree.append(std::span<Node *>(n16->children, n16->numChildren));
tls->release(n16);
} break;
case Type_Node48: {
auto *n48 = static_cast<Node48 *>(n);
toFree.append(std::span<Node *>(n48->children, n48->numChildren));
tls->release(n48);
} break;
case Type_Node256: {
auto *n256 = static_cast<Node256 *>(n);
// Unlike the span appends above, children are gathered by visiting each set
// bit of bitSet; the assert checks that exactly numChildren were written.
auto *out = toFree.unsafePrepareAppend(n256->numChildren).data();
n256->bitSet.forEachSet([&](int i) { *out++ = n256->children[i]; });
assert(out == toFree.end());
tls->release(n256);
} break;
default: // GCOVR_EXCL_LINE
__builtin_unreachable(); // GCOVR_EXCL_LINE
}
// NOTE(review): presumably the tail of the removed destroyTree variant (it
// frees by n->size() instead of tls->release) -- confirm against the real file.
removeNode(n);
safe_free(n, n->size());
}
}
void eraseBetween(Node3 *&n, int begin, int end, WriteContext *tls,
ConflictSet::Impl *impl) {
void eraseBetween(Node **inTree, Node3 *n, int begin, int end,
WriteContext *tls) {
const unsigned shiftUpperBound = end - begin;
const unsigned shiftAmount = begin;
auto inBounds = [&](unsigned c) { return c - shiftAmount < shiftUpperBound; };
@@ -1772,7 +1754,7 @@ void eraseBetween(Node3 *&n, int begin, int end, WriteContext *tls,
InternalVersionT *maxVOut = n->childMaxVersion;
for (int i = 0; i < n->numChildren; ++i) {
if (inBounds(n->index[i])) {
destroyTree(n->children[i], &tls->accum);
eraseTree(n->children[i], tls);
} else {
*nodeOut++ = n->children[i];
*indexOut++ = n->index[i];
@@ -1784,144 +1766,165 @@ void eraseBetween(Node3 *&n, int begin, int end, WriteContext *tls,
if (n->numChildren == 0) {
auto *newNode = tls->allocate<Node0>(n->partialKeyLen);
newNode->copyChildrenAndKeyFrom(*n);
getInTree(n, impl) = newNode;
tls->release(n);
(Node *&)n = newNode;
*inTree = newNode;
}
}
// NOTE(review): rendered diff text with +/- markers stripped; the
// (Node16 *&n, ..., impl) variant and the (Node **inTree, Node16 *n, ...)
// variant are interleaved below, so this span is not compilable as written.
//
// Erases every child of `n` whose index byte lies in [begin, end), then
// replaces the node with a smaller node type when few enough children remain.
// In the inTree variant the replacement is published through *inTree.
void eraseBetween(Node16 *&n, int begin, int end, WriteContext *tls,
ConflictSet::Impl *impl) {
void eraseBetween(Node **inTree, Node16 *n, int begin, int end,
WriteContext *tls) {
// Full-range fast path: free every child and swap in a freshly allocated
// Node0 populated via copyChildrenAndKeyFrom.
if (end - begin == 256) {
for (int i = 0; i < n->numChildren; ++i) {
eraseTree(n->children[i], tls);
}
n->numChildren = 0;
auto *newNode = tls->allocate<Node0>(n->partialKeyLen);
newNode->copyChildrenAndKeyFrom(*n);
tls->release(n);
*inTree = newNode;
return;
}
assert(end - begin < 256);
// Each branch below computes `mask`, flagging which of the 16 index bytes
// satisfy (index - begin) < (end - begin) as unsigned bytes.
#ifdef HAS_ARM_NEON
uint8x16_t indices;
memcpy(&indices, n->index, 16);
// 0xff for each in bounds
auto results =
vcltq_u8(vsubq_u8(indices, vdupq_n_u8(begin)), vdupq_n_u8(end - begin));
// 0xf for each 0xff
// NOTE(review): this mask carries 4 bits per index byte, but the code after
// #endif masks with (1 << numChildren) - 1 and reads countr_zero/popcount as
// if there were 1 bit per child -- confirm the NEON path accounts for the 4x.
uint64_t mask = vget_lane_u64(
vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(results), 4)), 0);
#elif defined(HAS_AVX)
__m128i indices;
memcpy(&indices, n->index, 16);
indices = _mm_sub_epi8(indices, _mm_set1_epi8(begin));
// A shifted byte b equals max_epu8(b, end - begin) exactly when
// b >= end - begin, i.e. out of range; inverting the movemask leaves one set
// bit per in-range byte (plus high bits that are cleared after #endif).
uint32_t mask = ~_mm_movemask_epi8(_mm_cmpeq_epi8(
indices, _mm_max_epu8(indices, _mm_set1_epi8(end - begin))));
#else
// Scalar fallback. Lines of the older compact-in-place loop and of the newer
// mask-building loop are interleaved here.
const unsigned shiftUpperBound = end - begin;
const unsigned shiftAmount = begin;
auto inBounds = [&](unsigned c) { return c - shiftAmount < shiftUpperBound; };
Node **nodeOut = n->children;
uint8_t *indexOut = n->index;
InternalVersionT *maxVOut = n->childMaxVersion;
for (int i = 0; i < n->numChildren; ++i) {
if (inBounds(n->index[i])) {
destroyTree(n->children[i], &tls->accum);
} else {
*nodeOut++ = n->children[i];
*indexOut++ = n->index[i];
*maxVOut++ = n->childMaxVersion[i];
}
uint32_t mask = 0;
for (int i = 0; i < 16; ++i) {
// NOTE(review): `is` is not declared anywhere in the visible lines --
// presumably meant to be n->index; confirm this branch compiles.
mask |= inBounds(is[i]) << i;
}
n->numChildren = nodeOut - n->children;
#endif
// Drop mask bits for slots at or beyond numChildren.
mask &= (decltype(mask)(1) << n->numChildren) - 1;
if (n->numChildren == 0) {
auto *newNode = tls->allocate<Node0>(n->partialKeyLen);
newNode->copyChildrenAndKeyFrom(*n);
getInTree(n, impl) = newNode;
tls->release(n);
(Node *&)n = newNode;
} else if (n->numChildren <= Node3::kMaxNodes) {
// Nothing in range: leave the node untouched.
if (!mask) {
return;
}
// The flagged children are handled as one contiguous run
// [first, first + count) -- presumably index[] is kept sorted; confirm.
int first = std::countr_zero(mask);
int count = std::popcount(mask);
n->numChildren -= count;
for (int i = first; i < first + count; ++i) {
eraseTree(n->children[i], tls);
}
// Slide the surviving tail down over the erased run.
for (int i = first; i < n->numChildren; ++i) {
n->children[i] = n->children[i + count];
n->childMaxVersion[i] = n->childMaxVersion[i + count];
n->index[i] = n->index[i + count];
}
// Downsize by remaining child count: keep Node16, or move to Node3, or Node0.
if (n->numChildren > Node3::kMaxNodes) {
// nop
} else if (n->numChildren > 0) {
auto *newNode = tls->allocate<Node3>(n->partialKeyLen);
newNode->copyChildrenAndKeyFrom(*n);
getInTree(n, impl) = newNode;
tls->release(n);
(Node *&)n = newNode;
*inTree = newNode;
} else {
auto *newNode = tls->allocate<Node0>(n->partialKeyLen);
newNode->copyChildrenAndKeyFrom(*n);
tls->release(n);
*inTree = newNode;
}
}
// NOTE(review): rendered diff text with +/- markers stripped; the
// (Node48 *&n, ..., impl) variant and the (Node **inTree, Node48 *n, ...)
// variant are interleaved below, so this span is not compilable as written.
//
// Erases every child of `n` whose key byte lies in [begin, end), then replaces
// the node with a smaller node type when few enough children remain.
void eraseBetween(Node48 *&n, int begin, int end, WriteContext *tls,
ConflictSet::Impl *impl) {
// Variant 1: reset maxOfMax, bitSet, index and nextFree, then re-add each
// surviving child while compacting children/reverseIndex/childMaxVersion.
const unsigned shiftUpperBound = end - begin;
const unsigned shiftAmount = begin;
auto inBounds = [&](unsigned c) { return c - shiftAmount < shiftUpperBound; };
Node **nodeOut = n->children;
uint8_t *indexOut = n->reverseIndex;
InternalVersionT *maxVOut = n->childMaxVersion;
for (auto &v : n->maxOfMax) {
v = tls->zero;
}
n->bitSet = {};
memset(n->index, -1, sizeof(n->index));
n->nextFree = 0;
for (int i = 0; i < n->numChildren; ++i) {
if (inBounds(n->reverseIndex[i])) {
destroyTree(n->children[i], &tls->accum);
} else {
*nodeOut++ = n->children[i];
*indexOut++ = n->reverseIndex[i];
*maxVOut++ = n->childMaxVersion[i];
n->maxOfMax[i >> Node48::kMaxOfMaxShift] = std::max(
n->maxOfMax[i >> Node48::kMaxOfMaxShift], n->childMaxVersion[i]);
n->bitSet.set(n->reverseIndex[i]);
n->index[n->reverseIndex[i]] = n->nextFree++;
// Variant 2: visit only the set bits in [begin, end) and remove each child by
// moving the last occupied slot into the hole.
void eraseBetween(Node **inTree, Node48 *n, int begin, int end,
WriteContext *tls) {
// The loop stops when firstSetGeq returns a negative value or a bit >= end
// (presumably negative means "no such bit" -- confirm). Bit i is reset in the
// body, so re-querying from i moves on to the next set bit.
for (int i = n->bitSet.firstSetGeq(begin); i >= 0 && i < end;
i = n->bitSet.firstSetGeq(i)) {
n->bitSet.reset(i);
// Slot in children[] that held key byte i; index[i] is marked empty (-1).
int8_t toRemoveChildrenIndex = std::exchange(n->index[i], -1);
// Last occupied slot, which becomes free after this removal.
int8_t lastChildrenIndex = --n->nextFree;
assert(toRemoveChildrenIndex >= 0);
assert(lastChildrenIndex >= 0);
eraseTree(n->children[toRemoveChildrenIndex], tls);
if (toRemoveChildrenIndex != lastChildrenIndex) {
// Move the last slot's child into the hole, raise the hole's maxOfMax bucket
// to cover the moved version, and repoint index/reverseIndex at the new slot.
n->children[toRemoveChildrenIndex] = n->children[lastChildrenIndex];
n->childMaxVersion[toRemoveChildrenIndex] =
n->childMaxVersion[lastChildrenIndex];
n->maxOfMax[toRemoveChildrenIndex >> Node48::kMaxOfMaxShift] =
std::max(n->maxOfMax[toRemoveChildrenIndex >> Node48::kMaxOfMaxShift],
n->childMaxVersion[toRemoveChildrenIndex]);
auto parentIndex = n->children[toRemoveChildrenIndex]->parentsIndex;
n->index[parentIndex] = toRemoveChildrenIndex;
n->reverseIndex[toRemoveChildrenIndex] = parentIndex;
}
// The vacated last slot gets its version reset to tls->zero.
n->childMaxVersion[lastChildrenIndex] = tls->zero;
--n->numChildren;
}
n->numChildren = n->nextFree;
// Downsizing: both variants' branch chains are interleaved from here on.
if (n->numChildren == 0) {
auto *newNode = tls->allocate<Node0>(n->partialKeyLen);
newNode->copyChildrenAndKeyFrom(*n);
getInTree(n, impl) = newNode;
tls->release(n);
(Node *&)n = newNode;
} else if (n->numChildren <= Node3::kMaxNodes) {
auto *newNode = tls->allocate<Node3>(n->partialKeyLen);
newNode->copyChildrenAndKeyFrom(*n);
getInTree(n, impl) = newNode;
tls->release(n);
(Node *&)n = newNode;
} else if (n->numChildren <= Node16::kMaxNodes) {
// Variant 2 ordering: keep Node48 above Node16::kMaxNodes, otherwise pick
// Node16, Node3 or Node0 by remaining child count.
if (n->numChildren > Node16::kMaxNodes) {
// nop
} else if (n->numChildren > Node3::kMaxNodes) {
auto *newNode = tls->allocate<Node16>(n->partialKeyLen);
newNode->copyChildrenAndKeyFrom(*n);
getInTree(n, impl) = newNode;
tls->release(n);
(Node *&)n = newNode;
*inTree = newNode;
} else if (n->numChildren > 0) {
auto *newNode = tls->allocate<Node3>(n->partialKeyLen);
newNode->copyChildrenAndKeyFrom(*n);
tls->release(n);
*inTree = newNode;
} else {
auto *newNode = tls->allocate<Node0>(n->partialKeyLen);
newNode->copyChildrenAndKeyFrom(*n);
tls->release(n);
*inTree = newNode;
}
}
// NOTE(review): rendered diff text with +/- markers stripped; the
// (Node256 *&n, ..., impl) variant and the (Node **inTree, Node256 *n, ...)
// variant are interleaved below, so this span is not compilable as written.
//
// Erases every child of `n` whose key byte lies in [begin, end), then replaces
// the node with a smaller node type when few enough children remain.
void eraseBetween(Node256 *&n, int begin, int end, WriteContext *tls,
ConflictSet::Impl *impl) {
// Variant 1: walk every set bit, free the in-range children, and rebuild
// numChildren and bitSet from the survivors.
const unsigned shiftUpperBound = end - begin;
const unsigned shiftAmount = begin;
auto inBounds = [&](unsigned c) { return c - shiftAmount < shiftUpperBound; };
n->numChildren = 0;
BitSet newBitSet;
n->bitSet.forEachSet([&](int i) {
if (inBounds(i)) {
destroyTree(std::exchange(n->children[i], nullptr), &tls->accum);
} else {
++n->numChildren;
newBitSet.set(i);
}
});
n->bitSet = newBitSet;
// Don't need to update childMaxVersion or maxOfMax because of monotonicity
if (n->numChildren == 0) {
auto *newNode = tls->allocate<Node0>(n->partialKeyLen);
newNode->copyChildrenAndKeyFrom(*n);
getInTree(n, impl) = newNode;
tls->release(n);
(Node *&)n = newNode;
} else if (n->numChildren <= Node3::kMaxNodes) {
auto *newNode = tls->allocate<Node3>(n->partialKeyLen);
newNode->copyChildrenAndKeyFrom(*n);
getInTree(n, impl) = newNode;
tls->release(n);
(Node *&)n = newNode;
} else if (n->numChildren <= Node16::kMaxNodes) {
auto *newNode = tls->allocate<Node16>(n->partialKeyLen);
newNode->copyChildrenAndKeyFrom(*n);
getInTree(n, impl) = newNode;
tls->release(n);
(Node *&)n = newNode;
} else if (n->numChildren <= Node48::kMaxNodes) {
// Variant 2: visit only the set bits in [begin, end).
void eraseBetween(Node **inTree, Node256 *n, int begin, int end,
WriteContext *tls) {
// The loop stops when firstSetGeq returns a negative value or a bit >= end
// (presumably negative means "no such bit" -- confirm). Bit i is reset in the
// body, so re-querying from i moves on to the next set bit.
for (int i = n->bitSet.firstSetGeq(begin); i >= 0 && i < end;
i = n->bitSet.firstSetGeq(i)) {
assert(n->children[i] != nullptr);
// Null the slot before freeing the subtree it pointed to.
eraseTree(std::exchange(n->children[i], nullptr), tls);
n->bitSet.reset(i);
--n->numChildren;
}
// Downsize by remaining child count: keep Node256 above Node48::kMaxNodes,
// otherwise pick Node48, Node16, Node3 or Node0.
if (n->numChildren > Node48::kMaxNodes) {
// nop
} else if (n->numChildren > Node16::kMaxNodes) {
auto *newNode = tls->allocate<Node48>(n->partialKeyLen);
newNode->copyChildrenAndKeyFrom(*n);
getInTree(n, impl) = newNode;
tls->release(n);
(Node *&)n = newNode;
*inTree = newNode;
} else if (n->numChildren > Node3::kMaxNodes) {
auto *newNode = tls->allocate<Node16>(n->partialKeyLen);
newNode->copyChildrenAndKeyFrom(*n);
tls->release(n);
*inTree = newNode;
} else if (n->numChildren > 0) {
auto *newNode = tls->allocate<Node3>(n->partialKeyLen);
newNode->copyChildrenAndKeyFrom(*n);
tls->release(n);
*inTree = newNode;
} else {
auto *newNode = tls->allocate<Node0>(n->partialKeyLen);
newNode->copyChildrenAndKeyFrom(*n);
tls->release(n);
*inTree = newNode;
}
}
// Erase all nodes with a search path starting with n + [child],
// where child in [begin, end).
void eraseBetween(Node *&n, int begin, int end, WriteContext *tls,
ConflictSet::Impl *impl) {
void eraseBetween(Node *&n, int begin, int end, WriteContext *tls) {
#if DEBUG_VERBOSE && !defined(NDEBUG)
fprintf(stderr, "eraseBetween: %s + [%d,%d)\n",
getSearchPathPrintable(n).c_str(), begin, end);
@@ -1930,16 +1933,16 @@ void eraseBetween(Node *&n, int begin, int end, WriteContext *tls,
case Type_Node0:
break;
case Type_Node3:
eraseBetween((Node3 *&)n, begin, end, tls, impl);
eraseBetween(&n, (Node3 *)n, begin, end, tls);
break;
case Type_Node16:
eraseBetween((Node16 *&)n, begin, end, tls, impl);
eraseBetween(&n, (Node16 *)n, begin, end, tls);
break;
case Type_Node48:
eraseBetween((Node48 *&)n, begin, end, tls, impl);
eraseBetween(&n, (Node48 *)n, begin, end, tls);
break;
case Type_Node256:
eraseBetween((Node256 *&)n, begin, end, tls, impl);
eraseBetween(&n, (Node256 *)n, begin, end, tls);
break;
default: // GCOVR_EXCL_LINE
__builtin_unreachable(); // GCOVR_EXCL_LINE
@@ -3228,7 +3231,7 @@ void addPrefixWrite(Node *&root, std::span<const uint8_t> begin,
}
endNode->entry.rangeVersion = writeVersion;
eraseBetween(beginNode, 0, 256, tls, impl);
eraseBetween(getInTree(beginNode, impl), 0, 256, tls);
// Inserting end trashed endNode's maxVersion. Fix that
fixupMaxVersion(endNode, impl, tls);
@@ -3296,6 +3299,10 @@ void addWriteRange(Node *&root, std::span<const uint8_t> begin,
}
endNode->entry.rangeVersion = writeVersion;
if (!beginIsPrefix) {
eraseBetween(*useAsRoot, begin[0] + 1, end[0], tls);
}
for (beginNode = nextLogical(beginNode); beginNode != endNode;
beginNode = erase(beginNode, tls, impl, /*logical*/ true, endNode)) {
}
@@ -3412,7 +3419,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
if (oldestExtantVersion < writeVersion - kMaxCorrectVersionWindow)
[[unlikely]] {
if (writeVersion > newestVersionFullPrecision + kNominalVersionWindow) {
destroyTree(root, &tls.accum);
eraseTree(root, &tls);
init(writeVersion - kNominalVersionWindow);
}
@@ -3581,7 +3588,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
initMetrics();
}
~Impl() {
destroyTree(root, &tls.accum);
eraseTree(root, &tls);
safe_free(metrics, metricsCount * sizeof(metrics[0]));
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More