Compare commits

...

5 Commits

Author SHA1 Message Date
e4a77c88d8 Fix two canonicalization bugs 2024-06-14 21:51:59 -07:00
a63fd3970b Reworking the representation for the beginning of a clear range
Most tests pass. Some tests fail.
2024-06-14 19:13:51 -07:00
685b49c96d WIP adhoc test looks ok 2024-06-14 17:58:33 -07:00
8b22fbe261 Tinker with main 2024-06-14 17:19:02 -07:00
729fcdb616 Avoid some insertions in point clears 2024-06-14 16:00:08 -07:00
2 changed files with 183 additions and 124 deletions

View File

@@ -1,56 +1,40 @@
#pragma once
#include "VersionedMap.h"
#include <cassert>
#include <ctype.h>
#include <inttypes.h>
#include <stdio.h>
// Writes the bytes of `k` to stdout. A byte for which isprint() is true is
// written as-is; any other byte is written as the letter `x` followed by its
// value as (at least) two lowercase hex digits.
inline void printBinary(const weaselab::VersionedMap::Key k) {
  int offset = 0;
  // Keep the signed `<` comparison: a non-positive length prints nothing.
  while (offset < k.len) {
    const auto byte = k.p[offset];
    ++offset;
    if (!isprint(byte)) {
      printf("x%02x", byte);
      continue;
    }
    printf("%c", byte);
  }
}
// Prints a one-line, human-readable description of `m` to stdout:
//
//   Set:   set <param1> -> '<param2>' @ <notModifiedSince>
//   Clear: clear [<param1>, <param2>) @ <notModifiedSince>
//
// Both operands are rendered through printBinary, so non-printable bytes show
// up as `xNN`. Each operand is printed exactly once; the byte-by-byte loops
// that previously preceded every printBinary call duplicated the output.
inline void
printMutation(const weaselab::VersionedMap::Iterator::VersionedMutation &m) {
  switch (m.type) {
  case weaselab::VersionedMap::Set:
    printf("set ");
    printBinary({m.param1, m.param1Len});
    printf(" -> '");
    printBinary({m.param2, m.param2Len});
    printf("' @ %" PRId64 "\n", m.notModifiedSince);
    break;
  case weaselab::VersionedMap::Clear:
    printf("clear [");
    printBinary({m.param1, m.param1Len});
    printf(", ");
    printBinary({m.param2, m.param2Len});
    printf(") @ %" PRId64 "\n", m.notModifiedSince);
    break;
  // A switch may carry only one `default` label; any other mutation type is a
  // programming error, reported by the assert in debug builds.
  default:         // GCOVR_EXCL_LINE
    assert(false); // GCOVR_EXCL_LINE
  }
}

View File

@@ -1,6 +1,7 @@
#include "VersionedMap.h"
#include "Internal.h"
#include "KeyCompare.h"
#include "PrintMutation.h"
#include "RootSet.h"
#include <assert.h>
@@ -478,6 +479,13 @@ private:
int searchPathSize_;
};
// Returns a key made of the bytes of `k` followed by one zero byte. The new
// buffer is obtained from `arena` via placement new, so the returned key
// points into storage provided by `arena`.
// NOTE(review): presumably this is the immediate successor of `k` in the
// map's key order -- confirm against KeyCompare.h.
VersionedMap::Key keyAfter(VersionedMap::Key k, Arena &arena) {
  const auto extendedLen = k.len + 1;
  auto *const buffer = new (arena) uint8_t[extendedLen];
  memcpy(buffer, k.p, k.len);
  buffer[k.len] = 0; // the appended terminator byte
  return {buffer, extendedLen};
}
struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl {
// The last node is allowed to be 0, in which case this is the search path of
@@ -616,15 +624,17 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl {
}
}
// If `val` is set, then this is a point mutation at `latestVersion`.
// Otherwise it's the end of a range mutation at `latestVersion`.
// If `val` is true, then this is a point set at `latestVersion`.
// If `endRange` is true, then this is a range end marker at `latestVersion`.
// Otherwise it's the beginning of a range at `latestVersion`.
// `finger` is a valid finger to the insertion path of `key` in the latest
// version (which can be obtained with `search`)
void insert(Key key, std::optional<Val> val, Finger &finger) {
void insert(Key key, std::optional<Val> val, bool endRange, Finger &finger) {
const bool inserted = finger.backNode() == 0;
int64_t pointVersion, rangeVersion;
if (val.has_value()) {
// Point set
pointVersion = latestVersion;
if (inserted) {
Finger copy;
@@ -639,25 +649,40 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl {
auto *entry = mm.base[finger.backNode()].entry;
rangeVersion = entry->rangeVersion;
}
} else {
} else if (endRange) {
rangeVersion = latestVersion;
if (inserted) {
val = {nullptr, -1}; // Sentinel for "no point mutation here"
val = {nullptr, -1};
pointVersion = -1; // Sentinel for "no point mutation here"
#ifndef NDEBUG
// If we inserted this, there would be adjacent clears and so the
// range would not be canonical
Finger copy;
finger.copyTo(copy);
move<std::memory_order_relaxed, true>(copy, latestVersion);
if (copy.searchPathSize() == 0) {
pointVersion = -1; // Sentinel for "no mutation ending here"
} else {
pointVersion = mm.base[copy.backNode()].entry->rangeVersion;
}
assert(copy.searchPathSize() == 0 ||
mm.base[copy.backNode()].entry->rangeVersion < 0);
#endif
} else {
auto *entry = mm.base[finger.backNode()].entry;
val = {entry->getVal(), entry->valLen};
pointVersion = entry->pointVersion;
}
} else {
// Beginning of a clear range
pointVersion = -1; // Sentinel for "no point mutation here"
if (inserted) {
// If there were a clear range here, it wouldn't be canonical
rangeVersion = -1; // Sentinel for "no mutation ending here"
} else {
auto *entry = mm.base[finger.backNode()].entry;
rangeVersion = entry->rangeVersion;
}
val = {nullptr, -1};
}
// TODO check for noop?
// Prepare new node
const uint32_t node = newNode(
pointVersion, rangeVersion, key.p, key.len, val->p, val->len,
@@ -797,9 +822,9 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl {
int64_t getBytes() const { return totalMallocBytes + mm.getBytes(); }
void printInOrder(int64_t version);
void printInOrder(int64_t version) const;
void printInOrderHelper(int64_t version, uint32_t node, int depth);
void printInOrderHelper(int64_t version, uint32_t node, int depth) const;
int accumulatedFuel = 0;
@@ -885,79 +910,142 @@ struct __attribute__((__visibility__("hidden"))) VersionedMap::Impl {
// TODO tune?
scanAndRemoveOldEntries(2 * numMutations + 10);
Arena arena;
// TODO Improve ILP?
for (int i = 0; i < numMutations; ++i) {
const auto &m = mutations[i];
Finger iter;
switch (m.type) {
case Set: {
Finger iter;
search<std::memory_order_relaxed>({m.param1, m.param1Len}, latestRoot,
latestVersion, iter);
insert({m.param1, m.param1Len}, {{m.param2, m.param2Len}}, iter);
insert({m.param1, m.param1Len}, {{m.param2, m.param2Len}},
/*endRange*/ false, iter);
} break;
case Clear: {
// TODO we can avoid some insertions here. Complexity is getting out of
// hand though.
if (m.param2Len == 0) {
Finger iter;
search<std::memory_order_relaxed>({m.param1, m.param1Len}, latestRoot,
latestVersion, iter);
insert({m.param1, m.param1Len}, {{nullptr, -1}}, iter);
const bool found = iter.searchPathSize() > 0 && iter.backNode() != 0;
const bool engulfLeft = mm.base[iter.backNode()].entry->clearTo();
move<std::memory_order_relaxed, true>(iter, latestVersion);
const auto *next = iter.searchPathSize() > 0
? mm.base[iter.backNode()].entry
: nullptr;
if (engulfLeft && next && next->clearTo()) {
insert({next->getKey(), next->keyLen}, {}, iter);
move<std::memory_order_relaxed, false>(iter, latestVersion);
remove(iter);
bool engulfLeft = found && mm.base[iter.backNode()].entry->clearTo();
bool engulfRight = false;
const Entry *next;
Finger copy;
if (iter.searchPathSize() > 0) {
iter.copyTo(copy);
move<std::memory_order_relaxed, true>(copy, latestVersion);
next = copy.searchPathSize() > 0 ? mm.base[copy.backNode()].entry
: nullptr;
if (next && next->clearTo()) {
engulfRight = true;
if (!found) {
engulfLeft = true;
}
}
}
if (engulfLeft && engulfRight) {
insert({next->getKey(), next->keyLen}, {}, /*endRange*/ true, copy);
if (found) {
remove(iter);
}
} else if (engulfLeft) {
assert(found);
remove(iter);
insert(keyAfter({m.param1, m.param1Len}, arena), {},
/*endRange*/ true, iter);
} else if (engulfRight) {
insert({m.param1, m.param1Len}, {}, /*endRange*/ false, iter);
} else {
insert({m.param1, m.param1Len}, {{nullptr, -1}}, /*endRange*/ false,
iter);
}
} else {
// TODO ILP these
Finger begin;
search<std::memory_order_relaxed>({m.param1, m.param1Len}, latestRoot,
latestVersion, iter);
insert({m.param1, m.param1Len}, {{nullptr, -1}}, iter);
latestVersion, begin);
const bool foundBegin =
begin.searchPathSize() > 0 && begin.backNode() != 0;
Finger end;
search<std::memory_order_relaxed>({m.param2, m.param2Len}, latestRoot,
latestVersion, end);
const bool foundEnd = end.searchPathSize() > 0 && end.backNode() != 0;
// Check if we can engulf on the left
{
const auto *entry = mm.base[iter.backNode()].entry;
if (entry->clearTo()) {
remove(iter);
}
bool engulfLeft;
Finger copy;
begin.copyTo(copy);
move<std::memory_order_relaxed, true>(copy, latestVersion);
if (foundBegin) {
engulfLeft = begin.searchPathSize() > 0 &&
mm.base[begin.backNode()].entry->clearTo();
} else {
engulfLeft = copy.searchPathSize() > 0 &&
mm.base[copy.backNode()].entry->clearTo();
}
move<std::memory_order_relaxed, true>(iter, latestVersion);
while (iter.searchPathSize() > 0 &&
mm.base[iter.backNode()] < Key{m.param2, m.param2Len}) {
remove(iter);
move<std::memory_order_relaxed, true>(iter, latestVersion);
}
// TODO reuse finger? It should be one rank away from its insertion
// point
search<std::memory_order_relaxed>({m.param2, m.param2Len}, latestRoot,
latestVersion, iter);
insert({m.param2, m.param2Len}, {}, iter);
// Check if we can engulf on the right
{
const auto *entry = mm.base[iter.backNode()].entry;
move<std::memory_order_relaxed, true>(iter, latestVersion);
const auto *next = iter.searchPathSize() > 0
? mm.base[iter.backNode()].entry
bool engulfRight = false;
if (!foundEnd) {
end.copyTo(copy);
move<std::memory_order_relaxed, true>(copy, latestVersion);
const auto *next = copy.searchPathSize() > 0
? mm.base[copy.backNode()].entry
: nullptr;
if (entry->pointClear() && next && next->clearTo()) {
insert({next->getKey(), next->keyLen}, {}, iter);
move<std::memory_order_relaxed, false>(iter, latestVersion);
remove(iter);
engulfRight = next && next->clearTo();
}
if (engulfLeft && foundBegin) {
remove(begin);
} else if (!engulfLeft) {
insert({m.param1, m.param1Len}, {}, /*rangeEntry*/ false, begin);
}
move<std::memory_order_relaxed, true>(begin, latestVersion);
while (begin.searchPathSize() > 0 &&
mm.base[begin.backNode()] < Key{m.param2, m.param2Len}) {
remove(begin);
move<std::memory_order_relaxed, true>(begin, latestVersion);
}
#ifndef NDEBUG
if (foundEnd) {
[[maybe_unused]] bool beginEqEnd =
mm.base[begin.backNode()] <=> Key{m.param2, m.param2Len} == 0;
assert(beginEqEnd);
}
#endif
if (engulfRight) {
if (foundEnd) {
remove(begin);
move<std::memory_order_relaxed, true>(begin, latestVersion);
}
assert(begin.searchPathSize() > 0 && begin.backNode() != 0);
insert({mm.base[begin.backNode()].entry->getKey(),
mm.base[begin.backNode()].entry->keyLen},
{}, /*rangeEntry*/ true, begin);
} else {
if (!foundEnd) {
// TODO remove this search
search<std::memory_order_relaxed>(
{m.param2, m.param2Len}, latestRoot, latestVersion, begin);
}
insert({m.param2, m.param2Len}, {}, /*rangeEntry*/ true, begin);
}
}
} break;
default: // GCOVR_EXCL_LINE
assert(false); // GCOVR_EXCL_LINE
__builtin_unreachable(); // GCOVR_EXCL_LINE
default: // GCOVR_EXCL_LINE
assert(false); // GCOVR_EXCL_LINE
}
}
roots.add(latestRoot, latestVersion);
@@ -1139,18 +1227,17 @@ void materializeMutations(VersionedMap::Iterator::Impl *impl, const Entry *prev,
impl->mutations[impl->mutationCount++] = {
prev->getKey(),
entry.getKey(),
prev->pointClear() ? prev->keyLen : prev->keyLen + 1,
prev->pointSet() ? prev->keyLen + 1 : prev->keyLen,
entry.keyLen,
VersionedMap::Clear,
entry.rangeVersion};
}
if (entry.pointMutation()) {
if (entry.valLen < 0 /* pointClear */) {
if (next == nullptr || !next->clearTo()) {
impl->mutations[impl->mutationCount++] = {
entry.getKey(), nullptr, entry.keyLen, 0,
VersionedMap::Clear, entry.pointVersion};
}
assert(next == nullptr || !next->clearTo());
impl->mutations[impl->mutationCount++] = {
entry.getKey(), nullptr, entry.keyLen, 0,
VersionedMap::Clear, entry.pointVersion};
} else {
impl->mutations[impl->mutationCount++] = {
entry.getKey(), entry.getVal(), entry.keyLen,
@@ -1498,7 +1585,7 @@ int64_t VersionedMap::getBytes() const { return impl->getBytes(); }
inline
#endif
void
VersionedMap::Impl::printInOrder(int64_t version) {
VersionedMap::Impl::printInOrder(int64_t version) const {
printInOrderHelper(version,
roots.getThreadSafeHandle().rootForVersion(version), 0);
}
@@ -1508,7 +1595,7 @@ inline
#endif
void
VersionedMap::Impl::printInOrderHelper(int64_t version, uint32_t node,
int depth) {
int depth) const {
if (node == 0) {
return;
}
@@ -1519,16 +1606,22 @@ inline
printf(" ");
}
printf("node %u: ", node);
printf("%.*s", mm.base[node].entry->keyLen, mm.base[node].entry->getKey());
if (mm.base[node].entry->valLen >= 0) {
printf(" -> '%.*s' @ %" PRId64, mm.base[node].entry->valLen,
mm.base[node].entry->getVal(), mm.base[node].entry->pointVersion);
} else {
printBinary({mm.base[node].entry->getKey(), mm.base[node].entry->keyLen});
if (mm.base[node].entry->pointSet()) {
printf(" -> '");
printBinary({mm.base[node].entry->getVal(), mm.base[node].entry->valLen});
printf("' @ %" PRId64, mm.base[node].entry->pointVersion);
}
if (mm.base[node].entry->pointClear()) {
printf(" <cleared @ %" PRId64 ">", mm.base[node].entry->pointVersion);
}
if (mm.base[node].entry->clearTo()) {
printf(" <clearTo @ %" PRId64 ">", mm.base[node].entry->rangeVersion);
}
if (mm.base[node].entry->pointVersion < 0 &&
mm.base[node].entry->rangeVersion < 0) {
printf(" <noop>");
}
printf("\n");
VersionedMap::Impl::printInOrderHelper(
version, child<std::memory_order_relaxed>(node, false, version),
@@ -1556,48 +1649,30 @@ struct __attribute__((visibility("default"))) PeakPrinter {
#ifdef ENABLE_MAIN
#include <nanobench.h>
#include "PrintMutation.h"
void breakpoint_me() {}
int main() {
{
weaselab::VersionedMap versionedMap{0};
printf("Bytes: %" PRId64 "\n", versionedMap.getBytes());
{
weaselab::VersionedMap::Mutation m[] = {
{(const uint8_t *)"a", 1, nullptr, 0, weaselab::VersionedMap::Set},
{(const uint8_t *)"b", 1, nullptr, 0, weaselab::VersionedMap::Set},
{(const uint8_t *)"c", 1, nullptr, 0, weaselab::VersionedMap::Set},
{(const uint8_t *)"d", 1, nullptr, 0, weaselab::VersionedMap::Set},
{(const uint8_t *)"e", 1, nullptr, 0, weaselab::VersionedMap::Set},
{(const uint8_t *)"f", 1, nullptr, 0, weaselab::VersionedMap::Set},
{(const uint8_t *)"a", 1, (const uint8_t *)"b", 1,
weaselab::VersionedMap::Clear},
};
versionedMap.addMutations(m, sizeof(m) / sizeof(m[0]), 1);
}
printf("Bytes: %" PRId64 "\n", versionedMap.getBytes());
{
weaselab::VersionedMap::Mutation m[] = {
{(const uint8_t *)"a", 1, (const uint8_t *)"d", 1,
weaselab::VersionedMap::Clear},
{(const uint8_t *)"b", 1, nullptr, 0, weaselab::VersionedMap::Clear},
};
versionedMap.addMutations(m, sizeof(m) / sizeof(m[0]), 2);
}
{
weaselab::VersionedMap::Mutation m[] = {
{(const uint8_t *)"b", 1, (const uint8_t *)"", 0,
weaselab::VersionedMap::Clear},
};
versionedMap.addMutations(m, sizeof(m) / sizeof(m[0]), 3);
}
const int64_t v = 3;
const int64_t v = versionedMap.getVersion();
cast(versionedMap)->printInOrder(v);
weaselab::VersionedMap::Key k = {(const uint8_t *)"a", 2};
weaselab::VersionedMap::Iterator iter;
versionedMap.firstGeq(&k, &v, &iter, 1);
printf("Bytes: %" PRId64 "\n", versionedMap.getBytes());
versionedMap.setOldestVersion(2);
printf("Bytes: %" PRId64 "\n", versionedMap.getBytes());
breakpoint_me();
for (auto end = versionedMap.end(v); iter != end; ++iter) {
printMutation(*iter);