29 Commits

Author SHA1 Message Date
5a132799a4 Add cycles_total
Some checks failed
Tests / Clang total: 2840, passed: 2840
Clang |Total|New|Outstanding|Fixed|Trend |:-:|:-:|:-:|:-:|:-: |0|0|0|0|:clap:
Tests / Debug total: 2838, passed: 2838
weaselab/conflict-set/pipeline/head There was a failure building this commit
2024-08-15 15:13:00 -07:00
72469ebb6e Erase along left spine. Not faster 2024-08-15 15:07:44 -07:00
6c79847a42 Add instructions_total for linux 2024-08-15 15:06:53 -07:00
405a2ca161 Fix typo 2024-08-15 13:52:51 -07:00
f93466316a Pass in-tree reference to mergeWithChild 2024-08-15 13:52:06 -07:00
5626cd09d9 Add to corpus 2024-08-15 11:50:04 -07:00
41840220c3 Optimize version handling in mergeWithChild 2024-08-15 11:49:13 -07:00
7ff00e7846 Extract mergeWithChild to function 2024-08-15 11:40:52 -07:00
6242f40d48 Require that eraseBetween leave at least one child or entryPresent 2024-08-15 11:37:36 -07:00
403d70a1d3 Prefer not copying node in eraseBetween
If numChildren + entryPresent is enough, we don't have to copy even if
it would fit in a smaller node.

If we have to copy, we might as well use the smallest acceptable node
type.
2024-08-15 11:33:16 -07:00
9763452713 Separate beginIsPrefix path and simplify slightly 2024-08-15 11:29:15 -07:00
73d0593fca Remove separate prefix write codepath for now 2024-08-14 21:29:43 -07:00
23c2a3e1c6 SIMD for eraseBetween (Node16)
Some checks failed
Tests / Clang total: 2688, passed: 2688
Clang |Total|New|Outstanding|Fixed|Trend |:-:|:-:|:-:|:-:|:-: |0|0|0|0|:clap:
Tests / Debug total: 2686, passed: 2686
weaselab/conflict-set/pipeline/head There was a failure building this commit
2024-08-14 18:12:46 -07:00
a64e792964 Remove unused function 2024-08-14 17:40:04 -07:00
5e362d5330 Add to corpus 2024-08-14 17:37:18 -07:00
cc526cb6ba Call eraseBetween on useAsRoot in addWriteRange 2024-08-14 17:08:55 -07:00
7e49888bec More eraseBetween optimizations 2024-08-14 16:40:29 -07:00
e64ebabced eraseBetween optimizations 2024-08-14 16:13:37 -07:00
1e34951a77 Fix use-of-uninit in eraseBetween (Node256) 2024-08-14 15:25:10 -07:00
baf64520d6 Have eraseBetween take in-tree node by reference 2024-08-14 15:04:11 -07:00
3499626127 Fix potential strict aliasing issues 2024-08-14 15:01:34 -07:00
b7f9084694 destroyTree -> eraseTree. Use freelist 2024-08-14 14:47:22 -07:00
4b82502946 Accept node by ref for eraseBetween again 2024-08-14 14:43:19 -07:00
68bbacb69a Use getInTree in eraseBetween 2024-08-14 14:43:19 -07:00
3078845673 Fix nodes_released accounting 2024-08-14 14:43:19 -07:00
43f6126cc4 Add a missing assert, call to removeNode 2024-08-14 14:43:19 -07:00
b911d87d55 eraseBetween bug fixes 2024-08-14 14:43:19 -07:00
0c65a82b78 Separate codepath for prefix writes
Uses the newly-added eraseBetween
2024-08-14 14:43:19 -07:00
e024cb8291 Track entriesErased in destroyTree 2024-08-14 14:43:19 -07:00
260 changed files with 842 additions and 526 deletions

View File

@@ -361,7 +361,21 @@ void benchWorstCaseForRadixRangeRead() {
void benchCreateAndDestroy() {
ankerl::nanobench::Bench bench;
bench.run("create and destroy", [&]() { ConflictSet cs{0}; });
bench.run("create and destroy", [&]() {
ConflictSet cs{0};
ConflictSet::WriteRange w;
uint8_t b[9];
b[8] = 0;
for (int64_t i = 0; i < 1000; i += 7) {
auto x = __builtin_bswap64(i);
memcpy(b, &x, 8);
w.begin.p = b;
w.begin.len = 8;
w.end.len = 0;
w.end.p = b;
cs.addWrites(&w, 1, 1);
}
});
}
int main(void) {

View File

@@ -276,15 +276,9 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND BUILD_TESTING)
find_program(VALGRIND_EXE valgrind)
if(VALGRIND_EXE AND NOT CMAKE_CROSSCOMPILING)
list(LENGTH CORPUS_TESTS len)
math(EXPR last "${len} - 1")
set(partition_size 100)
foreach(i RANGE 0 ${last} ${partition_size})
list(SUBLIST CORPUS_TESTS ${i} ${partition_size} partition)
add_test(NAME conflict_set_blackbox_valgrind_${i}
COMMAND ${VALGRIND_EXE} --error-exitcode=99 --
$<TARGET_FILE:driver> ${partition})
endforeach()
add_test(NAME conflict_set_blackbox_valgrind
COMMAND ${VALGRIND_EXE} --error-exitcode=99 --
$<TARGET_FILE:driver> ${CORPUS_TESTS})
endif()
# api smoke tests

File diff suppressed because it is too large Load Diff

View File

@@ -748,10 +748,7 @@ struct TestDriver {
fprintf(stderr, "%p Set oldest version: %" PRId64 "\n", this,
oldestVersion);
#endif
CALLGRIND_START_INSTRUMENTATION;
cs.setOldestVersion(oldestVersion);
CALLGRIND_STOP_INSTRUMENTATION;
if constexpr (kEnableAssertions) {
refImpl.setOldestVersion(oldestVersion);
}

View File

@@ -24,15 +24,16 @@ Hardware for all benchmarks is an AMD Ryzen 9 7900 with (2x32GB) 5600MT/s CL28-3
| ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark
|--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:----------
| 11.18 | 89,455,125.34 | 0.6% | 185.37 | 57.08 | 3.248 | 41.51 | 0.4% | 0.01 | `point reads`
| 14.53 | 68,800,688.89 | 0.4% | 282.41 | 74.80 | 3.776 | 55.06 | 0.3% | 0.01 | `prefix reads`
| 36.54 | 27,367,576.87 | 0.2% | 798.06 | 188.90 | 4.225 | 141.69 | 0.2% | 0.01 | `range reads`
| 16.69 | 59,912,106.02 | 0.6% | 314.57 | 86.29 | 3.645 | 39.84 | 0.4% | 0.01 | `point writes`
| 30.09 | 33,235,744.07 | 0.5% | 591.33 | 155.92 | 3.793 | 82.69 | 0.2% | 0.01 | `prefix writes`
| 35.77 | 27,956,388.03 | 1.4% | 682.25 | 187.63 | 3.636 | 96.12 | 0.1% | 0.01 | `range writes`
| 74.04 | 13,505,408.41 | 2.7% | 1,448.95 | 392.10 | 3.695 | 260.53 | 0.1% | 0.01 | `monotonic increasing point writes`
| 330,984.50 | 3,021.29 | 1.9% | 3,994,153.50 | 1,667,309.00 | 2.396 | 806,019.50 | 0.0% | 0.01 | `worst case for radix tree`
| 92.46 | 10,814,961.65 | 0.5% | 1,800.00 | 463.41 | 3.884 | 297.00 | 0.0% | 0.01 | `create and destroy`
| 10.80 | 92,600,541.52 | 0.6% | 180.38 | 54.49 | 3.310 | 41.51 | 0.4% | 0.01 | `point reads`
| 15.00 | 66,687,691.68 | 0.4% | 278.44 | 76.44 | 3.642 | 55.56 | 0.3% | 0.01 | `prefix reads`
| 36.81 | 27,163,394.61 | 0.4% | 795.06 | 187.91 | 4.231 | 142.67 | 0.2% | 0.01 | `range reads`
| 18.14 | 55,137,674.01 | 1.2% | 338.19 | 92.86 | 3.642 | 42.81 | 0.4% | 0.01 | `point writes`
| 33.19 | 30,127,119.71 | 0.1% | 681.03 | 170.05 | 4.005 | 98.68 | 0.2% | 0.01 | `prefix writes`
| 37.37 | 26,759,432.70 | 1.9% | 779.70 | 195.45 | 3.989 | 114.21 | 0.0% | 0.01 | `range writes`
| 74.36 | 13,448,582.47 | 1.9% | 1,425.68 | 389.08 | 3.664 | 258.88 | 0.1% | 0.01 | `monotonic increasing point writes`
| 316,928.00 | 3,155.29 | 1.5% | 3,992,986.00 | 1,699,813.00 | 2.349 | 806,226.50 | 0.0% | 0.01 | `worst case for radix tree`
| 75.26 | 13,286,517.16 | 0.5% | 1,590.01 | 386.67 | 4.112 | 258.00 | 0.0% | 0.01 | `create and destroy`
# "Real data" test
@@ -47,7 +48,7 @@ Check: 4.47891 seconds, 364.05 MB/s, Add: 4.55599 seconds, 123.058 MB/s, Gc rati
## radix tree
```
Check: 0.953012 seconds, 1710.94 MB/s, Add: 1.30025 seconds, 431.188 MB/s, Gc ratio: 43.9816%, Peak idle memory: 2.28375e+06
Check: 0.910234 seconds, 1791.35 MB/s, Add: 1.25908 seconds, 445.287 MB/s, Gc ratio: 44.0415%
```
## hash table

View File

@@ -6,15 +6,11 @@
#include <string.h>
#include <string>
#include <string_view>
#include <sys/ioctl.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <thread>
#include <unistd.h>
#include <utility>
#include <vector>
#include "ConflictSet.h"
#include "third_party/nadeau.h"
@@ -24,8 +20,8 @@ std::atomic<int64_t> transactions;
constexpr int kBaseSearchDepth = 32;
constexpr int kWindowSize = 10000000;
std::string numToKey(int64_t num) {
std::string result;
std::basic_string<uint8_t> numToKey(int64_t num) {
std::basic_string<uint8_t> result;
result.resize(kBaseSearchDepth + sizeof(int64_t));
memset(result.data(), 0, kBaseSearchDepth);
int64_t be = __builtin_bswap64(num);
@@ -45,13 +41,13 @@ void workload(weaselab::ConflictSet *cs) {
auto pointK = numToKey(pointRv);
weaselab::ConflictSet::ReadRange reads[] = {
{
{(const uint8_t *)pointK.data(), int(pointK.size())},
{pointK.data(), int(pointK.size())},
{nullptr, 0},
pointRv,
},
{
{(const uint8_t *)beginK.data(), int(beginK.size())},
{(const uint8_t *)endK.data(), int(endK.size())},
{beginK.data(), int(beginK.size())},
{endK.data(), int(endK.size())},
version - 2,
},
};
@@ -70,7 +66,7 @@ void workload(weaselab::ConflictSet *cs) {
{
weaselab::ConflictSet::WriteRange w;
auto k = numToKey(version);
w.begin.p = (const uint8_t *)k.data();
w.begin.p = k.data();
w.end.len = 0;
if (version % (kWindowSize / 2) == 0) {
for (int l = 0; l <= k.size(); ++l) {
@@ -83,9 +79,9 @@ void workload(weaselab::ConflictSet *cs) {
int64_t beginN = version - kWindowSize + rand() % kWindowSize;
auto b = numToKey(beginN);
auto e = numToKey(beginN + 1000);
w.begin.p = (const uint8_t *)b.data();
w.begin.p = b.data();
w.begin.len = b.size();
w.end.p = (const uint8_t *)e.data();
w.end.p = e.data();
w.end.len = e.size();
cs->addWrites(&w, 1, version);
}
@@ -168,29 +164,36 @@ double toSeconds(timeval t) {
return double(t.tv_sec) + double(t.tv_usec) * 1e-6;
}
#ifdef __linux__
#include <linux/perf_event.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#ifdef __linux__
struct PerfCounter {
PerfCounter(int type, int config, const std::string &labels = {},
int groupLeaderFd = -1)
: labels(labels) {
explicit PerfCounter(int event) {
struct perf_event_attr pe;
memset(&pe, 0, sizeof(pe));
pe.type = type;
pe.type = PERF_TYPE_HARDWARE;
pe.size = sizeof(pe);
pe.config = config;
pe.config = event;
pe.inherit = 1;
pe.exclude_kernel = 1;
pe.exclude_hv = 1;
fd = perf_event_open(&pe, 0, -1, groupLeaderFd, 0);
if (fd < 0 && errno != ENOENT && errno != EINVAL) {
perror(labels.c_str());
fd = perf_event_open(&pe, 0, -1, -1, 0);
if (fd == -1) {
fprintf(stderr, "Error opening leader %llx\n", pe.config);
exit(EXIT_FAILURE);
}
}
int64_t total() const {
int64_t total() {
int64_t count;
if (read(fd, &count, sizeof(count)) != sizeof(count)) {
perror("read instructions from perf");
@@ -199,27 +202,10 @@ struct PerfCounter {
return count;
}
PerfCounter(PerfCounter &&other)
: fd(std::exchange(other.fd, -1)), labels(std::move(other.labels)) {}
PerfCounter &operator=(PerfCounter &&other) {
fd = std::exchange(other.fd, -1);
labels = std::move(other.labels);
return *this;
}
~PerfCounter() {
if (fd >= 0) {
close(fd);
}
}
bool ok() const { return fd >= 0; }
const std::string &getLabels() const { return labels; }
int getFd() const { return fd; }
~PerfCounter() { close(fd); }
private:
int fd;
std::string labels;
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
int cpu, int group_fd, unsigned long flags) {
int ret;
@@ -228,6 +214,11 @@ private:
return ret;
}
};
#else
struct PerfCounter {
explicit PerPerfCounter(int) {}
int64_t total() { return 0; }
};
#endif
int main(int argc, char **argv) {
@@ -242,50 +233,8 @@ int main(int argc, char **argv) {
int metricsCount;
cs.getMetricsV1(&metrics, &metricsCount);
#ifdef __linux__
PerfCounter instructions{PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS};
PerfCounter cycles{PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, "",
instructions.getFd()};
std::vector<PerfCounter> cacheCounters;
for (auto [id, idStr] : std::initializer_list<std::pair<int, std::string>>{
{PERF_COUNT_HW_CACHE_L1D, "l1d"},
{PERF_COUNT_HW_CACHE_L1I, "l1i"},
{PERF_COUNT_HW_CACHE_LL, "ll"},
{PERF_COUNT_HW_CACHE_DTLB, "dtlb"},
{PERF_COUNT_HW_CACHE_ITLB, "itlb"},
{PERF_COUNT_HW_CACHE_BPU, "bpu"},
{PERF_COUNT_HW_CACHE_NODE, "node"},
}) {
for (auto [op, opStr] :
std::initializer_list<std::pair<int, std::string>>{
{PERF_COUNT_HW_CACHE_OP_READ, "read"},
{PERF_COUNT_HW_CACHE_OP_WRITE, "write"},
{PERF_COUNT_HW_CACHE_OP_PREFETCH, "prefetch"},
}) {
int groupLeaderFd = -1;
for (auto [result, resultStr] :
std::initializer_list<std::pair<int, std::string>>{
{PERF_COUNT_HW_CACHE_RESULT_MISS, "miss"},
{PERF_COUNT_HW_CACHE_RESULT_ACCESS, "access"},
}) {
auto labels = "{id=\"" + idStr + "\", op=\"" + opStr +
"\", result=\"" + resultStr + "\"}";
cacheCounters.emplace_back(PERF_TYPE_HW_CACHE,
id | (op << 8) | (result << 16), labels,
groupLeaderFd);
if (!cacheCounters.back().ok()) {
cacheCounters.pop_back();
} else {
if (groupLeaderFd == -1) {
groupLeaderFd = cacheCounters.back().getFd();
}
}
}
}
}
#endif
PerfCounter instructions{PERF_COUNT_HW_INSTRUCTIONS};
PerfCounter cycles{PERF_COUNT_HW_CPU_CYCLES};
auto w = std::thread{workload, &cs};
for (;;) {
@@ -313,7 +262,6 @@ int main(int argc, char **argv) {
"transactions_total ";
body += std::to_string(transactions.load(std::memory_order_relaxed));
body += "\n";
#ifdef __linux__
body += "# HELP instructions_total Total number of instructions\n"
"# TYPE instructions_total counter\n"
"instructions_total ";
@@ -324,13 +272,6 @@ int main(int argc, char **argv) {
"cycles_total ";
body += std::to_string(cycles.total());
body += "\n";
body += "# HELP cache_event_total Total number of cache events\n"
"# TYPE cache_event_total counter\n";
for (const auto &counter : cacheCounters) {
body += "cache_event_total" + counter.getLabels() + " " +
std::to_string(counter.total()) + "\n";
}
#endif
for (int i = 0; i < metricsCount; ++i) {
body += "# HELP ";

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More