Compare commits
51 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 5a132799a4 | |||
| 72469ebb6e | |||
| 6c79847a42 | |||
| 405a2ca161 | |||
| f93466316a | |||
| 5626cd09d9 | |||
| 41840220c3 | |||
| 7ff00e7846 | |||
| 6242f40d48 | |||
| 403d70a1d3 | |||
| 9763452713 | |||
| 73d0593fca | |||
| 23c2a3e1c6 | |||
| a64e792964 | |||
| 5e362d5330 | |||
| cc526cb6ba | |||
| 7e49888bec | |||
| e64ebabced | |||
| 1e34951a77 | |||
| baf64520d6 | |||
| 3499626127 | |||
| b7f9084694 | |||
| 4b82502946 | |||
| 68bbacb69a | |||
| 3078845673 | |||
| 43f6126cc4 | |||
| b911d87d55 | |||
| 0c65a82b78 | |||
| e024cb8291 | |||
| 0740dcad43 | |||
| 176df61321 | |||
| 0a850f22e9 | |||
| 479b39d055 | |||
| 482408d725 | |||
| 45995e3307 | |||
| 359b0b29ff | |||
| 54e47ebd40 | |||
| 1c9dda68a6 | |||
| 142455dd28 | |||
| 567d385fbd | |||
| 8a44055533 | |||
| 62516825d1 | |||
| 3d592bd6a9 | |||
| f5f5fb620b | |||
| e3d1b2e842 | |||
| 9f8800af16 | |||
| 182c065c8e | |||
| 2dba0d5be3 | |||
| a1dfdf355c | |||
| 15919cb1c4 | |||
| 5ed9003a83 |
@@ -361,7 +361,21 @@ void benchWorstCaseForRadixRangeRead() {
|
||||
void benchCreateAndDestroy() {
|
||||
ankerl::nanobench::Bench bench;
|
||||
|
||||
bench.run("create and destroy", [&]() { ConflictSet cs{0}; });
|
||||
bench.run("create and destroy", [&]() {
|
||||
ConflictSet cs{0};
|
||||
ConflictSet::WriteRange w;
|
||||
uint8_t b[9];
|
||||
b[8] = 0;
|
||||
for (int64_t i = 0; i < 1000; i += 7) {
|
||||
auto x = __builtin_bswap64(i);
|
||||
memcpy(b, &x, 8);
|
||||
w.begin.p = b;
|
||||
w.begin.len = 8;
|
||||
w.end.len = 0;
|
||||
w.end.p = b;
|
||||
cs.addWrites(&w, 1, 1);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
|
||||
+1
-1
@@ -1,7 +1,7 @@
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
project(
|
||||
conflict-set
|
||||
VERSION 0.0.11
|
||||
VERSION 0.0.12
|
||||
DESCRIPTION
|
||||
"A data structure for optimistic concurrency control on ranges of bitwise-lexicographically-ordered keys."
|
||||
HOMEPAGE_URL "https://git.weaselab.dev/weaselab/conflict-set"
|
||||
|
||||
+642
-486
File diff suppressed because it is too large
Load Diff
+10
@@ -273,6 +273,16 @@ template <class T> struct Vector {
|
||||
size_ += slice.size();
|
||||
}
|
||||
|
||||
// Caller must write to the returned slice
|
||||
std::span<T> unsafePrepareAppend(int appendSize) {
|
||||
if (size_ + appendSize > capacity) {
|
||||
grow(std::max<int>(size_ + appendSize, capacity * 2));
|
||||
}
|
||||
auto result = std::span<T>(t + size_, appendSize);
|
||||
size_ += appendSize;
|
||||
return result;
|
||||
}
|
||||
|
||||
void push_back(const T &t) { append(std::span<const T>(&t, 1)); }
|
||||
|
||||
T *begin() { return t; }
|
||||
|
||||
Vendored
+10
-7
@@ -117,15 +117,18 @@ pipeline {
|
||||
}
|
||||
}
|
||||
steps {
|
||||
script {
|
||||
filter_args = "-f ConflictSet.cpp -f LongestCommonPrefix.h"
|
||||
}
|
||||
CleanBuildAndTest("-DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_FLAGS=--coverage -DCMAKE_CXX_FLAGS=--coverage -DCMAKE_BUILD_TYPE=Debug -DDISABLE_TSAN=ON")
|
||||
sh '''
|
||||
gcovr -f ConflictSet.cpp --cobertura > build/coverage.xml
|
||||
'''
|
||||
sh """
|
||||
gcovr ${filter_args} --cobertura > build/coverage.xml
|
||||
"""
|
||||
recordCoverage qualityGates: [[criticality: 'NOTE', metric: 'MODULE']], tools: [[parser: 'COBERTURA', pattern: 'build/coverage.xml']]
|
||||
sh '''
|
||||
gcovr -f ConflictSet.cpp
|
||||
gcovr -f ConflictSet.cpp --fail-under-line 100 > /dev/null
|
||||
'''
|
||||
sh """
|
||||
gcovr ${filter_args}
|
||||
gcovr ${filter_args} --fail-under-line 100 > /dev/null
|
||||
"""
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,185 @@
|
||||
#pragma once
|
||||
|
||||
#include <assert.h>
|
||||
#include <bit>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef HAS_AVX
|
||||
#include <immintrin.h>
|
||||
#elif HAS_ARM_NEON
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
#ifndef __SANITIZE_THREAD__
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(thread_sanitizer)
|
||||
#define __SANITIZE_THREAD__
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(HAS_AVX) || defined(HAS_ARM_NEON)
|
||||
constexpr int kStride = 64;
|
||||
#else
|
||||
constexpr int kStride = 16;
|
||||
#endif
|
||||
|
||||
constexpr int kUnrollFactor = 4;
|
||||
|
||||
inline bool compareStride(const uint8_t *ap, const uint8_t *bp) {
|
||||
#if defined(HAS_ARM_NEON)
|
||||
static_assert(kStride == 64);
|
||||
uint8x16_t x[4]; // GCOVR_EXCL_LINE
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
x[i] = vceqq_u8(vld1q_u8(ap + i * 16), vld1q_u8(bp + i * 16));
|
||||
}
|
||||
auto results = vreinterpretq_u16_u8(
|
||||
vandq_u8(vandq_u8(x[0], x[1]), vandq_u8(x[2], x[3])));
|
||||
bool eq = vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0) ==
|
||||
uint64_t(-1);
|
||||
#elif defined(HAS_AVX)
|
||||
static_assert(kStride == 64);
|
||||
__m128i x[4]; // GCOVR_EXCL_LINE
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
x[i] = _mm_cmpeq_epi8(_mm_loadu_si128((__m128i *)(ap + i * 16)),
|
||||
_mm_loadu_si128((__m128i *)(bp + i * 16)));
|
||||
}
|
||||
auto eq =
|
||||
_mm_movemask_epi8(_mm_and_si128(_mm_and_si128(x[0], x[1]),
|
||||
_mm_and_si128(x[2], x[3]))) == 0xffff;
|
||||
#else
|
||||
// Hope it gets vectorized
|
||||
auto eq = memcmp(ap, bp, kStride) == 0;
|
||||
#endif
|
||||
return eq;
|
||||
}
|
||||
|
||||
// Precondition: ap[:kStride] != bp[:kStride]
|
||||
inline int firstNeqStride(const uint8_t *ap, const uint8_t *bp) {
|
||||
#if defined(HAS_AVX)
|
||||
static_assert(kStride == 64);
|
||||
uint64_t c[kStride / 16]; // GCOVR_EXCL_LINE
|
||||
for (int i = 0; i < kStride; i += 16) {
|
||||
const auto a = _mm_loadu_si128((__m128i *)(ap + i));
|
||||
const auto b = _mm_loadu_si128((__m128i *)(bp + i));
|
||||
const auto compared = _mm_cmpeq_epi8(a, b);
|
||||
c[i / 16] = _mm_movemask_epi8(compared) & 0xffff;
|
||||
}
|
||||
return std::countr_zero(~(c[0] | c[1] << 16 | c[2] << 32 | c[3] << 48));
|
||||
#elif defined(HAS_ARM_NEON)
|
||||
static_assert(kStride == 64);
|
||||
for (int i = 0; i < kStride; i += 16) {
|
||||
// 0xff for each match
|
||||
uint16x8_t results =
|
||||
vreinterpretq_u16_u8(vceqq_u8(vld1q_u8(ap + i), vld1q_u8(bp + i)));
|
||||
// 0xf for each mismatch
|
||||
uint64_t bitfield =
|
||||
~vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0);
|
||||
if (bitfield) {
|
||||
return i + (std::countr_zero(bitfield) >> 2);
|
||||
}
|
||||
}
|
||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||
#else
|
||||
int i = 0;
|
||||
for (; i < kStride - 1; ++i) {
|
||||
if (*ap++ != *bp++) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return i;
|
||||
#endif
|
||||
}
|
||||
|
||||
// This gets covered in local development
|
||||
// GCOVR_EXCL_START
|
||||
#if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__)
|
||||
__attribute__((target("avx512f,avx512bw"))) inline int
|
||||
longestCommonPrefix(const uint8_t *ap, const uint8_t *bp, int cl) {
|
||||
int i = 0;
|
||||
int end = cl & ~63;
|
||||
while (i < end) {
|
||||
const uint64_t eq =
|
||||
_mm512_cmpeq_epi8_mask(_mm512_loadu_epi8(ap), _mm512_loadu_epi8(bp));
|
||||
if (eq != uint64_t(-1)) {
|
||||
return i + std::countr_one(eq);
|
||||
}
|
||||
i += 64;
|
||||
ap += 64;
|
||||
bp += 64;
|
||||
}
|
||||
if (i < cl) {
|
||||
const uint64_t mask = (uint64_t(1) << (cl - i)) - 1;
|
||||
const uint64_t eq = _mm512_cmpeq_epi8_mask(
|
||||
_mm512_maskz_loadu_epi8(mask, ap), _mm512_maskz_loadu_epi8(mask, bp));
|
||||
return i + std::countr_one(eq & mask);
|
||||
}
|
||||
assert(i == cl);
|
||||
return i;
|
||||
}
|
||||
__attribute__((target("default")))
|
||||
#endif
|
||||
// GCOVR_EXCL_STOP
|
||||
|
||||
inline int
|
||||
longestCommonPrefix(const uint8_t *ap, const uint8_t *bp, int cl) {
|
||||
if (!(cl >= 0)) {
|
||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||
}
|
||||
|
||||
int i = 0;
|
||||
int end;
|
||||
|
||||
// kStride * kUnrollCount at a time
|
||||
end = cl & ~(kStride * kUnrollFactor - 1);
|
||||
while (i < end) {
|
||||
for (int j = 0; j < kUnrollFactor; ++j) {
|
||||
if (!compareStride(ap, bp)) {
|
||||
return i + firstNeqStride(ap, bp);
|
||||
}
|
||||
i += kStride;
|
||||
ap += kStride;
|
||||
bp += kStride;
|
||||
}
|
||||
}
|
||||
|
||||
// kStride at a time
|
||||
end = cl & ~(kStride - 1);
|
||||
while (i < end) {
|
||||
if (!compareStride(ap, bp)) {
|
||||
return i + firstNeqStride(ap, bp);
|
||||
}
|
||||
i += kStride;
|
||||
ap += kStride;
|
||||
bp += kStride;
|
||||
}
|
||||
|
||||
// word at a time
|
||||
end = cl & ~(sizeof(uint64_t) - 1);
|
||||
while (i < end) {
|
||||
uint64_t a; // GCOVR_EXCL_LINE
|
||||
uint64_t b; // GCOVR_EXCL_LINE
|
||||
memcpy(&a, ap, 8);
|
||||
memcpy(&b, bp, 8);
|
||||
const auto mismatched = a ^ b;
|
||||
if (mismatched) {
|
||||
return i + std::countr_zero(mismatched) / 8;
|
||||
}
|
||||
i += 8;
|
||||
ap += 8;
|
||||
bp += 8;
|
||||
}
|
||||
|
||||
// byte at a time
|
||||
while (i < cl) {
|
||||
if (*ap != *bp) {
|
||||
break;
|
||||
}
|
||||
++ap;
|
||||
++bp;
|
||||
++i;
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
@@ -24,15 +24,16 @@ Hardware for all benchmarks is an AMD Ryzen 9 7900 with (2x32GB) 5600MT/s CL28-3
|
||||
|
||||
| ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark
|
||||
|--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:----------
|
||||
| 12.42 | 80,500,398.66 | 0.8% | 180.38 | 61.57 | 2.930 | 41.51 | 0.4% | 0.01 | `point reads`
|
||||
| 15.17 | 65,917,580.99 | 0.2% | 279.47 | 74.95 | 3.729 | 55.54 | 0.3% | 0.01 | `prefix reads`
|
||||
| 38.16 | 26,202,393.91 | 0.1% | 803.07 | 189.13 | 4.246 | 141.68 | 0.2% | 0.01 | `range reads`
|
||||
| 20.20 | 49,504,615.44 | 0.4% | 363.00 | 100.35 | 3.617 | 49.81 | 0.3% | 0.01 | `point writes`
|
||||
| 41.99 | 23,816,559.99 | 0.3% | 799.27 | 209.63 | 3.813 | 154.32 | 0.1% | 0.01 | `prefix writes`
|
||||
| 46.28 | 21,607,605.88 | 1.5% | 953.79 | 231.47 | 4.121 | 168.34 | 0.0% | 0.01 | `range writes`
|
||||
| 80.99 | 12,347,449.98 | 0.9% | 1,501.97 | 406.50 | 3.695 | 281.89 | 0.1% | 0.01 | `monotonic increasing point writes`
|
||||
| 318,010.00 | 3,144.56 | 1.0% | 3,994,511.50 | 1,657,831.50 | 2.409 | 805,969.50 | 0.0% | 0.01 | `worst case for radix tree`
|
||||
| 75.85 | 13,183,612.56 | 0.5% | 1,590.01 | 385.64 | 4.123 | 258.00 | 0.0% | 0.01 | `create and destroy`
|
||||
| 10.80 | 92,600,541.52 | 0.6% | 180.38 | 54.49 | 3.310 | 41.51 | 0.4% | 0.01 | `point reads`
|
||||
| 15.00 | 66,687,691.68 | 0.4% | 278.44 | 76.44 | 3.642 | 55.56 | 0.3% | 0.01 | `prefix reads`
|
||||
| 36.81 | 27,163,394.61 | 0.4% | 795.06 | 187.91 | 4.231 | 142.67 | 0.2% | 0.01 | `range reads`
|
||||
| 18.14 | 55,137,674.01 | 1.2% | 338.19 | 92.86 | 3.642 | 42.81 | 0.4% | 0.01 | `point writes`
|
||||
| 33.19 | 30,127,119.71 | 0.1% | 681.03 | 170.05 | 4.005 | 98.68 | 0.2% | 0.01 | `prefix writes`
|
||||
| 37.37 | 26,759,432.70 | 1.9% | 779.70 | 195.45 | 3.989 | 114.21 | 0.0% | 0.01 | `range writes`
|
||||
| 74.36 | 13,448,582.47 | 1.9% | 1,425.68 | 389.08 | 3.664 | 258.88 | 0.1% | 0.01 | `monotonic increasing point writes`
|
||||
| 316,928.00 | 3,155.29 | 1.5% | 3,992,986.00 | 1,699,813.00 | 2.349 | 806,226.50 | 0.0% | 0.01 | `worst case for radix tree`
|
||||
| 75.26 | 13,286,517.16 | 0.5% | 1,590.01 | 386.67 | 4.112 | 258.00 | 0.0% | 0.01 | `create and destroy`
|
||||
|
||||
|
||||
# "Real data" test
|
||||
|
||||
@@ -47,7 +48,7 @@ Check: 4.47891 seconds, 364.05 MB/s, Add: 4.55599 seconds, 123.058 MB/s, Gc rati
|
||||
## radix tree
|
||||
|
||||
```
|
||||
Check: 0.963721 seconds, 1691.93 MB/s, Add: 1.3288 seconds, 421.924 MB/s, Gc ratio: 42.8819%
|
||||
Check: 0.910234 seconds, 1791.35 MB/s, Add: 1.25908 seconds, 445.287 MB/s, Gc ratio: 44.0415%
|
||||
```
|
||||
|
||||
## hash table
|
||||
|
||||
@@ -76,6 +76,14 @@ void workload(weaselab::ConflictSet *cs) {
|
||||
} else {
|
||||
w.begin.len = k.size();
|
||||
cs->addWrites(&w, 1, version);
|
||||
int64_t beginN = version - kWindowSize + rand() % kWindowSize;
|
||||
auto b = numToKey(beginN);
|
||||
auto e = numToKey(beginN + 1000);
|
||||
w.begin.p = b.data();
|
||||
w.begin.len = b.size();
|
||||
w.end.p = e.data();
|
||||
w.end.len = e.size();
|
||||
cs->addWrites(&w, 1, version);
|
||||
}
|
||||
}
|
||||
// GC
|
||||
@@ -156,6 +164,63 @@ double toSeconds(timeval t) {
|
||||
return double(t.tv_sec) + double(t.tv_usec) * 1e-6;
|
||||
}
|
||||
|
||||
#include <linux/perf_event.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#ifdef __linux__
|
||||
struct PerfCounter {
|
||||
explicit PerfCounter(int event) {
|
||||
struct perf_event_attr pe;
|
||||
|
||||
memset(&pe, 0, sizeof(pe));
|
||||
pe.type = PERF_TYPE_HARDWARE;
|
||||
pe.size = sizeof(pe);
|
||||
pe.config = event;
|
||||
pe.inherit = 1;
|
||||
pe.exclude_kernel = 1;
|
||||
pe.exclude_hv = 1;
|
||||
|
||||
fd = perf_event_open(&pe, 0, -1, -1, 0);
|
||||
if (fd == -1) {
|
||||
fprintf(stderr, "Error opening leader %llx\n", pe.config);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
int64_t total() {
|
||||
int64_t count;
|
||||
if (read(fd, &count, sizeof(count)) != sizeof(count)) {
|
||||
perror("read instructions from perf");
|
||||
abort();
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
~PerfCounter() { close(fd); }
|
||||
|
||||
private:
|
||||
int fd;
|
||||
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
|
||||
int cpu, int group_fd, unsigned long flags) {
|
||||
int ret;
|
||||
|
||||
ret = syscall(SYS_perf_event_open, hw_event, pid, cpu, group_fd, flags);
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
#else
|
||||
struct PerfCounter {
|
||||
explicit PerPerfCounter(int) {}
|
||||
int64_t total() { return 0; }
|
||||
};
|
||||
#endif
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
if (argc != 3) {
|
||||
goto fail;
|
||||
@@ -168,6 +233,8 @@ int main(int argc, char **argv) {
|
||||
int metricsCount;
|
||||
cs.getMetricsV1(&metrics, &metricsCount);
|
||||
|
||||
PerfCounter instructions{PERF_COUNT_HW_INSTRUCTIONS};
|
||||
PerfCounter cycles{PERF_COUNT_HW_CPU_CYCLES};
|
||||
auto w = std::thread{workload, &cs};
|
||||
|
||||
for (;;) {
|
||||
@@ -195,6 +262,16 @@ int main(int argc, char **argv) {
|
||||
"transactions_total ";
|
||||
body += std::to_string(transactions.load(std::memory_order_relaxed));
|
||||
body += "\n";
|
||||
body += "# HELP instructions_total Total number of instructions\n"
|
||||
"# TYPE instructions_total counter\n"
|
||||
"instructions_total ";
|
||||
body += std::to_string(instructions.total());
|
||||
body += "\n";
|
||||
body += "# HELP cycles_total Total number of cycles\n"
|
||||
"# TYPE cycles_total counter\n"
|
||||
"cycles_total ";
|
||||
body += std::to_string(cycles.total());
|
||||
body += "\n";
|
||||
|
||||
for (int i = 0; i < metricsCount; ++i) {
|
||||
body += "# HELP ";
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1 @@
|
||||
:ゥゥゥゥゥゥゥゥゥゥ:::::
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user