Compare commits
68 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 55e23bafba | |||
| 235938b5aa | |||
| 8b71852495 | |||
| e5e6402b43 | |||
| 61f5612e1f | |||
| 406b27936c | |||
| 7972ed919b | |||
| 0619b6325c | |||
| 7b14c8f9d5 | |||
| 22632fc9f2 | |||
| 1fccb65bd8 | |||
| ee5972f946 | |||
| 7166811387 | |||
| d68f208d9b | |||
| 81323972aa | |||
| 8694ba8b6a | |||
| 0cea5565b5 | |||
| 972f16ed8f | |||
| 2412684316 | |||
| 8190d2f24e | |||
| 8251631087 | |||
| 90fb2a9542 | |||
| 7c01f8ba0f | |||
| 0df2db7f8a | |||
| 5e975f3b2b | |||
| bcbae026b2 | |||
| e125b599b5 | |||
| 3f4d3b685a | |||
| 4198b8b090 | |||
| 8757d2387c | |||
| 4a22b95d53 | |||
| 03d6c7e471 | |||
| ceecc62a63 | |||
| 80f0697e79 | |||
| ce23d3995c | |||
| 6f899e063b | |||
| e5b9c03e77 | |||
| a158d375f5 | |||
| ee5a84cd7b | |||
| 33f14e3d9b | |||
| 77262ee2d3 | |||
| 9945998e05 | |||
| 2777e016ff | |||
| 661ffcd843 | |||
| 3a34d3cecb | |||
| 189c73e3bd | |||
| 35987030fc | |||
| 0621741ec3 | |||
| f5ec9f726a | |||
| 552fc11c5d | |||
| 71ace9cc55 | |||
| bcf459304f | |||
| f403c78410 | |||
| 08958d4109 | |||
| dcc5275ec9 | |||
| c5ef843f9e | |||
| b78e817e24 | |||
| 9c82f17e20 | |||
| 665a9313a4 | |||
| 6e66202d5e | |||
| a92271a205 | |||
| 0dbfb4deae | |||
| 6e229b6b36 | |||
| 2200de11c8 | |||
| b37feb58dd | |||
| 94a4802824 | |||
| 707dbdb391 | |||
| bdd343bb57 |
@@ -57,6 +57,7 @@ ConflictSet::ReadRange prefixRange(Arena &arena, TrivialSpan key) {
|
||||
|
||||
void benchConflictSet() {
|
||||
ankerl::nanobench::Bench bench;
|
||||
bench.minEpochIterations(10000);
|
||||
ConflictSet cs{0};
|
||||
|
||||
bench.batch(kOpsPerTx);
|
||||
|
||||
+40
-16
@@ -31,11 +31,20 @@ if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
|
||||
"MinSizeRel" "RelWithDebInfo")
|
||||
endif()
|
||||
|
||||
add_compile_options(-fdata-sections -ffunction-sections -Wswitch-enum
|
||||
-Werror=switch-enum -fPIC)
|
||||
add_compile_options(
|
||||
# -Werror=switch-enum
|
||||
-Wswitch-enum -Wunused-variable -fPIC -fdata-sections -ffunction-sections
|
||||
-fno-jump-tables # https://github.com/llvm/llvm-project/issues/54247
|
||||
)
|
||||
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
|
||||
add_link_options("-Wno-unused-command-line-argument")
|
||||
find_program(LLVM_OBJCOPY llvm-objcopy)
|
||||
if(LLVM_OBJCOPY)
|
||||
set(CMAKE_OBJCOPY
|
||||
${LLVM_OBJCOPY}
|
||||
CACHE FILEPATH "path to objcopy binary" FORCE)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
@@ -56,6 +65,22 @@ if(HAS_FULL_RELRO)
|
||||
endif()
|
||||
cmake_pop_check_state()
|
||||
|
||||
if(CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64 OR CMAKE_SYSTEM_PROCESSOR STREQUAL
|
||||
arm64)
|
||||
add_compile_options(-mbranch-protection=standard)
|
||||
else()
|
||||
add_compile_options(-fcf-protection)
|
||||
set(rewrite_endbr_flags "-fuse-ld=mold;LINKER:-z,rewrite-endbr")
|
||||
cmake_push_check_state()
|
||||
list(APPEND CMAKE_REQUIRED_LINK_OPTIONS ${rewrite_endbr_flags})
|
||||
check_cxx_source_compiles("int main(){}" HAS_REWRITE_ENDBR FAIL_REGEX
|
||||
"warning:")
|
||||
if(HAS_REWRITE_ENDBR)
|
||||
add_link_options(${rewrite_endbr_flags})
|
||||
endif()
|
||||
cmake_pop_check_state()
|
||||
endif()
|
||||
|
||||
set(version_script_flags
|
||||
LINKER:--version-script=${CMAKE_CURRENT_SOURCE_DIR}/linker.map)
|
||||
cmake_push_check_state()
|
||||
@@ -73,27 +98,17 @@ option(DISABLE_TSAN "Disable TSAN" OFF)
|
||||
# https://valgrind.org/docs/manual/manual-core-adv.html#manual-core-adv.clientreq
|
||||
include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/third_party/valgrind)
|
||||
|
||||
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-Wno-invalid-offsetof>)
|
||||
|
||||
if(APPLE)
|
||||
add_link_options(-Wl,-dead_strip)
|
||||
else()
|
||||
add_link_options(-Wl,--gc-sections)
|
||||
endif()
|
||||
|
||||
if(NOT USE_SIMD_FALLBACK)
|
||||
cmake_push_check_state()
|
||||
list(APPEND CMAKE_REQUIRED_FLAGS -mavx)
|
||||
check_include_file_cxx("immintrin.h" HAS_AVX)
|
||||
if(HAS_AVX)
|
||||
if(USE_SIMD_FALLBACK)
|
||||
add_compile_definitions(USE_SIMD_FALLBACK)
|
||||
else()
|
||||
if(CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64)
|
||||
add_compile_options(-mavx)
|
||||
add_compile_definitions(HAS_AVX)
|
||||
endif()
|
||||
cmake_pop_check_state()
|
||||
|
||||
check_include_file_cxx("arm_neon.h" HAS_ARM_NEON)
|
||||
if(HAS_ARM_NEON)
|
||||
add_compile_definitions(HAS_ARM_NEON)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@@ -356,6 +371,15 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND BUILD_TESTING)
|
||||
${symbol_imports})
|
||||
endif()
|
||||
|
||||
if(NOT CMAKE_CROSSCOMPILING)
|
||||
find_program(HARDENING_CHECK hardening-check)
|
||||
if(HARDENING_CHECK)
|
||||
add_test(NAME hardening_check
|
||||
COMMAND ${HARDENING_CHECK} $<TARGET_FILE:${PROJECT_NAME}>
|
||||
--nofortify --nostackprotector)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# bench
|
||||
add_executable(conflict_set_bench Bench.cpp)
|
||||
target_link_libraries(conflict_set_bench PRIVATE ${PROJECT_NAME} nanobench)
|
||||
|
||||
+1356
-1084
File diff suppressed because it is too large
Load Diff
@@ -13,12 +13,14 @@ RUN TZ=America/Los_Angeles DEBIAN_FRONTEND=noninteractive apt-get install -y \
|
||||
ccache \
|
||||
cmake \
|
||||
curl \
|
||||
devscripts \
|
||||
g++-aarch64-linux-gnu \
|
||||
gcovr \
|
||||
git \
|
||||
gnupg \
|
||||
libc6-dbg \
|
||||
lsb-release \
|
||||
mold \
|
||||
ninja-build \
|
||||
pre-commit \
|
||||
python3-requests \
|
||||
|
||||
-18
@@ -18,7 +18,6 @@ using namespace weaselab;
|
||||
#include <span>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
|
||||
#include <callgrind.h>
|
||||
@@ -368,23 +367,6 @@ template <class T, class C = std::less<T>> auto set(Arena &arena) {
|
||||
return Set<T, C>(ArenaAlloc<T>(&arena));
|
||||
}
|
||||
|
||||
template <class T> struct MyHash;
|
||||
|
||||
template <class T> struct MyHash<T *> {
|
||||
size_t operator()(const T *t) const noexcept {
|
||||
size_t result;
|
||||
memcpy(&result, &t, sizeof(result));
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
template <class T>
|
||||
using HashSet =
|
||||
std::unordered_set<T, MyHash<T>, std::equal_to<T>, ArenaAlloc<T>>;
|
||||
template <class T> auto hashSet(Arena &arena) {
|
||||
return HashSet<T>(ArenaAlloc<T>(&arena));
|
||||
}
|
||||
|
||||
template <class T, class U>
|
||||
bool operator==(const ArenaAlloc<T> &lhs, const ArenaAlloc<U> &rhs) {
|
||||
return lhs.arena == rhs.arena;
|
||||
|
||||
Vendored
+2
-2
@@ -91,7 +91,7 @@ pipeline {
|
||||
minio bucket: 'jenkins', credentialsId: 'jenkins-minio', excludes: '', host: 'minio.weaselab.dev', includes: 'build/*.deb,build/*.rpm,paper/*.pdf', targetFolder: '${JOB_NAME}/${BUILD_NUMBER}/${STAGE_NAME}/'
|
||||
}
|
||||
}
|
||||
stage('Release [gcc]') {
|
||||
stage('gcc') {
|
||||
agent {
|
||||
dockerfile {
|
||||
args '-v /home/jenkins/ccache:/ccache'
|
||||
@@ -99,7 +99,7 @@ pipeline {
|
||||
}
|
||||
}
|
||||
steps {
|
||||
CleanBuildAndTest("-DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DCMAKE_CXX_FLAGS=-DNVALGRIND")
|
||||
CleanBuildAndTest("-DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++")
|
||||
recordIssues(tools: [gcc()])
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,7 +4,14 @@ Intended as an alternative to FoundationDB's skip list.
|
||||
|
||||
Hardware for all benchmarks is an AMD Ryzen 9 7900 with (2x32GB) 5600MT/s CL28-34-34-89 1.35V RAM.
|
||||
|
||||
Compiler is `Ubuntu clang version 20.0.0 (++20241029082144+7544d3af0e28-1~exp1~20241029082307.506)`.
|
||||
```
|
||||
$ clang++ --version
|
||||
|
||||
Ubuntu clang version 20.0.0 (++20241120082228+86734c857724-1~exp1~20241120202359.554)
|
||||
Target: x86_64-pc-linux-gnu
|
||||
Thread model: posix
|
||||
InstalledDir: /usr/lib/llvm-20/bin
|
||||
```
|
||||
|
||||
# Microbenchmark
|
||||
|
||||
@@ -12,44 +19,45 @@ Compiler is `Ubuntu clang version 20.0.0 (++20241029082144+7544d3af0e28-1~exp1~2
|
||||
|
||||
| ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark
|
||||
|--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:----------
|
||||
| 159.65 | 6,263,576.52 | 1.6% | 2,972.36 | 820.37 | 3.623 | 504.59 | 0.0% | 0.01 | `point reads`
|
||||
| 156.32 | 6,397,320.65 | 0.7% | 2,913.62 | 806.87 | 3.611 | 490.19 | 0.0% | 0.01 | `prefix reads`
|
||||
| 229.18 | 4,363,293.65 | 1.2% | 3,541.05 | 1,219.75 | 2.903 | 629.33 | 0.0% | 0.01 | `range reads`
|
||||
| 363.37 | 2,752,026.30 | 0.3% | 5,273.63 | 1,951.54 | 2.702 | 851.66 | 1.7% | 0.01 | `point writes`
|
||||
| 364.99 | 2,739,787.02 | 0.3% | 5,250.92 | 1,958.54 | 2.681 | 839.24 | 1.7% | 0.01 | `prefix writes`
|
||||
| 242.26 | 4,127,796.58 | 2.9% | 3,117.33 | 1,304.41 | 2.390 | 541.07 | 2.8% | 0.02 | `range writes`
|
||||
| 562.48 | 1,777,855.27 | 0.8% | 7,305.21 | 3,034.34 | 2.408 | 1,329.30 | 1.3% | 0.01 | `monotonic increasing point writes`
|
||||
| 122,688.57 | 8,150.72 | 0.7% | 798,766.00 | 666,842.00 | 1.198 | 144,584.50 | 0.1% | 0.01 | `worst case for radix tree`
|
||||
| 41.71 | 23,976,459.34 | 1.7% | 885.00 | 219.17 | 4.038 | 132.00 | 0.0% | 0.01 | `create and destroy`
|
||||
| 161.29 | 6,200,056.17 | 0.1% | 3,014.03 | 831.04 | 3.627 | 504.59 | 0.0% | 1.93 | `point reads`
|
||||
| 158.32 | 6,316,160.64 | 0.1% | 2,954.16 | 815.80 | 3.621 | 490.17 | 0.0% | 1.89 | `prefix reads`
|
||||
| 237.39 | 4,212,409.50 | 0.2% | 3,592.41 | 1,233.96 | 2.911 | 629.31 | 0.0% | 2.84 | `range reads`
|
||||
| 442.11 | 2,261,878.94 | 0.0% | 4,450.57 | 2,314.25 | 1.923 | 707.92 | 2.1% | 5.28 | `point writes`
|
||||
| 439.89 | 2,273,308.53 | 0.1% | 4,410.22 | 2,302.29 | 1.916 | 694.74 | 2.1% | 5.25 | `prefix writes`
|
||||
| 290.96 | 3,436,936.78 | 0.0% | 2,315.38 | 1,528.68 | 1.515 | 396.69 | 3.3% | 3.49 | `range writes`
|
||||
| 476.93 | 2,096,762.02 | 0.6% | 6,999.33 | 2,484.94 | 2.817 | 1,251.73 | 1.3% | 0.06 | `monotonic increasing point writes`
|
||||
| 131,736.57 | 7,590.91 | 1.1% | 807,444.50 | 704,941.71 | 1.145 | 144,584.60 | 0.9% | 0.01 | `worst case for radix tree`
|
||||
| 45.50 | 21,978,369.95 | 1.1% | 902.00 | 232.36 | 3.882 | 132.00 | 0.0% | 0.01 | `create and destroy`
|
||||
|
||||
## Radix tree (this implementation)
|
||||
|
||||
|
||||
| ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark
|
||||
|--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:----------
|
||||
| 12.63 | 79,186,868.18 | 1.4% | 241.61 | 64.76 | 3.731 | 31.64 | 0.8% | 0.01 | `point reads`
|
||||
| 14.48 | 69,078,073.40 | 0.3% | 292.42 | 74.69 | 3.915 | 41.49 | 0.5% | 0.01 | `prefix reads`
|
||||
| 34.37 | 29,094,694.11 | 0.2% | 759.53 | 179.77 | 4.225 | 100.38 | 0.2% | 0.01 | `range reads`
|
||||
| 19.34 | 51,713,896.36 | 0.7% | 369.70 | 101.81 | 3.631 | 47.88 | 0.6% | 0.01 | `point writes`
|
||||
| 39.16 | 25,538,968.61 | 0.2% | 653.16 | 206.77 | 3.159 | 89.62 | 0.8% | 0.01 | `prefix writes`
|
||||
| 40.58 | 24,642,681.12 | 4.7% | 718.44 | 216.44 | 3.319 | 99.28 | 0.6% | 0.01 | `range writes`
|
||||
| 78.77 | 12,694,520.69 | 3.8% | 1,395.55 | 421.73 | 3.309 | 249.81 | 0.1% | 0.01 | `monotonic increasing point writes`
|
||||
| 287,760.50 | 3,475.11 | 0.5% | 3,929,266.50 | 1,550,225.50 | 2.535 | 639,064.00 | 0.0% | 0.01 | `worst case for radix tree`
|
||||
| 104.76 | 9,545,250.65 | 3.1% | 2,000.00 | 552.82 | 3.618 | 342.00 | 0.0% | 0.01 | `create and destroy`
|
||||
| 12.36 | 80,885,626.43 | 0.2% | 243.56 | 63.62 | 3.828 | 31.07 | 0.6% | 0.15 | `point reads`
|
||||
| 14.18 | 70,502,196.81 | 0.1% | 297.72 | 73.13 | 4.071 | 40.31 | 0.5% | 0.17 | `prefix reads`
|
||||
| 33.44 | 29,901,623.04 | 0.1% | 767.90 | 172.42 | 4.454 | 101.32 | 0.2% | 0.40 | `range reads`
|
||||
| 19.48 | 51,342,564.70 | 0.3% | 374.45 | 100.43 | 3.728 | 48.92 | 0.5% | 0.23 | `point writes`
|
||||
| 37.46 | 26,694,471.44 | 0.1% | 672.00 | 193.14 | 3.479 | 101.28 | 0.3% | 0.45 | `prefix writes`
|
||||
| 38.78 | 25,784,784.34 | 0.0% | 738.26 | 199.93 | 3.693 | 111.59 | 0.1% | 0.47 | `range writes`
|
||||
| 76.05 | 13,148,995.74 | 0.7% | 1,450.77 | 397.16 | 3.653 | 275.72 | 0.0% | 0.01 | `monotonic increasing point writes`
|
||||
| 286,920.33 | 3,485.29 | 0.4% | 4,117,948.00 | 1,521,352.00 | 2.707 | 714,833.00 | 0.1% | 0.01 | `worst case for radix tree`
|
||||
| 95.66 | 10,453,798.72 | 0.5% | 1,986.00 | 495.04 | 4.012 | 315.00 | 0.0% | 0.01 | `create and destroy`
|
||||
|
||||
# "Real data" test
|
||||
|
||||
Point queries only, best of three runs. Gc ratio is the ratio of time spent doing garbage collection to time spent adding writes or doing garbage collection. Lower is better.
|
||||
Point queries only. Gc ratio is the ratio of time spent doing garbage collection to time spent adding writes or doing garbage collection. Lower is better.
|
||||
|
||||
## skip list
|
||||
|
||||
```
|
||||
Check: 4.39702 seconds, 370.83 MB/s, Add: 4.50025 seconds, 124.583 MB/s, Gc ratio: 29.1333%, Peak idle memory: 5.51852e+06
|
||||
Check: 4.53508 seconds, 371.81 MB/s, Add: 3.81222 seconds, 150.919 MB/s, Gc ratio: 33.66%, Peak idle memory: 5.61007e+06
|
||||
```
|
||||
|
||||
## radix tree
|
||||
|
||||
```
|
||||
Check: 0.987757 seconds, 1650.76 MB/s, Add: 1.24815 seconds, 449.186 MB/s, Gc ratio: 41.4675%, Peak idle memory: 2.02872e+06
|
||||
Check: 0.957735 seconds, 1760.6 MB/s, Add: 1.19942 seconds, 479.678 MB/s, Gc ratio: 38.6069%, Peak idle memory: 2.05667e+06
|
||||
```
|
||||
|
||||
## hash table
|
||||
@@ -57,5 +65,6 @@ Check: 0.987757 seconds, 1650.76 MB/s, Add: 1.24815 seconds, 449.186 MB/s, Gc ra
|
||||
(The hash table implementation doesn't work on range queries, and its purpose is to provide an idea of how fast point queries can be)
|
||||
|
||||
```
|
||||
Check: 0.84256 seconds, 1935.23 MB/s, Add: 0.697204 seconds, 804.146 MB/s, Gc ratio: 35.4091%
|
||||
Check: 0.804598 seconds, 2095.69 MB/s, Add: 0.671221 seconds, 857.147 MB/s, Gc ratio: 35.0034%, Peak idle memory: 0
|
||||
```
|
||||
|
||||
|
||||
+4
-4
@@ -133,10 +133,10 @@ int main(int argc, const char **argv) {
|
||||
int metricsCount;
|
||||
cs.getMetricsV1(&metrics, &metricsCount);
|
||||
for (int i = 0; i < metricsCount; ++i) {
|
||||
printf("# HELP %s %s\n", metrics[i].name, metrics[i].help);
|
||||
printf("# TYPE %s %s\n", metrics[i].name,
|
||||
metrics[i].type == metrics[i].Counter ? "counter" : "gauge");
|
||||
printf("%s %g\n", metrics[i].name, metrics[i].getValue());
|
||||
fprintf(stderr, "# HELP %s %s\n", metrics[i].name, metrics[i].help);
|
||||
fprintf(stderr, "# TYPE %s %s\n", metrics[i].name,
|
||||
metrics[i].type == metrics[i].Counter ? "counter" : "gauge");
|
||||
fprintf(stderr, "%s %g\n", metrics[i].name, metrics[i].getValue());
|
||||
}
|
||||
|
||||
printf("Check: %g seconds, %g MB/s, Add: %g seconds, %g MB/s, Gc ratio: "
|
||||
|
||||
+174
-27
@@ -23,6 +23,97 @@
|
||||
#include "Internal.h"
|
||||
#include "third_party/nadeau.h"
|
||||
|
||||
constexpr int kCacheLine = 64; // TODO mac m1 is 128
|
||||
|
||||
template <class T> struct TxQueue {
|
||||
|
||||
explicit TxQueue(int lgSlotCount)
|
||||
: slotCount(1 << lgSlotCount), slotCountMask(slotCount - 1),
|
||||
slots(new T[slotCount]) {
|
||||
// Otherwise we can't tell the difference between full and empty.
|
||||
assert(!(slotCountMask & 0x80000000));
|
||||
}
|
||||
|
||||
/// Call from producer thread, after ensuring consumer is no longer accessing
|
||||
/// it somehow
|
||||
~TxQueue() { delete[] slots; }
|
||||
|
||||
/// Must be called from the producer thread
|
||||
void push(T t) {
|
||||
if (wouldBlock()) {
|
||||
// Wait for pops to change and try again
|
||||
consumer.pops.wait(producer.lastPopRead, std::memory_order_relaxed);
|
||||
producer.lastPopRead = consumer.pops.load(std::memory_order_acquire);
|
||||
}
|
||||
slots[producer.pushesNonAtomic++ & slotCountMask] = std::move(t);
|
||||
// seq_cst so that the notify can't be ordered before the store
|
||||
producer.pushes.store(producer.pushesNonAtomic, std::memory_order_seq_cst);
|
||||
// We have to notify every time, since we don't know if this is the last
|
||||
// push ever
|
||||
producer.pushes.notify_one();
|
||||
}
|
||||
|
||||
/// Must be called from the producer thread
|
||||
uint32_t outstanding() {
|
||||
return producer.pushesNonAtomic -
|
||||
consumer.pops.load(std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
/// Returns true if a call to push might block. Must be called from the
|
||||
/// producer thread.
|
||||
bool wouldBlock() {
|
||||
// See if we can determine that overflow won't happen entirely from state
|
||||
// local to the producer
|
||||
if (producer.pushesNonAtomic - producer.lastPopRead == slotCount - 1) {
|
||||
// Re-read pops with memory order
|
||||
producer.lastPopRead = consumer.pops.load(std::memory_order_acquire);
|
||||
return producer.pushesNonAtomic - producer.lastPopRead == slotCount - 1;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Valid until the next pop, or until this queue is destroyed.
|
||||
T *pop() {
|
||||
// See if we can determine that there's an entry we can pop entirely from
|
||||
// state local to the consumer
|
||||
if (consumer.lastPushRead - consumer.popsNonAtomic == 0) {
|
||||
// Re-read pushes with memory order and try again
|
||||
consumer.lastPushRead = producer.pushes.load(std::memory_order_acquire);
|
||||
if (consumer.lastPushRead - consumer.popsNonAtomic == 0) {
|
||||
// Wait for pushes to change and try again
|
||||
producer.pushes.wait(consumer.lastPushRead, std::memory_order_relaxed);
|
||||
consumer.lastPushRead = producer.pushes.load(std::memory_order_acquire);
|
||||
}
|
||||
}
|
||||
auto result = &slots[consumer.popsNonAtomic++ & slotCountMask];
|
||||
// We only have to write pops with memory order if we've run out of items.
|
||||
// We know that we'll eventually run out.
|
||||
if (consumer.lastPushRead - consumer.popsNonAtomic == 0) {
|
||||
// seq_cst so that the notify can't be ordered before the store
|
||||
consumer.pops.store(consumer.popsNonAtomic, std::memory_order_seq_cst);
|
||||
consumer.pops.notify_one();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private:
|
||||
const uint32_t slotCount;
|
||||
const uint32_t slotCountMask;
|
||||
T *slots;
|
||||
struct alignas(kCacheLine) ProducerState {
|
||||
std::atomic<uint32_t> pushes{0};
|
||||
uint32_t pushesNonAtomic{0};
|
||||
uint32_t lastPopRead{0};
|
||||
};
|
||||
struct alignas(kCacheLine) ConsumerState {
|
||||
std::atomic<uint32_t> pops{0};
|
||||
uint32_t popsNonAtomic{0};
|
||||
uint32_t lastPushRead{0};
|
||||
};
|
||||
ProducerState producer;
|
||||
ConsumerState consumer;
|
||||
};
|
||||
|
||||
std::atomic<int64_t> transactions;
|
||||
|
||||
int64_t safeUnaryMinus(int64_t x) {
|
||||
@@ -47,13 +138,17 @@ void tupleAppend(std::string &output, int64_t value) {
|
||||
|
||||
void tupleAppend(std::string &output, std::string_view value) {
|
||||
output.push_back('\x02');
|
||||
for (auto c : value) {
|
||||
if (c == '\x00') {
|
||||
output.push_back('\x00');
|
||||
output.push_back('\xff');
|
||||
} else {
|
||||
output.push_back(c);
|
||||
if (memchr(value.data(), '\x00', value.size()) != nullptr) {
|
||||
for (auto c : value) {
|
||||
if (c == '\x00') {
|
||||
output.push_back('\x00');
|
||||
output.push_back('\xff');
|
||||
} else {
|
||||
output.push_back(c);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
output.insert(output.end(), value.begin(), value.end());
|
||||
}
|
||||
output.push_back('\x00');
|
||||
}
|
||||
@@ -64,35 +159,71 @@ template <class... Ts> std::string tupleKey(const Ts &...ts) {
|
||||
return result;
|
||||
}
|
||||
|
||||
constexpr int kWindowSize = 300000;
|
||||
constexpr int kTotalKeyRange = 1'000'000'000;
|
||||
constexpr int kWindowSize = 1'000'000;
|
||||
constexpr int kNumReadKeysPerTx = 5;
|
||||
constexpr int kNumWriteKeysPerTx = 10;
|
||||
|
||||
void workload(weaselab::ConflictSet *cs) {
|
||||
int64_t version = kWindowSize;
|
||||
constexpr int kNumWrites = 16;
|
||||
for (;; transactions.fetch_add(1, std::memory_order_relaxed)) {
|
||||
struct Transaction {
|
||||
std::vector<std::string> keys;
|
||||
std::vector<weaselab::ConflictSet::ReadRange> reads;
|
||||
std::vector<weaselab::ConflictSet::WriteRange> writes;
|
||||
int64_t version;
|
||||
int64_t oldestVersion;
|
||||
Transaction() = default;
|
||||
explicit Transaction(int64_t version)
|
||||
: version(version), oldestVersion(version - kWindowSize) {
|
||||
std::vector<int64_t> keyIndices;
|
||||
for (int i = 0; i < kNumWrites; ++i) {
|
||||
keyIndices.push_back(rand() % 100'000'000);
|
||||
for (int i = 0; i < std::max(kNumReadKeysPerTx, kNumWriteKeysPerTx); ++i) {
|
||||
keyIndices.push_back(rand() % kTotalKeyRange);
|
||||
}
|
||||
std::sort(keyIndices.begin(), keyIndices.end());
|
||||
std::vector<std::string> keys;
|
||||
std::vector<weaselab::ConflictSet::WriteRange> writes;
|
||||
constexpr std::string_view suffix = "this is a suffix";
|
||||
for (int i = 0; i < kNumWrites; ++i) {
|
||||
keys.push_back(tupleKey(0x100, i, keyIndices[i],
|
||||
suffix.substr(0, rand() % suffix.size()),
|
||||
rand()));
|
||||
constexpr std::string_view fullString =
|
||||
"this is a string, where a prefix of it is used as an element of the "
|
||||
"tuple forming the key";
|
||||
for (int i = 0; i < int(keyIndices.size()); ++i) {
|
||||
keys.push_back(
|
||||
tupleKey(0x100, keyIndices[i] / fullString.size(),
|
||||
fullString.substr(0, keyIndices[i] % fullString.size())));
|
||||
// printf("%s\n", printable(keys.back()).c_str());
|
||||
}
|
||||
for (int i = 0; i < kNumWrites; ++i) {
|
||||
for (int i = 0; i < kNumWriteKeysPerTx; ++i) {
|
||||
writes.push_back({{(const uint8_t *)keys[i].data(), int(keys[i].size())},
|
||||
{nullptr, 0}});
|
||||
}
|
||||
cs->addWrites(writes.data(), writes.size(), version);
|
||||
cs->setOldestVersion(version - kWindowSize);
|
||||
++version;
|
||||
reads.push_back({{(const uint8_t *)keys[0].data(), int(keys[0].size())},
|
||||
{(const uint8_t *)keys[1].data(), int(keys[1].size())},
|
||||
version - std::min(10, kWindowSize)});
|
||||
static_assert(kNumReadKeysPerTx >= 3);
|
||||
for (int i = 2; i < kNumReadKeysPerTx; ++i) {
|
||||
reads.push_back({{(const uint8_t *)keys[i].data(), int(keys[i].size())},
|
||||
{nullptr, 0},
|
||||
version - kWindowSize});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Transaction(Transaction &&) = default;
|
||||
Transaction &operator=(Transaction &&) = default;
|
||||
Transaction(Transaction const &) = delete;
|
||||
Transaction const &operator=(Transaction const &) = delete;
|
||||
};
|
||||
|
||||
struct Resolver {
|
||||
|
||||
void resolve(const weaselab::ConflictSet::ReadRange *reads, int readCount,
|
||||
const weaselab::ConflictSet::WriteRange *writes, int writeCount,
|
||||
int64_t newVersion, int64_t newOldestVersion) {
|
||||
results.resize(readCount);
|
||||
cs.check(reads, results.data(), readCount);
|
||||
cs.addWrites(writes, writeCount, newVersion);
|
||||
cs.setOldestVersion(newOldestVersion);
|
||||
}
|
||||
|
||||
ConflictSet cs{0};
|
||||
|
||||
private:
|
||||
std::vector<weaselab::ConflictSet::Result> results;
|
||||
};
|
||||
|
||||
// Adapted from getaddrinfo man page
|
||||
int getListenFd(const char *node, const char *service) {
|
||||
@@ -235,7 +366,8 @@ int main(int argc, char **argv) {
|
||||
{
|
||||
int listenFd = getListenFd(argv[1], argv[2]);
|
||||
|
||||
weaselab::ConflictSet cs{0};
|
||||
Resolver resolver;
|
||||
auto &cs = resolver.cs;
|
||||
weaselab::ConflictSet::MetricsV1 *metrics;
|
||||
int metricsCount;
|
||||
cs.getMetricsV1(&metrics, &metricsCount);
|
||||
@@ -284,7 +416,22 @@ int main(int argc, char **argv) {
|
||||
}
|
||||
#endif
|
||||
|
||||
auto w = std::thread{workload, &cs};
|
||||
TxQueue<Transaction> queue{10};
|
||||
|
||||
auto workloadThread = std::thread{[&]() {
|
||||
for (int64_t version = kWindowSize;;
|
||||
++version, transactions.fetch_add(1, std::memory_order_relaxed)) {
|
||||
queue.push(Transaction(version));
|
||||
}
|
||||
}};
|
||||
|
||||
auto resolverThread = std::thread{[&]() {
|
||||
for (;;) {
|
||||
auto tx = queue.pop();
|
||||
resolver.resolve(tx->reads.data(), tx->reads.size(), tx->writes.data(),
|
||||
tx->writes.size(), tx->version, tx->oldestVersion);
|
||||
}
|
||||
}};
|
||||
|
||||
for (;;) {
|
||||
struct sockaddr_storage peer_addr = {};
|
||||
|
||||
+3
-2
@@ -767,7 +767,9 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
||||
false, true);
|
||||
}
|
||||
|
||||
sortPoints(points);
|
||||
if (!std::is_sorted(points.begin(), points.end())) {
|
||||
sortPoints(points);
|
||||
}
|
||||
|
||||
int activeWriteCount = 0;
|
||||
std::vector<std::pair<StringRef, StringRef>> combinedWriteConflictRanges;
|
||||
@@ -794,7 +796,6 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
||||
int temp[stripeSize];
|
||||
int stripes = (stringCount + stripeSize - 1) / stripeSize;
|
||||
StringRef values[stripeSize];
|
||||
int64_t writeVersions[stripeSize / 2];
|
||||
int ss = stringCount - (stripes - 1) * stripeSize;
|
||||
int64_t entryDelta = 0;
|
||||
for (int s = stripes - 1; s >= 0; s--) {
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user