From a158d375f590d9a64294331036f3f0c6b8615414 Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Mon, 18 Nov 2024 14:05:14 -0800 Subject: [PATCH] Add script for updating readme --- Bench.cpp | 1 + README.md | 55 +++++++++++++++++++-------------- RealDataBench.cpp | 8 ++--- update-readme.sh | 79 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 116 insertions(+), 27 deletions(-) create mode 100755 update-readme.sh diff --git a/Bench.cpp b/Bench.cpp index cf04f0d..275149a 100644 --- a/Bench.cpp +++ b/Bench.cpp @@ -57,6 +57,7 @@ ConflictSet::ReadRange prefixRange(Arena &arena, TrivialSpan key) { void benchConflictSet() { ankerl::nanobench::Bench bench; + bench.minEpochIterations(10000); ConflictSet cs{0}; bench.batch(kOpsPerTx); diff --git a/README.md b/README.md index ed762ab..2eb9b59 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,14 @@ Intended as an alternative to FoundationDB's skip list. Hardware for all benchmarks is an AMD Ryzen 9 7900 with (2x32GB) 5600MT/s CL28-34-34-89 1.35V RAM. -Compiler is `Ubuntu clang version 20.0.0 (++20241029082144+7544d3af0e28-1~exp1~20241029082307.506)`. 
+``` +$ clang++ --version + +Ubuntu clang version 20.0.0 (++20241118082208+63b926af5ff4-1~exp1~20241118082226.549) +Target: x86_64-pc-linux-gnu +Thread model: posix +InstalledDir: /usr/lib/llvm-20/bin +``` # Microbenchmark @@ -12,44 +19,45 @@ Compiler is `Ubuntu clang version 20.0.0 (++20241029082144+7544d3af0e28-1~exp1~2 | ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- -| 159.65 | 6,263,576.52 | 1.6% | 2,972.36 | 820.37 | 3.623 | 504.59 | 0.0% | 0.01 | `point reads` -| 156.32 | 6,397,320.65 | 0.7% | 2,913.62 | 806.87 | 3.611 | 490.19 | 0.0% | 0.01 | `prefix reads` -| 229.18 | 4,363,293.65 | 1.2% | 3,541.05 | 1,219.75 | 2.903 | 629.33 | 0.0% | 0.01 | `range reads` -| 363.37 | 2,752,026.30 | 0.3% | 5,273.63 | 1,951.54 | 2.702 | 851.66 | 1.7% | 0.01 | `point writes` -| 364.99 | 2,739,787.02 | 0.3% | 5,250.92 | 1,958.54 | 2.681 | 839.24 | 1.7% | 0.01 | `prefix writes` -| 242.26 | 4,127,796.58 | 2.9% | 3,117.33 | 1,304.41 | 2.390 | 541.07 | 2.8% | 0.02 | `range writes` -| 562.48 | 1,777,855.27 | 0.8% | 7,305.21 | 3,034.34 | 2.408 | 1,329.30 | 1.3% | 0.01 | `monotonic increasing point writes` -| 122,688.57 | 8,150.72 | 0.7% | 798,766.00 | 666,842.00 | 1.198 | 144,584.50 | 0.1% | 0.01 | `worst case for radix tree` -| 41.71 | 23,976,459.34 | 1.7% | 885.00 | 219.17 | 4.038 | 132.00 | 0.0% | 0.01 | `create and destroy` +| 164.92 | 6,063,693.08 | 0.0% | 3,014.03 | 829.48 | 3.634 | 504.59 | 0.0% | 1.97 | `point reads` +| 162.85 | 6,140,505.89 | 0.0% | 2,954.16 | 819.09 | 3.607 | 490.17 | 0.0% | 1.94 | `prefix reads` +| 243.06 | 4,114,132.65 | 0.0% | 3,592.41 | 1,224.74 | 2.933 | 629.31 | 0.0% | 2.90 | `range reads` +| 455.46 | 2,195,561.38 | 0.0% | 4,450.57 | 2,301.93 | 1.933 | 707.92 | 2.1% | 5.44 | `point writes` +| 454.16 | 2,201,858.08 | 0.0% | 4,410.22 | 2,295.25 | 1.921 | 694.74 | 2.1% | 
5.42 | `prefix writes` +| 302.97 | 3,300,699.07 | 0.0% | 2,315.38 | 1,531.20 | 1.512 | 396.69 | 3.3% | 3.64 | `range writes` +| 493.69 | 2,025,564.75 | 0.9% | 6,999.33 | 2,493.53 | 2.807 | 1,251.74 | 1.3% | 0.06 | `monotonic increasing point writes` +| 136,298.50 | 7,336.84 | 0.9% | 807,444.50 | 693,845.50 | 1.164 | 144,584.50 | 0.9% | 0.01 | `worst case for radix tree` +| 47.68 | 20,974,738.11 | 0.7% | 902.00 | 238.47 | 3.783 | 132.00 | 0.0% | 0.01 | `create and destroy` ## Radix tree (this implementation) + | ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- -| 12.83 | 77,947,334.88 | 0.8% | 247.13 | 63.80 | 3.873 | 32.64 | 0.6% | 0.01 | `point reads` -| 14.73 | 67,908,470.74 | 0.1% | 299.99 | 73.66 | 4.073 | 42.50 | 0.5% | 0.01 | `prefix reads` -| 35.63 | 28,066,165.78 | 0.1% | 782.70 | 178.49 | 4.385 | 106.65 | 0.2% | 0.01 | `range reads` -| 20.00 | 49,993,123.62 | 0.1% | 376.83 | 100.50 | 3.749 | 50.05 | 0.5% | 0.01 | `point writes` -| 38.04 | 26,287,266.49 | 0.1% | 665.86 | 191.21 | 3.482 | 100.41 | 0.4% | 0.01 | `prefix writes` -| 40.48 | 24,703,557.31 | 1.3% | 732.80 | 204.36 | 3.586 | 111.26 | 0.2% | 0.01 | `range writes` -| 81.01 | 12,343,591.64 | 1.4% | 1,551.57 | 409.23 | 3.791 | 292.66 | 0.1% | 0.01 | `monotonic increasing point writes` -| 315,672.00 | 3,167.85 | 1.7% | 4,043,066.00 | 1,590,315.00 | 2.542 | 714,828.00 | 0.1% | 0.01 | `worst case for radix tree` -| 114.81 | 8,710,164.86 | 0.7% | 2,178.00 | 578.69 | 3.764 | 345.00 | 0.0% | 0.01 | `create and destroy` +| 12.73 | 78,578,520.69 | 0.0% | 247.13 | 64.01 | 3.861 | 32.64 | 0.6% | 0.15 | `point reads` +| 14.48 | 69,077,363.01 | 0.1% | 299.99 | 72.83 | 4.119 | 42.50 | 0.4% | 0.17 | `prefix reads` +| 35.33 | 28,307,787.32 | 0.1% | 782.70 | 177.67 | 4.405 | 106.65 | 0.2% | 0.42 | `range reads` +| 20.64 | 48,445,877.80 | 
0.0% | 376.04 | 103.84 | 3.621 | 49.97 | 0.7% | 0.25 | `point writes` +| 38.32 | 26,098,688.72 | 0.0% | 665.25 | 192.72 | 3.452 | 101.33 | 0.4% | 0.46 | `prefix writes` +| 39.46 | 25,343,458.68 | 0.0% | 732.48 | 198.44 | 3.691 | 111.75 | 0.1% | 0.48 | `range writes` +| 80.63 | 12,401,923.78 | 2.4% | 1,461.96 | 407.42 | 3.588 | 278.93 | 0.1% | 0.01 | `monotonic increasing point writes` +| 311,077.67 | 3,214.63 | 0.5% | 4,016,995.00 | 1,582,232.00 | 2.539 | 714,572.00 | 0.1% | 0.01 | `worst case for radix tree` +| 106.36 | 9,401,962.15 | 0.9% | 2,046.00 | 538.76 | 3.798 | 329.00 | 0.0% | 0.01 | `create and destroy` # "Real data" test -Point queries only, best of three runs. Gc ratio is the ratio of time spent doing garbage collection to time spent adding writes or doing garbage collection. Lower is better. +Point queries only. Gc ratio is the ratio of time spent doing garbage collection to time spent adding writes or doing garbage collection. Lower is better. ## skip list ``` -Check: 4.39702 seconds, 370.83 MB/s, Add: 4.50025 seconds, 124.583 MB/s, Gc ratio: 29.1333%, Peak idle memory: 5.51852e+06 +Check: 4.35798 seconds, 386.919 MB/s, Add: 3.69297 seconds, 155.792 MB/s, Gc ratio: 33.6285%, Peak idle memory: 5.61007e+06 ``` ## radix tree ``` -Check: 0.975666 seconds, 1728.24 MB/s, Add: 1.19751 seconds, 480.444 MB/s, Gc ratio: 36.8478%, Peak idle memory: 2.39447e+06 +Check: 1.0184 seconds, 1655.72 MB/s, Add: 1.3866 seconds, 414.924 MB/s, Gc ratio: 34.8706%, Peak idle memory: 2.32922e+06 ``` ## hash table @@ -57,5 +65,6 @@ Check: 0.975666 seconds, 1728.24 MB/s, Add: 1.19751 seconds, 480.444 MB/s, Gc ra (The hash table implementation doesn't work on range queries, and its purpose is to provide an idea of how fast point queries can be) ``` -Check: 0.84256 seconds, 1935.23 MB/s, Add: 0.697204 seconds, 804.146 MB/s, Gc ratio: 35.4091% +Check: 0.855065 seconds, 1972 MB/s, Add: 0.711856 seconds, 808.219 MB/s, Gc ratio: 34.9436%, Peak idle memory: 0 ``` + diff --git 
a/RealDataBench.cpp b/RealDataBench.cpp index 6c6b515..a9dad0e 100644 --- a/RealDataBench.cpp +++ b/RealDataBench.cpp @@ -133,10 +133,10 @@ int main(int argc, const char **argv) { int metricsCount; cs.getMetricsV1(&metrics, &metricsCount); for (int i = 0; i < metricsCount; ++i) { - printf("# HELP %s %s\n", metrics[i].name, metrics[i].help); - printf("# TYPE %s %s\n", metrics[i].name, - metrics[i].type == metrics[i].Counter ? "counter" : "gauge"); - printf("%s %g\n", metrics[i].name, metrics[i].getValue()); + fprintf(stderr, "# HELP %s %s\n", metrics[i].name, metrics[i].help); + fprintf(stderr, "# TYPE %s %s\n", metrics[i].name, + metrics[i].type == metrics[i].Counter ? "counter" : "gauge"); + fprintf(stderr, "%s %g\n", metrics[i].name, metrics[i].getValue()); } printf("Check: %g seconds, %g MB/s, Add: %g seconds, %g MB/s, Gc ratio: " diff --git a/update-readme.sh b/update-readme.sh new file mode 100755 index 0000000..270e8b4 --- /dev/null +++ b/update-readme.sh @@ -0,0 +1,79 @@ +#!/bin/bash + +# Disable frequency scaling: set every CPU's scaling_min_freq to its scaling_max_freq (uses sudo) +for i in $(seq "$(nproc)") ; do sudo cat /sys/devices/system/cpu/cpu$((i-1))/cpufreq/scaling_max_freq | sudo tee /sys/devices/system/cpu/cpu$((i-1))/cpufreq/scaling_min_freq >/dev/null ; done + +cmake -S. -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang++ -G Ninja -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_CXX_FLAGS=-DNVALGRIND > /dev/null +cmake --build build > /dev/null + +cat << 'EOF' +A data structure for optimistic concurrency control on ranges of bitwise-lexicographically-ordered keys. + +Intended as an alternative to FoundationDB's skip list. + +Hardware for all benchmarks is an AMD Ryzen 9 7900 with (2x32GB) 5600MT/s CL28-34-34-89 1.35V RAM. 
+ +``` +$ clang++ --version + +EOF + +clang++ --version + +cat << 'EOF' +``` + +# Microbenchmark + +## Skip list +EOF + +LD_LIBRARY_PATH=build/skip_list build/conflict_set_bench + +cat << 'EOF' + +## Radix tree (this implementation) + +EOF + +LD_LIBRARY_PATH=build/radix_tree build/conflict_set_bench + +cat << 'EOF' + +# "Real data" test + +Point queries only. Gc ratio is the ratio of time spent doing garbage collection to time spent adding writes or doing garbage collection. Lower is better. + +## skip list + +``` +EOF + +LD_LIBRARY_PATH=build/skip_list build/real_data_bench ~/Downloads/quotes_2008-08.txt + +cat << 'EOF' +``` + +## radix tree + +``` +EOF + +LD_LIBRARY_PATH=build/radix_tree build/real_data_bench ~/Downloads/quotes_2008-08.txt + +cat << 'EOF' +``` + +## hash table + +(The hash table implementation doesn't work on range queries, and its purpose is to provide an idea of how fast point queries can be) + +``` +EOF + +LD_LIBRARY_PATH=build/hash_table build/real_data_bench ~/Downloads/quotes_2008-08.txt + +cat << 'EOF' +``` + +EOF