2 Commits

Author SHA1 Message Date
af1e2299de Include childType in ChildAndMaxVersion
Some checks failed
Tests / Clang total: 3339, passed: 3339
Clang |Total|New|Outstanding|Fixed|Trend |:-:|:-:|:-:|:-:|:-: |0|0|0|0|:clap:
Tests / 64 bit versions total: 3339, passed: 3339
Tests / Debug total: 3337, passed: 3337
weaselab/conflict-set/pipeline/head There was a failure building this commit
2024-09-23 18:38:06 -07:00
230e96063d Make children tagged pointers 2024-09-23 17:58:09 -07:00
1953 changed files with 1551 additions and 4133 deletions

View File

@@ -17,26 +17,26 @@ constexpr int kPrefixLen = 0;
constexpr int kMvccWindow = 100000;
TrivialSpan makeKey(Arena &arena, int index) {
std::span<const uint8_t> makeKey(Arena &arena, int index) {
uint8_t *buf = new (arena) uint8_t[4 + kPrefixLen];
auto result = TrivialSpan{buf, 4 + kPrefixLen};
auto result =
std::span<uint8_t>{new (arena) uint8_t[4 + kPrefixLen], 4 + kPrefixLen};
index = __builtin_bswap32(index);
memset(buf, 0, kPrefixLen);
memcpy(buf, &index, 4);
memset(result.data(), 0, kPrefixLen);
memcpy(result.data() + kPrefixLen, &index, 4);
return result;
}
ConflictSet::ReadRange singleton(Arena &arena, TrivialSpan key) {
uint8_t *buf = new (arena) uint8_t[key.size() + 1];
auto r = TrivialSpan(buf, key.size() + 1);
memcpy(buf, key.data(), key.size());
buf[key.size()] = 0;
ConflictSet::ReadRange singleton(Arena &arena, std::span<const uint8_t> key) {
auto r =
std::span<uint8_t>(new (arena) uint8_t[key.size() + 1], key.size() + 1);
memcpy(r.data(), key.data(), key.size());
r[key.size()] = 0;
return {{key.data(), int(key.size())}, {r.data(), int(r.size())}, 0};
}
ConflictSet::ReadRange prefixRange(Arena &arena, TrivialSpan key) {
ConflictSet::ReadRange prefixRange(Arena &arena, std::span<const uint8_t> key) {
int index;
for (index = key.size() - 1; index >= 0; index--)
if ((key[index]) != 255)
@@ -48,16 +48,14 @@ ConflictSet::ReadRange prefixRange(Arena &arena, TrivialSpan key) {
assert(false);
}
uint8_t *buf = new (arena) uint8_t[index + 1];
auto r = TrivialSpan(buf, index + 1);
memcpy(buf, key.data(), index + 1);
buf[r.size() - 1]++;
auto r = std::span<uint8_t>(new (arena) uint8_t[index + 1], index + 1);
memcpy(r.data(), key.data(), index + 1);
r[r.size() - 1]++;
return {{key.data(), int(key.size())}, {r.data(), int(r.size())}, 0};
}
void benchConflictSet() {
ankerl::nanobench::Bench bench;
bench.minEpochIterations(10000);
ConflictSet cs{0};
bench.batch(kOpsPerTx);
@@ -83,7 +81,14 @@ void benchConflictSet() {
++version;
}
auto points = set<TrivialSpan, std::less<>>(arena);
// I don't know why std::less didn't work /shrug
struct Less {
bool operator()(const std::span<const uint8_t> &lhs,
const std::span<const uint8_t> &rhs) const {
return lhs < rhs;
}
};
auto points = set<std::span<const uint8_t>, Less>(arena);
while (points.size() < kOpsPerTx * 2 + 1) {
// TODO don't use rand?
@@ -327,22 +332,16 @@ void benchWorstCaseForRadixRangeRead() {
auto end = std::vector<uint8_t>(kKeyLenForWorstCase - 1, 255);
end.push_back(254);
weaselab::ConflictSet::ReadRange r[] = {
{{begin.data(), int(begin.size())}, {end.data(), int(end.size())}, 0},
};
weaselab::ConflictSet::Result results[sizeof(r) / sizeof(r[0])];
for (auto &result : results) {
result = weaselab::ConflictSet::TooOld;
}
bench.batch(sizeof(r) / sizeof(r[0]));
weaselab::ConflictSet::Result result;
weaselab::ConflictSet::ReadRange r{
{begin.data(), int(begin.size())}, {end.data(), int(end.size())}, 0};
bench.run("worst case for radix tree", [&]() {
for (int i = 0; i < 256; ++i) {
cs[i]->check(r, results, sizeof(r) / sizeof(r[0]));
for (auto result : results) {
if (result != weaselab::ConflictSet::Commit) {
abort();
}
result = weaselab::ConflictSet::TooOld;
cs[i]->check(&r, &result, 1);
if (result != weaselab::ConflictSet::Commit) {
abort();
}
}
});

View File

@@ -31,26 +31,8 @@ if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
"MinSizeRel" "RelWithDebInfo")
endif()
add_compile_options(
# -Werror=switch-enum
-Wswitch-enum -Wunused-variable -fPIC -fdata-sections -ffunction-sections
-fno-jump-tables # https://github.com/llvm/llvm-project/issues/54247
)
if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
add_link_options("-Wno-unused-command-line-argument")
find_program(LLVM_OBJCOPY llvm-objcopy)
if(LLVM_OBJCOPY)
set(CMAKE_OBJCOPY
${LLVM_OBJCOPY}
CACHE FILEPATH "path to objcopy binary" FORCE)
endif()
endif()
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
add_compile_options("-Wno-maybe-uninitialized")
endif()
add_compile_options(-fdata-sections -ffunction-sections -Wswitch-enum
-Werror=switch-enum -fPIC)
if(NOT APPLE)
# This causes some versions of clang to crash on macos
add_compile_options(-g -fno-omit-frame-pointer)
@@ -65,22 +47,6 @@ if(HAS_FULL_RELRO)
endif()
cmake_pop_check_state()
if(CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64 OR CMAKE_SYSTEM_PROCESSOR STREQUAL
arm64)
add_compile_options(-mbranch-protection=standard)
else()
add_compile_options(-fcf-protection)
set(rewrite_endbr_flags "-fuse-ld=mold;LINKER:-z,rewrite-endbr")
cmake_push_check_state()
list(APPEND CMAKE_REQUIRED_LINK_OPTIONS ${rewrite_endbr_flags})
check_cxx_source_compiles("int main(){}" HAS_REWRITE_ENDBR FAIL_REGEX
"warning:")
if(HAS_REWRITE_ENDBR)
add_link_options(${rewrite_endbr_flags})
endif()
cmake_pop_check_state()
endif()
set(version_script_flags
LINKER:--version-script=${CMAKE_CURRENT_SOURCE_DIR}/linker.map)
cmake_push_check_state()
@@ -98,17 +64,27 @@ option(DISABLE_TSAN "Disable TSAN" OFF)
# https://valgrind.org/docs/manual/manual-core-adv.html#manual-core-adv.clientreq
include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/third_party/valgrind)
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-Wno-invalid-offsetof>)
if(APPLE)
add_link_options(-Wl,-dead_strip)
else()
add_link_options(-Wl,--gc-sections)
endif()
if(USE_SIMD_FALLBACK)
add_compile_definitions(USE_SIMD_FALLBACK)
else()
if(CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64)
if(NOT USE_SIMD_FALLBACK)
cmake_push_check_state()
list(APPEND CMAKE_REQUIRED_FLAGS -mavx)
check_include_file_cxx("immintrin.h" HAS_AVX)
if(HAS_AVX)
add_compile_options(-mavx)
add_compile_definitions(HAS_AVX)
endif()
cmake_pop_check_state()
check_include_file_cxx("arm_neon.h" HAS_ARM_NEON)
if(HAS_ARM_NEON)
add_compile_definitions(HAS_ARM_NEON)
endif()
endif()
@@ -119,23 +95,12 @@ target_compile_options(${PROJECT_NAME}-object PRIVATE -fno-exceptions
-fvisibility=hidden)
target_include_directories(${PROJECT_NAME}-object
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include)
if(NOT LD_EXE)
set(LD_EXE ld)
endif()
add_custom_command(
OUTPUT ${CMAKE_BINARY_DIR}/${PROJECT_NAME}.o
COMMAND ${LD_EXE} -r $<TARGET_OBJECTS:${PROJECT_NAME}-object> -o
${CMAKE_BINARY_DIR}/${PROJECT_NAME}.o
DEPENDS $<TARGET_OBJECTS:${PROJECT_NAME}-object>
COMMAND_EXPAND_LISTS)
add_library(${PROJECT_NAME} SHARED ${CMAKE_BINARY_DIR}/${PROJECT_NAME}.o)
add_library(${PROJECT_NAME} SHARED $<TARGET_OBJECTS:${PROJECT_NAME}-object>)
set_target_properties(
${PROJECT_NAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY
"${CMAKE_CURRENT_BINARY_DIR}/radix_tree")
if(CMAKE_BUILD_TYPE STREQUAL Debug)
set_target_properties(${PROJECT_NAME} PROPERTIES LINKER_LANGUAGE CXX)
else()
if(NOT CMAKE_BUILD_TYPE STREQUAL Debug)
set_target_properties(${PROJECT_NAME} PROPERTIES LINKER_LANGUAGE C)
endif()
@@ -145,13 +110,19 @@ if(HAS_VERSION_SCRIPT)
LINKER:--version-script=${CMAKE_CURRENT_SOURCE_DIR}/linker.map)
endif()
add_library(${PROJECT_NAME}-static STATIC ${CMAKE_BINARY_DIR}/${PROJECT_NAME}.o)
if(CMAKE_BUILD_TYPE STREQUAL Debug)
set_target_properties(${PROJECT_NAME}-static PROPERTIES LINKER_LANGUAGE CXX)
else()
add_library(${PROJECT_NAME}-static STATIC
$<TARGET_OBJECTS:${PROJECT_NAME}-object>)
if(NOT CMAKE_BUILD_TYPE STREQUAL Debug)
set_target_properties(${PROJECT_NAME}-static PROPERTIES LINKER_LANGUAGE C)
endif()
if(NOT APPLE)
if(APPLE)
add_custom_command(
TARGET ${PROJECT_NAME}-static
PRE_LINK
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/privatize_symbols_macos.sh
$<TARGET_OBJECTS:${PROJECT_NAME}-object>)
else()
add_custom_command(
TARGET ${PROJECT_NAME}-static
POST_BUILD
@@ -206,13 +177,10 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND BUILD_TESTING)
target_compile_options(driver_skip_list PRIVATE ${TEST_FLAGS})
target_link_libraries(driver_skip_list PRIVATE skip_list)
# enable to test skip list
if(0)
foreach(TEST ${CORPUS_TESTS})
get_filename_component(hash ${TEST} NAME)
add_test(NAME skip_list_${hash} COMMAND driver_skip_list ${TEST})
endforeach()
endif()
foreach(TEST ${CORPUS_TESTS})
get_filename_component(hash ${TEST} NAME)
add_test(NAME skip_list_${hash} COMMAND driver_skip_list ${TEST})
endforeach()
# ad hoc testing
add_executable(conflict_set_main ConflictSet.cpp)
@@ -371,15 +339,6 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND BUILD_TESTING)
${symbol_imports})
endif()
if(NOT CMAKE_CROSSCOMPILING)
find_program(HARDENING_CHECK hardening-check)
if(HARDENING_CHECK)
add_test(NAME hardening_check
COMMAND ${HARDENING_CHECK} $<TARGET_FILE:${PROJECT_NAME}>
--nofortify --nostackprotector)
endif()
endif()
# bench
add_executable(conflict_set_bench Bench.cpp)
target_link_libraries(conflict_set_bench PRIVATE ${PROJECT_NAME} nanobench)

File diff suppressed because it is too large Load Diff

View File

@@ -8,27 +8,25 @@ RUN chmod -R 777 /tmp
RUN apt-get update
RUN apt-get upgrade -y
RUN TZ=America/Los_Angeles DEBIAN_FRONTEND=noninteractive apt-get install -y \
binutils-aarch64-linux-gnu \
build-essential \
ccache \
clang \
cmake \
curl \
devscripts \
doxygen \
file \
g++-aarch64-linux-gnu \
gcovr \
git \
gnupg \
gperf \
graphviz \
libc6-dbg \
lsb-release \
mold \
ninja-build \
pre-commit \
python3-requests \
qemu-user \
rpm \
software-properties-common \
texlive-full \
wget \
zstd
# Install recent valgrind from source
@@ -44,11 +42,6 @@ RUN curl -Ls https://sourceware.org/pub/valgrind/valgrind-3.22.0.tar.bz2 -o valg
cd .. && \
rm -rf /tmp/*
# Recent clang
RUN wget https://apt.llvm.org/llvm.sh && chmod +x ./llvm.sh && ./llvm.sh 20
RUN apt-get -y install clang llvm
# Set after building valgrind, which doesn't build with clang for some reason
ENV CC=clang
ENV CXX=clang++

View File

@@ -18,6 +18,7 @@ using namespace weaselab;
#include <span>
#include <string>
#include <thread>
#include <unordered_set>
#include <utility>
#include <callgrind.h>
@@ -25,38 +26,9 @@ using namespace weaselab;
#define DEBUG_VERBOSE 0
#define SHOW_MEMORY 0
// std::span is not trivially constructible. We want a span that leaves its
// members uninitialized for performance reasons.
struct TrivialSpan {
TrivialSpan() = default;
TrivialSpan(const uint8_t *begin, int len) : begin(begin), len(len) {}
uint8_t back() const {
assert(len > 0);
return begin[len - 1];
}
uint8_t front() const {
assert(len > 0);
return begin[0];
}
uint8_t operator[](int i) const {
assert(0 <= i);
assert(i < len);
return begin[i];
}
int size() const { return len; }
TrivialSpan subspan(int offset, int len) { return {begin + offset, len}; }
const uint8_t *data() const { return begin; }
private:
const uint8_t *begin;
int len;
};
static_assert(std::is_trivial_v<TrivialSpan>);
[[nodiscard]] inline auto operator<=>(const TrivialSpan &lhs,
const TrivialSpan &rhs) noexcept {
[[nodiscard]] inline auto
operator<=>(const std::span<const uint8_t> &lhs,
const std::span<const uint8_t> &rhs) noexcept {
int cl = std::min<int>(lhs.size(), rhs.size());
if (cl > 0) {
if (auto c = memcmp(lhs.data(), rhs.data(), cl) <=> 0; c != 0) {
@@ -66,7 +38,7 @@ static_assert(std::is_trivial_v<TrivialSpan>);
return lhs.size() <=> rhs.size();
}
[[nodiscard]] inline auto operator<=>(const TrivialSpan &lhs,
[[nodiscard]] inline auto operator<=>(const std::span<const uint8_t> &lhs,
const ConflictSet::Key &rhs) noexcept {
int cl = std::min<int>(lhs.size(), rhs.len);
if (cl > 0) {
@@ -74,18 +46,7 @@ static_assert(std::is_trivial_v<TrivialSpan>);
return c;
}
}
return lhs.size() <=> rhs.len;
}
[[nodiscard]] inline auto operator<=>(const ConflictSet::Key &lhs,
const ConflictSet::Key &rhs) noexcept {
int cl = std::min<int>(lhs.len, rhs.len);
if (cl > 0) {
if (auto c = memcmp(lhs.p, rhs.p, cl) <=> 0; c != 0) {
return c;
}
}
return lhs.len <=> rhs.len;
return lhs.size() <=> size_t(rhs.len);
}
// This header contains code that we want to reuse outside of ConflictSet.cpp or
@@ -367,6 +328,23 @@ template <class T, class C = std::less<T>> auto set(Arena &arena) {
return Set<T, C>(ArenaAlloc<T>(&arena));
}
template <class T> struct MyHash;
template <class T> struct MyHash<T *> {
size_t operator()(const T *t) const noexcept {
size_t result;
memcpy(&result, &t, sizeof(result));
return result;
}
};
template <class T>
using HashSet =
std::unordered_set<T, MyHash<T>, std::equal_to<T>, ArenaAlloc<T>>;
template <class T> auto hashSet(Arena &arena) {
return HashSet<T>(ArenaAlloc<T>(&arena));
}
template <class T, class U>
bool operator==(const ArenaAlloc<T> &lhs, const ArenaAlloc<U> &rhs) {
return lhs.arena == rhs.arena;
@@ -560,7 +538,7 @@ struct ReferenceImpl {
using Key = ConflictSet::Key;
inline Key operator""_s(const char *str, size_t size) {
inline Key operator"" _s(const char *str, size_t size) {
return {reinterpret_cast<const uint8_t *>(str), int(size)};
}
@@ -591,7 +569,7 @@ inline std::string printable(const Key &key) {
return printable(std::string_view((const char *)key.p, key.len));
}
inline std::string printable(TrivialSpan key) {
inline std::string printable(std::span<const uint8_t> key) {
return printable(std::string_view((const char *)key.data(), key.size()));
}
@@ -699,8 +677,10 @@ struct TestDriver {
arbitrary->randomBytes(begin + prefixLen, keyLen - prefixLen);
writes[i].end.len = keyLen;
writes[i].end.p = begin;
auto c = TrivialSpan(writes[i].begin.p, writes[i].begin.len) <=>
TrivialSpan(writes[i].end.p, writes[i].end.len);
auto c =
std::span<const uint8_t>(writes[i].begin.p,
writes[i].begin.len) <=>
std::span<const uint8_t>(writes[i].end.p, writes[i].end.len);
if (c > 0) {
using std::swap;
swap(writes[i].begin, writes[i].end);

46
Jenkinsfile vendored
View File

@@ -11,11 +11,11 @@ def CleanBuildAndTest(String cmakeArgs) {
catchError {
sh '''
cd build
ctest --no-compress-output --test-output-size-passed 100000 --test-output-size-failed 100000 -T Test -j `nproc` --timeout 90 > /dev/null
ctest --no-compress-output --test-output-size-passed 100000 --test-output-size-failed 100000 -T Test -j `nproc` --timeout 90
zstd Testing/*/Test.xml
'''
}
xunit tools: [CTest(pattern: 'build/Testing/*/Test.xml')], skipPublishingChecks: false
xunit tools: [CTest(pattern: 'build/Testing/*/Test.xml')], reduceLog: false, skipPublishingChecks: false
minio bucket: 'jenkins', credentialsId: 'jenkins-minio', excludes: '', host: 'minio.weaselab.dev', includes: 'build/Testing/*/Test.xml.zst', targetFolder: '${JOB_NAME}/${BUILD_NUMBER}/${STAGE_NAME}/'
}
@@ -36,6 +36,18 @@ pipeline {
sh 'pre-commit run --all-files --show-diff-on-failure'
}
}
stage('Clang') {
agent {
dockerfile {
args '-v /home/jenkins/ccache:/ccache'
reuseNode true
}
}
steps {
CleanBuildAndTest("")
recordIssues(tools: [clang()])
}
}
stage('64 bit versions') {
agent {
dockerfile {
@@ -69,7 +81,7 @@ pipeline {
CleanBuildAndTest("-DUSE_SIMD_FALLBACK=ON")
}
}
stage('Release [clang]') {
stage('Release [gcc]') {
agent {
dockerfile {
args '-v /home/jenkins/ccache:/ccache'
@@ -77,8 +89,8 @@ pipeline {
}
}
steps {
CleanBuildAndTest("-DCMAKE_CXX_FLAGS=-DNVALGRIND")
recordIssues(tools: [clang()])
CleanBuildAndTest("-DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DCMAKE_CXX_FLAGS=-DNVALGRIND")
recordIssues(tools: [gcc()])
sh '''
cd build
cpack -G DEB
@@ -91,19 +103,7 @@ pipeline {
minio bucket: 'jenkins', credentialsId: 'jenkins-minio', excludes: '', host: 'minio.weaselab.dev', includes: 'build/*.deb,build/*.rpm,paper/*.pdf', targetFolder: '${JOB_NAME}/${BUILD_NUMBER}/${STAGE_NAME}/'
}
}
stage('gcc') {
agent {
dockerfile {
args '-v /home/jenkins/ccache:/ccache'
reuseNode true
}
}
steps {
CleanBuildAndTest("-DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++")
recordIssues(tools: [gcc()])
}
}
stage('Release [clang,aarch64]') {
stage('Release [gcc,aarch64]') {
agent {
dockerfile {
args '-v /home/jenkins/ccache:/ccache'
@@ -129,16 +129,16 @@ pipeline {
}
steps {
script {
gcov_args = "-f ConflictSet.cpp -f LongestCommonPrefix.h -f Metrics.h --gcov-executable 'llvm-cov gcov' --exclude-noncode-lines"
filter_args = "-f ConflictSet.cpp -f LongestCommonPrefix.h -f Metrics.h"
}
CleanBuildAndTest("-DCMAKE_C_FLAGS=--coverage -DCMAKE_CXX_FLAGS=--coverage -DCMAKE_BUILD_TYPE=Debug -DDISABLE_TSAN=ON")
CleanBuildAndTest("-DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_FLAGS=--coverage -DCMAKE_CXX_FLAGS=--coverage -DCMAKE_BUILD_TYPE=Debug -DDISABLE_TSAN=ON")
sh """
gcovr ${gcov_args} --cobertura > build/coverage.xml
gcovr ${filter_args} --cobertura > build/coverage.xml
"""
recordCoverage qualityGates: [[criticality: 'NOTE', metric: 'MODULE']], tools: [[parser: 'COBERTURA', pattern: 'build/coverage.xml']]
sh """
gcovr ${gcov_args}
gcovr ${gcov_args} --fail-under-line 100 > /dev/null
gcovr ${filter_args}
gcovr ${filter_args} --fail-under-line 100 > /dev/null
"""
}
}

View File

@@ -129,7 +129,7 @@ longestCommonPrefix(const uint8_t *ap, const uint8_t *bp, int cl) {
}
int i = 0;
int end; // GCOVR_EXCL_LINE
int end;
// kStride * kUnrollCount at a time
end = cl & ~(kStride * kUnrollFactor - 1);

View File

@@ -2,16 +2,7 @@ A data structure for optimistic concurrency control on ranges of bitwise-lexicog
Intended as an alternative to FoundationDB's skip list.
Hardware for all benchmarks is an AMD Ryzen 9 7900 with (2x32GB) 5600MT/s CL28-34-34-89 1.35V RAM.
```
$ clang++ --version
Ubuntu clang version 20.0.0 (++20241120082228+86734c857724-1~exp1~20241120202359.554)
Target: x86_64-pc-linux-gnu
Thread model: posix
InstalledDir: /usr/lib/llvm-20/bin
```
Hardware for all benchmarks is an AMD Ryzen 9 7900 with (2x32GB) 5600MT/s CL28-34-34-89 1.35V RAM
# Microbenchmark
@@ -19,45 +10,44 @@ InstalledDir: /usr/lib/llvm-20/bin
| ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark
|--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:----------
| 161.29 | 6,200,056.17 | 0.1% | 3,014.03 | 831.04 | 3.627 | 504.59 | 0.0% | 1.93 | `point reads`
| 158.32 | 6,316,160.64 | 0.1% | 2,954.16 | 815.80 | 3.621 | 490.17 | 0.0% | 1.89 | `prefix reads`
| 237.39 | 4,212,409.50 | 0.2% | 3,592.41 | 1,233.96 | 2.911 | 629.31 | 0.0% | 2.84 | `range reads`
| 442.11 | 2,261,878.94 | 0.0% | 4,450.57 | 2,314.25 | 1.923 | 707.92 | 2.1% | 5.28 | `point writes`
| 439.89 | 2,273,308.53 | 0.1% | 4,410.22 | 2,302.29 | 1.916 | 694.74 | 2.1% | 5.25 | `prefix writes`
| 290.96 | 3,436,936.78 | 0.0% | 2,315.38 | 1,528.68 | 1.515 | 396.69 | 3.3% | 3.49 | `range writes`
| 476.93 | 2,096,762.02 | 0.6% | 6,999.33 | 2,484.94 | 2.817 | 1,251.73 | 1.3% | 0.06 | `monotonic increasing point writes`
| 131,736.57 | 7,590.91 | 1.1% | 807,444.50 | 704,941.71 | 1.145 | 144,584.60 | 0.9% | 0.01 | `worst case for radix tree`
| 45.50 | 21,978,369.95 | 1.1% | 902.00 | 232.36 | 3.882 | 132.00 | 0.0% | 0.01 | `create and destroy`
| 172.03 | 5,812,791.77 | 0.4% | 3,130.62 | 879.00 | 3.562 | 509.23 | 0.0% | 0.01 | `point reads`
| 167.44 | 5,972,130.71 | 0.2% | 3,065.14 | 862.27 | 3.555 | 494.30 | 0.0% | 0.01 | `prefix reads`
| 238.77 | 4,188,130.84 | 0.9% | 3,589.93 | 1,259.30 | 2.851 | 637.12 | 0.0% | 0.01 | `range reads`
| 424.01 | 2,358,426.70 | 0.2% | 5,620.05 | 2,242.35 | 2.506 | 854.80 | 1.7% | 0.01 | `point writes`
| 418.45 | 2,389,780.56 | 0.4% | 5,525.07 | 2,211.05 | 2.499 | 831.71 | 1.7% | 0.01 | `prefix writes`
| 254.87 | 3,923,568.88 | 2.6% | 3,187.01 | 1,366.50 | 2.332 | 529.11 | 2.7% | 0.02 | `range writes`
| 675.96 | 1,479,374.50 | 3.3% | 7,735.41 | 3,468.60 | 2.230 | 1,386.02 | 1.8% | 0.01 | `monotonic increasing point writes`
| 137,986.20 | 7,247.10 | 0.6% | 789,752.33 | 699,462.00 | 1.129 | 144,824.14 | 0.0% | 0.01 | `worst case for radix tree`
| 21.63 | 46,231,564.03 | 1.0% | 448.00 | 107.14 | 4.181 | 84.00 | 0.0% | 0.01 | `create and destroy`
## Radix tree (this implementation)
| ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark
|--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:----------
| 12.36 | 80,885,626.43 | 0.2% | 243.56 | 63.62 | 3.828 | 31.07 | 0.6% | 0.15 | `point reads`
| 14.18 | 70,502,196.81 | 0.1% | 297.72 | 73.13 | 4.071 | 40.31 | 0.5% | 0.17 | `prefix reads`
| 33.44 | 29,901,623.04 | 0.1% | 767.90 | 172.42 | 4.454 | 101.32 | 0.2% | 0.40 | `range reads`
| 19.48 | 51,342,564.70 | 0.3% | 374.45 | 100.43 | 3.728 | 48.92 | 0.5% | 0.23 | `point writes`
| 37.46 | 26,694,471.44 | 0.1% | 672.00 | 193.14 | 3.479 | 101.28 | 0.3% | 0.45 | `prefix writes`
| 38.78 | 25,784,784.34 | 0.0% | 738.26 | 199.93 | 3.693 | 111.59 | 0.1% | 0.47 | `range writes`
| 76.05 | 13,148,995.74 | 0.7% | 1,450.77 | 397.16 | 3.653 | 275.72 | 0.0% | 0.01 | `monotonic increasing point writes`
| 286,920.33 | 3,485.29 | 0.4% | 4,117,948.00 | 1,521,352.00 | 2.707 | 714,833.00 | 0.1% | 0.01 | `worst case for radix tree`
| 95.66 | 10,453,798.72 | 0.5% | 1,986.00 | 495.04 | 4.012 | 315.00 | 0.0% | 0.01 | `create and destroy`
| 12.88 | 77,653,350.77 | 0.5% | 185.37 | 64.45 | 2.876 | 41.51 | 0.4% | 0.01 | `point reads`
| 14.67 | 68,179,354.49 | 0.1% | 271.44 | 73.40 | 3.698 | 53.70 | 0.3% | 0.01 | `prefix reads`
| 34.84 | 28,701,444.36 | 0.3% | 715.74 | 175.27 | 4.084 | 127.30 | 0.2% | 0.01 | `range reads`
| 17.12 | 58,422,988.28 | 0.2% | 314.30 | 86.11 | 3.650 | 39.82 | 0.4% | 0.01 | `point writes`
| 31.42 | 31,830,804.65 | 0.1% | 591.06 | 158.07 | 3.739 | 82.67 | 0.2% | 0.01 | `prefix writes`
| 37.37 | 26,759,432.70 | 2.2% | 681.98 | 188.95 | 3.609 | 96.10 | 0.1% | 0.01 | `range writes`
| 76.72 | 13,035,140.63 | 2.3% | 1,421.28 | 387.17 | 3.671 | 257.76 | 0.1% | 0.01 | `monotonic increasing point writes`
| 297,452.00 | 3,361.89 | 0.9% | 3,508,083.00 | 1,500,834.67 | 2.337 | 727,525.33 | 0.1% | 0.01 | `worst case for radix tree`
| 87.70 | 11,402,490.60 | 1.0% | 1,795.00 | 442.09 | 4.060 | 297.00 | 0.0% | 0.01 | `create and destroy`
# "Real data" test
Point queries only. Gc ratio is the ratio of time spent doing garbage collection to time spent adding writes or doing garbage collection. Lower is better.
Point queries only, best of three runs. Gc ratio is the ratio of time spent doing garbage collection to time spent adding writes or doing garbage collection. Lower is better.
## skip list
```
Check: 4.53508 seconds, 371.81 MB/s, Add: 3.81222 seconds, 150.919 MB/s, Gc ratio: 33.66%, Peak idle memory: 5.61007e+06
Check: 4.47891 seconds, 364.05 MB/s, Add: 4.55599 seconds, 123.058 MB/s, Gc ratio: 37.1145%
```
## radix tree
```
Check: 0.957735 seconds, 1760.6 MB/s, Add: 1.19942 seconds, 479.678 MB/s, Gc ratio: 38.6069%, Peak idle memory: 2.05667e+06
Check: 0.953012 seconds, 1710.94 MB/s, Add: 1.30025 seconds, 431.188 MB/s, Gc ratio: 43.9816%, Peak idle memory: 2.28375e+06
```
## hash table
@@ -65,6 +55,5 @@ Check: 0.957735 seconds, 1760.6 MB/s, Add: 1.19942 seconds, 479.678 MB/s, Gc rat
(The hash table implementation doesn't work on range queries, and its purpose is to provide an idea of how fast point queries can be)
```
Check: 0.804598 seconds, 2095.69 MB/s, Add: 0.671221 seconds, 857.147 MB/s, Gc ratio: 35.0034%, Peak idle memory: 0
Check: 0.804094 seconds, 2027.81 MB/s, Add: 0.652952 seconds, 858.645 MB/s, Gc ratio: 35.3885%
```

View File

@@ -5,7 +5,7 @@
#include <cstdio>
#include <cstring>
#include <fcntl.h>
#include <span>
#include <string_view>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
@@ -64,7 +64,7 @@ int main(int argc, const char **argv) {
auto *const mapOriginal = begin;
const auto sizeOriginal = size;
using StringView = std::span<const uint8_t>;
using StringView = std::basic_string_view<uint8_t>;
StringView write;
std::vector<StringView> reads;
@@ -78,9 +78,9 @@ int main(int argc, const char **argv) {
end = (uint8_t *)memchr(begin, '\n', size);
if (line.size() > 0 && line[0] == 'P') {
write = line.subspan(2, line.size());
write = line.substr(2, line.size());
} else if (line.size() > 0 && line[0] == 'L') {
reads.push_back(line.subspan(2, line.size()));
reads.push_back(line.substr(2, line.size()));
} else if (line.empty()) {
{
readRanges.resize(reads.size());
@@ -133,10 +133,10 @@ int main(int argc, const char **argv) {
int metricsCount;
cs.getMetricsV1(&metrics, &metricsCount);
for (int i = 0; i < metricsCount; ++i) {
fprintf(stderr, "# HELP %s %s\n", metrics[i].name, metrics[i].help);
fprintf(stderr, "# TYPE %s %s\n", metrics[i].name,
metrics[i].type == metrics[i].Counter ? "counter" : "gauge");
fprintf(stderr, "%s %g\n", metrics[i].name, metrics[i].getValue());
printf("# HELP %s %s\n", metrics[i].name, metrics[i].help);
printf("# TYPE %s %s\n", metrics[i].name,
metrics[i].type == metrics[i].Counter ? "counter" : "gauge");
printf("%s %g\n", metrics[i].name, metrics[i].getValue());
}
printf("Check: %g seconds, %g MB/s, Add: %g seconds, %g MB/s, Gc ratio: "

View File

@@ -1,7 +1,4 @@
#include <algorithm>
#include <atomic>
#include <cstdint>
#include <cstdlib>
#include <errno.h>
#include <netdb.h>
#include <stdio.h>
@@ -20,210 +17,84 @@
#include <vector>
#include "ConflictSet.h"
#include "Internal.h"
#include "third_party/nadeau.h"
constexpr int kCacheLine = 64; // TODO mac m1 is 128
template <class T> struct TxQueue {
explicit TxQueue(int lgSlotCount)
: slotCount(1 << lgSlotCount), slotCountMask(slotCount - 1),
slots(new T[slotCount]) {
// Otherwise we can't tell the difference between full and empty.
assert(!(slotCountMask & 0x80000000));
}
/// Call from producer thread, after ensuring consumer is no longer accessing
/// it somehow
~TxQueue() { delete[] slots; }
/// Must be called from the producer thread
void push(T t) {
if (wouldBlock()) {
// Wait for pops to change and try again
consumer.pops.wait(producer.lastPopRead, std::memory_order_relaxed);
producer.lastPopRead = consumer.pops.load(std::memory_order_acquire);
}
slots[producer.pushesNonAtomic++ & slotCountMask] = std::move(t);
// seq_cst so that the notify can't be ordered before the store
producer.pushes.store(producer.pushesNonAtomic, std::memory_order_seq_cst);
// We have to notify every time, since we don't know if this is the last
// push ever
producer.pushes.notify_one();
}
/// Must be called from the producer thread
uint32_t outstanding() {
return producer.pushesNonAtomic -
consumer.pops.load(std::memory_order_relaxed);
}
/// Returns true if a call to push might block. Must be called from the
/// producer thread.
bool wouldBlock() {
// See if we can determine that overflow won't happen entirely from state
// local to the producer
if (producer.pushesNonAtomic - producer.lastPopRead == slotCount - 1) {
// Re-read pops with memory order
producer.lastPopRead = consumer.pops.load(std::memory_order_acquire);
return producer.pushesNonAtomic - producer.lastPopRead == slotCount - 1;
}
return false;
}
/// Valid until the next pop, or until this queue is destroyed.
T *pop() {
// See if we can determine that there's an entry we can pop entirely from
// state local to the consumer
if (consumer.lastPushRead - consumer.popsNonAtomic == 0) {
// Re-read pushes with memory order and try again
consumer.lastPushRead = producer.pushes.load(std::memory_order_acquire);
if (consumer.lastPushRead - consumer.popsNonAtomic == 0) {
// Wait for pushes to change and try again
producer.pushes.wait(consumer.lastPushRead, std::memory_order_relaxed);
consumer.lastPushRead = producer.pushes.load(std::memory_order_acquire);
}
}
auto result = &slots[consumer.popsNonAtomic++ & slotCountMask];
// We only have to write pops with memory order if we've run out of items.
// We know that we'll eventually run out.
if (consumer.lastPushRead - consumer.popsNonAtomic == 0) {
// seq_cst so that the notify can't be ordered before the store
consumer.pops.store(consumer.popsNonAtomic, std::memory_order_seq_cst);
consumer.pops.notify_one();
}
return result;
}
private:
const uint32_t slotCount;
const uint32_t slotCountMask;
T *slots;
struct alignas(kCacheLine) ProducerState {
std::atomic<uint32_t> pushes{0};
uint32_t pushesNonAtomic{0};
uint32_t lastPopRead{0};
};
struct alignas(kCacheLine) ConsumerState {
std::atomic<uint32_t> pops{0};
uint32_t popsNonAtomic{0};
uint32_t lastPushRead{0};
};
ProducerState producer;
ConsumerState consumer;
};
std::atomic<int64_t> transactions;
int64_t safeUnaryMinus(int64_t x) {
return x == std::numeric_limits<int64_t>::min() ? x : -x;
}
constexpr int kBaseSearchDepth = 115;
constexpr int kWindowSize = 10000000;
void tupleAppend(std::string &output, int64_t value) {
if (value == 0) {
output.push_back(0x14);
return;
}
uint32_t size = 8 - __builtin_clrsbll(value) / 8;
int typeCode = 0x14 + (value < 0 ? -1 : 1) * size;
output.push_back(typeCode);
if (value < 0) {
value = ~safeUnaryMinus(value);
}
uint64_t swap = __builtin_bswap64(value);
output.insert(output.end(), (uint8_t *)&swap + 8 - size,
(uint8_t *)&swap + 8);
}
void tupleAppend(std::string &output, std::string_view value) {
output.push_back('\x02');
if (memchr(value.data(), '\x00', value.size()) != nullptr) {
for (auto c : value) {
if (c == '\x00') {
output.push_back('\x00');
output.push_back('\xff');
} else {
output.push_back(c);
}
}
} else {
output.insert(output.end(), value.begin(), value.end());
}
output.push_back('\x00');
}
template <class... Ts> std::string tupleKey(const Ts &...ts) {
std::string numToKey(int64_t num) {
std::string result;
(tupleAppend(result, ts), ...);
result.resize(kBaseSearchDepth + sizeof(int64_t));
memset(result.data(), 0, kBaseSearchDepth);
int64_t be = __builtin_bswap64(num);
memcpy(result.data() + kBaseSearchDepth, &be, sizeof(int64_t));
return result;
}
constexpr int kTotalKeyRange = 1'000'000'000;
constexpr int kWindowSize = 1'000'000;
constexpr int kNumReadKeysPerTx = 5;
constexpr int kNumWriteKeysPerTx = 10;
struct Transaction {
std::vector<std::string> keys;
std::vector<weaselab::ConflictSet::ReadRange> reads;
std::vector<weaselab::ConflictSet::WriteRange> writes;
int64_t version;
int64_t oldestVersion;
Transaction() = default;
explicit Transaction(int64_t version)
: version(version), oldestVersion(version - kWindowSize) {
std::vector<int64_t> keyIndices;
for (int i = 0; i < std::max(kNumReadKeysPerTx, kNumWriteKeysPerTx); ++i) {
keyIndices.push_back(rand() % kTotalKeyRange);
void workload(weaselab::ConflictSet *cs) {
int64_t version = kWindowSize;
cs->addWrites(nullptr, 0, version);
for (;; transactions.fetch_add(1, std::memory_order_relaxed)) {
// Reads
{
auto beginK = numToKey(version - kWindowSize);
auto endK = numToKey(version - 1);
auto pointRv = version - kWindowSize + rand() % kWindowSize + 1;
auto pointK = numToKey(pointRv);
weaselab::ConflictSet::ReadRange reads[] = {
{
{(const uint8_t *)pointK.data(), int(pointK.size())},
{nullptr, 0},
pointRv,
},
{
{(const uint8_t *)beginK.data(), int(beginK.size())},
{(const uint8_t *)endK.data(), int(endK.size())},
version - 2,
},
};
weaselab::ConflictSet::Result result[sizeof(reads) / sizeof(reads[0])];
cs->check(reads, result, sizeof(reads) / sizeof(reads[0]));
// for (int i = 0; i < sizeof(reads) / sizeof(reads[0]); ++i) {
// if (result[i] != weaselab::ConflictSet::Commit) {
// fprintf(stderr, "Unexpected conflict: [%s, %s) @ %" PRId64 "\n",
// printable(reads[i].begin).c_str(),
// printable(reads[i].end).c_str(), reads[i].readVersion);
// abort();
// }
// }
}
std::sort(keyIndices.begin(), keyIndices.end());
constexpr std::string_view fullString =
"this is a string, where a prefix of it is used as an element of the "
"tuple forming the key";
for (int i = 0; i < int(keyIndices.size()); ++i) {
keys.push_back(
tupleKey(0x100, keyIndices[i] / fullString.size(),
fullString.substr(0, keyIndices[i] % fullString.size())));
// printf("%s\n", printable(keys.back()).c_str());
}
for (int i = 0; i < kNumWriteKeysPerTx; ++i) {
writes.push_back({{(const uint8_t *)keys[i].data(), int(keys[i].size())},
{nullptr, 0}});
}
reads.push_back({{(const uint8_t *)keys[0].data(), int(keys[0].size())},
{(const uint8_t *)keys[1].data(), int(keys[1].size())},
version - std::min(10, kWindowSize)});
static_assert(kNumReadKeysPerTx >= 3);
for (int i = 2; i < kNumReadKeysPerTx; ++i) {
reads.push_back({{(const uint8_t *)keys[i].data(), int(keys[i].size())},
{nullptr, 0},
version - kWindowSize});
// Writes
{
weaselab::ConflictSet::WriteRange w;
auto k = numToKey(version);
w.begin.p = (const uint8_t *)k.data();
w.end.len = 0;
if (version % (kWindowSize / 2) == 0) {
for (int l = 0; l <= k.size(); ++l) {
w.begin.len = l;
cs->addWrites(&w, 1, version);
}
} else {
w.begin.len = k.size();
cs->addWrites(&w, 1, version);
int64_t beginN = version - kWindowSize + rand() % kWindowSize;
auto b = numToKey(beginN);
auto e = numToKey(beginN + 1000);
w.begin.p = (const uint8_t *)b.data();
w.begin.len = b.size();
w.end.p = (const uint8_t *)e.data();
w.end.len = e.size();
cs->addWrites(&w, 1, version);
}
}
// GC
cs->setOldestVersion(version - kWindowSize);
++version;
}
Transaction(Transaction &&) = default;
Transaction &operator=(Transaction &&) = default;
Transaction(Transaction const &) = delete;
Transaction const &operator=(Transaction const &) = delete;
};
struct Resolver {
void resolve(const weaselab::ConflictSet::ReadRange *reads, int readCount,
const weaselab::ConflictSet::WriteRange *writes, int writeCount,
int64_t newVersion, int64_t newOldestVersion) {
results.resize(readCount);
cs.check(reads, results.data(), readCount);
cs.addWrites(writes, writeCount, newVersion);
cs.setOldestVersion(newOldestVersion);
}
ConflictSet cs{0};
private:
std::vector<weaselab::ConflictSet::Result> results;
};
}
// Adapted from getaddrinfo man page
int getListenFd(const char *node, const char *service) {
@@ -366,8 +237,7 @@ int main(int argc, char **argv) {
{
int listenFd = getListenFd(argv[1], argv[2]);
Resolver resolver;
auto &cs = resolver.cs;
weaselab::ConflictSet cs{0};
weaselab::ConflictSet::MetricsV1 *metrics;
int metricsCount;
cs.getMetricsV1(&metrics, &metricsCount);
@@ -416,22 +286,7 @@ int main(int argc, char **argv) {
}
#endif
TxQueue<Transaction> queue{10};
auto workloadThread = std::thread{[&]() {
for (int64_t version = kWindowSize;;
++version, transactions.fetch_add(1, std::memory_order_relaxed)) {
queue.push(Transaction(version));
}
}};
auto resolverThread = std::thread{[&]() {
for (;;) {
auto tx = queue.pop();
resolver.resolve(tx->reads.data(), tx->reads.size(), tx->writes.data(),
tx->writes.size(), tx->version, tx->oldestVersion);
}
}};
auto w = std::thread{workload, &cs};
for (;;) {
struct sockaddr_storage peer_addr = {};

View File

@@ -767,9 +767,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
false, true);
}
if (!std::is_sorted(points.begin(), points.end())) {
sortPoints(points);
}
sortPoints(points);
int activeWriteCount = 0;
std::vector<std::pair<StringRef, StringRef>> combinedWriteConflictRanges;
@@ -796,6 +794,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
int temp[stripeSize];
int stripes = (stringCount + stripeSize - 1) / stripeSize;
StringRef values[stripeSize];
int64_t writeVersions[stripeSize / 2];
int ss = stringCount - (stripes - 1) * stripeSize;
int64_t entryDelta = 0;
for (int s = stripes - 1; s >= 0; s--) {

View File

@@ -5,7 +5,6 @@ __stack_chk_guard@GLIBC_2.17
abort@GLIBC_2.17
free@GLIBC_2.17
malloc@GLIBC_2.17
memcmp@GLIBC_2.17
memcpy@GLIBC_2.17
memmove@GLIBC_2.17
memset@GLIBC_2.17

View File

@@ -1,8 +1,7 @@
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR aarch64)
set(CMAKE_C_COMPILER "clang;--target=aarch64-linux-gnu")
set(CMAKE_CXX_COMPILER "clang++;--target=aarch64-linux-gnu")
set(CMAKE_C_COMPILER "/usr/bin/aarch64-linux-gnu-gcc")
set(CMAKE_CXX_COMPILER "/usr/bin/aarch64-linux-gnu-g++")
set(CMAKE_FIND_ROOT_PATH /usr/aarch64-linux-gnu)
set(CMAKE_CROSSCOMPILING_EMULATOR "qemu-aarch64;-L;/usr/aarch64-linux-gnu/")
set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE arm64)
set(LD_EXE "/usr/bin/aarch64-linux-gnu-ld")

View File

@@ -1,4 +1,3 @@
___chkstk_darwin
___stack_chk_fail
___stack_chk_guard
__tlv_bootstrap
@@ -6,7 +5,6 @@ _abort
_bzero
_free
_malloc
_memcmp
_memcpy
_memmove
dyld_stub_binder

Some files were not shown because too many files have changed in this diff Show More