Compare commits
34 Commits
v0.0.1
...
b7e16b31ff
Author | SHA1 | Date | |
---|---|---|---|
b7e16b31ff | |||
a324d31518 | |||
fdb05e0e33 | |||
7c27d4a972 | |||
738de01cb4 | |||
325cab6a95 | |||
0b2821941a | |||
a40b5dcd74 | |||
193b1926ff | |||
1c900c5a8c | |||
90fdcdd51a | |||
eb3f6823eb | |||
1534e10b75 | |||
3c100ccee8 | |||
5cf45d1c35 | |||
4f97932893 | |||
24b0f6b7e4 | |||
e77c3fdee6 | |||
383b956bc0 | |||
5fad15305a | |||
ad91fb36a5 | |||
38c1481432 | |||
771ae896e7 | |||
5bf72bda61 | |||
a534f3b758 | |||
ae4fa889c7 | |||
e3d3b0ec0d | |||
dea8d6ae01 | |||
dbc6f2313a | |||
4f51878642 | |||
215865a462 | |||
348ebf016a | |||
377259ffa0 | |||
70220d95e7 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,2 +1,3 @@
|
||||
.cache
|
||||
__pycache__
|
||||
build
|
||||
|
@@ -1,11 +1,11 @@
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/mirrors-clang-format
|
||||
rev: b111689e7b5cba60be3c62d5db2bd1357f4d36ca
|
||||
rev: 6d365699efc33b1b432eab5b4ae331a19e1857de # frozen: v18.1.2
|
||||
hooks:
|
||||
- id: clang-format
|
||||
exclude: ".*third_party/.*"
|
||||
- repo: https://github.com/cheshirekow/cmake-format-precommit
|
||||
rev: e2c2116d86a80e72e7146a06e68b7c228afc6319
|
||||
rev: e2c2116d86a80e72e7146a06e68b7c228afc6319 # frozen: v0.6.13
|
||||
hooks:
|
||||
- id: cmake-format
|
||||
- repo: local
|
||||
@@ -13,7 +13,7 @@ repos:
|
||||
- id: debug verbose check
|
||||
name: disallow checking in DEBUG_VERBOSE=1
|
||||
description: disallow checking in DEBUG_VERBOSE=1
|
||||
entry: '^#define DEBUG_VERBOSE 1$'
|
||||
entry: "^#define DEBUG_VERBOSE 1$"
|
||||
language: pygrep
|
||||
types: [c++]
|
||||
- repo: local
|
||||
@@ -21,6 +21,14 @@ repos:
|
||||
- id: debug verbose check
|
||||
name: disallow checking in SHOW_MEMORY=1
|
||||
description: disallow checking in SHOW_MEMORY=1
|
||||
entry: '^#define SHOW_MEMORY 1$'
|
||||
entry: "^#define SHOW_MEMORY 1$"
|
||||
language: pygrep
|
||||
types: [c++]
|
||||
- repo: https://github.com/shellcheck-py/shellcheck-py
|
||||
rev: a23f6b85d0fdd5bb9d564e2579e678033debbdff # frozen: v0.10.0.1
|
||||
hooks:
|
||||
- id: shellcheck
|
||||
- repo: https://github.com/psf/black
|
||||
rev: 552baf822992936134cbd31a38f69c8cfe7c0f05 # frozen: 24.3.0
|
||||
hooks:
|
||||
- id: black
|
||||
|
228
CMakeLists.txt
228
CMakeLists.txt
@@ -1,15 +1,28 @@
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
project(
|
||||
conflict-set
|
||||
VERSION 0.0.1
|
||||
VERSION 0.0.4
|
||||
DESCRIPTION
|
||||
"A data structure for optimistic concurrency control on ranges of bitwise-lexicographically-ordered keys."
|
||||
HOMEPAGE_URL "https://git.weaselab.dev/weaselab/conflict-set"
|
||||
LANGUAGES C CXX)
|
||||
set(CMAKE_CXX_STANDARD 20)
|
||||
|
||||
file(WRITE ${CMAKE_BINARY_DIR}/version.txt ${PROJECT_VERSION})
|
||||
|
||||
include(CMakePushCheckState)
|
||||
include(CheckCXXCompilerFlag)
|
||||
include(CheckIncludeFileCXX)
|
||||
include(CheckCXXSourceCompiles)
|
||||
|
||||
set(DEFAULT_BUILD_TYPE "Release")
|
||||
|
||||
if(EMSCRIPTEN OR CMAKE_SYSTEM_NAME STREQUAL WASI)
|
||||
set(WASM ON)
|
||||
else()
|
||||
set(WASM OFF)
|
||||
endif()
|
||||
|
||||
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
|
||||
message(
|
||||
STATUS
|
||||
@@ -23,7 +36,23 @@ if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
|
||||
endif()
|
||||
|
||||
add_compile_options(-fdata-sections -ffunction-sections -Wswitch-enum
|
||||
-Werror=switch-enum)
|
||||
-Werror=switch-enum -fPIC)
|
||||
|
||||
set(full_relro_flags "-pie;LINKER:-z,relro,-z,now,-z,noexecstack")
|
||||
cmake_push_check_state()
|
||||
list(APPEND CMAKE_REQUIRED_LINK_OPTIONS ${full_relro_flags})
|
||||
check_cxx_source_compiles("int main(){}" HAS_FULL_RELRO FAIL_REGEX "warning:")
|
||||
if(HAS_FULL_RELRO)
|
||||
add_link_options(${full_relro_flags})
|
||||
endif()
|
||||
cmake_pop_check_state()
|
||||
|
||||
set(version_script_flags LINKER:--version-script=${CMAKE_SOURCE_DIR}/linker.map)
|
||||
cmake_push_check_state()
|
||||
list(APPEND CMAKE_REQUIRED_LINK_OPTIONS ${version_script_flags})
|
||||
check_cxx_source_compiles("int main(){}" HAS_VERSION_SCRIPT FAIL_REGEX
|
||||
"warning:")
|
||||
cmake_pop_check_state()
|
||||
|
||||
option(USE_SIMD_FALLBACK
|
||||
"Use fallback implementations of functions that use SIMD" OFF)
|
||||
@@ -32,10 +61,7 @@ option(USE_SIMD_FALLBACK
|
||||
# https://valgrind.org/docs/manual/manual-core-adv.html#manual-core-adv.clientreq
|
||||
include_directories(SYSTEM ${CMAKE_SOURCE_DIR}/third_party/valgrind)
|
||||
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
add_compile_options(-Wno-maybe-uninitialized
|
||||
$<$<COMPILE_LANGUAGE:CXX>:-Wno-invalid-offsetof>)
|
||||
endif()
|
||||
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-Wno-invalid-offsetof>)
|
||||
|
||||
if(APPLE)
|
||||
add_link_options(-Wl,-dead_strip)
|
||||
@@ -43,10 +69,22 @@ else()
|
||||
add_link_options(-Wl,--gc-sections)
|
||||
endif()
|
||||
|
||||
include(CheckIncludeFileCXX)
|
||||
include(CMakePushCheckState)
|
||||
if(EMSCRIPTEN)
|
||||
# https://github.com/emscripten-core/emscripten/issues/15377#issuecomment-1285167486
|
||||
add_link_options(-lnodefs.js -lnoderawfs.js)
|
||||
add_link_options(-s ALLOW_MEMORY_GROWTH)
|
||||
endif()
|
||||
|
||||
if(NOT USE_SIMD_FALLBACK)
|
||||
cmake_push_check_state()
|
||||
list(APPEND CMAKE_REQUIRED_FLAGS -msimd128)
|
||||
check_include_file_cxx("wasm_simd128.h" HAS_WASM_SIMD)
|
||||
if(HAS_WASM_SIMD)
|
||||
add_compile_options(-msimd128)
|
||||
add_compile_definitions(HAS_WASM_SIMD)
|
||||
endif()
|
||||
cmake_pop_check_state()
|
||||
|
||||
cmake_push_check_state()
|
||||
list(APPEND CMAKE_REQUIRED_FLAGS -mavx)
|
||||
check_include_file_cxx("immintrin.h" HAS_AVX)
|
||||
@@ -64,13 +102,13 @@ endif()
|
||||
|
||||
set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "")
|
||||
|
||||
add_library(${PROJECT_NAME}_object OBJECT ConflictSet.cpp)
|
||||
target_compile_options(${PROJECT_NAME}_object PRIVATE -fPIC -fno-exceptions
|
||||
add_library(${PROJECT_NAME}-object OBJECT ConflictSet.cpp)
|
||||
target_compile_options(${PROJECT_NAME}-object PRIVATE -fno-exceptions
|
||||
-fvisibility=hidden)
|
||||
target_include_directories(${PROJECT_NAME}_object
|
||||
target_include_directories(${PROJECT_NAME}-object
|
||||
PRIVATE ${CMAKE_SOURCE_DIR}/include)
|
||||
|
||||
add_library(${PROJECT_NAME} SHARED $<TARGET_OBJECTS:${PROJECT_NAME}_object>)
|
||||
add_library(${PROJECT_NAME} SHARED $<TARGET_OBJECTS:${PROJECT_NAME}-object>)
|
||||
set_target_properties(
|
||||
${PROJECT_NAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY
|
||||
"${CMAKE_BINARY_DIR}/radix_tree")
|
||||
@@ -78,24 +116,32 @@ if(NOT CMAKE_BUILD_TYPE STREQUAL Debug)
|
||||
set_target_properties(${PROJECT_NAME} PROPERTIES LINKER_LANGUAGE C)
|
||||
endif()
|
||||
|
||||
if(NOT APPLE)
|
||||
if(HAS_VERSION_SCRIPT)
|
||||
target_link_options(${PROJECT_NAME} PRIVATE
|
||||
LINKER:--version-script=${CMAKE_SOURCE_DIR}/linker.map)
|
||||
endif()
|
||||
|
||||
add_library(${PROJECT_NAME}-static STATIC
|
||||
$<TARGET_OBJECTS:${PROJECT_NAME}_object>)
|
||||
$<TARGET_OBJECTS:${PROJECT_NAME}-object>)
|
||||
if(NOT CMAKE_BUILD_TYPE STREQUAL Debug)
|
||||
set_target_properties(${PROJECT_NAME}-static PROPERTIES LINKER_LANGUAGE C)
|
||||
endif()
|
||||
|
||||
if(NOT APPLE AND CMAKE_OBJCOPY)
|
||||
if(APPLE)
|
||||
add_custom_command(
|
||||
TARGET conflict-set-static
|
||||
TARGET ${PROJECT_NAME}-static
|
||||
PRE_LINK
|
||||
COMMAND ${CMAKE_SOURCE_DIR}/privatize_symbols_macos.sh
|
||||
$<TARGET_OBJECTS:${PROJECT_NAME}-object>)
|
||||
else()
|
||||
add_custom_command(
|
||||
TARGET ${PROJECT_NAME}-static
|
||||
POST_BUILD
|
||||
COMMAND
|
||||
${CMAKE_OBJCOPY} --keep-global-symbols=${CMAKE_SOURCE_DIR}/symbols.txt
|
||||
$<TARGET_FILE:${PROJECT_NAME}-static>)
|
||||
${CMAKE_OBJCOPY}
|
||||
--keep-global-symbols=${CMAKE_SOURCE_DIR}/symbol-exports.txt
|
||||
$<TARGET_FILE:${PROJECT_NAME}-static> || echo
|
||||
"Proceeding with all symbols global in static library")
|
||||
endif()
|
||||
|
||||
set(TEST_FLAGS -Wall -Wextra -Wunreachable-code -Wpedantic -UNDEBUG)
|
||||
@@ -104,30 +150,51 @@ include(CTest)
|
||||
|
||||
if(BUILD_TESTING)
|
||||
|
||||
# Shared library version of FoundationDB's skip list implementation
|
||||
add_library(skip_list SHARED SkipList.cpp)
|
||||
target_compile_options(skip_list PRIVATE -fPIC -fno-exceptions
|
||||
-fvisibility=hidden)
|
||||
target_include_directories(skip_list PUBLIC ${CMAKE_SOURCE_DIR}/include)
|
||||
set_target_properties(skip_list PROPERTIES LIBRARY_OUTPUT_DIRECTORY
|
||||
"${CMAKE_BINARY_DIR}/skip_list")
|
||||
set_target_properties(skip_list PROPERTIES OUTPUT_NAME ${PROJECT_NAME})
|
||||
set_target_properties(skip_list PROPERTIES VERSION ${PROJECT_VERSION}
|
||||
SOVERSION ${PROJECT_VERSION_MAJOR})
|
||||
# corpus tests, which are tests curated by libfuzzer. The goal is to get broad
|
||||
# coverage with a small number of tests.
|
||||
|
||||
# Shared library version of a std::unordered_map-based conflict set (point
|
||||
# queries only)
|
||||
add_library(hash_table SHARED HashTable.cpp)
|
||||
target_compile_options(hash_table PRIVATE -fPIC -fno-exceptions
|
||||
-fvisibility=hidden)
|
||||
target_include_directories(hash_table PUBLIC ${CMAKE_SOURCE_DIR}/include)
|
||||
set_target_properties(hash_table PROPERTIES LIBRARY_OUTPUT_DIRECTORY
|
||||
"${CMAKE_BINARY_DIR}/hash_table")
|
||||
set_target_properties(hash_table PROPERTIES OUTPUT_NAME ${PROJECT_NAME})
|
||||
set_target_properties(
|
||||
hash_table PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION
|
||||
${PROJECT_VERSION_MAJOR})
|
||||
file(GLOB CORPUS_TESTS ${CMAKE_SOURCE_DIR}/corpus/*)
|
||||
|
||||
# extra testing that relies on shared libraries, which aren't available with
|
||||
# wasm
|
||||
if(NOT WASM)
|
||||
# Shared library version of FoundationDB's skip list implementation
|
||||
add_library(skip_list SHARED SkipList.cpp)
|
||||
target_compile_options(skip_list PRIVATE -fno-exceptions
|
||||
-fvisibility=hidden)
|
||||
target_include_directories(skip_list PUBLIC ${CMAKE_SOURCE_DIR}/include)
|
||||
set_target_properties(skip_list PROPERTIES LIBRARY_OUTPUT_DIRECTORY
|
||||
"${CMAKE_BINARY_DIR}/skip_list")
|
||||
set_target_properties(skip_list PROPERTIES OUTPUT_NAME ${PROJECT_NAME})
|
||||
set_target_properties(
|
||||
skip_list PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION
|
||||
${PROJECT_VERSION_MAJOR})
|
||||
|
||||
# Shared library version of a std::unordered_map-based conflict set (point
|
||||
# queries only)
|
||||
add_library(hash_table SHARED HashTable.cpp)
|
||||
target_compile_options(hash_table PRIVATE -fno-exceptions
|
||||
-fvisibility=hidden)
|
||||
target_include_directories(hash_table PUBLIC ${CMAKE_SOURCE_DIR}/include)
|
||||
set_target_properties(
|
||||
hash_table PROPERTIES LIBRARY_OUTPUT_DIRECTORY
|
||||
"${CMAKE_BINARY_DIR}/hash_table")
|
||||
set_target_properties(hash_table PROPERTIES OUTPUT_NAME ${PROJECT_NAME})
|
||||
set_target_properties(
|
||||
hash_table PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION
|
||||
${PROJECT_VERSION_MAJOR})
|
||||
|
||||
add_executable(driver_skip_list TestDriver.cpp)
|
||||
target_compile_options(driver_skip_list PRIVATE ${TEST_FLAGS})
|
||||
target_link_libraries(driver_skip_list PRIVATE skip_list)
|
||||
|
||||
foreach(TEST ${CORPUS_TESTS})
|
||||
get_filename_component(hash ${TEST} NAME)
|
||||
add_test(NAME skip_list_${hash} COMMAND driver_skip_list ${TEST})
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
# ad hoc testing
|
||||
add_executable(conflict_set_main ConflictSet.cpp)
|
||||
target_include_directories(conflict_set_main
|
||||
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include)
|
||||
@@ -154,10 +221,7 @@ if(BUILD_TESTING)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# corpus tests
|
||||
|
||||
file(GLOB CORPUS_TESTS ${CMAKE_SOURCE_DIR}/corpus/*)
|
||||
|
||||
# whitebox tests
|
||||
add_executable(fuzz_driver ConflictSet.cpp FuzzTestDriver.cpp)
|
||||
target_compile_options(fuzz_driver PRIVATE ${TEST_FLAGS})
|
||||
if(NOT CMAKE_CROSSCOMPILING)
|
||||
@@ -172,8 +236,7 @@ if(BUILD_TESTING)
|
||||
add_test(NAME conflict_set_fuzz_${hash} COMMAND fuzz_driver ${TEST})
|
||||
endforeach()
|
||||
|
||||
# tsan
|
||||
|
||||
# tsan tests
|
||||
if(NOT CMAKE_CROSSCOMPILING AND NOT CMAKE_BUILD_TYPE STREQUAL Debug)
|
||||
add_executable(tsan_driver ConflictSet.cpp FuzzTestDriver.cpp)
|
||||
target_compile_options(tsan_driver PRIVATE ${TEST_FLAGS} -fsanitize=thread)
|
||||
@@ -187,24 +250,26 @@ if(BUILD_TESTING)
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
# blackbox tests
|
||||
add_executable(driver TestDriver.cpp)
|
||||
target_compile_options(driver PRIVATE ${TEST_FLAGS})
|
||||
target_link_libraries(driver PRIVATE ${PROJECT_NAME})
|
||||
foreach(TEST ${CORPUS_TESTS})
|
||||
get_filename_component(hash ${TEST} NAME)
|
||||
add_test(NAME conflict_set_blackbox_${hash} COMMAND driver ${TEST})
|
||||
endforeach()
|
||||
|
||||
# scripted tests. Written manually to fill in anything libfuzzer couldn't
|
||||
# find.
|
||||
add_executable(script_test ScriptTest.cpp)
|
||||
target_compile_options(script_test PRIVATE ${TEST_FLAGS})
|
||||
target_link_libraries(script_test PRIVATE ${PROJECT_NAME})
|
||||
|
||||
file(GLOB SCRIPT_TESTS ${CMAKE_SOURCE_DIR}/script_tests/*)
|
||||
foreach(TEST ${SCRIPT_TESTS})
|
||||
get_filename_component(name ${TEST} NAME)
|
||||
add_test(NAME conflict_set_script_${name} COMMAND script_test ${TEST})
|
||||
endforeach()
|
||||
|
||||
add_executable(driver_skip_list TestDriver.cpp)
|
||||
target_compile_options(driver_skip_list PRIVATE ${TEST_FLAGS})
|
||||
target_link_libraries(driver_skip_list PRIVATE skip_list)
|
||||
|
||||
find_program(VALGRIND_EXE valgrind)
|
||||
if(VALGRIND_EXE AND NOT CMAKE_CROSSCOMPILING)
|
||||
add_test(NAME conflict_set_blackbox_valgrind
|
||||
@@ -212,12 +277,6 @@ if(BUILD_TESTING)
|
||||
$<TARGET_FILE:driver> ${CORPUS_TESTS})
|
||||
endif()
|
||||
|
||||
foreach(TEST ${CORPUS_TESTS})
|
||||
get_filename_component(hash ${TEST} NAME)
|
||||
add_test(NAME conflict_set_blackbox_${hash} COMMAND driver ${TEST})
|
||||
add_test(NAME skip_list_${hash} COMMAND driver_skip_list ${TEST})
|
||||
endforeach()
|
||||
|
||||
# api smoke tests
|
||||
|
||||
# c90
|
||||
@@ -238,31 +297,45 @@ if(BUILD_TESTING)
|
||||
PROPERTIES CXX_STANDARD_REQUIRED ON)
|
||||
add_test(NAME conflict_set_cxx_api_test COMMAND conflict_set_cxx_api_test)
|
||||
|
||||
if(NOT APPLE AND NOT CMAKE_BUILD_TYPE STREQUAL Debug)
|
||||
# symbol visibility tests
|
||||
if(NOT WASM AND NOT CMAKE_BUILD_TYPE STREQUAL Debug)
|
||||
if(APPLE)
|
||||
set(symbol_exports ${CMAKE_SOURCE_DIR}/apple-symbol-exports.txt)
|
||||
set(symbol_imports ${CMAKE_SOURCE_DIR}/apple-symbol-imports.txt)
|
||||
else()
|
||||
set(symbol_exports ${CMAKE_SOURCE_DIR}/symbol-exports.txt)
|
||||
if(CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64)
|
||||
set(symbol_imports ${CMAKE_SOURCE_DIR}/aarch64-symbol-imports.txt)
|
||||
else()
|
||||
set(symbol_imports ${CMAKE_SOURCE_DIR}/symbol-imports.txt)
|
||||
endif()
|
||||
endif()
|
||||
add_test(
|
||||
NAME conflict_set_shared_symbols
|
||||
COMMAND ${CMAKE_SOURCE_DIR}/test_symbols.sh
|
||||
$<TARGET_FILE:${PROJECT_NAME}> ${CMAKE_SOURCE_DIR}/symbols.txt)
|
||||
COMMAND
|
||||
${CMAKE_SOURCE_DIR}/test_symbols.sh $<TARGET_FILE:${PROJECT_NAME}>
|
||||
${symbol_exports} ${symbol_imports})
|
||||
add_test(
|
||||
NAME conflict_set_static_symbols
|
||||
COMMAND
|
||||
${CMAKE_SOURCE_DIR}/test_symbols.sh
|
||||
$<TARGET_FILE:${PROJECT_NAME}-static> ${CMAKE_SOURCE_DIR}/symbols.txt)
|
||||
$<TARGET_FILE:${PROJECT_NAME}-static> ${symbol_exports}
|
||||
${symbol_imports})
|
||||
endif()
|
||||
|
||||
# bench
|
||||
|
||||
add_executable(conflict_set_bench Bench.cpp)
|
||||
target_link_libraries(conflict_set_bench PRIVATE ${PROJECT_NAME})
|
||||
set_target_properties(conflict_set_bench PROPERTIES SKIP_BUILD_RPATH ON)
|
||||
|
||||
add_executable(real_data_bench RealDataBench.cpp)
|
||||
target_link_libraries(real_data_bench PRIVATE ${PROJECT_NAME})
|
||||
set_target_properties(real_data_bench PROPERTIES SKIP_BUILD_RPATH ON)
|
||||
endif()
|
||||
|
||||
# packaging
|
||||
|
||||
set(CPACK_PACKAGE_CONTACT andrew@weaselab.dev)
|
||||
set(CMAKE_INSTALL_DEFAULT_COMPONENT_NAME all)
|
||||
|
||||
set(CPACK_PACKAGE_VENDOR "Weaselab")
|
||||
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
|
||||
@@ -276,6 +349,26 @@ set(CPACK_RPM_FILE_NAME RPM-DEFAULT)
|
||||
|
||||
# deb
|
||||
set(CPACK_DEBIAN_FILE_NAME DEB-DEFAULT)
|
||||
# see *-imports.txt - dependency versions need to be synced with symbol versions
|
||||
if(CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64)
|
||||
set(CPACK_DEBIAN_PACKAGE_DEPENDS "libc6 (>= 2.17)")
|
||||
else()
|
||||
set(CPACK_DEBIAN_PACKAGE_DEPENDS "libc6 (>= 2.14)")
|
||||
endif()
|
||||
|
||||
# macos
|
||||
set(CMAKE_OSX_DEPLOYMENT_TARGET 11.0)
|
||||
if(APPLE)
|
||||
find_program(PANDOC_EXE pandoc)
|
||||
if(PANDOC_EXE)
|
||||
execute_process(COMMAND ${PANDOC_EXE} ${CMAKE_SOURCE_DIR}/README.md -o
|
||||
${CMAKE_BINARY_DIR}/README.txt)
|
||||
set(CPACK_RESOURCE_FILE_README ${CMAKE_BINARY_DIR}/README.txt)
|
||||
endif()
|
||||
configure_file(${CMAKE_SOURCE_DIR}/LICENSE ${CMAKE_BINARY_DIR}/LICENSE.txt
|
||||
COPYONLY)
|
||||
set(CPACK_RESOURCE_FILE_LICENSE ${CMAKE_BINARY_DIR}/LICENSE.txt)
|
||||
endif()
|
||||
|
||||
include(CPack)
|
||||
|
||||
@@ -297,12 +390,15 @@ set_target_properties(
|
||||
|
||||
install(
|
||||
TARGETS ${PROJECT_NAME} ${PROJECT_NAME}-static
|
||||
EXPORT conflict-setConfig
|
||||
EXPORT ${PROJECT_NAME}Config
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
|
||||
|
||||
install(DIRECTORY include/
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME})
|
||||
|
||||
install(EXPORT conflict-setConfig
|
||||
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/conflict-set/cmake)
|
||||
install(EXPORT ${PROJECT_NAME}Config
|
||||
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/cmake)
|
||||
|
||||
cpack_add_component(all)
|
||||
|
@@ -2614,7 +2614,7 @@ int64_t ConflictSet::getBytes() const { return impl->totalBytes; }
|
||||
|
||||
ConflictSet::ConflictSet(int64_t oldestVersion)
|
||||
: impl((mallocBytesDelta = 0,
|
||||
new (safe_malloc(sizeof(Impl))) Impl{oldestVersion})) {
|
||||
new(safe_malloc(sizeof(Impl))) Impl{oldestVersion})) {
|
||||
impl->totalBytes += mallocBytesDelta;
|
||||
}
|
||||
|
||||
|
@@ -99,7 +99,7 @@ void ConflictSet::setOldestVersion(int64_t oldestVersion) {
|
||||
int64_t ConflictSet::getBytes() const { return -1; }
|
||||
|
||||
ConflictSet::ConflictSet(int64_t oldestVersion)
|
||||
: impl(new (safe_malloc(sizeof(Impl))) Impl{oldestVersion}) {}
|
||||
: impl(new(safe_malloc(sizeof(Impl))) Impl{oldestVersion}) {}
|
||||
|
||||
ConflictSet::~ConflictSet() {
|
||||
if (impl) {
|
||||
|
10
Internal.h
10
Internal.h
@@ -154,6 +154,7 @@ inline void *operator new[](size_t size, std::align_val_t align,
|
||||
|
||||
/// align must be a power of two
|
||||
template <class T> T *align_up(T *t, size_t align) {
|
||||
assert(std::popcount(align) == 1);
|
||||
auto unaligned = uintptr_t(t);
|
||||
auto aligned = (unaligned + align - 1) & ~(align - 1);
|
||||
return reinterpret_cast<T *>(reinterpret_cast<char *>(t) + aligned -
|
||||
@@ -162,6 +163,7 @@ template <class T> T *align_up(T *t, size_t align) {
|
||||
|
||||
/// align must be a power of two
|
||||
constexpr inline int align_up(uint32_t unaligned, uint32_t align) {
|
||||
assert(std::popcount(align) == 1);
|
||||
return (unaligned + align - 1) & ~(align - 1);
|
||||
}
|
||||
|
||||
@@ -177,12 +179,10 @@ struct Arena::ArenaImpl {
|
||||
uint8_t *begin() { return reinterpret_cast<uint8_t *>(this + 1); }
|
||||
};
|
||||
|
||||
static_assert(sizeof(Arena::ArenaImpl) == 16);
|
||||
static_assert(alignof(Arena::ArenaImpl) == 8);
|
||||
|
||||
inline Arena::Arena(int initialSize) : impl(nullptr) {
|
||||
if (initialSize > 0) {
|
||||
auto allocationSize = align_up(initialSize + sizeof(ArenaImpl), 16);
|
||||
auto allocationSize =
|
||||
align_up(initialSize + sizeof(ArenaImpl), alignof(ArenaImpl));
|
||||
impl = (Arena::ArenaImpl *)safe_malloc(allocationSize);
|
||||
impl->prev = nullptr;
|
||||
impl->capacity = allocationSize - sizeof(ArenaImpl);
|
||||
@@ -218,7 +218,7 @@ inline void *operator new(size_t size, std::align_val_t align, Arena &arena) {
|
||||
(arena.impl ? std::max<int>(sizeof(Arena::ArenaImpl),
|
||||
arena.impl->capacity * 2)
|
||||
: 0)),
|
||||
16);
|
||||
alignof(Arena::ArenaImpl));
|
||||
auto *impl = (Arena::ArenaImpl *)safe_malloc(allocationSize);
|
||||
impl->prev = arena.impl;
|
||||
impl->capacity = allocationSize - sizeof(Arena::ArenaImpl);
|
||||
|
36
README.md
36
README.md
@@ -58,27 +58,27 @@ Performance counters:
|
||||
|
||||
## Skip list
|
||||
|
||||
| ns/op | op/s | err% | total | benchmark
|
||||
|--------------------:|--------------------:|--------:|----------:|:----------
|
||||
| 246.99 | 4,048,700.59 | 0.2% | 0.01 | `point reads`
|
||||
| 260.16 | 3,843,784.65 | 0.1% | 0.01 | `prefix reads`
|
||||
| 493.35 | 2,026,953.19 | 0.1% | 0.01 | `range reads`
|
||||
| 462.05 | 2,164,289.23 | 0.6% | 0.01 | `point writes`
|
||||
| 448.19 | 2,231,205.25 | 0.9% | 0.01 | `prefix writes`
|
||||
| 255.83 | 3,908,845.72 | 1.5% | 0.02 | `range writes`
|
||||
| 582.63 | 1,716,349.02 | 1.3% | 0.01 | `monotonic increasing point writes`
|
||||
| ns/op | op/s | err% | total | benchmark |
|
||||
| -----: | -----------: | ---: | ----: | :---------------------------------- |
|
||||
| 246.99 | 4,048,700.59 | 0.2% | 0.01 | `point reads` |
|
||||
| 260.16 | 3,843,784.65 | 0.1% | 0.01 | `prefix reads` |
|
||||
| 493.35 | 2,026,953.19 | 0.1% | 0.01 | `range reads` |
|
||||
| 462.05 | 2,164,289.23 | 0.6% | 0.01 | `point writes` |
|
||||
| 448.19 | 2,231,205.25 | 0.9% | 0.01 | `prefix writes` |
|
||||
| 255.83 | 3,908,845.72 | 1.5% | 0.02 | `range writes` |
|
||||
| 582.63 | 1,716,349.02 | 1.3% | 0.01 | `monotonic increasing point writes` |
|
||||
|
||||
## Radix tree (this implementation)
|
||||
|
||||
| ns/op | op/s | err% | total | benchmark
|
||||
|--------------------:|--------------------:|--------:|----------:|:----------
|
||||
| 19.42 | 51,483,206.67 | 0.3% | 0.01 | `point reads`
|
||||
| 58.43 | 17,115,612.57 | 0.1% | 0.01 | `prefix reads`
|
||||
| 216.09 | 4,627,766.60 | 0.2% | 0.01 | `range reads`
|
||||
| 28.35 | 35,267,567.72 | 0.2% | 0.01 | `point writes`
|
||||
| 43.43 | 23,026,226.17 | 0.2% | 0.01 | `prefix writes`
|
||||
| 50.00 | 20,000,000.00 | 0.0% | 0.01 | `range writes`
|
||||
| 92.38 | 10,824,863.69 | 4.1% | 0.01 | `monotonic increasing point writes`
|
||||
| ns/op | op/s | err% | total | benchmark |
|
||||
| -----: | ------------: | ---: | ----: | :---------------------------------- |
|
||||
| 19.42 | 51,483,206.67 | 0.3% | 0.01 | `point reads` |
|
||||
| 58.43 | 17,115,612.57 | 0.1% | 0.01 | `prefix reads` |
|
||||
| 216.09 | 4,627,766.60 | 0.2% | 0.01 | `range reads` |
|
||||
| 28.35 | 35,267,567.72 | 0.2% | 0.01 | `point writes` |
|
||||
| 43.43 | 23,026,226.17 | 0.2% | 0.01 | `prefix writes` |
|
||||
| 50.00 | 20,000,000.00 | 0.0% | 0.01 | `range writes` |
|
||||
| 92.38 | 10,824,863.69 | 4.1% | 0.01 | `monotonic increasing point writes` |
|
||||
|
||||
# "Real data" test
|
||||
|
||||
|
@@ -1,8 +1,11 @@
|
||||
#include <ConflictSet.h>
|
||||
|
||||
#include <cerrno>
|
||||
#include <chrono>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <fcntl.h>
|
||||
#include <string_view>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
|
@@ -669,7 +669,7 @@ int64_t ConflictSet::getBytes() const { return impl->totalBytes; }
|
||||
|
||||
ConflictSet::ConflictSet(int64_t oldestVersion)
|
||||
: impl((mallocBytesDelta = 0,
|
||||
new (safe_malloc(sizeof(Impl))) Impl{oldestVersion})) {
|
||||
new(safe_malloc(sizeof(Impl))) Impl{oldestVersion})) {
|
||||
impl->totalBytes += mallocBytesDelta;
|
||||
}
|
||||
|
||||
|
8
aarch64-symbol-imports.txt
Normal file
8
aarch64-symbol-imports.txt
Normal file
@@ -0,0 +1,8 @@
|
||||
__stack_chk_fail@GLIBC_2.17
|
||||
__stack_chk_guard@GLIBC_2.17
|
||||
abort@GLIBC_2.17
|
||||
free@GLIBC_2.17
|
||||
malloc@GLIBC_2.17
|
||||
memcpy@GLIBC_2.17
|
||||
memmove@GLIBC_2.17
|
||||
memset@GLIBC_2.17
|
17
apple-symbol-exports.txt
Normal file
17
apple-symbol-exports.txt
Normal file
@@ -0,0 +1,17 @@
|
||||
_ConflictSet_addWrites
|
||||
_ConflictSet_check
|
||||
_ConflictSet_create
|
||||
_ConflictSet_destroy
|
||||
_ConflictSet_getBytes
|
||||
_ConflictSet_setOldestVersion
|
||||
__ZN8weaselab11ConflictSet16setOldestVersionEx
|
||||
__ZN8weaselab11ConflictSet9addWritesEPKNS0_10WriteRangeEix
|
||||
__ZN8weaselab11ConflictSetC1EOS0_
|
||||
__ZN8weaselab11ConflictSetC1Ex
|
||||
__ZN8weaselab11ConflictSetC2EOS0_
|
||||
__ZN8weaselab11ConflictSetC2Ex
|
||||
__ZN8weaselab11ConflictSetD1Ev
|
||||
__ZN8weaselab11ConflictSetD2Ev
|
||||
__ZN8weaselab11ConflictSetaSEOS0_
|
||||
__ZNK8weaselab11ConflictSet5checkEPKNS0_9ReadRangeEPNS0_6ResultEi
|
||||
__ZNK8weaselab11ConflictSet8getBytesEv
|
8
apple-symbol-imports.txt
Normal file
8
apple-symbol-imports.txt
Normal file
@@ -0,0 +1,8 @@
|
||||
__tlv_bootstrap
|
||||
_abort
|
||||
_bzero
|
||||
_free
|
||||
_malloc
|
||||
_memcpy
|
||||
_memmove
|
||||
dyld_stub_binder
|
126
conflict_set.py
Normal file
126
conflict_set.py
Normal file
@@ -0,0 +1,126 @@
|
||||
import ctypes
|
||||
import enum
|
||||
import os
|
||||
|
||||
from typing import Optional
|
||||
|
||||
_lib = None
|
||||
for f in (
|
||||
os.path.dirname(__file__) + "/build/radix_tree/libconflict-set.so.0",
|
||||
os.path.dirname(__file__) + "/build/radix_tree/libconflict-set.0.dylib",
|
||||
):
|
||||
try:
|
||||
_lib = ctypes.cdll.LoadLibrary(f)
|
||||
except:
|
||||
pass
|
||||
|
||||
if _lib is None:
|
||||
import sys
|
||||
|
||||
print("Could not find libconflict-set", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
class _Key(ctypes.Structure):
|
||||
_fields_ = [("p", ctypes.POINTER(ctypes.c_ubyte)), ("len", ctypes.c_int)]
|
||||
|
||||
|
||||
class ReadRange(ctypes.Structure):
|
||||
_fields_ = [
|
||||
("begin", _Key),
|
||||
("end", _Key),
|
||||
("readVersion", ctypes.c_int64),
|
||||
]
|
||||
|
||||
|
||||
class WriteRange(ctypes.Structure):
|
||||
_fields_ = [("begin", _Key), ("end", _Key)]
|
||||
|
||||
|
||||
_lib.ConflictSet_create.argtypes = (ctypes.c_int64,)
|
||||
_lib.ConflictSet_create.restype = ctypes.c_void_p
|
||||
|
||||
_lib.ConflictSet_check.argtypes = (
|
||||
ctypes.c_void_p,
|
||||
ctypes.POINTER(ReadRange),
|
||||
ctypes.POINTER(ctypes.c_int),
|
||||
ctypes.c_int,
|
||||
)
|
||||
|
||||
_lib.ConflictSet_addWrites.argtypes = (
|
||||
ctypes.c_void_p,
|
||||
ctypes.POINTER(WriteRange),
|
||||
ctypes.c_int,
|
||||
ctypes.c_int64,
|
||||
)
|
||||
|
||||
_lib.ConflictSet_setOldestVersion.argtypes = (ctypes.c_void_p, ctypes.c_int64)
|
||||
|
||||
_lib.ConflictSet_destroy.argtypes = (ctypes.c_void_p,)
|
||||
|
||||
_lib.ConflictSet_getBytes.argtypes = (ctypes.c_void_p,)
|
||||
_lib.ConflictSet_getBytes.restype = ctypes.c_int64
|
||||
|
||||
|
||||
class Result(enum.Enum):
|
||||
COMMIT = 0
|
||||
CONFLICT = 1
|
||||
TOO_OLD = 2
|
||||
|
||||
|
||||
def write(begin: bytes, end: Optional[bytes] = None) -> WriteRange:
|
||||
b = (ctypes.c_ubyte * len(begin))()
|
||||
b.value = begin
|
||||
if end is None:
|
||||
e = (ctypes.c_ubyte * 0)()
|
||||
e.value = b""
|
||||
else:
|
||||
e = (ctypes.c_ubyte * len(end))()
|
||||
e.value = end
|
||||
return WriteRange(_Key(b, len(b)), _Key(e, len(e)))
|
||||
|
||||
|
||||
def read(version: int, begin: bytes, end: Optional[bytes] = None) -> ReadRange:
|
||||
b = (ctypes.c_ubyte * len(begin))()
|
||||
b.value = begin
|
||||
if end is None:
|
||||
e = (ctypes.c_ubyte * 0)()
|
||||
e.value = b""
|
||||
else:
|
||||
e = (ctypes.c_ubyte * len(end))()
|
||||
e.value = end
|
||||
return ReadRange(_Key(b, len(b)), _Key(e, len(e)), version)
|
||||
|
||||
|
||||
class ConflictSet:
|
||||
def __init__(self, version: int = 0) -> None:
|
||||
self.p = _lib.ConflictSet_create(version)
|
||||
|
||||
def addWrites(self, version: int, *writes: WriteRange):
|
||||
_lib.ConflictSet_addWrites(
|
||||
self.p, (WriteRange * len(writes))(*writes), len(writes), version
|
||||
)
|
||||
|
||||
def check(self, *reads: ReadRange) -> list[Result]:
|
||||
r = (ctypes.c_int * len(reads))()
|
||||
_lib.ConflictSet_check(self.p, *reads, r, 1)
|
||||
return [Result(x) for x in r]
|
||||
|
||||
def setOldestVersion(self, version: int) -> None:
|
||||
_lib.ConflictSet_setOldestVersion(self.p, version)
|
||||
|
||||
def getBytes(self) -> int:
|
||||
return _lib.ConflictSet_getBytes(self.p)
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def close(self) -> None:
|
||||
if self.p is not None:
|
||||
_lib.ConflictSet_destroy(self.p)
|
||||
self.p = None
|
||||
|
||||
def __exit__(self, exception_type, exception_value, exception_traceback):
|
||||
if self.p is not None:
|
||||
_lib.ConflictSet_destroy(self.p)
|
||||
self.p = None
|
33
package_macos.sh
Executable file
33
package_macos.sh
Executable file
@@ -0,0 +1,33 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -euxo pipefail
|
||||
|
||||
umask 022
|
||||
|
||||
SRC_DIR="${0%/*}"
|
||||
BUILD_ARM="$(mktemp -d -t conflict-set-arm)"
|
||||
BUILD_X86="$(mktemp -d -t conflict-set-x86)"
|
||||
|
||||
cmake_args=(-DCMAKE_CXX_FLAGS=-DNVALGRIND -DCPACK_PACKAGING_INSTALL_PREFIX=/usr/local)
|
||||
|
||||
cmake -S"$SRC_DIR" -B"$BUILD_ARM" -DCMAKE_OSX_ARCHITECTURES=arm64 "${cmake_args[@]}"
|
||||
cmake --build "$BUILD_ARM" --target conflict-set --target conflict-set-static
|
||||
|
||||
cmake -S"$SRC_DIR" -B"$BUILD_X86" -DCMAKE_OSX_ARCHITECTURES=x86_64 "${cmake_args[@]}"
|
||||
cmake --build "$BUILD_X86" --target conflict-set --target conflict-set-static
|
||||
|
||||
VERSION="$(cat "$BUILD_ARM/version.txt")"
|
||||
|
||||
lipo -create "$BUILD_ARM/radix_tree/libconflict-set.$VERSION.dylib" "$BUILD_X86/radix_tree/libconflict-set.$VERSION.dylib" -output "libconflict-set.$VERSION.dylib.tmp"
|
||||
lipo -create "$BUILD_ARM"/libconflict-set-static.a "$BUILD_X86"/libconflict-set-static.a -output libconflict-set-static.a.tmp
|
||||
|
||||
mv "libconflict-set.$VERSION.dylib.tmp" "$BUILD_ARM/radix_tree/libconflict-set.$VERSION.dylib"
|
||||
mv libconflict-set-static.a.tmp "$BUILD_ARM/libconflict-set-static.a"
|
||||
|
||||
pushd "$BUILD_ARM"
|
||||
cpack -G productbuild
|
||||
popd
|
||||
|
||||
mv "$BUILD_ARM/conflict-set-$VERSION-Darwin.pkg" .
|
||||
|
||||
rm -rf "$BUILD_ARM" "$BUILD_X86"
|
@@ -155,3 +155,16 @@ keywords = {data structures, searching, trees}
|
||||
year={1981},
|
||||
publisher={ACM New York, NY, USA}
|
||||
}
|
||||
|
||||
@article{Lemire_2018,
|
||||
title={Roaring bitmaps: Implementation of an optimized software library},
|
||||
volume={48},
|
||||
ISSN={1097-024X},
|
||||
url={http://dx.doi.org/10.1002/spe.2560},
|
||||
DOI={10.1002/spe.2560},
|
||||
number={4},
|
||||
journal={Software: Practice and Experience},
|
||||
publisher={Wiley},
|
||||
author={Lemire, Daniel and Kaser, Owen and Kurz, Nathan and Deri, Luca and O’Hara, Chris and Saint‐Jacques, François and Ssi‐Yan‐Kai, Gregory},
|
||||
year={2018},
|
||||
month=jan, pages={867–895} }
|
||||
|
102
paper/paper.tex
102
paper/paper.tex
@@ -6,6 +6,7 @@
|
||||
\usepackage{tikz}
|
||||
\usepackage{tikzscale}
|
||||
\usepackage[edges]{forest}
|
||||
\usepackage{amsmath}
|
||||
|
||||
\title{ARTful Conflict Checking for FoundationDB}
|
||||
\author{Andrew Noyes \thanks{\href{mailto:andrew@weaselab.dev}{andrew@weaselab.dev}}}
|
||||
@@ -20,7 +21,7 @@
|
||||
|
||||
\section*{Abstract}
|
||||
|
||||
FoundationDB \cite{DBLP:conf/sigmod/ZhouXSNMTABSLRD21} provides serializability using a specialized data structure called \textit{lastCommit} \footnote{See Algorithm 1 referenced in \cite{DBLP:conf/sigmod/ZhouXSNMTABSLRD21}} to implement optimistic concurrency control \cite{kung1981optimistic}.
|
||||
FoundationDB \cite{DBLP:conf/sigmod/ZhouXSNMTABSLRD21} provides serializability using a specialized data structure called \textit{lastCommit} \footnote{See Algorithm 1 referenced in \cite{DBLP:conf/sigmod/ZhouXSNMTABSLRD21}.} to implement optimistic concurrency control \cite{kung1981optimistic}.
|
||||
This data structure encodes the write sets for recent transactions as a map from key ranges (represented as bitwise-lexicographically-ordered half-open intervals) to most recent write versions.
|
||||
FoundationDB implements \textit{lastCommit} as a version-augmented probabilistic skip list \cite{10.1145/78973.78977}.
|
||||
In this paper, we propose an alternative implementation of \textit{lastCommit} as a version-augmented Adaptive Radix Tree (ART) \cite{DBLP:conf/icde/LeisK013}, and evaluate its performance.
|
||||
@@ -68,9 +69,9 @@ See figure \ref{fig:tree} for an example tree after inserting
|
||||
$\{ANY\} \rightarrow 2$,
|
||||
$\{ARE\} \rightarrow 3$, and
|
||||
$\{ART\} \rightarrow 4$.
|
||||
Each node shows its partial prefix annotated with $(max,point,range)$.
|
||||
Each node shows its partial prefix annotated with $max$ or $max,point,range$.
|
||||
|
||||
\subsection{Checking point reads}
|
||||
\subsection{Checking point reads} \label{Checking point reads}
|
||||
|
||||
The algorithm for checking point reads follows directly from the definitions of the \emph{point} and \emph{range} fields.
|
||||
Our input is a key $k$ and a read version $r$, and we must report whether or not the write version $v_{k}$ of $k$ is less than or equal to $r$.
|
||||
@@ -84,10 +85,101 @@ As an optimization, during the search phase for a point read we can inspect the
|
||||
If the max version among all keys starting with a prefix of $k$ is less than or equal to $r$, then $v_{k} \leq r$.
|
||||
|
||||
\subsection{Checking range reads}
|
||||
\subsection{Adding point writes}
|
||||
\subsection{Adding range writes}
|
||||
|
||||
Checking range reads is more involved. Logically the idea is to partition the range read so that each subrange in the partition is a single point or coincides with the set of keys beginning with a prefix (a \emph{prefix range}).
|
||||
The max version of a single point is $v$ as described in \ref{Checking point reads}.
|
||||
The max version of a prefix range is the $max$ of the node associated with the prefix if such a node exists, and $range$ of the next node with a $range$ field otherwise.
|
||||
If there is no next node with a range field, then we ignore that subrange in our max version calculation.
|
||||
The max version among all versions and max versions of subranges in this partition is the max version of the whole range, which we compare to $r$.
|
||||
|
||||
Let's start with partitioning the range in the case where the beginning of the range is a prefix of the end of the range.
|
||||
We'll be able to use this as a subroutine in the general case.
|
||||
Suppose our range is $[a_{0}\dots a_{k}, a_{0}\dots a_{n})$ where $k < n$, and $a_{i} \in [0, 256)$.
|
||||
The partition starts with the singleton set containing the first key in the range.
|
||||
\[
|
||||
\{a_{0}\dots a_{k}\}
|
||||
\]
|
||||
or equivalently
|
||||
\[
|
||||
[a_{0}\dots a_{k}, a_{0}\dots a_{k} 0)
|
||||
\]
|
||||
and continues with a sequence of prefix ranges ending in each digit up until $a_{k+1}$.
|
||||
Recall that the range $[a_{0}\dots a_{k} 0, a_{0}\dots a_{k} 1)$ is equivalent to the set of keys starting with $a_{0}\dots a_{k} 0$.
|
||||
|
||||
\begin{align*}
|
||||
\dots \quad \cup \quad & [a_{0}\dots a_{k} 0, a_{0}\dots a_{k} 1) \quad \cup \\
|
||||
& [a_{0}\dots a_{k} 1, a_{0}\dots a_{k} 2) \quad \cup \\
|
||||
& \dots \\
|
||||
& [a_{0}\dots a_{k} (a_{k+1}-1), a_{0}\dots a_{k+1})
|
||||
\end{align*}
|
||||
|
||||
The remainder of the partition begins with the singleton set
|
||||
\[
|
||||
\dots \quad \cup \quad [a_{0}\dots a_{k + 1}, a_{0}\dots a_{k + 1} 0) \quad \cup\ \quad \dots
|
||||
\]
|
||||
and proceeds as above until a range ending at $a_{0}\dots a_{n}$.
|
||||
|
||||
Let's now consider a range where begin is not a prefix of end.
|
||||
|
||||
\[
|
||||
[a_{0}\dots a_{m}, b_{0}\dots b_{n})
|
||||
\]
|
||||
|
||||
Let $i$ be the lowest index such that $a_{i} \neq b_{i}$.
|
||||
For brevity we will elide the common prefix up until $i$ in the following discussion.
|
||||
We'll start with partitioning this range coarsely:
|
||||
|
||||
\begin{align*}
|
||||
& [a_{i}\dots a_{m}, a_{i} + 1) \quad \cup \\
|
||||
& [a_{i} + 1, a_{i} + 2) \quad \cup \\
|
||||
& \dots \\
|
||||
& [b_{i} - 1, b_{i}) \quad \cup \\
|
||||
& [b_{i}, b_{i}\dots b_{n})
|
||||
\end{align*}
|
||||
|
||||
The last range has a begin that's a prefix of end, and so we'll partition that as before.
|
||||
The inner ranges are already prefix ranges.
|
||||
This leaves only $[a_{i}\dots a_{m}, a_{i} + 1)$.
|
||||
|
||||
If $m = i$, then this range is adjacent to the first inner range above, and we're done.
|
||||
Otherwise we'll partition this into
|
||||
|
||||
\begin{align*}
|
||||
& [a_{i}\dots a_{m}, a_{i}\dots (a_{m} + 1)) \quad \cup \\
|
||||
& [a_{i}\dots (a_{m} + 1), a_{i}\dots (a_{m} + 2)) \quad \cup \\
|
||||
& \dots \\
|
||||
& [a_{i}\dots 254, a_{i}\dots 255) \quad \cup \\
|
||||
& [a_{i}\dots 255, a_{i}\dots (a_{m-1} + 1) )
|
||||
\end{align*}
|
||||
|
||||
and repeat starting at \footnote{This doesn't explicitly describe how to handle the case where $a_{m-1} = 255$. In this case we would skip to the largest $j < m$ such that $a_{j} \neq 255$. We know $j \geq i$ since if $a_{i} = 255$ then the range is inverted.}
|
||||
\[
|
||||
\dots \quad \cup \quad [a_{i}\dots (a_{m-1} + 1), a_{i}\dots (a_{m-1} + 2))
|
||||
\]
|
||||
until we end at $a_{i} + 1$, adjacent to the first inner range.
|
||||
|
||||
A few notes on implementation:
|
||||
\begin{itemize}
|
||||
\item{For clarity, the above algorithm decouples the logical partitioning from the physical structure of the tree. An optimized implementation would merge adjacent prefix ranges that don't correspond to nodes in the tree as it scans, so that it only calculates the version of such merged ranges once. Additionally, our implementation stores an index of which child pointers are valid as a bitset for Node48 and Node256 to speed up this scan using techniques inspired by \cite{Lemire_2018}.}
|
||||
\item{In order to avoid many costly pointer indirections, we can store the max version not in each node itself but next to each node's parent pointer. Without this, the range read performance is not competetive with the skip list.}
|
||||
\item{An optimized implementation would construct the partition of $[a_{i}\dots a_{m}, a_{i} + 1)$ in reverse order, as it descends along the search path to $a_{i}\dots a_{m}$}
|
||||
\item{An optimized implementation would search for the common prefix first, and return early if any prefix of the common prefix has a $max \leq r$.}
|
||||
\end{itemize}
|
||||
|
||||
\subsection{Reclaiming old entries}
|
||||
|
||||
In order to bound memory usage, we track an \emph{oldest version}, reject transactions with read versions before \emph{oldest version}, and reclaim nodes made redundant by \emph{oldest version}.
|
||||
We track the rate of insertions of new nodes and make sure that our incremental reclaiming of old nodes according to \emph{oldest version} outpaces inserts.
|
||||
|
||||
\subsection{Adding point writes}
|
||||
|
||||
A point write of $k$ at version $v$ simply sets $max \gets v$ \footnote{Recall that write versions are non-decreasing.} for every node along $k$'s search path, and sets $range$ for $k$'s node to the $range$ of the first node greater than $k$, or the \emph{oldest version} if none exists.
|
||||
|
||||
\subsection{Adding range writes}
|
||||
|
||||
A range write of $[b, e)$ at version $v$ performs a point write of $b$ at $v$, and then inserts a node at $e$ with $range$ set to $v$, and $point$ set such that the result of checking a read of $e$ is unaffected.
|
||||
Nodes along the search path to $e$ that are a strict prefix of $e$ get $max$ set to $v$, and all nodes between $b$ and $e$ are removed.
|
||||
|
||||
\begin{figure}
|
||||
\caption{}
|
||||
\label{fig:tree}
|
||||
|
8
privatize_symbols_macos.sh
Executable file
8
privatize_symbols_macos.sh
Executable file
@@ -0,0 +1,8 @@
|
||||
#!/bin/bash
|
||||
|
||||
# This has the effect of making visibility=hidden symbols private in object files
|
||||
for obj in "$@" ; do
|
||||
ld -r "$obj" -o "$obj.tmp"
|
||||
touch -r "$obj" "$obj.tmp"
|
||||
mv "$obj.tmp" "$obj"
|
||||
done
|
9
symbol-imports.txt
Normal file
9
symbol-imports.txt
Normal file
@@ -0,0 +1,9 @@
|
||||
_GLOBAL_OFFSET_TABLE_
|
||||
__stack_chk_fail@GLIBC_2.4
|
||||
__tls_get_addr@GLIBC_2.3
|
||||
abort@GLIBC_2.2.5
|
||||
free@GLIBC_2.2.5
|
||||
malloc@GLIBC_2.2.5
|
||||
memcpy@GLIBC_2.14
|
||||
memmove@GLIBC_2.2.5
|
||||
memset@GLIBC_2.2.5
|
12
test_conflict_set.py
Normal file
12
test_conflict_set.py
Normal file
@@ -0,0 +1,12 @@
|
||||
from conflict_set import *
|
||||
|
||||
|
||||
def test_conflict_set():
|
||||
with ConflictSet() as cs:
|
||||
before = cs.getBytes()
|
||||
key = b"a key"
|
||||
cs.addWrites(1, write(key))
|
||||
assert cs.getBytes() - before > 0
|
||||
assert cs.check(read(0, key)) == [Result.CONFLICT]
|
||||
cs.setOldestVersion(1)
|
||||
assert cs.check(read(0, key)) == [Result.TOO_OLD]
|
@@ -2,5 +2,15 @@
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# ./test_symbols.sh <library> <expected exported symbols file> <allowed imported symbols file>
|
||||
|
||||
diff -u <(sort < "$2") <(nm "$1" | grep " T " | cut -f3 -d " " | sort)
|
||||
nm "$1" | grep " U " | (! grep -Pv 'abort|free|malloc|mem[a-z]*|__ashlti3|__stack_chk_[a-z]*|__tls_get_addr|_GLOBAL_OFFSET_TABLE_')
|
||||
ec=0
|
||||
for symbol in $(nm "$1" | grep " U " | sed 's/ U //') ; do
|
||||
if ! grep --fixed-strings "$symbol" "$3" > /dev/null ; then
|
||||
echo "Imported symbol $symbol not present in $3"
|
||||
ec=1
|
||||
fi
|
||||
done
|
||||
|
||||
exit $ec
|
Reference in New Issue
Block a user