From 8bf3aa7f563cfd25fc02e03035f25e0a760f5007 Mon Sep 17 00:00:00 2001
From: Andrew Noyes
Date: Sun, 3 Mar 2024 20:32:19 -0800
Subject: [PATCH] Add std::unordered_map implementation

As a rough upper bound for point query throughput
---
 CMakeLists.txt    |  13 ++++
 HashTable.cpp     | 168 ++++++++++++++++++++++++++++++++++++++++++++++
 RealDataBench.cpp |   1 -
 3 files changed, 181 insertions(+), 1 deletion(-)
 create mode 100644 HashTable.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b1f360d..6f35416 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -109,6 +109,19 @@ if(BUILD_TESTING)
   set_target_properties(skip_list PROPERTIES VERSION ${PROJECT_VERSION}
                                              SOVERSION ${PROJECT_VERSION_MAJOR})
 
+  # Shared library version of a std::unordered_map-based conflict set (point
+  # queries only)
+  add_library(hash_table SHARED HashTable.cpp)
+  target_compile_options(hash_table PRIVATE -fPIC -fno-exceptions
+                                            -fvisibility=hidden)
+  target_include_directories(hash_table PUBLIC ${CMAKE_SOURCE_DIR}/include)
+  set_target_properties(hash_table PROPERTIES LIBRARY_OUTPUT_DIRECTORY
+                                              "${CMAKE_BINARY_DIR}/hash_table")
+  set_target_properties(hash_table PROPERTIES OUTPUT_NAME ${PROJECT_NAME})
+  set_target_properties(
+    hash_table PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION
+                          ${PROJECT_VERSION_MAJOR})
+
   add_executable(conflict_set_main ConflictSet.cpp)
   target_include_directories(conflict_set_main
                              PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include)
diff --git a/HashTable.cpp b/HashTable.cpp
new file mode 100644
index 0000000..8c42207
--- /dev/null
+++ b/HashTable.cpp
@@ -0,0 +1,168 @@
+#include "ConflictSet.h"
+#include "Internal.h"
+#include <string>
+#include <string_view>
+#include <unordered_map>
+
+// This implementation isn't correct for range queries :). It's just intended as
+// a reference for performance comparison with point queries.
+
+// Transparent hasher: lets the map be probed with a std::string_view without
+// first building a std::string.
+// struct is from "https://www.cppstories.com/2021/heterogeneous-access-cpp20/"
+struct string_hash {
+  using is_transparent = void;
+  [[nodiscard]] size_t operator()(std::string_view txt) const {
+    return std::hash<std::string_view>{}(txt);
+  }
+  [[nodiscard]] size_t operator()(const std::string &txt) const {
+    return std::hash<std::string>{}(txt);
+  }
+};
+
+struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
+  explicit Impl(int64_t oldestVersion) : oldestVersion(oldestVersion) {}
+
+  // Writes one result per read: TooOld if the read version is older than
+  // oldestVersion, Conflict if the begin key has a recorded write version newer
+  // than the read version, and Commit otherwise. Only `begin` is consulted.
+  void check(const ConflictSet::ReadRange *reads, ConflictSet::Result *results,
+             int count) const {
+    for (int i = 0; i < count; ++i) {
+      auto key =
+          std::string_view((const char *)reads[i].begin.p, reads[i].begin.len);
+      auto version = reads[i].readVersion;
+      if (version < oldestVersion) {
+        results[i] = TooOld;
+        continue;
+      }
+      auto iter = map.find(key);
+      results[i] =
+          iter == map.end() || iter->second <= version ? Commit : Conflict;
+    }
+  }
+
+  // Records, for the begin key of each write, the largest write version seen.
+  void addWrites(const ConflictSet::WriteRange *writes, int count) {
+    const auto bucketsBefore = map.bucket_count();
+    for (int i = 0; i < count; ++i) {
+      auto &max = map[std::string((const char *)writes[i].begin.p,
+                                  writes[i].begin.len)];
+      max = std::max(max, writes[i].writeVersion);
+      // Each write funds two steps of the sweep in setOldestVersion.
+      keyUpdates += 2;
+    }
+    if (map.bucket_count() != bucketsBefore) {
+      // The inserts above rehashed the table, which invalidates every iterator
+      // into it. Restart the sweep rather than keep a stale position.
+      removalIter = map.begin();
+    }
+  }
+
+  // Raises oldestVersion (it never moves backwards) and, once the keyUpdates
+  // budget has reached 100, spends it sweeping the map and erasing entries
+  // whose version is no newer than oldestVersion.
+  void setOldestVersion(int64_t oldestVersion) {
+    if (oldestVersion <= this->oldestVersion) {
+      return;
+    }
+    this->oldestVersion = oldestVersion;
+    if (keyUpdates < 100) {
+      return;
+    }
+
+    for (; keyUpdates > 0 && removalIter != map.end(); --keyUpdates) {
+      if (removalIter->second <= oldestVersion) {
+        removalIter = map.erase(removalIter);
+      } else {
+        ++removalIter;
+      }
+    }
+
+    if (removalIter == map.end()) {
+      removalIter = map.begin();
+    }
+  }
+
+private:
+  int64_t keyUpdates = 0;
+  int64_t oldestVersion;
+  std::unordered_map<std::string, int64_t, string_hash, std::equal_to<>> map;
+
+  // Position of the incremental sweep in setOldestVersion. It outlives that
+  // call, so it has to stay valid in between: we only erase from within
+  // setOldestVersion, and addWrites resets it whenever an insert rehashes.
+  // Initialized from `map` (declared above) so it is never singular.
+  decltype(map.begin()) removalIter = map.begin();
+};
+
+void ConflictSet::check(const ReadRange *reads, Result *results,
+                        int count) const {
+  return impl->check(reads, results, count);
+}
+
+void ConflictSet::addWrites(const WriteRange *writes, int count) {
+  return impl->addWrites(writes, count);
+}
+
+void ConflictSet::setOldestVersion(int64_t oldestVersion) {
+  return impl->setOldestVersion(oldestVersion);
+}
+
+ConflictSet::ConflictSet(int64_t oldestVersion)
+    : impl(new (safe_malloc(sizeof(Impl))) Impl{oldestVersion}) {}
+
+ConflictSet::~ConflictSet() {
+  if (impl) {
+    impl->~Impl();
+    free(impl);
+  }
+}
+
+ConflictSet::ConflictSet(ConflictSet &&other) noexcept
+    : impl(std::exchange(other.impl, nullptr)) {}
+
+ConflictSet &ConflictSet::operator=(ConflictSet &&other) noexcept {
+  if (this != &other) {
+    // Destroy the Impl we currently own before adopting other's; overwriting
+    // the pointer without doing so would leak it.
+    if (impl) {
+      impl->~Impl();
+      free(impl);
+    }
+    impl = std::exchange(other.impl, nullptr);
+  }
+  return *this;
+}
+
+using ConflictSet_Result = ConflictSet::Result;
+using ConflictSet_Key = ConflictSet::Key;
+using ConflictSet_ReadRange = ConflictSet::ReadRange;
+using ConflictSet_WriteRange = ConflictSet::WriteRange;
+
+extern "C" {
+__attribute__((__visibility__("default"))) void
+ConflictSet_check(void *cs, const ConflictSet_ReadRange *reads,
+                  ConflictSet_Result *results, int count) {
+  ((ConflictSet::Impl *)cs)->check(reads, results, count);
+}
+__attribute__((__visibility__("default"))) void
+ConflictSet_addWrites(void *cs, const ConflictSet_WriteRange *writes,
+                      int count) {
+  ((ConflictSet::Impl *)cs)->addWrites(writes, count);
+}
+__attribute__((__visibility__("default"))) void
+ConflictSet_setOldestVersion(void *cs, int64_t oldestVersion) {
+  ((ConflictSet::Impl *)cs)->setOldestVersion(oldestVersion);
+}
+__attribute__((__visibility__("default"))) void *
+ConflictSet_create(int64_t oldestVersion) {
+  return new (safe_malloc(sizeof(ConflictSet::Impl)))
+      ConflictSet::Impl{oldestVersion};
+}
+__attribute__((__visibility__("default"))) void ConflictSet_destroy(void *cs) {
+  using Impl = ConflictSet::Impl;
+  ((Impl *)cs)->~Impl();
+  free(cs);
+}
+}
diff --git a/RealDataBench.cpp b/RealDataBench.cpp
index a66cbf8..e51c3f4 100644
--- a/RealDataBench.cpp
+++ b/RealDataBench.cpp
@@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include