6 Commits

Author SHA1 Message Date
c46f633dbf Skip sorting if already sorted for skip list
All checks were successful
Tests / 64 bit versions total: 7950, passed: 7950
Tests / Debug total: 7948, passed: 7948
Tests / SIMD fallback total: 7950, passed: 7950
Tests / Release [clang] total: 7950, passed: 7950
Clang |Total|New|Outstanding|Fixed|Trend |:-:|:-:|:-:|:-:|:-: |0|0|0|0|:clap:
Tests / Release [gcc] total: 7950, passed: 7950
GNU C Compiler (gcc) |Total|New|Outstanding|Fixed|Trend |:-:|:-:|:-:|:-:|:-: |0|0|0|0|:clap:
Tests / Release [clang,aarch64] total: 5268, passed: 5268
Tests / Coverage total: 5316, passed: 5316
Code Coverage #### Project Overview No changes detected, that affect the code coverage. * Line Coverage: 97.67% (3138/3213) * Branch Coverage: 42.05% (18734/44548) * Complexity Density: 0.00 * Lines of Code: 3213 #### Quality Gates Summary Output truncated.
weaselab/conflict-set/pipeline/head This commit looks good
2024-11-12 13:25:29 -08:00
400350946c Point read/write workload for server_bench 2024-11-12 10:50:03 -08:00
607a4ef6e2 Require musttail and preserve_none for interleaved 2024-11-11 21:40:05 -08:00
b0750772ec Don't do hardening check if cross compiling
All checks were successful
Tests / 64 bit versions total: 7950, passed: 7950
Tests / Debug total: 7948, passed: 7948
Tests / SIMD fallback total: 7950, passed: 7950
Tests / Release [clang] total: 7950, passed: 7950
Clang |Total|New|Outstanding|Fixed|Trend |:-:|:-:|:-:|:-:|:-: |0|0|0|0|:clap:
Tests / Release [gcc] total: 7950, passed: 7950
GNU C Compiler (gcc) |Total|New|Outstanding|Fixed|Trend |:-:|:-:|:-:|:-:|:-: |0|0|0|0|:clap:
Tests / Release [clang,aarch64] total: 5268, passed: 5268
Tests / Coverage total: 5316, passed: 5316
Code Coverage #### Project Overview No changes detected, that affect the code coverage. * Line Coverage: 97.67% (3139/3214) * Branch Coverage: 42.05% (18734/44548) * Complexity Density: 0.00 * Lines of Code: 3214 #### Quality Gates Summary Output truncated.
weaselab/conflict-set/pipeline/head This commit looks good
2024-11-11 16:56:28 -08:00
86abc02188 Build binaries compatible with cf-protection
Some checks failed
Tests / 64 bit versions total: 7950, passed: 7950
Tests / Debug total: 7948, passed: 7948
Tests / SIMD fallback total: 7950, passed: 7950
Tests / Release [clang] total: 7950, passed: 7950
Clang |Total|New|Outstanding|Fixed|Trend |:-:|:-:|:-:|:-:|:-: |0|0|0|0|:clap:
Tests / Release [gcc] total: 7950, passed: 7950
GNU C Compiler (gcc) |Total|New|Outstanding|Fixed|Trend |:-:|:-:|:-:|:-:|:-: |0|0|0|0|:clap:
Tests / Release [clang,aarch64] total: 5269, failed: 1, passed: 5268
Tests / Coverage total: 5316, passed: 5316
weaselab/conflict-set/pipeline/head There was a failure building this commit
2024-11-11 16:17:57 -08:00
a90e353fcd Use llvm-objcopy if using clang and it's available
This works around a weird error I was getting when trying to link a
translation unit that included Internal.h with libconflict-set-static.a
with clang + gnu objcopy
2024-11-11 12:09:36 -08:00
100 changed files with 187 additions and 322 deletions

View File

@@ -32,12 +32,7 @@ if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
endif()
add_compile_options(
-Werror=switch-enum
-Wswitch-enum
-Wunused-variable
-fPIC
-fdata-sections
-ffunction-sections
-Werror=switch-enum -Wswitch-enum -fPIC -fdata-sections -ffunction-sections
-fno-jump-tables # https://github.com/llvm/llvm-project/issues/54247
)
@@ -109,11 +104,19 @@ else()
add_link_options(-Wl,--gc-sections)
endif()
if(USE_SIMD_FALLBACK)
add_compile_definitions(USE_SIMD_FALLBACK)
else()
if(CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64)
if(NOT USE_SIMD_FALLBACK)
cmake_push_check_state()
list(APPEND CMAKE_REQUIRED_FLAGS -mavx)
check_include_file_cxx("immintrin.h" HAS_AVX)
if(HAS_AVX)
add_compile_options(-mavx)
add_compile_definitions(HAS_AVX)
endif()
cmake_pop_check_state()
check_include_file_cxx("arm_neon.h" HAS_ARM_NEON)
if(HAS_ARM_NEON)
add_compile_definitions(HAS_ARM_NEON)
endif()
endif()
@@ -343,7 +346,8 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND BUILD_TESTING)
# c++98
add_executable(conflict_set_cxx_api_test conflict_set_cxx_api_test.cpp)
target_compile_options(conflict_set_cxx_api_test PRIVATE ${TEST_FLAGS})
target_link_libraries(conflict_set_cxx_api_test PRIVATE ${PROJECT_NAME})
target_link_libraries(conflict_set_cxx_api_test
PRIVATE ${PROJECT_NAME}-static)
set_target_properties(conflict_set_cxx_api_test PROPERTIES CXX_STANDARD 98)
set_target_properties(conflict_set_cxx_api_test
PROPERTIES CXX_STANDARD_REQUIRED ON)

View File

@@ -14,16 +14,6 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
#if !defined(USE_SIMD_FALLBACK) && defined(__has_include)
#if defined(__x86_64__) && __has_include("immintrin.h")
#define HAS_AVX 1
#include <immintrin.h>
#elif __has_include("arm_neon.h")
#define HAS_ARM_NEON 1
#include <arm_neon.h>
#endif
#endif
#include "ConflictSet.h"
#include "Internal.h"
#include "LongestCommonPrefix.h"
@@ -44,6 +34,12 @@ limitations under the License.
#include <type_traits>
#include <utility>
#ifdef HAS_AVX
#include <immintrin.h>
#elif defined(HAS_ARM_NEON)
#include <arm_neon.h>
#endif
#ifndef __SANITIZE_THREAD__
#if defined(__has_feature)
#if __has_feature(thread_sanitizer)
@@ -345,8 +341,8 @@ struct Node3 : Node {
// Sorted
uint8_t index[kMaxNodes];
InternalVersionT childMaxVersion[kMaxNodes];
TaggedNodePointer children[kMaxNodes];
InternalVersionT childMaxVersion[kMaxNodes];
uint8_t *partialKey() {
assert(!releaseDeferred);
@@ -700,6 +696,8 @@ constexpr int64_t kMaxFreeListBytes = 1 << 20;
// doesn't meet the capacity constraints, it's freed and a new node is allocated
// with the minimum capacity. The hope is that "unfit" nodes don't get stuck in
// the free list.
//
// TODO valgrind annotations
template <class T> struct NodeAllocator {
static_assert(std::derived_from<T, Node>);
@@ -736,7 +734,6 @@ template <class T> struct NodeAllocator {
p->parent = freeList;
freeList = p;
freeListSize += sizeof(T) + p->partialKeyCapacity;
VALGRIND_MAKE_MEM_NOACCESS(p, sizeof(T) + p->partialKeyCapacity);
}
void deferRelease(T *p, Node *forwardTo) {
@@ -758,13 +755,6 @@ template <class T> struct NodeAllocator {
void releaseDeferred() {
if (deferredList != nullptr) {
deferredListFront->parent = freeList;
#ifndef NVALGRIND
for (auto *iter = deferredList; iter != freeList;) {
auto *tmp = iter;
iter = (T *)iter->parent;
VALGRIND_MAKE_MEM_NOACCESS(tmp, sizeof(T) + tmp->partialKeyCapacity);
}
#endif
freeList = std::exchange(deferredList, nullptr);
}
for (T *n = std::exchange(deferredListOverflow, nullptr); n != nullptr;) {
@@ -785,7 +775,6 @@ template <class T> struct NodeAllocator {
assert(deferredList == nullptr);
assert(deferredListOverflow == nullptr);
for (T *iter = freeList; iter != nullptr;) {
VALGRIND_MAKE_MEM_DEFINED(iter, sizeof(T));
auto *tmp = iter;
iter = (T *)iter->parent;
removeNode(tmp);
@@ -803,7 +792,6 @@ private:
T *allocate_helper(int minCapacity, int maxCapacity) {
if (freeList != nullptr) {
VALGRIND_MAKE_MEM_DEFINED(freeList, sizeof(T));
freeListSize -= sizeof(T) + freeList->partialKeyCapacity;
assume(freeList->partialKeyCapacity >= 0);
assume(minCapacity >= 0);
@@ -812,11 +800,6 @@ private:
freeList->partialKeyCapacity <= maxCapacity) {
auto *result = freeList;
freeList = (T *)freeList->parent;
VALGRIND_MAKE_MEM_UNDEFINED(result,
sizeof(T) + result->partialKeyCapacity);
VALGRIND_MAKE_MEM_DEFINED(&result->partialKeyCapacity,
sizeof(result->partialKeyCapacity));
VALGRIND_MAKE_MEM_DEFINED(&result->type, sizeof(result->type));
return result;
} else {
auto *p = freeList;
@@ -960,7 +943,8 @@ private:
NodeAllocator<Node256> node256;
};
int getNodeIndex(Node3 *n, uint8_t index) {
int getNodeIndex(Node3 *self, uint8_t index) {
Node3 *n = (Node3 *)self;
assume(n->numChildren >= 1);
assume(n->numChildren <= 3);
for (int i = 0; i < n->numChildren; ++i) {
@@ -971,7 +955,8 @@ int getNodeIndex(Node3 *n, uint8_t index) {
return -1;
}
int getNodeIndexExists(Node3 *n, uint8_t index) {
int getNodeIndexExists(Node3 *self, uint8_t index) {
Node3 *n = (Node3 *)self;
assume(n->numChildren >= 1);
assume(n->numChildren <= 3);
for (int i = 0; i < n->numChildren; ++i) {
@@ -1272,32 +1257,33 @@ TaggedNodePointer getChild(Node *self, uint8_t index) {
struct ChildAndMaxVersion {
TaggedNodePointer child;
InternalVersionT maxVersion;
static ChildAndMaxVersion empty() {
ChildAndMaxVersion result;
result.child = nullptr;
return result;
}
};
ChildAndMaxVersion getChildAndMaxVersion(Node0 *, uint8_t) { return {}; }
ChildAndMaxVersion getChildAndMaxVersion(Node3 *self, uint8_t index) {
int i = getNodeIndex(self, index);
if (i < 0) {
return ChildAndMaxVersion::empty();
ChildAndMaxVersion result;
result.child = nullptr;
return result;
}
return {self->children[i], self->childMaxVersion[i]};
}
ChildAndMaxVersion getChildAndMaxVersion(Node16 *self, uint8_t index) {
int i = getNodeIndex(self, index);
if (i < 0) {
return ChildAndMaxVersion::empty();
ChildAndMaxVersion result;
result.child = nullptr;
return result;
}
return {self->children[i], self->childMaxVersion[i]};
}
ChildAndMaxVersion getChildAndMaxVersion(Node48 *self, uint8_t index) {
int i = self->index[index];
if (i < 0) {
return ChildAndMaxVersion::empty();
ChildAndMaxVersion result;
result.child = nullptr;
return result;
}
return {self->children[i], self->childMaxVersion[i]};
}
@@ -1380,11 +1366,17 @@ TaggedNodePointer getChildGeq(Node16 *self, int child) {
TaggedNodePointer getChildGeq(Node48 *self, int child) {
int c = self->bitSet.firstSetGeq(child);
return c < 0 ? nullptr : self->children[self->index[c]];
if (c < 0) {
return nullptr;
}
return self->children[self->index[c]];
}
TaggedNodePointer getChildGeq(Node256 *self, int child) {
int c = self->bitSet.firstSetGeq(child);
return c < 0 ? nullptr : self->children[c];
if (c < 0) {
return nullptr;
}
return self->children[c];
}
TaggedNodePointer getChildGeq(Node *self, int child) {
@@ -1404,25 +1396,22 @@ TaggedNodePointer getChildGeq(Node *self, int child) {
}
}
TaggedNodePointer getFirstChild(Node0 *) { return nullptr; }
TaggedNodePointer getFirstChild(Node3 *self) {
// Improves scan performance
__builtin_prefetch(self->children[1]);
return self->children[0];
Node *getFirstChild(Node0 *) { return nullptr; }
Node *getFirstChild(Node3 *self) {
return self->numChildren == 0 ? nullptr : self->children[0];
}
TaggedNodePointer getFirstChild(Node16 *self) {
// Improves scan performance
__builtin_prefetch(self->children[1]);
return self->children[0];
Node *getFirstChild(Node16 *self) {
return self->numChildren == 0 ? nullptr : self->children[0];
}
TaggedNodePointer getFirstChild(Node48 *self) {
return self->children[self->index[self->bitSet.firstSetGeq(0)]];
Node *getFirstChild(Node48 *self) {
int index = self->index[self->bitSet.firstSetGeq(0)];
return index < 0 ? nullptr : self->children[index];
}
TaggedNodePointer getFirstChild(Node256 *self) {
Node *getFirstChild(Node256 *self) {
return self->children[self->bitSet.firstSetGeq(0)];
}
TaggedNodePointer getFirstChild(Node *self) {
Node *getFirstChild(Node *self) {
// Only require that the node-specific overloads are covered
// GCOVR_EXCL_START
switch (self->getType()) {
@@ -1442,6 +1431,46 @@ TaggedNodePointer getFirstChild(Node *self) {
// GCOVR_EXCL_STOP
}
// Precondition: self has a child
TaggedNodePointer getFirstChildExists(Node3 *self) {
assert(self->numChildren > 0);
return self->children[0];
}
// Precondition: self has a child
TaggedNodePointer getFirstChildExists(Node16 *self) {
assert(self->numChildren > 0);
return self->children[0];
}
// Precondition: self has a child
TaggedNodePointer getFirstChildExists(Node48 *self) {
return self->children[self->index[self->bitSet.firstSetGeq(0)]];
}
// Precondition: self has a child
TaggedNodePointer getFirstChildExists(Node256 *self) {
return self->children[self->bitSet.firstSetGeq(0)];
}
// Precondition: self has a child
TaggedNodePointer getFirstChildExists(Node *self) {
// Only require that the node-specific overloads are covered
// GCOVR_EXCL_START
switch (self->getType()) {
case Type_Node0:
__builtin_unreachable();
case Type_Node3:
return getFirstChildExists(static_cast<Node3 *>(self));
case Type_Node16:
return getFirstChildExists(static_cast<Node16 *>(self));
case Type_Node48:
return getFirstChildExists(static_cast<Node48 *>(self));
case Type_Node256:
return getFirstChildExists(static_cast<Node256 *>(self));
default:
__builtin_unreachable();
}
// GCOVR_EXCL_STOP
}
// self must not be the root
void maybeDecreaseCapacity(Node *&self, WriteContext *writeContext,
ConflictSet::Impl *impl);
@@ -1695,7 +1724,7 @@ TaggedNodePointer &getOrCreateChild(TaggedNodePointer &self, TrivialSpan &key,
}
Node *nextPhysical(Node *node) {
Node *nextChild = getFirstChild(node);
auto nextChild = getFirstChild(node);
if (nextChild != nullptr) {
return nextChild;
}
@@ -1713,7 +1742,7 @@ Node *nextPhysical(Node *node) {
}
Node *nextLogical(Node *node) {
Node *nextChild = getFirstChild(node);
auto nextChild = getFirstChild(node);
if (nextChild != nullptr) {
node = nextChild;
goto downLeftSpine;
@@ -1731,7 +1760,7 @@ Node *nextLogical(Node *node) {
}
}
downLeftSpine:
for (; !node->entryPresent; node = getFirstChild(node)) {
for (; !node->entryPresent; node = getFirstChildExists(node)) {
}
return node;
}
@@ -1901,7 +1930,7 @@ void mergeWithChild(TaggedNodePointer &self, WriteContext *writeContext,
child->parentsIndex = self->parentsIndex;
// Max versions are stored in the parent, so we need to update it now
// that we have a new parent. Safe to call since the root never has a partial
// that we have a new parent. Safe we call since the root never has a partial
// key.
setMaxVersion(child, std::max(childMaxVersion, writeContext->zero));
@@ -2779,7 +2808,7 @@ bool checkRangeStartsWith(NodeT *nTyped, TrivialSpan key, int begin, int end,
__builtin_unreachable(); // GCOVR_EXCL_LINE
downLeftSpine:
for (; !n->entryPresent; n = getFirstChild(n)) {
for (; !n->entryPresent; n = getFirstChildExists(n)) {
}
return n->entry.rangeVersion <= readVersion;
}
@@ -3236,7 +3265,7 @@ PRESERVE_NONE void down_left_spine(Job *job, Context *context) {
job->setResult(n->entry.rangeVersion <= job->readVersion);
MUSTTAIL return complete(job, context);
}
auto child = getFirstChild(n);
auto child = getFirstChildExists(n);
job->n = child;
__builtin_prefetch(job->n);
job->continuation = downLeftSpineTable[child.getType()];
@@ -3331,7 +3360,7 @@ template <class NodeT> void iter(Job *job, Context *context) {
job->setResult(n->entry.pointVersion <= job->readVersion);
MUSTTAIL return complete(job, context);
}
auto c = getFirstChild(n);
auto c = getFirstChildExists(n);
job->n = c;
job->continuation = downLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
@@ -3855,7 +3884,7 @@ void left_side_down_left_spine(Job *job, Context *context) {
}
MUSTTAIL return done_left_side_iter(job, context);
}
auto c = getFirstChild(n);
auto c = getFirstChildExists(n);
job->n = c;
job->continuation = leftSideDownLeftSpineTable[c.getType()];
__builtin_prefetch(job->n);
@@ -4518,7 +4547,7 @@ bool checkPointRead(Node *n, const TrivialSpan key,
if (n->entryPresent) {
return n->entry.pointVersion <= readVersion;
}
n = getFirstChild(n);
n = getFirstChildExists(n);
goto downLeftSpine;
}
@@ -4572,7 +4601,7 @@ bool checkPointRead(Node *n, const TrivialSpan key,
}
}
downLeftSpine:
for (; !n->entryPresent; n = getFirstChild(n)) {
for (; !n->entryPresent; n = getFirstChildExists(n)) {
}
return n->entry.rangeVersion <= readVersion;
}
@@ -4647,7 +4676,7 @@ bool checkPrefixRead(Node *n, const TrivialSpan key,
}
}
downLeftSpine:
for (; !n->entryPresent; n = getFirstChild(n)) {
for (; !n->entryPresent; n = getFirstChildExists(n)) {
}
return n->entry.rangeVersion <= readVersion;
}
@@ -4734,7 +4763,7 @@ bool checkRangeLeftSide(Node *n, TrivialSpan key, int prefixLen,
}
}
downLeftSpine:
for (; !n->entryPresent; n = getFirstChild(n)) {
for (; !n->entryPresent; n = getFirstChildExists(n)) {
}
return n->entry.rangeVersion <= readVersion;
}
@@ -4823,12 +4852,14 @@ backtrack:
searchPathLen -= 1 + n->partialKeyLen;
n = n->parent;
} else {
searchPathLen -= n->partialKeyLen;
n = next;
searchPathLen += n->partialKeyLen;
goto downLeftSpine;
}
}
downLeftSpine:
for (; !n->entryPresent; n = getFirstChild(n)) {
for (; !n->entryPresent; n = getFirstChildExists(n)) {
}
return n->entry.rangeVersion <= readVersion;
}
@@ -5299,11 +5330,6 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
gc_iterations_total.add(set_oldest_iterations_accum);
if (n == nullptr) {
removalKey = {};
if (removalBufferSize > kMaxRemovalBufferSize) {
safe_free(removalBuffer, removalBufferSize);
removalBufferSize = kMinRemovalBufferSize;
removalBuffer = (uint8_t *)safe_malloc(removalBufferSize);
}
oldestExtantVersion = oldestVersionAtGcBegin;
oldest_extant_version.set(oldestExtantVersion);
oldestVersionAtGcBegin = oldestVersionFullPrecision;
@@ -5314,47 +5340,12 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
oldestExtantVersion, oldestVersionAtGcBegin);
#endif
} else {
// Store the current search path to resume the scan later
saveRemovalKey(n);
removalKeyArena = Arena();
removalKey = getSearchPath(removalKeyArena, n);
}
return fuel;
}
void saveRemovalKey(Node *n) {
uint8_t *cursor = removalBuffer + removalBufferSize;
int size = 0;
auto reserve = [&](int delta) {
if (size + delta > removalBufferSize) [[unlikely]] {
int newBufSize = std::max(removalBufferSize * 2, size + delta);
uint8_t *newBuf = (uint8_t *)safe_malloc(newBufSize);
memcpy(newBuf + newBufSize - size, cursor, size);
safe_free(removalBuffer, removalBufferSize);
removalBuffer = newBuf;
removalBufferSize = newBufSize;
cursor = newBuf + newBufSize - size;
}
};
for (;;) {
auto partialKey = TrivialSpan{n->partialKey(), n->partialKeyLen};
reserve(partialKey.size());
size += partialKey.size();
cursor -= partialKey.size();
memcpy(cursor, partialKey.data(), partialKey.size());
if (n->parent == nullptr) {
break;
}
reserve(1);
++size;
--cursor;
*cursor = n->parentsIndex;
n = n->parent;
}
removalKey = {cursor, size};
}
void setOldestVersion(int64_t newOldestVersion) {
assert(newOldestVersion >= 0);
assert(newOldestVersion <= newestVersionFullPrecision);
@@ -5405,7 +5396,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
writeContext.~WriteContext();
new (&writeContext) WriteContext();
// Leave removalBuffer as is
removalKeyArena = Arena{};
removalKey = {};
keyUpdates = 10;
@@ -5437,16 +5428,11 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
~Impl() {
eraseTree(root, &writeContext);
safe_free(metrics, metricsCount * sizeof(metrics[0]));
safe_free(removalBuffer, removalBufferSize);
}
WriteContext writeContext;
static constexpr int kMinRemovalBufferSize = 1 << 10;
// Eventually downsize if larger than this value
static constexpr int kMaxRemovalBufferSize = 1 << 16;
uint8_t *removalBuffer = (uint8_t *)safe_malloc(kMinRemovalBufferSize);
int removalBufferSize = kMinRemovalBufferSize;
Arena removalKeyArena;
TrivialSpan removalKey;
int64_t keyUpdates;
@@ -5601,7 +5587,7 @@ Node *firstGeqLogical(Node *n, const TrivialSpan key) {
if (n->entryPresent) {
return n;
}
n = getFirstChild(n);
n = getFirstChildExists(n);
goto downLeftSpine;
}
@@ -5646,7 +5632,7 @@ Node *firstGeqLogical(Node *n, const TrivialSpan key) {
}
}
downLeftSpine:
for (; !n->entryPresent; n = getFirstChild(n)) {
for (; !n->entryPresent; n = getFirstChildExists(n)) {
}
return n;
}
@@ -5872,13 +5858,13 @@ void checkVersionsGeqOldestExtant(Node *n,
case Type_Node0: {
} break;
case Type_Node3: {
[[maybe_unused]] auto *self = static_cast<Node3 *>(n);
auto *self = static_cast<Node3 *>(n);
for (int i = 0; i < 3; ++i) {
assert(self->childMaxVersion[i] >= oldestExtantVersion);
}
} break;
case Type_Node16: {
[[maybe_unused]] auto *self = static_cast<Node16 *>(n);
auto *self = static_cast<Node16 *>(n);
for (int i = 0; i < 16; ++i) {
assert(self->childMaxVersion[i] >= oldestExtantVersion);
}
@@ -5888,7 +5874,7 @@ void checkVersionsGeqOldestExtant(Node *n,
for (int i = 0; i < 48; ++i) {
assert(self->childMaxVersion[i] >= oldestExtantVersion);
}
for ([[maybe_unused]] auto m : self->maxOfMax) {
for (auto m : self->maxOfMax) {
assert(m >= oldestExtantVersion);
}
} break;
@@ -5897,7 +5883,7 @@ void checkVersionsGeqOldestExtant(Node *n,
for (int i = 0; i < 256; ++i) {
assert(self->childMaxVersion[i] >= oldestExtantVersion);
}
for ([[maybe_unused]] auto m : self->maxOfMax) {
for (auto m : self->maxOfMax) {
assert(m >= oldestExtantVersion);
}
} break;

View File

@@ -18,6 +18,7 @@ using namespace weaselab;
#include <span>
#include <string>
#include <thread>
#include <unordered_set>
#include <utility>
#include <callgrind.h>
@@ -367,6 +368,23 @@ template <class T, class C = std::less<T>> auto set(Arena &arena) {
return Set<T, C>(ArenaAlloc<T>(&arena));
}
template <class T> struct MyHash;
template <class T> struct MyHash<T *> {
size_t operator()(const T *t) const noexcept {
size_t result;
memcpy(&result, &t, sizeof(result));
return result;
}
};
template <class T>
using HashSet =
std::unordered_set<T, MyHash<T>, std::equal_to<T>, ArenaAlloc<T>>;
template <class T> auto hashSet(Arena &arena) {
return HashSet<T>(ArenaAlloc<T>(&arena));
}
template <class T, class U>
bool operator==(const ArenaAlloc<T> &lhs, const ArenaAlloc<U> &rhs) {
return lhs.arena == rhs.arena;

4
Jenkinsfile vendored
View File

@@ -91,7 +91,7 @@ pipeline {
minio bucket: 'jenkins', credentialsId: 'jenkins-minio', excludes: '', host: 'minio.weaselab.dev', includes: 'build/*.deb,build/*.rpm,paper/*.pdf', targetFolder: '${JOB_NAME}/${BUILD_NUMBER}/${STAGE_NAME}/'
}
}
stage('gcc') {
stage('Release [gcc]') {
agent {
dockerfile {
args '-v /home/jenkins/ccache:/ccache'
@@ -99,7 +99,7 @@ pipeline {
}
}
steps {
CleanBuildAndTest("-DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++")
CleanBuildAndTest("-DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DCMAKE_CXX_FLAGS=-DNVALGRIND")
recordIssues(tools: [gcc()])
}
}

View File

@@ -26,15 +26,15 @@ Compiler is `Ubuntu clang version 20.0.0 (++20241029082144+7544d3af0e28-1~exp1~2
| ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark
|--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:----------
| 12.83 | 77,947,334.88 | 0.8% | 247.13 | 63.80 | 3.873 | 32.64 | 0.6% | 0.01 | `point reads`
| 14.73 | 67,908,470.74 | 0.1% | 299.99 | 73.66 | 4.073 | 42.50 | 0.5% | 0.01 | `prefix reads`
| 35.63 | 28,066,165.78 | 0.1% | 782.70 | 178.49 | 4.385 | 106.65 | 0.2% | 0.01 | `range reads`
| 20.00 | 49,993,123.62 | 0.1% | 376.83 | 100.50 | 3.749 | 50.05 | 0.5% | 0.01 | `point writes`
| 38.04 | 26,287,266.49 | 0.1% | 665.86 | 191.21 | 3.482 | 100.41 | 0.4% | 0.01 | `prefix writes`
| 40.48 | 24,703,557.31 | 1.3% | 732.80 | 204.36 | 3.586 | 111.26 | 0.2% | 0.01 | `range writes`
| 81.01 | 12,343,591.64 | 1.4% | 1,551.57 | 409.23 | 3.791 | 292.66 | 0.1% | 0.01 | `monotonic increasing point writes`
| 315,672.00 | 3,167.85 | 1.7% | 4,043,066.00 | 1,590,315.00 | 2.542 | 714,828.00 | 0.1% | 0.01 | `worst case for radix tree`
| 114.81 | 8,710,164.86 | 0.7% | 2,178.00 | 578.69 | 3.764 | 345.00 | 0.0% | 0.01 | `create and destroy`
| 12.63 | 79,186,868.18 | 1.4% | 241.61 | 64.76 | 3.731 | 31.64 | 0.8% | 0.01 | `point reads`
| 14.48 | 69,078,073.40 | 0.3% | 292.42 | 74.69 | 3.915 | 41.49 | 0.5% | 0.01 | `prefix reads`
| 34.37 | 29,094,694.11 | 0.2% | 759.53 | 179.77 | 4.225 | 100.38 | 0.2% | 0.01 | `range reads`
| 19.34 | 51,713,896.36 | 0.7% | 369.70 | 101.81 | 3.631 | 47.88 | 0.6% | 0.01 | `point writes`
| 39.16 | 25,538,968.61 | 0.2% | 653.16 | 206.77 | 3.159 | 89.62 | 0.8% | 0.01 | `prefix writes`
| 40.58 | 24,642,681.12 | 4.7% | 718.44 | 216.44 | 3.319 | 99.28 | 0.6% | 0.01 | `range writes`
| 78.77 | 12,694,520.69 | 3.8% | 1,395.55 | 421.73 | 3.309 | 249.81 | 0.1% | 0.01 | `monotonic increasing point writes`
| 287,760.50 | 3,475.11 | 0.5% | 3,929,266.50 | 1,550,225.50 | 2.535 | 639,064.00 | 0.0% | 0.01 | `worst case for radix tree`
| 104.76 | 9,545,250.65 | 3.1% | 2,000.00 | 552.82 | 3.618 | 342.00 | 0.0% | 0.01 | `create and destroy`
# "Real data" test
@@ -49,7 +49,7 @@ Check: 4.39702 seconds, 370.83 MB/s, Add: 4.50025 seconds, 124.583 MB/s, Gc rati
## radix tree
```
Check: 0.975666 seconds, 1728.24 MB/s, Add: 1.19751 seconds, 480.444 MB/s, Gc ratio: 36.8478%, Peak idle memory: 2.39447e+06
Check: 0.987757 seconds, 1650.76 MB/s, Add: 1.24815 seconds, 449.186 MB/s, Gc ratio: 41.4675%, Peak idle memory: 2.02872e+06
```
## hash table

View File

@@ -5,7 +5,7 @@
#include <cstdio>
#include <cstring>
#include <fcntl.h>
#include <span>
#include <string_view>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
@@ -64,7 +64,7 @@ int main(int argc, const char **argv) {
auto *const mapOriginal = begin;
const auto sizeOriginal = size;
using StringView = std::span<const uint8_t>;
using StringView = std::basic_string_view<uint8_t>;
StringView write;
std::vector<StringView> reads;
@@ -78,9 +78,9 @@ int main(int argc, const char **argv) {
end = (uint8_t *)memchr(begin, '\n', size);
if (line.size() > 0 && line[0] == 'P') {
write = line.subspan(2, line.size());
write = line.substr(2, line.size());
} else if (line.size() > 0 && line[0] == 'L') {
reads.push_back(line.subspan(2, line.size()));
reads.push_back(line.substr(2, line.size()));
} else if (line.empty()) {
{
readRanges.resize(reads.size());

View File

@@ -23,97 +23,6 @@
#include "Internal.h"
#include "third_party/nadeau.h"
constexpr int kCacheLine = 64; // TODO mac m1 is 128
template <class T> struct TxQueue {
explicit TxQueue(int lgSlotCount)
: slotCount(1 << lgSlotCount), slotCountMask(slotCount - 1),
slots(new T[slotCount]) {
// Otherwise we can't tell the difference between full and empty.
assert(!(slotCountMask & 0x80000000));
}
/// Call from producer thread, after ensuring consumer is no longer accessing
/// it somehow
~TxQueue() { delete[] slots; }
/// Must be called from the producer thread
void push(T t) {
if (wouldBlock()) {
// Wait for pops to change and try again
consumer.pops.wait(producer.lastPopRead, std::memory_order_relaxed);
producer.lastPopRead = consumer.pops.load(std::memory_order_acquire);
}
slots[producer.pushesNonAtomic++ & slotCountMask] = std::move(t);
// seq_cst so that the notify can't be ordered before the store
producer.pushes.store(producer.pushesNonAtomic, std::memory_order_seq_cst);
// We have to notify every time, since we don't know if this is the last
// push ever
producer.pushes.notify_one();
}
/// Must be called from the producer thread
uint32_t outstanding() {
return producer.pushesNonAtomic -
consumer.pops.load(std::memory_order_relaxed);
}
/// Returns true if a call to push might block. Must be called from the
/// producer thread.
bool wouldBlock() {
// See if we can determine that overflow won't happen entirely from state
// local to the producer
if (producer.pushesNonAtomic - producer.lastPopRead == slotCount - 1) {
// Re-read pops with memory order
producer.lastPopRead = consumer.pops.load(std::memory_order_acquire);
return producer.pushesNonAtomic - producer.lastPopRead == slotCount - 1;
}
return false;
}
/// Valid until the next pop, or until this queue is destroyed.
T *pop() {
// See if we can determine that there's an entry we can pop entirely from
// state local to the consumer
if (consumer.lastPushRead - consumer.popsNonAtomic == 0) {
// Re-read pushes with memory order and try again
consumer.lastPushRead = producer.pushes.load(std::memory_order_acquire);
if (consumer.lastPushRead - consumer.popsNonAtomic == 0) {
// Wait for pushes to change and try again
producer.pushes.wait(consumer.lastPushRead, std::memory_order_relaxed);
consumer.lastPushRead = producer.pushes.load(std::memory_order_acquire);
}
}
auto result = &slots[consumer.popsNonAtomic++ & slotCountMask];
// We only have to write pops with memory order if we've run out of items.
// We know that we'll eventually run out.
if (consumer.lastPushRead - consumer.popsNonAtomic == 0) {
// seq_cst so that the notify can't be ordered before the store
consumer.pops.store(consumer.popsNonAtomic, std::memory_order_seq_cst);
consumer.pops.notify_one();
}
return result;
}
private:
const uint32_t slotCount;
const uint32_t slotCountMask;
T *slots;
struct alignas(kCacheLine) ProducerState {
std::atomic<uint32_t> pushes{0};
uint32_t pushesNonAtomic{0};
uint32_t lastPopRead{0};
};
struct alignas(kCacheLine) ConsumerState {
std::atomic<uint32_t> pops{0};
uint32_t popsNonAtomic{0};
uint32_t lastPushRead{0};
};
ProducerState producer;
ConsumerState consumer;
};
std::atomic<int64_t> transactions;
int64_t safeUnaryMinus(int64_t x) {
@@ -138,7 +47,6 @@ void tupleAppend(std::string &output, int64_t value) {
void tupleAppend(std::string &output, std::string_view value) {
output.push_back('\x02');
if (memchr(value.data(), '\x00', value.size()) != nullptr) {
for (auto c : value) {
if (c == '\x00') {
output.push_back('\x00');
@@ -147,9 +55,6 @@ void tupleAppend(std::string &output, std::string_view value) {
output.push_back(c);
}
}
} else {
output.insert(output.end(), value.begin(), value.end());
}
output.push_back('\x00');
}
@@ -161,69 +66,49 @@ template <class... Ts> std::string tupleKey(const Ts &...ts) {
constexpr int kTotalKeyRange = 1'000'000'000;
constexpr int kWindowSize = 1'000'000;
constexpr int kNumReadKeysPerTx = 10;
constexpr int kNumWriteKeysPerTx = 5;
constexpr int kNumKeys = 10;
struct Transaction {
std::vector<std::string> keys;
std::vector<weaselab::ConflictSet::ReadRange> reads;
std::vector<weaselab::ConflictSet::WriteRange> writes;
int64_t version;
int64_t oldestVersion;
Transaction() = default;
explicit Transaction(int64_t version)
: version(version), oldestVersion(version - kWindowSize) {
void workload(weaselab::ConflictSet *cs) {
int64_t version = kWindowSize;
for (;; transactions.fetch_add(1, std::memory_order_relaxed)) {
std::vector<int64_t> keyIndices;
for (int i = 0; i < std::max(kNumReadKeysPerTx, kNumWriteKeysPerTx); ++i) {
for (int i = 0; i < kNumKeys; ++i) {
keyIndices.push_back(rand() % kTotalKeyRange);
}
std::sort(keyIndices.begin(), keyIndices.end());
std::vector<std::string> keys;
constexpr std::string_view fullString =
"this is a string, where a prefix of it is used as an element of the "
"tuple forming the key";
for (int i = 0; i < int(keyIndices.size()); ++i) {
for (int i = 0; i < kNumKeys; ++i) {
keys.push_back(
tupleKey(0x100, keyIndices[i] / fullString.size(),
fullString.substr(0, keyIndices[i] % fullString.size())));
// printf("%s\n", printable(keys.back()).c_str());
}
for (int i = 0; i < kNumWriteKeysPerTx; ++i) {
std::vector<weaselab::ConflictSet::ReadRange> reads;
std::vector<weaselab::ConflictSet::WriteRange> writes;
std::vector<weaselab::ConflictSet::Result> results;
for (int i = 0; i < kNumKeys; ++i) {
writes.push_back({{(const uint8_t *)keys[i].data(), int(keys[i].size())},
{nullptr, 0}});
}
reads.push_back({{(const uint8_t *)keys[0].data(), int(keys[0].size())},
{(const uint8_t *)keys[1].data(), int(keys[1].size())},
version - std::min(10, kWindowSize)});
static_assert(kNumReadKeysPerTx >= 3);
for (int i = 2; i < kNumReadKeysPerTx; ++i) {
reads.push_back({{(const uint8_t *)keys[i].data(), int(keys[i].size())},
{nullptr, 0},
version - kWindowSize});
}
results.resize(reads.size());
cs->check(reads.data(), results.data(), reads.size());
bool ok = true;
for (auto result : results) {
ok &= result == weaselab::ConflictSet::Commit;
}
Transaction(Transaction &&) = default;
Transaction &operator=(Transaction &&) = default;
Transaction(Transaction const &) = delete;
Transaction const &operator=(Transaction const &) = delete;
};
struct Resolver {
void resolve(const weaselab::ConflictSet::ReadRange *reads, int readCount,
const weaselab::ConflictSet::WriteRange *writes, int writeCount,
int64_t newVersion, int64_t newOldestVersion) {
results.resize(readCount);
cs.check(reads, results.data(), readCount);
cs.addWrites(writes, writeCount, newVersion);
cs.setOldestVersion(newOldestVersion);
cs->addWrites(writes.data(), ok ? writes.size() : 0, version);
cs->setOldestVersion(version - kWindowSize);
++version;
}
ConflictSet cs{0};
private:
std::vector<weaselab::ConflictSet::Result> results;
};
}
// Adapted from getaddrinfo man page
int getListenFd(const char *node, const char *service) {
@@ -366,8 +251,7 @@ int main(int argc, char **argv) {
{
int listenFd = getListenFd(argv[1], argv[2]);
Resolver resolver;
auto &cs = resolver.cs;
weaselab::ConflictSet cs{0};
weaselab::ConflictSet::MetricsV1 *metrics;
int metricsCount;
cs.getMetricsV1(&metrics, &metricsCount);
@@ -416,22 +300,7 @@ int main(int argc, char **argv) {
}
#endif
TxQueue<Transaction> queue{10};
auto workloadThread = std::thread{[&]() {
for (int64_t version = kWindowSize;;
++version, transactions.fetch_add(1, std::memory_order_relaxed)) {
queue.push(Transaction(version));
}
}};
auto resolverThread = std::thread{[&]() {
for (;;) {
auto tx = queue.pop();
resolver.resolve(tx->reads.data(), tx->reads.size(), tx->writes.data(),
tx->writes.size(), tx->version, tx->oldestVersion);
}
}};
auto w = std::thread{workload, &cs};
for (;;) {
struct sockaddr_storage peer_addr = {};

View File

@@ -796,6 +796,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
int temp[stripeSize];
int stripes = (stringCount + stripeSize - 1) / stripeSize;
StringRef values[stripeSize];
int64_t writeVersions[stripeSize / 2];
int ss = stringCount - (stripes - 1) * stripeSize;
int64_t entryDelta = 0;
for (int s = stripes - 1; s >= 0; s--) {

View File

@@ -1,4 +1,3 @@
___chkstk_darwin
___stack_chk_fail
___stack_chk_guard
__tlv_bootstrap
@@ -6,7 +5,6 @@ _abort
_bzero
_free
_malloc
_memcmp
_memcpy
_memmove
dyld_stub_binder

View File

@@ -8,7 +8,7 @@ SRC_DIR="${0%/*}"
BUILD_ARM="$(mktemp -d -t conflict-set-arm)"
BUILD_X86="$(mktemp -d -t conflict-set-x86)"
cmake_args=(-DCMAKE_CXX_FLAGS=-DNVALGRIND -DCPACK_PACKAGING_INSTALL_PREFIX=/usr/local -DCMAKE_CXX_COMPILER=/opt/homebrew/opt/llvm/bin/clang++)
cmake_args=(-DCMAKE_CXX_FLAGS=-DNVALGRIND -DCPACK_PACKAGING_INSTALL_PREFIX=/usr/local)
cmake -S"$SRC_DIR" -B"$BUILD_ARM" -DCMAKE_OSX_ARCHITECTURES=arm64 "${cmake_args[@]}"
cmake --build "$BUILD_ARM" --target conflict-set --target conflict-set-static

View File

@@ -1,4 +1,3 @@
import struct
from conflict_set import *
@@ -165,16 +164,6 @@ def test_fixup_256():
cs.check(read(0, bytes([1]), bytes([2])))
def test_large_removal_buffer():
with DebugConflictSet() as cs:
for i in range(1000):
# create extra gc work
for j in range(100):
cs.addWrites(1000 + i)
cs.addWrites(1000 + i, write(struct.pack(">l", i) + bytes([0] * 100000)))
cs.setOldestVersion(i)
if __name__ == "__main__":
# budget "pytest" for ctest integration without pulling in a dependency. You can of course still use pytest in local development.
import argparse