Compare commits
13 Commits
v0.0.10
...
62516825d1
| Author | SHA1 | Date | |
|---|---|---|---|
| 62516825d1 | |||
| 3d592bd6a9 | |||
| f5f5fb620b | |||
| e3d1b2e842 | |||
| 9f8800af16 | |||
| 182c065c8e | |||
| 2dba0d5be3 | |||
| a1dfdf355c | |||
| 15919cb1c4 | |||
| 5ed9003a83 | |||
| 84c6a2bfc2 | |||
| b5772a6aa0 | |||
| e6c39981b9 |
+7
-9
@@ -1,7 +1,7 @@
|
|||||||
cmake_minimum_required(VERSION 3.18)
|
cmake_minimum_required(VERSION 3.18)
|
||||||
project(
|
project(
|
||||||
conflict-set
|
conflict-set
|
||||||
VERSION 0.0.10
|
VERSION 0.0.12
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
"A data structure for optimistic concurrency control on ranges of bitwise-lexicographically-ordered keys."
|
"A data structure for optimistic concurrency control on ranges of bitwise-lexicographically-ordered keys."
|
||||||
HOMEPAGE_URL "https://git.weaselab.dev/weaselab/conflict-set"
|
HOMEPAGE_URL "https://git.weaselab.dev/weaselab/conflict-set"
|
||||||
@@ -31,14 +31,12 @@ if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
|
|||||||
"MinSizeRel" "RelWithDebInfo")
|
"MinSizeRel" "RelWithDebInfo")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_compile_options(
|
add_compile_options(-fdata-sections -ffunction-sections -Wswitch-enum
|
||||||
-fdata-sections
|
-Werror=switch-enum -fPIC)
|
||||||
-ffunction-sections
|
if(NOT APPLE)
|
||||||
-Wswitch-enum
|
# This causes some versions of clang to crash on macos
|
||||||
-Werror=switch-enum
|
add_compile_options(-g -fno-omit-frame-pointer)
|
||||||
-fPIC
|
endif()
|
||||||
-g
|
|
||||||
-fno-omit-frame-pointer)
|
|
||||||
|
|
||||||
set(full_relro_flags "-pie;LINKER:-z,relro,-z,now,-z,noexecstack")
|
set(full_relro_flags "-pie;LINKER:-z,relro,-z,now,-z,noexecstack")
|
||||||
cmake_push_check_state()
|
cmake_push_check_state()
|
||||||
|
|||||||
+25
-208
@@ -16,12 +16,12 @@ limitations under the License.
|
|||||||
|
|
||||||
#include "ConflictSet.h"
|
#include "ConflictSet.h"
|
||||||
#include "Internal.h"
|
#include "Internal.h"
|
||||||
|
#include "LongestCommonPrefix.h"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include <bit>
|
#include <bit>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <compare>
|
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
@@ -1687,167 +1687,6 @@ Node *nextSibling(Node *node) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(HAS_AVX) || defined(HAS_ARM_NEON)
|
|
||||||
constexpr int kStride = 64;
|
|
||||||
#else
|
|
||||||
constexpr int kStride = 16;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
constexpr int kUnrollFactor = 4;
|
|
||||||
|
|
||||||
bool compareStride(const uint8_t *ap, const uint8_t *bp) {
|
|
||||||
#if defined(HAS_ARM_NEON)
|
|
||||||
static_assert(kStride == 64);
|
|
||||||
uint8x16_t x[4]; // GCOVR_EXCL_LINE
|
|
||||||
for (int i = 0; i < 4; ++i) {
|
|
||||||
x[i] = vceqq_u8(vld1q_u8(ap + i * 16), vld1q_u8(bp + i * 16));
|
|
||||||
}
|
|
||||||
auto results = vreinterpretq_u16_u8(
|
|
||||||
vandq_u8(vandq_u8(x[0], x[1]), vandq_u8(x[2], x[3])));
|
|
||||||
bool eq = vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0) ==
|
|
||||||
uint64_t(-1);
|
|
||||||
#elif defined(HAS_AVX)
|
|
||||||
static_assert(kStride == 64);
|
|
||||||
__m128i x[4]; // GCOVR_EXCL_LINE
|
|
||||||
for (int i = 0; i < 4; ++i) {
|
|
||||||
x[i] = _mm_cmpeq_epi8(_mm_loadu_si128((__m128i *)(ap + i * 16)),
|
|
||||||
_mm_loadu_si128((__m128i *)(bp + i * 16)));
|
|
||||||
}
|
|
||||||
auto eq =
|
|
||||||
_mm_movemask_epi8(_mm_and_si128(_mm_and_si128(x[0], x[1]),
|
|
||||||
_mm_and_si128(x[2], x[3]))) == 0xffff;
|
|
||||||
#else
|
|
||||||
// Hope it gets vectorized
|
|
||||||
auto eq = memcmp(ap, bp, kStride) == 0;
|
|
||||||
#endif
|
|
||||||
return eq;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Precondition: ap[:kStride] != bp[:kStride]
|
|
||||||
int firstNeqStride(const uint8_t *ap, const uint8_t *bp) {
|
|
||||||
#if defined(HAS_AVX)
|
|
||||||
static_assert(kStride == 64);
|
|
||||||
uint64_t c[kStride / 16]; // GCOVR_EXCL_LINE
|
|
||||||
for (int i = 0; i < kStride; i += 16) {
|
|
||||||
const auto a = _mm_loadu_si128((__m128i *)(ap + i));
|
|
||||||
const auto b = _mm_loadu_si128((__m128i *)(bp + i));
|
|
||||||
const auto compared = _mm_cmpeq_epi8(a, b);
|
|
||||||
c[i / 16] = _mm_movemask_epi8(compared) & 0xffff;
|
|
||||||
}
|
|
||||||
return std::countr_zero(~(c[0] | c[1] << 16 | c[2] << 32 | c[3] << 48));
|
|
||||||
#elif defined(HAS_ARM_NEON)
|
|
||||||
static_assert(kStride == 64);
|
|
||||||
for (int i = 0; i < kStride; i += 16) {
|
|
||||||
// 0xff for each match
|
|
||||||
uint16x8_t results =
|
|
||||||
vreinterpretq_u16_u8(vceqq_u8(vld1q_u8(ap + i), vld1q_u8(bp + i)));
|
|
||||||
// 0xf for each mismatch
|
|
||||||
uint64_t bitfield =
|
|
||||||
~vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0);
|
|
||||||
if (bitfield) {
|
|
||||||
return i + (std::countr_zero(bitfield) >> 2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
|
||||||
#else
|
|
||||||
int i = 0;
|
|
||||||
for (; i < kStride - 1; ++i) {
|
|
||||||
if (*ap++ != *bp++) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return i;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
// This gets covered in local development
|
|
||||||
// GCOVR_EXCL_START
|
|
||||||
#if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__)
|
|
||||||
__attribute__((target("avx512f,avx512bw"))) int
|
|
||||||
longestCommonPrefix(const uint8_t *ap, const uint8_t *bp, int cl) {
|
|
||||||
int i = 0;
|
|
||||||
int end = cl & ~63;
|
|
||||||
while (i < end) {
|
|
||||||
const uint64_t eq =
|
|
||||||
_mm512_cmpeq_epi8_mask(_mm512_loadu_epi8(ap), _mm512_loadu_epi8(bp));
|
|
||||||
if (eq != uint64_t(-1)) {
|
|
||||||
return i + std::countr_one(eq);
|
|
||||||
}
|
|
||||||
i += 64;
|
|
||||||
ap += 64;
|
|
||||||
bp += 64;
|
|
||||||
}
|
|
||||||
if (i < cl) {
|
|
||||||
const uint64_t mask = (uint64_t(1) << (cl - i)) - 1;
|
|
||||||
const uint64_t eq = _mm512_cmpeq_epi8_mask(
|
|
||||||
_mm512_maskz_loadu_epi8(mask, ap), _mm512_maskz_loadu_epi8(mask, bp));
|
|
||||||
return i + std::countr_one(eq & mask);
|
|
||||||
}
|
|
||||||
assert(i == cl);
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
__attribute__((target("default")))
|
|
||||||
#endif
|
|
||||||
// GCOVR_EXCL_STOP
|
|
||||||
|
|
||||||
int longestCommonPrefix(const uint8_t *ap, const uint8_t *bp, int cl) {
|
|
||||||
assume(cl >= 0);
|
|
||||||
int i = 0;
|
|
||||||
int end;
|
|
||||||
|
|
||||||
// kStride * kUnrollCount at a time
|
|
||||||
end = cl & ~(kStride * kUnrollFactor - 1);
|
|
||||||
while (i < end) {
|
|
||||||
for (int j = 0; j < kUnrollFactor; ++j) {
|
|
||||||
if (!compareStride(ap, bp)) {
|
|
||||||
return i + firstNeqStride(ap, bp);
|
|
||||||
}
|
|
||||||
i += kStride;
|
|
||||||
ap += kStride;
|
|
||||||
bp += kStride;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// kStride at a time
|
|
||||||
end = cl & ~(kStride - 1);
|
|
||||||
while (i < end) {
|
|
||||||
if (!compareStride(ap, bp)) {
|
|
||||||
return i + firstNeqStride(ap, bp);
|
|
||||||
}
|
|
||||||
i += kStride;
|
|
||||||
ap += kStride;
|
|
||||||
bp += kStride;
|
|
||||||
}
|
|
||||||
|
|
||||||
// word at a time
|
|
||||||
end = cl & ~(sizeof(uint64_t) - 1);
|
|
||||||
while (i < end) {
|
|
||||||
uint64_t a; // GCOVR_EXCL_LINE
|
|
||||||
uint64_t b; // GCOVR_EXCL_LINE
|
|
||||||
memcpy(&a, ap, 8);
|
|
||||||
memcpy(&b, bp, 8);
|
|
||||||
const auto mismatched = a ^ b;
|
|
||||||
if (mismatched) {
|
|
||||||
return i + std::countr_zero(mismatched) / 8;
|
|
||||||
}
|
|
||||||
i += 8;
|
|
||||||
ap += 8;
|
|
||||||
bp += 8;
|
|
||||||
}
|
|
||||||
|
|
||||||
// byte at a time
|
|
||||||
while (i < cl) {
|
|
||||||
if (*ap != *bp) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
++ap;
|
|
||||||
++bp;
|
|
||||||
++i;
|
|
||||||
}
|
|
||||||
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Logically this is the same as performing firstGeq and then checking against
|
// Logically this is the same as performing firstGeq and then checking against
|
||||||
// point or range version according to cmp, but this version short circuits as
|
// point or range version according to cmp, but this version short circuits as
|
||||||
// soon as it can prove that there's no conflict.
|
// soon as it can prove that there's no conflict.
|
||||||
@@ -2922,25 +2761,22 @@ void consumePartialKey(Node *&self, std::span<const uint8_t> &key,
|
|||||||
key = key.subspan(partialKeyIndex, key.size() - partialKeyIndex);
|
key = key.subspan(partialKeyIndex, key.size() - partialKeyIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns a pointer to the newly inserted node. Caller must set
|
// Returns a pointer the pointer to the newly inserted node in the tree. Caller
|
||||||
// `entryPresent`, and `entry` fields. All nodes along the search path of the
|
// must set `entryPresent`, and `entry` fields. All nodes along the search path
|
||||||
// result will have `maxVersion` set to `writeVersion` as a postcondition. Nodes
|
// of the result will have `maxVersion` set to `writeVersion` as a
|
||||||
// along the search path may be invalidated.
|
// postcondition. Nodes along the search path may be invalidated.
|
||||||
[[nodiscard]]
|
[[nodiscard]]
|
||||||
Node *insert(Node **self, std::span<const uint8_t> key,
|
Node **insert(Node **self, std::span<const uint8_t> key,
|
||||||
InternalVersionT writeVersion, WriteContext *tls,
|
InternalVersionT writeVersion, WriteContext *tls,
|
||||||
ConflictSet::Impl *impl) {
|
ConflictSet::Impl *impl) {
|
||||||
|
|
||||||
if ((*self)->partialKeyLen > 0) {
|
|
||||||
consumePartialKey(*self, key, writeVersion, tls);
|
|
||||||
}
|
|
||||||
assert(maxVersion(*self, impl) <= writeVersion);
|
assert(maxVersion(*self, impl) <= writeVersion);
|
||||||
setMaxVersion(*self, impl, writeVersion);
|
setMaxVersion(*self, impl, writeVersion);
|
||||||
|
|
||||||
for (;; ++tls->accum.insert_iterations) {
|
for (;; ++tls->accum.insert_iterations) {
|
||||||
|
|
||||||
if (key.size() == 0) {
|
if (key.size() == 0) {
|
||||||
return *self;
|
return self;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto &child = getOrCreateChild(*self, key.front(), writeVersion, tls);
|
auto &child = getOrCreateChild(*self, key.front(), writeVersion, tls);
|
||||||
@@ -2953,7 +2789,7 @@ Node *insert(Node **self, std::span<const uint8_t> key,
|
|||||||
child->parentsIndex = key.front();
|
child->parentsIndex = key.front();
|
||||||
setMaxVersion(child, impl, writeVersion);
|
setMaxVersion(child, impl, writeVersion);
|
||||||
memcpy(child->partialKey(), key.data() + 1, child->partialKeyLen);
|
memcpy(child->partialKey(), key.data() + 1, child->partialKeyLen);
|
||||||
return child;
|
return &child;
|
||||||
}
|
}
|
||||||
|
|
||||||
self = &child;
|
self = &child;
|
||||||
@@ -2996,7 +2832,7 @@ void addPointWrite(Node *&root, std::span<const uint8_t> key,
|
|||||||
InternalVersionT writeVersion, WriteContext *tls,
|
InternalVersionT writeVersion, WriteContext *tls,
|
||||||
ConflictSet::Impl *impl) {
|
ConflictSet::Impl *impl) {
|
||||||
++tls->accum.point_writes;
|
++tls->accum.point_writes;
|
||||||
auto *n = insert(&root, key, writeVersion, tls, impl);
|
auto *n = *insert(&root, key, writeVersion, tls, impl);
|
||||||
if (!n->entryPresent) {
|
if (!n->entryPresent) {
|
||||||
++tls->accum.entries_inserted;
|
++tls->accum.entries_inserted;
|
||||||
auto *p = nextLogical(n);
|
auto *p = nextLogical(n);
|
||||||
@@ -3063,41 +2899,16 @@ void addWriteRange(Node *&root, std::span<const uint8_t> begin,
|
|||||||
}
|
}
|
||||||
++tls->accum.range_writes;
|
++tls->accum.range_writes;
|
||||||
const bool beginIsPrefix = lcp == int(begin.size());
|
const bool beginIsPrefix = lcp == int(begin.size());
|
||||||
auto remaining = begin.subspan(0, lcp);
|
|
||||||
|
|
||||||
auto *n = root;
|
Node **useAsRoot =
|
||||||
|
insert(&root, begin.subspan(0, lcp), writeVersion, tls, impl);
|
||||||
|
|
||||||
for (;;) {
|
int consumed = lcp;
|
||||||
if (int(remaining.size()) <= n->partialKeyLen) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
int i = longestCommonPrefix(n->partialKey(), remaining.data(),
|
|
||||||
n->partialKeyLen);
|
|
||||||
if (i != n->partialKeyLen) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto *child = getChild(n, remaining[n->partialKeyLen]);
|
|
||||||
if (child == nullptr) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
assert(maxVersion(n, impl) <= writeVersion);
|
|
||||||
setMaxVersion(n, impl, writeVersion);
|
|
||||||
|
|
||||||
remaining = remaining.subspan(n->partialKeyLen + 1,
|
|
||||||
remaining.size() - (n->partialKeyLen + 1));
|
|
||||||
n = child;
|
|
||||||
}
|
|
||||||
|
|
||||||
Node **useAsRoot = &getInTree(n, impl);
|
|
||||||
|
|
||||||
int consumed = lcp - remaining.size();
|
|
||||||
|
|
||||||
begin = begin.subspan(consumed, begin.size() - consumed);
|
begin = begin.subspan(consumed, begin.size() - consumed);
|
||||||
end = end.subspan(consumed, end.size() - consumed);
|
end = end.subspan(consumed, end.size() - consumed);
|
||||||
|
|
||||||
auto *beginNode = insert(useAsRoot, begin, writeVersion, tls, impl);
|
auto *beginNode = *insert(useAsRoot, begin, writeVersion, tls, impl);
|
||||||
|
|
||||||
const bool insertedBegin = !beginNode->entryPresent;
|
const bool insertedBegin = !beginNode->entryPresent;
|
||||||
|
|
||||||
@@ -3114,7 +2925,7 @@ void addWriteRange(Node *&root, std::span<const uint8_t> begin,
|
|||||||
assert(writeVersion >= beginNode->entry.pointVersion);
|
assert(writeVersion >= beginNode->entry.pointVersion);
|
||||||
beginNode->entry.pointVersion = writeVersion;
|
beginNode->entry.pointVersion = writeVersion;
|
||||||
|
|
||||||
auto *endNode = insert(useAsRoot, end, writeVersion, tls, impl);
|
auto *endNode = *insert(useAsRoot, end, writeVersion, tls, impl);
|
||||||
|
|
||||||
const bool insertedEnd = !endNode->entryPresent;
|
const bool insertedEnd = !endNode->entryPresent;
|
||||||
|
|
||||||
@@ -3132,7 +2943,7 @@ void addWriteRange(Node *&root, std::span<const uint8_t> begin,
|
|||||||
if (beginIsPrefix && insertedEnd) {
|
if (beginIsPrefix && insertedEnd) {
|
||||||
// beginNode may have been invalidated when inserting end. TODO can we do
|
// beginNode may have been invalidated when inserting end. TODO can we do
|
||||||
// better?
|
// better?
|
||||||
beginNode = insert(useAsRoot, begin, writeVersion, tls, impl);
|
beginNode = *insert(useAsRoot, begin, writeVersion, tls, impl);
|
||||||
assert(beginNode->entryPresent);
|
assert(beginNode->entryPresent);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3246,6 +3057,8 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
|||||||
InternalVersionT::zero = tls.zero = oldestVersion;
|
InternalVersionT::zero = tls.zero = oldestVersion;
|
||||||
|
|
||||||
assert(writeVersion >= newestVersionFullPrecision);
|
assert(writeVersion >= newestVersionFullPrecision);
|
||||||
|
assert(tls.accum.entries_erased == 0);
|
||||||
|
assert(tls.accum.entries_inserted == 0);
|
||||||
|
|
||||||
if (oldestExtantVersion < writeVersion - kMaxCorrectVersionWindow)
|
if (oldestExtantVersion < writeVersion - kMaxCorrectVersionWindow)
|
||||||
[[unlikely]] {
|
[[unlikely]] {
|
||||||
@@ -3273,15 +3086,19 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
|||||||
auto begin = std::span<const uint8_t>(w.begin.p, w.begin.len);
|
auto begin = std::span<const uint8_t>(w.begin.p, w.begin.len);
|
||||||
auto end = std::span<const uint8_t>(w.end.p, w.end.len);
|
auto end = std::span<const uint8_t>(w.end.p, w.end.len);
|
||||||
if (w.end.len > 0) {
|
if (w.end.len > 0) {
|
||||||
keyUpdates += 3;
|
|
||||||
addWriteRange(root, begin, end, InternalVersionT(writeVersion), &tls,
|
addWriteRange(root, begin, end, InternalVersionT(writeVersion), &tls,
|
||||||
this);
|
this);
|
||||||
} else {
|
} else {
|
||||||
keyUpdates += 2;
|
|
||||||
addPointWrite(root, begin, InternalVersionT(writeVersion), &tls, this);
|
addPointWrite(root, begin, InternalVersionT(writeVersion), &tls, this);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Run gc at least 200% the rate we're inserting entries
|
||||||
|
keyUpdates +=
|
||||||
|
std::max<int64_t>(tls.accum.entries_inserted - tls.accum.entries_erased,
|
||||||
|
0) *
|
||||||
|
2;
|
||||||
|
|
||||||
memory_bytes.set(totalBytes);
|
memory_bytes.set(totalBytes);
|
||||||
point_writes_total.add(tls.accum.point_writes);
|
point_writes_total.add(tls.accum.point_writes);
|
||||||
range_writes_total.add(tls.accum.range_writes);
|
range_writes_total.add(tls.accum.range_writes);
|
||||||
|
|||||||
Vendored
+10
-7
@@ -117,15 +117,18 @@ pipeline {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
steps {
|
steps {
|
||||||
|
script {
|
||||||
|
filter_args = "-f ConflictSet.cpp -f LongestCommonPrefix.h"
|
||||||
|
}
|
||||||
CleanBuildAndTest("-DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_FLAGS=--coverage -DCMAKE_CXX_FLAGS=--coverage -DCMAKE_BUILD_TYPE=Debug -DDISABLE_TSAN=ON")
|
CleanBuildAndTest("-DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_FLAGS=--coverage -DCMAKE_CXX_FLAGS=--coverage -DCMAKE_BUILD_TYPE=Debug -DDISABLE_TSAN=ON")
|
||||||
sh '''
|
sh """
|
||||||
gcovr -f ConflictSet.cpp --cobertura > build/coverage.xml
|
gcovr ${filter_args} --cobertura > build/coverage.xml
|
||||||
'''
|
"""
|
||||||
recordCoverage qualityGates: [[criticality: 'NOTE', metric: 'MODULE']], tools: [[parser: 'COBERTURA', pattern: 'build/coverage.xml']]
|
recordCoverage qualityGates: [[criticality: 'NOTE', metric: 'MODULE']], tools: [[parser: 'COBERTURA', pattern: 'build/coverage.xml']]
|
||||||
sh '''
|
sh """
|
||||||
gcovr -f ConflictSet.cpp
|
gcovr ${filter_args}
|
||||||
gcovr -f ConflictSet.cpp --fail-under-line 100 > /dev/null
|
gcovr ${filter_args} --fail-under-line 100 > /dev/null
|
||||||
'''
|
"""
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,177 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include <bit>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#ifdef HAS_AVX
|
||||||
|
#include <immintrin.h>
|
||||||
|
#elif HAS_ARM_NEON
|
||||||
|
#include <arm_neon.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(HAS_AVX) || defined(HAS_ARM_NEON)
|
||||||
|
constexpr int kStride = 64;
|
||||||
|
#else
|
||||||
|
constexpr int kStride = 16;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
constexpr int kUnrollFactor = 4;
|
||||||
|
|
||||||
|
inline bool compareStride(const uint8_t *ap, const uint8_t *bp) {
|
||||||
|
#if defined(HAS_ARM_NEON)
|
||||||
|
static_assert(kStride == 64);
|
||||||
|
uint8x16_t x[4]; // GCOVR_EXCL_LINE
|
||||||
|
for (int i = 0; i < 4; ++i) {
|
||||||
|
x[i] = vceqq_u8(vld1q_u8(ap + i * 16), vld1q_u8(bp + i * 16));
|
||||||
|
}
|
||||||
|
auto results = vreinterpretq_u16_u8(
|
||||||
|
vandq_u8(vandq_u8(x[0], x[1]), vandq_u8(x[2], x[3])));
|
||||||
|
bool eq = vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0) ==
|
||||||
|
uint64_t(-1);
|
||||||
|
#elif defined(HAS_AVX)
|
||||||
|
static_assert(kStride == 64);
|
||||||
|
__m128i x[4]; // GCOVR_EXCL_LINE
|
||||||
|
for (int i = 0; i < 4; ++i) {
|
||||||
|
x[i] = _mm_cmpeq_epi8(_mm_loadu_si128((__m128i *)(ap + i * 16)),
|
||||||
|
_mm_loadu_si128((__m128i *)(bp + i * 16)));
|
||||||
|
}
|
||||||
|
auto eq =
|
||||||
|
_mm_movemask_epi8(_mm_and_si128(_mm_and_si128(x[0], x[1]),
|
||||||
|
_mm_and_si128(x[2], x[3]))) == 0xffff;
|
||||||
|
#else
|
||||||
|
// Hope it gets vectorized
|
||||||
|
auto eq = memcmp(ap, bp, kStride) == 0;
|
||||||
|
#endif
|
||||||
|
return eq;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Precondition: ap[:kStride] != bp[:kStride]
|
||||||
|
inline int firstNeqStride(const uint8_t *ap, const uint8_t *bp) {
|
||||||
|
#if defined(HAS_AVX)
|
||||||
|
static_assert(kStride == 64);
|
||||||
|
uint64_t c[kStride / 16]; // GCOVR_EXCL_LINE
|
||||||
|
for (int i = 0; i < kStride; i += 16) {
|
||||||
|
const auto a = _mm_loadu_si128((__m128i *)(ap + i));
|
||||||
|
const auto b = _mm_loadu_si128((__m128i *)(bp + i));
|
||||||
|
const auto compared = _mm_cmpeq_epi8(a, b);
|
||||||
|
c[i / 16] = _mm_movemask_epi8(compared) & 0xffff;
|
||||||
|
}
|
||||||
|
return std::countr_zero(~(c[0] | c[1] << 16 | c[2] << 32 | c[3] << 48));
|
||||||
|
#elif defined(HAS_ARM_NEON)
|
||||||
|
static_assert(kStride == 64);
|
||||||
|
for (int i = 0; i < kStride; i += 16) {
|
||||||
|
// 0xff for each match
|
||||||
|
uint16x8_t results =
|
||||||
|
vreinterpretq_u16_u8(vceqq_u8(vld1q_u8(ap + i), vld1q_u8(bp + i)));
|
||||||
|
// 0xf for each mismatch
|
||||||
|
uint64_t bitfield =
|
||||||
|
~vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0);
|
||||||
|
if (bitfield) {
|
||||||
|
return i + (std::countr_zero(bitfield) >> 2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||||
|
#else
|
||||||
|
int i = 0;
|
||||||
|
for (; i < kStride - 1; ++i) {
|
||||||
|
if (*ap++ != *bp++) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return i;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// This gets covered in local development
|
||||||
|
// GCOVR_EXCL_START
|
||||||
|
#if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__)
|
||||||
|
__attribute__((target("avx512f,avx512bw"))) inline int
|
||||||
|
longestCommonPrefix(const uint8_t *ap, const uint8_t *bp, int cl) {
|
||||||
|
int i = 0;
|
||||||
|
int end = cl & ~63;
|
||||||
|
while (i < end) {
|
||||||
|
const uint64_t eq =
|
||||||
|
_mm512_cmpeq_epi8_mask(_mm512_loadu_epi8(ap), _mm512_loadu_epi8(bp));
|
||||||
|
if (eq != uint64_t(-1)) {
|
||||||
|
return i + std::countr_one(eq);
|
||||||
|
}
|
||||||
|
i += 64;
|
||||||
|
ap += 64;
|
||||||
|
bp += 64;
|
||||||
|
}
|
||||||
|
if (i < cl) {
|
||||||
|
const uint64_t mask = (uint64_t(1) << (cl - i)) - 1;
|
||||||
|
const uint64_t eq = _mm512_cmpeq_epi8_mask(
|
||||||
|
_mm512_maskz_loadu_epi8(mask, ap), _mm512_maskz_loadu_epi8(mask, bp));
|
||||||
|
return i + std::countr_one(eq & mask);
|
||||||
|
}
|
||||||
|
assert(i == cl);
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
__attribute__((target("default")))
|
||||||
|
#endif
|
||||||
|
// GCOVR_EXCL_STOP
|
||||||
|
|
||||||
|
inline int
|
||||||
|
longestCommonPrefix(const uint8_t *ap, const uint8_t *bp, int cl) {
|
||||||
|
if (!(cl >= 0)) {
|
||||||
|
__builtin_unreachable();
|
||||||
|
}
|
||||||
|
|
||||||
|
int i = 0;
|
||||||
|
int end;
|
||||||
|
|
||||||
|
// kStride * kUnrollCount at a time
|
||||||
|
end = cl & ~(kStride * kUnrollFactor - 1);
|
||||||
|
while (i < end) {
|
||||||
|
for (int j = 0; j < kUnrollFactor; ++j) {
|
||||||
|
if (!compareStride(ap, bp)) {
|
||||||
|
return i + firstNeqStride(ap, bp);
|
||||||
|
}
|
||||||
|
i += kStride;
|
||||||
|
ap += kStride;
|
||||||
|
bp += kStride;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// kStride at a time
|
||||||
|
end = cl & ~(kStride - 1);
|
||||||
|
while (i < end) {
|
||||||
|
if (!compareStride(ap, bp)) {
|
||||||
|
return i + firstNeqStride(ap, bp);
|
||||||
|
}
|
||||||
|
i += kStride;
|
||||||
|
ap += kStride;
|
||||||
|
bp += kStride;
|
||||||
|
}
|
||||||
|
|
||||||
|
// word at a time
|
||||||
|
end = cl & ~(sizeof(uint64_t) - 1);
|
||||||
|
while (i < end) {
|
||||||
|
uint64_t a; // GCOVR_EXCL_LINE
|
||||||
|
uint64_t b; // GCOVR_EXCL_LINE
|
||||||
|
memcpy(&a, ap, 8);
|
||||||
|
memcpy(&b, bp, 8);
|
||||||
|
const auto mismatched = a ^ b;
|
||||||
|
if (mismatched) {
|
||||||
|
return i + std::countr_zero(mismatched) / 8;
|
||||||
|
}
|
||||||
|
i += 8;
|
||||||
|
ap += 8;
|
||||||
|
bp += 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
// byte at a time
|
||||||
|
while (i < cl) {
|
||||||
|
if (*ap != *bp) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
++ap;
|
||||||
|
++bp;
|
||||||
|
++i;
|
||||||
|
}
|
||||||
|
|
||||||
|
return i;
|
||||||
|
}
|
||||||
@@ -76,6 +76,14 @@ void workload(weaselab::ConflictSet *cs) {
|
|||||||
} else {
|
} else {
|
||||||
w.begin.len = k.size();
|
w.begin.len = k.size();
|
||||||
cs->addWrites(&w, 1, version);
|
cs->addWrites(&w, 1, version);
|
||||||
|
int64_t beginN = version - kWindowSize + rand() % kWindowSize;
|
||||||
|
auto b = numToKey(beginN);
|
||||||
|
auto e = numToKey(beginN + 1000);
|
||||||
|
w.begin.p = b.data();
|
||||||
|
w.begin.len = b.size();
|
||||||
|
w.end.p = e.data();
|
||||||
|
w.end.len = e.size();
|
||||||
|
cs->addWrites(&w, 1, version);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// GC
|
// GC
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1 @@
|
|||||||
|
:ゥゥゥゥゥゥゥゥゥゥ:::::
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user