Compare commits
3 Commits
tagged-poi...fe5cfb0336

SHA1: fe5cfb0336, 82203515a0, 465372c734
@@ -248,19 +248,6 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND BUILD_TESTING)
add_test(NAME conflict_set_blackbox_${hash} COMMAND driver ${TEST})
endforeach()

find_program(VALGRIND_EXE valgrind)
if(VALGRIND_EXE AND NOT CMAKE_CROSSCOMPILING)
list(LENGTH CORPUS_TESTS len)
math(EXPR last "${len} - 1")
set(partition_size 100)
foreach(i RANGE 0 ${last} ${partition_size})
list(SUBLIST CORPUS_TESTS ${i} ${partition_size} partition)
add_test(NAME conflict_set_blackbox_valgrind_${i}
COMMAND ${VALGRIND_EXE} --error-exitcode=99 --
$<TARGET_FILE:driver> ${partition})
endforeach()
endif()

# scripted tests. Written manually to fill in anything libfuzzer couldn't
# find.
if(NOT CMAKE_CROSSCOMPILING)
@@ -281,14 +268,19 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND BUILD_TESTING)
${Python3_EXECUTABLE}
${CMAKE_CURRENT_SOURCE_DIR}/test_conflict_set.py test ${TEST}
--build-dir ${CMAKE_CURRENT_BINARY_DIR})
if(VALGRIND_EXE AND NOT CMAKE_CROSSCOMPILING)
add_test(
NAME script_test_${TEST}_valgrind
COMMAND
${VALGRIND_EXE} ${Python3_EXECUTABLE}
${CMAKE_CURRENT_SOURCE_DIR}/test_conflict_set.py test ${TEST}
--build-dir ${CMAKE_CURRENT_BINARY_DIR})
endif()
endforeach()
endif()

find_program(VALGRIND_EXE valgrind)
if(VALGRIND_EXE AND NOT CMAKE_CROSSCOMPILING)
list(LENGTH CORPUS_TESTS len)
math(EXPR last "${len} - 1")
set(partition_size 100)
foreach(i RANGE 0 ${last} ${partition_size})
list(SUBLIST CORPUS_TESTS ${i} ${partition_size} partition)
add_test(NAME conflict_set_blackbox_valgrind_${i}
COMMAND ${VALGRIND_EXE} --error-exitcode=99 --
$<TARGET_FILE:driver> ${partition})
endforeach()
endif()

@@ -358,11 +350,6 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND BUILD_TESTING)
set_target_properties(server_bench PROPERTIES SKIP_BUILD_RPATH ON)

add_executable(interleaving_test InterleavingTest.cpp)
# work around lack of musttail for gcc
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_BUILD_TYPE STREQUAL "Debug")
target_compile_options(interleaving_test PRIVATE -Og
-foptimize-sibling-calls)
endif()
target_link_libraries(interleaving_test PRIVATE nanobench)
endif()

534 ConflictSet.cpp
@@ -203,60 +203,6 @@ enum Type : int8_t {

template <class T> struct BoundedFreeListAllocator;

struct TaggedNodePointer {
TaggedNodePointer() = default;
operator struct Node *() { return (struct Node *)(uintptr_t)p; }
operator struct Node0 *() {
assert(t == Type_Node0);
return (struct Node0 *)(uintptr_t)p;
}
operator struct Node3 *() {
assert(t == Type_Node3);
return (struct Node3 *)(uintptr_t)p;
}
operator struct Node16 *() {
assert(t == Type_Node16);
return (struct Node16 *)(uintptr_t)p;
}
operator struct Node48 *() {
assert(t == Type_Node48);
return (struct Node48 *)(uintptr_t)p;
}
operator struct Node256 *() {
assert(t == Type_Node256);
return (struct Node256 *)(uintptr_t)p;
}
/*implicit*/ TaggedNodePointer(std::nullptr_t) : p(0), t(Type_Node0) {}
/*implicit*/ TaggedNodePointer(Node0 *x)
: TaggedNodePointer((struct Node *)x, Type_Node0) {}
/*implicit*/ TaggedNodePointer(Node3 *x)
: TaggedNodePointer((struct Node *)x, Type_Node3) {}
/*implicit*/ TaggedNodePointer(Node16 *x)
: TaggedNodePointer((struct Node *)x, Type_Node16) {}
/*implicit*/ TaggedNodePointer(Node48 *x)
: TaggedNodePointer((struct Node *)x, Type_Node48) {}
/*implicit*/ TaggedNodePointer(Node256 *x)
: TaggedNodePointer((struct Node *)x, Type_Node256) {}

bool operator!=(std::nullptr_t) { return p != 0 || t != Type_Node0; }
bool operator==(std::nullptr_t) { return p == 0 && t == Type_Node0; }
bool operator==(const TaggedNodePointer &) const = default;
bool operator==(Node *n) const { return (uintptr_t)n == p; }
Node *operator->() { return (Node *)(uintptr_t)p; }
Type getType() { return t; }

TaggedNodePointer(const TaggedNodePointer &) = default;
TaggedNodePointer &operator=(const TaggedNodePointer &) = default;
/*implicit*/ TaggedNodePointer(Node *n);

private:
TaggedNodePointer(struct Node *p, Type t) : p((uintptr_t)p), t(t) {
assume(p != 0);
}
uintptr_t p : 56;
Type t : 8;
};

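(Note: the TaggedNodePointer in this hunk packs a 56-bit node pointer and an 8-bit type tag into a single word, so a node's type can be read without dereferencing it. A minimal standalone sketch of that packing idea, with illustrative names that are not from this codebase:)

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>

// Illustrative stand-ins for the real node types.
enum Kind : int8_t { KindLeaf, KindInner };
struct Leaf { int value = 7; };
struct Inner { int fanout = 3; };

// Pointer (low 56 bits) and type tag (8 bits) packed into one 8-byte word,
// the same bitfield layout as TaggedNodePointer's `p : 56` / `t : 8`.
struct TaggedPtr {
  uintptr_t p : 56;
  Kind k : 8;

  /*implicit*/ TaggedPtr(Leaf *x) : p((uintptr_t)x), k(KindLeaf) {}
  /*implicit*/ TaggedPtr(Inner *x) : p((uintptr_t)x), k(KindInner) {}

  Kind kind() const { return k; }
  Leaf *asLeaf() const { assert(k == KindLeaf); return (Leaf *)(uintptr_t)p; }
  Inner *asInner() const { assert(k == KindInner); return (Inner *)(uintptr_t)p; }
};

int main() {
  Leaf leaf;
  TaggedPtr t(&leaf);
  // On the LP64 targets this sketch assumes, the tag rides along in the
  // otherwise-unused high pointer bits, so the tagged pointer stays one word.
  printf("sizeof(TaggedPtr) = %zu, value = %d\n", sizeof(TaggedPtr),
         t.asLeaf()->value);
}
```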
struct Node {

/* begin section that's copied to the next node */
@@ -282,9 +228,6 @@ private:
int32_t partialKeyCapacity;
};

TaggedNodePointer::TaggedNodePointer(Node *n)
: TaggedNodePointer(n, n->getType()) {}

constexpr int kNodeCopyBegin = offsetof(Node, entry);
constexpr int kNodeCopySize =
offsetof(Node, parentsIndex) + sizeof(Node::parentsIndex) - kNodeCopyBegin;
@@ -308,7 +251,7 @@ struct Node3 : Node {
constexpr static auto kMaxNodes = 3;
constexpr static auto kType = Type_Node3;

TaggedNodePointer children[kMaxNodes];
Node *children[kMaxNodes];
InternalVersionT childMaxVersion[kMaxNodes];
// Sorted
uint8_t index[kMaxNodes];
@@ -324,7 +267,7 @@ struct Node16 : Node {
constexpr static auto kType = Type_Node16;
constexpr static auto kMaxNodes = 16;

TaggedNodePointer children[kMaxNodes];
Node *children[kMaxNodes];
InternalVersionT childMaxVersion[kMaxNodes];
// Sorted
uint8_t index[kMaxNodes];
@@ -345,7 +288,7 @@ struct Node48 : Node {
constexpr static int kMaxOfMaxTotalPages = kMaxNodes / kMaxOfMaxPageSize;

BitSet bitSet;
TaggedNodePointer children[kMaxNodes];
Node *children[kMaxNodes];
InternalVersionT childMaxVersion[kMaxNodes];
InternalVersionT maxOfMax[kMaxOfMaxTotalPages];
uint8_t reverseIndex[kMaxNodes];
@@ -367,7 +310,7 @@ struct Node256 : Node {
constexpr static int kMaxOfMaxTotalPages = kMaxNodes / kMaxOfMaxPageSize;

BitSet bitSet;
TaggedNodePointer children[kMaxNodes];
Node *children[kMaxNodes];
InternalVersionT childMaxVersion[kMaxNodes];
InternalVersionT maxOfMax[kMaxOfMaxTotalPages];

@@ -823,8 +766,6 @@ private:

int getNodeIndex(Node3 *self, uint8_t index) {
Node3 *n = (Node3 *)self;
assume(n->numChildren >= 1);
assume(n->numChildren <= 3);
for (int i = 0; i < n->numChildren; ++i) {
if (n->index[i] == index) {
return i;
@@ -833,18 +774,6 @@ int getNodeIndex(Node3 *self, uint8_t index) {
return -1;
}

int getNodeIndexExists(Node3 *self, uint8_t index) {
Node3 *n = (Node3 *)self;
assume(n->numChildren >= 1);
assume(n->numChildren <= 3);
for (int i = 0; i < n->numChildren; ++i) {
if (n->index[i] == index) {
return i;
}
}
__builtin_unreachable(); // GCOVR_EXCL_LINE
}

int getNodeIndex(Node16 *self, uint8_t index) {

#ifdef HAS_AVX
@@ -905,66 +834,27 @@ int getNodeIndex(Node16 *self, uint8_t index) {
#endif
}

int getNodeIndexExists(Node16 *self, uint8_t index) {

#ifdef HAS_AVX
__m128i key_vec = _mm_set1_epi8(index);
__m128i indices;
memcpy(&indices, self->index, Node16::kMaxNodes);
__m128i results = _mm_cmpeq_epi8(key_vec, indices);
uint32_t mask = (1 << self->numChildren) - 1;
uint32_t bitfield = _mm_movemask_epi8(results) & mask;
assume(bitfield != 0);
return std::countr_zero(bitfield);
#elif defined(HAS_ARM_NEON)
// Based on
// https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon

uint8x16_t indices;
memcpy(&indices, self->index, Node16::kMaxNodes);
// 0xff for each match
uint16x8_t results =
vreinterpretq_u16_u8(vceqq_u8(vdupq_n_u8(index), indices));
assume(self->numChildren <= Node16::kMaxNodes);
uint64_t mask = self->numChildren == 16
? uint64_t(-1)
: (uint64_t(1) << (self->numChildren * 4)) - 1;
// 0xf for each match in valid range
uint64_t bitfield =
vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0) & mask;
assume(bitfield != 0);
return std::countr_zero(bitfield) / 4;
#else
for (int i = 0; i < self->numChildren; ++i) {
if (self->index[i] == index) {
return i;
}
}
__builtin_unreachable(); // GCOVR_EXCL_LINE
#endif
}

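(Note: the Node16 lookups above use the classic SSE trick of comparing all 16 sorted index bytes at once and turning the comparison into a bitmask. A self-contained sketch of that lookup, separate from the diff, with illustrative names; x86-64 only:)

```cpp
#include <bit>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <immintrin.h>

// Find `needle` among the first `n` bytes of `keys` (n <= 16), or return -1.
// Mirrors the _mm_cmpeq_epi8 / _mm_movemask_epi8 pattern in getNodeIndex.
int findByte16(const uint8_t keys[16], int n, uint8_t needle) {
  __m128i idx;
  memcpy(&idx, keys, 16); // unaligned load of the 16 candidate bytes
  // 0xff in each lane that equals `needle`
  __m128i eq = _mm_cmpeq_epi8(_mm_set1_epi8((char)needle), idx);
  // One bit per lane; mask off lanes beyond the `n` valid entries
  uint32_t valid = n == 16 ? 0xffffu : (1u << n) - 1;
  uint32_t bits = (uint32_t)_mm_movemask_epi8(eq) & valid;
  return bits ? std::countr_zero(bits) : -1;
}

int main() {
  uint8_t keys[16] = {5, 9, 42, 7};
  printf("%d\n", findByte16(keys, 4, 42)); // 2
  printf("%d\n", findByte16(keys, 4, 99)); // -1
}
```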
// Precondition - an entry for index must exist in the node
TaggedNodePointer &getChildExists(Node3 *self, uint8_t index) {
return self->children[getNodeIndexExists(self, index)];
Node *&getChildExists(Node3 *self, uint8_t index) {
return self->children[getNodeIndex(self, index)];
}
// Precondition - an entry for index must exist in the node
TaggedNodePointer &getChildExists(Node16 *self, uint8_t index) {
return self->children[getNodeIndexExists(self, index)];
Node *&getChildExists(Node16 *self, uint8_t index) {
return self->children[getNodeIndex(self, index)];
}
// Precondition - an entry for index must exist in the node
TaggedNodePointer &getChildExists(Node48 *self, uint8_t index) {
Node *&getChildExists(Node48 *self, uint8_t index) {
assert(self->bitSet.test(index));
return self->children[self->index[index]];
}
// Precondition - an entry for index must exist in the node
TaggedNodePointer &getChildExists(Node256 *self, uint8_t index) {
Node *&getChildExists(Node256 *self, uint8_t index) {
assert(self->bitSet.test(index));
return self->children[index];
}

// Precondition - an entry for index must exist in the node
TaggedNodePointer &getChildExists(Node *self, uint8_t index) {
Node *&getChildExists(Node *self, uint8_t index) {
switch (self->getType()) {
case Type_Node0: // GCOVR_EXCL_LINE
__builtin_unreachable(); // GCOVR_EXCL_LINE
@@ -995,12 +885,12 @@ InternalVersionT maxVersion(Node *n) {
__builtin_unreachable(); // GCOVR_EXCL_LINE
case Type_Node3: {
auto *n3 = static_cast<Node3 *>(n);
int i = getNodeIndexExists(n3, index);
int i = getNodeIndex(n3, index);
return n3->childMaxVersion[i];
}
case Type_Node16: {
auto *n16 = static_cast<Node16 *>(n);
int i = getNodeIndexExists(n16, index);
int i = getNodeIndex(n16, index);
return n16->childMaxVersion[i];
}
case Type_Node48: {
@@ -1028,12 +918,12 @@ InternalVersionT exchangeMaxVersion(Node *n, InternalVersionT newMax) {
__builtin_unreachable(); // GCOVR_EXCL_LINE
case Type_Node3: {
auto *n3 = static_cast<Node3 *>(n);
int i = getNodeIndexExists(n3, index);
int i = getNodeIndex(n3, index);
return std::exchange(n3->childMaxVersion[i], newMax);
}
case Type_Node16: {
auto *n16 = static_cast<Node16 *>(n);
int i = getNodeIndexExists(n16, index);
int i = getNodeIndex(n16, index);
return std::exchange(n16->childMaxVersion[i], newMax);
}
case Type_Node48: {
@@ -1062,13 +952,13 @@ void setMaxVersion(Node *n, InternalVersionT newMax) {
__builtin_unreachable(); // GCOVR_EXCL_LINE
case Type_Node3: {
auto *n3 = static_cast<Node3 *>(n);
int i = getNodeIndexExists(n3, index);
int i = getNodeIndex(n3, index);
n3->childMaxVersion[i] = newMax;
return;
}
case Type_Node16: {
auto *n16 = static_cast<Node16 *>(n);
int i = getNodeIndexExists(n16, index);
int i = getNodeIndex(n16, index);
n16->childMaxVersion[i] = newMax;
return;
}
@@ -1095,7 +985,7 @@ void setMaxVersion(Node *n, InternalVersionT newMax) {
}
}

TaggedNodePointer &getInTree(Node *n, ConflictSet::Impl *);
Node *&getInTree(Node *n, ConflictSet::Impl *);

Node *getChild(Node0 *, uint8_t) { return nullptr; }
Node *getChild(Node3 *self, uint8_t index) {
@@ -1132,7 +1022,6 @@ Node *getChild(Node *self, uint8_t index) {
struct ChildAndMaxVersion {
Node *child;
InternalVersionT maxVersion;
Type childType;
};

ChildAndMaxVersion getChildAndMaxVersion(Node0 *, uint8_t) { return {}; }
@@ -1141,28 +1030,24 @@ ChildAndMaxVersion getChildAndMaxVersion(Node3 *self, uint8_t index) {
if (i < 0) {
return {};
}
return {self->children[i], self->childMaxVersion[i],
self->children[i].getType()};
return {self->children[i], self->childMaxVersion[i]};
}
ChildAndMaxVersion getChildAndMaxVersion(Node16 *self, uint8_t index) {
int i = getNodeIndex(self, index);
if (i < 0) {
return {};
}
return {self->children[i], self->childMaxVersion[i],
self->children[i].getType()};
return {self->children[i], self->childMaxVersion[i]};
}
ChildAndMaxVersion getChildAndMaxVersion(Node48 *self, uint8_t index) {
int i = self->index[index];
if (i < 0) {
return {};
}
return {self->children[i], self->childMaxVersion[i],
self->children[i].getType()};
return {self->children[i], self->childMaxVersion[i]};
}
ChildAndMaxVersion getChildAndMaxVersion(Node256 *self, uint8_t index) {
return {self->children[index], self->childMaxVersion[index],
self->children[index].getType()};
return {self->children[index], self->childMaxVersion[index]};
}

ChildAndMaxVersion getChildAndMaxVersion(Node *self, uint8_t index) {
@@ -1185,8 +1070,6 @@ ChildAndMaxVersion getChildAndMaxVersion(Node *self, uint8_t index) {
Node *getChildGeq(Node0 *, int) { return nullptr; }

Node *getChildGeq(Node3 *n, int child) {
assume(n->numChildren >= 1);
assume(n->numChildren <= 3);
for (int i = 0; i < n->numChildren; ++i) {
if (n->index[i] >= child) {
return n->children[i];
@@ -1307,15 +1190,14 @@ Node *getFirstChildExists(Node *self) {
}
}

void consumePartialKeyFull(TaggedNodePointer &self,
std::span<const uint8_t> &key,
void consumePartialKeyFull(Node *&self, std::span<const uint8_t> &key,
InternalVersionT writeVersion, WriteContext *tls) {
// Handle an existing partial key
int commonLen = std::min<int>(self->partialKeyLen, key.size());
int partialKeyIndex =
longestCommonPrefix(self->partialKey(), key.data(), commonLen);
if (partialKeyIndex < self->partialKeyLen) {
Node *old = self;
auto *old = self;
// Since root cannot have a partial key
assert(old->parent != nullptr);
InternalVersionT oldMaxVersion = exchangeMaxVersion(old, writeVersion);
@@ -1353,7 +1235,7 @@ void consumePartialKeyFull(TaggedNodePointer &self,
// Consume any partial key of `self`, and update `self` and
// `key` such that `self` is along the search path of `key`
inline __attribute__((always_inline)) void
consumePartialKey(TaggedNodePointer &self, std::span<const uint8_t> &key,
consumePartialKey(Node *&self, std::span<const uint8_t> &key,
InternalVersionT writeVersion, WriteContext *tls) {
if (self->partialKeyLen > 0) {
consumePartialKeyFull(self, key, writeVersion, tls);
@@ -1364,10 +1246,8 @@ consumePartialKey(TaggedNodePointer &self, std::span<const uint8_t> &key,
// such that the search path of the result + key is the same as the search path
// of self + key before the call. Creates a node if necessary. Updates
// `maxVersion` for result.
TaggedNodePointer &getOrCreateChild(TaggedNodePointer &self,
std::span<const uint8_t> &key,
InternalVersionT newMaxVersion,
WriteContext *tls) {
Node *&getOrCreateChild(Node *&self, std::span<const uint8_t> &key,
InternalVersionT newMaxVersion, WriteContext *tls) {

int index = key.front();
key = key.subspan(1, key.size() - 1);
@@ -1669,6 +1549,7 @@ __attribute__((target("avx512f"))) void rezero16(InternalVersionT *vs,
_mm512_sub_epi32(_mm512_loadu_epi32(vs), zvec), _mm512_setzero_epi32());
_mm512_mask_storeu_epi32(vs, m, zvec);
}

__attribute__((target("default")))
#endif

@@ -1725,11 +1606,10 @@ void rezero(Node *n, InternalVersionT z) {
}
#endif

void mergeWithChild(TaggedNodePointer &self, WriteContext *tls,
ConflictSet::Impl *impl, Node *&dontInvalidate,
Node3 *self3) {
void mergeWithChild(Node *&self, WriteContext *tls, ConflictSet::Impl *impl,
Node *&dontInvalidate, Node3 *self3) {
assert(!self3->entryPresent);
Node *child = self3->children[0];
auto *child = self3->children[0];
int minCapacity = self3->partialKeyLen + 1 + child->partialKeyLen;

if (minCapacity > child->getCapacity()) {
@@ -1978,7 +1858,7 @@ bool checkPointRead(Node *n, const std::span<const uint8_t> key,
goto downLeftSpine;
}

auto [child, maxV, childT] = getChildAndMaxVersion(n, remaining[0]);
auto [child, maxV] = getChildAndMaxVersion(n, remaining[0]);
if (child == nullptr) {
auto c = getChildGeq(n, remaining[0]);
if (c != nullptr) {
@@ -2048,7 +1928,7 @@ bool checkPrefixRead(Node *n, const std::span<const uint8_t> key,
return maxVersion(n) <= readVersion;
}

auto [child, maxV, childT] = getChildAndMaxVersion(n, remaining[0]);
auto [child, maxV] = getChildAndMaxVersion(n, remaining[0]);
if (child == nullptr) {
auto c = getChildGeq(n, remaining[0]);
if (c != nullptr) {
@@ -2321,7 +2201,7 @@ bool checkMaxBetweenExclusiveImpl(Node *n, int begin, int end,
if (!mask) {
return true;
}
Node *child = self->children[std::countr_zero(mask)];
auto *child = self->children[std::countr_zero(mask)];
const bool firstRangeOk =
!child->entryPresent || child->entry.rangeVersion <= readVersion;
uint32_t compared = 0;
@@ -2396,7 +2276,7 @@ bool checkMaxBetweenExclusiveImpl(Node *n, int begin, int end,
if (!mask) {
return true;
}
Node *child = self->children[std::countr_zero(mask)];
auto *child = self->children[std::countr_zero(mask)];
const bool firstRangeOk =
!child->entryPresent || child->entry.rangeVersion <= readVersion;

@@ -2441,7 +2321,7 @@ bool checkMaxBetweenExclusiveImpl(Node *n, int begin, int end,
{
int c = self->bitSet.firstSetGeq(begin + 1);
if (c >= 0 && c < end) {
Node *child = self->children[self->index[c]];
auto *child = self->children[self->index[c]];
if (child->entryPresent && child->entry.rangeVersion > readVersion) {
return false;
}
@@ -2475,7 +2355,7 @@ bool checkMaxBetweenExclusiveImpl(Node *n, int begin, int end,
{
int c = self->bitSet.firstSetGeq(begin + 1);
if (c >= 0 && c < end) {
Node *child = self->children[c];
auto *child = self->children[c];
if (child->entryPresent && child->entry.rangeVersion > readVersion) {
return false;
}
@@ -2536,7 +2416,6 @@ checkMaxBetweenExclusive(Node *n, int begin, int end,
}
__attribute__((target("default")))
#endif

bool checkMaxBetweenExclusive(Node *n, int begin, int end,
InternalVersionT readVersion, ReadContext *tls) {
return checkMaxBetweenExclusiveImpl<false>(n, begin, end, readVersion, tls);
@@ -2647,7 +2526,7 @@ bool checkRangeLeftSide(Node *n, std::span<const uint8_t> key, int prefixLen,
}
}

auto [child, maxV, childT] = getChildAndMaxVersion(n, remaining[0]);
auto [child, maxV] = getChildAndMaxVersion(n, remaining[0]);
if (child == nullptr) {
auto c = getChildGeq(n, remaining[0]);
if (c != nullptr) {
@@ -2835,7 +2714,7 @@ bool checkRangeRead(Node *n, std::span<const uint8_t> begin,
if (remaining.size() == 0) {
break;
}
auto [child, v, childT] = getChildAndMaxVersion(n, remaining[0]);
auto [child, v] = getChildAndMaxVersion(n, remaining[0]);
if (child == nullptr) {
break;
}
@@ -2901,10 +2780,8 @@ checkMaxBetweenExclusiveImpl<true>(Node *n, int begin, int end,
// of the result will have `maxVersion` set to `writeVersion` as a
// postcondition. Nodes along the search path may be invalidated. Callers must
// ensure that the max version of the self argument is updated.
[[nodiscard]] TaggedNodePointer *insert(TaggedNodePointer *self,
std::span<const uint8_t> key,
InternalVersionT writeVersion,
WriteContext *tls) {
[[nodiscard]] Node **insert(Node **self, std::span<const uint8_t> key,
InternalVersionT writeVersion, WriteContext *tls) {

for (; key.size() != 0; ++tls->accum.insert_iterations) {
self = &getOrCreateChild(*self, key, writeVersion, tls);
@@ -2932,23 +2809,17 @@ void eraseTree(Node *root, WriteContext *tls) {
} break;
case Type_Node3: {
auto *n3 = static_cast<Node3 *>(n);
for (int i = 0; i < n3->numChildren; ++i) {
toFree.push_back(n3->children[i]);
}
toFree.append(std::span<Node *>(n3->children, n3->numChildren));
tls->release(n3);
} break;
case Type_Node16: {
auto *n16 = static_cast<Node16 *>(n);
for (int i = 0; i < n16->numChildren; ++i) {
toFree.push_back(n16->children[i]);
}
toFree.append(std::span<Node *>(n16->children, n16->numChildren));
tls->release(n16);
} break;
case Type_Node48: {
auto *n48 = static_cast<Node48 *>(n);
for (int i = 0; i < n48->numChildren; ++i) {
toFree.push_back(n48->children[i]);
}
toFree.append(std::span<Node *>(n48->children, n48->numChildren));
tls->release(n48);
} break;
case Type_Node256: {
@@ -2964,10 +2835,10 @@ void eraseTree(Node *root, WriteContext *tls) {
}
}

void addPointWrite(TaggedNodePointer &root, std::span<const uint8_t> key,
void addPointWrite(Node *&root, std::span<const uint8_t> key,
InternalVersionT writeVersion, WriteContext *tls) {
++tls->accum.point_writes;
auto n = *insert(&root, key, writeVersion, tls);
auto *n = *insert(&root, key, writeVersion, tls);
if (!n->entryPresent) {
++tls->accum.entries_inserted;
auto *p = nextLogical(n);
@@ -2984,72 +2855,6 @@ void addPointWrite(TaggedNodePointer &root, std::span<const uint8_t> key,
}
}

#if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__)
__attribute__((target("avx512f"))) InternalVersionT horizontalMaxUpTo16(
InternalVersionT *vs, [[maybe_unused]] InternalVersionT z, int len) {
assume(len <= 16);
#if USE_64_BIT
// Hope it gets vectorized
InternalVersionT max = vs[0];
for (int i = 1; i < len; ++i) {
max = std::max(vs[i], max);
}
return max;
#else
uint32_t zero;
memcpy(&zero, &z, sizeof(zero));
auto zeroVec = _mm512_set1_epi32(zero);
auto max = InternalVersionT(
zero +
_mm512_reduce_max_epi32(_mm512_sub_epi32(
_mm512_mask_loadu_epi32(zeroVec, _mm512_int2mask((1 << len) - 1), vs),
zeroVec)));
return max;
#endif
}
__attribute__((target("default")))
#endif

InternalVersionT
horizontalMaxUpTo16(InternalVersionT *vs, InternalVersionT, int len) {
assume(len <= 16);
InternalVersionT max = vs[0];
for (int i = 1; i < len; ++i) {
max = std::max(vs[i], max);
}
return max;
}

#if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__)
__attribute__((target("avx512f"))) InternalVersionT
horizontalMax16(InternalVersionT *vs, [[maybe_unused]] InternalVersionT z) {
#if USE_64_BIT
// Hope it gets vectorized
InternalVersionT max = vs[0];
for (int i = 1; i < 16; ++i) {
max = std::max(vs[i], max);
}
return max;
#else
uint32_t zero; // GCOVR_EXCL_LINE
memcpy(&zero, &z, sizeof(zero));
auto zeroVec = _mm512_set1_epi32(zero);
return InternalVersionT(zero + _mm512_reduce_max_epi32(_mm512_sub_epi32(
_mm512_loadu_epi32(vs), zeroVec)));
#endif
}
__attribute__((target("default")))
#endif

InternalVersionT
horizontalMax16(InternalVersionT *vs, InternalVersionT) {
InternalVersionT max = vs[0];
for (int i = 1; i < 16; ++i) {
max = std::max(vs[i], max);
}
return max;
}

// Precondition: `node->entryPresent`, and node is not the root
void fixupMaxVersion(Node *node, WriteContext *tls) {
assert(node->parent);
@@ -3061,13 +2866,15 @@ void fixupMaxVersion(Node *node, WriteContext *tls) {
break;
case Type_Node3: {
auto *self3 = static_cast<Node3 *>(node);
max = std::max(max, horizontalMaxUpTo16(self3->childMaxVersion, tls->zero,
self3->numChildren));
for (int i = 0; i < self3->numChildren; ++i) {
max = std::max(self3->childMaxVersion[i], max);
}
} break;
case Type_Node16: {
auto *self16 = static_cast<Node16 *>(node);
max = std::max(max, horizontalMaxUpTo16(self16->childMaxVersion, tls->zero,
self16->numChildren));
for (int i = 0; i < self16->numChildren; ++i) {
max = std::max(self16->childMaxVersion[i], max);
}
} break;
case Type_Node48: {
auto *self48 = static_cast<Node48 *>(node);
@@ -3077,7 +2884,9 @@ void fixupMaxVersion(Node *node, WriteContext *tls) {
} break;
case Type_Node256: {
auto *self256 = static_cast<Node256 *>(node);
max = std::max(max, horizontalMax16(self256->childMaxVersion, tls->zero));
for (auto v : self256->maxOfMax) {
max = std::max(v, max);
}
} break;
default: // GCOVR_EXCL_LINE
__builtin_unreachable(); // GCOVR_EXCL_LINE
@@ -3085,7 +2894,7 @@ void fixupMaxVersion(Node *node, WriteContext *tls) {
setMaxVersion(node, max);
}

void addWriteRange(TaggedNodePointer &root, std::span<const uint8_t> begin,
void addWriteRange(Node *&root, std::span<const uint8_t> begin,
std::span<const uint8_t> end, InternalVersionT writeVersion,
WriteContext *tls, ConflictSet::Impl *impl) {

@@ -3098,12 +2907,12 @@ void addWriteRange(TaggedNodePointer &root, std::span<const uint8_t> begin,
++tls->accum.range_writes;
const bool beginIsPrefix = lcp == int(begin.size());

auto useAsRoot = insert(&root, begin.subspan(0, lcp), writeVersion, tls);
Node **useAsRoot = insert(&root, begin.subspan(0, lcp), writeVersion, tls);

begin = begin.subspan(lcp, begin.size() - lcp);
end = end.subspan(lcp, end.size() - lcp);

Node *beginNode = *insert(useAsRoot, begin, writeVersion, tls);
auto *beginNode = *insert(useAsRoot, begin, writeVersion, tls);
addKey(beginNode);
if (!beginNode->entryPresent) {
++tls->accum.entries_inserted;
@@ -3114,7 +2923,7 @@ void addWriteRange(TaggedNodePointer &root, std::span<const uint8_t> begin,
}
beginNode->entry.pointVersion = writeVersion;

Node *endNode = *insert(useAsRoot, end, writeVersion, tls);
auto *endNode = *insert(useAsRoot, end, writeVersion, tls);
addKey(endNode);
if (!endNode->entryPresent) {
++tls->accum.entries_inserted;
@@ -3134,7 +2943,7 @@ void addWriteRange(TaggedNodePointer &root, std::span<const uint8_t> begin,
assert(!beginNode->endOfRange);
assert(!endNode->endOfRange);
endNode->endOfRange = true;
Node *iter = beginNode;
auto *iter = beginNode;
for (iter = nextLogical(iter); !iter->endOfRange;
iter = erase(iter, tls, impl, /*logical*/ true)) {
assert(!iter->endOfRange);
@@ -3200,34 +3009,203 @@ Node *firstGeqPhysical(Node *n, const std::span<const uint8_t> key) {
}
}

struct CheckJob {
Node *n;
std::span<const uint8_t> begin;
InternalVersionT readVersion;
ReadContext *tls;
ConflictSet::Result *result;

void setResult(bool ok) {
*result = ok ? ConflictSet::Commit : ConflictSet::Conflict;
}

typedef void (*typeErasedContinuation)(void *);

// The type of a function that takes a CheckJob* and returns its own type
struct continuation {
typedef continuation (*functionPtrType)(CheckJob *);
functionPtrType func;
continuation operator()(CheckJob *job) { return func(job); }
/*implicit*/ continuation(functionPtrType func) : func(func) {}
continuation() = default;
operator bool() { return func != nullptr; }
};

continuation next;
void init(const ConflictSet::ReadRange *read, ConflictSet::Result *result,
Node *root, int64_t oldestVersionFullPrecision, ReadContext *tls);
};

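(Note: the `continuation` wrapper above exists because C++ cannot spell a function pointer type that returns its own type directly; putting the pointer inside a struct breaks the recursion, which is what lets each state of the check return "the next state to run". A small standalone illustration of the pattern, with hypothetical names:)

```cpp
#include <cstdio>

struct Step;
// A step receives the job state and returns the next step to run.
using StepFn = Step (*)(int *);

// Wrapping the function pointer in a struct lets StepFn mention Step by name.
struct Step {
  StepFn fn = nullptr;
  /*implicit*/ Step(StepFn f = nullptr) : fn(f) {}
  explicit operator bool() const { return fn != nullptr; }
};

Step countDown(int *state) {
  printf("state = %d\n", *state);
  // Returning ourselves keeps the state machine running; a default-constructed
  // Step signals completion.
  return --*state > 0 ? Step(countDown) : Step();
}

int main() {
  int state = 3;
  for (Step s = countDown; s;) {
    s = s.fn(&state); // drive the machine one transition at a time
  }
}
```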
namespace check_point_read_state_machine {

CheckJob::continuation down_left_spine(CheckJob *job);

// Logically this is the same as performing firstGeq and then checking against
// point or range version according to cmp, but this version short circuits as
// soon as it can prove that there's no conflict.
CheckJob::continuation begin(CheckJob *job) {
++job->tls->point_read_accum;
#if DEBUG_VERBOSE && !defined(NDEBUG)
fprintf(stderr, "Check point read: %s\n", printable(key).c_str());
#endif
for (;; ++job->tls->point_read_iterations_accum) {
if (job->begin.size() == 0) {
if (job->n->entryPresent) {
job->setResult(job->n->entry.pointVersion <= job->readVersion);
return nullptr; // Done
}
job->n = getFirstChildExists(job->n);
return down_left_spine;
}

auto [child, maxV] = getChildAndMaxVersion(job->n, job->begin[0]);
if (child == nullptr) {
auto c = getChildGeq(job->n, job->begin[0]);
if (c != nullptr) {
job->n = c;
return down_left_spine;
} else {
job->n = nextSibling(job->n);
if (job->n == nullptr) {
job->setResult(true);
return nullptr; // Done
}
return down_left_spine;
}
}

job->n = child;
job->begin = job->begin.subspan(1, job->begin.size() - 1);

if (job->n->partialKeyLen > 0) {
int commonLen = std::min<int>(job->n->partialKeyLen, job->begin.size());
int i = longestCommonPrefix(job->n->partialKey(), job->begin.data(),
commonLen);
if (i < commonLen) {
auto c = job->n->partialKey()[i] <=> job->begin[i];
if (c > 0) {
return down_left_spine;
} else {
job->n = nextSibling(job->n);
if (job->n == nullptr) {
job->setResult(true);
return nullptr; // Done
}
return down_left_spine;
}
}
if (commonLen == job->n->partialKeyLen) {
// partial key matches
job->begin =
job->begin.subspan(commonLen, job->begin.size() - commonLen);
} else if (job->n->partialKeyLen > int(job->begin.size())) {
// n is the first physical node greater than remaining, and there's no
// eq node
return down_left_spine;
}
}

if (maxV <= job->readVersion) {
++job->tls->point_read_short_circuit_accum;
job->setResult(true);
return nullptr; // Done
}
}
}

CheckJob::continuation down_left_spine(CheckJob *job) {
if (job->n->entryPresent) {
job->setResult(job->n->entry.rangeVersion <= job->readVersion);
return nullptr; // Done
}
job->n = getFirstChildExists(job->n);
return down_left_spine;
}

} // namespace check_point_read_state_machine

void CheckJob::init(const ConflictSet::ReadRange *read,
ConflictSet::Result *result, Node *root,
int64_t oldestVersionFullPrecision, ReadContext *tls) {
auto begin = std::span<const uint8_t>(read->begin.p, read->begin.len);
auto end = std::span<const uint8_t>(read->end.p, read->end.len);
if (read->readVersion < oldestVersionFullPrecision) {
*result = ConflictSet::TooOld;
next = +[](CheckJob *) -> continuation { return nullptr; };
} else if (end.size() == 0) {
this->begin = begin;
this->n = root;
this->readVersion = InternalVersionT(read->readVersion);
this->result = result;
this->tls = tls;
this->next = check_point_read_state_machine::begin;
} else {
*result = checkRangeRead(root, begin, end,
InternalVersionT(read->readVersion), tls)
? ConflictSet::Commit
: ConflictSet::Conflict;
next = +[](CheckJob *) -> continuation { return nullptr; };
}
}

struct __attribute__((visibility("hidden"))) ConflictSet::Impl {

void check(const ReadRange *reads, Result *result, int count) {
assert(oldestVersionFullPrecision >=
newestVersionFullPrecision - kNominalVersionWindow);

if (count == 0) {
return;
}

ReadContext tls;
tls.impl = this;
int64_t check_byte_accum = 0;

constexpr int kConcurrent = 32;
CheckJob inProgress[kConcurrent];
int nextJob[kConcurrent];

int started = std::min(kConcurrent, count);
for (int i = 0; i < started; i++) {
inProgress[i].init(reads + i, result + i, root,
oldestVersionFullPrecision, &tls);
nextJob[i] = i + 1;
}
nextJob[started - 1] = 0;

int prevJob = started - 1;
int job = 0;
for (;;) {
auto next = inProgress[job].next(inProgress + job);
inProgress[job].next = next;
if (!next) {
if (started == count) {
if (prevJob == job)
break;
nextJob[prevJob] = nextJob[job];
job = prevJob;
} else {
int temp = started++;
inProgress[job].init(reads + temp, result + temp, root,
oldestVersionFullPrecision, &tls);
}
}
prevJob = job;
job = nextJob[job];
}

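(Note: check() above keeps up to 32 reads in flight and cycles over them through the intrusive nextJob[] ring, unlinking a slot in O(1) when its state machine finishes, or refilling it with the next unstarted read. A toy standalone sketch of the same scheduling shape, assuming a trivial step() in place of the real continuation:)

```cpp
#include <algorithm>
#include <cstdio>

// Each toy job just counts down; step() returns true while work remains.
struct Job {
  int remaining = 0;
  bool step() { return --remaining > 0; }
};

// Round-robin over at most kConcurrent in-flight jobs, mirroring the
// nextJob[] circular list in check(): finished slots are either unlinked
// from the ring or reused for the next pending job.
void runAll(Job *jobs, int count) {
  if (count == 0) {
    return;
  }
  constexpr int kConcurrent = 4;
  Job *inProgress[kConcurrent];
  int nextJob[kConcurrent];

  int started = std::min(kConcurrent, count);
  for (int i = 0; i < started; ++i) {
    inProgress[i] = &jobs[i];
    nextJob[i] = i + 1;
  }
  nextJob[started - 1] = 0; // close the ring

  int prevJob = started - 1;
  int job = 0;
  for (;;) {
    if (!inProgress[job]->step()) {
      if (started == count) {
        if (prevJob == job)
          break;                            // last live job just finished
        nextJob[prevJob] = nextJob[job];    // unlink this slot from the ring
        job = prevJob;
      } else {
        inProgress[job] = &jobs[started++]; // reuse the slot for a new job
      }
    }
    prevJob = job;
    job = nextJob[job];
  }
}

int main() {
  Job jobs[10];
  for (int i = 0; i < 10; ++i) {
    jobs[i].remaining = i + 1;
  }
  runAll(jobs, 10);
  for (auto &j : jobs) {
    printf("%d ", j.remaining); // all zeros: every job ran to completion
  }
  printf("\n");
}
```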
for (int i = 0; i < count; ++i) {
assert(reads[i].readVersion >= 0);
assert(reads[i].readVersion <= newestVersionFullPrecision);
const auto &r = reads[i];
check_byte_accum += r.begin.len + r.end.len;
auto begin = std::span<const uint8_t>(r.begin.p, r.begin.len);
auto end = std::span<const uint8_t>(r.end.p, r.end.len);
assert(oldestVersionFullPrecision >=
newestVersionFullPrecision - kNominalVersionWindow);
result[i] =
reads[i].readVersion < oldestVersionFullPrecision ? TooOld
: (end.size() > 0
? checkRangeRead(root, begin, end,
InternalVersionT(reads[i].readVersion), &tls)
: checkPointRead(root, begin,
InternalVersionT(reads[i].readVersion), &tls))
? Commit
: Conflict;
tls.commits_accum += result[i] == Commit;
tls.conflicts_accum += result[i] == Conflict;
tls.too_olds_accum += result[i] == TooOld;
}

point_read_total.add(tls.point_read_accum);
prefix_read_total.add(tls.prefix_read_accum);
range_read_total.add(tls.range_read_accum);
@@ -3449,7 +3427,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
std::span<const uint8_t> removalKey;
int64_t keyUpdates;

TaggedNodePointer root;
Node *root;
InternalVersionT oldestVersion;
int64_t oldestVersionFullPrecision;
int64_t oldestExtantVersion;
@@ -3530,7 +3508,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
}
};

TaggedNodePointer &getInTree(Node *n, ConflictSet::Impl *impl) {
Node *&getInTree(Node *n, ConflictSet::Impl *impl) {
return n->parent == nullptr ? impl->root
: getChildExists(n->parent, n->parentsIndex);
}
@@ -4169,24 +4147,6 @@ template <int kN> void benchScan2() {
});
}

void benchHorizontal16() {
ankerl::nanobench::Bench bench;
InternalVersionT vs[16];
for (int i = 0; i < 16; ++i) {
vs[i] = InternalVersionT(rand() % 1000 + 1000);
}
#if !USE_64_BIT
InternalVersionT::zero = InternalVersionT(rand() % 1000);
#endif
bench.run("horizontal16", [&]() {
bench.doNotOptimizeAway(horizontalMax16(vs, InternalVersionT::zero));
});
int x = rand() % 15 + 1;
bench.run("horizontalUpTo16", [&]() {
bench.doNotOptimizeAway(horizontalMaxUpTo16(vs, InternalVersionT::zero, x));
});
}

void benchLCP(int len) {
ankerl::nanobench::Bench bench;
std::vector<uint8_t> lhs(len);
@@ -4219,7 +4179,11 @@ void printTree() {
debugPrintDot(stdout, cs.root, &cs);
}

int main(void) { benchHorizontal16(); }
int main(void) {
for (int i = 0; i < 256; ++i) {
benchLCP(i);
}
}
#endif

#ifdef ENABLE_FUZZ
@@ -22,6 +22,9 @@ void *stepJob(Job *j) {
return done ? nullptr : (void *)stepJob;
}

// So we can look at the disassembly more easily

extern "C" {
void sequential(Job **jobs, int count) {
for (int i = 0; i < count; ++i) {
do {
@@ -91,87 +94,6 @@ void interleaveBoundedCyclicList(Job **jobs, int count) {
}
}

#ifndef __has_attribute
#define __has_attribute(x) 0
#endif

#if __has_attribute(musttail)
#define MUSTTAIL __attribute__((musttail))
#else
#define MUSTTAIL
#endif

struct Context {
constexpr static int kConcurrent = 32;
Job **jobs;
Job *inProgress[kConcurrent];
void (*continuation[kConcurrent])(Context *, int64_t prevJob, int64_t job,
int64_t started, int64_t count);
int nextJob[kConcurrent];
};

void keepGoing(Context *context, int64_t prevJob, int64_t job, int64_t started,
int64_t count) {
prevJob = job;
job = context->nextJob[job];
MUSTTAIL return context->continuation[job](context, prevJob, job, started,
count);
}

void stepJobTailCall(Context *context, int64_t prevJob, int64_t job,
int64_t started, int64_t count);

void complete(Context *context, int64_t prevJob, int64_t job, int64_t started,
int64_t count) {
if (started == count) {
if (prevJob == job) {
return;
}
context->nextJob[prevJob] = context->nextJob[job];
job = prevJob;
} else {
context->inProgress[job] = context->jobs[started++];
context->continuation[job] = stepJobTailCall;
}
prevJob = job;
job = context->nextJob[job];
MUSTTAIL return context->continuation[job](context, prevJob, job, started,
count);
}

void stepJobTailCall(Context *context, int64_t prevJob, int64_t job,
int64_t started, int64_t count) {
auto *j = context->inProgress[job];
auto done = --(*j->input) == 0;
#ifdef __x86_64__
_mm_clflush(j->input);
#endif
if (done) {
MUSTTAIL return complete(context, prevJob, job, started, count);
} else {
context->continuation[job] = stepJobTailCall;
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
}
}

void useTailCalls(Job **jobs, int count) {
if (count == 0) {
return;
}
Context context;
context.jobs = jobs;
int64_t started = std::min(Context::kConcurrent, count);
for (int i = 0; i < started; i++) {
context.inProgress[i] = jobs[i];
context.nextJob[i] = i + 1;
context.continuation[i] = stepJobTailCall;
}
context.nextJob[started - 1] = 0;
int prevJob = started - 1;
int job = 0;
return context.continuation[job](&context, prevJob, job, started, count);
}

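(Note: useTailCalls above only stays in constant stack space if every transfer is a genuine tail call, which is why MUSTTAIL expands to clang's musttail attribute and why the CMake change passes -foptimize-sibling-calls to gcc debug builds. A minimal sketch of the attribute outside the benchmark, assuming nothing beyond standard C++ plus the attribute:)

```cpp
#include <cstdio>

#ifndef __has_attribute
#define __has_attribute(x) 0
#endif
#if __has_attribute(musttail)
#define MUSTTAIL __attribute__((musttail))
#else
#define MUSTTAIL // plain call; the compiler may or may not eliminate the frame
#endif

void countDown(long n);

void step(long n) {
  // musttail requires the caller and callee signatures to match; the call
  // then reuses the current frame instead of pushing a new one.
  MUSTTAIL return countDown(n - 1);
}

void countDown(long n) {
  if (n == 0) {
    printf("done\n");
    return;
  }
  MUSTTAIL return step(n);
}

int main() {
  // With guaranteed tail calls this bound could be arbitrarily large without
  // risking stack overflow; kept small so the fallback path also works.
  countDown(10000);
}
```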
void interleaveCyclicList(Job **jobs, int count) {
auto *nextJob = (int *)alloca(sizeof(int) * count);

@@ -195,11 +117,12 @@ void interleaveCyclicList(Job **jobs, int count) {
job = nextJob[job];
}
}
}

int main() {
ankerl::nanobench::Bench bench;

constexpr int kNumJobs = 10000;
constexpr int kNumJobs = 100;
bench.relative(true);

Job jobs[kNumJobs];
@@ -217,7 +140,6 @@ int main() {
for (auto [scheduler, name] :
{std::make_pair(sequentialNoFuncPtr, "sequentialNoFuncPtr"),
std::make_pair(sequential, "sequential"),
std::make_pair(useTailCalls, "useTailCalls"),
std::make_pair(interleaveSwapping, "interleavingSwapping"),
std::make_pair(interleaveBoundedCyclicList,
"interleaveBoundedCyclicList"),
Binary files not shown.