Compare commits
21 Commits
5c0cc1edf5
...
tagged-poi
Author | SHA1 | Date | |
---|---|---|---|
af1e2299de | |||
230e96063d | |||
f41a62471b | |||
d8f85dedc4 | |||
656939560b | |||
5580f9b71d | |||
628d16b7e6 | |||
d9e4a7d1b6 | |||
52201fa4c7 | |||
0814822d82 | |||
41df2398e8 | |||
84c4d0fcba | |||
6241533dfb | |||
0abf6a1ecf | |||
867136ff1b | |||
4b8f7320d3 | |||
6628092384 | |||
a0a4f1afea | |||
ca479c03ce | |||
0a2e133ab9 | |||
b0b31419b0 |
@@ -7,7 +7,6 @@
|
||||
void showMemory(const ConflictSet &cs);
|
||||
#endif
|
||||
|
||||
#define ANKERL_NANOBENCH_IMPLEMENT
|
||||
#include "third_party/nanobench.h"
|
||||
|
||||
constexpr int kNumKeys = 1000000;
|
||||
|
@@ -72,12 +72,6 @@ else()
|
||||
add_link_options(-Wl,--gc-sections)
|
||||
endif()
|
||||
|
||||
if(EMSCRIPTEN)
|
||||
# https://github.com/emscripten-core/emscripten/issues/15377#issuecomment-1285167486
|
||||
add_link_options(-lnodefs.js -lnoderawfs.js)
|
||||
add_link_options(-s ALLOW_MEMORY_GROWTH)
|
||||
endif()
|
||||
|
||||
if(NOT USE_SIMD_FALLBACK)
|
||||
cmake_push_check_state()
|
||||
list(APPEND CMAKE_REQUIRED_FLAGS -mavx)
|
||||
@@ -144,6 +138,8 @@ include(CTest)
|
||||
# disable tests if this is being used through e.g. FetchContent
|
||||
if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND BUILD_TESTING)
|
||||
|
||||
add_library(nanobench ${CMAKE_CURRENT_SOURCE_DIR}/nanobench.cpp)
|
||||
|
||||
set(TEST_FLAGS -Wall -Wextra -Wunreachable-code -Wpedantic -UNDEBUG)
|
||||
|
||||
# corpus tests, which are tests curated by libfuzzer. The goal is to get broad
|
||||
@@ -191,6 +187,7 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND BUILD_TESTING)
|
||||
target_include_directories(conflict_set_main
|
||||
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include)
|
||||
target_compile_definitions(conflict_set_main PRIVATE ENABLE_MAIN)
|
||||
target_link_libraries(conflict_set_main PRIVATE nanobench)
|
||||
|
||||
if(NOT APPLE)
|
||||
# libfuzzer target, to generate/manage corpus
|
||||
@@ -251,6 +248,19 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND BUILD_TESTING)
|
||||
add_test(NAME conflict_set_blackbox_${hash} COMMAND driver ${TEST})
|
||||
endforeach()
|
||||
|
||||
find_program(VALGRIND_EXE valgrind)
|
||||
if(VALGRIND_EXE AND NOT CMAKE_CROSSCOMPILING)
|
||||
list(LENGTH CORPUS_TESTS len)
|
||||
math(EXPR last "${len} - 1")
|
||||
set(partition_size 100)
|
||||
foreach(i RANGE 0 ${last} ${partition_size})
|
||||
list(SUBLIST CORPUS_TESTS ${i} ${partition_size} partition)
|
||||
add_test(NAME conflict_set_blackbox_valgrind_${i}
|
||||
COMMAND ${VALGRIND_EXE} --error-exitcode=99 --
|
||||
$<TARGET_FILE:driver> ${partition})
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
# scripted tests. Written manually to fill in anything libfuzzer couldn't
|
||||
# find.
|
||||
if(NOT CMAKE_CROSSCOMPILING)
|
||||
@@ -271,19 +281,14 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND BUILD_TESTING)
|
||||
${Python3_EXECUTABLE}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_conflict_set.py test ${TEST}
|
||||
--build-dir ${CMAKE_CURRENT_BINARY_DIR})
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
find_program(VALGRIND_EXE valgrind)
|
||||
if(VALGRIND_EXE AND NOT CMAKE_CROSSCOMPILING)
|
||||
list(LENGTH CORPUS_TESTS len)
|
||||
math(EXPR last "${len} - 1")
|
||||
set(partition_size 100)
|
||||
foreach(i RANGE 0 ${last} ${partition_size})
|
||||
list(SUBLIST CORPUS_TESTS ${i} ${partition_size} partition)
|
||||
add_test(NAME conflict_set_blackbox_valgrind_${i}
|
||||
COMMAND ${VALGRIND_EXE} --error-exitcode=99 --
|
||||
$<TARGET_FILE:driver> ${partition})
|
||||
if(VALGRIND_EXE AND NOT CMAKE_CROSSCOMPILING)
|
||||
add_test(
|
||||
NAME script_test_${TEST}_valgrind
|
||||
COMMAND
|
||||
${VALGRIND_EXE} ${Python3_EXECUTABLE}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_conflict_set.py test ${TEST}
|
||||
--build-dir ${CMAKE_CURRENT_BINARY_DIR})
|
||||
endif()
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
@@ -336,7 +341,7 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND BUILD_TESTING)
|
||||
|
||||
# bench
|
||||
add_executable(conflict_set_bench Bench.cpp)
|
||||
target_link_libraries(conflict_set_bench PRIVATE ${PROJECT_NAME})
|
||||
target_link_libraries(conflict_set_bench PRIVATE ${PROJECT_NAME} nanobench)
|
||||
set_target_properties(conflict_set_bench PROPERTIES SKIP_BUILD_RPATH ON)
|
||||
add_executable(real_data_bench RealDataBench.cpp)
|
||||
target_link_libraries(real_data_bench PRIVATE ${PROJECT_NAME})
|
||||
@@ -351,6 +356,14 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND BUILD_TESTING)
|
||||
add_executable(server_bench ServerBench.cpp)
|
||||
target_link_libraries(server_bench PRIVATE ${PROJECT_NAME})
|
||||
set_target_properties(server_bench PROPERTIES SKIP_BUILD_RPATH ON)
|
||||
|
||||
add_executable(interleaving_test InterleavingTest.cpp)
|
||||
# work around lack of musttail for gcc
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
target_compile_options(interleaving_test PRIVATE -Og
|
||||
-foptimize-sibling-calls)
|
||||
endif()
|
||||
target_link_libraries(interleaving_test PRIVATE nanobench)
|
||||
endif()
|
||||
|
||||
# packaging
|
||||
|
340
ConflictSet.cpp
340
ConflictSet.cpp
@@ -203,6 +203,60 @@ enum Type : int8_t {
|
||||
|
||||
template <class T> struct BoundedFreeListAllocator;
|
||||
|
||||
struct TaggedNodePointer {
|
||||
TaggedNodePointer() = default;
|
||||
operator struct Node *() { return (struct Node *)(uintptr_t)p; }
|
||||
operator struct Node0 *() {
|
||||
assert(t == Type_Node0);
|
||||
return (struct Node0 *)(uintptr_t)p;
|
||||
}
|
||||
operator struct Node3 *() {
|
||||
assert(t == Type_Node3);
|
||||
return (struct Node3 *)(uintptr_t)p;
|
||||
}
|
||||
operator struct Node16 *() {
|
||||
assert(t == Type_Node16);
|
||||
return (struct Node16 *)(uintptr_t)p;
|
||||
}
|
||||
operator struct Node48 *() {
|
||||
assert(t == Type_Node48);
|
||||
return (struct Node48 *)(uintptr_t)p;
|
||||
}
|
||||
operator struct Node256 *() {
|
||||
assert(t == Type_Node256);
|
||||
return (struct Node256 *)(uintptr_t)p;
|
||||
}
|
||||
/*implicit*/ TaggedNodePointer(std::nullptr_t) : p(0), t(Type_Node0) {}
|
||||
/*implicit*/ TaggedNodePointer(Node0 *x)
|
||||
: TaggedNodePointer((struct Node *)x, Type_Node0) {}
|
||||
/*implicit*/ TaggedNodePointer(Node3 *x)
|
||||
: TaggedNodePointer((struct Node *)x, Type_Node3) {}
|
||||
/*implicit*/ TaggedNodePointer(Node16 *x)
|
||||
: TaggedNodePointer((struct Node *)x, Type_Node16) {}
|
||||
/*implicit*/ TaggedNodePointer(Node48 *x)
|
||||
: TaggedNodePointer((struct Node *)x, Type_Node48) {}
|
||||
/*implicit*/ TaggedNodePointer(Node256 *x)
|
||||
: TaggedNodePointer((struct Node *)x, Type_Node256) {}
|
||||
|
||||
bool operator!=(std::nullptr_t) { return p != 0 || t != Type_Node0; }
|
||||
bool operator==(std::nullptr_t) { return p == 0 && t == Type_Node0; }
|
||||
bool operator==(const TaggedNodePointer &) const = default;
|
||||
bool operator==(Node *n) const { return (uintptr_t)n == p; }
|
||||
Node *operator->() { return (Node *)(uintptr_t)p; }
|
||||
Type getType() { return t; }
|
||||
|
||||
TaggedNodePointer(const TaggedNodePointer &) = default;
|
||||
TaggedNodePointer &operator=(const TaggedNodePointer &) = default;
|
||||
/*implicit*/ TaggedNodePointer(Node *n);
|
||||
|
||||
private:
|
||||
TaggedNodePointer(struct Node *p, Type t) : p((uintptr_t)p), t(t) {
|
||||
assume(p != 0);
|
||||
}
|
||||
uintptr_t p : 56;
|
||||
Type t : 8;
|
||||
};
|
||||
|
||||
struct Node {
|
||||
|
||||
/* begin section that's copied to the next node */
|
||||
@@ -228,6 +282,9 @@ private:
|
||||
int32_t partialKeyCapacity;
|
||||
};
|
||||
|
||||
TaggedNodePointer::TaggedNodePointer(Node *n)
|
||||
: TaggedNodePointer(n, n->getType()) {}
|
||||
|
||||
constexpr int kNodeCopyBegin = offsetof(Node, entry);
|
||||
constexpr int kNodeCopySize =
|
||||
offsetof(Node, parentsIndex) + sizeof(Node::parentsIndex) - kNodeCopyBegin;
|
||||
@@ -251,7 +308,7 @@ struct Node3 : Node {
|
||||
constexpr static auto kMaxNodes = 3;
|
||||
constexpr static auto kType = Type_Node3;
|
||||
|
||||
Node *children[kMaxNodes];
|
||||
TaggedNodePointer children[kMaxNodes];
|
||||
InternalVersionT childMaxVersion[kMaxNodes];
|
||||
// Sorted
|
||||
uint8_t index[kMaxNodes];
|
||||
@@ -267,7 +324,7 @@ struct Node16 : Node {
|
||||
constexpr static auto kType = Type_Node16;
|
||||
constexpr static auto kMaxNodes = 16;
|
||||
|
||||
Node *children[kMaxNodes];
|
||||
TaggedNodePointer children[kMaxNodes];
|
||||
InternalVersionT childMaxVersion[kMaxNodes];
|
||||
// Sorted
|
||||
uint8_t index[kMaxNodes];
|
||||
@@ -288,7 +345,7 @@ struct Node48 : Node {
|
||||
constexpr static int kMaxOfMaxTotalPages = kMaxNodes / kMaxOfMaxPageSize;
|
||||
|
||||
BitSet bitSet;
|
||||
Node *children[kMaxNodes];
|
||||
TaggedNodePointer children[kMaxNodes];
|
||||
InternalVersionT childMaxVersion[kMaxNodes];
|
||||
InternalVersionT maxOfMax[kMaxOfMaxTotalPages];
|
||||
uint8_t reverseIndex[kMaxNodes];
|
||||
@@ -310,7 +367,7 @@ struct Node256 : Node {
|
||||
constexpr static int kMaxOfMaxTotalPages = kMaxNodes / kMaxOfMaxPageSize;
|
||||
|
||||
BitSet bitSet;
|
||||
Node *children[kMaxNodes];
|
||||
TaggedNodePointer children[kMaxNodes];
|
||||
InternalVersionT childMaxVersion[kMaxNodes];
|
||||
InternalVersionT maxOfMax[kMaxOfMaxTotalPages];
|
||||
|
||||
@@ -766,6 +823,8 @@ private:
|
||||
|
||||
int getNodeIndex(Node3 *self, uint8_t index) {
|
||||
Node3 *n = (Node3 *)self;
|
||||
assume(n->numChildren >= 1);
|
||||
assume(n->numChildren <= 3);
|
||||
for (int i = 0; i < n->numChildren; ++i) {
|
||||
if (n->index[i] == index) {
|
||||
return i;
|
||||
@@ -774,6 +833,18 @@ int getNodeIndex(Node3 *self, uint8_t index) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
int getNodeIndexExists(Node3 *self, uint8_t index) {
|
||||
Node3 *n = (Node3 *)self;
|
||||
assume(n->numChildren >= 1);
|
||||
assume(n->numChildren <= 3);
|
||||
for (int i = 0; i < n->numChildren; ++i) {
|
||||
if (n->index[i] == index) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||
}
|
||||
|
||||
int getNodeIndex(Node16 *self, uint8_t index) {
|
||||
|
||||
#ifdef HAS_AVX
|
||||
@@ -834,27 +905,66 @@ int getNodeIndex(Node16 *self, uint8_t index) {
|
||||
#endif
|
||||
}
|
||||
|
||||
int getNodeIndexExists(Node16 *self, uint8_t index) {
|
||||
|
||||
#ifdef HAS_AVX
|
||||
__m128i key_vec = _mm_set1_epi8(index);
|
||||
__m128i indices;
|
||||
memcpy(&indices, self->index, Node16::kMaxNodes);
|
||||
__m128i results = _mm_cmpeq_epi8(key_vec, indices);
|
||||
uint32_t mask = (1 << self->numChildren) - 1;
|
||||
uint32_t bitfield = _mm_movemask_epi8(results) & mask;
|
||||
assume(bitfield != 0);
|
||||
return std::countr_zero(bitfield);
|
||||
#elif defined(HAS_ARM_NEON)
|
||||
// Based on
|
||||
// https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
|
||||
|
||||
uint8x16_t indices;
|
||||
memcpy(&indices, self->index, Node16::kMaxNodes);
|
||||
// 0xff for each match
|
||||
uint16x8_t results =
|
||||
vreinterpretq_u16_u8(vceqq_u8(vdupq_n_u8(index), indices));
|
||||
assume(self->numChildren <= Node16::kMaxNodes);
|
||||
uint64_t mask = self->numChildren == 16
|
||||
? uint64_t(-1)
|
||||
: (uint64_t(1) << (self->numChildren * 4)) - 1;
|
||||
// 0xf for each match in valid range
|
||||
uint64_t bitfield =
|
||||
vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0) & mask;
|
||||
assume(bitfield != 0);
|
||||
return std::countr_zero(bitfield) / 4;
|
||||
#else
|
||||
for (int i = 0; i < self->numChildren; ++i) {
|
||||
if (self->index[i] == index) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||
#endif
|
||||
}
|
||||
|
||||
// Precondition - an entry for index must exist in the node
|
||||
Node *&getChildExists(Node3 *self, uint8_t index) {
|
||||
return self->children[getNodeIndex(self, index)];
|
||||
TaggedNodePointer &getChildExists(Node3 *self, uint8_t index) {
|
||||
return self->children[getNodeIndexExists(self, index)];
|
||||
}
|
||||
// Precondition - an entry for index must exist in the node
|
||||
Node *&getChildExists(Node16 *self, uint8_t index) {
|
||||
return self->children[getNodeIndex(self, index)];
|
||||
TaggedNodePointer &getChildExists(Node16 *self, uint8_t index) {
|
||||
return self->children[getNodeIndexExists(self, index)];
|
||||
}
|
||||
// Precondition - an entry for index must exist in the node
|
||||
Node *&getChildExists(Node48 *self, uint8_t index) {
|
||||
TaggedNodePointer &getChildExists(Node48 *self, uint8_t index) {
|
||||
assert(self->bitSet.test(index));
|
||||
return self->children[self->index[index]];
|
||||
}
|
||||
// Precondition - an entry for index must exist in the node
|
||||
Node *&getChildExists(Node256 *self, uint8_t index) {
|
||||
TaggedNodePointer &getChildExists(Node256 *self, uint8_t index) {
|
||||
assert(self->bitSet.test(index));
|
||||
return self->children[index];
|
||||
}
|
||||
|
||||
// Precondition - an entry for index must exist in the node
|
||||
Node *&getChildExists(Node *self, uint8_t index) {
|
||||
TaggedNodePointer &getChildExists(Node *self, uint8_t index) {
|
||||
switch (self->getType()) {
|
||||
case Type_Node0: // GCOVR_EXCL_LINE
|
||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||
@@ -885,12 +995,12 @@ InternalVersionT maxVersion(Node *n) {
|
||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||
case Type_Node3: {
|
||||
auto *n3 = static_cast<Node3 *>(n);
|
||||
int i = getNodeIndex(n3, index);
|
||||
int i = getNodeIndexExists(n3, index);
|
||||
return n3->childMaxVersion[i];
|
||||
}
|
||||
case Type_Node16: {
|
||||
auto *n16 = static_cast<Node16 *>(n);
|
||||
int i = getNodeIndex(n16, index);
|
||||
int i = getNodeIndexExists(n16, index);
|
||||
return n16->childMaxVersion[i];
|
||||
}
|
||||
case Type_Node48: {
|
||||
@@ -918,12 +1028,12 @@ InternalVersionT exchangeMaxVersion(Node *n, InternalVersionT newMax) {
|
||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||
case Type_Node3: {
|
||||
auto *n3 = static_cast<Node3 *>(n);
|
||||
int i = getNodeIndex(n3, index);
|
||||
int i = getNodeIndexExists(n3, index);
|
||||
return std::exchange(n3->childMaxVersion[i], newMax);
|
||||
}
|
||||
case Type_Node16: {
|
||||
auto *n16 = static_cast<Node16 *>(n);
|
||||
int i = getNodeIndex(n16, index);
|
||||
int i = getNodeIndexExists(n16, index);
|
||||
return std::exchange(n16->childMaxVersion[i], newMax);
|
||||
}
|
||||
case Type_Node48: {
|
||||
@@ -952,13 +1062,13 @@ void setMaxVersion(Node *n, InternalVersionT newMax) {
|
||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||
case Type_Node3: {
|
||||
auto *n3 = static_cast<Node3 *>(n);
|
||||
int i = getNodeIndex(n3, index);
|
||||
int i = getNodeIndexExists(n3, index);
|
||||
n3->childMaxVersion[i] = newMax;
|
||||
return;
|
||||
}
|
||||
case Type_Node16: {
|
||||
auto *n16 = static_cast<Node16 *>(n);
|
||||
int i = getNodeIndex(n16, index);
|
||||
int i = getNodeIndexExists(n16, index);
|
||||
n16->childMaxVersion[i] = newMax;
|
||||
return;
|
||||
}
|
||||
@@ -985,7 +1095,7 @@ void setMaxVersion(Node *n, InternalVersionT newMax) {
|
||||
}
|
||||
}
|
||||
|
||||
Node *&getInTree(Node *n, ConflictSet::Impl *);
|
||||
TaggedNodePointer &getInTree(Node *n, ConflictSet::Impl *);
|
||||
|
||||
Node *getChild(Node0 *, uint8_t) { return nullptr; }
|
||||
Node *getChild(Node3 *self, uint8_t index) {
|
||||
@@ -1022,6 +1132,7 @@ Node *getChild(Node *self, uint8_t index) {
|
||||
struct ChildAndMaxVersion {
|
||||
Node *child;
|
||||
InternalVersionT maxVersion;
|
||||
Type childType;
|
||||
};
|
||||
|
||||
ChildAndMaxVersion getChildAndMaxVersion(Node0 *, uint8_t) { return {}; }
|
||||
@@ -1030,24 +1141,28 @@ ChildAndMaxVersion getChildAndMaxVersion(Node3 *self, uint8_t index) {
|
||||
if (i < 0) {
|
||||
return {};
|
||||
}
|
||||
return {self->children[i], self->childMaxVersion[i]};
|
||||
return {self->children[i], self->childMaxVersion[i],
|
||||
self->children[i].getType()};
|
||||
}
|
||||
ChildAndMaxVersion getChildAndMaxVersion(Node16 *self, uint8_t index) {
|
||||
int i = getNodeIndex(self, index);
|
||||
if (i < 0) {
|
||||
return {};
|
||||
}
|
||||
return {self->children[i], self->childMaxVersion[i]};
|
||||
return {self->children[i], self->childMaxVersion[i],
|
||||
self->children[i].getType()};
|
||||
}
|
||||
ChildAndMaxVersion getChildAndMaxVersion(Node48 *self, uint8_t index) {
|
||||
int i = self->index[index];
|
||||
if (i < 0) {
|
||||
return {};
|
||||
}
|
||||
return {self->children[i], self->childMaxVersion[i]};
|
||||
return {self->children[i], self->childMaxVersion[i],
|
||||
self->children[i].getType()};
|
||||
}
|
||||
ChildAndMaxVersion getChildAndMaxVersion(Node256 *self, uint8_t index) {
|
||||
return {self->children[index], self->childMaxVersion[index]};
|
||||
return {self->children[index], self->childMaxVersion[index],
|
||||
self->children[index].getType()};
|
||||
}
|
||||
|
||||
ChildAndMaxVersion getChildAndMaxVersion(Node *self, uint8_t index) {
|
||||
@@ -1070,6 +1185,8 @@ ChildAndMaxVersion getChildAndMaxVersion(Node *self, uint8_t index) {
|
||||
Node *getChildGeq(Node0 *, int) { return nullptr; }
|
||||
|
||||
Node *getChildGeq(Node3 *n, int child) {
|
||||
assume(n->numChildren >= 1);
|
||||
assume(n->numChildren <= 3);
|
||||
for (int i = 0; i < n->numChildren; ++i) {
|
||||
if (n->index[i] >= child) {
|
||||
return n->children[i];
|
||||
@@ -1190,14 +1307,15 @@ Node *getFirstChildExists(Node *self) {
|
||||
}
|
||||
}
|
||||
|
||||
void consumePartialKeyFull(Node *&self, std::span<const uint8_t> &key,
|
||||
void consumePartialKeyFull(TaggedNodePointer &self,
|
||||
std::span<const uint8_t> &key,
|
||||
InternalVersionT writeVersion, WriteContext *tls) {
|
||||
// Handle an existing partial key
|
||||
int commonLen = std::min<int>(self->partialKeyLen, key.size());
|
||||
int partialKeyIndex =
|
||||
longestCommonPrefix(self->partialKey(), key.data(), commonLen);
|
||||
if (partialKeyIndex < self->partialKeyLen) {
|
||||
auto *old = self;
|
||||
Node *old = self;
|
||||
// Since root cannot have a partial key
|
||||
assert(old->parent != nullptr);
|
||||
InternalVersionT oldMaxVersion = exchangeMaxVersion(old, writeVersion);
|
||||
@@ -1235,7 +1353,7 @@ void consumePartialKeyFull(Node *&self, std::span<const uint8_t> &key,
|
||||
// Consume any partial key of `self`, and update `self` and
|
||||
// `key` such that `self` is along the search path of `key`
|
||||
inline __attribute__((always_inline)) void
|
||||
consumePartialKey(Node *&self, std::span<const uint8_t> &key,
|
||||
consumePartialKey(TaggedNodePointer &self, std::span<const uint8_t> &key,
|
||||
InternalVersionT writeVersion, WriteContext *tls) {
|
||||
if (self->partialKeyLen > 0) {
|
||||
consumePartialKeyFull(self, key, writeVersion, tls);
|
||||
@@ -1246,8 +1364,10 @@ consumePartialKey(Node *&self, std::span<const uint8_t> &key,
|
||||
// such that the search path of the result + key is the same as the search path
|
||||
// of self + key before the call. Creates a node if necessary. Updates
|
||||
// `maxVersion` for result.
|
||||
Node *&getOrCreateChild(Node *&self, std::span<const uint8_t> &key,
|
||||
InternalVersionT newMaxVersion, WriteContext *tls) {
|
||||
TaggedNodePointer &getOrCreateChild(TaggedNodePointer &self,
|
||||
std::span<const uint8_t> &key,
|
||||
InternalVersionT newMaxVersion,
|
||||
WriteContext *tls) {
|
||||
|
||||
int index = key.front();
|
||||
key = key.subspan(1, key.size() - 1);
|
||||
@@ -1549,7 +1669,6 @@ __attribute__((target("avx512f"))) void rezero16(InternalVersionT *vs,
|
||||
_mm512_sub_epi32(_mm512_loadu_epi32(vs), zvec), _mm512_setzero_epi32());
|
||||
_mm512_mask_storeu_epi32(vs, m, zvec);
|
||||
}
|
||||
|
||||
__attribute__((target("default")))
|
||||
#endif
|
||||
|
||||
@@ -1606,10 +1725,11 @@ void rezero(Node *n, InternalVersionT z) {
|
||||
}
|
||||
#endif
|
||||
|
||||
void mergeWithChild(Node *&self, WriteContext *tls, ConflictSet::Impl *impl,
|
||||
Node *&dontInvalidate, Node3 *self3) {
|
||||
void mergeWithChild(TaggedNodePointer &self, WriteContext *tls,
|
||||
ConflictSet::Impl *impl, Node *&dontInvalidate,
|
||||
Node3 *self3) {
|
||||
assert(!self3->entryPresent);
|
||||
auto *child = self3->children[0];
|
||||
Node *child = self3->children[0];
|
||||
int minCapacity = self3->partialKeyLen + 1 + child->partialKeyLen;
|
||||
|
||||
if (minCapacity > child->getCapacity()) {
|
||||
@@ -1858,7 +1978,7 @@ bool checkPointRead(Node *n, const std::span<const uint8_t> key,
|
||||
goto downLeftSpine;
|
||||
}
|
||||
|
||||
auto [child, maxV] = getChildAndMaxVersion(n, remaining[0]);
|
||||
auto [child, maxV, childT] = getChildAndMaxVersion(n, remaining[0]);
|
||||
if (child == nullptr) {
|
||||
auto c = getChildGeq(n, remaining[0]);
|
||||
if (c != nullptr) {
|
||||
@@ -1928,7 +2048,7 @@ bool checkPrefixRead(Node *n, const std::span<const uint8_t> key,
|
||||
return maxVersion(n) <= readVersion;
|
||||
}
|
||||
|
||||
auto [child, maxV] = getChildAndMaxVersion(n, remaining[0]);
|
||||
auto [child, maxV, childT] = getChildAndMaxVersion(n, remaining[0]);
|
||||
if (child == nullptr) {
|
||||
auto c = getChildGeq(n, remaining[0]);
|
||||
if (c != nullptr) {
|
||||
@@ -2201,7 +2321,7 @@ bool checkMaxBetweenExclusiveImpl(Node *n, int begin, int end,
|
||||
if (!mask) {
|
||||
return true;
|
||||
}
|
||||
auto *child = self->children[std::countr_zero(mask)];
|
||||
Node *child = self->children[std::countr_zero(mask)];
|
||||
const bool firstRangeOk =
|
||||
!child->entryPresent || child->entry.rangeVersion <= readVersion;
|
||||
uint32_t compared = 0;
|
||||
@@ -2276,7 +2396,7 @@ bool checkMaxBetweenExclusiveImpl(Node *n, int begin, int end,
|
||||
if (!mask) {
|
||||
return true;
|
||||
}
|
||||
auto *child = self->children[std::countr_zero(mask)];
|
||||
Node *child = self->children[std::countr_zero(mask)];
|
||||
const bool firstRangeOk =
|
||||
!child->entryPresent || child->entry.rangeVersion <= readVersion;
|
||||
|
||||
@@ -2321,7 +2441,7 @@ bool checkMaxBetweenExclusiveImpl(Node *n, int begin, int end,
|
||||
{
|
||||
int c = self->bitSet.firstSetGeq(begin + 1);
|
||||
if (c >= 0 && c < end) {
|
||||
auto *child = self->children[self->index[c]];
|
||||
Node *child = self->children[self->index[c]];
|
||||
if (child->entryPresent && child->entry.rangeVersion > readVersion) {
|
||||
return false;
|
||||
}
|
||||
@@ -2355,7 +2475,7 @@ bool checkMaxBetweenExclusiveImpl(Node *n, int begin, int end,
|
||||
{
|
||||
int c = self->bitSet.firstSetGeq(begin + 1);
|
||||
if (c >= 0 && c < end) {
|
||||
auto *child = self->children[c];
|
||||
Node *child = self->children[c];
|
||||
if (child->entryPresent && child->entry.rangeVersion > readVersion) {
|
||||
return false;
|
||||
}
|
||||
@@ -2416,6 +2536,7 @@ checkMaxBetweenExclusive(Node *n, int begin, int end,
|
||||
}
|
||||
__attribute__((target("default")))
|
||||
#endif
|
||||
|
||||
bool checkMaxBetweenExclusive(Node *n, int begin, int end,
|
||||
InternalVersionT readVersion, ReadContext *tls) {
|
||||
return checkMaxBetweenExclusiveImpl<false>(n, begin, end, readVersion, tls);
|
||||
@@ -2526,7 +2647,7 @@ bool checkRangeLeftSide(Node *n, std::span<const uint8_t> key, int prefixLen,
|
||||
}
|
||||
}
|
||||
|
||||
auto [child, maxV] = getChildAndMaxVersion(n, remaining[0]);
|
||||
auto [child, maxV, childT] = getChildAndMaxVersion(n, remaining[0]);
|
||||
if (child == nullptr) {
|
||||
auto c = getChildGeq(n, remaining[0]);
|
||||
if (c != nullptr) {
|
||||
@@ -2714,7 +2835,7 @@ bool checkRangeRead(Node *n, std::span<const uint8_t> begin,
|
||||
if (remaining.size() == 0) {
|
||||
break;
|
||||
}
|
||||
auto [child, v] = getChildAndMaxVersion(n, remaining[0]);
|
||||
auto [child, v, childT] = getChildAndMaxVersion(n, remaining[0]);
|
||||
if (child == nullptr) {
|
||||
break;
|
||||
}
|
||||
@@ -2780,8 +2901,10 @@ checkMaxBetweenExclusiveImpl<true>(Node *n, int begin, int end,
|
||||
// of the result will have `maxVersion` set to `writeVersion` as a
|
||||
// postcondition. Nodes along the search path may be invalidated. Callers must
|
||||
// ensure that the max version of the self argument is updated.
|
||||
[[nodiscard]] Node **insert(Node **self, std::span<const uint8_t> key,
|
||||
InternalVersionT writeVersion, WriteContext *tls) {
|
||||
[[nodiscard]] TaggedNodePointer *insert(TaggedNodePointer *self,
|
||||
std::span<const uint8_t> key,
|
||||
InternalVersionT writeVersion,
|
||||
WriteContext *tls) {
|
||||
|
||||
for (; key.size() != 0; ++tls->accum.insert_iterations) {
|
||||
self = &getOrCreateChild(*self, key, writeVersion, tls);
|
||||
@@ -2809,17 +2932,23 @@ void eraseTree(Node *root, WriteContext *tls) {
|
||||
} break;
|
||||
case Type_Node3: {
|
||||
auto *n3 = static_cast<Node3 *>(n);
|
||||
toFree.append(std::span<Node *>(n3->children, n3->numChildren));
|
||||
for (int i = 0; i < n3->numChildren; ++i) {
|
||||
toFree.push_back(n3->children[i]);
|
||||
}
|
||||
tls->release(n3);
|
||||
} break;
|
||||
case Type_Node16: {
|
||||
auto *n16 = static_cast<Node16 *>(n);
|
||||
toFree.append(std::span<Node *>(n16->children, n16->numChildren));
|
||||
for (int i = 0; i < n16->numChildren; ++i) {
|
||||
toFree.push_back(n16->children[i]);
|
||||
}
|
||||
tls->release(n16);
|
||||
} break;
|
||||
case Type_Node48: {
|
||||
auto *n48 = static_cast<Node48 *>(n);
|
||||
toFree.append(std::span<Node *>(n48->children, n48->numChildren));
|
||||
for (int i = 0; i < n48->numChildren; ++i) {
|
||||
toFree.push_back(n48->children[i]);
|
||||
}
|
||||
tls->release(n48);
|
||||
} break;
|
||||
case Type_Node256: {
|
||||
@@ -2835,10 +2964,10 @@ void eraseTree(Node *root, WriteContext *tls) {
|
||||
}
|
||||
}
|
||||
|
||||
void addPointWrite(Node *&root, std::span<const uint8_t> key,
|
||||
void addPointWrite(TaggedNodePointer &root, std::span<const uint8_t> key,
|
||||
InternalVersionT writeVersion, WriteContext *tls) {
|
||||
++tls->accum.point_writes;
|
||||
auto *n = *insert(&root, key, writeVersion, tls);
|
||||
auto n = *insert(&root, key, writeVersion, tls);
|
||||
if (!n->entryPresent) {
|
||||
++tls->accum.entries_inserted;
|
||||
auto *p = nextLogical(n);
|
||||
@@ -2855,6 +2984,72 @@ void addPointWrite(Node *&root, std::span<const uint8_t> key,
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__)
|
||||
__attribute__((target("avx512f"))) InternalVersionT horizontalMaxUpTo16(
|
||||
InternalVersionT *vs, [[maybe_unused]] InternalVersionT z, int len) {
|
||||
assume(len <= 16);
|
||||
#if USE_64_BIT
|
||||
// Hope it gets vectorized
|
||||
InternalVersionT max = vs[0];
|
||||
for (int i = 1; i < len; ++i) {
|
||||
max = std::max(vs[i], max);
|
||||
}
|
||||
return max;
|
||||
#else
|
||||
uint32_t zero;
|
||||
memcpy(&zero, &z, sizeof(zero));
|
||||
auto zeroVec = _mm512_set1_epi32(zero);
|
||||
auto max = InternalVersionT(
|
||||
zero +
|
||||
_mm512_reduce_max_epi32(_mm512_sub_epi32(
|
||||
_mm512_mask_loadu_epi32(zeroVec, _mm512_int2mask((1 << len) - 1), vs),
|
||||
zeroVec)));
|
||||
return max;
|
||||
#endif
|
||||
}
|
||||
__attribute__((target("default")))
|
||||
#endif
|
||||
|
||||
InternalVersionT
|
||||
horizontalMaxUpTo16(InternalVersionT *vs, InternalVersionT, int len) {
|
||||
assume(len <= 16);
|
||||
InternalVersionT max = vs[0];
|
||||
for (int i = 1; i < len; ++i) {
|
||||
max = std::max(vs[i], max);
|
||||
}
|
||||
return max;
|
||||
}
|
||||
|
||||
#if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__)
|
||||
__attribute__((target("avx512f"))) InternalVersionT
|
||||
horizontalMax16(InternalVersionT *vs, [[maybe_unused]] InternalVersionT z) {
|
||||
#if USE_64_BIT
|
||||
// Hope it gets vectorized
|
||||
InternalVersionT max = vs[0];
|
||||
for (int i = 1; i < 16; ++i) {
|
||||
max = std::max(vs[i], max);
|
||||
}
|
||||
return max;
|
||||
#else
|
||||
uint32_t zero; // GCOVR_EXCL_LINE
|
||||
memcpy(&zero, &z, sizeof(zero));
|
||||
auto zeroVec = _mm512_set1_epi32(zero);
|
||||
return InternalVersionT(zero + _mm512_reduce_max_epi32(_mm512_sub_epi32(
|
||||
_mm512_loadu_epi32(vs), zeroVec)));
|
||||
#endif
|
||||
}
|
||||
__attribute__((target("default")))
|
||||
#endif
|
||||
|
||||
InternalVersionT
|
||||
horizontalMax16(InternalVersionT *vs, InternalVersionT) {
|
||||
InternalVersionT max = vs[0];
|
||||
for (int i = 1; i < 16; ++i) {
|
||||
max = std::max(vs[i], max);
|
||||
}
|
||||
return max;
|
||||
}
|
||||
|
||||
// Precondition: `node->entryPresent`, and node is not the root
|
||||
void fixupMaxVersion(Node *node, WriteContext *tls) {
|
||||
assert(node->parent);
|
||||
@@ -2866,15 +3061,13 @@ void fixupMaxVersion(Node *node, WriteContext *tls) {
|
||||
break;
|
||||
case Type_Node3: {
|
||||
auto *self3 = static_cast<Node3 *>(node);
|
||||
for (int i = 0; i < self3->numChildren; ++i) {
|
||||
max = std::max(self3->childMaxVersion[i], max);
|
||||
}
|
||||
max = std::max(max, horizontalMaxUpTo16(self3->childMaxVersion, tls->zero,
|
||||
self3->numChildren));
|
||||
} break;
|
||||
case Type_Node16: {
|
||||
auto *self16 = static_cast<Node16 *>(node);
|
||||
for (int i = 0; i < self16->numChildren; ++i) {
|
||||
max = std::max(self16->childMaxVersion[i], max);
|
||||
}
|
||||
max = std::max(max, horizontalMaxUpTo16(self16->childMaxVersion, tls->zero,
|
||||
self16->numChildren));
|
||||
} break;
|
||||
case Type_Node48: {
|
||||
auto *self48 = static_cast<Node48 *>(node);
|
||||
@@ -2884,9 +3077,7 @@ void fixupMaxVersion(Node *node, WriteContext *tls) {
|
||||
} break;
|
||||
case Type_Node256: {
|
||||
auto *self256 = static_cast<Node256 *>(node);
|
||||
for (auto v : self256->maxOfMax) {
|
||||
max = std::max(v, max);
|
||||
}
|
||||
max = std::max(max, horizontalMax16(self256->childMaxVersion, tls->zero));
|
||||
} break;
|
||||
default: // GCOVR_EXCL_LINE
|
||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||
@@ -2894,7 +3085,7 @@ void fixupMaxVersion(Node *node, WriteContext *tls) {
|
||||
setMaxVersion(node, max);
|
||||
}
|
||||
|
||||
void addWriteRange(Node *&root, std::span<const uint8_t> begin,
|
||||
void addWriteRange(TaggedNodePointer &root, std::span<const uint8_t> begin,
|
||||
std::span<const uint8_t> end, InternalVersionT writeVersion,
|
||||
WriteContext *tls, ConflictSet::Impl *impl) {
|
||||
|
||||
@@ -2907,12 +3098,12 @@ void addWriteRange(Node *&root, std::span<const uint8_t> begin,
|
||||
++tls->accum.range_writes;
|
||||
const bool beginIsPrefix = lcp == int(begin.size());
|
||||
|
||||
Node **useAsRoot = insert(&root, begin.subspan(0, lcp), writeVersion, tls);
|
||||
auto useAsRoot = insert(&root, begin.subspan(0, lcp), writeVersion, tls);
|
||||
|
||||
begin = begin.subspan(lcp, begin.size() - lcp);
|
||||
end = end.subspan(lcp, end.size() - lcp);
|
||||
|
||||
auto *beginNode = *insert(useAsRoot, begin, writeVersion, tls);
|
||||
Node *beginNode = *insert(useAsRoot, begin, writeVersion, tls);
|
||||
addKey(beginNode);
|
||||
if (!beginNode->entryPresent) {
|
||||
++tls->accum.entries_inserted;
|
||||
@@ -2923,7 +3114,7 @@ void addWriteRange(Node *&root, std::span<const uint8_t> begin,
|
||||
}
|
||||
beginNode->entry.pointVersion = writeVersion;
|
||||
|
||||
auto *endNode = *insert(useAsRoot, end, writeVersion, tls);
|
||||
Node *endNode = *insert(useAsRoot, end, writeVersion, tls);
|
||||
addKey(endNode);
|
||||
if (!endNode->entryPresent) {
|
||||
++tls->accum.entries_inserted;
|
||||
@@ -2943,7 +3134,7 @@ void addWriteRange(Node *&root, std::span<const uint8_t> begin,
|
||||
assert(!beginNode->endOfRange);
|
||||
assert(!endNode->endOfRange);
|
||||
endNode->endOfRange = true;
|
||||
auto *iter = beginNode;
|
||||
Node *iter = beginNode;
|
||||
for (iter = nextLogical(iter); !iter->endOfRange;
|
||||
iter = erase(iter, tls, impl, /*logical*/ true)) {
|
||||
assert(!iter->endOfRange);
|
||||
@@ -3258,7 +3449,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
||||
std::span<const uint8_t> removalKey;
|
||||
int64_t keyUpdates;
|
||||
|
||||
Node *root;
|
||||
TaggedNodePointer root;
|
||||
InternalVersionT oldestVersion;
|
||||
int64_t oldestVersionFullPrecision;
|
||||
int64_t oldestExtantVersion;
|
||||
@@ -3339,7 +3530,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
||||
}
|
||||
};
|
||||
|
||||
Node *&getInTree(Node *n, ConflictSet::Impl *impl) {
|
||||
TaggedNodePointer &getInTree(Node *n, ConflictSet::Impl *impl) {
|
||||
return n->parent == nullptr ? impl->root
|
||||
: getChildExists(n->parent, n->parentsIndex);
|
||||
}
|
||||
@@ -3923,7 +4114,6 @@ struct __attribute__((visibility("default"))) PeakPrinter {
|
||||
|
||||
#ifdef ENABLE_MAIN
|
||||
|
||||
#define ANKERL_NANOBENCH_IMPLEMENT
|
||||
#include "third_party/nanobench.h"
|
||||
|
||||
template <int kN> void benchRezero() {
|
||||
@@ -3979,6 +4169,24 @@ template <int kN> void benchScan2() {
|
||||
});
|
||||
}
|
||||
|
||||
void benchHorizontal16() {
|
||||
ankerl::nanobench::Bench bench;
|
||||
InternalVersionT vs[16];
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
vs[i] = InternalVersionT(rand() % 1000 + 1000);
|
||||
}
|
||||
#if !USE_64_BIT
|
||||
InternalVersionT::zero = InternalVersionT(rand() % 1000);
|
||||
#endif
|
||||
bench.run("horizontal16", [&]() {
|
||||
bench.doNotOptimizeAway(horizontalMax16(vs, InternalVersionT::zero));
|
||||
});
|
||||
int x = rand() % 15 + 1;
|
||||
bench.run("horizontalUpTo16", [&]() {
|
||||
bench.doNotOptimizeAway(horizontalMaxUpTo16(vs, InternalVersionT::zero, x));
|
||||
});
|
||||
}
|
||||
|
||||
void benchLCP(int len) {
|
||||
ankerl::nanobench::Bench bench;
|
||||
std::vector<uint8_t> lhs(len);
|
||||
@@ -4011,11 +4219,7 @@ void printTree() {
|
||||
debugPrintDot(stdout, cs.root, &cs);
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
for (int i = 0; i < 256; ++i) {
|
||||
benchLCP(i);
|
||||
}
|
||||
}
|
||||
int main(void) { benchHorizontal16(); }
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_FUZZ
|
||||
|
256
InterleavingTest.cpp
Normal file
256
InterleavingTest.cpp
Normal file
@@ -0,0 +1,256 @@
|
||||
#include <alloca.h>
|
||||
#include <cassert>
|
||||
#ifdef __x86_64__
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
#include "third_party/nanobench.h"
|
||||
|
||||
struct Job {
|
||||
int *input;
|
||||
// Returned void* is a function pointer to the next continuation. We have to
|
||||
// use void* because otherwise the type would be recursive.
|
||||
typedef void *(*continuation)(Job *);
|
||||
continuation next;
|
||||
};
|
||||
|
||||
void *stepJob(Job *j) {
|
||||
auto done = --(*j->input) == 0;
|
||||
#ifdef __x86_64__
|
||||
_mm_clflush(j->input);
|
||||
#endif
|
||||
return done ? nullptr : (void *)stepJob;
|
||||
}
|
||||
|
||||
void sequential(Job **jobs, int count) {
|
||||
for (int i = 0; i < count; ++i) {
|
||||
do {
|
||||
jobs[i]->next = (Job::continuation)jobs[i]->next(jobs[i]);
|
||||
} while (jobs[i]->next);
|
||||
}
|
||||
}
|
||||
|
||||
void sequentialNoFuncPtr(Job **jobs, int count) {
|
||||
for (int i = 0; i < count; ++i) {
|
||||
while (stepJob(jobs[i]))
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
void interleaveSwapping(Job **jobs, int remaining) {
|
||||
int current = 0;
|
||||
while (remaining > 0) {
|
||||
auto next = (Job::continuation)jobs[current]->next(jobs[current]);
|
||||
jobs[current]->next = next;
|
||||
if (next == nullptr) {
|
||||
jobs[current] = jobs[remaining - 1];
|
||||
--remaining;
|
||||
} else {
|
||||
++current;
|
||||
}
|
||||
if (current == remaining) {
|
||||
current = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void interleaveBoundedCyclicList(Job **jobs, int count) {
|
||||
if (count == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
constexpr int kConcurrent = 32;
|
||||
Job *inProgress[kConcurrent];
|
||||
int nextJob[kConcurrent];
|
||||
|
||||
int started = std::min(kConcurrent, count);
|
||||
for (int i = 0; i < started; i++) {
|
||||
inProgress[i] = jobs[i];
|
||||
nextJob[i] = i + 1;
|
||||
}
|
||||
nextJob[started - 1] = 0;
|
||||
|
||||
int prevJob = started - 1;
|
||||
int job = 0;
|
||||
for (;;) {
|
||||
auto next = (Job::continuation)inProgress[job]->next(inProgress[job]);
|
||||
inProgress[job]->next = next;
|
||||
if (next == nullptr) {
|
||||
if (started == count) {
|
||||
if (prevJob == job)
|
||||
break;
|
||||
nextJob[prevJob] = nextJob[job];
|
||||
job = prevJob;
|
||||
} else {
|
||||
int temp = started++;
|
||||
inProgress[job] = jobs[temp];
|
||||
}
|
||||
}
|
||||
prevJob = job;
|
||||
job = nextJob[job];
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef __has_attribute
|
||||
#define __has_attribute(x) 0
|
||||
#endif
|
||||
|
||||
#if __has_attribute(musttail)
|
||||
#define MUSTTAIL __attribute__((musttail))
|
||||
#else
|
||||
#define MUSTTAIL
|
||||
#endif
|
||||
|
||||
struct Context {
|
||||
constexpr static int kConcurrent = 32;
|
||||
Job **jobs;
|
||||
Job *inProgress[kConcurrent];
|
||||
void (*continuation[kConcurrent])(Context *, int64_t prevJob, int64_t job,
|
||||
int64_t started, int64_t count);
|
||||
int nextJob[kConcurrent];
|
||||
};
|
||||
|
||||
void keepGoing(Context *context, int64_t prevJob, int64_t job, int64_t started,
|
||||
int64_t count) {
|
||||
prevJob = job;
|
||||
job = context->nextJob[job];
|
||||
MUSTTAIL return context->continuation[job](context, prevJob, job, started,
|
||||
count);
|
||||
}
|
||||
|
||||
void stepJobTailCall(Context *context, int64_t prevJob, int64_t job,
|
||||
int64_t started, int64_t count);
|
||||
|
||||
void complete(Context *context, int64_t prevJob, int64_t job, int64_t started,
|
||||
int64_t count) {
|
||||
if (started == count) {
|
||||
if (prevJob == job) {
|
||||
return;
|
||||
}
|
||||
context->nextJob[prevJob] = context->nextJob[job];
|
||||
job = prevJob;
|
||||
} else {
|
||||
context->inProgress[job] = context->jobs[started++];
|
||||
context->continuation[job] = stepJobTailCall;
|
||||
}
|
||||
prevJob = job;
|
||||
job = context->nextJob[job];
|
||||
MUSTTAIL return context->continuation[job](context, prevJob, job, started,
|
||||
count);
|
||||
}
|
||||
|
||||
void stepJobTailCall(Context *context, int64_t prevJob, int64_t job,
|
||||
int64_t started, int64_t count) {
|
||||
auto *j = context->inProgress[job];
|
||||
auto done = --(*j->input) == 0;
|
||||
#ifdef __x86_64__
|
||||
_mm_clflush(j->input);
|
||||
#endif
|
||||
if (done) {
|
||||
MUSTTAIL return complete(context, prevJob, job, started, count);
|
||||
} else {
|
||||
context->continuation[job] = stepJobTailCall;
|
||||
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
|
||||
}
|
||||
}
|
||||
|
||||
void useTailCalls(Job **jobs, int count) {
|
||||
if (count == 0) {
|
||||
return;
|
||||
}
|
||||
Context context;
|
||||
context.jobs = jobs;
|
||||
int64_t started = std::min(Context::kConcurrent, count);
|
||||
for (int i = 0; i < started; i++) {
|
||||
context.inProgress[i] = jobs[i];
|
||||
context.nextJob[i] = i + 1;
|
||||
context.continuation[i] = stepJobTailCall;
|
||||
}
|
||||
context.nextJob[started - 1] = 0;
|
||||
int prevJob = started - 1;
|
||||
int job = 0;
|
||||
return context.continuation[job](&context, prevJob, job, started, count);
|
||||
}
|
||||
|
||||
void interleaveCyclicList(Job **jobs, int count) {
|
||||
auto *nextJob = (int *)alloca(sizeof(int) * count);
|
||||
|
||||
for (int i = 0; i < count - 1; ++i) {
|
||||
nextJob[i] = i + 1;
|
||||
}
|
||||
nextJob[count - 1] = 0;
|
||||
|
||||
int prevJob = count - 1;
|
||||
int job = 0;
|
||||
for (;;) {
|
||||
auto next = (Job::continuation)jobs[job]->next(jobs[job]);
|
||||
jobs[job]->next = next;
|
||||
if (next == nullptr) {
|
||||
if (prevJob == job)
|
||||
break;
|
||||
nextJob[prevJob] = nextJob[job];
|
||||
job = prevJob;
|
||||
}
|
||||
prevJob = job;
|
||||
job = nextJob[job];
|
||||
}
|
||||
}
|
||||
|
||||
int main() {
|
||||
ankerl::nanobench::Bench bench;
|
||||
|
||||
constexpr int kNumJobs = 10000;
|
||||
bench.relative(true);
|
||||
|
||||
Job jobs[kNumJobs];
|
||||
Job jobsCopy[kNumJobs];
|
||||
int iters = 0;
|
||||
int originalInput[kNumJobs];
|
||||
for (int i = 0; i < kNumJobs; ++i) {
|
||||
originalInput[i] = rand() % 5 + 3;
|
||||
jobs[i].input = new int{originalInput[i]};
|
||||
jobs[i].next = stepJob;
|
||||
iters += *jobs[i].input;
|
||||
}
|
||||
bench.batch(iters);
|
||||
|
||||
for (auto [scheduler, name] :
|
||||
{std::make_pair(sequentialNoFuncPtr, "sequentialNoFuncPtr"),
|
||||
std::make_pair(sequential, "sequential"),
|
||||
std::make_pair(useTailCalls, "useTailCalls"),
|
||||
std::make_pair(interleaveSwapping, "interleavingSwapping"),
|
||||
std::make_pair(interleaveBoundedCyclicList,
|
||||
"interleaveBoundedCyclicList"),
|
||||
std::make_pair(interleaveCyclicList, "interleaveCyclicList")}) {
|
||||
for (int i = 0; i < kNumJobs; ++i) {
|
||||
*jobs[i].input = originalInput[i];
|
||||
}
|
||||
memcpy(jobsCopy, jobs, sizeof(jobs));
|
||||
Job *ps[kNumJobs];
|
||||
for (int i = 0; i < kNumJobs; ++i) {
|
||||
ps[i] = jobsCopy + i;
|
||||
}
|
||||
scheduler(ps, kNumJobs);
|
||||
for (int i = 0; i < kNumJobs; ++i) {
|
||||
if (*jobsCopy[i].input != 0) {
|
||||
fprintf(stderr, "%s failed\n", name);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
bench.run(name, [&]() {
|
||||
for (int i = 0; i < kNumJobs; ++i) {
|
||||
*jobs[i].input = originalInput[i];
|
||||
}
|
||||
memcpy(jobsCopy, jobs, sizeof(jobs));
|
||||
Job *ps[kNumJobs];
|
||||
for (int i = 0; i < kNumJobs; ++i) {
|
||||
ps[i] = jobsCopy + i;
|
||||
}
|
||||
scheduler(ps, kNumJobs);
|
||||
});
|
||||
}
|
||||
for (int i = 0; i < kNumJobs; ++i) {
|
||||
delete jobs[i].input;
|
||||
}
|
||||
}
|
BIN
corpus/1863492b4bfa4e57a2dd04457f45dd1adbc1b43b
Normal file
BIN
corpus/1863492b4bfa4e57a2dd04457f45dd1adbc1b43b
Normal file
Binary file not shown.
BIN
corpus/1b73191605897b8fb531d18ce6309d5bd5118268
Normal file
BIN
corpus/1b73191605897b8fb531d18ce6309d5bd5118268
Normal file
Binary file not shown.
BIN
corpus/1d73c395d1885ce584ddc66ebea3137a89222307
Normal file
BIN
corpus/1d73c395d1885ce584ddc66ebea3137a89222307
Normal file
Binary file not shown.
BIN
corpus/290bc08207a6de4b9adceb42003d4aeb5758bfdd
Normal file
BIN
corpus/290bc08207a6de4b9adceb42003d4aeb5758bfdd
Normal file
Binary file not shown.
BIN
corpus/33cfb152724161924444fe07e0cdb07b8b8462fc
Normal file
BIN
corpus/33cfb152724161924444fe07e0cdb07b8b8462fc
Normal file
Binary file not shown.
BIN
corpus/3dd78f16a8656e4839afb2eb4cdc7a028f4d28b8
Normal file
BIN
corpus/3dd78f16a8656e4839afb2eb4cdc7a028f4d28b8
Normal file
Binary file not shown.
BIN
corpus/43950334272a0d818f646b118439717c19a99837
Normal file
BIN
corpus/43950334272a0d818f646b118439717c19a99837
Normal file
Binary file not shown.
BIN
corpus/4920f8c7afdef94d228a5683c8d31cebf40c27a8
Normal file
BIN
corpus/4920f8c7afdef94d228a5683c8d31cebf40c27a8
Normal file
Binary file not shown.
BIN
corpus/7e8f30ffd0b8f665d87159020d0c977bca72a972
Normal file
BIN
corpus/7e8f30ffd0b8f665d87159020d0c977bca72a972
Normal file
Binary file not shown.
BIN
corpus/904e5d8ff4df7175e5d97536d687bbfa2a8b6c50
Normal file
BIN
corpus/904e5d8ff4df7175e5d97536d687bbfa2a8b6c50
Normal file
Binary file not shown.
BIN
corpus/9097c0a5f4a1a725413d8ac7b5a42c1c17b6eb31
Normal file
BIN
corpus/9097c0a5f4a1a725413d8ac7b5a42c1c17b6eb31
Normal file
Binary file not shown.
BIN
corpus/91bf7637038424fdff9a6fea9afc9166399284f7
Normal file
BIN
corpus/91bf7637038424fdff9a6fea9afc9166399284f7
Normal file
Binary file not shown.
BIN
corpus/98060955d1d4b9ab23969fc8715d64d90b41b498
Normal file
BIN
corpus/98060955d1d4b9ab23969fc8715d64d90b41b498
Normal file
Binary file not shown.
BIN
corpus/a21decffaafb3e49c2f4984833f6cfe1d9d1d3ca
Normal file
BIN
corpus/a21decffaafb3e49c2f4984833f6cfe1d9d1d3ca
Normal file
Binary file not shown.
BIN
corpus/b38d17b3b3f4689098f13451b448688e3e2e0d4d
Normal file
BIN
corpus/b38d17b3b3f4689098f13451b448688e3e2e0d4d
Normal file
Binary file not shown.
BIN
corpus/bef22e17c90d2b121dbd1f48ba9e09b3684f54e1
Normal file
BIN
corpus/bef22e17c90d2b121dbd1f48ba9e09b3684f54e1
Normal file
Binary file not shown.
BIN
corpus/c90e64707700c71cd8848ace4f34825766255457
Normal file
BIN
corpus/c90e64707700c71cd8848ace4f34825766255457
Normal file
Binary file not shown.
BIN
corpus/cc104b98c539e338887b8d9d3aac60561e7fbfff
Normal file
BIN
corpus/cc104b98c539e338887b8d9d3aac60561e7fbfff
Normal file
Binary file not shown.
BIN
corpus/cc555c8529d6c31423947a7a4dcfdddfd713e5e7
Normal file
BIN
corpus/cc555c8529d6c31423947a7a4dcfdddfd713e5e7
Normal file
Binary file not shown.
BIN
corpus/d46a73ca220fd8462d7e07ddfd008721b682e9b1
Normal file
BIN
corpus/d46a73ca220fd8462d7e07ddfd008721b682e9b1
Normal file
Binary file not shown.
BIN
corpus/e2aa45c8babd79c5ae6ec0b53757f1a23d8f1c62
Normal file
BIN
corpus/e2aa45c8babd79c5ae6ec0b53757f1a23d8f1c62
Normal file
Binary file not shown.
2
nanobench.cpp
Normal file
2
nanobench.cpp
Normal file
@@ -0,0 +1,2 @@
|
||||
#define ANKERL_NANOBENCH_IMPLEMENT
|
||||
#include "third_party/nanobench.h"
|
Reference in New Issue
Block a user