Compare commits
11 Commits
interleave
...
d8f85dedc4
Author | SHA1 | Date | |
---|---|---|---|
d8f85dedc4 | |||
656939560b | |||
5580f9b71d | |||
628d16b7e6 | |||
d9e4a7d1b6 | |||
52201fa4c7 | |||
0814822d82 | |||
41df2398e8 | |||
84c4d0fcba | |||
6241533dfb | |||
0abf6a1ecf |
@@ -24,14 +24,6 @@ repos:
|
|||||||
entry: "^#define SHOW_MEMORY 1$"
|
entry: "^#define SHOW_MEMORY 1$"
|
||||||
language: pygrep
|
language: pygrep
|
||||||
types: [c++]
|
types: [c++]
|
||||||
- repo: local
|
|
||||||
hooks:
|
|
||||||
- id: sim cache misses check
|
|
||||||
name: disallow checking in SIM_CACHE_MISSES=1
|
|
||||||
description: disallow checking in SIM_CACHE_MISSES=1
|
|
||||||
entry: "^#define SIM_CACHE_MISSES 1$"
|
|
||||||
language: pygrep
|
|
||||||
types: [c++]
|
|
||||||
- repo: https://github.com/shellcheck-py/shellcheck-py
|
- repo: https://github.com/shellcheck-py/shellcheck-py
|
||||||
rev: a23f6b85d0fdd5bb9d564e2579e678033debbdff # frozen: v0.10.0.1
|
rev: a23f6b85d0fdd5bb9d564e2579e678033debbdff # frozen: v0.10.0.1
|
||||||
hooks:
|
hooks:
|
||||||
|
@@ -248,6 +248,19 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND BUILD_TESTING)
|
|||||||
add_test(NAME conflict_set_blackbox_${hash} COMMAND driver ${TEST})
|
add_test(NAME conflict_set_blackbox_${hash} COMMAND driver ${TEST})
|
||||||
endforeach()
|
endforeach()
|
||||||
|
|
||||||
|
find_program(VALGRIND_EXE valgrind)
|
||||||
|
if(VALGRIND_EXE AND NOT CMAKE_CROSSCOMPILING)
|
||||||
|
list(LENGTH CORPUS_TESTS len)
|
||||||
|
math(EXPR last "${len} - 1")
|
||||||
|
set(partition_size 100)
|
||||||
|
foreach(i RANGE 0 ${last} ${partition_size})
|
||||||
|
list(SUBLIST CORPUS_TESTS ${i} ${partition_size} partition)
|
||||||
|
add_test(NAME conflict_set_blackbox_valgrind_${i}
|
||||||
|
COMMAND ${VALGRIND_EXE} --error-exitcode=99 --
|
||||||
|
$<TARGET_FILE:driver> ${partition})
|
||||||
|
endforeach()
|
||||||
|
endif()
|
||||||
|
|
||||||
# scripted tests. Written manually to fill in anything libfuzzer couldn't
|
# scripted tests. Written manually to fill in anything libfuzzer couldn't
|
||||||
# find.
|
# find.
|
||||||
if(NOT CMAKE_CROSSCOMPILING)
|
if(NOT CMAKE_CROSSCOMPILING)
|
||||||
@@ -268,19 +281,14 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND BUILD_TESTING)
|
|||||||
${Python3_EXECUTABLE}
|
${Python3_EXECUTABLE}
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/test_conflict_set.py test ${TEST}
|
${CMAKE_CURRENT_SOURCE_DIR}/test_conflict_set.py test ${TEST}
|
||||||
--build-dir ${CMAKE_CURRENT_BINARY_DIR})
|
--build-dir ${CMAKE_CURRENT_BINARY_DIR})
|
||||||
endforeach()
|
if(VALGRIND_EXE AND NOT CMAKE_CROSSCOMPILING)
|
||||||
endif()
|
add_test(
|
||||||
|
NAME script_test_${TEST}_valgrind
|
||||||
find_program(VALGRIND_EXE valgrind)
|
COMMAND
|
||||||
if(VALGRIND_EXE AND NOT CMAKE_CROSSCOMPILING)
|
${VALGRIND_EXE} ${Python3_EXECUTABLE}
|
||||||
list(LENGTH CORPUS_TESTS len)
|
${CMAKE_CURRENT_SOURCE_DIR}/test_conflict_set.py test ${TEST}
|
||||||
math(EXPR last "${len} - 1")
|
--build-dir ${CMAKE_CURRENT_BINARY_DIR})
|
||||||
set(partition_size 100)
|
endif()
|
||||||
foreach(i RANGE 0 ${last} ${partition_size})
|
|
||||||
list(SUBLIST CORPUS_TESTS ${i} ${partition_size} partition)
|
|
||||||
add_test(NAME conflict_set_blackbox_valgrind_${i}
|
|
||||||
COMMAND ${VALGRIND_EXE} --error-exitcode=99 --
|
|
||||||
$<TARGET_FILE:driver> ${partition})
|
|
||||||
endforeach()
|
endforeach()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
@@ -350,11 +358,6 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND BUILD_TESTING)
|
|||||||
set_target_properties(server_bench PROPERTIES SKIP_BUILD_RPATH ON)
|
set_target_properties(server_bench PROPERTIES SKIP_BUILD_RPATH ON)
|
||||||
|
|
||||||
add_executable(interleaving_test InterleavingTest.cpp)
|
add_executable(interleaving_test InterleavingTest.cpp)
|
||||||
# work around lack of musttail for gcc
|
|
||||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_BUILD_TYPE STREQUAL "Debug")
|
|
||||||
target_compile_options(interleaving_test PRIVATE -Og
|
|
||||||
-foptimize-sibling-calls)
|
|
||||||
endif()
|
|
||||||
target_link_libraries(interleaving_test PRIVATE nanobench)
|
target_link_libraries(interleaving_test PRIVATE nanobench)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
505
ConflictSet.cpp
505
ConflictSet.cpp
@@ -48,17 +48,6 @@ limitations under the License.
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define SIM_CACHE_MISSES 0
|
|
||||||
#if SIM_CACHE_MISSES
|
|
||||||
constexpr void simCacheMiss(void *x) {
|
|
||||||
if (x) {
|
|
||||||
_mm_clflush(x);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
constexpr void simCacheMiss(void *) {}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <memcheck.h>
|
#include <memcheck.h>
|
||||||
|
|
||||||
using namespace weaselab;
|
using namespace weaselab;
|
||||||
@@ -777,6 +766,8 @@ private:
|
|||||||
|
|
||||||
int getNodeIndex(Node3 *self, uint8_t index) {
|
int getNodeIndex(Node3 *self, uint8_t index) {
|
||||||
Node3 *n = (Node3 *)self;
|
Node3 *n = (Node3 *)self;
|
||||||
|
assume(n->numChildren >= 1);
|
||||||
|
assume(n->numChildren <= 3);
|
||||||
for (int i = 0; i < n->numChildren; ++i) {
|
for (int i = 0; i < n->numChildren; ++i) {
|
||||||
if (n->index[i] == index) {
|
if (n->index[i] == index) {
|
||||||
return i;
|
return i;
|
||||||
@@ -785,6 +776,18 @@ int getNodeIndex(Node3 *self, uint8_t index) {
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int getNodeIndexExists(Node3 *self, uint8_t index) {
|
||||||
|
Node3 *n = (Node3 *)self;
|
||||||
|
assume(n->numChildren >= 1);
|
||||||
|
assume(n->numChildren <= 3);
|
||||||
|
for (int i = 0; i < n->numChildren; ++i) {
|
||||||
|
if (n->index[i] == index) {
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||||
|
}
|
||||||
|
|
||||||
int getNodeIndex(Node16 *self, uint8_t index) {
|
int getNodeIndex(Node16 *self, uint8_t index) {
|
||||||
|
|
||||||
#ifdef HAS_AVX
|
#ifdef HAS_AVX
|
||||||
@@ -845,31 +848,62 @@ int getNodeIndex(Node16 *self, uint8_t index) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int getNodeIndexExists(Node16 *self, uint8_t index) {
|
||||||
|
|
||||||
|
#ifdef HAS_AVX
|
||||||
|
__m128i key_vec = _mm_set1_epi8(index);
|
||||||
|
__m128i indices;
|
||||||
|
memcpy(&indices, self->index, Node16::kMaxNodes);
|
||||||
|
__m128i results = _mm_cmpeq_epi8(key_vec, indices);
|
||||||
|
uint32_t mask = (1 << self->numChildren) - 1;
|
||||||
|
uint32_t bitfield = _mm_movemask_epi8(results) & mask;
|
||||||
|
assume(bitfield != 0);
|
||||||
|
return std::countr_zero(bitfield);
|
||||||
|
#elif defined(HAS_ARM_NEON)
|
||||||
|
// Based on
|
||||||
|
// https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
|
||||||
|
|
||||||
|
uint8x16_t indices;
|
||||||
|
memcpy(&indices, self->index, Node16::kMaxNodes);
|
||||||
|
// 0xff for each match
|
||||||
|
uint16x8_t results =
|
||||||
|
vreinterpretq_u16_u8(vceqq_u8(vdupq_n_u8(index), indices));
|
||||||
|
assume(self->numChildren <= Node16::kMaxNodes);
|
||||||
|
uint64_t mask = self->numChildren == 16
|
||||||
|
? uint64_t(-1)
|
||||||
|
: (uint64_t(1) << (self->numChildren * 4)) - 1;
|
||||||
|
// 0xf for each match in valid range
|
||||||
|
uint64_t bitfield =
|
||||||
|
vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0) & mask;
|
||||||
|
assume(bitfield != 0);
|
||||||
|
return std::countr_zero(bitfield) / 4;
|
||||||
|
#else
|
||||||
|
for (int i = 0; i < self->numChildren; ++i) {
|
||||||
|
if (self->index[i] == index) {
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
// Precondition - an entry for index must exist in the node
|
// Precondition - an entry for index must exist in the node
|
||||||
Node *&getChildExists(Node3 *self, uint8_t index) {
|
Node *&getChildExists(Node3 *self, uint8_t index) {
|
||||||
auto &result = self->children[getNodeIndex(self, index)];
|
return self->children[getNodeIndexExists(self, index)];
|
||||||
simCacheMiss(result);
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
// Precondition - an entry for index must exist in the node
|
// Precondition - an entry for index must exist in the node
|
||||||
Node *&getChildExists(Node16 *self, uint8_t index) {
|
Node *&getChildExists(Node16 *self, uint8_t index) {
|
||||||
auto &result = self->children[getNodeIndex(self, index)];
|
return self->children[getNodeIndexExists(self, index)];
|
||||||
simCacheMiss(result);
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
// Precondition - an entry for index must exist in the node
|
// Precondition - an entry for index must exist in the node
|
||||||
Node *&getChildExists(Node48 *self, uint8_t index) {
|
Node *&getChildExists(Node48 *self, uint8_t index) {
|
||||||
assert(self->bitSet.test(index));
|
assert(self->bitSet.test(index));
|
||||||
auto &result = self->children[self->index[index]];
|
return self->children[self->index[index]];
|
||||||
simCacheMiss(result);
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
// Precondition - an entry for index must exist in the node
|
// Precondition - an entry for index must exist in the node
|
||||||
Node *&getChildExists(Node256 *self, uint8_t index) {
|
Node *&getChildExists(Node256 *self, uint8_t index) {
|
||||||
assert(self->bitSet.test(index));
|
assert(self->bitSet.test(index));
|
||||||
auto &result = self->children[index];
|
return self->children[index];
|
||||||
simCacheMiss(result);
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Precondition - an entry for index must exist in the node
|
// Precondition - an entry for index must exist in the node
|
||||||
@@ -904,12 +938,12 @@ InternalVersionT maxVersion(Node *n) {
|
|||||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||||
case Type_Node3: {
|
case Type_Node3: {
|
||||||
auto *n3 = static_cast<Node3 *>(n);
|
auto *n3 = static_cast<Node3 *>(n);
|
||||||
int i = getNodeIndex(n3, index);
|
int i = getNodeIndexExists(n3, index);
|
||||||
return n3->childMaxVersion[i];
|
return n3->childMaxVersion[i];
|
||||||
}
|
}
|
||||||
case Type_Node16: {
|
case Type_Node16: {
|
||||||
auto *n16 = static_cast<Node16 *>(n);
|
auto *n16 = static_cast<Node16 *>(n);
|
||||||
int i = getNodeIndex(n16, index);
|
int i = getNodeIndexExists(n16, index);
|
||||||
return n16->childMaxVersion[i];
|
return n16->childMaxVersion[i];
|
||||||
}
|
}
|
||||||
case Type_Node48: {
|
case Type_Node48: {
|
||||||
@@ -937,12 +971,12 @@ InternalVersionT exchangeMaxVersion(Node *n, InternalVersionT newMax) {
|
|||||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||||
case Type_Node3: {
|
case Type_Node3: {
|
||||||
auto *n3 = static_cast<Node3 *>(n);
|
auto *n3 = static_cast<Node3 *>(n);
|
||||||
int i = getNodeIndex(n3, index);
|
int i = getNodeIndexExists(n3, index);
|
||||||
return std::exchange(n3->childMaxVersion[i], newMax);
|
return std::exchange(n3->childMaxVersion[i], newMax);
|
||||||
}
|
}
|
||||||
case Type_Node16: {
|
case Type_Node16: {
|
||||||
auto *n16 = static_cast<Node16 *>(n);
|
auto *n16 = static_cast<Node16 *>(n);
|
||||||
int i = getNodeIndex(n16, index);
|
int i = getNodeIndexExists(n16, index);
|
||||||
return std::exchange(n16->childMaxVersion[i], newMax);
|
return std::exchange(n16->childMaxVersion[i], newMax);
|
||||||
}
|
}
|
||||||
case Type_Node48: {
|
case Type_Node48: {
|
||||||
@@ -971,13 +1005,13 @@ void setMaxVersion(Node *n, InternalVersionT newMax) {
|
|||||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||||
case Type_Node3: {
|
case Type_Node3: {
|
||||||
auto *n3 = static_cast<Node3 *>(n);
|
auto *n3 = static_cast<Node3 *>(n);
|
||||||
int i = getNodeIndex(n3, index);
|
int i = getNodeIndexExists(n3, index);
|
||||||
n3->childMaxVersion[i] = newMax;
|
n3->childMaxVersion[i] = newMax;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
case Type_Node16: {
|
case Type_Node16: {
|
||||||
auto *n16 = static_cast<Node16 *>(n);
|
auto *n16 = static_cast<Node16 *>(n);
|
||||||
int i = getNodeIndex(n16, index);
|
int i = getNodeIndexExists(n16, index);
|
||||||
n16->childMaxVersion[i] = newMax;
|
n16->childMaxVersion[i] = newMax;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -1049,7 +1083,6 @@ ChildAndMaxVersion getChildAndMaxVersion(Node3 *self, uint8_t index) {
|
|||||||
if (i < 0) {
|
if (i < 0) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
simCacheMiss(self->children[i]);
|
|
||||||
return {self->children[i], self->childMaxVersion[i]};
|
return {self->children[i], self->childMaxVersion[i]};
|
||||||
}
|
}
|
||||||
ChildAndMaxVersion getChildAndMaxVersion(Node16 *self, uint8_t index) {
|
ChildAndMaxVersion getChildAndMaxVersion(Node16 *self, uint8_t index) {
|
||||||
@@ -1057,7 +1090,6 @@ ChildAndMaxVersion getChildAndMaxVersion(Node16 *self, uint8_t index) {
|
|||||||
if (i < 0) {
|
if (i < 0) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
simCacheMiss(self->children[i]);
|
|
||||||
return {self->children[i], self->childMaxVersion[i]};
|
return {self->children[i], self->childMaxVersion[i]};
|
||||||
}
|
}
|
||||||
ChildAndMaxVersion getChildAndMaxVersion(Node48 *self, uint8_t index) {
|
ChildAndMaxVersion getChildAndMaxVersion(Node48 *self, uint8_t index) {
|
||||||
@@ -1065,11 +1097,9 @@ ChildAndMaxVersion getChildAndMaxVersion(Node48 *self, uint8_t index) {
|
|||||||
if (i < 0) {
|
if (i < 0) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
simCacheMiss(self->children[i]);
|
|
||||||
return {self->children[i], self->childMaxVersion[i]};
|
return {self->children[i], self->childMaxVersion[i]};
|
||||||
}
|
}
|
||||||
ChildAndMaxVersion getChildAndMaxVersion(Node256 *self, uint8_t index) {
|
ChildAndMaxVersion getChildAndMaxVersion(Node256 *self, uint8_t index) {
|
||||||
simCacheMiss(self->children[index]);
|
|
||||||
return {self->children[index], self->childMaxVersion[index]};
|
return {self->children[index], self->childMaxVersion[index]};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1093,9 +1123,10 @@ ChildAndMaxVersion getChildAndMaxVersion(Node *self, uint8_t index) {
|
|||||||
Node *getChildGeq(Node0 *, int) { return nullptr; }
|
Node *getChildGeq(Node0 *, int) { return nullptr; }
|
||||||
|
|
||||||
Node *getChildGeq(Node3 *n, int child) {
|
Node *getChildGeq(Node3 *n, int child) {
|
||||||
|
assume(n->numChildren >= 1);
|
||||||
|
assume(n->numChildren <= 3);
|
||||||
for (int i = 0; i < n->numChildren; ++i) {
|
for (int i = 0; i < n->numChildren; ++i) {
|
||||||
if (n->index[i] >= child) {
|
if (n->index[i] >= child) {
|
||||||
simCacheMiss(n->children[i]);
|
|
||||||
return n->children[i];
|
return n->children[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1114,10 +1145,7 @@ Node *getChildGeq(Node16 *self, int child) {
|
|||||||
__m128i results = _mm_cmpeq_epi8(key_vec, _mm_min_epu8(key_vec, indices));
|
__m128i results = _mm_cmpeq_epi8(key_vec, _mm_min_epu8(key_vec, indices));
|
||||||
int mask = (1 << self->numChildren) - 1;
|
int mask = (1 << self->numChildren) - 1;
|
||||||
uint32_t bitfield = _mm_movemask_epi8(results) & mask;
|
uint32_t bitfield = _mm_movemask_epi8(results) & mask;
|
||||||
auto *result =
|
return bitfield == 0 ? nullptr : self->children[std::countr_zero(bitfield)];
|
||||||
bitfield == 0 ? nullptr : self->children[std::countr_zero(bitfield)];
|
|
||||||
simCacheMiss(result);
|
|
||||||
return result;
|
|
||||||
#elif defined(HAS_ARM_NEON)
|
#elif defined(HAS_ARM_NEON)
|
||||||
uint8x16_t indices;
|
uint8x16_t indices;
|
||||||
memcpy(&indices, self->index, sizeof(self->index));
|
memcpy(&indices, self->index, sizeof(self->index));
|
||||||
@@ -1153,16 +1181,13 @@ Node *getChildGeq(Node48 *self, int child) {
|
|||||||
if (c < 0) {
|
if (c < 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
auto *result = self->children[self->index[c]];
|
return self->children[self->index[c]];
|
||||||
simCacheMiss(result);
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
Node *getChildGeq(Node256 *self, int child) {
|
Node *getChildGeq(Node256 *self, int child) {
|
||||||
int c = self->bitSet.firstSetGeq(child);
|
int c = self->bitSet.firstSetGeq(child);
|
||||||
if (c < 0) {
|
if (c < 0) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
simCacheMiss(self->children[c]);
|
|
||||||
return self->children[c];
|
return self->children[c];
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1186,26 +1211,20 @@ Node *getChildGeq(Node *self, int child) {
|
|||||||
// Precondition: self has a child
|
// Precondition: self has a child
|
||||||
Node *getFirstChildExists(Node3 *self) {
|
Node *getFirstChildExists(Node3 *self) {
|
||||||
assert(self->numChildren > 0);
|
assert(self->numChildren > 0);
|
||||||
simCacheMiss(self->children[0]);
|
|
||||||
return self->children[0];
|
return self->children[0];
|
||||||
}
|
}
|
||||||
// Precondition: self has a child
|
// Precondition: self has a child
|
||||||
Node *getFirstChildExists(Node16 *self) {
|
Node *getFirstChildExists(Node16 *self) {
|
||||||
assert(self->numChildren > 0);
|
assert(self->numChildren > 0);
|
||||||
simCacheMiss(self->children[0]);
|
|
||||||
return self->children[0];
|
return self->children[0];
|
||||||
}
|
}
|
||||||
// Precondition: self has a child
|
// Precondition: self has a child
|
||||||
Node *getFirstChildExists(Node48 *self) {
|
Node *getFirstChildExists(Node48 *self) {
|
||||||
auto *result = self->children[self->index[self->bitSet.firstSetGeq(0)]];
|
return self->children[self->index[self->bitSet.firstSetGeq(0)]];
|
||||||
simCacheMiss(result);
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
// Precondition: self has a child
|
// Precondition: self has a child
|
||||||
Node *getFirstChildExists(Node256 *self) {
|
Node *getFirstChildExists(Node256 *self) {
|
||||||
auto *result = self->children[self->bitSet.firstSetGeq(0)];
|
return self->children[self->bitSet.firstSetGeq(0)];
|
||||||
simCacheMiss(result);
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Precondition: self has a child
|
// Precondition: self has a child
|
||||||
@@ -1585,7 +1604,6 @@ __attribute__((target("avx512f"))) void rezero16(InternalVersionT *vs,
|
|||||||
_mm512_sub_epi32(_mm512_loadu_epi32(vs), zvec), _mm512_setzero_epi32());
|
_mm512_sub_epi32(_mm512_loadu_epi32(vs), zvec), _mm512_setzero_epi32());
|
||||||
_mm512_mask_storeu_epi32(vs, m, zvec);
|
_mm512_mask_storeu_epi32(vs, m, zvec);
|
||||||
}
|
}
|
||||||
|
|
||||||
__attribute__((target("default")))
|
__attribute__((target("default")))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -2452,6 +2470,7 @@ checkMaxBetweenExclusive(Node *n, int begin, int end,
|
|||||||
}
|
}
|
||||||
__attribute__((target("default")))
|
__attribute__((target("default")))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
bool checkMaxBetweenExclusive(Node *n, int begin, int end,
|
bool checkMaxBetweenExclusive(Node *n, int begin, int end,
|
||||||
InternalVersionT readVersion, ReadContext *tls) {
|
InternalVersionT readVersion, ReadContext *tls) {
|
||||||
return checkMaxBetweenExclusiveImpl<false>(n, begin, end, readVersion, tls);
|
return checkMaxBetweenExclusiveImpl<false>(n, begin, end, readVersion, tls);
|
||||||
@@ -2891,6 +2910,72 @@ void addPointWrite(Node *&root, std::span<const uint8_t> key,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__)
|
||||||
|
__attribute__((target("avx512f"))) InternalVersionT horizontalMaxUpTo16(
|
||||||
|
InternalVersionT *vs, [[maybe_unused]] InternalVersionT z, int len) {
|
||||||
|
assume(len <= 16);
|
||||||
|
#if USE_64_BIT
|
||||||
|
// Hope it gets vectorized
|
||||||
|
InternalVersionT max = vs[0];
|
||||||
|
for (int i = 1; i < len; ++i) {
|
||||||
|
max = std::max(vs[i], max);
|
||||||
|
}
|
||||||
|
return max;
|
||||||
|
#else
|
||||||
|
uint32_t zero;
|
||||||
|
memcpy(&zero, &z, sizeof(zero));
|
||||||
|
auto zeroVec = _mm512_set1_epi32(zero);
|
||||||
|
auto max = InternalVersionT(
|
||||||
|
zero +
|
||||||
|
_mm512_reduce_max_epi32(_mm512_sub_epi32(
|
||||||
|
_mm512_mask_loadu_epi32(zeroVec, _mm512_int2mask((1 << len) - 1), vs),
|
||||||
|
zeroVec)));
|
||||||
|
return max;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
__attribute__((target("default")))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
InternalVersionT
|
||||||
|
horizontalMaxUpTo16(InternalVersionT *vs, InternalVersionT, int len) {
|
||||||
|
assume(len <= 16);
|
||||||
|
InternalVersionT max = vs[0];
|
||||||
|
for (int i = 1; i < len; ++i) {
|
||||||
|
max = std::max(vs[i], max);
|
||||||
|
}
|
||||||
|
return max;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__)
|
||||||
|
__attribute__((target("avx512f"))) InternalVersionT
|
||||||
|
horizontalMax16(InternalVersionT *vs, [[maybe_unused]] InternalVersionT z) {
|
||||||
|
#if USE_64_BIT
|
||||||
|
// Hope it gets vectorized
|
||||||
|
InternalVersionT max = vs[0];
|
||||||
|
for (int i = 1; i < 16; ++i) {
|
||||||
|
max = std::max(vs[i], max);
|
||||||
|
}
|
||||||
|
return max;
|
||||||
|
#else
|
||||||
|
uint32_t zero; // GCOVR_EXCL_LINE
|
||||||
|
memcpy(&zero, &z, sizeof(zero));
|
||||||
|
auto zeroVec = _mm512_set1_epi32(zero);
|
||||||
|
return InternalVersionT(zero + _mm512_reduce_max_epi32(_mm512_sub_epi32(
|
||||||
|
_mm512_loadu_epi32(vs), zeroVec)));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
__attribute__((target("default")))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
InternalVersionT
|
||||||
|
horizontalMax16(InternalVersionT *vs, InternalVersionT) {
|
||||||
|
InternalVersionT max = vs[0];
|
||||||
|
for (int i = 1; i < 16; ++i) {
|
||||||
|
max = std::max(vs[i], max);
|
||||||
|
}
|
||||||
|
return max;
|
||||||
|
}
|
||||||
|
|
||||||
// Precondition: `node->entryPresent`, and node is not the root
|
// Precondition: `node->entryPresent`, and node is not the root
|
||||||
void fixupMaxVersion(Node *node, WriteContext *tls) {
|
void fixupMaxVersion(Node *node, WriteContext *tls) {
|
||||||
assert(node->parent);
|
assert(node->parent);
|
||||||
@@ -2902,15 +2987,13 @@ void fixupMaxVersion(Node *node, WriteContext *tls) {
|
|||||||
break;
|
break;
|
||||||
case Type_Node3: {
|
case Type_Node3: {
|
||||||
auto *self3 = static_cast<Node3 *>(node);
|
auto *self3 = static_cast<Node3 *>(node);
|
||||||
for (int i = 0; i < self3->numChildren; ++i) {
|
max = std::max(max, horizontalMaxUpTo16(self3->childMaxVersion, tls->zero,
|
||||||
max = std::max(self3->childMaxVersion[i], max);
|
self3->numChildren));
|
||||||
}
|
|
||||||
} break;
|
} break;
|
||||||
case Type_Node16: {
|
case Type_Node16: {
|
||||||
auto *self16 = static_cast<Node16 *>(node);
|
auto *self16 = static_cast<Node16 *>(node);
|
||||||
for (int i = 0; i < self16->numChildren; ++i) {
|
max = std::max(max, horizontalMaxUpTo16(self16->childMaxVersion, tls->zero,
|
||||||
max = std::max(self16->childMaxVersion[i], max);
|
self16->numChildren));
|
||||||
}
|
|
||||||
} break;
|
} break;
|
||||||
case Type_Node48: {
|
case Type_Node48: {
|
||||||
auto *self48 = static_cast<Node48 *>(node);
|
auto *self48 = static_cast<Node48 *>(node);
|
||||||
@@ -2920,9 +3003,7 @@ void fixupMaxVersion(Node *node, WriteContext *tls) {
|
|||||||
} break;
|
} break;
|
||||||
case Type_Node256: {
|
case Type_Node256: {
|
||||||
auto *self256 = static_cast<Node256 *>(node);
|
auto *self256 = static_cast<Node256 *>(node);
|
||||||
for (auto v : self256->maxOfMax) {
|
max = std::max(max, horizontalMax16(self256->childMaxVersion, tls->zero));
|
||||||
max = std::max(v, max);
|
|
||||||
}
|
|
||||||
} break;
|
} break;
|
||||||
default: // GCOVR_EXCL_LINE
|
default: // GCOVR_EXCL_LINE
|
||||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||||
@@ -3045,288 +3126,34 @@ Node *firstGeqPhysical(Node *n, const std::span<const uint8_t> key) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef __has_attribute
|
|
||||||
#define __has_attribute(x) 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if __has_attribute(musttail)
|
|
||||||
#define MUSTTAIL __attribute__((musttail))
|
|
||||||
#else
|
|
||||||
#define MUSTTAIL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if __has_attribute(preserve_none)
|
|
||||||
#define CONTINUATION_CALLING_CONVENTION __attribute__((preserve_none))
|
|
||||||
#else
|
|
||||||
#define CONTINUATION_CALLING_CONVENTION
|
|
||||||
#endif
|
|
||||||
|
|
||||||
typedef CONTINUATION_CALLING_CONVENTION void (*continuation)(struct CheckAll *,
|
|
||||||
int64_t prevJob,
|
|
||||||
int64_t job,
|
|
||||||
int64_t started,
|
|
||||||
int64_t count);
|
|
||||||
|
|
||||||
// State relevant to a particular query
|
|
||||||
struct CheckJob {
|
|
||||||
void setResult(bool ok) {
|
|
||||||
*result = ok ? ConflictSet::Commit : ConflictSet::Conflict;
|
|
||||||
}
|
|
||||||
|
|
||||||
[[nodiscard]] continuation init(const ConflictSet::ReadRange *read,
|
|
||||||
ConflictSet::Result *result, Node *root,
|
|
||||||
int64_t oldestVersionFullPrecision,
|
|
||||||
ReadContext *tls);
|
|
||||||
|
|
||||||
Node *n;
|
|
||||||
ChildAndMaxVersion childAndVersion;
|
|
||||||
std::span<const uint8_t> begin;
|
|
||||||
InternalVersionT readVersion;
|
|
||||||
ConflictSet::Result *result;
|
|
||||||
};
|
|
||||||
|
|
||||||
// State relevant to all queries
|
|
||||||
struct CheckAll {
|
|
||||||
constexpr static int kConcurrent = 32;
|
|
||||||
CheckJob inProgress[kConcurrent];
|
|
||||||
continuation next[kConcurrent];
|
|
||||||
int nextJob[kConcurrent];
|
|
||||||
Node *root;
|
|
||||||
int64_t oldestVersionFullPrecision;
|
|
||||||
ReadContext *tls;
|
|
||||||
const ConflictSet::ReadRange *queries;
|
|
||||||
ConflictSet::Result *results;
|
|
||||||
};
|
|
||||||
|
|
||||||
CONTINUATION_CALLING_CONVENTION void keepGoing(CheckAll *context,
|
|
||||||
int64_t prevJob, int64_t job,
|
|
||||||
int64_t started, int64_t count) {
|
|
||||||
prevJob = job;
|
|
||||||
job = context->nextJob[job];
|
|
||||||
MUSTTAIL return context->next[job](context, prevJob, job, started, count);
|
|
||||||
}
|
|
||||||
|
|
||||||
CONTINUATION_CALLING_CONVENTION void complete(CheckAll *context,
|
|
||||||
int64_t prevJob, int64_t job,
|
|
||||||
int64_t started, int64_t count) {
|
|
||||||
if (started == count) {
|
|
||||||
if (prevJob == job) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
context->nextJob[prevJob] = context->nextJob[job];
|
|
||||||
job = prevJob;
|
|
||||||
} else {
|
|
||||||
int temp = started++;
|
|
||||||
context->next[job] = context->inProgress[job].init(
|
|
||||||
context->queries + temp, context->results + temp, context->root,
|
|
||||||
context->oldestVersionFullPrecision, context->tls);
|
|
||||||
}
|
|
||||||
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
|
|
||||||
}
|
|
||||||
|
|
||||||
namespace check_point_read_state_machine {
|
|
||||||
|
|
||||||
CONTINUATION_CALLING_CONVENTION void
|
|
||||||
down_left_spine(struct CheckAll *, int64_t prevJob, int64_t job,
|
|
||||||
int64_t started, int64_t count);
|
|
||||||
CONTINUATION_CALLING_CONVENTION void iter(struct CheckAll *, int64_t prevJob,
|
|
||||||
int64_t job, int64_t started,
|
|
||||||
int64_t count);
|
|
||||||
CONTINUATION_CALLING_CONVENTION void begin(struct CheckAll *, int64_t prevJob,
|
|
||||||
int64_t job, int64_t started,
|
|
||||||
int64_t count);
|
|
||||||
|
|
||||||
void begin(struct CheckAll *context, int64_t prevJob, int64_t job,
|
|
||||||
int64_t started, int64_t count) {
|
|
||||||
++context->tls->point_read_accum;
|
|
||||||
#if DEBUG_VERBOSE && !defined(NDEBUG)
|
|
||||||
fprintf(stderr, "Check point read: %s\n", printable(key).c_str());
|
|
||||||
#endif
|
|
||||||
auto *j = context->inProgress + job;
|
|
||||||
|
|
||||||
if (j->begin.size() == 0) {
|
|
||||||
if (j->n->entryPresent) {
|
|
||||||
j->setResult(j->n->entry.pointVersion <= j->readVersion);
|
|
||||||
MUSTTAIL return complete(context, prevJob, job, started, count);
|
|
||||||
}
|
|
||||||
j->n = getFirstChildExists(j->n);
|
|
||||||
context->next[job] = down_left_spine;
|
|
||||||
__builtin_prefetch(j->n);
|
|
||||||
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
|
|
||||||
}
|
|
||||||
|
|
||||||
j->childAndVersion = getChildAndMaxVersion(j->n, j->begin[0]);
|
|
||||||
context->next[job] = iter;
|
|
||||||
__builtin_prefetch(j->childAndVersion.child);
|
|
||||||
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
|
|
||||||
}
|
|
||||||
|
|
||||||
void iter(struct CheckAll *context, int64_t prevJob, int64_t job,
|
|
||||||
int64_t started, int64_t count) {
|
|
||||||
auto *j = context->inProgress + job;
|
|
||||||
if (j->childAndVersion.child == nullptr) {
|
|
||||||
auto c = getChildGeq(j->n, j->begin[0]);
|
|
||||||
if (c != nullptr) {
|
|
||||||
j->n = c;
|
|
||||||
context->next[job] = down_left_spine;
|
|
||||||
__builtin_prefetch(j->n);
|
|
||||||
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
|
|
||||||
} else {
|
|
||||||
j->n = nextSibling(j->n);
|
|
||||||
if (j->n == nullptr) {
|
|
||||||
j->setResult(true);
|
|
||||||
MUSTTAIL return complete(context, prevJob, job, started, count);
|
|
||||||
}
|
|
||||||
context->next[job] = down_left_spine;
|
|
||||||
__builtin_prefetch(j->n);
|
|
||||||
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
j->n = j->childAndVersion.child;
|
|
||||||
j->begin = j->begin.subspan(1, j->begin.size() - 1);
|
|
||||||
|
|
||||||
if (j->n->partialKeyLen > 0) {
|
|
||||||
int commonLen = std::min<int>(j->n->partialKeyLen, j->begin.size());
|
|
||||||
int i = longestCommonPrefix(j->n->partialKey(), j->begin.data(), commonLen);
|
|
||||||
if (i < commonLen) {
|
|
||||||
auto c = j->n->partialKey()[i] <=> j->begin[i];
|
|
||||||
if (c > 0) {
|
|
||||||
context->next[job] = down_left_spine;
|
|
||||||
MUSTTAIL return down_left_spine(context, prevJob, job, started, count);
|
|
||||||
} else {
|
|
||||||
j->n = nextSibling(j->n);
|
|
||||||
if (j->n == nullptr) {
|
|
||||||
j->setResult(true);
|
|
||||||
MUSTTAIL return complete(context, prevJob, job, started, count);
|
|
||||||
}
|
|
||||||
context->next[job] = down_left_spine;
|
|
||||||
__builtin_prefetch(j->n);
|
|
||||||
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (commonLen == j->n->partialKeyLen) {
|
|
||||||
// partial key matches
|
|
||||||
j->begin = j->begin.subspan(commonLen, j->begin.size() - commonLen);
|
|
||||||
} else if (j->n->partialKeyLen > int(j->begin.size())) {
|
|
||||||
// n is the first physical node greater than remaining, and there's no
|
|
||||||
// eq node
|
|
||||||
context->next[job] = down_left_spine;
|
|
||||||
MUSTTAIL return down_left_spine(context, prevJob, job, started, count);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (j->childAndVersion.maxVersion <= j->readVersion) {
|
|
||||||
++context->tls->point_read_short_circuit_accum;
|
|
||||||
j->setResult(true);
|
|
||||||
MUSTTAIL return complete(context, prevJob, job, started, count);
|
|
||||||
}
|
|
||||||
|
|
||||||
++context->tls->point_read_iterations_accum;
|
|
||||||
|
|
||||||
if (j->begin.size() == 0) {
|
|
||||||
if (j->n->entryPresent) {
|
|
||||||
j->setResult(j->n->entry.pointVersion <= j->readVersion);
|
|
||||||
MUSTTAIL return complete(context, prevJob, job, started, count);
|
|
||||||
}
|
|
||||||
j->n = getFirstChildExists(j->n);
|
|
||||||
context->next[job] = down_left_spine;
|
|
||||||
__builtin_prefetch(j->n);
|
|
||||||
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
|
|
||||||
}
|
|
||||||
|
|
||||||
j->childAndVersion = getChildAndMaxVersion(j->n, j->begin[0]);
|
|
||||||
__builtin_prefetch(j->childAndVersion.child);
|
|
||||||
// j->next is already iter
|
|
||||||
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
|
|
||||||
}
|
|
||||||
|
|
||||||
void down_left_spine(struct CheckAll *context, int64_t prevJob, int64_t job,
|
|
||||||
int64_t started, int64_t count) {
|
|
||||||
auto *j = context->inProgress + job;
|
|
||||||
if (j->n->entryPresent) {
|
|
||||||
j->setResult(j->n->entry.rangeVersion <= j->readVersion);
|
|
||||||
MUSTTAIL return complete(context, prevJob, job, started, count);
|
|
||||||
}
|
|
||||||
j->n = getFirstChildExists(j->n);
|
|
||||||
__builtin_prefetch(j->n);
|
|
||||||
// j->next is already down_left_spine
|
|
||||||
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace check_point_read_state_machine
|
|
||||||
|
|
||||||
continuation CheckJob::init(const ConflictSet::ReadRange *read,
|
|
||||||
ConflictSet::Result *result, Node *root,
|
|
||||||
int64_t oldestVersionFullPrecision,
|
|
||||||
ReadContext *tls) {
|
|
||||||
auto begin = std::span<const uint8_t>(read->begin.p, read->begin.len);
|
|
||||||
auto end = std::span<const uint8_t>(read->end.p, read->end.len);
|
|
||||||
if (read->readVersion < oldestVersionFullPrecision) {
|
|
||||||
*result = ConflictSet::TooOld;
|
|
||||||
return complete;
|
|
||||||
} else if (end.size() == 0) {
|
|
||||||
this->begin = begin;
|
|
||||||
this->n = root;
|
|
||||||
this->readVersion = InternalVersionT(read->readVersion);
|
|
||||||
this->result = result;
|
|
||||||
return check_point_read_state_machine::begin;
|
|
||||||
// *result =
|
|
||||||
// checkPointRead(root, begin, InternalVersionT(read->readVersion), tls)
|
|
||||||
// ? ConflictSet::Commit
|
|
||||||
// : ConflictSet::Conflict;
|
|
||||||
// return complete;
|
|
||||||
} else {
|
|
||||||
*result = checkRangeRead(root, begin, end,
|
|
||||||
InternalVersionT(read->readVersion), tls)
|
|
||||||
? ConflictSet::Commit
|
|
||||||
: ConflictSet::Conflict;
|
|
||||||
return complete;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
||||||
|
|
||||||
void check(const ReadRange *reads, Result *result, int count) {
|
void check(const ReadRange *reads, Result *result, int count) {
|
||||||
assert(oldestVersionFullPrecision >=
|
|
||||||
newestVersionFullPrecision - kNominalVersionWindow);
|
|
||||||
|
|
||||||
if (count == 0) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
ReadContext tls;
|
ReadContext tls;
|
||||||
tls.impl = this;
|
tls.impl = this;
|
||||||
int64_t check_byte_accum = 0;
|
int64_t check_byte_accum = 0;
|
||||||
|
|
||||||
CheckAll context;
|
|
||||||
context.oldestVersionFullPrecision = oldestVersionFullPrecision;
|
|
||||||
context.queries = reads;
|
|
||||||
context.results = result;
|
|
||||||
context.root = root;
|
|
||||||
context.tls = &tls;
|
|
||||||
|
|
||||||
int64_t started = std::min(context.kConcurrent, count);
|
|
||||||
for (int i = 0; i < started; i++) {
|
|
||||||
context.next[i] = context.inProgress[i].init(
|
|
||||||
reads + i, result + i, root, oldestVersionFullPrecision, &tls);
|
|
||||||
context.nextJob[i] = i + 1;
|
|
||||||
}
|
|
||||||
context.nextJob[started - 1] = 0;
|
|
||||||
int prevJob = started - 1;
|
|
||||||
int job = 0;
|
|
||||||
context.next[job](&context, prevJob, job, started, count);
|
|
||||||
|
|
||||||
for (int i = 0; i < count; ++i) {
|
for (int i = 0; i < count; ++i) {
|
||||||
assert(reads[i].readVersion >= 0);
|
assert(reads[i].readVersion >= 0);
|
||||||
assert(reads[i].readVersion <= newestVersionFullPrecision);
|
assert(reads[i].readVersion <= newestVersionFullPrecision);
|
||||||
const auto &r = reads[i];
|
const auto &r = reads[i];
|
||||||
check_byte_accum += r.begin.len + r.end.len;
|
check_byte_accum += r.begin.len + r.end.len;
|
||||||
|
auto begin = std::span<const uint8_t>(r.begin.p, r.begin.len);
|
||||||
|
auto end = std::span<const uint8_t>(r.end.p, r.end.len);
|
||||||
|
assert(oldestVersionFullPrecision >=
|
||||||
|
newestVersionFullPrecision - kNominalVersionWindow);
|
||||||
|
result[i] =
|
||||||
|
reads[i].readVersion < oldestVersionFullPrecision ? TooOld
|
||||||
|
: (end.size() > 0
|
||||||
|
? checkRangeRead(root, begin, end,
|
||||||
|
InternalVersionT(reads[i].readVersion), &tls)
|
||||||
|
: checkPointRead(root, begin,
|
||||||
|
InternalVersionT(reads[i].readVersion), &tls))
|
||||||
|
? Commit
|
||||||
|
: Conflict;
|
||||||
tls.commits_accum += result[i] == Commit;
|
tls.commits_accum += result[i] == Commit;
|
||||||
tls.conflicts_accum += result[i] == Conflict;
|
tls.conflicts_accum += result[i] == Conflict;
|
||||||
tls.too_olds_accum += result[i] == TooOld;
|
tls.too_olds_accum += result[i] == TooOld;
|
||||||
}
|
}
|
||||||
|
|
||||||
point_read_total.add(tls.point_read_accum);
|
point_read_total.add(tls.point_read_accum);
|
||||||
prefix_read_total.add(tls.prefix_read_accum);
|
prefix_read_total.add(tls.prefix_read_accum);
|
||||||
range_read_total.add(tls.range_read_accum);
|
range_read_total.add(tls.range_read_accum);
|
||||||
@@ -4268,6 +4095,24 @@ template <int kN> void benchScan2() {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void benchHorizontal16() {
|
||||||
|
ankerl::nanobench::Bench bench;
|
||||||
|
InternalVersionT vs[16];
|
||||||
|
for (int i = 0; i < 16; ++i) {
|
||||||
|
vs[i] = InternalVersionT(rand() % 1000 + 1000);
|
||||||
|
}
|
||||||
|
#if !USE_64_BIT
|
||||||
|
InternalVersionT::zero = InternalVersionT(rand() % 1000);
|
||||||
|
#endif
|
||||||
|
bench.run("horizontal16", [&]() {
|
||||||
|
bench.doNotOptimizeAway(horizontalMax16(vs, InternalVersionT::zero));
|
||||||
|
});
|
||||||
|
int x = rand() % 15 + 1;
|
||||||
|
bench.run("horizontalUpTo16", [&]() {
|
||||||
|
bench.doNotOptimizeAway(horizontalMaxUpTo16(vs, InternalVersionT::zero, x));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
void benchLCP(int len) {
|
void benchLCP(int len) {
|
||||||
ankerl::nanobench::Bench bench;
|
ankerl::nanobench::Bench bench;
|
||||||
std::vector<uint8_t> lhs(len);
|
std::vector<uint8_t> lhs(len);
|
||||||
@@ -4300,11 +4145,7 @@ void printTree() {
|
|||||||
debugPrintDot(stdout, cs.root, &cs);
|
debugPrintDot(stdout, cs.root, &cs);
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(void) {
|
int main(void) { benchHorizontal16(); }
|
||||||
for (int i = 0; i < 256; ++i) {
|
|
||||||
benchLCP(i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef ENABLE_FUZZ
|
#ifdef ENABLE_FUZZ
|
||||||
|
@@ -22,6 +22,9 @@ void *stepJob(Job *j) {
|
|||||||
return done ? nullptr : (void *)stepJob;
|
return done ? nullptr : (void *)stepJob;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// So we can look at the disassembly more easily
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
void sequential(Job **jobs, int count) {
|
void sequential(Job **jobs, int count) {
|
||||||
for (int i = 0; i < count; ++i) {
|
for (int i = 0; i < count; ++i) {
|
||||||
do {
|
do {
|
||||||
@@ -91,87 +94,6 @@ void interleaveBoundedCyclicList(Job **jobs, int count) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef __has_attribute
|
|
||||||
#define __has_attribute(x) 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if __has_attribute(musttail)
|
|
||||||
#define MUSTTAIL __attribute__((musttail))
|
|
||||||
#else
|
|
||||||
#define MUSTTAIL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
struct Context {
|
|
||||||
constexpr static int kConcurrent = 32;
|
|
||||||
Job **jobs;
|
|
||||||
Job *inProgress[kConcurrent];
|
|
||||||
void (*continuation[kConcurrent])(Context *, int64_t prevJob, int64_t job,
|
|
||||||
int64_t started, int64_t count);
|
|
||||||
int nextJob[kConcurrent];
|
|
||||||
};
|
|
||||||
|
|
||||||
void keepGoing(Context *context, int64_t prevJob, int64_t job, int64_t started,
|
|
||||||
int64_t count) {
|
|
||||||
prevJob = job;
|
|
||||||
job = context->nextJob[job];
|
|
||||||
MUSTTAIL return context->continuation[job](context, prevJob, job, started,
|
|
||||||
count);
|
|
||||||
}
|
|
||||||
|
|
||||||
void stepJobTailCall(Context *context, int64_t prevJob, int64_t job,
|
|
||||||
int64_t started, int64_t count);
|
|
||||||
|
|
||||||
void complete(Context *context, int64_t prevJob, int64_t job, int64_t started,
|
|
||||||
int64_t count) {
|
|
||||||
if (started == count) {
|
|
||||||
if (prevJob == job) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
context->nextJob[prevJob] = context->nextJob[job];
|
|
||||||
job = prevJob;
|
|
||||||
} else {
|
|
||||||
context->inProgress[job] = context->jobs[started++];
|
|
||||||
context->continuation[job] = stepJobTailCall;
|
|
||||||
}
|
|
||||||
prevJob = job;
|
|
||||||
job = context->nextJob[job];
|
|
||||||
MUSTTAIL return context->continuation[job](context, prevJob, job, started,
|
|
||||||
count);
|
|
||||||
}
|
|
||||||
|
|
||||||
void stepJobTailCall(Context *context, int64_t prevJob, int64_t job,
|
|
||||||
int64_t started, int64_t count) {
|
|
||||||
auto *j = context->inProgress[job];
|
|
||||||
auto done = --(*j->input) == 0;
|
|
||||||
#ifdef __x86_64__
|
|
||||||
_mm_clflush(j->input);
|
|
||||||
#endif
|
|
||||||
if (done) {
|
|
||||||
MUSTTAIL return complete(context, prevJob, job, started, count);
|
|
||||||
} else {
|
|
||||||
context->continuation[job] = stepJobTailCall;
|
|
||||||
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void useTailCalls(Job **jobs, int count) {
|
|
||||||
if (count == 0) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
Context context;
|
|
||||||
context.jobs = jobs;
|
|
||||||
int64_t started = std::min(Context::kConcurrent, count);
|
|
||||||
for (int i = 0; i < started; i++) {
|
|
||||||
context.inProgress[i] = jobs[i];
|
|
||||||
context.nextJob[i] = i + 1;
|
|
||||||
context.continuation[i] = stepJobTailCall;
|
|
||||||
}
|
|
||||||
context.nextJob[started - 1] = 0;
|
|
||||||
int prevJob = started - 1;
|
|
||||||
int job = 0;
|
|
||||||
return context.continuation[job](&context, prevJob, job, started, count);
|
|
||||||
}
|
|
||||||
|
|
||||||
void interleaveCyclicList(Job **jobs, int count) {
|
void interleaveCyclicList(Job **jobs, int count) {
|
||||||
auto *nextJob = (int *)alloca(sizeof(int) * count);
|
auto *nextJob = (int *)alloca(sizeof(int) * count);
|
||||||
|
|
||||||
@@ -195,11 +117,12 @@ void interleaveCyclicList(Job **jobs, int count) {
|
|||||||
job = nextJob[job];
|
job = nextJob[job];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
ankerl::nanobench::Bench bench;
|
ankerl::nanobench::Bench bench;
|
||||||
|
|
||||||
constexpr int kNumJobs = 10000;
|
constexpr int kNumJobs = 100;
|
||||||
bench.relative(true);
|
bench.relative(true);
|
||||||
|
|
||||||
Job jobs[kNumJobs];
|
Job jobs[kNumJobs];
|
||||||
@@ -217,7 +140,6 @@ int main() {
|
|||||||
for (auto [scheduler, name] :
|
for (auto [scheduler, name] :
|
||||||
{std::make_pair(sequentialNoFuncPtr, "sequentialNoFuncPtr"),
|
{std::make_pair(sequentialNoFuncPtr, "sequentialNoFuncPtr"),
|
||||||
std::make_pair(sequential, "sequential"),
|
std::make_pair(sequential, "sequential"),
|
||||||
std::make_pair(useTailCalls, "useTailCalls"),
|
|
||||||
std::make_pair(interleaveSwapping, "interleavingSwapping"),
|
std::make_pair(interleaveSwapping, "interleavingSwapping"),
|
||||||
std::make_pair(interleaveBoundedCyclicList,
|
std::make_pair(interleaveBoundedCyclicList,
|
||||||
"interleaveBoundedCyclicList"),
|
"interleaveBoundedCyclicList"),
|
||||||
|
@@ -1,5 +1,4 @@
|
|||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include <cstdint>
|
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <netdb.h>
|
#include <netdb.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
@@ -22,55 +21,78 @@
|
|||||||
|
|
||||||
std::atomic<int64_t> transactions;
|
std::atomic<int64_t> transactions;
|
||||||
|
|
||||||
|
constexpr int kBaseSearchDepth = 115;
|
||||||
constexpr int kWindowSize = 10000000;
|
constexpr int kWindowSize = 10000000;
|
||||||
|
|
||||||
constexpr int kNumPrefixes = 250000;
|
std::string numToKey(int64_t num) {
|
||||||
|
|
||||||
std::string makeKey(int64_t num, int suffixLen) {
|
|
||||||
std::string result;
|
std::string result;
|
||||||
result.resize(sizeof(int64_t) + suffixLen);
|
result.resize(kBaseSearchDepth + sizeof(int64_t));
|
||||||
|
memset(result.data(), 0, kBaseSearchDepth);
|
||||||
int64_t be = __builtin_bswap64(num);
|
int64_t be = __builtin_bswap64(num);
|
||||||
memcpy(result.data(), &be, sizeof(int64_t));
|
memcpy(result.data() + kBaseSearchDepth, &be, sizeof(int64_t));
|
||||||
memset(result.data() + sizeof(int64_t), 0, suffixLen);
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
void workload(weaselab::ConflictSet *cs) {
|
void workload(weaselab::ConflictSet *cs) {
|
||||||
int64_t version = kWindowSize;
|
int64_t version = kWindowSize;
|
||||||
for (int i = 0; i < kNumPrefixes; ++i) {
|
cs->addWrites(nullptr, 0, version);
|
||||||
for (int j = 0; j < 50; ++j) {
|
|
||||||
weaselab::ConflictSet::WriteRange wr;
|
|
||||||
auto k = makeKey(i, j);
|
|
||||||
wr.begin.p = (const uint8_t *)k.data();
|
|
||||||
wr.begin.len = k.size();
|
|
||||||
wr.end.len = 0;
|
|
||||||
cs->addWrites(&wr, 1, version);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
++version;
|
|
||||||
for (int i = 0; i < kNumPrefixes; ++i) {
|
|
||||||
weaselab::ConflictSet::WriteRange wr;
|
|
||||||
auto k = makeKey(i, 50);
|
|
||||||
wr.begin.p = (const uint8_t *)k.data();
|
|
||||||
wr.begin.len = k.size();
|
|
||||||
wr.end.len = 0;
|
|
||||||
cs->addWrites(&wr, 1, version);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<weaselab::ConflictSet::Result> results(10);
|
|
||||||
for (;; transactions.fetch_add(1, std::memory_order_relaxed)) {
|
for (;; transactions.fetch_add(1, std::memory_order_relaxed)) {
|
||||||
std::vector<std::string> keys(10);
|
// Reads
|
||||||
for (auto &k : keys) {
|
{
|
||||||
k = makeKey(rand() % kNumPrefixes, 49);
|
auto beginK = numToKey(version - kWindowSize);
|
||||||
|
auto endK = numToKey(version - 1);
|
||||||
|
auto pointRv = version - kWindowSize + rand() % kWindowSize + 1;
|
||||||
|
auto pointK = numToKey(pointRv);
|
||||||
|
weaselab::ConflictSet::ReadRange reads[] = {
|
||||||
|
{
|
||||||
|
{(const uint8_t *)pointK.data(), int(pointK.size())},
|
||||||
|
{nullptr, 0},
|
||||||
|
pointRv,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
{(const uint8_t *)beginK.data(), int(beginK.size())},
|
||||||
|
{(const uint8_t *)endK.data(), int(endK.size())},
|
||||||
|
version - 2,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
weaselab::ConflictSet::Result result[sizeof(reads) / sizeof(reads[0])];
|
||||||
|
cs->check(reads, result, sizeof(reads) / sizeof(reads[0]));
|
||||||
|
// for (int i = 0; i < sizeof(reads) / sizeof(reads[0]); ++i) {
|
||||||
|
// if (result[i] != weaselab::ConflictSet::Commit) {
|
||||||
|
// fprintf(stderr, "Unexpected conflict: [%s, %s) @ %" PRId64 "\n",
|
||||||
|
// printable(reads[i].begin).c_str(),
|
||||||
|
// printable(reads[i].end).c_str(), reads[i].readVersion);
|
||||||
|
// abort();
|
||||||
|
// }
|
||||||
|
// }
|
||||||
}
|
}
|
||||||
std::vector<weaselab::ConflictSet::ReadRange> reads(10);
|
// Writes
|
||||||
for (int i = 0; i < reads.size(); ++i) {
|
{
|
||||||
reads[i].begin.p = (const uint8_t *)(keys[i].data());
|
weaselab::ConflictSet::WriteRange w;
|
||||||
reads[i].begin.len = keys[i].size();
|
auto k = numToKey(version);
|
||||||
reads[i].end.len = 0;
|
w.begin.p = (const uint8_t *)k.data();
|
||||||
reads[i].readVersion = version - 1;
|
w.end.len = 0;
|
||||||
|
if (version % (kWindowSize / 2) == 0) {
|
||||||
|
for (int l = 0; l <= k.size(); ++l) {
|
||||||
|
w.begin.len = l;
|
||||||
|
cs->addWrites(&w, 1, version);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
w.begin.len = k.size();
|
||||||
|
cs->addWrites(&w, 1, version);
|
||||||
|
int64_t beginN = version - kWindowSize + rand() % kWindowSize;
|
||||||
|
auto b = numToKey(beginN);
|
||||||
|
auto e = numToKey(beginN + 1000);
|
||||||
|
w.begin.p = (const uint8_t *)b.data();
|
||||||
|
w.begin.len = b.size();
|
||||||
|
w.end.p = (const uint8_t *)e.data();
|
||||||
|
w.end.len = e.size();
|
||||||
|
cs->addWrites(&w, 1, version);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
cs->check(reads.data(), results.data(), 10);
|
// GC
|
||||||
|
cs->setOldestVersion(version - kWindowSize);
|
||||||
|
++version;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
BIN
corpus/1863492b4bfa4e57a2dd04457f45dd1adbc1b43b
Normal file
BIN
corpus/1863492b4bfa4e57a2dd04457f45dd1adbc1b43b
Normal file
Binary file not shown.
BIN
corpus/1b73191605897b8fb531d18ce6309d5bd5118268
Normal file
BIN
corpus/1b73191605897b8fb531d18ce6309d5bd5118268
Normal file
Binary file not shown.
BIN
corpus/1d73c395d1885ce584ddc66ebea3137a89222307
Normal file
BIN
corpus/1d73c395d1885ce584ddc66ebea3137a89222307
Normal file
Binary file not shown.
BIN
corpus/290bc08207a6de4b9adceb42003d4aeb5758bfdd
Normal file
BIN
corpus/290bc08207a6de4b9adceb42003d4aeb5758bfdd
Normal file
Binary file not shown.
BIN
corpus/33cfb152724161924444fe07e0cdb07b8b8462fc
Normal file
BIN
corpus/33cfb152724161924444fe07e0cdb07b8b8462fc
Normal file
Binary file not shown.
BIN
corpus/3dd78f16a8656e4839afb2eb4cdc7a028f4d28b8
Normal file
BIN
corpus/3dd78f16a8656e4839afb2eb4cdc7a028f4d28b8
Normal file
Binary file not shown.
BIN
corpus/43950334272a0d818f646b118439717c19a99837
Normal file
BIN
corpus/43950334272a0d818f646b118439717c19a99837
Normal file
Binary file not shown.
BIN
corpus/4920f8c7afdef94d228a5683c8d31cebf40c27a8
Normal file
BIN
corpus/4920f8c7afdef94d228a5683c8d31cebf40c27a8
Normal file
Binary file not shown.
BIN
corpus/7e8f30ffd0b8f665d87159020d0c977bca72a972
Normal file
BIN
corpus/7e8f30ffd0b8f665d87159020d0c977bca72a972
Normal file
Binary file not shown.
BIN
corpus/904e5d8ff4df7175e5d97536d687bbfa2a8b6c50
Normal file
BIN
corpus/904e5d8ff4df7175e5d97536d687bbfa2a8b6c50
Normal file
Binary file not shown.
BIN
corpus/9097c0a5f4a1a725413d8ac7b5a42c1c17b6eb31
Normal file
BIN
corpus/9097c0a5f4a1a725413d8ac7b5a42c1c17b6eb31
Normal file
Binary file not shown.
BIN
corpus/91bf7637038424fdff9a6fea9afc9166399284f7
Normal file
BIN
corpus/91bf7637038424fdff9a6fea9afc9166399284f7
Normal file
Binary file not shown.
BIN
corpus/98060955d1d4b9ab23969fc8715d64d90b41b498
Normal file
BIN
corpus/98060955d1d4b9ab23969fc8715d64d90b41b498
Normal file
Binary file not shown.
BIN
corpus/a21decffaafb3e49c2f4984833f6cfe1d9d1d3ca
Normal file
BIN
corpus/a21decffaafb3e49c2f4984833f6cfe1d9d1d3ca
Normal file
Binary file not shown.
BIN
corpus/b38d17b3b3f4689098f13451b448688e3e2e0d4d
Normal file
BIN
corpus/b38d17b3b3f4689098f13451b448688e3e2e0d4d
Normal file
Binary file not shown.
BIN
corpus/bef22e17c90d2b121dbd1f48ba9e09b3684f54e1
Normal file
BIN
corpus/bef22e17c90d2b121dbd1f48ba9e09b3684f54e1
Normal file
Binary file not shown.
BIN
corpus/c90e64707700c71cd8848ace4f34825766255457
Normal file
BIN
corpus/c90e64707700c71cd8848ace4f34825766255457
Normal file
Binary file not shown.
BIN
corpus/cc104b98c539e338887b8d9d3aac60561e7fbfff
Normal file
BIN
corpus/cc104b98c539e338887b8d9d3aac60561e7fbfff
Normal file
Binary file not shown.
BIN
corpus/cc555c8529d6c31423947a7a4dcfdddfd713e5e7
Normal file
BIN
corpus/cc555c8529d6c31423947a7a4dcfdddfd713e5e7
Normal file
Binary file not shown.
BIN
corpus/d46a73ca220fd8462d7e07ddfd008721b682e9b1
Normal file
BIN
corpus/d46a73ca220fd8462d7e07ddfd008721b682e9b1
Normal file
Binary file not shown.
BIN
corpus/e2aa45c8babd79c5ae6ec0b53757f1a23d8f1c62
Normal file
BIN
corpus/e2aa45c8babd79c5ae6ec0b53757f1a23d8f1c62
Normal file
Binary file not shown.
Reference in New Issue
Block a user