14 Commits

Author SHA1 Message Date
ed67486077 Reordering seems to improve codegen
Some checks failed
Tests / Clang total: 3244, passed: 3244
Clang |Total|New|Outstanding|Fixed|Trend |:-:|:-:|:-:|:-:|:-: |0|0|0|0|:clap:
Tests / 64 bit versions total: 3244, passed: 3244
Tests / Debug total: 3242, passed: 3242
Tests / SIMD fallback total: 3244, passed: 3244
Tests / Release [gcc] total: 3244, passed: 3244
GNU C Compiler (gcc) |Total|New|Outstanding|Fixed|Trend |:-:|:-:|:-:|:-:|:-: |0|0|0|0|:clap:
Tests / Release [gcc,aarch64] total: 2419, passed: 2419
Tests / Coverage total: 2437, passed: 2437
Code Coverage #### Project Overview No changes detected, that affect the code coverage. * Line Coverage: 98.98% (1938/1958) * Branch Coverage: 68.67% (1497/2180) * Complexity Density: 0.00 * Lines of Code: 1958 #### Quality Gates Summary Output truncated.
weaselab/conflict-set/pipeline/head There was a failure building this commit
2024-09-23 15:28:51 -07:00
b376f6fdd5 WIP
Some checks failed
Tests / Clang total: 3244, passed: 3244
Clang |Total|New|Outstanding|Fixed|Trend |:-:|:-:|:-:|:-:|:-: |0|0|0|0|:clap:
Tests / 64 bit versions total: 3244, passed: 3244
Tests / Debug total: 3242, passed: 3242
Tests / SIMD fallback total: 3244, passed: 3244
Tests / Release [gcc] total: 3244, passed: 3244
GNU C Compiler (gcc) |Total|New|Outstanding|Fixed|Trend |:-:|:-:|:-:|:-:|:-: |0|0|0|0|:clap:
Tests / Release [gcc,aarch64] total: 2419, passed: 2419
Tests / Coverage total: 2437, passed: 2437
Code Coverage #### Project Overview No changes detected, that affect the code coverage. * Line Coverage: 98.98% (1938/1958) * Branch Coverage: 68.67% (1497/2180) * Complexity Density: 0.00 * Lines of Code: 1958 #### Quality Gates Summary Output truncated.
weaselab/conflict-set/pipeline/head There was a failure building this commit
2024-09-23 15:11:48 -07:00
6de63dd3fe Use preserve_none and put continuation array in CheckAll 2024-09-23 14:53:16 -07:00
3e5f13bf54 WIP - tests pass 2024-09-23 13:32:56 -07:00
e7e1d1f7f5 Add tail-call based interleaving approach 2024-09-23 12:52:30 -07:00
442658e983 Target ~1GB memory usage in server bench 2024-09-21 14:28:15 -07:00
26f602215e Accentuate cache misses for point reads in server_bench 2024-09-14 22:42:40 -07:00
98236f81cb Add missing __builtin_prefetch 2024-09-14 22:41:58 -07:00
3593b72880 Disallow checking SIM_CACHE_MISSES=1 2024-09-10 22:23:37 -07:00
814aac4ea7 Experiment with causing cache misses 2024-09-10 22:06:00 -07:00
0550fa0016 Add "iter" state 2024-09-10 17:22:10 -07:00
fe5cfb0336 Remove redundant cast 2024-09-10 17:06:45 -07:00
82203515a0 check_point_read_state_machine::down_left_spine 2024-09-10 17:05:09 -07:00
465372c734 Scaffolding to prepare for interleaving checks 2024-09-10 16:10:57 -07:00
30 changed files with 414 additions and 277 deletions

View File

@@ -24,6 +24,14 @@ repos:
entry: "^#define SHOW_MEMORY 1$"
language: pygrep
types: [c++]
- repo: local
hooks:
- id: sim cache misses check
name: disallow checking in SIM_CACHE_MISSES=1
description: disallow checking in SIM_CACHE_MISSES=1
entry: "^#define SIM_CACHE_MISSES 1$"
language: pygrep
types: [c++]
- repo: https://github.com/shellcheck-py/shellcheck-py
rev: a23f6b85d0fdd5bb9d564e2579e678033debbdff # frozen: v0.10.0.1
hooks:

View File

@@ -95,23 +95,12 @@ target_compile_options(${PROJECT_NAME}-object PRIVATE -fno-exceptions
-fvisibility=hidden)
target_include_directories(${PROJECT_NAME}-object
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include)
if(NOT LD_EXE)
set(LD_EXE ld)
endif()
add_custom_command(
OUTPUT ${CMAKE_BINARY_DIR}/${PROJECT_NAME}.o
COMMAND ${LD_EXE} -r $<TARGET_OBJECTS:${PROJECT_NAME}-object> -o
${CMAKE_BINARY_DIR}/${PROJECT_NAME}.o
DEPENDS $<TARGET_OBJECTS:${PROJECT_NAME}-object>
COMMAND_EXPAND_LISTS)
add_library(${PROJECT_NAME} SHARED ${CMAKE_BINARY_DIR}/${PROJECT_NAME}.o)
add_library(${PROJECT_NAME} SHARED $<TARGET_OBJECTS:${PROJECT_NAME}-object>)
set_target_properties(
${PROJECT_NAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY
"${CMAKE_CURRENT_BINARY_DIR}/radix_tree")
if(CMAKE_BUILD_TYPE STREQUAL Debug)
set_target_properties(${PROJECT_NAME} PROPERTIES LINKER_LANGUAGE CXX)
else()
if(NOT CMAKE_BUILD_TYPE STREQUAL Debug)
set_target_properties(${PROJECT_NAME} PROPERTIES LINKER_LANGUAGE C)
endif()
@@ -121,13 +110,19 @@ if(HAS_VERSION_SCRIPT)
LINKER:--version-script=${CMAKE_CURRENT_SOURCE_DIR}/linker.map)
endif()
add_library(${PROJECT_NAME}-static STATIC ${CMAKE_BINARY_DIR}/${PROJECT_NAME}.o)
if(CMAKE_BUILD_TYPE STREQUAL Debug)
set_target_properties(${PROJECT_NAME}-static PROPERTIES LINKER_LANGUAGE CXX)
else()
add_library(${PROJECT_NAME}-static STATIC
$<TARGET_OBJECTS:${PROJECT_NAME}-object>)
if(NOT CMAKE_BUILD_TYPE STREQUAL Debug)
set_target_properties(${PROJECT_NAME}-static PROPERTIES LINKER_LANGUAGE C)
endif()
if(NOT APPLE)
if(APPLE)
add_custom_command(
TARGET ${PROJECT_NAME}-static
PRE_LINK
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/privatize_symbols_macos.sh
$<TARGET_OBJECTS:${PROJECT_NAME}-object>)
else()
add_custom_command(
TARGET ${PROJECT_NAME}-static
POST_BUILD
@@ -253,19 +248,6 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND BUILD_TESTING)
add_test(NAME conflict_set_blackbox_${hash} COMMAND driver ${TEST})
endforeach()
find_program(VALGRIND_EXE valgrind)
if(VALGRIND_EXE AND NOT CMAKE_CROSSCOMPILING)
list(LENGTH CORPUS_TESTS len)
math(EXPR last "${len} - 1")
set(partition_size 100)
foreach(i RANGE 0 ${last} ${partition_size})
list(SUBLIST CORPUS_TESTS ${i} ${partition_size} partition)
add_test(NAME conflict_set_blackbox_valgrind_${i}
COMMAND ${VALGRIND_EXE} --error-exitcode=99 --
$<TARGET_FILE:driver> ${partition})
endforeach()
endif()
# scripted tests. Written manually to fill in anything libfuzzer couldn't
# find.
if(NOT CMAKE_CROSSCOMPILING)
@@ -286,14 +268,19 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND BUILD_TESTING)
${Python3_EXECUTABLE}
${CMAKE_CURRENT_SOURCE_DIR}/test_conflict_set.py test ${TEST}
--build-dir ${CMAKE_CURRENT_BINARY_DIR})
if(VALGRIND_EXE AND NOT CMAKE_CROSSCOMPILING)
add_test(
NAME script_test_${TEST}_valgrind
COMMAND
${VALGRIND_EXE} ${Python3_EXECUTABLE}
${CMAKE_CURRENT_SOURCE_DIR}/test_conflict_set.py test ${TEST}
--build-dir ${CMAKE_CURRENT_BINARY_DIR})
endif()
endforeach()
endif()
find_program(VALGRIND_EXE valgrind)
if(VALGRIND_EXE AND NOT CMAKE_CROSSCOMPILING)
list(LENGTH CORPUS_TESTS len)
math(EXPR last "${len} - 1")
set(partition_size 100)
foreach(i RANGE 0 ${last} ${partition_size})
list(SUBLIST CORPUS_TESTS ${i} ${partition_size} partition)
add_test(NAME conflict_set_blackbox_valgrind_${i}
COMMAND ${VALGRIND_EXE} --error-exitcode=99 --
$<TARGET_FILE:driver> ${partition})
endforeach()
endif()

View File

@@ -48,6 +48,17 @@ limitations under the License.
#endif
#endif
#define SIM_CACHE_MISSES 0
#if SIM_CACHE_MISSES
constexpr void simCacheMiss(void *x) {
if (x) {
_mm_clflush(x);
}
}
#else
constexpr void simCacheMiss(void *) {}
#endif
#include <memcheck.h>
using namespace weaselab;
@@ -766,8 +777,6 @@ private:
int getNodeIndex(Node3 *self, uint8_t index) {
Node3 *n = (Node3 *)self;
assume(n->numChildren >= 1);
assume(n->numChildren <= 3);
for (int i = 0; i < n->numChildren; ++i) {
if (n->index[i] == index) {
return i;
@@ -776,18 +785,6 @@ int getNodeIndex(Node3 *self, uint8_t index) {
return -1;
}
int getNodeIndexExists(Node3 *self, uint8_t index) {
Node3 *n = (Node3 *)self;
assume(n->numChildren >= 1);
assume(n->numChildren <= 3);
for (int i = 0; i < n->numChildren; ++i) {
if (n->index[i] == index) {
return i;
}
}
__builtin_unreachable(); // GCOVR_EXCL_LINE
}
int getNodeIndex(Node16 *self, uint8_t index) {
#ifdef HAS_AVX
@@ -848,62 +845,31 @@ int getNodeIndex(Node16 *self, uint8_t index) {
#endif
}
int getNodeIndexExists(Node16 *self, uint8_t index) {
#ifdef HAS_AVX
__m128i key_vec = _mm_set1_epi8(index);
__m128i indices;
memcpy(&indices, self->index, Node16::kMaxNodes);
__m128i results = _mm_cmpeq_epi8(key_vec, indices);
uint32_t mask = (1 << self->numChildren) - 1;
uint32_t bitfield = _mm_movemask_epi8(results) & mask;
assume(bitfield != 0);
return std::countr_zero(bitfield);
#elif defined(HAS_ARM_NEON)
// Based on
// https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
uint8x16_t indices;
memcpy(&indices, self->index, Node16::kMaxNodes);
// 0xff for each match
uint16x8_t results =
vreinterpretq_u16_u8(vceqq_u8(vdupq_n_u8(index), indices));
assume(self->numChildren <= Node16::kMaxNodes);
uint64_t mask = self->numChildren == 16
? uint64_t(-1)
: (uint64_t(1) << (self->numChildren * 4)) - 1;
// 0xf for each match in valid range
uint64_t bitfield =
vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(results, 4)), 0) & mask;
assume(bitfield != 0);
return std::countr_zero(bitfield) / 4;
#else
for (int i = 0; i < self->numChildren; ++i) {
if (self->index[i] == index) {
return i;
}
}
__builtin_unreachable(); // GCOVR_EXCL_LINE
#endif
}
// Precondition - an entry for index must exist in the node
Node *&getChildExists(Node3 *self, uint8_t index) {
return self->children[getNodeIndexExists(self, index)];
auto &result = self->children[getNodeIndex(self, index)];
simCacheMiss(result);
return result;
}
// Precondition - an entry for index must exist in the node
Node *&getChildExists(Node16 *self, uint8_t index) {
return self->children[getNodeIndexExists(self, index)];
auto &result = self->children[getNodeIndex(self, index)];
simCacheMiss(result);
return result;
}
// Precondition - an entry for index must exist in the node
Node *&getChildExists(Node48 *self, uint8_t index) {
assert(self->bitSet.test(index));
return self->children[self->index[index]];
auto &result = self->children[self->index[index]];
simCacheMiss(result);
return result;
}
// Precondition - an entry for index must exist in the node
Node *&getChildExists(Node256 *self, uint8_t index) {
assert(self->bitSet.test(index));
return self->children[index];
auto &result = self->children[index];
simCacheMiss(result);
return result;
}
// Precondition - an entry for index must exist in the node
@@ -938,12 +904,12 @@ InternalVersionT maxVersion(Node *n) {
__builtin_unreachable(); // GCOVR_EXCL_LINE
case Type_Node3: {
auto *n3 = static_cast<Node3 *>(n);
int i = getNodeIndexExists(n3, index);
int i = getNodeIndex(n3, index);
return n3->childMaxVersion[i];
}
case Type_Node16: {
auto *n16 = static_cast<Node16 *>(n);
int i = getNodeIndexExists(n16, index);
int i = getNodeIndex(n16, index);
return n16->childMaxVersion[i];
}
case Type_Node48: {
@@ -971,12 +937,12 @@ InternalVersionT exchangeMaxVersion(Node *n, InternalVersionT newMax) {
__builtin_unreachable(); // GCOVR_EXCL_LINE
case Type_Node3: {
auto *n3 = static_cast<Node3 *>(n);
int i = getNodeIndexExists(n3, index);
int i = getNodeIndex(n3, index);
return std::exchange(n3->childMaxVersion[i], newMax);
}
case Type_Node16: {
auto *n16 = static_cast<Node16 *>(n);
int i = getNodeIndexExists(n16, index);
int i = getNodeIndex(n16, index);
return std::exchange(n16->childMaxVersion[i], newMax);
}
case Type_Node48: {
@@ -1005,13 +971,13 @@ void setMaxVersion(Node *n, InternalVersionT newMax) {
__builtin_unreachable(); // GCOVR_EXCL_LINE
case Type_Node3: {
auto *n3 = static_cast<Node3 *>(n);
int i = getNodeIndexExists(n3, index);
int i = getNodeIndex(n3, index);
n3->childMaxVersion[i] = newMax;
return;
}
case Type_Node16: {
auto *n16 = static_cast<Node16 *>(n);
int i = getNodeIndexExists(n16, index);
int i = getNodeIndex(n16, index);
n16->childMaxVersion[i] = newMax;
return;
}
@@ -1083,6 +1049,7 @@ ChildAndMaxVersion getChildAndMaxVersion(Node3 *self, uint8_t index) {
if (i < 0) {
return {};
}
simCacheMiss(self->children[i]);
return {self->children[i], self->childMaxVersion[i]};
}
ChildAndMaxVersion getChildAndMaxVersion(Node16 *self, uint8_t index) {
@@ -1090,6 +1057,7 @@ ChildAndMaxVersion getChildAndMaxVersion(Node16 *self, uint8_t index) {
if (i < 0) {
return {};
}
simCacheMiss(self->children[i]);
return {self->children[i], self->childMaxVersion[i]};
}
ChildAndMaxVersion getChildAndMaxVersion(Node48 *self, uint8_t index) {
@@ -1097,9 +1065,11 @@ ChildAndMaxVersion getChildAndMaxVersion(Node48 *self, uint8_t index) {
if (i < 0) {
return {};
}
simCacheMiss(self->children[i]);
return {self->children[i], self->childMaxVersion[i]};
}
ChildAndMaxVersion getChildAndMaxVersion(Node256 *self, uint8_t index) {
simCacheMiss(self->children[index]);
return {self->children[index], self->childMaxVersion[index]};
}
@@ -1123,10 +1093,9 @@ ChildAndMaxVersion getChildAndMaxVersion(Node *self, uint8_t index) {
Node *getChildGeq(Node0 *, int) { return nullptr; }
Node *getChildGeq(Node3 *n, int child) {
assume(n->numChildren >= 1);
assume(n->numChildren <= 3);
for (int i = 0; i < n->numChildren; ++i) {
if (n->index[i] >= child) {
simCacheMiss(n->children[i]);
return n->children[i];
}
}
@@ -1145,7 +1114,10 @@ Node *getChildGeq(Node16 *self, int child) {
__m128i results = _mm_cmpeq_epi8(key_vec, _mm_min_epu8(key_vec, indices));
int mask = (1 << self->numChildren) - 1;
uint32_t bitfield = _mm_movemask_epi8(results) & mask;
return bitfield == 0 ? nullptr : self->children[std::countr_zero(bitfield)];
auto *result =
bitfield == 0 ? nullptr : self->children[std::countr_zero(bitfield)];
simCacheMiss(result);
return result;
#elif defined(HAS_ARM_NEON)
uint8x16_t indices;
memcpy(&indices, self->index, sizeof(self->index));
@@ -1181,13 +1153,16 @@ Node *getChildGeq(Node48 *self, int child) {
if (c < 0) {
return nullptr;
}
return self->children[self->index[c]];
auto *result = self->children[self->index[c]];
simCacheMiss(result);
return result;
}
Node *getChildGeq(Node256 *self, int child) {
int c = self->bitSet.firstSetGeq(child);
if (c < 0) {
return nullptr;
}
simCacheMiss(self->children[c]);
return self->children[c];
}
@@ -1211,20 +1186,26 @@ Node *getChildGeq(Node *self, int child) {
// Precondition: self has a child
Node *getFirstChildExists(Node3 *self) {
assert(self->numChildren > 0);
simCacheMiss(self->children[0]);
return self->children[0];
}
// Precondition: self has a child
Node *getFirstChildExists(Node16 *self) {
assert(self->numChildren > 0);
simCacheMiss(self->children[0]);
return self->children[0];
}
// Precondition: self has a child
Node *getFirstChildExists(Node48 *self) {
return self->children[self->index[self->bitSet.firstSetGeq(0)]];
auto *result = self->children[self->index[self->bitSet.firstSetGeq(0)]];
simCacheMiss(result);
return result;
}
// Precondition: self has a child
Node *getFirstChildExists(Node256 *self) {
return self->children[self->bitSet.firstSetGeq(0)];
auto *result = self->children[self->bitSet.firstSetGeq(0)];
simCacheMiss(result);
return result;
}
// Precondition: self has a child
@@ -1604,6 +1585,7 @@ __attribute__((target("avx512f"))) void rezero16(InternalVersionT *vs,
_mm512_sub_epi32(_mm512_loadu_epi32(vs), zvec), _mm512_setzero_epi32());
_mm512_mask_storeu_epi32(vs, m, zvec);
}
__attribute__((target("default")))
#endif
@@ -2470,7 +2452,6 @@ checkMaxBetweenExclusive(Node *n, int begin, int end,
}
__attribute__((target("default")))
#endif
bool checkMaxBetweenExclusive(Node *n, int begin, int end,
InternalVersionT readVersion, ReadContext *tls) {
return checkMaxBetweenExclusiveImpl<false>(n, begin, end, readVersion, tls);
@@ -2910,72 +2891,6 @@ void addPointWrite(Node *&root, std::span<const uint8_t> key,
}
}
#if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__)
__attribute__((target("avx512f"))) InternalVersionT horizontalMaxUpTo16(
InternalVersionT *vs, [[maybe_unused]] InternalVersionT z, int len) {
assume(len <= 16);
#if USE_64_BIT
// Hope it gets vectorized
InternalVersionT max = vs[0];
for (int i = 1; i < len; ++i) {
max = std::max(vs[i], max);
}
return max;
#else
uint32_t zero;
memcpy(&zero, &z, sizeof(zero));
auto zeroVec = _mm512_set1_epi32(zero);
auto max = InternalVersionT(
zero +
_mm512_reduce_max_epi32(_mm512_sub_epi32(
_mm512_mask_loadu_epi32(zeroVec, _mm512_int2mask((1 << len) - 1), vs),
zeroVec)));
return max;
#endif
}
__attribute__((target("default")))
#endif
InternalVersionT
horizontalMaxUpTo16(InternalVersionT *vs, InternalVersionT, int len) {
assume(len <= 16);
InternalVersionT max = vs[0];
for (int i = 1; i < len; ++i) {
max = std::max(vs[i], max);
}
return max;
}
#if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__)
__attribute__((target("avx512f"))) InternalVersionT
horizontalMax16(InternalVersionT *vs, [[maybe_unused]] InternalVersionT z) {
#if USE_64_BIT
// Hope it gets vectorized
InternalVersionT max = vs[0];
for (int i = 1; i < 16; ++i) {
max = std::max(vs[i], max);
}
return max;
#else
uint32_t zero; // GCOVR_EXCL_LINE
memcpy(&zero, &z, sizeof(zero));
auto zeroVec = _mm512_set1_epi32(zero);
return InternalVersionT(zero + _mm512_reduce_max_epi32(_mm512_sub_epi32(
_mm512_loadu_epi32(vs), zeroVec)));
#endif
}
__attribute__((target("default")))
#endif
InternalVersionT
horizontalMax16(InternalVersionT *vs, InternalVersionT) {
InternalVersionT max = vs[0];
for (int i = 1; i < 16; ++i) {
max = std::max(vs[i], max);
}
return max;
}
// Precondition: `node->entryPresent`, and node is not the root
void fixupMaxVersion(Node *node, WriteContext *tls) {
assert(node->parent);
@@ -2987,13 +2902,15 @@ void fixupMaxVersion(Node *node, WriteContext *tls) {
break;
case Type_Node3: {
auto *self3 = static_cast<Node3 *>(node);
max = std::max(max, horizontalMaxUpTo16(self3->childMaxVersion, tls->zero,
self3->numChildren));
for (int i = 0; i < self3->numChildren; ++i) {
max = std::max(self3->childMaxVersion[i], max);
}
} break;
case Type_Node16: {
auto *self16 = static_cast<Node16 *>(node);
max = std::max(max, horizontalMaxUpTo16(self16->childMaxVersion, tls->zero,
self16->numChildren));
for (int i = 0; i < self16->numChildren; ++i) {
max = std::max(self16->childMaxVersion[i], max);
}
} break;
case Type_Node48: {
auto *self48 = static_cast<Node48 *>(node);
@@ -3003,7 +2920,9 @@ void fixupMaxVersion(Node *node, WriteContext *tls) {
} break;
case Type_Node256: {
auto *self256 = static_cast<Node256 *>(node);
max = std::max(max, horizontalMax16(self256->childMaxVersion, tls->zero));
for (auto v : self256->maxOfMax) {
max = std::max(v, max);
}
} break;
default: // GCOVR_EXCL_LINE
__builtin_unreachable(); // GCOVR_EXCL_LINE
@@ -3126,34 +3045,288 @@ Node *firstGeqPhysical(Node *n, const std::span<const uint8_t> key) {
}
}
#ifndef __has_attribute
#define __has_attribute(x) 0
#endif
#if __has_attribute(musttail)
#define MUSTTAIL __attribute__((musttail))
#else
#define MUSTTAIL
#endif
#if __has_attribute(preserve_none)
#define CONTINUATION_CALLING_CONVENTION __attribute__((preserve_none))
#else
#define CONTINUATION_CALLING_CONVENTION
#endif
typedef CONTINUATION_CALLING_CONVENTION void (*continuation)(struct CheckAll *,
int64_t prevJob,
int64_t job,
int64_t started,
int64_t count);
// State relevant to a particular query
struct CheckJob {
void setResult(bool ok) {
*result = ok ? ConflictSet::Commit : ConflictSet::Conflict;
}
[[nodiscard]] continuation init(const ConflictSet::ReadRange *read,
ConflictSet::Result *result, Node *root,
int64_t oldestVersionFullPrecision,
ReadContext *tls);
Node *n;
ChildAndMaxVersion childAndVersion;
std::span<const uint8_t> begin;
InternalVersionT readVersion;
ConflictSet::Result *result;
};
// State relevant to all queries
struct CheckAll {
constexpr static int kConcurrent = 32;
CheckJob inProgress[kConcurrent];
continuation next[kConcurrent];
int nextJob[kConcurrent];
Node *root;
int64_t oldestVersionFullPrecision;
ReadContext *tls;
const ConflictSet::ReadRange *queries;
ConflictSet::Result *results;
};
CONTINUATION_CALLING_CONVENTION void keepGoing(CheckAll *context,
int64_t prevJob, int64_t job,
int64_t started, int64_t count) {
prevJob = job;
job = context->nextJob[job];
MUSTTAIL return context->next[job](context, prevJob, job, started, count);
}
CONTINUATION_CALLING_CONVENTION void complete(CheckAll *context,
int64_t prevJob, int64_t job,
int64_t started, int64_t count) {
if (started == count) {
if (prevJob == job) {
return;
}
context->nextJob[prevJob] = context->nextJob[job];
job = prevJob;
} else {
int temp = started++;
context->next[job] = context->inProgress[job].init(
context->queries + temp, context->results + temp, context->root,
context->oldestVersionFullPrecision, context->tls);
}
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
}
namespace check_point_read_state_machine {
CONTINUATION_CALLING_CONVENTION void
down_left_spine(struct CheckAll *, int64_t prevJob, int64_t job,
int64_t started, int64_t count);
CONTINUATION_CALLING_CONVENTION void iter(struct CheckAll *, int64_t prevJob,
int64_t job, int64_t started,
int64_t count);
CONTINUATION_CALLING_CONVENTION void begin(struct CheckAll *, int64_t prevJob,
int64_t job, int64_t started,
int64_t count);
void begin(struct CheckAll *context, int64_t prevJob, int64_t job,
int64_t started, int64_t count) {
++context->tls->point_read_accum;
#if DEBUG_VERBOSE && !defined(NDEBUG)
fprintf(stderr, "Check point read: %s\n", printable(key).c_str());
#endif
auto *j = context->inProgress + job;
if (j->begin.size() == 0) {
if (j->n->entryPresent) {
j->setResult(j->n->entry.pointVersion <= j->readVersion);
MUSTTAIL return complete(context, prevJob, job, started, count);
}
j->n = getFirstChildExists(j->n);
context->next[job] = down_left_spine;
__builtin_prefetch(j->n);
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
}
j->childAndVersion = getChildAndMaxVersion(j->n, j->begin[0]);
context->next[job] = iter;
__builtin_prefetch(j->childAndVersion.child);
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
}
void iter(struct CheckAll *context, int64_t prevJob, int64_t job,
int64_t started, int64_t count) {
auto *j = context->inProgress + job;
if (j->childAndVersion.child == nullptr) {
auto c = getChildGeq(j->n, j->begin[0]);
if (c != nullptr) {
j->n = c;
context->next[job] = down_left_spine;
__builtin_prefetch(j->n);
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
} else {
j->n = nextSibling(j->n);
if (j->n == nullptr) {
j->setResult(true);
MUSTTAIL return complete(context, prevJob, job, started, count);
}
context->next[job] = down_left_spine;
__builtin_prefetch(j->n);
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
}
}
j->n = j->childAndVersion.child;
j->begin = j->begin.subspan(1, j->begin.size() - 1);
if (j->n->partialKeyLen > 0) {
int commonLen = std::min<int>(j->n->partialKeyLen, j->begin.size());
int i = longestCommonPrefix(j->n->partialKey(), j->begin.data(), commonLen);
if (i < commonLen) {
auto c = j->n->partialKey()[i] <=> j->begin[i];
if (c > 0) {
context->next[job] = down_left_spine;
MUSTTAIL return down_left_spine(context, prevJob, job, started, count);
} else {
j->n = nextSibling(j->n);
if (j->n == nullptr) {
j->setResult(true);
MUSTTAIL return complete(context, prevJob, job, started, count);
}
context->next[job] = down_left_spine;
__builtin_prefetch(j->n);
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
}
}
if (commonLen == j->n->partialKeyLen) {
// partial key matches
j->begin = j->begin.subspan(commonLen, j->begin.size() - commonLen);
} else if (j->n->partialKeyLen > int(j->begin.size())) {
// n is the first physical node greater than remaining, and there's no
// eq node
context->next[job] = down_left_spine;
MUSTTAIL return down_left_spine(context, prevJob, job, started, count);
}
}
if (j->childAndVersion.maxVersion <= j->readVersion) {
++context->tls->point_read_short_circuit_accum;
j->setResult(true);
MUSTTAIL return complete(context, prevJob, job, started, count);
}
++context->tls->point_read_iterations_accum;
if (j->begin.size() == 0) {
if (j->n->entryPresent) {
j->setResult(j->n->entry.pointVersion <= j->readVersion);
MUSTTAIL return complete(context, prevJob, job, started, count);
}
j->n = getFirstChildExists(j->n);
context->next[job] = down_left_spine;
__builtin_prefetch(j->n);
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
}
j->childAndVersion = getChildAndMaxVersion(j->n, j->begin[0]);
__builtin_prefetch(j->childAndVersion.child);
// j->next is already iter
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
}
void down_left_spine(struct CheckAll *context, int64_t prevJob, int64_t job,
int64_t started, int64_t count) {
auto *j = context->inProgress + job;
if (j->n->entryPresent) {
j->setResult(j->n->entry.rangeVersion <= j->readVersion);
MUSTTAIL return complete(context, prevJob, job, started, count);
}
j->n = getFirstChildExists(j->n);
__builtin_prefetch(j->n);
// j->next is already down_left_spine
MUSTTAIL return keepGoing(context, prevJob, job, started, count);
}
} // namespace check_point_read_state_machine
continuation CheckJob::init(const ConflictSet::ReadRange *read,
ConflictSet::Result *result, Node *root,
int64_t oldestVersionFullPrecision,
ReadContext *tls) {
auto begin = std::span<const uint8_t>(read->begin.p, read->begin.len);
auto end = std::span<const uint8_t>(read->end.p, read->end.len);
if (read->readVersion < oldestVersionFullPrecision) {
*result = ConflictSet::TooOld;
return complete;
} else if (end.size() == 0) {
this->begin = begin;
this->n = root;
this->readVersion = InternalVersionT(read->readVersion);
this->result = result;
return check_point_read_state_machine::begin;
// *result =
// checkPointRead(root, begin, InternalVersionT(read->readVersion), tls)
// ? ConflictSet::Commit
// : ConflictSet::Conflict;
// return complete;
} else {
*result = checkRangeRead(root, begin, end,
InternalVersionT(read->readVersion), tls)
? ConflictSet::Commit
: ConflictSet::Conflict;
return complete;
}
}
struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
void check(const ReadRange *reads, Result *result, int count) {
assert(oldestVersionFullPrecision >=
newestVersionFullPrecision - kNominalVersionWindow);
if (count == 0) {
return;
}
ReadContext tls;
tls.impl = this;
int64_t check_byte_accum = 0;
CheckAll context;
context.oldestVersionFullPrecision = oldestVersionFullPrecision;
context.queries = reads;
context.results = result;
context.root = root;
context.tls = &tls;
int64_t started = std::min(context.kConcurrent, count);
for (int i = 0; i < started; i++) {
context.next[i] = context.inProgress[i].init(
reads + i, result + i, root, oldestVersionFullPrecision, &tls);
context.nextJob[i] = i + 1;
}
context.nextJob[started - 1] = 0;
int prevJob = started - 1;
int job = 0;
context.next[job](&context, prevJob, job, started, count);
for (int i = 0; i < count; ++i) {
assert(reads[i].readVersion >= 0);
assert(reads[i].readVersion <= newestVersionFullPrecision);
const auto &r = reads[i];
check_byte_accum += r.begin.len + r.end.len;
auto begin = std::span<const uint8_t>(r.begin.p, r.begin.len);
auto end = std::span<const uint8_t>(r.end.p, r.end.len);
assert(oldestVersionFullPrecision >=
newestVersionFullPrecision - kNominalVersionWindow);
result[i] =
reads[i].readVersion < oldestVersionFullPrecision ? TooOld
: (end.size() > 0
? checkRangeRead(root, begin, end,
InternalVersionT(reads[i].readVersion), &tls)
: checkPointRead(root, begin,
InternalVersionT(reads[i].readVersion), &tls))
? Commit
: Conflict;
tls.commits_accum += result[i] == Commit;
tls.conflicts_accum += result[i] == Conflict;
tls.too_olds_accum += result[i] == TooOld;
}
point_read_total.add(tls.point_read_accum);
prefix_read_total.add(tls.prefix_read_accum);
range_read_total.add(tls.range_read_accum);
@@ -4095,24 +4268,6 @@ template <int kN> void benchScan2() {
});
}
void benchHorizontal16() {
ankerl::nanobench::Bench bench;
InternalVersionT vs[16];
for (int i = 0; i < 16; ++i) {
vs[i] = InternalVersionT(rand() % 1000 + 1000);
}
#if !USE_64_BIT
InternalVersionT::zero = InternalVersionT(rand() % 1000);
#endif
bench.run("horizontal16", [&]() {
bench.doNotOptimizeAway(horizontalMax16(vs, InternalVersionT::zero));
});
int x = rand() % 15 + 1;
bench.run("horizontalUpTo16", [&]() {
bench.doNotOptimizeAway(horizontalMaxUpTo16(vs, InternalVersionT::zero, x));
});
}
void benchLCP(int len) {
ankerl::nanobench::Bench bench;
std::vector<uint8_t> lhs(len);
@@ -4145,7 +4300,11 @@ void printTree() {
debugPrintDot(stdout, cs.root, &cs);
}
int main(void) { benchHorizontal16(); }
int main(void) {
for (int i = 0; i < 256; ++i) {
benchLCP(i);
}
}
#endif
#ifdef ENABLE_FUZZ

View File

@@ -8,7 +8,6 @@ RUN chmod -R 777 /tmp
RUN apt-get update
RUN apt-get upgrade -y
RUN TZ=America/Los_Angeles DEBIAN_FRONTEND=noninteractive apt-get install -y \
binutils-aarch64-linux-gnu \
build-essential \
ccache \
clang \

View File

@@ -1,4 +1,5 @@
#include <atomic>
#include <cstdint>
#include <errno.h>
#include <netdb.h>
#include <stdio.h>
@@ -21,78 +22,55 @@
std::atomic<int64_t> transactions;
constexpr int kBaseSearchDepth = 115;
constexpr int kWindowSize = 10000000;
std::string numToKey(int64_t num) {
constexpr int kNumPrefixes = 250000;
std::string makeKey(int64_t num, int suffixLen) {
std::string result;
result.resize(kBaseSearchDepth + sizeof(int64_t));
memset(result.data(), 0, kBaseSearchDepth);
result.resize(sizeof(int64_t) + suffixLen);
int64_t be = __builtin_bswap64(num);
memcpy(result.data() + kBaseSearchDepth, &be, sizeof(int64_t));
memcpy(result.data(), &be, sizeof(int64_t));
memset(result.data() + sizeof(int64_t), 0, suffixLen);
return result;
}
void workload(weaselab::ConflictSet *cs) {
int64_t version = kWindowSize;
cs->addWrites(nullptr, 0, version);
for (int i = 0; i < kNumPrefixes; ++i) {
for (int j = 0; j < 50; ++j) {
weaselab::ConflictSet::WriteRange wr;
auto k = makeKey(i, j);
wr.begin.p = (const uint8_t *)k.data();
wr.begin.len = k.size();
wr.end.len = 0;
cs->addWrites(&wr, 1, version);
}
}
++version;
for (int i = 0; i < kNumPrefixes; ++i) {
weaselab::ConflictSet::WriteRange wr;
auto k = makeKey(i, 50);
wr.begin.p = (const uint8_t *)k.data();
wr.begin.len = k.size();
wr.end.len = 0;
cs->addWrites(&wr, 1, version);
}
std::vector<weaselab::ConflictSet::Result> results(10);
for (;; transactions.fetch_add(1, std::memory_order_relaxed)) {
// Reads
{
auto beginK = numToKey(version - kWindowSize);
auto endK = numToKey(version - 1);
auto pointRv = version - kWindowSize + rand() % kWindowSize + 1;
auto pointK = numToKey(pointRv);
weaselab::ConflictSet::ReadRange reads[] = {
{
{(const uint8_t *)pointK.data(), int(pointK.size())},
{nullptr, 0},
pointRv,
},
{
{(const uint8_t *)beginK.data(), int(beginK.size())},
{(const uint8_t *)endK.data(), int(endK.size())},
version - 2,
},
};
weaselab::ConflictSet::Result result[sizeof(reads) / sizeof(reads[0])];
cs->check(reads, result, sizeof(reads) / sizeof(reads[0]));
// for (int i = 0; i < sizeof(reads) / sizeof(reads[0]); ++i) {
// if (result[i] != weaselab::ConflictSet::Commit) {
// fprintf(stderr, "Unexpected conflict: [%s, %s) @ %" PRId64 "\n",
// printable(reads[i].begin).c_str(),
// printable(reads[i].end).c_str(), reads[i].readVersion);
// abort();
// }
// }
std::vector<std::string> keys(10);
for (auto &k : keys) {
k = makeKey(rand() % kNumPrefixes, 49);
}
// Writes
{
weaselab::ConflictSet::WriteRange w;
auto k = numToKey(version);
w.begin.p = (const uint8_t *)k.data();
w.end.len = 0;
if (version % (kWindowSize / 2) == 0) {
for (int l = 0; l <= k.size(); ++l) {
w.begin.len = l;
cs->addWrites(&w, 1, version);
}
} else {
w.begin.len = k.size();
cs->addWrites(&w, 1, version);
int64_t beginN = version - kWindowSize + rand() % kWindowSize;
auto b = numToKey(beginN);
auto e = numToKey(beginN + 1000);
w.begin.p = (const uint8_t *)b.data();
w.begin.len = b.size();
w.end.p = (const uint8_t *)e.data();
w.end.len = e.size();
cs->addWrites(&w, 1, version);
}
std::vector<weaselab::ConflictSet::ReadRange> reads(10);
for (int i = 0; i < reads.size(); ++i) {
reads[i].begin.p = (const uint8_t *)(keys[i].data());
reads[i].begin.len = keys[i].size();
reads[i].end.len = 0;
reads[i].readVersion = version - 1;
}
// GC
cs->setOldestVersion(version - kWindowSize);
++version;
cs->check(reads.data(), results.data(), 10);
}
}

View File

@@ -5,4 +5,3 @@ set(CMAKE_CXX_COMPILER "/usr/bin/aarch64-linux-gnu-g++")
set(CMAKE_FIND_ROOT_PATH /usr/aarch64-linux-gnu)
set(CMAKE_CROSSCOMPILING_EMULATOR "qemu-aarch64;-L;/usr/aarch64-linux-gnu/")
set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE arm64)
set(LD_EXE "/usr/bin/aarch64-linux-gnu-ld")

8
privatize_symbols_macos.sh Executable file
View File

@@ -0,0 +1,8 @@
#!/bin/bash
# This has the effect of making visibility=hidden symbols private in object files
for obj in "$@" ; do
ld -r "$obj" -o "$obj.tmp"
touch -r "$obj" "$obj.tmp"
mv "$obj.tmp" "$obj"
done

View File

@@ -16,5 +16,4 @@ _ZN8weaselab11ConflictSetaSEOS0_
_ZNK8weaselab11ConflictSet12getMetricsV1EPPNS0_9MetricsV1EPi
_ZNK8weaselab11ConflictSet5checkEPKNS0_9ReadRangeEPNS0_6ResultEi
_ZNK8weaselab11ConflictSet8getBytesEv
_ZNK8weaselab11ConflictSet9MetricsV18getValueEv
___asan_globals_registered
_ZNK8weaselab11ConflictSet9MetricsV18getValueEv

View File

@@ -4,7 +4,7 @@ set -euo pipefail
# ./test_symbols.sh <library> <expected exported symbols file> <allowed imported symbols file>
diff -u <(sort < "$2" | grep -v ___asan_globals_registered) <(nm "$1" | grep " T " | cut -f3 -d " " | sort)
diff -u <(sort < "$2") <(nm "$1" | grep " T " | cut -f3 -d " " | sort)
ec=0
for symbol in $(nm "$1" | grep " U " | sed 's/ U //') ; do
if ! grep --fixed-strings "$symbol" "$3" > /dev/null ; then