7 Commits

Author SHA1 Message Date
0abf6a1ecf Improve Node3 search codegen
All checks were successful
Tests / Clang total: 3244, passed: 3244
Clang |Total|New|Outstanding|Fixed|Trend |:-:|:-:|:-:|:-:|:-: |0|0|0|0|:clap:
Tests / 64 bit versions total: 3244, passed: 3244
Tests / Debug total: 3242, passed: 3242
Tests / SIMD fallback total: 3244, passed: 3244
Tests / Release [gcc] total: 3244, passed: 3244
GNU C Compiler (gcc) |Total|New|Outstanding|Fixed|Trend |:-:|:-:|:-:|:-:|:-: |0|0|0|0|:clap:
Tests / Release [gcc,aarch64] total: 2419, passed: 2419
Tests / Coverage total: 2437, passed: 2437
Code Coverage #### Project Overview No changes detected, that affect the code coverage. * Line Coverage: 99.23% (1807/1821) * Branch Coverage: 68.22% (1434/2102) * Complexity Density: 0.00 * Lines of Code: 1821 #### Quality Gates Summary Output truncated.
weaselab/conflict-set/pipeline/head This commit looks good
2024-09-12 17:12:41 -07:00
867136ff1b Return pointer to next function 2024-09-09 21:59:49 -07:00
4b8f7320d3 Call function pointer in job 2024-09-09 21:00:31 -07:00
6628092384 Tinker with interleaveBoundedCyclicList 2024-09-09 20:25:40 -07:00
a0a4f1afea Only compile nanobench once 2024-09-09 20:10:55 -07:00
ca479c03ce Induce a cache miss in interleaving test 2024-09-09 17:55:35 -07:00
0a2e133ab9 Add InterleavingTest to explore #5 2024-09-09 17:27:58 -07:00
5 changed files with 191 additions and 3 deletions

View File

@@ -7,7 +7,6 @@
void showMemory(const ConflictSet &cs);
#endif
#define ANKERL_NANOBENCH_IMPLEMENT
#include "third_party/nanobench.h"
constexpr int kNumKeys = 1000000;

View File

@@ -138,6 +138,8 @@ include(CTest)
# disable tests if this is being used through e.g. FetchContent
if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND BUILD_TESTING)
add_library(nanobench ${CMAKE_CURRENT_SOURCE_DIR}/nanobench.cpp)
set(TEST_FLAGS -Wall -Wextra -Wunreachable-code -Wpedantic -UNDEBUG)
# corpus tests, which are tests curated by libfuzzer. The goal is to get broad
@@ -185,6 +187,7 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND BUILD_TESTING)
target_include_directories(conflict_set_main
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include)
target_compile_definitions(conflict_set_main PRIVATE ENABLE_MAIN)
target_link_libraries(conflict_set_main PRIVATE nanobench)
if(NOT APPLE)
# libfuzzer target, to generate/manage corpus
@@ -330,7 +333,7 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND BUILD_TESTING)
# bench
add_executable(conflict_set_bench Bench.cpp)
target_link_libraries(conflict_set_bench PRIVATE ${PROJECT_NAME})
target_link_libraries(conflict_set_bench PRIVATE ${PROJECT_NAME} nanobench)
set_target_properties(conflict_set_bench PROPERTIES SKIP_BUILD_RPATH ON)
add_executable(real_data_bench RealDataBench.cpp)
target_link_libraries(real_data_bench PRIVATE ${PROJECT_NAME})
@@ -345,6 +348,9 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND BUILD_TESTING)
add_executable(server_bench ServerBench.cpp)
target_link_libraries(server_bench PRIVATE ${PROJECT_NAME})
set_target_properties(server_bench PROPERTIES SKIP_BUILD_RPATH ON)
add_executable(interleaving_test InterleavingTest.cpp)
target_link_libraries(interleaving_test PRIVATE nanobench)
endif()
# packaging

View File

@@ -766,6 +766,8 @@ private:
int getNodeIndex(Node3 *self, uint8_t index) {
Node3 *n = (Node3 *)self;
assume(n->numChildren >= 1);
assume(n->numChildren <= 3);
for (int i = 0; i < n->numChildren; ++i) {
if (n->index[i] == index) {
return i;
@@ -1070,6 +1072,8 @@ ChildAndMaxVersion getChildAndMaxVersion(Node *self, uint8_t index) {
Node *getChildGeq(Node0 *, int) { return nullptr; }
Node *getChildGeq(Node3 *n, int child) {
assume(n->numChildren >= 1);
assume(n->numChildren <= 3);
for (int i = 0; i < n->numChildren; ++i) {
if (n->index[i] >= child) {
return n->children[i];
@@ -3923,7 +3927,6 @@ struct __attribute__((visibility("default"))) PeakPrinter {
#ifdef ENABLE_MAIN
#define ANKERL_NANOBENCH_IMPLEMENT
#include "third_party/nanobench.h"
template <int kN> void benchRezero() {

178
InterleavingTest.cpp Normal file
View File

@@ -0,0 +1,178 @@
#include <alloca.h>
#include <cassert>
#ifdef __x86_64__
#include <immintrin.h>
#endif
#include "third_party/nanobench.h"
struct Job {
int *input;
// Returned void* is a function pointer to the next continuation. We have to
// use void* because otherwise the type would be recursive.
typedef void *(*continuation)(Job *);
continuation next;
};
void *stepJob(Job *j) {
auto done = --(*j->input) == 0;
#ifdef __x86_64__
_mm_clflush(j->input);
#endif
return done ? nullptr : (void *)stepJob;
}
// So we can look at the disassembly more easily
extern "C" {
void sequential(Job **jobs, int count) {
for (int i = 0; i < count; ++i) {
do {
jobs[i]->next = (Job::continuation)jobs[i]->next(jobs[i]);
} while (jobs[i]->next);
}
}
void sequentialNoFuncPtr(Job **jobs, int count) {
for (int i = 0; i < count; ++i) {
while (stepJob(jobs[i]))
;
}
}
void interleaveSwapping(Job **jobs, int remaining) {
int current = 0;
while (remaining > 0) {
auto next = (Job::continuation)jobs[current]->next(jobs[current]);
jobs[current]->next = next;
if (next == nullptr) {
jobs[current] = jobs[remaining - 1];
--remaining;
} else {
++current;
}
if (current == remaining) {
current = 0;
}
}
}
void interleaveBoundedCyclicList(Job **jobs, int count) {
if (count == 0) {
return;
}
constexpr int kConcurrent = 32;
Job *inProgress[kConcurrent];
int nextJob[kConcurrent];
int started = std::min(kConcurrent, count);
for (int i = 0; i < started; i++) {
inProgress[i] = jobs[i];
nextJob[i] = i + 1;
}
nextJob[started - 1] = 0;
int prevJob = started - 1;
int job = 0;
for (;;) {
auto next = (Job::continuation)inProgress[job]->next(inProgress[job]);
inProgress[job]->next = next;
if (next == nullptr) {
if (started == count) {
if (prevJob == job)
break;
nextJob[prevJob] = nextJob[job];
job = prevJob;
} else {
int temp = started++;
inProgress[job] = jobs[temp];
}
}
prevJob = job;
job = nextJob[job];
}
}
void interleaveCyclicList(Job **jobs, int count) {
auto *nextJob = (int *)alloca(sizeof(int) * count);
for (int i = 0; i < count - 1; ++i) {
nextJob[i] = i + 1;
}
nextJob[count - 1] = 0;
int prevJob = count - 1;
int job = 0;
for (;;) {
auto next = (Job::continuation)jobs[job]->next(jobs[job]);
jobs[job]->next = next;
if (next == nullptr) {
if (prevJob == job)
break;
nextJob[prevJob] = nextJob[job];
job = prevJob;
}
prevJob = job;
job = nextJob[job];
}
}
}
int main() {
ankerl::nanobench::Bench bench;
constexpr int kNumJobs = 100;
bench.relative(true);
Job jobs[kNumJobs];
Job jobsCopy[kNumJobs];
int iters = 0;
int originalInput[kNumJobs];
for (int i = 0; i < kNumJobs; ++i) {
originalInput[i] = rand() % 5 + 3;
jobs[i].input = new int{originalInput[i]};
jobs[i].next = stepJob;
iters += *jobs[i].input;
}
bench.batch(iters);
for (auto [scheduler, name] :
{std::make_pair(sequentialNoFuncPtr, "sequentialNoFuncPtr"),
std::make_pair(sequential, "sequential"),
std::make_pair(interleaveSwapping, "interleavingSwapping"),
std::make_pair(interleaveBoundedCyclicList,
"interleaveBoundedCyclicList"),
std::make_pair(interleaveCyclicList, "interleaveCyclicList")}) {
for (int i = 0; i < kNumJobs; ++i) {
*jobs[i].input = originalInput[i];
}
memcpy(jobsCopy, jobs, sizeof(jobs));
Job *ps[kNumJobs];
for (int i = 0; i < kNumJobs; ++i) {
ps[i] = jobsCopy + i;
}
scheduler(ps, kNumJobs);
for (int i = 0; i < kNumJobs; ++i) {
if (*jobsCopy[i].input != 0) {
fprintf(stderr, "%s failed\n", name);
abort();
}
}
bench.run(name, [&]() {
for (int i = 0; i < kNumJobs; ++i) {
*jobs[i].input = originalInput[i];
}
memcpy(jobsCopy, jobs, sizeof(jobs));
Job *ps[kNumJobs];
for (int i = 0; i < kNumJobs; ++i) {
ps[i] = jobsCopy + i;
}
scheduler(ps, kNumJobs);
});
}
for (int i = 0; i < kNumJobs; ++i) {
delete jobs[i].input;
}
}

2
nanobench.cpp Normal file
View File

@@ -0,0 +1,2 @@
#define ANKERL_NANOBENCH_IMPLEMENT
#include "third_party/nanobench.h"