Add initial thread pipeline benchmark
This commit is contained in:
@@ -212,6 +212,10 @@ target_link_libraries(bench_parser_comparison nanobench weaseljson test_data
|
|||||||
target_include_directories(bench_parser_comparison
|
target_include_directories(bench_parser_comparison
|
||||||
PRIVATE src ${rapidjson_SOURCE_DIR}/include)
|
PRIVATE src ${rapidjson_SOURCE_DIR}/include)
|
||||||
|
|
||||||
|
# Thread pipeline benchmark: built from benchmarks/bench_thread_pipeline.cpp,
# linked against nanobench and Threads::Threads, with src on its include path.
add_executable(bench_thread_pipeline benchmarks/bench_thread_pipeline.cpp)
|
||||||
|
target_link_libraries(bench_thread_pipeline nanobench Threads::Threads)
|
||||||
|
target_include_directories(bench_thread_pipeline PRIVATE src)
|
||||||
|
|
||||||
# Debug tools
|
# Debug tools
|
||||||
add_executable(
|
add_executable(
|
||||||
debug_arena tools/debug_arena.cpp src/json_commit_request_parser.cpp
|
debug_arena tools/debug_arena.cpp src/json_commit_request_parser.cpp
|
||||||
@@ -232,3 +236,4 @@ add_test(NAME server_connection_return_tests
|
|||||||
add_test(NAME arena_allocator_benchmarks COMMAND bench_arena_allocator)
|
add_test(NAME arena_allocator_benchmarks COMMAND bench_arena_allocator)
|
||||||
add_test(NAME commit_request_benchmarks COMMAND bench_commit_request)
|
add_test(NAME commit_request_benchmarks COMMAND bench_commit_request)
|
||||||
add_test(NAME parser_comparison_benchmarks COMMAND bench_parser_comparison)
|
add_test(NAME parser_comparison_benchmarks COMMAND bench_parser_comparison)
|
||||||
|
# Register the thread pipeline benchmark executable as a CTest test.
add_test(NAME thread_pipeline_benchmarks COMMAND bench_thread_pipeline)
|
||||||
|
|||||||
93
benchmarks/bench_thread_pipeline.cpp
Normal file
93
benchmarks/bench_thread_pipeline.cpp
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
#include "thread_pipeline.hpp"

#include <algorithm>
#include <cstddef>
#include <latch>
#include <nanobench.h>
#include <thread>
#include <vector>
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
{
|
||||||
|
constexpr int LOG_PIPELINE_SIZE = 10; // 2^10 = 1024 slots
|
||||||
|
constexpr int NUM_ITEMS = 100'000;
|
||||||
|
constexpr int BATCH_SIZE = 16;
|
||||||
|
constexpr int BUSY_ITERS = 100;
|
||||||
|
|
||||||
|
auto bench = ankerl::nanobench::Bench()
|
||||||
|
.title("Pipeline Throughput")
|
||||||
|
.unit("item")
|
||||||
|
.batch(NUM_ITEMS)
|
||||||
|
.relative(true)
|
||||||
|
.warmup(100);
|
||||||
|
bench.run("Zero stage pipeline", [&] {
|
||||||
|
for (int i = 0; i < NUM_ITEMS; ++i) {
|
||||||
|
for (volatile int i = 0; i < BUSY_ITERS; i = i + 1) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
std::vector<int> threads_per_stage = {1};
|
||||||
|
ThreadPipeline<std::latch *> pipeline(LOG_PIPELINE_SIZE, threads_per_stage);
|
||||||
|
|
||||||
|
std::latch done{0};
|
||||||
|
|
||||||
|
// Stage 0 consumer thread
|
||||||
|
std::thread stage0_thread([&pipeline, &done]() {
|
||||||
|
const int stage = 0;
|
||||||
|
const int thread_id = 0;
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
auto guard = pipeline.acquire(stage, thread_id);
|
||||||
|
|
||||||
|
for (auto &item : guard.batch) {
|
||||||
|
for (volatile int i = 0; i < BUSY_ITERS; i = i + 1) {
|
||||||
|
}
|
||||||
|
if (item == &done) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (item) {
|
||||||
|
item->count_down();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
bench.run("One stage pipeline", [&] {
|
||||||
|
// Producer (main thread)
|
||||||
|
int items_pushed = 0;
|
||||||
|
while (items_pushed < NUM_ITEMS - 1) {
|
||||||
|
auto guard = pipeline.push(
|
||||||
|
std::min(NUM_ITEMS - 1 - items_pushed, BATCH_SIZE), true);
|
||||||
|
|
||||||
|
auto it = guard.batch.begin();
|
||||||
|
items_pushed += guard.batch.size();
|
||||||
|
for (size_t i = 0; i < guard.batch.size(); ++i, ++it) {
|
||||||
|
*it = nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::latch finish{1};
|
||||||
|
{
|
||||||
|
auto guard = pipeline.push(1, true);
|
||||||
|
*guard.batch.begin() = &finish;
|
||||||
|
}
|
||||||
|
finish.wait();
|
||||||
|
});
|
||||||
|
|
||||||
|
{
|
||||||
|
auto guard = pipeline.push(1, true);
|
||||||
|
*guard.batch.begin() = &done;
|
||||||
|
}
|
||||||
|
|
||||||
|
stage0_thread.join();
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Add more benchmarks for:
|
||||||
|
// - Multi-stage pipelines (3+ stages)
|
||||||
|
// - Multiple threads per stage
|
||||||
|
// - Different batch sizes
|
||||||
|
// - Pipeline contention under load
|
||||||
|
// - Memory usage patterns
|
||||||
|
// - Latency measurements
|
||||||
|
// - Different wait strategies
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user