diff --git a/benchmarks/bench_metric.cpp b/benchmarks/bench_metric.cpp
index 85c6a1c..d424719 100644
--- a/benchmarks/bench_metric.cpp
+++ b/benchmarks/bench_metric.cpp
@@ -4,7 +4,6 @@
 #include "metric.hpp"
 
 #include
-#include
 #include
 #include
 #include
@@ -17,6 +16,10 @@ struct ContentionEnvironment {
   std::vector<std::thread> background_threads;
   std::atomic<bool> stop_flag{false};
 
+  // Synchronization latches - must be members to avoid use-after-return
+  std::unique_ptr<std::latch> contention_latch;
+  std::unique_ptr<std::latch> render_latch;
+
   // Metrics for testing
   metric::Family counter_family;
   metric::Family gauge_family;
@@ -40,6 +43,7 @@ struct ContentionEnvironment {
 
   void start_background_contention(int num_threads = 4) {
     stop_flag.store(false);
+    contention_latch = std::make_unique<std::latch>(num_threads + 1);
 
     for (int i = 0; i < num_threads; ++i) {
       background_threads.emplace_back([this, i]() {
@@ -53,6 +57,9 @@ struct ContentionEnvironment {
         std::mt19937 rng(i);
         std::uniform_real_distribution<double> dist(0.0, 10.0);
 
+        contention_latch
+            ->arrive_and_wait(); // All background threads start together
+
         while (!stop_flag.load(std::memory_order_relaxed)) {
           // Simulate mixed workload
          bg_counter.inc(1.0);
@@ -61,18 +68,27 @@
         }
       });
     }
+
+    contention_latch
+        ->arrive_and_wait(); // Wait for all background threads to be ready
   }
 
   void start_render_thread() {
+    render_latch = std::make_unique<std::latch>(2);
+
    background_threads.emplace_back([this]() {
       ArenaAllocator arena;
 
+      render_latch->arrive_and_wait(); // Render thread signals it's ready
+
       while (!stop_flag.load(std::memory_order_relaxed)) {
         auto output = metric::render(arena);
         static_cast<void>(output); // Suppress unused variable warning
         arena.reset();
       }
     });
+
+    render_latch->arrive_and_wait(); // Wait for render thread to be ready
   }
 
   void stop_background_threads() {
@@ -190,17 +206,23 @@ int main() {
     std::atomic<bool> stop_shared{false};
     std::vector<std::thread> shared_threads;
+    std::latch start_latch{
+        9}; // Force threads to start concurrently (8 background + 1 benchmark)
 
     for (int i = 0; i < 8; ++i) {
-      shared_threads.emplace_back([&gauge_family, &stop_shared]() {
-        auto gauge = gauge_family.create({{"shared", "true"}});
-        while (!stop_shared.load(std::memory_order_relaxed)) {
-          gauge.inc(1.0);
-        }
-      });
+      shared_threads.emplace_back(
+          [&gauge_family, &stop_shared, &start_latch]() {
+            auto gauge = gauge_family.create({{"shared", "true"}});
+            start_latch.arrive_and_wait(); // All threads start together
+            while (!stop_shared.load(std::memory_order_relaxed)) {
+              gauge.inc(1.0);
+            }
+          });
     }
 
     auto gauge_for_benchmark = gauge_family.create({{"shared", "true"}});
+    start_latch
+        .arrive_and_wait(); // Benchmark thread waits for all background threads
 
     bench.run("gauge.inc() - 8 threads, same labels (contention)", [&]() {
       gauge_for_benchmark.inc(1.0);
       ankerl::nanobench::doNotOptimizeAway(gauge_for_benchmark);
@@ -277,7 +299,10 @@ int main() {
 
     // Background thread updating callback values
     std::atomic<bool> stop_callback{false};
+    std::latch start_latch{2}; // Background thread + benchmark thread
+
     std::thread callback_updater([&]() {
+      start_latch.arrive_and_wait(); // Wait for benchmark to start
       while (!stop_callback.load()) {
         counter_value.fetch_add(1);
         gauge_value.store(gauge_value.load() + 1);
@@ -286,6 +311,7 @@
 
     ArenaAllocator arena;
 
+    start_latch.arrive_and_wait(); // Wait for background thread to be ready
     bench.run("render() - with callback metrics", [&]() {
       auto output = metric::render(arena);
       ankerl::nanobench::doNotOptimizeAway(output);
diff --git a/style.md b/style.md
index f2da1e0..689ab94 100644
--- a/style.md
+++ b/style.md
@@ -568,6 +568,24 @@ TEST_CASE("Server accepts connections") {
 - `std::latch`, `std::barrier`, futures/promises
 - **Force concurrent execution** using `std::latch` to synchronize thread startup
 
+#### Threading Checklist for Tests/Benchmarks
+
+**MANDATORY: Before writing any `std::thread` or `threads.emplace_back()`:**
+
+1. **Count the total threads** - include the main/benchmark thread in the count
+2. **Always assume concurrent execution is needed** - tests and benchmarks require real concurrency
+3. **Add `std::latch start_latch{N}`**, where N = the total number of concurrent threads
+4. **Each thread calls `start_latch.arrive_and_wait()`** before doing work
+5. **The main/benchmark thread calls `start_latch.arrive_and_wait()`** before measurement
+
+**Red flags to catch immediately:**
+- ❌ Creating threads in a loop without a `std::latch`
+- ❌ Background threads starting work immediately
+- ❌ The benchmark measuring before all threads are synchronized
+- ❌ Any use of `sleep_for`, `wait_for`, or timeouts
+
+**Simple rule:** multiple threads = `std::latch` synchronization. No exceptions, even for "simple" background threads.
+
 ```cpp
 // BAD: Race likely over before threads start
 std::atomic<int> counter{0};
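
For reference, here is the pattern the checklist mandates as a minimal, self-contained sketch. The atomic-counter workload, the `kWorkers` constant, and the placeholder measured section are illustrative assumptions, not part of this patch:

```cpp
// Sketch of the startup-latch pattern from the checklist above; the workload
// (an atomic counter) and the name kWorkers are hypothetical examples.
#include <atomic>
#include <latch>
#include <thread>
#include <vector>

int main() {
  constexpr int kWorkers = 4;
  std::atomic<int> counter{0};
  std::atomic<bool> stop{false};

  // Steps 1 and 3: count every concurrent thread (4 workers + 1 main thread)
  // and size the latch to match.
  std::latch start_latch{kWorkers + 1};

  std::vector<std::thread> workers;
  for (int i = 0; i < kWorkers; ++i) {
    workers.emplace_back([&]() {
      start_latch.arrive_and_wait(); // Step 4: no work before the latch opens
      while (!stop.load(std::memory_order_relaxed)) {
        counter.fetch_add(1, std::memory_order_relaxed);
      }
    });
  }

  start_latch.arrive_and_wait(); // Step 5: main arrives before measuring
  // ...the measured section would run here, with all workers guaranteed live...
  stop.store(true, std::memory_order_relaxed);

  for (auto &t : workers) {
    t.join();
  }
}
```

Sizing the latch at `kWorkers + 1` and having the main thread arrive last is what guarantees the measured section never starts before every worker is actually running.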