Use std::latch sync in benchmarks too
This commit is contained in:
@@ -4,7 +4,6 @@
|
||||
#include "metric.hpp"
|
||||
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <cmath>
|
||||
#include <latch>
|
||||
#include <random>
|
||||
@@ -17,6 +16,10 @@ struct ContentionEnvironment {
|
||||
std::vector<std::thread> background_threads;
|
||||
std::atomic<bool> stop_flag{false};
|
||||
|
||||
// Synchronization latches - must be members to avoid use-after-return
|
||||
std::unique_ptr<std::latch> contention_latch;
|
||||
std::unique_ptr<std::latch> render_latch;
|
||||
|
||||
// Metrics for testing
|
||||
metric::Family<metric::Counter> counter_family;
|
||||
metric::Family<metric::Gauge> gauge_family;
|
||||
@@ -40,6 +43,7 @@ struct ContentionEnvironment {
|
||||
|
||||
void start_background_contention(int num_threads = 4) {
|
||||
stop_flag.store(false);
|
||||
contention_latch = std::make_unique<std::latch>(num_threads + 1);
|
||||
|
||||
for (int i = 0; i < num_threads; ++i) {
|
||||
background_threads.emplace_back([this, i]() {
|
||||
@@ -53,6 +57,9 @@ struct ContentionEnvironment {
|
||||
std::mt19937 rng(i);
|
||||
std::uniform_real_distribution<double> dist(0.0, 10.0);
|
||||
|
||||
contention_latch
|
||||
->arrive_and_wait(); // All background threads start together
|
||||
|
||||
while (!stop_flag.load(std::memory_order_relaxed)) {
|
||||
// Simulate mixed workload
|
||||
bg_counter.inc(1.0);
|
||||
@@ -61,18 +68,27 @@ struct ContentionEnvironment {
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
contention_latch
|
||||
->arrive_and_wait(); // Wait for all background threads to be ready
|
||||
}
|
||||
|
||||
void start_render_thread() {
|
||||
render_latch = std::make_unique<std::latch>(2);
|
||||
|
||||
background_threads.emplace_back([this]() {
|
||||
ArenaAllocator arena;
|
||||
|
||||
render_latch->arrive_and_wait(); // Render thread signals it's ready
|
||||
|
||||
while (!stop_flag.load(std::memory_order_relaxed)) {
|
||||
auto output = metric::render(arena);
|
||||
static_cast<void>(output); // Suppress unused variable warning
|
||||
arena.reset();
|
||||
}
|
||||
});
|
||||
|
||||
render_latch->arrive_and_wait(); // Wait for render thread to be ready
|
||||
}
|
||||
|
||||
void stop_background_threads() {
|
||||
@@ -190,17 +206,23 @@ int main() {
|
||||
|
||||
std::atomic<bool> stop_shared{false};
|
||||
std::vector<std::thread> shared_threads;
|
||||
std::latch start_latch{
|
||||
9}; // Force threads to start concurrently (8 background + 1 benchmark)
|
||||
|
||||
for (int i = 0; i < 8; ++i) {
|
||||
shared_threads.emplace_back([&gauge_family, &stop_shared]() {
|
||||
auto gauge = gauge_family.create({{"shared", "true"}});
|
||||
while (!stop_shared.load(std::memory_order_relaxed)) {
|
||||
gauge.inc(1.0);
|
||||
}
|
||||
});
|
||||
shared_threads.emplace_back(
|
||||
[&gauge_family, &stop_shared, &start_latch]() {
|
||||
auto gauge = gauge_family.create({{"shared", "true"}});
|
||||
start_latch.arrive_and_wait(); // All threads start together
|
||||
while (!stop_shared.load(std::memory_order_relaxed)) {
|
||||
gauge.inc(1.0);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
auto gauge_for_benchmark = gauge_family.create({{"shared", "true"}});
|
||||
start_latch
|
||||
.arrive_and_wait(); // Benchmark thread waits for all background threads
|
||||
bench.run("gauge.inc() - 8 threads, same labels (contention)", [&]() {
|
||||
gauge_for_benchmark.inc(1.0);
|
||||
ankerl::nanobench::doNotOptimizeAway(gauge_for_benchmark);
|
||||
@@ -277,7 +299,10 @@ int main() {
|
||||
|
||||
// Background thread updating callback values
|
||||
std::atomic<bool> stop_callback{false};
|
||||
std::latch start_latch{2}; // Background thread + benchmark thread
|
||||
|
||||
std::thread callback_updater([&]() {
|
||||
start_latch.arrive_and_wait(); // Wait for benchmark to start
|
||||
while (!stop_callback.load()) {
|
||||
counter_value.fetch_add(1);
|
||||
gauge_value.store(gauge_value.load() + 1);
|
||||
@@ -286,6 +311,7 @@ int main() {
|
||||
|
||||
ArenaAllocator arena;
|
||||
|
||||
start_latch.arrive_and_wait(); // Wait for background thread to be ready
|
||||
bench.run("render() - with callback metrics", [&]() {
|
||||
auto output = metric::render(arena);
|
||||
ankerl::nanobench::doNotOptimizeAway(output);
|
||||
|
||||
18
style.md
18
style.md
@@ -568,6 +568,24 @@ TEST_CASE("Server accepts connections") {
|
||||
- `std::latch`, `std::barrier`, futures/promises
|
||||
- **Force concurrent execution** using `std::latch` to synchronize thread startup
|
||||
|
||||
#### Threading Checklist for Tests/Benchmarks
|
||||
|
||||
**MANDATORY: Before writing any `std::thread` or `threads.emplace_back()`:**
|
||||
|
||||
1. **Count total threads** - Include main/benchmark thread in count
|
||||
2. **Always assume concurrent execution needed** - Tests/benchmarks require real concurrency
|
||||
3. **Add `std::latch start_latch{N}`** where N = total concurrent threads
|
||||
4. **Each thread calls `start_latch.arrive_and_wait()`** before doing work
|
||||
5. **Main/benchmark thread calls `start_latch.arrive_and_wait()`** before measurement
|
||||
|
||||
**Red flags to catch immediately:**
|
||||
- ❌ Creating threads in a loop without `std::latch`
|
||||
- ❌ Background threads starting work immediately
|
||||
- ❌ Benchmark measuring before all threads synchronized
|
||||
- ❌ Any use of `sleep_for`, `wait_for`, or timeouts
|
||||
|
||||
**Simple rule:** Multiple threads = `std::latch` synchronization. No exceptions, even for "simple" background threads.
|
||||
|
||||
```cpp
|
||||
// BAD: Race likely over before threads start
|
||||
std::atomic<int> counter{0};
|
||||
|
||||
Reference in New Issue
Block a user