#include #include "arena_allocator.hpp" #include "metric.hpp" #include #include #include #include #include #include // High-contention benchmark setup struct ContentionEnvironment { // Background threads for contention std::vector background_threads; std::atomic stop_flag{false}; // Synchronization latches - must be members to avoid use-after-return std::unique_ptr contention_latch; std::unique_ptr render_latch; // Metrics for testing metric::Family counter_family; metric::Family gauge_family; metric::Family histogram_family; // Test instances metric::Counter counter; metric::Gauge gauge; metric::Histogram histogram; ContentionEnvironment() : counter_family( metric::create_counter("bench_counter", "Benchmark counter")), gauge_family(metric::create_gauge("bench_gauge", "Benchmark gauge")), histogram_family( metric::create_histogram("bench_histogram", "Benchmark histogram", // 7 explicit buckets + automatic +Inf = 8 // total (optimal for SIMD: 2x4 buckets) std::initializer_list{ 0.1, 0.5, 1.0, 2.5, 5.0, 10.0, 25.0})), counter(counter_family.create({{"benchmark", "contention"}})), gauge(gauge_family.create({{"benchmark", "contention"}})), histogram(histogram_family.create({{"benchmark", "contention"}})) {} void start_background_contention(int num_threads = 4) { stop_flag.store(false); contention_latch = std::make_unique(num_threads + 1); for (int i = 0; i < num_threads; ++i) { background_threads.emplace_back([this, i]() { // Each background thread creates its own metrics to avoid conflicts auto bg_counter = counter_family.create({{"thread", std::to_string(i)}}); auto bg_gauge = gauge_family.create({{"bg_thread", std::to_string(i)}}); auto bg_histogram = histogram_family.create({{"bg_thread", std::to_string(i)}}); std::mt19937 rng(i); std::uniform_real_distribution dist(0.0, 10.0); contention_latch ->arrive_and_wait(); // All background threads start together while (!stop_flag.load(std::memory_order_relaxed)) { // Simulate mixed workload bg_counter.inc(1.0); bg_gauge.set(dist(rng)); bg_histogram.observe(dist(rng)); } }); } contention_latch ->arrive_and_wait(); // Wait for all background threads to be ready } void start_render_thread() { render_latch = std::make_unique(2); background_threads.emplace_back([this]() { ArenaAllocator arena; render_latch->arrive_and_wait(); // Render thread signals it's ready while (!stop_flag.load(std::memory_order_relaxed)) { auto output = metric::render(arena); static_cast(output); // Suppress unused variable warning arena.reset(); } }); render_latch->arrive_and_wait(); // Wait for render thread to be ready } void stop_background_threads() { stop_flag.store(true); for (auto &t : background_threads) { if (t.joinable()) { t.join(); } } background_threads.clear(); } ~ContentionEnvironment() { stop_background_threads(); } }; int main() { ankerl::nanobench::Bench bench; bench.title("WeaselDB Metrics Performance").unit("operation").warmup(1000); // Baseline performance without contention { auto counter_family = metric::create_counter("baseline_counter", "Baseline counter"); auto counter = counter_family.create({{"type", "baseline"}}); bench.run("counter.inc() - no contention", [&]() { counter.inc(1.0); ankerl::nanobench::doNotOptimizeAway(counter); }); auto gauge_family = metric::create_gauge("baseline_gauge", "Baseline gauge"); auto gauge = gauge_family.create({{"type", "baseline"}}); bench.run("gauge.inc() - no contention", [&]() { gauge.inc(1.0); ankerl::nanobench::doNotOptimizeAway(gauge); }); bench.run("gauge.set() - no contention", [&]() { gauge.set(42.0); ankerl::nanobench::doNotOptimizeAway(gauge); }); auto histogram_family = metric::create_histogram( "baseline_histogram", "Baseline histogram", std::initializer_list{0.1, 0.5, 1.0, 2.5, 5.0, 10.0, 25.0}); auto histogram = histogram_family.create({{"type", "baseline"}}); bench.run("histogram.observe() - no contention", [&]() { histogram.observe(0.5); ankerl::nanobench::doNotOptimizeAway(histogram); }); } // High contention with background threads { ContentionEnvironment env; // Start background threads creating contention env.start_background_contention(8); bench.run("counter.inc() - 8 background threads", [&]() { env.counter.inc(1.0); ankerl::nanobench::doNotOptimizeAway(env.counter); }); bench.run("gauge.inc() - 8 background threads", [&]() { env.gauge.inc(1.0); ankerl::nanobench::doNotOptimizeAway(env.gauge); }); bench.run("gauge.set() - 8 background threads", [&]() { env.gauge.set(42.0); ankerl::nanobench::doNotOptimizeAway(env.gauge); }); bench.run("histogram.observe() - 8 background threads", [&]() { env.histogram.observe(1.5); ankerl::nanobench::doNotOptimizeAway(env.histogram); }); } // Concurrent render contention { ContentionEnvironment env; // Start background threads + render thread env.start_background_contention(4); env.start_render_thread(); bench.run("counter.inc() - with concurrent render", [&]() { env.counter.inc(1.0); ankerl::nanobench::doNotOptimizeAway(env.counter); }); bench.run("gauge.inc() - with concurrent render", [&]() { env.gauge.inc(1.0); ankerl::nanobench::doNotOptimizeAway(env.gauge); }); bench.run("histogram.observe() - with concurrent render", [&]() { env.histogram.observe(2.0); ankerl::nanobench::doNotOptimizeAway(env.histogram); }); } // Shared gauge contention { // Test the multi-writer CAS behavior of gauges when multiple threads // create gauges with the same labels. They will all point to the same // underlying state, causing high contention. auto gauge_family = metric::create_gauge("shared_gauge", "Shared gauge test"); std::atomic stop_shared{false}; std::vector shared_threads; std::latch start_latch{ 9}; // Force threads to start concurrently (8 background + 1 benchmark) for (int i = 0; i < 8; ++i) { shared_threads.emplace_back( [&gauge_family, &stop_shared, &start_latch]() { auto gauge = gauge_family.create({{"shared", "true"}}); start_latch.arrive_and_wait(); // All threads start together while (!stop_shared.load(std::memory_order_relaxed)) { gauge.inc(1.0); } }); } auto gauge_for_benchmark = gauge_family.create({{"shared", "true"}}); start_latch .arrive_and_wait(); // Benchmark thread waits for all background threads bench.run("gauge.inc() - 8 threads, same labels (contention)", [&]() { gauge_for_benchmark.inc(1.0); ankerl::nanobench::doNotOptimizeAway(gauge_for_benchmark); }); stop_shared.store(true); for (auto &t : shared_threads) { t.join(); } } // Render performance scaling { // Test render performance as number of metrics increases std::vector counters; std::vector gauges; std::vector histograms; auto counter_family = metric::create_counter("scale_counter", "Scale counter"); auto gauge_family = metric::create_gauge("scale_gauge", "Scale gauge"); auto histogram_family = metric::create_histogram( "scale_histogram", "Scale histogram", std::initializer_list{0.1, 0.5, 1.0, 2.5, 5.0, 10.0, 25.0}); // Create varying numbers of metrics for (int scale : {10, 100, 1000}) { // Clear previous metrics by creating new families // (Note: In real usage, metrics persist for application lifetime) for (int i = 0; i < scale; ++i) { counters.emplace_back( counter_family.create({{"id", std::to_string(i)}})); gauges.emplace_back(gauge_family.create({{"id", std::to_string(i)}})); histograms.emplace_back( histogram_family.create({{"id", std::to_string(i)}})); // Set some values counters.back().inc(static_cast(i)); gauges.back().set(static_cast(i * 2)); histograms.back().observe(static_cast(i) * 0.1); } ArenaAllocator arena; std::string bench_name = "render() - " + std::to_string(scale) + " metrics each type"; bench.run(bench_name, [&]() { auto output = metric::render(arena); ankerl::nanobench::doNotOptimizeAway(output); arena.reset(); }); } } // Callback metrics performance { auto counter_family = metric::create_counter("callback_counter", "Callback counter"); auto gauge_family = metric::create_gauge("callback_gauge", "Callback gauge"); std::atomic counter_value{0}; std::atomic gauge_value{100}; // Register callbacks counter_family.register_callback( {{"type", "callback"}}, [&counter_value]() { return counter_value.load(std::memory_order_relaxed); }); gauge_family.register_callback({{"type", "callback"}}, [&gauge_value]() { return gauge_value.load(std::memory_order_relaxed); }); // Background thread updating callback values std::atomic stop_callback{false}; std::latch start_latch{2}; // Background thread + benchmark thread std::thread callback_updater([&]() { start_latch.arrive_and_wait(); // Wait for benchmark to start while (!stop_callback.load()) { counter_value.fetch_add(1); gauge_value.store(gauge_value.load() + 1); } }); ArenaAllocator arena; start_latch.arrive_and_wait(); // Wait for background thread to be ready bench.run("render() - with callback metrics", [&]() { auto output = metric::render(arena); ankerl::nanobench::doNotOptimizeAway(output); arena.reset(); }); stop_callback.store(true); callback_updater.join(); } return 0; }