diff --git a/src/metric.cpp b/src/metric.cpp index a2dab1b..287f957 100644 --- a/src/metric.cpp +++ b/src/metric.cpp @@ -306,16 +306,15 @@ struct Gauge::State { // Histogram: Thread-local buckets, single writer, mutex protection per thread, // per histogram struct Histogram::State { - ArenaVector thresholds; // Bucket boundaries (sorted, deduplicated, - // sizes never change) - ArenaVector counts; // Count per bucket - double sum; // Sum of observations - uint64_t observations; // Total observation count + std::span thresholds; // Bucket boundaries (sorted, + // deduplicated, sizes never change) + std::span counts; // Count per bucket + double sum; // Sum of observations + uint64_t observations; // Total observation count std::mutex mutex; // Per-thread, per-histogram mutex for consistent reads/writes - State(ArenaAllocator &arena) - : thresholds(&arena), counts(&arena), sum(0.0), observations(0) {} + State() : sum(0.0), observations(0) {} friend struct Metric; }; @@ -428,20 +427,10 @@ struct Metric { // Acquire lock to get consistent snapshot std::lock_guard lock(instance->mutex); - // Ensure global accumulator exists + // Global accumulator should have been created when we made the + // histogram auto &global_state = family->global_accumulated_values[labels_key]; - if (!global_state) { - global_state = get_global_arena().construct( - get_global_arena()); - // Copy thresholds from instance - for (size_t i = 0; i < instance->thresholds.size(); ++i) { - global_state->thresholds.push_back(instance->thresholds[i]); - } - // Initialize counts with zeros - for (size_t i = 0; i < instance->counts.size(); ++i) { - global_state->counts.push_back(0); - } - } + assert(global_state); // Accumulate bucket counts (mutex already held) for (size_t i = 0; i < instance->counts.size(); ++i) { @@ -560,33 +549,43 @@ struct Metric { auto &ptr = family->p->per_thread_state[thread_id].instances[key]; if (!ptr) { - ptr = get_thread_local_arena().construct( - get_thread_local_arena()); + ptr = get_thread_local_arena().construct(); // DESIGN: Prometheus-compatible histogram buckets // Use buckets from family configuration - for (size_t i = 0; i < family->p->buckets.size(); ++i) { - ptr->thresholds.push_back(family->p->buckets[i]); - } + size_t bucket_count = family->p->buckets.size(); + double *thresholds_data = + get_thread_local_arena().allocate(bucket_count); + uint64_t *counts_data = + get_thread_local_arena().allocate(bucket_count); - // Initialize with zero values, mutex protects all operations - for (size_t i = 0; i < ptr->thresholds.size(); ++i) { - ptr->counts.push_back(0); - } + // Copy thresholds and initialize counts + std::memcpy(thresholds_data, family->p->buckets.data(), + bucket_count * sizeof(double)); + std::memset(counts_data, 0, bucket_count * sizeof(uint64_t)); + + ptr->thresholds = std::span(thresholds_data, bucket_count); + ptr->counts = std::span(counts_data, bucket_count); // Ensure global accumulator exists for this label set auto &global_state = family->p->global_accumulated_values[key]; if (!global_state) { - global_state = - get_global_arena().construct(get_global_arena()); - // Copy thresholds - for (size_t i = 0; i < ptr->thresholds.size(); ++i) { - global_state->thresholds.push_back(ptr->thresholds[i]); - } - // Initialize counts with zeros - for (size_t i = 0; i < ptr->thresholds.size(); ++i) { - global_state->counts.push_back(0); - } + global_state = get_global_arena().construct(); + + // Allocate and copy thresholds, initialize counts + double *global_thresholds_data = + get_global_arena().allocate(bucket_count); + uint64_t *global_counts_data = + get_global_arena().allocate(bucket_count); + + std::memcpy(global_thresholds_data, ptr->thresholds.data(), + bucket_count * sizeof(double)); + std::memset(global_counts_data, 0, bucket_count * sizeof(uint64_t)); + + global_state->thresholds = + std::span(global_thresholds_data, bucket_count); + global_state->counts = + std::span(global_counts_data, bucket_count); } } Histogram result; @@ -804,8 +803,8 @@ Histogram::Histogram() = default; // AVX-optimized implementation for high performance __attribute__((target("avx"))) static void -update_histogram_buckets_simd(const ArenaVector &thresholds, - ArenaVector &counts, double x, +update_histogram_buckets_simd(std::span thresholds, + std::span counts, double x, size_t start_idx) { const size_t size = thresholds.size(); size_t i = start_idx; @@ -1131,17 +1130,17 @@ compute_metric_values(ArenaAllocator &arena, // Compute histogram values - ITERATION ORDER MUST MATCH FORMAT PHASE size_t histogram_family_idx = 0; - for (const auto &family_pair : Metric::get_histogram_families()) { + for ([[maybe_unused]] const auto &[_name, _family] : + Metric::get_histogram_families()) { // Use pre-computed data with resolved pointers - no hash lookups! const auto &family_data = label_sets.histogram_data[histogram_family_idx++]; for (const auto &data : family_data) { size_t bucket_count = data.bucket_count; // Use cached bucket count - ArenaVector total_counts(&arena); - for (size_t i = 0; i < bucket_count; ++i) { - total_counts.push_back(0); - } + uint64_t *total_counts_data = arena.allocate(bucket_count); + std::memset(total_counts_data, 0, bucket_count * sizeof(uint64_t)); + std::span total_counts(total_counts_data, bucket_count); double total_sum = 0.0; uint64_t total_observations = 0;