From 87bbb477878f52cc9e3218890bf0d10233a5fcc0 Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Mon, 1 Sep 2025 17:50:34 -0400 Subject: [PATCH] More precompute --- src/metric.cpp | 367 +++++++++++++++++++++++++++++-------------------- 1 file changed, 215 insertions(+), 152 deletions(-) diff --git a/src/metric.cpp b/src/metric.cpp index be2c698..dca4af9 100644 --- a/src/metric.cpp +++ b/src/metric.cpp @@ -585,6 +585,152 @@ struct Metric { result.p = ptr; return result; } + + // Pre-computed data structures with resolved pointers to eliminate hash + // lookups + struct CounterLabelData { + LabelsKey labels_key; + std::vector thread_states; // Pre-resolved pointers + Counter::State *global_state; // Pre-resolved global state pointer + + CounterLabelData(const LabelsKey &key) + : labels_key(key), global_state(nullptr) {} + }; + + struct GaugeLabelData { + LabelsKey labels_key; + Gauge::State *instance_state; // Direct pointer to gauge instance + + GaugeLabelData(const LabelsKey &key) + : labels_key(key), instance_state(nullptr) {} + }; + + struct HistogramLabelData { + LabelsKey labels_key; + std::vector thread_states; // Pre-resolved pointers + Histogram::State *global_state; // Pre-resolved global state pointer + size_t bucket_count; // Cache bucket count from family + + HistogramLabelData(const LabelsKey &key) + : labels_key(key), global_state(nullptr), bucket_count(0) {} + }; + + // Pre-computed data for each family type, built once and reused + struct LabelSets { + std::vector> counter_data; + std::vector> gauge_data; + std::vector> histogram_data; + }; + + // Build label sets once for reuse in both phases + static LabelSets build_label_sets(ArenaAllocator &arena) { + LabelSets label_sets; + + // Build counter data with pre-resolved pointers + for (const auto &[name, family] : Metric::get_counter_families()) { + // Collect all unique labels first + std::set, ArenaStlAllocator> + all_labels{ArenaStlAllocator(&arena)}; + + for (const auto &[thread_id, per_thread] : family->per_thread_state) { + for (const auto &[labels_key, instance] : per_thread.instances) { + all_labels.insert(labels_key); + } + } + for (const auto &[labels_key, global_state] : + family->global_accumulated_values) { + if (global_state) { + all_labels.insert(labels_key); + } + } + + // Pre-resolve all pointers for each label set + std::vector family_data; + for (const auto &labels_key : all_labels) { + CounterLabelData data(labels_key); + + // Pre-resolve thread-local state pointers + for (const auto &[thread_id, per_thread] : family->per_thread_state) { + auto it = per_thread.instances.find(labels_key); + if (it != per_thread.instances.end()) { + data.thread_states.push_back(it->second); + } + } + + // Pre-resolve global accumulated state pointer + auto global_it = family->global_accumulated_values.find(labels_key); + data.global_state = + (global_it != family->global_accumulated_values.end() && + global_it->second) + ? global_it->second + : nullptr; + + family_data.push_back(std::move(data)); + } + label_sets.counter_data.push_back(std::move(family_data)); + } + + // Build gauge data with pre-resolved pointers + for (const auto &[name, family] : Metric::get_gauge_families()) { + std::vector family_data; + + // Gauges iterate directly over instances + for (const auto &[labels_key, instance] : family->instances) { + GaugeLabelData data(labels_key); + data.instance_state = instance; + family_data.push_back(std::move(data)); + } + + label_sets.gauge_data.push_back(std::move(family_data)); + } + + // Build histogram data with pre-resolved pointers + for (const auto &[name, family] : Metric::get_histogram_families()) { + // Collect all unique labels first + std::set, ArenaStlAllocator> + all_labels{ArenaStlAllocator(&arena)}; + + for (const auto &[thread_id, per_thread] : family->per_thread_state) { + for (const auto &[labels_key, instance] : per_thread.instances) { + all_labels.insert(labels_key); + } + } + for (const auto &[labels_key, global_state] : + family->global_accumulated_values) { + if (global_state) { + all_labels.insert(labels_key); + } + } + + // Pre-resolve all pointers for each label set + std::vector family_data; + for (const auto &labels_key : all_labels) { + HistogramLabelData data(labels_key); + data.bucket_count = family->buckets.size(); // Cache bucket count + + // Pre-resolve thread-local state pointers + for (const auto &[thread_id, per_thread] : family->per_thread_state) { + auto it = per_thread.instances.find(labels_key); + if (it != per_thread.instances.end()) { + data.thread_states.push_back(it->second); + } + } + + // Pre-resolve global accumulated state pointer + auto global_it = family->global_accumulated_values.find(labels_key); + data.global_state = + (global_it != family->global_accumulated_values.end() && + global_it->second) + ? global_it->second + : nullptr; + + family_data.push_back(std::move(data)); + } + label_sets.histogram_data.push_back(std::move(family_data)); + } + + return label_sets; + } }; Counter::Counter() = default; @@ -928,79 +1074,10 @@ union MetricValue { uint64_t as_uint64; }; -// Label sets for each family type, built once and reused -struct LabelSets { - std::vector< - std::set, ArenaStlAllocator>> - counter_labels; - std::vector< - std::set, ArenaStlAllocator>> - gauge_labels; - std::vector< - std::set, ArenaStlAllocator>> - histogram_labels; -}; - -// Build label sets once for reuse in both phases -static LabelSets build_label_sets(ArenaAllocator &arena) { - LabelSets label_sets; - - // Build counter label sets - for (const auto &[name, family] : Metric::get_counter_families()) { - std::set, ArenaStlAllocator> - all_labels{ArenaStlAllocator(&arena)}; - - for (const auto &[thread_id, per_thread] : family->per_thread_state) { - for (const auto &[labels_key, instance] : per_thread.instances) { - all_labels.insert(labels_key); - } - } - for (const auto &[labels_key, global_state] : - family->global_accumulated_values) { - if (global_state) { - all_labels.insert(labels_key); - } - } - - label_sets.counter_labels.push_back(std::move(all_labels)); - } - - // Build gauge label sets (none needed - gauges iterate directly over - // instances) - for (const auto &[name, family] : Metric::get_gauge_families()) { - (void)name; - (void)family; // Suppress unused variable warnings - std::set, ArenaStlAllocator> - empty_set{ArenaStlAllocator(&arena)}; - label_sets.gauge_labels.push_back(std::move(empty_set)); - } - - // Build histogram label sets - for (const auto &[name, family] : Metric::get_histogram_families()) { - std::set, ArenaStlAllocator> - all_labels{ArenaStlAllocator(&arena)}; - - for (const auto &[thread_id, per_thread] : family->per_thread_state) { - for (const auto &[labels_key, instance] : per_thread.instances) { - all_labels.insert(labels_key); - } - } - for (const auto &[labels_key, global_state] : - family->global_accumulated_values) { - if (global_state) { - all_labels.insert(labels_key); - } - } - - label_sets.histogram_labels.push_back(std::move(all_labels)); - } - - return label_sets; -} - // Phase 1: Compute all metric values in deterministic order static ArenaVector -compute_metric_values(ArenaAllocator &arena, const LabelSets &label_sets) { +compute_metric_values(ArenaAllocator &arena, + const Metric::LabelSets &label_sets) { ArenaVector values(&arena); // Compute counter values - ITERATION ORDER MUST MATCH FORMAT PHASE @@ -1012,29 +1089,22 @@ compute_metric_values(ArenaAllocator &arena, const LabelSets &label_sets) { values.push_back({.as_double = value}); } - // Use pre-built label sets - const auto &all_labels = label_sets.counter_labels[counter_family_idx++]; - - // Iterate by label, lookup per thread (O(1) unordered_map lookup) - for (const auto &labels_key : all_labels) { + // Use pre-computed data with resolved pointers - no hash lookups! + const auto &family_data = label_sets.counter_data[counter_family_idx++]; + for (const auto &data : family_data) { double total_value = 0.0; - // Sum thread-local values for this label set - for (const auto &[thread_id, per_thread] : family->per_thread_state) { - auto it = per_thread.instances.find(labels_key); - if (it != per_thread.instances.end()) { - // Atomic read to match atomic store in Counter::inc() - double value; - __atomic_load(&it->second->value, &value, __ATOMIC_RELAXED); - total_value += value; - } + // Sum thread-local values using pre-resolved pointers + for (auto *state_ptr : data.thread_states) { + // Atomic read to match atomic store in Counter::inc() + double value; + __atomic_load(&state_ptr->value, &value, __ATOMIC_RELAXED); + total_value += value; } - // Add global accumulated value for this label set - auto global_it = family->global_accumulated_values.find(labels_key); - if (global_it != family->global_accumulated_values.end() && - global_it->second) { - total_value += global_it->second->value; + // Add global accumulated value using pre-resolved pointer + if (data.global_state) { + total_value += data.global_state->value; } values.push_back({.as_double = total_value}); @@ -1042,6 +1112,7 @@ compute_metric_values(ArenaAllocator &arena, const LabelSets &label_sets) { } // Compute gauge values - ITERATION ORDER MUST MATCH FORMAT PHASE + size_t gauge_family_idx = 0; for (const auto &[name, family] : Metric::get_gauge_families()) { // Callback values for (const auto &[labels_key, callback] : family->callbacks) { @@ -1049,25 +1120,23 @@ compute_metric_values(ArenaAllocator &arena, const LabelSets &label_sets) { values.push_back({.as_double = value}); } - // Instance values (gauges don't aggregate, just direct values) - for (const auto &[labels_key, instance] : family->instances) { + // Use pre-computed data with resolved pointers - no hash lookups! + const auto &family_data = label_sets.gauge_data[gauge_family_idx++]; + for (const auto &data : family_data) { auto value = std::bit_cast( - instance->value.load(std::memory_order_relaxed)); + data.instance_state->value.load(std::memory_order_relaxed)); values.push_back({.as_double = value}); } } // Compute histogram values - ITERATION ORDER MUST MATCH FORMAT PHASE size_t histogram_family_idx = 0; - for (const auto &[name, family] : Metric::get_histogram_families()) { - // Use pre-built label sets - const auto &all_labels = - label_sets.histogram_labels[histogram_family_idx++]; + for (const auto &family_pair : Metric::get_histogram_families()) { + // Use pre-computed data with resolved pointers - no hash lookups! + const auto &family_data = label_sets.histogram_data[histogram_family_idx++]; - // Iterate by label, lookup per thread (O(1) unordered_map lookup) - for (const auto &labels_key : all_labels) { - // Get bucket count from family config or first instance - size_t bucket_count = family->buckets.size(); + for (const auto &data : family_data) { + size_t bucket_count = data.bucket_count; // Use cached bucket count ArenaVector total_counts(&arena); for (size_t i = 0; i < bucket_count; ++i) { @@ -1076,40 +1145,33 @@ compute_metric_values(ArenaAllocator &arena, const LabelSets &label_sets) { double total_sum = 0.0; uint64_t total_observations = 0; - // Sum thread-local values for this label set - for (const auto &[thread_id, per_thread] : family->per_thread_state) { - auto it = per_thread.instances.find(labels_key); - if (it != per_thread.instances.end()) { - auto *instance = it->second; + // Sum thread-local values using pre-resolved pointers + for (auto *instance : data.thread_states) { + // Extract data under lock - minimize critical section + uint64_t *counts_snapshot = arena.allocate(bucket_count); + double sum_snapshot; + uint64_t observations_snapshot; - // Extract data under lock - minimize critical section - uint64_t *counts_snapshot = arena.allocate(bucket_count); - double sum_snapshot; - uint64_t observations_snapshot; - - { - std::lock_guard lock(instance->mutex); - for (size_t i = 0; i < instance->counts.size(); ++i) { - counts_snapshot[i] = instance->counts[i]; - } - sum_snapshot = instance->sum; - observations_snapshot = instance->observations; + { + std::lock_guard lock(instance->mutex); + for (size_t i = 0; i < instance->counts.size(); ++i) { + counts_snapshot[i] = instance->counts[i]; } - - // Add to totals - for (size_t i = 0; i < bucket_count; ++i) { - total_counts[i] += counts_snapshot[i]; - } - total_sum += sum_snapshot; - total_observations += observations_snapshot; + sum_snapshot = instance->sum; + observations_snapshot = instance->observations; } + + // Add to totals + for (size_t i = 0; i < bucket_count; ++i) { + total_counts[i] += counts_snapshot[i]; + } + total_sum += sum_snapshot; + total_observations += observations_snapshot; } - // Add global accumulated value for this label set - auto global_it = family->global_accumulated_values.find(labels_key); - if (global_it != family->global_accumulated_values.end() && - global_it->second) { - auto *global_state = global_it->second; + // Add global accumulated value using pre-resolved pointer + if (data.global_state) { + auto *global_state = data.global_state; for (size_t i = 0; i < global_state->counts.size(); ++i) { total_counts[i] += global_state->counts[i]; } @@ -1140,7 +1202,7 @@ std::span render(ArenaAllocator &arena) { std::unique_lock _{Metric::mutex}; // Build label sets once for both phases - LabelSets label_sets = build_label_sets(arena); + Metric::LabelSets label_sets = Metric::build_label_sets(arena); // Phase 1: Compute all metric values ArenaVector metric_values = @@ -1232,15 +1294,15 @@ std::span render(ArenaAllocator &arena) { value)); } - // Use pre-built label sets (same as compute phase) - const auto &all_labels = label_sets.counter_labels[counter_family_idx++]; + // Use pre-computed data (same as compute phase) + const auto &family_data = label_sets.counter_data[counter_family_idx++]; // Format counter values using pre-computed values - for (const auto &labels_key : all_labels) { + for (const auto &data : family_data) { auto total_value = next_value++->as_double; labels_sv.clear(); - for (size_t i = 0; i < labels_key.labels.size(); ++i) { - labels_sv.push_back(labels_key.labels[i]); + for (size_t i = 0; i < data.labels_key.labels.size(); ++i) { + labels_sv.push_back(data.labels_key.labels[i]); } auto labels = format_labels(labels_sv); output.push_back(format(arena, "%.*s%.*s %.17g\n", @@ -1251,6 +1313,7 @@ std::span render(ArenaAllocator &arena) { } // Format gauges - ITERATION ORDER MUST MATCH COMPUTE PHASE + size_t gauge_family_idx = 0; for (const auto &[name, family] : Metric::get_gauge_families()) { output.push_back(format(arena, "# HELP %.*s %.*s\n", static_cast(name.length()), name.data(), @@ -1276,12 +1339,13 @@ std::span render(ArenaAllocator &arena) { value)); } - // Format instance values - for (const auto &[labels_key, instance] : family->instances) { + // Use pre-computed data (same as compute phase) + const auto &family_data = label_sets.gauge_data[gauge_family_idx++]; + for (const auto &data : family_data) { auto value = next_value++->as_double; labels_sv.clear(); - for (size_t i = 0; i < labels_key.labels.size(); ++i) { - labels_sv.push_back(labels_key.labels[i]); + for (size_t i = 0; i < data.labels_key.labels.size(); ++i) { + labels_sv.push_back(data.labels_key.labels[i]); } auto labels = format_labels(labels_sv); output.push_back(format(arena, "%.*s%.*s %.17g\n", @@ -1301,24 +1365,23 @@ std::span render(ArenaAllocator &arena) { output.push_back(format(arena, "# TYPE %.*s histogram\n", static_cast(name.length()), name.data())); - // Use pre-built label sets (same as compute phase) - const auto &all_labels = - label_sets.histogram_labels[histogram_family_idx++]; + // Use pre-computed data (same as compute phase) + const auto &family_data = label_sets.histogram_data[histogram_family_idx++]; ArenaVector> bucket_labels_sv( &arena); // Format histogram data using pre-computed values - for (const auto &labels_key : all_labels) { - // Get bucket thresholds from family config - size_t bucket_count = family->buckets.size(); + for (const auto &data : family_data) { + // Get bucket count from pre-computed data + size_t bucket_count = data.bucket_count; // Format explicit bucket counts for (size_t i = 0; i < bucket_count; ++i) { auto count = next_value++->as_uint64; bucket_labels_sv.clear(); - for (size_t j = 0; j < labels_key.labels.size(); ++j) { - bucket_labels_sv.push_back(labels_key.labels[j]); + for (size_t j = 0; j < data.labels_key.labels.size(); ++j) { + bucket_labels_sv.push_back(data.labels_key.labels[j]); } bucket_labels_sv.push_back( {"le", static_format(arena, family->buckets[i])}); @@ -1332,8 +1395,8 @@ std::span render(ArenaAllocator &arena) { // Format +Inf bucket auto observations = next_value++->as_uint64; bucket_labels_sv.clear(); - for (size_t j = 0; j < labels_key.labels.size(); ++j) { - bucket_labels_sv.push_back(labels_key.labels[j]); + for (size_t j = 0; j < data.labels_key.labels.size(); ++j) { + bucket_labels_sv.push_back(data.labels_key.labels[j]); } bucket_labels_sv.push_back({"le", "+Inf"}); auto inf_labels = format_labels(bucket_labels_sv); @@ -1345,8 +1408,8 @@ std::span render(ArenaAllocator &arena) { // Format sum auto sum = next_value++->as_double; bucket_labels_sv.clear(); - for (size_t j = 0; j < labels_key.labels.size(); ++j) { - bucket_labels_sv.push_back(labels_key.labels[j]); + for (size_t j = 0; j < data.labels_key.labels.size(); ++j) { + bucket_labels_sv.push_back(data.labels_key.labels[j]); } auto labels = format_labels(bucket_labels_sv); output.push_back(format(