Replace some ArenaVectors with std::span

2025-09-02 15:43:42 -04:00
parent d43e6c2be5
commit 4f1dcc54d9


@@ -306,16 +306,15 @@ struct Gauge::State {
 // Histogram: Thread-local buckets, single writer, mutex protection per thread,
 // per histogram
 struct Histogram::State {
-  ArenaVector<double> thresholds; // Bucket boundaries (sorted, deduplicated,
-                                  // sizes never change)
-  ArenaVector<uint64_t> counts;   // Count per bucket
+  std::span<const double> thresholds; // Bucket boundaries (sorted,
+                                      // deduplicated, sizes never change)
+  std::span<uint64_t> counts; // Count per bucket
   double sum;                 // Sum of observations
   uint64_t observations;      // Total observation count
   std::mutex
       mutex; // Per-thread, per-histogram mutex for consistent reads/writes
-  State(ArenaAllocator &arena)
-      : thresholds(&arena), counts(&arena), sum(0.0), observations(0) {}
+  State() : sum(0.0), observations(0) {}

   friend struct Metric;
 };
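
Since Histogram::State now holds std::span views rather than owning containers, the default constructor can stay trivial: a default-constructed span is empty, and pointing it at arena-owned storage later is just a pointer/length assignment with no element copies. A self-contained illustration of that ownership model (standard-library types only, not project code):

    #include <cassert>
    #include <cstdint>
    #include <span>
    #include <vector>

    int main() {
      std::span<uint64_t> counts;          // empty, like a freshly constructed State
      assert(counts.empty());

      std::vector<uint64_t> storage(4, 0); // stand-in for arena-owned memory
      counts = std::span<uint64_t>(storage.data(), storage.size());
      counts[2] += 1;                      // writes through to the backing storage
      assert(storage[2] == 1);
      return 0;
    }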
@@ -428,20 +427,10 @@ struct Metric {
     // Acquire lock to get consistent snapshot
     std::lock_guard<std::mutex> lock(instance->mutex);

-    // Ensure global accumulator exists
+    // Global accumulator should have been created when we made the
+    // histogram
     auto &global_state = family->global_accumulated_values[labels_key];
-    if (!global_state) {
-      global_state = get_global_arena().construct<Histogram::State>(
-          get_global_arena());
-      // Copy thresholds from instance
-      for (size_t i = 0; i < instance->thresholds.size(); ++i) {
-        global_state->thresholds.push_back(instance->thresholds[i]);
-      }
-      // Initialize counts with zeros
-      for (size_t i = 0; i < instance->counts.size(); ++i) {
-        global_state->counts.push_back(0);
-      }
-    }
+    assert(global_state);

     // Accumulate bucket counts (mutex already held)
     for (size_t i = 0; i < instance->counts.size(); ++i) {
@@ -560,33 +549,43 @@ struct Metric {
     auto &ptr = family->p->per_thread_state[thread_id].instances[key];
     if (!ptr) {
-      ptr = get_thread_local_arena().construct<Histogram::State>(
-          get_thread_local_arena());
+      ptr = get_thread_local_arena().construct<Histogram::State>();

       // DESIGN: Prometheus-compatible histogram buckets
       // Use buckets from family configuration
-      for (size_t i = 0; i < family->p->buckets.size(); ++i) {
-        ptr->thresholds.push_back(family->p->buckets[i]);
-      }
-      // Initialize with zero values, mutex protects all operations
-      for (size_t i = 0; i < ptr->thresholds.size(); ++i) {
-        ptr->counts.push_back(0);
-      }
+      size_t bucket_count = family->p->buckets.size();
+      double *thresholds_data =
+          get_thread_local_arena().allocate<double>(bucket_count);
+      uint64_t *counts_data =
+          get_thread_local_arena().allocate<uint64_t>(bucket_count);
+
+      // Copy thresholds and initialize counts
+      std::memcpy(thresholds_data, family->p->buckets.data(),
+                  bucket_count * sizeof(double));
+      std::memset(counts_data, 0, bucket_count * sizeof(uint64_t));
+      ptr->thresholds = std::span<const double>(thresholds_data, bucket_count);
+      ptr->counts = std::span<uint64_t>(counts_data, bucket_count);

       // Ensure global accumulator exists for this label set
       auto &global_state = family->p->global_accumulated_values[key];
       if (!global_state) {
-        global_state =
-            get_global_arena().construct<Histogram::State>(get_global_arena());
-        // Copy thresholds
-        for (size_t i = 0; i < ptr->thresholds.size(); ++i) {
-          global_state->thresholds.push_back(ptr->thresholds[i]);
-        }
-        // Initialize counts with zeros
-        for (size_t i = 0; i < ptr->thresholds.size(); ++i) {
-          global_state->counts.push_back(0);
-        }
+        global_state = get_global_arena().construct<Histogram::State>();
+
+        // Allocate and copy thresholds, initialize counts
+        double *global_thresholds_data =
+            get_global_arena().allocate<double>(bucket_count);
+        uint64_t *global_counts_data =
+            get_global_arena().allocate<uint64_t>(bucket_count);
+
+        std::memcpy(global_thresholds_data, ptr->thresholds.data(),
+                    bucket_count * sizeof(double));
+        std::memset(global_counts_data, 0, bucket_count * sizeof(uint64_t));
+        global_state->thresholds =
+            std::span<const double>(global_thresholds_data, bucket_count);
+        global_state->counts =
+            std::span<uint64_t>(global_counts_data, bucket_count);
       }
     }

     Histogram result;
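
The thread-local and global branches above repeat the same allocate/copy/zero/wrap sequence. A hypothetical helper (not part of this commit) could factor it out; it only assumes an arena exposing allocate<T>(count), which is how the diff uses it:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <span>

    // Sketch of a shared bucket-initialization helper; Arena is any type with
    // a T *allocate<T>(size_t) member, as used in the commit.
    template <typename Arena>
    static void init_buckets(Arena &arena, std::span<const double> source,
                             std::span<const double> &thresholds_out,
                             std::span<uint64_t> &counts_out) {
      const std::size_t n = source.size();
      double *thresholds = arena.template allocate<double>(n);
      uint64_t *counts = arena.template allocate<uint64_t>(n);
      std::memcpy(thresholds, source.data(), n * sizeof(double)); // copy boundaries
      std::memset(counts, 0, n * sizeof(uint64_t));               // zero counts
      thresholds_out = std::span<const double>(thresholds, n);
      counts_out = std::span<uint64_t>(counts, n);
    }

Both call sites could then reduce to something like init_buckets(get_thread_local_arena(), {family->p->buckets.data(), bucket_count}, ptr->thresholds, ptr->counts), assuming the buckets container exposes data()/size() as the diff suggests.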
@@ -804,8 +803,8 @@ Histogram::Histogram() = default;

 // AVX-optimized implementation for high performance
 __attribute__((target("avx"))) static void
-update_histogram_buckets_simd(const ArenaVector<double> &thresholds,
-                              ArenaVector<uint64_t> &counts, double x,
+update_histogram_buckets_simd(std::span<const double> thresholds,
+                              std::span<uint64_t> counts, double x,
                               size_t start_idx) {
   const size_t size = thresholds.size();
   size_t i = start_idx;
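
Taking std::span parameters also loosens coupling at the call sites: any contiguous storage (the arena-backed spans above, a std::vector, or a C array) converts implicitly. A standalone sketch of the idea, not the project's actual update function:

    #include <cstddef>
    #include <cstdint>
    #include <span>
    #include <vector>

    // Cumulative-bucket update: every bucket whose upper bound is >= x gets +1.
    static void bump_buckets(std::span<const double> thresholds,
                             std::span<uint64_t> counts, double x) {
      for (std::size_t i = 0; i < thresholds.size(); ++i)
        if (x <= thresholds[i]) ++counts[i];
    }

    int main() {
      std::vector<double> thresholds = {0.1, 1.0, 10.0};
      uint64_t counts[3] = {0, 0, 0};
      bump_buckets(thresholds, counts, 0.5); // vector and C array both convert
      return counts[1] == 1 ? 0 : 1;
    }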
@@ -1131,17 +1130,17 @@ compute_metric_values(ArenaAllocator &arena,
   // Compute histogram values - ITERATION ORDER MUST MATCH FORMAT PHASE
   size_t histogram_family_idx = 0;
-  for (const auto &family_pair : Metric::get_histogram_families()) {
+  for ([[maybe_unused]] const auto &[_name, _family] :
+       Metric::get_histogram_families()) {
     // Use pre-computed data with resolved pointers - no hash lookups!
     const auto &family_data = label_sets.histogram_data[histogram_family_idx++];

     for (const auto &data : family_data) {
       size_t bucket_count = data.bucket_count; // Use cached bucket count
-      ArenaVector<uint64_t> total_counts(&arena);
-      for (size_t i = 0; i < bucket_count; ++i) {
-        total_counts.push_back(0);
-      }
+      uint64_t *total_counts_data = arena.allocate<uint64_t>(bucket_count);
+      std::memset(total_counts_data, 0, bucket_count * sizeof(uint64_t));
+      std::span<uint64_t> total_counts(total_counts_data, bucket_count);

       double total_sum = 0.0;
       uint64_t total_observations = 0;
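
total_counts is now a zero-initialized scratch span in the snapshot arena; presumably the loop that follows (outside this hunk) sums each thread's bucket counts into it element-wise. A minimal standalone sketch of that accumulation, with assumed names:

    #include <cstddef>
    #include <cstdint>
    #include <span>

    // Element-wise accumulation of one thread's bucket counts into the totals.
    static void accumulate_counts(std::span<uint64_t> total,
                                  std::span<const uint64_t> per_thread) {
      const std::size_t n = total.size() < per_thread.size() ? total.size()
                                                             : per_thread.size();
      for (std::size_t i = 0; i < n; ++i)
        total[i] += per_thread[i];
    }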