From 17efcf318ee435a225c552c8ad8583b2a9f72788 Mon Sep 17 00:00:00 2001
From: Andrew Noyes
Date: Wed, 3 Sep 2025 11:12:01 -0400
Subject: [PATCH] Fix potential alignment issue and add more implementation
 comments

---
 src/metric.cpp | 44 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 36 insertions(+), 8 deletions(-)

diff --git a/src/metric.cpp b/src/metric.cpp
index c7eb05d..227c084 100644
--- a/src/metric.cpp
+++ b/src/metric.cpp
@@ -420,6 +420,9 @@ struct Metric {
     ThreadInit() {}
     ~ThreadInit() {
       // Accumulate thread-local state into global state before cleanup
+      // THREAD SAFETY: All operations below are protected by the global mutex,
+      // including writes to global accumulated state, preventing races with
+      // the render thread
       std::unique_lock _{mutex};
       // NOTE: registration_version increment is REQUIRED here because:
       // - Cached render plan has pre-resolved pointers to thread-local state
@@ -491,17 +494,35 @@
                             std::span> labels) {
    auto &interned_set = get_interned_labels();

+    // MEMORY EFFICIENCY PATTERN:
+    // Use a temporary stack-allocated arena for lookup operations to avoid
+    // unbounded memory growth in the global arena. Only allocate in the
+    // global arena for genuinely new label combinations.
+    //
+    // SAFETY: This pattern is safe because:
+    // 1. std::unordered_set::find() uses lookup_key transiently for
+    //    hashing/comparison
+    // 2. Hash set implementations don't retain references to lookup keys
+    //    after find()
+    // 3. lookup_arena is destroyed after find() completes, but we no longer
+    //    use its memory
+    // 4. All returned references point to global arena memory (application
+    //    lifetime)
+
    // Create temporary lookup key using stack-allocated arena
-    ArenaAllocator lookup_arena(1024); // Small arena for lookups
+    ArenaAllocator lookup_arena(1024); // Small arena for lookups only
    LabelsKey lookup_key{labels, lookup_arena};

-    // Use standard hash set lookup
+    // Use standard hash set lookup - lookup_key memory is used only transiently
    auto it = interned_set.find(lookup_key);
    if (it != interned_set.end()) {
+      // Found: return a reference to the permanently stored key in the global
+      // arena; lookup_arena will be destroyed, but we are not using its memory
      return *it;
    }

-    // Not found - create and intern new key
+    // Not found - create and intern a new key in the global arena
+    // This is the ONLY place where we consume global arena memory for labels
    LabelsKey new_key{labels, get_global_arena()};
    auto result = interned_set.emplace(std::move(new_key));
    return *result.first;
@@ -1227,11 +1248,15 @@ void Counter::inc(double x) {
   // accesses this value, and it does so via an atomic load. A concurrent
   // non-atomic read (writer) and atomic read (renderer) is not a data race.
   //
+  // 3. Overflow Check Safety: Both non-atomic reads of p->value are sequential
+  //    in the same thread, so no races occur.
+  //
   // This contrasts with Gauges, whose state can be shared by multiple threads
   // and thus requires a fully atomic read-modify-write cycle (CAS loop).
   auto new_value = p->value + x;
   // Validate monotonic property (counter never decreases)
+  // Safe: both reads of p->value are sequential in this thread
   if (new_value < p->value) [[unlikely]] {
     validate_or_abort(false, "counter value overflow/wraparound detected",
                       std::to_string(new_value));
   }
@@ -1296,14 +1321,14 @@ update_histogram_buckets_simd(std::span thresholds,
     __m128i increments = _mm_and_si128(cmp_as_int, ones);

     // Load current counts and add increments
-    // NOTE: Using unaligned load/store (_mm_loadu_si128/_mm_storeu_si128)
-    // because uint64_t arrays from arena allocator are not guaranteed 16-byte
-    // aligned
-    __m128i current_counts = _mm_loadu_si128((__m128i *)&counts[i]);
+    // NOTE: Use std::memcpy for the unaligned load/store to avoid the pointer
+    // cast; uint64_t arrays from the arena allocator may not be 16-byte aligned
+    __m128i current_counts;
+    std::memcpy(&current_counts, &counts[i], sizeof(__m128i));
     __m128i updated_counts = _mm_add_epi64(current_counts, increments);

     // Store updated counts
-    _mm_storeu_si128((__m128i *)&counts[i], updated_counts);
+    std::memcpy(&counts[i], &updated_counts, sizeof(__m128i));
   }

   // Handle remainder with scalar operations
@@ -1557,9 +1582,12 @@ union MetricValue {

 // New three-phase render implementation
 std::span render(ArenaAllocator &arena) {
   // Hold lock throughout all phases to prevent registry changes
+  // THREAD SAFETY: The global mutex protects cached_plan initialization and
+  // access, preventing races when the cached plan is first built or rebuilt
   std::unique_lock _{Metric::mutex};
   // Phase 1: Compile - generate static text and instructions
+  // Safe: cached_plan access/initialization is protected by the mutex above
   if (!Metric::cached_plan ||
       Metric::cached_plan->registration_version != Metric::registration_version) {
     Metric::cached_plan = Metric::compile_render_plan();
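
A note on the memcpy-based load/store change: the pattern can be exercised in
isolation with a small standalone program. The sketch below is illustrative
only (the names add_pairwise and bucket_counts are made up and not from
metric.cpp); it shows the same idea of copying bytes into and out of an
__m128i instead of casting a possibly under-aligned uint64_t* to __m128i*.

// Illustrative sketch, not part of the patch: memcpy-based unaligned SSE2
// load/store of two uint64_t counters.
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <emmintrin.h> // SSE2: __m128i, _mm_add_epi64, _mm_set1_epi64x

// Adds `delta` to two consecutive uint64_t counters without assuming
// 16-byte alignment: copy bytes into an __m128i, add, copy back.
static void add_pairwise(uint64_t *counts, uint64_t delta) {
  __m128i current;
  std::memcpy(&current, counts, sizeof(__m128i));   // unaligned load
  __m128i increments = _mm_set1_epi64x(static_cast<long long>(delta));
  __m128i updated = _mm_add_epi64(current, increments);
  std::memcpy(counts, &updated, sizeof(__m128i));   // unaligned store
}

int main() {
  // Start at an odd index so the address is only 8-byte aligned,
  // mimicking arena-allocated bucket arrays.
  uint64_t bucket_counts[5] = {1, 2, 3, 4, 5};
  add_pairwise(&bucket_counts[1], 10);
  std::printf("%llu %llu\n",
              static_cast<unsigned long long>(bucket_counts[1]),
              static_cast<unsigned long long>(bucket_counts[2]));
  return 0;
}

With optimizations enabled, mainstream compilers should lower the fixed-size
memcpy calls to the same unaligned vector moves (movdqu) that the intrinsics
produced, so the change removes the __m128i* cast without adding cost.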
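
The interning change follows a probe-with-a-transient-key, allocate-only-on-miss
pattern. A simplified analogue, using std::string_view heterogeneous lookup into
an unordered_set of std::string (requires C++20 transparent hashing; this is not
the LabelsKey/ArenaAllocator code from metric.cpp), looks roughly like this:

// Illustrative sketch, not part of the patch: probe with a cheap transient
// key; allocate owning storage only when the key is genuinely new.
#include <cstddef>
#include <cstdio>
#include <functional>
#include <string>
#include <string_view>
#include <unordered_set>

struct TransparentHash {
  using is_transparent = void;
  std::size_t operator()(std::string_view s) const {
    return std::hash<std::string_view>{}(s);
  }
};

using InternSet =
    std::unordered_set<std::string, TransparentHash, std::equal_to<>>;

// Returns a reference to the interned (permanently stored) string.
const std::string &intern(InternSet &set, std::string_view key) {
  // find() only reads `key` during hashing/comparison; nothing retains it.
  auto it = set.find(key);
  if (it != set.end())
    return *it;                    // hit: no allocation at all
  return *set.emplace(key).first;  // miss: the only place we allocate
}

int main() {
  InternSet set;
  const std::string &a = intern(set, "env=prod");
  const std::string &b = intern(set, "env=prod");
  std::printf("same object: %d, size: %zu\n", int(&a == &b), set.size());
  return 0;
}

The returned reference stays valid for the lifetime of the set because
unordered_set never relocates its elements on rehash, which mirrors the
application-lifetime guarantee described in the comments above.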