Fix potential alignment issue and add more implementation comments

2025-09-03 11:12:01 -04:00
parent b3e48b904a
commit 17efcf318e


@@ -420,6 +420,9 @@ struct Metric {
ThreadInit() {}
~ThreadInit() {
// Accumulate thread-local state into global state before cleanup
// THREAD SAFETY: All operations below are protected by the global mutex,
// including writes to the global accumulated state, preventing races with
// the render thread
std::unique_lock<std::mutex> _{mutex};
// NOTE: registration_version increment is REQUIRED here because:
// - Cached render plan has pre-resolved pointers to thread-local state
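The comments above describe a merge-on-thread-exit pattern. A minimal, self-contained sketch of that pattern, using hypothetical standalone names (Registry, ThreadState, global_total) rather than this codebase's actual types:

#include <cstdint>
#include <mutex>

struct Registry {
  static inline std::mutex mutex;                       // the global mutex
  static inline std::uint64_t global_total = 0;         // global accumulated state
  static inline std::uint64_t registration_version = 0; // bumped to invalidate cached plans
};

struct ThreadState {
  std::uint64_t local_total = 0;

  ~ThreadState() {
    // Runs at thread exit: fold thread-local state into global state under
    // the global mutex, then bump the version so any cached render plan that
    // still points at this thread's state gets recompiled.
    std::unique_lock<std::mutex> lock{Registry::mutex};
    Registry::global_total += local_total;
    ++Registry::registration_version;
  }
};

thread_local ThreadState tls_state;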
@@ -491,17 +494,35 @@ struct Metric {
std::span<const std::pair<std::string_view, std::string_view>> labels) {
auto &interned_set = get_interned_labels();
// MEMORY EFFICIENCY PATTERN:
// Use temporary stack-allocated arena for lookup operations to avoid
// unbounded memory growth in global arena. Only allocate in global arena
// for genuinely new label combinations.
//
// SAFETY: This pattern is safe because:
// 1. std::unordered_set::find() uses lookup_key transiently for
// hashing/comparison
// 2. Hash set implementations don't retain references to lookup keys after
// find()
// 3. lookup_arena is destroyed after find() completes, but we don't use its
// memory anymore
// 4. All returned references point to global arena memory (application
// lifetime)
// Create temporary lookup key using stack-allocated arena
ArenaAllocator lookup_arena(1024); // Small arena for lookups
ArenaAllocator lookup_arena(1024); // Small arena for lookups only
LabelsKey lookup_key{labels, lookup_arena};
// Use standard hash set lookup
// Use standard hash set lookup - lookup_key memory used transiently only
auto it = interned_set.find(lookup_key);
if (it != interned_set.end()) {
// Found: return reference to permanently stored key in global arena
// lookup_arena will be destroyed but we're not using its memory
return *it;
}
// Not found - create and intern new key
// Not found - create and intern new key in global arena
// This is the ONLY place where we consume global arena memory for labels
LabelsKey new_key{labels, get_global_arena()};
auto result = interned_set.emplace(std::move(new_key));
return *result.first;
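The pattern in this hunk — a transient key for find(), a permanent allocation only on a miss — can be illustrated with standard containers. A simplified sketch that swaps the arena-backed LabelsKey for an owning std::string and uses C++20 heterogeneous lookup so the probe itself allocates nothing:

#include <cstddef>
#include <functional>
#include <string>
#include <string_view>
#include <unordered_set>

// Transparent hash so find() can take a string_view without building a string.
struct SvHash {
  using is_transparent = void;
  std::size_t operator()(std::string_view s) const noexcept {
    return std::hash<std::string_view>{}(s);
  }
};

using InternSet = std::unordered_set<std::string, SvHash, std::equal_to<>>;

inline const std::string &intern(InternSet &set, std::string_view key) {
  // The lookup uses the caller's view transiently; nothing is retained from it.
  if (auto it = set.find(key); it != set.end())
    return *it;                    // hit: reuse the permanently stored key
  return *set.emplace(key).first;  // miss: the only place that allocates
}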
@@ -1227,11 +1248,15 @@ void Counter::inc(double x) {
// accesses this value, and it does so via an atomic load. A concurrent
// non-atomic read (writer) and atomic read (renderer) is not a data race.
//
// 3. Overflow Check Safety: Both non-atomic reads of p->value are sequential
// in the same thread, so no races occur.
//
// This contrasts with Gauges, whose state can be shared by multiple threads
// and thus requires a fully atomic read-modify-write cycle (CAS loop).
auto new_value = p->value + x;
// Validate monotonic property (counter never decreases)
// Safe: both reads of p->value are sequential in this thread
if (new_value < p->value) [[unlikely]] {
validate_or_abort(false, "counter value overflow/wraparound detected",
std::to_string(new_value));
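A self-contained sketch of the increment-with-monotonicity-check described above. It is a deliberate simplification: the counter cell is modeled here as std::atomic<double> written only by its owning thread with relaxed stores, and validate_or_abort is replaced by a plain abort:

#include <atomic>
#include <cstdio>
#include <cstdlib>

struct CounterCell {
  std::atomic<double> value{0.0};

  void inc(double x) {
    // Single writer: both reads of the old value happen in this thread,
    // so the overflow comparison below cannot race.
    double old_value = value.load(std::memory_order_relaxed);
    double new_value = old_value + x;
    // Monotonic property: a counter must never decrease.
    if (new_value < old_value) [[unlikely]] {
      std::fprintf(stderr, "counter overflow/wraparound detected: %f\n", new_value);
      std::abort();
    }
    value.store(new_value, std::memory_order_relaxed); // renderer loads atomically
  }
};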
@@ -1296,14 +1321,14 @@ update_histogram_buckets_simd(std::span<const double> thresholds,
__m128i increments = _mm_and_si128(cmp_as_int, ones);
// Load current counts and add increments
// NOTE: Using unaligned load/store (_mm_loadu_si128/_mm_storeu_si128)
// because uint64_t arrays from arena allocator are not guaranteed 16-byte
// aligned
__m128i current_counts = _mm_loadu_si128((__m128i *)&counts[i]);
// NOTE: Using unaligned load/store to avoid pointer casting issues
// uint64_t arrays from arena allocator are not guaranteed 16-byte aligned
__m128i current_counts;
std::memcpy(&current_counts, &counts[i], sizeof(__m128i));
__m128i updated_counts = _mm_add_epi64(current_counts, increments);
// Store updated counts
_mm_storeu_si128((__m128i *)&counts[i], updated_counts);
std::memcpy(&counts[i], &updated_counts, sizeof(__m128i));
}
// Handle remainder with scalar operations
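The memcpy-based form this hunk adopts typically compiles to the same unaligned movdqu loads/stores as _mm_loadu_si128/_mm_storeu_si128, without casting uint64_t* to __m128i*. A standalone sketch of the idea (requires an SSE2 target; the function and parameter names are illustrative, not from this codebase):

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <emmintrin.h> // SSE2

// Adds increments[i] to counts[i], two 64-bit lanes at a time.
inline void add_u64_pairs(std::uint64_t *counts, const std::uint64_t *increments,
                          std::size_t n) {
  std::size_t i = 0;
  for (; i + 2 <= n; i += 2) {
    __m128i cur, inc;
    std::memcpy(&cur, &counts[i], sizeof(__m128i));     // unaligned load, no cast
    std::memcpy(&inc, &increments[i], sizeof(__m128i));
    __m128i sum = _mm_add_epi64(cur, inc);
    std::memcpy(&counts[i], &sum, sizeof(__m128i));     // unaligned store, no cast
  }
  for (; i < n; ++i)
    counts[i] += increments[i]; // scalar remainder
}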
@@ -1557,9 +1582,12 @@ union MetricValue {
// New three-phase render implementation
std::span<std::string_view> render(ArenaAllocator &arena) {
// Hold lock throughout all phases to prevent registry changes
// THREAD SAFETY: The global mutex protects cached_plan initialization and access,
// and prevents races during static member initialization at program startup
std::unique_lock<std::mutex> _{Metric::mutex};
// Phase 1: Compile - generate static text and instructions
// Safe: cached_plan access/initialization protected by mutex above
if (!Metric::cached_plan || Metric::cached_plan->registration_version !=
Metric::registration_version) {
Metric::cached_plan = Metric::compile_render_plan();
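A minimal sketch of the version-checked plan cache this hunk documents, with hypothetical standalone types standing in for the real Metric statics and compile_render_plan:

#include <cstdint>
#include <memory>
#include <mutex>

struct RenderPlan {
  std::uint64_t registration_version; // version the plan was compiled against
  // ... pre-generated static text and render instructions ...
};

struct MetricSketch {
  static inline std::mutex mutex;
  static inline std::uint64_t registration_version = 0;
  static inline std::unique_ptr<RenderPlan> cached_plan;

  static RenderPlan &current_plan() {
    std::unique_lock<std::mutex> lock{mutex}; // guards both plan and version
    if (!cached_plan ||
        cached_plan->registration_version != registration_version) {
      // Recompile: the cached plan was built against a stale registry.
      cached_plan = std::make_unique<RenderPlan>(RenderPlan{registration_version});
    }
    return *cached_plan;
  }
};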