Fix potential alignment issue and add more implementation comments

2025-09-03 11:12:01 -04:00
parent b3e48b904a
commit 17efcf318e


@@ -420,6 +420,9 @@ struct Metric {
ThreadInit() {}
~ThreadInit() {
// Accumulate thread-local state into global state before cleanup
// THREAD SAFETY: All operations below are protected by the global mutex,
// including writes to the global accumulated state, preventing races with
// the render thread
std::unique_lock<std::mutex> _{mutex};
// NOTE: registration_version increment is REQUIRED here because:
// - Cached render plan has pre-resolved pointers to thread-local state
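The comments above describe a merge-on-thread-exit pattern. A minimal, self-contained sketch of that pattern, using hypothetical standalone names (Registry, ThreadState, global_total) rather than this codebase's actual types:

#include <cstdint>
#include <mutex>

struct Registry {
  static inline std::mutex mutex;                       // the global mutex
  static inline std::uint64_t global_total = 0;         // global accumulated state
  static inline std::uint64_t registration_version = 0; // bumped to invalidate cached plans
};

struct ThreadState {
  std::uint64_t local_total = 0;

  ~ThreadState() {
    // Runs at thread exit: fold thread-local state into global state under
    // the global mutex, then bump the version so any cached render plan that
    // still points at this thread's state gets recompiled.
    std::unique_lock<std::mutex> lock{Registry::mutex};
    Registry::global_total += local_total;
    ++Registry::registration_version;
  }
};

thread_local ThreadState tls_state;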
@@ -491,17 +494,35 @@ struct Metric {
std::span<const std::pair<std::string_view, std::string_view>> labels) {
auto &interned_set = get_interned_labels();
// MEMORY EFFICIENCY PATTERN:
// Use temporary stack-allocated arena for lookup operations to avoid
// unbounded memory growth in global arena. Only allocate in global arena
// for genuinely new label combinations.
//
// SAFETY: This pattern is safe because:
// 1. std::unordered_set::find() uses lookup_key transiently for
// hashing/comparison
// 2. Hash set implementations don't retain references to lookup keys after
// find()
// 3. lookup_arena is destroyed after find() completes, but we don't use its
// memory anymore
// 4. All returned references point to global arena memory (application
// lifetime)
// Create temporary lookup key using stack-allocated arena
ArenaAllocator lookup_arena(1024); // Small arena for lookups
ArenaAllocator lookup_arena(1024); // Small arena for lookups only
LabelsKey lookup_key{labels, lookup_arena};
// Use standard hash set lookup
// Use standard hash set lookup - lookup_key memory used transiently only
auto it = interned_set.find(lookup_key);
if (it != interned_set.end()) {
// Found: return reference to permanently stored key in global arena
// lookup_arena will be destroyed but we're not using its memory
return *it;
}
// Not found - create and intern new key
// Not found - create and intern new key in global arena
// This is the ONLY place where we consume global arena memory for labels
LabelsKey new_key{labels, get_global_arena()};
auto result = interned_set.emplace(std::move(new_key));
return *result.first;
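The pattern in this hunk — a transient key for find(), a permanent allocation only on a miss — can be illustrated with standard containers. A simplified sketch that swaps the arena-backed LabelsKey for an owning std::string and uses C++20 heterogeneous lookup so the probe itself allocates nothing:

#include <cstddef>
#include <functional>
#include <string>
#include <string_view>
#include <unordered_set>

// Transparent hash so find() can take a string_view without building a string.
struct SvHash {
  using is_transparent = void;
  std::size_t operator()(std::string_view s) const noexcept {
    return std::hash<std::string_view>{}(s);
  }
};

using InternSet = std::unordered_set<std::string, SvHash, std::equal_to<>>;

inline const std::string &intern(InternSet &set, std::string_view key) {
  // The lookup uses the caller's view transiently; nothing is retained from it.
  if (auto it = set.find(key); it != set.end())
    return *it;                    // hit: reuse the permanently stored key
  return *set.emplace(key).first;  // miss: the only place that allocates
}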
@@ -1227,11 +1248,15 @@ void Counter::inc(double x) {
// accesses this value, and it does so via an atomic load. A concurrent
// non-atomic read (writer) and atomic read (renderer) is not a data race.
//
// 3. Overflow Check Safety: Both non-atomic reads of p->value are sequential
// in the same thread, so no races occur.
//
// This contrasts with Gauges, whose state can be shared by multiple threads
// and thus requires a fully atomic read-modify-write cycle (CAS loop).
auto new_value = p->value + x;
// Validate monotonic property (counter never decreases)
// Safe: both reads of p->value are sequential in this thread
if (new_value < p->value) [[unlikely]] {
validate_or_abort(false, "counter value overflow/wraparound detected",
std::to_string(new_value));
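A self-contained sketch of the increment-with-monotonicity-check described above. It is a deliberate simplification: the counter cell is modeled here as std::atomic<double> written only by its owning thread with relaxed stores, and validate_or_abort is replaced by a plain abort:

#include <atomic>
#include <cstdio>
#include <cstdlib>

struct CounterCell {
  std::atomic<double> value{0.0};

  void inc(double x) {
    // Single writer: both reads of the old value happen in this thread,
    // so the overflow comparison below cannot race.
    double old_value = value.load(std::memory_order_relaxed);
    double new_value = old_value + x;
    // Monotonic property: a counter must never decrease.
    if (new_value < old_value) [[unlikely]] {
      std::fprintf(stderr, "counter overflow/wraparound detected: %f\n", new_value);
      std::abort();
    }
    value.store(new_value, std::memory_order_relaxed); // renderer loads atomically
  }
};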
@@ -1296,14 +1321,14 @@ update_histogram_buckets_simd(std::span<const double> thresholds,
__m128i increments = _mm_and_si128(cmp_as_int, ones);
// Load current counts and add increments
// NOTE: Using unaligned load/store (_mm_loadu_si128/_mm_storeu_si128)
// because uint64_t arrays from arena allocator are not guaranteed 16-byte
// aligned
__m128i current_counts = _mm_loadu_si128((__m128i *)&counts[i]);
// NOTE: Using unaligned load/store to avoid pointer casting issues
// uint64_t arrays from arena allocator are not guaranteed 16-byte aligned
__m128i current_counts;
std::memcpy(&current_counts, &counts[i], sizeof(__m128i));
__m128i updated_counts = _mm_add_epi64(current_counts, increments);
// Store updated counts
_mm_storeu_si128((__m128i *)&counts[i], updated_counts);
std::memcpy(&counts[i], &updated_counts, sizeof(__m128i));
}
// Handle remainder with scalar operations
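The memcpy-based form this hunk adopts typically compiles to the same unaligned movdqu loads/stores as _mm_loadu_si128/_mm_storeu_si128, without casting uint64_t* to __m128i*. A standalone sketch of the idea (requires an SSE2 target; the function and parameter names are illustrative, not from this codebase):

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <emmintrin.h> // SSE2

// Adds increments[i] to counts[i], two 64-bit lanes at a time.
inline void add_u64_pairs(std::uint64_t *counts, const std::uint64_t *increments,
                          std::size_t n) {
  std::size_t i = 0;
  for (; i + 2 <= n; i += 2) {
    __m128i cur, inc;
    std::memcpy(&cur, &counts[i], sizeof(__m128i));     // unaligned load, no cast
    std::memcpy(&inc, &increments[i], sizeof(__m128i));
    __m128i sum = _mm_add_epi64(cur, inc);
    std::memcpy(&counts[i], &sum, sizeof(__m128i));     // unaligned store, no cast
  }
  for (; i < n; ++i)
    counts[i] += increments[i]; // scalar remainder
}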
@@ -1557,9 +1582,12 @@ union MetricValue {
// New three-phase render implementation
std::span<std::string_view> render(ArenaAllocator &arena) {
// Hold lock throughout all phases to prevent registry changes
// THREAD SAFETY: The global mutex protects cached_plan initialization and access,
// and prevents races during static member initialization at program startup
std::unique_lock<std::mutex> _{Metric::mutex};
// Phase 1: Compile - generate static text and instructions
// Safe: cached_plan access/initialization protected by mutex above
if (!Metric::cached_plan || Metric::cached_plan->registration_version !=
Metric::registration_version) {
Metric::cached_plan = Metric::compile_render_plan();
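A minimal sketch of the version-checked plan cache this hunk documents, with hypothetical standalone types standing in for the real Metric statics and compile_render_plan:

#include <cstdint>
#include <memory>
#include <mutex>

struct RenderPlan {
  std::uint64_t registration_version; // version the plan was compiled against
  // ... pre-generated static text and render instructions ...
};

struct MetricSketch {
  static inline std::mutex mutex;
  static inline std::uint64_t registration_version = 0;
  static inline std::unique_ptr<RenderPlan> cached_plan;

  static RenderPlan &current_plan() {
    std::unique_lock<std::mutex> lock{mutex}; // guards both plan and version
    if (!cached_plan ||
        cached_plan->registration_version != registration_version) {
      // Recompile: the cached plan was built against a stale registry.
      cached_plan = std::make_unique<RenderPlan>(RenderPlan{registration_version});
    }
    return *cached_plan;
  }
};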