Fix potential alignment issue and add more implementation comments
@@ -420,6 +420,9 @@ struct Metric {
   ThreadInit() {}
   ~ThreadInit() {
     // Accumulate thread-local state into global state before cleanup
+    // THREAD SAFETY: All operations below are protected by the global mutex,
+    // including writes to the global accumulated state, preventing races with
+    // the render thread
     std::unique_lock<std::mutex> _{mutex};
     // NOTE: registration_version increment is REQUIRED here because:
     // - Cached render plan has pre-resolved pointers to thread-local state
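The destructor here is the classic thread-local aggregation pattern: each thread folds its private counters into global state exactly once, at thread exit, under the global mutex. As a rough illustration only - ThreadCounters, g_accumulated, and g_registration_version below are hypothetical stand-ins, not the library's actual types - the shape is:

#include <cstdint>
#include <mutex>
#include <string>
#include <unordered_map>

// Hypothetical globals standing in for the library's Metric statics.
std::mutex g_mutex;
std::unordered_map<std::string, double> g_accumulated;
uint64_t g_registration_version = 0;

struct ThreadCounters {
    std::unordered_map<std::string, double> local;

    ~ThreadCounters() {
        // Fold thread-local values into global state under the global mutex,
        // then bump the version: any cached render plan holding pre-resolved
        // pointers into this thread's (soon-gone) storage must be rebuilt.
        std::unique_lock<std::mutex> _{g_mutex};
        for (const auto &[name, v] : local)
            g_accumulated[name] += v;
        ++g_registration_version;
    }
};

// One instance per thread; the destructor runs at thread exit.
thread_local ThreadCounters t_counters;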
@@ -491,17 +494,35 @@ struct Metric {
     std::span<const std::pair<std::string_view, std::string_view>> labels) {
   auto &interned_set = get_interned_labels();
+
+  // MEMORY EFFICIENCY PATTERN:
+  // Use a temporary stack-allocated arena for lookup operations to avoid
+  // unbounded memory growth in the global arena. Only allocate in the global
+  // arena for genuinely new label combinations.
+  //
+  // SAFETY: This pattern is safe because:
+  // 1. std::unordered_set::find() uses lookup_key transiently for
+  //    hashing/comparison
+  // 2. Hash set implementations don't retain references to lookup keys after
+  //    find()
+  // 3. lookup_arena is destroyed after find() completes, but its memory is
+  //    never used again
+  // 4. All returned references point to global arena memory (application
+  //    lifetime)
+
   // Create temporary lookup key using stack-allocated arena
-  ArenaAllocator lookup_arena(1024); // Small arena for lookups
+  ArenaAllocator lookup_arena(1024); // Small arena for lookups only
   LabelsKey lookup_key{labels, lookup_arena};

-  // Use standard hash set lookup
+  // Use standard hash set lookup - lookup_key memory is used transiently only
   auto it = interned_set.find(lookup_key);
   if (it != interned_set.end()) {
     // Found: return reference to permanently stored key in global arena
+    // lookup_arena will be destroyed, but we never use its memory again
     return *it;
   }

-  // Not found - create and intern new key
+  // Not found - create and intern new key in global arena
+  // This is the ONLY place where we consume global arena memory for labels
   LabelsKey new_key{labels, get_global_arena()};
   auto result = interned_set.emplace(std::move(new_key));
   return *result.first;
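For readers unfamiliar with the two-arena interning trick, here is a minimal, self-contained sketch of the same pattern, with std::pmr standing in for ArenaAllocator and a flattened "k=v," string standing in for LabelsKey (both are assumptions for illustration, not the library's real types):

#include <memory_resource>
#include <span>
#include <string>
#include <string_view>
#include <unordered_set>
#include <utility>

using Key = std::pmr::string; // stand-in for LabelsKey

static Key make_key(
    std::span<const std::pair<std::string_view, std::string_view>> labels,
    std::pmr::memory_resource *mr) {
    Key key{mr};
    for (const auto &[k, v] : labels) {
        key += k; key += '='; key += v; key += ',';
    }
    return key;
}

const Key &intern(
    std::span<const std::pair<std::string_view, std::string_view>> labels) {
    static std::pmr::monotonic_buffer_resource global_arena; // never released
    static std::unordered_set<Key> interned;

    // Lookup key lives in a small stack-backed arena; find() only touches it
    // transiently for hashing and comparison.
    char buf[1024];
    std::pmr::monotonic_buffer_resource lookup_arena{buf, sizeof buf};
    Key lookup = make_key(labels, &lookup_arena);

    if (auto it = interned.find(lookup); it != interned.end())
        return *it; // references global-arena memory, not lookup_arena

    // Only a genuinely new combination allocates from the global arena.
    return *interned.emplace(make_key(labels, &global_arena)).first;
}

The stack buffer bounds the cost of repeated lookups for already-interned label sets, which is the hot path; the global arena grows only once per distinct combination.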
@@ -1227,11 +1248,15 @@ void Counter::inc(double x) {
   // accesses this value, and it does so via an atomic load. A concurrent
   // non-atomic read (writer) and atomic read (renderer) is not a data race.
+  //
+  // 3. Overflow Check Safety: Both non-atomic reads of p->value are sequential
+  //    in the same thread, so no races occur.
   //
   // This contrasts with Gauges, whose state can be shared by multiple threads
   // and thus requires a fully atomic read-modify-write cycle (CAS loop).
   auto new_value = p->value + x;

   // Validate monotonic property (counter never decreases)
+  // Safe: both reads of p->value are sequential in this thread
   if (new_value < p->value) [[unlikely]] {
     validate_or_abort(false, "counter value overflow/wraparound detected",
                       std::to_string(new_value));
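A compilable sketch of the single-writer scheme these comments describe, using C++20 std::atomic_ref so the writer keeps its plain reads while the store and the render thread's load are atomic. CounterCell and read_for_render are illustrative names, and mixing the plain read with atomic access leans on exactly the single-writer argument spelled out above:

#include <atomic>
#include <cstdio>
#include <cstdlib>

struct CounterCell {
    double value = 0.0; // written by exactly one thread
};

void inc(CounterCell *p, double x) {
    // Plain read is fine: this thread is the only writer, and a concurrent
    // atomic read by the renderer does not race with another read.
    double new_value = p->value + x;

    // Monotonicity check - both reads of p->value are sequential here.
    if (new_value < p->value) [[unlikely]] {
        std::fprintf(stderr, "counter value overflow/wraparound detected\n");
        std::abort();
    }

    // Atomic store so the renderer's concurrent load is race-free.
    std::atomic_ref<double>{p->value}.store(new_value, std::memory_order_relaxed);
}

double read_for_render(CounterCell *p) {
    // Render thread: always an atomic load.
    return std::atomic_ref<double>{p->value}.load(std::memory_order_relaxed);
}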
@@ -1296,14 +1321,14 @@ update_histogram_buckets_simd(std::span<const double> thresholds,
   __m128i increments = _mm_and_si128(cmp_as_int, ones);

   // Load current counts and add increments
-  // NOTE: Using unaligned load/store (_mm_loadu_si128/_mm_storeu_si128)
-  // because uint64_t arrays from arena allocator are not guaranteed 16-byte
-  // aligned
-  __m128i current_counts = _mm_loadu_si128((__m128i *)&counts[i]);
+  // NOTE: Using std::memcpy instead of the load/store intrinsics to avoid
+  // pointer casting; arena-allocated uint64_t arrays may not be 16-byte aligned
+  __m128i current_counts;
+  std::memcpy(&current_counts, &counts[i], sizeof(__m128i));
   __m128i updated_counts = _mm_add_epi64(current_counts, increments);

   // Store updated counts
-  _mm_storeu_si128((__m128i *)&counts[i], updated_counts);
+  std::memcpy(&counts[i], &updated_counts, sizeof(__m128i));
 }

 // Handle remainder with scalar operations
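The memcpy form is well-defined for any source alignment and sidesteps the uint64_t* to __m128i* cast; mainstream compilers lower a 16-byte memcpy into/out of an __m128i to the same unaligned move (movdqu) that _mm_loadu_si128 produces, so there is no performance cost. A standalone sketch of the pattern (add_one_to_pairs is a hypothetical example, not the library's function):

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <emmintrin.h> // SSE2
#include <span>

void add_one_to_pairs(std::span<uint64_t> counts) {
    const __m128i ones = _mm_set1_epi64x(1);
    size_t i = 0;
    for (; i + 2 <= counts.size(); i += 2) {
        // memcpy in/out of __m128i is valid regardless of alignment.
        __m128i current;
        std::memcpy(&current, &counts[i], sizeof(__m128i));
        __m128i updated = _mm_add_epi64(current, ones);
        std::memcpy(&counts[i], &updated, sizeof(__m128i));
    }
    for (; i < counts.size(); ++i) // scalar remainder
        counts[i] += 1;
}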
@@ -1557,9 +1582,12 @@ union MetricValue {
 // New three-phase render implementation
 std::span<std::string_view> render(ArenaAllocator &arena) {
-  // Hold lock throughout all phases to prevent registry changes
+  // THREAD SAFETY: The global mutex protects cached_plan initialization and
+  // access, preventing races during static member initialization at startup
   std::unique_lock<std::mutex> _{Metric::mutex};

   // Phase 1: Compile - generate static text and instructions
+  // Safe: cached_plan access/initialization is protected by the mutex above
   if (!Metric::cached_plan || Metric::cached_plan->registration_version !=
                                   Metric::registration_version) {
     Metric::cached_plan = Metric::compile_render_plan();
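In outline, the version-checked cache this hunk guards looks like the following; the RenderPlan/Registry skeleton is a stand-in for the real Metric statics, not the library's actual declarations:

#include <cstdint>
#include <memory>
#include <mutex>

struct RenderPlan {
    uint64_t registration_version; // version the plan was compiled against
    // ... pre-resolved pointers, static text, render instructions ...
};

struct Registry {
    static inline std::mutex mutex;
    static inline uint64_t registration_version = 0;
    static inline std::unique_ptr<RenderPlan> cached_plan;

    // Caller must hold `mutex`, as render() does across all three phases.
    static RenderPlan &current_plan() {
        if (!cached_plan ||
            cached_plan->registration_version != registration_version) {
            // Recompile whenever a registration (or a thread exit, which
            // bumps the version) has invalidated the cached pointers.
            cached_plan = std::make_unique<RenderPlan>(
                RenderPlan{registration_version});
        }
        return *cached_plan;
    }
};

Because thread exit bumps registration_version (see the ~ThreadInit hunk above), a plan holding pointers into dead thread-local storage can never be reused.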