Add some clarifying implementation comments

This commit is contained in:
2025-09-03 11:01:01 -04:00
parent 721f814785
commit b3e48b904a

View File

@@ -421,6 +421,10 @@ struct Metric {
~ThreadInit() { ~ThreadInit() {
// Accumulate thread-local state into global state before cleanup // Accumulate thread-local state into global state before cleanup
std::unique_lock<std::mutex> _{mutex}; std::unique_lock<std::mutex> _{mutex};
// NOTE: registration_version increment is REQUIRED here because:
// - Cached render plan has pre-resolved pointers to thread-local state
// - When threads disappear, these pointers become invalid
// - Cache invalidation forces rebuild with updated pointer sets
++Metric::registration_version; ++Metric::registration_version;
auto thread_id = std::this_thread::get_id(); auto thread_id = std::this_thread::get_id();
@@ -994,6 +998,7 @@ struct Metric {
// Sum thread-local values // Sum thread-local values
for (auto *state_ptr : instruction.aggregate_counter.thread_states) { for (auto *state_ptr : instruction.aggregate_counter.thread_states) {
double value; double value;
// NOTE: __atomic_load works on plain double (not atomic<double>)
__atomic_load(&state_ptr->value, &value, __ATOMIC_RELAXED); __atomic_load(&state_ptr->value, &value, __ATOMIC_RELAXED);
total_value += value; total_value += value;
} }
@@ -1079,6 +1084,12 @@ struct Metric {
for (size_t i = 0; i < static_text.size(); ++i) { for (size_t i = 0; i < static_text.size(); ++i) {
// Copy static text into caller's arena // Copy static text into caller's arena
// NOTE: This copying is REQUIRED for memory safety:
// - static_text lives in cached_plan's arena (persistent across renders)
// - dynamic_text lives in caller's arena (single render lifetime)
// - output must live entirely in caller's arena for consistent lifetime
// Without copying, output would have mixed arena ownership causing
// use-after-free
output.push_back(arena_copy_string(static_text[i], arena)); output.push_back(arena_copy_string(static_text[i], arena));
// Add corresponding dynamic text // Add corresponding dynamic text
@@ -1279,9 +1290,15 @@ update_histogram_buckets_simd(std::span<const double> thresholds,
__m128d cmp_result = _mm_cmp_pd(x_vec, thresholds_vec, _CMP_LE_OQ); __m128d cmp_result = _mm_cmp_pd(x_vec, thresholds_vec, _CMP_LE_OQ);
__m128i cmp_as_int = _mm_castpd_si128(cmp_result); __m128i cmp_as_int = _mm_castpd_si128(cmp_result);
__m128i ones = _mm_set1_epi64x(1); __m128i ones = _mm_set1_epi64x(1);
// NOTE: _mm_cmp_pd returns 0xFFFFFFFFFFFFFFFF for true, 0x0000000000000000
// for false The AND with ones converts this to 1 or 0 per 64-bit lane
// (extracts low bit)
__m128i increments = _mm_and_si128(cmp_as_int, ones); __m128i increments = _mm_and_si128(cmp_as_int, ones);
// Load current counts and add increments // Load current counts and add increments
// NOTE: Using unaligned load/store (_mm_loadu_si128/_mm_storeu_si128)
// because uint64_t arrays from arena allocator are not guaranteed 16-byte
// aligned
__m128i current_counts = _mm_loadu_si128((__m128i *)&counts[i]); __m128i current_counts = _mm_loadu_si128((__m128i *)&counts[i]);
__m128i updated_counts = _mm_add_epi64(current_counts, increments); __m128i updated_counts = _mm_add_epi64(current_counts, increments);