Fix thread destroy bug
This commit is contained in:
177
src/metric.cpp
177
src/metric.cpp
@@ -121,6 +121,10 @@ template <> struct Family<Counter>::State {
|
||||
};
|
||||
std::unordered_map<std::thread::id, PerThreadState> perThreadState;
|
||||
|
||||
// Global accumulation state for destroyed threads
|
||||
std::unordered_map<LabelsKey, std::unique_ptr<Counter::State>>
|
||||
globalAccumulatedValues;
|
||||
|
||||
// Callback-based metrics (global, not per-thread)
|
||||
std::unordered_map<LabelsKey, MetricCallback<Counter>> callbacks;
|
||||
};
|
||||
@@ -144,6 +148,10 @@ template <> struct Family<Histogram>::State {
|
||||
};
|
||||
std::unordered_map<std::thread::id, PerThreadState> perThreadState;
|
||||
|
||||
// Global accumulation state for destroyed threads
|
||||
std::unordered_map<LabelsKey, std::unique_ptr<Histogram::State>>
|
||||
globalAccumulatedValues;
|
||||
|
||||
// Note: No callbacks map - histograms don't support callback-based metrics
|
||||
};
|
||||
|
||||
@@ -190,21 +198,62 @@ struct Metric {
|
||||
// Thread registration happens lazily when metrics are created
|
||||
}
|
||||
~ThreadInit() {
|
||||
// TODO we need to accumulate this threads counts into a global. Otherwise
|
||||
// it can go backwards when we destroy a thread.
|
||||
|
||||
// Clean up this thread's storage from all families
|
||||
// Accumulate thread-local state into global state before cleanup
|
||||
std::unique_lock<std::mutex> _{mutex};
|
||||
auto thread_id = std::this_thread::get_id();
|
||||
|
||||
// Clean up counter families
|
||||
// Accumulate counter families
|
||||
for (auto &[name, family] : counterFamilies) {
|
||||
family->perThreadState.erase(thread_id);
|
||||
auto thread_it = family->perThreadState.find(thread_id);
|
||||
if (thread_it != family->perThreadState.end()) {
|
||||
for (auto &[labels_key, instance] : thread_it->second.instances) {
|
||||
// Get current thread-local value
|
||||
double current_value = instance->value;
|
||||
|
||||
// Ensure global accumulator exists
|
||||
auto &global_state = family->globalAccumulatedValues[labels_key];
|
||||
if (!global_state) {
|
||||
global_state = std::make_unique<Counter::State>();
|
||||
global_state->value = 0.0;
|
||||
}
|
||||
|
||||
// Clean up histogram families
|
||||
// Add thread-local value to global accumulator (mutex already held)
|
||||
global_state->value += current_value;
|
||||
}
|
||||
family->perThreadState.erase(thread_it);
|
||||
}
|
||||
}
|
||||
|
||||
// Accumulate histogram families
|
||||
for (auto &[name, family] : histogramFamilies) {
|
||||
family->perThreadState.erase(thread_id);
|
||||
auto thread_it = family->perThreadState.find(thread_id);
|
||||
if (thread_it != family->perThreadState.end()) {
|
||||
for (auto &[labels_key, instance] : thread_it->second.instances) {
|
||||
// Acquire lock to get consistent snapshot
|
||||
std::lock_guard<std::mutex> lock(instance->mutex);
|
||||
|
||||
// Ensure global accumulator exists
|
||||
auto &global_state = family->globalAccumulatedValues[labels_key];
|
||||
if (!global_state) {
|
||||
global_state = std::make_unique<Histogram::State>();
|
||||
global_state->thresholds = instance->thresholds;
|
||||
global_state->counts =
|
||||
std::vector<uint64_t>(instance->counts.size(), 0);
|
||||
global_state->sum = 0.0;
|
||||
global_state->observations = 0;
|
||||
}
|
||||
|
||||
// Accumulate bucket counts (mutex already held)
|
||||
for (size_t i = 0; i < instance->counts.size(); ++i) {
|
||||
global_state->counts[i] += instance->counts[i];
|
||||
}
|
||||
|
||||
// Accumulate sum and observations
|
||||
global_state->sum += instance->sum;
|
||||
global_state->observations += instance->observations;
|
||||
}
|
||||
family->perThreadState.erase(thread_it);
|
||||
}
|
||||
}
|
||||
|
||||
// Gauges are global, no per-thread cleanup needed
|
||||
@@ -234,6 +283,13 @@ struct Metric {
|
||||
if (!ptr) {
|
||||
ptr = std::make_unique<Counter::State>();
|
||||
ptr->value = 0.0;
|
||||
|
||||
// Ensure global accumulator exists for this label set
|
||||
auto &global_state = family->p->globalAccumulatedValues[key];
|
||||
if (!global_state) {
|
||||
global_state = std::make_unique<Counter::State>();
|
||||
global_state->value = 0.0;
|
||||
}
|
||||
}
|
||||
Counter result;
|
||||
result.p = ptr.get();
|
||||
@@ -282,6 +338,16 @@ struct Metric {
|
||||
ptr->counts = std::vector<uint64_t>(ptr->thresholds.size(), 0);
|
||||
ptr->sum = 0.0;
|
||||
ptr->observations = 0;
|
||||
|
||||
// Ensure global accumulator exists for this label set
|
||||
auto &global_state = family->p->globalAccumulatedValues[key];
|
||||
if (!global_state) {
|
||||
global_state = std::make_unique<Histogram::State>();
|
||||
global_state->thresholds = ptr->thresholds;
|
||||
global_state->counts = std::vector<uint64_t>(ptr->thresholds.size(), 0);
|
||||
global_state->sum = 0.0;
|
||||
global_state->observations = 0;
|
||||
}
|
||||
}
|
||||
Histogram result;
|
||||
result.p = ptr.get();
|
||||
@@ -292,8 +358,8 @@ struct Metric {
|
||||
Counter::Counter() = default;
|
||||
|
||||
void Counter::inc(double x) {
|
||||
// DESIGN: Single writer per thread allows simple increment
|
||||
// No atomics needed since only one thread writes to this counter
|
||||
// DESIGN: Single writer per thread, but render thread reads concurrently
|
||||
// Need atomic store since render thread reads without writer's coordination
|
||||
auto new_value = p->value + x;
|
||||
|
||||
// Validate monotonic property (counter never decreases)
|
||||
@@ -665,21 +731,37 @@ std::span<std::string_view> render(ArenaAllocator &arena) {
|
||||
value));
|
||||
}
|
||||
|
||||
// Aggregate all counter values (thread-local + global accumulated)
|
||||
std::unordered_map<LabelsKey, double> aggregated_values;
|
||||
|
||||
// First, add thread-local values
|
||||
for (const auto &[thread_id, per_thread] : family->perThreadState) {
|
||||
for (const auto &[labels_key, instance] : per_thread.instances) {
|
||||
// Atomic read from render thread - single writer doesn't need atomic
|
||||
// writes
|
||||
// Atomic read to match atomic store in Counter::inc()
|
||||
double value;
|
||||
__atomic_load(&instance->value, &value, __ATOMIC_RELAXED);
|
||||
aggregated_values[labels_key] += value;
|
||||
}
|
||||
}
|
||||
|
||||
// Then, add globally accumulated values from destroyed threads
|
||||
for (const auto &[labels_key, global_state] :
|
||||
family->globalAccumulatedValues) {
|
||||
if (global_state) {
|
||||
aggregated_values[labels_key] += global_state->value;
|
||||
}
|
||||
}
|
||||
|
||||
// Render aggregated counter values
|
||||
for (const auto &[labels_key, total_value] : aggregated_values) {
|
||||
labels_sv.clear();
|
||||
for (const auto &l : labels_key.labels)
|
||||
labels_sv.push_back(l);
|
||||
auto labels = format_labels(labels_sv);
|
||||
output.push_back(format(arena, "%.*s%.*s %.17g\n",
|
||||
static_cast<int>(name.length()), name.data(),
|
||||
static_cast<int>(labels.length()),
|
||||
labels.data(), value));
|
||||
}
|
||||
static_cast<int>(labels.length()), labels.data(),
|
||||
total_value));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -722,7 +804,15 @@ std::span<std::string_view> render(ArenaAllocator &arena) {
|
||||
format(arena, "# HELP %s %s\n", name.c_str(), family->help.c_str()));
|
||||
output.push_back(format(arena, "# TYPE %s histogram\n", name.c_str()));
|
||||
|
||||
// Aggregate all histogram values (thread-local + global accumulated)
|
||||
std::unordered_map<LabelsKey,
|
||||
std::tuple<std::vector<double>, std::vector<uint64_t>,
|
||||
double, uint64_t>>
|
||||
aggregated_histograms;
|
||||
|
||||
std::vector<std::pair<std::string_view, std::string_view>> bucket_labels_sv;
|
||||
|
||||
// First, collect thread-local histogram data
|
||||
for (const auto &[thread_id, per_thread] : family->perThreadState) {
|
||||
for (const auto &[labels_key, instance] : per_thread.instances) {
|
||||
// Extract data under lock - minimize critical section
|
||||
@@ -739,7 +829,53 @@ std::span<std::string_view> render(ArenaAllocator &arena) {
|
||||
observations_snapshot = instance->observations;
|
||||
}
|
||||
|
||||
// Render explicit bucket counts outside critical section
|
||||
// Initialize or aggregate into aggregated_histograms
|
||||
auto &[thresholds, counts, sum, observations] =
|
||||
aggregated_histograms[labels_key];
|
||||
if (thresholds.empty()) {
|
||||
thresholds = thresholds_snapshot;
|
||||
counts = counts_snapshot;
|
||||
sum = sum_snapshot;
|
||||
observations = observations_snapshot;
|
||||
} else {
|
||||
// Aggregate counts
|
||||
for (size_t i = 0; i < counts_snapshot.size(); ++i) {
|
||||
counts[i] += counts_snapshot[i];
|
||||
}
|
||||
sum += sum_snapshot;
|
||||
observations += observations_snapshot;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Then, add globally accumulated values from destroyed threads
|
||||
for (const auto &[labels_key, global_state] :
|
||||
family->globalAccumulatedValues) {
|
||||
if (global_state) {
|
||||
auto &[thresholds, counts, sum, observations] =
|
||||
aggregated_histograms[labels_key];
|
||||
if (thresholds.empty()) {
|
||||
thresholds = global_state->thresholds;
|
||||
counts = global_state->counts;
|
||||
sum = global_state->sum;
|
||||
observations = global_state->observations;
|
||||
} else {
|
||||
// Add global accumulated values
|
||||
for (size_t i = 0; i < global_state->counts.size(); ++i) {
|
||||
counts[i] += global_state->counts[i];
|
||||
}
|
||||
sum += global_state->sum;
|
||||
observations += global_state->observations;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Render aggregated histogram data
|
||||
for (const auto &[labels_key, histogram_data] : aggregated_histograms) {
|
||||
const auto &[thresholds_snapshot, counts_snapshot, sum_snapshot,
|
||||
observations_snapshot] = histogram_data;
|
||||
|
||||
// Render explicit bucket counts
|
||||
for (size_t i = 0; i < thresholds_snapshot.size(); ++i) {
|
||||
bucket_labels_sv.clear();
|
||||
for (const auto &l : labels_key.labels)
|
||||
@@ -765,23 +901,22 @@ std::span<std::string_view> render(ArenaAllocator &arena) {
|
||||
static_cast<int>(inf_labels.length()), inf_labels.data(),
|
||||
static_cast<unsigned long long>(observations_snapshot)));
|
||||
|
||||
// Render sum outside critical section
|
||||
// Render sum
|
||||
bucket_labels_sv.clear();
|
||||
for (const auto &l : labels_key.labels)
|
||||
bucket_labels_sv.push_back(l);
|
||||
auto labels = format_labels(bucket_labels_sv);
|
||||
output.push_back(format(arena, "%s_sum%.*s %.17g\n", name.c_str(),
|
||||
static_cast<int>(labels.length()),
|
||||
labels.data(), sum_snapshot));
|
||||
static_cast<int>(labels.length()), labels.data(),
|
||||
sum_snapshot));
|
||||
|
||||
// Render count outside critical section
|
||||
// Render count
|
||||
output.push_back(
|
||||
format(arena, "%s_count%.*s %llu\n", name.c_str(),
|
||||
static_cast<int>(labels.length()), labels.data(),
|
||||
static_cast<unsigned long long>(observations_snapshot)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto result = arena.allocate<std::string_view>(output.size());
|
||||
std::copy(output.begin(), output.end(), result);
|
||||
|
||||
Reference in New Issue
Block a user