Clarify threading model for metrics

This commit is contained in:
2025-08-30 17:29:39 -04:00
parent 21ddcb75fb
commit affeeb674a
2 changed files with 62 additions and 21 deletions

View File

@@ -154,6 +154,26 @@ A high-performance, multi-stage, lock-free pipeline for inter-thread communicati
- **Builder pattern** for constructing commit requests
- **String views** pointing to arena-allocated memory to avoid unnecessary copying
#### **Metrics System** (`src/metric.{hpp,cpp}`)
**High-Performance Metrics Implementation:**
- **Thread-local counters/histograms** with single writer for performance
- **Global gauges** with lock-free atomic CAS operations for multi-writer scenarios
- **SIMD-optimized histogram bucket updates** using AVX instructions for high throughput
- **Arena allocator integration** for efficient memory management during rendering
**Threading Model:**
- **Counters**: Per-thread storage, single writer, atomic write in `Counter::inc()`, atomic read in render thread
- **Histograms**: Per-thread storage, single writer, per-histogram mutex serializes all access (observe and render)
- **Gauges**: Lock-free atomic operations using `std::bit_cast` for double precision
- **Thread cleanup**: Automatic accumulation of thread-local state into global state on destruction
**Prometheus Compatibility:**
- **Standard metric types** with proper label handling and validation
- **Bucket generation helpers** for linear/exponential histogram distributions
- **Callback-based metrics** for dynamic values
- **UTF-8 validation** using simdutf for label values
#### **Configuration & Optimization**
**Configuration System** (`src/config.{hpp,cpp}`):

View File

@@ -11,7 +11,6 @@
#include <cstdlib>
#include <cstring>
#include <functional>
#include <limits>
#include <memory>
#include <mutex>
#include <string>
@@ -35,20 +34,30 @@ static_assert(__STDCPP_DEFAULT_NEW_ALIGNMENT__ >= 16,
// WeaselDB Metrics System Design:
//
// THREADING MODEL:
// - Counters: Per-thread storage, single writer per thread
// - Histograms: Per-thread storage with mutex protection for consistent reads
// - Gauges: Global storage with mutex protection (multi-writer)
// - Counters: Per-thread storage, single writer, atomic write/read coordination
// with render thread
// - Histograms: Per-thread storage, single writer, mutex protection for all
// access (both observe and render)
// - Gauges: Global storage with atomic CAS operations (multi-writer, no mutex
// needed)
//
// SYNCHRONIZATION STRATEGY:
// - Counters: Atomic store in Counter::inc(), atomic load in render thread
// - Histograms: Mutex serializes all access - updates in observe(), reads in
// render
// - Gauges: Lock-free atomic operations for all updates and reads
//
// PRECISION STRATEGY:
// - Use atomic<uint64_t> for lock-free storage
// - Store doubles using std::bit_cast to uint64_t (preserves full IEEE 754
// precision)
// - Single writer assumption allows simple load/store without CAS loops
// - Single writer for counters enables simple atomic store/load
//
// MEMORY MODEL:
// - Thread-local metrics auto-cleanup on thread destruction
// - Global metrics (gauges) persist for application lifetime
// - Histogram buckets are sorted, deduplicated, and include +Inf bucket
// - Histogram buckets are sorted, deduplicated, sizes never change after
// creation
namespace metric {
@@ -155,27 +164,29 @@ template <> struct Family<Histogram>::State {
// Note: No callbacks map - histograms don't support callback-based metrics
};
// Counter: Thread-local, monotonically increasing, single writer per thread
// Counter: Thread-local, monotonically increasing, single writer
struct Counter::State {
double value; // Single writer, no atomics needed
double value; // Single writer, atomic coordination with render thread
friend struct Metric;
};
// Gauge: Global, can increase/decrease, multiple writers (uses atomic CAS).
// TODO slow under contention.
// Gauge: Global, can increase/decrease, multiple writers (uses atomic CAS)
struct Gauge::State {
std::atomic<uint64_t> value; // Stores double as uint64_t bits, lock-free
std::atomic<uint64_t>
value; // Stores double as uint64_t bits, lock-free CAS operations
friend struct Metric;
};
// Histogram: Thread-local buckets with mutex protection per thread
// Histogram: Thread-local buckets, single writer, mutex protection per thread,
// per histogram
struct Histogram::State {
std::vector<double>
thresholds; // Bucket boundaries (sorted, deduplicated, includes +Inf)
std::vector<double> thresholds; // Bucket boundaries (sorted, deduplicated,
// sizes never change)
std::vector<uint64_t> counts; // Count per bucket
double sum; // Sum of observations
uint64_t observations; // Total observation count
std::mutex mutex; // Per-histogram mutex for consistent reads/writes
std::mutex
mutex; // Per-thread, per-histogram mutex for consistent reads/writes
friend struct Metric;
};
@@ -358,7 +369,7 @@ struct Metric {
Counter::Counter() = default;
void Counter::inc(double x) {
// DESIGN: Single writer per thread, but render thread reads concurrently
// DESIGN: Single writer, but render thread reads concurrently
// Need atomic store since render thread reads without writer's coordination
auto new_value = p->value + x;
@@ -816,15 +827,25 @@ std::span<std::string_view> render(ArenaAllocator &arena) {
for (const auto &[thread_id, per_thread] : family->perThreadState) {
for (const auto &[labels_key, instance] : per_thread.instances) {
// Extract data under lock - minimize critical section
// Pre-allocate vectors to avoid malloc inside critical section
// Note: thresholds and counts sizes never change after histogram
// creation
std::vector<double> thresholds_snapshot;
std::vector<uint64_t> counts_snapshot;
double sum_snapshot;
uint64_t observations_snapshot;
// Pre-allocate outside critical section using immutable sizes
thresholds_snapshot.resize(instance->thresholds.size());
counts_snapshot.resize(instance->counts.size());
// Copy data with minimal critical section
{
std::lock_guard<std::mutex> lock(instance->mutex);
thresholds_snapshot = instance->thresholds;
counts_snapshot = instance->counts;
std::memcpy(thresholds_snapshot.data(), instance->thresholds.data(),
instance->thresholds.size() * sizeof(double));
std::memcpy(counts_snapshot.data(), instance->counts.data(),
instance->counts.size() * sizeof(uint64_t));
sum_snapshot = instance->sum;
observations_snapshot = instance->observations;
}