Clarify threading model for metrics
This commit is contained in:
20
design.md
20
design.md
@@ -154,6 +154,26 @@ A high-performance, multi-stage, lock-free pipeline for inter-thread communicati
|
|||||||
- **Builder pattern** for constructing commit requests
|
- **Builder pattern** for constructing commit requests
|
||||||
- **String views** pointing to arena-allocated memory to avoid unnecessary copying
|
- **String views** pointing to arena-allocated memory to avoid unnecessary copying
|
||||||
|
|
||||||
|
#### **Metrics System** (`src/metric.{hpp,cpp}`)
|
||||||
|
|
||||||
|
**High-Performance Metrics Implementation:**
|
||||||
|
- **Thread-local counters/histograms** with single writer for performance
|
||||||
|
- **Global gauges** with lock-free atomic CAS operations for multi-writer scenarios
|
||||||
|
- **SIMD-optimized histogram bucket updates** using AVX instructions for high throughput
|
||||||
|
- **Arena allocator integration** for efficient memory management during rendering
|
||||||
|
|
||||||
|
**Threading Model:**
|
||||||
|
- **Counters**: Per-thread storage, single writer, atomic write in `Counter::inc()`, atomic read in render thread
|
||||||
|
- **Histograms**: Per-thread storage, single writer, per-histogram mutex serializes all access (observe and render)
|
||||||
|
- **Gauges**: Lock-free atomic operations using `std::bit_cast` for double precision
|
||||||
|
- **Thread cleanup**: Automatic accumulation of thread-local state into global state on destruction
|
||||||
|
|
||||||
|
**Prometheus Compatibility:**
|
||||||
|
- **Standard metric types** with proper label handling and validation
|
||||||
|
- **Bucket generation helpers** for linear/exponential histogram distributions
|
||||||
|
- **Callback-based metrics** for dynamic values
|
||||||
|
- **UTF-8 validation** using simdutf for label values
|
||||||
|
|
||||||
#### **Configuration & Optimization**
|
#### **Configuration & Optimization**
|
||||||
|
|
||||||
**Configuration System** (`src/config.{hpp,cpp}`):
|
**Configuration System** (`src/config.{hpp,cpp}`):
|
||||||
|
|||||||
@@ -11,7 +11,6 @@
|
|||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <limits>
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <string>
|
#include <string>
|
||||||
@@ -35,20 +34,30 @@ static_assert(__STDCPP_DEFAULT_NEW_ALIGNMENT__ >= 16,
|
|||||||
// WeaselDB Metrics System Design:
|
// WeaselDB Metrics System Design:
|
||||||
//
|
//
|
||||||
// THREADING MODEL:
|
// THREADING MODEL:
|
||||||
// - Counters: Per-thread storage, single writer per thread
|
// - Counters: Per-thread storage, single writer, atomic write/read coordination
|
||||||
// - Histograms: Per-thread storage with mutex protection for consistent reads
|
// with render thread
|
||||||
// - Gauges: Global storage with mutex protection (multi-writer)
|
// - Histograms: Per-thread storage, single writer, mutex protection for all
|
||||||
|
// access (both observe and render)
|
||||||
|
// - Gauges: Global storage with atomic CAS operations (multi-writer, no mutex
|
||||||
|
// needed)
|
||||||
|
//
|
||||||
|
// SYNCHRONIZATION STRATEGY:
|
||||||
|
// - Counters: Atomic store in Counter::inc(), atomic load in render thread
|
||||||
|
// - Histograms: Mutex serializes all access - updates in observe(), reads in
|
||||||
|
// render
|
||||||
|
// - Gauges: Lock-free atomic operations for all updates and reads
|
||||||
//
|
//
|
||||||
// PRECISION STRATEGY:
|
// PRECISION STRATEGY:
|
||||||
// - Use atomic<uint64_t> for lock-free storage
|
// - Use atomic<uint64_t> for lock-free storage
|
||||||
// - Store doubles using std::bit_cast to uint64_t (preserves full IEEE 754
|
// - Store doubles using std::bit_cast to uint64_t (preserves full IEEE 754
|
||||||
// precision)
|
// precision)
|
||||||
// - Single writer assumption allows simple load/store without CAS loops
|
// - Single writer for counters enables simple atomic store/load
|
||||||
//
|
//
|
||||||
// MEMORY MODEL:
|
// MEMORY MODEL:
|
||||||
// - Thread-local metrics auto-cleanup on thread destruction
|
// - Thread-local metrics auto-cleanup on thread destruction
|
||||||
// - Global metrics (gauges) persist for application lifetime
|
// - Global metrics (gauges) persist for application lifetime
|
||||||
// - Histogram buckets are sorted, deduplicated, and include +Inf bucket
|
// - Histogram buckets are sorted, deduplicated, sizes never change after
|
||||||
|
// creation
|
||||||
|
|
||||||
namespace metric {
|
namespace metric {
|
||||||
|
|
||||||
@@ -155,27 +164,29 @@ template <> struct Family<Histogram>::State {
|
|||||||
// Note: No callbacks map - histograms don't support callback-based metrics
|
// Note: No callbacks map - histograms don't support callback-based metrics
|
||||||
};
|
};
|
||||||
|
|
||||||
// Counter: Thread-local, monotonically increasing, single writer per thread
|
// Counter: Thread-local, monotonically increasing, single writer
|
||||||
struct Counter::State {
|
struct Counter::State {
|
||||||
double value; // Single writer, no atomics needed
|
double value; // Single writer, atomic coordination with render thread
|
||||||
friend struct Metric;
|
friend struct Metric;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Gauge: Global, can increase/decrease, multiple writers (uses atomic CAS).
|
// Gauge: Global, can increase/decrease, multiple writers (uses atomic CAS)
|
||||||
// TODO slow under contention.
|
|
||||||
struct Gauge::State {
|
struct Gauge::State {
|
||||||
std::atomic<uint64_t> value; // Stores double as uint64_t bits, lock-free
|
std::atomic<uint64_t>
|
||||||
|
value; // Stores double as uint64_t bits, lock-free CAS operations
|
||||||
friend struct Metric;
|
friend struct Metric;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Histogram: Thread-local buckets with mutex protection per thread
|
// Histogram: Thread-local buckets, single writer, mutex protection per thread,
|
||||||
|
// per histogram
|
||||||
struct Histogram::State {
|
struct Histogram::State {
|
||||||
std::vector<double>
|
std::vector<double> thresholds; // Bucket boundaries (sorted, deduplicated,
|
||||||
thresholds; // Bucket boundaries (sorted, deduplicated, includes +Inf)
|
// sizes never change)
|
||||||
std::vector<uint64_t> counts; // Count per bucket
|
std::vector<uint64_t> counts; // Count per bucket
|
||||||
double sum; // Sum of observations
|
double sum; // Sum of observations
|
||||||
uint64_t observations; // Total observation count
|
uint64_t observations; // Total observation count
|
||||||
std::mutex mutex; // Per-histogram mutex for consistent reads/writes
|
std::mutex
|
||||||
|
mutex; // Per-thread, per-histogram mutex for consistent reads/writes
|
||||||
friend struct Metric;
|
friend struct Metric;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -358,7 +369,7 @@ struct Metric {
|
|||||||
Counter::Counter() = default;
|
Counter::Counter() = default;
|
||||||
|
|
||||||
void Counter::inc(double x) {
|
void Counter::inc(double x) {
|
||||||
// DESIGN: Single writer per thread, but render thread reads concurrently
|
// DESIGN: Single writer, but render thread reads concurrently
|
||||||
// Need atomic store since render thread reads without writer's coordination
|
// Need atomic store since render thread reads without writer's coordination
|
||||||
auto new_value = p->value + x;
|
auto new_value = p->value + x;
|
||||||
|
|
||||||
@@ -816,15 +827,25 @@ std::span<std::string_view> render(ArenaAllocator &arena) {
|
|||||||
for (const auto &[thread_id, per_thread] : family->perThreadState) {
|
for (const auto &[thread_id, per_thread] : family->perThreadState) {
|
||||||
for (const auto &[labels_key, instance] : per_thread.instances) {
|
for (const auto &[labels_key, instance] : per_thread.instances) {
|
||||||
// Extract data under lock - minimize critical section
|
// Extract data under lock - minimize critical section
|
||||||
|
// Pre-allocate vectors to avoid malloc inside critical section
|
||||||
|
// Note: thresholds and counts sizes never change after histogram
|
||||||
|
// creation
|
||||||
std::vector<double> thresholds_snapshot;
|
std::vector<double> thresholds_snapshot;
|
||||||
std::vector<uint64_t> counts_snapshot;
|
std::vector<uint64_t> counts_snapshot;
|
||||||
double sum_snapshot;
|
double sum_snapshot;
|
||||||
uint64_t observations_snapshot;
|
uint64_t observations_snapshot;
|
||||||
|
|
||||||
|
// Pre-allocate outside critical section using immutable sizes
|
||||||
|
thresholds_snapshot.resize(instance->thresholds.size());
|
||||||
|
counts_snapshot.resize(instance->counts.size());
|
||||||
|
|
||||||
|
// Copy data with minimal critical section
|
||||||
{
|
{
|
||||||
std::lock_guard<std::mutex> lock(instance->mutex);
|
std::lock_guard<std::mutex> lock(instance->mutex);
|
||||||
thresholds_snapshot = instance->thresholds;
|
std::memcpy(thresholds_snapshot.data(), instance->thresholds.data(),
|
||||||
counts_snapshot = instance->counts;
|
instance->thresholds.size() * sizeof(double));
|
||||||
|
std::memcpy(counts_snapshot.data(), instance->counts.data(),
|
||||||
|
instance->counts.size() * sizeof(uint64_t));
|
||||||
sum_snapshot = instance->sum;
|
sum_snapshot = instance->sum;
|
||||||
observations_snapshot = instance->observations;
|
observations_snapshot = instance->observations;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user