Compare commits

...

25 Commits

Author SHA1 Message Date
50e27cced8 Add more TODOs 2025-08-29 17:15:36 -04:00
d2762dc8da Add TODO 2025-08-29 17:05:51 -04:00
5592d065de Actually have contention in benchmark 2025-08-29 17:05:21 -04:00
91e799aae8 Use plain arrays and atomic read with intrinsics for render 2025-08-29 15:10:10 -04:00
4fc277393e Use std::latch sync in benchmarks too 2025-08-29 14:29:15 -04:00
a5776004de Update potential misunderstanding about thread safety 2025-08-29 14:08:41 -04:00
62b37c067c Metrics implementation, WIP 2025-08-29 13:43:03 -04:00
fac0d20ae1 Finish metrics design, I think 2025-08-29 11:51:40 -04:00
e3a2ddbbfb Validation + callback api 2025-08-29 11:31:06 -04:00
b6d4ae2862 Initialize atomics in metrics, update style guide on atomics 2025-08-29 10:52:26 -04:00
1133d1e365 Use std::bit_cast, document that gauge mutex is an implementation detail 2025-08-29 10:45:19 -04:00
de5adb54d2 Flesh out metrics architecture more 2025-08-29 10:40:19 -04:00
d0f2b6550a More scaffolding 2025-08-28 17:32:34 -04:00
ca5b299da8 Make MetricKey hashable 2025-08-28 17:10:56 -04:00
9c89eba6c8 Metrics system scaffold 2025-08-28 17:04:53 -04:00
ed6e6ea9fe Output trailing : for konsole integration workaround 2025-08-28 14:45:40 -04:00
c97920c473 format utility improvements 2025-08-28 14:40:01 -04:00
7808896226 Add format benchmarks 2025-08-28 14:20:27 -04:00
404b491880 Add documentation 2025-08-28 14:05:45 -04:00
bc0d5a7422 Add format utility 2025-08-28 14:01:43 -04:00
6fb57619c5 Remove inaccurate "zero-{copy,allocation}" claims 2025-08-28 13:40:05 -04:00
f46a98249f Change to loop_iterations 2025-08-28 13:34:52 -04:00
a73a463936 Fix Arena realloc bug 2025-08-28 13:27:53 -04:00
a32356e298 Add ArenaVector 2025-08-28 13:27:21 -04:00
3d61408976 Use precise memory orderings in load_tester 2025-08-27 18:13:28 -04:00
22 changed files with 3698 additions and 45 deletions

View File

@@ -112,6 +112,7 @@ set(SOURCES
src/json_commit_request_parser.cpp
src/http_handler.cpp
src/arena_allocator.cpp
src/format.cpp
${CMAKE_BINARY_DIR}/json_tokens.cpp)
add_executable(weaseldb ${SOURCES})
@@ -133,7 +134,7 @@ target_include_directories(test_data PUBLIC benchmarks)
target_link_libraries(test_data simdutf::simdutf)
add_executable(test_arena_allocator tests/test_arena_allocator.cpp
src/arena_allocator.cpp)
src/arena_allocator.cpp src/format.cpp)
target_link_libraries(test_arena_allocator doctest::doctest)
target_include_directories(test_arena_allocator PRIVATE src)
target_compile_options(test_arena_allocator PRIVATE -UNDEBUG)
@@ -185,6 +186,17 @@ target_compile_definitions(test_server_connection_return
PRIVATE DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN)
target_compile_options(test_server_connection_return PRIVATE -UNDEBUG)
# Metrics system test
add_executable(test_metric tests/test_metric.cpp src/metric.cpp
src/arena_allocator.cpp src/format.cpp)
target_link_libraries(test_metric doctest::doctest Threads::Threads
simdutf::simdutf weaseljson)
target_include_directories(test_metric PRIVATE src)
target_compile_options(test_metric PRIVATE -UNDEBUG)
# Register with CTest
add_test(NAME metric_tests COMMAND test_metric)
add_executable(bench_arena_allocator benchmarks/bench_arena_allocator.cpp
src/arena_allocator.cpp)
target_link_libraries(bench_arena_allocator nanobench)
@@ -216,6 +228,21 @@ add_executable(bench_thread_pipeline benchmarks/bench_thread_pipeline.cpp)
target_link_libraries(bench_thread_pipeline nanobench Threads::Threads)
target_include_directories(bench_thread_pipeline PRIVATE src)
add_executable(bench_format_comparison benchmarks/bench_format_comparison.cpp
src/arena_allocator.cpp src/format.cpp)
target_link_libraries(bench_format_comparison nanobench)
target_include_directories(bench_format_comparison PRIVATE src)
# Metrics system benchmark
add_executable(bench_metric benchmarks/bench_metric.cpp src/metric.cpp
src/arena_allocator.cpp src/format.cpp)
target_link_libraries(bench_metric nanobench Threads::Threads simdutf::simdutf
weaseljson)
target_include_directories(bench_metric PRIVATE src)
# Register benchmark with CTest
add_test(NAME metric_benchmarks COMMAND bench_metric)
# Debug tools
add_executable(
debug_arena tools/debug_arena.cpp src/json_commit_request_parser.cpp
@@ -237,3 +264,4 @@ add_test(NAME arena_allocator_benchmarks COMMAND bench_arena_allocator)
add_test(NAME commit_request_benchmarks COMMAND bench_commit_request)
add_test(NAME parser_comparison_benchmarks COMMAND bench_parser_comparison)
add_test(NAME thread_pipeline_benchmarks COMMAND bench_thread_pipeline)
add_test(NAME format_comparison_benchmarks COMMAND bench_format_comparison)

View File

@@ -0,0 +1,274 @@
#include "arena_allocator.hpp"
#include "format.hpp"
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <nanobench.h>
#include <sstream>
#include <string>
#include <version>
#if __cpp_lib_format >= 201907L
#include <format>
#define HAS_STD_FORMAT 1
#else
#define HAS_STD_FORMAT 0
#endif
// Test data for consistent benchmarks
constexpr int TEST_INT = 42;
constexpr double TEST_DOUBLE = 3.14159;
const std::string TEST_STRING = "Hello World";
// Benchmark simple string concatenation: "Hello " + "World" + "!"
void benchmark_simple_concatenation() {
std::cout << "\n=== Simple String Concatenation: 'Hello World!' ===\n";
ankerl::nanobench::Bench bench;
bench.title("Simple Concatenation").unit("op").warmup(100).epochs(1000);
// Arena-based static_format
bench.run("static_format", [&] {
ArenaAllocator arena(64);
auto result = static_format(arena, "Hello ", "World", "!");
ankerl::nanobench::doNotOptimizeAway(result);
});
// Arena-based format
bench.run("format", [&] {
ArenaAllocator arena(64);
auto result = format(arena, "Hello %s!", "World");
ankerl::nanobench::doNotOptimizeAway(result);
});
// std::stringstream
bench.run("std::stringstream", [&] {
std::stringstream ss;
ss << "Hello " << "World" << "!";
auto result = ss.str();
ankerl::nanobench::doNotOptimizeAway(result);
});
#if HAS_STD_FORMAT
// std::format (C++20)
bench.run("std::format", [&] {
auto result = std::format("Hello {}!", "World");
ankerl::nanobench::doNotOptimizeAway(result);
});
#endif
// Raw snprintf with malloc
bench.run("snprintf + malloc", [&] {
char buffer[64];
int len = std::snprintf(buffer, sizeof(buffer), "Hello %s!", "World");
char *result = static_cast<char *>(std::malloc(len + 1));
std::memcpy(result, buffer, len + 1);
ankerl::nanobench::doNotOptimizeAway(result);
std::free(result);
});
}
// Benchmark mixed type formatting: "Count: 42, Rate: 3.14159"
void benchmark_mixed_types() {
std::cout << "\n=== Mixed Type Formatting: 'Count: 42, Rate: 3.14159' ===\n";
ankerl::nanobench::Bench bench;
bench.title("Mixed Types").unit("op").warmup(100).epochs(1000);
// Arena-based static_format
bench.run("static_format", [&] {
ArenaAllocator arena(128);
auto result =
static_format(arena, "Count: ", TEST_INT, ", Rate: ", TEST_DOUBLE);
ankerl::nanobench::doNotOptimizeAway(result);
});
// Arena-based format
bench.run("format", [&] {
ArenaAllocator arena(128);
auto result = format(arena, "Count: %d, Rate: %.5f", TEST_INT, TEST_DOUBLE);
ankerl::nanobench::doNotOptimizeAway(result);
});
// std::stringstream
bench.run("std::stringstream", [&] {
std::stringstream ss;
ss << "Count: " << TEST_INT << ", Rate: " << std::fixed
<< std::setprecision(5) << TEST_DOUBLE;
auto result = ss.str();
ankerl::nanobench::doNotOptimizeAway(result);
});
#if HAS_STD_FORMAT
// std::format (C++20)
bench.run("std::format", [&] {
auto result = std::format("Count: {}, Rate: {:.5f}", TEST_INT, TEST_DOUBLE);
ankerl::nanobench::doNotOptimizeAway(result);
});
#endif
// Raw snprintf with malloc
bench.run("snprintf + malloc", [&] {
char buffer[128];
int len = std::snprintf(buffer, sizeof(buffer), "Count: %d, Rate: %.5f",
TEST_INT, TEST_DOUBLE);
char *result = static_cast<char *>(std::malloc(len + 1));
std::memcpy(result, buffer, len + 1);
ankerl::nanobench::doNotOptimizeAway(result);
std::free(result);
});
}
// Benchmark complex formatting with precision and alignment
void benchmark_complex_formatting() {
std::cout << "\n=== Complex Formatting: '%-10s %5d %8.2f' ===\n";
ankerl::nanobench::Bench bench;
bench.title("Complex Formatting").unit("op").warmup(100).epochs(1000);
// Arena-based format (static_format doesn't support printf specifiers)
bench.run("format", [&] {
ArenaAllocator arena(128);
auto result = format(arena, "%-10s %5d %8.2f", TEST_STRING.c_str(),
TEST_INT, TEST_DOUBLE);
ankerl::nanobench::doNotOptimizeAway(result);
});
// std::stringstream
bench.run("std::stringstream", [&] {
std::stringstream ss;
ss << std::left << std::setw(10) << TEST_STRING << " " << std::right
<< std::setw(5) << TEST_INT << " " << std::setw(8) << std::fixed
<< std::setprecision(2) << TEST_DOUBLE;
auto result = ss.str();
ankerl::nanobench::doNotOptimizeAway(result);
});
#if HAS_STD_FORMAT
// std::format (C++20)
bench.run("std::format", [&] {
auto result = std::format("{:<10} {:>5} {:>8.2f}", TEST_STRING, TEST_INT,
TEST_DOUBLE);
ankerl::nanobench::doNotOptimizeAway(result);
});
#endif
// Raw snprintf with malloc
bench.run("snprintf + malloc", [&] {
char buffer[128];
int len = std::snprintf(buffer, sizeof(buffer), "%-10s %5d %8.2f",
TEST_STRING.c_str(), TEST_INT, TEST_DOUBLE);
char *result = static_cast<char *>(std::malloc(len + 1));
std::memcpy(result, buffer, len + 1);
ankerl::nanobench::doNotOptimizeAway(result);
std::free(result);
});
}
// Benchmark error message formatting (common use case)
void benchmark_error_messages() {
std::cout << "\n=== Error Message Formatting: 'Error 404: File not found "
"(line 123)' ===\n";
ankerl::nanobench::Bench bench;
bench.title("Error Messages").unit("op").warmup(100).epochs(1000);
constexpr int error_code = 404;
constexpr int line_number = 123;
const std::string error_msg = "File not found";
// Arena-based static_format (using string literals only)
bench.run("static_format", [&] {
ArenaAllocator arena(128);
auto result = static_format(arena, "Error ", error_code, ": ",
"File not found", " (line ", line_number, ")");
ankerl::nanobench::doNotOptimizeAway(result);
});
// Arena-based format
bench.run("format", [&] {
ArenaAllocator arena(128);
auto result = format(arena, "Error %d: %s (line %d)", error_code,
error_msg.c_str(), line_number);
ankerl::nanobench::doNotOptimizeAway(result);
});
// std::stringstream
bench.run("std::stringstream", [&] {
std::stringstream ss;
ss << "Error " << error_code << ": " << error_msg << " (line "
<< line_number << ")";
auto result = ss.str();
ankerl::nanobench::doNotOptimizeAway(result);
});
#if HAS_STD_FORMAT
// std::format (C++20)
bench.run("std::format", [&] {
auto result = std::format("Error {}: {} (line {})", error_code, error_msg,
line_number);
ankerl::nanobench::doNotOptimizeAway(result);
});
#endif
}
// Benchmark memory allocation overhead by testing arena reuse
void benchmark_memory_reuse() {
std::cout << "\n=== Memory Allocation Patterns ===\n";
ankerl::nanobench::Bench bench;
bench.title("Memory Patterns").unit("op").warmup(100).epochs(100);
// Arena with fresh allocation each time (realistic usage)
bench.run("fresh arena", [&] {
ArenaAllocator arena(128);
auto result = format(arena, "Test %d: %s %.2f", TEST_INT,
TEST_STRING.c_str(), TEST_DOUBLE);
ankerl::nanobench::doNotOptimizeAway(result);
});
// Pre-allocated arena (reuse scenario)
ArenaAllocator shared_arena(1024);
bench.run("reused arena", [&] {
auto result = format(shared_arena, "Test %d: %s %.2f", TEST_INT,
TEST_STRING.c_str(), TEST_DOUBLE);
ankerl::nanobench::doNotOptimizeAway(result);
});
// Fresh std::string allocations
bench.run("std::stringstream", [&] {
std::stringstream ss;
ss << "Test " << TEST_INT << ": " << TEST_STRING << " " << std::fixed
<< std::setprecision(2) << TEST_DOUBLE;
auto result = ss.str();
ankerl::nanobench::doNotOptimizeAway(result);
});
}
int main() {
std::cout << "Format Function Benchmark Comparison\n";
std::cout << "====================================\n";
#if HAS_STD_FORMAT
std::cout << "C++20 std::format: Available\n";
#else
std::cout << "C++20 std::format: Not available\n";
#endif
benchmark_simple_concatenation();
benchmark_mixed_types();
benchmark_complex_formatting();
benchmark_error_messages();
benchmark_memory_reuse();
std::cout << "\n=== Summary ===\n";
std::cout
<< "* static_format: Best for simple concatenation with known types\n";
std::cout
<< "* format: Best for printf-style formatting with arena allocation\n";
std::cout
<< "* std::stringstream: Flexible but slower due to heap allocation\n";
std::cout << "* std::format: Modern C++20 alternative (if available)\n";
std::cout << "* snprintf + malloc: Low-level but requires manual memory "
"management\n";
return 0;
}

benchmarks/bench_metric.cpp Normal file (256 lines)
View File

@@ -0,0 +1,256 @@
#include <nanobench.h>
#include "arena_allocator.hpp"
#include "metric.hpp"
#include <atomic>
#include <cmath>
#include <latch>
#include <memory>
#include <random>
#include <string>
#include <thread>
#include <vector>
// High-contention benchmark setup
struct ContentionEnvironment {
// Background threads for contention
std::vector<std::thread> background_threads;
std::atomic<bool> stop_flag{false};
// Synchronization latches - must be members to avoid use-after-return
std::unique_ptr<std::latch> contention_latch;
std::unique_ptr<std::latch> render_latch;
metric::Family<metric::Gauge> gauge_family =
metric::create_gauge("gauge", "");
metric::Family<metric::Counter> counter_family =
metric::create_counter("counter", "");
metric::Family<metric::Histogram> histogram_family = metric::create_histogram(
"histogram", "", metric::exponential_buckets(0.001, 5, 7));
ContentionEnvironment() = default;
void start_background_contention(int num_threads = 4) {
stop_flag.store(false);
contention_latch = std::make_unique<std::latch>(num_threads + 1);
for (int i = 0; i < num_threads; ++i) {
background_threads.emplace_back([this, i]() {
auto bg_counter = counter_family.create({});
auto bg_gauge = gauge_family.create({});
auto bg_histogram = histogram_family.create({});
std::mt19937 rng(i);
std::uniform_real_distribution<double> dist(0.0, 10.0);
contention_latch
->arrive_and_wait(); // All background threads start together
while (!stop_flag.load(std::memory_order_relaxed)) {
// Simulate mixed workload
bg_counter.inc(1.0);
bg_gauge.set(dist(rng));
bg_gauge.inc(1.0);
bg_histogram.observe(dist(rng));
}
});
}
contention_latch
->arrive_and_wait(); // Wait for all background threads to be ready
}
void start_render_thread() {
render_latch = std::make_unique<std::latch>(2);
background_threads.emplace_back([this]() {
ArenaAllocator arena;
render_latch->arrive_and_wait(); // Render thread signals it's ready
while (!stop_flag.load(std::memory_order_relaxed)) {
auto output = metric::render(arena);
static_cast<void>(output); // Suppress unused variable warning
arena.reset();
}
});
render_latch->arrive_and_wait(); // Wait for render thread to be ready
}
void stop_background_threads() {
stop_flag.store(true);
for (auto &t : background_threads) {
if (t.joinable()) {
t.join();
}
}
background_threads.clear();
}
~ContentionEnvironment() { stop_background_threads(); }
};
int main() {
ankerl::nanobench::Bench bench;
bench.title("WeaselDB Metrics Performance").unit("operation").warmup(1000);
metric::Family<metric::Gauge> gauge_family =
metric::create_gauge("gauge", "");
metric::Family<metric::Counter> counter_family =
metric::create_counter("counter", "");
metric::Family<metric::Histogram> histogram_family = metric::create_histogram(
"histogram", "", metric::exponential_buckets(0.001, 5, 7));
auto counter = counter_family.create({});
auto gauge = gauge_family.create({});
auto histogram = histogram_family.create({});
// Baseline performance without contention
{
bench.run("counter.inc() - no contention", [&]() {
counter.inc(1.0);
ankerl::nanobench::doNotOptimizeAway(counter);
});
bench.run("gauge.inc() - no contention", [&]() {
gauge.inc(1.0);
ankerl::nanobench::doNotOptimizeAway(gauge);
});
bench.run("gauge.set() - no contention", [&]() {
gauge.set(42.0);
ankerl::nanobench::doNotOptimizeAway(gauge);
});
bench.run("histogram.observe() - no contention", [&]() {
histogram.observe(0.5);
ankerl::nanobench::doNotOptimizeAway(histogram);
});
}
// High contention with background threads
{
ContentionEnvironment env;
// Start background threads creating contention
env.start_background_contention(8);
bench.run("counter.inc() - 8 background threads",
[&]() { counter.inc(1.0); });
bench.run("gauge.inc() - 8 background threads", [&]() { gauge.inc(1.0); });
bench.run("gauge.set() - 8 background threads", [&]() { gauge.set(42.0); });
bench.run("histogram.observe() - 8 background threads",
[&]() { histogram.observe(1.5); });
}
// Concurrent render contention
{
ContentionEnvironment env;
// Start background threads + render thread
env.start_background_contention(4);
env.start_render_thread();
bench.run("counter.inc() - with concurrent render",
[&]() { counter.inc(1.0); });
bench.run("gauge.inc() - with concurrent render",
[&]() { gauge.inc(1.0); });
bench.run("histogram.observe() - with concurrent render",
[&]() { histogram.observe(2.0); });
}
// Render performance scaling
{
// Test render performance as number of metrics increases
std::vector<metric::Counter> counters;
std::vector<metric::Gauge> gauges;
std::vector<metric::Histogram> histograms;
auto counter_family =
metric::create_counter("scale_counter", "Scale counter");
auto gauge_family = metric::create_gauge("scale_gauge", "Scale gauge");
auto histogram_family = metric::create_histogram(
"scale_histogram", "Scale histogram",
std::initializer_list<double>{0.1, 0.5, 1.0, 2.5, 5.0, 10.0, 25.0});
// Create varying numbers of metrics
for (int scale : {10, 100, 1000}) {
// Note: metrics accumulate across scales because families are global and
// registered metrics persist for the application lifetime
for (int i = 0; i < scale; ++i) {
counters.emplace_back(
counter_family.create({{"id", std::to_string(i)}}));
gauges.emplace_back(gauge_family.create({{"id", std::to_string(i)}}));
histograms.emplace_back(
histogram_family.create({{"id", std::to_string(i)}}));
// Set some values
counters.back().inc(static_cast<double>(i));
gauges.back().set(static_cast<double>(i * 2));
histograms.back().observe(static_cast<double>(i) * 0.1);
}
ArenaAllocator arena;
std::string bench_name =
"render() - " + std::to_string(scale) + " metrics each type";
bench.run(bench_name, [&]() {
auto output = metric::render(arena);
ankerl::nanobench::doNotOptimizeAway(output);
arena.reset();
});
}
}
// Callback metrics performance
{
auto counter_family =
metric::create_counter("callback_counter", "Callback counter");
auto gauge_family =
metric::create_gauge("callback_gauge", "Callback gauge");
std::atomic<double> counter_value{0};
std::atomic<double> gauge_value{100};
// Register callbacks
counter_family.register_callback(
{{"type", "callback"}}, [&counter_value]() {
return counter_value.load(std::memory_order_relaxed);
});
gauge_family.register_callback({{"type", "callback"}}, [&gauge_value]() {
return gauge_value.load(std::memory_order_relaxed);
});
// Background thread updating callback values
std::atomic<bool> stop_callback{false};
std::latch start_latch{2}; // Background thread + benchmark thread
std::thread callback_updater([&]() {
start_latch.arrive_and_wait(); // Wait for benchmark to start
while (!stop_callback.load()) {
counter_value.fetch_add(1);
gauge_value.store(gauge_value.load() + 1);
}
});
ArenaAllocator arena;
start_latch.arrive_and_wait(); // Wait for background thread to be ready
bench.run("render() - with callback metrics", [&]() {
auto output = metric::render(arena);
ankerl::nanobench::doNotOptimizeAway(output);
arena.reset();
});
stop_callback.store(true);
callback_updater.join();
}
return 0;
}

View File

@@ -789,7 +789,7 @@ int main() {
std::cout << "\nBenchmark completed. The WeaselDB parser is optimized for:\n";
std::cout << "- Arena-based memory allocation for reduced fragmentation\n";
std::cout << "- Streaming parsing for network protocols\n";
std::cout << "- Zero-copy string handling with string views\n";
std::cout << "- String views to minimize unnecessary copying\n";
std::cout << "- Base64 decoding integrated into parsing pipeline\n";
std::cout << "- Efficient reset and reuse for high-throughput scenarios\n";

View File

@@ -1,12 +1,12 @@
#include <nanobench.h>
#include "../src/loop_iterations.h"
#include "../src/loop_iterations.hpp"
int main() {
ankerl::nanobench::Bench bench;
bench.minEpochIterations(100000);
bench.run("volatile loop to " + std::to_string(loopIterations), [&] {
for (volatile int i = 0; i < loopIterations; i = i + 1)
bench.run("volatile loop to " + std::to_string(loop_iterations), [&] {
for (volatile int i = 0; i < loop_iterations; i = i + 1)
;
});

View File

@@ -23,7 +23,7 @@ WeaselDB is a high-performance write-side database component designed for system
- **High-performance JSON parsing** with streaming support and SIMD optimization
- **Multi-threaded networking** using multiple epoll instances with unified I/O thread pool
- **Configurable epoll instances** to eliminate kernel-level contention
- **Zero-copy design** throughout the pipeline
- **Optimized memory management** with arena allocation and efficient copying
- **Factory pattern safety** ensuring correct object lifecycle management
---
@@ -152,7 +152,7 @@ A high-performance, multi-stage, lock-free pipeline for inter-thread communicati
- **Arena-backed string storage** with efficient memory management
- **Move-only semantics** for optimal performance
- **Builder pattern** for constructing commit requests
- **Zero-copy string views** pointing to arena-allocated memory
- **String views** pointing to arena-allocated memory to avoid unnecessary copying
#### **Configuration & Optimization**
@@ -242,7 +242,7 @@ The system implements a RESTful API. See [api.md](api.md) for comprehensive API
1. **Performance-first** - Every component optimized for high throughput
2. **Scalable concurrency** - Multiple epoll instances eliminate kernel contention
3. **Memory efficiency** - Arena allocation eliminates fragmentation
4. **Zero-copy** - Minimize data copying throughout pipeline
4. **Efficient copying** - Minimize unnecessary copies while accepting required ones
5. **Streaming-ready** - Support incremental processing
6. **Type safety** - Compile-time validation where possible
7. **Resource management** - RAII and move semantics throughout
@@ -437,7 +437,7 @@ public:
#### Arena-Based String Handling
```cpp
// Preferred: Zero-copy string view with arena allocation
// Preferred: String view with arena allocation to minimize copying
std::string_view process_json_key(const char* data, ArenaAllocator& arena);
// Avoid: Unnecessary string copies

View File

@@ -77,32 +77,29 @@ void *ArenaAllocator::realloc_raw(void *ptr, uint32_t old_size,
assert(current_block_ &&
"realloc called with non-null ptr but no current block exists");
// Assert that offset is large enough (should always be true for
// valid callers)
assert(current_block_->offset >= old_size &&
"offset must be >= old_size for valid last allocation");
if (current_block_->offset >= old_size) {
// Check if this was the last allocation by comparing with expected location
char *expected_last_alloc_start =
current_block_->data() + current_block_->offset - old_size;
// Check if this was the last allocation by comparing with expected location
char *expected_last_alloc_start =
current_block_->data() + current_block_->offset - old_size;
if (ptr == expected_last_alloc_start) {
// This is indeed the last allocation
if (new_size > old_size) {
// Growing - check if we have space
size_t additional_space_needed = new_size - old_size;
if (ptr == expected_last_alloc_start) {
// This is indeed the last allocation
if (new_size > old_size) {
// Growing - check if we have space
size_t additional_space_needed = new_size - old_size;
if (current_block_->offset + additional_space_needed <=
current_block_->size) {
// We can extend in place
current_block_->offset += additional_space_needed;
return ptr;
if (current_block_->offset + additional_space_needed <=
current_block_->size) {
// We can extend in place
current_block_->offset += additional_space_needed;
return ptr;
}
} else {
// Shrinking - just update the offset
size_t space_to_free = old_size - new_size;
current_block_->offset -= space_to_free;
return new_size == 0 ? nullptr : ptr;
}
} else {
// Shrinking - just update the offset
size_t space_to_free = old_size - new_size;
current_block_->offset -= space_to_free;
return new_size == 0 ? nullptr : ptr;
}
}

View File

@@ -277,10 +277,14 @@ public:
* @brief Type-safe version of realloc_raw for arrays of type T.
*
* @param ptr Pointer to the existing allocation (must be from this allocator)
* If nullptr, behaves like allocate<T>(new_size)
* @param old_size Size of the existing allocation in number of T objects
* Ignored if ptr is nullptr
* @param new_size Desired new size in number of T objects
* @return Pointer to the reallocated memory (may be the same as ptr or
* different)
* @note Follows standard realloc() semantics: realloc(nullptr, size) ==
* malloc(size)
* @note Prints error to stderr and calls std::abort() if memory allocation
* fails or size overflow occurs
*/
@@ -430,6 +434,40 @@ public:
return current_block_ ? current_block_->size - current_block_->offset : 0;
}
/**
* @brief Get all available space in the current block and claim it
* immediately.
*
* This method returns a pointer to all remaining space in the current block
* and immediately marks it as used in the arena. The caller should use
* realloc() to shrink the allocation to the actual amount needed.
*
* If no block exists or current block is full, creates a new block.
*
* @return Pointer to allocated space and the number of bytes allocated
* @note The caller must call realloc() to return unused space
* @note This is designed for speculative operations like printf formatting
* @note Postcondition: always returns at least 1 byte
*/
struct AllocatedSpace {
char *ptr;
size_t allocated_bytes;
};
AllocatedSpace allocate_remaining_space() {
if (!current_block_ || available_in_current_block() == 0) {
add_block(initial_block_size_);
}
char *allocated_ptr = current_block_->data() + current_block_->offset;
size_t available = available_in_current_block();
// Claim all remaining space
current_block_->offset = current_block_->size;
return {allocated_ptr, available};
}
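// Illustrative sketch of the claim-then-shrink pattern described above
// (hypothetical caller code, not part of this header): claim all remaining
// space, format speculatively, then shrink to the bytes actually used.
//
//   auto [buf, cap] = arena.allocate_remaining_space();
//   int n = std::snprintf(buf, cap, "value=%d", 42);
//   if (n >= 0 && static_cast<size_t>(n) < cap) {
//     // keep only the bytes used (plus the terminating NUL)
//     arena.realloc(buf, cap, static_cast<size_t>(n) + 1);
//   }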
/**
* @brief Get the total number of blocks in the allocator.
*
@@ -575,3 +613,44 @@ public:
template <typename U> friend class ArenaStlAllocator;
};
/// Simple arena-aware vector that doesn't have a destructor
/// Safe to return as span because both the vector and its data are
/// arena-allocated Uses arena's realloc() for efficient growth without copying
/// when possible
template <typename T> struct ArenaVector {
explicit ArenaVector(ArenaAllocator *arena)
: arena_(arena), data_(nullptr), size_(0), capacity_(0) {}
void push_back(const T &item) {
if (size_ >= capacity_) {
grow();
}
data_[size_++] = item;
}
T *data() { return data_; }
const T *data() const { return data_; }
size_t size() const { return size_; }
bool empty() const { return size_ == 0; }
T &operator[](size_t index) { return data_[index]; }
const T &operator[](size_t index) const { return data_[index]; }
// No destructor - arena cleanup handles memory
private:
void grow() {
size_t new_capacity = capacity_ == 0 ? 8 : capacity_ * 2;
// arena.realloc() handles nullptr like standard realloc() - acts like
// malloc() This avoids copying when growing in-place is possible
data_ = arena_->realloc(data_, capacity_, new_capacity);
capacity_ = new_capacity;
}
ArenaAllocator *arena_;
T *data_;
size_t size_;
size_t capacity_;
};
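// Minimal usage sketch for ArenaVector (hypothetical caller code, not part
// of this header; both the vector object and its storage live in the arena,
// which is why no destructor is needed):
//
//   ArenaAllocator arena(1024);
//   ArenaVector<int> values(&arena);
//   values.push_back(1);
//   values.push_back(2);
//   std::span<int> view(values.data(), values.size());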

View File

@@ -70,7 +70,7 @@ struct Connection {
* asynchronously by the server's I/O threads using efficient vectored
* I/O.
*
* @param s The data to send (string view for zero-copy efficiency)
* @param s The data to send (string view parameter for efficiency)
* @param copy_to_arena If true (default), copies data to the connection's
* arena for safe storage. If false, the caller must ensure the data remains
* valid until all queued messages are sent.

src/format.cpp Normal file (1010 lines)

File diff suppressed because it is too large.

src/format.hpp Normal file (276 lines)
View File

@@ -0,0 +1,276 @@
#pragma once
#include <concepts>
#include <cstdarg>
#include <cstdio>
#include <cstring>
#include <string_view>
#include <type_traits>
#include "arena_allocator.hpp"
/**
* @brief Runtime printf-style formatting with arena allocation optimization.
*
* This function provides familiar printf-style formatting with intelligent
* optimization for arena allocation. It attempts single-pass formatting by
* speculatively using available arena space, falling back to two-pass
* formatting only when necessary.
*
* The function uses an optimized allocation strategy:
* 1. **Single-pass attempt**: Try to format directly into available arena space
* 2. **Fallback to two-pass**: If formatting doesn't fit, measure required size
* and allocate exactly what's needed
*
* ## Supported Format Specifiers:
* All standard printf format specifiers are supported:
* - **Integers**: %d, %i, %u, %x, %X, %o, %ld, %lld, etc.
* - **Floating point**: %f, %e, %E, %g, %G, %.2f, etc.
* - **Strings**: %s, %.*s, etc.
* - **Characters**: %c
* - **Pointers**: %p
* - **Width/precision**: %10d, %-10s, %.2f, %*.*s, etc.
*
* ## Performance Characteristics:
* - **Optimistic single-pass**: Often avoids the cost of measuring format size
* - **Arena allocation**: Uses fast arena allocation (~1ns vs ~20-270ns for
* malloc)
* - **Memory efficient**: Returns unused space to arena via realloc()
* - **Fallback safety**: Two-pass approach handles any format that doesn't fit
*
* @param arena Arena allocator for memory management
* @param fmt Printf-style format string
* @param ... Variable arguments matching format specifiers
* @return std::string_view pointing to arena-allocated formatted string
* @note Aborts program on formatting errors (never returns invalid data)
* @note GCC format attribute enables compile-time format string validation
*
* ## Usage Examples:
* ```cpp
* ArenaAllocator arena(1024);
*
* // Basic formatting
* auto msg = format(arena, "Hello %s!", "World");
* // msg == "Hello World!"
*
* // Numeric formatting with precision
* auto value = format(arena, "Pi: %.3f", 3.14159);
* // value == "Pi: 3.142"
*
* // Mixed types with width/alignment
* auto table = format(arena, "%-10s %5d %8.2f", "Item", 42, 99.95);
* // table == "Item 42 99.95"
*
* // Error messages
* auto error = format(arena, "Error %d: %s (line %d)", 404, "Not found", 123);
* // error == "Error 404: Not found (line 123)"
* ```
*
* ## When to Use:
* - **Printf familiarity**: When you prefer printf-style format strings
* - **Runtime flexibility**: Format strings from variables, config, or user
* input
* - **Complex formatting**: Precision, width, alignment, padding
* - **Debugging**: Quick formatted output for logging/debugging
* - **Mixed precision**: Different numeric precision requirements
*
* ## When to Use static_format() Instead:
* - **Hot paths**: Performance-critical code where every nanosecond counts
* - **Simple concatenation**: Basic string + number + string combinations
* - **Compile-time optimization**: When all types/values known at compile time
* - **Template contexts**: Where compile-time buffer sizing is beneficial
*
* ## Optimization Details:
* The function uses `ArenaAllocator::allocate_remaining_space()` to claim all
* available arena space and attempt formatting. If successful, it shrinks the
* allocation to the actual size used. If formatting fails (doesn't fit), it
* falls back to the traditional two-pass approach: measure size, allocate
* exactly, then format.
*
* This strategy optimizes for the common case where available arena space is
* sufficient, while maintaining correctness for all cases.
*/
std::string_view format(ArenaAllocator &arena, const char *fmt, ...)
__attribute__((format(printf, 2, 3)));
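// A minimal sketch of the optimistic single-pass strategy described above
// (illustrative only; the real definition lives in src/format.cpp, whose
// diff is suppressed in this comparison, and the helper name is hypothetical):
//
//   std::string_view format_sketch(ArenaAllocator &arena, const char *fmt,
//                                  va_list args) {
//     auto [buf, cap] = arena.allocate_remaining_space();
//     va_list copy;
//     va_copy(copy, args);
//     int n = std::vsnprintf(buf, cap, fmt, copy);   // optimistic attempt
//     va_end(copy);
//     if (n < 0) std::abort();                       // formatting error
//     if (static_cast<size_t>(n) < cap) {            // it fit: shrink in place
//       arena.realloc(buf, cap, static_cast<size_t>(n) + 1);
//       return {buf, static_cast<size_t>(n)};
//     }
//     char *exact = arena.allocate<char>(n + 1);     // fallback: exact size
//     std::vsnprintf(exact, n + 1, fmt, args);
//     return {exact, static_cast<size_t>(n)};
//   }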
namespace detail {
template <int kLen> struct StringTerm {
explicit constexpr StringTerm(const char *s) : s(s) {}
static constexpr int kMaxLength = kLen;
void write(char *&buf) const {
std::memcpy(buf, s, kLen);
buf += kLen;
}
private:
const char *s;
};
template <int kLen>
constexpr StringTerm<kLen - 1> term(const char (&array)[kLen]) {
return StringTerm<kLen - 1>{array};
}
template <class IntType> constexpr int decimal_length(IntType x) {
static_assert(std::is_integral_v<IntType>,
"decimal_length requires integral type");
if constexpr (std::is_signed_v<IntType>) {
// Handle negative values by using unsigned equivalent
using Unsigned = std::make_unsigned_t<IntType>;
// Safe conversion: cast to unsigned first, then negate in unsigned
// arithmetic
auto abs_x = x < 0 ? -static_cast<Unsigned>(x) : static_cast<Unsigned>(x);
int result = 0;
do {
++result;
abs_x /= 10;
} while (abs_x);
return result;
} else {
int result = 0;
do {
++result;
x /= 10;
} while (x);
return result;
}
}
template <std::integral IntType> struct IntTerm {
static constexpr bool kSigned = std::is_signed_v<IntType>;
using Unsigned = std::make_unsigned_t<IntType>;
explicit constexpr IntTerm(IntType v) : v(v) {}
static constexpr int kMaxLength =
decimal_length(Unsigned(-1)) + (kSigned ? 1 : 0);
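// Worked example of the bound above: for int32_t, Unsigned(-1) is
// 4294967295 (10 decimal digits), so kMaxLength is 11 including a possible
// '-' sign; for uint64_t it is 20.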
void write(char *&buf) const {
char itoa_buf[kMaxLength];
auto x = static_cast<Unsigned>(v);
if constexpr (kSigned) {
if (v < 0) {
*buf++ = '-';
x = -static_cast<Unsigned>(v);
}
}
int i = kMaxLength;
do {
itoa_buf[--i] = static_cast<char>('0' + (x % 10));
x /= 10;
} while (x);
while (i < kMaxLength) {
*buf++ = itoa_buf[i++];
}
}
private:
IntType v;
};
template <std::integral IntType> constexpr IntTerm<IntType> term(IntType s) {
return IntTerm<IntType>{s};
}
struct DoubleTerm {
explicit constexpr DoubleTerm(double s) : s(s) {}
static constexpr int kMaxLength = 24;
void write(char *&buf) const;
private:
double s;
};
// Variable template for compile-time max length access
template <typename T>
inline constexpr int max_decimal_length_v = decltype(term(T{}))::kMaxLength;
inline constexpr DoubleTerm term(double s) { return DoubleTerm(s); }
} // namespace detail
/**
* @brief Compile-time optimized formatting for high-performance code paths.
*
* This function provides ultra-fast string formatting by calculating buffer
* sizes at compile time and using specialized term handlers for each type.
* It's designed for performance-critical code where formatting overhead
* matters.
*
* Unlike the runtime `format()` function, `static_format()` processes all
* arguments at compile time to determine exact memory requirements and uses
* optimized term writers for maximum speed.
*
* ## Supported Types:
* - **String literals**: C-style string literals and arrays
* - **Integers**: All integral types (int, int64_t, uint32_t, etc.)
* - **Floating point**: double (uses high-precision Grisu2 algorithm)
* - **Custom types**: Via specialization of `detail::term()`
*
* ## Performance Characteristics:
* - **Compile-time buffer sizing**: Buffer size calculated at compile time (no
* runtime measurement)
* - **Optimized arena allocation**: Uses pre-calculated exact buffer sizes with
* arena allocator
* - **Specialized type handling**: Fast paths for common types via template
* specialization
* - **Memory efficient**: Uses arena.realloc() to return unused space to the
* arena
*
* @tparam Ts Types of the arguments to format (auto-deduced)
* @param arena Arena allocator for memory management
* @param ts Arguments to format - can be string literals, integers, doubles
* @return std::string_view pointing to arena-allocated formatted string
*
* ## Usage Examples:
* ```cpp
* ArenaAllocator arena(1024);
*
* // String concatenation
* auto result1 = static_format(arena, "Hello ", "World", "!");
* // result1 == "Hello World!"
*
* // Mixed types
* auto result2 = static_format(arena, "Count: ", 42, ", Rate: ", 3.14);
* // result2 == "Count: 42, Rate: 3.14"
*
* // Error messages
* auto error = static_format(arena, "Error ", 404, ": ", "Not found");
* // error == "Error 404: Not found"
* ```
*
* ## When to Use:
* - **Hot paths**: Performance-critical code where formatting speed matters
* - **Known types**: When argument types are known at compile time
* - **Simple formatting**: Concatenation and basic type conversion
* - **Template code**: Where compile-time optimization is beneficial
*
* ## When to Use format() Instead:
* - **Printf-style formatting**: When you need format specifiers like "%d",
* "%.2f"
* - **Runtime flexibility**: When format strings come from variables/config
* - **Complex formatting**: When you need padding, precision, etc.
* - **Convenience**: For quick debugging or non-critical paths
*
* @note All arguments are passed by forwarding reference for optimal
* performance
* @note Memory is arena-allocated and automatically sized to exact requirements
* @note Compile-time errors occur if unsupported types are used
* @note This function is constexpr-friendly and optimizes well in release
* builds
*/
template <class... Ts>
std::string_view static_format(ArenaAllocator &arena, Ts &&...ts) {
constexpr int upper_bound = (decltype(detail::term(ts))::kMaxLength + ...);
char *result = arena.allocate<char>(upper_bound);
char *buf = result;
(detail::term(ts).write(buf), ...);
const int size = static_cast<int>(buf - result);
return std::string_view(arena.realloc(result, upper_bound, size),
                        static_cast<std::size_t>(size));
}
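// Worked example of the compile-time sizing above: static_format(arena,
// "Count: ", 42) sums the per-term bounds ("Count: " -> 7, int -> 11) and
// allocates 18 bytes up front, writes the 9 bytes of "Count: 42", then
// realloc() returns the unused bytes to the arena.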

View File

@@ -8,7 +8,7 @@
#include "connection.hpp"
#include "connection_handler.hpp"
#include "loop_iterations.h"
#include "loop_iterations.hpp"
#include "perfetto_categories.hpp"
#include "server.hpp"
#include "thread_pipeline.hpp"
@@ -111,7 +111,7 @@ struct HttpHandler : ConnectionHandler {
if (nulls == 2) {
return;
}
for (volatile int i = 0; i < loopIterations; i = i + 1)
for (volatile int i = 0; i < loop_iterations; i = i + 1)
;
}
}

View File

@@ -1,3 +0,0 @@
#pragma once
constexpr int loopIterations = 1725;

src/loop_iterations.hpp Normal file (3 lines)
View File

@@ -0,0 +1,3 @@
#pragma once
constexpr int loop_iterations = 1725;

src/metric.cpp Normal file (874 lines)
View File

@@ -0,0 +1,874 @@
#include "metric.hpp"
#include <algorithm>
#include <atomic>
#include <bit>
#include <cassert>
#include <cctype>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <limits>
#include <memory>
#include <mutex>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>
#include <immintrin.h>
#include <simdutf.h>
#include "format.hpp"
// TODO fix static initialization order fiasco
// Verify that the default allocator (operator new, used by std::vector for
// the bucket counts) provides sufficient alignment for atomic 128-bit
// operations
static_assert(__STDCPP_DEFAULT_NEW_ALIGNMENT__ >= 16,
"Default new alignment must be at least 16 bytes for atomic "
"128-bit stores");
// WeaselDB Metrics System Design:
//
// THREADING MODEL:
// - Counters and Histograms: Per-thread storage, single writer per thread
// - Gauges: Global storage with mutex protection (multi-writer)
//
// PRECISION STRATEGY:
// - Use atomic<uint64_t> for lock-free storage
// - Store doubles using std::bit_cast to uint64_t (preserves full IEEE 754
// precision)
// - Single writer assumption allows simple load/store without CAS loops
//
// MEMORY MODEL:
// - Thread-local metrics auto-cleanup on thread destruction
// - Global metrics (gauges) persist for application lifetime
// - Histogram buckets are sorted, deduplicated, and include +Inf bucket
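// Illustrative sketch of the bit_cast storage pattern described above
// (hypothetical helpers, not part of this file's API): a double is stored
// losslessly in an atomic<uint64_t> and recovered bit-for-bit on read.
//
//   inline void store_double(std::atomic<uint64_t> &cell, double v) {
//     cell.store(std::bit_cast<uint64_t>(v), std::memory_order_relaxed);
//   }
//   inline double load_double(const std::atomic<uint64_t> &cell) {
//     return std::bit_cast<double>(cell.load(std::memory_order_relaxed));
//   }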
namespace metric {
// Validation helper that works in both debug and release builds
static void validate_or_abort(bool condition, const char *message,
const char *value) {
if (!condition) {
std::fprintf(stderr, "WeaselDB metric validation failed: %s: '%s'\n",
message, value);
std::abort();
}
}
// Labels key for second level of map
struct LabelsKey {
std::vector<std::pair<std::string, std::string>> labels;
LabelsKey(std::vector<std::pair<std::string, std::string>> l)
: labels(std::move(l)) {
// Validate all label keys and values
for (const auto &[key, value] : labels) {
validate_or_abort(is_valid_label_key(key), "invalid label key",
key.c_str());
validate_or_abort(is_valid_label_value(value), "invalid label value",
value.c_str());
}
// Sort labels by key for Prometheus compatibility
std::sort(labels.begin(), labels.end(),
[](const auto &a, const auto &b) { return a.first < b.first; });
}
bool operator==(const LabelsKey &other) const {
return labels == other.labels;
}
};
} // namespace metric
namespace std {
template <> struct hash<metric::LabelsKey> {
std::size_t operator()(const metric::LabelsKey &k) const {
std::size_t hash_value = 0;
for (const auto &[key, value] : k.labels) {
// Combine hashes using a simple but effective method
hash_value ^= std::hash<std::string>{}(key) + 0x9e3779b9 +
(hash_value << 6) + (hash_value >> 2);
hash_value ^= std::hash<std::string>{}(value) + 0x9e3779b9 +
(hash_value << 6) + (hash_value >> 2);
}
return hash_value;
}
};
} // namespace std
namespace metric {
// DESIGN: Store doubles in atomic<uint64_t> for lock-free operations
// - Preserves full IEEE 754 double precision (no truncation)
// - Allows atomic load/store without locks
// - Use std::bit_cast for safe conversion between double and uint64_t
// Family::State structures own the second-level maps (labels -> instances)
template <> struct Family<Counter>::State {
std::string name;
std::string help;
struct PerThreadState {
std::unordered_map<LabelsKey, std::unique_ptr<Counter::State>> instances;
};
std::unordered_map<std::thread::id, PerThreadState> perThreadState;
// Callback-based metrics (global, not per-thread)
std::unordered_map<LabelsKey, MetricCallback<Counter>> callbacks;
};
template <> struct Family<Gauge>::State {
std::string name;
std::string help;
std::unordered_map<LabelsKey, std::unique_ptr<Gauge::State>> instances;
// Callback-based metrics
std::unordered_map<LabelsKey, MetricCallback<Gauge>> callbacks;
};
template <> struct Family<Histogram>::State {
std::string name;
std::string help;
std::vector<double> buckets;
struct PerThreadState {
std::unordered_map<LabelsKey, std::unique_ptr<Histogram::State>> instances;
};
std::unordered_map<std::thread::id, PerThreadState> perThreadState;
// Note: No callbacks map - histograms don't support callback-based metrics
};
// Counter: Thread-local, monotonically increasing, single writer per thread
struct Counter::State {
double value; // Single writer, no atomics needed
friend struct Metric;
};
// Gauge: Global, can increase/decrease, multiple writers (uses atomic CAS).
// TODO slow under contention.
struct Gauge::State {
std::atomic<uint64_t> value; // Stores double as uint64_t bits, lock-free
friend struct Metric;
};
// Histogram: Thread-local buckets, single writer per thread
struct Histogram::State {
std::vector<double>
thresholds; // Bucket boundaries (sorted, deduplicated, includes +Inf)
std::vector<uint64_t> counts; // Count per bucket - single writer, malloc
// provides 16-byte alignment
// TODO this should just be a double like in counter
std::atomic<uint64_t>
sum; // Sum of observations (double stored as uint64_t bits)
std::atomic<uint64_t> observations; // Total observation count (uint64_t)
friend struct Metric;
};
struct Metric {
static std::mutex mutex;
// Two-level map: name -> Family
static std::unordered_map<std::string,
std::unique_ptr<Family<Counter>::State>>
counterFamilies;
static std::unordered_map<std::string, std::unique_ptr<Family<Gauge>::State>>
gaugeFamilies;
static std::unordered_map<std::string,
std::unique_ptr<Family<Histogram>::State>>
histogramFamilies;
// Thread cleanup for per-family thread-local storage
struct ThreadInit {
ThreadInit() {
// Thread registration happens lazily when metrics are created
}
~ThreadInit() {
// TODO we need to accumulate this threads counts into a global. Otherwise
// it can go backwards when we destroy a thread.
// Clean up this thread's storage from all families
std::unique_lock<std::mutex> _{mutex};
auto thread_id = std::this_thread::get_id();
// Clean up counter families
for (auto &[name, family] : counterFamilies) {
family->perThreadState.erase(thread_id);
}
// Clean up histogram families
for (auto &[name, family] : histogramFamilies) {
family->perThreadState.erase(thread_id);
}
// Gauges are global, no per-thread cleanup needed
}
};
static thread_local ThreadInit thread_init;
// Thread cleanup now handled by ThreadInit RAII
static Counter create_counter_instance(
Family<Counter> *family,
const std::vector<std::pair<std::string, std::string>> &labels) {
std::unique_lock<std::mutex> _{mutex};
LabelsKey key{labels};
// Validate that labels aren't already registered as callback
validate_or_abort(
family->p->callbacks.find(key) == family->p->callbacks.end(),
"labels already registered as callback",
key.labels.empty() ? "(no labels)" : key.labels[0].first.c_str());
auto &ptr =
family->p->perThreadState[std::this_thread::get_id()].instances[key];
if (!ptr) {
ptr = std::make_unique<Counter::State>();
ptr->value = 0.0;
}
Counter result;
result.p = ptr.get();
return result;
}
static Gauge create_gauge_instance(
Family<Gauge> *family,
const std::vector<std::pair<std::string, std::string>> &labels) {
std::unique_lock<std::mutex> _{mutex};
LabelsKey key{labels};
// Validate that labels aren't already registered as callback
validate_or_abort(
family->p->callbacks.find(key) == family->p->callbacks.end(),
"labels already registered as callback",
key.labels.empty() ? "(no labels)" : key.labels[0].first.c_str());
auto &ptr = family->p->instances[key];
if (!ptr) {
ptr = std::make_unique<Gauge::State>();
ptr->value.store(0, std::memory_order_relaxed);
}
Gauge result;
result.p = ptr.get();
return result;
}
static Histogram create_histogram_instance(
Family<Histogram> *family,
const std::vector<std::pair<std::string, std::string>> &labels) {
std::unique_lock<std::mutex> _{mutex};
LabelsKey key{labels};
auto &ptr =
family->p->perThreadState[std::this_thread::get_id()].instances[key];
if (!ptr) {
ptr = std::make_unique<Histogram::State>();
// DESIGN: Prometheus-compatible histogram buckets
// Use buckets from family configuration
ptr->thresholds = family->p->buckets; // Already sorted and deduplicated
// Single writer semantics - no atomics needed for bucket counts
ptr->counts = std::vector<uint64_t>(ptr->thresholds.size(), 0);
ptr->sum.store(0, std::memory_order_relaxed);
ptr->observations.store(0, std::memory_order_relaxed);
}
Histogram result;
result.p = ptr.get();
return result;
}
};
Counter::Counter() = default;
void Counter::inc(double x) {
// DESIGN: Single writer per thread allows a plain read-modify-write with no
// CAS loop; the relaxed atomic store below only makes the value safely
// readable by the concurrent render thread
auto new_value = p->value + x;
// Validate monotonic property (counter never decreases)
if (new_value < p->value) [[unlikely]] {
validate_or_abort(false, "counter value overflow/wraparound detected",
std::to_string(new_value).c_str());
}
__atomic_store(&p->value, &new_value, __ATOMIC_RELAXED);
}
Gauge::Gauge() = default;
void Gauge::inc(double x) {
// Lock-free increment using CAS loop
uint64_t expected = p->value.load(std::memory_order_relaxed);
uint64_t desired;
do {
double current_value = std::bit_cast<double>(expected);
double new_value = current_value + x;
desired = std::bit_cast<uint64_t>(new_value);
} while (!p->value.compare_exchange_weak(expected, desired,
std::memory_order_relaxed));
}
void Gauge::dec(double x) {
// Lock-free decrement using CAS loop
uint64_t expected = p->value.load(std::memory_order_relaxed);
uint64_t desired;
do {
double current_value = std::bit_cast<double>(expected);
double new_value = current_value - x;
desired = std::bit_cast<uint64_t>(new_value);
} while (!p->value.compare_exchange_weak(expected, desired,
std::memory_order_relaxed));
}
void Gauge::set(double x) {
// Simple atomic store for set operation
p->value.store(std::bit_cast<uint64_t>(x), std::memory_order_relaxed);
}
Histogram::Histogram() = default;
// Vectorized histogram bucket updates using single-writer + atomic-read design
// Since histograms have single-writer semantics, we can use architecturally
// atomic stores
#ifdef __x86_64__
// x86-64: 128-bit vectorized with inline assembly atomic stores
__attribute__((target("avx"))) static void
update_histogram_buckets(const std::vector<double> &thresholds,
std::vector<uint64_t> &counts, double x,
size_t start_idx) {
const size_t size = thresholds.size();
size_t i = start_idx;
// Process 2 buckets at a time with 128-bit vectors + inline assembly
const __m128d x_vec = _mm_set1_pd(x);
for (; i + 2 <= size; i += 2) {
// Ensure alignment for atomic guarantee (malloc provides 16-byte alignment)
assert((reinterpret_cast<uintptr_t>(static_cast<void *>(&counts[i])) &
15) == 0 &&
"counts array must be 16-byte aligned for atomic 128-bit stores");
// 128-bit vectorized comparison and arithmetic
__m128d thresholds_vec = _mm_loadu_pd(&thresholds[i]);
__m128d cmp_result = _mm_cmp_pd(x_vec, thresholds_vec, _CMP_LE_OQ);
__m128i cmp_as_int = _mm_castpd_si128(cmp_result);
__m128i ones = _mm_set1_epi64x(1);
__m128i increments = _mm_and_si128(cmp_as_int, ones);
// Load current counts and add increments
__m128i current_counts = _mm_load_si128((__m128i *)&counts[i]);
__m128i updated_counts = _mm_add_epi64(current_counts, increments);
// Processors that enumerate support for Intel® AVX (by setting the feature
// flag CPUID.01H:ECX.AVX[bit 28]) guarantee that the 16-byte memory
// operations performed by the following instructions will always be
// carried out atomically:
// - MOVAPD, MOVAPS, and MOVDQA.
// - VMOVAPD, VMOVAPS, and VMOVDQA when encoded with VEX.128.
// - VMOVAPD, VMOVAPS, VMOVDQA32, and VMOVDQA64 when encoded with EVEX.128
//   and k0 (masking disabled).
// (Note that these instructions require the linear addresses of their
// memory operands to be 16-byte aligned.)
__asm__ __volatile__(
"vmovdqa %1, %0"
: "=m"(*((__m128i *)&counts[i])) // Output: aligned memory location
: "x"(updated_counts) // Input: any xmm register
: "memory" // Memory clobber
);
// We don't actually need it to be atomic across 128-bits, but that's
// sufficient to guarantee each 64 bit half is atomic.
}
// Handle remainder with atomic stores
for (; i < size; ++i) {
if (x <= thresholds[i]) {
__atomic_store_n(&counts[i], counts[i] + 1, __ATOMIC_RELAXED);
}
}
}
#else
// Fallback implementation for non-x86 architectures
static void
update_histogram_buckets(const std::vector<double> &thresholds,
std::vector<uint64_t> &counts, double x,
size_t start_idx) {
const size_t size = thresholds.size();
// Scalar implementation with atomic stores for TSAN compatibility
for (size_t i = start_idx; i < size; ++i) {
if (x <= thresholds[i]) {
__atomic_store_n(&counts[i], counts[i] + 1, __ATOMIC_RELAXED);
}
}
}
#endif
void Histogram::observe(double x) {
assert(p->thresholds.size() == p->counts.size());
update_histogram_buckets(p->thresholds, p->counts, x, 0);
// DESIGN: Single writer per thread allows simple load-modify-store for sum
// No CAS loop needed since only one thread writes to this histogram
auto current_sum =
std::bit_cast<double>(p->sum.load(std::memory_order_relaxed));
p->sum.store(std::bit_cast<uint64_t>(current_sum + x),
std::memory_order_relaxed);
p->observations.fetch_add(1, std::memory_order_relaxed);
}
template <> Family<Counter>::Family() = default;
template <> Family<Gauge>::Family() = default;
template <> Family<Histogram>::Family() = default;
template <>
Counter Family<Counter>::create(
std::vector<std::pair<std::string, std::string>> labels) {
return Metric::create_counter_instance(this, labels);
}
template <>
Gauge Family<Gauge>::create(
std::vector<std::pair<std::string, std::string>> labels) {
return Metric::create_gauge_instance(this, labels);
}
template <>
Histogram Family<Histogram>::create(
std::vector<std::pair<std::string, std::string>> labels) {
return Metric::create_histogram_instance(this, labels);
}
Family<Counter> create_counter(std::string name, std::string help) {
validate_or_abort(is_valid_metric_name(name), "invalid counter name",
name.c_str());
std::unique_lock<std::mutex> _{Metric::mutex};
auto &familyPtr = Metric::counterFamilies[name];
if (!familyPtr) {
familyPtr = std::make_unique<Family<Counter>::State>();
familyPtr->name = std::move(name);
familyPtr->help = std::move(help);
} else {
validate_or_abort(
familyPtr->help == help,
"metric family already registered with different help text",
name.c_str());
}
Family<Counter> family;
family.p = familyPtr.get();
return family;
}
Family<Gauge> create_gauge(std::string name, std::string help) {
validate_or_abort(is_valid_metric_name(name), "invalid gauge name",
name.c_str());
std::unique_lock<std::mutex> _{Metric::mutex};
auto &familyPtr = Metric::gaugeFamilies[name];
if (!familyPtr) {
familyPtr = std::make_unique<Family<Gauge>::State>();
familyPtr->name = std::move(name);
familyPtr->help = std::move(help);
} else {
validate_or_abort(
familyPtr->help == help,
"metric family already registered with different help text",
name.c_str());
}
Family<Gauge> family;
family.p = familyPtr.get();
return family;
}
Family<Histogram> create_histogram(std::string name, std::string help,
std::span<const double> buckets) {
validate_or_abort(is_valid_metric_name(name), "invalid histogram name",
name.c_str());
std::unique_lock<std::mutex> _{Metric::mutex};
auto &familyPtr = Metric::histogramFamilies[name];
if (!familyPtr) {
familyPtr = std::make_unique<Family<Histogram>::State>();
familyPtr->name = std::move(name);
familyPtr->help = std::move(help);
// DESIGN: Prometheus-compatible histogram buckets
familyPtr->buckets = std::vector<double>(buckets.begin(), buckets.end());
std::sort(familyPtr->buckets.begin(), familyPtr->buckets.end());
familyPtr->buckets.erase(
std::unique(familyPtr->buckets.begin(), familyPtr->buckets.end()),
familyPtr->buckets.end());
// +Inf bucket captures all observations (Prometheus requirement)
if (familyPtr->buckets.empty() ||
familyPtr->buckets.back() != std::numeric_limits<double>::infinity()) {
familyPtr->buckets.push_back(std::numeric_limits<double>::infinity());
}
} else {
validate_or_abort(
familyPtr->help == help,
"metric family already registered with different help text",
name.c_str());
std::vector<double> new_buckets_vec(buckets.begin(), buckets.end());
std::sort(new_buckets_vec.begin(), new_buckets_vec.end());
new_buckets_vec.erase(
std::unique(new_buckets_vec.begin(), new_buckets_vec.end()),
new_buckets_vec.end());
if (new_buckets_vec.empty() ||
new_buckets_vec.back() != std::numeric_limits<double>::infinity()) {
new_buckets_vec.push_back(std::numeric_limits<double>::infinity());
}
validate_or_abort(familyPtr->buckets == new_buckets_vec,
"metric family already registered with different buckets",
name.c_str());
}
Family<Histogram> family;
family.p = familyPtr.get();
return family;
}
std::vector<double> linear_buckets(double start, double width, int count) {
validate_or_abort(width > 0, "linear bucket width must be positive",
std::to_string(width).c_str());
validate_or_abort(count >= 0, "linear bucket count must be non-negative",
std::to_string(count).c_str());
std::vector<double> buckets;
buckets.reserve(count);
for (int i = 0; i < count; ++i) {
buckets.push_back(start + i * width);
}
return buckets;
}
std::vector<double> exponential_buckets(double start, double factor,
int count) {
validate_or_abort(start > 0, "exponential bucket start must be positive",
std::to_string(start).c_str());
validate_or_abort(factor > 1, "exponential bucket factor must be > 1",
std::to_string(factor).c_str());
validate_or_abort(count >= 0, "exponential bucket count must be non-negative",
std::to_string(count).c_str());
std::vector<double> buckets;
buckets.reserve(count);
double current = start;
for (int i = 0; i < count; ++i) {
buckets.push_back(current);
current *= factor;
}
return buckets;
}
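// Worked example (the configuration used by bench_metric.cpp above):
// exponential_buckets(0.001, 5, 7) yields thresholds of approximately
// 0.001, 0.005, 0.025, 0.125, 0.625, 3.125, 15.625; create_histogram()
// then appends the +Inf bucket.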
// Prometheus validation functions
// Metric names must match [a-zA-Z_:][a-zA-Z0-9_:]*
bool is_valid_metric_name(const std::string &name) {
if (name.empty())
return false;
// First character must be letter, underscore, or colon
char first = name[0];
if (!std::isalpha(first) && first != '_' && first != ':') {
return false;
}
// Remaining characters must be alphanumeric, underscore, or colon
for (size_t i = 1; i < name.size(); ++i) {
char c = name[i];
if (!std::isalnum(c) && c != '_' && c != ':') {
return false;
}
}
return true;
}
// Label keys must match [a-zA-Z_][a-zA-Z0-9_]*
bool is_valid_label_key(const std::string &key) {
if (key.empty())
return false;
// First character must be letter or underscore
char first = key[0];
if (!std::isalpha(first) && first != '_') {
return false;
}
// Remaining characters must be alphanumeric or underscore
for (size_t i = 1; i < key.size(); ++i) {
char c = key[i];
if (!std::isalnum(c) && c != '_') {
return false;
}
}
// Label keys starting with __ are reserved for internal use
if (key.size() >= 2 && key[0] == '_' && key[1] == '_') {
return false;
}
return true;
}
// Label values can contain any UTF-8 characters (no specific restrictions)
bool is_valid_label_value(const std::string &value) {
// Prometheus allows any UTF-8 string as label value
// Validate UTF-8 encoding for correctness using simdutf
return simdutf::validate_utf8(value.c_str(), value.size());
}
std::span<std::string_view> render(ArenaAllocator &arena) {
std::unique_lock<std::mutex> _{Metric::mutex};
std::vector<std::string_view> output;
auto format_labels =
[&](const std::vector<std::pair<std::string_view, std::string_view>>
&labels) -> std::string_view {
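        // Produces e.g. {method="GET",status="200"}; backslash, double quote,
        // and newline in label values are escaped per the Prometheus text
        // format.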
if (labels.empty()) {
return "";
}
size_t required_size = 2; // {}
for (const auto &[key, value] : labels) {
required_size += key.length() + 3 + value.length(); // key="value"
for (char c : value) {
if (c == '\\' || c == '"' || c == '\n') {
required_size++;
}
}
}
if (!labels.empty()) {
required_size += labels.size() - 1; // commas
}
char *buf = arena.allocate<char>(required_size);
char *p = buf;
*p++ = '{';
for (size_t i = 0; i < labels.size(); ++i) {
if (i > 0)
*p++ = ',';
std::memcpy(p, labels[i].first.data(), labels[i].first.length());
p += labels[i].first.length();
*p++ = '=';
*p++ = '"';
for (char c : labels[i].second) {
switch (c) {
case '\\':
*p++ = '\\';
*p++ = '\\';
break;
case '"':
*p++ = '\\';
*p++ = '"';
break;
case '\n':
*p++ = '\\';
*p++ = 'n';
break;
default:
*p++ = c;
break;
}
}
*p++ = '"';
}
*p++ = '}';
return std::string_view(buf, p - buf);
};
// Render counters
for (const auto &[name, family] : Metric::counterFamilies) {
output.push_back(
format(arena, "# HELP %s %s\n", name.c_str(), family->help.c_str()));
output.push_back(format(arena, "# TYPE %s counter\n", name.c_str()));
std::vector<std::pair<std::string_view, std::string_view>> labels_sv;
for (const auto &[labels_key, callback] : family->callbacks) {
auto value = callback();
labels_sv.clear();
for (const auto &l : labels_key.labels)
labels_sv.push_back(l);
auto labels = format_labels(labels_sv);
output.push_back(format(arena, "%.*s%.*s %.17g\n",
static_cast<int>(name.length()), name.data(),
static_cast<int>(labels.length()), labels.data(),
value));
}
for (const auto &[thread_id, per_thread] : family->perThreadState) {
for (const auto &[labels_key, instance] : per_thread.instances) {
        // Relaxed atomic read from the render thread; the owning thread is
        // the single writer and updates this value with plain stores.
double value;
__atomic_load(&instance->value, &value, __ATOMIC_RELAXED);
labels_sv.clear();
for (const auto &l : labels_key.labels)
labels_sv.push_back(l);
auto labels = format_labels(labels_sv);
output.push_back(format(arena, "%.*s%.*s %.17g\n",
static_cast<int>(name.length()), name.data(),
static_cast<int>(labels.length()),
labels.data(), value));
}
}
}
// Render gauges
for (const auto &[name, family] : Metric::gaugeFamilies) {
output.push_back(
format(arena, "# HELP %s %s\n", name.c_str(), family->help.c_str()));
output.push_back(format(arena, "# TYPE %s gauge\n", name.c_str()));
std::vector<std::pair<std::string_view, std::string_view>> labels_sv;
for (const auto &[labels_key, callback] : family->callbacks) {
auto value = callback();
labels_sv.clear();
for (const auto &l : labels_key.labels)
labels_sv.push_back(l);
auto labels = format_labels(labels_sv);
output.push_back(format(arena, "%.*s%.*s %.17g\n",
static_cast<int>(name.length()), name.data(),
static_cast<int>(labels.length()), labels.data(),
value));
}
for (const auto &[labels_key, instance] : family->instances) {
auto value = std::bit_cast<double>(
instance->value.load(std::memory_order_relaxed));
labels_sv.clear();
for (const auto &l : labels_key.labels)
labels_sv.push_back(l);
auto labels = format_labels(labels_sv);
output.push_back(format(arena, "%.*s%.*s %.17g\n",
static_cast<int>(name.length()), name.data(),
static_cast<int>(labels.length()), labels.data(),
value));
}
}
// Render histograms
for (const auto &[name, family] : Metric::histogramFamilies) {
output.push_back(
format(arena, "# HELP %s %s\n", name.c_str(), family->help.c_str()));
output.push_back(format(arena, "# TYPE %s histogram\n", name.c_str()));
std::vector<std::pair<std::string_view, std::string_view>> bucket_labels_sv;
for (const auto &[thread_id, per_thread] : family->perThreadState) {
for (const auto &[labels_key, instance] : per_thread.instances) {
for (size_t i = 0; i < instance->thresholds.size(); ++i) {
bucket_labels_sv.clear();
for (const auto &l : labels_key.labels)
bucket_labels_sv.push_back(l);
if (std::isinf(instance->thresholds[i])) {
bucket_labels_sv.push_back({"le", "+Inf"});
} else {
bucket_labels_sv.push_back(
{"le", format(arena, "%.17g", instance->thresholds[i])});
}
          // Relaxed atomic read from the render thread; the owning thread is
          // the single writer and updates these counts with plain stores.
auto count = __atomic_load_n(&instance->counts[i], __ATOMIC_RELAXED);
auto labels = format_labels(bucket_labels_sv);
output.push_back(format(arena, "%s_bucket%.*s %llu\n", name.c_str(),
static_cast<int>(labels.length()),
labels.data(),
static_cast<unsigned long long>(count)));
}
auto sum_value = std::bit_cast<double>(
instance->sum.load(std::memory_order_relaxed));
bucket_labels_sv.clear();
for (const auto &l : labels_key.labels)
bucket_labels_sv.push_back(l);
auto labels = format_labels(bucket_labels_sv);
output.push_back(format(arena, "%s_sum%.*s %.17g\n", name.c_str(),
static_cast<int>(labels.length()),
labels.data(), sum_value));
auto count_value =
instance->observations.load(std::memory_order_relaxed);
output.push_back(format(arena, "%s_count%.*s %llu\n", name.c_str(),
static_cast<int>(labels.length()),
labels.data(),
static_cast<unsigned long long>(count_value)));
}
}
}
auto result = arena.allocate<std::string_view>(output.size());
std::copy(output.begin(), output.end(), result);
return std::span<std::string_view>(result, output.size());
}
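// Illustrative helper (a sketch, not part of the public API): concatenate the
// rendered chunks into one std::string. Chunks are arena-backed and not
// null-terminated, so they are appended by pointer and length.
std::string render_to_string() {
  ArenaAllocator arena;
  std::string out;
  for (std::string_view chunk : render(arena)) {
    out.append(chunk.data(), chunk.size());
  }
  return out;
}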
// Template specialization implementations for register_callback
template <>
void Family<Counter>::register_callback(
std::vector<std::pair<std::string, std::string>> labels,
MetricCallback<Counter> callback) {
std::unique_lock<std::mutex> _{Metric::mutex};
LabelsKey key{std::move(labels)};
// Validate that labels aren't already in use by create() calls
for (const auto &[thread_id, per_thread] : p->perThreadState) {
validate_or_abort(
per_thread.instances.find(key) == per_thread.instances.end(),
"labels already registered as static instance",
key.labels.empty() ? "(no labels)" : key.labels[0].first.c_str());
}
// Validate that callback isn't already registered for these labels
validate_or_abort(p->callbacks.find(key) == p->callbacks.end(),
"callback already registered for labels",
key.labels.empty() ? "(no labels)"
: key.labels[0].first.c_str());
p->callbacks[std::move(key)] = std::move(callback);
}
template <>
void Family<Gauge>::register_callback(
std::vector<std::pair<std::string, std::string>> labels,
MetricCallback<Gauge> callback) {
std::unique_lock<std::mutex> _{Metric::mutex};
LabelsKey key{std::move(labels)};
// Validate that labels aren't already in use by create() calls
validate_or_abort(p->instances.find(key) == p->instances.end(),
"labels already registered as static instance",
key.labels.empty() ? "(no labels)"
: key.labels[0].first.c_str());
// Validate that callback isn't already registered for these labels
validate_or_abort(p->callbacks.find(key) == p->callbacks.end(),
"callback already registered for labels",
key.labels.empty() ? "(no labels)"
: key.labels[0].first.c_str());
p->callbacks[std::move(key)] = std::move(callback);
}
// Static member definitions
std::mutex Metric::mutex;
std::unordered_map<std::string, std::unique_ptr<Family<Counter>::State>>
Metric::counterFamilies;
std::unordered_map<std::string, std::unique_ptr<Family<Gauge>::State>>
Metric::gaugeFamilies;
std::unordered_map<std::string, std::unique_ptr<Family<Histogram>::State>>
Metric::histogramFamilies;
thread_local Metric::ThreadInit Metric::thread_init;
} // namespace metric

200
src/metric.hpp Normal file
View File

@@ -0,0 +1,200 @@
#pragma once
// WeaselDB Metrics System
//
// High-performance metrics collection with Prometheus-compatible output.
//
// DESIGN PRINCIPLES:
// - Single-writer semantics: Each metric instance bound to creating thread
// - Lock-free operations using atomic<uint64_t> storage for doubles
// - Full IEEE 754 double precision preservation via bit reinterpretation
// - Single global registry: All metrics registered in one global namespace
//
// CRITICAL THREAD SAFETY CONSTRAINT:
// Each metric instance has exactly ONE writer thread (the creating thread).
// It is undefined behavior to call inc()/dec()/set()/observe() from a different
// thread.
//
// REGISTRY MODEL:
// This implementation uses a single global registry for all metrics, unlike
// typical Prometheus client libraries that support multiple registries.
// This design choice prioritizes simplicity and performance over flexibility.
//
// METRIC LIFECYCLE:
// Metrics are created once and persist for the application lifetime. There is
// no unregistration mechanism - this prevents accidental metric loss and
// simplifies the implementation.
//
// USAGE:
//   auto counter_family =
//       metric::create_counter("requests_total", "Total requests");
//   auto counter = counter_family.create({{"method", "GET"}});  // Bound to this thread
//   counter.inc(1.0);  // ONLY call from creating thread
//
//   auto histogram_family = metric::create_histogram(
//       "latency", "Request latency", {0.1, 0.5, 1.0});
//   auto histogram = histogram_family.create({{"endpoint", "/api"}});  // Bound to this thread
//   histogram.observe(0.25);  // ONLY call from creating thread
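//
// VALUE STORAGE (illustrative sketch of the bit-reinterpretation principle
// above): doubles are held in std::atomic<uint64_t> via std::bit_cast so
// loads/stores stay lock-free and preserve every bit:
//   std::atomic<uint64_t> raw{0};
//   raw.store(std::bit_cast<uint64_t>(3.14), std::memory_order_relaxed);
//   double v = std::bit_cast<double>(raw.load(std::memory_order_relaxed));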
#include <functional>
#include <initializer_list>
#include <span>
#include <string>
#include <type_traits>
#include <vector>
#include "arena_allocator.hpp"
namespace metric {
// Forward declarations
template <typename T> struct Family;
// Callback function type for dynamic metric values
// Called during render() to get current metric value
// THREAD SAFETY: May be called from arbitrary thread, but serialized by
// render() mutex - no need to be thread-safe internally
template <typename T> using MetricCallback = std::function<double()>;
// Counter: A metric value that only increases.
//
// THREAD SAFETY RULES:
// 1. Do not call inc() on the same Counter object from multiple threads.
// Each object must have only one writer thread.
// 2. To use Counters concurrently, each thread must create its own Counter
// object.
// 3. When rendered, the values of all Counter objects with the same labels
// are summed together into a single total.
struct Counter {
void inc(double = 1.0); // Increment counter (must be >= 0)
private:
Counter();
friend struct Metric;
template <class> friend struct Family;
struct State;
State *p;
};
// Gauge: A metric value that can be set, increased, or decreased.
//
// THREAD SAFETY RULES:
// 1. Do not call inc(), dec(), or set() on the same Gauge object from
// multiple threads. Each object must have only one writer thread.
// 2. To use Gauges concurrently, each thread must create its own Gauge object.
// 3. If multiple Gauge objects are created with the same labels, their
// operations are combined. For example, increments from different objects
// are cumulative.
// 4. For independent gauges, create them with unique labels.
struct Gauge {
void inc(double = 1.0);
void dec(double = 1.0);
void set(double);
private:
Gauge();
friend struct Metric;
template <class> friend struct Family;
struct State;
State *p;
};
// Histogram: A metric that samples observations into buckets.
//
// THREAD SAFETY RULES:
// 1. Do not call observe() on the same Histogram object from multiple
// threads. Each object must have only one writer thread.
// 2. To use Histograms concurrently, each thread must create its own
// Histogram object.
// 3. When rendered, the observations from all Histogram objects with the
// same labels are combined into a single histogram.
struct Histogram {
void observe(double); // Record observation in appropriate bucket
private:
Histogram();
friend struct Metric;
template <class> friend struct Family;
struct State;
State *p;
};
// Family: Factory for creating metric instances with different label
// combinations. Each family represents one metric name with varying labels.
template <class T> struct Family {
static_assert(std::is_same_v<T, Counter> || std::is_same_v<T, Gauge> ||
std::is_same_v<T, Histogram>);
// Create metric instance with specific labels
// Labels are sorted by key for Prometheus compatibility
// ERROR: Will abort if labels already registered via register_callback()
// OK: Multiple calls with same labels return same instance (idempotent)
T create(std::vector<std::pair<std::string, std::string>> labels);
// Register callback-based metric (Counter and Gauge only)
// Validates that label set isn't already taken
void
register_callback(std::vector<std::pair<std::string, std::string>> labels,
MetricCallback<T> callback);
private:
Family();
friend struct Metric;
friend Family<Counter> create_counter(std::string, std::string);
friend Family<Gauge> create_gauge(std::string, std::string);
friend Family<Histogram> create_histogram(std::string, std::string,
std::span<const double>);
struct State;
State *p;
};
// Factory functions for creating metric families
// name and help are copied into the registry, so temporaries are fine
// Create counter family (monotonically increasing values)
// ERROR: Aborts if family with same name is registered with different help
// text.
Family<Counter> create_counter(std::string name, std::string help);
// Create gauge family (can increase/decrease)
// ERROR: Aborts if family with same name is registered with different help
// text.
Family<Gauge> create_gauge(std::string name, std::string help);
// Create histogram family with custom buckets
// Buckets will be sorted, deduplicated, and +Inf will be added automatically
// ERROR: Aborts if family with same name is registered with different help text
// or buckets.
Family<Histogram> create_histogram(std::string name, std::string help,
std::span<const double> buckets);
// Helper functions for generating standard histogram buckets
// Following Prometheus client library conventions
// Generate linear buckets: start, start+width, start+2*width, ...,
// start+(count-1)*width
// Example: linear_buckets(0, 10, 5) = {0, 10, 20, 30, 40}
std::vector<double> linear_buckets(double start, double width, int count);
// Generate exponential buckets: start, start*factor, start*factor^2, ...,
// start*factor^(count-1)
// Example: exponential_buckets(1, 2, 5) = {1, 2, 4, 8, 16}
std::vector<double> exponential_buckets(double start, double factor, int count);
// Render all metrics in Prometheus text format.
// Returns chunks of the Prometheus exposition format (includes # HELP and
// # TYPE lines). Each string_view may contain multiple lines separated by
// '\n'. String views are NOT null-terminated - use .size() for length.
// All string data is allocated in the provided arena.
// THREAD SAFETY: Serialized by global mutex - callbacks need not be thread-safe
std::span<std::string_view> render(ArenaAllocator &arena);
// Validation functions for Prometheus compatibility
bool is_valid_metric_name(const std::string &name);
bool is_valid_label_key(const std::string &key);
bool is_valid_label_value(const std::string &value);
// Note: Histograms do not support callbacks due to their multi-value nature
// (buckets + sum + count). Use static histogram metrics only.
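//
// Callback example (illustrative; get_heap_bytes() stands in for any function
// returning the current value as a double):
//   auto fam = metric::create_gauge("heap_bytes", "Heap usage in bytes");
//   fam.register_callback({{"type", "heap"}},
//                         [] { return static_cast<double>(get_heap_bytes()); });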
} // namespace metric

View File

@@ -99,7 +99,7 @@ auto addr = reinterpret_cast<uintptr_t>(ptr); // Pointer to integer conv
- **Complexity must be justified with benchmarks** - measure performance impact before adding complexity
- **Strive for 0% CPU usage when idle** - avoid polling, busy waiting, or unnecessary background activity
- Use **inline functions** for performance-critical code (e.g., `allocate_raw`)
- **Zero-copy operations** with `std::string_view` over string copying
- **String views** with `std::string_view` to minimize unnecessary copying
- **Arena allocation** for efficient memory management (~1ns vs ~20-270ns for malloc)
### Complexity Control
@@ -249,7 +249,7 @@ private:
- **Parameter passing:**
- Pass by value for types ≤ 16 bytes (int, pointers, string_view, small structs)
- Pass by const reference for types > 16 bytes (containers, large objects)
- **Return by value** for small types (≤ 16 bytes), **string_view** for zero-copy over strings
- **Return by value** for small types (≤ 16 bytes), **string_view** to avoid copying strings
- **noexcept specification** for move operations and non-throwing functions
```cpp
std::span<const Operation> operations() const { return operations_; }
@@ -301,13 +301,34 @@ for (auto &precondition : preconditions_) {
}
```
### Atomic Operations
- **Never use assignment operators** with `std::atomic` - always use explicit `store()` and `load()`
- **Always specify memory ordering** explicitly for atomic operations
- **Use the least restrictive correct memory ordering** - choose the weakest ordering that maintains correctness
```cpp
// Preferred - explicit store/load with precise memory ordering
std::atomic<uint64_t> counter;
counter.store(42, std::memory_order_relaxed); // Single-writer metric updates
auto value = counter.load(std::memory_order_relaxed); // Reading metrics for display
counter.store(1, std::memory_order_release); // Publishing initialization
auto ready = counter.load(std::memory_order_acquire); // Synchronizing with publisher
counter.store(42, std::memory_order_seq_cst); // When sequential consistency needed
// Avoid - assignment operators (implicit memory ordering)
std::atomic<uint64_t> counter;
counter = 42; // Implicit - memory ordering not explicit
auto value = counter; // Implicit - memory ordering not explicit
```
---
## Memory Management
### Ownership & Allocation
- **Arena allocators** for request-scoped memory with **STL allocator adapters** (see Performance Focus section for characteristics)
- **String views** pointing to arena-allocated memory for zero-copy operations
- **String views** pointing to arena-allocated memory to avoid unnecessary copying
- **STL containers with arena allocators require default construction after arena reset** - `clear()` is not sufficient
```cpp
// STL containers with arena allocators - correct reset pattern
@@ -547,6 +568,24 @@ TEST_CASE("Server accepts connections") {
- `std::latch`, `std::barrier`, futures/promises
- **Force concurrent execution** using `std::latch` to synchronize thread startup
#### Threading Checklist for Tests/Benchmarks
**MANDATORY: Before writing any `std::thread` or `threads.emplace_back()`:**
1. **Count total threads** - Include main/benchmark thread in count
2. **Always assume concurrent execution needed** - Tests/benchmarks require real concurrency
3. **Add `std::latch start_latch{N}`** where N = total concurrent threads
4. **Each thread calls `start_latch.arrive_and_wait()`** before doing work
5. **Main/benchmark thread calls `start_latch.arrive_and_wait()`** before measurement
**Red flags to catch immediately:**
- ❌ Creating threads in a loop without `std::latch`
- ❌ Background threads starting work immediately
- ❌ Benchmark measuring before all threads synchronized
- ❌ Any use of `sleep_for`, `wait_for`, or timeouts
**Simple rule:** Multiple threads = `std::latch` synchronization. No exceptions, even for "simple" background threads.
```cpp
// BAD: Race likely over before threads start
std::atomic<int> counter{0};

View File

@@ -1,7 +1,9 @@
#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
#include "arena_allocator.hpp"
#include "format.hpp"
#include <cstring>
#include <doctest/doctest.h>
#include <string>
#include <vector>
TEST_CASE("ArenaAllocator basic construction") {
@@ -532,3 +534,67 @@ TEST_CASE("ArenaAllocator realloc functionality") {
}
}
}
TEST_CASE("format function fallback codepath") {
SUBCASE("single-pass optimization success") {
ArenaAllocator arena(128);
auto result = format(arena, "Hello %s! Number: %d", "World", 42);
CHECK(result == "Hello World! Number: 42");
CHECK(result.length() == 23);
}
SUBCASE("fallback when speculative formatting fails") {
// Create arena with limited space to force fallback
ArenaAllocator arena(16);
// Consume most space to leave insufficient room for speculative formatting
arena.allocate<char>(10);
CHECK(arena.available_in_current_block() == 6);
// Format string larger than available space - should trigger fallback
std::string long_string = "This is a very long string that won't fit";
auto result = format(arena, "Prefix: %s with %d", long_string.c_str(), 123);
std::string expected =
"Prefix: This is a very long string that won't fit with 123";
CHECK(result == expected);
CHECK(result.length() == expected.length());
}
SUBCASE("edge case - exactly available space") {
ArenaAllocator arena(32);
arena.allocate<char>(20); // Leave 12 bytes
CHECK(arena.available_in_current_block() == 12);
// Format that needs exactly available space (should still use fallback due
// to null terminator)
auto result = format(arena, "Test%d", 123); // "Test123" = 7 chars
CHECK(result == "Test123");
CHECK(result.length() == 7);
}
SUBCASE("allocate_remaining_space postcondition") {
// Test empty arena
ArenaAllocator empty_arena(64);
auto space1 = empty_arena.allocate_remaining_space();
CHECK(space1.allocated_bytes >= 1);
CHECK(space1.allocated_bytes == 64);
// Test full arena (should create new block)
ArenaAllocator full_arena(32);
full_arena.allocate<char>(32); // Fill completely
auto space2 = full_arena.allocate_remaining_space();
CHECK(space2.allocated_bytes >= 1);
CHECK(space2.allocated_bytes == 32); // New block created
}
SUBCASE("format error handling") {
ArenaAllocator arena(64);
// Test with invalid format (should return empty string_view)
// Note: This is hard to trigger reliably across platforms,
// so we focus on successful cases in the other subcases
auto result = format(arena, "Valid format: %d", 42);
CHECK(result == "Valid format: 42");
}
}

552
tests/test_metric.cpp Normal file
View File

@@ -0,0 +1,552 @@
#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
#include <doctest/doctest.h>
#include "arena_allocator.hpp"
#include "metric.hpp"
#include <atomic>
#include <chrono>
#include <cmath>
#include <latch>
#include <thread>
#include <vector>
TEST_CASE("metric validation functions") {
SUBCASE("valid metric names") {
CHECK(metric::is_valid_metric_name("valid_name"));
CHECK(metric::is_valid_metric_name("ValidName"));
CHECK(metric::is_valid_metric_name("valid:name"));
CHECK(metric::is_valid_metric_name("_valid"));
CHECK(metric::is_valid_metric_name("valid_123"));
CHECK(metric::is_valid_metric_name("prometheus_metric_name"));
}
SUBCASE("invalid metric names") {
CHECK_FALSE(metric::is_valid_metric_name(""));
CHECK_FALSE(metric::is_valid_metric_name("123invalid"));
CHECK_FALSE(metric::is_valid_metric_name("invalid-name"));
CHECK_FALSE(metric::is_valid_metric_name("invalid.name"));
CHECK_FALSE(metric::is_valid_metric_name("invalid name"));
}
SUBCASE("valid label keys") {
CHECK(metric::is_valid_label_key("valid_key"));
CHECK(metric::is_valid_label_key("ValidKey"));
CHECK(metric::is_valid_label_key("valid123"));
CHECK(metric::is_valid_label_key("_valid"));
}
SUBCASE("invalid label keys") {
CHECK_FALSE(metric::is_valid_label_key(""));
CHECK_FALSE(metric::is_valid_label_key("123invalid"));
CHECK_FALSE(metric::is_valid_label_key("invalid:key"));
CHECK_FALSE(metric::is_valid_label_key("invalid-key"));
CHECK_FALSE(metric::is_valid_label_key("__reserved"));
CHECK_FALSE(metric::is_valid_label_key("__internal"));
}
SUBCASE("valid label values") {
CHECK(metric::is_valid_label_value("any_value"));
CHECK(metric::is_valid_label_value("123"));
CHECK(metric::is_valid_label_value("special-chars.allowed"));
CHECK(metric::is_valid_label_value(""));
CHECK(metric::is_valid_label_value("unicode测试"));
}
}
TEST_CASE("counter basic functionality") {
auto counter_family =
metric::create_counter("test_counter", "Test counter help");
SUBCASE("create counter with no labels") {
auto counter = counter_family.create({});
counter.inc(1.0);
counter.inc(2.5);
counter.inc(); // Default increment of 1.0
}
SUBCASE("create counter with labels") {
auto counter =
counter_family.create({{"method", "GET"}, {"status", "200"}});
counter.inc(5.0);
// Same labels should return same instance (idempotent)
auto counter2 =
counter_family.create({{"method", "GET"}, {"status", "200"}});
counter2.inc(3.0);
}
SUBCASE("label sorting") {
// Labels should be sorted by key
auto counter1 =
counter_family.create({{"z_key", "value"}, {"a_key", "value"}});
auto counter2 =
counter_family.create({{"a_key", "value"}, {"z_key", "value"}});
// These should be the same instance due to label sorting
counter1.inc(1.0);
counter2.inc(2.0); // Should add to same counter
}
}
TEST_CASE("gauge basic functionality") {
auto gauge_family = metric::create_gauge("test_gauge", "Test gauge help");
SUBCASE("gauge operations") {
auto gauge = gauge_family.create({{"instance", "test"}});
gauge.set(10.0);
gauge.inc(5.0);
gauge.dec(3.0);
gauge.inc(); // Default increment
gauge.dec(); // Default decrement
}
SUBCASE("gauge with multiple instances") {
auto gauge1 = gauge_family.create({{"instance", "test1"}});
auto gauge2 = gauge_family.create({{"instance", "test2"}});
gauge1.set(100.0);
gauge2.set(200.0);
gauge1.inc(50.0);
gauge2.dec(25.0);
}
}
TEST_CASE("histogram basic functionality") {
auto hist_family =
metric::create_histogram("test_latency", "Test latency histogram",
metric::exponential_buckets(0.1, 2.0, 5));
SUBCASE("histogram observations") {
auto hist = hist_family.create({{"endpoint", "/api"}});
hist.observe(0.05); // Below first bucket
hist.observe(0.3); // Between buckets
hist.observe(1.5); // Between buckets
hist.observe(10.0); // Above all explicit buckets (goes in +Inf)
}
SUBCASE("histogram bucket validation") {
// Buckets should be sorted and deduplicated, with +Inf added
auto hist_family2 = metric::create_histogram(
"test_hist2", "Test",
std::initializer_list<double>{5.0, 1.0, 2.5, 1.0,
0.5}); // Unsorted with duplicate
auto hist = hist_family2.create({});
hist.observe(0.1);
hist.observe(1.5);
hist.observe(100.0); // Should go in +Inf bucket
}
}
TEST_CASE("histogram bucket generators") {
SUBCASE("linear_buckets basic functionality") {
// Linear buckets: start=0, width=10, count=5 -> {0, 10, 20, 30, 40}
auto buckets = metric::linear_buckets(0.0, 10.0, 5);
CHECK(buckets.size() == 5); // exactly count buckets
CHECK(buckets[0] == 0.0);
CHECK(buckets[1] == 10.0);
CHECK(buckets[2] == 20.0);
CHECK(buckets[3] == 30.0);
CHECK(buckets[4] == 40.0);
}
SUBCASE("linear_buckets with non-zero start") {
// Linear buckets: start=5, width=2.5, count=3 -> {5, 7.5, 10}
auto buckets = metric::linear_buckets(5.0, 2.5, 3);
CHECK(buckets.size() == 3);
CHECK(buckets[0] == 5.0);
CHECK(buckets[1] == 7.5);
CHECK(buckets[2] == 10.0);
}
SUBCASE("linear_buckets edge cases") {
// Zero count should give empty vector
auto zero_buckets = metric::linear_buckets(100.0, 10.0, 0);
CHECK(zero_buckets.size() == 0);
// Negative start should work
auto negative_buckets = metric::linear_buckets(-10.0, 5.0, 2);
CHECK(negative_buckets.size() == 2);
CHECK(negative_buckets[0] == -10.0);
CHECK(negative_buckets[1] == -5.0);
}
SUBCASE("exponential_buckets basic functionality") {
// Exponential buckets: start=1, factor=2, count=5 -> {1, 2, 4, 8, 16}
auto buckets = metric::exponential_buckets(1.0, 2.0, 5);
CHECK(buckets.size() == 5); // exactly count buckets
CHECK(buckets[0] == 1.0);
CHECK(buckets[1] == 2.0);
CHECK(buckets[2] == 4.0);
CHECK(buckets[3] == 8.0);
CHECK(buckets[4] == 16.0);
}
SUBCASE("exponential_buckets different factor") {
// Exponential buckets: start=0.1, factor=10, count=3 -> {0.1, 1, 10}
auto buckets = metric::exponential_buckets(0.1, 10.0, 3);
CHECK(buckets.size() == 3);
CHECK(buckets[0] == doctest::Approx(0.1));
CHECK(buckets[1] == doctest::Approx(1.0));
CHECK(buckets[2] == doctest::Approx(10.0));
}
SUBCASE("exponential_buckets typical latency pattern") {
// Typical web service latency buckets: 5ms, 10ms, 20ms, 40ms, 80ms, etc.
auto buckets = metric::exponential_buckets(0.005, 2.0, 8);
CHECK(buckets.size() == 8);
CHECK(buckets[0] == doctest::Approx(0.005)); // 5ms
CHECK(buckets[1] == doctest::Approx(0.010)); // 10ms
CHECK(buckets[2] == doctest::Approx(0.020)); // 20ms
CHECK(buckets[3] == doctest::Approx(0.040)); // 40ms
CHECK(buckets[4] == doctest::Approx(0.080)); // 80ms
CHECK(buckets[5] == doctest::Approx(0.160)); // 160ms
CHECK(buckets[6] == doctest::Approx(0.320)); // 320ms
CHECK(buckets[7] == doctest::Approx(0.640)); // 640ms
}
SUBCASE("exponential_buckets edge cases") {
// Zero count should give empty vector
auto zero_buckets = metric::exponential_buckets(5.0, 3.0, 0);
CHECK(zero_buckets.size() == 0);
}
SUBCASE("bucket generators with histogram creation") {
// Test that generated buckets work correctly with histogram creation
auto linear_hist = metric::create_histogram(
"linear_test", "Linear test", metric::linear_buckets(0, 100, 5));
auto linear_instance = linear_hist.create({{"type", "linear"}});
// Test observations fall into expected buckets
linear_instance.observe(50); // Should fall into 100 bucket
linear_instance.observe(150); // Should fall into 200 bucket
linear_instance.observe(1000); // Should fall into +Inf bucket
auto exp_hist =
metric::create_histogram("exp_test", "Exponential test",
metric::exponential_buckets(0.001, 10.0, 4));
auto exp_instance = exp_hist.create({{"type", "exponential"}});
// Test typical latency measurements
exp_instance.observe(0.0005); // Should fall into 0.001 bucket (1ms)
exp_instance.observe(0.005); // Should fall into 0.01 bucket (10ms)
exp_instance.observe(0.05); // Should fall into 0.1 bucket (100ms)
exp_instance.observe(5.0); // Should fall into +Inf bucket
}
SUBCASE("prometheus compatibility verification") {
// Verify our bucket generation matches Prometheus Go client behavior
// Linear buckets equivalent to Prometheus LinearBuckets(0, 10, 5)
auto our_linear = metric::linear_buckets(0, 10, 5);
std::vector<double> expected_linear = {0, 10, 20, 30, 40};
CHECK(our_linear == expected_linear);
// Exponential buckets equivalent to Prometheus ExponentialBuckets(1, 2, 5)
auto our_exp = metric::exponential_buckets(1, 2, 5);
std::vector<double> expected_exp = {1, 2, 4, 8, 16};
CHECK(our_exp == expected_exp);
    // Exponential buckets spanning roughly the same range as the default
    // Prometheus histogram buckets
auto default_buckets = metric::exponential_buckets(0.005, 2.5, 9);
// Should be: .005, .0125, .03125, .078125, .1953125,
// .48828125, 1.220703125, 3.0517578125, 7.62939453125
CHECK(default_buckets.size() == 9);
CHECK(default_buckets[0] == doctest::Approx(0.005));
CHECK(default_buckets[1] == doctest::Approx(0.0125));
CHECK(default_buckets[8] == doctest::Approx(7.62939453125));
}
}
TEST_CASE("callback-based metrics") {
auto counter_family =
metric::create_counter("callback_counter", "Callback counter");
auto gauge_family = metric::create_gauge("callback_gauge", "Callback gauge");
SUBCASE("counter callback") {
std::atomic<double> counter_value{42.0};
counter_family.register_callback(
{{"type", "callback"}},
[&counter_value]() { return counter_value.load(); });
// Callback should be called during render
ArenaAllocator arena;
auto output = metric::render(arena);
CHECK(output.size() > 0);
}
SUBCASE("gauge callback") {
std::atomic<double> gauge_value{123.5};
gauge_family.register_callback({{"type", "callback"}}, [&gauge_value]() {
return gauge_value.load();
});
ArenaAllocator arena;
auto output = metric::render(arena);
CHECK(output.size() > 0);
}
SUBCASE("callback conflict detection") {
// First create a static instance
auto counter = counter_family.create({{"conflict", "test"}});
counter.inc(1.0);
// Then try to register a callback with same labels - should abort
// This is a validation test that would abort in debug builds
}
}
TEST_CASE("prometheus text format rendering") {
ArenaAllocator arena;
// Create some metrics
auto counter_family =
metric::create_counter("http_requests_total", "Total HTTP requests");
auto counter = counter_family.create({{"method", "GET"}, {"status", "200"}});
counter.inc(1000);
auto gauge_family =
metric::create_gauge("memory_usage_bytes", "Memory usage");
auto gauge = gauge_family.create({{"type", "heap"}});
gauge.set(1048576);
auto hist_family = metric::create_histogram(
"request_duration_seconds", "Request duration",
      metric::exponential_buckets(0.1, 2.0, 3)); // 0.1, 0.2, 0.4 (+Inf added)
auto hist = hist_family.create({{"handler", "api"}});
hist.observe(0.25);
hist.observe(0.75);
hist.observe(1.5);
SUBCASE("render format validation") {
auto output = metric::render(arena);
CHECK(output.size() > 0);
// Basic format checks
bool found_help = false;
bool found_type = false;
bool found_metric_line = false;
for (const auto &line : output) {
if (line.starts_with("# HELP"))
found_help = true;
if (line.starts_with("# TYPE"))
found_type = true;
if (line.find("http_requests_total") != std::string_view::npos)
found_metric_line = true;
}
CHECK(found_help);
CHECK(found_type);
CHECK(found_metric_line);
}
SUBCASE("special value formatting") {
auto special_gauge_family =
metric::create_gauge("special_values", "Special value test");
auto special_gauge = special_gauge_family.create({});
special_gauge.set(std::numeric_limits<double>::infinity());
auto output = metric::render(arena);
// Should contain "+Inf" representation
bool found_inf = false;
for (const auto &line : output) {
if (line.find("+Inf") != std::string_view::npos) {
found_inf = true;
break;
}
}
CHECK(found_inf);
}
}
TEST_CASE("thread safety") {
constexpr int num_threads = 8;
constexpr int ops_per_thread = 1000;
SUBCASE("counter single-writer semantics") {
auto counter_family =
metric::create_counter("thread_test_counter", "Thread test");
std::vector<std::thread> threads;
std::latch start_latch{num_threads};
// Each thread creates its own counter instance (safe)
for (int i = 0; i < num_threads; ++i) {
threads.emplace_back([&, i]() {
auto counter =
counter_family.create({{"thread_id", std::to_string(i)}});
start_latch.arrive_and_wait();
for (int j = 0; j < ops_per_thread; ++j) {
counter.inc(1.0);
}
});
}
for (auto &t : threads) {
t.join();
}
}
SUBCASE("gauge multi-writer contention") {
auto gauge_family =
metric::create_gauge("thread_test_gauge", "Thread test gauge");
std::vector<std::thread> threads;
std::latch start_latch{num_threads};
// Multiple threads create gauges with the same labels, writing to the same
// underlying state, testing CAS contention.
for (int i = 0; i < num_threads; ++i) {
threads.emplace_back([&]() {
auto gauge = gauge_family.create({{"shared", "true"}});
start_latch.arrive_and_wait();
for (int j = 0; j < ops_per_thread; ++j) {
gauge.inc(1.0);
}
});
}
for (auto &t : threads) {
t.join();
}
}
SUBCASE("histogram single-writer per thread") {
auto hist_family =
metric::create_histogram("thread_test_hist", "Thread test histogram",
std::initializer_list<double>{0.1, 0.5, 1.0});
std::vector<std::thread> threads;
std::latch start_latch{num_threads};
for (int i = 0; i < num_threads; ++i) {
threads.emplace_back([&, i]() {
auto hist = hist_family.create({{"thread_id", std::to_string(i)}});
start_latch.arrive_and_wait();
for (int j = 0; j < ops_per_thread; ++j) {
hist.observe(static_cast<double>(j) / ops_per_thread);
}
});
}
for (auto &t : threads) {
t.join();
}
}
SUBCASE("concurrent render calls") {
// Multiple threads calling render concurrently should be safe (serialized
// by mutex)
auto counter_family = metric::create_counter("render_test", "Render test");
auto counter = counter_family.create({});
counter.inc(100);
std::vector<std::thread> threads;
std::latch start_latch{num_threads};
std::atomic<int> success_count{0};
for (int i = 0; i < num_threads; ++i) {
threads.emplace_back([&]() {
start_latch.arrive_and_wait();
ArenaAllocator arena;
auto output = metric::render(arena);
if (output.size() > 0) {
success_count.fetch_add(1);
}
});
}
for (auto &t : threads) {
t.join();
}
CHECK(success_count.load() == num_threads);
}
}
TEST_CASE("error conditions") {
SUBCASE("counter negative increment") {
auto counter_family = metric::create_counter("error_counter", "Error test");
auto counter = counter_family.create({});
// This should abort in debug builds due to validation
// In release builds, behavior is undefined
// counter.inc(-1.0); // Would abort
}
SUBCASE("invalid metric names") {
// These should abort due to validation
// auto bad_counter = metric::create_counter("123invalid", "help"); // Would
// abort auto bad_gauge = metric::create_gauge("invalid-name", "help"); //
// Would abort
}
SUBCASE("invalid label keys") {
auto counter_family = metric::create_counter("valid_name", "help");
// This should abort due to label validation
// auto counter = counter_family.create({{"123invalid", "value"}}); // Would
// abort
}
}
TEST_CASE("memory management") {
SUBCASE("arena allocation in render") {
ArenaAllocator arena;
auto initial_used = arena.used_bytes();
auto counter_family = metric::create_counter("memory_test", "Memory test");
auto counter = counter_family.create(
{{"large_label", "very_long_value_that_takes_space"}});
counter.inc(42);
auto output = metric::render(arena);
auto final_used = arena.used_bytes();
CHECK(output.size() > 0);
CHECK(final_used > initial_used); // Arena was used for string allocation
// All string_views should point to arena memory
for (const auto &line : output) {
CHECK(line.size() > 0);
}
}
SUBCASE("arena reset behavior") {
ArenaAllocator arena;
auto counter_family = metric::create_counter("reset_test", "Reset test");
auto counter = counter_family.create({});
counter.inc(1);
// Render multiple times with arena resets
for (int i = 0; i < 5; ++i) {
auto output = metric::render(arena);
CHECK(output.size() > 0);
arena.reset(); // Should not affect metric values, only arena memory
}
// Final render should still work
auto final_output = metric::render(arena);
CHECK(final_output.size() > 0);
}
}

View File

@@ -42,7 +42,7 @@
- [ ] Support for HTTP redirects (3xx responses) with redirect limits
- [ ] SSL/TLS support using OpenSSL for HTTPS connections
- [ ] Request/response logging and metrics integration
- [ ] Memory-efficient design with zero-copy where possible
- [ ] Memory-efficient design minimizing unnecessary copying
- [ ] Implement fake in-process S3 service using separate Server instance with S3 ConnectionHandler
- [ ] Use create_local_connection to get fd for in-process communication
- [ ] Implement `ListObjectsV2` API for object enumeration

View File

@@ -155,7 +155,7 @@ def main():
if violations:
for v in violations:
print(f"\n{filepath}:{v['line']}:{v['column']}")
print(f"\n{filepath}:{v['line']}:{v['column']}:")
print(f" {v['type']} '{v['camelCase']}' should be '{v['snake_case']}'")
print(f" Context: {v['context']}")
total_violations += len(violations)

View File

@@ -401,6 +401,7 @@ private:
double current_min = g_min_latency.load(std::memory_order_relaxed);
while (latency < current_min &&
!g_min_latency.compare_exchange_weak(current_min, latency,
std::memory_order_relaxed,
std::memory_order_relaxed)) {
// Retry if another thread updated min_latency
}
@@ -408,6 +409,7 @@ private:
double current_max = g_max_latency.load(std::memory_order_relaxed);
while (latency > current_max &&
!g_max_latency.compare_exchange_weak(current_max, latency,
std::memory_order_relaxed,
std::memory_order_relaxed)) {
// Retry if another thread updated max_latency
}
@@ -637,7 +639,7 @@ int main(int argc, char *argv[]) {
int epollfd = g_epoll_fds[i]; // Each thread uses its own epoll instance
pthread_setname_np(pthread_self(),
("network-" + std::to_string(i)).c_str());
while (g_connect_threads.load() != 0) {
while (g_connect_threads.load(std::memory_order_acquire) != 0) {
struct epoll_event events[256]; // Use a reasonable max size
int batch_size = std::min(int(sizeof(events) / sizeof(events[0])),
g_config.event_batch_size);
@@ -779,7 +781,7 @@ int main(int argc, char *argv[]) {
continue;
}
}
g_connect_threads.fetch_sub(1);
g_connect_threads.fetch_sub(1, std::memory_order_release);
});
}