Compare commits
25 Commits
b6e57f58af ... 50e27cced8

| SHA1 |
|---|
| 50e27cced8 |
| d2762dc8da |
| 5592d065de |
| 91e799aae8 |
| 4fc277393e |
| a5776004de |
| 62b37c067c |
| fac0d20ae1 |
| e3a2ddbbfb |
| b6d4ae2862 |
| 1133d1e365 |
| de5adb54d2 |
| d0f2b6550a |
| ca5b299da8 |
| 9c89eba6c8 |
| ed6e6ea9fe |
| c97920c473 |
| 7808896226 |
| 404b491880 |
| bc0d5a7422 |
| 6fb57619c5 |
| f46a98249f |
| a73a463936 |
| a32356e298 |
| 3d61408976 |
@@ -112,6 +112,7 @@ set(SOURCES
     src/json_commit_request_parser.cpp
     src/http_handler.cpp
     src/arena_allocator.cpp
+    src/format.cpp
     ${CMAKE_BINARY_DIR}/json_tokens.cpp)

 add_executable(weaseldb ${SOURCES})
@@ -133,7 +134,7 @@ target_include_directories(test_data PUBLIC benchmarks)
 target_link_libraries(test_data simdutf::simdutf)

 add_executable(test_arena_allocator tests/test_arena_allocator.cpp
-               src/arena_allocator.cpp)
+               src/arena_allocator.cpp src/format.cpp)
 target_link_libraries(test_arena_allocator doctest::doctest)
 target_include_directories(test_arena_allocator PRIVATE src)
 target_compile_options(test_arena_allocator PRIVATE -UNDEBUG)
@@ -185,6 +186,17 @@ target_compile_definitions(test_server_connection_return
                            PRIVATE DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN)
 target_compile_options(test_server_connection_return PRIVATE -UNDEBUG)

+# Metrics system test
+add_executable(test_metric tests/test_metric.cpp src/metric.cpp
+               src/arena_allocator.cpp src/format.cpp)
+target_link_libraries(test_metric doctest::doctest Threads::Threads
+                      simdutf::simdutf weaseljson)
+target_include_directories(test_metric PRIVATE src)
+target_compile_options(test_metric PRIVATE -UNDEBUG)
+
+# Register with CTest
+add_test(NAME metric_tests COMMAND test_metric)
+
 add_executable(bench_arena_allocator benchmarks/bench_arena_allocator.cpp
                src/arena_allocator.cpp)
 target_link_libraries(bench_arena_allocator nanobench)
@@ -216,6 +228,21 @@ add_executable(bench_thread_pipeline benchmarks/bench_thread_pipeline.cpp)
 target_link_libraries(bench_thread_pipeline nanobench Threads::Threads)
 target_include_directories(bench_thread_pipeline PRIVATE src)

+add_executable(bench_format_comparison benchmarks/bench_format_comparison.cpp
+               src/arena_allocator.cpp src/format.cpp)
+target_link_libraries(bench_format_comparison nanobench)
+target_include_directories(bench_format_comparison PRIVATE src)
+
+# Metrics system benchmark
+add_executable(bench_metric benchmarks/bench_metric.cpp src/metric.cpp
+               src/arena_allocator.cpp src/format.cpp)
+target_link_libraries(bench_metric nanobench Threads::Threads simdutf::simdutf
+                      weaseljson)
+target_include_directories(bench_metric PRIVATE src)
+
+# Register benchmark with CTest
+add_test(NAME metric_benchmarks COMMAND bench_metric)
+
 # Debug tools
 add_executable(
   debug_arena tools/debug_arena.cpp src/json_commit_request_parser.cpp
@@ -237,3 +264,4 @@ add_test(NAME arena_allocator_benchmarks COMMAND bench_arena_allocator)
 add_test(NAME commit_request_benchmarks COMMAND bench_commit_request)
 add_test(NAME parser_comparison_benchmarks COMMAND bench_parser_comparison)
 add_test(NAME thread_pipeline_benchmarks COMMAND bench_thread_pipeline)
+add_test(NAME format_comparison_benchmarks COMMAND bench_format_comparison)
benchmarks/bench_format_comparison.cpp (new file, 274 lines)
@@ -0,0 +1,274 @@
#include "arena_allocator.hpp"
#include "format.hpp"
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <nanobench.h>
#include <sstream>
#include <string>

#if __cpp_lib_format >= 201907L
#include <format>
#define HAS_STD_FORMAT 1
#else
#define HAS_STD_FORMAT 0
#endif

// Test data for consistent benchmarks
constexpr int TEST_INT = 42;
constexpr double TEST_DOUBLE = 3.14159;
const std::string TEST_STRING = "Hello World";

// Benchmark simple string concatenation: "Hello " + "World" + "!"
void benchmark_simple_concatenation() {
  std::cout << "\n=== Simple String Concatenation: 'Hello World!' ===\n";

  ankerl::nanobench::Bench bench;
  bench.title("Simple Concatenation").unit("op").warmup(100).epochs(1000);

  // Arena-based static_format
  bench.run("static_format", [&] {
    ArenaAllocator arena(64);
    auto result = static_format(arena, "Hello ", "World", "!");
    ankerl::nanobench::doNotOptimizeAway(result);
  });

  // Arena-based format
  bench.run("format", [&] {
    ArenaAllocator arena(64);
    auto result = format(arena, "Hello %s!", "World");
    ankerl::nanobench::doNotOptimizeAway(result);
  });

  // std::stringstream
  bench.run("std::stringstream", [&] {
    std::stringstream ss;
    ss << "Hello " << "World" << "!";
    auto result = ss.str();
    ankerl::nanobench::doNotOptimizeAway(result);
  });

#if HAS_STD_FORMAT
  // std::format (C++20)
  bench.run("std::format", [&] {
    auto result = std::format("Hello {}!", "World");
    ankerl::nanobench::doNotOptimizeAway(result);
  });
#endif

  // Raw snprintf with malloc
  bench.run("snprintf + malloc", [&] {
    char buffer[64];
    int len = std::snprintf(buffer, sizeof(buffer), "Hello %s!", "World");
    char *result = static_cast<char *>(std::malloc(len + 1));
    std::memcpy(result, buffer, len + 1);
    ankerl::nanobench::doNotOptimizeAway(result);
    std::free(result);
  });
}

// Benchmark mixed type formatting: "Count: 42, Rate: 3.14159"
void benchmark_mixed_types() {
  std::cout << "\n=== Mixed Type Formatting: 'Count: 42, Rate: 3.14159' ===\n";

  ankerl::nanobench::Bench bench;
  bench.title("Mixed Types").unit("op").warmup(100).epochs(1000);

  // Arena-based static_format
  bench.run("static_format", [&] {
    ArenaAllocator arena(128);
    auto result =
        static_format(arena, "Count: ", TEST_INT, ", Rate: ", TEST_DOUBLE);
    ankerl::nanobench::doNotOptimizeAway(result);
  });

  // Arena-based format
  bench.run("format", [&] {
    ArenaAllocator arena(128);
    auto result = format(arena, "Count: %d, Rate: %.5f", TEST_INT, TEST_DOUBLE);
    ankerl::nanobench::doNotOptimizeAway(result);
  });

  // std::stringstream
  bench.run("std::stringstream", [&] {
    std::stringstream ss;
    ss << "Count: " << TEST_INT << ", Rate: " << std::fixed
       << std::setprecision(5) << TEST_DOUBLE;
    auto result = ss.str();
    ankerl::nanobench::doNotOptimizeAway(result);
  });

#if HAS_STD_FORMAT
  // std::format (C++20)
  bench.run("std::format", [&] {
    auto result = std::format("Count: {}, Rate: {:.5f}", TEST_INT, TEST_DOUBLE);
    ankerl::nanobench::doNotOptimizeAway(result);
  });
#endif

  // Raw snprintf with malloc
  bench.run("snprintf + malloc", [&] {
    char buffer[128];
    int len = std::snprintf(buffer, sizeof(buffer), "Count: %d, Rate: %.5f",
                            TEST_INT, TEST_DOUBLE);
    char *result = static_cast<char *>(std::malloc(len + 1));
    std::memcpy(result, buffer, len + 1);
    ankerl::nanobench::doNotOptimizeAway(result);
    std::free(result);
  });
}

// Benchmark complex formatting with precision and alignment
void benchmark_complex_formatting() {
  std::cout << "\n=== Complex Formatting: '%-10s %5d %8.2f' ===\n";

  ankerl::nanobench::Bench bench;
  bench.title("Complex Formatting").unit("op").warmup(100).epochs(1000);

  // Arena-based format (static_format doesn't support printf specifiers)
  bench.run("format", [&] {
    ArenaAllocator arena(128);
    auto result = format(arena, "%-10s %5d %8.2f", TEST_STRING.c_str(),
                         TEST_INT, TEST_DOUBLE);
    ankerl::nanobench::doNotOptimizeAway(result);
  });

  // std::stringstream
  bench.run("std::stringstream", [&] {
    std::stringstream ss;
    ss << std::left << std::setw(10) << TEST_STRING << " " << std::right
       << std::setw(5) << TEST_INT << " " << std::setw(8) << std::fixed
       << std::setprecision(2) << TEST_DOUBLE;
    auto result = ss.str();
    ankerl::nanobench::doNotOptimizeAway(result);
  });

#if HAS_STD_FORMAT
  // std::format (C++20)
  bench.run("std::format", [&] {
    auto result = std::format("{:<10} {:>5} {:>8.2f}", TEST_STRING, TEST_INT,
                              TEST_DOUBLE);
    ankerl::nanobench::doNotOptimizeAway(result);
  });
#endif

  // Raw snprintf with malloc
  bench.run("snprintf + malloc", [&] {
    char buffer[128];
    int len = std::snprintf(buffer, sizeof(buffer), "%-10s %5d %8.2f",
                            TEST_STRING.c_str(), TEST_INT, TEST_DOUBLE);
    char *result = static_cast<char *>(std::malloc(len + 1));
    std::memcpy(result, buffer, len + 1);
    ankerl::nanobench::doNotOptimizeAway(result);
    std::free(result);
  });
}

// Benchmark error message formatting (common use case)
void benchmark_error_messages() {
  std::cout << "\n=== Error Message Formatting: 'Error 404: File not found "
               "(line 123)' ===\n";

  ankerl::nanobench::Bench bench;
  bench.title("Error Messages").unit("op").warmup(100).epochs(1000);

  constexpr int error_code = 404;
  constexpr int line_number = 123;
  const std::string error_msg = "File not found";

  // Arena-based static_format (using string literals only)
  bench.run("static_format", [&] {
    ArenaAllocator arena(128);
    auto result = static_format(arena, "Error ", error_code, ": ",
                                "File not found", " (line ", line_number, ")");
    ankerl::nanobench::doNotOptimizeAway(result);
  });

  // Arena-based format
  bench.run("format", [&] {
    ArenaAllocator arena(128);
    auto result = format(arena, "Error %d: %s (line %d)", error_code,
                         error_msg.c_str(), line_number);
    ankerl::nanobench::doNotOptimizeAway(result);
  });

  // std::stringstream
  bench.run("std::stringstream", [&] {
    std::stringstream ss;
    ss << "Error " << error_code << ": " << error_msg << " (line "
       << line_number << ")";
    auto result = ss.str();
    ankerl::nanobench::doNotOptimizeAway(result);
  });

#if HAS_STD_FORMAT
  // std::format (C++20)
  bench.run("std::format", [&] {
    auto result = std::format("Error {}: {} (line {})", error_code, error_msg,
                              line_number);
    ankerl::nanobench::doNotOptimizeAway(result);
  });
#endif
}

// Benchmark memory allocation overhead by testing arena reuse
void benchmark_memory_reuse() {
  std::cout << "\n=== Memory Allocation Patterns ===\n";

  ankerl::nanobench::Bench bench;
  bench.title("Memory Patterns").unit("op").warmup(100).epochs(100);

  // Arena with fresh allocation each time (realistic usage)
  bench.run("fresh arena", [&] {
    ArenaAllocator arena(128);
    auto result = format(arena, "Test %d: %s %.2f", TEST_INT,
                         TEST_STRING.c_str(), TEST_DOUBLE);
    ankerl::nanobench::doNotOptimizeAway(result);
  });

  // Pre-allocated arena (reuse scenario)
  ArenaAllocator shared_arena(1024);
  bench.run("reused arena", [&] {
    auto result = format(shared_arena, "Test %d: %s %.2f", TEST_INT,
                         TEST_STRING.c_str(), TEST_DOUBLE);
    ankerl::nanobench::doNotOptimizeAway(result);
  });

  // Fresh std::string allocations
  bench.run("std::stringstream", [&] {
    std::stringstream ss;
    ss << "Test " << TEST_INT << ": " << TEST_STRING << " " << std::fixed
       << std::setprecision(2) << TEST_DOUBLE;
    auto result = ss.str();
    ankerl::nanobench::doNotOptimizeAway(result);
  });
}

int main() {
  std::cout << "Format Function Benchmark Comparison\n";
  std::cout << "====================================\n";

#if HAS_STD_FORMAT
  std::cout << "C++20 std::format: Available\n";
#else
  std::cout << "C++20 std::format: Not available\n";
#endif

  benchmark_simple_concatenation();
  benchmark_mixed_types();
  benchmark_complex_formatting();
  benchmark_error_messages();
  benchmark_memory_reuse();

  std::cout << "\n=== Summary ===\n";
  std::cout
      << "* static_format: Best for simple concatenation with known types\n";
  std::cout
      << "* format: Best for printf-style formatting with arena allocation\n";
  std::cout
      << "* std::stringstream: Flexible but slower due to heap allocation\n";
  std::cout << "* std::format: Modern C++20 alternative (if available)\n";
  std::cout << "* snprintf + malloc: Low-level but requires manual memory "
               "management\n";

  return 0;
}
benchmarks/bench_metric.cpp (new file, 256 lines)
@@ -0,0 +1,256 @@
#include <nanobench.h>

#include "arena_allocator.hpp"
#include "metric.hpp"

#include <atomic>
#include <cmath>
#include <latch>
#include <memory>
#include <random>
#include <thread>
#include <vector>

// High-contention benchmark setup
struct ContentionEnvironment {
  // Background threads for contention
  std::vector<std::thread> background_threads;
  std::atomic<bool> stop_flag{false};

  // Synchronization latches - must be members to avoid use-after-return
  std::unique_ptr<std::latch> contention_latch;
  std::unique_ptr<std::latch> render_latch;

  metric::Family<metric::Gauge> gauge_family =
      metric::create_gauge("gauge", "");
  metric::Family<metric::Counter> counter_family =
      metric::create_counter("counter", "");
  metric::Family<metric::Histogram> histogram_family = metric::create_histogram(
      "histogram", "", metric::exponential_buckets(0.001, 5, 7));

  ContentionEnvironment() = default;

  void start_background_contention(int num_threads = 4) {
    stop_flag.store(false);
    contention_latch = std::make_unique<std::latch>(num_threads + 1);

    for (int i = 0; i < num_threads; ++i) {
      background_threads.emplace_back([this, i]() {
        auto bg_counter = counter_family.create({});
        auto bg_gauge = gauge_family.create({});
        auto bg_histogram = histogram_family.create({});

        std::mt19937 rng(i);
        std::uniform_real_distribution<double> dist(0.0, 10.0);

        contention_latch
            ->arrive_and_wait(); // All background threads start together

        while (!stop_flag.load(std::memory_order_relaxed)) {
          // Simulate mixed workload
          bg_counter.inc(1.0);
          bg_gauge.set(dist(rng));
          bg_gauge.inc(1.0);
          bg_histogram.observe(dist(rng));
        }
      });
    }

    contention_latch
        ->arrive_and_wait(); // Wait for all background threads to be ready
  }

  void start_render_thread() {
    render_latch = std::make_unique<std::latch>(2);

    background_threads.emplace_back([this]() {
      ArenaAllocator arena;

      render_latch->arrive_and_wait(); // Render thread signals it's ready

      while (!stop_flag.load(std::memory_order_relaxed)) {
        auto output = metric::render(arena);
        static_cast<void>(output); // Suppress unused variable warning
        arena.reset();
      }
    });

    render_latch->arrive_and_wait(); // Wait for render thread to be ready
  }

  void stop_background_threads() {
    stop_flag.store(true);
    for (auto &t : background_threads) {
      if (t.joinable()) {
        t.join();
      }
    }
    background_threads.clear();
  }

  ~ContentionEnvironment() { stop_background_threads(); }
};

int main() {
  ankerl::nanobench::Bench bench;
  bench.title("WeaselDB Metrics Performance").unit("operation").warmup(1000);

  metric::Family<metric::Gauge> gauge_family =
      metric::create_gauge("gauge", "");
  metric::Family<metric::Counter> counter_family =
      metric::create_counter("counter", "");
  metric::Family<metric::Histogram> histogram_family = metric::create_histogram(
      "histogram", "", metric::exponential_buckets(0.001, 5, 7));

  auto counter = counter_family.create({});
  auto gauge = gauge_family.create({});
  auto histogram = histogram_family.create({});

  // Baseline performance without contention
  {
    bench.run("counter.inc() - no contention", [&]() {
      counter.inc(1.0);
      ankerl::nanobench::doNotOptimizeAway(counter);
    });

    bench.run("gauge.inc() - no contention", [&]() {
      gauge.inc(1.0);
      ankerl::nanobench::doNotOptimizeAway(gauge);
    });

    bench.run("gauge.set() - no contention", [&]() {
      gauge.set(42.0);
      ankerl::nanobench::doNotOptimizeAway(gauge);
    });

    bench.run("histogram.observe() - no contention", [&]() {
      histogram.observe(0.5);
      ankerl::nanobench::doNotOptimizeAway(histogram);
    });
  }

  // High contention with background threads
  {
    ContentionEnvironment env;

    // Start background threads creating contention
    env.start_background_contention(8);

    bench.run("counter.inc() - 8 background threads",
              [&]() { counter.inc(1.0); });

    bench.run("gauge.inc() - 8 background threads", [&]() { gauge.inc(1.0); });

    bench.run("gauge.set() - 8 background threads", [&]() { gauge.set(42.0); });

    bench.run("histogram.observe() - 8 background threads",
              [&]() { histogram.observe(1.5); });
  }

  // Concurrent render contention
  {
    ContentionEnvironment env;

    // Start background threads + render thread
    env.start_background_contention(4);
    env.start_render_thread();

    bench.run("counter.inc() - with concurrent render",
              [&]() { counter.inc(1.0); });

    bench.run("gauge.inc() - with concurrent render",
              [&]() { gauge.inc(1.0); });

    bench.run("histogram.observe() - with concurrent render",
              [&]() { histogram.observe(2.0); });
  }

  // Render performance scaling
  {
    // Test render performance as number of metrics increases
    std::vector<metric::Counter> counters;
    std::vector<metric::Gauge> gauges;
    std::vector<metric::Histogram> histograms;

    auto counter_family =
        metric::create_counter("scale_counter", "Scale counter");
    auto gauge_family = metric::create_gauge("scale_gauge", "Scale gauge");
    auto histogram_family = metric::create_histogram(
        "scale_histogram", "Scale histogram",
        std::initializer_list<double>{0.1, 0.5, 1.0, 2.5, 5.0, 10.0, 25.0});

    // Create varying numbers of metrics
    for (int scale : {10, 100, 1000}) {
      // Clear previous metrics by creating new families
      // (Note: In real usage, metrics persist for application lifetime)
      for (int i = 0; i < scale; ++i) {
        counters.emplace_back(
            counter_family.create({{"id", std::to_string(i)}}));
        gauges.emplace_back(gauge_family.create({{"id", std::to_string(i)}}));
        histograms.emplace_back(
            histogram_family.create({{"id", std::to_string(i)}}));

        // Set some values
        counters.back().inc(static_cast<double>(i));
        gauges.back().set(static_cast<double>(i * 2));
        histograms.back().observe(static_cast<double>(i) * 0.1);
      }

      ArenaAllocator arena;
      std::string bench_name =
          "render() - " + std::to_string(scale) + " metrics each type";

      bench.run(bench_name, [&]() {
        auto output = metric::render(arena);
        ankerl::nanobench::doNotOptimizeAway(output);
        arena.reset();
      });
    }
  }

  // Callback metrics performance
  {
    auto counter_family =
        metric::create_counter("callback_counter", "Callback counter");
    auto gauge_family =
        metric::create_gauge("callback_gauge", "Callback gauge");

    std::atomic<double> counter_value{0};
    std::atomic<double> gauge_value{100};

    // Register callbacks
    counter_family.register_callback(
        {{"type", "callback"}}, [&counter_value]() {
          return counter_value.load(std::memory_order_relaxed);
        });

    gauge_family.register_callback({{"type", "callback"}}, [&gauge_value]() {
      return gauge_value.load(std::memory_order_relaxed);
    });

    // Background thread updating callback values
    std::atomic<bool> stop_callback{false};
    std::latch start_latch{2}; // Background thread + benchmark thread

    std::thread callback_updater([&]() {
      start_latch.arrive_and_wait(); // Wait for benchmark to start
      while (!stop_callback.load()) {
        counter_value.fetch_add(1);
        gauge_value.store(gauge_value.load() + 1);
      }
    });

    ArenaAllocator arena;

    start_latch.arrive_and_wait(); // Wait for background thread to be ready
    bench.run("render() - with callback metrics", [&]() {
      auto output = metric::render(arena);
      ankerl::nanobench::doNotOptimizeAway(output);
      arena.reset();
    });

    stop_callback.store(true);
    callback_updater.join();
  }

  return 0;
}
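Taken together, the benchmark exercises the intended usage pattern: counters and histograms are written by exactly one thread each, while render() runs concurrently against its own arena. A minimal sketch of that pattern, assuming only the API calls that appear in the benchmark above (create_counter, Family::create, metric::render, ArenaAllocator::reset):

```cpp
#include <thread>

#include "arena_allocator.hpp"
#include "metric.hpp"

int main() {
  // Families are created once; instances are cheap per-thread handles.
  auto requests = metric::create_counter("requests_total", "Requests served");

  std::thread worker([&] {
    auto c = requests.create({{"handler", "http"}});
    for (int i = 0; i < 1000; ++i) {
      c.inc(1.0); // single writer per thread, no CAS needed
    }
  });
  worker.join();

  // Rendering borrows an arena and produces Prometheus-style text output.
  ArenaAllocator arena;
  auto text = metric::render(arena);
  static_cast<void>(text);
  arena.reset(); // reuse the arena for the next scrape
}
```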
@@ -789,7 +789,7 @@ int main() {
   std::cout << "\nBenchmark completed. The WeaselDB parser is optimized for:\n";
   std::cout << "- Arena-based memory allocation for reduced fragmentation\n";
   std::cout << "- Streaming parsing for network protocols\n";
-  std::cout << "- Zero-copy string handling with string views\n";
+  std::cout << "- String views to minimize unnecessary copying\n";
   std::cout << "- Base64 decoding integrated into parsing pipeline\n";
   std::cout << "- Efficient reset and reuse for high-throughput scenarios\n";
@@ -1,12 +1,12 @@
 #include <nanobench.h>

-#include "../src/loop_iterations.h"
+#include "../src/loop_iterations.hpp"

 int main() {
   ankerl::nanobench::Bench bench;
   bench.minEpochIterations(100000);
-  bench.run("volatile loop to " + std::to_string(loopIterations), [&] {
-    for (volatile int i = 0; i < loopIterations; i = i + 1)
+  bench.run("volatile loop to " + std::to_string(loop_iterations), [&] {
+    for (volatile int i = 0; i < loop_iterations; i = i + 1)
       ;
   });
@@ -23,7 +23,7 @@ WeaselDB is a high-performance write-side database component designed for system
 - **High-performance JSON parsing** with streaming support and SIMD optimization
 - **Multi-threaded networking** using multiple epoll instances with unified I/O thread pool
 - **Configurable epoll instances** to eliminate kernel-level contention
-- **Zero-copy design** throughout the pipeline
+- **Optimized memory management** with arena allocation and efficient copying
 - **Factory pattern safety** ensuring correct object lifecycle management

 ---
@@ -152,7 +152,7 @@ A high-performance, multi-stage, lock-free pipeline for inter-thread communicati
 - **Arena-backed string storage** with efficient memory management
 - **Move-only semantics** for optimal performance
 - **Builder pattern** for constructing commit requests
-- **Zero-copy string views** pointing to arena-allocated memory
+- **String views** pointing to arena-allocated memory to avoid unnecessary copying

 #### **Configuration & Optimization**
@@ -242,7 +242,7 @@ The system implements a RESTful API. See [api.md](api.md) for comprehensive API
 1. **Performance-first** - Every component optimized for high throughput
 2. **Scalable concurrency** - Multiple epoll instances eliminate kernel contention
 3. **Memory efficiency** - Arena allocation eliminates fragmentation
-4. **Zero-copy** - Minimize data copying throughout pipeline
+4. **Efficient copying** - Minimize unnecessary copies while accepting required ones
 5. **Streaming-ready** - Support incremental processing
 6. **Type safety** - Compile-time validation where possible
 7. **Resource management** - RAII and move semantics throughout
@@ -437,7 +437,7 @@ public:

 #### Arena-Based String Handling
 ```cpp
-// Preferred: Zero-copy string view with arena allocation
+// Preferred: String view with arena allocation to minimize copying
 std::string_view process_json_key(const char* data, ArenaAllocator& arena);

 // Avoid: Unnecessary string copies
@@ -77,32 +77,29 @@ void *ArenaAllocator::realloc_raw(void *ptr, uint32_t old_size,
   assert(current_block_ &&
          "realloc called with non-null ptr but no current block exists");

-  if (current_block_->offset >= old_size) {
-    // Check if this was the last allocation by comparing with expected location
-    char *expected_last_alloc_start =
-        current_block_->data() + current_block_->offset - old_size;
-
-    if (ptr == expected_last_alloc_start) {
-      // This is indeed the last allocation
-      if (new_size > old_size) {
-        // Growing - check if we have space
-        size_t additional_space_needed = new_size - old_size;
-
-        if (current_block_->offset + additional_space_needed <=
-            current_block_->size) {
-          // We can extend in place
-          current_block_->offset += additional_space_needed;
-          return ptr;
-        }
-      } else {
-        // Shrinking - just update the offset
-        size_t space_to_free = old_size - new_size;
-        current_block_->offset -= space_to_free;
-        return new_size == 0 ? nullptr : ptr;
-      }
-    }
-  }
+  // Assert that offset is large enough (should always be true for
+  // valid callers)
+  assert(current_block_->offset >= old_size &&
+         "offset must be >= old_size for valid last allocation");
+
+  // Check if this was the last allocation by comparing with expected location
+  char *expected_last_alloc_start =
+      current_block_->data() + current_block_->offset - old_size;
+
+  if (ptr == expected_last_alloc_start) {
+    // This is indeed the last allocation
+    if (new_size > old_size) {
+      // Growing - check if we have space
+      size_t additional_space_needed = new_size - old_size;
+
+      if (current_block_->offset + additional_space_needed <=
+          current_block_->size) {
+        // We can extend in place
+        current_block_->offset += additional_space_needed;
+        return ptr;
+      }
+    } else {
+      // Shrinking - just update the offset
+      size_t space_to_free = old_size - new_size;
+      current_block_->offset -= space_to_free;
+      return new_size == 0 ? nullptr : ptr;
+    }
+  }

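The rewritten fast path only applies when ptr is the most recent allocation in the current block: growing bumps the block offset in place, and shrinking hands the tail bytes back. A sketch of the observable behavior, assuming the public allocate<T>()/realloc() wrappers documented in the next hunk:

```cpp
#include "arena_allocator.hpp"

void realloc_demo() {
  ArenaAllocator arena(1024);

  // Last allocation: realloc can grow in place by bumping the block offset.
  char *buf = arena.allocate<char>(16);
  buf = arena.realloc(buf, 16, 64); // same pointer if the block has room

  // Once something else is allocated, buf is no longer the last allocation,
  // so growing falls through to the general path handled elsewhere in
  // realloc_raw and may return a different pointer.
  char *other = arena.allocate<char>(8);
  static_cast<void>(other);
  buf = arena.realloc(buf, 64, 128);

  // Shrinking the last allocation just gives bytes back to the block.
  char *tail = arena.allocate<char>(100);
  tail = arena.realloc(tail, 100, 10);
}
```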
@@ -277,10 +277,14 @@ public:
  * @brief Type-safe version of realloc_raw for arrays of type T.
  *
  * @param ptr Pointer to the existing allocation (must be from this allocator)
+ *            If nullptr, behaves like allocate<T>(new_size)
  * @param old_size Size of the existing allocation in number of T objects
+ *                 Ignored if ptr is nullptr
  * @param new_size Desired new size in number of T objects
  * @return Pointer to the reallocated memory (may be the same as ptr or
  *         different)
+ * @note Follows standard realloc() semantics: realloc(nullptr, size) ==
+ *       malloc(size)
  * @note Prints error to stderr and calls std::abort() if memory allocation
  *       fails or size overflow occurs
  */
@@ -430,6 +434,40 @@ public:
     return current_block_ ? current_block_->size - current_block_->offset : 0;
   }

+  /**
+   * @brief Get all available space in the current block and claim it
+   * immediately.
+   *
+   * This method returns a pointer to all remaining space in the current block
+   * and immediately marks it as used in the arena. The caller should use
+   * realloc() to shrink the allocation to the actual amount needed.
+   *
+   * If no block exists or current block is full, creates a new block.
+   *
+   * @return Pointer to allocated space and the number of bytes allocated
+   * @note The caller must call realloc() to return unused space
+   * @note This is designed for speculative operations like printf formatting
+   * @note Postcondition: always returns at least 1 byte
+   */
+  struct AllocatedSpace {
+    char *ptr;
+    size_t allocated_bytes;
+  };
+
+  AllocatedSpace allocate_remaining_space() {
+    if (!current_block_ || available_in_current_block() == 0) {
+      add_block(initial_block_size_);
+    }
+
+    char *allocated_ptr = current_block_->data() + current_block_->offset;
+    size_t available = available_in_current_block();
+
+    // Claim all remaining space
+    current_block_->offset = current_block_->size;
+
+    return {allocated_ptr, available};
+  }
+
   /**
    * @brief Get the total number of blocks in the allocator.
    *
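allocate_remaining_space() exists to support exactly the speculative single-pass strategy described here: claim everything, format into it, then shrink. A sketch of how a printf-style formatter could use it; the helper name vformat_sketch and its error handling are illustrative, not part of this diff:

```cpp
#include <cstdarg>
#include <cstddef>
#include <cstdio>
#include <string_view>

#include "arena_allocator.hpp"

std::string_view vformat_sketch(ArenaAllocator &arena, const char *fmt,
                                va_list args) {
  // Claim everything left in the current block up front.
  auto space = arena.allocate_remaining_space();

  va_list copy;
  va_copy(copy, args);
  int needed = std::vsnprintf(space.ptr, space.allocated_bytes, fmt, copy);
  va_end(copy);

  if (needed >= 0 && static_cast<size_t>(needed) < space.allocated_bytes) {
    // It fit: shrink the claim to what was actually written (plus NUL).
    arena.realloc(space.ptr, space.allocated_bytes,
                  static_cast<size_t>(needed) + 1);
    return {space.ptr, static_cast<size_t>(needed)};
  }

  // Didn't fit: return the claimed bytes and fall back to two passes.
  arena.realloc(space.ptr, space.allocated_bytes, 0);
  char *exact = arena.allocate<char>(static_cast<size_t>(needed) + 1);
  std::vsnprintf(exact, static_cast<size_t>(needed) + 1, fmt, args);
  return {exact, static_cast<size_t>(needed)};
}
```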
@@ -575,3 +613,44 @@ public:

   template <typename U> friend class ArenaStlAllocator;
 };
+
+/// Simple arena-aware vector that doesn't have a destructor.
+/// Safe to return as span because both the vector and its data are
+/// arena-allocated. Uses arena's realloc() for efficient growth without
+/// copying when possible.
+template <typename T> struct ArenaVector {
+  explicit ArenaVector(ArenaAllocator *arena)
+      : arena_(arena), data_(nullptr), size_(0), capacity_(0) {}
+
+  void push_back(const T &item) {
+    if (size_ >= capacity_) {
+      grow();
+    }
+    data_[size_++] = item;
+  }
+
+  T *data() { return data_; }
+  const T *data() const { return data_; }
+  size_t size() const { return size_; }
+  bool empty() const { return size_ == 0; }
+
+  T &operator[](size_t index) { return data_[index]; }
+  const T &operator[](size_t index) const { return data_[index]; }
+
+  // No destructor - arena cleanup handles memory
+
+private:
+  void grow() {
+    size_t new_capacity = capacity_ == 0 ? 8 : capacity_ * 2;
+
+    // arena.realloc() handles nullptr like standard realloc() - acts like
+    // malloc(). This avoids copying when growing in-place is possible.
+    data_ = arena_->realloc(data_, capacity_, new_capacity);
+    capacity_ = new_capacity;
+  }
+
+  ArenaAllocator *arena_;
+  T *data_;
+  size_t size_;
+  size_t capacity_;
+};
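Because ArenaVector has no destructor and its backing store lives in the arena, data()/size() can safely outlive the vector object itself. A small usage sketch, assuming only the ArenaAllocator/ArenaVector definitions above:

```cpp
#include <cstddef>
#include <utility>

#include "arena_allocator.hpp"

// Collect values during parsing and hand back a pointer/length pair that
// stays valid for as long as the arena does.
std::pair<const int *, size_t> collect_evens(ArenaAllocator &arena) {
  ArenaVector<int> v(&arena);
  for (int i = 0; i < 10; ++i) {
    if (i % 2 == 0) {
      v.push_back(i); // capacity doubles via arena realloc: 8, 16, 32, ...
    }
  }
  // The local vector goes away, but the arena still owns the storage.
  return {v.data(), v.size()};
}
```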
@@ -70,7 +70,7 @@ struct Connection {
    * asynchronously by the server's I/O threads using efficient vectored
    * I/O.
    *
-   * @param s The data to send (string view for zero-copy efficiency)
+   * @param s The data to send (string view parameter for efficiency)
    * @param copy_to_arena If true (default), copies data to the connection's
    * arena for safe storage. If false, the caller must ensure the data remains
    * valid until all queued messages are sent.
src/format.cpp (new file, 1010 lines)
File diff suppressed because it is too large
src/format.hpp (new file, 276 lines)
@@ -0,0 +1,276 @@
#pragma once

#include <concepts>
#include <cstdarg>
#include <cstdio>
#include <cstring>
#include <string_view>
#include <type_traits>

#include "arena_allocator.hpp"

/**
 * @brief Runtime printf-style formatting with arena allocation optimization.
 *
 * This function provides familiar printf-style formatting with intelligent
 * optimization for arena allocation. It attempts single-pass formatting by
 * speculatively using available arena space, falling back to two-pass
 * formatting only when necessary.
 *
 * The function uses an optimized allocation strategy:
 * 1. **Single-pass attempt**: Try to format directly into available arena space
 * 2. **Fallback to two-pass**: If formatting doesn't fit, measure required size
 *    and allocate exactly what's needed
 *
 * ## Supported Format Specifiers:
 * All standard printf format specifiers are supported:
 * - **Integers**: %d, %i, %u, %x, %X, %o, %ld, %lld, etc.
 * - **Floating point**: %f, %e, %E, %g, %G, %.2f, etc.
 * - **Strings**: %s, %.*s, etc.
 * - **Characters**: %c
 * - **Pointers**: %p
 * - **Width/precision**: %10d, %-10s, %.2f, %*.*s, etc.
 *
 * ## Performance Characteristics:
 * - **Optimistic single-pass**: Often avoids the cost of measuring format size
 * - **Arena allocation**: Uses fast arena allocation (~1ns vs ~20-270ns for
 *   malloc)
 * - **Memory efficient**: Returns unused space to arena via realloc()
 * - **Fallback safety**: Two-pass approach handles any format that doesn't fit
 *
 * @param arena Arena allocator for memory management
 * @param fmt Printf-style format string
 * @param ... Variable arguments matching format specifiers
 * @return std::string_view pointing to arena-allocated formatted string
 * @note Aborts program on formatting errors (never returns invalid data)
 * @note GCC format attribute enables compile-time format string validation
 *
 * ## Usage Examples:
 * ```cpp
 * ArenaAllocator arena(1024);
 *
 * // Basic formatting
 * auto msg = format(arena, "Hello %s!", "World");
 * // msg == "Hello World!"
 *
 * // Numeric formatting with precision
 * auto value = format(arena, "Pi: %.3f", 3.14159);
 * // value == "Pi: 3.142"
 *
 * // Mixed types with width/alignment
 * auto table = format(arena, "%-10s %5d %8.2f", "Item", 42, 99.95);
 * // table == "Item          42    99.95"
 *
 * // Error messages
 * auto error = format(arena, "Error %d: %s (line %d)", 404, "Not found", 123);
 * // error == "Error 404: Not found (line 123)"
 * ```
 *
 * ## When to Use:
 * - **Printf familiarity**: When you prefer printf-style format strings
 * - **Runtime flexibility**: Format strings from variables, config, or user
 *   input
 * - **Complex formatting**: Precision, width, alignment, padding
 * - **Debugging**: Quick formatted output for logging/debugging
 * - **Mixed precision**: Different numeric precision requirements
 *
 * ## When to Use static_format() Instead:
 * - **Hot paths**: Performance-critical code where every nanosecond counts
 * - **Simple concatenation**: Basic string + number + string combinations
 * - **Compile-time optimization**: When all types/values known at compile time
 * - **Template contexts**: Where compile-time buffer sizing is beneficial
 *
 * ## Optimization Details:
 * The function uses `ArenaAllocator::allocate_remaining_space()` to claim all
 * available arena space and attempt formatting. If successful, it shrinks the
 * allocation to the actual size used. If formatting fails (doesn't fit), it
 * falls back to the traditional two-pass approach: measure size, allocate
 * exactly, then format.
 *
 * This strategy optimizes for the common case where available arena space is
 * sufficient, while maintaining correctness for all cases.
 */
std::string_view format(ArenaAllocator &arena, const char *fmt, ...)
    __attribute__((format(printf, 2, 3)));

namespace detail {

template <int kLen> struct StringTerm {
  explicit constexpr StringTerm(const char *s) : s(s) {}
  static constexpr int kMaxLength = kLen;
  void write(char *&buf) const {
    std::memcpy(buf, s, kLen);
    buf += kLen;
  }

private:
  const char *s;
};

template <int kLen>
constexpr StringTerm<kLen - 1> term(const char (&array)[kLen]) {
  return StringTerm<kLen - 1>{array};
}

template <class IntType> constexpr int decimal_length(IntType x) {
  static_assert(std::is_integral_v<IntType>,
                "decimal_length requires integral type");

  if constexpr (std::is_signed_v<IntType>) {
    // Handle negative values by using unsigned equivalent
    using Unsigned = std::make_unsigned_t<IntType>;
    // Safe conversion: cast to unsigned first, then negate in unsigned
    // arithmetic
    auto abs_x = x < 0 ? -static_cast<Unsigned>(x) : static_cast<Unsigned>(x);
    int result = 0;
    do {
      ++result;
      abs_x /= 10;
    } while (abs_x);
    return result;
  } else {
    int result = 0;
    do {
      ++result;
      x /= 10;
    } while (x);
    return result;
  }
}

template <std::integral IntType> struct IntTerm {
  static constexpr bool kSigned = std::is_signed_v<IntType>;
  using Unsigned = std::make_unsigned_t<IntType>;

  explicit constexpr IntTerm(IntType v) : v(v) {}

  static constexpr int kMaxLength =
      decimal_length(Unsigned(-1)) + (kSigned ? 1 : 0);

  void write(char *&buf) const {
    char itoa_buf[kMaxLength];
    auto x = static_cast<Unsigned>(v);

    if constexpr (kSigned) {
      if (v < 0) {
        *buf++ = '-';
        x = -static_cast<Unsigned>(v);
      }
    }

    int i = kMaxLength;
    do {
      itoa_buf[--i] = static_cast<char>('0' + (x % 10));
      x /= 10;
    } while (x);

    while (i < kMaxLength) {
      *buf++ = itoa_buf[i++];
    }
  }

private:
  IntType v;
};

template <std::integral IntType> constexpr IntTerm<IntType> term(IntType s) {
  return IntTerm<IntType>{s};
}

struct DoubleTerm {
  explicit constexpr DoubleTerm(double s) : s(s) {}
  static constexpr int kMaxLength = 24;
  void write(char *&buf) const;

private:
  double s;
};

// Variable template for compile-time max length access
template <typename T>
inline constexpr int max_decimal_length_v = decltype(term(T{}))::kMaxLength;

inline constexpr DoubleTerm term(double s) { return DoubleTerm(s); }

} // namespace detail

/**
 * @brief Compile-time optimized formatting for high-performance code paths.
 *
 * This function provides ultra-fast string formatting by calculating buffer
 * sizes at compile time and using specialized term handlers for each type.
 * It's designed for performance-critical code where formatting overhead
 * matters.
 *
 * Unlike the runtime `format()` function, `static_format()` processes all
 * arguments at compile time to determine exact memory requirements and uses
 * optimized term writers for maximum speed.
 *
 * ## Supported Types:
 * - **String literals**: C-style string literals and arrays
 * - **Integers**: All integral types (int, int64_t, uint32_t, etc.)
 * - **Floating point**: double (uses high-precision Grisu2 algorithm)
 * - **Custom types**: Via specialization of `detail::term()`
 *
 * ## Performance Characteristics:
 * - **Compile-time buffer sizing**: Buffer size calculated at compile time (no
 *   runtime measurement)
 * - **Optimized arena allocation**: Uses pre-calculated exact buffer sizes with
 *   arena allocator
 * - **Specialized type handling**: Fast paths for common types via template
 *   specialization
 * - **Memory efficient**: Uses arena.realloc() to return unused space to the
 *   arena
 *
 * @tparam Ts Types of the arguments to format (auto-deduced)
 * @param arena Arena allocator for memory management
 * @param ts Arguments to format - can be string literals, integers, doubles
 * @return std::string_view pointing to arena-allocated formatted string
 *
 * ## Usage Examples:
 * ```cpp
 * ArenaAllocator arena(1024);
 *
 * // String concatenation
 * auto result1 = static_format(arena, "Hello ", "World", "!");
 * // result1 == "Hello World!"
 *
 * // Mixed types
 * auto result2 = static_format(arena, "Count: ", 42, ", Rate: ", 3.14);
 * // result2 == "Count: 42, Rate: 3.14"
 *
 * // Error messages
 * auto error = static_format(arena, "Error ", 404, ": ", "Not found");
 * // error == "Error 404: Not found"
 * ```
 *
 * ## When to Use:
 * - **Hot paths**: Performance-critical code where formatting speed matters
 * - **Known types**: When argument types are known at compile time
 * - **Simple formatting**: Concatenation and basic type conversion
 * - **Template code**: Where compile-time optimization is beneficial
 *
 * ## When to Use format() Instead:
 * - **Printf-style formatting**: When you need format specifiers like "%d",
 *   "%.2f"
 * - **Runtime flexibility**: When format strings come from variables/config
 * - **Complex formatting**: When you need padding, precision, etc.
 * - **Convenience**: For quick debugging or non-critical paths
 *
 * @note All arguments are passed by forwarding reference for optimal
 *       performance
 * @note Memory is arena-allocated and automatically sized to exact requirements
 * @note Compile-time errors occur if unsupported types are used
 * @note This function is constexpr-friendly and optimizes well in release
 *       builds
 */
template <class... Ts>
std::string_view static_format(ArenaAllocator &arena, Ts &&...ts) {
  constexpr int upper_bound = (decltype(detail::term(ts))::kMaxLength + ...);
  char *result = arena.allocate<char>(upper_bound);
  char *buf = result;
  (detail::term(ts).write(buf), ...);
  const int size = static_cast<int>(buf - result);
  return std::string_view(arena.realloc(result, upper_bound, size),
                          static_cast<std::size_t>(size));
}
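The header says custom types can participate in static_format() by providing an overload of detail::term(). A hedged sketch of what such an extension might look like; BoolTerm is illustrative and not part of this diff (the non-template overload takes precedence over the generic integral term for bool):

```cpp
#include <cstring>

#include "format.hpp"

namespace detail {

// A fixed-width term for bool: writes "true" or "false".
struct BoolTerm {
  explicit constexpr BoolTerm(bool b) : b(b) {}
  static constexpr int kMaxLength = 5; // strlen("false")
  void write(char *&buf) const {
    const char *s = b ? "true" : "false";
    const int n = b ? 4 : 5;
    std::memcpy(buf, s, static_cast<std::size_t>(n));
    buf += n;
  }

private:
  bool b;
};

// Non-template overload is preferred over the generic integral term for bool.
inline constexpr BoolTerm term(bool b) { return BoolTerm(b); }

} // namespace detail

// Usage: auto sv = static_format(arena, "ready: ", true);
```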
@@ -8,7 +8,7 @@

 #include "connection.hpp"
 #include "connection_handler.hpp"
-#include "loop_iterations.h"
+#include "loop_iterations.hpp"
 #include "perfetto_categories.hpp"
 #include "server.hpp"
 #include "thread_pipeline.hpp"
@@ -111,7 +111,7 @@ struct HttpHandler : ConnectionHandler {
       if (nulls == 2) {
         return;
       }
-      for (volatile int i = 0; i < loopIterations; i = i + 1)
+      for (volatile int i = 0; i < loop_iterations; i = i + 1)
         ;
     }
   }
@@ -1,3 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
constexpr int loopIterations = 1725;
|
||||
3
src/loop_iterations.hpp
Normal file
3
src/loop_iterations.hpp
Normal file
@@ -0,0 +1,3 @@
|
||||
#pragma once
|
||||
|
||||
constexpr int loop_iterations = 1725;
|
||||
src/metric.cpp (new file, 874 lines)
@@ -0,0 +1,874 @@
|
||||
#include "metric.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <bit>
|
||||
#include <cassert>
|
||||
#include <cctype>
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include <immintrin.h>
|
||||
#include <simdutf.h>
|
||||
|
||||
#include "format.hpp"
|
||||
|
||||
// TODO fix static initialization order fiasco
|
||||
|
||||
// Verify that malloc provides sufficient alignment for atomic 128-bit
|
||||
// operations
|
||||
static_assert(__STDCPP_DEFAULT_NEW_ALIGNMENT__ >= 16,
|
||||
"Default new alignment must be at least 16 bytes for atomic "
|
||||
"128-bit stores");
|
||||
|
||||
// WeaselDB Metrics System Design:
|
||||
//
|
||||
// THREADING MODEL:
|
||||
// - Counters and Histograms: Per-thread storage, single writer per thread
|
||||
// - Gauges: Global storage with mutex protection (multi-writer)
|
||||
//
|
||||
// PRECISION STRATEGY:
|
||||
// - Use atomic<uint64_t> for lock-free storage
|
||||
// - Store doubles using std::bit_cast to uint64_t (preserves full IEEE 754
|
||||
// precision)
|
||||
// - Single writer assumption allows simple load/store without CAS loops
|
||||
//
|
||||
// MEMORY MODEL:
|
||||
// - Thread-local metrics auto-cleanup on thread destruction
|
||||
// - Global metrics (gauges) persist for application lifetime
|
||||
// - Histogram buckets are sorted, deduplicated, and include +Inf bucket
|
||||
|
||||
namespace metric {
|
||||
|
||||
// Validation helper that works in both debug and release builds
|
||||
static void validate_or_abort(bool condition, const char *message,
|
||||
const char *value) {
|
||||
if (!condition) {
|
||||
std::fprintf(stderr, "WeaselDB metric validation failed: %s: '%s'\n",
|
||||
message, value);
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
|
||||
// Labels key for second level of map
|
||||
struct LabelsKey {
|
||||
std::vector<std::pair<std::string, std::string>> labels;
|
||||
|
||||
LabelsKey(std::vector<std::pair<std::string, std::string>> l)
|
||||
: labels(std::move(l)) {
|
||||
// Validate all label keys and values
|
||||
for (const auto &[key, value] : labels) {
|
||||
validate_or_abort(is_valid_label_key(key), "invalid label key",
|
||||
key.c_str());
|
||||
validate_or_abort(is_valid_label_value(value), "invalid label value",
|
||||
value.c_str());
|
||||
}
|
||||
|
||||
// Sort labels by key for Prometheus compatibility
|
||||
std::sort(labels.begin(), labels.end(),
|
||||
[](const auto &a, const auto &b) { return a.first < b.first; });
|
||||
}
|
||||
|
||||
bool operator==(const LabelsKey &other) const {
|
||||
return labels == other.labels;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace metric
|
||||
|
||||
namespace std {
|
||||
template <> struct hash<metric::LabelsKey> {
|
||||
std::size_t operator()(const metric::LabelsKey &k) const {
|
||||
std::size_t hash_value = 0;
|
||||
for (const auto &[key, value] : k.labels) {
|
||||
// Combine hashes using a simple but effective method
|
||||
hash_value ^= std::hash<std::string>{}(key) + 0x9e3779b9 +
|
||||
(hash_value << 6) + (hash_value >> 2);
|
||||
hash_value ^= std::hash<std::string>{}(value) + 0x9e3779b9 +
|
||||
(hash_value << 6) + (hash_value >> 2);
|
||||
}
|
||||
return hash_value;
|
||||
}
|
||||
};
|
||||
} // namespace std
|
||||
|
||||
namespace metric {
|
||||
|
||||
// DESIGN: Store doubles in atomic<uint64_t> for lock-free operations
|
||||
// - Preserves full IEEE 754 double precision (no truncation)
|
||||
// - Allows atomic load/store without locks
|
||||
// - Use std::bit_cast for safe conversion between double and uint64_t
|
||||
|
||||
// Family::State structures own the second-level maps (labels -> instances)
|
||||
template <> struct Family<Counter>::State {
|
||||
std::string name;
|
||||
std::string help;
|
||||
|
||||
struct PerThreadState {
|
||||
std::unordered_map<LabelsKey, std::unique_ptr<Counter::State>> instances;
|
||||
};
|
||||
std::unordered_map<std::thread::id, PerThreadState> perThreadState;
|
||||
|
||||
// Callback-based metrics (global, not per-thread)
|
||||
std::unordered_map<LabelsKey, MetricCallback<Counter>> callbacks;
|
||||
};
|
||||
|
||||
template <> struct Family<Gauge>::State {
|
||||
std::string name;
|
||||
std::string help;
|
||||
std::unordered_map<LabelsKey, std::unique_ptr<Gauge::State>> instances;
|
||||
|
||||
// Callback-based metrics
|
||||
std::unordered_map<LabelsKey, MetricCallback<Gauge>> callbacks;
|
||||
};
|
||||
|
||||
template <> struct Family<Histogram>::State {
|
||||
std::string name;
|
||||
std::string help;
|
||||
std::vector<double> buckets;
|
||||
|
||||
struct PerThreadState {
|
||||
std::unordered_map<LabelsKey, std::unique_ptr<Histogram::State>> instances;
|
||||
};
|
||||
std::unordered_map<std::thread::id, PerThreadState> perThreadState;
|
||||
|
||||
// Note: No callbacks map - histograms don't support callback-based metrics
|
||||
};
|
||||
|
||||
// Counter: Thread-local, monotonically increasing, single writer per thread
|
||||
struct Counter::State {
|
||||
double value; // Single writer, no atomics needed
|
||||
friend struct Metric;
|
||||
};
|
||||
|
||||
// Gauge: Global, can increase/decrease, multiple writers (uses atomic CAS).
|
||||
// TODO slow under contention.
|
||||
struct Gauge::State {
|
||||
std::atomic<uint64_t> value; // Stores double as uint64_t bits, lock-free
|
||||
friend struct Metric;
|
||||
};
|
||||
|
||||
// Histogram: Thread-local buckets, single writer per thread
|
||||
struct Histogram::State {
|
||||
std::vector<double>
|
||||
thresholds; // Bucket boundaries (sorted, deduplicated, includes +Inf)
|
||||
std::vector<uint64_t> counts; // Count per bucket - single writer, malloc
|
||||
// provides 16-byte alignment
|
||||
// TODO this should just be a double like in counter
|
||||
std::atomic<uint64_t>
|
||||
sum; // Sum of observations (double stored as uint64_t bits)
|
||||
std::atomic<uint64_t> observations; // Total observation count (uint64_t)
|
||||
friend struct Metric;
|
||||
};
|
||||
|
||||
struct Metric {
|
||||
static std::mutex mutex;
|
||||
|
||||
// Two-level map: name -> Family
|
||||
static std::unordered_map<std::string,
|
||||
std::unique_ptr<Family<Counter>::State>>
|
||||
counterFamilies;
|
||||
static std::unordered_map<std::string, std::unique_ptr<Family<Gauge>::State>>
|
||||
gaugeFamilies;
|
||||
static std::unordered_map<std::string,
|
||||
std::unique_ptr<Family<Histogram>::State>>
|
||||
histogramFamilies;
|
||||
|
||||
// Thread cleanup for per-family thread-local storage
|
||||
struct ThreadInit {
|
||||
ThreadInit() {
|
||||
// Thread registration happens lazily when metrics are created
|
||||
}
|
||||
~ThreadInit() {
|
||||
// TODO we need to accumulate this threads counts into a global. Otherwise
|
||||
// it can go backwards when we destroy a thread.
|
||||
|
||||
// Clean up this thread's storage from all families
|
||||
std::unique_lock<std::mutex> _{mutex};
|
||||
auto thread_id = std::this_thread::get_id();
|
||||
|
||||
// Clean up counter families
|
||||
for (auto &[name, family] : counterFamilies) {
|
||||
family->perThreadState.erase(thread_id);
|
||||
}
|
||||
|
||||
// Clean up histogram families
|
||||
for (auto &[name, family] : histogramFamilies) {
|
||||
family->perThreadState.erase(thread_id);
|
||||
}
|
||||
|
||||
// Gauges are global, no per-thread cleanup needed
|
||||
}
|
||||
};
|
||||
static thread_local ThreadInit thread_init;
|
||||
|
||||
// Thread cleanup now handled by ThreadInit RAII
|
||||
|
||||
static Counter create_counter_instance(
|
||||
Family<Counter> *family,
|
||||
const std::vector<std::pair<std::string, std::string>> &labels) {
|
||||
std::unique_lock<std::mutex> _{mutex};
|
||||
LabelsKey key{labels};
|
||||
|
||||
// Validate that labels aren't already registered as callback
|
||||
validate_or_abort(
|
||||
family->p->callbacks.find(key) == family->p->callbacks.end(),
|
||||
"labels already registered as callback",
|
||||
key.labels.empty() ? "(no labels)" : key.labels[0].first.c_str());
|
||||
|
||||
auto &ptr =
|
||||
family->p->perThreadState[std::this_thread::get_id()].instances[key];
|
||||
if (!ptr) {
|
||||
ptr = std::make_unique<Counter::State>();
|
||||
ptr->value = 0.0;
|
||||
}
|
||||
Counter result;
|
||||
result.p = ptr.get();
|
||||
return result;
|
||||
}
|
||||
|
||||
static Gauge create_gauge_instance(
|
||||
Family<Gauge> *family,
|
||||
const std::vector<std::pair<std::string, std::string>> &labels) {
|
||||
std::unique_lock<std::mutex> _{mutex};
|
||||
LabelsKey key{labels};
|
||||
|
||||
// Validate that labels aren't already registered as callback
|
||||
validate_or_abort(
|
||||
family->p->callbacks.find(key) == family->p->callbacks.end(),
|
||||
"labels already registered as callback",
|
||||
key.labels.empty() ? "(no labels)" : key.labels[0].first.c_str());
|
||||
|
||||
auto &ptr = family->p->instances[key];
|
||||
if (!ptr) {
|
||||
ptr = std::make_unique<Gauge::State>();
|
||||
ptr->value.store(0, std::memory_order_relaxed);
|
||||
}
|
||||
Gauge result;
|
||||
result.p = ptr.get();
|
||||
return result;
|
||||
}
|
||||
|
||||
static Histogram create_histogram_instance(
|
||||
Family<Histogram> *family,
|
||||
const std::vector<std::pair<std::string, std::string>> &labels) {
|
||||
std::unique_lock<std::mutex> _{mutex};
|
||||
LabelsKey key{labels};
|
||||
auto &ptr =
|
||||
family->p->perThreadState[std::this_thread::get_id()].instances[key];
|
||||
if (!ptr) {
|
||||
ptr = std::make_unique<Histogram::State>();
|
||||
// DESIGN: Prometheus-compatible histogram buckets
|
||||
// Use buckets from family configuration
|
||||
ptr->thresholds = family->p->buckets; // Already sorted and deduplicated
|
||||
|
||||
// Single writer semantics - no atomics needed for bucket counts
|
||||
ptr->counts = std::vector<uint64_t>(ptr->thresholds.size(), 0);
|
||||
ptr->sum.store(0, std::memory_order_relaxed);
|
||||
ptr->observations.store(0, std::memory_order_relaxed);
|
||||
}
|
||||
Histogram result;
|
||||
result.p = ptr.get();
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
Counter::Counter() = default;
|
||||
|
||||
void Counter::inc(double x) {
|
||||
// DESIGN: Single writer per thread allows simple increment
|
||||
// No atomics needed since only one thread writes to this counter
|
||||
auto new_value = p->value + x;
|
||||
|
||||
// Validate monotonic property (counter never decreases)
|
||||
if (new_value < p->value) [[unlikely]] {
|
||||
validate_or_abort(false, "counter value overflow/wraparound detected",
|
||||
std::to_string(new_value).c_str());
|
||||
}
|
||||
|
||||
__atomic_store(&p->value, &new_value, __ATOMIC_RELAXED);
|
||||
}
|
||||
|
||||
Gauge::Gauge() = default;
|
||||
|
||||
void Gauge::inc(double x) {
|
||||
// Lock-free increment using CAS loop
|
||||
uint64_t expected = p->value.load(std::memory_order_relaxed);
|
||||
uint64_t desired;
|
||||
do {
|
||||
double current_value = std::bit_cast<double>(expected);
|
||||
double new_value = current_value + x;
|
||||
desired = std::bit_cast<uint64_t>(new_value);
|
||||
} while (!p->value.compare_exchange_weak(expected, desired,
|
||||
std::memory_order_relaxed));
|
||||
}
|
||||
void Gauge::dec(double x) {
|
||||
// Lock-free decrement using CAS loop
|
||||
uint64_t expected = p->value.load(std::memory_order_relaxed);
|
||||
uint64_t desired;
|
||||
do {
|
||||
double current_value = std::bit_cast<double>(expected);
|
||||
double new_value = current_value - x;
|
||||
desired = std::bit_cast<uint64_t>(new_value);
|
||||
} while (!p->value.compare_exchange_weak(expected, desired,
|
||||
std::memory_order_relaxed));
|
||||
}
|
||||
void Gauge::set(double x) {
|
||||
// Simple atomic store for set operation
|
||||
p->value.store(std::bit_cast<uint64_t>(x), std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
Histogram::Histogram() = default;

// Vectorized histogram bucket updates using single-writer + atomic-read design
// Since histograms have single-writer semantics, we can use architecturally
// atomic stores

#ifdef __x86_64__
// x86-64: 128-bit vectorized with inline assembly atomic stores
__attribute__((target("avx"))) static void
update_histogram_buckets(const std::vector<double> &thresholds,
                         std::vector<uint64_t> &counts, double x,
                         size_t start_idx) {
  const size_t size = thresholds.size();
  size_t i = start_idx;

  // Process 2 buckets at a time with 128-bit vectors + inline assembly
  const __m128d x_vec = _mm_set1_pd(x);

  for (; i + 2 <= size; i += 2) {
    // Ensure alignment for atomic guarantee (malloc provides 16-byte alignment)
    assert((reinterpret_cast<uintptr_t>(static_cast<void *>(&counts[i])) &
            15) == 0 &&
           "counts array must be 16-byte aligned for atomic 128-bit stores");

    // 128-bit vectorized comparison and arithmetic
    __m128d thresholds_vec = _mm_loadu_pd(&thresholds[i]);
    __m128d cmp_result = _mm_cmp_pd(x_vec, thresholds_vec, _CMP_LE_OQ);
    __m128i cmp_as_int = _mm_castpd_si128(cmp_result);
    __m128i ones = _mm_set1_epi64x(1);
    __m128i increments = _mm_and_si128(cmp_as_int, ones);

    // Load current counts and add increments
    __m128i current_counts = _mm_load_si128((__m128i *)&counts[i]);
    __m128i updated_counts = _mm_add_epi64(current_counts, increments);

    // Processors that enumerate support for Intel AVX (by setting the feature
    // flag CPUID.01H:ECX.AVX[bit 28]) guarantee that the 16-byte memory
    // operations performed by the following instructions will always be
    // carried out atomically:
    //   - MOVAPD, MOVAPS, and MOVDQA.
    //   - VMOVAPD, VMOVAPS, and VMOVDQA when encoded with VEX.128.
    //   - VMOVAPD, VMOVAPS, VMOVDQA32, and VMOVDQA64 when encoded with
    //     EVEX.128 and k0 (masking disabled).
    // (Note that these instructions require the linear addresses of their
    // memory operands to be 16-byte aligned.)
    __asm__ __volatile__(
        "vmovdqa %1, %0"
        : "=m"(*((__m128i *)&counts[i])) // Output: aligned memory location
        : "x"(updated_counts)            // Input: xmm register
        : "memory"                       // Memory clobber
    );
    // We don't actually need it to be atomic across 128 bits, but that's
    // sufficient to guarantee each 64-bit half is atomic.
  }

  // Handle remainder with atomic stores
  for (; i < size; ++i) {
    if (x <= thresholds[i]) {
      __atomic_store_n(&counts[i], counts[i] + 1, __ATOMIC_RELAXED);
    }
  }
}
#else
// Fallback implementation for non-x86 architectures
static void
update_histogram_buckets(const std::vector<double> &thresholds,
                         std::vector<uint64_t> &counts, double x,
                         size_t start_idx) {
  const size_t size = thresholds.size();

  // Scalar implementation with atomic stores for TSAN compatibility
  for (size_t i = start_idx; i < size; ++i) {
    if (x <= thresholds[i]) {
      __atomic_store_n(&counts[i], counts[i] + 1, __ATOMIC_RELAXED);
    }
  }
}
#endif

void Histogram::observe(double x) {
  assert(p->thresholds.size() == p->counts.size());

  update_histogram_buckets(p->thresholds, p->counts, x, 0);

  // DESIGN: Single writer per thread allows simple load-modify-store for sum
  // No CAS loop needed since only one thread writes to this histogram
  auto current_sum =
      std::bit_cast<double>(p->sum.load(std::memory_order_relaxed));
  p->sum.store(std::bit_cast<uint64_t>(current_sum + x),
               std::memory_order_relaxed);

  p->observations.fetch_add(1, std::memory_order_relaxed);
}

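// Worked example (assumed family buckets {0.1, 0.5, 1.0} plus the appended
// +Inf): observe(0.3) increments the 0.5, 1.0, and +Inf buckets (cumulative
// "le" semantics), adds 0.3 to sum, and bumps observations by 1.
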
template <> Family<Counter>::Family() = default;
template <> Family<Gauge>::Family() = default;
template <> Family<Histogram>::Family() = default;

template <>
Counter Family<Counter>::create(
    std::vector<std::pair<std::string, std::string>> labels) {
  return Metric::create_counter_instance(this, labels);
}

template <>
Gauge Family<Gauge>::create(
    std::vector<std::pair<std::string, std::string>> labels) {
  return Metric::create_gauge_instance(this, labels);
}

template <>
Histogram Family<Histogram>::create(
    std::vector<std::pair<std::string, std::string>> labels) {
  return Metric::create_histogram_instance(this, labels);
}

Family<Counter> create_counter(std::string name, std::string help) {
  validate_or_abort(is_valid_metric_name(name), "invalid counter name",
                    name.c_str());

  std::unique_lock<std::mutex> _{Metric::mutex};
  auto &familyPtr = Metric::counterFamilies[name];
  if (!familyPtr) {
    familyPtr = std::make_unique<Family<Counter>::State>();
    familyPtr->name = std::move(name);
    familyPtr->help = std::move(help);
  } else {
    validate_or_abort(
        familyPtr->help == help,
        "metric family already registered with different help text",
        name.c_str());
  }
  Family<Counter> family;
  family.p = familyPtr.get();
  return family;
}

Family<Gauge> create_gauge(std::string name, std::string help) {
  validate_or_abort(is_valid_metric_name(name), "invalid gauge name",
                    name.c_str());

  std::unique_lock<std::mutex> _{Metric::mutex};
  auto &familyPtr = Metric::gaugeFamilies[name];
  if (!familyPtr) {
    familyPtr = std::make_unique<Family<Gauge>::State>();
    familyPtr->name = std::move(name);
    familyPtr->help = std::move(help);
  } else {
    validate_or_abort(
        familyPtr->help == help,
        "metric family already registered with different help text",
        name.c_str());
  }
  Family<Gauge> family;
  family.p = familyPtr.get();
  return family;
}

Family<Histogram> create_histogram(std::string name, std::string help,
                                   std::span<const double> buckets) {
  validate_or_abort(is_valid_metric_name(name), "invalid histogram name",
                    name.c_str());

  std::unique_lock<std::mutex> _{Metric::mutex};
  auto &familyPtr = Metric::histogramFamilies[name];
  if (!familyPtr) {
    familyPtr = std::make_unique<Family<Histogram>::State>();
    familyPtr->name = std::move(name);
    familyPtr->help = std::move(help);

    // DESIGN: Prometheus-compatible histogram buckets
    familyPtr->buckets = std::vector<double>(buckets.begin(), buckets.end());
    std::sort(familyPtr->buckets.begin(), familyPtr->buckets.end());
    familyPtr->buckets.erase(
        std::unique(familyPtr->buckets.begin(), familyPtr->buckets.end()),
        familyPtr->buckets.end());
    // +Inf bucket captures all observations (Prometheus requirement)
    if (familyPtr->buckets.empty() ||
        familyPtr->buckets.back() != std::numeric_limits<double>::infinity()) {
      familyPtr->buckets.push_back(std::numeric_limits<double>::infinity());
    }
  } else {
    validate_or_abort(
        familyPtr->help == help,
        "metric family already registered with different help text",
        name.c_str());
    std::vector<double> new_buckets_vec(buckets.begin(), buckets.end());
    std::sort(new_buckets_vec.begin(), new_buckets_vec.end());
    new_buckets_vec.erase(
        std::unique(new_buckets_vec.begin(), new_buckets_vec.end()),
        new_buckets_vec.end());
    if (new_buckets_vec.empty() ||
        new_buckets_vec.back() != std::numeric_limits<double>::infinity()) {
      new_buckets_vec.push_back(std::numeric_limits<double>::infinity());
    }
    validate_or_abort(familyPtr->buckets == new_buckets_vec,
                      "metric family already registered with different buckets",
                      name.c_str());
  }
  Family<Histogram> family;
  family.p = familyPtr.get();
  return family;
}

std::vector<double> linear_buckets(double start, double width, int count) {
  validate_or_abort(width > 0, "linear bucket width must be positive",
                    std::to_string(width).c_str());
  validate_or_abort(count >= 0, "linear bucket count must be non-negative",
                    std::to_string(count).c_str());

  std::vector<double> buckets;
  buckets.reserve(count);

  for (int i = 0; i < count; ++i) {
    buckets.push_back(start + i * width);
  }

  return buckets;
}

std::vector<double> exponential_buckets(double start, double factor,
                                        int count) {
  validate_or_abort(start > 0, "exponential bucket start must be positive",
                    std::to_string(start).c_str());
  validate_or_abort(factor > 1, "exponential bucket factor must be > 1",
                    std::to_string(factor).c_str());
  validate_or_abort(count >= 0, "exponential bucket count must be non-negative",
                    std::to_string(count).c_str());

  std::vector<double> buckets;
  buckets.reserve(count);

  double current = start;
  for (int i = 0; i < count; ++i) {
    buckets.push_back(current);
    current *= factor;
  }

  return buckets;
}

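// Worked example (values shown for illustration): exponential_buckets(0.005,
// 2.0, 4) returns {0.005, 0.01, 0.02, 0.04}; create_histogram() then appends
// +Inf, so the stored family carries five thresholds.
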
// Prometheus validation functions
// Metric names must match [a-zA-Z_:][a-zA-Z0-9_:]*
bool is_valid_metric_name(const std::string &name) {
  if (name.empty())
    return false;

  // First character must be letter, underscore, or colon
  // (cast to unsigned char: passing a negative char to std::isalpha is UB)
  char first = name[0];
  if (!std::isalpha(static_cast<unsigned char>(first)) && first != '_' &&
      first != ':') {
    return false;
  }

  // Remaining characters must be alphanumeric, underscore, or colon
  for (size_t i = 1; i < name.size(); ++i) {
    char c = name[i];
    if (!std::isalnum(static_cast<unsigned char>(c)) && c != '_' && c != ':') {
      return false;
    }
  }

  return true;
}

// Label keys must match [a-zA-Z_][a-zA-Z0-9_]*
bool is_valid_label_key(const std::string &key) {
  if (key.empty())
    return false;

  // First character must be letter or underscore
  char first = key[0];
  if (!std::isalpha(static_cast<unsigned char>(first)) && first != '_') {
    return false;
  }

  // Remaining characters must be alphanumeric or underscore
  for (size_t i = 1; i < key.size(); ++i) {
    char c = key[i];
    if (!std::isalnum(static_cast<unsigned char>(c)) && c != '_') {
      return false;
    }
  }

  // Label keys starting with __ are reserved for internal use
  if (key.size() >= 2 && key[0] == '_' && key[1] == '_') {
    return false;
  }

  return true;
}

// Label values can contain any UTF-8 characters (no specific restrictions)
bool is_valid_label_value(const std::string &value) {
  // Prometheus allows any UTF-8 string as a label value.
  // Validate UTF-8 encoding for correctness using simdutf.
  return simdutf::validate_utf8(value.c_str(), value.size());
}

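// Examples of the rules above (illustrative):
//   is_valid_metric_name("http_requests_total") -> true
//   is_valid_metric_name("2xx_total")           -> false (leading digit)
//   is_valid_label_key("__name__")              -> false (reserved "__" prefix)
//   is_valid_label_value("")                    -> true  (empty, valid UTF-8)
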
std::span<std::string_view> render(ArenaAllocator &arena) {
  std::unique_lock<std::mutex> _{Metric::mutex};

  std::vector<std::string_view> output;

  auto format_labels =
      [&](const std::vector<std::pair<std::string_view, std::string_view>>
              &labels) -> std::string_view {
    if (labels.empty()) {
      return "";
    }

    size_t required_size = 2; // {}
    for (const auto &[key, value] : labels) {
      required_size += key.length() + 3 + value.length(); // key="value"
      for (char c : value) {
        if (c == '\\' || c == '"' || c == '\n') {
          required_size++;
        }
      }
    }
    if (!labels.empty()) {
      required_size += labels.size() - 1; // commas
    }

    char *buf = arena.allocate<char>(required_size);
    char *p = buf;

    *p++ = '{';
    for (size_t i = 0; i < labels.size(); ++i) {
      if (i > 0)
        *p++ = ',';
      std::memcpy(p, labels[i].first.data(), labels[i].first.length());
      p += labels[i].first.length();
      *p++ = '=';
      *p++ = '"';
      for (char c : labels[i].second) {
        switch (c) {
        case '\\':
          *p++ = '\\';
          *p++ = '\\';
          break;
        case '"':
          *p++ = '\\';
          *p++ = '"';
          break;
        case '\n':
          *p++ = '\\';
          *p++ = 'n';
          break;
        default:
          *p++ = c;
          break;
        }
      }
      *p++ = '"';
    }
    *p++ = '}';
    return std::string_view(buf, p - buf);
  };

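  // Example (illustrative): labels {{"path", "/a\"b"}} render as
  // {path="/a\"b"} - backslash, quote, and newline in values are escaped per
  // the Prometheus text format.
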
  // Render counters
  for (const auto &[name, family] : Metric::counterFamilies) {
    output.push_back(
        format(arena, "# HELP %s %s\n", name.c_str(), family->help.c_str()));
    output.push_back(format(arena, "# TYPE %s counter\n", name.c_str()));

    std::vector<std::pair<std::string_view, std::string_view>> labels_sv;
    for (const auto &[labels_key, callback] : family->callbacks) {
      auto value = callback();
      labels_sv.clear();
      for (const auto &l : labels_key.labels)
        labels_sv.push_back(l);
      auto labels = format_labels(labels_sv);
      output.push_back(format(arena, "%.*s%.*s %.17g\n",
                              static_cast<int>(name.length()), name.data(),
                              static_cast<int>(labels.length()), labels.data(),
                              value));
    }

    for (const auto &[thread_id, per_thread] : family->perThreadState) {
      for (const auto &[labels_key, instance] : per_thread.instances) {
        // Atomic read from the render thread; the single writer uses plain
        // reads with an atomic final store, so this load never sees a torn
        // double.
        double value;
        __atomic_load(&instance->value, &value, __ATOMIC_RELAXED);
        labels_sv.clear();
        for (const auto &l : labels_key.labels)
          labels_sv.push_back(l);
        auto labels = format_labels(labels_sv);
        output.push_back(format(arena, "%.*s%.*s %.17g\n",
                                static_cast<int>(name.length()), name.data(),
                                static_cast<int>(labels.length()),
                                labels.data(), value));
      }
    }
  }

  // Render gauges
  for (const auto &[name, family] : Metric::gaugeFamilies) {
    output.push_back(
        format(arena, "# HELP %s %s\n", name.c_str(), family->help.c_str()));
    output.push_back(format(arena, "# TYPE %s gauge\n", name.c_str()));

    std::vector<std::pair<std::string_view, std::string_view>> labels_sv;
    for (const auto &[labels_key, callback] : family->callbacks) {
      auto value = callback();
      labels_sv.clear();
      for (const auto &l : labels_key.labels)
        labels_sv.push_back(l);
      auto labels = format_labels(labels_sv);
      output.push_back(format(arena, "%.*s%.*s %.17g\n",
                              static_cast<int>(name.length()), name.data(),
                              static_cast<int>(labels.length()), labels.data(),
                              value));
    }

    for (const auto &[labels_key, instance] : family->instances) {
      auto value = std::bit_cast<double>(
          instance->value.load(std::memory_order_relaxed));
      labels_sv.clear();
      for (const auto &l : labels_key.labels)
        labels_sv.push_back(l);
      auto labels = format_labels(labels_sv);
      output.push_back(format(arena, "%.*s%.*s %.17g\n",
                              static_cast<int>(name.length()), name.data(),
                              static_cast<int>(labels.length()), labels.data(),
                              value));
    }
  }

  // Render histograms
  for (const auto &[name, family] : Metric::histogramFamilies) {
    output.push_back(
        format(arena, "# HELP %s %s\n", name.c_str(), family->help.c_str()));
    output.push_back(format(arena, "# TYPE %s histogram\n", name.c_str()));

    std::vector<std::pair<std::string_view, std::string_view>> bucket_labels_sv;
    for (const auto &[thread_id, per_thread] : family->perThreadState) {
      for (const auto &[labels_key, instance] : per_thread.instances) {
        for (size_t i = 0; i < instance->thresholds.size(); ++i) {
          bucket_labels_sv.clear();
          for (const auto &l : labels_key.labels)
            bucket_labels_sv.push_back(l);

          if (std::isinf(instance->thresholds[i])) {
            bucket_labels_sv.push_back({"le", "+Inf"});
          } else {
            bucket_labels_sv.push_back(
                {"le", format(arena, "%.17g", instance->thresholds[i])});
          }
          // Atomic read from the render thread; bucket counts are published
          // with relaxed atomic stores by the single writer.
          auto count = __atomic_load_n(&instance->counts[i], __ATOMIC_RELAXED);
          auto labels = format_labels(bucket_labels_sv);
          output.push_back(format(arena, "%s_bucket%.*s %llu\n", name.c_str(),
                                  static_cast<int>(labels.length()),
                                  labels.data(),
                                  static_cast<unsigned long long>(count)));
        }

        auto sum_value = std::bit_cast<double>(
            instance->sum.load(std::memory_order_relaxed));
        bucket_labels_sv.clear();
        for (const auto &l : labels_key.labels)
          bucket_labels_sv.push_back(l);
        auto labels = format_labels(bucket_labels_sv);
        output.push_back(format(arena, "%s_sum%.*s %.17g\n", name.c_str(),
                                static_cast<int>(labels.length()),
                                labels.data(), sum_value));

        auto count_value =
            instance->observations.load(std::memory_order_relaxed);
        output.push_back(format(arena, "%s_count%.*s %llu\n", name.c_str(),
                                static_cast<int>(labels.length()),
                                labels.data(),
                                static_cast<unsigned long long>(count_value)));
      }
    }
  }

  auto result = arena.allocate<std::string_view>(output.size());
  std::copy(output.begin(), output.end(), result);
  return std::span<std::string_view>(result, output.size());
}

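// Example of rendered exposition text (illustrative values only):
//   # HELP http_requests_total Total HTTP requests
//   # TYPE http_requests_total counter
//   http_requests_total{method="GET",status="200"} 1000
//   request_duration_seconds_bucket{handler="api",le="+Inf"} 3
//   request_duration_seconds_sum{handler="api"} 2.5
//   request_duration_seconds_count{handler="api"} 3
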
// Template specialization implementations for register_callback
template <>
void Family<Counter>::register_callback(
    std::vector<std::pair<std::string, std::string>> labels,
    MetricCallback<Counter> callback) {
  std::unique_lock<std::mutex> _{Metric::mutex};
  LabelsKey key{std::move(labels)};

  // Validate that labels aren't already in use by create() calls
  for (const auto &[thread_id, per_thread] : p->perThreadState) {
    validate_or_abort(
        per_thread.instances.find(key) == per_thread.instances.end(),
        "labels already registered as static instance",
        key.labels.empty() ? "(no labels)" : key.labels[0].first.c_str());
  }

  // Validate that a callback isn't already registered for these labels
  validate_or_abort(p->callbacks.find(key) == p->callbacks.end(),
                    "callback already registered for labels",
                    key.labels.empty() ? "(no labels)"
                                       : key.labels[0].first.c_str());

  p->callbacks[std::move(key)] = std::move(callback);
}

template <>
void Family<Gauge>::register_callback(
    std::vector<std::pair<std::string, std::string>> labels,
    MetricCallback<Gauge> callback) {
  std::unique_lock<std::mutex> _{Metric::mutex};
  LabelsKey key{std::move(labels)};

  // Validate that labels aren't already in use by create() calls
  validate_or_abort(p->instances.find(key) == p->instances.end(),
                    "labels already registered as static instance",
                    key.labels.empty() ? "(no labels)"
                                       : key.labels[0].first.c_str());

  // Validate that a callback isn't already registered for these labels
  validate_or_abort(p->callbacks.find(key) == p->callbacks.end(),
                    "callback already registered for labels",
                    key.labels.empty() ? "(no labels)"
                                       : key.labels[0].first.c_str());

  p->callbacks[std::move(key)] = std::move(callback);
}

// Explicit template specializations above provide the member implementations

// Static member definitions
std::mutex Metric::mutex;
std::unordered_map<std::string, std::unique_ptr<Family<Counter>::State>>
    Metric::counterFamilies;
std::unordered_map<std::string, std::unique_ptr<Family<Gauge>::State>>
    Metric::gaugeFamilies;
std::unordered_map<std::string, std::unique_ptr<Family<Histogram>::State>>
    Metric::histogramFamilies;
thread_local Metric::ThreadInit Metric::thread_init;

} // namespace metric
200 src/metric.hpp Normal file
@@ -0,0 +1,200 @@
#pragma once

// WeaselDB Metrics System
//
// High-performance metrics collection with Prometheus-compatible output.
//
// DESIGN PRINCIPLES:
// - Single-writer semantics: Each metric instance bound to creating thread
// - Lock-free operations using atomic<uint64_t> storage for doubles
// - Full IEEE 754 double precision preservation via bit reinterpretation
// - Single global registry: All metrics registered in one global namespace
//
// CRITICAL THREAD SAFETY CONSTRAINT:
// Each metric instance has exactly ONE writer thread (the creating thread).
// It is undefined behavior to call inc()/dec()/set()/observe() from a
// different thread.
//
// REGISTRY MODEL:
// This implementation uses a single global registry for all metrics, unlike
// typical Prometheus client libraries that support multiple registries.
// This design choice prioritizes simplicity and performance over flexibility.
//
// METRIC LIFECYCLE:
// Metrics are created once and persist for the application lifetime. There is
// no unregistration mechanism - this prevents accidental metric loss and
// simplifies the implementation.
//
// USAGE:
//   auto counter_family =
//       metric::create_counter("requests_total", "Total requests");
//   auto counter =
//       counter_family.create({{"method", "GET"}}); // Bound to this thread
//   counter.inc(1.0); // ONLY call from creating thread
//
//   auto histogram_family = metric::create_histogram(
//       "latency", "Request latency", {0.1, 0.5, 1.0});
//   auto histogram =
//       histogram_family.create({{"endpoint", "/api"}}); // Bound to this thread
//   histogram.observe(0.25); // ONLY call from creating thread

#include <functional>
#include <initializer_list>
#include <span>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
#include <vector>

#include "arena_allocator.hpp"

namespace metric {

// Forward declarations
template <typename T> struct Family;

// Callback function type for dynamic metric values
// Called during render() to get current metric value
// THREAD SAFETY: May be called from arbitrary thread, but serialized by
// render() mutex - no need to be thread-safe internally
template <typename T> using MetricCallback = std::function<double()>;

// Counter: A metric value that only increases.
//
// THREAD SAFETY RULES:
// 1. Do not call inc() on the same Counter object from multiple threads.
//    Each object must have only one writer thread.
// 2. To use Counters concurrently, each thread must create its own Counter
//    object.
// 3. When rendered, the values of all Counter objects with the same labels
//    are summed together into a single total.
struct Counter {
  void inc(double = 1.0); // Increment counter (must be >= 0)

private:
  Counter();
  friend struct Metric;
  template <class> friend struct Family;
  struct State;
  State *p;
};

// Gauge: A metric value that can be set, increased, or decreased.
//
// THREAD SAFETY RULES:
// 1. Do not call inc(), dec(), or set() on the same Gauge object from
//    multiple threads. Each object must have only one writer thread.
// 2. To use Gauges concurrently, each thread must create its own Gauge object.
// 3. If multiple Gauge objects are created with the same labels, their
//    operations are combined. For example, increments from different objects
//    are cumulative.
// 4. For independent gauges, create them with unique labels.
struct Gauge {
  void inc(double = 1.0);
  void dec(double = 1.0);
  void set(double);

private:
  Gauge();
  friend struct Metric;
  template <class> friend struct Family;
  struct State;
  State *p;
};

// Histogram: A metric that samples observations into buckets.
//
// THREAD SAFETY RULES:
// 1. Do not call observe() on the same Histogram object from multiple
//    threads. Each object must have only one writer thread.
// 2. To use Histograms concurrently, each thread must create its own
//    Histogram object.
// 3. When rendered, the observations from all Histogram objects with the
//    same labels are combined into a single histogram.
struct Histogram {
  void observe(double); // Record observation in appropriate bucket

private:
  Histogram();
  friend struct Metric;
  template <class> friend struct Family;
  struct State;
  State *p;
};

// Family: Factory for creating metric instances with different label
// combinations. Each family represents one metric name with varying labels.
template <class T> struct Family {
  static_assert(std::is_same_v<T, Counter> || std::is_same_v<T, Gauge> ||
                std::is_same_v<T, Histogram>);

  // Create metric instance with specific labels
  // Labels are sorted by key for Prometheus compatibility
  // ERROR: Will abort if labels already registered via register_callback()
  // OK: Multiple calls with same labels return same instance (idempotent)
  T create(std::vector<std::pair<std::string, std::string>> labels);

  // Register callback-based metric (Counter and Gauge only)
  // Validates that label set isn't already taken
  void
  register_callback(std::vector<std::pair<std::string, std::string>> labels,
                    MetricCallback<T> callback);

private:
  Family();
  friend struct Metric;
  friend Family<Counter> create_counter(std::string, std::string);
  friend Family<Gauge> create_gauge(std::string, std::string);
  friend Family<Histogram> create_histogram(std::string, std::string,
                                            std::span<const double>);

  struct State;
  State *p;
};

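// Example (hypothetical names): a label set is either static (create) or
// callback-backed (register_callback), never both.
//   auto fam = metric::create_gauge("cache_size_bytes", "Cache size");
//   fam.register_callback({{"cache", "l1"}}, [] { return 4096.0; });
//   // fam.create({{"cache", "l1"}}) would now abort;
//   // fam.create({{"cache", "l2"}}) is still fine.
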
// Factory functions for creating metric families
// Note: name and help are copied into the global registry, so callers may
// pass temporaries.

// Create counter family (monotonically increasing values)
// ERROR: Aborts if family with same name is registered with different help
// text.
Family<Counter> create_counter(std::string name, std::string help);

// Create gauge family (can increase/decrease)
// ERROR: Aborts if family with same name is registered with different help
// text.
Family<Gauge> create_gauge(std::string name, std::string help);

// Create histogram family with custom buckets
// Buckets will be sorted, deduplicated, and +Inf will be added automatically
// ERROR: Aborts if family with same name is registered with different help text
// or buckets.
Family<Histogram> create_histogram(std::string name, std::string help,
                                   std::span<const double> buckets);

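// Example of the abort-on-mismatch behavior (illustrative):
//   metric::create_counter("jobs_total", "Jobs processed"); // ok
//   metric::create_counter("jobs_total", "Jobs processed"); // ok (idempotent)
//   metric::create_counter("jobs_total", "Different help"); // aborts
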
// Helper functions for generating standard histogram buckets
// Following Prometheus client library conventions

// Generate linear buckets: start, start+width, start+2*width, ...,
// start+(count-1)*width
// Example: linear_buckets(0, 10, 5) = {0, 10, 20, 30, 40}
std::vector<double> linear_buckets(double start, double width, int count);

// Generate exponential buckets: start, start*factor, start*factor^2, ...,
// start*factor^(count-1)
// Example: exponential_buckets(1, 2, 5) = {1, 2, 4, 8, 16}
std::vector<double> exponential_buckets(double start, double factor, int count);

// Render all metrics in Prometheus text format.
// Returns chunks of Prometheus exposition format (includes # HELP and # TYPE
// lines). Each string_view may contain multiple lines separated by '\n'.
// String views are NOT null-terminated - use .size() for length. All string
// data is allocated in the provided arena for zero-copy efficiency.
// THREAD SAFETY: Serialized by global mutex - callbacks need not be thread-safe
std::span<std::string_view> render(ArenaAllocator &arena);

// Validation functions for Prometheus compatibility
bool is_valid_metric_name(const std::string &name);
bool is_valid_label_key(const std::string &key);
bool is_valid_label_value(const std::string &value);

// Note: Histograms do not support callbacks due to their multi-value nature
// (buckets + sum + count). Use static histogram metrics only.

} // namespace metric
45 style.md
@@ -99,7 +99,7 @@ auto addr = reinterpret_cast<uintptr_t>(ptr); // Pointer to integer conv
- **Complexity must be justified with benchmarks** - measure performance impact before adding complexity
- **Strive for 0% CPU usage when idle** - avoid polling, busy waiting, or unnecessary background activity
- Use **inline functions** for performance-critical code (e.g., `allocate_raw`)
- **Zero-copy operations** with `std::string_view` over string copying
- **String views** with `std::string_view` to minimize unnecessary copying
- **Arena allocation** for efficient memory management (~1ns vs ~20-270ns for malloc)

### Complexity Control

@@ -249,7 +249,7 @@ private:
- **Parameter passing:**
  - Pass by value for types ≤ 16 bytes (int, pointers, string_view, small structs)
  - Pass by const reference for types > 16 bytes (containers, large objects)
- **Return by value** for small types (≤ 16 bytes), **string_view** for zero-copy over strings
- **Return by value** for small types (≤ 16 bytes), **string_view** to avoid copying strings
- **noexcept specification** for move operations and non-throwing functions
```cpp
std::span<const Operation> operations() const { return operations_; }
```
@@ -301,13 +301,34 @@ for (auto &precondition : preconditions_) {
}
```

### Atomic Operations
- **Never use assignment operators** with `std::atomic` - always use explicit `store()` and `load()`
- **Always specify memory ordering** explicitly for atomic operations
- **Use the least restrictive correct memory ordering** - choose the weakest ordering that maintains correctness
```cpp
// Preferred - explicit store/load with precise memory ordering
std::atomic<uint64_t> counter;
counter.store(42, std::memory_order_relaxed); // Single-writer metric updates
auto value = counter.load(std::memory_order_relaxed); // Reading metrics for display

counter.store(1, std::memory_order_release); // Publishing initialization
auto ready = counter.load(std::memory_order_acquire); // Synchronizing with publisher

counter.store(42, std::memory_order_seq_cst); // When sequential consistency needed

// Avoid - assignment operators (implicit memory ordering)
std::atomic<uint64_t> counter;
counter = 42; // Implicit - memory ordering not explicit
auto value = counter; // Implicit - memory ordering not explicit
```

---

## Memory Management

### Ownership & Allocation
- **Arena allocators** for request-scoped memory with **STL allocator adapters** (see Performance Focus section for characteristics)
- **String views** pointing to arena-allocated memory for zero-copy operations
- **String views** pointing to arena-allocated memory to avoid unnecessary copying
- **STL containers with arena allocators require default construction after arena reset** - `clear()` is not sufficient
```cpp
// STL containers with arena allocators - correct reset pattern
```
@@ -547,6 +568,24 @@ TEST_CASE("Server accepts connections") {
  - `std::latch`, `std::barrier`, futures/promises
- **Force concurrent execution** using `std::latch` to synchronize thread startup

#### Threading Checklist for Tests/Benchmarks

**MANDATORY: Before writing any `std::thread` or `threads.emplace_back()`:**

1. **Count total threads** - Include main/benchmark thread in count
2. **Always assume concurrent execution needed** - Tests/benchmarks require real concurrency
3. **Add `std::latch start_latch{N}`** where N = total concurrent threads
4. **Each thread calls `start_latch.arrive_and_wait()`** before doing work
5. **Main/benchmark thread calls `start_latch.arrive_and_wait()`** before measurement

**Red flags to catch immediately:**
- ❌ Creating threads in a loop without `std::latch`
- ❌ Background threads starting work immediately
- ❌ Benchmark measuring before all threads synchronized
- ❌ Any use of `sleep_for`, `wait_for`, or timeouts

**Simple rule:** Multiple threads = `std::latch` synchronization. No exceptions, even for "simple" background threads.

```cpp
// BAD: Race likely over before threads start
std::atomic<int> counter{0};
```
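
For contrast, a minimal sketch of the checklist applied (illustrative only; not part of the diffed hunk):

```cpp
// GOOD: latch forces all threads to start concurrently
#include <atomic>
#include <latch>
#include <thread>
#include <vector>

int main() {
  constexpr int num_threads = 4;
  std::atomic<int> counter{0};
  std::latch start_latch{num_threads + 1}; // workers + main thread

  std::vector<std::thread> threads;
  for (int i = 0; i < num_threads; ++i) {
    threads.emplace_back([&] {
      start_latch.arrive_and_wait(); // wait until everyone is ready
      counter.fetch_add(1, std::memory_order_relaxed);
    });
  }

  start_latch.arrive_and_wait(); // main thread releases the workers
  for (auto &t : threads)
    t.join();
  return counter.load(std::memory_order_relaxed) == num_threads ? 0 : 1;
}
```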
@@ -1,7 +1,9 @@
#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
#include "arena_allocator.hpp"
#include "format.hpp"
#include <cstring>
#include <doctest/doctest.h>
#include <string>
#include <vector>

TEST_CASE("ArenaAllocator basic construction") {
|
||||
@@ -532,3 +534,67 @@ TEST_CASE("ArenaAllocator realloc functionality") {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("format function fallback codepath") {
|
||||
SUBCASE("single-pass optimization success") {
|
||||
ArenaAllocator arena(128);
|
||||
auto result = format(arena, "Hello %s! Number: %d", "World", 42);
|
||||
CHECK(result == "Hello World! Number: 42");
|
||||
CHECK(result.length() == 23);
|
||||
}
|
||||
|
||||
SUBCASE("fallback when speculative formatting fails") {
|
||||
// Create arena with limited space to force fallback
|
||||
ArenaAllocator arena(16);
|
||||
|
||||
// Consume most space to leave insufficient room for speculative formatting
|
||||
arena.allocate<char>(10);
|
||||
CHECK(arena.available_in_current_block() == 6);
|
||||
|
||||
// Format string larger than available space - should trigger fallback
|
||||
std::string long_string = "This is a very long string that won't fit";
|
||||
auto result = format(arena, "Prefix: %s with %d", long_string.c_str(), 123);
|
||||
|
||||
std::string expected =
|
||||
"Prefix: This is a very long string that won't fit with 123";
|
||||
CHECK(result == expected);
|
||||
CHECK(result.length() == expected.length());
|
||||
}
|
||||
|
||||
SUBCASE("edge case - exactly available space") {
|
||||
ArenaAllocator arena(32);
|
||||
arena.allocate<char>(20); // Leave 12 bytes
|
||||
CHECK(arena.available_in_current_block() == 12);
|
||||
|
||||
// Format that needs exactly available space (should still use fallback due
|
||||
// to null terminator)
|
||||
auto result = format(arena, "Test%d", 123); // "Test123" = 7 chars
|
||||
CHECK(result == "Test123");
|
||||
CHECK(result.length() == 7);
|
||||
}
|
||||
|
||||
SUBCASE("allocate_remaining_space postcondition") {
|
||||
// Test empty arena
|
||||
ArenaAllocator empty_arena(64);
|
||||
auto space1 = empty_arena.allocate_remaining_space();
|
||||
CHECK(space1.allocated_bytes >= 1);
|
||||
CHECK(space1.allocated_bytes == 64);
|
||||
|
||||
// Test full arena (should create new block)
|
||||
ArenaAllocator full_arena(32);
|
||||
full_arena.allocate<char>(32); // Fill completely
|
||||
auto space2 = full_arena.allocate_remaining_space();
|
||||
CHECK(space2.allocated_bytes >= 1);
|
||||
CHECK(space2.allocated_bytes == 32); // New block created
|
||||
}
|
||||
|
||||
SUBCASE("format error handling") {
|
||||
ArenaAllocator arena(64);
|
||||
|
||||
// Test with invalid format (should return empty string_view)
|
||||
// Note: This is hard to trigger reliably across platforms,
|
||||
// so we focus on successful cases in the other subcases
|
||||
auto result = format(arena, "Valid format: %d", 42);
|
||||
CHECK(result == "Valid format: 42");
|
||||
}
|
||||
}
|
||||
|
||||
552 tests/test_metric.cpp Normal file
@@ -0,0 +1,552 @@
#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
#include <doctest/doctest.h>

#include "arena_allocator.hpp"
#include "metric.hpp"

#include <atomic>
#include <chrono>
#include <cmath>
#include <latch>
#include <limits>
#include <thread>
#include <vector>

TEST_CASE("metric validation functions") {
  SUBCASE("valid metric names") {
    CHECK(metric::is_valid_metric_name("valid_name"));
    CHECK(metric::is_valid_metric_name("ValidName"));
    CHECK(metric::is_valid_metric_name("valid:name"));
    CHECK(metric::is_valid_metric_name("_valid"));
    CHECK(metric::is_valid_metric_name("valid_123"));
    CHECK(metric::is_valid_metric_name("prometheus_metric_name"));
  }

  SUBCASE("invalid metric names") {
    CHECK_FALSE(metric::is_valid_metric_name(""));
    CHECK_FALSE(metric::is_valid_metric_name("123invalid"));
    CHECK_FALSE(metric::is_valid_metric_name("invalid-name"));
    CHECK_FALSE(metric::is_valid_metric_name("invalid.name"));
    CHECK_FALSE(metric::is_valid_metric_name("invalid name"));
  }

  SUBCASE("valid label keys") {
    CHECK(metric::is_valid_label_key("valid_key"));
    CHECK(metric::is_valid_label_key("ValidKey"));
    CHECK(metric::is_valid_label_key("valid123"));
    CHECK(metric::is_valid_label_key("_valid"));
  }

  SUBCASE("invalid label keys") {
    CHECK_FALSE(metric::is_valid_label_key(""));
    CHECK_FALSE(metric::is_valid_label_key("123invalid"));
    CHECK_FALSE(metric::is_valid_label_key("invalid:key"));
    CHECK_FALSE(metric::is_valid_label_key("invalid-key"));
    CHECK_FALSE(metric::is_valid_label_key("__reserved"));
    CHECK_FALSE(metric::is_valid_label_key("__internal"));
  }

  SUBCASE("valid label values") {
    CHECK(metric::is_valid_label_value("any_value"));
    CHECK(metric::is_valid_label_value("123"));
    CHECK(metric::is_valid_label_value("special-chars.allowed"));
    CHECK(metric::is_valid_label_value(""));
    CHECK(metric::is_valid_label_value("unicode测试"));
  }
}

TEST_CASE("counter basic functionality") {
|
||||
auto counter_family =
|
||||
metric::create_counter("test_counter", "Test counter help");
|
||||
|
||||
SUBCASE("create counter with no labels") {
|
||||
auto counter = counter_family.create({});
|
||||
counter.inc(1.0);
|
||||
counter.inc(2.5);
|
||||
counter.inc(); // Default increment of 1.0
|
||||
}
|
||||
|
||||
SUBCASE("create counter with labels") {
|
||||
auto counter =
|
||||
counter_family.create({{"method", "GET"}, {"status", "200"}});
|
||||
counter.inc(5.0);
|
||||
|
||||
// Same labels should return same instance (idempotent)
|
||||
auto counter2 =
|
||||
counter_family.create({{"method", "GET"}, {"status", "200"}});
|
||||
counter2.inc(3.0);
|
||||
}
|
||||
|
||||
SUBCASE("label sorting") {
|
||||
// Labels should be sorted by key
|
||||
auto counter1 =
|
||||
counter_family.create({{"z_key", "value"}, {"a_key", "value"}});
|
||||
auto counter2 =
|
||||
counter_family.create({{"a_key", "value"}, {"z_key", "value"}});
|
||||
|
||||
// These should be the same instance due to label sorting
|
||||
counter1.inc(1.0);
|
||||
counter2.inc(2.0); // Should add to same counter
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("gauge basic functionality") {
|
||||
auto gauge_family = metric::create_gauge("test_gauge", "Test gauge help");
|
||||
|
||||
SUBCASE("gauge operations") {
|
||||
auto gauge = gauge_family.create({{"instance", "test"}});
|
||||
|
||||
gauge.set(10.0);
|
||||
gauge.inc(5.0);
|
||||
gauge.dec(3.0);
|
||||
gauge.inc(); // Default increment
|
||||
gauge.dec(); // Default decrement
|
||||
}
|
||||
|
||||
SUBCASE("gauge with multiple instances") {
|
||||
auto gauge1 = gauge_family.create({{"instance", "test1"}});
|
||||
auto gauge2 = gauge_family.create({{"instance", "test2"}});
|
||||
|
||||
gauge1.set(100.0);
|
||||
gauge2.set(200.0);
|
||||
|
||||
gauge1.inc(50.0);
|
||||
gauge2.dec(25.0);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("histogram basic functionality") {
|
||||
auto hist_family =
|
||||
metric::create_histogram("test_latency", "Test latency histogram",
|
||||
metric::exponential_buckets(0.1, 2.0, 5));
|
||||
|
||||
SUBCASE("histogram observations") {
|
||||
auto hist = hist_family.create({{"endpoint", "/api"}});
|
||||
|
||||
hist.observe(0.05); // Below first bucket
|
||||
hist.observe(0.3); // Between buckets
|
||||
hist.observe(1.5); // Between buckets
|
||||
hist.observe(10.0); // Above all explicit buckets (goes in +Inf)
|
||||
}
|
||||
|
||||
SUBCASE("histogram bucket validation") {
|
||||
// Buckets should be sorted and deduplicated, with +Inf added
|
||||
auto hist_family2 = metric::create_histogram(
|
||||
"test_hist2", "Test",
|
||||
std::initializer_list<double>{5.0, 1.0, 2.5, 1.0,
|
||||
0.5}); // Unsorted with duplicate
|
||||
|
||||
auto hist = hist_family2.create({});
|
||||
hist.observe(0.1);
|
||||
hist.observe(1.5);
|
||||
hist.observe(100.0); // Should go in +Inf bucket
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("histogram bucket generators") {
|
||||
SUBCASE("linear_buckets basic functionality") {
|
||||
// Linear buckets: start=0, width=10, count=5 -> {0, 10, 20, 30, 40}
|
||||
auto buckets = metric::linear_buckets(0.0, 10.0, 5);
|
||||
|
||||
CHECK(buckets.size() == 5); // exactly count buckets
|
||||
CHECK(buckets[0] == 0.0);
|
||||
CHECK(buckets[1] == 10.0);
|
||||
CHECK(buckets[2] == 20.0);
|
||||
CHECK(buckets[3] == 30.0);
|
||||
CHECK(buckets[4] == 40.0);
|
||||
}
|
||||
|
||||
SUBCASE("linear_buckets with non-zero start") {
|
||||
// Linear buckets: start=5, width=2.5, count=3 -> {5, 7.5, 10}
|
||||
auto buckets = metric::linear_buckets(5.0, 2.5, 3);
|
||||
|
||||
CHECK(buckets.size() == 3);
|
||||
CHECK(buckets[0] == 5.0);
|
||||
CHECK(buckets[1] == 7.5);
|
||||
CHECK(buckets[2] == 10.0);
|
||||
}
|
||||
|
||||
SUBCASE("linear_buckets edge cases") {
|
||||
// Zero count should give empty vector
|
||||
auto zero_buckets = metric::linear_buckets(100.0, 10.0, 0);
|
||||
CHECK(zero_buckets.size() == 0);
|
||||
|
||||
// Negative start should work
|
||||
auto negative_buckets = metric::linear_buckets(-10.0, 5.0, 2);
|
||||
CHECK(negative_buckets.size() == 2);
|
||||
CHECK(negative_buckets[0] == -10.0);
|
||||
CHECK(negative_buckets[1] == -5.0);
|
||||
}
|
||||
|
||||
SUBCASE("exponential_buckets basic functionality") {
|
||||
// Exponential buckets: start=1, factor=2, count=5 -> {1, 2, 4, 8, 16}
|
||||
auto buckets = metric::exponential_buckets(1.0, 2.0, 5);
|
||||
|
||||
CHECK(buckets.size() == 5); // exactly count buckets
|
||||
CHECK(buckets[0] == 1.0);
|
||||
CHECK(buckets[1] == 2.0);
|
||||
CHECK(buckets[2] == 4.0);
|
||||
CHECK(buckets[3] == 8.0);
|
||||
CHECK(buckets[4] == 16.0);
|
||||
}
|
||||
|
||||
SUBCASE("exponential_buckets different factor") {
|
||||
// Exponential buckets: start=0.1, factor=10, count=3 -> {0.1, 1, 10}
|
||||
auto buckets = metric::exponential_buckets(0.1, 10.0, 3);
|
||||
|
||||
CHECK(buckets.size() == 3);
|
||||
CHECK(buckets[0] == doctest::Approx(0.1));
|
||||
CHECK(buckets[1] == doctest::Approx(1.0));
|
||||
CHECK(buckets[2] == doctest::Approx(10.0));
|
||||
}
|
||||
|
||||
SUBCASE("exponential_buckets typical latency pattern") {
|
||||
// Typical web service latency buckets: 5ms, 10ms, 20ms, 40ms, 80ms, etc.
|
||||
auto buckets = metric::exponential_buckets(0.005, 2.0, 8);
|
||||
|
||||
CHECK(buckets.size() == 8);
|
||||
CHECK(buckets[0] == doctest::Approx(0.005)); // 5ms
|
||||
CHECK(buckets[1] == doctest::Approx(0.010)); // 10ms
|
||||
CHECK(buckets[2] == doctest::Approx(0.020)); // 20ms
|
||||
CHECK(buckets[3] == doctest::Approx(0.040)); // 40ms
|
||||
CHECK(buckets[4] == doctest::Approx(0.080)); // 80ms
|
||||
CHECK(buckets[5] == doctest::Approx(0.160)); // 160ms
|
||||
CHECK(buckets[6] == doctest::Approx(0.320)); // 320ms
|
||||
CHECK(buckets[7] == doctest::Approx(0.640)); // 640ms
|
||||
}
|
||||
|
||||
SUBCASE("exponential_buckets edge cases") {
|
||||
// Zero count should give empty vector
|
||||
auto zero_buckets = metric::exponential_buckets(5.0, 3.0, 0);
|
||||
CHECK(zero_buckets.size() == 0);
|
||||
}
|
||||
|
||||
SUBCASE("bucket generators with histogram creation") {
|
||||
// Test that generated buckets work correctly with histogram creation
|
||||
auto linear_hist = metric::create_histogram(
|
||||
"linear_test", "Linear test", metric::linear_buckets(0, 100, 5));
|
||||
auto linear_instance = linear_hist.create({{"type", "linear"}});
|
||||
|
||||
// Test observations fall into expected buckets
|
||||
linear_instance.observe(50); // Should fall into 100 bucket
|
||||
linear_instance.observe(150); // Should fall into 200 bucket
|
||||
linear_instance.observe(1000); // Should fall into +Inf bucket
|
||||
|
||||
auto exp_hist =
|
||||
metric::create_histogram("exp_test", "Exponential test",
|
||||
metric::exponential_buckets(0.001, 10.0, 4));
|
||||
auto exp_instance = exp_hist.create({{"type", "exponential"}});
|
||||
|
||||
// Test typical latency measurements
|
||||
exp_instance.observe(0.0005); // Should fall into 0.001 bucket (1ms)
|
||||
exp_instance.observe(0.005); // Should fall into 0.01 bucket (10ms)
|
||||
exp_instance.observe(0.05); // Should fall into 0.1 bucket (100ms)
|
||||
exp_instance.observe(5.0); // Should fall into +Inf bucket
|
||||
}
|
||||
|
||||
SUBCASE("prometheus compatibility verification") {
|
||||
// Verify our bucket generation matches Prometheus Go client behavior
|
||||
|
||||
// Linear buckets equivalent to Prometheus LinearBuckets(0, 10, 5)
|
||||
auto our_linear = metric::linear_buckets(0, 10, 5);
|
||||
std::vector<double> expected_linear = {0, 10, 20, 30, 40};
|
||||
CHECK(our_linear == expected_linear);
|
||||
|
||||
// Exponential buckets equivalent to Prometheus ExponentialBuckets(1, 2, 5)
|
||||
auto our_exp = metric::exponential_buckets(1, 2, 5);
|
||||
std::vector<double> expected_exp = {1, 2, 4, 8, 16};
|
||||
CHECK(our_exp == expected_exp);
|
||||
|
||||
// Default Prometheus histogram buckets (exponential)
|
||||
auto default_buckets = metric::exponential_buckets(0.005, 2.5, 9);
|
||||
// Should be: .005, .0125, .03125, .078125, .1953125,
|
||||
// .48828125, 1.220703125, 3.0517578125, 7.62939453125
|
||||
CHECK(default_buckets.size() == 9);
|
||||
CHECK(default_buckets[0] == doctest::Approx(0.005));
|
||||
CHECK(default_buckets[1] == doctest::Approx(0.0125));
|
||||
CHECK(default_buckets[8] == doctest::Approx(7.62939453125));
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("callback-based metrics") {
|
||||
auto counter_family =
|
||||
metric::create_counter("callback_counter", "Callback counter");
|
||||
auto gauge_family = metric::create_gauge("callback_gauge", "Callback gauge");
|
||||
|
||||
SUBCASE("counter callback") {
|
||||
std::atomic<double> counter_value{42.0};
|
||||
|
||||
counter_family.register_callback(
|
||||
{{"type", "callback"}},
|
||||
[&counter_value]() { return counter_value.load(); });
|
||||
|
||||
// Callback should be called during render
|
||||
ArenaAllocator arena;
|
||||
auto output = metric::render(arena);
|
||||
CHECK(output.size() > 0);
|
||||
}
|
||||
|
||||
SUBCASE("gauge callback") {
|
||||
std::atomic<double> gauge_value{123.5};
|
||||
|
||||
gauge_family.register_callback({{"type", "callback"}}, [&gauge_value]() {
|
||||
return gauge_value.load();
|
||||
});
|
||||
|
||||
ArenaAllocator arena;
|
||||
auto output = metric::render(arena);
|
||||
CHECK(output.size() > 0);
|
||||
}
|
||||
|
||||
SUBCASE("callback conflict detection") {
|
||||
// First create a static instance
|
||||
auto counter = counter_family.create({{"conflict", "test"}});
|
||||
counter.inc(1.0);
|
||||
|
||||
// Then try to register a callback with same labels - should abort
|
||||
// This is a validation test that would abort in debug builds
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("prometheus text format rendering") {
|
||||
ArenaAllocator arena;
|
||||
|
||||
// Create some metrics
|
||||
auto counter_family =
|
||||
metric::create_counter("http_requests_total", "Total HTTP requests");
|
||||
auto counter = counter_family.create({{"method", "GET"}, {"status", "200"}});
|
||||
counter.inc(1000);
|
||||
|
||||
auto gauge_family =
|
||||
metric::create_gauge("memory_usage_bytes", "Memory usage");
|
||||
auto gauge = gauge_family.create({{"type", "heap"}});
|
||||
gauge.set(1048576);
|
||||
|
||||
auto hist_family = metric::create_histogram(
|
||||
"request_duration_seconds", "Request duration",
|
||||
metric::exponential_buckets(0.1, 2.0, 3)); // 0.1, 0.2, 0.4, 0.8
|
||||
auto hist = hist_family.create({{"handler", "api"}});
|
||||
hist.observe(0.25);
|
||||
hist.observe(0.75);
|
||||
hist.observe(1.5);
|
||||
|
||||
SUBCASE("render format validation") {
|
||||
auto output = metric::render(arena);
|
||||
CHECK(output.size() > 0);
|
||||
|
||||
// Basic format checks
|
||||
bool found_help = false;
|
||||
bool found_type = false;
|
||||
bool found_metric_line = false;
|
||||
|
||||
for (const auto &line : output) {
|
||||
if (line.starts_with("# HELP"))
|
||||
found_help = true;
|
||||
if (line.starts_with("# TYPE"))
|
||||
found_type = true;
|
||||
if (line.find("http_requests_total") != std::string_view::npos)
|
||||
found_metric_line = true;
|
||||
}
|
||||
|
||||
CHECK(found_help);
|
||||
CHECK(found_type);
|
||||
CHECK(found_metric_line);
|
||||
}
|
||||
|
||||
SUBCASE("special value formatting") {
|
||||
auto special_gauge_family =
|
||||
metric::create_gauge("special_values", "Special value test");
|
||||
auto special_gauge = special_gauge_family.create({});
|
||||
|
||||
special_gauge.set(std::numeric_limits<double>::infinity());
|
||||
auto output = metric::render(arena);
|
||||
|
||||
// Should contain "+Inf" representation
|
||||
bool found_inf = false;
|
||||
for (const auto &line : output) {
|
||||
if (line.find("+Inf") != std::string_view::npos) {
|
||||
found_inf = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
CHECK(found_inf);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("thread safety") {
|
||||
constexpr int num_threads = 8;
|
||||
constexpr int ops_per_thread = 1000;
|
||||
|
||||
SUBCASE("counter single-writer semantics") {
|
||||
auto counter_family =
|
||||
metric::create_counter("thread_test_counter", "Thread test");
|
||||
|
||||
std::vector<std::thread> threads;
|
||||
std::latch start_latch{num_threads};
|
||||
|
||||
// Each thread creates its own counter instance (safe)
|
||||
for (int i = 0; i < num_threads; ++i) {
|
||||
threads.emplace_back([&, i]() {
|
||||
auto counter =
|
||||
counter_family.create({{"thread_id", std::to_string(i)}});
|
||||
|
||||
start_latch.arrive_and_wait();
|
||||
|
||||
for (int j = 0; j < ops_per_thread; ++j) {
|
||||
counter.inc(1.0);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
for (auto &t : threads) {
|
||||
t.join();
|
||||
}
|
||||
}
|
||||
|
||||
SUBCASE("gauge multi-writer contention") {
|
||||
auto gauge_family =
|
||||
metric::create_gauge("thread_test_gauge", "Thread test gauge");
|
||||
|
||||
std::vector<std::thread> threads;
|
||||
std::latch start_latch{num_threads};
|
||||
|
||||
// Multiple threads create gauges with the same labels, writing to the same
|
||||
// underlying state, testing CAS contention.
|
||||
for (int i = 0; i < num_threads; ++i) {
|
||||
threads.emplace_back([&]() {
|
||||
auto gauge = gauge_family.create({{"shared", "true"}});
|
||||
start_latch.arrive_and_wait();
|
||||
|
||||
for (int j = 0; j < ops_per_thread; ++j) {
|
||||
gauge.inc(1.0);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
for (auto &t : threads) {
|
||||
t.join();
|
||||
}
|
||||
}
|
||||
|
||||
SUBCASE("histogram single-writer per thread") {
|
||||
auto hist_family =
|
||||
metric::create_histogram("thread_test_hist", "Thread test histogram",
|
||||
std::initializer_list<double>{0.1, 0.5, 1.0});
|
||||
|
||||
std::vector<std::thread> threads;
|
||||
std::latch start_latch{num_threads};
|
||||
|
||||
for (int i = 0; i < num_threads; ++i) {
|
||||
threads.emplace_back([&, i]() {
|
||||
auto hist = hist_family.create({{"thread_id", std::to_string(i)}});
|
||||
|
||||
start_latch.arrive_and_wait();
|
||||
|
||||
for (int j = 0; j < ops_per_thread; ++j) {
|
||||
hist.observe(static_cast<double>(j) / ops_per_thread);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
for (auto &t : threads) {
|
||||
t.join();
|
||||
}
|
||||
}
|
||||
|
||||
SUBCASE("concurrent render calls") {
|
||||
// Multiple threads calling render concurrently should be safe (serialized
|
||||
// by mutex)
|
||||
auto counter_family = metric::create_counter("render_test", "Render test");
|
||||
auto counter = counter_family.create({});
|
||||
counter.inc(100);
|
||||
|
||||
std::vector<std::thread> threads;
|
||||
std::latch start_latch{num_threads};
|
||||
std::atomic<int> success_count{0};
|
||||
|
||||
for (int i = 0; i < num_threads; ++i) {
|
||||
threads.emplace_back([&]() {
|
||||
start_latch.arrive_and_wait();
|
||||
|
||||
ArenaAllocator arena;
|
||||
auto output = metric::render(arena);
|
||||
if (output.size() > 0) {
|
||||
success_count.fetch_add(1);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
for (auto &t : threads) {
|
||||
t.join();
|
||||
}
|
||||
|
||||
CHECK(success_count.load() == num_threads);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("error conditions") {
|
||||
SUBCASE("counter negative increment") {
|
||||
auto counter_family = metric::create_counter("error_counter", "Error test");
|
||||
auto counter = counter_family.create({});
|
||||
|
||||
// This should abort in debug builds due to validation
|
||||
// In release builds, behavior is undefined
|
||||
// counter.inc(-1.0); // Would abort
|
||||
}
|
||||
|
||||
SUBCASE("invalid metric names") {
|
||||
// These should abort due to validation
|
||||
// auto bad_counter = metric::create_counter("123invalid", "help"); // Would
|
||||
// abort auto bad_gauge = metric::create_gauge("invalid-name", "help"); //
|
||||
// Would abort
|
||||
}
|
||||
|
||||
SUBCASE("invalid label keys") {
|
||||
auto counter_family = metric::create_counter("valid_name", "help");
|
||||
|
||||
// This should abort due to label validation
|
||||
// auto counter = counter_family.create({{"123invalid", "value"}}); // Would
|
||||
// abort
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("memory management") {
|
||||
SUBCASE("arena allocation in render") {
|
||||
ArenaAllocator arena;
|
||||
auto initial_used = arena.used_bytes();
|
||||
|
||||
auto counter_family = metric::create_counter("memory_test", "Memory test");
|
||||
auto counter = counter_family.create(
|
||||
{{"large_label", "very_long_value_that_takes_space"}});
|
||||
counter.inc(42);
|
||||
|
||||
auto output = metric::render(arena);
|
||||
auto final_used = arena.used_bytes();
|
||||
|
||||
CHECK(output.size() > 0);
|
||||
CHECK(final_used > initial_used); // Arena was used for string allocation
|
||||
|
||||
// All string_views should point to arena memory
|
||||
for (const auto &line : output) {
|
||||
CHECK(line.size() > 0);
|
||||
}
|
||||
}
|
||||
|
||||
SUBCASE("arena reset behavior") {
|
||||
ArenaAllocator arena;
|
||||
|
||||
auto counter_family = metric::create_counter("reset_test", "Reset test");
|
||||
auto counter = counter_family.create({});
|
||||
counter.inc(1);
|
||||
|
||||
// Render multiple times with arena resets
|
||||
for (int i = 0; i < 5; ++i) {
|
||||
auto output = metric::render(arena);
|
||||
CHECK(output.size() > 0);
|
||||
arena.reset(); // Should not affect metric values, only arena memory
|
||||
}
|
||||
|
||||
// Final render should still work
|
||||
auto final_output = metric::render(arena);
|
||||
CHECK(final_output.size() > 0);
|
||||
}
|
||||
}
|
||||
2 todo.md
@@ -42,7 +42,7 @@
- [ ] Support for HTTP redirects (3xx responses) with redirect limits
- [ ] SSL/TLS support using OpenSSL for HTTPS connections
- [ ] Request/response logging and metrics integration
- [ ] Memory-efficient design with zero-copy where possible
- [ ] Memory-efficient design minimizing unnecessary copying
- [ ] Implement fake in-process S3 service using separate Server instance with S3 ConnectionHandler
- [ ] Use create_local_connection to get fd for in-process communication
- [ ] Implement `ListObjectsV2` API for object enumeration

@@ -155,7 +155,7 @@ def main():

    if violations:
        for v in violations:
            print(f"\n❌ {filepath}:{v['line']}:{v['column']}")
            print(f"\n❌ {filepath}:{v['line']}:{v['column']}:")
            print(f"  {v['type']} '{v['camelCase']}' should be '{v['snake_case']}'")
            print(f"  Context: {v['context']}")
        total_violations += len(violations)

@@ -401,6 +401,7 @@ private:
    double current_min = g_min_latency.load(std::memory_order_relaxed);
    while (latency < current_min &&
           !g_min_latency.compare_exchange_weak(current_min, latency,
                                                std::memory_order_relaxed,
                                                std::memory_order_relaxed)) {
      // Retry if another thread updated min_latency
    }
@@ -408,6 +409,7 @@ private:
    double current_max = g_max_latency.load(std::memory_order_relaxed);
    while (latency > current_max &&
           !g_max_latency.compare_exchange_weak(current_max, latency,
                                                std::memory_order_relaxed,
                                                std::memory_order_relaxed)) {
      // Retry if another thread updated max_latency
    }

@@ -637,7 +639,7 @@ int main(int argc, char *argv[]) {
      int epollfd = g_epoll_fds[i]; // Each thread uses its own epoll instance
      pthread_setname_np(pthread_self(),
                         ("network-" + std::to_string(i)).c_str());
      while (g_connect_threads.load() != 0) {
      while (g_connect_threads.load(std::memory_order_acquire) != 0) {
        struct epoll_event events[256]; // Use a reasonable max size
        int batch_size = std::min(int(sizeof(events) / sizeof(events[0])),
                                  g_config.event_batch_size);
@@ -779,7 +781,7 @@ int main(int argc, char *argv[]) {
          continue;
        }
      }
      g_connect_threads.fetch_sub(1);
      g_connect_threads.fetch_sub(1, std::memory_order_release);
    });
  }