diff --git a/CMakeLists.txt b/CMakeLists.txt
index 234562f..4a046d9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -186,6 +186,17 @@ target_compile_definitions(test_server_connection_return
                            PRIVATE DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN)
 target_compile_options(test_server_connection_return PRIVATE -UNDEBUG)
 
+# Metrics system unit tests (doctest); -UNDEBUG below keeps assert() active
+add_executable(test_metric tests/test_metric.cpp src/metric.cpp
+                           src/arena_allocator.cpp src/format.cpp)
+target_link_libraries(test_metric doctest::doctest Threads::Threads
+                      simdutf::simdutf weaseljson)
+target_include_directories(test_metric PRIVATE src)
+target_compile_options(test_metric PRIVATE -UNDEBUG)
+
+# Register with CTest under the name "metric_tests"
+add_test(NAME metric_tests COMMAND test_metric)
+
 add_executable(bench_arena_allocator benchmarks/bench_arena_allocator.cpp
                                      src/arena_allocator.cpp)
 target_link_libraries(bench_arena_allocator nanobench)
@@ -222,6 +233,16 @@ add_executable(bench_format_comparison benchmarks/bench_format_comparison.cpp
 target_link_libraries(bench_format_comparison nanobench)
 target_include_directories(bench_format_comparison PRIVATE src)
 
+# Metrics system benchmark (nanobench); builds the metric sources directly
+add_executable(bench_metric benchmarks/bench_metric.cpp src/metric.cpp
+                            src/arena_allocator.cpp src/format.cpp)
+target_link_libraries(bench_metric nanobench Threads::Threads simdutf::simdutf
+                      weaseljson)
+target_include_directories(bench_metric PRIVATE src)
+
+# Register benchmark with CTest (NOTE: each ctest run executes all benchmarks)
+add_test(NAME metric_benchmarks COMMAND bench_metric)
+
 # Debug tools
 add_executable(
   debug_arena tools/debug_arena.cpp src/json_commit_request_parser.cpp
diff --git a/benchmarks/bench_metric.cpp b/benchmarks/bench_metric.cpp
new file mode 100644
index 0000000..d05babc
--- /dev/null
+++ b/benchmarks/bench_metric.cpp
@@ -0,0 +1,312 @@
+#include <nanobench.h>
+
+#include "arena_allocator.hpp"
+#include "metric.hpp"
+
+#include <atomic>
+#include <chrono>
+#include <cmath>
+#include <latch>
+#include <random>
+#include <thread>
+#include <vector>
+
+// Benchmark fixture: owns metrics plus background threads that generate load
+struct ContentionEnvironment {
+  // Worker and render threads; joined by stop_background_threads()
+  std::vector<std::thread> background_threads;
+  std::atomic<bool> stop_flag{false};
+
+  // Families registered as bench_counter / bench_gauge / bench_histogram
+  metric::Family<metric::Counter> counter_family;
+  metric::Family<metric::Gauge> gauge_family;
+  metric::Family<metric::Histogram> histogram_family;
+
+  // Instances (label benchmark="contention") updated by the measured loops
+  metric::Counter counter;
+  metric::Gauge gauge;
+  metric::Histogram histogram;
+
+  ContentionEnvironment()
+      : counter_family(
+            metric::create_counter("bench_counter", "Benchmark counter")),
+        gauge_family(metric::create_gauge("bench_gauge", "Benchmark gauge")),
+        histogram_family(metric::create_histogram(
+            "bench_histogram", "Benchmark histogram",
+            std::initializer_list<double>{0.1, 0.5, 1.0, 2.5, 5.0})),
+        counter(counter_family.create({{"benchmark", "contention"}})),
+        gauge(gauge_family.create({{"benchmark", "contention"}})),
+        histogram(histogram_family.create({{"benchmark", "contention"}})) {}
+  // Spawns num_threads writers, each updating its own labelled metrics
+  void start_background_contention(int num_threads = 4) {
+    stop_flag.store(false);
+
+    for (int i = 0; i < num_threads; ++i) {
+      background_threads.emplace_back([this, i]() {
+        // Per-thread instances, all keyed by the same "bg_thread" label
+        auto bg_counter =
+            counter_family.create({{"bg_thread", std::to_string(i)}});
+        auto bg_gauge = gauge_family.create({{"bg_thread", std::to_string(i)}});
+        auto bg_histogram =
+            histogram_family.create({{"bg_thread", std::to_string(i)}});
+
+        std::mt19937 rng(i);
+        std::uniform_real_distribution<double> dist(0.0, 10.0);
+
+        while (!stop_flag.load(std::memory_order_relaxed)) {
+          // Simulate mixed workload
+          bg_counter.inc(1.0);
+          bg_gauge.set(dist(rng));
+          bg_histogram.observe(dist(rng));
+
+          // Small delay to avoid spinning too fast
+          std::this_thread::sleep_for(std::chrono::microseconds(1));
+        }
+      });
+    }
+  }
+  // Spawns one thread that repeatedly renders all metrics into a local arena
+  void start_render_thread() {
+    background_threads.emplace_back([this]() {
+      ArenaAllocator arena;
+
+      while (!stop_flag.load(std::memory_order_relaxed)) {
+        auto output = metric::render(arena);
+        static_cast<void>(output); // Suppress unused variable warning
+        arena.reset();
+
+        std::this_thread::sleep_for(std::chrono::microseconds(100));
+      }
+    });
+  }
+  // Raises the stop flag, then joins and forgets every background thread
+  void stop_background_threads() {
+    stop_flag.store(true);
+    for (auto &t : background_threads) {
+      if (t.joinable()) {
+        t.join();
+      }
+    }
+    background_threads.clear();
+  }
+
+  ~ContentionEnvironment() { stop_background_threads(); }
+};
+
+int main() {
+  ankerl::nanobench::Bench bench;
+  bench.title("WeaselDB Metrics Performance").unit("operation").warmup(1000);
+  // Every section below registers its own metrics in the global registry.
+  // Baseline: single thread, no background writers and no concurrent render
+  {
+    auto counter_family =
+        metric::create_counter("baseline_counter", "Baseline counter");
+    auto counter = counter_family.create({{"type", "baseline"}});
+
+    bench.run("counter.inc() - no contention", [&]() {
+      counter.inc(1.0);
+      ankerl::nanobench::doNotOptimizeAway(counter);
+    });
+
+    auto gauge_family =
+        metric::create_gauge("baseline_gauge", "Baseline gauge");
+    auto gauge = gauge_family.create({{"type", "baseline"}});
+
+    bench.run("gauge.inc() - no contention", [&]() {
+      gauge.inc(1.0);
+      ankerl::nanobench::doNotOptimizeAway(gauge);
+    });
+
+    bench.run("gauge.set() - no contention", [&]() {
+      gauge.set(42.0);
+      ankerl::nanobench::doNotOptimizeAway(gauge);
+    });
+
+    auto histogram_family =
+        metric::create_histogram("baseline_histogram", "Baseline histogram",
+                                 std::initializer_list<double>{0.1, 0.5, 1.0});
+    auto histogram = histogram_family.create({{"type", "baseline"}});
+
+    bench.run("histogram.observe() - no contention", [&]() {
+      histogram.observe(0.5);
+      ankerl::nanobench::doNotOptimizeAway(histogram);
+    });
+  }
+
+  // High contention with background threads
+  {
+    ContentionEnvironment env;
+
+    // Start background threads creating contention
+    env.start_background_contention(8);
+
+    std::this_thread::sleep_for(
+        std::chrono::milliseconds(100)); // Let background threads start
+
+    bench.run("counter.inc() - 8 background threads", [&]() {
+      env.counter.inc(1.0);
+      ankerl::nanobench::doNotOptimizeAway(env.counter);
+    });
+
+    bench.run("gauge.inc() - 8 background threads", [&]() {
+      env.gauge.inc(1.0);
+      ankerl::nanobench::doNotOptimizeAway(env.gauge);
+    });
+
+    bench.run("gauge.set() - 8 background threads", [&]() {
+      env.gauge.set(42.0);
+      ankerl::nanobench::doNotOptimizeAway(env.gauge);
+    });
+
+    bench.run("histogram.observe() - 8 background threads", [&]() {
+      env.histogram.observe(1.5);
+      ankerl::nanobench::doNotOptimizeAway(env.histogram);
+    });
+  }
+
+  // Concurrent render contention
+  {
+    ContentionEnvironment env;
+
+    // Start background threads + render thread
+    env.start_background_contention(4);
+    env.start_render_thread();
+
+    std::this_thread::sleep_for(std::chrono::milliseconds(100));
+
+    bench.run("counter.inc() - with concurrent render", [&]() {
+      env.counter.inc(1.0);
+      ankerl::nanobench::doNotOptimizeAway(env.counter);
+    });
+
+    bench.run("gauge.inc() - with concurrent render", [&]() {
+      env.gauge.inc(1.0);
+      ankerl::nanobench::doNotOptimizeAway(env.gauge);
+    });
+
+    bench.run("histogram.observe() - with concurrent render", [&]() {
+      env.histogram.observe(2.0);
+      ankerl::nanobench::doNotOptimizeAway(env.histogram);
+    });
+  }
+
+  // Shared gauge contention
+  {
+    // Test the multi-writer CAS behavior of gauges
+    auto gauge_family =
+        metric::create_gauge("shared_gauge", "Shared gauge test");
+    auto shared_gauge = gauge_family.create({{"shared", "true"}});
+
+    // Background threads all writing to the SAME gauge (high CAS contention)
+    std::atomic<bool> stop_shared{false};
+    std::vector<std::thread> shared_threads;
+
+    for (int i = 0; i < 8; ++i) {
+      shared_threads.emplace_back([&shared_gauge, &stop_shared]() {
+        while (!stop_shared.load(std::memory_order_relaxed)) {
+          shared_gauge.inc(1.0);
+          std::this_thread::sleep_for(std::chrono::nanoseconds(100));
+        }
+      });
+    }
+
+    std::this_thread::sleep_for(std::chrono::milliseconds(50));
+
+    bench.run("gauge.inc() - 8 threads same gauge (CAS contention)", [&]() {
+      shared_gauge.inc(1.0);
+      ankerl::nanobench::doNotOptimizeAway(shared_gauge);
+    });
+
+    stop_shared.store(true);
+    for (auto &t : shared_threads) {
+      t.join();
+    }
+  }
+
+  // Render performance scaling
+  {
+    // Test render performance as number of metrics increases
+    std::vector<metric::Counter> counters;
+    std::vector<metric::Gauge> gauges;
+    std::vector<metric::Histogram> histograms;
+
+    auto counter_family =
+        metric::create_counter("scale_counter", "Scale counter");
+    auto gauge_family = metric::create_gauge("scale_gauge", "Scale gauge");
+    auto histogram_family =
+        metric::create_histogram("scale_histogram", "Scale histogram",
+                                 std::initializer_list<double>{0.1, 0.5, 1.0});
+
+    // Create varying numbers of metrics
+    for (int scale : {10, 100, 1000}) {
+      // Families are reused: only ids not created by a smaller scale are added
+      // (render() also walks every metric registered earlier in this program)
+      for (int i = static_cast<int>(counters.size()); i < scale; ++i) {
+        counters.emplace_back(
+            counter_family.create({{"id", std::to_string(i)}}));
+        gauges.emplace_back(gauge_family.create({{"id", std::to_string(i)}}));
+        histograms.emplace_back(
+            histogram_family.create({{"id", std::to_string(i)}}));
+
+        // Set some values
+        counters.back().inc(static_cast<double>(i));
+        gauges.back().set(static_cast<double>(i * 2));
+        histograms.back().observe(static_cast<double>(i) * 0.1);
+      }
+
+      ArenaAllocator arena;
+      std::string bench_name =
+          "render() - " + std::to_string(scale) + " metrics each type";
+
+      bench.run(bench_name, [&]() {
+        auto output = metric::render(arena);
+        ankerl::nanobench::doNotOptimizeAway(output);
+        arena.reset();
+      });
+    }
+  }
+
+  // Callback metrics performance
+  {
+    auto counter_family =
+        metric::create_counter("callback_counter", "Callback counter");
+    auto gauge_family =
+        metric::create_gauge("callback_gauge", "Callback gauge");
+    // static: the registry keeps the callbacks alive after this scope ends
+    static std::atomic<double> counter_value{0};
+    static std::atomic<double> gauge_value{100};
+
+    // Register callbacks (captureless: they only read the statics above)
+    counter_family.register_callback(
+        {{"type", "callback"}}, []() {
+          return counter_value.load(std::memory_order_relaxed);
+        });
+
+    gauge_family.register_callback({{"type", "callback"}}, []() {
+      return gauge_value.load(std::memory_order_relaxed);
+    });
+
+    // Background thread updating callback values
+    std::atomic<bool> stop_callback{false};
+    std::thread callback_updater([&]() {
+      while (!stop_callback.load()) {
+        counter_value.fetch_add(1);
+        gauge_value.store(gauge_value.load() + 1);
+        std::this_thread::sleep_for(std::chrono::microseconds(10));
+      }
+    });
+
+    ArenaAllocator arena;
+
+    bench.run("render() - with callback metrics", [&]() {
+      auto output = metric::render(arena);
+      ankerl::nanobench::doNotOptimizeAway(output);
+      arena.reset();
+    });
+
+    stop_callback.store(true);
+    callback_updater.join();
+  }
+
+  return 0;
+}
diff --git a/src/metric.cpp b/src/metric.cpp
index 26451e9..4aa5e74 100644
--- a/src/metric.cpp
+++ b/src/metric.cpp
@@ -1,6 +1,29 @@
 #include "metric.hpp"
+
+#include <algorithm>
+#include <atomic>
+#include <bit>
+#include <cassert>
+#include <cctype>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <functional>
+#include <limits>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <thread>
+#include <type_traits>
+#include <unordered_map>
+#include <vector>
+
 #include <simdutf.h>
 
+#include "format.hpp"
+
 // WeaselDB Metrics System Design:
 //
 // THREADING MODEL:
@@ -18,23 +41,6 @@
 // - Global metrics (gauges) persist for application lifetime
 // - Histogram buckets are sorted, deduplicated, and include +Inf bucket
 
-#include <algorithm>
-#include <atomic>
-#include <bit>
-#include <cassert>
-#include <cctype>
-#include <cstdint>
-#include <cstdio>
-#include <cstdlib>
-#include <limits>
-#include <memory>
-#include <mutex>
-#include <string>
-#include <thread>
-#include <type_traits>
-#include <unordered_map>
-#include <vector>
-
 namespace metric {
 
 // Validation helper that works in both debug and release builds
@@ -76,7 +82,15 @@ struct LabelsKey {
 namespace std {
 template <> struct hash<metric::LabelsKey> {
   std::size_t operator()(const metric::LabelsKey &k) const {
-    return std::hash<decltype(k.labels)>{}(k.labels);
+    // Order-sensitive fold of every label key and value into one seed.
+    std::size_t seed = 0;
+    const auto mix = [&seed](const std::string &text) {
+      seed ^= std::hash<std::string>{}(text) + 0x9e3779b9 + (seed << 6) +
+              (seed >> 2);
+    };
+    for (const auto &label : k.labels)
+      mix(label.first), mix(label.second);
+    return seed;
   }
 };
 } // namespace std
@@ -262,10 +276,9 @@ struct Metric {
   }
 };
 
-void Counter::inc(double x) {
-  validate_or_abort(x >= 0, "counter increment must be >= 0",
-                    std::to_string(x).c_str());
+Counter::Counter() = default;
 
+void Counter::inc(double x) {
   // DESIGN: Single writer per thread allows simple load-modify-store
   // No CAS loop needed since only one thread writes to this counter
   auto current_value =
@@ -273,12 +286,16 @@ void Counter::inc(double x) {
   auto new_value = current_value + x;
 
   // Validate monotonic property (counter never decreases)
-  validate_or_abort(new_value >= current_value,
-                    "counter value overflow/wraparound detected",
-                    std::to_string(new_value).c_str());
+  if (new_value < current_value) [[unlikely]] {
+    validate_or_abort(false, "counter value overflow/wraparound detected",
+                      std::to_string(new_value).c_str());
+  }
 
   p->value.store(std::bit_cast<uint64_t>(new_value), std::memory_order_relaxed);
 }
+
+Gauge::Gauge() = default;
+
 void Gauge::inc(double x) {
   // Lock-free increment using CAS loop
   uint64_t expected = p->value.load(std::memory_order_relaxed);
@@ -305,6 +322,9 @@ void Gauge::set(double x) {
   // Simple atomic store for set operation
   p->value.store(std::bit_cast<uint64_t>(x), std::memory_order_relaxed);
 }
+
+Histogram::Histogram() = default;
+
 void Histogram::observe(double x) {
   assert(p->thresholds.size() == p->counts.size());
 
@@ -324,6 +344,10 @@ void Histogram::observe(double x) {
   p->observations.fetch_add(1, std::memory_order_relaxed);
 }
 
+template <> Family<Counter>::Family() = default; // private; used by create_*()
+template <> Family<Gauge>::Family() = default;
+template <> Family<Histogram>::Family() = default;
+
 template <>
 Counter Family<Counter>::create(
     std::vector<std::pair<std::string, std::string>> labels) {
@@ -352,6 +376,11 @@ Family<Counter> create_counter(std::string name, std::string help) {
     familyPtr = std::make_unique<Family<Counter>::State>();
     familyPtr->name = std::move(name);
     familyPtr->help = std::move(help);
+  } else {
+    validate_or_abort(
+        familyPtr->help == help,
+        "metric family already registered with different help text",
+        name.c_str());
   }
   Family<Counter> family;
   family.p = familyPtr.get();
@@ -368,6 +397,11 @@ Family<Gauge> create_gauge(std::string name, std::string help) {
     familyPtr = std::make_unique<Family<Gauge>::State>();
     familyPtr->name = std::move(name);
     familyPtr->help = std::move(help);
+  } else {
+    validate_or_abort(
+        familyPtr->help == help,
+        "metric family already registered with different help text",
+        name.c_str());
   }
   Family<Gauge> family;
   family.p = familyPtr.get();
@@ -375,7 +409,7 @@ Family<Gauge> create_gauge(std::string name, std::string help) {
 }
 
 Family<Histogram> create_histogram(std::string name, std::string help,
-                                   std::initializer_list<double> buckets) {
+                                   std::span<const double> buckets) {
   validate_or_abort(is_valid_metric_name(name), "invalid histogram name",
                     name.c_str());
 
@@ -387,7 +421,7 @@ Family<Histogram> create_histogram(std::string name, std::string help,
     familyPtr->help = std::move(help);
 
     // DESIGN: Prometheus-compatible histogram buckets
-    familyPtr->buckets = std::vector<double>(buckets);
+    familyPtr->buckets = std::vector<double>(buckets.begin(), buckets.end());
     std::sort(familyPtr->buckets.begin(), familyPtr->buckets.end());
     familyPtr->buckets.erase(
         std::unique(familyPtr->buckets.begin(), familyPtr->buckets.end()),
@@ -397,12 +431,66 @@ Family<Histogram> create_histogram(std::string name, std::string help,
         familyPtr->buckets.back() != std::numeric_limits<double>::infinity()) {
       familyPtr->buckets.push_back(std::numeric_limits<double>::infinity());
     }
+  } else {
+    validate_or_abort(
+        familyPtr->help == help,
+        "metric family already registered with different help text",
+        name.c_str());
+    std::vector<double> new_buckets_vec(buckets.begin(), buckets.end());
+    std::sort(new_buckets_vec.begin(), new_buckets_vec.end());
+    new_buckets_vec.erase(
+        std::unique(new_buckets_vec.begin(), new_buckets_vec.end()),
+        new_buckets_vec.end());
+    if (new_buckets_vec.empty() ||
+        new_buckets_vec.back() != std::numeric_limits<double>::infinity()) {
+      new_buckets_vec.push_back(std::numeric_limits<double>::infinity());
+    }
+    validate_or_abort(familyPtr->buckets == new_buckets_vec,
+                      "metric family already registered with different buckets",
+                      name.c_str());
   }
   Family<Histogram> family;
   family.p = familyPtr.get();
   return family;
 }
 
+std::vector<double> linear_buckets(double start, double width, int count) {
+  // Reject a non-positive width or a negative count up front.
+  validate_or_abort(width > 0, "linear bucket width must be positive",
+                    std::to_string(width).c_str());
+  validate_or_abort(count >= 0, "linear bucket count must be non-negative",
+                    std::to_string(count).c_str());
+
+  // Bound k is start + k * width, computed directly rather than accumulated.
+  std::vector<double> bounds;
+  bounds.reserve(count);
+  for (int step = 0; step < count; ++step) {
+    bounds.emplace_back(start + step * width);
+  }
+  return bounds;
+}
+
+std::vector<double> exponential_buckets(double start, double factor,
+                                        int count) {
+  // Preconditions: start > 0, factor > 1, count >= 0.
+  validate_or_abort(start > 0, "exponential bucket start must be positive",
+                    std::to_string(start).c_str());
+  validate_or_abort(factor > 1, "exponential bucket factor must be > 1",
+                    std::to_string(factor).c_str());
+  validate_or_abort(count >= 0, "exponential bucket count must be non-negative",
+                    std::to_string(count).c_str());
+
+  std::vector<double> bounds;
+  bounds.reserve(count);
+  // Each bound is the previous one times `factor` (repeated multiplication).
+  double bound = start;
+  for (int remaining = count; remaining > 0; --remaining) {
+    bounds.push_back(bound);
+    bound *= factor;
+  }
+  return bounds;
+}
+
 // Prometheus validation functions
 // Metric names must match [a-zA-Z_:][a-zA-Z0-9_:]*
 bool is_valid_metric_name(const std::string &name) {
@@ -461,13 +549,241 @@ bool is_valid_label_value(const std::string &value) {
 }
 
 std::span<std::string_view> render(ArenaAllocator &arena) {
-  // TODO: Implement Prometheus text format rendering
-  // All string data should be allocated in the arena and returned as
-  // string_views
-  static std::string_view empty_result = "";
-  return std::span<std::string_view>(&empty_result, 0);
+  std::unique_lock<std::mutex> _{Metric::mutex};
+  // Lock is held for the whole render, including while callbacks are invoked.
+  std::vector<std::string_view> output;
+  // Builds {key="value",...} in the arena; returns "" for an empty label set.
+  auto format_labels =
+      [&](const std::vector<std::pair<std::string_view, std::string_view>>
+              &labels) -> std::string_view {
+    if (labels.empty()) {
+      return "";
+    }
+
+    size_t required_size = 2; // opening and closing braces
+    for (const auto &[key, value] : labels) {
+      required_size += key.length() + 3 + value.length(); // key, =, two quotes
+      for (char c : value) {
+        if (c == '\\' || c == '"' || c == '\n') {
+          required_size++; // room for the escaping backslash
+        }
+      }
+    }
+    if (!labels.empty()) { // always true: the empty case returned above
+      required_size += labels.size() - 1; // commas between pairs
+    }
+
+    char *buf = arena.allocate<char>(required_size);
+    char *p = buf;
+
+    *p++ = '{';
+    for (size_t i = 0; i < labels.size(); ++i) {
+      if (i > 0)
+        *p++ = ',';
+      std::memcpy(p, labels[i].first.data(), labels[i].first.length());
+      p += labels[i].first.length();
+      *p++ = '=';
+      *p++ = '"';
+      for (char c : labels[i].second) {
+        switch (c) {
+        case '\\':
+          *p++ = '\\';
+          *p++ = '\\';
+          break;
+        case '"':
+          *p++ = '\\';
+          *p++ = '"';
+          break;
+        case '\n':
+          *p++ = '\\';
+          *p++ = 'n';
+          break;
+        default:
+          *p++ = c;
+          break;
+        }
+      }
+      *p++ = '"';
+    }
+    *p++ = '}';
+    return std::string_view(buf, p - buf);
+  };
+
+  // Counters: HELP/TYPE header, callback samples, then per-thread instances
+  for (const auto &[name, family] : Metric::counterFamilies) {
+    output.push_back(
+        format(arena, "# HELP %s %s\n", name.c_str(), family->help.c_str()));
+    output.push_back(format(arena, "# TYPE %s counter\n", name.c_str()));
+    // NOTE(review): help text is written verbatim (no escaping) - confirm OK.
+    std::vector<std::pair<std::string_view, std::string_view>> labels_sv;
+    for (const auto &[labels_key, callback] : family->callbacks) {
+      auto value = callback();
+      labels_sv.clear();
+      for (const auto &l : labels_key.labels)
+        labels_sv.push_back(l);
+      auto labels = format_labels(labels_sv);
+      output.push_back(format(arena, "%.*s%.*s %.17g\n",
+                              static_cast<int>(name.length()), name.data(),
+                              static_cast<int>(labels.length()), labels.data(),
+                              value));
+    }
+    // Instances are stored per creating thread; each one gets its own line.
+    for (const auto &[thread_id, per_thread] : family->perThreadState) {
+      for (const auto &[labels_key, instance] : per_thread.instances) {
+        auto value = std::bit_cast<double>(
+            instance->value.load(std::memory_order_relaxed));
+        labels_sv.clear();
+        for (const auto &l : labels_key.labels)
+          labels_sv.push_back(l);
+        auto labels = format_labels(labels_sv);
+        output.push_back(format(arena, "%.*s%.*s %.17g\n",
+                                static_cast<int>(name.length()), name.data(),
+                                static_cast<int>(labels.length()),
+                                labels.data(), value));
+      }
+    }
+  }
+
+  // Gauges: same layout, but instances are stored directly on the family
+  for (const auto &[name, family] : Metric::gaugeFamilies) {
+    output.push_back(
+        format(arena, "# HELP %s %s\n", name.c_str(), family->help.c_str()));
+    output.push_back(format(arena, "# TYPE %s gauge\n", name.c_str()));
+
+    std::vector<std::pair<std::string_view, std::string_view>> labels_sv;
+    for (const auto &[labels_key, callback] : family->callbacks) {
+      auto value = callback();
+      labels_sv.clear();
+      for (const auto &l : labels_key.labels)
+        labels_sv.push_back(l);
+      auto labels = format_labels(labels_sv);
+      output.push_back(format(arena, "%.*s%.*s %.17g\n",
+                              static_cast<int>(name.length()), name.data(),
+                              static_cast<int>(labels.length()), labels.data(),
+                              value));
+    }
+
+    for (const auto &[labels_key, instance] : family->instances) {
+      auto value = std::bit_cast<double>(
+          instance->value.load(std::memory_order_relaxed));
+      labels_sv.clear();
+      for (const auto &l : labels_key.labels)
+        labels_sv.push_back(l);
+      auto labels = format_labels(labels_sv);
+      output.push_back(format(arena, "%.*s%.*s %.17g\n",
+                              static_cast<int>(name.length()), name.data(),
+                              static_cast<int>(labels.length()), labels.data(),
+                              value));
+    }
+  }
+
+  // Histograms: per instance, one _bucket line per threshold, then _sum/_count
+  for (const auto &[name, family] : Metric::histogramFamilies) {
+    output.push_back(
+        format(arena, "# HELP %s %s\n", name.c_str(), family->help.c_str()));
+    output.push_back(format(arena, "# TYPE %s histogram\n", name.c_str()));
+
+    std::vector<std::pair<std::string_view, std::string_view>> bucket_labels_sv;
+    for (const auto &[thread_id, per_thread] : family->perThreadState) {
+      for (const auto &[labels_key, instance] : per_thread.instances) {
+        for (size_t i = 0; i < instance->thresholds.size(); ++i) {
+          bucket_labels_sv.clear();
+          for (const auto &l : labels_key.labels)
+            bucket_labels_sv.push_back(l);
+          // Append the "le" label after the user labels for this bucket.
+          if (std::isinf(instance->thresholds[i])) {
+            bucket_labels_sv.push_back({"le", "+Inf"});
+          } else {
+            bucket_labels_sv.push_back(
+                {"le", format(arena, "%.17g", instance->thresholds[i])});
+          }
+          auto count = instance->counts[i].load(std::memory_order_relaxed);
+          auto labels = format_labels(bucket_labels_sv);
+          output.push_back(format(arena, "%s_bucket%.*s %llu\n", name.c_str(),
+                                  static_cast<int>(labels.length()),
+                                  labels.data(),
+                                  static_cast<unsigned long long>(count)));
+        }
+        // _sum and _count are emitted with the user labels only (no "le").
+        auto sum_value = std::bit_cast<double>(
+            instance->sum.load(std::memory_order_relaxed));
+        bucket_labels_sv.clear();
+        for (const auto &l : labels_key.labels)
+          bucket_labels_sv.push_back(l);
+        auto labels = format_labels(bucket_labels_sv);
+        output.push_back(format(arena, "%s_sum%.*s %.17g\n", name.c_str(),
+                                static_cast<int>(labels.length()),
+                                labels.data(), sum_value));
+
+        auto count_value =
+            instance->observations.load(std::memory_order_relaxed);
+        output.push_back(format(arena, "%s_count%.*s %llu\n", name.c_str(),
+                                static_cast<int>(labels.length()),
+                                labels.data(),
+                                static_cast<unsigned long long>(count_value)));
+      }
+    }
+  }
+  // Copy the chunk list from the local vector into arena-allocated storage.
+  auto result = arena.allocate<std::string_view>(output.size());
+  std::copy(output.begin(), output.end(), result);
+  return std::span<std::string_view>(result, output.size());
 }
 
+// Template specialization implementations for register_callback
+template <>
+void Family<Counter>::register_callback(
+    std::vector<std::pair<std::string, std::string>> labels,
+    MetricCallback<Counter> callback) {
+  // Hold Metric::mutex for the whole registration.
+  std::unique_lock<std::mutex> lock{Metric::mutex};
+  LabelsKey key{std::move(labels)};
+  // Context for abort messages: the first label key, if there is one.
+  const char *context =
+      key.labels.empty() ? "(no labels)" : key.labels[0].first.c_str();
+
+  // The label set must not match a counter handed out by create() on any thread.
+  for (const auto &[thread_id, per_thread] : p->perThreadState) {
+    const auto &instances = per_thread.instances;
+    validate_or_abort(instances.find(key) == instances.end(),
+                      "labels already registered as static instance", context);
+  }
+
+  // Nor may a callback already exist for the same label set.
+  validate_or_abort(p->callbacks.find(key) == p->callbacks.end(),
+                    "callback already registered for labels", context);
+  p->callbacks[std::move(key)] = std::move(callback);
+}
+
+template <>
+void Family<Gauge>::register_callback(
+    std::vector<std::pair<std::string, std::string>> labels,
+    MetricCallback<Gauge> callback) {
+  // Hold Metric::mutex for the whole registration.
+  std::unique_lock<std::mutex> lock{Metric::mutex};
+  LabelsKey key{std::move(labels)};
+
+  // Context for abort messages: the first label key, if there is one.
+  const char *context =
+      key.labels.empty() ? "(no labels)" : key.labels[0].first.c_str();
+
+  // The label set must not match a gauge already handed out by create().
+  validate_or_abort(p->instances.find(key) == p->instances.end(),
+                    "labels already registered as static instance", context);
+
+  // Nor may a callback already exist for the same label set.
+  validate_or_abort(p->callbacks.find(key) == p->callbacks.end(),
+                    "callback already registered for labels", context);
+  p->callbacks[std::move(key)] = std::move(callback);
+}
+
+// Explicit instantiations of the register_callback members specialized above
+template void Family<Counter>::register_callback(
+    std::vector<std::pair<std::string, std::string>>, MetricCallback<Counter>);
+
+template void Family<Gauge>::register_callback(
+    std::vector<std::pair<std::string, std::string>>, MetricCallback<Gauge>);
+
 // Static member definitions
 std::mutex Metric::mutex;
 std::unordered_map<std::string, std::unique_ptr<Family<Counter>::State>>
diff --git a/src/metric.hpp b/src/metric.hpp
index 4113065..fb59a9f 100644
--- a/src/metric.hpp
+++ b/src/metric.hpp
@@ -35,15 +35,26 @@
 //   histogram_family.create({{"endpoint", "/api"}});  // Bound to this thread
 //   histogram.observe(0.25);  // ONLY call from creating thread
 
-#include "arena_allocator.hpp"
+#include <functional>
 #include <initializer_list>
 #include <span>
 #include <string>
 #include <type_traits>
 #include <vector>
 
+#include "arena_allocator.hpp"
+
 namespace metric {
 
+// Forward declarations
+template <typename T> struct Family;
+
+// Callback function type for dynamic metric values
+// Called during render() to get current metric value
+// THREAD SAFETY: May be called from arbitrary thread, but serialized by
+// render() mutex - no need to be thread-safe internally
+template <typename T> using MetricCallback = std::function<double()>;
+
 // Counter: Monotonically increasing metric with single-writer semantics
 // Use for: request counts, error counts, bytes processed, etc.
 //
@@ -115,13 +126,19 @@ template <class T> struct Family {
   // OK: Multiple calls with same labels return same instance (idempotent)
   T create(std::vector<std::pair<std::string, std::string>> labels);
 
+  // Register callback-based metric (Counter and Gauge only)
+  // Validates that label set isn't already taken
+  void
+  register_callback(std::vector<std::pair<std::string, std::string>> labels,
+                    MetricCallback<T> callback);
+
 private:
   Family();
   friend struct Metric;
   friend Family<Counter> create_counter(std::string, std::string);
   friend Family<Gauge> create_gauge(std::string, std::string);
   friend Family<Histogram> create_histogram(std::string, std::string,
-                                            std::initializer_list<double>);
+                                            std::span<const double>);
 
   struct State;
   State *p;
@@ -131,15 +148,33 @@ private:
 // IMPORTANT: name and help must point to static memory (string literals)
 
 // Create counter family (monotonically increasing values)
+// ERROR: Aborts if family with same name is registered with different help
+// text.
 Family<Counter> create_counter(std::string name, std::string help);
 
 // Create gauge family (can increase/decrease)
+// ERROR: Aborts if family with same name is registered with different help
+// text.
 Family<Gauge> create_gauge(std::string name, std::string help);
 
 // Create histogram family with custom buckets
 // Buckets will be sorted, deduplicated, and +Inf will be added automatically
+// ERROR: Aborts if family with same name is registered with different help text
+// or buckets.
 Family<Histogram> create_histogram(std::string name, std::string help,
-                                   std::initializer_list<double> buckets);
+                                   std::span<const double> buckets);
+
+// Helper functions for generating standard histogram buckets
+// Following Prometheus client library conventions
+
+// Generate linear buckets: start, start+width, ..., start+(count-1)*width.
+// Example: linear_buckets(0, 10, 5) = {0, 10, 20, 30, 40}
+std::vector<double> linear_buckets(double start, double width, int count);
+
+// Generate exponential buckets: start, start*factor, start*factor^2, ...,
+// start*factor^(count-1).
+// Example: exponential_buckets(1, 2, 5) = {1, 2, 4, 8, 16}
+std::vector<double> exponential_buckets(double start, double factor, int count);
 
 // Render all metrics in Prometheus text format
 // Returns chunks of Prometheus exposition format (includes # HELP and # TYPE
@@ -155,33 +190,7 @@ bool is_valid_metric_name(const std::string &name);
 bool is_valid_label_key(const std::string &key);
 bool is_valid_label_value(const std::string &value);
 
-// Callback function type for dynamic metric values
-// Called during render() to get current metric value
-// THREAD SAFETY: May be called from arbitrary thread, but serialized by
-// render() mutex
-// - no need to be thread-safe internally
-template <typename T> using MetricCallback = std::function<double()>;
-
-// Register callback-based metric to Family
-// Validates that label set isn't already taken by either:
-// - A previous register_callback() call (callbacks must be unique)
-// - A create() call (static and callback metrics cannot coexist for same
-// labels)
-//
-// Similarly, create() will validate that label set isn't already registered as
-// callback Note: create() can be called multiple times with same labels
-// (returns same instance)
-template <>
-void Family<Counter>::register_callback(
-    std::vector<std::pair<std::string, std::string>> labels,
-    MetricCallback<Counter> callback);
-
-template <>
-void Family<Gauge>::register_callback(
-    std::vector<std::pair<std::string, std::string>> labels,
-    MetricCallback<Gauge> callback);
-
 // Note: Histograms do not support callbacks due to their multi-value nature
 // (buckets + sum + count). Use static histogram metrics only.
 
-} // namespace metric
+} // namespace metric
\ No newline at end of file
diff --git a/tests/test_metric.cpp b/tests/test_metric.cpp
new file mode 100644
index 0000000..d0607af
--- /dev/null
+++ b/tests/test_metric.cpp
@@ -0,0 +1,551 @@
+#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
+#include <doctest/doctest.h>
+
+#include "arena_allocator.hpp"
+#include "metric.hpp"
+
+#include <atomic>
+#include <chrono>
+#include <cmath>
+#include <latch>
+#include <thread>
+#include <vector>
+
+TEST_CASE("metric validation functions") {
+  SUBCASE("valid metric names") {
+    CHECK(metric::is_valid_metric_name("valid_name"));
+    CHECK(metric::is_valid_metric_name("ValidName"));
+    CHECK(metric::is_valid_metric_name("valid:name"));
+    CHECK(metric::is_valid_metric_name("_valid"));
+    CHECK(metric::is_valid_metric_name("valid_123"));
+    CHECK(metric::is_valid_metric_name("prometheus_metric_name"));
+  }
+  // Rejected names: empty, leading digit, or containing '-', '.', or a space.
+  SUBCASE("invalid metric names") {
+    CHECK_FALSE(metric::is_valid_metric_name(""));
+    CHECK_FALSE(metric::is_valid_metric_name("123invalid"));
+    CHECK_FALSE(metric::is_valid_metric_name("invalid-name"));
+    CHECK_FALSE(metric::is_valid_metric_name("invalid.name"));
+    CHECK_FALSE(metric::is_valid_metric_name("invalid name"));
+  }
+  // Unlike metric names, label keys may not contain ':' (checked further down).
+  SUBCASE("valid label keys") {
+    CHECK(metric::is_valid_label_key("valid_key"));
+    CHECK(metric::is_valid_label_key("ValidKey"));
+    CHECK(metric::is_valid_label_key("valid123"));
+    CHECK(metric::is_valid_label_key("_valid"));
+  }
+  // Rejected keys: empty, leading digit, ':' or '-', or a leading "__".
+  SUBCASE("invalid label keys") {
+    CHECK_FALSE(metric::is_valid_label_key(""));
+    CHECK_FALSE(metric::is_valid_label_key("123invalid"));
+    CHECK_FALSE(metric::is_valid_label_key("invalid:key"));
+    CHECK_FALSE(metric::is_valid_label_key("invalid-key"));
+    CHECK_FALSE(metric::is_valid_label_key("__reserved"));
+    CHECK_FALSE(metric::is_valid_label_key("__internal"));
+  }
+  // Every value tried here is accepted, including empty and non-ASCII text.
+  SUBCASE("valid label values") {
+    CHECK(metric::is_valid_label_value("any_value"));
+    CHECK(metric::is_valid_label_value("123"));
+    CHECK(metric::is_valid_label_value("special-chars.allowed"));
+    CHECK(metric::is_valid_label_value(""));
+    CHECK(metric::is_valid_label_value("unicode测试"));
+  }
+}
+
+TEST_CASE("counter basic functionality") {
+  auto counter_family =
+      metric::create_counter("test_counter", "Test counter help");
+  // NOTE(review): smoke tests only -- no counter value is asserted below.
+  SUBCASE("create counter with no labels") {
+    auto counter = counter_family.create({});
+    counter.inc(1.0);
+    counter.inc(2.5);
+    counter.inc(); // Default increment of 1.0
+  }
+
+  SUBCASE("create counter with labels") {
+    auto counter =
+        counter_family.create({{"method", "GET"}, {"status", "200"}});
+    counter.inc(5.0);
+
+    // Same labels should return same instance (idempotent)
+    auto counter2 =
+        counter_family.create({{"method", "GET"}, {"status", "200"}});
+    counter2.inc(3.0);
+  }
+
+  SUBCASE("label sorting") {
+    // Labels should be sorted by key
+    auto counter1 =
+        counter_family.create({{"z_key", "value"}, {"a_key", "value"}});
+    auto counter2 =
+        counter_family.create({{"a_key", "value"}, {"z_key", "value"}});
+
+    // These should be the same instance due to label sorting
+    counter1.inc(1.0);
+    counter2.inc(2.0); // Should add to same counter
+  }
+}
+
+TEST_CASE("gauge basic functionality") {
+  auto gauge_family = metric::create_gauge("test_gauge", "Test gauge help");
+  // NOTE(review): smoke tests only -- no gauge value is asserted below.
+  SUBCASE("gauge operations") {
+    auto gauge = gauge_family.create({{"instance", "test"}});
+
+    gauge.set(10.0);
+    gauge.inc(5.0);
+    gauge.dec(3.0);
+    gauge.inc(); // Default increment
+    gauge.dec(); // Default decrement
+  }
+
+  SUBCASE("gauge with multiple instances") {
+    auto gauge1 = gauge_family.create({{"instance", "test1"}});
+    auto gauge2 = gauge_family.create({{"instance", "test2"}});
+
+    gauge1.set(100.0);
+    gauge2.set(200.0);
+
+    gauge1.inc(50.0);
+    gauge2.dec(25.0);
+  }
+}
+
+TEST_CASE("histogram basic functionality") {
+  auto hist_family =
+      metric::create_histogram("test_latency", "Test latency histogram",
+                               metric::exponential_buckets(0.1, 2.0, 5));
+  // exponential_buckets(0.1, 2.0, 5) yields bounds 0.1, 0.2, 0.4, 0.8, 1.6.
+  SUBCASE("histogram observations") {
+    auto hist = hist_family.create({{"endpoint", "/api"}});
+
+    hist.observe(0.05); // Below first bucket
+    hist.observe(0.3);  // Between buckets
+    hist.observe(1.5);  // Between buckets
+    hist.observe(10.0); // Above all explicit buckets (goes in +Inf)
+  }
+
+  SUBCASE("histogram bucket validation") {
+    // Buckets should be sorted and deduplicated, with +Inf added
+    auto hist_family2 = metric::create_histogram(
+        "test_hist2", "Test",
+        std::initializer_list<double>{5.0, 1.0, 2.5, 1.0,
+                                      0.5}); // Unsorted with duplicate
+
+    auto hist = hist_family2.create({});
+    hist.observe(0.1);
+    hist.observe(1.5);
+    hist.observe(100.0); // Should go in +Inf bucket
+  }
+}
+
+TEST_CASE("histogram bucket generators") {
+  SUBCASE("linear_buckets basic functionality") {
+    // Linear buckets: start=0, width=10, count=5 -> {0, 10, 20, 30, 40}
+    auto buckets = metric::linear_buckets(0.0, 10.0, 5);
+
+    CHECK(buckets.size() == 5); // exactly count buckets
+    CHECK(buckets[0] == 0.0);
+    CHECK(buckets[1] == 10.0);
+    CHECK(buckets[2] == 20.0);
+    CHECK(buckets[3] == 30.0);
+    CHECK(buckets[4] == 40.0);
+  }
+
+  SUBCASE("linear_buckets with non-zero start") {
+    // Linear buckets: start=5, width=2.5, count=3 -> {5, 7.5, 10}
+    auto buckets = metric::linear_buckets(5.0, 2.5, 3);
+
+    CHECK(buckets.size() == 3);
+    CHECK(buckets[0] == 5.0);
+    CHECK(buckets[1] == 7.5);
+    CHECK(buckets[2] == 10.0);
+  }
+
+  SUBCASE("linear_buckets edge cases") {
+    // Zero count should give empty vector
+    auto zero_buckets = metric::linear_buckets(100.0, 10.0, 0);
+    CHECK(zero_buckets.size() == 0);
+
+    // Negative start should work
+    auto negative_buckets = metric::linear_buckets(-10.0, 5.0, 2);
+    CHECK(negative_buckets.size() == 2);
+    CHECK(negative_buckets[0] == -10.0);
+    CHECK(negative_buckets[1] == -5.0);
+  }
+
+  SUBCASE("exponential_buckets basic functionality") {
+    // Exponential buckets: start=1, factor=2, count=5 -> {1, 2, 4, 8, 16}
+    auto buckets = metric::exponential_buckets(1.0, 2.0, 5);
+
+    CHECK(buckets.size() == 5); // exactly count buckets
+    CHECK(buckets[0] == 1.0);
+    CHECK(buckets[1] == 2.0);
+    CHECK(buckets[2] == 4.0);
+    CHECK(buckets[3] == 8.0);
+    CHECK(buckets[4] == 16.0);
+  }
+
+  SUBCASE("exponential_buckets different factor") {
+    // Exponential buckets: start=0.1, factor=10, count=3 -> {0.1, 1, 10}
+    auto buckets = metric::exponential_buckets(0.1, 10.0, 3);
+
+    CHECK(buckets.size() == 3);
+    CHECK(buckets[0] == doctest::Approx(0.1));
+    CHECK(buckets[1] == doctest::Approx(1.0));
+    CHECK(buckets[2] == doctest::Approx(10.0));
+  }
+
+  SUBCASE("exponential_buckets typical latency pattern") {
+    // Typical web service latency buckets: 5ms, 10ms, 20ms, 40ms, 80ms, etc.
+    auto buckets = metric::exponential_buckets(0.005, 2.0, 8);
+
+    CHECK(buckets.size() == 8);
+    CHECK(buckets[0] == doctest::Approx(0.005)); // 5ms
+    CHECK(buckets[1] == doctest::Approx(0.010)); // 10ms
+    CHECK(buckets[2] == doctest::Approx(0.020)); // 20ms
+    CHECK(buckets[3] == doctest::Approx(0.040)); // 40ms
+    CHECK(buckets[4] == doctest::Approx(0.080)); // 80ms
+    CHECK(buckets[5] == doctest::Approx(0.160)); // 160ms
+    CHECK(buckets[6] == doctest::Approx(0.320)); // 320ms
+    CHECK(buckets[7] == doctest::Approx(0.640)); // 640ms
+  }
+
+  SUBCASE("exponential_buckets edge cases") {
+    // Zero count should give empty vector
+    auto zero_buckets = metric::exponential_buckets(5.0, 3.0, 0);
+    CHECK(zero_buckets.size() == 0);
+  }
+
+  SUBCASE("bucket generators with histogram creation") {
+    // Test that generated buckets work correctly with histogram creation
+    auto linear_hist = metric::create_histogram(
+        "linear_test", "Linear test", metric::linear_buckets(0, 100, 5));
+    auto linear_instance = linear_hist.create({{"type", "linear"}});
+
+    // Test observations fall into expected buckets
+    linear_instance.observe(50);   // Should fall into 100 bucket
+    linear_instance.observe(150);  // Should fall into 200 bucket
+    linear_instance.observe(1000); // Should fall into +Inf bucket
+
+    auto exp_hist =
+        metric::create_histogram("exp_test", "Exponential test",
+                                 metric::exponential_buckets(0.001, 10.0, 4));
+    auto exp_instance = exp_hist.create({{"type", "exponential"}});
+
+    // Test typical latency measurements
+    exp_instance.observe(0.0005); // Should fall into 0.001 bucket (1ms)
+    exp_instance.observe(0.005);  // Should fall into 0.01 bucket (10ms)
+    exp_instance.observe(0.05);   // Should fall into 0.1 bucket (100ms)
+    exp_instance.observe(5.0);    // Should fall into +Inf bucket
+  }
+
+  SUBCASE("prometheus compatibility verification") {
+    // Verify our bucket generation matches Prometheus Go client behavior
+
+    // Linear buckets equivalent to Prometheus LinearBuckets(0, 10, 5)
+    auto our_linear = metric::linear_buckets(0, 10, 5);
+    std::vector<double> expected_linear = {0, 10, 20, 30, 40};
+    CHECK(our_linear == expected_linear);
+
+    // Exponential buckets equivalent to Prometheus ExponentialBuckets(1, 2, 5)
+    auto our_exp = metric::exponential_buckets(1, 2, 5);
+    std::vector<double> expected_exp = {1, 2, 4, 8, 16};
+    CHECK(our_exp == expected_exp);
+
+    // Exponential series: start 0.005, factor 2.5 (not Prometheus DefBuckets)
+    auto default_buckets = metric::exponential_buckets(0.005, 2.5, 9);
+    // Should be: .005, .0125, .03125, .078125, .1953125,
+    // .48828125, 1.220703125, 3.0517578125, 7.62939453125
+    CHECK(default_buckets.size() == 9);
+    CHECK(default_buckets[0] == doctest::Approx(0.005));
+    CHECK(default_buckets[1] == doctest::Approx(0.0125));
+    CHECK(default_buckets[8] == doctest::Approx(7.62939453125));
+  }
+}
+
+TEST_CASE("callback-based metrics") {
+  auto counter_family =
+      metric::create_counter("callback_counter", "Callback counter");
+  auto gauge_family = metric::create_gauge("callback_gauge", "Callback gauge");
+
+  SUBCASE("counter callback") {
+    // Static storage: the callback stays registered after this scope exits and
+    // is invoked by every later render(), so it must not capture a local.
+    static std::atomic<double> counter_value{42.0};
+    counter_family.register_callback(
+        {{"type", "callback"}}, []() { return counter_value.load(); });
+
+    // Callback should be called during render
+    ArenaAllocator arena;
+    auto output = metric::render(arena);
+    CHECK(output.size() > 0);
+  }
+
+  SUBCASE("gauge callback") {
+    // Static as well: later render() calls still invoke this callback.
+    static std::atomic<double> gauge_value{123.5};
+
+    gauge_family.register_callback({{"type", "callback"}},
+                                   []() { return gauge_value.load(); });
+
+    ArenaAllocator arena;
+    auto output = metric::render(arena);
+    CHECK(output.size() > 0);
+  }
+
+  SUBCASE("callback conflict detection") {
+    // First create a static instance
+    auto counter = counter_family.create({{"conflict", "test"}});
+    counter.inc(1.0);
+
+    // Then try to register a callback with same labels - should abort
+    // This is a validation test that would abort in debug builds
+  }
+}
+
+TEST_CASE("prometheus text format rendering") {
+  ArenaAllocator arena;
+
+  // Create some metrics
+  auto counter_family =
+      metric::create_counter("http_requests_total", "Total HTTP requests");
+  auto counter = counter_family.create({{"method", "GET"}, {"status", "200"}});
+  counter.inc(1000);
+
+  auto gauge_family =
+      metric::create_gauge("memory_usage_bytes", "Memory usage");
+  auto gauge = gauge_family.create({{"type", "heap"}});
+  gauge.set(1048576);
+
+  auto hist_family = metric::create_histogram(
+      "request_duration_seconds", "Request duration",
+      metric::exponential_buckets(0.1, 2.0, 3)); // 0.1, 0.2, 0.4
+  auto hist = hist_family.create({{"handler", "api"}});
+  hist.observe(0.25);
+  hist.observe(0.75);
+  hist.observe(1.5);
+
+  SUBCASE("render format validation") {
+    auto output = metric::render(arena);
+    CHECK(output.size() > 0);
+
+    // Basic format checks
+    bool found_help = false;
+    bool found_type = false;
+    bool found_metric_line = false;
+
+    for (const auto &line : output) {
+      if (line.starts_with("# HELP"))
+        found_help = true;
+      if (line.starts_with("# TYPE"))
+        found_type = true;
+      if (line.find("http_requests_total") != std::string_view::npos)
+        found_metric_line = true;
+    }
+
+    CHECK(found_help);
+    CHECK(found_type);
+    CHECK(found_metric_line);
+  }
+
+  SUBCASE("special value formatting") {
+    auto special_gauge_family =
+        metric::create_gauge("special_values", "Special value test");
+    auto special_gauge = special_gauge_family.create({});
+    // NOTE(review): this file does not include <limits> directly.
+    special_gauge.set(std::numeric_limits<double>::infinity());
+    auto output = metric::render(arena);
+    // Expect "+Inf" in the output. NOTE(review): the histogram's +Inf bucket
+    // (created above) may satisfy this even if the gauge is misformatted.
+    bool found_inf = false;
+    for (const auto &line : output) {
+      if (line.find("+Inf") != std::string_view::npos) {
+        found_inf = true;
+        break;
+      }
+    }
+    CHECK(found_inf);
+  }
+}
+
+TEST_CASE("thread safety") {
+  constexpr int num_threads = 8;
+  constexpr int ops_per_thread = 1000;
+  // Only the render subcase asserts; the others just exercise concurrency.
+  SUBCASE("counter single-writer semantics") {
+    auto counter_family =
+        metric::create_counter("thread_test_counter", "Thread test");
+
+    std::vector<std::thread> threads;
+    std::latch start_latch{num_threads};
+
+    // Each thread creates its own counter instance (safe)
+    for (int i = 0; i < num_threads; ++i) {
+      threads.emplace_back([&, i]() {
+        auto counter =
+            counter_family.create({{"thread_id", std::to_string(i)}});
+
+        start_latch.arrive_and_wait();
+
+        for (int j = 0; j < ops_per_thread; ++j) {
+          counter.inc(1.0);
+        }
+      });
+    }
+
+    for (auto &t : threads) {
+      t.join();
+    }
+  }
+
+  SUBCASE("gauge multi-writer with CAS") {
+    auto gauge_family =
+        metric::create_gauge("thread_test_gauge", "Thread test gauge");
+    auto shared_gauge = gauge_family.create({{"shared", "true"}});
+    // NOTE(review): the final gauge value is never checked after the joins.
+    std::vector<std::thread> threads;
+    std::latch start_latch{num_threads};
+
+    // Multiple threads writing to same gauge (uses atomic CAS)
+    for (int i = 0; i < num_threads; ++i) {
+      threads.emplace_back([&]() {
+        start_latch.arrive_and_wait();
+
+        for (int j = 0; j < ops_per_thread; ++j) {
+          shared_gauge.inc(1.0);
+        }
+      });
+    }
+
+    for (auto &t : threads) {
+      t.join();
+    }
+  }
+
+  SUBCASE("histogram single-writer per thread") {
+    auto hist_family =
+        metric::create_histogram("thread_test_hist", "Thread test histogram",
+                                 std::initializer_list<double>{0.1, 0.5, 1.0});
+
+    std::vector<std::thread> threads;
+    std::latch start_latch{num_threads};
+
+    for (int i = 0; i < num_threads; ++i) {
+      threads.emplace_back([&, i]() {
+        auto hist = hist_family.create({{"thread_id", std::to_string(i)}});
+
+        start_latch.arrive_and_wait();
+
+        for (int j = 0; j < ops_per_thread; ++j) {
+          hist.observe(static_cast<double>(j) / ops_per_thread);
+        }
+      });
+    }
+
+    for (auto &t : threads) {
+      t.join();
+    }
+  }
+
+  SUBCASE("concurrent render calls") {
+    // Multiple threads calling render concurrently should be safe (serialized
+    // by mutex)
+    auto counter_family = metric::create_counter("render_test", "Render test");
+    auto counter = counter_family.create({});
+    counter.inc(100);
+
+    std::vector<std::thread> threads;
+    std::latch start_latch{num_threads};
+    std::atomic<int> success_count{0};
+
+    for (int i = 0; i < num_threads; ++i) {
+      threads.emplace_back([&]() {
+        start_latch.arrive_and_wait();
+
+        ArenaAllocator arena;
+        auto output = metric::render(arena);
+        if (output.size() > 0) {
+          success_count.fetch_add(1);
+        }
+      });
+    }
+
+    for (auto &t : threads) {
+      t.join();
+    }
+
+    CHECK(success_count.load() == num_threads);
+  }
+}
+
+TEST_CASE("error conditions") {
+  SUBCASE("counter negative increment") {
+    auto counter_family = metric::create_counter("error_counter", "Error test");
+    auto counter = counter_family.create({});
+
+    // This should abort in debug builds due to validation
+    // In release builds, behavior is undefined
+    // counter.inc(-1.0); // Would abort
+  }
+
+  SUBCASE("invalid metric names") {
+    // These would abort due to name validation, so they stay commented out:
+    //   auto bad_counter = metric::create_counter("123invalid", "help");
+    //   auto bad_gauge = metric::create_gauge("invalid-name", "help");
+    // Nothing is executed in this subcase.
+  }
+
+  SUBCASE("invalid label keys") {
+    auto counter_family = metric::create_counter("valid_name", "help");
+
+    // This would abort due to label validation, so it stays commented out:
+    //   auto counter = counter_family.create({{"123invalid", "value"}});
+    // Only the family creation above actually runs.
+  }
+}
+
+TEST_CASE("memory management") {
+  SUBCASE("arena allocation in render") {
+    ArenaAllocator arena;
+    auto initial_used = arena.used_bytes();
+
+    auto counter_family = metric::create_counter("memory_test", "Memory test");
+    auto counter = counter_family.create(
+        {{"large_label", "very_long_value_that_takes_space"}});
+    counter.inc(42);
+
+    auto output = metric::render(arena);
+    auto final_used = arena.used_bytes();
+
+    CHECK(output.size() > 0);
+    CHECK(final_used > initial_used); // Arena was used for string allocation
+
+    // Only non-emptiness is checked; arena residency is not verified here
+    for (const auto &line : output) {
+      CHECK(line.size() > 0);
+    }
+  }
+
+  SUBCASE("arena reset behavior") {
+    ArenaAllocator arena;
+
+    auto counter_family = metric::create_counter("reset_test", "Reset test");
+    auto counter = counter_family.create({});
+    counter.inc(1);
+
+    // Render multiple times with arena resets
+    for (int i = 0; i < 5; ++i) {
+      auto output = metric::render(arena);
+      CHECK(output.size() > 0);
+      arena.reset(); // Should not affect metric values, only arena memory
+    }
+
+    // Final render should still work
+    auto final_output = metric::render(arena);
+    CHECK(final_output.size() > 0);
+  }
+}