Add process collector

This commit is contained in:
2025-09-03 14:38:10 -04:00
parent 2fa5b3e960
commit f0916d8269
6 changed files with 243 additions and 2 deletions

View File

@@ -1,10 +1,10 @@
#include "config.hpp"
#include "connection.hpp"
#include "connection_handler.hpp"
#include "http_handler.hpp"
#include "metric.hpp"
#include "perfetto_categories.hpp"
#include "process_collector.hpp"
#include "server.hpp"
#include <atomic>
#include <csignal>
#include <cstring>
#include <fcntl.h>
@@ -176,6 +176,9 @@ int main(int argc, char *argv[]) {
perfetto::TrackEvent::Register();
#endif
// Register the process collector for default metrics.
metric::register_collector(std::make_shared<ProcessCollector>());
std::string config_file = "config.toml";
// Parse command line arguments

View File

@@ -12,6 +12,7 @@
#include <cstring>
#include <functional>
#include <map>
#include <memory>
#include <mutex>
#include <set>
#include <string>
@@ -451,6 +452,12 @@ struct Metric {
return *threadArenas;
}
static auto &get_collectors() {
using CollectorRegistry = std::vector<std::shared_ptr<Collector>>;
static CollectorRegistry *collectors = new CollectorRegistry();
return *collectors;
}
// Thread cleanup for per-family thread-local storage
struct ThreadInit {
ArenaAllocator arena;
@@ -1697,6 +1704,11 @@ std::span<std::string_view> render(ArenaAllocator &arena) {
// prevents races during static member initialization at program startup
std::unique_lock<std::mutex> _{Metric::mutex};
// Call all registered collectors to update their metrics
for (const auto &collector : Metric::get_collectors()) {
collector->collect();
}
// Phase 1: Compile - generate static text and instructions
// Safe: cached_plan access/initialization protected by mutex above
if (!Metric::cached_plan || Metric::cached_plan->registration_version !=
@@ -1800,4 +1812,10 @@ void reset_metrics_for_testing() {
// when threads exit naturally
}
void register_collector(std::shared_ptr<Collector> collector) {
std::unique_lock<std::mutex> _{Metric::mutex};
++Metric::registration_version;
Metric::get_collectors().push_back(std::move(collector));
}
} // namespace metric

View File

@@ -45,6 +45,7 @@
#include <functional>
#include <initializer_list>
#include <memory>
#include <span>
#include <type_traits>
#include <vector>
@@ -231,6 +232,36 @@ bool is_valid_label_value(std::string_view value);
// when no metric objects are in use and no concurrent render() calls.
void reset_metrics_for_testing();
/**
* @brief Interface for a custom collector that can be registered with the
* metrics system.
*
* This is used for complex metric gathering, such as reading from /proc, where
* multiple metrics need to be updated from a single data source.
*/
struct Collector {
/**
* @brief Virtual destructor.
*/
virtual ~Collector() = default;
/**
* @brief Called by the metrics system to update the metrics this collector is
* responsible for.
*/
virtual void collect() = 0;
};
/**
* @brief Register a collector with the metrics system.
*
* The system will hold a shared_ptr to the collector and call its collect()
* method during each metric rendering.
*
* @param collector A shared_ptr to the collector to be registered.
*/
void register_collector(std::shared_ptr<Collector> collector);
// Note: Histograms do not support callbacks due to their multi-value nature
// (buckets + sum + count). Use static histogram metrics only.

154
src/process_collector.cpp Normal file
View File

@@ -0,0 +1,154 @@
#include "process_collector.hpp"
#include <cstdio>
#include <cstring>
#include <dirent.h>
#include <sys/resource.h>
#include <unistd.h>
#include <vector>
namespace {
// Helper function to read the system boot time from /proc/stat.
// Returns boot time in seconds since epoch, or 0 on error.
double get_boot_time() {
FILE *fp = std::fopen("/proc/stat", "r");
if (!fp) {
return 0;
}
char line[256];
double boot_time = 0;
while (std::fgets(line, sizeof(line), fp)) {
if (std::strncmp(line, "btime ", 6) == 0) {
if (std::sscanf(line + 6, "%lf", &boot_time) != 1) {
boot_time = 0;
}
break;
}
}
std::fclose(fp);
return boot_time;
}
} // namespace
ProcessCollector::ProcessCollector()
: cpu_seconds_total_(metric::create_counter(
"process_cpu_seconds_total",
"Total user and system CPU time spent in seconds")
.create({})),
resident_memory_bytes_(
metric::create_gauge("process_resident_memory_bytes",
"Resident memory size in bytes")
.create({})),
virtual_memory_bytes_(metric::create_gauge("process_virtual_memory_bytes",
"Virtual memory size in bytes")
.create({})),
open_fds_(metric::create_gauge("process_open_fds",
"Number of open file descriptors")
.create({})),
max_fds_(metric::create_gauge("process_max_fds",
"Maximum number of open file descriptors")
.create({})),
start_time_seconds_(
metric::create_gauge(
"process_start_time_seconds",
"Start time of the process since unix epoch in seconds")
.create({})) {
// Set the constant max_fds metric.
struct rlimit rlim;
if (getrlimit(RLIMIT_NOFILE, &rlim) == 0) {
max_fds_.set(rlim.rlim_cur);
}
// Perform an initial collection to populate the other metrics and set the
// initial counter values.
collect();
}
void ProcessCollector::collect() {
// --- CPU Time, Memory, and Start Time from /proc/self/stat ---
FILE *fp = std::fopen("/proc/self/stat", "r");
if (!fp) {
return;
}
char buf[2048];
if (std::fgets(buf, sizeof(buf), fp) == nullptr) {
std::fclose(fp);
return;
}
std::fclose(fp);
// Find the end of the command name, which is in parentheses
const char *stats_start = std::strrchr(buf, ')');
if (!stats_start) {
return;
}
stats_start += 2; // Skip the ')' and the space
// Tokenize the rest of the string
std::vector<const char *> stats;
char *p = const_cast<char *>(stats_start);
while (*p) {
stats.push_back(p);
while (*p && *p != ' ') {
p++;
}
if (*p) {
*p = '\0';
p++;
}
}
// We need at least 22 fields for starttime, 24 for rss
if (stats.size() < 24) {
return;
}
long clk_tck = sysconf(_SC_CLK_TCK);
// --- CPU Time ---
unsigned long long utime_ticks = std::strtoull(stats[11], nullptr, 10);
unsigned long long stime_ticks = std::strtoull(stats[12], nullptr, 10);
unsigned long long current_total_ticks = utime_ticks + stime_ticks;
if (last_total_ticks_ > 0) { // If we have a previous value
if (current_total_ticks > last_total_ticks_) {
double delta_seconds =
(double)(current_total_ticks - last_total_ticks_) / clk_tck;
cpu_seconds_total_.inc(delta_seconds);
}
} else { // First run, initialize the counter
cpu_seconds_total_.inc((double)current_total_ticks / clk_tck);
}
last_total_ticks_ = current_total_ticks;
// --- Memory ---
unsigned long long vsize = std::strtoull(stats[20], nullptr, 10);
long rss_pages = std::strtol(stats[21], nullptr, 10);
virtual_memory_bytes_.set(vsize);
resident_memory_bytes_.set(rss_pages * sysconf(_SC_PAGESIZE));
// --- Start Time (only needs to be set once) ---
if (!start_time_set_) {
long long start_time_ticks = std::strtoll(stats[19], nullptr, 10);
double boot_time = get_boot_time();
if (boot_time > 0) {
start_time_seconds_.set(boot_time + (double)start_time_ticks / clk_tck);
start_time_set_ = true;
}
}
// --- File Descriptors ---
int fd_count = 0;
DIR *dp = opendir("/proc/self/fd");
if (dp) {
while (readdir(dp) != nullptr) {
fd_count++;
}
closedir(dp);
// Subtract 3 for '.', '..', and the opendir handle itself
open_fds_.set(fd_count > 3 ? fd_count - 3 : 0);
}
}

34
src/process_collector.hpp Normal file
View File

@@ -0,0 +1,34 @@
#pragma once
#include "metric.hpp"
/**
* @brief A metric collector for standard process-level statistics.
*
* Gathers metrics like CPU usage, memory, and file descriptors by reading
* files from the /proc filesystem.
*/
struct ProcessCollector : public metric::Collector {
/**
* @brief Constructs the collector and initializes the process metrics.
*/
ProcessCollector();
/**
* @brief Called by the metrics system to update the process metrics.
*/
void collect() override;
private:
// Metrics for process statistics
metric::Counter cpu_seconds_total_;
metric::Gauge resident_memory_bytes_;
metric::Gauge virtual_memory_bytes_;
metric::Gauge open_fds_;
metric::Gauge max_fds_;
metric::Gauge start_time_seconds_;
// Last observed values for calculating counter increments
unsigned long long last_total_ticks_ = 0;
bool start_time_set_ = false;
};