#include "process_collector.hpp"

#include <dirent.h>
#include <sys/resource.h>
#include <unistd.h>

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <vector>

namespace {

// Reads the system boot time from the "btime" line of /proc/stat.
// Returns the boot time in seconds since the epoch, or 0 if the file cannot
// be opened, the line is missing, or its value cannot be parsed.
double get_boot_time() {
  FILE *fp = std::fopen("/proc/stat", "r");
  if (!fp) {
    return 0;
  }

  char line[256];
  double boot_time = 0;
  while (std::fgets(line, sizeof(line), fp)) {
    if (std::strncmp(line, "btime ", 6) == 0) {
      if (std::sscanf(line + 6, "%lf", &boot_time) != 1) {
        boot_time = 0;
      }
      break;
    }
  }

  std::fclose(fp);
  return boot_time;
}

// Indices into the space-separated tokens that follow ") " in
// /proc/self/stat. The first token after the command name is the process
// state (field 3 in proc(5) numbering), so token index i is field i + 3.
constexpr std::size_t kMinorFaultsIdx = 7; // field 10, minflt
constexpr std::size_t kMajorFaultsIdx = 9; // field 12, majflt
constexpr std::size_t kUtimeIdx = 11;      // field 14, utime
constexpr std::size_t kStimeIdx = 12;      // field 15, stime
constexpr std::size_t kStartTimeIdx = 19;  // field 22, starttime
constexpr std::size_t kVsizeIdx = 20;      // field 23, vsize
constexpr std::size_t kRssIdx = 21;        // field 24, rss

// The values collect() needs from one read of /proc/self/stat.
struct StatSample {
  unsigned long long minor_faults;
  unsigned long long major_faults;
  unsigned long long utime_ticks;
  unsigned long long stime_ticks;
  long long start_time_ticks;
  unsigned long long vsize;
  long rss_pages;
};

// Reads and parses /proc/self/stat into `out`.
// Returns false (leaving `out` unspecified) if the file cannot be read or
// does not contain enough fields.
bool read_self_stat(StatSample &out) {
  FILE *fp = std::fopen("/proc/self/stat", "r");
  if (!fp) {
    return false;
  }
  char buf[2048];
  const bool have_line = std::fgets(buf, sizeof(buf), fp) != nullptr;
  std::fclose(fp);
  if (!have_line) {
    return false;
  }

  // The command name is wrapped in parentheses and may itself contain ')',
  // so search from the end of the line.
  char *cursor = std::strrchr(buf, ')');
  if (!cursor || cursor[1] == '\0') {
    // Nothing follows the ')': skipping two characters would run past the
    // string terminator.
    return false;
  }
  cursor += 2; // Skip the ')' and the space

  // Split the remainder in place on single spaces.
  std::vector<char *> fields;
  fields.reserve(64);
  while (*cursor) {
    fields.push_back(cursor);
    while (*cursor && *cursor != ' ') {
      ++cursor;
    }
    if (*cursor) {
      *cursor = '\0';
      ++cursor;
    }
  }

  // rss is the highest-indexed token read below.
  if (fields.size() <= kRssIdx) {
    return false;
  }

  out.minor_faults = std::strtoull(fields[kMinorFaultsIdx], nullptr, 10);
  out.major_faults = std::strtoull(fields[kMajorFaultsIdx], nullptr, 10);
  out.utime_ticks = std::strtoull(fields[kUtimeIdx], nullptr, 10);
  out.stime_ticks = std::strtoull(fields[kStimeIdx], nullptr, 10);
  out.start_time_ticks = std::strtoll(fields[kStartTimeIdx], nullptr, 10);
  out.vsize = std::strtoull(fields[kVsizeIdx], nullptr, 10);
  out.rss_pages = std::strtol(fields[kRssIdx], nullptr, 10);
  return true;
}

// Counts the entries of /proc/self/fd, excluding '.', '..' and the
// descriptor opened by opendir() itself.
// Returns -1 if the directory cannot be opened.
int count_open_fds() {
  DIR *dir = opendir("/proc/self/fd");
  if (!dir) {
    return -1;
  }
  int entries = 0;
  while (readdir(dir) != nullptr) {
    ++entries;
  }
  closedir(dir);
  return entries > 3 ? entries - 3 : 0;
}

// Returns a pointer to the text following `prefix` if `line` starts with
// `prefix`, otherwise nullptr. The length is derived from the prefix itself
// so it cannot drift out of sync with the literal.
const char *after_prefix(const char *line, const char *prefix) {
  const std::size_t len = std::strlen(prefix);
  return std::strncmp(line, prefix, len) == 0 ? line + len : nullptr;
}

// Feeds a monotonically increasing kernel value into `counter`.
// If a previous sample exists (`last` > 0) the counter is advanced by the
// growth since that sample, and left untouched if the value did not grow.
// Otherwise the counter is advanced by the full current value. `last` is
// then updated to `current`.
template <typename Counter, typename Last>
void add_counter_delta(Counter &counter, unsigned long long current,
                       Last &last) {
  if (last > 0) {
    if (current > last) {
      counter.inc(current - last);
    }
  } else {
    counter.inc(current);
  }
  last = current;
}

} // namespace

// Creates all process metrics, records the file-descriptor soft limit, and
// performs an initial collection.
ProcessCollector::ProcessCollector()
    : cpu_seconds_total_(
          metric::create_counter(
              "process_cpu_seconds_total",
              "Total user and system CPU time spent in seconds")
              .create({})),
      resident_memory_bytes_(
          metric::create_gauge("process_resident_memory_bytes",
                               "Resident memory size in bytes")
              .create({})),
      virtual_memory_bytes_(
          metric::create_gauge("process_virtual_memory_bytes",
                               "Virtual memory size in bytes")
              .create({})),
      open_fds_(metric::create_gauge("process_open_fds",
                                     "Number of open file descriptors")
                    .create({})),
      max_fds_(metric::create_gauge("process_max_fds",
                                    "Maximum number of open file descriptors")
                   .create({})),
      start_time_seconds_(
          metric::create_gauge(
              "process_start_time_seconds",
              "Start time of the process since unix epoch in seconds")
              .create({})),
      threads_(metric::create_gauge("process_threads",
                                    "Number of OS threads in this process")
                   .create({})),
      context_switches_total_voluntary_(
          metric::create_counter("process_context_switches_total",
                                 "Total number of context switches")
              .create({{"type", "voluntary"}})),
      context_switches_total_nonvoluntary_(
          metric::create_counter("process_context_switches_total",
                                 "Total number of context switches")
              .create({{"type", "nonvoluntary"}})),
      page_faults_total_minor_(
          metric::create_counter("process_page_faults_total",
                                 "Total number of page faults")
              .create({{"type", "minor"}})),
      page_faults_total_major_(
          metric::create_counter("process_page_faults_total",
                                 "Total number of page faults")
              .create({{"type", "major"}})) {
  // Set the max_fds metric from the soft RLIMIT_NOFILE limit; it is only
  // written here.
  struct rlimit rlim;
  if (getrlimit(RLIMIT_NOFILE, &rlim) == 0) {
    max_fds_.set(rlim.rlim_cur);
  }

  // Perform an initial collection to populate the other metrics and set the
  // initial counter values.
  collect();
}

// Refreshes every metric from /proc/self/stat, /proc/self/fd and
// /proc/self/status. Each source is handled independently: if one cannot be
// read, its metrics keep their previous values and the others are still
// updated.
void ProcessCollector::collect() {
  // --- Page faults, CPU time, memory and start time from /proc/self/stat ---
  StatSample stat;
  if (read_self_stat(stat)) {
    add_counter_delta(page_faults_total_minor_, stat.minor_faults,
                      last_minor_faults_);
    add_counter_delta(page_faults_total_major_, stat.major_faults,
                      last_major_faults_);

    // sysconf() returns -1 on failure; never use that as a divisor.
    const long clk_tck = sysconf(_SC_CLK_TCK);

    if (clk_tck > 0) {
      const unsigned long long total_ticks =
          stat.utime_ticks + stat.stime_ticks;
      if (last_total_ticks_ > 0) {
        // We have a previous value: add only the growth since then.
        if (total_ticks > last_total_ticks_) {
          cpu_seconds_total_.inc(
              static_cast<double>(total_ticks - last_total_ticks_) / clk_tck);
        }
      } else {
        // First run, initialize the counter.
        cpu_seconds_total_.inc(static_cast<double>(total_ticks) / clk_tck);
      }
      last_total_ticks_ = total_ticks;
    }

    virtual_memory_bytes_.set(stat.vsize);
    resident_memory_bytes_.set(stat.rss_pages * sysconf(_SC_PAGESIZE));

    // Start time only needs to be set once; retry on later calls until the
    // boot time can be read.
    if (!start_time_set_ && clk_tck > 0) {
      const double boot_time = get_boot_time();
      if (boot_time > 0) {
        start_time_seconds_.set(
            boot_time + static_cast<double>(stat.start_time_ticks) / clk_tck);
        start_time_set_ = true;
      }
    }
  }

  // --- File descriptors ---
  const int open_fd_count = count_open_fds();
  if (open_fd_count >= 0) {
    open_fds_.set(open_fd_count);
  }

  // --- Threads and context switches from /proc/self/status ---
  FILE *status_fp = std::fopen("/proc/self/status", "r");
  if (status_fp) {
    char status_line[256];
    while (std::fgets(status_line, sizeof(status_line), status_fp)) {
      if (const char *value = after_prefix(status_line, "Threads:\t")) {
        int thread_count;
        if (std::sscanf(value, "%d", &thread_count) == 1) {
          threads_.set(thread_count);
        }
      } else if (const char *value = after_prefix(
                     status_line, "voluntary_ctxt_switches:\t")) {
        unsigned long long voluntary_switches;
        if (std::sscanf(value, "%llu", &voluntary_switches) == 1) {
          add_counter_delta(context_switches_total_voluntary_,
                            voluntary_switches,
                            last_voluntary_context_switches_);
        }
      } else if (const char *value = after_prefix(
                     status_line, "nonvoluntary_ctxt_switches:\t")) {
        unsigned long long nonvoluntary_switches;
        if (std::sscanf(value, "%llu", &nonvoluntary_switches) == 1) {
          add_counter_delta(context_switches_total_nonvoluntary_,
                            nonvoluntary_switches,
                            last_nonvoluntary_context_switches_);
        }
      }
    }
    std::fclose(status_fp);
  }
}