// Listing metadata (from the code viewer): 240 lines, 8.1 KiB, C++
#include "process_collector.hpp"

#include <dirent.h>
#include <sys/resource.h>
#include <unistd.h>

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <vector>
|
|
|
|
namespace {

// Looks up the "btime" entry in /proc/stat and returns its value, i.e. the
// system boot time in seconds since the epoch.
//
// Returns 0 when /proc/stat cannot be opened, when no line starts with
// "btime ", or when the number after the key cannot be parsed.
double get_boot_time() {
  static const char kKey[] = "btime ";
  const std::size_t key_len = sizeof(kKey) - 1;  // exclude the trailing NUL

  FILE *stat_file = std::fopen("/proc/stat", "r");
  if (stat_file == nullptr) {
    return 0;
  }

  double result = 0;
  char row[256];
  for (;;) {
    if (std::fgets(row, sizeof(row), stat_file) == nullptr) {
      break;  // end of file (or read error) without finding the key
    }
    if (std::strncmp(row, kKey, key_len) != 0) {
      continue;  // some other entry
    }

    // Only the first "btime" row is considered, whether or not it parses.
    double parsed = 0;
    if (std::sscanf(row + key_len, "%lf", &parsed) == 1) {
      result = parsed;
    }
    break;
  }

  std::fclose(stat_file);
  return result;
}

}  // namespace
|
|
|
|
// Constructs the collector.
//
// Every metric member is initialised in the member-initializer list from
// metric::create_counter / metric::create_gauge (name, help text) followed by
// .create(labels). The two context-switch counters and the two page-fault
// counters share a metric name and differ only in their "type" label.
//
// The constructor body then seeds the values: process_max_fds from the
// current (soft) RLIMIT_NOFILE limit, and everything else through a first
// collect() pass.
ProcessCollector::ProcessCollector()
    : cpu_seconds_total_(
          metric::create_counter(
              "process_cpu_seconds_total",
              "Total user and system CPU time spent in seconds")
              .create({})),
      resident_memory_bytes_(
          metric::create_gauge(
              "process_resident_memory_bytes",
              "Resident memory size in bytes")
              .create({})),
      virtual_memory_bytes_(
          metric::create_gauge(
              "process_virtual_memory_bytes",
              "Virtual memory size in bytes")
              .create({})),
      open_fds_(
          metric::create_gauge(
              "process_open_fds",
              "Number of open file descriptors")
              .create({})),
      max_fds_(
          metric::create_gauge(
              "process_max_fds",
              "Maximum number of open file descriptors")
              .create({})),
      start_time_seconds_(
          metric::create_gauge(
              "process_start_time_seconds",
              "Start time of the process since unix epoch in seconds")
              .create({})),
      threads_(
          metric::create_gauge(
              "process_threads",
              "Number of OS threads in this process")
              .create({})),
      context_switches_total_voluntary_(
          metric::create_counter(
              "process_context_switches_total",
              "Total number of context switches")
              .create({{"type", "voluntary"}})),
      context_switches_total_nonvoluntary_(
          metric::create_counter(
              "process_context_switches_total",
              "Total number of context switches")
              .create({{"type", "nonvoluntary"}})),
      page_faults_total_minor_(
          metric::create_counter(
              "process_page_faults_total",
              "Total number of page faults")
              .create({{"type", "minor"}})),
      page_faults_total_major_(
          metric::create_counter(
              "process_page_faults_total",
              "Total number of page faults")
              .create({{"type", "major"}})) {
  // process_max_fds is written only here; collect() does not touch it. If the
  // limit cannot be queried the gauge keeps whatever value it was created
  // with.
  struct rlimit nofile_limit;
  const bool limit_known = (getrlimit(RLIMIT_NOFILE, &nofile_limit) == 0);
  if (limit_known) {
    max_fds_.set(nofile_limit.rlim_cur);
  }

  // First pass: fills the remaining gauges and establishes the baselines the
  // counters are advanced from on later calls.
  collect();
}
|
|
|
|
void ProcessCollector::collect() {
|
|
// --- CPU Time, Memory, and Start Time from /proc/self/stat ---
|
|
FILE *fp = std::fopen("/proc/self/stat", "r");
|
|
if (!fp) {
|
|
return;
|
|
}
|
|
|
|
char buf[2048];
|
|
if (std::fgets(buf, sizeof(buf), fp) == nullptr) {
|
|
std::fclose(fp);
|
|
return;
|
|
}
|
|
std::fclose(fp);
|
|
|
|
// Find the end of the command name, which is in parentheses
|
|
const char *stats_start = std::strrchr(buf, ')');
|
|
if (!stats_start) {
|
|
return;
|
|
}
|
|
stats_start += 2; // Skip the ')' and the space
|
|
|
|
// Tokenize the rest of the string
|
|
std::vector<const char *> stats;
|
|
char *p = const_cast<char *>(stats_start);
|
|
while (*p) {
|
|
stats.push_back(p);
|
|
while (*p && *p != ' ') {
|
|
p++;
|
|
}
|
|
if (*p) {
|
|
*p = '\0';
|
|
p++;
|
|
}
|
|
}
|
|
|
|
// We need at least 24 fields for rss, and also fields 9,11 for page faults
|
|
if (stats.size() < 24) {
|
|
return;
|
|
}
|
|
|
|
long clk_tck = sysconf(_SC_CLK_TCK);
|
|
|
|
// --- Page Faults ---
|
|
unsigned long long minor_faults = std::strtoull(stats[7], nullptr, 10);
|
|
unsigned long long major_faults = std::strtoull(stats[9], nullptr, 10);
|
|
|
|
if (last_minor_faults_ > 0) {
|
|
if (minor_faults > last_minor_faults_) {
|
|
page_faults_total_minor_.inc(minor_faults - last_minor_faults_);
|
|
}
|
|
} else {
|
|
page_faults_total_minor_.inc(minor_faults);
|
|
}
|
|
last_minor_faults_ = minor_faults;
|
|
|
|
if (last_major_faults_ > 0) {
|
|
if (major_faults > last_major_faults_) {
|
|
page_faults_total_major_.inc(major_faults - last_major_faults_);
|
|
}
|
|
} else {
|
|
page_faults_total_major_.inc(major_faults);
|
|
}
|
|
last_major_faults_ = major_faults;
|
|
|
|
// --- CPU Time ---
|
|
unsigned long long utime_ticks = std::strtoull(stats[11], nullptr, 10);
|
|
unsigned long long stime_ticks = std::strtoull(stats[12], nullptr, 10);
|
|
unsigned long long current_total_ticks = utime_ticks + stime_ticks;
|
|
|
|
if (last_total_ticks_ > 0) { // If we have a previous value
|
|
if (current_total_ticks > last_total_ticks_) {
|
|
double delta_seconds =
|
|
(double)(current_total_ticks - last_total_ticks_) / clk_tck;
|
|
cpu_seconds_total_.inc(delta_seconds);
|
|
}
|
|
} else { // First run, initialize the counter
|
|
cpu_seconds_total_.inc((double)current_total_ticks / clk_tck);
|
|
}
|
|
last_total_ticks_ = current_total_ticks;
|
|
|
|
// --- Memory ---
|
|
unsigned long long vsize = std::strtoull(stats[20], nullptr, 10);
|
|
long rss_pages = std::strtol(stats[21], nullptr, 10);
|
|
virtual_memory_bytes_.set(vsize);
|
|
resident_memory_bytes_.set(rss_pages * sysconf(_SC_PAGESIZE));
|
|
|
|
// --- Start Time (only needs to be set once) ---
|
|
if (!start_time_set_) {
|
|
long long start_time_ticks = std::strtoll(stats[19], nullptr, 10);
|
|
double boot_time = get_boot_time();
|
|
if (boot_time > 0) {
|
|
start_time_seconds_.set(boot_time + (double)start_time_ticks / clk_tck);
|
|
start_time_set_ = true;
|
|
}
|
|
}
|
|
|
|
// --- File Descriptors ---
|
|
int fd_count = 0;
|
|
DIR *dp = opendir("/proc/self/fd");
|
|
if (dp) {
|
|
while (readdir(dp) != nullptr) {
|
|
fd_count++;
|
|
}
|
|
closedir(dp);
|
|
// Subtract 3 for '.', '..', and the opendir handle itself
|
|
open_fds_.set(fd_count > 3 ? fd_count - 3 : 0);
|
|
}
|
|
|
|
// --- Parse /proc/self/status for additional metrics ---
|
|
FILE *status_fp = std::fopen("/proc/self/status", "r");
|
|
if (status_fp) {
|
|
char status_line[256];
|
|
while (std::fgets(status_line, sizeof(status_line), status_fp)) {
|
|
if (std::strncmp(status_line, "Threads:\t", 9) == 0) {
|
|
int thread_count;
|
|
if (std::sscanf(status_line + 9, "%d", &thread_count) == 1) {
|
|
threads_.set(thread_count);
|
|
}
|
|
} else if (std::strncmp(status_line, "voluntary_ctxt_switches:\t", 25) ==
|
|
0) {
|
|
unsigned long long voluntary_switches;
|
|
if (std::sscanf(status_line + 25, "%llu", &voluntary_switches) == 1) {
|
|
if (last_voluntary_context_switches_ > 0) {
|
|
if (voluntary_switches > last_voluntary_context_switches_) {
|
|
context_switches_total_voluntary_.inc(
|
|
voluntary_switches - last_voluntary_context_switches_);
|
|
}
|
|
} else {
|
|
context_switches_total_voluntary_.inc(voluntary_switches);
|
|
}
|
|
last_voluntary_context_switches_ = voluntary_switches;
|
|
}
|
|
} else if (std::strncmp(status_line, "nonvoluntary_ctxt_switches:\t",
|
|
29) == 0) {
|
|
unsigned long long nonvoluntary_switches;
|
|
if (std::sscanf(status_line + 29, "%llu", &nonvoluntary_switches) ==
|
|
1) {
|
|
if (last_nonvoluntary_context_switches_ > 0) {
|
|
if (nonvoluntary_switches > last_nonvoluntary_context_switches_) {
|
|
context_switches_total_nonvoluntary_.inc(
|
|
nonvoluntary_switches - last_nonvoluntary_context_switches_);
|
|
}
|
|
} else {
|
|
context_switches_total_nonvoluntary_.inc(nonvoluntary_switches);
|
|
}
|
|
last_nonvoluntary_context_switches_ = nonvoluntary_switches;
|
|
}
|
|
}
|
|
}
|
|
std::fclose(status_fp);
|
|
}
|
|
}
|