Round out process collector
This commit is contained in:
@@ -54,7 +54,26 @@ ProcessCollector::ProcessCollector()
|
|||||||
metric::create_gauge(
|
metric::create_gauge(
|
||||||
"process_start_time_seconds",
|
"process_start_time_seconds",
|
||||||
"Start time of the process since unix epoch in seconds")
|
"Start time of the process since unix epoch in seconds")
|
||||||
.create({})) {
|
.create({})),
|
||||||
|
threads_(metric::create_gauge("process_threads",
|
||||||
|
"Number of OS threads in this process")
|
||||||
|
.create({})),
|
||||||
|
context_switches_total_voluntary_(
|
||||||
|
metric::create_counter("process_context_switches_total",
|
||||||
|
"Total number of context switches")
|
||||||
|
.create({{"type", "voluntary"}})),
|
||||||
|
context_switches_total_nonvoluntary_(
|
||||||
|
metric::create_counter("process_context_switches_total",
|
||||||
|
"Total number of context switches")
|
||||||
|
.create({{"type", "nonvoluntary"}})),
|
||||||
|
page_faults_total_minor_(
|
||||||
|
metric::create_counter("process_page_faults_total",
|
||||||
|
"Total number of page faults")
|
||||||
|
.create({{"type", "minor"}})),
|
||||||
|
page_faults_total_major_(
|
||||||
|
metric::create_counter("process_page_faults_total",
|
||||||
|
"Total number of page faults")
|
||||||
|
.create({{"type", "major"}})) {
|
||||||
// Set the constant max_fds metric.
|
// Set the constant max_fds metric.
|
||||||
struct rlimit rlim;
|
struct rlimit rlim;
|
||||||
if (getrlimit(RLIMIT_NOFILE, &rlim) == 0) {
|
if (getrlimit(RLIMIT_NOFILE, &rlim) == 0) {
|
||||||
@@ -101,13 +120,35 @@ void ProcessCollector::collect() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// We need at least 22 fields for starttime, 24 for rss
|
// We need at least 24 fields for rss; page faults are read from stats[7] (minor) and stats[9] (major)
|
||||||
if (stats.size() < 24) {
|
if (stats.size() < 24) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
long clk_tck = sysconf(_SC_CLK_TCK);
|
long clk_tck = sysconf(_SC_CLK_TCK);
|
||||||
|
|
||||||
|
// --- Page Faults ---
|
||||||
|
unsigned long long minor_faults = std::strtoull(stats[7], nullptr, 10);
|
||||||
|
unsigned long long major_faults = std::strtoull(stats[9], nullptr, 10);
|
||||||
|
|
||||||
|
if (last_minor_faults_ > 0) {
|
||||||
|
if (minor_faults > last_minor_faults_) {
|
||||||
|
page_faults_total_minor_.inc(minor_faults - last_minor_faults_);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
page_faults_total_minor_.inc(minor_faults);
|
||||||
|
}
|
||||||
|
last_minor_faults_ = minor_faults;
|
||||||
|
|
||||||
|
if (last_major_faults_ > 0) {
|
||||||
|
if (major_faults > last_major_faults_) {
|
||||||
|
page_faults_total_major_.inc(major_faults - last_major_faults_);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
page_faults_total_major_.inc(major_faults);
|
||||||
|
}
|
||||||
|
last_major_faults_ = major_faults;
|
||||||
|
|
||||||
// --- CPU Time ---
|
// --- CPU Time ---
|
||||||
unsigned long long utime_ticks = std::strtoull(stats[11], nullptr, 10);
|
unsigned long long utime_ticks = std::strtoull(stats[11], nullptr, 10);
|
||||||
unsigned long long stime_ticks = std::strtoull(stats[12], nullptr, 10);
|
unsigned long long stime_ticks = std::strtoull(stats[12], nullptr, 10);
|
||||||
@@ -151,4 +192,48 @@ void ProcessCollector::collect() {
|
|||||||
// Subtract 3 for '.', '..', and the opendir handle itself
|
// Subtract 3 for '.', '..', and the opendir handle itself
|
||||||
open_fds_.set(fd_count > 3 ? fd_count - 3 : 0);
|
open_fds_.set(fd_count > 3 ? fd_count - 3 : 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --- Parse /proc/self/status for additional metrics ---
|
||||||
|
FILE *status_fp = std::fopen("/proc/self/status", "r");
|
||||||
|
if (status_fp) {
|
||||||
|
char status_line[256];
|
||||||
|
while (std::fgets(status_line, sizeof(status_line), status_fp)) {
|
||||||
|
if (std::strncmp(status_line, "Threads:\t", 9) == 0) {
|
||||||
|
int thread_count;
|
||||||
|
if (std::sscanf(status_line + 9, "%d", &thread_count) == 1) {
|
||||||
|
threads_.set(thread_count);
|
||||||
|
}
|
||||||
|
} else if (std::strncmp(status_line, "voluntary_ctxt_switches:\t", 25) ==
|
||||||
|
0) {
|
||||||
|
unsigned long long voluntary_switches;
|
||||||
|
if (std::sscanf(status_line + 25, "%llu", &voluntary_switches) == 1) {
|
||||||
|
if (last_voluntary_context_switches_ > 0) {
|
||||||
|
if (voluntary_switches > last_voluntary_context_switches_) {
|
||||||
|
context_switches_total_voluntary_.inc(
|
||||||
|
voluntary_switches - last_voluntary_context_switches_);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
context_switches_total_voluntary_.inc(voluntary_switches);
|
||||||
|
}
|
||||||
|
last_voluntary_context_switches_ = voluntary_switches;
|
||||||
|
}
|
||||||
|
} else if (std::strncmp(status_line, "nonvoluntary_ctxt_switches:\t",
|
||||||
|
29) == 0) {
|
||||||
|
unsigned long long nonvoluntary_switches;
|
||||||
|
if (std::sscanf(status_line + 29, "%llu", &nonvoluntary_switches) ==
|
||||||
|
1) {
|
||||||
|
if (last_nonvoluntary_context_switches_ > 0) {
|
||||||
|
if (nonvoluntary_switches > last_nonvoluntary_context_switches_) {
|
||||||
|
context_switches_total_nonvoluntary_.inc(
|
||||||
|
nonvoluntary_switches - last_nonvoluntary_context_switches_);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
context_switches_total_nonvoluntary_.inc(nonvoluntary_switches);
|
||||||
|
}
|
||||||
|
last_nonvoluntary_context_switches_ = nonvoluntary_switches;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::fclose(status_fp);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -28,7 +28,20 @@ private:
|
|||||||
metric::Gauge max_fds_;
|
metric::Gauge max_fds_;
|
||||||
metric::Gauge start_time_seconds_;
|
metric::Gauge start_time_seconds_;
|
||||||
|
|
||||||
|
// Additional process metrics from /proc/self/status
|
||||||
|
metric::Gauge threads_;
|
||||||
|
metric::Counter context_switches_total_voluntary_;
|
||||||
|
metric::Counter context_switches_total_nonvoluntary_;
|
||||||
|
|
||||||
|
// Page fault metrics from /proc/self/stat
|
||||||
|
metric::Counter page_faults_total_minor_;
|
||||||
|
metric::Counter page_faults_total_major_;
|
||||||
|
|
||||||
// Last observed values for calculating counter increments
|
// Last observed values for calculating counter increments
|
||||||
unsigned long long last_total_ticks_ = 0;
|
unsigned long long last_total_ticks_ = 0;
|
||||||
|
unsigned long long last_minor_faults_ = 0;
|
||||||
|
unsigned long long last_major_faults_ = 0;
|
||||||
|
unsigned long long last_voluntary_context_switches_ = 0;
|
||||||
|
unsigned long long last_nonvoluntary_context_switches_ = 0;
|
||||||
bool start_time_set_ = false;
|
bool start_time_set_ = false;
|
||||||
};
|
};
|
||||||
Reference in New Issue
Block a user