From a5e134014f4a26578c4fdadb3e235caf3ed6c8b2 Mon Sep 17 00:00:00 2001 From: Matt Harbison <57785103+mharbison72@users.noreply.github.com> Date: Wed, 4 Sep 2024 10:56:01 -0400 Subject: [PATCH] process_collector: fill in most statistics on macOS (#1600) * process_collector: fill in most statistics on macOS Unfortunately, the virtual memory, resident memory, and network stats will require access to undocumented C functions. I was warned off of cgo in IRC because it would then have to be enabled in a bunch of different projects that use this module, but I already was against it because that would break the ability to cross-compile. There is no interface to `dlopen` built into golang. The `github.com/ebitengine/purego` module looks promising (I can cross-compile and call these methods), but I'm currently getting unexpected results. I'll follow up with that separately if I can get it working, but hopefully this stuff is pretty uncontroversial. Tested on macOS 10.14.6 (amd64), macOS 14.6.1 (amd64), and macOS 15.0 (arm64) by spawning `/usr/bin/ulimit -a -S` and `/usr/sbin/lsof -c $my_process` from the test exporter process, and `ps -o lstart,vsize,rss,utime,stime,command` from the shell, and comparing results with the exported metrics. I can't find documentation for `RLIMIT_AS` on macOS (specifically if it's in bytes or pages). It's currently being reported back as `RLIM_INFINITY`, which seems reasonable, because I've come across reports that the value is ignored anyway[1]. The bash 3.2 code for the built-in `ulimit` divides the value reported by `getrusage(2)` by 1024 when printing, as it does for `RLIMIT_DATA`, which is documented as being bytes in `getrusage(2)`. The help for `ulimit` indicates it prints both in kbytes, so it's reasonable to assume this is already in bytes. [1] https://issues.chromium.org/issues/40581251#comment3 Signed-off-by: Matt Harbison * Update prometheus/process_collector_darwin.go Co-authored-by: Ben Kochie Signed-off-by: Matt Harbison <57785103+mharbison72@users.noreply.github.com> --------- Signed-off-by: Matt Harbison Signed-off-by: Matt Harbison <57785103+mharbison72@users.noreply.github.com> Co-authored-by: Ben Kochie Co-authored-by: Bartlomiej Plotka --- prometheus/process_collector_darwin.go | 110 +++++++++++++++++++++++++ prometheus/process_collector_other.go | 4 +- 2 files changed, 112 insertions(+), 2 deletions(-) create mode 100644 prometheus/process_collector_darwin.go diff --git a/prometheus/process_collector_darwin.go b/prometheus/process_collector_darwin.go new file mode 100644 index 0000000..4d9314c --- /dev/null +++ b/prometheus/process_collector_darwin.go @@ -0,0 +1,110 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package prometheus + +import ( + "fmt" + "golang.org/x/sys/unix" + "os" + "syscall" + "time" +) + +func canCollectProcess() bool { + return true +} + +func getSoftLimit(which int) (uint64, error) { + rlimit := syscall.Rlimit{} + + if err := syscall.Getrlimit(which, &rlimit); err != nil { + return 0, err + } + + return rlimit.Cur, nil +} + +func getOpenFileCount() (float64, error) { + // Alternately, the undocumented proc_pidinfo(PROC_PIDLISTFDS) can be used to + // return a list of open fds, but that requires a way to call C APIs. The + // benefits, however, include fewer system calls and not failing when at the + // open file soft limit. + + if dir, err := os.Open("/dev/fd"); err != nil { + return 0.0, err + } else { + defer dir.Close() + + // Avoid ReadDir(), as it calls stat(2) on each descriptor. Not only is + // that info not used, but KQUEUE descriptors fail stat(2), which causes + // the whole method to fail. + if names, err := dir.Readdirnames(0); err != nil { + return 0.0, err + } else { + // Subtract 1 to ignore the open /dev/fd descriptor above. + return float64(len(names) - 1), nil + } + } +} + +func (c *processCollector) processCollect(ch chan<- Metric) { + if procs, err := unix.SysctlKinfoProcSlice("kern.proc.pid", os.Getpid()); err == nil { + if len(procs) == 1 { + startTime := float64(procs[0].Proc.P_starttime.Nano() / 1e9) + ch <- MustNewConstMetric(c.startTime, GaugeValue, startTime) + } else { + err = fmt.Errorf("sysctl() returned %d proc structs (expected 1)", len(procs)) + c.reportError(ch, c.startTime, err) + } + } else { + c.reportError(ch, c.startTime, err) + } + + // The proc structure returned by kern.proc.pid above has an Rusage member, + // but it is not filled in, so it needs to be fetched by getrusage(2). For + // that call, the UTime, STime, and Maxrss members are filled out, but not + // Ixrss, Idrss, or Isrss for the memory usage. Memory stats will require + // access to the C API to call task_info(TASK_BASIC_INFO). + rusage := unix.Rusage{} + + if err := unix.Getrusage(syscall.RUSAGE_SELF, &rusage); err == nil { + cpuTime := time.Duration(rusage.Stime.Nano() + rusage.Utime.Nano()).Seconds() + ch <- MustNewConstMetric(c.cpuTotal, CounterValue, cpuTime) + } else { + c.reportError(ch, c.cpuTotal, err) + } + + // TODO: publish c.vsize and c.rss values + + if fds, err := getOpenFileCount(); err == nil { + ch <- MustNewConstMetric(c.openFDs, GaugeValue, fds) + } else { + c.reportError(ch, c.openFDs, err) + } + + if openFiles, err := getSoftLimit(syscall.RLIMIT_NOFILE); err == nil { + ch <- MustNewConstMetric(c.maxFDs, GaugeValue, float64(openFiles)) + } else { + c.reportError(ch, c.maxFDs, err) + } + + if addressSpace, err := getSoftLimit(syscall.RLIMIT_AS); err == nil { + ch <- MustNewConstMetric(c.maxVsize, GaugeValue, float64(addressSpace)) + } else { + c.reportError(ch, c.maxVsize, err) + } + + // TODO: socket(PF_SYSTEM) to fetch "com.apple.network.statistics" might + // be able to get the per-process network send/receive counts. +} diff --git a/prometheus/process_collector_other.go b/prometheus/process_collector_other.go index 14d56d2..a05029c 100644 --- a/prometheus/process_collector_other.go +++ b/prometheus/process_collector_other.go @@ -11,8 +11,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build !windows && !js && !wasip1 -// +build !windows,!js,!wasip1 +//go:build !windows && !js && !wasip1 && !darwin +// +build !windows,!js,!wasip1,!darwin package prometheus