From 7faf9e76efa0d84fd537cdb9c2372ce4c3df14f1 Mon Sep 17 00:00:00 2001
From: Tobias Schmidt
Date: Tue, 10 Feb 2015 16:19:27 -0500
Subject: [PATCH 1/2] Handle process collector errors gracefully

As it is expected that the process collector can fail under certain
conditions (proc information for a process only readable by root or
other user for example) and as there is currently no option to configure
the error behavior of the client, this change reverts the error
reporting introduced in 159e96f. This effectively means that errors are
simply ignored and there won't be any samples for the process_* metrics
in case of an error.

Once a user can control how to behave in case of errors returned by
collectors, this change should probably be reverted.
---
 prometheus/process_collector_procfs.go | 31 ++++----------------------
 1 file changed, 4 insertions(+), 27 deletions(-)

diff --git a/prometheus/process_collector_procfs.go b/prometheus/process_collector_procfs.go
index fab1f03..a15a75f 100644
--- a/prometheus/process_collector_procfs.go
+++ b/prometheus/process_collector_procfs.go
@@ -27,23 +27,15 @@ func processCollectSupported() bool {
 func (c *processCollector) processCollect(ch chan<- Metric) {
 	pid, err := c.pidFn()
 	if err != nil {
-		c.reportCollectErrors(ch, err)
 		return
 	}
 
 	p, err := procfs.NewProc(pid)
 	if err != nil {
-		c.reportCollectErrors(ch, err)
 		return
 	}
 
-	if stat, err := p.NewStat(); err != nil {
-		// Report collect errors for metrics depending on stat.
-		ch <- NewInvalidMetric(c.vsize.Desc(), err)
-		ch <- NewInvalidMetric(c.rss.Desc(), err)
-		ch <- NewInvalidMetric(c.startTime.Desc(), err)
-		ch <- NewInvalidMetric(c.cpuTotal.Desc(), err)
-	} else {
+	if stat, err := p.NewStat(); err == nil {
 		c.cpuTotal.Set(stat.CPUTime())
 		ch <- c.cpuTotal
 		c.vsize.Set(float64(stat.VirtualMemory()))
@@ -51,34 +43,19 @@ func (c *processCollector) processCollect(ch chan<- Metric) {
 		c.rss.Set(float64(stat.ResidentMemory()))
 		ch <- c.rss
 
-		if startTime, err := stat.StartTime(); err != nil {
-			ch <- NewInvalidMetric(c.startTime.Desc(), err)
-		} else {
+		if startTime, err := stat.StartTime(); err == nil {
 			c.startTime.Set(startTime)
 			ch <- c.startTime
 		}
 	}
 
-	if fds, err := p.FileDescriptorsLen(); err != nil {
-		ch <- NewInvalidMetric(c.openFDs.Desc(), err)
-	} else {
+	if fds, err := p.FileDescriptorsLen(); err == nil {
 		c.openFDs.Set(float64(fds))
 		ch <- c.openFDs
 	}
 
-	if limits, err := p.NewLimits(); err != nil {
-		ch <- NewInvalidMetric(c.maxFDs.Desc(), err)
-	} else {
+	if limits, err := p.NewLimits(); err == nil {
 		c.maxFDs.Set(float64(limits.OpenFiles))
 		ch <- c.maxFDs
 	}
 }
-
-func (c *processCollector) reportCollectErrors(ch chan<- Metric, err error) {
-	ch <- NewInvalidMetric(c.cpuTotal.Desc(), err)
-	ch <- NewInvalidMetric(c.openFDs.Desc(), err)
-	ch <- NewInvalidMetric(c.maxFDs.Desc(), err)
-	ch <- NewInvalidMetric(c.vsize.Desc(), err)
-	ch <- NewInvalidMetric(c.rss.Desc(), err)
-	ch <- NewInvalidMetric(c.startTime.Desc(), err)
-}

From 98061eecab1aef966a79e2cfd1c2f70d1c1df395 Mon Sep 17 00:00:00 2001
From: Tobias Schmidt
Date: Wed, 11 Feb 2015 14:58:35 -0500
Subject: [PATCH 2/2] Add todo comment to bring back error reporting

---
 prometheus/process_collector_procfs.go | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/prometheus/process_collector_procfs.go b/prometheus/process_collector_procfs.go
index a15a75f..5a09ded 100644
--- a/prometheus/process_collector_procfs.go
+++ b/prometheus/process_collector_procfs.go
@@ -24,6 +24,8 @@ func processCollectSupported() bool {
 	return false
 }
 
+// TODO(ts): Bring back error reporting by reverting 7faf9e7 as soon as the
+// client allows users to configure the error behavior.
 func (c *processCollector) processCollect(ch chan<- Metric) {
 	pid, err := c.pidFn()
 	if err != nil {