forked from mirror/client_golang
Merge pull request #594 from prometheus/beorn7/promhttp
Add an error counter for internal errors in the HTTP handler
This commit is contained in:
commit
7ed96b33bd
|
@ -84,10 +84,32 @@ func Handler() http.Handler {
|
||||||
// instrumentation. Use the InstrumentMetricHandler function to apply the same
|
// instrumentation. Use the InstrumentMetricHandler function to apply the same
|
||||||
// kind of instrumentation as it is used by the Handler function.
|
// kind of instrumentation as it is used by the Handler function.
|
||||||
func HandlerFor(reg prometheus.Gatherer, opts HandlerOpts) http.Handler {
|
func HandlerFor(reg prometheus.Gatherer, opts HandlerOpts) http.Handler {
|
||||||
var inFlightSem chan struct{}
|
var (
|
||||||
|
inFlightSem chan struct{}
|
||||||
|
errCnt = prometheus.NewCounterVec(
|
||||||
|
prometheus.CounterOpts{
|
||||||
|
Name: "promhttp_metric_handler_errors_total",
|
||||||
|
Help: "Total number of internal errors encountered by the promhttp metric handler.",
|
||||||
|
},
|
||||||
|
[]string{"cause"},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
if opts.MaxRequestsInFlight > 0 {
|
if opts.MaxRequestsInFlight > 0 {
|
||||||
inFlightSem = make(chan struct{}, opts.MaxRequestsInFlight)
|
inFlightSem = make(chan struct{}, opts.MaxRequestsInFlight)
|
||||||
}
|
}
|
||||||
|
if opts.Registry != nil {
|
||||||
|
// Initialize all possibilities that can occur below.
|
||||||
|
errCnt.WithLabelValues("gathering")
|
||||||
|
errCnt.WithLabelValues("encoding")
|
||||||
|
if err := opts.Registry.Register(errCnt); err != nil {
|
||||||
|
if are, ok := err.(prometheus.AlreadyRegisteredError); ok {
|
||||||
|
errCnt = are.ExistingCollector.(*prometheus.CounterVec)
|
||||||
|
} else {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
h := http.HandlerFunc(func(rsp http.ResponseWriter, req *http.Request) {
|
h := http.HandlerFunc(func(rsp http.ResponseWriter, req *http.Request) {
|
||||||
if inFlightSem != nil {
|
if inFlightSem != nil {
|
||||||
|
@ -106,6 +128,7 @@ func HandlerFor(reg prometheus.Gatherer, opts HandlerOpts) http.Handler {
|
||||||
if opts.ErrorLog != nil {
|
if opts.ErrorLog != nil {
|
||||||
opts.ErrorLog.Println("error gathering metrics:", err)
|
opts.ErrorLog.Println("error gathering metrics:", err)
|
||||||
}
|
}
|
||||||
|
errCnt.WithLabelValues("gathering").Inc()
|
||||||
switch opts.ErrorHandling {
|
switch opts.ErrorHandling {
|
||||||
case PanicOnError:
|
case PanicOnError:
|
||||||
panic(err)
|
panic(err)
|
||||||
|
@ -146,6 +169,7 @@ func HandlerFor(reg prometheus.Gatherer, opts HandlerOpts) http.Handler {
|
||||||
if opts.ErrorLog != nil {
|
if opts.ErrorLog != nil {
|
||||||
opts.ErrorLog.Println("error encoding and sending metric family:", err)
|
opts.ErrorLog.Println("error encoding and sending metric family:", err)
|
||||||
}
|
}
|
||||||
|
errCnt.WithLabelValues("encoding").Inc()
|
||||||
switch opts.ErrorHandling {
|
switch opts.ErrorHandling {
|
||||||
case PanicOnError:
|
case PanicOnError:
|
||||||
panic(err)
|
panic(err)
|
||||||
|
@ -236,9 +260,12 @@ const (
|
||||||
// Ignore errors and try to serve as many metrics as possible. However,
|
// Ignore errors and try to serve as many metrics as possible. However,
|
||||||
// if no metrics can be served, serve an HTTP status code 500 and the
|
// if no metrics can be served, serve an HTTP status code 500 and the
|
||||||
// last error message in the body. Only use this in deliberate "best
|
// last error message in the body. Only use this in deliberate "best
|
||||||
// effort" metrics collection scenarios. It is recommended to at least
|
// effort" metrics collection scenarios. In this case, it is highly
|
||||||
// log errors (by providing an ErrorLog in HandlerOpts) to not mask
|
// recommended to provide other means of detecting errors: By setting an
|
||||||
// errors completely.
|
// ErrorLog in HandlerOpts, the errors are logged. By providing a
|
||||||
|
// Registry in HandlerOpts, the exposed metrics include an error counter
|
||||||
|
// "promhttp_metric_handler_errors_total", which can be used for
|
||||||
|
// alerts.
|
||||||
ContinueOnError
|
ContinueOnError
|
||||||
// Panic upon the first error encountered (useful for "crash only" apps).
|
// Panic upon the first error encountered (useful for "crash only" apps).
|
||||||
PanicOnError
|
PanicOnError
|
||||||
|
@ -261,6 +288,18 @@ type HandlerOpts struct {
|
||||||
// logged regardless of the configured ErrorHandling provided ErrorLog
|
// logged regardless of the configured ErrorHandling provided ErrorLog
|
||||||
// is not nil.
|
// is not nil.
|
||||||
ErrorHandling HandlerErrorHandling
|
ErrorHandling HandlerErrorHandling
|
||||||
|
// If Registry is not nil, it is used to register a metric
|
||||||
|
// "promhttp_metric_handler_errors_total", partitioned by "cause". A
|
||||||
|
// failed registration causes a panic. Note that this error counter is
|
||||||
|
// different from the instrumentation you get from the various
|
||||||
|
// InstrumentHandler... helpers. It counts errors that don't necessarily
|
||||||
|
// result in a non-2xx HTTP status code. There are two typical cases:
|
||||||
|
// (1) Encoding errors that only happen after streaming of the HTTP body
|
||||||
|
// has already started (and the status code 200 has been sent). This
|
||||||
|
// should only happen with custom collectors. (2) Collection errors with
|
||||||
|
// no effect on the HTTP status code because ErrorHandling is set to
|
||||||
|
// ContinueOnError.
|
||||||
|
Registry prometheus.Registerer
|
||||||
// If DisableCompression is true, the handler will never compress the
|
// If DisableCompression is true, the handler will never compress the
|
||||||
// response, even if requested by the client.
|
// response, even if requested by the client.
|
||||||
DisableCompression bool
|
DisableCompression bool
|
||||||
|
|
|
@ -59,7 +59,8 @@ func (b blockingCollector) Collect(ch chan<- prometheus.Metric) {
|
||||||
func TestHandlerErrorHandling(t *testing.T) {
|
func TestHandlerErrorHandling(t *testing.T) {
|
||||||
|
|
||||||
// Create a registry that collects a MetricFamily with two elements,
|
// Create a registry that collects a MetricFamily with two elements,
|
||||||
// another with one, and reports an error.
|
// another with one, and reports an error. Further down, we'll use the
|
||||||
|
// same registry in the HandlerOpts.
|
||||||
reg := prometheus.NewRegistry()
|
reg := prometheus.NewRegistry()
|
||||||
|
|
||||||
cnt := prometheus.NewCounter(prometheus.CounterOpts{
|
cnt := prometheus.NewCounter(prometheus.CounterOpts{
|
||||||
|
@ -92,14 +93,17 @@ func TestHandlerErrorHandling(t *testing.T) {
|
||||||
errorHandler := HandlerFor(reg, HandlerOpts{
|
errorHandler := HandlerFor(reg, HandlerOpts{
|
||||||
ErrorLog: logger,
|
ErrorLog: logger,
|
||||||
ErrorHandling: HTTPErrorOnError,
|
ErrorHandling: HTTPErrorOnError,
|
||||||
|
Registry: reg,
|
||||||
})
|
})
|
||||||
continueHandler := HandlerFor(reg, HandlerOpts{
|
continueHandler := HandlerFor(reg, HandlerOpts{
|
||||||
ErrorLog: logger,
|
ErrorLog: logger,
|
||||||
ErrorHandling: ContinueOnError,
|
ErrorHandling: ContinueOnError,
|
||||||
|
Registry: reg,
|
||||||
})
|
})
|
||||||
panicHandler := HandlerFor(reg, HandlerOpts{
|
panicHandler := HandlerFor(reg, HandlerOpts{
|
||||||
ErrorLog: logger,
|
ErrorLog: logger,
|
||||||
ErrorHandling: PanicOnError,
|
ErrorHandling: PanicOnError,
|
||||||
|
Registry: reg,
|
||||||
})
|
})
|
||||||
wantMsg := `error gathering metrics: error collecting metric Desc{fqName: "invalid_metric", help: "not helpful", constLabels: {}, variableLabels: []}: collect error
|
wantMsg := `error gathering metrics: error collecting metric Desc{fqName: "invalid_metric", help: "not helpful", constLabels: {}, variableLabels: []}: collect error
|
||||||
`
|
`
|
||||||
|
@ -107,10 +111,29 @@ func TestHandlerErrorHandling(t *testing.T) {
|
||||||
|
|
||||||
error collecting metric Desc{fqName: "invalid_metric", help: "not helpful", constLabels: {}, variableLabels: []}: collect error
|
error collecting metric Desc{fqName: "invalid_metric", help: "not helpful", constLabels: {}, variableLabels: []}: collect error
|
||||||
`
|
`
|
||||||
wantOKBody := `# HELP name docstring
|
wantOKBody1 := `# HELP name docstring
|
||||||
# TYPE name counter
|
# TYPE name counter
|
||||||
name{constname="constvalue",labelname="val1"} 1
|
name{constname="constvalue",labelname="val1"} 1
|
||||||
name{constname="constvalue",labelname="val2"} 1
|
name{constname="constvalue",labelname="val2"} 1
|
||||||
|
# HELP promhttp_metric_handler_errors_total Total number of internal errors encountered by the promhttp metric handler.
|
||||||
|
# TYPE promhttp_metric_handler_errors_total counter
|
||||||
|
promhttp_metric_handler_errors_total{cause="encoding"} 0
|
||||||
|
promhttp_metric_handler_errors_total{cause="gathering"} 1
|
||||||
|
# HELP the_count Ah-ah-ah! Thunder and lightning!
|
||||||
|
# TYPE the_count counter
|
||||||
|
the_count 0
|
||||||
|
`
|
||||||
|
// It might happen that counting the gathering error makes it to the
|
||||||
|
// promhttp_metric_handler_errors_total counter before it is gathered
|
||||||
|
// itself. Thus, we have two bodies that are acceptable for the test.
|
||||||
|
wantOKBody2 := `# HELP name docstring
|
||||||
|
# TYPE name counter
|
||||||
|
name{constname="constvalue",labelname="val1"} 1
|
||||||
|
name{constname="constvalue",labelname="val2"} 1
|
||||||
|
# HELP promhttp_metric_handler_errors_total Total number of internal errors encountered by the promhttp metric handler.
|
||||||
|
# TYPE promhttp_metric_handler_errors_total counter
|
||||||
|
promhttp_metric_handler_errors_total{cause="encoding"} 0
|
||||||
|
promhttp_metric_handler_errors_total{cause="gathering"} 2
|
||||||
# HELP the_count Ah-ah-ah! Thunder and lightning!
|
# HELP the_count Ah-ah-ah! Thunder and lightning!
|
||||||
# TYPE the_count counter
|
# TYPE the_count counter
|
||||||
the_count 0
|
the_count 0
|
||||||
|
@ -137,8 +160,8 @@ the_count 0
|
||||||
if got := logBuf.String(); got != wantMsg {
|
if got := logBuf.String(); got != wantMsg {
|
||||||
t.Errorf("got log message %q, want %q", got, wantMsg)
|
t.Errorf("got log message %q, want %q", got, wantMsg)
|
||||||
}
|
}
|
||||||
if got := writer.Body.String(); got != wantOKBody {
|
if got := writer.Body.String(); got != wantOKBody1 && got != wantOKBody2 {
|
||||||
t.Errorf("got body %q, want %q", got, wantOKBody)
|
t.Errorf("got body %q, want either %q or %q", got, wantOKBody1, wantOKBody2)
|
||||||
}
|
}
|
||||||
|
|
||||||
defer func() {
|
defer func() {
|
||||||
|
|
Loading…
Reference in New Issue