Add an error counter for internal errors in the HTTP handler
The doc comments explain the rationale in a quite detailed way. Fixes #543 and #542 Signed-off-by: beorn7 <beorn@grafana.com>
This commit is contained in:
parent
ea8c935f20
commit
3fa9fca2cb
|
@ -84,10 +84,32 @@ func Handler() http.Handler {
|
|||
// instrumentation. Use the InstrumentMetricHandler function to apply the same
|
||||
// kind of instrumentation as it is used by the Handler function.
|
||||
func HandlerFor(reg prometheus.Gatherer, opts HandlerOpts) http.Handler {
|
||||
var inFlightSem chan struct{}
|
||||
var (
|
||||
inFlightSem chan struct{}
|
||||
errCnt = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "promhttp_metric_handler_errors_total",
|
||||
Help: "Total number of internal errors encountered by the promhttp metric handler.",
|
||||
},
|
||||
[]string{"cause"},
|
||||
)
|
||||
)
|
||||
|
||||
if opts.MaxRequestsInFlight > 0 {
|
||||
inFlightSem = make(chan struct{}, opts.MaxRequestsInFlight)
|
||||
}
|
||||
if opts.Registry != nil {
|
||||
// Initialize all possibilites that can occur below.
|
||||
errCnt.WithLabelValues("gathering")
|
||||
errCnt.WithLabelValues("encoding")
|
||||
if err := opts.Registry.Register(errCnt); err != nil {
|
||||
if are, ok := err.(prometheus.AlreadyRegisteredError); ok {
|
||||
errCnt = are.ExistingCollector.(*prometheus.CounterVec)
|
||||
} else {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
h := http.HandlerFunc(func(rsp http.ResponseWriter, req *http.Request) {
|
||||
if inFlightSem != nil {
|
||||
|
@ -106,6 +128,7 @@ func HandlerFor(reg prometheus.Gatherer, opts HandlerOpts) http.Handler {
|
|||
if opts.ErrorLog != nil {
|
||||
opts.ErrorLog.Println("error gathering metrics:", err)
|
||||
}
|
||||
errCnt.WithLabelValues("gathering").Inc()
|
||||
switch opts.ErrorHandling {
|
||||
case PanicOnError:
|
||||
panic(err)
|
||||
|
@ -146,6 +169,7 @@ func HandlerFor(reg prometheus.Gatherer, opts HandlerOpts) http.Handler {
|
|||
if opts.ErrorLog != nil {
|
||||
opts.ErrorLog.Println("error encoding and sending metric family:", err)
|
||||
}
|
||||
errCnt.WithLabelValues("encoding").Inc()
|
||||
switch opts.ErrorHandling {
|
||||
case PanicOnError:
|
||||
panic(err)
|
||||
|
@ -236,9 +260,12 @@ const (
|
|||
// Ignore errors and try to serve as many metrics as possible. However,
|
||||
// if no metrics can be served, serve an HTTP status code 500 and the
|
||||
// last error message in the body. Only use this in deliberate "best
|
||||
// effort" metrics collection scenarios. It is recommended to at least
|
||||
// log errors (by providing an ErrorLog in HandlerOpts) to not mask
|
||||
// errors completely.
|
||||
// effort" metrics collection scenarios. In this case, it is highly
|
||||
// recommended to provide other means of detecting errors: By setting an
|
||||
// ErrorLog in HandlerOpts, the errors are logged. By providing a
|
||||
// Registry in HandlerOpts, the exposed metrics include an error counter
|
||||
// "promhttp_metric_handler_errors_total", which can be used for
|
||||
// alerts.
|
||||
ContinueOnError
|
||||
// Panic upon the first error encountered (useful for "crash only" apps).
|
||||
PanicOnError
|
||||
|
@ -261,6 +288,18 @@ type HandlerOpts struct {
|
|||
// logged regardless of the configured ErrorHandling provided ErrorLog
|
||||
// is not nil.
|
||||
ErrorHandling HandlerErrorHandling
|
||||
// If Registry is not nil, it is used to register a metric
|
||||
// "promhttp_metric_handler_errors_total", partitioned by "cause". A
|
||||
// failed registration causes a panic. Note that this error counter is
|
||||
// different from the instrumentation you get from the various
|
||||
// InstrumentHandler... helpers. It counts errors that don't necessarily
|
||||
// result in a non-2xx HTTP status code. There are two typical cases:
|
||||
// (1) Encoding errors that only happen after streaming of the HTTP body
|
||||
// has already started (and the status code 200 has been sent). This
|
||||
// should only happen with custom collectors. (2) Collection errors with
|
||||
// no effect on the HTTP status code because ErrorHandling is set to
|
||||
// ContinueOnError.
|
||||
Registry prometheus.Registerer
|
||||
// If DisableCompression is true, the handler will never compress the
|
||||
// response, even if requested by the client.
|
||||
DisableCompression bool
|
||||
|
|
Loading…
Reference in New Issue