From 18d13eacc9cce330610a70daf4ed0fef2e846589 Mon Sep 17 00:00:00 2001
From: Bob Shannon
Date: Tue, 15 Jan 2019 16:34:51 -0500
Subject: [PATCH] Add support for fetching rules with API client (#508)

This PR adds support for fetching
[rules](https://prometheus.io/docs/prometheus/latest/querying/api/#rules) via
the `/api/v1/rules` endpoint using the API client. Currently the API client
exposes no way to do this, and it would be useful for external systems that
wish to retrieve this information.
---
 api/prometheus/v1/api.go      | 207 ++++++++++++++++++++++++++++++++++
 api/prometheus/v1/api_test.go | 108 ++++++++++++++++++
 2 files changed, 315 insertions(+)

diff --git a/api/prometheus/v1/api.go b/api/prometheus/v1/api.go
index 6424188..345c0de 100644
--- a/api/prometheus/v1/api.go
+++ b/api/prometheus/v1/api.go
@@ -20,6 +20,7 @@ package v1
 import (
     "context"
     "encoding/json"
+    "errors"
     "fmt"
     "net/http"
     "strconv"
@@ -40,6 +41,7 @@ const (
     epLabelValues     = apiPrefix + "/label/:name/values"
     epSeries          = apiPrefix + "/series"
     epTargets         = apiPrefix + "/targets"
+    epRules           = apiPrefix + "/rules"
     epSnapshot        = apiPrefix + "/admin/tsdb/snapshot"
     epDeleteSeries    = apiPrefix + "/admin/tsdb/delete_series"
     epCleanTombstones = apiPrefix + "/admin/tsdb/clean_tombstones"
@@ -47,13 +49,27 @@ const (
     epFlags           = apiPrefix + "/status/flags"
 )
 
+// AlertState models the state of an alert.
+type AlertState string
+
 // ErrorType models the different API error types.
 type ErrorType string
 
 // HealthStatus models the health status of a scrape target.
 type HealthStatus string
 
+// RuleType models the type of a rule.
+type RuleType string
+
+// RuleHealth models the health status of a rule.
+type RuleHealth string
+
 const (
+    // Possible values for AlertState.
+    AlertStateFiring   AlertState = "firing"
+    AlertStateInactive AlertState = "inactive"
+    AlertStatePending  AlertState = "pending"
+
     // Possible values for ErrorType.
     ErrBadData ErrorType = "bad_data"
     ErrTimeout ErrorType = "timeout"
@@ -67,6 +83,15 @@ const (
     HealthGood    HealthStatus = "up"
     HealthUnknown HealthStatus = "unknown"
     HealthBad     HealthStatus = "down"
+
+    // Possible values for RuleType.
+    RuleTypeRecording RuleType = "recording"
+    RuleTypeAlerting  RuleType = "alerting"
+
+    // Possible values for RuleHealth.
+    RuleHealthGood    RuleHealth = "ok"
+    RuleHealthUnknown RuleHealth = "unknown"
+    RuleHealthBad     RuleHealth = "err"
 )
 
 // Error is an error returned by the API.
@@ -111,6 +136,8 @@ type API interface {
     // Snapshot creates a snapshot of all current data into snapshots/<datetime>-<rand>
     // under the TSDB's data directory and returns the directory as response.
     Snapshot(ctx context.Context, skipHead bool) (SnapshotResult, error)
+    // Rules returns a list of alerting and recording rules that are currently loaded.
+    Rules(ctx context.Context) (RulesResult, error)
     // Targets returns an overview of the current state of the Prometheus target discovery.
     Targets(ctx context.Context) (TargetsResult, error)
 }
@@ -139,6 +166,63 @@ type SnapshotResult struct {
     Name string `json:"name"`
 }
 
+// RulesResult contains the result from querying the rules endpoint.
+type RulesResult struct {
+    Groups []RuleGroup `json:"groups"`
+}
+
+// RuleGroup models a rule group that contains a set of recording and alerting rules.
+type RuleGroup struct {
+    Name     string  `json:"name"`
+    File     string  `json:"file"`
+    Interval float64 `json:"interval"`
+    Rules    Rules   `json:"rules"`
+}
+
+// Recording and alerting rules are stored in the same slice to preserve the order
+// that rules are returned in by the API.
+//
+// Rule types can be determined using a type switch:
+//   switch v := rule.(type) {
+//   case RecordingRule:
+//       fmt.Print("got a recording rule")
+//   case AlertingRule:
+//       fmt.Print("got an alerting rule")
+//   default:
+//       fmt.Printf("unknown rule type %s", v)
+//   }
+type Rules []interface{}
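The switch in the comment above expands naturally into a small consumer of these types. The sketch below is illustrative only: the `printRules` helper and the hand-built `RulesResult` literal are not part of this change, and in practice the value would come from the `Rules` client method added further down.

```go
package main

import (
    "fmt"

    v1 "github.com/prometheus/client_golang/api/prometheus/v1"
)

// printRules walks a RulesResult and prints one line per rule, using the
// type switch shown in the Rules doc comment.
func printRules(result v1.RulesResult) {
    for _, g := range result.Groups {
        fmt.Printf("group %q from %s (interval %gs)\n", g.Name, g.File, g.Interval)
        for _, r := range g.Rules {
            switch rule := r.(type) {
            case v1.RecordingRule:
                fmt.Printf("  recording rule %q: %s\n", rule.Name, rule.Query)
            case v1.AlertingRule:
                fmt.Printf("  alerting rule %q (%d active alerts)\n", rule.Name, len(rule.Alerts))
            default:
                fmt.Printf("  unexpected rule type %T\n", rule)
            }
        }
    }
}

func main() {
    // A hand-built value for the sake of the example; in practice this
    // comes from the Rules client method added later in this patch.
    printRules(v1.RulesResult{
        Groups: []v1.RuleGroup{{
            Name:     "example",
            File:     "/rules.yaml",
            Interval: 60,
            Rules: v1.Rules{
                v1.RecordingRule{Name: "job:http_inprogress_requests:sum", Query: "sum(http_inprogress_requests) by (job)"},
            },
        }},
    })
}
```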
+
+// AlertingRule models an alerting rule.
+type AlertingRule struct {
+    Name        string         `json:"name"`
+    Query       string         `json:"query"`
+    Duration    float64        `json:"duration"`
+    Labels      model.LabelSet `json:"labels"`
+    Annotations model.LabelSet `json:"annotations"`
+    Alerts      []*Alert       `json:"alerts"`
+    Health      RuleHealth     `json:"health"`
+    LastError   string         `json:"lastError,omitempty"`
+}
+
+// RecordingRule models a recording rule.
+type RecordingRule struct {
+    Name      string         `json:"name"`
+    Query     string         `json:"query"`
+    Labels    model.LabelSet `json:"labels,omitempty"`
+    Health    RuleHealth     `json:"health"`
+    LastError string         `json:"lastError,omitempty"`
+}
+
+// Alert models an active alert.
+type Alert struct {
+    ActiveAt    time.Time `json:"activeAt"`
+    Annotations model.LabelSet
+    Labels      model.LabelSet
+    State       AlertState
+    Value       float64
+}
+
 // TargetsResult contains the result from querying the targets endpoint.
 type TargetsResult struct {
     Active  []ActiveTarget `json:"activeTargets"`
@@ -169,6 +253,111 @@ type queryResult struct {
     v model.Value
 }
 
+func (rg *RuleGroup) UnmarshalJSON(b []byte) error {
+    v := struct {
+        Name     string            `json:"name"`
+        File     string            `json:"file"`
+        Interval float64           `json:"interval"`
+        Rules    []json.RawMessage `json:"rules"`
+    }{}
+
+    if err := json.Unmarshal(b, &v); err != nil {
+        return err
+    }
+
+    rg.Name = v.Name
+    rg.File = v.File
+    rg.Interval = v.Interval
+
+    for _, rule := range v.Rules {
+        alertingRule := AlertingRule{}
+        if err := json.Unmarshal(rule, &alertingRule); err == nil {
+            rg.Rules = append(rg.Rules, alertingRule)
+            continue
+        }
+        recordingRule := RecordingRule{}
+        if err := json.Unmarshal(rule, &recordingRule); err == nil {
+            rg.Rules = append(rg.Rules, recordingRule)
+            continue
+        }
+        return errors.New("failed to decode JSON into an alerting or recording rule")
+    }
+
+    return nil
+}
+
+func (r *AlertingRule) UnmarshalJSON(b []byte) error {
+    v := struct {
+        Type string `json:"type"`
+    }{}
+    if err := json.Unmarshal(b, &v); err != nil {
+        return err
+    }
+    if v.Type == "" {
+        return errors.New("type field not present in rule")
+    }
+    if v.Type != string(RuleTypeAlerting) {
+        return fmt.Errorf("expected rule of type %s but got %s", string(RuleTypeAlerting), v.Type)
+    }
+
+    rule := struct {
+        Name        string         `json:"name"`
+        Query       string         `json:"query"`
+        Duration    float64        `json:"duration"`
+        Labels      model.LabelSet `json:"labels"`
+        Annotations model.LabelSet `json:"annotations"`
+        Alerts      []*Alert       `json:"alerts"`
+        Health      RuleHealth     `json:"health"`
+        LastError   string         `json:"lastError,omitempty"`
+    }{}
+    if err := json.Unmarshal(b, &rule); err != nil {
+        return err
+    }
+    r.Health = rule.Health
+    r.Annotations = rule.Annotations
+    r.Name = rule.Name
+    r.Query = rule.Query
+    r.Alerts = rule.Alerts
+    r.Duration = rule.Duration
+    r.Labels = rule.Labels
+    r.LastError = rule.LastError
+
+    return nil
+}
+
+func (r *RecordingRule) UnmarshalJSON(b []byte) error {
+    v := struct {
+        Type string `json:"type"`
+    }{}
+    if err := json.Unmarshal(b, &v); err != nil {
+        return err
+    }
+    if v.Type == "" {
+        return errors.New("type field not present in rule")
+    }
+    if v.Type != string(RuleTypeRecording) {
+        return fmt.Errorf("expected rule of type %s but got %s", string(RuleTypeRecording), v.Type)
+    }
+
+    rule := struct {
+        Name      string         `json:"name"`
+        Query     string         `json:"query"`
+        Labels    model.LabelSet `json:"labels,omitempty"`
+        Health    RuleHealth     `json:"health"`
+        LastError string         `json:"lastError,omitempty"`
+    }{}
+    if err := json.Unmarshal(b, &rule); err != nil {
+        return err
+    }
+    r.Health = rule.Health
+    r.Labels = rule.Labels
+    r.Name = rule.Name
+    r.LastError = rule.LastError
+    r.Query = rule.Query
+
+    return nil
+}
+
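Decoding is two-pass: `RuleGroup.UnmarshalJSON` keeps each rule as raw JSON and tries the alerting shape first, and because each rule's unmarshaler rejects a payload whose `type` field does not match, the fallback to `RecordingRule` is safe. A minimal sketch of that round trip follows, using an abridged group payload trimmed from the test fixture added below; it is illustrative only and assumes this change is available in the imported package.

```go
package main

import (
    "encoding/json"
    "fmt"

    v1 "github.com/prometheus/client_golang/api/prometheus/v1"
)

// An abridged /api/v1/rules group, trimmed from the test fixture below.
const groupJSON = `{
    "name": "example",
    "file": "/rules.yaml",
    "interval": 60,
    "rules": [
        {"type": "alerting", "name": "HighRequestLatency",
         "query": "job:request_latency_seconds:mean5m{job=\"myjob\"} > 0.5",
         "duration": 600, "health": "ok"},
        {"type": "recording", "name": "job:http_inprogress_requests:sum",
         "query": "sum(http_inprogress_requests) by (job)", "health": "ok"}
    ]
}`

func main() {
    var g v1.RuleGroup
    if err := json.Unmarshal([]byte(groupJSON), &g); err != nil {
        panic(err)
    }
    for _, r := range g.Rules {
        // Each element comes back as a concrete AlertingRule or RecordingRule value.
        fmt.Printf("%T: %+v\n", r, r)
    }
}
```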
 func (qr *queryResult) UnmarshalJSON(b []byte) error {
     v := struct {
         Type model.ValueType `json:"resultType"`
@@ -427,6 +616,24 @@ func (h *httpAPI) Snapshot(ctx context.Context, skipHead bool) (SnapshotResult,
     return res, err
 }
 
+func (h *httpAPI) Rules(ctx context.Context) (RulesResult, error) {
+    u := h.client.URL(epRules, nil)
+
+    req, err := http.NewRequest(http.MethodGet, u.String(), nil)
+    if err != nil {
+        return RulesResult{}, err
+    }
+
+    _, body, err := h.client.Do(ctx, req)
+    if err != nil {
+        return RulesResult{}, err
+    }
+
+    var res RulesResult
+    err = json.Unmarshal(body, &res)
+    return res, err
+}
+
 func (h *httpAPI) Targets(ctx context.Context) (TargetsResult, error) {
     u := h.client.URL(epTargets, nil)
 
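With the client method in place, fetching rules works like any other call through the v1 API. A minimal usage sketch, assuming a Prometheus server at a placeholder address and this change available in the imported client:

```go
package main

import (
    "context"
    "fmt"
    "log"
    "time"

    "github.com/prometheus/client_golang/api"
    v1 "github.com/prometheus/client_golang/api/prometheus/v1"
)

func main() {
    // The address is a placeholder; point it at a real Prometheus server.
    client, err := api.NewClient(api.Config{Address: "http://localhost:9090"})
    if err != nil {
        log.Fatalf("error creating client: %v", err)
    }
    v1api := v1.NewAPI(client)

    ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
    defer cancel()

    // Rules issues a GET against /api/v1/rules and decodes the result.
    result, err := v1api.Rules(ctx)
    if err != nil {
        log.Fatalf("error fetching rules: %v", err)
    }
    for _, g := range result.Groups {
        fmt.Printf("group %s has %d rules\n", g.Name, len(g.Rules))
    }
}
```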
"job:http_inprogress_requests:sum", + Query: "sum(http_inprogress_requests) by (job)", + LastError: "", + }, + }, + }, + }, + }, + }, + + { + do: doRules(), + reqMethod: "GET", + reqPath: "/api/v1/rules", + inErr: fmt.Errorf("some error"), + err: fmt.Errorf("some error"), + }, + { do: doTargets(), reqMethod: "GET",