diff --git a/prometheus/graphite/bridge.go b/prometheus/graphite/bridge.go
new file mode 100644
index 0000000..7f397b0
--- /dev/null
+++ b/prometheus/graphite/bridge.go
@@ -0,0 +1,310 @@
+// Copyright 2016 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package graphite provides a bridge to push Prometheus metrics to a Graphite
+// server.
+package graphite
+
+import (
+	"bufio"
+	"errors"
+	"fmt"
+	"io"
+	"net"
+	"sort"
+	"strings"
+	"time"
+
+	"github.com/prometheus/common/expfmt"
+	"github.com/prometheus/common/model"
+	"golang.org/x/net/context"
+
+	dto "github.com/prometheus/client_model/go"
+
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+const (
+	defaultInterval       = 15 * time.Second
+	defaultTimeout        = 15 * time.Second
+	millisecondsPerSecond = 1000
+)
+
+// HandlerErrorHandling defines how a Bridge handles errors encountered while
+// gathering metrics.
+type HandlerErrorHandling int
+
+// These constants cause a Bridge to behave as described if errors are
+// encountered while gathering metrics.
+const (
+	// Ignore errors and try to push as many metrics to Graphite as possible.
+	ContinueOnError HandlerErrorHandling = iota
+
+	// Abort the push to Graphite upon the first error encountered.
+	AbortOnError
+)
+
+// Config defines the Graphite bridge config.
+type Config struct {
+	// The address (host:port) of the Graphite server to push data to.
+	// Required.
+	URL string
+
+	// The prefix for the pushed Graphite metrics. Dots separate path
+	// components and are preserved; every component is sanitized like a
+	// metric name. Defaults to empty string, i.e. no prefix.
+	Prefix string
+
+	// The interval to use for pushing data to Graphite. Must not be
+	// negative. Defaults to 15 seconds.
+	Interval time.Duration
+
+	// The timeout for connecting to Graphite. Must not be negative.
+	// Defaults to 15 seconds.
+	Timeout time.Duration
+
+	// The Gatherer to use for metrics. Defaults to prometheus.DefaultGatherer.
+	Gatherer prometheus.Gatherer
+
+	// The logger that messages are written to. Defaults to no logging.
+	Logger Logger
+
+	// ErrorHandling defines how errors are handled. Note that errors are
+	// logged regardless of the configured ErrorHandling provided Logger
+	// is not nil.
+	ErrorHandling HandlerErrorHandling
+}
+
+// Bridge pushes metrics to the configured Graphite server.
+type Bridge struct {
+	url      string
+	prefix   string
+	interval time.Duration
+	timeout  time.Duration
+
+	errorHandling HandlerErrorHandling
+	logger        Logger
+
+	g prometheus.Gatherer
+}
+
+// Logger is the minimal interface Bridge needs for logging. Note that
+// log.Logger from the standard library implements this interface, and it is
+// easy to implement by custom loggers, if they don't do so already anyway.
+type Logger interface {
+	Println(v ...interface{})
+}
+
+// NewBridge returns a pointer to a new Bridge struct configured from c. It
+// returns an error if c is nil, if the URL is missing, if Interval or Timeout
+// is negative, or if ErrorHandling holds an unknown value.
+func NewBridge(c *Config) (*Bridge, error) {
+	if c == nil {
+		return nil, errors.New("missing config")
+	}
+	if c.URL == "" {
+		return nil, errors.New("missing URL")
+	}
+	// A negative interval would make time.NewTicker panic in Run, so reject
+	// it up front.
+	if c.Interval < 0 {
+		return nil, errors.New("negative interval")
+	}
+	if c.Timeout < 0 {
+		return nil, errors.New("negative timeout")
+	}
+	switch c.ErrorHandling {
+	case ContinueOnError, AbortOnError:
+	default:
+		return nil, fmt.Errorf("unrecognized error handling value: %d", c.ErrorHandling)
+	}
+
+	b := &Bridge{
+		url:           c.URL,
+		prefix:        c.Prefix,
+		interval:      c.Interval,
+		timeout:       c.Timeout,
+		errorHandling: c.ErrorHandling,
+		logger:        c.Logger,
+		g:             c.Gatherer,
+	}
+	if b.g == nil {
+		b.g = prometheus.DefaultGatherer
+	}
+	if b.interval == 0 {
+		b.interval = defaultInterval
+	}
+	if b.timeout == 0 {
+		b.timeout = defaultTimeout
+	}
+
+	return b, nil
+}
+
+// Run starts the event loop that pushes Prometheus metrics to Graphite at the
+// configured interval. It blocks until ctx is done. Push errors are logged if
+// a Logger is configured and otherwise dropped.
+func (b *Bridge) Run(ctx context.Context) {
+	ticker := time.NewTicker(b.interval)
+	defer ticker.Stop()
+	for {
+		select {
+		case <-ticker.C:
+			if err := b.Push(); err != nil && b.logger != nil {
+				b.logger.Println("error pushing to Graphite:", err)
+			}
+		case <-ctx.Done():
+			return
+		}
+	}
+}
+
+// Push gathers the Prometheus metrics and pushes them to the configured
+// Graphite server. If nothing was gathered, it returns without connecting.
+func (b *Bridge) Push() error {
+	mfs, err := b.g.Gather()
+	if err != nil {
+		switch b.errorHandling {
+		case AbortOnError:
+			return err
+		case ContinueOnError:
+			// Keep going and push whatever metric families were gathered
+			// despite the error.
+			if b.logger != nil {
+				b.logger.Println("continue on error:", err)
+			}
+		default:
+			return fmt.Errorf("unrecognized error handling value: %d", b.errorHandling)
+		}
+	}
+	if len(mfs) == 0 {
+		return nil
+	}
+
+	conn, err := net.DialTimeout("tcp", b.url, b.timeout)
+	if err != nil {
+		return err
+	}
+	defer conn.Close()
+
+	return writeMetrics(conn, mfs, b.prefix, model.Now())
+}
+
+// writeMetrics writes every sample of mfs to w in the Graphite plaintext
+// format, one "path value timestamp" line per sample. Sample timestamps are
+// converted from milliseconds to seconds.
+func writeMetrics(w io.Writer, mfs []*dto.MetricFamily, prefix string, now model.Time) error {
+	vec := expfmt.ExtractSamples(&expfmt.DecodeOptions{
+		Timestamp: now,
+	}, mfs...)
+
+	buf := bufio.NewWriter(w)
+	for _, s := range vec {
+		if err := writePrefix(buf, prefix); err != nil {
+			return err
+		}
+		if err := writeMetric(buf, s.Metric); err != nil {
+			return err
+		}
+		if _, err := fmt.Fprintf(buf, " %g %d\n", s.Value, int64(s.Timestamp)/millisecondsPerSecond); err != nil {
+			return err
+		}
+	}
+
+	// Flush once at the end so that the buffering actually batches the writes.
+	return buf.Flush()
+}
+
+// writePrefix writes the dot-separated components of prefix, each sanitized
+// and followed by a dot. It writes nothing for an empty prefix, so that the
+// metric path does not start with a dot.
+func writePrefix(buf *bufio.Writer, prefix string) error {
+	isDot := func(c rune) bool { return c == '.' }
+	for _, part := range strings.FieldsFunc(prefix, isDot) {
+		if err := writeSanitized(buf, part); err != nil {
+			return err
+		}
+		if err := buf.WriteByte('.'); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// writeMetric writes the sanitized metric name of m followed by its labels as
+// ".name.value" pairs in lexicographic order, so that the resulting path is
+// deterministic.
+func writeMetric(buf *bufio.Writer, m model.Metric) error {
+	metricName, hasName := m[model.MetricNameLabel]
+
+	labelStrings := make([]string, 0, len(m))
+	for label, value := range m {
+		if label != model.MetricNameLabel {
+			// writeSanitized turns the space into the dot that separates
+			// label name and label value.
+			labelStrings = append(labelStrings, fmt.Sprintf("%s %s", string(label), string(value)))
+		}
+	}
+	sort.Strings(labelStrings)
+
+	if hasName {
+		if err := writeSanitized(buf, string(metricName)); err != nil {
+			return err
+		}
+	}
+	for _, s := range labelStrings {
+		if err := buf.WriteByte('.'); err != nil {
+			return err
+		}
+		if err := writeSanitized(buf, s); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// writeSanitized writes s with every invalid rune replaced (see
+// replaceInvalidRune) and runs of underscores collapsed into one.
+func writeSanitized(buf *bufio.Writer, s string) error {
+	prevUnderscore := false
+
+	for _, c := range s {
+		c = replaceInvalidRune(c)
+		if c == '_' {
+			if prevUnderscore {
+				continue
+			}
+			prevUnderscore = true
+		} else {
+			prevUnderscore = false
+		}
+		if _, err := buf.WriteRune(c); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// replaceInvalidRune maps a space to a dot and every rune other than ASCII
+// letters, digits, '_' and ':' to an underscore. All other runes are returned
+// unchanged.
+func replaceInvalidRune(c rune) rune {
+	if c == ' ' {
+		return '.'
+	}
+	if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == ':' || (c >= '0' && c <= '9')) {
+		return '_'
+	}
+	return c
+}
diff --git a/prometheus/graphite/bridge_test.go b/prometheus/graphite/bridge_test.go
new file mode 100644
index 0000000..c439a3d
--- /dev/null
+++ b/prometheus/graphite/bridge_test.go
@@ -0,0 +1,352 @@
+package graphite
+
+import (
+	"bufio"
+	"bytes"
+	"io"
+	"log"
+	"net"
+	"os"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/prometheus/common/model"
+	"golang.org/x/net/context"
+
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+func TestSanitize(t *testing.T) {
+	testCases := []struct {
+		in, out string
+	}{
+		{in: "hello", out: "hello"},
+		{in: "hE/l1o", out: "hE_l1o"},
+		{in: "he,*ll(.o", out: "he_ll_o"},
+		{in: "hello_there%^&", out: "hello_there_"},
+	}
+
+	var buf bytes.Buffer
+	w := bufio.NewWriter(&buf)
+
+	for i, tc := range testCases {
+		if err := writeSanitized(w, tc.in); err != nil {
+			t.Fatalf("write failed: %v", err)
+		}
+		if err := w.Flush(); err != nil {
+			t.Fatalf("flush failed: %v", err)
+		}
+
+		if want, got := tc.out, buf.String(); want != got {
+			t.Fatalf("test case index %d: got sanitized string %s, want %s", i, got, want)
+		}
+
+		buf.Reset()
+	}
+}
+
+// TestWritePrefix checks that an empty prefix does not produce a leading dot
+// and that dots in the prefix survive as path separators.
+func TestWritePrefix(t *testing.T) {
+	testCases := []struct {
+		prefix, want string
+	}{
+		{prefix: "", want: "name 1 1477043\n"},
+		{prefix: "prefix", want: "prefix.name 1 1477043\n"},
+		{prefix: "some.prefix", want: "some.prefix.name 1 1477043\n"},
+	}
+
+	cnt := prometheus.NewCounter(prometheus.CounterOpts{
+		Name: "name",
+		Help: "docstring",
+	})
+	cnt.Inc()
+
+	reg := prometheus.NewRegistry()
+	reg.MustRegister(cnt)
+
+	mfs, err := reg.Gather()
+	if err != nil {
+		t.Fatalf("error: %v", err)
+	}
+
+	now := model.Time(1477043083)
+	for i, tc := range testCases {
+		var buf bytes.Buffer
+		if err := writeMetrics(&buf, mfs, tc.prefix, now); err != nil {
+			t.Fatalf("test case index %d: error: %v", i, err)
+		}
+		if got := buf.String(); tc.want != got {
+			t.Fatalf("test case index %d: wanted %q, got %q", i, tc.want, got)
+		}
+	}
+}
+
+func TestWriteSummary(t *testing.T) {
+	sumVec := prometheus.NewSummaryVec(
+		prometheus.SummaryOpts{
+			Name:        "name",
+			Help:        "docstring",
+			ConstLabels: prometheus.Labels{"constname": "constvalue"},
+		},
+		[]string{"labelname"},
+	)
+
+	sumVec.WithLabelValues("val1").Observe(float64(10))
+	sumVec.WithLabelValues("val1").Observe(float64(20))
+	sumVec.WithLabelValues("val1").Observe(float64(30))
+	sumVec.WithLabelValues("val2").Observe(float64(20))
+	sumVec.WithLabelValues("val2").Observe(float64(30))
+	sumVec.WithLabelValues("val2").Observe(float64(40))
+
+	reg := prometheus.NewRegistry()
+	reg.MustRegister(sumVec)
+
+	mfs, err := reg.Gather()
+	if err != nil {
+		t.Fatalf("error: %v", err)
+	}
+
+	now := model.Time(1477043083)
+	var buf bytes.Buffer
+	err = writeMetrics(&buf, mfs, "prefix", now)
+	if err != nil {
+		t.Fatalf("error: %v", err)
+	}
+
+	want := `prefix.name.constname.constvalue.labelname.val1.quantile.0_5 20 1477043
+prefix.name.constname.constvalue.labelname.val1.quantile.0_9 30 1477043
+prefix.name.constname.constvalue.labelname.val1.quantile.0_99 30 1477043
+prefix.name_sum.constname.constvalue.labelname.val1 60 1477043
+prefix.name_count.constname.constvalue.labelname.val1 3 1477043
+prefix.name.constname.constvalue.labelname.val2.quantile.0_5 30 1477043
+prefix.name.constname.constvalue.labelname.val2.quantile.0_9 40 1477043
+prefix.name.constname.constvalue.labelname.val2.quantile.0_99 40 1477043
+prefix.name_sum.constname.constvalue.labelname.val2 90 1477043
+prefix.name_count.constname.constvalue.labelname.val2 3 1477043
+`
+
+	if got := buf.String(); want != got {
+		t.Fatalf("wanted \n%s\n, got \n%s\n", want, got)
+	}
+}
+
+func TestWriteHistogram(t *testing.T) {
+	histVec := prometheus.NewHistogramVec(
+		prometheus.HistogramOpts{
+			Name:        "name",
+			Help:        "docstring",
+			ConstLabels: prometheus.Labels{"constname": "constvalue"},
+			Buckets:     []float64{0.01, 0.02, 0.05, 0.1},
+		},
+		[]string{"labelname"},
+	)
+
+	histVec.WithLabelValues("val1").Observe(float64(10))
+	histVec.WithLabelValues("val1").Observe(float64(20))
+	histVec.WithLabelValues("val1").Observe(float64(30))
+	histVec.WithLabelValues("val2").Observe(float64(20))
+	histVec.WithLabelValues("val2").Observe(float64(30))
+	histVec.WithLabelValues("val2").Observe(float64(40))
+
+	reg := prometheus.NewRegistry()
+	reg.MustRegister(histVec)
+
+	mfs, err := reg.Gather()
+	if err != nil {
+		t.Fatalf("error: %v", err)
+	}
+
+	now := model.Time(1477043083)
+	var buf bytes.Buffer
+	err = writeMetrics(&buf, mfs, "prefix", now)
+	if err != nil {
+		t.Fatalf("error: %v", err)
+	}
+
+	want := `prefix.name_bucket.constname.constvalue.labelname.val1.le.0_01 0 1477043
+prefix.name_bucket.constname.constvalue.labelname.val1.le.0_02 0 1477043
+prefix.name_bucket.constname.constvalue.labelname.val1.le.0_05 0 1477043
+prefix.name_bucket.constname.constvalue.labelname.val1.le.0_1 0 1477043
+prefix.name_sum.constname.constvalue.labelname.val1 60 1477043
+prefix.name_count.constname.constvalue.labelname.val1 3 1477043
+prefix.name_bucket.constname.constvalue.labelname.val1.le._Inf 3 1477043
+prefix.name_bucket.constname.constvalue.labelname.val2.le.0_01 0 1477043
+prefix.name_bucket.constname.constvalue.labelname.val2.le.0_02 0 1477043
+prefix.name_bucket.constname.constvalue.labelname.val2.le.0_05 0 1477043
+prefix.name_bucket.constname.constvalue.labelname.val2.le.0_1 0 1477043
+prefix.name_sum.constname.constvalue.labelname.val2 90 1477043
+prefix.name_count.constname.constvalue.labelname.val2 3 1477043
+prefix.name_bucket.constname.constvalue.labelname.val2.le._Inf 3 1477043
+`
+	if got := buf.String(); want != got {
+		t.Fatalf("wanted \n%s\n, got \n%s\n", want, got)
+	}
+}
+
+func TestToReader(t *testing.T) {
+	cntVec := prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Name:        "name",
+			Help:        "docstring",
+			ConstLabels: prometheus.Labels{"constname": "constvalue"},
+		},
+		[]string{"labelname"},
+	)
+	cntVec.WithLabelValues("val1").Inc()
+	cntVec.WithLabelValues("val2").Inc()
+
+	reg := prometheus.NewRegistry()
+	reg.MustRegister(cntVec)
+
+	want := `prefix.name.constname.constvalue.labelname.val1 1 1477043
+prefix.name.constname.constvalue.labelname.val2 1 1477043
+`
+	mfs, err := reg.Gather()
+	if err != nil {
+		t.Fatalf("error: %v", err)
+	}
+
+	now := model.Time(1477043083)
+	var buf bytes.Buffer
+	err = writeMetrics(&buf, mfs, "prefix", now)
+	if err != nil {
+		t.Fatalf("error: %v", err)
+	}
+
+	if got := buf.String(); want != got {
+		t.Fatalf("wanted \n%s\n, got \n%s\n", want, got)
+	}
+}
+
+func TestPush(t *testing.T) {
+	reg := prometheus.NewRegistry()
+	cntVec := prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Name:        "name",
+			Help:        "docstring",
+			ConstLabels: prometheus.Labels{"constname": "constvalue"},
+		},
+		[]string{"labelname"},
+	)
+	cntVec.WithLabelValues("val1").Inc()
+	cntVec.WithLabelValues("val2").Inc()
+	reg.MustRegister(cntVec)
+
+	// Listen on an ephemeral port so that the test cannot clash with another
+	// process that already uses a fixed port.
+	nmg, err := newMockGraphite("127.0.0.1:0")
+	if err != nil {
+		t.Fatalf("error creating mock graphite: %v", err)
+	}
+	defer nmg.Close()
+
+	b, err := NewBridge(&Config{
+		URL:      nmg.Addr().String(),
+		Gatherer: reg,
+		Prefix:   "prefix",
+	})
+	if err != nil {
+		t.Fatalf("error creating bridge: %v", err)
+	}
+
+	err = b.Push()
+	if err != nil {
+		t.Fatalf("error pushing: %v", err)
+	}
+
+	wants := []string{
+		"prefix.name.constname.constvalue.labelname.val1 1",
+		"prefix.name.constname.constvalue.labelname.val2 1",
+	}
+
+	select {
+	case got := <-nmg.readc:
+		for _, want := range wants {
+			// A plain substring check; as a regular expression the dots in
+			// want would match any character.
+			if !strings.Contains(got, want) {
+				t.Fatalf("missing metric:\nno match for %s received by server:\n%s", want, got)
+			}
+		}
+	case err := <-nmg.errc:
+		t.Fatalf("error reading push: %v", err)
+	case <-time.After(time.Second):
+		t.Fatalf("no result from graphite server")
+	}
+}
+
+// newMockGraphite listens on addr and accepts a single connection. Everything
+// read from it is delivered on readc, any failure on errc.
+func newMockGraphite(addr string) (*mockGraphite, error) {
+	// Buffered so that the goroutine can always deliver its result and exit,
+	// even if the test has already given up waiting.
+	readc := make(chan string, 1)
+	errc := make(chan error, 1)
+	ln, err := net.Listen("tcp", addr)
+	if err != nil {
+		return nil, err
+	}
+
+	go func() {
+		conn, err := ln.Accept()
+		if err != nil {
+			errc <- err
+			return
+		}
+		defer conn.Close()
+
+		var b bytes.Buffer
+		if _, err := io.Copy(&b, conn); err != nil {
+			errc <- err
+			return
+		}
+		readc <- b.String()
+	}()
+
+	return &mockGraphite{
+		readc:    readc,
+		errc:     errc,
+		Listener: ln,
+	}, nil
+}
+
+type mockGraphite struct {
+	readc chan string
+	errc  chan error
+
+	net.Listener
+}
+
+func ExampleBridge() {
+	b, err := NewBridge(&Config{
+		URL:           "graphite.example.org:3099",
+		Gatherer:      prometheus.DefaultGatherer,
+		Prefix:        "prefix",
+		Interval:      15 * time.Second,
+		Timeout:       10 * time.Second,
+		ErrorHandling: AbortOnError,
+		Logger:        log.New(os.Stdout, "graphite bridge: ", log.Lshortfile),
+	})
+	if err != nil {
+		panic(err)
+	}
+
+	go func() {
+		// Start something in a goroutine that uses metrics.
+	}()
+
+	// Push initial metrics to Graphite. Fail fast if the push fails.
+	if err := b.Push(); err != nil {
+		panic(err)
+	}
+
+	// Create a Context to control stopping the Run() loop that pushes
+	// metrics to Graphite.
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	// Start pushing metrics to Graphite in the Run() loop.
+	b.Run(ctx)
+}