Graphite bridge (#244)

Add Graphite bridge
This commit is contained in:
stuart nelson 2016-11-14 14:43:07 +01:00 committed by Björn Rabenstein
parent 54851043b9
commit 7664178cb2
2 changed files with 585 additions and 0 deletions

View File

@ -0,0 +1,277 @@
// Copyright 2016 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package graphite provides a bridge to push Prometheus metrics to a Graphite
// server.
package graphite
import (
"bufio"
"errors"
"fmt"
"io"
"net"
"sort"
"time"
"github.com/prometheus/common/expfmt"
"github.com/prometheus/common/model"
"golang.org/x/net/context"
dto "github.com/prometheus/client_model/go"
"github.com/prometheus/client_golang/prometheus"
)
const (
defaultInterval = 15 * time.Second
millisecondsPerSecond = 1000
)
// HandlerErrorHandling defines how a Handler serving metrics will handle
// errors.
type HandlerErrorHandling int
// These constants cause handlers serving metrics to behave as described if
// errors are encountered.
const (
// Ignore errors and try to push as many metrics to Graphite as possible.
ContinueOnError HandlerErrorHandling = iota
// Abort the push to Graphite upon the first error encountered.
AbortOnError
)
// Config defines the Graphite bridge config.
type Config struct {
// The url to push data to. Required.
URL string
// The prefix for the pushed Graphite metrics. Defaults to empty string.
Prefix string
// The interval to use for pushing data to Graphite. Defaults to 15 seconds.
Interval time.Duration
// The timeout for pushing metrics to Graphite. Defaults to 15 seconds.
Timeout time.Duration
// The Gatherer to use for metrics. Defaults to prometheus.DefaultGatherer.
Gatherer prometheus.Gatherer
// The logger that messages are written to. Defaults to no logging.
Logger Logger
// ErrorHandling defines how errors are handled. Note that errors are
// logged regardless of the configured ErrorHandling provided Logger
// is not nil.
ErrorHandling HandlerErrorHandling
}
// Bridge pushes metrics to the configured Graphite server.
type Bridge struct {
url string
prefix string
interval time.Duration
timeout time.Duration
errorHandling HandlerErrorHandling
logger Logger
g prometheus.Gatherer
}
// Logger is the minimal interface Bridge needs for logging. Note that
// log.Logger from the standard library implements this interface, and it is
// easy to implement by custom loggers, if they don't do so already anyway.
type Logger interface {
Println(v ...interface{})
}
// NewBridge returns a pointer to a new Bridge struct.
func NewBridge(c *Config) (*Bridge, error) {
b := &Bridge{}
if c.URL == "" {
return nil, errors.New("missing URL")
}
b.url = c.URL
if c.Gatherer == nil {
b.g = prometheus.DefaultGatherer
} else {
b.g = c.Gatherer
}
if c.Logger != nil {
b.logger = c.Logger
}
if c.Prefix != "" {
b.prefix = c.Prefix
}
var z time.Duration
if c.Interval == z {
b.interval = defaultInterval
} else {
b.interval = c.Interval
}
if c.Timeout == z {
b.timeout = defaultInterval
} else {
b.timeout = c.Timeout
}
b.errorHandling = c.ErrorHandling
return b, nil
}
// Run starts the event loop that pushes Prometheus metrics to Graphite at the
// configured interval.
func (b *Bridge) Run(ctx context.Context) {
ticker := time.NewTicker(b.interval)
defer ticker.Stop()
for {
select {
case <-ticker.C:
if err := b.Push(); err != nil && b.logger != nil {
b.logger.Println("error pushing to Graphite:", err)
}
case <-ctx.Done():
return
}
}
}
// Push pushes Prometheus metrics to the configured Graphite server.
func (b *Bridge) Push() error {
mfs, err := b.g.Gather()
if err != nil || len(mfs) == 0 {
switch b.errorHandling {
case AbortOnError:
return err
case ContinueOnError:
if b.logger != nil {
b.logger.Println("continue on error:", err)
}
default:
panic("unrecognized error handling value")
}
}
conn, err := net.DialTimeout("tcp", b.url, b.timeout)
if err != nil {
return err
}
defer conn.Close()
return writeMetrics(conn, mfs, b.prefix, model.Now())
}
func writeMetrics(w io.Writer, mfs []*dto.MetricFamily, prefix string, now model.Time) error {
vec := expfmt.ExtractSamples(&expfmt.DecodeOptions{
Timestamp: now,
}, mfs...)
buf := bufio.NewWriter(w)
for _, s := range vec {
if err := writeSanitized(buf, prefix); err != nil {
return err
}
if err := buf.WriteByte('.'); err != nil {
return err
}
if err := writeMetric(buf, s.Metric); err != nil {
return err
}
if _, err := fmt.Fprintf(buf, " %g %d\n", s.Value, int64(s.Timestamp)/millisecondsPerSecond); err != nil {
return err
}
if err := buf.Flush(); err != nil {
return err
}
}
return nil
}
func writeMetric(buf *bufio.Writer, m model.Metric) error {
metricName, hasName := m[model.MetricNameLabel]
numLabels := len(m) - 1
if !hasName {
numLabels = len(m)
}
labelStrings := make([]string, 0, numLabels)
for label, value := range m {
if label != model.MetricNameLabel {
labelStrings = append(labelStrings, fmt.Sprintf("%s %s", string(label), string(value)))
}
}
var err error
switch numLabels {
case 0:
if hasName {
return writeSanitized(buf, string(metricName))
}
default:
sort.Strings(labelStrings)
if err = writeSanitized(buf, string(metricName)); err != nil {
return err
}
for _, s := range labelStrings {
if err = buf.WriteByte('.'); err != nil {
return err
}
if err = writeSanitized(buf, s); err != nil {
return err
}
}
}
return nil
}
func writeSanitized(buf *bufio.Writer, s string) error {
prevUnderscore := false
for _, c := range s {
c = replaceInvalidRune(c)
if c == '_' {
if prevUnderscore {
continue
}
prevUnderscore = true
} else {
prevUnderscore = false
}
if _, err := buf.WriteRune(c); err != nil {
return err
}
}
return nil
}
func replaceInvalidRune(c rune) rune {
if c == ' ' {
return '.'
}
if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == ':' || (c >= '0' && c <= '9')) {
return '_'
}
return c
}

View File

@ -0,0 +1,308 @@
package graphite
import (
"bufio"
"bytes"
"io"
"log"
"net"
"os"
"regexp"
"testing"
"time"
"github.com/prometheus/common/model"
"golang.org/x/net/context"
"github.com/prometheus/client_golang/prometheus"
)
func TestSanitize(t *testing.T) {
testCases := []struct {
in, out string
}{
{in: "hello", out: "hello"},
{in: "hE/l1o", out: "hE_l1o"},
{in: "he,*ll(.o", out: "he_ll_o"},
{in: "hello_there%^&", out: "hello_there_"},
}
var buf bytes.Buffer
w := bufio.NewWriter(&buf)
for i, tc := range testCases {
if err := writeSanitized(w, tc.in); err != nil {
t.Fatalf("write failed: %v", err)
}
if err := w.Flush(); err != nil {
t.Fatalf("flush failed: %v", err)
}
if want, got := tc.out, buf.String(); want != got {
t.Fatalf("test case index %d: got sanitized string %s, want %s", i, got, want)
}
buf.Reset()
}
}
func TestWriteSummary(t *testing.T) {
sumVec := prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Name: "name",
Help: "docstring",
ConstLabels: prometheus.Labels{"constname": "constvalue"},
},
[]string{"labelname"},
)
sumVec.WithLabelValues("val1").Observe(float64(10))
sumVec.WithLabelValues("val1").Observe(float64(20))
sumVec.WithLabelValues("val1").Observe(float64(30))
sumVec.WithLabelValues("val2").Observe(float64(20))
sumVec.WithLabelValues("val2").Observe(float64(30))
sumVec.WithLabelValues("val2").Observe(float64(40))
reg := prometheus.NewRegistry()
reg.MustRegister(sumVec)
mfs, err := reg.Gather()
if err != nil {
t.Fatalf("error: %v", err)
}
now := model.Time(1477043083)
var buf bytes.Buffer
err = writeMetrics(&buf, mfs, "prefix", now)
if err != nil {
t.Fatalf("error: %v", err)
}
want := `prefix.name.constname.constvalue.labelname.val1.quantile.0_5 20 1477043
prefix.name.constname.constvalue.labelname.val1.quantile.0_9 30 1477043
prefix.name.constname.constvalue.labelname.val1.quantile.0_99 30 1477043
prefix.name_sum.constname.constvalue.labelname.val1 60 1477043
prefix.name_count.constname.constvalue.labelname.val1 3 1477043
prefix.name.constname.constvalue.labelname.val2.quantile.0_5 30 1477043
prefix.name.constname.constvalue.labelname.val2.quantile.0_9 40 1477043
prefix.name.constname.constvalue.labelname.val2.quantile.0_99 40 1477043
prefix.name_sum.constname.constvalue.labelname.val2 90 1477043
prefix.name_count.constname.constvalue.labelname.val2 3 1477043
`
if got := buf.String(); want != got {
t.Fatalf("wanted \n%s\n, got \n%s\n", want, got)
}
}
func TestWriteHistogram(t *testing.T) {
histVec := prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "name",
Help: "docstring",
ConstLabels: prometheus.Labels{"constname": "constvalue"},
Buckets: []float64{0.01, 0.02, 0.05, 0.1},
},
[]string{"labelname"},
)
histVec.WithLabelValues("val1").Observe(float64(10))
histVec.WithLabelValues("val1").Observe(float64(20))
histVec.WithLabelValues("val1").Observe(float64(30))
histVec.WithLabelValues("val2").Observe(float64(20))
histVec.WithLabelValues("val2").Observe(float64(30))
histVec.WithLabelValues("val2").Observe(float64(40))
reg := prometheus.NewRegistry()
reg.MustRegister(histVec)
mfs, err := reg.Gather()
if err != nil {
t.Fatalf("error: %v", err)
}
now := model.Time(1477043083)
var buf bytes.Buffer
err = writeMetrics(&buf, mfs, "prefix", now)
if err != nil {
t.Fatalf("error: %v", err)
}
want := `prefix.name_bucket.constname.constvalue.labelname.val1.le.0_01 0 1477043
prefix.name_bucket.constname.constvalue.labelname.val1.le.0_02 0 1477043
prefix.name_bucket.constname.constvalue.labelname.val1.le.0_05 0 1477043
prefix.name_bucket.constname.constvalue.labelname.val1.le.0_1 0 1477043
prefix.name_sum.constname.constvalue.labelname.val1 60 1477043
prefix.name_count.constname.constvalue.labelname.val1 3 1477043
prefix.name_bucket.constname.constvalue.labelname.val1.le._Inf 3 1477043
prefix.name_bucket.constname.constvalue.labelname.val2.le.0_01 0 1477043
prefix.name_bucket.constname.constvalue.labelname.val2.le.0_02 0 1477043
prefix.name_bucket.constname.constvalue.labelname.val2.le.0_05 0 1477043
prefix.name_bucket.constname.constvalue.labelname.val2.le.0_1 0 1477043
prefix.name_sum.constname.constvalue.labelname.val2 90 1477043
prefix.name_count.constname.constvalue.labelname.val2 3 1477043
prefix.name_bucket.constname.constvalue.labelname.val2.le._Inf 3 1477043
`
if got := buf.String(); want != got {
t.Fatalf("wanted \n%s\n, got \n%s\n", want, got)
}
}
func TestToReader(t *testing.T) {
cntVec := prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "name",
Help: "docstring",
ConstLabels: prometheus.Labels{"constname": "constvalue"},
},
[]string{"labelname"},
)
cntVec.WithLabelValues("val1").Inc()
cntVec.WithLabelValues("val2").Inc()
reg := prometheus.NewRegistry()
reg.MustRegister(cntVec)
want := `prefix.name.constname.constvalue.labelname.val1 1 1477043
prefix.name.constname.constvalue.labelname.val2 1 1477043
`
mfs, err := reg.Gather()
if err != nil {
t.Fatalf("error: %v", err)
}
now := model.Time(1477043083)
var buf bytes.Buffer
err = writeMetrics(&buf, mfs, "prefix", now)
if err != nil {
t.Fatalf("error: %v", err)
}
if got := buf.String(); want != got {
t.Fatalf("wanted \n%s\n, got \n%s\n", want, got)
}
}
func TestPush(t *testing.T) {
reg := prometheus.NewRegistry()
cntVec := prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "name",
Help: "docstring",
ConstLabels: prometheus.Labels{"constname": "constvalue"},
},
[]string{"labelname"},
)
cntVec.WithLabelValues("val1").Inc()
cntVec.WithLabelValues("val2").Inc()
reg.MustRegister(cntVec)
host := "localhost"
port := ":56789"
b, err := NewBridge(&Config{
URL: host + port,
Gatherer: reg,
Prefix: "prefix",
})
if err != nil {
t.Fatalf("error creating bridge: %v", err)
}
nmg, err := newMockGraphite(port)
if err != nil {
t.Fatalf("error creating mock graphite: %v", err)
}
defer nmg.Close()
err = b.Push()
if err != nil {
t.Fatalf("error pushing: %v", err)
}
wants := []string{
"prefix.name.constname.constvalue.labelname.val1 1",
"prefix.name.constname.constvalue.labelname.val2 1",
}
select {
case got := <-nmg.readc:
for _, want := range wants {
matched, err := regexp.MatchString(want, got)
if err != nil {
t.Fatalf("error pushing: %v", err)
}
if !matched {
t.Fatalf("missing metric:\nno match for %s received by server:\n%s", want, got)
}
}
return
case err := <-nmg.errc:
t.Fatalf("error reading push: %v", err)
case <-time.After(50 * time.Millisecond):
t.Fatalf("no result from graphite server")
}
}
func newMockGraphite(port string) (*mockGraphite, error) {
readc := make(chan string)
errc := make(chan error)
ln, err := net.Listen("tcp", port)
if err != nil {
return nil, err
}
go func() {
conn, err := ln.Accept()
if err != nil {
errc <- err
}
var b bytes.Buffer
io.Copy(&b, conn)
readc <- b.String()
}()
return &mockGraphite{
readc: readc,
errc: errc,
Listener: ln,
}, nil
}
type mockGraphite struct {
readc chan string
errc chan error
net.Listener
}
func ExampleBridge() {
b, err := NewBridge(&Config{
URL: "graphite.example.org:3099",
Gatherer: prometheus.DefaultGatherer,
Prefix: "prefix",
Interval: 15 * time.Second,
Timeout: 10 * time.Second,
ErrorHandling: AbortOnError,
Logger: log.New(os.Stdout, "graphite bridge: ", log.Lshortfile),
})
if err != nil {
panic(err)
}
go func() {
// Start something in a goroutine that uses metrics.
}()
// Push initial metrics to Graphite. Fail fast if the push fails.
if err := b.Push(); err != nil {
panic(err)
}
// Create a Context to control stopping the Run() loop that pushes
// metrics to Graphite.
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
// Start pushing metrics to Graphite in the Run() loop.
b.Run(ctx)
}