forked from mirror/redis
chore: update otel example
This commit is contained in:
parent
1278a8094f
commit
dd858eaf07
|
@ -40,9 +40,25 @@ UPTRACE_DSN=http://project2_secret_token@localhost:14317/2 go run client.go
|
||||||
trace: http://localhost:14318/traces/ee029d8782242c8ed38b16d961093b35
|
trace: http://localhost:14318/traces/ee029d8782242c8ed38b16d961093b35
|
||||||
```
|
```
|
||||||
|
|
||||||
|
![Redis trace](./image/redis-trace.png)
|
||||||
|
|
||||||
You can also open Uptrace UI at [http://localhost:14318](http://localhost:14318) to view available
|
You can also open Uptrace UI at [http://localhost:14318](http://localhost:14318) to view available
|
||||||
spans, logs, and metrics.
|
spans, logs, and metrics.
|
||||||
|
|
||||||
|
## Redis monitoring
|
||||||
|
|
||||||
|
You can also [monitor Redis performance](https://uptrace.dev/opentelemetry/redis-monitoring.html)
|
||||||
|
metrics by installing OpenTelemetry Collector.
|
||||||
|
|
||||||
|
[OpenTelemetry Collector](https://uptrace.dev/opentelemetry/collector.html) is an agent that pulls
|
||||||
|
telemetry data from systems you want to monitor and sends it to APM tools using the OpenTelemetry
|
||||||
|
protocol (OTLP).
|
||||||
|
|
||||||
|
When telemetry data reaches Uptrace, it automatically generates a Redis dashboard from a pre-defined
|
||||||
|
template.
|
||||||
|
|
||||||
|
![Redis dashboard](./image/metrics.png)
|
||||||
|
|
||||||
## Links
|
## Links
|
||||||
|
|
||||||
- [Uptrace open-source APM](https://uptrace.dev/get/open-source-apm.html)
|
- [Uptrace open-source APM](https://uptrace.dev/get/open-source-apm.html)
|
||||||
|
|
|
@ -0,0 +1,53 @@
|
||||||
|
# See https://prometheus.io/docs/alerting/latest/configuration/ for details.
|
||||||
|
|
||||||
|
global:
|
||||||
|
# The smarthost and SMTP sender used for mail notifications.
|
||||||
|
smtp_smarthost: 'mailhog:1025'
|
||||||
|
smtp_from: 'alertmanager@example.com'
|
||||||
|
smtp_require_tls: false
|
||||||
|
|
||||||
|
receivers:
|
||||||
|
- name: 'team-X'
|
||||||
|
email_configs:
|
||||||
|
- to: 'some-receiver@example.com'
|
||||||
|
send_resolved: true
|
||||||
|
|
||||||
|
# The root route on which each incoming alert enters.
|
||||||
|
route:
|
||||||
|
# The labels by which incoming alerts are grouped together. For example,
|
||||||
|
# multiple alerts coming in for cluster=A and alertname=LatencyHigh would
|
||||||
|
# be batched into a single group.
|
||||||
|
group_by: ['alertname', 'cluster', 'service']
|
||||||
|
|
||||||
|
# When a new group of alerts is created by an incoming alert, wait at
|
||||||
|
# least 'group_wait' to send the initial notification.
|
||||||
|
# This ensures that multiple alerts for the same group that start
|
||||||
|
# firing shortly after one another are batched together on the first
|
||||||
|
# notification.
|
||||||
|
group_wait: 30s
|
||||||
|
|
||||||
|
# When the first notification was sent, wait 'group_interval' to send a batch
|
||||||
|
# of new alerts that started firing for that group.
|
||||||
|
group_interval: 5m
|
||||||
|
|
||||||
|
# If an alert has successfully been sent, wait 'repeat_interval' to
|
||||||
|
# resend it.
|
||||||
|
repeat_interval: 3h
|
||||||
|
|
||||||
|
# A default receiver
|
||||||
|
receiver: team-X
|
||||||
|
|
||||||
|
# All the above attributes are inherited by all child routes and can
|
||||||
|
# be overwritten on each.
|
||||||
|
|
||||||
|
# The child route trees.
|
||||||
|
routes:
|
||||||
|
# This route matches error alerts created from spans or logs.
|
||||||
|
- matchers:
|
||||||
|
- alert_kind="error"
|
||||||
|
group_interval: 24h
|
||||||
|
receiver: team-X
|
||||||
|
|
||||||
|
# The directory from which notification templates are read.
|
||||||
|
templates:
|
||||||
|
- '/etc/alertmanager/template/*.tmpl'
|
|
@ -18,7 +18,7 @@ services:
|
||||||
- '9000:9000'
|
- '9000:9000'
|
||||||
|
|
||||||
uptrace:
|
uptrace:
|
||||||
image: 'uptrace/uptrace:1.1.0'
|
image: 'uptrace/uptrace:1.2.0'
|
||||||
#image: 'uptrace/uptrace-dev:latest'
|
#image: 'uptrace/uptrace-dev:latest'
|
||||||
restart: on-failure
|
restart: on-failure
|
||||||
volumes:
|
volumes:
|
||||||
|
@ -36,11 +36,8 @@ services:
|
||||||
otel-collector:
|
otel-collector:
|
||||||
image: otel/opentelemetry-collector-contrib:0.58.0
|
image: otel/opentelemetry-collector-contrib:0.58.0
|
||||||
restart: on-failure
|
restart: on-failure
|
||||||
user: '0:0' # required for logs
|
|
||||||
volumes:
|
volumes:
|
||||||
- ./otel-collector.yaml:/etc/otelcol-contrib/config.yaml
|
- ./config/otel-collector.yaml:/etc/otelcol-contrib/config.yaml
|
||||||
- /var/lib/docker/containers:/var/lib/docker/containers:ro
|
|
||||||
- /var/log:/var/log:ro
|
|
||||||
ports:
|
ports:
|
||||||
- '4317:4317'
|
- '4317:4317'
|
||||||
- '4318:4318'
|
- '4318:4318'
|
||||||
|
@ -48,7 +45,25 @@ services:
|
||||||
vector:
|
vector:
|
||||||
image: timberio/vector:0.24.X-alpine
|
image: timberio/vector:0.24.X-alpine
|
||||||
volumes:
|
volumes:
|
||||||
- ./vector.toml:/etc/vector/vector.toml:ro
|
- ./config/vector.toml:/etc/vector/vector.toml:ro
|
||||||
|
|
||||||
|
alertmanager:
|
||||||
|
image: prom/alertmanager:v0.24.0
|
||||||
|
restart: on-failure
|
||||||
|
volumes:
|
||||||
|
- ./config/alertmanager.yml:/etc/alertmanager/config.yml
|
||||||
|
- alertmanager_data:/alertmanager
|
||||||
|
ports:
|
||||||
|
- 9093:9093
|
||||||
|
command:
|
||||||
|
- '--config.file=/etc/alertmanager/config.yml'
|
||||||
|
- '--storage.path=/alertmanager'
|
||||||
|
|
||||||
|
mailhog:
|
||||||
|
image: mailhog/mailhog:v1.0.1
|
||||||
|
restart: on-failure
|
||||||
|
ports:
|
||||||
|
- '8025:8025'
|
||||||
|
|
||||||
redis-server:
|
redis-server:
|
||||||
image: redis
|
image: redis
|
||||||
|
|
Binary file not shown.
After Width: | Height: | Size: 32 KiB |
Binary file not shown.
After Width: | Height: | Size: 21 KiB |
|
@ -13,6 +13,16 @@
|
||||||
## foo: $$FOO_BAR
|
## foo: $$FOO_BAR
|
||||||
##
|
##
|
||||||
|
|
||||||
|
##
|
||||||
|
## ClickHouse database credentials.
|
||||||
|
##
|
||||||
|
ch:
|
||||||
|
# Connection string for ClickHouse database. For example:
|
||||||
|
# clickhouse://<user>:<password>@<host>:<port>/<database>?sslmode=disable
|
||||||
|
#
|
||||||
|
# See https://clickhouse.uptrace.dev/guide/golang-clickhouse.html#options
|
||||||
|
dsn: 'clickhouse://default:@clickhouse:9000/uptrace?sslmode=disable'
|
||||||
|
|
||||||
##
|
##
|
||||||
## A list of pre-configured projects. Each project is fully isolated.
|
## A list of pre-configured projects. Each project is fully isolated.
|
||||||
##
|
##
|
||||||
|
@ -26,6 +36,10 @@ projects:
|
||||||
- service.name
|
- service.name
|
||||||
- host.name
|
- host.name
|
||||||
- deployment.environment
|
- deployment.environment
|
||||||
|
# Group spans by deployment.environment attribute.
|
||||||
|
group_by_env: false
|
||||||
|
# Group funcs spans by service.name attribute.
|
||||||
|
group_funcs_by_service: false
|
||||||
|
|
||||||
# Other projects can be used to monitor your applications.
|
# Other projects can be used to monitor your applications.
|
||||||
# To monitor micro-services or multiple related services, use a single project.
|
# To monitor micro-services or multiple related services, use a single project.
|
||||||
|
@ -36,6 +50,49 @@ projects:
|
||||||
- service.name
|
- service.name
|
||||||
- host.name
|
- host.name
|
||||||
- deployment.environment
|
- deployment.environment
|
||||||
|
# Group spans by deployment.environment attribute.
|
||||||
|
group_by_env: false
|
||||||
|
# Group funcs spans by service.name attribute.
|
||||||
|
group_funcs_by_service: false
|
||||||
|
|
||||||
|
##
|
||||||
|
## Create metrics from spans and events.
|
||||||
|
##
|
||||||
|
metrics_from_spans:
|
||||||
|
- name: uptrace.tracing.spans_duration
|
||||||
|
description: Spans duration (excluding events)
|
||||||
|
instrument: histogram
|
||||||
|
unit: microseconds
|
||||||
|
value: span.duration / 1000
|
||||||
|
attrs:
|
||||||
|
- span.system as system
|
||||||
|
- service.name as service
|
||||||
|
- host.name as host
|
||||||
|
- span.status_code as status
|
||||||
|
where: not span.is_event
|
||||||
|
|
||||||
|
- name: uptrace.tracing.spans
|
||||||
|
description: Spans count (excluding events)
|
||||||
|
instrument: counter
|
||||||
|
unit: 1
|
||||||
|
value: span.count
|
||||||
|
attrs:
|
||||||
|
- span.system as system
|
||||||
|
- service.name as service
|
||||||
|
- host.name as host
|
||||||
|
- span.status_code as status
|
||||||
|
where: not span.is_event
|
||||||
|
|
||||||
|
- name: uptrace.tracing.events
|
||||||
|
description: Events count (excluding spans)
|
||||||
|
instrument: counter
|
||||||
|
unit: 1
|
||||||
|
value: span.count
|
||||||
|
attrs:
|
||||||
|
- span.system as system
|
||||||
|
- service.name as service
|
||||||
|
- host.name as host
|
||||||
|
where: span.is_event
|
||||||
|
|
||||||
##
|
##
|
||||||
## To require authentication, uncomment the following section.
|
## To require authentication, uncomment the following section.
|
||||||
|
@ -78,16 +135,6 @@ auth:
|
||||||
# # Defaults to 'preferred_username'.
|
# # Defaults to 'preferred_username'.
|
||||||
# claim: preferred_username
|
# claim: preferred_username
|
||||||
|
|
||||||
##
|
|
||||||
## ClickHouse database credentials.
|
|
||||||
##
|
|
||||||
ch:
|
|
||||||
# Connection string for ClickHouse database. For example:
|
|
||||||
# clickhouse://<user>:<password>@<host>:<port>/<database>?sslmode=disable
|
|
||||||
#
|
|
||||||
# See https://clickhouse.uptrace.dev/guide/golang-clickhouse.html#options
|
|
||||||
dsn: 'clickhouse://default:@clickhouse:9000/uptrace?sslmode=disable'
|
|
||||||
|
|
||||||
##
|
##
|
||||||
## Alerting rules for monitoring metrics.
|
## Alerting rules for monitoring metrics.
|
||||||
##
|
##
|
||||||
|
@ -102,8 +149,8 @@ alerting:
|
||||||
- $net_errors > 0 group by host.name
|
- $net_errors > 0 group by host.name
|
||||||
# for the last 5 minutes
|
# for the last 5 minutes
|
||||||
for: 5m
|
for: 5m
|
||||||
# in the project id=1
|
annotations:
|
||||||
projects: [1]
|
summary: '{{ $labels.host_name }} has high number of net errors: {{ $values.net_errors }}'
|
||||||
|
|
||||||
- name: Filesystem usage >= 90%
|
- name: Filesystem usage >= 90%
|
||||||
metrics:
|
metrics:
|
||||||
|
@ -114,7 +161,8 @@ alerting:
|
||||||
- where device !~ "loop"
|
- where device !~ "loop"
|
||||||
- $fs_usage{state="used"} / $fs_usage >= 0.9
|
- $fs_usage{state="used"} / $fs_usage >= 0.9
|
||||||
for: 5m
|
for: 5m
|
||||||
projects: [1]
|
annotations:
|
||||||
|
summary: '{{ $labels.host_name }} has high FS usage: {{ $values.fs_usage }}'
|
||||||
|
|
||||||
- name: Uptrace is dropping spans
|
- name: Uptrace is dropping spans
|
||||||
metrics:
|
metrics:
|
||||||
|
@ -122,7 +170,17 @@ alerting:
|
||||||
query:
|
query:
|
||||||
- $spans{type=dropped} > 0
|
- $spans{type=dropped} > 0
|
||||||
for: 1m
|
for: 1m
|
||||||
projects: [1]
|
annotations:
|
||||||
|
summary: 'Uptrace has dropped {{ $values.spans }} spans'
|
||||||
|
|
||||||
|
- name: Always firing (for fun and testing)
|
||||||
|
metrics:
|
||||||
|
- process.runtime.go.goroutines as $goroutines
|
||||||
|
query:
|
||||||
|
- $goroutines >= 0 group by host.name
|
||||||
|
for: 1m
|
||||||
|
annotations:
|
||||||
|
summary: '{{ $labels.host_name }} has high number of goroutines: {{ $values.goroutines }}'
|
||||||
|
|
||||||
# Create alerts from error logs and span events.
|
# Create alerts from error logs and span events.
|
||||||
create_alerts_from_spans:
|
create_alerts_from_spans:
|
||||||
|
@ -139,8 +197,8 @@ alerting:
|
||||||
##
|
##
|
||||||
alertmanager_client:
|
alertmanager_client:
|
||||||
# AlertManager API endpoints that Uptrace uses to manage alerts.
|
# AlertManager API endpoints that Uptrace uses to manage alerts.
|
||||||
# urls:
|
urls:
|
||||||
# - 'http://alertmanager:9093/api/v2/alerts'
|
- 'http://alertmanager:9093/api/v2/alerts'
|
||||||
|
|
||||||
##
|
##
|
||||||
## Various options to tweak ClickHouse schema.
|
## Various options to tweak ClickHouse schema.
|
||||||
|
|
Loading…
Reference in New Issue