forked from mirror/redis
chore: update otel example
This commit is contained in:
parent
1278a8094f
commit
dd858eaf07
|
@ -40,9 +40,25 @@ UPTRACE_DSN=http://project2_secret_token@localhost:14317/2 go run client.go
|
|||
trace: http://localhost:14318/traces/ee029d8782242c8ed38b16d961093b35
|
||||
```
|
||||
|
||||
![Redis trace](./image/redis-trace.png)
|
||||
|
||||
You can also open Uptrace UI at [http://localhost:14318](http://localhost:14318) to view available
|
||||
spans, logs, and metrics.
|
||||
|
||||
## Redis monitoring
|
||||
|
||||
You can also [monitor Redis performance](https://uptrace.dev/opentelemetry/redis-monitoring.html)
|
||||
metrics by installing OpenTelemetry Collector.
|
||||
|
||||
[OpenTelemetry Collector](https://uptrace.dev/opentelemetry/collector.html) is an agent that pulls
|
||||
telemetry data from systems you want to monitor and sends it to APM tools using the OpenTelemetry
|
||||
protocol (OTLP).
|
||||
|
||||
When telemetry data reaches Uptrace, it automatically generates a Redis dashboard from a pre-defined
|
||||
template.
|
||||
|
||||
![Redis dashboard](./image/metrics.png)
|
||||
|
||||
## Links
|
||||
|
||||
- [Uptrace open-source APM](https://uptrace.dev/get/open-source-apm.html)
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
# See https://prometheus.io/docs/alerting/latest/configuration/ for details.
|
||||
|
||||
global:
|
||||
# The smarthost and SMTP sender used for mail notifications.
|
||||
smtp_smarthost: 'mailhog:1025'
|
||||
smtp_from: 'alertmanager@example.com'
|
||||
smtp_require_tls: false
|
||||
|
||||
receivers:
|
||||
- name: 'team-X'
|
||||
email_configs:
|
||||
- to: 'some-receiver@example.com'
|
||||
send_resolved: true
|
||||
|
||||
# The root route on which each incoming alert enters.
|
||||
route:
|
||||
# The labels by which incoming alerts are grouped together. For example,
|
||||
# multiple alerts coming in for cluster=A and alertname=LatencyHigh would
|
||||
# be batched into a single group.
|
||||
group_by: ['alertname', 'cluster', 'service']
|
||||
|
||||
# When a new group of alerts is created by an incoming alert, wait at
|
||||
# least 'group_wait' to send the initial notification.
|
||||
# This ensures that multiple alerts for the same group that start
|
||||
# firing shortly after one another are batched together on the first
|
||||
# notification.
|
||||
group_wait: 30s
|
||||
|
||||
# When the first notification was sent, wait 'group_interval' to send a batch
|
||||
# of new alerts that started firing for that group.
|
||||
group_interval: 5m
|
||||
|
||||
# If an alert has successfully been sent, wait 'repeat_interval' to
|
||||
# resend it.
|
||||
repeat_interval: 3h
|
||||
|
||||
# A default receiver
|
||||
receiver: team-X
|
||||
|
||||
# All the above attributes are inherited by all child routes and can be
|
||||
# overwritten on each.
|
||||
|
||||
# The child route trees.
|
||||
routes:
|
||||
# This route matches error alerts created from spans or logs.
|
||||
- matchers:
|
||||
- alert_kind="error"
|
||||
group_interval: 24h
|
||||
receiver: team-X
|
||||
|
||||
# The directory from which notification templates are read.
|
||||
templates:
|
||||
- '/etc/alertmanager/template/*.tmpl'
|
|
@ -18,7 +18,7 @@ services:
|
|||
- '9000:9000'
|
||||
|
||||
uptrace:
|
||||
image: 'uptrace/uptrace:1.1.0'
|
||||
image: 'uptrace/uptrace:1.2.0'
|
||||
#image: 'uptrace/uptrace-dev:latest'
|
||||
restart: on-failure
|
||||
volumes:
|
||||
|
@ -36,11 +36,8 @@ services:
|
|||
otel-collector:
|
||||
image: otel/opentelemetry-collector-contrib:0.58.0
|
||||
restart: on-failure
|
||||
user: '0:0' # required for logs
|
||||
volumes:
|
||||
- ./otel-collector.yaml:/etc/otelcol-contrib/config.yaml
|
||||
- /var/lib/docker/containers:/var/lib/docker/containers:ro
|
||||
- /var/log:/var/log:ro
|
||||
- ./config/otel-collector.yaml:/etc/otelcol-contrib/config.yaml
|
||||
ports:
|
||||
- '4317:4317'
|
||||
- '4318:4318'
|
||||
|
@ -48,7 +45,25 @@ services:
|
|||
vector:
|
||||
image: timberio/vector:0.24.X-alpine
|
||||
volumes:
|
||||
- ./vector.toml:/etc/vector/vector.toml:ro
|
||||
- ./config/vector.toml:/etc/vector/vector.toml:ro
|
||||
|
||||
alertmanager:
|
||||
image: prom/alertmanager:v0.24.0
|
||||
restart: on-failure
|
||||
volumes:
|
||||
- ./config/alertmanager.yml:/etc/alertmanager/config.yml
|
||||
- alertmanager_data:/alertmanager
|
||||
ports:
|
||||
- 9093:9093
|
||||
command:
|
||||
- '--config.file=/etc/alertmanager/config.yml'
|
||||
- '--storage.path=/alertmanager'
|
||||
|
||||
mailhog:
|
||||
image: mailhog/mailhog:v1.0.1
|
||||
restart: on-failure
|
||||
ports:
|
||||
- '8025:8025'
|
||||
|
||||
redis-server:
|
||||
image: redis
|
||||
|
|
Binary file not shown.
After Width: | Height: | Size: 32 KiB |
Binary file not shown.
After Width: | Height: | Size: 21 KiB |
|
@ -13,6 +13,16 @@
|
|||
## foo: $$FOO_BAR
|
||||
##
|
||||
|
||||
##
|
||||
## ClickHouse database credentials.
|
||||
##
|
||||
ch:
|
||||
# Connection string for ClickHouse database. For example:
|
||||
# clickhouse://<user>:<password>@<host>:<port>/<database>?sslmode=disable
|
||||
#
|
||||
# See https://clickhouse.uptrace.dev/guide/golang-clickhouse.html#options
|
||||
dsn: 'clickhouse://default:@clickhouse:9000/uptrace?sslmode=disable'
|
||||
|
||||
##
|
||||
## A list of pre-configured projects. Each project is fully isolated.
|
||||
##
|
||||
|
@ -26,6 +36,10 @@ projects:
|
|||
- service.name
|
||||
- host.name
|
||||
- deployment.environment
|
||||
# Group spans by deployment.environment attribute.
|
||||
group_by_env: false
|
||||
# Group funcs spans by service.name attribute.
|
||||
group_funcs_by_service: false
|
||||
|
||||
# Other projects can be used to monitor your applications.
|
||||
# To monitor micro-services or multiple related services, use a single project.
|
||||
|
@ -36,6 +50,49 @@ projects:
|
|||
- service.name
|
||||
- host.name
|
||||
- deployment.environment
|
||||
# Group spans by deployment.environment attribute.
|
||||
group_by_env: false
|
||||
# Group funcs spans by service.name attribute.
|
||||
group_funcs_by_service: false
|
||||
|
||||
##
|
||||
## Create metrics from spans and events.
|
||||
##
|
||||
metrics_from_spans:
|
||||
- name: uptrace.tracing.spans_duration
|
||||
description: Spans duration (excluding events)
|
||||
instrument: histogram
|
||||
unit: microseconds
|
||||
value: span.duration / 1000
|
||||
attrs:
|
||||
- span.system as system
|
||||
- service.name as service
|
||||
- host.name as host
|
||||
- span.status_code as status
|
||||
where: not span.is_event
|
||||
|
||||
- name: uptrace.tracing.spans
|
||||
description: Spans count (excluding events)
|
||||
instrument: counter
|
||||
unit: 1
|
||||
value: span.count
|
||||
attrs:
|
||||
- span.system as system
|
||||
- service.name as service
|
||||
- host.name as host
|
||||
- span.status_code as status
|
||||
where: not span.is_event
|
||||
|
||||
- name: uptrace.tracing.events
|
||||
description: Events count (excluding spans)
|
||||
instrument: counter
|
||||
unit: 1
|
||||
value: span.count
|
||||
attrs:
|
||||
- span.system as system
|
||||
- service.name as service
|
||||
- host.name as host
|
||||
where: span.is_event
|
||||
|
||||
##
|
||||
## To require authentication, uncomment the following section.
|
||||
|
@ -78,16 +135,6 @@ auth:
|
|||
# # Defaults to 'preferred_username'.
|
||||
# claim: preferred_username
|
||||
|
||||
##
|
||||
## ClickHouse database credentials.
|
||||
##
|
||||
ch:
|
||||
# Connection string for ClickHouse database. For example:
|
||||
# clickhouse://<user>:<password>@<host>:<port>/<database>?sslmode=disable
|
||||
#
|
||||
# See https://clickhouse.uptrace.dev/guide/golang-clickhouse.html#options
|
||||
dsn: 'clickhouse://default:@clickhouse:9000/uptrace?sslmode=disable'
|
||||
|
||||
##
|
||||
## Alerting rules for monitoring metrics.
|
||||
##
|
||||
|
@ -102,8 +149,8 @@ alerting:
|
|||
- $net_errors > 0 group by host.name
|
||||
# for the last 5 minutes
|
||||
for: 5m
|
||||
# in the project id=1
|
||||
projects: [1]
|
||||
annotations:
|
||||
summary: '{{ $labels.host_name }} has high number of net errors: {{ $values.net_errors }}'
|
||||
|
||||
- name: Filesystem usage >= 90%
|
||||
metrics:
|
||||
|
@ -114,7 +161,8 @@ alerting:
|
|||
- where device !~ "loop"
|
||||
- $fs_usage{state="used"} / $fs_usage >= 0.9
|
||||
for: 5m
|
||||
projects: [1]
|
||||
annotations:
|
||||
summary: '{{ $labels.host_name }} has high FS usage: {{ $values.fs_usage }}'
|
||||
|
||||
- name: Uptrace is dropping spans
|
||||
metrics:
|
||||
|
@ -122,7 +170,17 @@ alerting:
|
|||
query:
|
||||
- $spans{type=dropped} > 0
|
||||
for: 1m
|
||||
projects: [1]
|
||||
annotations:
|
||||
summary: 'Uptrace has dropped {{ $values.spans }} spans'
|
||||
|
||||
- name: Always firing (for fun and testing)
|
||||
metrics:
|
||||
- process.runtime.go.goroutines as $goroutines
|
||||
query:
|
||||
- $goroutines >= 0 group by host.name
|
||||
for: 1m
|
||||
annotations:
|
||||
summary: '{{ $labels.host_name }} has high number of goroutines: {{ $values.goroutines }}'
|
||||
|
||||
# Create alerts from error logs and span events.
|
||||
create_alerts_from_spans:
|
||||
|
@ -139,8 +197,8 @@ alerting:
|
|||
##
|
||||
alertmanager_client:
|
||||
# AlertManager API endpoints that Uptrace uses to manage alerts.
|
||||
# urls:
|
||||
# - 'http://alertmanager:9093/api/v2/alerts'
|
||||
urls:
|
||||
- 'http://alertmanager:9093/api/v2/alerts'
|
||||
|
||||
##
|
||||
## Various options to tweak ClickHouse schema.
|
||||
|
|
Loading…
Reference in New Issue