chore: update otel example

This commit is contained in:
Vladimir Mihailenco 2022-11-09 15:16:13 +02:00
parent 1278a8094f
commit dd858eaf07
8 changed files with 164 additions and 22 deletions

View File

@ -40,9 +40,25 @@ UPTRACE_DSN=http://project2_secret_token@localhost:14317/2 go run client.go
trace: http://localhost:14318/traces/ee029d8782242c8ed38b16d961093b35
```
![Redis trace](./image/redis-trace.png)
You can also open Uptrace UI at [http://localhost:14318](http://localhost:14318) to view available
spans, logs, and metrics.
## Redis monitoring
You can also [monitor Redis performance](https://uptrace.dev/opentelemetry/redis-monitoring.html)
metrics by installing the OpenTelemetry Collector.
[OpenTelemetry Collector](https://uptrace.dev/opentelemetry/collector.html) is an agent that pulls
telemetry data from systems you want to monitor and sends it to APM tools using the OpenTelemetry
protocol (OTLP).
When telemetry data reaches Uptrace, it automatically generates a Redis dashboard from a pre-defined
template.
![Redis dashboard](./image/metrics.png)
## Links
- [Uptrace open-source APM](https://uptrace.dev/get/open-source-apm.html)

View File

@ -0,0 +1,53 @@
# See https://prometheus.io/docs/alerting/latest/configuration/ for details.

global:
  # The smarthost and SMTP sender used for mail notifications.
  smtp_smarthost: 'mailhog:1025'
  smtp_from: 'alertmanager@example.com'
  smtp_require_tls: false

receivers:
  - name: 'team-X'
    email_configs:
      - to: 'some-receiver@example.com'
        send_resolved: true

# The root route on which each incoming alert enters.
route:
  # The labels by which incoming alerts are grouped together. For example,
  # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
  # be batched into a single group.
  group_by: ['alertname', 'cluster', 'service']

  # When a new group of alerts is created by an incoming alert, wait at
  # least 'group_wait' to send the initial notification.
  # This way ensures that you get multiple alerts for the same group that start
  # firing shortly after another are batched together on the first
  # notification.
  group_wait: 30s

  # When the first notification was sent, wait 'group_interval' to send a batch
  # of new alerts that started firing for that group.
  group_interval: 5m

  # If an alert has successfully been sent, wait 'repeat_interval' to
  # resend them.
  repeat_interval: 3h

  # A default receiver
  receiver: team-X

  # All the above attributes are inherited by all child routes and can be
  # overwritten on each.

  # The child route trees.
  routes:
    # This route matches error alerts created from spans or logs.
    - matchers:
        - alert_kind="error"
      group_interval: 24h
      receiver: team-X

# The directory from which notification templates are read.
templates:
  - '/etc/alertmanager/template/*.tmpl'

View File

@ -18,7 +18,7 @@ services:
- '9000:9000'
uptrace:
image: 'uptrace/uptrace:1.1.0'
image: 'uptrace/uptrace:1.2.0'
#image: 'uptrace/uptrace-dev:latest'
restart: on-failure
volumes:
@ -36,11 +36,8 @@ services:
otel-collector:
image: otel/opentelemetry-collector-contrib:0.58.0
restart: on-failure
user: '0:0' # required for logs
volumes:
- ./otel-collector.yaml:/etc/otelcol-contrib/config.yaml
- /var/lib/docker/containers:/var/lib/docker/containers:ro
- /var/log:/var/log:ro
- ./config/otel-collector.yaml:/etc/otelcol-contrib/config.yaml
ports:
- '4317:4317'
- '4318:4318'
@ -48,7 +45,25 @@ services:
vector:
image: timberio/vector:0.24.X-alpine
volumes:
- ./vector.toml:/etc/vector/vector.toml:ro
- ./config/vector.toml:/etc/vector/vector.toml:ro
alertmanager:
image: prom/alertmanager:v0.24.0
restart: on-failure
volumes:
- ./config/alertmanager.yml:/etc/alertmanager/config.yml
- alertmanager_data:/alertmanager
ports:
- 9093:9093
command:
- '--config.file=/etc/alertmanager/config.yml'
- '--storage.path=/alertmanager'
mailhog:
image: mailhog/mailhog:v1.0.1
restart: on-failure
ports:
- '8025:8025'
redis-server:
image: redis

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

View File

@ -13,6 +13,16 @@
## foo: $$FOO_BAR
##
##
## ClickHouse database credentials.
##
ch:
# Connection string for ClickHouse database. For example:
# clickhouse://<user>:<password>@<host>:<port>/<database>?sslmode=disable
#
# See https://clickhouse.uptrace.dev/guide/golang-clickhouse.html#options
dsn: 'clickhouse://default:@clickhouse:9000/uptrace?sslmode=disable'
##
## A list of pre-configured projects. Each project is fully isolated.
##
@ -26,6 +36,10 @@ projects:
- service.name
- host.name
- deployment.environment
# Group spans by deployment.environment attribute.
group_by_env: false
# Group funcs spans by service.name attribute.
group_funcs_by_service: false
# Other projects can be used to monitor your applications.
# To monitor micro-services or multiple related services, use a single project.
@ -36,6 +50,49 @@ projects:
- service.name
- host.name
- deployment.environment
# Group spans by deployment.environment attribute.
group_by_env: false
# Group funcs spans by service.name attribute.
group_funcs_by_service: false
##
## Create metrics from spans and events.
##
metrics_from_spans:
- name: uptrace.tracing.spans_duration
description: Spans duration (excluding events)
instrument: histogram
unit: microseconds
value: span.duration / 1000
attrs:
- span.system as system
- service.name as service
- host.name as host
- span.status_code as status
where: not span.is_event
- name: uptrace.tracing.spans
description: Spans count (excluding events)
instrument: counter
unit: 1
value: span.count
attrs:
- span.system as system
- service.name as service
- host.name as host
- span.status_code as status
where: not span.is_event
- name: uptrace.tracing.events
description: Events count (excluding spans)
instrument: counter
unit: 1
value: span.count
attrs:
- span.system as system
- service.name as service
- host.name as host
where: span.is_event
##
## To require authentication, uncomment the following section.
@ -78,16 +135,6 @@ auth:
# # Defaults to 'preferred_username'.
# claim: preferred_username
##
## ClickHouse database credentials.
##
ch:
# Connection string for ClickHouse database. For example:
# clickhouse://<user>:<password>@<host>:<port>/<database>?sslmode=disable
#
# See https://clickhouse.uptrace.dev/guide/golang-clickhouse.html#options
dsn: 'clickhouse://default:@clickhouse:9000/uptrace?sslmode=disable'
##
## Alerting rules for monitoring metrics.
##
@ -102,8 +149,8 @@ alerting:
- $net_errors > 0 group by host.name
# for the last 5 minutes
for: 5m
# in the project id=1
projects: [1]
annotations:
summary: '{{ $labels.host_name }} has high number of net errors: {{ $values.net_errors }}'
- name: Filesystem usage >= 90%
metrics:
@ -114,7 +161,8 @@ alerting:
- where device !~ "loop"
- $fs_usage{state="used"} / $fs_usage >= 0.9
for: 5m
projects: [1]
annotations:
summary: '{{ $labels.host_name }} has high FS usage: {{ $values.fs_usage }}'
- name: Uptrace is dropping spans
metrics:
@ -122,7 +170,17 @@ alerting:
query:
- $spans{type=dropped} > 0
for: 1m
projects: [1]
annotations:
summary: 'Uptrace has dropped {{ $values.spans }} spans'
- name: Always firing (for fun and testing)
metrics:
- process.runtime.go.goroutines as $goroutines
query:
- $goroutines >= 0 group by host.name
for: 1m
annotations:
summary: '{{ $labels.host_name }} has high number of goroutines: {{ $values.goroutines }}'
# Create alerts from error logs and span events.
create_alerts_from_spans:
@ -139,8 +197,8 @@ alerting:
##
alertmanager_client:
# AlertManager API endpoints that Uptrace uses to manage alerts.
# urls:
# - 'http://alertmanager:9093/api/v2/alerts'
urls:
- 'http://alertmanager:9093/api/v2/alerts'
##
## Various options to tweak ClickHouse schema.