# Name of the GKE cluster used by `make deploy`.
# Override with `make deploy CLUSTER_NAME=my-cluster`.
CLUSTER_NAME ?= bwplotka-stdtest

# Scenario number passed to /break/<n> and /fix/<n>.
# Override with `make break CASE=1`.
CASE ?= 0

# Pod that all kubectl-based targets operate on: the third whitespace-separated
# name returned for the `app=brokenapp` selector. The variable is recursively
# expanded, so kubectl is queried each time a target references it.
THE_CHOSEN_POD = $(shell kubectl get pods -l 'app=brokenapp' -o jsonpath='{.items[*].metadata.name}' | sort | awk '{print $$3}')

.PHONY: help
help: ## Displays help.
	@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n  make \033[36m<target>\033[0m\n\nTargets:\n"} /^[a-z0-9A-Z_-]+:.*?##/ { printf "  \033[36m%-10s\033[0m %s\n", $$1, $$2 }' $(MAKEFILE_LIST)

.PHONY: deploy
deploy: ## create or reuse the GKE cluster, then build, push and deploy brokenapp
	@bash setup-gke.sh $(CLUSTER_NAME)

# `go run .` builds the whole package; brokenapp.go alone does not compile
# because it uses declarations from scenarios.go.
.PHONY: run
run: ## run brokenapp locally
	@go run . -listen-address=":99"

.PHONY: port-forward
port-forward: ## forward traffic to one pod
	@kubectl port-forward pod/$(THE_CHOSEN_POD) 9999

.PHONY: metrics
metrics: ## get metrics from one pod
	@kubectl exec $(THE_CHOSEN_POD) -- curl -s http://localhost:9999/metrics

.PHONY: open-metrics
open-metrics: ## get metrics from one pod in OpenMetrics format
	@kubectl exec $(THE_CHOSEN_POD) -- curl -s -H 'Accept: application/openmetrics-text' http://localhost:9999/metrics

.PHONY: break
break: ## break one pod
	@kubectl exec $(THE_CHOSEN_POD) -- curl -s http://localhost:9999/break/$(CASE)

.PHONY: fix
fix: ## fix one pod
	@kubectl exec $(THE_CHOSEN_POD) -- curl -s http://localhost:9999/fix/$(CASE)
/dev/null +++ b/tutorials/runtime/wheelofmisfortune/README.md @@ -0,0 +1,3 @@ +# client_golang Tutorial: runtime wheel of misfortune + +[Slides](https://docs.google.com/presentation/d/1blGS0rey7-nDWHAJrZnX_0kwbDuS0M3cnzDt3_ct9Ss/edit#slide=id.g2f3249de911_4_146) diff --git a/tutorials/runtime/wheelofmisfortune/brokenapp.go b/tutorials/runtime/wheelofmisfortune/brokenapp.go new file mode 100644 index 0000000..c75ca2d --- /dev/null +++ b/tutorials/runtime/wheelofmisfortune/brokenapp.go @@ -0,0 +1,132 @@ +// Copyright 2023 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "context" + "flag" + "log" + "net/http" + httppprof "net/http/pprof" + "regexp" + "strings" + "sync" + "sync/atomic" + "syscall" + + "github.com/efficientgo/core/errors" + "github.com/oklog/run" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/collectors" + "github.com/prometheus/client_golang/prometheus/promhttp" +) + +var ( + addr = flag.String("listen-address", ":9999", "The address to listen on for HTTP requests.") +) + +func main() { + flag.Parse() + + if err := runMain(*addr); err != nil { + // Use %+v for github.com/efficientgo/core/errors error to print with stack. + log.Fatalf("Error: %+v", errors.Wrapf(err, "%s", flag.Arg(0))) + } +} + +func runMain(addr string) (err error) { + // Create registry for Prometheus metrics. 
+ reg := prometheus.NewRegistry() + reg.MustRegister( + collectors.NewGoCollector(collectors.WithGoCollectorRuntimeMetrics( // Metrics from Go runtime. + collectors.GoRuntimeMetricsRule{ + Matcher: regexp.MustCompile("/sched/latencies:seconds"), // One more recommended metric on top of the default. + }, + )), + collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}), // Metrics about the current UNIX process. + ) + + m := http.NewServeMux() + + // Create HTTP handler for Prometheus metrics. + m.Handle("/metrics", promhttp.HandlerFor( + reg, + promhttp.HandlerOpts{EnableOpenMetrics: true}, + )) + + // Debug profiling endpoints. + m.HandleFunc("/debug/pprof/", httppprof.Index) + m.HandleFunc("/debug/pprof/cmdline", httppprof.Cmdline) + m.HandleFunc("/debug/pprof/profile", httppprof.Profile) + m.HandleFunc("/debug/pprof/symbol", httppprof.Symbol) + + s := &scenarios{} + m.HandleFunc("/break/", func(w http.ResponseWriter, r *http.Request) { + if err := s.SetFromParam(strings.TrimPrefix(r.URL.Path, "/break/"), true); err != nil { + w.WriteHeader(http.StatusBadRequest) + } + }) + m.HandleFunc("/fix/", func(w http.ResponseWriter, r *http.Request) { + if err := s.SetFromParam(strings.TrimPrefix(r.URL.Path, "/fix/"), false); err != nil { + w.WriteHeader(http.StatusBadRequest) + } + }) + + srv := http.Server{Addr: addr, Handler: m} + g := &run.Group{} + { + g.Add(func() error { + log.Println("Starting HTTP server", "addr", addr) + if err := srv.ListenAndServe(); err != nil { + return errors.Wrap(err, "starting web server") + } + return nil + }, func(error) { + if err := srv.Close(); err != nil { + log.Println("Error: Failed to stop web server", "err", err) + } + }) + } + g.Add(run.SignalHandler(context.Background(), syscall.SIGINT, syscall.SIGTERM)) + addContextNotCanceledGroup(g, reg, func() bool { return s.IsEnabled(contextNotCanceled) }) + addGoroutineJumpGroup(g, func() bool { return s.IsEnabled(goroutineJump) }) + return g.Run() +} + +func doOp(ctx 
// fib iteratively computes the n-th Fibonacci number (fib(0) = 0, fib(1) = 1).
//
// Any n below 2, including negative values, is returned unchanged. The context
// is polled before every step of the iteration; as soon as it reports an error
// the computation is abandoned and -1 is returned. The arithmetic is done in
// plain int, so large n wrap around.
func fib(ctx context.Context, n int) int {
	if n < 2 {
		return n
	}

	prev, cur := 0, 1
	// n-1 steps take (fib(0), fib(1)) to (fib(n-1), fib(n)).
	for remaining := n - 1; remaining > 0; remaining-- {
		if err := ctx.Err(); err != nil {
			return -1
		}
		prev, cur = cur, prev+cur
	}
	return cur
}
v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/prometheus/client_model v0.6.1 // indirect + github.com/prometheus/common v0.55.0 // indirect + github.com/prometheus/procfs v0.15.1 // indirect + golang.org/x/sys v0.22.0 // indirect + google.golang.org/protobuf v1.34.2 // indirect +) + +replace github.com/prometheus/client_golang => github.com/prometheus/client_golang v1.19.1-0.20240811171649-aa3c00d2ee32 diff --git a/tutorials/runtime/wheelofmisfortune/go.sum b/tutorials/runtime/wheelofmisfortune/go.sum new file mode 100644 index 0000000..8da6643 --- /dev/null +++ b/tutorials/runtime/wheelofmisfortune/go.sum @@ -0,0 +1,30 @@ +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/efficientgo/core v1.0.0-rc.2 h1:7j62qHLnrZqO3V3UA0AqOGd5d5aXV3AX6m/NZBHp78I= +github.com/efficientgo/core v1.0.0-rc.2/go.mod h1:FfGdkzWarkuzOlY04VY+bGfb1lWrjaL6x/GLcQ4vJps= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= +github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= 
// scenario is the number of a failure mode that can be toggled at runtime.
type scenario int

const (
	contextNotCanceled scenario = iota // 0
	goroutineJump                      // 1
)

// scenarios holds the on/off switch for every scenario, indexed by scenario
// number. All access to enabled is guarded by mu.
type scenarios struct {
	enabled [2]bool
	mu      sync.RWMutex
}

// SetFromParam parses c as a decimal scenario number and sets that scenario's
// switch to v. It returns an error, and changes nothing, when c is empty, is
// not a number, or is outside the range of known scenarios.
func (s *scenarios) SetFromParam(c string, v bool) error {
	if len(c) == 0 {
		return errors.New("no {case} parameter in path")
	}

	idx, convErr := strconv.Atoi(c)
	switch {
	case convErr != nil:
		return errors.New("{case} is not a number")
	case idx < 0, idx >= len(s.enabled):
		return fmt.Errorf("{case} should be a number from 0 to %d", len(s.enabled)-1)
	}

	s.set(scenario(idx), v)
	return nil
}

// set stores v as the switch for choice while holding the write lock.
func (s *scenarios) set(choice scenario, v bool) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.enabled[choice] = v
}

// IsEnabled reports the current switch for choice while holding the read lock.
func (s *scenarios) IsEnabled(choice scenario) bool {
	s.mu.RLock()
	defer s.mu.RUnlock()
	return s.enabled[choice]
}
+ defer cancel() + } + + start := time.Now() + ret := doOp(ctx) + since := time.Since(start) + opLatency.Observe(float64(since.Nanoseconds()) * 1e-9) + + fmt.Println("10 * 1e5th fibonacci number is", ret, "; elapsed", since.String()) + + if !broken { + cancel() + } + } + }, func(err error) { + cancel() + }) +} + +func addGoroutineJumpGroup(g *run.Group, shouldBreak func() bool) { + ctx, cancel := context.WithCancel(context.Background()) + g.Add(func() error { + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(30 * time.Second): + } + + if !shouldBreak() { + continue + } + + var wg sync.WaitGroup + done := make(chan struct{}) + + for i := 0; i < 300; i++ { + time.Sleep(1 * time.Second) + wg.Add(1) + go func() { + <-done + wg.Done() + }() + } + time.Sleep(30 * time.Second) + close(done) + wg.Wait() + } + }, func(err error) { + cancel() + }) +} + +type customCtx struct { + context.Context +} + +func withCustomContext(ctx context.Context) context.Context { + return customCtx{Context: ctx} +} + +func (c customCtx) Value(any) any { + return nil // Noop to avoid optimizations to highlight the negative effect. +} diff --git a/tutorials/runtime/wheelofmisfortune/setup-gke.sh b/tutorials/runtime/wheelofmisfortune/setup-gke.sh new file mode 100644 index 0000000..c102284 --- /dev/null +++ b/tutorials/runtime/wheelofmisfortune/setup-gke.sh @@ -0,0 +1,46 @@ +#!/bin/bash +set -efo pipefail +export SHELLOPTS # propagate set to children by default +IFS=$'\t\n' + +CLUSTER_NAME=$1 +if [ -z "${CLUSTER_NAME}" ]; then + echo "cluster name is required as the first parameter!" +fi + +ZONE="us-central1-a" +PROJECT_ID=$(gcloud config get project) + +# Do nothing if cluster already exists. 
# Abort early when no cluster name was given; every step below needs it.
: "${CLUSTER_NAME:?cluster name is required as the first parameter}"

# Reuse the cluster if one with this name is already listed, otherwise create it.
if gcloud container clusters list --filter="name: ${CLUSTER_NAME}" 2>&1 | grep -q "^${CLUSTER_NAME} "
then
  echo "WARN: Cluster ${CLUSTER_NAME} already exists, skipping creation"
  gcloud container clusters get-credentials "${CLUSTER_NAME}" --zone "${ZONE}" --project "${PROJECT_ID}"
else
  # Start a new one-node cluster.
  # https://cloud.google.com/sdk/gcloud/reference/container/clusters/create
  # n2-standard-4 -- 4 vCPUs 16 GB
  gcloud container clusters create "${CLUSTER_NAME}" \
    --project="${PROJECT_ID}" \
    --location="${ZONE}" \
    --workload-pool="${PROJECT_ID}.svc.id.goog" \
    --release-channel=rapid \
    --num-nodes=1 \
    --machine-type="n2-standard-4"
fi

# --minify keeps only the entries used by the current context, so the first
# cluster entry is the one kubectl talks to. Filtering on a "kind-" prefixed
# name would never match a GKE entry.
CLUSTER_API_URL=$(kubectl config view --minify -o jsonpath='{.clusters[0].cluster.server}')
echo "## Cluster is now running, kubectl should point to the new cluster at ${CLUSTER_API_URL}"
kubectl cluster-info

# Static linux/amd64 binary that the Dockerfile copies into the image, plus a
# build for the host platform.
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o ./brokenapp
CGO_ENABLED=0 go build -o ./brokenapp-mac

# Build image.
docker buildx build --platform linux/amd64 --tag gcr.io/gpe-test-1/bwplotka-brokenapp:latest --load .

# Push image.
docker push gcr.io/gpe-test-1/bwplotka-brokenapp:latest

# Remove any previous deployment so the pods are recreated from the new image.
# --ignore-not-found keeps the first run from aborting under `set -e`.
kubectl delete deployment brokenapp --ignore-not-found

# Deploy.
kubectl apply -f brokenapp.yaml