Print stacktrace in questionable situations

It would be useful to see what the stack looks like when we get
consecutive watchdog patting failures. We can then try to work out
what's ultimately causing this issue.

Approved-by: Trek Hopton
This commit is contained in:
Saxon Milton 2024-04-07 00:25:03 +00:00 committed by Saxon Nelson-Milton
parent a40faaa23e
commit 66a1687316
3 changed files with 34 additions and 9 deletions

View File

@ -138,6 +138,7 @@ func terminationCallback(m *broadcastManager) func() {
return return
} }
m.log.Info("successfully saved broadcast manager state on termination signal") m.log.Info("successfully saved broadcast manager state on termination signal")
logTrace(m.log.Debug,m.log.Warning)
} }
} }
@ -163,7 +164,7 @@ func (m *broadcastManager) loadConfig() error {
return fmt.Errorf("could not unmarshal config file: %w", err) return fmt.Errorf("could not unmarshal config file: %w", err)
} }
m.log.Debug("logger config loaded","cfg",cfg) m.log.Debug("logger config loaded", "cfg", cfg)
m.log.(*logging.JSONLogger).SetLevel(map[string]int8{ m.log.(*logging.JSONLogger).SetLevel(map[string]int8{
"debug": logging.Debug, "debug": logging.Debug,
"info": logging.Info, "info": logging.Info,
@ -265,7 +266,7 @@ func (m *broadcastManager) recv(w http.ResponseWriter, r *http.Request) {
rv, err := m.getPipeline(ma) rv, err := m.getPipeline(ma)
if err != nil { if err != nil {
m.errorLogWrite(m.log, w, "could not get revid pipeline","mac",ma,"error",err) m.errorLogWrite(m.log, w, "could not get revid pipeline", "mac", ma, "error", err)
return return
} }
@ -386,7 +387,7 @@ func (m *broadcastManager) getPipeline(ma MAC) (*revid.Revid, error) {
var err error var err error
b.rv, err = m.initOrStartPipeline(b.rv, b.urls) b.rv, err = m.initOrStartPipeline(b.rv, b.urls)
if err != nil { if err != nil {
return nil, fmt.Errorf("could not init or start pipeline: %v",err) return nil, fmt.Errorf("could not init or start pipeline: %v", err)
} }
return b.rv, nil return b.rv, nil
@ -394,18 +395,18 @@ func (m *broadcastManager) getPipeline(ma MAC) (*revid.Revid, error) {
// initOrStartPipeline ensures that provided Revid pointer points to an // initOrStartPipeline ensures that provided Revid pointer points to an
// initialised and running revid pipeline. // initialised and running revid pipeline.
func (m *broadcastManager) initOrStartPipeline(rv *revid.Revid, urls []string) (*revid.Revid, error){ func (m *broadcastManager) initOrStartPipeline(rv *revid.Revid, urls []string) (*revid.Revid, error) {
var err error var err error
if rv == nil { if rv == nil {
rv, err = newRevid(m.log, urls) rv, err = newRevid(m.log, urls)
if err != nil { if err != nil {
return nil, fmt.Errorf("could not create new revid: %v",err) return nil, fmt.Errorf("could not create new revid: %v", err)
} }
} }
if !rv.Running() { if !rv.Running() {
err = rv.Start() err = rv.Start()
if err != nil { if err != nil {
return nil, fmt.Errorf("could not start revid pipeline: %v",err) return nil, fmt.Errorf("could not start revid pipeline: %v", err)
} }
} }
return rv, nil return rv, nil
@ -436,7 +437,7 @@ func (m *broadcastManager) isActive(ma MAC) bool {
// status field of the broadcast i.e. whether we expect data from a source // status field of the broadcast i.e. whether we expect data from a source
// or write the slate image. // or write the slate image.
func (m *broadcastManager) createOrUpdate(broadcast Broadcast) error { func (m *broadcastManager) createOrUpdate(broadcast Broadcast) error {
m.log.Debug("create or update","mac",broadcast.mac) m.log.Debug("create or update", "mac", broadcast.mac)
m.mu.Lock() m.mu.Lock()
defer m.mu.Unlock() defer m.mu.Unlock()
@ -459,7 +460,7 @@ func (m *broadcastManager) createOrUpdate(broadcast Broadcast) error {
// If the URLS have changed, we need to update the revid pipeline. // If the URLS have changed, we need to update the revid pipeline.
// We won't enter this if we've just created a new revid pipeline. // We won't enter this if we've just created a new revid pipeline.
if !reflect.DeepEqual(maybeOld.urls, broadcast.urls) { if !reflect.DeepEqual(maybeOld.urls, broadcast.urls) {
m.log.Debug("RTMP URLs have changed, updating pipeline config","mac",broadcast.mac,"old",maybeOld.urls,"new",broadcast.urls) m.log.Debug("RTMP URLs have changed, updating pipeline config", "mac", broadcast.mac, "old", maybeOld.urls, "new", broadcast.urls)
urls := strings.Join(broadcast.urls, ",") urls := strings.Join(broadcast.urls, ",")
err := maybeOld.rv.Update(map[string]string{"RTMPURL": urls}) err := maybeOld.rv.Update(map[string]string{"RTMPURL": urls})
if err != nil { if err != nil {
@ -472,7 +473,7 @@ func (m *broadcastManager) createOrUpdate(broadcast Broadcast) error {
// i.e. if the status is slate, we need to start writing the slate image. // i.e. if the status is slate, we need to start writing the slate image.
// We won't enter this if we've just created a new revid pipeline. // We won't enter this if we've just created a new revid pipeline.
if maybeOld.status != broadcast.status { if maybeOld.status != broadcast.status {
m.log.Debug("status has changed, starting or stopping slate","mac",broadcast.mac,"old",maybeOld.status,"new",broadcast.status) m.log.Debug("status has changed, starting or stopping slate", "mac", broadcast.mac, "old", maybeOld.status, "new", broadcast.status)
switch broadcast.status { switch broadcast.status {
case statusActive, statusPlay, statusCreate: case statusActive, statusPlay, statusCreate:
m.log.Info("updating configuration for mac", "mac", broadcast.mac) m.log.Info("updating configuration for mac", "mac", broadcast.mac)

View File

@ -92,12 +92,20 @@ func (n *watchdogNotifier) notify() {
n.termCallback() n.termCallback()
}() }()
var consecutiveUnhealthyStates int
for { for {
const nUnhealthyStatesForTrace = 10
if n.handlersUnhealthy() { if n.handlersUnhealthy() {
consecutiveUnhealthyStates++
if consecutiveUnhealthyStates >= nUnhealthyStatesForTrace {
logTrace(n.log.Debug,n.log.Warning)
consecutiveUnhealthyStates = 0
}
const unhealthyHandlerWait = 1 * time.Second const unhealthyHandlerWait = 1 * time.Second
time.Sleep(unhealthyHandlerWait) time.Sleep(unhealthyHandlerWait)
continue continue
} }
consecutiveUnhealthyStates = 0
<-notifyTicker.C <-notifyTicker.C

View File

@ -28,6 +28,7 @@ import (
"fmt" "fmt"
"net/http" "net/http"
"strconv" "strconv"
"runtime"
"bitbucket.org/ausocean/av/codec/codecutil" "bitbucket.org/ausocean/av/codec/codecutil"
"bitbucket.org/ausocean/av/revid" "bitbucket.org/ausocean/av/revid"
@ -86,3 +87,18 @@ func isMac(m string) bool {
} }
return true return true
} }
type Log func(msg string, args ...interface{})
func logTrace(debug, warning Log){
const (
maxStackTraceSize = 100000
allStacks = true
)
buf := make([]byte, maxStackTraceSize)
n := runtime.Stack(buf, allStacks)
if n > maxStackTraceSize && warning != nil {
warning("stacktrace exceeded buffer size")
}
debug("got stacktrace at termination", "stacktrace", string(buf[:n]))
}