Print stacktrace in questionable situations

It would be useful to see what the stack looks like when we get
consecutive watchdog patting failures. We can then try to work out
what's ultimately causing this issue.

Approved-by: Trek Hopton
This commit is contained in:
Saxon Milton 2024-04-07 00:25:03 +00:00
parent 524e010fe1
commit 6f70366f1b
3 changed files with 34 additions and 9 deletions

View File

@ -138,6 +138,7 @@ func terminationCallback(m *broadcastManager) func() {
return
}
m.log.Info("successfully saved broadcast manager state on termination signal")
logTrace(m.log.Debug,m.log.Warning)
}
}
@ -163,7 +164,7 @@ func (m *broadcastManager) loadConfig() error {
return fmt.Errorf("could not unmarshal config file: %w", err)
}
m.log.Debug("logger config loaded","cfg",cfg)
m.log.Debug("logger config loaded", "cfg", cfg)
m.log.(*logging.JSONLogger).SetLevel(map[string]int8{
"debug": logging.Debug,
"info": logging.Info,
@ -265,7 +266,7 @@ func (m *broadcastManager) recv(w http.ResponseWriter, r *http.Request) {
rv, err := m.getPipeline(ma)
if err != nil {
m.errorLogWrite(m.log, w, "could not get revid pipeline","mac",ma,"error",err)
m.errorLogWrite(m.log, w, "could not get revid pipeline", "mac", ma, "error", err)
return
}
@ -385,7 +386,7 @@ func (m *broadcastManager) getPipeline(ma MAC) (*revid.Revid, error) {
var err error
b.rv, err = m.initOrStartPipeline(b.rv, b.urls)
if err != nil {
return nil, fmt.Errorf("could not init or start pipeline: %v",err)
return nil, fmt.Errorf("could not init or start pipeline: %v", err)
}
return b.rv, nil
@ -393,18 +394,18 @@ func (m *broadcastManager) getPipeline(ma MAC) (*revid.Revid, error) {
// initOrStartPipeline ensures that provided Revid pointer points to an
// initialised and running revid pipeline.
func (m *broadcastManager) initOrStartPipeline(rv *revid.Revid, urls []string) (*revid.Revid, error){
func (m *broadcastManager) initOrStartPipeline(rv *revid.Revid, urls []string) (*revid.Revid, error) {
var err error
if rv == nil {
rv, err = newRevid(m.log, urls)
if err != nil {
return nil, fmt.Errorf("could not create new revid: %v",err)
return nil, fmt.Errorf("could not create new revid: %v", err)
}
}
if !rv.Running() {
err = rv.Start()
if err != nil {
return nil, fmt.Errorf("could not start revid pipeline: %v",err)
return nil, fmt.Errorf("could not start revid pipeline: %v", err)
}
}
return rv, nil
@ -435,7 +436,7 @@ func (m *broadcastManager) isActive(ma MAC) bool {
// status field of the broadcast i.e. whether we expect data from a source
// or write the slate image.
func (m *broadcastManager) createOrUpdate(broadcast Broadcast) error {
m.log.Debug("create or update","mac",broadcast.mac)
m.log.Debug("create or update", "mac", broadcast.mac)
m.mu.Lock()
defer m.mu.Unlock()
@ -456,7 +457,7 @@ func (m *broadcastManager) createOrUpdate(broadcast Broadcast) error {
// If the URLS have changed, we need to update the revid pipeline.
// We won't enter this if we've just created a new revid pipeline.
if !reflect.DeepEqual(maybeOld.urls, broadcast.urls) {
m.log.Debug("RTMP URLs have changed, updating pipeline config","mac",broadcast.mac,"old",maybeOld.urls,"new",broadcast.urls)
m.log.Debug("RTMP URLs have changed, updating pipeline config", "mac", broadcast.mac, "old", maybeOld.urls, "new", broadcast.urls)
urls := strings.Join(broadcast.urls, ",")
err := maybeOld.rv.Update(map[string]string{"RTMPURL": urls})
if err != nil {
@ -468,7 +469,7 @@ func (m *broadcastManager) createOrUpdate(broadcast Broadcast) error {
// i.e. if the status is slate, we need to start writing the slate image.
// We won't enter this if we've just created a new revid pipeline.
if maybeOld.status != broadcast.status {
m.log.Debug("status has changed, starting or stopping slate","mac",broadcast.mac,"old",maybeOld.status,"new",broadcast.status)
m.log.Debug("status has changed, starting or stopping slate", "mac", broadcast.mac, "old", maybeOld.status, "new", broadcast.status)
switch broadcast.status {
case statusActive, statusPlay, statusCreate:
m.log.Info("updating configuration for mac", "mac", broadcast.mac)

View File

@ -92,12 +92,20 @@ func (n *watchdogNotifier) notify() {
n.termCallback()
}()
var consecutiveUnhealthyStates int
for {
const nUnhealthyStatesForTrace = 10
if n.handlersUnhealthy() {
consecutiveUnhealthyStates++
if consecutiveUnhealthyStates >= nUnhealthyStatesForTrace {
logTrace(n.log.Debug,n.log.Warning)
consecutiveUnhealthyStates = 0
}
const unhealthyHandlerWait = 1 * time.Second
time.Sleep(unhealthyHandlerWait)
continue
}
consecutiveUnhealthyStates = 0
<-notifyTicker.C

View File

@ -28,6 +28,7 @@ import (
"fmt"
"net/http"
"strconv"
"runtime"
"bitbucket.org/ausocean/av/codec/codecutil"
"bitbucket.org/ausocean/av/revid"
@ -86,3 +87,18 @@ func isMac(m string) bool {
}
return true
}
type Log func(msg string, args ...interface{})
func logTrace(debug, warning Log){
const (
maxStackTraceSize = 100000
allStacks = true
)
buf := make([]byte, maxStackTraceSize)
n := runtime.Stack(buf, allStacks)
if n > maxStackTraceSize && warning != nil {
warning("stacktrace exceeded buffer size")
}
debug("got stacktrace at termination", "stacktrace", string(buf[:n]))
}