From e0ec6a8dc980cfb8e01e80130509ff0f385ad5dd Mon Sep 17 00:00:00 2001 From: Trek H Date: Wed, 17 Apr 2019 23:11:23 +0930 Subject: [PATCH 01/57] revid: added basic PCM lexer and started adding startMic() func to revid setup. --- codec/lex/lex.go | 29 +++++++++++++++++++++++++++++ revid/revid.go | 7 +++++++ 2 files changed, 36 insertions(+) diff --git a/codec/lex/lex.go b/codec/lex/lex.go index da0dd1b6..a3d05df5 100644 --- a/codec/lex/lex.go +++ b/codec/lex/lex.go @@ -36,6 +36,10 @@ import ( "time" ) +const ( + audioChunkSize = 16000 +) + var noDelay = make(chan time.Time) func init() { @@ -245,3 +249,28 @@ func MJPEG(dst io.Writer, src io.Reader, delay time.Duration) error { } } } + +func PCM(dst io.Writer, src io.Reader, delay time.Duration) error { + var tick <-chan time.Time + if delay == 0 { + tick = noDelay + } else { + ticker := time.NewTicker(delay) + defer ticker.Stop() + tick = ticker.C + } + + r := bufio.NewReader(src) + for { + buf := make([]byte, 0, audioChunkSize) + _, err := r.Read(buf) + if err != nil { + return err + } + <-tick + _, err = dst.Write(buf) + if err != nil { + return err + } + } +} diff --git a/revid/revid.go b/revid/revid.go index d8068115..64fad64b 100644 --- a/revid/revid.go +++ b/revid/revid.go @@ -668,6 +668,13 @@ func (r *Revid) setupInputForFile() error { return nil } +// startMic is used to start capturing audio from an audio device and processing it. 
+func startMic() { + + go processFrom(stream, r.config.Rate) + return nil +} + func (r *Revid) processFrom(read io.Reader, delay time.Duration) { r.config.Logger.Log(logger.Info, pkg+"reading input data") r.err <- r.lexTo(r.buffer, read, delay) From 58b9458ff471f02074201cc1ae51b3aff2ab553e Mon Sep 17 00:00:00 2001 From: Trek H Date: Thu, 18 Apr 2019 16:22:20 +0930 Subject: [PATCH 02/57] revid: added audio.go to handle sound cards and devices audio.go will be used for recording sound from the sound card and mic it is like audio-netsender but it is a package instead of a command and without the netsender. --- revid/audio.go | 555 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 555 insertions(+) create mode 100644 revid/audio.go diff --git a/revid/audio.go b/revid/audio.go new file mode 100644 index 00000000..2177d146 --- /dev/null +++ b/revid/audio.go @@ -0,0 +1,555 @@ +/* +NAME + audio-netsender - NetSender client for sending audio to NetReceiver + +AUTHORS + Alan Noble + Trek Hopton + +ACKNOWLEDGEMENTS + A special thanks to Joel Jensen for his Go ALSA package. + +LICENSE + audio-netsender is Copyright (C) 2018 the Australian Ocean Lab (AusOcean). + + It is free software: you can redistribute it and/or modify them under + the terms of the GNU General Public License as published by the + Free Software Foundation, either version 3 of the License, or (at your + option) any later version. + + It is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with https://bitbucket.org/ausocean/iot/src/master/gpl.txt. + If not, see http://www.gnu.org/licenses. +*/ + +// audio-netsender is a NetSender client for sending audio to +// NetReceiver. 
Audio is captured by means of an ALSA recording +// device, specified by the NetReceiver "source" variable. It sent via +// HTTP to NetReceiver in raw audio form, i.e., as PCM data, where it +// is stored as BinaryData objects. Other NetReceiver variables are +// "rate", "period", "channels" and "bits", for specifiying the frame +// rate (Hz), audio period (seconds), number of channels and sample +// bit size respectively. For a description of NetReceiver see +// http://netreceiver.appspot.com/help. +package main + +import ( + "errors" + "flag" + "io" + "strconv" + "sync" + "time" + + "github.com/yobert/alsa" + + "bitbucket.org/ausocean/av/codec/pcm" + "bitbucket.org/ausocean/iot/pi/netsender" + "bitbucket.org/ausocean/iot/pi/sds" + "bitbucket.org/ausocean/iot/pi/smartlogger" + "bitbucket.org/ausocean/utils/logger" + "bitbucket.org/ausocean/utils/ring" +) + +const ( + progName = "audio-netsender" + logPath = "/var/log/netsender" + retryPeriod = 5 * time.Second + defaultFrameRate = 48000 + defaultPeriod = 5 // seconds + defaultChannels = 2 + defaultBits = 16 + rbDuration = 300 // seconds + rbTimeout = 100 * time.Millisecond + rbNextTimeout = 100 * time.Millisecond +) + +// audioClient holds everything we need to know about the client. +// NB: At 44100 Hz frame rate, 2 channels and 16-bit samples, a period of 5 seconds +// results in PCM data chunks of 882000 bytes! A longer period exceeds datastore's 1MB blob limit. +type audioClient struct { + mu sync.Mutex // mu protects the audioClient. 
+ + parameters + + // internals + dev *alsa.Device // audio input device + ab alsa.Buffer // ALSA's buffer + rb *ring.Buffer // our buffer + ns *netsender.Sender // our NetSender + vs int // our "var sum" to track var changes +} + +type parameters struct { + mode string // operating mode, either "Normal" or "Paused" + source string // name of audio source, or empty for the default source + rate int // frame rate in Hz, 44100Hz by default + period int // audio period in seconds, 5s by default + channels int // number of audio channels, 1 for mono, 2 for stereo + bits int // sample bit size, 16 by default +} + +var log *logger.Logger + +func main() { + var logLevel int + flag.IntVar(&logLevel, "LogLevel", int(logger.Debug), "Specifies log level") + flag.Parse() + + validLogLevel := true + if logLevel < int(logger.Debug) || logLevel > int(logger.Fatal) { + logLevel = int(logger.Info) + validLogLevel = false + } + + logSender := smartlogger.New(logPath) + log = logger.New(int8(logLevel), &logSender.LogRoller) + log.Log(logger.Info, "log-netsender: Logger Initialized") + if !validLogLevel { + log.Log(logger.Error, "Invalid log level was defaulted to Info") + } + + var ac audioClient + var err error + ac.ns, err = netsender.New(log, nil, sds.ReadSystem, nil) + if err != nil { + log.Log(logger.Fatal, "netsender.Init failed", "error", err.Error()) + } + + // Get audio params and store the current var sum. + vars, err := ac.ns.Vars() + if err != nil { + log.Log(logger.Warning, "netsender.Vars failed; using defaults", "error", err.Error()) + } + ac.params(vars) + ac.vs = ac.ns.VarSum() + + // Open the requested audio device. + err = ac.open() + if err != nil { + log.Log(logger.Fatal, "alsa.open failed", "error", err.Error()) + } + + // Capture audio in periods of ac.period seconds, and buffer rbDuration seconds in total. 
+ ac.ab = ac.dev.NewBufferDuration(time.Second * time.Duration(ac.period)) + recSize := (((len(ac.ab.Data) / ac.dev.BufferFormat().Channels) * ac.channels) / ac.dev.BufferFormat().Rate) * ac.rate + rbLen := rbDuration / ac.period + ac.rb = ring.NewBuffer(rbLen, recSize, rbTimeout) + + go ac.input() + + ac.output() +} + +// params extracts audio params from corresponding NetReceiver vars and returns true if anything has changed. +// See audioClient for a description of the params and their limits. +func (ac *audioClient) params(vars map[string]string) bool { + // We are the only writers to this field + // so we don't need to lock here. + p := ac.parameters + changed := false + + mode := vars["mode"] + if p.mode != mode { + p.mode = mode + changed = true + } + source := vars["source"] + if p.source != source { + p.source = source + changed = true + } + val, err := strconv.Atoi(vars["rate"]) + if err != nil { + val = defaultFrameRate + } + if p.rate != val { + p.rate = val + changed = true + } + val, err = strconv.Atoi(vars["period"]) + if err != nil || val < 1 || 5 < val { + val = defaultPeriod + } + if p.period != val { + p.period = val + changed = true + } + val, err = strconv.Atoi(vars["channels"]) + if err != nil || (val != 1 && val != 2) { + val = defaultChannels + } + if p.channels != val { + p.channels = val + changed = true + } + val, err = strconv.Atoi(vars["bits"]) + if err != nil || (val != 16 && val != 32) { + val = defaultBits + } + if p.bits != val { + p.bits = val + changed = true + } + + if changed { + ac.mu.Lock() + ac.parameters = p + ac.mu.Unlock() + log.Log(logger.Debug, "Params changed") + } + log.Log(logger.Debug, "Parameters", "mode", p.mode, "source", p.source, "rate", p.rate, "period", p.period, "channels", p.channels, "bits", p.bits) + return changed +} + +// open or re-open the recording device with the given name and prepare it to record. +// If name is empty, the first recording device is used. 
+func (ac *audioClient) open() error { + if ac.dev != nil { + log.Log(logger.Debug, "Closing", "source", ac.source) + ac.dev.Close() + ac.dev = nil + } + log.Log(logger.Debug, "Opening", "source", ac.source) + + cards, err := alsa.OpenCards() + if err != nil { + return err + } + defer alsa.CloseCards(cards) + + for _, card := range cards { + devices, err := card.Devices() + if err != nil { + return err + } + for _, dev := range devices { + if dev.Type != alsa.PCM || !dev.Record { + continue + } + if dev.Title == ac.source || ac.source == "" { + ac.dev = dev + break + } + } + } + + if ac.dev == nil { + return errors.New("No audio source found") + } + log.Log(logger.Debug, "Found audio source", "source", ac.dev.Title) + + // ToDo: time out if Open takes too long. + err = ac.dev.Open() + if err != nil { + return err + } + log.Log(logger.Debug, "Opened audio source") + + _, err = ac.dev.NegotiateChannels(defaultChannels) + if err != nil { + return err + } + + // Try to negotiate a rate to record in that is divisible by the wanted rate + // so that it can be easily downsampled to the wanted rate. + // Note: if a card thinks it can record at a rate but can't actually, this can cause a failure. Eg. + // the audioinjector is supposed to record at 8000Hz and 16000Hz but it can't due to a firmware issue, + // to fix this 8000 and 16000 must be removed from this slice. + rates := [8]int{8000, 16000, 32000, 44100, 48000, 88200, 96000, 192000} + foundRate := false + for i := 0; i < len(rates) && !foundRate; i++ { + if rates[i] < ac.rate { + continue + } + if rates[i]%ac.rate == 0 { + _, err = ac.dev.NegotiateRate(rates[i]) + if err == nil { + foundRate = true + log.Log(logger.Debug, "Sample rate set", "rate", rates[i]) + } + } + } + + // If no easily divisible rate is found, then use the default rate. + if !foundRate { + log.Log(logger.Warning, "No available device sample-rates are divisible by the requested rate. Default rate will be used. 
Resampling may fail.", "rateRequested", ac.rate) + _, err = ac.dev.NegotiateRate(defaultFrameRate) + if err != nil { + return err + } + log.Log(logger.Debug, "Sample rate set", "rate", defaultFrameRate) + } + + var fmt alsa.FormatType + switch ac.bits { + case 16: + fmt = alsa.S16_LE + case 32: + fmt = alsa.S32_LE + default: + return errors.New("Unsupported sample bits") + } + _, err = ac.dev.NegotiateFormat(fmt) + if err != nil { + return err + } + + // Either 8192 or 16384 bytes is a reasonable ALSA buffer size. + _, err = ac.dev.NegotiateBufferSize(8192, 16384) + if err != nil { + return err + } + + if err = ac.dev.Prepare(); err != nil { + return err + } + log.Log(logger.Debug, "Successfully negotiated ALSA params") + return nil +} + +// input continously records audio and writes it to the ringbuffer. +// Re-opens the device and tries again if ASLA returns an error. +// Spends a lot of time sleeping in Paused mode. +// ToDo: Currently, reading audio and writing to the ringbuffer are synchronous. +// Need a way to asynchronously read from the ALSA buffer, i.e., _while_ it is recording to avoid any gaps. 
+func (ac *audioClient) input() { + for { + ac.mu.Lock() + mode := ac.mode + ac.mu.Unlock() + if mode == "Paused" { + time.Sleep(time.Duration(ac.period) * time.Second) + continue + } + log.Log(logger.Debug, "Recording audio for period", "seconds", ac.period) + ac.mu.Lock() + err := ac.dev.Read(ac.ab.Data) + ac.mu.Unlock() + if err != nil { + log.Log(logger.Debug, "Device.Read failed", "error", err.Error()) + ac.mu.Lock() + err = ac.open() // re-open + if err != nil { + log.Log(logger.Fatal, "alsa.open failed", "error", err.Error()) + } + ac.mu.Unlock() + continue + } + + toWrite := ac.formatBuffer() + + log.Log(logger.Debug, "Audio format conversion has been performed where needed") + + var n int + n, err = ac.rb.Write(toWrite.Data) + switch err { + case nil: + log.Log(logger.Debug, "Wrote audio to ringbuffer", "length", n) + case ring.ErrDropped: + log.Log(logger.Warning, "Dropped audio") + default: + log.Log(logger.Error, "Unexpected ringbuffer error", "error", err.Error()) + return + } + } +} + +// output continously reads audio from the ringbuffer and sends it to NetReceiver via poll requests. +// When "B0" is configured as one of the NetReceiver inputs, audio data is posted as "B0". +// When "B0" is not an input, the poll request happens without any audio data +// (although other inputs may still be present via URL parameters). +// When paused, polling continues but without sending audio (B0) data. +// Sending is throttled so as to complete one pass of this loop approximately every audio period, +// since cycling more frequently is pointless. +// Finally while audio data is sent every audio period, other data is reported only every monitor period. +// This function also handles NetReceiver configuration requests and updating of NetReceiver vars. +func (ac *audioClient) output() { + // Calculate the size of the output data based on wanted channels and rate. 
+ outLen := (((len(ac.ab.Data) / ac.ab.Format.Channels) * ac.channels) / ac.ab.Format.Rate) * ac.rate + buf := make([]byte, outLen) + + mime := "audio/x-wav;codec=pcm;rate=" + strconv.Itoa(ac.rate) + ";channels=" + strconv.Itoa(ac.channels) + ";bits=" + strconv.Itoa(ac.bits) + ip := ac.ns.Param("ip") + mp, err := strconv.Atoi(ac.ns.Param("mp")) + if err != nil { + log.Log(logger.Fatal, "mp not an integer") + } + + report := true // Report non-audio data. + reported := time.Now() // When we last did so. + + for { + var reconfig bool + start := time.Now() + audio := false + var pins []netsender.Pin + + if ac.mode == "Paused" { + + // Only send X data when paused (if any). + if report { + pins = netsender.MakePins(ip, "X") + } + } else { + n, err := read(ac.rb, buf) + if err != nil { + return + } + if n == 0 { + goto sleep + } + if n != len(buf) { + log.Log(logger.Error, "Unexpected length from read", "length", n) + return + } + if report { + pins = netsender.MakePins(ip, "") + } else { + pins = netsender.MakePins(ip, "B") + } + for i, pin := range pins { + if pin.Name == "B0" { + audio = true + pins[i].Value = n + pins[i].Data = buf + pins[i].MimeType = mime + } + } + } + + if !(report || audio) { + goto sleep // nothing to do + } + + // Populate X pins, if any. + for i, pin := range pins { + if pin.Name[0] == 'X' { + err := sds.ReadSystem(&pins[i]) + if err != nil { + log.Log(logger.Warning, "sds.ReadSystem failed", "error", err.Error()) + // Pin.Value defaults to -1 upon error, so OK to continue. 
+ } + } + } + _, reconfig, err = ac.ns.Send(netsender.RequestPoll, pins) + if err != nil { + log.Log(logger.Debug, "netsender.Send failed", "error", err.Error()) + goto sleep + } + if report { + reported = start + report = false + } + if reconfig { + err = ac.ns.Config() + if err != nil { + log.Log(logger.Warning, "netsender.Config failed", "error", err.Error()) + goto sleep + } + ip = ac.ns.Param("ip") + mp, err = strconv.Atoi(ac.ns.Param("mp")) + if err != nil { + log.Log(logger.Fatal, "mp not an integer") + } + } + + if ac.vs != ac.ns.VarSum() { + vars, err := ac.ns.Vars() + if err != nil { + log.Log(logger.Error, "netsender.Vars failed", "error", err.Error()) + goto sleep + } + ac.params(vars) // ToDo: re-open device if audio params have changed. + ac.vs = ac.ns.VarSum() + } + + sleep: + pause := ac.period*1000 - int(time.Since(start).Seconds()*1000) + if pause > 0 { + time.Sleep(time.Duration(pause) * time.Millisecond) + } + if time.Since(reported).Seconds() >= float64(mp) { + report = true + } + + } +} + +// read reads a full PCM chunk from the ringbuffer, returning the number of bytes read upon success. +// Any errors returned are unexpected and should be considered fatal. +func read(rb *ring.Buffer, buf []byte) (int, error) { + chunk, err := rb.Next(rbNextTimeout) + switch err { + case nil: + // Do nothing. 
+ case ring.ErrTimeout: + return 0, nil + case io.EOF: + log.Log(logger.Error, "Unexpected EOF from ring.Next") + return 0, io.ErrUnexpectedEOF + default: + log.Log(logger.Error, "Unexpected error from ring.Next", "error", err.Error()) + return 0, err + } + + n, err := io.ReadFull(rb, buf[:chunk.Len()]) + if err != nil { + log.Log(logger.Error, "Unexpected error from ring.Read", "error", err.Error()) + return n, err + } + + log.Log(logger.Debug, "Read audio from ringbuffer", "length", n) + return n, nil +} + +// formatBuffer returns an ALSA buffer that has the recording data from the ac's original ALSA buffer but stored +// in the desired format specified by the ac's parameters. +func (ac *audioClient) formatBuffer() alsa.Buffer { + var err error + ac.mu.Lock() + wantChannels := ac.channels + wantRate := ac.rate + ac.mu.Unlock() + + // If nothing needs to be changed, return the original. + if ac.ab.Format.Channels == wantChannels && ac.ab.Format.Rate == wantRate { + return ac.ab + } + + formatted := alsa.Buffer{Format: ac.ab.Format} + bufCopied := false + if ac.ab.Format.Channels != wantChannels { + + // Convert channels. + if ac.ab.Format.Channels == 2 && wantChannels == 1 { + if formatted.Data, err = pcm.StereoToMono(ac.ab); err != nil { + log.Log(logger.Warning, "Channel conversion failed, audio has remained stereo", "error", err.Error()) + } else { + formatted.Format.Channels = 1 + } + bufCopied = true + } + } + + if ac.ab.Format.Rate != wantRate { + + // Convert rate. 
+ if bufCopied { + formatted.Data, err = pcm.Resample(formatted, wantRate) + } else { + formatted.Data, err = pcm.Resample(ac.ab, wantRate) + } + if err != nil { + log.Log(logger.Warning, "Rate conversion failed, audio has remained original rate", "error", err.Error()) + } else { + formatted.Format.Rate = wantRate + } + } + return formatted +} From 8a1f35c0a5e1e31dc1544a5e92f54b0e9511ab61 Mon Sep 17 00:00:00 2001 From: Trek H Date: Thu, 18 Apr 2019 16:59:48 +0930 Subject: [PATCH 03/57] revid: started modifying audio-netsender to be a general audio input --- revid/audio.go | 122 ++++++++++++------------------------------------- 1 file changed, 28 insertions(+), 94 deletions(-) diff --git a/revid/audio.go b/revid/audio.go index 2177d146..5fe6aaef 100644 --- a/revid/audio.go +++ b/revid/audio.go @@ -1,46 +1,7 @@ -/* -NAME - audio-netsender - NetSender client for sending audio to NetReceiver - -AUTHORS - Alan Noble - Trek Hopton - -ACKNOWLEDGEMENTS - A special thanks to Joel Jensen for his Go ALSA package. - -LICENSE - audio-netsender is Copyright (C) 2018 the Australian Ocean Lab (AusOcean). - - It is free software: you can redistribute it and/or modify them under - the terms of the GNU General Public License as published by the - Free Software Foundation, either version 3 of the License, or (at your - option) any later version. - - It is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - for more details. - - You should have received a copy of the GNU General Public License - along with https://bitbucket.org/ausocean/iot/src/master/gpl.txt. - If not, see http://www.gnu.org/licenses. -*/ - -// audio-netsender is a NetSender client for sending audio to -// NetReceiver. Audio is captured by means of an ALSA recording -// device, specified by the NetReceiver "source" variable. 
It sent via -// HTTP to NetReceiver in raw audio form, i.e., as PCM data, where it -// is stored as BinaryData objects. Other NetReceiver variables are -// "rate", "period", "channels" and "bits", for specifiying the frame -// rate (Hz), audio period (seconds), number of channels and sample -// bit size respectively. For a description of NetReceiver see -// http://netreceiver.appspot.com/help. -package main +package revid import ( "errors" - "flag" "io" "strconv" "sync" @@ -51,38 +12,33 @@ import ( "bitbucket.org/ausocean/av/codec/pcm" "bitbucket.org/ausocean/iot/pi/netsender" "bitbucket.org/ausocean/iot/pi/sds" - "bitbucket.org/ausocean/iot/pi/smartlogger" "bitbucket.org/ausocean/utils/logger" "bitbucket.org/ausocean/utils/ring" ) const ( - progName = "audio-netsender" - logPath = "/var/log/netsender" - retryPeriod = 5 * time.Second - defaultFrameRate = 48000 - defaultPeriod = 5 // seconds - defaultChannels = 2 - defaultBits = 16 - rbDuration = 300 // seconds - rbTimeout = 100 * time.Millisecond - rbNextTimeout = 100 * time.Millisecond + defaultRate = 48000 + defaultPeriod = 5 // seconds + defaultChannels = 2 + defaultBits = 16 + rbDuration = 300 // seconds + rbTimeout = 100 * time.Millisecond + rbNextTimeout = 100 * time.Millisecond ) -// audioClient holds everything we need to know about the client. +// audioInput holds everything we need to know about the audio input stream. // NB: At 44100 Hz frame rate, 2 channels and 16-bit samples, a period of 5 seconds // results in PCM data chunks of 882000 bytes! A longer period exceeds datastore's 1MB blob limit. -type audioClient struct { - mu sync.Mutex // mu protects the audioClient. +type audioInput struct { + mu sync.Mutex // mu protects the audioInput. 
parameters // internals - dev *alsa.Device // audio input device - ab alsa.Buffer // ALSA's buffer - rb *ring.Buffer // our buffer - ns *netsender.Sender // our NetSender - vs int // our "var sum" to track var changes + dev *alsa.Device // audio input device + ab alsa.Buffer // ALSA's buffer + rb *ring.Buffer // our buffer //TODO: change this to output stream, doesn't have to be ring buffer + vs int // our "var sum" to track var changes } type parameters struct { @@ -94,32 +50,8 @@ type parameters struct { bits int // sample bit size, 16 by default } -var log *logger.Logger - -func main() { - var logLevel int - flag.IntVar(&logLevel, "LogLevel", int(logger.Debug), "Specifies log level") - flag.Parse() - - validLogLevel := true - if logLevel < int(logger.Debug) || logLevel > int(logger.Fatal) { - logLevel = int(logger.Info) - validLogLevel = false - } - - logSender := smartlogger.New(logPath) - log = logger.New(int8(logLevel), &logSender.LogRoller) - log.Log(logger.Info, "log-netsender: Logger Initialized") - if !validLogLevel { - log.Log(logger.Error, "Invalid log level was defaulted to Info") - } - - var ac audioClient - var err error - ac.ns, err = netsender.New(log, nil, sds.ReadSystem, nil) - if err != nil { - log.Log(logger.Fatal, "netsender.Init failed", "error", err.Error()) - } +func NewAudioInput() { + var ac audioInput // Get audio params and store the current var sum. vars, err := ac.ns.Vars() @@ -144,11 +76,13 @@ func main() { go ac.input() ac.output() + + return stream } // params extracts audio params from corresponding NetReceiver vars and returns true if anything has changed. -// See audioClient for a description of the params and their limits. -func (ac *audioClient) params(vars map[string]string) bool { +// See audioInput for a description of the params and their limits. +func (ac *audioInput) params(vars map[string]string) bool { // We are the only writers to this field // so we don't need to lock here. 
p := ac.parameters @@ -166,7 +100,7 @@ func (ac *audioClient) params(vars map[string]string) bool { } val, err := strconv.Atoi(vars["rate"]) if err != nil { - val = defaultFrameRate + val = defaultRate } if p.rate != val { p.rate = val @@ -209,7 +143,7 @@ func (ac *audioClient) params(vars map[string]string) bool { // open or re-open the recording device with the given name and prepare it to record. // If name is empty, the first recording device is used. -func (ac *audioClient) open() error { +func (ac *audioInput) open() error { if ac.dev != nil { log.Log(logger.Debug, "Closing", "source", ac.source) ac.dev.Close() @@ -279,11 +213,11 @@ func (ac *audioClient) open() error { // If no easily divisible rate is found, then use the default rate. if !foundRate { log.Log(logger.Warning, "No available device sample-rates are divisible by the requested rate. Default rate will be used. Resampling may fail.", "rateRequested", ac.rate) - _, err = ac.dev.NegotiateRate(defaultFrameRate) + _, err = ac.dev.NegotiateRate(defaultRate) if err != nil { return err } - log.Log(logger.Debug, "Sample rate set", "rate", defaultFrameRate) + log.Log(logger.Debug, "Sample rate set", "rate", defaultRate) } var fmt alsa.FormatType @@ -318,7 +252,7 @@ func (ac *audioClient) open() error { // Spends a lot of time sleeping in Paused mode. // ToDo: Currently, reading audio and writing to the ringbuffer are synchronous. // Need a way to asynchronously read from the ALSA buffer, i.e., _while_ it is recording to avoid any gaps. -func (ac *audioClient) input() { +func (ac *audioInput) input() { for { ac.mu.Lock() mode := ac.mode @@ -369,7 +303,7 @@ func (ac *audioClient) input() { // since cycling more frequently is pointless. // Finally while audio data is sent every audio period, other data is reported only every monitor period. // This function also handles NetReceiver configuration requests and updating of NetReceiver vars. 
-func (ac *audioClient) output() { +func (ac *audioInput) output() { // Calculate the size of the output data based on wanted channels and rate. outLen := (((len(ac.ab.Data) / ac.ab.Format.Channels) * ac.channels) / ac.ab.Format.Rate) * ac.rate buf := make([]byte, outLen) @@ -510,7 +444,7 @@ func read(rb *ring.Buffer, buf []byte) (int, error) { // formatBuffer returns an ALSA buffer that has the recording data from the ac's original ALSA buffer but stored // in the desired format specified by the ac's parameters. -func (ac *audioClient) formatBuffer() alsa.Buffer { +func (ac *audioInput) formatBuffer() alsa.Buffer { var err error ac.mu.Lock() wantChannels := ac.channels From 46ca3e2611d92be0ea4149e7ffc5bbf2ecd9b387 Mon Sep 17 00:00:00 2001 From: Trek H Date: Fri, 19 Apr 2019 01:20:48 +0930 Subject: [PATCH 04/57] revid: created an audioInput struct to record audio that acts as a reader for revid --- go.mod | 3 + revid/audio.go | 240 ++++++++---------------------------------------- revid/config.go | 2 + revid/revid.go | 8 +- 4 files changed, 50 insertions(+), 203 deletions(-) diff --git a/go.mod b/go.mod index 1280e1a3..5d3fde42 100644 --- a/go.mod +++ b/go.mod @@ -6,9 +6,12 @@ require ( bitbucket.org/ausocean/iot v1.2.4 bitbucket.org/ausocean/utils v0.0.0-20190408050157-66d3b4d4041e github.com/Comcast/gots v0.0.0-20190305015453-8d56e473f0f7 + github.com/Shopify/toxiproxy v2.1.4+incompatible // indirect + github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883 // indirect github.com/go-audio/audio v0.0.0-20181013203223-7b2a6ca21480 github.com/go-audio/wav v0.0.0-20181013172942-de841e69b884 github.com/mewkiz/flac v1.0.5 + github.com/sergi/go-diff v1.0.0 // indirect github.com/yobert/alsa v0.0.0-20180630182551-d38d89fa843e gopkg.in/natefinch/lumberjack.v2 v2.0.0 // indirect ) diff --git a/revid/audio.go b/revid/audio.go index 5fe6aaef..f34d9736 100644 --- a/revid/audio.go +++ b/revid/audio.go @@ -3,21 +3,20 @@ package revid import ( "errors" "io" - "strconv" 
"sync" "time" "github.com/yobert/alsa" "bitbucket.org/ausocean/av/codec/pcm" - "bitbucket.org/ausocean/iot/pi/netsender" - "bitbucket.org/ausocean/iot/pi/sds" + "bitbucket.org/ausocean/iot/pi/smartlogger" "bitbucket.org/ausocean/utils/logger" "bitbucket.org/ausocean/utils/ring" ) const ( - defaultRate = 48000 + logPath = "/var/log/netsender" + defaultSampRate = 48000 defaultPeriod = 5 // seconds defaultChannels = 2 defaultBits = 16 @@ -26,6 +25,8 @@ const ( rbNextTimeout = 100 * time.Millisecond ) +var log *logger.Logger + // audioInput holds everything we need to know about the audio input stream. // NB: At 44100 Hz frame rate, 2 channels and 16-bit samples, a period of 5 seconds // results in PCM data chunks of 882000 bytes! A longer period exceeds datastore's 1MB blob limit. @@ -37,7 +38,7 @@ type audioInput struct { // internals dev *alsa.Device // audio input device ab alsa.Buffer // ALSA's buffer - rb *ring.Buffer // our buffer //TODO: change this to output stream, doesn't have to be ring buffer + rb *ring.Buffer // our buffer vs int // our "var sum" to track var changes } @@ -50,19 +51,29 @@ type parameters struct { bits int // sample bit size, 16 by default } -func NewAudioInput() { +// NewAudioInput starts recording audio and returns an AudioInput which the audio can be read from. +func NewAudioInput() io.Reader { + logLevel := int(logger.Debug) + + validLogLevel := true + if logLevel < int(logger.Debug) || logLevel > int(logger.Fatal) { + logLevel = int(logger.Info) + validLogLevel = false + } + + logSender := smartlogger.New(logPath) + log = logger.New(int8(logLevel), &logSender.LogRoller) + log.Log(logger.Info, "log-netsender: Logger Initialized") + if !validLogLevel { + log.Log(logger.Error, "Invalid log level was defaulted to Info") + } + var ac audioInput - // Get audio params and store the current var sum. 
- vars, err := ac.ns.Vars() - if err != nil { - log.Log(logger.Warning, "netsender.Vars failed; using defaults", "error", err.Error()) - } - ac.params(vars) - ac.vs = ac.ns.VarSum() + ac.setParams() // Open the requested audio device. - err = ac.open() + err := ac.open() if err != nil { log.Log(logger.Fatal, "alsa.open failed", "error", err.Error()) } @@ -75,70 +86,20 @@ func NewAudioInput() { go ac.input() - ac.output() - - return stream + return ac } -// params extracts audio params from corresponding NetReceiver vars and returns true if anything has changed. -// See audioInput for a description of the params and their limits. -func (ac *audioInput) params(vars map[string]string) bool { - // We are the only writers to this field - // so we don't need to lock here. +func (ac *audioInput) setParams() { p := ac.parameters - changed := false - mode := vars["mode"] - if p.mode != mode { - p.mode = mode - changed = true - } - source := vars["source"] - if p.source != source { - p.source = source - changed = true - } - val, err := strconv.Atoi(vars["rate"]) - if err != nil { - val = defaultRate - } - if p.rate != val { - p.rate = val - changed = true - } - val, err = strconv.Atoi(vars["period"]) - if err != nil || val < 1 || 5 < val { - val = defaultPeriod - } - if p.period != val { - p.period = val - changed = true - } - val, err = strconv.Atoi(vars["channels"]) - if err != nil || (val != 1 && val != 2) { - val = defaultChannels - } - if p.channels != val { - p.channels = val - changed = true - } - val, err = strconv.Atoi(vars["bits"]) - if err != nil || (val != 16 && val != 32) { - val = defaultBits - } - if p.bits != val { - p.bits = val - changed = true - } + p.rate = defaultSampRate + p.period = defaultPeriod + p.channels = defaultChannels + p.bits = defaultBits - if changed { - ac.mu.Lock() - ac.parameters = p - ac.mu.Unlock() - log.Log(logger.Debug, "Params changed") - } - log.Log(logger.Debug, "Parameters", "mode", p.mode, "source", p.source, "rate", p.rate, 
"period", p.period, "channels", p.channels, "bits", p.bits) - return changed + ac.mu.Lock() + ac.parameters = p + ac.mu.Unlock() } // open or re-open the recording device with the given name and prepare it to record. @@ -212,12 +173,12 @@ func (ac *audioInput) open() error { // If no easily divisible rate is found, then use the default rate. if !foundRate { - log.Log(logger.Warning, "No available device sample-rates are divisible by the requested rate. Default rate will be used. Resampling may fail.", "rateRequested", ac.rate) - _, err = ac.dev.NegotiateRate(defaultRate) + log.Log(logger.Warning, "Unable to sample at requested rate, default used.", "rateRequested", ac.rate) + _, err = ac.dev.NegotiateRate(defaultSampRate) if err != nil { return err } - log.Log(logger.Debug, "Sample rate set", "rate", defaultRate) + log.Log(logger.Debug, "Sample rate set", "rate", defaultSampRate) } var fmt alsa.FormatType @@ -294,131 +255,10 @@ func (ac *audioInput) input() { } } -// output continously reads audio from the ringbuffer and sends it to NetReceiver via poll requests. -// When "B0" is configured as one of the NetReceiver inputs, audio data is posted as "B0". -// When "B0" is not an input, the poll request happens without any audio data -// (although other inputs may still be present via URL parameters). -// When paused, polling continues but without sending audio (B0) data. -// Sending is throttled so as to complete one pass of this loop approximately every audio period, -// since cycling more frequently is pointless. -// Finally while audio data is sent every audio period, other data is reported only every monitor period. -// This function also handles NetReceiver configuration requests and updating of NetReceiver vars. -func (ac *audioInput) output() { - // Calculate the size of the output data based on wanted channels and rate. 
- outLen := (((len(ac.ab.Data) / ac.ab.Format.Channels) * ac.channels) / ac.ab.Format.Rate) * ac.rate - buf := make([]byte, outLen) - - mime := "audio/x-wav;codec=pcm;rate=" + strconv.Itoa(ac.rate) + ";channels=" + strconv.Itoa(ac.channels) + ";bits=" + strconv.Itoa(ac.bits) - ip := ac.ns.Param("ip") - mp, err := strconv.Atoi(ac.ns.Param("mp")) - if err != nil { - log.Log(logger.Fatal, "mp not an integer") - } - - report := true // Report non-audio data. - reported := time.Now() // When we last did so. - - for { - var reconfig bool - start := time.Now() - audio := false - var pins []netsender.Pin - - if ac.mode == "Paused" { - - // Only send X data when paused (if any). - if report { - pins = netsender.MakePins(ip, "X") - } - } else { - n, err := read(ac.rb, buf) - if err != nil { - return - } - if n == 0 { - goto sleep - } - if n != len(buf) { - log.Log(logger.Error, "Unexpected length from read", "length", n) - return - } - if report { - pins = netsender.MakePins(ip, "") - } else { - pins = netsender.MakePins(ip, "B") - } - for i, pin := range pins { - if pin.Name == "B0" { - audio = true - pins[i].Value = n - pins[i].Data = buf - pins[i].MimeType = mime - } - } - } - - if !(report || audio) { - goto sleep // nothing to do - } - - // Populate X pins, if any. - for i, pin := range pins { - if pin.Name[0] == 'X' { - err := sds.ReadSystem(&pins[i]) - if err != nil { - log.Log(logger.Warning, "sds.ReadSystem failed", "error", err.Error()) - // Pin.Value defaults to -1 upon error, so OK to continue. 
- } - } - } - _, reconfig, err = ac.ns.Send(netsender.RequestPoll, pins) - if err != nil { - log.Log(logger.Debug, "netsender.Send failed", "error", err.Error()) - goto sleep - } - if report { - reported = start - report = false - } - if reconfig { - err = ac.ns.Config() - if err != nil { - log.Log(logger.Warning, "netsender.Config failed", "error", err.Error()) - goto sleep - } - ip = ac.ns.Param("ip") - mp, err = strconv.Atoi(ac.ns.Param("mp")) - if err != nil { - log.Log(logger.Fatal, "mp not an integer") - } - } - - if ac.vs != ac.ns.VarSum() { - vars, err := ac.ns.Vars() - if err != nil { - log.Log(logger.Error, "netsender.Vars failed", "error", err.Error()) - goto sleep - } - ac.params(vars) // ToDo: re-open device if audio params have changed. - ac.vs = ac.ns.VarSum() - } - - sleep: - pause := ac.period*1000 - int(time.Since(start).Seconds()*1000) - if pause > 0 { - time.Sleep(time.Duration(pause) * time.Millisecond) - } - if time.Since(reported).Seconds() >= float64(mp) { - report = true - } - - } -} - // read reads a full PCM chunk from the ringbuffer, returning the number of bytes read upon success. // Any errors returned are unexpected and should be considered fatal. -func read(rb *ring.Buffer, buf []byte) (int, error) { - chunk, err := rb.Next(rbNextTimeout) +func (ac audioInput) Read(p []byte) (n int, err error) { + chunk, err := ac.rb.Next(rbNextTimeout) switch err { case nil: // Do nothing. 
@@ -432,7 +272,7 @@ func read(rb *ring.Buffer, buf []byte) (int, error) { return 0, err } - n, err := io.ReadFull(rb, buf[:chunk.Len()]) + n, err = io.ReadFull(ac.rb, p[:chunk.Len()]) if err != nil { log.Log(logger.Error, "Unexpected error from ring.Read", "error", err.Error()) return n, err diff --git a/revid/config.go b/revid/config.go index 6f5a0c8a..a38d9956 100644 --- a/revid/config.go +++ b/revid/config.go @@ -62,6 +62,7 @@ type Config struct { Height uint Width uint FrameRate uint + Rate uint HttpAddress string Quantization uint IntraRefreshPeriod uint @@ -138,6 +139,7 @@ const ( defaultOutput = Http defaultPacketization = Flv defaultFrameRate = 25 + defaultRate = 25 defaultWidth = 1280 defaultHeight = 720 defaultIntraRefreshPeriod = 100 diff --git a/revid/revid.go b/revid/revid.go index 64fad64b..75424779 100644 --- a/revid/revid.go +++ b/revid/revid.go @@ -668,10 +668,12 @@ func (r *Revid) setupInputForFile() error { return nil } -// startMic is used to start capturing audio from an audio device and processing it. -func startMic() { +// startAudioInput is used to start capturing audio from an audio device and processing it. 
+func (r *Revid) startAudioInput() error { - go processFrom(stream, r.config.Rate) + ai := NewAudioInput() + + go r.processFrom(ai, time.Second/time.Duration(r.config.Rate)) return nil } From a60c65a6cf08e7267ff2e9eade2e2b54672f5106 Mon Sep 17 00:00:00 2001 From: Trek H Date: Tue, 23 Apr 2019 14:51:17 +0930 Subject: [PATCH 05/57] revid: added rate flags to config and cli, started writing test --- cmd/revid-cli/main.go | 16 +- revid/audio.go | 329 ------------------------------------------ revid/config.go | 12 +- 3 files changed, 24 insertions(+), 333 deletions(-) delete mode 100644 revid/audio.go diff --git a/cmd/revid-cli/main.go b/cmd/revid-cli/main.go index c7e3fffd..67b495b2 100644 --- a/cmd/revid-cli/main.go +++ b/cmd/revid-cli/main.go @@ -60,7 +60,9 @@ const ( defaultLogPath = "/var/log/netsender" pkg = "revid-cli:" defaultLogVerbosity = logger.Info - defaultSleepTime = 60 // Seconds + defaultSleepTime = 60 // Seconds + sampleSize = 2 // Bytes + blockSize = 16000 // Bytes ) // canProfile is set to false with revid-cli is built with "-tags profile". 
@@ -105,7 +107,7 @@ func handleFlags() revid.Config { var ( cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`") - inputPtr = flag.String("Input", "", "The input type: Raspivid, File, Webcam") + inputPtr = flag.String("Input", "", "The input type: Raspivid, File, v4l, Audio") inputCodecPtr = flag.String("InputCodec", "", "The codec of the input: H264, Mjpeg") rtmpMethodPtr = flag.String("RtmpMethod", "", "The method used to send over rtmp: Ffmpeg, Librtmp") quantizePtr = flag.Bool("Quantize", false, "Quantize input (non-variable bitrate)") @@ -125,6 +127,7 @@ func handleFlags() revid.Config { heightPtr = flag.Uint("Height", 0, "Height in pixels") widthPtr = flag.Uint("Width", 0, "Width in pixels") frameRatePtr = flag.Uint("FrameRate", 0, "Frame rate of captured video") + sampleRatePtr = flag.Uint("SampleRate", 0, "Sample rate of recorded audio") quantizationPtr = flag.Uint("Quantization", 0, "Desired quantization value: 0-40") intraRefreshPeriodPtr = flag.Uint("IntraRefreshPeriod", 0, "The IntraRefreshPeriod i.e. how many keyframes we send") rotationPtr = flag.Uint("Rotation", 0, "Rotate video output. 
(0-359 degrees)") @@ -180,11 +183,20 @@ func handleFlags() revid.Config { cfg.Input = revid.V4L case "File": cfg.Input = revid.File + case "Audio": + cfg.Input = revid.Audio case "": default: log.Log(logger.Error, pkg+"bad input argument") } + switch *inputPtr { + case "Audio": + cfg.Rate = float64(*sampleRatePtr*sampleSize) / float64(blockSize) + default: + cfg.Rate = *frameRatePtr + } + switch *inputCodecPtr { case "H264": cfg.InputCodec = revid.H264 diff --git a/revid/audio.go b/revid/audio.go deleted file mode 100644 index f34d9736..00000000 --- a/revid/audio.go +++ /dev/null @@ -1,329 +0,0 @@ -package revid - -import ( - "errors" - "io" - "sync" - "time" - - "github.com/yobert/alsa" - - "bitbucket.org/ausocean/av/codec/pcm" - "bitbucket.org/ausocean/iot/pi/smartlogger" - "bitbucket.org/ausocean/utils/logger" - "bitbucket.org/ausocean/utils/ring" -) - -const ( - logPath = "/var/log/netsender" - defaultSampRate = 48000 - defaultPeriod = 5 // seconds - defaultChannels = 2 - defaultBits = 16 - rbDuration = 300 // seconds - rbTimeout = 100 * time.Millisecond - rbNextTimeout = 100 * time.Millisecond -) - -var log *logger.Logger - -// audioInput holds everything we need to know about the audio input stream. -// NB: At 44100 Hz frame rate, 2 channels and 16-bit samples, a period of 5 seconds -// results in PCM data chunks of 882000 bytes! A longer period exceeds datastore's 1MB blob limit. -type audioInput struct { - mu sync.Mutex // mu protects the audioInput. 
- - parameters - - // internals - dev *alsa.Device // audio input device - ab alsa.Buffer // ALSA's buffer - rb *ring.Buffer // our buffer - vs int // our "var sum" to track var changes -} - -type parameters struct { - mode string // operating mode, either "Normal" or "Paused" - source string // name of audio source, or empty for the default source - rate int // frame rate in Hz, 44100Hz by default - period int // audio period in seconds, 5s by default - channels int // number of audio channels, 1 for mono, 2 for stereo - bits int // sample bit size, 16 by default -} - -// NewAudioInput starts recording audio and returns an AudioInput which the audio can be read from. -func NewAudioInput() io.Reader { - logLevel := int(logger.Debug) - - validLogLevel := true - if logLevel < int(logger.Debug) || logLevel > int(logger.Fatal) { - logLevel = int(logger.Info) - validLogLevel = false - } - - logSender := smartlogger.New(logPath) - log = logger.New(int8(logLevel), &logSender.LogRoller) - log.Log(logger.Info, "log-netsender: Logger Initialized") - if !validLogLevel { - log.Log(logger.Error, "Invalid log level was defaulted to Info") - } - - var ac audioInput - - ac.setParams() - - // Open the requested audio device. - err := ac.open() - if err != nil { - log.Log(logger.Fatal, "alsa.open failed", "error", err.Error()) - } - - // Capture audio in periods of ac.period seconds, and buffer rbDuration seconds in total. 
- ac.ab = ac.dev.NewBufferDuration(time.Second * time.Duration(ac.period)) - recSize := (((len(ac.ab.Data) / ac.dev.BufferFormat().Channels) * ac.channels) / ac.dev.BufferFormat().Rate) * ac.rate - rbLen := rbDuration / ac.period - ac.rb = ring.NewBuffer(rbLen, recSize, rbTimeout) - - go ac.input() - - return ac -} - -func (ac *audioInput) setParams() { - p := ac.parameters - - p.rate = defaultSampRate - p.period = defaultPeriod - p.channels = defaultChannels - p.bits = defaultBits - - ac.mu.Lock() - ac.parameters = p - ac.mu.Unlock() -} - -// open or re-open the recording device with the given name and prepare it to record. -// If name is empty, the first recording device is used. -func (ac *audioInput) open() error { - if ac.dev != nil { - log.Log(logger.Debug, "Closing", "source", ac.source) - ac.dev.Close() - ac.dev = nil - } - log.Log(logger.Debug, "Opening", "source", ac.source) - - cards, err := alsa.OpenCards() - if err != nil { - return err - } - defer alsa.CloseCards(cards) - - for _, card := range cards { - devices, err := card.Devices() - if err != nil { - return err - } - for _, dev := range devices { - if dev.Type != alsa.PCM || !dev.Record { - continue - } - if dev.Title == ac.source || ac.source == "" { - ac.dev = dev - break - } - } - } - - if ac.dev == nil { - return errors.New("No audio source found") - } - log.Log(logger.Debug, "Found audio source", "source", ac.dev.Title) - - // ToDo: time out if Open takes too long. - err = ac.dev.Open() - if err != nil { - return err - } - log.Log(logger.Debug, "Opened audio source") - - _, err = ac.dev.NegotiateChannels(defaultChannels) - if err != nil { - return err - } - - // Try to negotiate a rate to record in that is divisible by the wanted rate - // so that it can be easily downsampled to the wanted rate. - // Note: if a card thinks it can record at a rate but can't actually, this can cause a failure. Eg. 
- // the audioinjector is supposed to record at 8000Hz and 16000Hz but it can't due to a firmware issue, - // to fix this 8000 and 16000 must be removed from this slice. - rates := [8]int{8000, 16000, 32000, 44100, 48000, 88200, 96000, 192000} - foundRate := false - for i := 0; i < len(rates) && !foundRate; i++ { - if rates[i] < ac.rate { - continue - } - if rates[i]%ac.rate == 0 { - _, err = ac.dev.NegotiateRate(rates[i]) - if err == nil { - foundRate = true - log.Log(logger.Debug, "Sample rate set", "rate", rates[i]) - } - } - } - - // If no easily divisible rate is found, then use the default rate. - if !foundRate { - log.Log(logger.Warning, "Unable to sample at requested rate, default used.", "rateRequested", ac.rate) - _, err = ac.dev.NegotiateRate(defaultSampRate) - if err != nil { - return err - } - log.Log(logger.Debug, "Sample rate set", "rate", defaultSampRate) - } - - var fmt alsa.FormatType - switch ac.bits { - case 16: - fmt = alsa.S16_LE - case 32: - fmt = alsa.S32_LE - default: - return errors.New("Unsupported sample bits") - } - _, err = ac.dev.NegotiateFormat(fmt) - if err != nil { - return err - } - - // Either 8192 or 16384 bytes is a reasonable ALSA buffer size. - _, err = ac.dev.NegotiateBufferSize(8192, 16384) - if err != nil { - return err - } - - if err = ac.dev.Prepare(); err != nil { - return err - } - log.Log(logger.Debug, "Successfully negotiated ALSA params") - return nil -} - -// input continously records audio and writes it to the ringbuffer. -// Re-opens the device and tries again if ASLA returns an error. -// Spends a lot of time sleeping in Paused mode. -// ToDo: Currently, reading audio and writing to the ringbuffer are synchronous. -// Need a way to asynchronously read from the ALSA buffer, i.e., _while_ it is recording to avoid any gaps. 
-func (ac *audioInput) input() { - for { - ac.mu.Lock() - mode := ac.mode - ac.mu.Unlock() - if mode == "Paused" { - time.Sleep(time.Duration(ac.period) * time.Second) - continue - } - log.Log(logger.Debug, "Recording audio for period", "seconds", ac.period) - ac.mu.Lock() - err := ac.dev.Read(ac.ab.Data) - ac.mu.Unlock() - if err != nil { - log.Log(logger.Debug, "Device.Read failed", "error", err.Error()) - ac.mu.Lock() - err = ac.open() // re-open - if err != nil { - log.Log(logger.Fatal, "alsa.open failed", "error", err.Error()) - } - ac.mu.Unlock() - continue - } - - toWrite := ac.formatBuffer() - - log.Log(logger.Debug, "Audio format conversion has been performed where needed") - - var n int - n, err = ac.rb.Write(toWrite.Data) - switch err { - case nil: - log.Log(logger.Debug, "Wrote audio to ringbuffer", "length", n) - case ring.ErrDropped: - log.Log(logger.Warning, "Dropped audio") - default: - log.Log(logger.Error, "Unexpected ringbuffer error", "error", err.Error()) - return - } - } -} - -// read reads a full PCM chunk from the ringbuffer, returning the number of bytes read upon success. -// Any errors returned are unexpected and should be considered fatal. -func (ac audioInput) Read(p []byte) (n int, err error) { - chunk, err := ac.rb.Next(rbNextTimeout) - switch err { - case nil: - // Do nothing. 
- case ring.ErrTimeout: - return 0, nil - case io.EOF: - log.Log(logger.Error, "Unexpected EOF from ring.Next") - return 0, io.ErrUnexpectedEOF - default: - log.Log(logger.Error, "Unexpected error from ring.Next", "error", err.Error()) - return 0, err - } - - n, err = io.ReadFull(ac.rb, p[:chunk.Len()]) - if err != nil { - log.Log(logger.Error, "Unexpected error from ring.Read", "error", err.Error()) - return n, err - } - - log.Log(logger.Debug, "Read audio from ringbuffer", "length", n) - return n, nil -} - -// formatBuffer returns an ALSA buffer that has the recording data from the ac's original ALSA buffer but stored -// in the desired format specified by the ac's parameters. -func (ac *audioInput) formatBuffer() alsa.Buffer { - var err error - ac.mu.Lock() - wantChannels := ac.channels - wantRate := ac.rate - ac.mu.Unlock() - - // If nothing needs to be changed, return the original. - if ac.ab.Format.Channels == wantChannels && ac.ab.Format.Rate == wantRate { - return ac.ab - } - - formatted := alsa.Buffer{Format: ac.ab.Format} - bufCopied := false - if ac.ab.Format.Channels != wantChannels { - - // Convert channels. - if ac.ab.Format.Channels == 2 && wantChannels == 1 { - if formatted.Data, err = pcm.StereoToMono(ac.ab); err != nil { - log.Log(logger.Warning, "Channel conversion failed, audio has remained stereo", "error", err.Error()) - } else { - formatted.Format.Channels = 1 - } - bufCopied = true - } - } - - if ac.ab.Format.Rate != wantRate { - - // Convert rate. 
- if bufCopied { - formatted.Data, err = pcm.Resample(formatted, wantRate) - } else { - formatted.Data, err = pcm.Resample(ac.ab, wantRate) - } - if err != nil { - log.Log(logger.Warning, "Rate conversion failed, audio has remained original rate", "error", err.Error()) - } else { - formatted.Format.Rate = wantRate - } - } - return formatted -} diff --git a/revid/config.go b/revid/config.go index a38d9956..75fc0e5d 100644 --- a/revid/config.go +++ b/revid/config.go @@ -62,7 +62,8 @@ type Config struct { Height uint Width uint FrameRate uint - Rate uint + SampleRate uint + Rate float64 HttpAddress string Quantization uint IntraRefreshPeriod uint @@ -113,6 +114,7 @@ const ( Raspivid V4L H264Codec + Audio File Http H264 @@ -139,6 +141,7 @@ const ( defaultOutput = Http defaultPacketization = Flv defaultFrameRate = 25 + defaultSampleRate = 48000 defaultRate = 25 defaultWidth = 1280 defaultHeight = 720 @@ -174,7 +177,7 @@ func (c *Config) Validate(r *Revid) error { } switch c.Input { - case Raspivid, V4L, File: + case Raspivid, V4L, File, Audio: case NothingDefined: c.Logger.Log(logger.Info, pkg+"no input type defined, defaulting", "input", defaultInput) @@ -276,6 +279,11 @@ func (c *Config) Validate(r *Revid) error { c.FrameRate = defaultFrameRate } + if c.SampleRate == 0 { + c.Logger.Log(logger.Info, pkg+"no sample rate defined, defaulting", "sampleRate", defaultSampleRate) + c.SampleRate = defaultSampleRate + } + if c.Bitrate == 0 { c.Logger.Log(logger.Info, pkg+"no bitrate defined, defaulting", "bitrate", defaultBitrate) c.Bitrate = defaultBitrate From 20c9e6c409473e109562ebfb77e8377b0c69f377 Mon Sep 17 00:00:00 2001 From: Trek H Date: Tue, 23 Apr 2019 16:20:47 +0930 Subject: [PATCH 06/57] revid: added PCM and ADPCM codecs --- cmd/revid-cli/main.go | 8 ++++++-- codec/lex/lex.go | 11 +++++++++++ container/mts/encoder.go | 2 +- revid/config.go | 2 ++ revid/revid.go | 9 +++++++-- 5 files changed, 27 insertions(+), 5 deletions(-) diff --git a/cmd/revid-cli/main.go 
b/cmd/revid-cli/main.go index 67b495b2..2a863680 100644 --- a/cmd/revid-cli/main.go +++ b/cmd/revid-cli/main.go @@ -108,7 +108,7 @@ func handleFlags() revid.Config { cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`") inputPtr = flag.String("Input", "", "The input type: Raspivid, File, v4l, Audio") - inputCodecPtr = flag.String("InputCodec", "", "The codec of the input: H264, Mjpeg") + inputCodecPtr = flag.String("InputCodec", "", "The codec of the input: H264, Mjpeg, PCM, ADPCM") rtmpMethodPtr = flag.String("RtmpMethod", "", "The method used to send over rtmp: Ffmpeg, Librtmp") quantizePtr = flag.Bool("Quantize", false, "Quantize input (non-variable bitrate)") verbosityPtr = flag.String("Verbosity", "Info", "Verbosity: Debug, Info, Warning, Error, Fatal") @@ -194,12 +194,16 @@ func handleFlags() revid.Config { case "Audio": cfg.Rate = float64(*sampleRatePtr*sampleSize) / float64(blockSize) default: - cfg.Rate = *frameRatePtr + cfg.Rate = float64(*frameRatePtr) } switch *inputCodecPtr { case "H264": cfg.InputCodec = revid.H264 + case "PCM": + cfg.InputCodec = revid.PCM + case "ADPCM": + cfg.InputCodec = revid.ADPCM case "": default: log.Log(logger.Error, pkg+"bad input codec argument") diff --git a/codec/lex/lex.go b/codec/lex/lex.go index a3d05df5..ff1d4e4d 100644 --- a/codec/lex/lex.go +++ b/codec/lex/lex.go @@ -250,6 +250,8 @@ func MJPEG(dst io.Writer, src io.Reader, delay time.Duration) error { } } +// PCM reads from the given source and breaks the PCM into chunks that +// are an appropriate size for mts and pes packets. func PCM(dst io.Writer, src io.Reader, delay time.Duration) error { var tick <-chan time.Time if delay == 0 { @@ -274,3 +276,12 @@ func PCM(dst io.Writer, src io.Reader, delay time.Duration) error { } } } + +// ADPCM reads from the given source and breaks the ADPCM into chunks that +// are an appropriate size for mts and pes packets. 
+// Since PCM and ADPCM are not any different when it comes to how they are +// transmitted, ADPCM is just a wrapper for PCM. +func ADPCM(dst io.Writer, src io.Reader, delay time.Duration) error { + err := PCM(dst, src, delay) + return err +} diff --git a/container/mts/encoder.go b/container/mts/encoder.go index e72cfebf..2d7ce656 100644 --- a/container/mts/encoder.go +++ b/container/mts/encoder.go @@ -202,7 +202,7 @@ func (e *Encoder) TimeBasedPsi(b bool, sendCount int) { e.pktCount = e.psiSendCount } -// Write implements io.Writer. Write takes raw h264 and encodes into mpegts, +// Write implements io.Writer. Write takes raw video or audio data and encodes into mpegts, // then sending it to the encoder's io.Writer destination. func (e *Encoder) Write(data []byte) (int, error) { if len(data) > pes.MaxPesSize { diff --git a/revid/config.go b/revid/config.go index 75fc0e5d..ceeec5e8 100644 --- a/revid/config.go +++ b/revid/config.go @@ -119,6 +119,8 @@ const ( Http H264 Mjpeg + PCM + ADPCM None Mpegts Ffmpeg diff --git a/revid/revid.go b/revid/revid.go index 75424779..75be7ca4 100644 --- a/revid/revid.go +++ b/revid/revid.go @@ -263,6 +263,12 @@ func (r *Revid) setupPipeline(mtsEnc, flvEnc func(dst io.Writer, rate int) (io.W case Mjpeg: r.config.Logger.Log(logger.Info, pkg+"using MJPEG lexer") r.lexTo = lex.MJPEG + case PCM: + r.config.Logger.Log(logger.Info, pkg+"using PCM lexer") + r.lexTo = lex.PCM + case ADPCM: + r.config.Logger.Log(logger.Info, pkg+"using ADPCM lexer") + r.lexTo = lex.ADPCM } return nil } @@ -670,8 +676,7 @@ func (r *Revid) setupInputForFile() error { // startAudioInput is used to start capturing audio from an audio device and processing it. 
func (r *Revid) startAudioInput() error { - - ai := NewAudioInput() + ai := NewAudioInput(audioParams) go r.processFrom(ai, time.Second/time.Duration(r.config.Rate)) return nil From c51e0ec168e2923dec22b7e29945800bd564d8a3 Mon Sep 17 00:00:00 2001 From: Trek H Date: Tue, 23 Apr 2019 19:05:47 +0930 Subject: [PATCH 07/57] revid: adding audio config parameters --- cmd/revid-cli/main.go | 1 + revid/config.go | 64 +++++++++++++++++++++++-------------------- revid/revid.go | 2 +- 3 files changed, 36 insertions(+), 31 deletions(-) diff --git a/cmd/revid-cli/main.go b/cmd/revid-cli/main.go index 2a863680..e4019887 100644 --- a/cmd/revid-cli/main.go +++ b/cmd/revid-cli/main.go @@ -255,6 +255,7 @@ func handleFlags() revid.Config { cfg.Height = *heightPtr cfg.Width = *widthPtr cfg.FrameRate = *frameRatePtr + cfg.SampleRate = *sampleRatePtr cfg.HttpAddress = *httpAddressPtr cfg.Quantization = *quantizationPtr cfg.IntraRefreshPeriod = *intraRefreshPeriodPtr diff --git a/revid/config.go b/revid/config.go index ceeec5e8..59207107 100644 --- a/revid/config.go +++ b/revid/config.go @@ -36,46 +36,50 @@ import ( // Config provides parameters relevant to a revid instance. A new config must // be passed to the constructor. type Config struct { + Logger Logger LogLevel int8 - Input uint8 - InputCodec uint8 - Outputs []uint8 - RtmpMethod uint8 - Packetization uint8 - - // Quantize specifies whether the input to - // revid will have constant or variable - // bitrate. - Quantize bool - - // FlipHorizonatla and FlipVertical specify - // whether video frames should be flipped. - FlipHorizontal bool - FlipVertical bool - - FramesPerClip uint + // IO + Input uint8 + InputCodec uint8 + Outputs []uint8 + RtmpMethod uint8 + Packetization uint8 + Quantize bool // Determines whether input to revid will have constant or variable bitrate. 
RtmpUrl string Bitrate uint OutputPath string InputPath string - Height uint - Width uint - FrameRate uint - SampleRate uint - Rate float64 HttpAddress string Quantization uint IntraRefreshPeriod uint RtpAddress string - Logger Logger SendRetry bool - BurstPeriod uint - Rotation uint - Brightness uint - Saturation int - Exposure string - AutoWhiteBalance string + WriteRate float64 // How many times a second revid encoders will be written to. + + // Video + Height uint + Width uint + FrameRate uint + FramesPerClip uint + BurstPeriod uint + + // Transformation + FlipHorizontal bool + FlipVertical bool + Rotation uint + + // Color correction + Brightness uint + Saturation int + Exposure string + AutoWhiteBalance string + + // Audio + SampleRate uint // Frame rate in Hz. + Period int // How many seconds to record at a time. + Channels int // Number of audio channels, 1 for mono, 2 for stereo. + BitDepth int // Sample bit depth, 16-bit by default. } // Possible modes for raspivid --exposure parameter. @@ -144,7 +148,7 @@ const ( defaultPacketization = Flv defaultFrameRate = 25 defaultSampleRate = 48000 - defaultRate = 25 + defaultWriteRate = 25 defaultWidth = 1280 defaultHeight = 720 defaultIntraRefreshPeriod = 100 diff --git a/revid/revid.go b/revid/revid.go index 75be7ca4..47c2c4f2 100644 --- a/revid/revid.go +++ b/revid/revid.go @@ -676,7 +676,7 @@ func (r *Revid) setupInputForFile() error { // startAudioInput is used to start capturing audio from an audio device and processing it. 
func (r *Revid) startAudioInput() error { - ai := NewAudioInput(audioParams) + ai := NewAudioInput() go r.processFrom(ai, time.Second/time.Duration(r.config.Rate)) return nil From 889d4402596ba4acd2c998d9d4ed764cebadbb13 Mon Sep 17 00:00:00 2001 From: Trek H Date: Wed, 24 Apr 2019 11:46:00 +0930 Subject: [PATCH 08/57] revid: updated audio config parameters throughout audio-input.go to use revid.Config --- revid/config.go | 8 ++++---- revid/revid.go | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/revid/config.go b/revid/config.go index 59207107..63f6d017 100644 --- a/revid/config.go +++ b/revid/config.go @@ -76,10 +76,10 @@ type Config struct { AutoWhiteBalance string // Audio - SampleRate uint // Frame rate in Hz. - Period int // How many seconds to record at a time. - Channels int // Number of audio channels, 1 for mono, 2 for stereo. - BitDepth int // Sample bit depth, 16-bit by default. + SampleRate int // Samples a second (Hz). + Period int // How many seconds to record at a time. + Channels int // Number of audio channels, 1 for mono, 2 for stereo. + BitDepth int // Sample bit depth. } // Possible modes for raspivid --exposure parameter. diff --git a/revid/revid.go b/revid/revid.go index 47c2c4f2..cda7f127 100644 --- a/revid/revid.go +++ b/revid/revid.go @@ -676,7 +676,7 @@ func (r *Revid) setupInputForFile() error { // startAudioInput is used to start capturing audio from an audio device and processing it. 
func (r *Revid) startAudioInput() error { - ai := NewAudioInput() + ai := NewAudioInput(&r.config) go r.processFrom(ai, time.Second/time.Duration(r.config.Rate)) return nil From 3484e356926b81bc79287eb895b7e4adffc3d531 Mon Sep 17 00:00:00 2001 From: Trek H Date: Wed, 24 Apr 2019 13:58:56 +0930 Subject: [PATCH 09/57] revid: revid building and running with audio additions --- cmd/revid-cli/main.go | 14 ++++++++++---- revid/config.go | 10 +++++++--- revid/revid.go | 4 +++- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/cmd/revid-cli/main.go b/cmd/revid-cli/main.go index e4019887..97cc8ecc 100644 --- a/cmd/revid-cli/main.go +++ b/cmd/revid-cli/main.go @@ -127,7 +127,6 @@ func handleFlags() revid.Config { heightPtr = flag.Uint("Height", 0, "Height in pixels") widthPtr = flag.Uint("Width", 0, "Width in pixels") frameRatePtr = flag.Uint("FrameRate", 0, "Frame rate of captured video") - sampleRatePtr = flag.Uint("SampleRate", 0, "Sample rate of recorded audio") quantizationPtr = flag.Uint("Quantization", 0, "Desired quantization value: 0-40") intraRefreshPeriodPtr = flag.Uint("IntraRefreshPeriod", 0, "The IntraRefreshPeriod i.e. how many keyframes we send") rotationPtr = flag.Uint("Rotation", 0, "Rotate video output. (0-359 degrees)") @@ -135,6 +134,10 @@ func handleFlags() revid.Config { saturationPtr = flag.Int("Saturation", 0, "Set Saturation. (100-100)") exposurePtr = flag.String("Exposure", "auto", "Set exposure mode. ("+strings.Join(revid.ExposureModes[:], ",")+")") autoWhiteBalancePtr = flag.String("Awb", "auto", "Set automatic white balance mode. 
("+strings.Join(revid.AutoWhiteBalanceModes[:], ",")+")") + sampleRatePtr = flag.Int("SampleRate", 48000, "Sample rate of recorded audio") + channelsPtr = flag.Int("Channels", 1, "Record in Mono or Stereo (1 or 2)") + recPeriodPtr = flag.Int("recPeriod", 5, "How many seconds to record at a time") + bitDepthPtr = flag.Int("bitDepth", 16, "Bit Depth to record audio at.") ) var outputs flagStrings @@ -192,9 +195,9 @@ func handleFlags() revid.Config { switch *inputPtr { case "Audio": - cfg.Rate = float64(*sampleRatePtr*sampleSize) / float64(blockSize) + cfg.WriteRate = float64(*sampleRatePtr*sampleSize) / float64(blockSize) default: - cfg.Rate = float64(*frameRatePtr) + cfg.WriteRate = float64(*frameRatePtr) } switch *inputCodecPtr { @@ -255,7 +258,6 @@ func handleFlags() revid.Config { cfg.Height = *heightPtr cfg.Width = *widthPtr cfg.FrameRate = *frameRatePtr - cfg.SampleRate = *sampleRatePtr cfg.HttpAddress = *httpAddressPtr cfg.Quantization = *quantizationPtr cfg.IntraRefreshPeriod = *intraRefreshPeriodPtr @@ -265,6 +267,10 @@ func handleFlags() revid.Config { cfg.Saturation = *saturationPtr cfg.Exposure = *exposurePtr cfg.AutoWhiteBalance = *autoWhiteBalancePtr + cfg.SampleRate = *sampleRatePtr + cfg.Channels = *channelsPtr + cfg.RecPeriod = *recPeriodPtr + cfg.BitDepth = *bitDepthPtr return cfg } diff --git a/revid/config.go b/revid/config.go index 63f6d017..ab686e43 100644 --- a/revid/config.go +++ b/revid/config.go @@ -77,7 +77,7 @@ type Config struct { // Audio SampleRate int // Samples a second (Hz). - Period int // How many seconds to record at a time. + RecPeriod int // How many seconds to record at a time. Channels int // Number of audio channels, 1 for mono, 2 for stereo. BitDepth int // Sample bit depth. 
} @@ -147,7 +147,6 @@ const ( defaultOutput = Http defaultPacketization = Flv defaultFrameRate = 25 - defaultSampleRate = 48000 defaultWriteRate = 25 defaultWidth = 1280 defaultHeight = 720 @@ -166,6 +165,11 @@ const ( defaultBrightness = 50 defaultExposure = "auto" defaultAutoWhiteBalance = "auto" + + defaultSampleRate = 48000 + defaultBitDepth = 16 + defaultChannels = 1 + defaultRecPeriod = 5 ) // Validate checks for any errors in the config fields and defaults settings @@ -208,7 +212,7 @@ func (c *Config) Validate(r *Revid) error { if c.Quantization > 0 || c.Bitrate == 0 { return errors.New("bad bitrate or quantization for mjpeg input") } - + case PCM, ADPCM: case NothingDefined: c.Logger.Log(logger.Info, pkg+"no input codec defined, defaulting", "inputCodec", defaultInputCodec) diff --git a/revid/revid.go b/revid/revid.go index cda7f127..8b0ea88b 100644 --- a/revid/revid.go +++ b/revid/revid.go @@ -254,6 +254,8 @@ func (r *Revid) setupPipeline(mtsEnc, flvEnc func(dst io.Writer, rate int) (io.W r.setupInput = r.startV4L case File: r.setupInput = r.setupInputForFile + case Audio: + r.setupInput = r.startAudioInput } switch r.config.InputCodec { @@ -678,7 +680,7 @@ func (r *Revid) setupInputForFile() error { func (r *Revid) startAudioInput() error { ai := NewAudioInput(&r.config) - go r.processFrom(ai, time.Second/time.Duration(r.config.Rate)) + go r.processFrom(ai, time.Second/time.Duration(r.config.WriteRate)) return nil } From b1e5b4341ffc8e4010bef4a4a73a985873c5ac80 Mon Sep 17 00:00:00 2001 From: Trek H Date: Wed, 24 Apr 2019 16:39:18 +0930 Subject: [PATCH 10/57] revid: pid for audio being written to mts packets --- cmd/revid-cli/main.go | 4 ++-- container/mts/audio_test.go | 12 ++++++------ container/mts/encoder.go | 17 ++++++++++------- revid/revid.go | 14 ++++++++++---- 4 files changed, 28 insertions(+), 19 deletions(-) diff --git a/cmd/revid-cli/main.go b/cmd/revid-cli/main.go index 97cc8ecc..b4abf0ef 100644 --- a/cmd/revid-cli/main.go +++ 
b/cmd/revid-cli/main.go @@ -62,7 +62,7 @@ const ( defaultLogVerbosity = logger.Info defaultSleepTime = 60 // Seconds sampleSize = 2 // Bytes - blockSize = 16000 // Bytes + chunkSize = 16000 // Bytes ) // canProfile is set to false with revid-cli is built with "-tags profile". @@ -195,7 +195,7 @@ func handleFlags() revid.Config { switch *inputPtr { case "Audio": - cfg.WriteRate = float64(*sampleRatePtr*sampleSize) / float64(blockSize) + cfg.WriteRate = float64(*sampleRatePtr*sampleSize) / float64(chunkSize) default: cfg.WriteRate = float64(*frameRatePtr) } diff --git a/container/mts/audio_test.go b/container/mts/audio_test.go index 23ba16e6..0674df19 100644 --- a/container/mts/audio_test.go +++ b/container/mts/audio_test.go @@ -43,8 +43,8 @@ func TestEncodePcm(t *testing.T) { var buf bytes.Buffer sampleRate := 48000 sampleSize := 2 - blockSize := 16000 - writeFreq := float64(sampleRate*sampleSize) / float64(blockSize) + chunkSize := 16000 + writeFreq := float64(sampleRate*sampleSize) / float64(chunkSize) e := NewEncoder(&buf, writeFreq, Audio) inPath := "../../../test/test-data/av/input/sweep_400Hz_20000Hz_-3dBFS_5s_48khz.pcm" @@ -54,15 +54,15 @@ func TestEncodePcm(t *testing.T) { } // Break pcm into blocks and encode to mts and get the resulting bytes. 
- for i := 0; i < len(inPcm); i += blockSize { - if len(inPcm)-i < blockSize { + for i := 0; i < len(inPcm); i += chunkSize { + if len(inPcm)-i < chunkSize { block := inPcm[i:] _, err = e.Write(block) if err != nil { t.Errorf("unable to write block: %v", err) } } else { - block := inPcm[i : i+blockSize] + block := inPcm[i : i+chunkSize] _, err = e.Write(block) if err != nil { t.Errorf("unable to write block: %v", err) @@ -73,7 +73,7 @@ func TestEncodePcm(t *testing.T) { // Get the first MTS packet to check var pkt packet.Packet - pesPacket := make([]byte, 0, blockSize) + pesPacket := make([]byte, 0, chunkSize) got := make([]byte, 0, len(inPcm)) i := 0 if i+PacketSize <= len(clip) { diff --git a/container/mts/encoder.go b/container/mts/encoder.go index 2d7ce656..b27a100d 100644 --- a/container/mts/encoder.go +++ b/container/mts/encoder.go @@ -99,11 +99,12 @@ var ( ) const ( - sdtPid = 17 - patPid = 0 - pmtPid = 4096 - videoPid = 256 - audioPid = 210 + sdtPid = 17 + patPid = 0 + pmtPid = 4096 + videoPid = 256 + // AudioPid is the Id for packets containing audio data + AudioPid = 210 videoStreamID = 0xe0 // First video stream ID. audioStreamID = 0xc0 // First audio stream ID. 
) @@ -154,7 +155,7 @@ func NewEncoder(dst io.Writer, rate float64, mediaType int) *Encoder { var sid byte switch mediaType { case Audio: - mPid = audioPid + mPid = AudioPid sid = audioStreamID case Video: mPid = videoPid @@ -248,7 +249,9 @@ func (e *Encoder) Write(data []byte) (int, error) { pkt.PCR = e.pcr() pusi = false } - _, err := e.dst.Write(pkt.Bytes(e.tsSpace[:PacketSize])) + bytes := pkt.Bytes(e.tsSpace[:PacketSize]) + fmt.Printf("Packet: %v", bytes) + _, err := e.dst.Write(bytes) if err != nil { return len(data), err } diff --git a/revid/revid.go b/revid/revid.go index 8b0ea88b..6c21762f 100644 --- a/revid/revid.go +++ b/revid/revid.go @@ -187,7 +187,7 @@ func (r *Revid) setConfig(config Config) error { // mtsEnc and flvEnc will be called to obtain an mts encoder and flv encoder // respectively. multiWriter will be used to create an ioext.multiWriteCloser // so that encoders can write to multiple senders. -func (r *Revid) setupPipeline(mtsEnc, flvEnc func(dst io.Writer, rate int) (io.Writer, error), multiWriter func(...io.WriteCloser) io.WriteCloser) error { +func (r *Revid) setupPipeline(mtsEnc func(dst io.Writer, rate, mediaType int) (io.Writer, error), flvEnc func(dst io.Writer, rate int) (io.Writer, error), multiWriter func(...io.WriteCloser) io.WriteCloser) error { r.buffer = (*buffer)(ring.NewBuffer(ringBufferSize, ringBufferElementSize, writeTimeout)) r.encoder = r.encoder[:0] @@ -231,7 +231,13 @@ func (r *Revid) setupPipeline(mtsEnc, flvEnc func(dst io.Writer, rate int) (io.W // as a destination. if len(mtsSenders) != 0 { mw := multiWriter(mtsSenders...) 
- e, _ := mtsEnc(mw, int(r.config.FrameRate)) + var mediaType int + if r.config.Input == Audio { + mediaType = mts.Audio + } else { + mediaType = mts.Video + } + e, _ := mtsEnc(mw, int(r.config.WriteRate), mediaType) r.encoder = append(r.encoder, e) } @@ -275,8 +281,8 @@ func (r *Revid) setupPipeline(mtsEnc, flvEnc func(dst io.Writer, rate int) (io.W return nil } -func newMtsEncoder(dst io.Writer, fps int) (io.Writer, error) { - e := mts.NewEncoder(dst, float64(fps), mts.Video) +func newMtsEncoder(dst io.Writer, writeRate, mediaType int) (io.Writer, error) { + e := mts.NewEncoder(dst, float64(writeRate), mediaType) return e, nil } From 09db8907a504ba2b6ad9cb41e5796bbe12546906 Mon Sep 17 00:00:00 2001 From: Trek H Date: Fri, 26 Apr 2019 17:03:30 +0930 Subject: [PATCH 11/57] revid: matching up audio packet sizes, chunk sizes and rates throughout revid pipeline --- cmd/revid-cli/main.go | 34 ++++++++++++++++++++++++---------- codec/adpcm/adpcm.go | 7 +++++++ codec/lex/lex.go | 4 ++-- container/mts/encoder.go | 4 +--- container/mts/pes/pes.go | 2 +- revid/config.go | 2 +- 6 files changed, 36 insertions(+), 17 deletions(-) diff --git a/cmd/revid-cli/main.go b/cmd/revid-cli/main.go index b4abf0ef..68e7c07f 100644 --- a/cmd/revid-cli/main.go +++ b/cmd/revid-cli/main.go @@ -30,14 +30,17 @@ package main import ( "flag" + "math" "os" "runtime/pprof" "strconv" "strings" "time" + "bitbucket.org/ausocean/av/codec/adpcm" "bitbucket.org/ausocean/av/container/mts" "bitbucket.org/ausocean/av/container/mts/meta" + "bitbucket.org/ausocean/av/container/mts/pes" "bitbucket.org/ausocean/av/revid" "bitbucket.org/ausocean/iot/pi/netsender" "bitbucket.org/ausocean/iot/pi/sds" @@ -60,9 +63,7 @@ const ( defaultLogPath = "/var/log/netsender" pkg = "revid-cli:" defaultLogVerbosity = logger.Info - defaultSleepTime = 60 // Seconds - sampleSize = 2 // Bytes - chunkSize = 16000 // Bytes + defaultSleepTime = 60 // Seconds ) // canProfile is set to false with revid-cli is built with "-tags profile". 
@@ -193,13 +194,6 @@ func handleFlags() revid.Config { log.Log(logger.Error, pkg+"bad input argument") } - switch *inputPtr { - case "Audio": - cfg.WriteRate = float64(*sampleRatePtr*sampleSize) / float64(chunkSize) - default: - cfg.WriteRate = float64(*frameRatePtr) - } - switch *inputCodecPtr { case "H264": cfg.InputCodec = revid.H264 @@ -212,6 +206,26 @@ func handleFlags() revid.Config { log.Log(logger.Error, pkg+"bad input codec argument") } + switch *inputPtr { + case "Audio": + byteDepth := *bitDepthPtr / 8 + PCMRate := *sampleRatePtr * byteDepth * *channelsPtr * *recPeriodPtr + var byteRate int + switch cfg.InputCodec { + case revid.PCM: + byteRate = PCMRate + case revid.ADPCM: + byteRate = adpcm.BytesOutput(PCMRate) + } + if byteRate < pes.MaxPesSize { + cfg.WriteRate = 1 + } else { + cfg.WriteRate = uint(math.Ceil(float64(byteRate) / float64(pes.MaxPesSize))) + } + default: + cfg.WriteRate = *frameRatePtr + } + if len(outputs) == 0 { cfg.Outputs = make([]uint8, 1) } diff --git a/codec/adpcm/adpcm.go b/codec/adpcm/adpcm.go index 595728a2..f90af10b 100644 --- a/codec/adpcm/adpcm.go +++ b/codec/adpcm/adpcm.go @@ -57,6 +57,13 @@ type decoder struct { step int16 } +// BytesOutput will return the number of adpcm bytes that will be generated for the given pcm data +func BytesOutput(pcm int) int { + // for X pcm bytes, 2 bytes are left uncompressed, the rest is compressed by a factor of 4 + // and a start index and padding byte are added. + return (pcm-2)/4 + 2 + 1 + 1 +} + // PcmBS is the size of the blocks that an encoder uses. // 'encodeBlock' will encode PcmBS bytes at a time and the output will be AdpcmBS bytes long. 
const PcmBS = 1010 diff --git a/codec/lex/lex.go b/codec/lex/lex.go index ff1d4e4d..9f5dc55a 100644 --- a/codec/lex/lex.go +++ b/codec/lex/lex.go @@ -262,14 +262,14 @@ func PCM(dst io.Writer, src io.Reader, delay time.Duration) error { tick = ticker.C } - r := bufio.NewReader(src) for { buf := make([]byte, 0, audioChunkSize) - _, err := r.Read(buf) + _, err := src.Read(buf) if err != nil { return err } <-tick + fmt.Printf("LEXED AUDIO: %v\n", buf[:64]) _, err = dst.Write(buf) if err != nil { return err diff --git a/container/mts/encoder.go b/container/mts/encoder.go index b27a100d..a4e4931b 100644 --- a/container/mts/encoder.go +++ b/container/mts/encoder.go @@ -249,9 +249,7 @@ func (e *Encoder) Write(data []byte) (int, error) { pkt.PCR = e.pcr() pusi = false } - bytes := pkt.Bytes(e.tsSpace[:PacketSize]) - fmt.Printf("Packet: %v", bytes) - _, err := e.dst.Write(bytes) + _, err := e.dst.Write(pkt.Bytes(e.tsSpace[:PacketSize])) if err != nil { return len(data), err } diff --git a/container/mts/pes/pes.go b/container/mts/pes/pes.go index b0e40f86..58a143cc 100644 --- a/container/mts/pes/pes.go +++ b/container/mts/pes/pes.go @@ -26,7 +26,7 @@ LICENSE package pes -const MaxPesSize = 64 * 1 << 10 +const MaxPesSize = 64 * 1 << 10 // 65536 /* The below data struct encapsulates the fields of an PES packet. Below is diff --git a/revid/config.go b/revid/config.go index ab686e43..83d8339b 100644 --- a/revid/config.go +++ b/revid/config.go @@ -55,7 +55,7 @@ type Config struct { IntraRefreshPeriod uint RtpAddress string SendRetry bool - WriteRate float64 // How many times a second revid encoders will be written to. + WriteRate uint // How many times a second revid encoders will be written to. 
// Video Height uint From a3c7cb5616f5749336c9f212c19a123b9877a3be Mon Sep 17 00:00:00 2001 From: Trek H Date: Sun, 5 May 2019 17:56:14 +0930 Subject: [PATCH 12/57] revid: added chunkSize to audioInput and bufferSize to lex functions The audio lexers need to know how much data they will be receiving unlike video which has a fixed buffer size. This means that all the lex function will need to be given a buffer size since they are used as a function pointer with the same signature. --- cmd/revid-cli/main.go | 10 ++++++---- codec/lex/lex.go | 18 +++++++----------- revid/revid.go | 15 +++++++-------- 3 files changed, 20 insertions(+), 23 deletions(-) diff --git a/cmd/revid-cli/main.go b/cmd/revid-cli/main.go index 68e7c07f..c6ac1bb6 100644 --- a/cmd/revid-cli/main.go +++ b/cmd/revid-cli/main.go @@ -135,10 +135,12 @@ func handleFlags() revid.Config { saturationPtr = flag.Int("Saturation", 0, "Set Saturation. (100-100)") exposurePtr = flag.String("Exposure", "auto", "Set exposure mode. ("+strings.Join(revid.ExposureModes[:], ",")+")") autoWhiteBalancePtr = flag.String("Awb", "auto", "Set automatic white balance mode. ("+strings.Join(revid.AutoWhiteBalanceModes[:], ",")+")") - sampleRatePtr = flag.Int("SampleRate", 48000, "Sample rate of recorded audio") - channelsPtr = flag.Int("Channels", 1, "Record in Mono or Stereo (1 or 2)") - recPeriodPtr = flag.Int("recPeriod", 5, "How many seconds to record at a time") - bitDepthPtr = flag.Int("bitDepth", 16, "Bit Depth to record audio at.") + + // Audio specific flags. 
+ sampleRatePtr = flag.Int("SampleRate", 48000, "Sample rate of recorded audio") + channelsPtr = flag.Int("Channels", 1, "Record in Mono or Stereo (1 or 2)") + recPeriodPtr = flag.Int("recPeriod", 5, "How many seconds to record at a time") + bitDepthPtr = flag.Int("bitDepth", 16, "Bit Depth to record audio at.") ) var outputs flagStrings diff --git a/codec/lex/lex.go b/codec/lex/lex.go index 9f5dc55a..6e0e8498 100644 --- a/codec/lex/lex.go +++ b/codec/lex/lex.go @@ -36,10 +36,6 @@ import ( "time" ) -const ( - audioChunkSize = 16000 -) - var noDelay = make(chan time.Time) func init() { @@ -52,7 +48,7 @@ var h264Prefix = [...]byte{0x00, 0x00, 0x01, 0x09, 0xf0} // successive writes being performed not earlier than the specified delay. // NAL units are split after type 1 (Coded slice of a non-IDR picture), 5 // (Coded slice of a IDR picture) and 8 (Picture parameter set). -func H264(dst io.Writer, src io.Reader, delay time.Duration) error { +func H264(dst io.Writer, src io.Reader, delay time.Duration, bufSize int) error { var tick <-chan time.Time if delay == 0 { tick = noDelay @@ -62,7 +58,7 @@ func H264(dst io.Writer, src io.Reader, delay time.Duration) error { tick = ticker.C } - const bufSize = 8 << 10 + bufSize = 8 << 10 //TODO(Trek): Pass this in rather than set it in here. c := newScanner(src, make([]byte, 4<<10)) // Standard file buffer size. @@ -207,7 +203,7 @@ func (c *scanner) reload() error { // MJPEG parses MJPEG frames read from src into separate writes to dst with // successive writes being performed not earlier than the specified delay. -func MJPEG(dst io.Writer, src io.Reader, delay time.Duration) error { +func MJPEG(dst io.Writer, src io.Reader, delay time.Duration, bufSize int) error { var tick <-chan time.Time if delay == 0 { tick = noDelay @@ -252,7 +248,7 @@ func MJPEG(dst io.Writer, src io.Reader, delay time.Duration) error { // PCM reads from the given source and breaks the PCM into chunks that // are an appropriate size for mts and pes packets. 
-func PCM(dst io.Writer, src io.Reader, delay time.Duration) error { +func PCM(dst io.Writer, src io.Reader, delay time.Duration, bufSize int) error { var tick <-chan time.Time if delay == 0 { tick = noDelay @@ -263,7 +259,7 @@ func PCM(dst io.Writer, src io.Reader, delay time.Duration) error { } for { - buf := make([]byte, 0, audioChunkSize) + buf := make([]byte, 0, bufSize) _, err := src.Read(buf) if err != nil { return err @@ -281,7 +277,7 @@ func PCM(dst io.Writer, src io.Reader, delay time.Duration) error { // are an appropriate size for mts and pes packets. // Since PCM and ADPCM are not any different when it comes to how they are // transmitted, ADPCM is just a wrapper for PCM. -func ADPCM(dst io.Writer, src io.Reader, delay time.Duration) error { - err := PCM(dst, src, delay) +func ADPCM(dst io.Writer, src io.Reader, delay time.Duration, bufSize int) error { + err := PCM(dst, src, delay, bufSize) return err } diff --git a/revid/revid.go b/revid/revid.go index 6c21762f..6a54791d 100644 --- a/revid/revid.go +++ b/revid/revid.go @@ -103,7 +103,7 @@ type Revid struct { cmd *exec.Cmd // lexTo, encoder and packer handle transcoding the input stream. - lexTo func(dest io.Writer, src io.Reader, delay time.Duration) error + lexTo func(dest io.Writer, src io.Reader, delay time.Duration, bufSize int) error // buffer handles passing frames from the transcoder // to the target destination. @@ -620,7 +620,7 @@ func (r *Revid) startRaspivid() error { } r.wg.Add(1) - go r.processFrom(stdout, 0) + go r.processFrom(stdout, 0, 0) return nil } @@ -662,7 +662,7 @@ func (r *Revid) startV4L() error { } r.wg.Add(1) - go r.processFrom(stdout, time.Duration(0)) + go r.processFrom(stdout, time.Duration(0), 0) return nil } @@ -678,21 +678,20 @@ func (r *Revid) setupInputForFile() error { // TODO(kortschak): Maybe we want a context.Context-aware parser that we can stop. 
r.wg.Add(1) - go r.processFrom(f, time.Second/time.Duration(r.config.FrameRate)) + go r.processFrom(f, time.Second/time.Duration(r.config.FrameRate), 0) return nil } // startAudioInput is used to start capturing audio from an audio device and processing it. func (r *Revid) startAudioInput() error { ai := NewAudioInput(&r.config) - - go r.processFrom(ai, time.Second/time.Duration(r.config.WriteRate)) + go r.processFrom(ai, time.Second/time.Duration(r.config.WriteRate), ai.ChunkSize()) return nil } -func (r *Revid) processFrom(read io.Reader, delay time.Duration) { +func (r *Revid) processFrom(read io.Reader, delay time.Duration, bufSize int) { r.config.Logger.Log(logger.Info, pkg+"reading input data") - r.err <- r.lexTo(r.buffer, read, delay) + r.err <- r.lexTo(r.buffer, read, delay, bufSize) r.config.Logger.Log(logger.Info, pkg+"finished reading input data") r.wg.Done() } From 7d4da08cf0b86dbf45102ac5c35ccdd73899750d Mon Sep 17 00:00:00 2001 From: Trek H Date: Tue, 7 May 2019 16:52:58 +0930 Subject: [PATCH 13/57] revid: audio lexer reading and writing correct bytes --- cmd/revid-cli/main.go | 20 ++------------------ codec/lex/lex.go | 12 ++++++++---- revid/config.go | 2 +- revid/revid.go | 9 ++++++++- 4 files changed, 19 insertions(+), 24 deletions(-) diff --git a/cmd/revid-cli/main.go b/cmd/revid-cli/main.go index c6ac1bb6..78d0e31b 100644 --- a/cmd/revid-cli/main.go +++ b/cmd/revid-cli/main.go @@ -30,17 +30,14 @@ package main import ( "flag" - "math" "os" "runtime/pprof" "strconv" "strings" "time" - "bitbucket.org/ausocean/av/codec/adpcm" "bitbucket.org/ausocean/av/container/mts" "bitbucket.org/ausocean/av/container/mts/meta" - "bitbucket.org/ausocean/av/container/mts/pes" "bitbucket.org/ausocean/av/revid" "bitbucket.org/ausocean/iot/pi/netsender" "bitbucket.org/ausocean/iot/pi/sds" @@ -139,7 +136,7 @@ func handleFlags() revid.Config { // Audio specific flags. 
sampleRatePtr = flag.Int("SampleRate", 48000, "Sample rate of recorded audio") channelsPtr = flag.Int("Channels", 1, "Record in Mono or Stereo (1 or 2)") - recPeriodPtr = flag.Int("recPeriod", 5, "How many seconds to record at a time") + recPeriodPtr = flag.Int("recPeriod", 1, "How many seconds to record at a time") bitDepthPtr = flag.Int("bitDepth", 16, "Bit Depth to record audio at.") ) @@ -210,20 +207,7 @@ func handleFlags() revid.Config { switch *inputPtr { case "Audio": - byteDepth := *bitDepthPtr / 8 - PCMRate := *sampleRatePtr * byteDepth * *channelsPtr * *recPeriodPtr - var byteRate int - switch cfg.InputCodec { - case revid.PCM: - byteRate = PCMRate - case revid.ADPCM: - byteRate = adpcm.BytesOutput(PCMRate) - } - if byteRate < pes.MaxPesSize { - cfg.WriteRate = 1 - } else { - cfg.WriteRate = uint(math.Ceil(float64(byteRate) / float64(pes.MaxPesSize))) - } + cfg.WriteRate = uint(*recPeriodPtr) default: cfg.WriteRate = *frameRatePtr } diff --git a/codec/lex/lex.go b/codec/lex/lex.go index 6e0e8498..af801946 100644 --- a/codec/lex/lex.go +++ b/codec/lex/lex.go @@ -259,14 +259,18 @@ func PCM(dst io.Writer, src io.Reader, delay time.Duration, bufSize int) error { } for { - buf := make([]byte, 0, bufSize) - _, err := src.Read(buf) + buf := make([]byte, bufSize) + fmt.Printf("LEXER reading: %v bytes\n", bufSize) + n, err := src.Read(buf) if err != nil { return err } <-tick - fmt.Printf("LEXED AUDIO: %v\n", buf[:64]) - _, err = dst.Write(buf) + fmt.Printf("LEXER: read %v bytes\n", n) + fmt.Printf("LEXER: writing: %v bytes\n", len(buf)) + // fmt.Printf("LEXER: %v\n", buf) + n, err = dst.Write(buf) + fmt.Printf("LEXER: wrote %v bytes\n\n", n) if err != nil { return err } diff --git a/revid/config.go b/revid/config.go index 83d8339b..575c1042 100644 --- a/revid/config.go +++ b/revid/config.go @@ -169,7 +169,7 @@ const ( defaultSampleRate = 48000 defaultBitDepth = 16 defaultChannels = 1 - defaultRecPeriod = 5 + defaultRecPeriod = 1 ) // Validate checks for any errors 
in the config fields and defaults settings diff --git a/revid/revid.go b/revid/revid.go index 6a54791d..14923af2 100644 --- a/revid/revid.go +++ b/revid/revid.go @@ -684,7 +684,14 @@ func (r *Revid) setupInputForFile() error { // startAudioInput is used to start capturing audio from an audio device and processing it. func (r *Revid) startAudioInput() error { - ai := NewAudioInput(&r.config) + ac := &AudioConfig{ + SampleRate: r.config.SampleRate, + Channels: r.config.Channels, + RecPeriod: r.config.RecPeriod, + BitDepth: r.config.BitDepth, + Codec: r.config.InputCodec, + } + ai := NewAudioInput(ac) go r.processFrom(ai, time.Second/time.Duration(r.config.WriteRate), ai.ChunkSize()) return nil } From 2bb3a6b82f09b3c766b14ce5e3c73a6505f7c6bb Mon Sep 17 00:00:00 2001 From: Trek H Date: Wed, 8 May 2019 17:21:21 +0930 Subject: [PATCH 14/57] revid: added missing files for audio input --- revid/audio-input.go | 325 ++++++++++++++++++++++++++++++++++++++ revid/audio-input_test.go | 37 +++++ 2 files changed, 362 insertions(+) create mode 100644 revid/audio-input.go create mode 100644 revid/audio-input_test.go diff --git a/revid/audio-input.go b/revid/audio-input.go new file mode 100644 index 00000000..c0fb032a --- /dev/null +++ b/revid/audio-input.go @@ -0,0 +1,325 @@ +package revid + +import ( + "errors" + "fmt" + "io" + "sync" + "time" + + "github.com/yobert/alsa" + + "bitbucket.org/ausocean/av/codec/pcm" + "bitbucket.org/ausocean/iot/pi/smartlogger" + "bitbucket.org/ausocean/utils/logger" + "bitbucket.org/ausocean/utils/ring" +) + +const ( + logPath = "/var/log/netsender" + rbDuration = 300 // seconds + rbTimeout = 100 * time.Millisecond + rbNextTimeout = 100 * time.Millisecond +) + +var log *logger.Logger + +// AudioInput holds everything we need to know about the audio input stream. +// NB: At 44100 Hz frame rate, 2 channels and 16-bit samples, a period of 5 seconds +// results in PCM data chunks of 882000 bytes! A longer period exceeds datastore's 1MB blob limit. 
+type AudioInput struct { + mu sync.Mutex // mu protects the AudioInput. + mode string // operating mode, either "Normal" or "Paused" + source string // name of audio source, or empty for the default source + + dev *alsa.Device // audio input device + ab alsa.Buffer // ALSA's buffer + rb *ring.Buffer // our buffer + chunkSize int + vs int // our "var sum" to track var changes + + *AudioConfig +} + +// AudioConfig provides parameters used by AudioInput. +type AudioConfig struct { + SampleRate int + Channels int + BitDepth int + RecPeriod int + Codec uint8 +} + +// NewAudioInput starts recording audio and returns an AudioInput struct which the audio can be read from. +func NewAudioInput(cfg *AudioConfig) *AudioInput { + + logLevel := int(logger.Debug) + + validLogLevel := true + if logLevel < int(logger.Debug) || logLevel > int(logger.Fatal) { + logLevel = int(logger.Info) + validLogLevel = false + } + + logSender := smartlogger.New(logPath) + log = logger.New(int8(logLevel), &logSender.LogRoller) + log.Log(logger.Info, "log-netsender: Logger Initialized") + if !validLogLevel { + log.Log(logger.Error, "Invalid log level was defaulted to Info") + } + + a := &AudioInput{} + a.AudioConfig = cfg + + // Open the requested audio device. + err := a.open() + if err != nil { + log.Log(logger.Fatal, "alsa.open failed", "error", err.Error()) + } + + // Capture audio in periods of a.RecPeriod seconds, and buffer rbDuration seconds in total. + a.ab = a.dev.NewBufferDuration(time.Second * time.Duration(a.RecPeriod)) + a.chunkSize = (((len(a.ab.Data) / a.dev.BufferFormat().Channels) * a.Channels) / a.dev.BufferFormat().Rate) * a.SampleRate + rbLen := rbDuration / a.RecPeriod + a.rb = ring.NewBuffer(rbLen, a.chunkSize, rbTimeout) + + go a.input() + + return a +} + +func (a *AudioInput) ChunkSize() int { + return a.chunkSize +} + +// open or re-open the recording device with the given name and prepare it to record. +// If name is empty, the first recording device is used. 
+func (a *AudioInput) open() error { + if a.dev != nil { + log.Log(logger.Debug, "Closing", "source", a.source) + a.dev.Close() + a.dev = nil + } + log.Log(logger.Debug, "Opening", "source", a.source) + + cards, err := alsa.OpenCards() + if err != nil { + return err + } + defer alsa.CloseCards(cards) + + for _, card := range cards { + devices, err := card.Devices() + if err != nil { + return err + } + for _, dev := range devices { + if dev.Type != alsa.PCM || !dev.Record { + continue + } + if dev.Title == a.source || a.source == "" { + a.dev = dev + break + } + } + } + + if a.dev == nil { + return errors.New("No audio source found") + } + log.Log(logger.Debug, "Found audio source", "source", a.dev.Title) + + // ToDo: time out if Open takes too long. + err = a.dev.Open() + if err != nil { + return err + } + log.Log(logger.Debug, "Opened audio source") + + // 2 channels is what most devices need to record in. If mono is requested, + // the recording will be converted in formatBuffer(). + _, err = a.dev.NegotiateChannels(2) + if err != nil { + return err + } + + // Try to negotiate a rate to record in that is divisible by the wanted rate + // so that it can be easily downsampled to the wanted rate. + // Note: if a card thinks it can record at a rate but can't actually, this can cause a failure. Eg. + // the audioinjector is supposed to record at 8000Hz and 16000Hz but it can't due to a firmware issue, + // to fix this 8000 and 16000 must be removed from this slice. + rates := [8]int{8000, 16000, 32000, 44100, 48000, 88200, 96000, 192000} + foundRate := false + for i := 0; i < len(rates) && !foundRate; i++ { + if rates[i] < a.SampleRate { + continue + } + if rates[i]%a.SampleRate == 0 { + _, err = a.dev.NegotiateRate(rates[i]) + if err == nil { + foundRate = true + log.Log(logger.Debug, "Sample rate set", "rate", rates[i]) + } + } + } + + // If no easily divisible rate is found, then use the default rate. 
+ if !foundRate { + log.Log(logger.Warning, "Unable to sample at requested rate, default used.", "rateRequested", a.SampleRate) + _, err = a.dev.NegotiateRate(defaultSampleRate) + if err != nil { + return err + } + log.Log(logger.Debug, "Sample rate set", "rate", defaultSampleRate) + } + + var fmt alsa.FormatType + switch a.BitDepth { + case 16: + fmt = alsa.S16_LE + case 32: + fmt = alsa.S32_LE + default: + return errors.New("Unsupported sample bits") + } + _, err = a.dev.NegotiateFormat(fmt) + if err != nil { + return err + } + + // Either 8192 or 16384 bytes is a reasonable ALSA buffer size. + _, err = a.dev.NegotiateBufferSize(8192, 16384) + if err != nil { + return err + } + + if err = a.dev.Prepare(); err != nil { + return err + } + log.Log(logger.Debug, "Successfully negotiated ALSA params") + return nil +} + +// input continously records audio and writes it to the ringbuffer. +// Re-opens the device and tries again if ASLA returns an error. +// Spends a lot of time sleeping in Paused mode. +// ToDo: Currently, reading audio and writing to the ringbuffer are synchronous. +// Need a way to asynchronously read from the ALSA buffer, i.e., _while_ it is recording to avoid any gaps. 
+func (a *AudioInput) input() { + for { + a.mu.Lock() + mode := a.mode + a.mu.Unlock() + if mode == "Paused" { + time.Sleep(time.Duration(a.RecPeriod) * time.Second) + continue + } + log.Log(logger.Debug, "Recording audio for period", "seconds", a.RecPeriod) + a.mu.Lock() + err := a.dev.Read(a.ab.Data) + a.mu.Unlock() + if err != nil { + log.Log(logger.Debug, "Device.Read failed", "error", err.Error()) + a.mu.Lock() + err = a.open() // re-open + if err != nil { + log.Log(logger.Fatal, "alsa.open failed", "error", err.Error()) + } + a.mu.Unlock() + continue + } + + toWrite := a.formatBuffer() + + log.Log(logger.Debug, "Audio format conversion has been performed where needed") + + fmt.Printf("Writing %v bytes to ringbuffer\n", len(toWrite.Data)) + var n int + n, err = a.rb.Write(toWrite.Data) + fmt.Printf("Wrote %v bytes to ringbuffer\n", n) + switch err { + case nil: + log.Log(logger.Debug, "Wrote audio to ringbuffer", "length", n) + case ring.ErrDropped: + log.Log(logger.Warning, "Dropped audio") + default: + log.Log(logger.Error, "Unexpected ringbuffer error", "error", err.Error()) + return + } + } +} + +// read reads a full PCM chunk from the ringbuffer, returning the number of bytes read upon success. +// Any errors returned are unexpected and should be considered fatal. +func (a AudioInput) Read(p []byte) (n int, err error) { + fmt.Println("Performing AudioInput read...") + chunk, err := a.rb.Next(rbNextTimeout) + switch err { + case nil: + // Do nothing. 
+ case ring.ErrTimeout: + return 0, nil + case io.EOF: + log.Log(logger.Error, "Unexpected EOF from ring.Next") + return 0, io.ErrUnexpectedEOF + default: + log.Log(logger.Error, "Unexpected error from ring.Next", "error", err.Error()) + return 0, err + } + fmt.Printf("Reading %v bytes from ringbuffer\n", chunk.Len()) + n, err = io.ReadFull(a.rb, p[:chunk.Len()]) + fmt.Printf("Read %v bytes from ringbuffer\n", n) + if err != nil { + log.Log(logger.Error, "Unexpected error from ring.Read", "error", err.Error()) + return n, err + } + + log.Log(logger.Debug, "Read audio from ringbuffer", "length", n) + return n, nil +} + +// formatBuffer returns an ALSA buffer that has the recording data from the ac's original ALSA buffer but stored +// in the desired format specified by the ac's parameters. +func (a *AudioInput) formatBuffer() alsa.Buffer { + var err error + a.mu.Lock() + wantChannels := a.Channels + wantRate := a.SampleRate + a.mu.Unlock() + + // If nothing needs to be changed, return the original. + if a.ab.Format.Channels == wantChannels && a.ab.Format.Rate == wantRate { + return a.ab + } + + formatted := alsa.Buffer{Format: a.ab.Format} + bufCopied := false + if a.ab.Format.Channels != wantChannels { + + // Convert channels. + if a.ab.Format.Channels == 2 && wantChannels == 1 { + if formatted.Data, err = pcm.StereoToMono(a.ab); err != nil { + log.Log(logger.Warning, "Channel conversion failed, audio has remained stereo", "error", err.Error()) + } else { + formatted.Format.Channels = 1 + } + bufCopied = true + } + } + + if a.ab.Format.Rate != wantRate { + + // Convert rate. 
+ if bufCopied { + formatted.Data, err = pcm.Resample(formatted, wantRate) + } else { + formatted.Data, err = pcm.Resample(a.ab, wantRate) + } + if err != nil { + log.Log(logger.Warning, "Rate conversion failed, audio has remained original rate", "error", err.Error()) + } else { + formatted.Format.Rate = wantRate + } + } + return formatted +} diff --git a/revid/audio-input_test.go b/revid/audio-input_test.go new file mode 100644 index 00000000..c494f98d --- /dev/null +++ b/revid/audio-input_test.go @@ -0,0 +1,37 @@ +package revid + +import ( + "testing" + + "bitbucket.org/ausocean/iot/pi/netsender" +) + +func TestAudioInput(t *testing.T) { + + var logger testLogger + ns, err := netsender.New(&logger, nil, nil, nil) + if err != nil { + t.Errorf("netsender.New failed with error %v", err) + } + + var c Config + c.Logger = &logger + c.Input = Audio + c.Outputs = make([]uint8, 1) + + rv, err := New(c, ns) + if err != nil { + t.Errorf("revid.New failed with error %v", err) + } + + err = rv.Start() + if err != nil { + t.Errorf("revid.Start failed with error %v", err) + } +} + +// testLogger implements a netsender.Logger. +type testLogger struct{} + +// SetLevel normally sets the logging level, but it is a no-op in our case. 
+func (tl *testLogger) SetLevel(level int8) {} From 76765c8a20a0310496fc96181e1989a1b5f54da1 Mon Sep 17 00:00:00 2001 From: Trek H Date: Wed, 8 May 2019 18:07:33 +0930 Subject: [PATCH 15/57] revid: updated revid test to use new mtsEncoder func --- revid/audio-input_test.go | 42 ++++++++++++++++----------------------- revid/revid_test.go | 2 +- 2 files changed, 18 insertions(+), 26 deletions(-) diff --git a/revid/audio-input_test.go b/revid/audio-input_test.go index c494f98d..a510428b 100644 --- a/revid/audio-input_test.go +++ b/revid/audio-input_test.go @@ -2,36 +2,28 @@ package revid import ( "testing" - - "bitbucket.org/ausocean/iot/pi/netsender" ) func TestAudioInput(t *testing.T) { - var logger testLogger - ns, err := netsender.New(&logger, nil, nil, nil) - if err != nil { - t.Errorf("netsender.New failed with error %v", err) - } + // var logger testLogger + // ns, err := netsender.New(&logger, nil, nil, nil) + // if err != nil { + // t.Errorf("netsender.New failed with error %v", err) + // } - var c Config - c.Logger = &logger - c.Input = Audio - c.Outputs = make([]uint8, 1) + // var c Config + // c.Logger = &logger + // c.Input = Audio + // c.Outputs = make([]uint8, 1) - rv, err := New(c, ns) - if err != nil { - t.Errorf("revid.New failed with error %v", err) - } + // rv, err := New(c, ns) + // if err != nil { + // t.Errorf("revid.New failed with error %v", err) + // } - err = rv.Start() - if err != nil { - t.Errorf("revid.Start failed with error %v", err) - } + // err = rv.Start() + // if err != nil { + // t.Errorf("revid.Start failed with error %v", err) + // } } - -// testLogger implements a netsender.Logger. -type testLogger struct{} - -// SetLevel normally sets the logging level, but it is a no-op in our case. 
-func (tl *testLogger) SetLevel(level int8) {} diff --git a/revid/revid_test.go b/revid/revid_test.go index 18086912..17739221 100644 --- a/revid/revid_test.go +++ b/revid/revid_test.go @@ -232,7 +232,7 @@ func TestResetEncoderSenderSetup(t *testing.T) { // This logic is what we want to check. err = rv.setupPipeline( - func(dst io.WriteCloser, rate int) (io.WriteCloser, error) { + func(dst io.WriteCloser, rate int, mediaType int) (io.WriteCloser, error) { return &tstMtsEncoder{dst: dst}, nil }, func(dst io.WriteCloser, rate int) (io.WriteCloser, error) { From 909908260971628e809c5ae9115e8b36b0b1bfc6 Mon Sep 17 00:00:00 2001 From: Trek H Date: Wed, 8 May 2019 19:05:00 +0930 Subject: [PATCH 16/57] revid: setting up mts encoder with pid based on media type --- revid/revid.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/revid/revid.go b/revid/revid.go index e02b9404..965ae546 100644 --- a/revid/revid.go +++ b/revid/revid.go @@ -163,7 +163,7 @@ func (r *Revid) reset(config Config) error { err = r.setupPipeline( func(dst io.WriteCloser, fps, medType int) (io.WriteCloser, error) { - e := mts.NewEncoder(dst, float64(fps), mts.Video) + e := mts.NewEncoder(dst, float64(fps), medType) return e, nil }, func(dst io.WriteCloser, fps int) (io.WriteCloser, error) { From e699c30a8550f53cff708f16424b5da2dee2e473 Mon Sep 17 00:00:00 2001 From: Trek H Date: Wed, 8 May 2019 19:31:25 +0930 Subject: [PATCH 17/57] revid: updated documentation and licenses --- cmd/revid-cli/main.go | 2 ++ codec/lex/lex.go | 4 +--- revid/audio-input.go | 24 ++++++++++++++++++++++++ revid/config.go | 4 +--- revid/revid.go | 5 ++--- 5 files changed, 30 insertions(+), 9 deletions(-) diff --git a/cmd/revid-cli/main.go b/cmd/revid-cli/main.go index 87044450..ab55188a 100644 --- a/cmd/revid-cli/main.go +++ b/cmd/revid-cli/main.go @@ -8,6 +8,7 @@ DESCRIPTION AUTHORS Saxon A. 
Nelson-Milton Jack Richardson + Trek Hopton LICENSE revid-cli is Copyright (C) 2017-2018 the Australian Ocean Lab (AusOcean) @@ -26,6 +27,7 @@ LICENSE along with revid in gpl.txt. If not, see http://www.gnu.org/licenses. */ +// revid-cli is a command line interface for revid. package main import ( diff --git a/codec/lex/lex.go b/codec/lex/lex.go index af801946..024430ed 100644 --- a/codec/lex/lex.go +++ b/codec/lex/lex.go @@ -2,11 +2,9 @@ NAME lex.go -DESCRIPTION - See Readme.md - AUTHOR Dan Kortschak + Trek Hopton LICENSE lex.go is Copyright (C) 2017 the Australian Ocean Lab (AusOcean) diff --git a/revid/audio-input.go b/revid/audio-input.go index c0fb032a..a1729bf9 100644 --- a/revid/audio-input.go +++ b/revid/audio-input.go @@ -1,3 +1,27 @@ +/* +NAME + audio-input.go + +AUTHOR + Trek Hopton + +LICENSE + audio-input.go is Copyright (C) 2019 the Australian Ocean Lab (AusOcean) + + It is free software: you can redistribute it and/or modify them + under the terms of the GNU General Public License as published by the + Free Software Foundation, either version 3 of the License, or (at your + option) any later version. + + It is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License in gpl.txt. + If not, see [GNU licenses](http://www.gnu.org/licenses). +*/ + package revid import ( diff --git a/revid/config.go b/revid/config.go index 6986e83b..7bebf599 100644 --- a/revid/config.go +++ b/revid/config.go @@ -2,11 +2,9 @@ NAME Config.go -DESCRIPTION - See Readme.md - AUTHORS Saxon A. 
Nelson-Milton + Trek Hopton LICENSE Config.go is Copyright (C) 2017-2018 the Australian Ocean Lab (AusOcean) diff --git a/revid/revid.go b/revid/revid.go index 965ae546..a38b8eff 100644 --- a/revid/revid.go +++ b/revid/revid.go @@ -2,13 +2,11 @@ NAME revid.go -DESCRIPTION - See Readme.md - AUTHORS Saxon A. Nelson-Milton Alan Noble Dan Kortschak + Trek Hopton LICENSE revid is Copyright (C) 2017-2018 the Australian Ocean Lab (AusOcean) @@ -27,6 +25,7 @@ LICENSE in gpl.txt. If not, see http://www.gnu.org/licenses. */ +// Package revid provides an API for reading, transcoding, and writing audio/video streams and files. package revid import ( From 28eba2ad60af04e305266a2e2fbd7de9cc2dad90 Mon Sep 17 00:00:00 2001 From: Trek H Date: Wed, 8 May 2019 19:56:02 +0930 Subject: [PATCH 18/57] revid: changed PCM lexer timing previously the lexer would read before any audio was generated, resulting in silence but now the timing is correct. --- codec/lex/lex.go | 11 +++-------- revid/audio-input.go | 8 ++------ 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/codec/lex/lex.go b/codec/lex/lex.go index 024430ed..6f6a7ba0 100644 --- a/codec/lex/lex.go +++ b/codec/lex/lex.go @@ -257,18 +257,13 @@ func PCM(dst io.Writer, src io.Reader, delay time.Duration, bufSize int) error { } for { + <-tick buf := make([]byte, bufSize) - fmt.Printf("LEXER reading: %v bytes\n", bufSize) - n, err := src.Read(buf) + _, err := src.Read(buf) if err != nil { return err } - <-tick - fmt.Printf("LEXER: read %v bytes\n", n) - fmt.Printf("LEXER: writing: %v bytes\n", len(buf)) - // fmt.Printf("LEXER: %v\n", buf) - n, err = dst.Write(buf) - fmt.Printf("LEXER: wrote %v bytes\n\n", n) + _, err = dst.Write(buf) if err != nil { return err } diff --git a/revid/audio-input.go b/revid/audio-input.go index a1729bf9..4b498926 100644 --- a/revid/audio-input.go +++ b/revid/audio-input.go @@ -257,10 +257,8 @@ func (a *AudioInput) input() { log.Log(logger.Debug, "Audio format conversion has been performed 
where needed") - fmt.Printf("Writing %v bytes to ringbuffer\n", len(toWrite.Data)) var n int n, err = a.rb.Write(toWrite.Data) - fmt.Printf("Wrote %v bytes to ringbuffer\n", n) switch err { case nil: log.Log(logger.Debug, "Wrote audio to ringbuffer", "length", n) @@ -273,9 +271,9 @@ func (a *AudioInput) input() { } } -// read reads a full PCM chunk from the ringbuffer, returning the number of bytes read upon success. +// Read reads a full PCM chunk from the ringbuffer, returning the number of bytes read upon success. // Any errors returned are unexpected and should be considered fatal. -func (a AudioInput) Read(p []byte) (n int, err error) { +func (a *AudioInput) Read(p []byte) (n int, err error) { fmt.Println("Performing AudioInput read...") chunk, err := a.rb.Next(rbNextTimeout) switch err { @@ -290,9 +288,7 @@ func (a AudioInput) Read(p []byte) (n int, err error) { log.Log(logger.Error, "Unexpected error from ring.Next", "error", err.Error()) return 0, err } - fmt.Printf("Reading %v bytes from ringbuffer\n", chunk.Len()) n, err = io.ReadFull(a.rb, p[:chunk.Len()]) - fmt.Printf("Read %v bytes from ringbuffer\n", n) if err != nil { log.Log(logger.Error, "Unexpected error from ring.Read", "error", err.Error()) return n, err From 6320011190cc896b4dac45f427f00e6fa97bd92a Mon Sep 17 00:00:00 2001 From: Trek H Date: Thu, 9 May 2019 12:11:02 +0930 Subject: [PATCH 19/57] revid: added audio input start test test written and passing for revid with AudioInput starting unset audio parameters in config now default. 
--- revid/audio-input.go | 11 +++++----- revid/audio-input_test.go | 38 +++++++++++++++++----------------- revid/config.go | 43 +++++++++++++++++++++++++++++++-------- 3 files changed, 59 insertions(+), 33 deletions(-) diff --git a/revid/audio-input.go b/revid/audio-input.go index 4b498926..bf3980e7 100644 --- a/revid/audio-input.go +++ b/revid/audio-input.go @@ -197,16 +197,16 @@ func (a *AudioInput) open() error { log.Log(logger.Debug, "Sample rate set", "rate", defaultSampleRate) } - var fmt alsa.FormatType + var aFmt alsa.FormatType switch a.BitDepth { case 16: - fmt = alsa.S16_LE + aFmt = alsa.S16_LE case 32: - fmt = alsa.S32_LE + aFmt = alsa.S32_LE default: - return errors.New("Unsupported sample bits") + return fmt.Errorf("Unsupported sample bits %v\n", a.BitDepth) } - _, err = a.dev.NegotiateFormat(fmt) + _, err = a.dev.NegotiateFormat(aFmt) if err != nil { return err } @@ -274,7 +274,6 @@ func (a *AudioInput) input() { // Read reads a full PCM chunk from the ringbuffer, returning the number of bytes read upon success. // Any errors returned are unexpected and should be considered fatal. 
func (a *AudioInput) Read(p []byte) (n int, err error) { - fmt.Println("Performing AudioInput read...") chunk, err := a.rb.Next(rbNextTimeout) switch err { case nil: diff --git a/revid/audio-input_test.go b/revid/audio-input_test.go index a510428b..7c817c5f 100644 --- a/revid/audio-input_test.go +++ b/revid/audio-input_test.go @@ -2,28 +2,28 @@ package revid import ( "testing" + + "bitbucket.org/ausocean/iot/pi/netsender" ) -func TestAudioInput(t *testing.T) { +func TestAudioInputStart(t *testing.T) { + var logger testLogger + ns, err := netsender.New(&logger, nil, nil, nil) + if err != nil { + t.Errorf("netsender.New failed with error %v", err) + } - // var logger testLogger - // ns, err := netsender.New(&logger, nil, nil, nil) - // if err != nil { - // t.Errorf("netsender.New failed with error %v", err) - // } + var c Config + c.Logger = &logger + c.Input = Audio - // var c Config - // c.Logger = &logger - // c.Input = Audio - // c.Outputs = make([]uint8, 1) + rv, err := New(c, ns) + if err != nil { + t.Errorf("revid.New failed with error %v", err) + } - // rv, err := New(c, ns) - // if err != nil { - // t.Errorf("revid.New failed with error %v", err) - // } - - // err = rv.Start() - // if err != nil { - // t.Errorf("revid.Start failed with error %v", err) - // } + err = rv.Start() + if err != nil { + t.Errorf("revid.Start failed with error %v", err) + } } diff --git a/revid/config.go b/revid/config.go index 7bebf599..d914e221 100644 --- a/revid/config.go +++ b/revid/config.go @@ -164,10 +164,11 @@ const ( defaultExposure = "auto" defaultAutoWhiteBalance = "auto" - defaultSampleRate = 48000 - defaultBitDepth = 16 - defaultChannels = 1 - defaultRecPeriod = 1 + defaultAudioInputCodec = ADPCM + defaultSampleRate = 48000 + defaultBitDepth = 16 + defaultChannels = 1 + defaultRecPeriod = 1 ) // Validate checks for any errors in the config fields and defaults settings @@ -211,10 +212,16 @@ func (c *Config) Validate(r *Revid) error { } case PCM, ADPCM: case 
NothingDefined: - c.Logger.Log(logger.Info, pkg+"no input codec defined, defaulting", "inputCodec", defaultInputCodec) - c.InputCodec = defaultInputCodec - c.Logger.Log(logger.Info, pkg+"defaulting quantization", "quantization", defaultQuantization) - c.Quantization = defaultQuantization + switch c.Input { + case Audio: + c.Logger.Log(logger.Info, pkg+"input is audio but no codec defined, defaulting", "inputCodec", defaultAudioInputCodec) + c.InputCodec = defaultAudioInputCodec + default: + c.Logger.Log(logger.Info, pkg+"no input codec defined, defaulting", "inputCodec", defaultInputCodec) + c.InputCodec = defaultInputCodec + c.Logger.Log(logger.Info, pkg+"defaulting quantization", "quantization", defaultQuantization) + c.Quantization = defaultQuantization + } default: return errors.New("bad input codec defined in config") } @@ -285,6 +292,26 @@ func (c *Config) Validate(r *Revid) error { c.SampleRate = defaultSampleRate } + if c.Channels == 0 { + c.Logger.Log(logger.Info, pkg+"no number of channels defined, defaulting", "Channels", defaultChannels) + c.Channels = defaultChannels + } + + if c.BitDepth == 0 { + c.Logger.Log(logger.Info, pkg+"no bit depth defined, defaulting", "BitDepth", defaultBitDepth) + c.BitDepth = defaultBitDepth + } + + if c.RecPeriod == 0 { + c.Logger.Log(logger.Info, pkg+"no record period defined, defaulting", "recPeriod", defaultRecPeriod) + c.RecPeriod = defaultRecPeriod + } + + if c.WriteRate == 0 { + c.Logger.Log(logger.Info, pkg+"no write rate defined, defaulting", "writeRate", defaultWriteRate) + c.WriteRate = defaultWriteRate + } + if c.Bitrate == 0 { c.Logger.Log(logger.Info, pkg+"no bitrate defined, defaulting", "bitrate", defaultBitrate) c.Bitrate = defaultBitrate From 76edcfe8ed3d111bc3cb444e880424b961871346 Mon Sep 17 00:00:00 2001 From: Trek H Date: Wed, 15 May 2019 15:57:49 +0930 Subject: [PATCH 20/57] revid: added start and stop functions to AudioInput for proper closing --- revid/audio-input.go | 64 
++++++++++++++++++++++++++------------- revid/audio-input_test.go | 12 +++++--- revid/revid.go | 6 +++- 3 files changed, 55 insertions(+), 27 deletions(-) diff --git a/revid/audio-input.go b/revid/audio-input.go index bf3980e7..b281bec8 100644 --- a/revid/audio-input.go +++ b/revid/audio-input.go @@ -49,18 +49,17 @@ const ( var log *logger.Logger // AudioInput holds everything we need to know about the audio input stream. -// NB: At 44100 Hz frame rate, 2 channels and 16-bit samples, a period of 5 seconds -// results in PCM data chunks of 882000 bytes! A longer period exceeds datastore's 1MB blob limit. +// Note: At 44100 Hz sample rate, 2 channels and 16-bit samples, a period of 5 seconds +// results in PCM data chunks of 882000 bytes. A longer period exceeds datastore's 1MB blob limit. type AudioInput struct { - mu sync.Mutex // mu protects the AudioInput. - mode string // operating mode, either "Normal" or "Paused" - source string // name of audio source, or empty for the default source + mu sync.Mutex + source string // Name of audio source, or empty for the default source. + mode string // Operating mode, either "Running", "Paused", or "Stopped". - dev *alsa.Device // audio input device - ab alsa.Buffer // ALSA's buffer - rb *ring.Buffer // our buffer - chunkSize int - vs int // our "var sum" to track var changes + dev *alsa.Device // Audio input device. + ab alsa.Buffer // ALSA's buffer. + rb *ring.Buffer // Our buffer. + chunkSize int // This is the number of bytes that will be stored at a time. *AudioConfig } @@ -74,17 +73,15 @@ type AudioConfig struct { Codec uint8 } -// NewAudioInput starts recording audio and returns an AudioInput struct which the audio can be read from. +// NewAudioInput initializes and returns an AudioInput struct which can be started, read from, and stopped. func NewAudioInput(cfg *AudioConfig) *AudioInput { - + // Initialize logger. 
logLevel := int(logger.Debug) - validLogLevel := true if logLevel < int(logger.Debug) || logLevel > int(logger.Fatal) { logLevel = int(logger.Info) validLogLevel = false } - logSender := smartlogger.New(logPath) log = logger.New(int8(logLevel), &logSender.LogRoller) log.Log(logger.Info, "log-netsender: Logger Initialized") @@ -101,17 +98,42 @@ func NewAudioInput(cfg *AudioConfig) *AudioInput { log.Log(logger.Fatal, "alsa.open failed", "error", err.Error()) } - // Capture audio in periods of a.RecPeriod seconds, and buffer rbDuration seconds in total. + // Setup ring buffer to capture audio in periods of a.RecPeriod seconds, and buffer rbDuration seconds in total. a.ab = a.dev.NewBufferDuration(time.Second * time.Duration(a.RecPeriod)) a.chunkSize = (((len(a.ab.Data) / a.dev.BufferFormat().Channels) * a.Channels) / a.dev.BufferFormat().Rate) * a.SampleRate rbLen := rbDuration / a.RecPeriod a.rb = ring.NewBuffer(rbLen, a.chunkSize, rbTimeout) - go a.input() + a.mode = "Paused" return a } +// Start will start recording audio and writing to the output. +func (a *AudioInput) Start() { + a.mu.Lock() + mode := a.mode + a.mu.Unlock() + switch mode { + case "Paused": + go a.input() + case "Stopped": + + } +} + +// Stop will stop recording audio and close +func (a *AudioInput) Stop() { + a.mode = "Stopped" + if a.dev != nil { + log.Log(logger.Debug, "Closing", "source", a.source) + a.dev.Close() + a.dev = nil + } + +} + +// ChunkSize returns the AudioInput's chunkSize, ie. the number of bytes of audio written to output at a time. 
func (a *AudioInput) ChunkSize() int { return a.chunkSize } @@ -204,7 +226,7 @@ func (a *AudioInput) open() error { case 32: aFmt = alsa.S32_LE default: - return fmt.Errorf("Unsupported sample bits %v\n", a.BitDepth) + return fmt.Errorf("unsupported sample bits %v", a.BitDepth) } _, err = a.dev.NegotiateFormat(aFmt) if err != nil { @@ -226,17 +248,17 @@ func (a *AudioInput) open() error { // input continously records audio and writes it to the ringbuffer. // Re-opens the device and tries again if ASLA returns an error. -// Spends a lot of time sleeping in Paused mode. -// ToDo: Currently, reading audio and writing to the ringbuffer are synchronous. -// Need a way to asynchronously read from the ALSA buffer, i.e., _while_ it is recording to avoid any gaps. func (a *AudioInput) input() { for { a.mu.Lock() mode := a.mode a.mu.Unlock() - if mode == "Paused" { + switch mode { + case "Paused": time.Sleep(time.Duration(a.RecPeriod) * time.Second) continue + case "Stopped": + break } log.Log(logger.Debug, "Recording audio for period", "seconds", a.RecPeriod) a.mu.Lock() diff --git a/revid/audio-input_test.go b/revid/audio-input_test.go index 7c817c5f..78115053 100644 --- a/revid/audio-input_test.go +++ b/revid/audio-input_test.go @@ -3,10 +3,13 @@ package revid import ( "testing" + "bitbucket.org/ausocean/av/container/mts" + "bitbucket.org/ausocean/av/container/mts/meta" "bitbucket.org/ausocean/iot/pi/netsender" ) -func TestAudioInputStart(t *testing.T) { +func TestAudioInputNew(t *testing.T) { + mts.Meta = meta.New() var logger testLogger ns, err := netsender.New(&logger, nil, nil, nil) if err != nil { @@ -20,10 +23,9 @@ func TestAudioInputStart(t *testing.T) { rv, err := New(c, ns) if err != nil { t.Errorf("revid.New failed with error %v", err) + } else if rv == nil { + t.Errorf("revid.New did not return a new revid") } - err = rv.Start() - if err != nil { - t.Errorf("revid.Start failed with error %v", err) - } + rv.Stop() } diff --git a/revid/revid.go b/revid/revid.go 
index a38b8eff..96d35fbf 100644 --- a/revid/revid.go +++ b/revid/revid.go @@ -627,8 +627,12 @@ func (r *Revid) startAudioInput() (func() error, error) { Codec: r.config.InputCodec, } ai := NewAudioInput(ac) + r.wg.Add(1) go r.processFrom(ai, time.Second/time.Duration(r.config.WriteRate), ai.ChunkSize()) - return nil, nil + return func() error { + ai.Stop() + return nil + }, nil } func (r *Revid) processFrom(read io.Reader, delay time.Duration, bufSize int) { From 7ba9d023a3fa8b416c4845aca9ee04fc01aa50fe Mon Sep 17 00:00:00 2001 From: Trek H Date: Tue, 21 May 2019 00:45:54 +0930 Subject: [PATCH 21/57] revid: made start and stop change audio device state --- revid/audio-input.go | 65 ++++++++++++++++++++++++++------------- revid/audio-input_test.go | 7 +++-- revid/revid.go | 10 +++--- revid/senders.go | 2 +- 4 files changed, 54 insertions(+), 30 deletions(-) diff --git a/revid/audio-input.go b/revid/audio-input.go index b281bec8..40e80abb 100644 --- a/revid/audio-input.go +++ b/revid/audio-input.go @@ -46,15 +46,21 @@ const ( rbNextTimeout = 100 * time.Millisecond ) +const ( + running = iota + paused + stopped +) + var log *logger.Logger -// AudioInput holds everything we need to know about the audio input stream. +// audioDevice holds everything we need to know about the audio input stream. // Note: At 44100 Hz sample rate, 2 channels and 16-bit samples, a period of 5 seconds // results in PCM data chunks of 882000 bytes. A longer period exceeds datastore's 1MB blob limit. -type AudioInput struct { +type audioDevice struct { mu sync.Mutex source string // Name of audio source, or empty for the default source. - mode string // Operating mode, either "Running", "Paused", or "Stopped". + mode uint8 // Operating mode, either running, paused, or stopped. dev *alsa.Device // Audio input device. ab alsa.Buffer // ALSA's buffer. @@ -64,7 +70,7 @@ type AudioInput struct { *AudioConfig } -// AudioConfig provides parameters used by AudioInput. 
+// AudioConfig provides parameters used by audioDevice. type AudioConfig struct { SampleRate int Channels int @@ -73,8 +79,8 @@ type AudioConfig struct { Codec uint8 } -// NewAudioInput initializes and returns an AudioInput struct which can be started, read from, and stopped. -func NewAudioInput(cfg *AudioConfig) *AudioInput { +// NewAudioDevice initializes and returns an audioDevice struct which can be started, read from, and stopped. +func NewAudioDevice(cfg *AudioConfig) *audioDevice { // Initialize logger. logLevel := int(logger.Debug) validLogLevel := true @@ -89,7 +95,7 @@ func NewAudioInput(cfg *AudioConfig) *AudioInput { log.Log(logger.Error, "Invalid log level was defaulted to Info") } - a := &AudioInput{} + a := &audioDevice{} a.AudioConfig = cfg // Open the requested audio device. @@ -104,27 +110,42 @@ func NewAudioInput(cfg *AudioConfig) *AudioInput { rbLen := rbDuration / a.RecPeriod a.rb = ring.NewBuffer(rbLen, a.chunkSize, rbTimeout) - a.mode = "Paused" + a.mode = paused return a } // Start will start recording audio and writing to the output. -func (a *AudioInput) Start() { +func (a *audioDevice) Start() { a.mu.Lock() mode := a.mode a.mu.Unlock() switch mode { - case "Paused": + case paused: + // Start Recording go a.input() - case "Stopped": - + mode = running + case stopped: + // Open the audio device and start recording. + err := a.open() + if err != nil { + log.Log(logger.Fatal, "alsa.open failed", "error", err.Error()) + } + go a.input() + mode = running + case running: + return } + a.mu.Lock() + a.mode = mode + a.mu.Unlock() } // Stop will stop recording audio and close -func (a *AudioInput) Stop() { - a.mode = "Stopped" +func (a *audioDevice) Stop() { + a.mu.Lock() + a.mode = stopped + a.mu.Unlock() if a.dev != nil { log.Log(logger.Debug, "Closing", "source", a.source) a.dev.Close() @@ -133,14 +154,14 @@ func (a *AudioInput) Stop() { } -// ChunkSize returns the AudioInput's chunkSize, ie. 
the number of bytes of audio written to output at a time. -func (a *AudioInput) ChunkSize() int { +// ChunkSize returns the audioDevice's chunkSize, ie. the number of bytes of audio written to output at a time. +func (a *audioDevice) ChunkSize() int { return a.chunkSize } // open or re-open the recording device with the given name and prepare it to record. // If name is empty, the first recording device is used. -func (a *AudioInput) open() error { +func (a *audioDevice) open() error { if a.dev != nil { log.Log(logger.Debug, "Closing", "source", a.source) a.dev.Close() @@ -248,16 +269,16 @@ func (a *AudioInput) open() error { // input continously records audio and writes it to the ringbuffer. // Re-opens the device and tries again if ASLA returns an error. -func (a *AudioInput) input() { +func (a *audioDevice) input() { for { a.mu.Lock() mode := a.mode a.mu.Unlock() switch mode { - case "Paused": + case paused: time.Sleep(time.Duration(a.RecPeriod) * time.Second) continue - case "Stopped": + case stopped: break } log.Log(logger.Debug, "Recording audio for period", "seconds", a.RecPeriod) @@ -295,7 +316,7 @@ func (a *AudioInput) input() { // Read reads a full PCM chunk from the ringbuffer, returning the number of bytes read upon success. // Any errors returned are unexpected and should be considered fatal. -func (a *AudioInput) Read(p []byte) (n int, err error) { +func (a *audioDevice) Read(p []byte) (n int, err error) { chunk, err := a.rb.Next(rbNextTimeout) switch err { case nil: @@ -321,7 +342,7 @@ func (a *AudioInput) Read(p []byte) (n int, err error) { // formatBuffer returns an ALSA buffer that has the recording data from the ac's original ALSA buffer but stored // in the desired format specified by the ac's parameters. 
-func (a *AudioInput) formatBuffer() alsa.Buffer { +func (a *audioDevice) formatBuffer() alsa.Buffer { var err error a.mu.Lock() wantChannels := a.Channels diff --git a/revid/audio-input_test.go b/revid/audio-input_test.go index 78115053..a77158a6 100644 --- a/revid/audio-input_test.go +++ b/revid/audio-input_test.go @@ -8,7 +8,7 @@ import ( "bitbucket.org/ausocean/iot/pi/netsender" ) -func TestAudioInputNew(t *testing.T) { +func TestAudioPipeline(t *testing.T) { mts.Meta = meta.New() var logger testLogger ns, err := netsender.New(&logger, nil, nil, nil) @@ -18,7 +18,10 @@ func TestAudioInputNew(t *testing.T) { var c Config c.Logger = &logger - c.Input = Audio + c.Input = File + c.InputPath = "../../test/test-data/av/input/original_8kHz_adpcm_test.pcm" + c.Outputs = []uint8{File} + c.OutputPath = "./test-temp" rv, err := New(c, ns) if err != nil { diff --git a/revid/revid.go b/revid/revid.go index 96d35fbf..9fa2f560 100644 --- a/revid/revid.go +++ b/revid/revid.go @@ -272,7 +272,7 @@ func (r *Revid) setupPipeline(mtsEnc func(dst io.WriteCloser, rate, mediaType in case File: r.setupInput = r.setupInputForFile case Audio: - r.setupInput = r.startAudioInput + r.setupInput = r.startAudioDevice } switch r.config.InputCodec { @@ -613,12 +613,12 @@ func (r *Revid) setupInputForFile() (func() error, error) { // TODO(kortschak): Maybe we want a context.Context-aware parser that we can stop. r.wg.Add(1) - go r.processFrom(f, time.Second/time.Duration(r.config.FrameRate), 0) + go r.processFrom(f, 0, 0) return func() error { return f.Close() }, nil } -// startAudioInput is used to start capturing audio from an audio device and processing it. -func (r *Revid) startAudioInput() (func() error, error) { +// startAudioDevice is used to start capturing audio from an audio device and processing it. 
+func (r *Revid) startAudioDevice() (func() error, error) { ac := &AudioConfig{ SampleRate: r.config.SampleRate, Channels: r.config.Channels, @@ -626,7 +626,7 @@ func (r *Revid) startAudioInput() (func() error, error) { BitDepth: r.config.BitDepth, Codec: r.config.InputCodec, } - ai := NewAudioInput(ac) + ai := NewAudioDevice(ac) r.wg.Add(1) go r.processFrom(ai, time.Second/time.Duration(r.config.WriteRate), ai.ChunkSize()) return func() error { diff --git a/revid/senders.go b/revid/senders.go index 17fa7dc1..af111665 100644 --- a/revid/senders.go +++ b/revid/senders.go @@ -57,7 +57,7 @@ type httpSender struct { log func(lvl int8, msg string, args ...interface{}) } -// newMinimalHttpSender returns a pointer to a new minimalHttpSender. +// newHttpSender returns a pointer to a new httpSender. func newHttpSender(ns *netsender.Sender, log func(lvl int8, msg string, args ...interface{})) *httpSender { return &httpSender{ client: ns, From c58c573cd70b2f700421df2a96300e31397c1a0c Mon Sep 17 00:00:00 2001 From: Trek H Date: Tue, 21 May 2019 12:39:10 +0930 Subject: [PATCH 22/57] revid: changed writeRates and recPeriods to floats --- cmd/revid-cli/main.go | 6 +-- revid/audio-input.go | 9 ++-- revid/audio-input_test.go | 90 ++++++++++++++++++++++++++++++--------- revid/config.go | 2 +- revid/revid.go | 8 ++-- revid/revid_test.go | 2 +- 6 files changed, 83 insertions(+), 34 deletions(-) diff --git a/cmd/revid-cli/main.go b/cmd/revid-cli/main.go index ab55188a..b0fc89a1 100644 --- a/cmd/revid-cli/main.go +++ b/cmd/revid-cli/main.go @@ -138,7 +138,7 @@ func handleFlags() revid.Config { // Audio specific flags. 
sampleRatePtr = flag.Int("SampleRate", 48000, "Sample rate of recorded audio") channelsPtr = flag.Int("Channels", 1, "Record in Mono or Stereo (1 or 2)") - recPeriodPtr = flag.Int("recPeriod", 1, "How many seconds to record at a time") + recPeriodPtr = flag.Float64("recPeriod", 1, "How many seconds to record at a time") bitDepthPtr = flag.Int("bitDepth", 16, "Bit Depth to record audio at.") ) @@ -209,9 +209,9 @@ func handleFlags() revid.Config { switch *inputPtr { case "Audio": - cfg.WriteRate = uint(*recPeriodPtr) + cfg.WriteRate = 1.0 / (*recPeriodPtr) default: - cfg.WriteRate = *frameRatePtr + cfg.WriteRate = float64(*frameRatePtr) } if len(outputs) == 0 { diff --git a/revid/audio-input.go b/revid/audio-input.go index 40e80abb..c19a4d21 100644 --- a/revid/audio-input.go +++ b/revid/audio-input.go @@ -52,6 +52,8 @@ const ( stopped ) +var rates = [8]int{8000, 16000, 32000, 44100, 48000, 88200, 96000, 192000} + var log *logger.Logger // audioDevice holds everything we need to know about the audio input stream. @@ -141,7 +143,7 @@ func (a *audioDevice) Start() { a.mu.Unlock() } -// Stop will stop recording audio and close +// Stop will stop recording audio and close the device func (a *audioDevice) Stop() { a.mu.Lock() a.mode = stopped @@ -178,7 +180,7 @@ func (a *audioDevice) open() error { for _, card := range cards { devices, err := card.Devices() if err != nil { - return err + continue } for _, dev := range devices { if dev.Type != alsa.PCM || !dev.Record { @@ -214,8 +216,7 @@ func (a *audioDevice) open() error { // so that it can be easily downsampled to the wanted rate. // Note: if a card thinks it can record at a rate but can't actually, this can cause a failure. Eg. // the audioinjector is supposed to record at 8000Hz and 16000Hz but it can't due to a firmware issue, - // to fix this 8000 and 16000 must be removed from this slice. 
- rates := [8]int{8000, 16000, 32000, 44100, 48000, 88200, 96000, 192000} + // to fix this 8000 and 16000 must be removed from the rates slice. foundRate := false for i := 0; i < len(rates) && !foundRate; i++ { if rates[i] < a.SampleRate { diff --git a/revid/audio-input_test.go b/revid/audio-input_test.go index a77158a6..0a1cb44e 100644 --- a/revid/audio-input_test.go +++ b/revid/audio-input_test.go @@ -3,32 +3,80 @@ package revid import ( "testing" - "bitbucket.org/ausocean/av/container/mts" - "bitbucket.org/ausocean/av/container/mts/meta" - "bitbucket.org/ausocean/iot/pi/netsender" + "github.com/yobert/alsa" ) -func TestAudioPipeline(t *testing.T) { - mts.Meta = meta.New() - var logger testLogger - ns, err := netsender.New(&logger, nil, nil, nil) - if err != nil { - t.Errorf("netsender.New failed with error %v", err) +func TestAudioDevice(t *testing.T) { + // We want to open a device with a standard configuration. + ac := &AudioConfig{ + SampleRate: 8000, + Channels: 1, + RecPeriod: 1, + BitDepth: 16, + Codec: ADPCM, } - var c Config - c.Logger = &logger - c.Input = File - c.InputPath = "../../test/test-data/av/input/original_8kHz_adpcm_test.pcm" - c.Outputs = []uint8{File} - c.OutputPath = "./test-temp" - - rv, err := New(c, ns) + // Check that a device exists with the desired parameters. 
+ cards, err := alsa.OpenCards() if err != nil { - t.Errorf("revid.New failed with error %v", err) - } else if rv == nil { - t.Errorf("revid.New did not return a new revid") + t.Skip("skipping, no audio card found") } + defer alsa.CloseCards(cards) + var testDev *alsa.Device + for _, card := range cards { + devices, err := card.Devices() + if err != nil { + continue + } + for _, dev := range devices { + if dev.Type != alsa.PCM || !dev.Record { + continue + } + testDev = dev + break + } + } + if testDev == nil { + t.Skip("skipping, no suitable audio device found") + } + _, err = testDev.NegotiateChannels(2) + if err != nil { + t.Skip("skipping, no suitable audio device found") + } + foundRate := false + for i := 0; i < len(rates) && !foundRate; i++ { + if rates[i] < ac.SampleRate { + continue + } + if rates[i]%ac.SampleRate == 0 { + _, err = testDev.NegotiateRate(rates[i]) + if err == nil { + foundRate = true + } + } + } + if !foundRate { + _, err = testDev.NegotiateRate(defaultSampleRate) + if err != nil { + t.Skip("skipping, no suitable audio device found") + } + } + _, err = testDev.NegotiateFormat(alsa.S16_LE) + if err != nil { + t.Skip("skipping, no suitable audio device found") + } + _, err = testDev.NegotiateBufferSize(8192, 16384) + if err != nil { + t.Skip("skipping, no suitable audio device found") + } + if err = testDev.Prepare(); err != nil { + t.Skip("skipping, no suitable audio device found") + } + testDev.Close() - rv.Stop() + ai := NewAudioDevice(ac) + + ai.Start() + + ai.Stop() } diff --git a/revid/config.go b/revid/config.go index d914e221..82a940a1 100644 --- a/revid/config.go +++ b/revid/config.go @@ -53,7 +53,7 @@ type Config struct { IntraRefreshPeriod uint RtpAddress string SendRetry bool - WriteRate uint // How many times a second revid encoders will be written to. + WriteRate float64 // How many times a second revid encoders will be written to. 
// Video Height uint diff --git a/revid/revid.go b/revid/revid.go index 9fa2f560..982910d7 100644 --- a/revid/revid.go +++ b/revid/revid.go @@ -161,8 +161,8 @@ func (r *Revid) reset(config Config) error { r.config.Logger.SetLevel(config.LogLevel) err = r.setupPipeline( - func(dst io.WriteCloser, fps, medType int) (io.WriteCloser, error) { - e := mts.NewEncoder(dst, float64(fps), medType) + func(dst io.WriteCloser, fps float64, medType int) (io.WriteCloser, error) { + e := mts.NewEncoder(dst, fps, medType) return e, nil }, func(dst io.WriteCloser, fps int) (io.WriteCloser, error) { @@ -196,7 +196,7 @@ func (r *Revid) setConfig(config Config) error { // mtsEnc and flvEnc will be called to obtain an mts encoder and flv encoder // respectively. multiWriter will be used to create an ioext.multiWriteCloser // so that encoders can write to multiple senders. -func (r *Revid) setupPipeline(mtsEnc func(dst io.WriteCloser, rate, mediaType int) (io.WriteCloser, error), flvEnc func(dst io.WriteCloser, rate int) (io.WriteCloser, error), multiWriter func(...io.WriteCloser) io.WriteCloser) error { +func (r *Revid) setupPipeline(mtsEnc func(dst io.WriteCloser, rate float64, mediaType int) (io.WriteCloser, error), flvEnc func(dst io.WriteCloser, rate int) (io.WriteCloser, error), multiWriter func(...io.WriteCloser) io.WriteCloser) error { // encoders will hold the encoders that are required for revid's current // configuration. var encoders []io.WriteCloser @@ -246,7 +246,7 @@ func (r *Revid) setupPipeline(mtsEnc func(dst io.WriteCloser, rate, mediaType in } else { mediaType = mts.Video } - e, _ := mtsEnc(mw, int(r.config.WriteRate), mediaType) + e, _ := mtsEnc(mw, r.config.WriteRate, mediaType) encoders = append(encoders, e) } diff --git a/revid/revid_test.go b/revid/revid_test.go index 17739221..ece494ca 100644 --- a/revid/revid_test.go +++ b/revid/revid_test.go @@ -232,7 +232,7 @@ func TestResetEncoderSenderSetup(t *testing.T) { // This logic is what we want to check. 
err = rv.setupPipeline( - func(dst io.WriteCloser, rate int, mediaType int) (io.WriteCloser, error) { + func(dst io.WriteCloser, rate float64, mediaType int) (io.WriteCloser, error) { return &tstMtsEncoder{dst: dst}, nil }, func(dst io.WriteCloser, rate int) (io.WriteCloser, error) { From 17d59014c6f61cc07a58d291eade77f746a9aa62 Mon Sep 17 00:00:00 2001 From: Trek H Date: Wed, 22 May 2019 14:56:58 +0930 Subject: [PATCH 23/57] revid: added concurrency support to start and stop --- revid/audio-input.go | 40 ++++++++++--------- revid/audio-input_test.go | 83 +++++++++++++++++++++++++++------------ revid/config.go | 10 ++--- 3 files changed, 84 insertions(+), 49 deletions(-) diff --git a/revid/audio-input.go b/revid/audio-input.go index c19a4d21..ff480a1c 100644 --- a/revid/audio-input.go +++ b/revid/audio-input.go @@ -41,9 +41,9 @@ import ( const ( logPath = "/var/log/netsender" - rbDuration = 300 // seconds rbTimeout = 100 * time.Millisecond rbNextTimeout = 100 * time.Millisecond + rbLen = 200 ) const ( @@ -77,7 +77,7 @@ type AudioConfig struct { SampleRate int Channels int BitDepth int - RecPeriod int + RecPeriod float64 Codec uint8 } @@ -109,7 +109,6 @@ func NewAudioDevice(cfg *AudioConfig) *audioDevice { // Setup ring buffer to capture audio in periods of a.RecPeriod seconds, and buffer rbDuration seconds in total. a.ab = a.dev.NewBufferDuration(time.Second * time.Duration(a.RecPeriod)) a.chunkSize = (((len(a.ab.Data) / a.dev.BufferFormat().Channels) * a.Channels) / a.dev.BufferFormat().Rate) * a.SampleRate - rbLen := rbDuration / a.RecPeriod a.rb = ring.NewBuffer(rbLen, a.chunkSize, rbTimeout) a.mode = paused @@ -120,13 +119,11 @@ func NewAudioDevice(cfg *AudioConfig) *audioDevice { // Start will start recording audio and writing to the output. 
func (a *audioDevice) Start() { a.mu.Lock() - mode := a.mode - a.mu.Unlock() - switch mode { + switch a.mode { case paused: // Start Recording go a.input() - mode = running + a.mode = running case stopped: // Open the audio device and start recording. err := a.open() @@ -134,25 +131,23 @@ func (a *audioDevice) Start() { log.Log(logger.Fatal, "alsa.open failed", "error", err.Error()) } go a.input() - mode = running + a.mode = running case running: return } - a.mu.Lock() - a.mode = mode a.mu.Unlock() } // Stop will stop recording audio and close the device func (a *audioDevice) Stop() { a.mu.Lock() - a.mode = stopped - a.mu.Unlock() if a.dev != nil { log.Log(logger.Debug, "Closing", "source", a.source) a.dev.Close() a.dev = nil } + a.mode = stopped + a.mu.Unlock() } @@ -273,17 +268,16 @@ func (a *audioDevice) open() error { func (a *audioDevice) input() { for { a.mu.Lock() - mode := a.mode - a.mu.Unlock() - switch mode { + switch a.mode { case paused: + a.mu.Unlock() time.Sleep(time.Duration(a.RecPeriod) * time.Second) continue case stopped: - break + a.mu.Unlock() + return } log.Log(logger.Debug, "Recording audio for period", "seconds", a.RecPeriod) - a.mu.Lock() err := a.dev.Read(a.ab.Data) a.mu.Unlock() if err != nil { @@ -318,6 +312,16 @@ func (a *audioDevice) input() { // Read reads a full PCM chunk from the ringbuffer, returning the number of bytes read upon success. // Any errors returned are unexpected and should be considered fatal. 
func (a *audioDevice) Read(p []byte) (n int, err error) { + a.mu.Lock() + if a.rb == nil { + fmt.Println("READ: RB IS NIL") + } + switch a.mode { + case paused: + return 0, nil + case stopped: + return 0, nil + } chunk, err := a.rb.Next(rbNextTimeout) switch err { case nil: @@ -336,8 +340,8 @@ func (a *audioDevice) Read(p []byte) (n int, err error) { log.Log(logger.Error, "Unexpected error from ring.Read", "error", err.Error()) return n, err } - log.Log(logger.Debug, "Read audio from ringbuffer", "length", n) + a.mu.Unlock() return n, nil } diff --git a/revid/audio-input_test.go b/revid/audio-input_test.go index 0a1cb44e..8f0d888b 100644 --- a/revid/audio-input_test.go +++ b/revid/audio-input_test.go @@ -1,25 +1,20 @@ package revid import ( + "bytes" + "errors" "testing" + "time" + "bitbucket.org/ausocean/av/codec/lex" "github.com/yobert/alsa" ) -func TestAudioDevice(t *testing.T) { - // We want to open a device with a standard configuration. - ac := &AudioConfig{ - SampleRate: 8000, - Channels: 1, - RecPeriod: 1, - BitDepth: 16, - Codec: ADPCM, - } - - // Check that a device exists with the desired parameters. +// Check that a device exists with the given config parameters. 
+func checkDevice(ac *AudioConfig) error { cards, err := alsa.OpenCards() if err != nil { - t.Skip("skipping, no audio card found") + return errors.New("no audio cards found") } defer alsa.CloseCards(cards) var testDev *alsa.Device @@ -37,11 +32,15 @@ func TestAudioDevice(t *testing.T) { } } if testDev == nil { - t.Skip("skipping, no suitable audio device found") + return errors.New("no suitable device found") + } + err = testDev.Open() + if err != nil { + return err } _, err = testDev.NegotiateChannels(2) if err != nil { - t.Skip("skipping, no suitable audio device found") + return err } foundRate := false for i := 0; i < len(rates) && !foundRate; i++ { @@ -58,25 +57,57 @@ func TestAudioDevice(t *testing.T) { if !foundRate { _, err = testDev.NegotiateRate(defaultSampleRate) if err != nil { - t.Skip("skipping, no suitable audio device found") + return err } } - _, err = testDev.NegotiateFormat(alsa.S16_LE) + var aFmt alsa.FormatType + switch ac.BitDepth { + case 16: + aFmt = alsa.S16_LE + case 32: + aFmt = alsa.S32_LE + default: + return errors.New("unsupported bitdepth") + } + _, err = testDev.NegotiateFormat(aFmt) if err != nil { - t.Skip("skipping, no suitable audio device found") + return err } _, err = testDev.NegotiateBufferSize(8192, 16384) if err != nil { - t.Skip("skipping, no suitable audio device found") + return err } if err = testDev.Prepare(); err != nil { - t.Skip("skipping, no suitable audio device found") + return err } - testDev.Close() - - ai := NewAudioDevice(ac) - - ai.Start() - - ai.Stop() + if testDev != nil { + testDev.Close() + } + return nil +} + +func TestAudio(t *testing.T) { + // We want to open a device with a standard configuration. + ac := &AudioConfig{ + SampleRate: 8000, + Channels: 1, + RecPeriod: 0.01, + BitDepth: 16, + Codec: ADPCM, + } + + // Skip if there are no suitable devices to test with. + err := checkDevice(ac) + if err != nil { + t.Error(err) + } + + // Create a new audioDevice, start, read/lex, and then stop it. 
+ ai := NewAudioDevice(ac) + dst := bytes.NewBuffer(make([]byte, 0, ai.ChunkSize()*4)) + ai.Start() + go lex.ADPCM(dst, ai, time.Duration(ac.RecPeriod), ai.ChunkSize()) + time.Sleep(time.Millisecond * 10) + ai.Stop() + } diff --git a/revid/config.go b/revid/config.go index 82a940a1..e4ff1a46 100644 --- a/revid/config.go +++ b/revid/config.go @@ -74,10 +74,10 @@ type Config struct { AutoWhiteBalance string // Audio - SampleRate int // Samples a second (Hz). - RecPeriod int // How many seconds to record at a time. - Channels int // Number of audio channels, 1 for mono, 2 for stereo. - BitDepth int // Sample bit depth. + SampleRate int // Samples a second (Hz). + RecPeriod float64 // How many seconds to record at a time. + Channels int // Number of audio channels, 1 for mono, 2 for stereo. + BitDepth int // Sample bit depth. } // Possible modes for raspivid --exposure parameter. @@ -168,7 +168,7 @@ const ( defaultSampleRate = 48000 defaultBitDepth = 16 defaultChannels = 1 - defaultRecPeriod = 1 + defaultRecPeriod = 1.0 ) // Validate checks for any errors in the config fields and defaults settings From dd66c58f406fb040c76feccfc35fed3a962814b0 Mon Sep 17 00:00:00 2001 From: Trek H Date: Thu, 23 May 2019 00:19:44 +0930 Subject: [PATCH 24/57] revid: concurrency and testing fixed a situation where a deadlock can occur and also found and fixed some issues while testing different initialisations on AudioDevice. --- revid/audio-input.go | 101 ++++++++++++++++++++++++++------------ revid/audio-input_test.go | 16 +++--- revid/revid.go | 2 +- 3 files changed, 79 insertions(+), 40 deletions(-) diff --git a/revid/audio-input.go b/revid/audio-input.go index ff480a1c..1b52d8a4 100644 --- a/revid/audio-input.go +++ b/revid/audio-input.go @@ -52,14 +52,15 @@ const ( stopped ) -var rates = [8]int{8000, 16000, 32000, 44100, 48000, 88200, 96000, 192000} +// Rates contains the audio sample rates used by revid. 
+var Rates = [8]int{8000, 16000, 32000, 44100, 48000, 88200, 96000, 192000} var log *logger.Logger -// audioDevice holds everything we need to know about the audio input stream. +// AudioDevice holds everything we need to know about the audio input stream. // Note: At 44100 Hz sample rate, 2 channels and 16-bit samples, a period of 5 seconds // results in PCM data chunks of 882000 bytes. A longer period exceeds datastore's 1MB blob limit. -type audioDevice struct { +type AudioDevice struct { mu sync.Mutex source string // Name of audio source, or empty for the default source. mode uint8 // Operating mode, either running, paused, or stopped. @@ -72,7 +73,7 @@ type audioDevice struct { *AudioConfig } -// AudioConfig provides parameters used by audioDevice. +// AudioConfig provides parameters used by AudioDevice. type AudioConfig struct { SampleRate int Channels int @@ -81,8 +82,8 @@ type AudioConfig struct { Codec uint8 } -// NewAudioDevice initializes and returns an audioDevice struct which can be started, read from, and stopped. -func NewAudioDevice(cfg *AudioConfig) *audioDevice { +// NewAudioDevice initializes and returns an AudioDevice struct which can be started, read from, and stopped. +func NewAudioDevice(cfg *AudioConfig) *AudioDevice { // Initialize logger. logLevel := int(logger.Debug) validLogLevel := true @@ -97,7 +98,7 @@ func NewAudioDevice(cfg *AudioConfig) *audioDevice { log.Log(logger.Error, "Invalid log level was defaulted to Info") } - a := &audioDevice{} + a := &AudioDevice{} a.AudioConfig = cfg // Open the requested audio device. @@ -107,17 +108,28 @@ func NewAudioDevice(cfg *AudioConfig) *audioDevice { } // Setup ring buffer to capture audio in periods of a.RecPeriod seconds, and buffer rbDuration seconds in total. 
- a.ab = a.dev.NewBufferDuration(time.Second * time.Duration(a.RecPeriod)) - a.chunkSize = (((len(a.ab.Data) / a.dev.BufferFormat().Channels) * a.Channels) / a.dev.BufferFormat().Rate) * a.SampleRate + a.ab = a.dev.NewBufferDuration(time.Duration(a.RecPeriod * float64(time.Second))) + cs := (float64((len(a.ab.Data)/a.dev.BufferFormat().Channels)*a.Channels) / float64(a.dev.BufferFormat().Rate)) * float64(a.SampleRate) + if cs < 1 { + log.Log(logger.Fatal, "given AudioConfig parameters are too small") + } + a.chunkSize = int(cs) a.rb = ring.NewBuffer(rbLen, a.chunkSize, rbTimeout) - + if a.rb == nil { + fmt.Println("NEW:", "rb: NIL", a.mode) + fmt.Println(rbLen, a.chunkSize, rbTimeout) + fmt.Println(len(a.ab.Data), a.dev.BufferFormat().Channels, a.Channels, a.dev.BufferFormat().Rate, a.SampleRate) + } else { + fmt.Println("NEW:", "rb: VALID", a.mode) + } a.mode = paused return a } // Start will start recording audio and writing to the output. -func (a *audioDevice) Start() { +func (a *AudioDevice) Start() { + fmt.Println("start lock") a.mu.Lock() switch a.mode { case paused: @@ -136,10 +148,12 @@ func (a *audioDevice) Start() { return } a.mu.Unlock() + fmt.Println("start unlock") } // Stop will stop recording audio and close the device -func (a *audioDevice) Stop() { +func (a *AudioDevice) Stop() { + fmt.Println("stop lock") a.mu.Lock() if a.dev != nil { log.Log(logger.Debug, "Closing", "source", a.source) @@ -148,17 +162,18 @@ func (a *audioDevice) Stop() { } a.mode = stopped a.mu.Unlock() + fmt.Println("stop unlock") } -// ChunkSize returns the audioDevice's chunkSize, ie. the number of bytes of audio written to output at a time. -func (a *audioDevice) ChunkSize() int { +// ChunkSize returns the AudioDevice's chunkSize, ie. the number of bytes of audio written to output at a time. +func (a *AudioDevice) ChunkSize() int { return a.chunkSize } // open or re-open the recording device with the given name and prepare it to record. 
// If name is empty, the first recording device is used. -func (a *audioDevice) open() error { +func (a *AudioDevice) open() error { if a.dev != nil { log.Log(logger.Debug, "Closing", "source", a.source) a.dev.Close() @@ -211,17 +226,17 @@ func (a *audioDevice) open() error { // so that it can be easily downsampled to the wanted rate. // Note: if a card thinks it can record at a rate but can't actually, this can cause a failure. Eg. // the audioinjector is supposed to record at 8000Hz and 16000Hz but it can't due to a firmware issue, - // to fix this 8000 and 16000 must be removed from the rates slice. + // to fix this 8000 and 16000 must be removed from the Rates slice. foundRate := false - for i := 0; i < len(rates) && !foundRate; i++ { - if rates[i] < a.SampleRate { + for i := 0; i < len(Rates) && !foundRate; i++ { + if Rates[i] < a.SampleRate { continue } - if rates[i]%a.SampleRate == 0 { - _, err = a.dev.NegotiateRate(rates[i]) + if Rates[i]%a.SampleRate == 0 { + _, err = a.dev.NegotiateRate(Rates[i]) if err == nil { foundRate = true - log.Log(logger.Debug, "Sample rate set", "rate", rates[i]) + log.Log(logger.Debug, "Sample rate set", "rate", Rates[i]) } } } @@ -265,63 +280,88 @@ func (a *audioDevice) open() error { // input continously records audio and writes it to the ringbuffer. // Re-opens the device and tries again if ASLA returns an error. 
-func (a *audioDevice) input() { +func (a *AudioDevice) input() { for { a.mu.Lock() + fmt.Println("input lock") + if a.dev == nil { + fmt.Println("INPUT:", "dev: NIL", a.mode) + } else { + fmt.Println("INPUT:", "dev: VALID", a.mode) + } + if a.rb == nil { + fmt.Println("INPUT:", "rb: NIL", a.mode) + } else { + fmt.Println("INPUT:", "rb: VALID", a.mode) + } switch a.mode { case paused: a.mu.Unlock() + fmt.Println("input unlock") time.Sleep(time.Duration(a.RecPeriod) * time.Second) continue case stopped: a.mu.Unlock() + fmt.Println("input unlock") return } log.Log(logger.Debug, "Recording audio for period", "seconds", a.RecPeriod) + fmt.Println("LEN:", len(a.ab.Data)) err := a.dev.Read(a.ab.Data) - a.mu.Unlock() + fmt.Println("input read") if err != nil { log.Log(logger.Debug, "Device.Read failed", "error", err.Error()) - a.mu.Lock() err = a.open() // re-open if err != nil { + a.mu.Unlock() + fmt.Println("input unlock") log.Log(logger.Fatal, "alsa.open failed", "error", err.Error()) } a.mu.Unlock() + fmt.Println("input unlock") continue } toWrite := a.formatBuffer() + fmt.Println("input point") log.Log(logger.Debug, "Audio format conversion has been performed where needed") var n int n, err = a.rb.Write(toWrite.Data) + fmt.Println("input write") switch err { case nil: log.Log(logger.Debug, "Wrote audio to ringbuffer", "length", n) case ring.ErrDropped: log.Log(logger.Warning, "Dropped audio") default: + a.mu.Unlock() + fmt.Println("input unlock") log.Log(logger.Error, "Unexpected ringbuffer error", "error", err.Error()) return } + a.mu.Unlock() + fmt.Println("input unlock") } } // Read reads a full PCM chunk from the ringbuffer, returning the number of bytes read upon success. // Any errors returned are unexpected and should be considered fatal. 
-func (a *audioDevice) Read(p []byte) (n int, err error) { +func (a *AudioDevice) Read(p []byte) (n int, err error) { + fmt.Println("read lock") a.mu.Lock() - if a.rb == nil { - fmt.Println("READ: RB IS NIL") - } switch a.mode { case paused: return 0, nil case stopped: return 0, nil } + if a.rb == nil { + fmt.Println("READ:", "NIL", a.mode) + } else { + fmt.Println("READ:", "VALID", a.mode) + } chunk, err := a.rb.Next(rbNextTimeout) switch err { case nil: @@ -342,17 +382,16 @@ func (a *audioDevice) Read(p []byte) (n int, err error) { } log.Log(logger.Debug, "Read audio from ringbuffer", "length", n) a.mu.Unlock() + fmt.Println("read unlock") return n, nil } // formatBuffer returns an ALSA buffer that has the recording data from the ac's original ALSA buffer but stored // in the desired format specified by the ac's parameters. -func (a *audioDevice) formatBuffer() alsa.Buffer { +func (a *AudioDevice) formatBuffer() alsa.Buffer { var err error - a.mu.Lock() wantChannels := a.Channels wantRate := a.SampleRate - a.mu.Unlock() // If nothing needs to be changed, return the original. 
if a.ab.Format.Channels == wantChannels && a.ab.Format.Rate == wantRate { diff --git a/revid/audio-input_test.go b/revid/audio-input_test.go index 8f0d888b..f7e4b198 100644 --- a/revid/audio-input_test.go +++ b/revid/audio-input_test.go @@ -43,12 +43,12 @@ func checkDevice(ac *AudioConfig) error { return err } foundRate := false - for i := 0; i < len(rates) && !foundRate; i++ { - if rates[i] < ac.SampleRate { + for i := 0; i < len(Rates) && !foundRate; i++ { + if Rates[i] < ac.SampleRate { continue } - if rates[i]%ac.SampleRate == 0 { - _, err = testDev.NegotiateRate(rates[i]) + if Rates[i]%ac.SampleRate == 0 { + _, err = testDev.NegotiateRate(Rates[i]) if err == nil { foundRate = true } @@ -91,7 +91,7 @@ func TestAudio(t *testing.T) { ac := &AudioConfig{ SampleRate: 8000, Channels: 1, - RecPeriod: 0.01, + RecPeriod: 0.1, BitDepth: 16, Codec: ADPCM, } @@ -104,10 +104,10 @@ func TestAudio(t *testing.T) { // Create a new audioDevice, start, read/lex, and then stop it. ai := NewAudioDevice(ac) - dst := bytes.NewBuffer(make([]byte, 0, ai.ChunkSize()*4)) + dst := bytes.NewBuffer(make([]byte, 0)) ai.Start() - go lex.ADPCM(dst, ai, time.Duration(ac.RecPeriod), ai.ChunkSize()) - time.Sleep(time.Millisecond * 10) + go lex.ADPCM(dst, ai, time.Duration(ac.RecPeriod*float64(time.Second)), ai.ChunkSize()) + time.Sleep(time.Millisecond * 30) ai.Stop() } diff --git a/revid/revid.go b/revid/revid.go index 982910d7..2619c493 100644 --- a/revid/revid.go +++ b/revid/revid.go @@ -628,7 +628,7 @@ func (r *Revid) startAudioDevice() (func() error, error) { } ai := NewAudioDevice(ac) r.wg.Add(1) - go r.processFrom(ai, time.Second/time.Duration(r.config.WriteRate), ai.ChunkSize()) + go r.processFrom(ai, time.Duration(float64(time.Second)/r.config.WriteRate), ai.ChunkSize()) return func() error { ai.Stop() return nil From 5225896924658d6e2416931de91f9a162734c62b Mon Sep 17 00:00:00 2001 From: Trek H Date: Thu, 23 May 2019 01:23:51 +0930 Subject: [PATCH 25/57] revid: gave AudioDevice a 
logger --- revid/audio-input.go | 65 +++++++++++++++++++-------------------- revid/audio-input_test.go | 4 +-- revid/revid_test.go | 2 +- 3 files changed, 35 insertions(+), 36 deletions(-) diff --git a/revid/audio-input.go b/revid/audio-input.go index 1b52d8a4..2877b497 100644 --- a/revid/audio-input.go +++ b/revid/audio-input.go @@ -55,12 +55,11 @@ const ( // Rates contains the audio sample rates used by revid. var Rates = [8]int{8000, 16000, 32000, 44100, 48000, 88200, 96000, 192000} -var log *logger.Logger - // AudioDevice holds everything we need to know about the audio input stream. // Note: At 44100 Hz sample rate, 2 channels and 16-bit samples, a period of 5 seconds // results in PCM data chunks of 882000 bytes. A longer period exceeds datastore's 1MB blob limit. type AudioDevice struct { + l Logger mu sync.Mutex source string // Name of audio source, or empty for the default source. mode uint8 // Operating mode, either running, paused, or stopped. @@ -84,6 +83,9 @@ type AudioConfig struct { // NewAudioDevice initializes and returns an AudioDevice struct which can be started, read from, and stopped. func NewAudioDevice(cfg *AudioConfig) *AudioDevice { + a := &AudioDevice{} + a.AudioConfig = cfg + // Initialize logger. logLevel := int(logger.Debug) validLogLevel := true @@ -92,26 +94,23 @@ func NewAudioDevice(cfg *AudioConfig) *AudioDevice { validLogLevel = false } logSender := smartlogger.New(logPath) - log = logger.New(int8(logLevel), &logSender.LogRoller) - log.Log(logger.Info, "log-netsender: Logger Initialized") + a.l = logger.New(int8(logLevel), &logSender.LogRoller) + a.l.Log(logger.Info, "log-netsender: Logger Initialized") if !validLogLevel { - log.Log(logger.Error, "Invalid log level was defaulted to Info") + a.l.Log(logger.Error, "Invalid log level was defaulted to Info") } - a := &AudioDevice{} - a.AudioConfig = cfg - // Open the requested audio device. 
err := a.open() if err != nil { - log.Log(logger.Fatal, "alsa.open failed", "error", err.Error()) + a.l.Log(logger.Fatal, "alsa.open failed", "error", err.Error()) } // Setup ring buffer to capture audio in periods of a.RecPeriod seconds, and buffer rbDuration seconds in total. a.ab = a.dev.NewBufferDuration(time.Duration(a.RecPeriod * float64(time.Second))) cs := (float64((len(a.ab.Data)/a.dev.BufferFormat().Channels)*a.Channels) / float64(a.dev.BufferFormat().Rate)) * float64(a.SampleRate) if cs < 1 { - log.Log(logger.Fatal, "given AudioConfig parameters are too small") + a.l.Log(logger.Fatal, "given AudioConfig parameters are too small") } a.chunkSize = int(cs) a.rb = ring.NewBuffer(rbLen, a.chunkSize, rbTimeout) @@ -140,7 +139,7 @@ func (a *AudioDevice) Start() { // Open the audio device and start recording. err := a.open() if err != nil { - log.Log(logger.Fatal, "alsa.open failed", "error", err.Error()) + a.l.Log(logger.Fatal, "alsa.open failed", "error", err.Error()) } go a.input() a.mode = running @@ -156,7 +155,7 @@ func (a *AudioDevice) Stop() { fmt.Println("stop lock") a.mu.Lock() if a.dev != nil { - log.Log(logger.Debug, "Closing", "source", a.source) + a.l.Log(logger.Debug, "Closing", "source", a.source) a.dev.Close() a.dev = nil } @@ -175,11 +174,11 @@ func (a *AudioDevice) ChunkSize() int { // If name is empty, the first recording device is used. 
func (a *AudioDevice) open() error { if a.dev != nil { - log.Log(logger.Debug, "Closing", "source", a.source) + a.l.Log(logger.Debug, "Closing", "source", a.source) a.dev.Close() a.dev = nil } - log.Log(logger.Debug, "Opening", "source", a.source) + a.l.Log(logger.Debug, "Opening", "source", a.source) cards, err := alsa.OpenCards() if err != nil { @@ -206,14 +205,14 @@ func (a *AudioDevice) open() error { if a.dev == nil { return errors.New("No audio source found") } - log.Log(logger.Debug, "Found audio source", "source", a.dev.Title) + a.l.Log(logger.Debug, "Found audio source", "source", a.dev.Title) // ToDo: time out if Open takes too long. err = a.dev.Open() if err != nil { return err } - log.Log(logger.Debug, "Opened audio source") + a.l.Log(logger.Debug, "Opened audio source") // 2 channels is what most devices need to record in. If mono is requested, // the recording will be converted in formatBuffer(). @@ -236,19 +235,19 @@ func (a *AudioDevice) open() error { _, err = a.dev.NegotiateRate(Rates[i]) if err == nil { foundRate = true - log.Log(logger.Debug, "Sample rate set", "rate", Rates[i]) + a.l.Log(logger.Debug, "Sample rate set", "rate", Rates[i]) } } } // If no easily divisible rate is found, then use the default rate. 
if !foundRate { - log.Log(logger.Warning, "Unable to sample at requested rate, default used.", "rateRequested", a.SampleRate) + a.l.Log(logger.Warning, "Unable to sample at requested rate, default used.", "rateRequested", a.SampleRate) _, err = a.dev.NegotiateRate(defaultSampleRate) if err != nil { return err } - log.Log(logger.Debug, "Sample rate set", "rate", defaultSampleRate) + a.l.Log(logger.Debug, "Sample rate set", "rate", defaultSampleRate) } var aFmt alsa.FormatType @@ -274,7 +273,7 @@ func (a *AudioDevice) open() error { if err = a.dev.Prepare(); err != nil { return err } - log.Log(logger.Debug, "Successfully negotiated ALSA params") + a.l.Log(logger.Debug, "Successfully negotiated ALSA params") return nil } @@ -305,17 +304,17 @@ func (a *AudioDevice) input() { fmt.Println("input unlock") return } - log.Log(logger.Debug, "Recording audio for period", "seconds", a.RecPeriod) + a.l.Log(logger.Debug, "Recording audio for period", "seconds", a.RecPeriod) fmt.Println("LEN:", len(a.ab.Data)) err := a.dev.Read(a.ab.Data) fmt.Println("input read") if err != nil { - log.Log(logger.Debug, "Device.Read failed", "error", err.Error()) + a.l.Log(logger.Debug, "Device.Read failed", "error", err.Error()) err = a.open() // re-open if err != nil { a.mu.Unlock() fmt.Println("input unlock") - log.Log(logger.Fatal, "alsa.open failed", "error", err.Error()) + a.l.Log(logger.Fatal, "alsa.open failed", "error", err.Error()) } a.mu.Unlock() fmt.Println("input unlock") @@ -325,20 +324,20 @@ func (a *AudioDevice) input() { toWrite := a.formatBuffer() fmt.Println("input point") - log.Log(logger.Debug, "Audio format conversion has been performed where needed") + a.l.Log(logger.Debug, "Audio format conversion has been performed where needed") var n int n, err = a.rb.Write(toWrite.Data) fmt.Println("input write") switch err { case nil: - log.Log(logger.Debug, "Wrote audio to ringbuffer", "length", n) + a.l.Log(logger.Debug, "Wrote audio to ringbuffer", "length", n) case 
ring.ErrDropped: - log.Log(logger.Warning, "Dropped audio") + a.l.Log(logger.Warning, "Dropped audio") default: a.mu.Unlock() fmt.Println("input unlock") - log.Log(logger.Error, "Unexpected ringbuffer error", "error", err.Error()) + a.l.Log(logger.Error, "Unexpected ringbuffer error", "error", err.Error()) return } a.mu.Unlock() @@ -369,18 +368,18 @@ func (a *AudioDevice) Read(p []byte) (n int, err error) { case ring.ErrTimeout: return 0, nil case io.EOF: - log.Log(logger.Error, "Unexpected EOF from ring.Next") + a.l.Log(logger.Error, "Unexpected EOF from ring.Next") return 0, io.ErrUnexpectedEOF default: - log.Log(logger.Error, "Unexpected error from ring.Next", "error", err.Error()) + a.l.Log(logger.Error, "Unexpected error from ring.Next", "error", err.Error()) return 0, err } n, err = io.ReadFull(a.rb, p[:chunk.Len()]) if err != nil { - log.Log(logger.Error, "Unexpected error from ring.Read", "error", err.Error()) + a.l.Log(logger.Error, "Unexpected error from ring.Read", "error", err.Error()) return n, err } - log.Log(logger.Debug, "Read audio from ringbuffer", "length", n) + a.l.Log(logger.Debug, "Read audio from ringbuffer", "length", n) a.mu.Unlock() fmt.Println("read unlock") return n, nil @@ -405,7 +404,7 @@ func (a *AudioDevice) formatBuffer() alsa.Buffer { // Convert channels. 
if a.ab.Format.Channels == 2 && wantChannels == 1 { if formatted.Data, err = pcm.StereoToMono(a.ab); err != nil { - log.Log(logger.Warning, "Channel conversion failed, audio has remained stereo", "error", err.Error()) + a.l.Log(logger.Warning, "Channel conversion failed, audio has remained stereo", "error", err.Error()) } else { formatted.Format.Channels = 1 } @@ -422,7 +421,7 @@ func (a *AudioDevice) formatBuffer() alsa.Buffer { formatted.Data, err = pcm.Resample(a.ab, wantRate) } if err != nil { - log.Log(logger.Warning, "Rate conversion failed, audio has remained original rate", "error", err.Error()) + a.l.Log(logger.Warning, "Rate conversion failed, audio has remained original rate", "error", err.Error()) } else { formatted.Format.Rate = wantRate } diff --git a/revid/audio-input_test.go b/revid/audio-input_test.go index f7e4b198..9152e2c6 100644 --- a/revid/audio-input_test.go +++ b/revid/audio-input_test.go @@ -106,8 +106,8 @@ func TestAudio(t *testing.T) { ai := NewAudioDevice(ac) dst := bytes.NewBuffer(make([]byte, 0)) ai.Start() + num := 3 // How many 'ac.RecPeriod's to record. go lex.ADPCM(dst, ai, time.Duration(ac.RecPeriod*float64(time.Second)), ai.ChunkSize()) - time.Sleep(time.Millisecond * 30) + time.Sleep(time.Millisecond * 100 * time.Duration(num)) ai.Stop() - } diff --git a/revid/revid_test.go b/revid/revid_test.go index ece494ca..fa5b7cb5 100644 --- a/revid/revid_test.go +++ b/revid/revid_test.go @@ -41,7 +41,7 @@ import ( const raspividPath = "/usr/local/bin/raspivid" // Suppress all test logging, except for t.Errorf output. -var silent bool +var silent bool = true // TestRaspivid tests that raspivid starts correctly. // It is intended to be run on a Raspberry Pi. 
From c2b5ee0574ccbf8d44fdcaa076973322eb4c3a17 Mon Sep 17 00:00:00 2001 From: Trek H Date: Wed, 29 May 2019 02:06:58 +0930 Subject: [PATCH 26/57] revid: simplified audio device read write concurrency --- revid/audio-input.go | 208 ++++++++++++++++---------------------- revid/audio-input_test.go | 16 ++- revid/revid.go | 14 ++- 3 files changed, 112 insertions(+), 126 deletions(-) diff --git a/revid/audio-input.go b/revid/audio-input.go index 2877b497..af3c6197 100644 --- a/revid/audio-input.go +++ b/revid/audio-input.go @@ -62,7 +62,11 @@ type AudioDevice struct { l Logger mu sync.Mutex source string // Name of audio source, or empty for the default source. - mode uint8 // Operating mode, either running, paused, or stopped. + // Operating mode, either running, paused, or stopped. + // "running" means the input goroutine is reading from the ALSA device and writing to the ringbuffer. + // "paused" means the input routine is sleeping until unpaused or stopped. + // "stopped" means the input routine is stopped and the ALSA device is closed. + mode uint8 dev *alsa.Device // Audio input device. ab alsa.Buffer // ALSA's buffer. @@ -82,7 +86,7 @@ type AudioConfig struct { } // NewAudioDevice initializes and returns an AudioDevice struct which can be started, read from, and stopped. -func NewAudioDevice(cfg *AudioConfig) *AudioDevice { +func NewAudioDevice(cfg *AudioConfig) (*AudioDevice, error) { a := &AudioDevice{} a.AudioConfig = cfg @@ -103,89 +107,80 @@ func NewAudioDevice(cfg *AudioConfig) *AudioDevice { // Open the requested audio device. err := a.open() if err != nil { - a.l.Log(logger.Fatal, "alsa.open failed", "error", err.Error()) + a.l.Log(logger.Error, "failed to open audio device", "error", err.Error()) + return nil, errors.New("failed to open audio device") } - // Setup ring buffer to capture audio in periods of a.RecPeriod seconds, and buffer rbDuration seconds in total. 
+ // Setup ring buffer to capture audio in periods of a.RecPeriod seconds and buffer rbDuration seconds in total. a.ab = a.dev.NewBufferDuration(time.Duration(a.RecPeriod * float64(time.Second))) cs := (float64((len(a.ab.Data)/a.dev.BufferFormat().Channels)*a.Channels) / float64(a.dev.BufferFormat().Rate)) * float64(a.SampleRate) if cs < 1 { - a.l.Log(logger.Fatal, "given AudioConfig parameters are too small") + a.l.Log(logger.Error, "given AudioConfig parameters are too small", "error", err.Error()) + return nil, errors.New("given AudioConfig parameters are too small") } a.chunkSize = int(cs) a.rb = ring.NewBuffer(rbLen, a.chunkSize, rbTimeout) - if a.rb == nil { - fmt.Println("NEW:", "rb: NIL", a.mode) - fmt.Println(rbLen, a.chunkSize, rbTimeout) - fmt.Println(len(a.ab.Data), a.dev.BufferFormat().Channels, a.Channels, a.dev.BufferFormat().Rate, a.SampleRate) - } else { - fmt.Println("NEW:", "rb: VALID", a.mode) - } + a.mode = paused + go a.input() - return a + return a, nil } -// Start will start recording audio and writing to the output. -func (a *AudioDevice) Start() { - fmt.Println("start lock") +// Start will start recording audio and writing to the ringbuffer. +func (a *AudioDevice) Start() error { a.mu.Lock() - switch a.mode { - case paused: - // Start Recording - go a.input() - a.mode = running - case stopped: - // Open the audio device and start recording. - err := a.open() - if err != nil { - a.l.Log(logger.Fatal, "alsa.open failed", "error", err.Error()) - } - go a.input() - a.mode = running - case running: - return - } + mode := a.mode a.mu.Unlock() - fmt.Println("start unlock") + switch mode { + case paused: + a.mu.Lock() + a.mode = running + a.mu.Unlock() + return nil + case stopped: + // TODO(Trek): Make this reopen device and start recording. 
+ return errors.New("device is stopped") + case running: + return nil + default: + return errors.New("invalid mode") + } + return nil } -// Stop will stop recording audio and close the device +// Stop will stop recording audio and close the device. func (a *AudioDevice) Stop() { - fmt.Println("stop lock") a.mu.Lock() - if a.dev != nil { - a.l.Log(logger.Debug, "Closing", "source", a.source) - a.dev.Close() - a.dev = nil - } a.mode = stopped a.mu.Unlock() - fmt.Println("stop unlock") - } -// ChunkSize returns the AudioDevice's chunkSize, ie. the number of bytes of audio written to output at a time. +// ChunkSize returns the number of bytes written to the ringbuffer per a.RecPeriod. func (a *AudioDevice) ChunkSize() int { return a.chunkSize } -// open or re-open the recording device with the given name and prepare it to record. +// open the recording device with the given name and prepare it to record. // If name is empty, the first recording device is used. func (a *AudioDevice) open() error { + // Close any existing device. if a.dev != nil { - a.l.Log(logger.Debug, "Closing", "source", a.source) + a.l.Log(logger.Debug, "closing device", "source", a.source) a.dev.Close() a.dev = nil } - a.l.Log(logger.Debug, "Opening", "source", a.source) + // Open sound card and open recording device. + a.l.Log(logger.Debug, "opening sound card") cards, err := alsa.OpenCards() if err != nil { + a.l.Log(logger.Debug, "failed to open sound card") return err } defer alsa.CloseCards(cards) + a.l.Log(logger.Debug, "finding audio device") for _, card := range cards { devices, err := card.Devices() if err != nil { @@ -201,18 +196,17 @@ func (a *AudioDevice) open() error { } } } - if a.dev == nil { - return errors.New("No audio source found") + a.l.Log(logger.Debug, "failed to find audio device") + return errors.New("no audio device found") } - a.l.Log(logger.Debug, "Found audio source", "source", a.dev.Title) - // ToDo: time out if Open takes too long. 
+ a.l.Log(logger.Debug, "opening audio device", "source", a.dev.Title) err = a.dev.Open() if err != nil { + a.l.Log(logger.Debug, "failed to open audio device") return err } - a.l.Log(logger.Debug, "Opened audio source") // 2 channels is what most devices need to record in. If mono is requested, // the recording will be converted in formatBuffer(). @@ -223,9 +217,9 @@ func (a *AudioDevice) open() error { // Try to negotiate a rate to record in that is divisible by the wanted rate // so that it can be easily downsampled to the wanted rate. - // Note: if a card thinks it can record at a rate but can't actually, this can cause a failure. Eg. - // the audioinjector is supposed to record at 8000Hz and 16000Hz but it can't due to a firmware issue, - // to fix this 8000 and 16000 must be removed from the Rates slice. + // Note: if a card thinks it can record at a rate but can't actually, this can cause a failure. + // Eg. the audioinjector sound card is supposed to record at 8000Hz and 16000Hz but it can't due to a firmware issue, + // a fix for this is to remove 8000 and 16000 from the Rates slice. foundRate := false for i := 0; i < len(Rates) && !foundRate; i++ { if Rates[i] < a.SampleRate { @@ -281,107 +275,77 @@ func (a *AudioDevice) open() error { // Re-opens the device and tries again if ASLA returns an error. func (a *AudioDevice) input() { for { + // Check mode. 
a.mu.Lock() - fmt.Println("input lock") - if a.dev == nil { - fmt.Println("INPUT:", "dev: NIL", a.mode) - } else { - fmt.Println("INPUT:", "dev: VALID", a.mode) - } - if a.rb == nil { - fmt.Println("INPUT:", "rb: NIL", a.mode) - } else { - fmt.Println("INPUT:", "rb: VALID", a.mode) - } - switch a.mode { + mode := a.mode + a.mu.Unlock() + switch mode { case paused: - a.mu.Unlock() - fmt.Println("input unlock") time.Sleep(time.Duration(a.RecPeriod) * time.Second) continue case stopped: - a.mu.Unlock() - fmt.Println("input unlock") + if a.dev != nil { + a.l.Log(logger.Debug, "closing audio device", "source", a.source) + a.dev.Close() + a.dev = nil + } return } - a.l.Log(logger.Debug, "Recording audio for period", "seconds", a.RecPeriod) - fmt.Println("LEN:", len(a.ab.Data)) + + // Read from audio device. + a.l.Log(logger.Debug, "recording audio for period", "seconds", a.RecPeriod) err := a.dev.Read(a.ab.Data) - fmt.Println("input read") if err != nil { - a.l.Log(logger.Debug, "Device.Read failed", "error", err.Error()) + a.l.Log(logger.Debug, "read failed", "error", err.Error()) err = a.open() // re-open if err != nil { - a.mu.Unlock() - fmt.Println("input unlock") - a.l.Log(logger.Fatal, "alsa.open failed", "error", err.Error()) + a.l.Log(logger.Fatal, "reopening device failed", "error", err.Error()) + return } - a.mu.Unlock() - fmt.Println("input unlock") continue } + // Process audio. + a.l.Log(logger.Debug, "processing audio") toWrite := a.formatBuffer() - fmt.Println("input point") - a.l.Log(logger.Debug, "Audio format conversion has been performed where needed") - - var n int - n, err = a.rb.Write(toWrite.Data) - fmt.Println("input write") + // Write audio to ringbuffer. 
+ n, err := a.rb.Write(toWrite.Data) switch err { case nil: - a.l.Log(logger.Debug, "Wrote audio to ringbuffer", "length", n) + a.l.Log(logger.Debug, "wrote audio to ringbuffer", "length", n) case ring.ErrDropped: - a.l.Log(logger.Warning, "Dropped audio") + a.l.Log(logger.Warning, "old audio data overwritten") default: - a.mu.Unlock() - fmt.Println("input unlock") - a.l.Log(logger.Error, "Unexpected ringbuffer error", "error", err.Error()) + a.l.Log(logger.Error, "unexpected ringbuffer error", "error", err.Error()) return } - a.mu.Unlock() - fmt.Println("input unlock") } } // Read reads a full PCM chunk from the ringbuffer, returning the number of bytes read upon success. // Any errors returned are unexpected and should be considered fatal. func (a *AudioDevice) Read(p []byte) (n int, err error) { - fmt.Println("read lock") - a.mu.Lock() - switch a.mode { - case paused: - return 0, nil - case stopped: - return 0, nil - } - if a.rb == nil { - fmt.Println("READ:", "NIL", a.mode) - } else { - fmt.Println("READ:", "VALID", a.mode) - } - chunk, err := a.rb.Next(rbNextTimeout) + // Ready ringbuffer for read. + _, err = a.rb.Next(rbNextTimeout) switch err { case nil: - // Do nothing. case ring.ErrTimeout: return 0, nil - case io.EOF: - a.l.Log(logger.Error, "Unexpected EOF from ring.Next") - return 0, io.ErrUnexpectedEOF default: - a.l.Log(logger.Error, "Unexpected error from ring.Next", "error", err.Error()) return 0, err } - n, err = io.ReadFull(a.rb, p[:chunk.Len()]) - if err != nil { - a.l.Log(logger.Error, "Unexpected error from ring.Read", "error", err.Error()) - return n, err + + // Read from ring buffer. 
+ n, err = a.rb.Read(p) + switch err { + case nil: + case io.EOF: + return 0, nil + default: + return 0, err } - a.l.Log(logger.Debug, "Read audio from ringbuffer", "length", n) - a.mu.Unlock() - fmt.Println("read unlock") + return n, nil } @@ -402,9 +366,11 @@ func (a *AudioDevice) formatBuffer() alsa.Buffer { if a.ab.Format.Channels != wantChannels { // Convert channels. + // TODO(Trek): Make this work for conversions other than stereo to mono. if a.ab.Format.Channels == 2 && wantChannels == 1 { - if formatted.Data, err = pcm.StereoToMono(a.ab); err != nil { - a.l.Log(logger.Warning, "Channel conversion failed, audio has remained stereo", "error", err.Error()) + formatted.Data, err = pcm.StereoToMono(a.ab) + if err != nil { + a.l.Log(logger.Warning, "channel conversion failed, audio has remained stereo", "error", err.Error()) } else { formatted.Format.Channels = 1 } @@ -421,7 +387,7 @@ func (a *AudioDevice) formatBuffer() alsa.Buffer { formatted.Data, err = pcm.Resample(a.ab, wantRate) } if err != nil { - a.l.Log(logger.Warning, "Rate conversion failed, audio has remained original rate", "error", err.Error()) + a.l.Log(logger.Warning, "rate conversion failed, audio has remained original rate", "error", err.Error()) } else { formatted.Format.Rate = wantRate } diff --git a/revid/audio-input_test.go b/revid/audio-input_test.go index 9152e2c6..20c6e01f 100644 --- a/revid/audio-input_test.go +++ b/revid/audio-input_test.go @@ -3,6 +3,7 @@ package revid import ( "bytes" "errors" + "io/ioutil" "testing" "time" @@ -91,7 +92,7 @@ func TestAudio(t *testing.T) { ac := &AudioConfig{ SampleRate: 8000, Channels: 1, - RecPeriod: 0.1, + RecPeriod: 1, BitDepth: 16, Codec: ADPCM, } @@ -103,11 +104,18 @@ func TestAudio(t *testing.T) { } // Create a new audioDevice, start, read/lex, and then stop it. 
- ai := NewAudioDevice(ac) + ai, err := NewAudioDevice(ac) + if err != nil { + t.Error(err) + } dst := bytes.NewBuffer(make([]byte, 0)) - ai.Start() + err = ai.Start() + if err != nil { + t.Error(err) + } num := 3 // How many 'ac.RecPeriod's to record. go lex.ADPCM(dst, ai, time.Duration(ac.RecPeriod*float64(time.Second)), ai.ChunkSize()) - time.Sleep(time.Millisecond * 100 * time.Duration(num)) + time.Sleep(time.Millisecond * 1000 * time.Duration(num)) ai.Stop() + err = ioutil.WriteFile("./testout", dst.Bytes(), 0644) } diff --git a/revid/revid.go b/revid/revid.go index 2619c493..dc3a0951 100644 --- a/revid/revid.go +++ b/revid/revid.go @@ -619,6 +619,7 @@ func (r *Revid) setupInputForFile() (func() error, error) { // startAudioDevice is used to start capturing audio from an audio device and processing it. func (r *Revid) startAudioDevice() (func() error, error) { + // Create audio device. ac := &AudioConfig{ SampleRate: r.config.SampleRate, Channels: r.config.Channels, @@ -626,7 +627,18 @@ func (r *Revid) startAudioDevice() (func() error, error) { BitDepth: r.config.BitDepth, Codec: r.config.InputCodec, } - ai := NewAudioDevice(ac) + ai, err := NewAudioDevice(ac) + if err != nil { + r.config.Logger.Log(logger.Fatal, pkg+"failed to create audio device", "error", err.Error()) + } + + // Start audio device + err = ai.Start() + if err != nil { + r.config.Logger.Log(logger.Fatal, pkg+"failed to start audio device", "error", err.Error()) + } + + // Process output from audio device. 
r.wg.Add(1) go r.processFrom(ai, time.Duration(float64(time.Second)/r.config.WriteRate), ai.ChunkSize()) return func() error { From e851ea228cc315cb59a8a09c6bb8eb0322c25bb1 Mon Sep 17 00:00:00 2001 From: Trek H Date: Wed, 29 May 2019 02:20:19 +0930 Subject: [PATCH 27/57] revid: added switch for codec conversion after recording --- revid/audio-input.go | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/revid/audio-input.go b/revid/audio-input.go index af3c6197..c86ee336 100644 --- a/revid/audio-input.go +++ b/revid/audio-input.go @@ -353,21 +353,19 @@ func (a *AudioDevice) Read(p []byte) (n int, err error) { // in the desired format specified by the ac's parameters. func (a *AudioDevice) formatBuffer() alsa.Buffer { var err error - wantChannels := a.Channels - wantRate := a.SampleRate // If nothing needs to be changed, return the original. - if a.ab.Format.Channels == wantChannels && a.ab.Format.Rate == wantRate { + if a.ab.Format.Channels == a.Channels && a.ab.Format.Rate == a.SampleRate { return a.ab } formatted := alsa.Buffer{Format: a.ab.Format} bufCopied := false - if a.ab.Format.Channels != wantChannels { + if a.ab.Format.Channels != a.Channels { // Convert channels. // TODO(Trek): Make this work for conversions other than stereo to mono. - if a.ab.Format.Channels == 2 && wantChannels == 1 { + if a.ab.Format.Channels == 2 && a.Channels == 1 { formatted.Data, err = pcm.StereoToMono(a.ab) if err != nil { a.l.Log(logger.Warning, "channel conversion failed, audio has remained stereo", "error", err.Error()) @@ -378,19 +376,28 @@ func (a *AudioDevice) formatBuffer() alsa.Buffer { } } - if a.ab.Format.Rate != wantRate { + if a.ab.Format.Rate != a.SampleRate { // Convert rate. 
if bufCopied { - formatted.Data, err = pcm.Resample(formatted, wantRate) + formatted.Data, err = pcm.Resample(formatted, a.SampleRate) } else { - formatted.Data, err = pcm.Resample(a.ab, wantRate) + formatted.Data, err = pcm.Resample(a.ab, a.SampleRate) } if err != nil { a.l.Log(logger.Warning, "rate conversion failed, audio has remained original rate", "error", err.Error()) } else { - formatted.Format.Rate = wantRate + formatted.Format.Rate = a.SampleRate } } + + switch a.Codec { + case PCM: + case ADPCM: + // TODO(Trek):Add ADPCM conversion. + default: + a.l.Log(logger.Error, "codec conversion failed, audio has remained original codec", "error", err.Error()) + } + return formatted } From 90c34c41086c46482e2e272428788d1f263326b6 Mon Sep 17 00:00:00 2001 From: Trek H Date: Mon, 3 Jun 2019 18:35:28 +0930 Subject: [PATCH 28/57] revid: fixed issues after merge --- codec/h264/lex.go | 2 +- codec/h265/lex.go | 2 +- codec/mjpeg/lex.go | 2 +- container/mts/pes/pes.go | 6 +----- revid/audio-input_test.go | 4 ++-- revid/revid.go | 14 ++++---------- revid/revid_test.go | 2 +- 7 files changed, 11 insertions(+), 21 deletions(-) diff --git a/codec/h264/lex.go b/codec/h264/lex.go index 9a071715..abb92dce 100644 --- a/codec/h264/lex.go +++ b/codec/h264/lex.go @@ -48,7 +48,7 @@ var h264Prefix = [...]byte{0x00, 0x00, 0x01, 0x09, 0xf0} // successive writes being performed not earlier than the specified delay. // NAL units are split after type 1 (Coded slice of a non-IDR picture), 5 // (Coded slice of a IDR picture) and 8 (Picture parameter set). 
-func Lex(dst io.Writer, src io.Reader, delay time.Duration) error { +func Lex(dst io.Writer, src io.Reader, delay time.Duration, n int) error { var tick <-chan time.Time if delay == 0 { tick = noDelay diff --git a/codec/h265/lex.go b/codec/h265/lex.go index ebe34013..7593fe5e 100644 --- a/codec/h265/lex.go +++ b/codec/h265/lex.go @@ -70,7 +70,7 @@ func NewLexer(donl bool) *Lexer { // Lex continually reads RTP packets from the io.Reader src and lexes into // access units which are written to the io.Writer dst. Lex expects that for // each read from src, a single RTP packet is received. -func (l *Lexer) Lex(dst io.Writer, src io.Reader, delay time.Duration) error { +func (l *Lexer) Lex(dst io.Writer, src io.Reader, delay time.Duration, n int) error { buf := make([]byte, maxRTPSize) for { n, err := src.Read(buf) diff --git a/codec/mjpeg/lex.go b/codec/mjpeg/lex.go index da2ecae1..21717fe6 100644 --- a/codec/mjpeg/lex.go +++ b/codec/mjpeg/lex.go @@ -45,7 +45,7 @@ func init() { // Lex parses MJPEG frames read from src into separate writes to dst with // successive writes being performed not earlier than the specified delay. -func Lex(dst io.Writer, src io.Reader, delay time.Duration) error { +func Lex(dst io.Writer, src io.Reader, delay time.Duration, n int) error { var tick <-chan time.Time if delay == 0 { tick = noDelay diff --git a/container/mts/pes/pes.go b/container/mts/pes/pes.go index 1dc2dd3e..16382d84 100644 --- a/container/mts/pes/pes.go +++ b/container/mts/pes/pes.go @@ -26,13 +26,9 @@ LICENSE package pes -<<<<<<< HEAD -const MaxPesSize = 64 * 1 << 10 // 65536 -======= import "github.com/Comcast/gots" -const MaxPesSize = 64 * 1 << 10 ->>>>>>> master +const MaxPesSize = 64 * 1 << 10 // 65536 /* The below data struct encapsulates the fields of an PES packet. 
Below is diff --git a/revid/audio-input_test.go b/revid/audio-input_test.go index 20c6e01f..5f057679 100644 --- a/revid/audio-input_test.go +++ b/revid/audio-input_test.go @@ -7,7 +7,7 @@ import ( "testing" "time" - "bitbucket.org/ausocean/av/codec/lex" + "bitbucket.org/ausocean/av/codec/codecutil" "github.com/yobert/alsa" ) @@ -114,7 +114,7 @@ func TestAudio(t *testing.T) { t.Error(err) } num := 3 // How many 'ac.RecPeriod's to record. - go lex.ADPCM(dst, ai, time.Duration(ac.RecPeriod*float64(time.Second)), ai.ChunkSize()) + go codecutil.LexBytes(dst, ai, time.Duration(ac.RecPeriod*float64(time.Second)), ai.ChunkSize()) time.Sleep(time.Millisecond * 1000 * time.Duration(num)) ai.Stop() err = ioutil.WriteFile("./testout", dst.Bytes(), 0644) diff --git a/revid/revid.go b/revid/revid.go index ec34b358..0bc4b954 100644 --- a/revid/revid.go +++ b/revid/revid.go @@ -173,7 +173,7 @@ func (r *Revid) reset(config Config) error { r.config.Logger.SetLevel(config.LogLevel) err = r.setupPipeline( - func(dst io.WriteCloser, fps int, medType int) (io.WriteCloser, error) { + func(dst io.WriteCloser, fps float64) (io.WriteCloser, error) { var st int switch r.config.Input { case Raspivid, File, V4L: @@ -217,7 +217,7 @@ func (r *Revid) setConfig(config Config) error { // mtsEnc and flvEnc will be called to obtain an mts encoder and flv encoder // respectively. multiWriter will be used to create an ioext.multiWriteCloser // so that encoders can write to multiple senders. 
-func (r *Revid) setupPipeline(mtsEnc func(dst io.WriteCloser, rate float64, mediaType int) (io.WriteCloser, error), flvEnc func(dst io.WriteCloser, rate int) (io.WriteCloser, error), multiWriter func(...io.WriteCloser) io.WriteCloser) error { +func (r *Revid) setupPipeline(mtsEnc func(dst io.WriteCloser, rate float64) (io.WriteCloser, error), flvEnc func(dst io.WriteCloser, rate int) (io.WriteCloser, error), multiWriter func(...io.WriteCloser) io.WriteCloser) error { // encoders will hold the encoders that are required for revid's current // configuration. var encoders []io.WriteCloser @@ -261,13 +261,7 @@ func (r *Revid) setupPipeline(mtsEnc func(dst io.WriteCloser, rate float64, medi // as a destination. if len(mtsSenders) != 0 { mw := multiWriter(mtsSenders...) - var mediaType int - if r.config.Input == Audio { - mediaType = mts.Audio - } else { - mediaType = mts.Video - } - e, _ := mtsEnc(mw, r.config.WriteRate, mediaType) + e, _ := mtsEnc(mw, r.config.WriteRate) encoders = append(encoders, e) } @@ -725,7 +719,7 @@ func (r *Revid) startRTSPCamera() (func() error, error) { // Start reading data from the RTP client. r.wg.Add(1) - go r.processFrom(rtpClt, time.Second/time.Duration(r.config.FrameRate)) + go r.processFrom(rtpClt, time.Second/time.Duration(r.config.FrameRate), 0) return func() error { rtspClt.Close() diff --git a/revid/revid_test.go b/revid/revid_test.go index fa5b7cb5..82991955 100644 --- a/revid/revid_test.go +++ b/revid/revid_test.go @@ -232,7 +232,7 @@ func TestResetEncoderSenderSetup(t *testing.T) { // This logic is what we want to check. 
err = rv.setupPipeline( - func(dst io.WriteCloser, rate float64, mediaType int) (io.WriteCloser, error) { + func(dst io.WriteCloser, rate float64) (io.WriteCloser, error) { return &tstMtsEncoder{dst: dst}, nil }, func(dst io.WriteCloser, rate int) (io.WriteCloser, error) { From 409dcabe0a4bdff3197a6258e20230e3ab98a4b4 Mon Sep 17 00:00:00 2001 From: Trek H Date: Tue, 4 Jun 2019 02:31:35 +0930 Subject: [PATCH 29/57] revid: added codec conversion after recording --- codec/codecutil/lex.go | 31 +++++++++++++++++++++++++++++++ revid/audio-input.go | 39 +++++++++++++++++++-------------------- revid/audio-input_test.go | 8 +++----- 3 files changed, 53 insertions(+), 25 deletions(-) create mode 100644 codec/codecutil/lex.go diff --git a/codec/codecutil/lex.go b/codec/codecutil/lex.go new file mode 100644 index 00000000..8e8c36f6 --- /dev/null +++ b/codec/codecutil/lex.go @@ -0,0 +1,31 @@ +package codecutil + +import ( + "io" + "time" +) + +// LexBytes reads n bytes from src and writes them to dst every t seconds. 
+func LexBytes(dst io.Writer, src io.Reader, t time.Duration, n int) error { + var tick <-chan time.Time + if t == 0 { + tick = make(chan time.Time) + } else { + ticker := time.NewTicker(t) + defer ticker.Stop() + tick = ticker.C + } + + for { + <-tick + buf := make([]byte, n) + _, err := src.Read(buf) + if err != nil { + return err + } + _, err = dst.Write(buf) + if err != nil { + return err + } + } +} diff --git a/revid/audio-input.go b/revid/audio-input.go index c86ee336..0592a6d2 100644 --- a/revid/audio-input.go +++ b/revid/audio-input.go @@ -25,6 +25,7 @@ LICENSE package revid import ( + "bytes" "errors" "fmt" "io" @@ -33,6 +34,7 @@ import ( "github.com/yobert/alsa" + "bitbucket.org/ausocean/av/codec/adpcm" "bitbucket.org/ausocean/av/codec/pcm" "bitbucket.org/ausocean/iot/pi/smartlogger" "bitbucket.org/ausocean/utils/logger" @@ -118,7 +120,11 @@ func NewAudioDevice(cfg *AudioConfig) (*AudioDevice, error) { a.l.Log(logger.Error, "given AudioConfig parameters are too small", "error", err.Error()) return nil, errors.New("given AudioConfig parameters are too small") } - a.chunkSize = int(cs) + if a.Codec == ADPCM { + a.chunkSize = adpcm.EncBytes(int(cs)) + } else { + a.chunkSize = int(cs) + } a.rb = ring.NewBuffer(rbLen, a.chunkSize, rbTimeout) a.mode = paused @@ -349,8 +355,7 @@ func (a *AudioDevice) Read(p []byte) (n int, err error) { return n, nil } -// formatBuffer returns an ALSA buffer that has the recording data from the ac's original ALSA buffer but stored -// in the desired format specified by the ac's parameters. +// formatBuffer returns audio that has been converted to the desired format. func (a *AudioDevice) formatBuffer() alsa.Buffer { var err error @@ -359,42 +364,36 @@ func (a *AudioDevice) formatBuffer() alsa.Buffer { return a.ab } - formatted := alsa.Buffer{Format: a.ab.Format} - bufCopied := false + formatted := alsa.Buffer{Format: a.ab.Format, Data: a.ab.Data} if a.ab.Format.Channels != a.Channels { - // Convert channels. 
// TODO(Trek): Make this work for conversions other than stereo to mono. if a.ab.Format.Channels == 2 && a.Channels == 1 { formatted.Data, err = pcm.StereoToMono(a.ab) if err != nil { - a.l.Log(logger.Warning, "channel conversion failed, audio has remained stereo", "error", err.Error()) - } else { - formatted.Format.Channels = 1 + a.l.Log(logger.Fatal, "channel conversion failed", "error", err.Error()) } - bufCopied = true } } if a.ab.Format.Rate != a.SampleRate { - // Convert rate. - if bufCopied { - formatted.Data, err = pcm.Resample(formatted, a.SampleRate) - } else { - formatted.Data, err = pcm.Resample(a.ab, a.SampleRate) - } + formatted.Data, err = pcm.Resample(formatted, a.SampleRate) if err != nil { - a.l.Log(logger.Warning, "rate conversion failed, audio has remained original rate", "error", err.Error()) - } else { - formatted.Format.Rate = a.SampleRate + a.l.Log(logger.Fatal, "rate conversion failed", "error", err.Error()) } } switch a.Codec { case PCM: case ADPCM: - // TODO(Trek):Add ADPCM conversion. + b := bytes.NewBuffer(make([]byte, 0, adpcm.EncBytes(len(formatted.Data)))) + enc := adpcm.NewEncoder(b) + _, err = enc.Write(formatted.Data) + if err != nil { + a.l.Log(logger.Fatal, "unable to encode", "error", err.Error()) + } + formatted.Data = b.Bytes() default: a.l.Log(logger.Error, "codec conversion failed, audio has remained original codec", "error", err.Error()) } diff --git a/revid/audio-input_test.go b/revid/audio-input_test.go index 5f057679..871cc92e 100644 --- a/revid/audio-input_test.go +++ b/revid/audio-input_test.go @@ -3,7 +3,6 @@ package revid import ( "bytes" "errors" - "io/ioutil" "testing" "time" @@ -92,10 +91,11 @@ func TestAudio(t *testing.T) { ac := &AudioConfig{ SampleRate: 8000, Channels: 1, - RecPeriod: 1, + RecPeriod: 0.5, BitDepth: 16, Codec: ADPCM, } + n := 2 // Number of periods to wait while recording. // Skip if there are no suitable devices to test with. 
err := checkDevice(ac) @@ -113,9 +113,7 @@ func TestAudio(t *testing.T) { if err != nil { t.Error(err) } - num := 3 // How many 'ac.RecPeriod's to record. go codecutil.LexBytes(dst, ai, time.Duration(ac.RecPeriod*float64(time.Second)), ai.ChunkSize()) - time.Sleep(time.Millisecond * 1000 * time.Duration(num)) + time.Sleep(time.Millisecond * 1000 * time.Duration(n)) ai.Stop() - err = ioutil.WriteFile("./testout", dst.Bytes(), 0644) } From bea747085f92b49dfbd27ba4eb4692cc7cf6250e Mon Sep 17 00:00:00 2001 From: Trek H Date: Tue, 4 Jun 2019 12:28:40 +0930 Subject: [PATCH 30/57] revid: updated lex funcs in tests, changed audioDevice logger Since adding the extra bufSize arg to Lex functions, the test functions using them needed to be updated. NewAudioDevice was changed to accept a logger to log to instead of creating a new one. --- codec/h265/lex_test.go | 2 +- protocol/rtmp/rtmp_test.go | 4 ++-- revid/audio-input.go | 23 +++-------------------- revid/audio-input_test.go | 33 +++++++++++++++++++++++++++++---- revid/revid.go | 2 +- 5 files changed, 36 insertions(+), 28 deletions(-) diff --git a/codec/h265/lex_test.go b/codec/h265/lex_test.go index 1a409e4c..02ed5f1f 100644 --- a/codec/h265/lex_test.go +++ b/codec/h265/lex_test.go @@ -246,7 +246,7 @@ func TestLex(t *testing.T) { for testNum, test := range tests { r := &rtpReader{packets: test.packets} d := &destination{} - err := NewLexer(test.donl).Lex(d, r, 0) + err := NewLexer(test.donl).Lex(d, r, 0, 0) if err != nil { t.Fatalf("error lexing: %v\n", err) } diff --git a/protocol/rtmp/rtmp_test.go b/protocol/rtmp/rtmp_test.go index e1e79796..cf5f505c 100644 --- a/protocol/rtmp/rtmp_test.go +++ b/protocol/rtmp/rtmp_test.go @@ -199,7 +199,7 @@ func TestFromFrame(t *testing.T) { if err != nil { t.Errorf("Failed to create flv encoder with error: %v", err) } - err = h264.Lex(flvEncoder, bytes.NewReader(videoData), time.Second/time.Duration(frameRate)) + err = h264.Lex(flvEncoder, bytes.NewReader(videoData), 
time.Second/time.Duration(frameRate), 0) if err != nil { t.Errorf("Lexing failed with error: %v", err) } @@ -251,7 +251,7 @@ func TestFromFile(t *testing.T) { if err != nil { t.Fatalf("failed to create encoder: %v", err) } - err = h264.Lex(flvEncoder, f, time.Second/time.Duration(25)) + err = h264.Lex(flvEncoder, f, time.Second/time.Duration(25), 0) if err != nil { t.Errorf("Lexing and encoding failed with error: %v", err) } diff --git a/revid/audio-input.go b/revid/audio-input.go index 0592a6d2..0491b64b 100644 --- a/revid/audio-input.go +++ b/revid/audio-input.go @@ -36,13 +36,11 @@ import ( "bitbucket.org/ausocean/av/codec/adpcm" "bitbucket.org/ausocean/av/codec/pcm" - "bitbucket.org/ausocean/iot/pi/smartlogger" "bitbucket.org/ausocean/utils/logger" "bitbucket.org/ausocean/utils/ring" ) const ( - logPath = "/var/log/netsender" rbTimeout = 100 * time.Millisecond rbNextTimeout = 100 * time.Millisecond rbLen = 200 @@ -58,8 +56,6 @@ const ( var Rates = [8]int{8000, 16000, 32000, 44100, 48000, 88200, 96000, 192000} // AudioDevice holds everything we need to know about the audio input stream. -// Note: At 44100 Hz sample rate, 2 channels and 16-bit samples, a period of 5 seconds -// results in PCM data chunks of 882000 bytes. A longer period exceeds datastore's 1MB blob limit. type AudioDevice struct { l Logger mu sync.Mutex @@ -87,24 +83,11 @@ type AudioConfig struct { Codec uint8 } -// NewAudioDevice initializes and returns an AudioDevice struct which can be started, read from, and stopped. -func NewAudioDevice(cfg *AudioConfig) (*AudioDevice, error) { +// NewAudioDevice initializes and returns an AudioDevice which can be started, read from, and stopped. +func NewAudioDevice(cfg *AudioConfig, l Logger) (*AudioDevice, error) { a := &AudioDevice{} a.AudioConfig = cfg - - // Initialize logger. 
- logLevel := int(logger.Debug) - validLogLevel := true - if logLevel < int(logger.Debug) || logLevel > int(logger.Fatal) { - logLevel = int(logger.Info) - validLogLevel = false - } - logSender := smartlogger.New(logPath) - a.l = logger.New(int8(logLevel), &logSender.LogRoller) - a.l.Log(logger.Info, "log-netsender: Logger Initialized") - if !validLogLevel { - a.l.Log(logger.Error, "Invalid log level was defaulted to Info") - } + a.l = l // Open the requested audio device. err := a.open() diff --git a/revid/audio-input_test.go b/revid/audio-input_test.go index 871cc92e..3b62cb21 100644 --- a/revid/audio-input_test.go +++ b/revid/audio-input_test.go @@ -3,6 +3,9 @@ package revid import ( "bytes" "errors" + "fmt" + "os" + "runtime" "testing" "time" @@ -86,7 +89,28 @@ func checkDevice(ac *AudioConfig) error { return nil } -func TestAudio(t *testing.T) { +// rTestLogger implements a revid.Logger. +type rTestLogger struct{} + +func (tl rTestLogger) SetLevel(level int8) {} + +func (tl rTestLogger) Log(level int8, msg string, params ...interface{}) { + logLevels := [...]string{"Debug", "Info", "Warn", "Error", "", "", "Fatal"} + if level < -1 || level > 5 { + panic("Invalid log level") + } + if !silent { + fmt.Printf("%s: %s\n", logLevels[level+1], msg) + } + if level == 5 { + buf := make([]byte, 1<<16) + size := runtime.Stack(buf, true) + fmt.Printf("%s\n", string(buf[:size])) + os.Exit(1) + } +} + +func TestAudioDevice(t *testing.T) { // We want to open a device with a standard configuration. ac := &AudioConfig{ SampleRate: 8000, @@ -100,11 +124,12 @@ func TestAudio(t *testing.T) { // Skip if there are no suitable devices to test with. err := checkDevice(ac) if err != nil { - t.Error(err) + t.Skip(err) } // Create a new audioDevice, start, read/lex, and then stop it. 
- ai, err := NewAudioDevice(ac) + var l rTestLogger + ai, err := NewAudioDevice(ac, l) if err != nil { t.Error(err) } @@ -114,6 +139,6 @@ func TestAudio(t *testing.T) { t.Error(err) } go codecutil.LexBytes(dst, ai, time.Duration(ac.RecPeriod*float64(time.Second)), ai.ChunkSize()) - time.Sleep(time.Millisecond * 1000 * time.Duration(n)) + time.Sleep(time.Second * time.Duration(ac.RecPeriod) * time.Duration(n)) ai.Stop() } diff --git a/revid/revid.go b/revid/revid.go index 0bc4b954..fa243bef 100644 --- a/revid/revid.go +++ b/revid/revid.go @@ -635,7 +635,7 @@ func (r *Revid) startAudioDevice() (func() error, error) { BitDepth: r.config.BitDepth, Codec: r.config.InputCodec, } - ai, err := NewAudioDevice(ac) + ai, err := NewAudioDevice(ac, r.config.Logger) if err != nil { r.config.Logger.Log(logger.Fatal, pkg+"failed to create audio device", "error", err.Error()) } From 7b4daed4a6eb8b57caf03f72a9eaf6b9d4f363ac Mon Sep 17 00:00:00 2001 From: Trek H Date: Wed, 5 Jun 2019 15:08:47 +0930 Subject: [PATCH 31/57] revid: added metadata to audio mts streams --- revid/audio-input.go | 49 ++++++++++++++++++++++---------------------- revid/revid.go | 13 ++++++++++++ 2 files changed, 37 insertions(+), 25 deletions(-) diff --git a/revid/audio-input.go b/revid/audio-input.go index 0491b64b..92e43da4 100644 --- a/revid/audio-input.go +++ b/revid/audio-input.go @@ -92,7 +92,7 @@ func NewAudioDevice(cfg *AudioConfig, l Logger) (*AudioDevice, error) { // Open the requested audio device. 
err := a.open() if err != nil { - a.l.Log(logger.Error, "failed to open audio device", "error", err.Error()) + a.l.Log(logger.Error, pkg+"failed to open audio device", "error", err.Error()) return nil, errors.New("failed to open audio device") } @@ -100,7 +100,7 @@ func NewAudioDevice(cfg *AudioConfig, l Logger) (*AudioDevice, error) { a.ab = a.dev.NewBufferDuration(time.Duration(a.RecPeriod * float64(time.Second))) cs := (float64((len(a.ab.Data)/a.dev.BufferFormat().Channels)*a.Channels) / float64(a.dev.BufferFormat().Rate)) * float64(a.SampleRate) if cs < 1 { - a.l.Log(logger.Error, "given AudioConfig parameters are too small", "error", err.Error()) + a.l.Log(logger.Error, pkg+"given AudioConfig parameters are too small", "error", err.Error()) return nil, errors.New("given AudioConfig parameters are too small") } if a.Codec == ADPCM { @@ -135,7 +135,6 @@ func (a *AudioDevice) Start() error { default: return errors.New("invalid mode") } - return nil } // Stop will stop recording audio and close the device. @@ -155,21 +154,21 @@ func (a *AudioDevice) ChunkSize() int { func (a *AudioDevice) open() error { // Close any existing device. if a.dev != nil { - a.l.Log(logger.Debug, "closing device", "source", a.source) + a.l.Log(logger.Debug, pkg+"closing device", "source", a.source) a.dev.Close() a.dev = nil } // Open sound card and open recording device. 
- a.l.Log(logger.Debug, "opening sound card") + a.l.Log(logger.Debug, pkg+"opening sound card") cards, err := alsa.OpenCards() if err != nil { - a.l.Log(logger.Debug, "failed to open sound card") + a.l.Log(logger.Debug, pkg+"failed to open sound card") return err } defer alsa.CloseCards(cards) - a.l.Log(logger.Debug, "finding audio device") + a.l.Log(logger.Debug, pkg+"finding audio device") for _, card := range cards { devices, err := card.Devices() if err != nil { @@ -186,14 +185,14 @@ func (a *AudioDevice) open() error { } } if a.dev == nil { - a.l.Log(logger.Debug, "failed to find audio device") + a.l.Log(logger.Debug, pkg+"failed to find audio device") return errors.New("no audio device found") } - a.l.Log(logger.Debug, "opening audio device", "source", a.dev.Title) + a.l.Log(logger.Debug, pkg+"opening audio device", "source", a.dev.Title) err = a.dev.Open() if err != nil { - a.l.Log(logger.Debug, "failed to open audio device") + a.l.Log(logger.Debug, pkg+"failed to open audio device") return err } @@ -218,19 +217,19 @@ func (a *AudioDevice) open() error { _, err = a.dev.NegotiateRate(Rates[i]) if err == nil { foundRate = true - a.l.Log(logger.Debug, "Sample rate set", "rate", Rates[i]) + a.l.Log(logger.Debug, pkg+"Sample rate set", "rate", Rates[i]) } } } // If no easily divisible rate is found, then use the default rate. 
if !foundRate { - a.l.Log(logger.Warning, "Unable to sample at requested rate, default used.", "rateRequested", a.SampleRate) + a.l.Log(logger.Warning, pkg+"Unable to sample at requested rate, default used.", "rateRequested", a.SampleRate) _, err = a.dev.NegotiateRate(defaultSampleRate) if err != nil { return err } - a.l.Log(logger.Debug, "Sample rate set", "rate", defaultSampleRate) + a.l.Log(logger.Debug, pkg+"Sample rate set", "rate", defaultSampleRate) } var aFmt alsa.FormatType @@ -256,7 +255,7 @@ func (a *AudioDevice) open() error { if err = a.dev.Prepare(); err != nil { return err } - a.l.Log(logger.Debug, "Successfully negotiated ALSA params") + a.l.Log(logger.Debug, pkg+"Successfully negotiated ALSA params") return nil } @@ -274,7 +273,7 @@ func (a *AudioDevice) input() { continue case stopped: if a.dev != nil { - a.l.Log(logger.Debug, "closing audio device", "source", a.source) + a.l.Log(logger.Debug, pkg+"closing audio device", "source", a.source) a.dev.Close() a.dev = nil } @@ -282,13 +281,13 @@ func (a *AudioDevice) input() { } // Read from audio device. 
- a.l.Log(logger.Debug, "recording audio for period", "seconds", a.RecPeriod) + a.l.Log(logger.Debug, pkg+"recording audio for period", "seconds", a.RecPeriod) err := a.dev.Read(a.ab.Data) if err != nil { - a.l.Log(logger.Debug, "read failed", "error", err.Error()) + a.l.Log(logger.Debug, pkg+"read failed", "error", err.Error()) err = a.open() // re-open if err != nil { - a.l.Log(logger.Fatal, "reopening device failed", "error", err.Error()) + a.l.Log(logger.Fatal, pkg+"reopening device failed", "error", err.Error()) return } continue @@ -302,11 +301,11 @@ func (a *AudioDevice) input() { n, err := a.rb.Write(toWrite.Data) switch err { case nil: - a.l.Log(logger.Debug, "wrote audio to ringbuffer", "length", n) + a.l.Log(logger.Debug, pkg+"wrote audio to ringbuffer", "length", n) case ring.ErrDropped: - a.l.Log(logger.Warning, "old audio data overwritten") + a.l.Log(logger.Warning, pkg+"old audio data overwritten") default: - a.l.Log(logger.Error, "unexpected ringbuffer error", "error", err.Error()) + a.l.Log(logger.Error, pkg+"unexpected ringbuffer error", "error", err.Error()) return } } @@ -354,7 +353,7 @@ func (a *AudioDevice) formatBuffer() alsa.Buffer { if a.ab.Format.Channels == 2 && a.Channels == 1 { formatted.Data, err = pcm.StereoToMono(a.ab) if err != nil { - a.l.Log(logger.Fatal, "channel conversion failed", "error", err.Error()) + a.l.Log(logger.Fatal, pkg+"channel conversion failed", "error", err.Error()) } } } @@ -363,7 +362,7 @@ func (a *AudioDevice) formatBuffer() alsa.Buffer { // Convert rate. 
formatted.Data, err = pcm.Resample(formatted, a.SampleRate) if err != nil { - a.l.Log(logger.Fatal, "rate conversion failed", "error", err.Error()) + a.l.Log(logger.Fatal, pkg+"rate conversion failed", "error", err.Error()) } } @@ -374,11 +373,11 @@ func (a *AudioDevice) formatBuffer() alsa.Buffer { enc := adpcm.NewEncoder(b) _, err = enc.Write(formatted.Data) if err != nil { - a.l.Log(logger.Fatal, "unable to encode", "error", err.Error()) + a.l.Log(logger.Fatal, pkg+"unable to encode", "error", err.Error()) } formatted.Data = b.Bytes() default: - a.l.Log(logger.Error, "codec conversion failed, audio has remained original codec", "error", err.Error()) + a.l.Log(logger.Error, pkg+"unhandled audio codec") } return formatted diff --git a/revid/revid.go b/revid/revid.go index fa243bef..de278c33 100644 --- a/revid/revid.go +++ b/revid/revid.go @@ -635,6 +635,19 @@ func (r *Revid) startAudioDevice() (func() error, error) { BitDepth: r.config.BitDepth, Codec: r.config.InputCodec, } + mts.Meta.Add("sampleRate", strconv.Itoa(r.config.SampleRate)) + mts.Meta.Add("channels", strconv.Itoa(r.config.Channels)) + mts.Meta.Add("period", fmt.Sprintf("%.6f", r.config.RecPeriod)) + mts.Meta.Add("bitDepth", strconv.Itoa(r.config.BitDepth)) + switch r.config.InputCodec { + case PCM: + mts.Meta.Add("codec", "pcm") + case ADPCM: + mts.Meta.Add("codec", "adpcm") + default: + r.config.Logger.Log(logger.Fatal, pkg+"no audio codec set in config") + } + ai, err := NewAudioDevice(ac, r.config.Logger) if err != nil { r.config.Logger.Log(logger.Fatal, pkg+"failed to create audio device", "error", err.Error()) From 3e2ff49420ad23e8e12b309988a4188404ff196b Mon Sep 17 00:00:00 2001 From: Trek H Date: Thu, 6 Jun 2019 00:28:26 +0930 Subject: [PATCH 32/57] revid: mts encoder uses exported pids --- container/mts/encoder.go | 15 +++++---------- container/mts/mpegts.go | 1 + 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/container/mts/encoder.go b/container/mts/encoder.go index 
3f7137f7..da7d4c5c 100644 --- a/container/mts/encoder.go +++ b/container/mts/encoder.go @@ -74,11 +74,6 @@ var ( ) const ( - sdtPid = 17 - patPid = 0 - pmtPid = 4096 - videoPid = 256 - audioPid = 210 H264ID = 27 H265ID = 36 audioStreamID = 0xc0 // First audio stream ID. @@ -133,13 +128,13 @@ func NewEncoder(dst io.WriteCloser, rate float64, mediaType int) *Encoder { var sid byte switch mediaType { case EncodeAudio: - mPid = audioPid + mPid = AudioPid sid = audioStreamID case EncodeH265: - mPid = videoPid + mPid = VideoPid sid = H265ID case EncodeH264: - mPid = videoPid + mPid = VideoPid sid = H264ID } @@ -181,8 +176,8 @@ func NewEncoder(dst io.WriteCloser, rate float64, mediaType int) *Encoder { streamID: sid, continuity: map[int]byte{ - patPid: 0, - pmtPid: 0, + PatPid: 0, + PmtPid: 0, mPid: 0, }, } diff --git a/container/mts/mpegts.go b/container/mts/mpegts.go index eb4bee5d..34544276 100644 --- a/container/mts/mpegts.go +++ b/container/mts/mpegts.go @@ -45,6 +45,7 @@ const ( PatPid = 0 PmtPid = 4096 VideoPid = 256 + AudioPid = 210 ) // StreamID is the id of the first stream. 
From 96c1b5117307079f35e07e9a7a9493f399451a51 Mon Sep 17 00:00:00 2001 From: Trek H Date: Thu, 6 Jun 2019 02:09:55 +0930 Subject: [PATCH 33/57] revid and audio: seperated audio into own package audio device input is now handle in its own package which resides in the new input directory a list of codecs was added to codecutil package to help with multiple packages using the same codecs --- cmd/revid-cli/main.go | 7 +- codec/codecutil/list.go | 34 ++++++++ revid/audio-input.go => input/audio/audio.go | 86 +++++++++++-------- .../audio/audio_test.go | 68 ++++++++------- revid/config.go | 20 ++--- revid/revid.go | 13 +-- 6 files changed, 136 insertions(+), 92 deletions(-) create mode 100644 codec/codecutil/list.go rename revid/audio-input.go => input/audio/audio.go (81%) rename revid/audio-input_test.go => input/audio/audio_test.go (64%) diff --git a/cmd/revid-cli/main.go b/cmd/revid-cli/main.go index 76acfcff..3642f139 100644 --- a/cmd/revid-cli/main.go +++ b/cmd/revid-cli/main.go @@ -38,6 +38,7 @@ import ( "strings" "time" + "bitbucket.org/ausocean/av/codec/codecutil" "bitbucket.org/ausocean/av/container/mts" "bitbucket.org/ausocean/av/container/mts/meta" "bitbucket.org/ausocean/av/revid" @@ -200,11 +201,11 @@ func handleFlags() revid.Config { switch *inputCodecPtr { case "H264": - cfg.InputCodec = revid.H264 + cfg.InputCodec = codecutil.H264 case "PCM": - cfg.InputCodec = revid.PCM + cfg.InputCodec = codecutil.PCM case "ADPCM": - cfg.InputCodec = revid.ADPCM + cfg.InputCodec = codecutil.ADPCM case "": default: log.Log(logger.Error, pkg+"bad input codec argument") diff --git a/codec/codecutil/list.go b/codec/codecutil/list.go new file mode 100644 index 00000000..cd5685f6 --- /dev/null +++ b/codec/codecutil/list.go @@ -0,0 +1,34 @@ +/* +NAME + list.go + +AUTHOR + Trek Hopton + +LICENSE + This file is Copyright (C) 2019 the Australian Ocean Lab (AusOcean) + + It is free software: you can redistribute it and/or modify them + under the terms of the GNU General Public 
License as published by the + Free Software Foundation, either version 3 of the License, or (at your + option) any later version. + + It is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License in gpl.txt. + If not, see [GNU licenses](http://www.gnu.org/licenses). +*/ + +package codecutil + +// A global list containing all available codecs for reference in any application. +const ( + PCM = iota + ADPCM + H264 + H265 + MJPEG +) diff --git a/revid/audio-input.go b/input/audio/audio.go similarity index 81% rename from revid/audio-input.go rename to input/audio/audio.go index 92e43da4..527668c2 100644 --- a/revid/audio-input.go +++ b/input/audio/audio.go @@ -1,12 +1,13 @@ /* NAME - audio-input.go + audio.go AUTHOR + Alan Noble Trek Hopton LICENSE - audio-input.go is Copyright (C) 2019 the Australian Ocean Lab (AusOcean) + This file is Copyright (C) 2019 the Australian Ocean Lab (AusOcean) It is free software: you can redistribute it and/or modify them under the terms of the GNU General Public License as published by the @@ -22,7 +23,8 @@ LICENSE If not, see [GNU licenses](http://www.gnu.org/licenses). */ -package revid +// Package audio provides access to input from audio devices. 
+package audio import ( "bytes" @@ -35,15 +37,18 @@ import ( "github.com/yobert/alsa" "bitbucket.org/ausocean/av/codec/adpcm" + "bitbucket.org/ausocean/av/codec/codecutil" "bitbucket.org/ausocean/av/codec/pcm" "bitbucket.org/ausocean/utils/logger" "bitbucket.org/ausocean/utils/ring" ) const ( - rbTimeout = 100 * time.Millisecond - rbNextTimeout = 100 * time.Millisecond - rbLen = 200 + pkg = "pkg: " + rbTimeout = 100 * time.Millisecond + rbNextTimeout = 100 * time.Millisecond + rbLen = 200 + defaultSampleRate = 48000 ) const ( @@ -52,30 +57,31 @@ const ( stopped ) -// Rates contains the audio sample rates used by revid. +// Rates contains the audio sample rates used by audio. var Rates = [8]int{8000, 16000, 32000, 44100, 48000, 88200, 96000, 192000} -// AudioDevice holds everything we need to know about the audio input stream. -type AudioDevice struct { - l Logger - mu sync.Mutex - source string // Name of audio source, or empty for the default source. +// Device holds everything we need to know about the audio input stream. +type Device struct { + l Logger + // Operating mode, either running, paused, or stopped. // "running" means the input goroutine is reading from the ALSA device and writing to the ringbuffer. // "paused" means the input routine is sleeping until unpaused or stopped. // "stopped" means the input routine is stopped and the ALSA device is closed. mode uint8 + mu sync.Mutex + title string // Name of audio title, or empty for the default title. dev *alsa.Device // Audio input device. ab alsa.Buffer // ALSA's buffer. rb *ring.Buffer // Our buffer. chunkSize int // This is the number of bytes that will be stored at a time. - *AudioConfig + *Config } -// AudioConfig provides parameters used by AudioDevice. -type AudioConfig struct { +// Config provides parameters used by Device. 
+type Config struct { SampleRate int Channels int BitDepth int @@ -83,10 +89,17 @@ type AudioConfig struct { Codec uint8 } -// NewAudioDevice initializes and returns an AudioDevice which can be started, read from, and stopped. -func NewAudioDevice(cfg *AudioConfig, l Logger) (*AudioDevice, error) { - a := &AudioDevice{} - a.AudioConfig = cfg +// Logger enables any implementation of a logger to be used. +// TODO: Make this part of the logger package. +type Logger interface { + SetLevel(int8) + Log(level int8, message string, params ...interface{}) +} + +// NewDevice initializes and returns an Device which can be started, read from, and stopped. +func NewDevice(cfg *Config, l Logger) (*Device, error) { + a := &Device{} + a.Config = cfg a.l = l // Open the requested audio device. @@ -100,10 +113,10 @@ func NewAudioDevice(cfg *AudioConfig, l Logger) (*AudioDevice, error) { a.ab = a.dev.NewBufferDuration(time.Duration(a.RecPeriod * float64(time.Second))) cs := (float64((len(a.ab.Data)/a.dev.BufferFormat().Channels)*a.Channels) / float64(a.dev.BufferFormat().Rate)) * float64(a.SampleRate) if cs < 1 { - a.l.Log(logger.Error, pkg+"given AudioConfig parameters are too small", "error", err.Error()) - return nil, errors.New("given AudioConfig parameters are too small") + a.l.Log(logger.Error, pkg+"given Config parameters are too small", "error", err.Error()) + return nil, errors.New("given Config parameters are too small") } - if a.Codec == ADPCM { + if a.Codec == codecutil.ADPCM { a.chunkSize = adpcm.EncBytes(int(cs)) } else { a.chunkSize = int(cs) @@ -117,7 +130,7 @@ func NewAudioDevice(cfg *AudioConfig, l Logger) (*AudioDevice, error) { } // Start will start recording audio and writing to the ringbuffer. -func (a *AudioDevice) Start() error { +func (a *Device) Start() error { a.mu.Lock() mode := a.mode a.mu.Unlock() @@ -138,23 +151,23 @@ func (a *AudioDevice) Start() error { } // Stop will stop recording audio and close the device. 
-func (a *AudioDevice) Stop() { +func (a *Device) Stop() { a.mu.Lock() a.mode = stopped a.mu.Unlock() } // ChunkSize returns the number of bytes written to the ringbuffer per a.RecPeriod. -func (a *AudioDevice) ChunkSize() int { +func (a *Device) ChunkSize() int { return a.chunkSize } // open the recording device with the given name and prepare it to record. // If name is empty, the first recording device is used. -func (a *AudioDevice) open() error { +func (a *Device) open() error { // Close any existing device. if a.dev != nil { - a.l.Log(logger.Debug, pkg+"closing device", "source", a.source) + a.l.Log(logger.Debug, pkg+"closing device", "title", a.title) a.dev.Close() a.dev = nil } @@ -178,7 +191,7 @@ func (a *AudioDevice) open() error { if dev.Type != alsa.PCM || !dev.Record { continue } - if dev.Title == a.source || a.source == "" { + if dev.Title == a.title || a.title == "" { a.dev = dev break } @@ -189,7 +202,7 @@ func (a *AudioDevice) open() error { return errors.New("no audio device found") } - a.l.Log(logger.Debug, pkg+"opening audio device", "source", a.dev.Title) + a.l.Log(logger.Debug, pkg+"opening audio device", "title", a.dev.Title) err = a.dev.Open() if err != nil { a.l.Log(logger.Debug, pkg+"failed to open audio device") @@ -261,7 +274,7 @@ func (a *AudioDevice) open() error { // input continously records audio and writes it to the ringbuffer. // Re-opens the device and tries again if ASLA returns an error. -func (a *AudioDevice) input() { +func (a *Device) input() { for { // Check mode. a.mu.Lock() @@ -273,7 +286,7 @@ func (a *AudioDevice) input() { continue case stopped: if a.dev != nil { - a.l.Log(logger.Debug, pkg+"closing audio device", "source", a.source) + a.l.Log(logger.Debug, pkg+"closing audio device", "title", a.title) a.dev.Close() a.dev = nil } @@ -311,9 +324,8 @@ func (a *AudioDevice) input() { } } -// Read reads a full PCM chunk from the ringbuffer, returning the number of bytes read upon success. 
-// Any errors returned are unexpected and should be considered fatal. -func (a *AudioDevice) Read(p []byte) (n int, err error) { +// Read reads from the ringbuffer, returning the number of bytes read upon success. +func (a *Device) Read(p []byte) (n int, err error) { // Ready ringbuffer for read. _, err = a.rb.Next(rbNextTimeout) switch err { @@ -338,7 +350,7 @@ func (a *AudioDevice) Read(p []byte) (n int, err error) { } // formatBuffer returns audio that has been converted to the desired format. -func (a *AudioDevice) formatBuffer() alsa.Buffer { +func (a *Device) formatBuffer() alsa.Buffer { var err error // If nothing needs to be changed, return the original. @@ -367,8 +379,8 @@ func (a *AudioDevice) formatBuffer() alsa.Buffer { } switch a.Codec { - case PCM: - case ADPCM: + case codecutil.PCM: + case codecutil.ADPCM: b := bytes.NewBuffer(make([]byte, 0, adpcm.EncBytes(len(formatted.Data)))) enc := adpcm.NewEncoder(b) _, err = enc.Write(formatted.Data) diff --git a/revid/audio-input_test.go b/input/audio/audio_test.go similarity index 64% rename from revid/audio-input_test.go rename to input/audio/audio_test.go index 3b62cb21..02bf7e0a 100644 --- a/revid/audio-input_test.go +++ b/input/audio/audio_test.go @@ -1,20 +1,43 @@ -package revid +/* +NAME + audio_test.go + +AUTHOR + Trek Hopton + +LICENSE + This file is Copyright (C) 2019 the Australian Ocean Lab (AusOcean) + + It is free software: you can redistribute it and/or modify them + under the terms of the GNU General Public License as published by the + Free Software Foundation, either version 3 of the License, or (at your + option) any later version. + + It is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License in gpl.txt. 
+ If not, see [GNU licenses](http://www.gnu.org/licenses). +*/ + +package audio import ( "bytes" "errors" - "fmt" "os" - "runtime" "testing" "time" "bitbucket.org/ausocean/av/codec/codecutil" + "bitbucket.org/ausocean/utils/logger" "github.com/yobert/alsa" ) // Check that a device exists with the given config parameters. -func checkDevice(ac *AudioConfig) error { +func checkDevice(ac *Config) error { cards, err := alsa.OpenCards() if err != nil { return errors.New("no audio cards found") @@ -89,35 +112,14 @@ func checkDevice(ac *AudioConfig) error { return nil } -// rTestLogger implements a revid.Logger. -type rTestLogger struct{} - -func (tl rTestLogger) SetLevel(level int8) {} - -func (tl rTestLogger) Log(level int8, msg string, params ...interface{}) { - logLevels := [...]string{"Debug", "Info", "Warn", "Error", "", "", "Fatal"} - if level < -1 || level > 5 { - panic("Invalid log level") - } - if !silent { - fmt.Printf("%s: %s\n", logLevels[level+1], msg) - } - if level == 5 { - buf := make([]byte, 1<<16) - size := runtime.Stack(buf, true) - fmt.Printf("%s\n", string(buf[:size])) - os.Exit(1) - } -} - -func TestAudioDevice(t *testing.T) { +func TestDevice(t *testing.T) { // We want to open a device with a standard configuration. - ac := &AudioConfig{ + ac := &Config{ SampleRate: 8000, Channels: 1, - RecPeriod: 0.5, + RecPeriod: 0.3, BitDepth: 16, - Codec: ADPCM, + Codec: codecutil.ADPCM, } n := 2 // Number of periods to wait while recording. @@ -127,9 +129,9 @@ func TestAudioDevice(t *testing.T) { t.Skip(err) } - // Create a new audioDevice, start, read/lex, and then stop it. - var l rTestLogger - ai, err := NewAudioDevice(ac, l) + // Create a new audio Device, start, read/lex, and then stop it. 
+ l := logger.New(logger.Debug, os.Stderr) + ai, err := NewDevice(ac, l) if err != nil { t.Error(err) } @@ -139,6 +141,6 @@ func TestAudioDevice(t *testing.T) { t.Error(err) } go codecutil.LexBytes(dst, ai, time.Duration(ac.RecPeriod*float64(time.Second)), ai.ChunkSize()) - time.Sleep(time.Second * time.Duration(ac.RecPeriod) * time.Duration(n)) + time.Sleep(time.Duration(ac.RecPeriod*float64(time.Second)) * time.Duration(n)) ai.Stop() } diff --git a/revid/config.go b/revid/config.go index 7e5c2d20..fc0d9cb5 100644 --- a/revid/config.go +++ b/revid/config.go @@ -28,6 +28,7 @@ package revid import ( "errors" + "bitbucket.org/ausocean/av/codec/codecutil" "bitbucket.org/ausocean/utils/logger" ) @@ -116,14 +117,9 @@ const ( NothingDefined = iota Raspivid V4L - H264Codec Audio File Http - H264 - Mjpeg - PCM - ADPCM None Mpegts Ffmpeg @@ -157,7 +153,7 @@ const ( defaultQuantizationMode = QuantizationOff defaultFramesPerClip = 1 httpFramesPerClip = 560 - defaultInputCodec = H264 + defaultInputCodec = codecutil.H264 defaultVerbosity = logger.Error defaultRtpAddr = "localhost:6970" defaultBurstPeriod = 10 // Seconds @@ -166,7 +162,7 @@ const ( defaultExposure = "auto" defaultAutoWhiteBalance = "auto" - defaultAudioInputCodec = ADPCM + defaultAudioInputCodec = codecutil.ADPCM defaultSampleRate = 48000 defaultBitDepth = 16 defaultChannels = 1 @@ -197,7 +193,7 @@ func (c *Config) Validate(r *Revid) error { } switch c.InputCodec { - case H264: + case codecutil.H264: // FIXME(kortschak): This is not really what we want. // Configuration really needs to be rethought here. 
if c.Quantize && c.Quantization == 0 { @@ -208,12 +204,12 @@ func (c *Config) Validate(r *Revid) error { return errors.New("bad bitrate and quantization combination for H264 input") } - case Mjpeg: + case codecutil.MJPEG: if c.Quantization > 0 || c.Bitrate == 0 { return errors.New("bad bitrate or quantization for mjpeg input") } - case PCM, ADPCM: - case NothingDefined: + case codecutil.PCM, codecutil.ADPCM: + default: switch c.Input { case Audio: c.Logger.Log(logger.Info, pkg+"input is audio but no codec defined, defaulting", "inputCodec", defaultAudioInputCodec) @@ -224,8 +220,6 @@ func (c *Config) Validate(r *Revid) error { c.Logger.Log(logger.Info, pkg+"defaulting quantization", "quantization", defaultQuantization) c.Quantization = defaultQuantization } - default: - return errors.New("bad input codec defined in config") } if c.Outputs == nil { diff --git a/revid/revid.go b/revid/revid.go index de278c33..b63ac674 100644 --- a/revid/revid.go +++ b/revid/revid.go @@ -45,6 +45,7 @@ import ( "bitbucket.org/ausocean/av/codec/h265" "bitbucket.org/ausocean/av/container/flv" "bitbucket.org/ausocean/av/container/mts" + "bitbucket.org/ausocean/av/input/audio" "bitbucket.org/ausocean/av/protocol/rtcp" "bitbucket.org/ausocean/av/protocol/rtp" "bitbucket.org/ausocean/av/protocol/rtsp" @@ -539,7 +540,7 @@ func (r *Revid) startRaspivid() (func() error, error) { switch r.config.InputCodec { default: return nil, fmt.Errorf("revid: invalid input codec: %v", r.config.InputCodec) - case H264: + case codecutil.H264: args = append(args, "--codec", "H264", "--inline", @@ -548,7 +549,7 @@ func (r *Revid) startRaspivid() (func() error, error) { if r.config.Quantize { args = append(args, "-qp", fmt.Sprint(r.config.Quantization)) } - case Mjpeg: + case codecutil.MJPEG: args = append(args, "--codec", "MJPEG") } r.config.Logger.Log(logger.Info, pkg+"raspivid args", "raspividArgs", strings.Join(args, " ")) @@ -628,7 +629,7 @@ func (r *Revid) setupInputForFile() (func() error, error) { // 
startAudioDevice is used to start capturing audio from an audio device and processing it. func (r *Revid) startAudioDevice() (func() error, error) { // Create audio device. - ac := &AudioConfig{ + ac := &audio.Config{ SampleRate: r.config.SampleRate, Channels: r.config.Channels, RecPeriod: r.config.RecPeriod, @@ -640,15 +641,15 @@ func (r *Revid) startAudioDevice() (func() error, error) { mts.Meta.Add("period", fmt.Sprintf("%.6f", r.config.RecPeriod)) mts.Meta.Add("bitDepth", strconv.Itoa(r.config.BitDepth)) switch r.config.InputCodec { - case PCM: + case codecutil.PCM: mts.Meta.Add("codec", "pcm") - case ADPCM: + case codecutil.ADPCM: mts.Meta.Add("codec", "adpcm") default: r.config.Logger.Log(logger.Fatal, pkg+"no audio codec set in config") } - ai, err := NewAudioDevice(ac, r.config.Logger) + ai, err := audio.NewDevice(ac, r.config.Logger) if err != nil { r.config.Logger.Log(logger.Fatal, pkg+"failed to create audio device", "error", err.Error()) } From 34fc64383ec9a3fb4d6fd60f86e6474b9545281f Mon Sep 17 00:00:00 2001 From: Trek H Date: Thu, 6 Jun 2019 02:41:17 +0930 Subject: [PATCH 34/57] audio: naming --- input/audio/audio.go | 192 +++++++++++++++++++++---------------------- 1 file changed, 96 insertions(+), 96 deletions(-) diff --git a/input/audio/audio.go b/input/audio/audio.go index 527668c2..00bc6ef9 100644 --- a/input/audio/audio.go +++ b/input/audio/audio.go @@ -98,47 +98,47 @@ type Logger interface { // NewDevice initializes and returns an Device which can be started, read from, and stopped. func NewDevice(cfg *Config, l Logger) (*Device, error) { - a := &Device{} - a.Config = cfg - a.l = l + d := &Device{} + d.Config = cfg + d.l = l // Open the requested audio device. 
- err := a.open() + err := d.open() if err != nil { - a.l.Log(logger.Error, pkg+"failed to open audio device", "error", err.Error()) + d.l.Log(logger.Error, pkg+"failed to open audio device", "error", err.Error()) return nil, errors.New("failed to open audio device") } - // Setup ring buffer to capture audio in periods of a.RecPeriod seconds and buffer rbDuration seconds in total. - a.ab = a.dev.NewBufferDuration(time.Duration(a.RecPeriod * float64(time.Second))) - cs := (float64((len(a.ab.Data)/a.dev.BufferFormat().Channels)*a.Channels) / float64(a.dev.BufferFormat().Rate)) * float64(a.SampleRate) + // Setup ring buffer to capture audio in periods of d.RecPeriod seconds and buffer rbDuration seconds in total. + d.ab = d.dev.NewBufferDuration(time.Duration(d.RecPeriod * float64(time.Second))) + cs := (float64((len(d.ab.Data)/d.dev.BufferFormat().Channels)*d.Channels) / float64(d.dev.BufferFormat().Rate)) * float64(d.SampleRate) if cs < 1 { - a.l.Log(logger.Error, pkg+"given Config parameters are too small", "error", err.Error()) + d.l.Log(logger.Error, pkg+"given Config parameters are too small", "error", err.Error()) return nil, errors.New("given Config parameters are too small") } - if a.Codec == codecutil.ADPCM { - a.chunkSize = adpcm.EncBytes(int(cs)) + if d.Codec == codecutil.ADPCM { + d.chunkSize = adpcm.EncBytes(int(cs)) } else { - a.chunkSize = int(cs) + d.chunkSize = int(cs) } - a.rb = ring.NewBuffer(rbLen, a.chunkSize, rbTimeout) + d.rb = ring.NewBuffer(rbLen, d.chunkSize, rbTimeout) - a.mode = paused - go a.input() + d.mode = paused + go d.input() - return a, nil + return d, nil } // Start will start recording audio and writing to the ringbuffer. 
-func (a *Device) Start() error { - a.mu.Lock() - mode := a.mode - a.mu.Unlock() +func (d *Device) Start() error { + d.mu.Lock() + mode := d.mode + d.mu.Unlock() switch mode { case paused: - a.mu.Lock() - a.mode = running - a.mu.Unlock() + d.mu.Lock() + d.mode = running + d.mu.Unlock() return nil case stopped: // TODO(Trek): Make this reopen device and start recording. @@ -151,37 +151,37 @@ func (a *Device) Start() error { } // Stop will stop recording audio and close the device. -func (a *Device) Stop() { - a.mu.Lock() - a.mode = stopped - a.mu.Unlock() +func (d *Device) Stop() { + d.mu.Lock() + d.mode = stopped + d.mu.Unlock() } -// ChunkSize returns the number of bytes written to the ringbuffer per a.RecPeriod. -func (a *Device) ChunkSize() int { - return a.chunkSize +// ChunkSize returns the number of bytes written to the ringbuffer per d.RecPeriod. +func (d *Device) ChunkSize() int { + return d.chunkSize } // open the recording device with the given name and prepare it to record. // If name is empty, the first recording device is used. -func (a *Device) open() error { +func (d *Device) open() error { // Close any existing device. - if a.dev != nil { - a.l.Log(logger.Debug, pkg+"closing device", "title", a.title) - a.dev.Close() - a.dev = nil + if d.dev != nil { + d.l.Log(logger.Debug, pkg+"closing device", "title", d.title) + d.dev.Close() + d.dev = nil } // Open sound card and open recording device. 
- a.l.Log(logger.Debug, pkg+"opening sound card") + d.l.Log(logger.Debug, pkg+"opening sound card") cards, err := alsa.OpenCards() if err != nil { - a.l.Log(logger.Debug, pkg+"failed to open sound card") + d.l.Log(logger.Debug, pkg+"failed to open sound card") return err } defer alsa.CloseCards(cards) - a.l.Log(logger.Debug, pkg+"finding audio device") + d.l.Log(logger.Debug, pkg+"finding audio device") for _, card := range cards { devices, err := card.Devices() if err != nil { @@ -191,27 +191,27 @@ func (a *Device) open() error { if dev.Type != alsa.PCM || !dev.Record { continue } - if dev.Title == a.title || a.title == "" { - a.dev = dev + if dev.Title == d.title || d.title == "" { + d.dev = dev break } } } - if a.dev == nil { - a.l.Log(logger.Debug, pkg+"failed to find audio device") + if d.dev == nil { + d.l.Log(logger.Debug, pkg+"failed to find audio device") return errors.New("no audio device found") } - a.l.Log(logger.Debug, pkg+"opening audio device", "title", a.dev.Title) - err = a.dev.Open() + d.l.Log(logger.Debug, pkg+"opening audio device", "title", d.dev.Title) + err = d.dev.Open() if err != nil { - a.l.Log(logger.Debug, pkg+"failed to open audio device") + d.l.Log(logger.Debug, pkg+"failed to open audio device") return err } // 2 channels is what most devices need to record in. If mono is requested, // the recording will be converted in formatBuffer(). - _, err = a.dev.NegotiateChannels(2) + _, err = d.dev.NegotiateChannels(2) if err != nil { return err } @@ -223,111 +223,111 @@ func (a *Device) open() error { // a fix for this is to remove 8000 and 16000 from the Rates slice. 
foundRate := false for i := 0; i < len(Rates) && !foundRate; i++ { - if Rates[i] < a.SampleRate { + if Rates[i] < d.SampleRate { continue } - if Rates[i]%a.SampleRate == 0 { - _, err = a.dev.NegotiateRate(Rates[i]) + if Rates[i]%d.SampleRate == 0 { + _, err = d.dev.NegotiateRate(Rates[i]) if err == nil { foundRate = true - a.l.Log(logger.Debug, pkg+"Sample rate set", "rate", Rates[i]) + d.l.Log(logger.Debug, pkg+"Sample rate set", "rate", Rates[i]) } } } // If no easily divisible rate is found, then use the default rate. if !foundRate { - a.l.Log(logger.Warning, pkg+"Unable to sample at requested rate, default used.", "rateRequested", a.SampleRate) - _, err = a.dev.NegotiateRate(defaultSampleRate) + d.l.Log(logger.Warning, pkg+"Unable to sample at requested rate, default used.", "rateRequested", d.SampleRate) + _, err = d.dev.NegotiateRate(defaultSampleRate) if err != nil { return err } - a.l.Log(logger.Debug, pkg+"Sample rate set", "rate", defaultSampleRate) + d.l.Log(logger.Debug, pkg+"Sample rate set", "rate", defaultSampleRate) } var aFmt alsa.FormatType - switch a.BitDepth { + switch d.BitDepth { case 16: aFmt = alsa.S16_LE case 32: aFmt = alsa.S32_LE default: - return fmt.Errorf("unsupported sample bits %v", a.BitDepth) + return fmt.Errorf("unsupported sample bits %v", d.BitDepth) } - _, err = a.dev.NegotiateFormat(aFmt) + _, err = d.dev.NegotiateFormat(aFmt) if err != nil { return err } // Either 8192 or 16384 bytes is a reasonable ALSA buffer size. - _, err = a.dev.NegotiateBufferSize(8192, 16384) + _, err = d.dev.NegotiateBufferSize(8192, 16384) if err != nil { return err } - if err = a.dev.Prepare(); err != nil { + if err = d.dev.Prepare(); err != nil { return err } - a.l.Log(logger.Debug, pkg+"Successfully negotiated ALSA params") + d.l.Log(logger.Debug, pkg+"Successfully negotiated ALSA params") return nil } // input continously records audio and writes it to the ringbuffer. // Re-opens the device and tries again if ASLA returns an error. 
-func (a *Device) input() { +func (d *Device) input() { for { // Check mode. - a.mu.Lock() - mode := a.mode - a.mu.Unlock() + d.mu.Lock() + mode := d.mode + d.mu.Unlock() switch mode { case paused: - time.Sleep(time.Duration(a.RecPeriod) * time.Second) + time.Sleep(time.Duration(d.RecPeriod) * time.Second) continue case stopped: - if a.dev != nil { - a.l.Log(logger.Debug, pkg+"closing audio device", "title", a.title) - a.dev.Close() - a.dev = nil + if d.dev != nil { + d.l.Log(logger.Debug, pkg+"closing audio device", "title", d.title) + d.dev.Close() + d.dev = nil } return } // Read from audio device. - a.l.Log(logger.Debug, pkg+"recording audio for period", "seconds", a.RecPeriod) - err := a.dev.Read(a.ab.Data) + d.l.Log(logger.Debug, pkg+"recording audio for period", "seconds", d.RecPeriod) + err := d.dev.Read(d.ab.Data) if err != nil { - a.l.Log(logger.Debug, pkg+"read failed", "error", err.Error()) - err = a.open() // re-open + d.l.Log(logger.Debug, pkg+"read failed", "error", err.Error()) + err = d.open() // re-open if err != nil { - a.l.Log(logger.Fatal, pkg+"reopening device failed", "error", err.Error()) + d.l.Log(logger.Fatal, pkg+"reopening device failed", "error", err.Error()) return } continue } // Process audio. - a.l.Log(logger.Debug, "processing audio") - toWrite := a.formatBuffer() + d.l.Log(logger.Debug, "processing audio") + toWrite := d.formatBuffer() // Write audio to ringbuffer. 
- n, err := a.rb.Write(toWrite.Data) + n, err := d.rb.Write(toWrite.Data) switch err { case nil: - a.l.Log(logger.Debug, pkg+"wrote audio to ringbuffer", "length", n) + d.l.Log(logger.Debug, pkg+"wrote audio to ringbuffer", "length", n) case ring.ErrDropped: - a.l.Log(logger.Warning, pkg+"old audio data overwritten") + d.l.Log(logger.Warning, pkg+"old audio data overwritten") default: - a.l.Log(logger.Error, pkg+"unexpected ringbuffer error", "error", err.Error()) + d.l.Log(logger.Error, pkg+"unexpected ringbuffer error", "error", err.Error()) return } } } // Read reads from the ringbuffer, returning the number of bytes read upon success. -func (a *Device) Read(p []byte) (n int, err error) { +func (d *Device) Read(p []byte) (n int, err error) { // Ready ringbuffer for read. - _, err = a.rb.Next(rbNextTimeout) + _, err = d.rb.Next(rbNextTimeout) switch err { case nil: case ring.ErrTimeout: @@ -337,7 +337,7 @@ func (a *Device) Read(p []byte) (n int, err error) { } // Read from ring buffer. - n, err = a.rb.Read(p) + n, err = d.rb.Read(p) switch err { case nil: case io.EOF: @@ -350,46 +350,46 @@ func (a *Device) Read(p []byte) (n int, err error) { } // formatBuffer returns audio that has been converted to the desired format. -func (a *Device) formatBuffer() alsa.Buffer { +func (d *Device) formatBuffer() alsa.Buffer { var err error // If nothing needs to be changed, return the original. - if a.ab.Format.Channels == a.Channels && a.ab.Format.Rate == a.SampleRate { - return a.ab + if d.ab.Format.Channels == d.Channels && d.ab.Format.Rate == d.SampleRate { + return d.ab } - formatted := alsa.Buffer{Format: a.ab.Format, Data: a.ab.Data} - if a.ab.Format.Channels != a.Channels { + formatted := alsa.Buffer{Format: d.ab.Format, Data: d.ab.Data} + if d.ab.Format.Channels != d.Channels { // Convert channels. // TODO(Trek): Make this work for conversions other than stereo to mono. 
- if a.ab.Format.Channels == 2 && a.Channels == 1 { - formatted.Data, err = pcm.StereoToMono(a.ab) + if d.ab.Format.Channels == 2 && d.Channels == 1 { + formatted.Data, err = pcm.StereoToMono(d.ab) if err != nil { - a.l.Log(logger.Fatal, pkg+"channel conversion failed", "error", err.Error()) + d.l.Log(logger.Fatal, pkg+"channel conversion failed", "error", err.Error()) } } } - if a.ab.Format.Rate != a.SampleRate { + if d.ab.Format.Rate != d.SampleRate { // Convert rate. - formatted.Data, err = pcm.Resample(formatted, a.SampleRate) + formatted.Data, err = pcm.Resample(formatted, d.SampleRate) if err != nil { - a.l.Log(logger.Fatal, pkg+"rate conversion failed", "error", err.Error()) + d.l.Log(logger.Fatal, pkg+"rate conversion failed", "error", err.Error()) } } - switch a.Codec { + switch d.Codec { case codecutil.PCM: case codecutil.ADPCM: b := bytes.NewBuffer(make([]byte, 0, adpcm.EncBytes(len(formatted.Data)))) enc := adpcm.NewEncoder(b) _, err = enc.Write(formatted.Data) if err != nil { - a.l.Log(logger.Fatal, pkg+"unable to encode", "error", err.Error()) + d.l.Log(logger.Fatal, pkg+"unable to encode", "error", err.Error()) } formatted.Data = b.Bytes() default: - a.l.Log(logger.Error, pkg+"unhandled audio codec") + d.l.Log(logger.Error, pkg+"unhandled audio codec") } return formatted From d23f40c85d913317961f462084f59007c6142620 Mon Sep 17 00:00:00 2001 From: Trek H Date: Thu, 6 Jun 2019 02:54:00 +0930 Subject: [PATCH 35/57] mts: updated reference to old pid vars --- container/mts/encoder_test.go | 2 +- container/mts/mpegts_test.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/container/mts/encoder_test.go b/container/mts/encoder_test.go index 24fb823d..4443e8e8 100644 --- a/container/mts/encoder_test.go +++ b/container/mts/encoder_test.go @@ -197,7 +197,7 @@ func TestEncodePcm(t *testing.T) { for i+PacketSize <= len(clip) { // Check MTS packet - if !(pkt.PID() == audioPid) { + if !(pkt.PID() == AudioPid) { i += PacketSize if i+PacketSize 
<= len(clip) { copy(pkt[:], clip[i:i+PacketSize]) diff --git a/container/mts/mpegts_test.go b/container/mts/mpegts_test.go index 4c90cc0e..511a9eb1 100644 --- a/container/mts/mpegts_test.go +++ b/container/mts/mpegts_test.go @@ -79,7 +79,7 @@ func TestGetPTSRange(t *testing.T) { curTime += interval } - got, err := GetPTSRange(clip.Bytes(), videoPid) + got, err := GetPTSRange(clip.Bytes(), VideoPid) if err != nil { t.Fatalf("did not expect error getting PTS range: %v", err) } @@ -139,7 +139,7 @@ func writeFrame(b *bytes.Buffer, frame []byte, pts uint64) error { for len(buf) != 0 { pkt := Packet{ PUSI: pusi, - PID: videoPid, + PID: VideoPid, RAI: pusi, CC: 0, AFC: hasAdaptationField | hasPayload, From 9fe09255bef40873d1769cef646099c4128c25a8 Mon Sep 17 00:00:00 2001 From: Trek H Date: Thu, 13 Jun 2019 23:35:52 +0930 Subject: [PATCH 36/57] audio and revid: changes for pr added license to lex.go changed pcm functions to return alsa.Buffers style, syntax and clarification added to audio.go new method of finding buffersize in audio.go uses a new function called nearestPowerOfTwo --- codec/codecutil/lex.go | 24 +++++++ codec/pcm/pcm.go | 71 ++++++++++++++------- codec/pcm/pcm_test.go | 4 +- container/mts/pes/pes.go | 2 +- input/audio/audio.go | 127 ++++++++++++++++++++++++++------------ input/audio/audio_test.go | 10 +++ 6 files changed, 173 insertions(+), 65 deletions(-) diff --git a/codec/codecutil/lex.go b/codec/codecutil/lex.go index 8e8c36f6..3423e1ea 100644 --- a/codec/codecutil/lex.go +++ b/codec/codecutil/lex.go @@ -1,3 +1,27 @@ +/* +NAME + lex.go + +AUTHOR + Trek Hopton + +LICENSE + This file is Copyright (C) 2019 the Australian Ocean Lab (AusOcean) + + It is free software: you can redistribute it and/or modify them + under the terms of the GNU General Public License as published by the + Free Software Foundation, either version 3 of the License, or (at your + option) any later version. 
+ + It is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License in gpl.txt. + If not, see [GNU licenses](http://www.gnu.org/licenses). +*/ + package codecutil import ( diff --git a/codec/pcm/pcm.go b/codec/pcm/pcm.go index bb200d50..4882ffc3 100644 --- a/codec/pcm/pcm.go +++ b/codec/pcm/pcm.go @@ -35,20 +35,21 @@ import ( "github.com/yobert/alsa" ) -// Resample takes an alsa.Buffer (b) and resamples the pcm audio data to 'rate' Hz and returns the resulting pcm. -// If an error occurs, an error will be returned along with the original b's data. +// Resample takes alsa.Buffer b and resamples the pcm audio data to 'rate' Hz and returns an alsa.Buffer with the resampled data. // Notes: // - Currently only downsampling is implemented and b's rate must be divisible by 'rate' or an error will occur. // - If the number of bytes in b.Data is not divisible by the decimation factor (ratioFrom), the remaining bytes will // not be included in the result. Eg. input of length 480002 downsampling 6:1 will result in output length 80000. -func Resample(b alsa.Buffer, rate int) ([]byte, error) { - fromRate := b.Format.Rate - if fromRate == rate { - return b.Data, nil - } else if fromRate < 0 { - return nil, fmt.Errorf("Unable to convert from: %v Hz", fromRate) - } else if rate < 0 { - return nil, fmt.Errorf("Unable to convert to: %v Hz", rate) +func Resample(b alsa.Buffer, rate int) (alsa.Buffer, error) { + var newBuf alsa.Buffer + if b.Format.Rate == rate { + return newBuf, nil + } + if b.Format.Rate < 0 { + return newBuf, fmt.Errorf("Unable to convert from: %v Hz", b.Format.Rate) + } + if rate < 0 { + return newBuf, fmt.Errorf("Unable to convert to: %v Hz", rate) } // The number of bytes in a sample. 
@@ -59,22 +60,22 @@ func Resample(b alsa.Buffer, rate int) ([]byte, error) { case alsa.S16_LE: sampleLen = 2 * b.Format.Channels default: - return nil, fmt.Errorf("Unhandled ALSA format: %v", b.Format.SampleFormat) + return newBuf, fmt.Errorf("Unhandled ALSA format: %v", b.Format.SampleFormat) } inPcmLen := len(b.Data) // Calculate sample rate ratio ratioFrom:ratioTo. - rateGcd := gcd(rate, fromRate) - ratioFrom := fromRate / rateGcd + rateGcd := gcd(rate, b.Format.Rate) + ratioFrom := b.Format.Rate / rateGcd ratioTo := rate / rateGcd // ratioTo = 1 is the only number that will result in an even sampling. if ratioTo != 1 { - return nil, fmt.Errorf("unhandled from:to rate ratio %v:%v: 'to' must be 1", ratioFrom, ratioTo) + return newBuf, fmt.Errorf("unhandled from:to rate ratio %v:%v: 'to' must be 1", ratioFrom, ratioTo) } newLen := inPcmLen / ratioFrom - result := make([]byte, 0, newLen) + resampled := make([]byte, 0, newLen) // For each new sample to be generated, loop through the respective 'ratioFrom' samples in 'b.Data' to add them // up and average them. The result is the new sample. @@ -96,19 +97,31 @@ func Resample(b alsa.Buffer, rate int) ([]byte, error) { case alsa.S16_LE: binary.LittleEndian.PutUint16(bAvg, uint16(avg)) } - result = append(result, bAvg...) + resampled = append(resampled, bAvg...) } - return result, nil + + // Create new alsa.Buffer with resampled data. + newBuf = alsa.Buffer{ + Format: alsa.BufferFormat{ + Channels: b.Format.Channels, + SampleFormat: b.Format.SampleFormat, + Rate: rate, + }, + Data: resampled, + } + + return newBuf, nil } // StereoToMono returns raw mono audio data generated from only the left channel from // the given stereo recording (ALSA buffer) -// if an error occurs, an error will be returned along with the original stereo data. 
-func StereoToMono(b alsa.Buffer) ([]byte, error) { +func StereoToMono(b alsa.Buffer) (alsa.Buffer, error) { + var newBuf alsa.Buffer if b.Format.Channels == 1 { - return b.Data, nil - } else if b.Format.Channels != 2 { - return nil, fmt.Errorf("Audio is not stereo or mono, it has %v channels", b.Format.Channels) + return b, nil + } + if b.Format.Channels != 2 { + return newBuf, fmt.Errorf("Audio is not stereo or mono, it has %v channels", b.Format.Channels) } var stereoSampleBytes int @@ -118,7 +131,7 @@ func StereoToMono(b alsa.Buffer) ([]byte, error) { case alsa.S16_LE: stereoSampleBytes = 4 default: - return nil, fmt.Errorf("Unhandled ALSA format %v", b.Format.SampleFormat) + return newBuf, fmt.Errorf("Unhandled ALSA format %v", b.Format.SampleFormat) } recLength := len(b.Data) @@ -134,7 +147,17 @@ func StereoToMono(b alsa.Buffer) ([]byte, error) { } } - return mono, nil + // Create new alsa.Buffer with resampled data. + newBuf = alsa.Buffer{ + Format: alsa.BufferFormat{ + Channels: 1, + SampleFormat: b.Format.SampleFormat, + Rate: b.Format.Rate, + }, + Data: mono, + } + + return newBuf, nil } // gcd is used for calculating the greatest common divisor of two positive integers, a and b. diff --git a/codec/pcm/pcm_test.go b/codec/pcm/pcm_test.go index 713d01d8..1aa1b9d2 100644 --- a/codec/pcm/pcm_test.go +++ b/codec/pcm/pcm_test.go @@ -71,7 +71,7 @@ func TestResample(t *testing.T) { } // Compare result with expected. - if !bytes.Equal(resampled, exp) { + if !bytes.Equal(resampled.Data, exp) { t.Error("Resampled data does not match expected result.") } } @@ -112,7 +112,7 @@ func TestStereoToMono(t *testing.T) { } // Compare result with expected. 
- if !bytes.Equal(mono, exp) { + if !bytes.Equal(mono.Data, exp) { t.Error("Converted data does not match expected result.") } } diff --git a/container/mts/pes/pes.go b/container/mts/pes/pes.go index 16382d84..5b5cb612 100644 --- a/container/mts/pes/pes.go +++ b/container/mts/pes/pes.go @@ -28,7 +28,7 @@ package pes import "github.com/Comcast/gots" -const MaxPesSize = 64 * 1 << 10 // 65536 +const MaxPesSize = 64 * 1 << 10 /* The below data struct encapsulates the fields of an PES packet. Below is diff --git a/input/audio/audio.go b/input/audio/audio.go index 00bc6ef9..25fad00b 100644 --- a/input/audio/audio.go +++ b/input/audio/audio.go @@ -30,7 +30,6 @@ import ( "bytes" "errors" "fmt" - "io" "sync" "time" @@ -57,7 +56,7 @@ const ( stopped ) -// Rates contains the audio sample rates used by audio. +// Rates contains the standard audio sample rates used by package audio. var Rates = [8]int{8000, 16000, 32000, 44100, 48000, 88200, 96000, 192000} // Device holds everything we need to know about the audio input stream. @@ -98,31 +97,41 @@ type Logger interface { // NewDevice initializes and returns an Device which can be started, read from, and stopped. func NewDevice(cfg *Config, l Logger) (*Device, error) { - d := &Device{} - d.Config = cfg - d.l = l + d := &Device{ + Config: cfg, + l: l, + } // Open the requested audio device. err := d.open() if err != nil { - d.l.Log(logger.Error, pkg+"failed to open audio device", "error", err.Error()) - return nil, errors.New("failed to open audio device") + d.l.Log(logger.Error, pkg+"failed to open device") + return nil, err } - // Setup ring buffer to capture audio in periods of d.RecPeriod seconds and buffer rbDuration seconds in total. + // Setup the device to record with desired period. 
d.ab = d.dev.NewBufferDuration(time.Duration(d.RecPeriod * float64(time.Second))) - cs := (float64((len(d.ab.Data)/d.dev.BufferFormat().Channels)*d.Channels) / float64(d.dev.BufferFormat().Rate)) * float64(d.SampleRate) - if cs < 1 { - d.l.Log(logger.Error, pkg+"given Config parameters are too small", "error", err.Error()) + + // Account for channel conversion. + chunkSize := float64(len(d.ab.Data) / d.dev.BufferFormat().Channels * d.Channels) + + // Account for resampling. + chunkSize = (chunkSize / float64(d.dev.BufferFormat().Rate)) * float64(d.SampleRate) + if chunkSize < 1 { return nil, errors.New("given Config parameters are too small") } + + // Account for codec conversion. if d.Codec == codecutil.ADPCM { - d.chunkSize = adpcm.EncBytes(int(cs)) + d.chunkSize = adpcm.EncBytes(int(chunkSize)) } else { - d.chunkSize = int(cs) + d.chunkSize = int(chunkSize) } + + // Create ring buffer with appropriate chunk size. d.rb = ring.NewBuffer(rbLen, d.chunkSize, rbTimeout) + // Start device in paused mode. d.mode = paused go d.input() @@ -211,10 +220,11 @@ func (d *Device) open() error { // 2 channels is what most devices need to record in. If mono is requested, // the recording will be converted in formatBuffer(). - _, err = d.dev.NegotiateChannels(2) + devChan, err := d.dev.NegotiateChannels(2) if err != nil { return err } + d.l.Log(logger.Debug, pkg+"alsa device channels set", "channels", devChan) // Try to negotiate a rate to record in that is divisible by the wanted rate // so that it can be easily downsampled to the wanted rate. @@ -222,15 +232,16 @@ func (d *Device) open() error { // Eg. the audioinjector sound card is supposed to record at 8000Hz and 16000Hz but it can't due to a firmware issue, // a fix for this is to remove 8000 and 16000 from the Rates slice. 
foundRate := false + var devRate int for i := 0; i < len(Rates) && !foundRate; i++ { if Rates[i] < d.SampleRate { continue } if Rates[i]%d.SampleRate == 0 { - _, err = d.dev.NegotiateRate(Rates[i]) + devRate, err = d.dev.NegotiateRate(Rates[i]) if err == nil { foundRate = true - d.l.Log(logger.Debug, pkg+"Sample rate set", "rate", Rates[i]) + d.l.Log(logger.Debug, pkg+"alsa device sample rate set", "rate", devRate) } } } @@ -238,11 +249,11 @@ func (d *Device) open() error { // If no easily divisible rate is found, then use the default rate. if !foundRate { d.l.Log(logger.Warning, pkg+"Unable to sample at requested rate, default used.", "rateRequested", d.SampleRate) - _, err = d.dev.NegotiateRate(defaultSampleRate) + devRate, err = d.dev.NegotiateRate(defaultSampleRate) if err != nil { return err } - d.l.Log(logger.Debug, pkg+"Sample rate set", "rate", defaultSampleRate) + d.l.Log(logger.Debug, pkg+"alsa device sample rate set", "rate", devRate) } var aFmt alsa.FormatType @@ -254,21 +265,46 @@ func (d *Device) open() error { default: return fmt.Errorf("unsupported sample bits %v", d.BitDepth) } - _, err = d.dev.NegotiateFormat(aFmt) + devFmt, err := d.dev.NegotiateFormat(aFmt) if err != nil { return err } + var devBits int + switch devFmt { + case alsa.S16_LE: + devBits = 16 + case alsa.S32_LE: + devBits = 32 + default: + return fmt.Errorf("unsupported sample bits %v", d.BitDepth) + } + d.l.Log(logger.Debug, pkg+"alsa device bit depth set", "bitdepth", devBits) - // Either 8192 or 16384 bytes is a reasonable ALSA buffer size. - _, err = d.dev.NegotiateBufferSize(8192, 16384) + // A 50ms period is a sensible value for low-ish latency. (this could be made configurable if needed) + // Some devices only accept even period sizes while others want powers of 2. + // So we will find the closest power of 2 to the desired period size. 
+ const wantPeriod = 0.05 //seconds + secondSize := devRate * devChan * (devBits / 8) + wantPeriodSize := int(float64(secondSize) * wantPeriod) + nearWantPeriodSize := nearestPowerOfTwo(wantPeriodSize) + + devPeriodSize, err := d.dev.NegotiatePeriodSize(nearWantPeriodSize) if err != nil { return err } + d.l.Log(logger.Debug, pkg+"alsa device period size set", "periodsize", devPeriodSize) + + devBufferSize, err := d.dev.NegotiateBufferSize(devPeriodSize * 2) + if err != nil { + return err + } + d.l.Log(logger.Debug, pkg+"alsa device buffer size set", "buffersize", devBufferSize) if err = d.dev.Prepare(); err != nil { return err } - d.l.Log(logger.Debug, pkg+"Successfully negotiated ALSA params") + + d.l.Log(logger.Debug, pkg+"successfully negotiated ALSA params") return nil } @@ -307,7 +343,7 @@ func (d *Device) input() { } // Process audio. - d.l.Log(logger.Debug, "processing audio") + d.l.Log(logger.Debug, pkg+"processing audio") toWrite := d.formatBuffer() // Write audio to ringbuffer. @@ -328,24 +364,15 @@ func (d *Device) input() { func (d *Device) Read(p []byte) (n int, err error) { // Ready ringbuffer for read. _, err = d.rb.Next(rbNextTimeout) - switch err { - case nil: - case ring.ErrTimeout: - return 0, nil - default: + if err != nil { return 0, err } // Read from ring buffer. n, err = d.rb.Read(p) - switch err { - case nil: - case io.EOF: - return 0, nil - default: + if err != nil { return 0, err } - return n, nil } @@ -357,13 +384,12 @@ func (d *Device) formatBuffer() alsa.Buffer { if d.ab.Format.Channels == d.Channels && d.ab.Format.Rate == d.SampleRate { return d.ab } - - formatted := alsa.Buffer{Format: d.ab.Format, Data: d.ab.Data} + var formatted alsa.Buffer if d.ab.Format.Channels != d.Channels { // Convert channels. // TODO(Trek): Make this work for conversions other than stereo to mono. 
if d.ab.Format.Channels == 2 && d.Channels == 1 { - formatted.Data, err = pcm.StereoToMono(d.ab) + formatted, err = pcm.StereoToMono(d.ab) if err != nil { d.l.Log(logger.Fatal, pkg+"channel conversion failed", "error", err.Error()) } @@ -372,7 +398,7 @@ func (d *Device) formatBuffer() alsa.Buffer { if d.ab.Format.Rate != d.SampleRate { // Convert rate. - formatted.Data, err = pcm.Resample(formatted, d.SampleRate) + formatted, err = pcm.Resample(formatted, d.SampleRate) if err != nil { d.l.Log(logger.Fatal, pkg+"rate conversion failed", "error", err.Error()) } @@ -394,3 +420,28 @@ func (d *Device) formatBuffer() alsa.Buffer { return formatted } + +// nearestPowerOfTwo finds and returns the nearest power of two to the given integer. +// If the lower and higher power of two are the same distance, it returns the higher power. +// For negative values, 1 is returned. +func nearestPowerOfTwo(n int) int { + if n <= 0 { + return 1 + } + if n == 1 { + return 2 + } + v := n + v-- + v |= v >> 1 + v |= v >> 2 + v |= v >> 4 + v |= v >> 8 + v |= v >> 16 + v++ // higher power of 2 + x := v >> 1 // lower power of 2 + if (v - n) > (n - x) { + return x + } + return v +} diff --git a/input/audio/audio_test.go b/input/audio/audio_test.go index 02bf7e0a..fff51b07 100644 --- a/input/audio/audio_test.go +++ b/input/audio/audio_test.go @@ -144,3 +144,13 @@ func TestDevice(t *testing.T) { time.Sleep(time.Duration(ac.RecPeriod*float64(time.Second)) * time.Duration(n)) ai.Stop() } + +func TestNearestPowerOfTwo(t *testing.T) { + testValues := []int{36, 47, 3, 46, 7, 2, 36, 757, 2464, 18980, 70000, 8192, 2048, 65536, -2048, -127, -1, 0, 1} + testAnswers := []int{32, 32, 4, 32, 8, 2, 32, 512, 2048, 16384, 65536, 8192, 2048, 65536, 1, 1, 1, 1, 2} + for i, v := range testValues { + if r := nearestPowerOfTwo(v); testAnswers[i] != r { + t.Errorf("test %v gave incorrect result: %v, should be %v", i, r, testAnswers[i]) + } + } +} From 2b4593fcc0c0e4c8b864e1725b125f18e4f13a1e Mon Sep 17 00:00:00 2001 
From: Trek H Date: Thu, 13 Jun 2019 23:42:26 +0930 Subject: [PATCH 37/57] audio: removed names from returns --- input/audio/audio.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/input/audio/audio.go b/input/audio/audio.go index 25fad00b..702ab35b 100644 --- a/input/audio/audio.go +++ b/input/audio/audio.go @@ -361,15 +361,15 @@ func (d *Device) input() { } // Read reads from the ringbuffer, returning the number of bytes read upon success. -func (d *Device) Read(p []byte) (n int, err error) { +func (d *Device) Read(p []byte) (int, error) { // Ready ringbuffer for read. - _, err = d.rb.Next(rbNextTimeout) + _, err := d.rb.Next(rbNextTimeout) if err != nil { return 0, err } // Read from ring buffer. - n, err = d.rb.Read(p) + n, err := d.rb.Read(p) if err != nil { return 0, err } From b0588cee1487e4499630b3d46ba8bb29d33435d6 Mon Sep 17 00:00:00 2001 From: Trek H Date: Fri, 14 Jun 2019 00:51:26 +0930 Subject: [PATCH 38/57] audio: fixed pcm exp cmds and removed buggy period negotiation --- exp/pcm/resample/resample.go | 4 ++-- exp/pcm/stereo-to-mono/stereo-to-mono.go | 4 ++-- input/audio/audio.go | 13 ++++--------- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/exp/pcm/resample/resample.go b/exp/pcm/resample/resample.go index 3d595bb8..f7f5342e 100644 --- a/exp/pcm/resample/resample.go +++ b/exp/pcm/resample/resample.go @@ -81,9 +81,9 @@ func main() { } // Save resampled to file. 
- err = ioutil.WriteFile(outPath, resampled, 0644) + err = ioutil.WriteFile(outPath, resampled.Data, 0644) if err != nil { log.Fatal(err) } - fmt.Println("Encoded and wrote", len(resampled), "bytes to file", outPath) + fmt.Println("Encoded and wrote", len(resampled.Data), "bytes to file", outPath) } diff --git a/exp/pcm/stereo-to-mono/stereo-to-mono.go b/exp/pcm/stereo-to-mono/stereo-to-mono.go index 7dbfd9a5..729caa96 100644 --- a/exp/pcm/stereo-to-mono/stereo-to-mono.go +++ b/exp/pcm/stereo-to-mono/stereo-to-mono.go @@ -77,9 +77,9 @@ func main() { } // Save mono to file. - err = ioutil.WriteFile(outPath, mono, 0644) + err = ioutil.WriteFile(outPath, mono.Data, 0644) if err != nil { log.Fatal(err) } - fmt.Println("Encoded and wrote", len(mono), "bytes to file", outPath) + fmt.Println("Encoded and wrote", len(mono.Data), "bytes to file", outPath) } diff --git a/input/audio/audio.go b/input/audio/audio.go index 702ab35b..0ca61e3f 100644 --- a/input/audio/audio.go +++ b/input/audio/audio.go @@ -284,17 +284,12 @@ func (d *Device) open() error { // Some devices only accept even period sizes while others want powers of 2. // So we will find the closest power of 2 to the desired period size. const wantPeriod = 0.05 //seconds - secondSize := devRate * devChan * (devBits / 8) - wantPeriodSize := int(float64(secondSize) * wantPeriod) + bytesPerSecond := devRate * devChan * (devBits / 8) + wantPeriodSize := int(float64(bytesPerSecond) * wantPeriod) nearWantPeriodSize := nearestPowerOfTwo(wantPeriodSize) - devPeriodSize, err := d.dev.NegotiatePeriodSize(nearWantPeriodSize) - if err != nil { - return err - } - d.l.Log(logger.Debug, pkg+"alsa device period size set", "periodsize", devPeriodSize) - - devBufferSize, err := d.dev.NegotiateBufferSize(devPeriodSize * 2) + // At least two period sizes should fit within the buffer. 
+ devBufferSize, err := d.dev.NegotiateBufferSize(nearWantPeriodSize * 2) if err != nil { return err } From 01561e363d5fce68c4205aa05acb36ba2e33e0b0 Mon Sep 17 00:00:00 2001 From: Trek H Date: Fri, 14 Jun 2019 19:49:49 +0930 Subject: [PATCH 39/57] codecutil and audio: tests written using test tables --- codec/codecutil/lex.go | 21 ++++++++---- codec/codecutil/lex_test.go | 64 +++++++++++++++++++++++++++++++++++++ input/audio/audio_test.go | 44 +++++++++++++++++++------ 3 files changed, 113 insertions(+), 16 deletions(-) create mode 100644 codec/codecutil/lex_test.go diff --git a/codec/codecutil/lex.go b/codec/codecutil/lex.go index 3423e1ea..64d258f0 100644 --- a/codec/codecutil/lex.go +++ b/codec/codecutil/lex.go @@ -25,29 +25,36 @@ LICENSE package codecutil import ( + "fmt" "io" "time" ) // LexBytes reads n bytes from src and writes them to dst every t seconds. func LexBytes(dst io.Writer, src io.Reader, t time.Duration, n int) error { + if n <= 0 { + return fmt.Errorf("invalid buffer size: %v", n) + } + if t < 0 { + return fmt.Errorf("invalid delay: %v", t) + } var tick <-chan time.Time - if t == 0 { - tick = make(chan time.Time) - } else { + if t > 0 { ticker := time.NewTicker(t) defer ticker.Stop() tick = ticker.C } + buf := make([]byte, n) for { - <-tick - buf := make([]byte, n) - _, err := src.Read(buf) + if t != 0 { + <-tick + } + off, err := src.Read(buf) if err != nil { return err } - _, err = dst.Write(buf) + _, err = dst.Write(buf[:off]) if err != nil { return err } diff --git a/codec/codecutil/lex_test.go b/codec/codecutil/lex_test.go new file mode 100644 index 00000000..ae7f591e --- /dev/null +++ b/codec/codecutil/lex_test.go @@ -0,0 +1,64 @@ +/* +NAME + lex_test.go + +AUTHOR + Trek Hopton + +LICENSE + This file is Copyright (C) 2019 the Australian Ocean Lab (AusOcean) + + It is free software: you can redistribute it and/or modify them + under the terms of the GNU General Public License as published by the + Free Software Foundation, either version 3 
of the License, or (at your + option) any later version. + + It is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License in gpl.txt. + If not, see [GNU licenses](http://www.gnu.org/licenses). +*/ + +package codecutil + +import ( + "bytes" + "io" + "strconv" + "testing" + "time" +) + +var lexTests = []struct { + data []byte + t time.Duration + n int + fail bool +}{ + {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Millisecond, 4, false}, + {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Millisecond, 3, false}, + {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Duration(0), 2, false}, + {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Duration(0), 1, false}, + {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Nanosecond, 0, true}, + {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Millisecond, -1, true}, + {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Millisecond, 15, false}, +} + +func TestLexBytes(t *testing.T) { + for i, tt := range lexTests { + t.Run(strconv.Itoa(i), func(t *testing.T) { + dst := bytes.NewBuffer([]byte{}) + err := LexBytes(dst, bytes.NewReader(tt.data), tt.t, tt.n) + if err != nil && err != io.EOF { + if !tt.fail { + t.Errorf("unexpected error: %v", err.Error()) + } + } else if !bytes.Equal(dst.Bytes(), tt.data) { + t.Errorf("data before and after lex are not equal: want %v, got %v", tt.data, dst.Bytes()) + } + }) + } +} diff --git a/input/audio/audio_test.go b/input/audio/audio_test.go index fff51b07..dc4556b4 100644 --- a/input/audio/audio_test.go +++ b/input/audio/audio_test.go @@ -25,9 +25,10 @@ LICENSE package audio import ( - "bytes" "errors" + "io/ioutil" "os" + "strconv" "testing" "time" @@ -135,22 +136,47 @@ 
func TestDevice(t *testing.T) { if err != nil { t.Error(err) } - dst := bytes.NewBuffer(make([]byte, 0)) err = ai.Start() if err != nil { t.Error(err) } - go codecutil.LexBytes(dst, ai, time.Duration(ac.RecPeriod*float64(time.Second)), ai.ChunkSize()) + go codecutil.LexBytes(ioutil.Discard, ai, time.Duration(ac.RecPeriod*float64(time.Second)), ai.ChunkSize()) time.Sleep(time.Duration(ac.RecPeriod*float64(time.Second)) * time.Duration(n)) ai.Stop() } +var powerTests = []struct { + in int + out int +}{ + {36, 32}, + {47, 32}, + {3, 4}, + {46, 32}, + {7, 8}, + {2, 2}, + {36, 32}, + {757, 512}, + {2464, 2048}, + {18980, 16384}, + {70000, 65536}, + {8192, 8192}, + {2048, 2048}, + {65536, 65536}, + {-2048, 1}, + {-127, 1}, + {-1, 1}, + {0, 1}, + {1, 2}, +} + func TestNearestPowerOfTwo(t *testing.T) { - testValues := []int{36, 47, 3, 46, 7, 2, 36, 757, 2464, 18980, 70000, 8192, 2048, 65536, -2048, -127, -1, 0, 1} - testAnswers := []int{32, 32, 4, 32, 8, 2, 32, 512, 2048, 16384, 65536, 8192, 2048, 65536, 1, 1, 1, 1, 2} - for i, v := range testValues { - if r := nearestPowerOfTwo(v); testAnswers[i] != r { - t.Errorf("test %v gave incorrect result: %v, should be %v", i, r, testAnswers[i]) - } + for _, tt := range powerTests { + t.Run(strconv.Itoa(tt.in), func(t *testing.T) { + v := nearestPowerOfTwo(tt.in) + if v != tt.out { + t.Errorf("got %v, want %v", v, tt.out) + } + }) } } From fb12a2f69e540fc75218c4e4f31276f7d494847c Mon Sep 17 00:00:00 2001 From: Trek H Date: Mon, 17 Jun 2019 13:29:01 +0930 Subject: [PATCH 40/57] pcm, audio: style changes --- codec/pcm/pcm.go | 32 +++++++++++++------------------- input/audio/audio.go | 31 ++++++++++++------------------- 2 files changed, 25 insertions(+), 38 deletions(-) diff --git a/codec/pcm/pcm.go b/codec/pcm/pcm.go index 4882ffc3..8093401e 100644 --- a/codec/pcm/pcm.go +++ b/codec/pcm/pcm.go @@ -41,15 +41,14 @@ import ( // - If the number of bytes in b.Data is not divisible by the decimation factor (ratioFrom), the remaining bytes 
will // not be included in the result. Eg. input of length 480002 downsampling 6:1 will result in output length 80000. func Resample(b alsa.Buffer, rate int) (alsa.Buffer, error) { - var newBuf alsa.Buffer if b.Format.Rate == rate { - return newBuf, nil + return b, nil } if b.Format.Rate < 0 { - return newBuf, fmt.Errorf("Unable to convert from: %v Hz", b.Format.Rate) + return alsa.Buffer{}, fmt.Errorf("Unable to convert from: %v Hz", b.Format.Rate) } if rate < 0 { - return newBuf, fmt.Errorf("Unable to convert to: %v Hz", rate) + return alsa.Buffer{}, fmt.Errorf("Unable to convert to: %v Hz", rate) } // The number of bytes in a sample. @@ -60,7 +59,7 @@ func Resample(b alsa.Buffer, rate int) (alsa.Buffer, error) { case alsa.S16_LE: sampleLen = 2 * b.Format.Channels default: - return newBuf, fmt.Errorf("Unhandled ALSA format: %v", b.Format.SampleFormat) + return alsa.Buffer{}, fmt.Errorf("Unhandled ALSA format: %v", b.Format.SampleFormat) } inPcmLen := len(b.Data) @@ -71,7 +70,7 @@ func Resample(b alsa.Buffer, rate int) (alsa.Buffer, error) { // ratioTo = 1 is the only number that will result in an even sampling. if ratioTo != 1 { - return newBuf, fmt.Errorf("unhandled from:to rate ratio %v:%v: 'to' must be 1", ratioFrom, ratioTo) + return alsa.Buffer{}, fmt.Errorf("unhandled from:to rate ratio %v:%v: 'to' must be 1", ratioFrom, ratioTo) } newLen := inPcmLen / ratioFrom @@ -100,28 +99,25 @@ func Resample(b alsa.Buffer, rate int) (alsa.Buffer, error) { resampled = append(resampled, bAvg...) } - // Create new alsa.Buffer with resampled data. - newBuf = alsa.Buffer{ + // Return a new alsa.Buffer with resampled data. 
+ return alsa.Buffer{ Format: alsa.BufferFormat{ Channels: b.Format.Channels, SampleFormat: b.Format.SampleFormat, Rate: rate, }, Data: resampled, - } - - return newBuf, nil + }, nil } // StereoToMono returns raw mono audio data generated from only the left channel from // the given stereo recording (ALSA buffer) func StereoToMono(b alsa.Buffer) (alsa.Buffer, error) { - var newBuf alsa.Buffer if b.Format.Channels == 1 { return b, nil } if b.Format.Channels != 2 { - return newBuf, fmt.Errorf("Audio is not stereo or mono, it has %v channels", b.Format.Channels) + return alsa.Buffer{}, fmt.Errorf("Audio is not stereo or mono, it has %v channels", b.Format.Channels) } var stereoSampleBytes int @@ -131,7 +127,7 @@ func StereoToMono(b alsa.Buffer) (alsa.Buffer, error) { case alsa.S16_LE: stereoSampleBytes = 4 default: - return newBuf, fmt.Errorf("Unhandled ALSA format %v", b.Format.SampleFormat) + return alsa.Buffer{}, fmt.Errorf("Unhandled ALSA format %v", b.Format.SampleFormat) } recLength := len(b.Data) @@ -147,17 +143,15 @@ func StereoToMono(b alsa.Buffer) (alsa.Buffer, error) { } } - // Create new alsa.Buffer with resampled data. - newBuf = alsa.Buffer{ + // Return a new alsa.Buffer with resampled data. + return alsa.Buffer{ Format: alsa.BufferFormat{ Channels: 1, SampleFormat: b.Format.SampleFormat, Rate: b.Format.Rate, }, Data: mono, - } - - return newBuf, nil + }, nil } // gcd is used for calculating the greatest common divisor of two positive integers, a and b. diff --git a/input/audio/audio.go b/input/audio/audio.go index 0ca61e3f..4cfbd695 100644 --- a/input/audio/audio.go +++ b/input/audio/audio.go @@ -43,13 +43,16 @@ import ( ) const ( - pkg = "pkg: " + pkg = "audio: " rbTimeout = 100 * time.Millisecond rbNextTimeout = 100 * time.Millisecond rbLen = 200 defaultSampleRate = 48000 ) +// "running" means the input goroutine is reading from the ALSA device and writing to the ringbuffer. +// "paused" means the input routine is sleeping until unpaused or stopped. 
+// "stopped" means the input routine is stopped and the ALSA device is closed. const ( running = iota paused @@ -59,24 +62,17 @@ const ( // Rates contains the standard audio sample rates used by package audio. var Rates = [8]int{8000, 16000, 32000, 44100, 48000, 88200, 96000, 192000} -// Device holds everything we need to know about the audio input stream. +// Device holds everything we need to know about the audio input stream and implements io.Reader. type Device struct { - l Logger - - // Operating mode, either running, paused, or stopped. - // "running" means the input goroutine is reading from the ALSA device and writing to the ringbuffer. - // "paused" means the input routine is sleeping until unpaused or stopped. - // "stopped" means the input routine is stopped and the ALSA device is closed. - mode uint8 - - mu sync.Mutex + l Logger // Logger for device's routines to log to. + mode uint8 // Operating mode, either running, paused, or stopped. + mu sync.Mutex // Provides synchronisation when changing modes concurrently. title string // Name of audio title, or empty for the default title. - dev *alsa.Device // Audio input device. + dev *alsa.Device // ALSA's Audio input device. ab alsa.Buffer // ALSA's buffer. rb *ring.Buffer // Our buffer. - chunkSize int // This is the number of bytes that will be stored at a time. - - *Config + chunkSize int // This is the number of bytes that will be stored in rb at a time. + *Config // Configuration parameters for this device. } // Config provides parameters used by Device. 
@@ -155,7 +151,7 @@ func (d *Device) Start() error { case running: return nil default: - return errors.New("invalid mode") + return fmt.Errorf("invalid mode: %d", mode) } } @@ -185,7 +181,6 @@ func (d *Device) open() error { d.l.Log(logger.Debug, pkg+"opening sound card") cards, err := alsa.OpenCards() if err != nil { - d.l.Log(logger.Debug, pkg+"failed to open sound card") return err } defer alsa.CloseCards(cards) @@ -207,14 +202,12 @@ func (d *Device) open() error { } } if d.dev == nil { - d.l.Log(logger.Debug, pkg+"failed to find audio device") return errors.New("no audio device found") } d.l.Log(logger.Debug, pkg+"opening audio device", "title", d.dev.Title) err = d.dev.Open() if err != nil { - d.l.Log(logger.Debug, pkg+"failed to open audio device") return err } From bcd59b98d2fcf4d0cfbd38bbf81a3a8fc31f820a Mon Sep 17 00:00:00 2001 From: Trek H Date: Tue, 18 Jun 2019 01:13:42 +0930 Subject: [PATCH 41/57] audio: added error type for opening devices for simpler testing also style changes and formatting etc. --- input/audio/audio.go | 64 +++++++++++++++------------- input/audio/audio_test.go | 89 +++------------------------------------ 2 files changed, 39 insertions(+), 114 deletions(-) diff --git a/input/audio/audio.go b/input/audio/audio.go index 4cfbd695..9ca858f0 100644 --- a/input/audio/audio.go +++ b/input/audio/audio.go @@ -59,9 +59,6 @@ const ( stopped ) -// Rates contains the standard audio sample rates used by package audio. -var Rates = [8]int{8000, 16000, 32000, 44100, 48000, 88200, 96000, 192000} - // Device holds everything we need to know about the audio input stream and implements io.Reader. type Device struct { l Logger // Logger for device's routines to log to. @@ -91,6 +88,9 @@ type Logger interface { Log(level int8, message string, params ...interface{}) } +// OpenError is used to determine whether an error has originated from attempting to open a device. 
+type OpenError error + // NewDevice initializes and returns an Device which can be started, read from, and stopped. func NewDevice(cfg *Config, l Logger) (*Device, error) { d := &Device{ @@ -135,6 +135,7 @@ func NewDevice(cfg *Config, l Logger) (*Device, error) { } // Start will start recording audio and writing to the ringbuffer. +// Once a Device has been stopped it cannot be started again. This is likely to change in future. func (d *Device) Start() error { d.mu.Lock() mode := d.mode @@ -156,6 +157,7 @@ func (d *Device) Start() error { } // Stop will stop recording audio and close the device. +// Once a Device has been stopped it cannot be started again. This is likely to change in future. func (d *Device) Stop() { d.mu.Lock() d.mode = stopped @@ -181,7 +183,7 @@ func (d *Device) open() error { d.l.Log(logger.Debug, pkg+"opening sound card") cards, err := alsa.OpenCards() if err != nil { - return err + return OpenError(err) } defer alsa.CloseCards(cards) @@ -202,39 +204,41 @@ func (d *Device) open() error { } } if d.dev == nil { - return errors.New("no audio device found") + return OpenError(errors.New("no audio device found")) } d.l.Log(logger.Debug, pkg+"opening audio device", "title", d.dev.Title) err = d.dev.Open() if err != nil { - return err + return OpenError(err) } // 2 channels is what most devices need to record in. If mono is requested, // the recording will be converted in formatBuffer(). - devChan, err := d.dev.NegotiateChannels(2) + channels, err := d.dev.NegotiateChannels(2) if err != nil { - return err + return OpenError(err) } - d.l.Log(logger.Debug, pkg+"alsa device channels set", "channels", devChan) + d.l.Log(logger.Debug, pkg+"alsa device channels set", "channels", channels) // Try to negotiate a rate to record in that is divisible by the wanted rate // so that it can be easily downsampled to the wanted rate. 
+ // rates is a slice of common sample rates including the standard for CD (44100Hz) and standard for professional audio recording (48000Hz). // Note: if a card thinks it can record at a rate but can't actually, this can cause a failure. // Eg. the audioinjector sound card is supposed to record at 8000Hz and 16000Hz but it can't due to a firmware issue, - // a fix for this is to remove 8000 and 16000 from the Rates slice. + // a fix for this is to remove 8000 and 16000 from the rates slice. + var rates = [8]int{8000, 16000, 32000, 44100, 48000, 88200, 96000, 192000} foundRate := false - var devRate int - for i := 0; i < len(Rates) && !foundRate; i++ { - if Rates[i] < d.SampleRate { + var rate int + for i := 0; i < len(rates) && !foundRate; i++ { + if rates[i] < d.SampleRate { continue } - if Rates[i]%d.SampleRate == 0 { - devRate, err = d.dev.NegotiateRate(Rates[i]) + if rates[i]%d.SampleRate == 0 { + rate, err = d.dev.NegotiateRate(rates[i]) if err == nil { foundRate = true - d.l.Log(logger.Debug, pkg+"alsa device sample rate set", "rate", devRate) + d.l.Log(logger.Debug, pkg+"alsa device sample rate set", "rate", rate) } } } @@ -242,11 +246,11 @@ func (d *Device) open() error { // If no easily divisible rate is found, then use the default rate. 
if !foundRate { d.l.Log(logger.Warning, pkg+"Unable to sample at requested rate, default used.", "rateRequested", d.SampleRate) - devRate, err = d.dev.NegotiateRate(defaultSampleRate) + rate, err = d.dev.NegotiateRate(defaultSampleRate) if err != nil { - return err + return OpenError(err) } - d.l.Log(logger.Debug, pkg+"alsa device sample rate set", "rate", devRate) + d.l.Log(logger.Debug, pkg+"alsa device sample rate set", "rate", rate) } var aFmt alsa.FormatType @@ -256,40 +260,40 @@ func (d *Device) open() error { case 32: aFmt = alsa.S32_LE default: - return fmt.Errorf("unsupported sample bits %v", d.BitDepth) + return OpenError(fmt.Errorf("unsupported sample bits %v", d.BitDepth)) } devFmt, err := d.dev.NegotiateFormat(aFmt) if err != nil { return err } - var devBits int + var bitdepth int switch devFmt { case alsa.S16_LE: - devBits = 16 + bitdepth = 16 case alsa.S32_LE: - devBits = 32 + bitdepth = 32 default: - return fmt.Errorf("unsupported sample bits %v", d.BitDepth) + return OpenError(fmt.Errorf("unsupported sample bits %v", d.BitDepth)) } - d.l.Log(logger.Debug, pkg+"alsa device bit depth set", "bitdepth", devBits) + d.l.Log(logger.Debug, pkg+"alsa device bit depth set", "bitdepth", bitdepth) // A 50ms period is a sensible value for low-ish latency. (this could be made configurable if needed) // Some devices only accept even period sizes while others want powers of 2. // So we will find the closest power of 2 to the desired period size. const wantPeriod = 0.05 //seconds - bytesPerSecond := devRate * devChan * (devBits / 8) + bytesPerSecond := rate * channels * (bitdepth / 8) wantPeriodSize := int(float64(bytesPerSecond) * wantPeriod) nearWantPeriodSize := nearestPowerOfTwo(wantPeriodSize) // At least two period sizes should fit within the buffer. 
- devBufferSize, err := d.dev.NegotiateBufferSize(nearWantPeriodSize * 2) + bufSize, err := d.dev.NegotiateBufferSize(nearWantPeriodSize * 2) if err != nil { - return err + return OpenError(err) } - d.l.Log(logger.Debug, pkg+"alsa device buffer size set", "buffersize", devBufferSize) + d.l.Log(logger.Debug, pkg+"alsa device buffer size set", "buffersize", bufSize) if err = d.dev.Prepare(); err != nil { - return err + return OpenError(err) } d.l.Log(logger.Debug, pkg+"successfully negotiated ALSA params") diff --git a/input/audio/audio_test.go b/input/audio/audio_test.go index dc4556b4..f26bb4c3 100644 --- a/input/audio/audio_test.go +++ b/input/audio/audio_test.go @@ -25,7 +25,6 @@ LICENSE package audio import ( - "errors" "io/ioutil" "os" "strconv" @@ -34,85 +33,8 @@ import ( "bitbucket.org/ausocean/av/codec/codecutil" "bitbucket.org/ausocean/utils/logger" - "github.com/yobert/alsa" ) -// Check that a device exists with the given config parameters. -func checkDevice(ac *Config) error { - cards, err := alsa.OpenCards() - if err != nil { - return errors.New("no audio cards found") - } - defer alsa.CloseCards(cards) - var testDev *alsa.Device - for _, card := range cards { - devices, err := card.Devices() - if err != nil { - continue - } - for _, dev := range devices { - if dev.Type != alsa.PCM || !dev.Record { - continue - } - testDev = dev - break - } - } - if testDev == nil { - return errors.New("no suitable device found") - } - err = testDev.Open() - if err != nil { - return err - } - _, err = testDev.NegotiateChannels(2) - if err != nil { - return err - } - foundRate := false - for i := 0; i < len(Rates) && !foundRate; i++ { - if Rates[i] < ac.SampleRate { - continue - } - if Rates[i]%ac.SampleRate == 0 { - _, err = testDev.NegotiateRate(Rates[i]) - if err == nil { - foundRate = true - } - } - } - if !foundRate { - _, err = testDev.NegotiateRate(defaultSampleRate) - if err != nil { - return err - } - } - var aFmt alsa.FormatType - switch ac.BitDepth { - case 16: 
- aFmt = alsa.S16_LE - case 32: - aFmt = alsa.S32_LE - default: - return errors.New("unsupported bitdepth") - } - _, err = testDev.NegotiateFormat(aFmt) - if err != nil { - return err - } - _, err = testDev.NegotiateBufferSize(8192, 16384) - if err != nil { - return err - } - if err = testDev.Prepare(); err != nil { - return err - } - if testDev != nil { - testDev.Close() - } - return nil -} - func TestDevice(t *testing.T) { // We want to open a device with a standard configuration. ac := &Config{ @@ -124,15 +46,14 @@ func TestDevice(t *testing.T) { } n := 2 // Number of periods to wait while recording. - // Skip if there are no suitable devices to test with. - err := checkDevice(ac) - if err != nil { - t.Skip(err) - } - // Create a new audio Device, start, read/lex, and then stop it. l := logger.New(logger.Debug, os.Stderr) ai, err := NewDevice(ac, l) + // If there was an error opening the device, skip this test. + if _, ok := err.(OpenError); ok { + t.Skip(err) + } + // For any other error, report it. if err != nil { t.Error(err) } From b418944daa6c950c1e0e295de4fe95753cb514fa Mon Sep 17 00:00:00 2001 From: Trek H Date: Tue, 18 Jun 2019 14:50:36 +0930 Subject: [PATCH 42/57] codecutil: added ByteLexer struct for configuring buffer size --- codec/codecutil/lex.go | 20 +++++++++++++++----- codec/codecutil/lex_test.go | 11 ++++++++--- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/codec/codecutil/lex.go b/codec/codecutil/lex.go index 64d258f0..2b0f1b05 100644 --- a/codec/codecutil/lex.go +++ b/codec/codecutil/lex.go @@ -30,11 +30,21 @@ import ( "time" ) -// LexBytes reads n bytes from src and writes them to dst every t seconds. -func LexBytes(dst io.Writer, src io.Reader, t time.Duration, n int) error { - if n <= 0 { - return fmt.Errorf("invalid buffer size: %v", n) +// ByteLexer is used to lex a certain number of bytes per a given delay, the number is configured upon construction. 
+type ByteLexer struct { + bufSize int +} + +// NewByteLexer returns a pointer to a ByteLexer with the given buffer size. +func NewByteLexer(bufSize int) (*ByteLexer, error) { + if bufSize <= 0 { + return nil, fmt.Errorf("invalid buffer size: %v", bufSize) } + return &ByteLexer{bufSize: bufSize}, nil +} + +// Lex reads l.bufSize bytes from src and writes them to dst every t seconds. +func (l *ByteLexer) Lex(dst io.Writer, src io.Reader, t time.Duration) error { if t < 0 { return fmt.Errorf("invalid delay: %v", t) } @@ -45,7 +55,7 @@ func LexBytes(dst io.Writer, src io.Reader, t time.Duration, n int) error { tick = ticker.C } - buf := make([]byte, n) + buf := make([]byte, l.bufSize) for { if t != 0 { <-tick diff --git a/codec/codecutil/lex_test.go b/codec/codecutil/lex_test.go index ae7f591e..9264f3d1 100644 --- a/codec/codecutil/lex_test.go +++ b/codec/codecutil/lex_test.go @@ -47,15 +47,20 @@ var lexTests = []struct { {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Millisecond, 15, false}, } -func TestLexBytes(t *testing.T) { +func TestByteLexer(t *testing.T) { for i, tt := range lexTests { t.Run(strconv.Itoa(i), func(t *testing.T) { dst := bytes.NewBuffer([]byte{}) - err := LexBytes(dst, bytes.NewReader(tt.data), tt.t, tt.n) - if err != nil && err != io.EOF { + l, err := NewByteLexer(tt.n) + if err != nil { if !tt.fail { t.Errorf("unexpected error: %v", err.Error()) } + return + } + err = l.Lex(dst, bytes.NewReader(tt.data), tt.t) + if err != nil && err != io.EOF { + t.Errorf("unexpected error: %v", err.Error()) } else if !bytes.Equal(dst.Bytes(), tt.data) { t.Errorf("data before and after lex are not equal: want %v, got %v", tt.data, dst.Bytes()) } From 6dd70639fe81d7313860f4668ef5039c35c26e8d Mon Sep 17 00:00:00 2001 From: Trek H Date: Tue, 18 Jun 2019 17:24:32 +0930 Subject: [PATCH 43/57] audio: removed bufSize arg in Lex funcs and use ByteLexer --- codec/codecutil/lex.go | 22 +++++++++++++--------- codec/codecutil/lex_test.go | 12 ++++-------- 
codec/h264/lex.go | 2 +- codec/h265/lex.go | 2 +- codec/h265/lex_test.go | 2 +- codec/mjpeg/lex.go | 2 +- input/audio/audio_test.go | 4 +++- revid/config.go | 1 + revid/revid.go | 19 ++++++++++--------- 9 files changed, 35 insertions(+), 31 deletions(-) diff --git a/codec/codecutil/lex.go b/codec/codecutil/lex.go index 2b0f1b05..e727e07d 100644 --- a/codec/codecutil/lex.go +++ b/codec/codecutil/lex.go @@ -30,21 +30,25 @@ import ( "time" ) -// ByteLexer is used to lex a certain number of bytes per a given delay, the number is configured upon construction. +// ByteLexer is used to lex bytes using a buffer size which is configured upon construction. type ByteLexer struct { - bufSize int + bufSize *int } // NewByteLexer returns a pointer to a ByteLexer with the given buffer size. -func NewByteLexer(bufSize int) (*ByteLexer, error) { - if bufSize <= 0 { - return nil, fmt.Errorf("invalid buffer size: %v", bufSize) - } - return &ByteLexer{bufSize: bufSize}, nil +func NewByteLexer(bufSize *int) *ByteLexer { + return &ByteLexer{bufSize: bufSize} } -// Lex reads l.bufSize bytes from src and writes them to dst every t seconds. +// Lex reads *l.bufSize bytes from src and writes them to dst every t seconds. 
func (l *ByteLexer) Lex(dst io.Writer, src io.Reader, t time.Duration) error { + if l.bufSize == nil { + return fmt.Errorf("buffer size has not been set") + } + bufSize := *l.bufSize + if bufSize <= 0 { + return fmt.Errorf("invalid buffer size: %v", bufSize) + } if t < 0 { return fmt.Errorf("invalid delay: %v", t) } @@ -55,7 +59,7 @@ func (l *ByteLexer) Lex(dst io.Writer, src io.Reader, t time.Duration) error { tick = ticker.C } - buf := make([]byte, l.bufSize) + buf := make([]byte, bufSize) for { if t != 0 { <-tick diff --git a/codec/codecutil/lex_test.go b/codec/codecutil/lex_test.go index 9264f3d1..63162abd 100644 --- a/codec/codecutil/lex_test.go +++ b/codec/codecutil/lex_test.go @@ -36,7 +36,7 @@ var lexTests = []struct { data []byte t time.Duration n int - fail bool + fail bool // Whether or not this test should fail. }{ {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Millisecond, 4, false}, {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Millisecond, 3, false}, @@ -51,16 +51,12 @@ func TestByteLexer(t *testing.T) { for i, tt := range lexTests { t.Run(strconv.Itoa(i), func(t *testing.T) { dst := bytes.NewBuffer([]byte{}) - l, err := NewByteLexer(tt.n) - if err != nil { + l := NewByteLexer(&tt.n) + err := l.Lex(dst, bytes.NewReader(tt.data), tt.t) + if err != nil && err != io.EOF { if !tt.fail { t.Errorf("unexpected error: %v", err.Error()) } - return - } - err = l.Lex(dst, bytes.NewReader(tt.data), tt.t) - if err != nil && err != io.EOF { - t.Errorf("unexpected error: %v", err.Error()) } else if !bytes.Equal(dst.Bytes(), tt.data) { t.Errorf("data before and after lex are not equal: want %v, got %v", tt.data, dst.Bytes()) } diff --git a/codec/h264/lex.go b/codec/h264/lex.go index b210ee25..176c8b3b 100644 --- a/codec/h264/lex.go +++ b/codec/h264/lex.go @@ -49,7 +49,7 @@ var h264Prefix = [...]byte{0x00, 0x00, 0x01, 0x09, 0xf0} // to dst with successive writes being performed not earlier than the specified // delay. 
NAL units are split after type 1 (Coded slice of a non-IDR picture), 5 // (Coded slice of a IDR picture) and 8 (Picture parameter set). -func Lex(dst io.Writer, src io.Reader, delay time.Duration, n int) error { +func Lex(dst io.Writer, src io.Reader, delay time.Duration) error { var tick <-chan time.Time if delay == 0 { tick = noDelay diff --git a/codec/h265/lex.go b/codec/h265/lex.go index 7593fe5e..ebe34013 100644 --- a/codec/h265/lex.go +++ b/codec/h265/lex.go @@ -70,7 +70,7 @@ func NewLexer(donl bool) *Lexer { // Lex continually reads RTP packets from the io.Reader src and lexes into // access units which are written to the io.Writer dst. Lex expects that for // each read from src, a single RTP packet is received. -func (l *Lexer) Lex(dst io.Writer, src io.Reader, delay time.Duration, n int) error { +func (l *Lexer) Lex(dst io.Writer, src io.Reader, delay time.Duration) error { buf := make([]byte, maxRTPSize) for { n, err := src.Read(buf) diff --git a/codec/h265/lex_test.go b/codec/h265/lex_test.go index 02ed5f1f..1a409e4c 100644 --- a/codec/h265/lex_test.go +++ b/codec/h265/lex_test.go @@ -246,7 +246,7 @@ func TestLex(t *testing.T) { for testNum, test := range tests { r := &rtpReader{packets: test.packets} d := &destination{} - err := NewLexer(test.donl).Lex(d, r, 0, 0) + err := NewLexer(test.donl).Lex(d, r, 0) if err != nil { t.Fatalf("error lexing: %v\n", err) } diff --git a/codec/mjpeg/lex.go b/codec/mjpeg/lex.go index 21717fe6..da2ecae1 100644 --- a/codec/mjpeg/lex.go +++ b/codec/mjpeg/lex.go @@ -45,7 +45,7 @@ func init() { // Lex parses MJPEG frames read from src into separate writes to dst with // successive writes being performed not earlier than the specified delay. 
-func Lex(dst io.Writer, src io.Reader, delay time.Duration, n int) error { +func Lex(dst io.Writer, src io.Reader, delay time.Duration) error { var tick <-chan time.Time if delay == 0 { tick = noDelay diff --git a/input/audio/audio_test.go b/input/audio/audio_test.go index f26bb4c3..5618c63a 100644 --- a/input/audio/audio_test.go +++ b/input/audio/audio_test.go @@ -61,7 +61,9 @@ func TestDevice(t *testing.T) { if err != nil { t.Error(err) } - go codecutil.LexBytes(ioutil.Discard, ai, time.Duration(ac.RecPeriod*float64(time.Second)), ai.ChunkSize()) + chunkSize := ai.ChunkSize() + lexer := codecutil.NewByteLexer(&chunkSize) + go lexer.Lex(ioutil.Discard, ai, time.Duration(ac.RecPeriod*float64(time.Second))) time.Sleep(time.Duration(ac.RecPeriod*float64(time.Second)) * time.Duration(n)) ai.Stop() } diff --git a/revid/config.go b/revid/config.go index e34a1f1a..cf108db6 100644 --- a/revid/config.go +++ b/revid/config.go @@ -230,6 +230,7 @@ type Config struct { RecPeriod float64 // How many seconds to record at a time. Channels int // Number of audio channels, 1 for mono, 2 for stereo. BitDepth int // Sample bit depth. + ChunkSize int // ChunkSize is the size of the chunks in the audio.Device's ringbuffer. RTPAddress string // RTPAddress defines the RTP output destination. BurstPeriod uint // BurstPeriod defines the revid burst period in seconds. diff --git a/revid/revid.go b/revid/revid.go index 43075c08..f64f13fb 100644 --- a/revid/revid.go +++ b/revid/revid.go @@ -106,7 +106,7 @@ type Revid struct { cmd *exec.Cmd // lexTo, encoder and packer handle transcoding the input stream. - lexTo func(dest io.Writer, src io.Reader, delay time.Duration, bufSize int) error + lexTo func(dest io.Writer, src io.Reader, delay time.Duration) error // encoders will hold the multiWriteCloser that writes to encoders from the lexer. encoders io.WriteCloser @@ -294,7 +294,7 @@ func (r *Revid) setupPipeline(mtsEnc func(dst io.WriteCloser, rate float64) (io. 
r.lexTo = h265.NewLexer(false).Lex case Audio: r.setupInput = r.startAudioDevice - r.lexTo = codecutil.LexBytes + r.lexTo = codecutil.NewByteLexer(&r.config.ChunkSize).Lex } return nil @@ -564,7 +564,7 @@ func (r *Revid) startRaspivid() (func() error, error) { } r.wg.Add(1) - go r.processFrom(stdout, 0, 0) + go r.processFrom(stdout, 0) return nil, nil } @@ -606,7 +606,7 @@ func (r *Revid) startV4L() (func() error, error) { } r.wg.Add(1) - go r.processFrom(stdout, time.Duration(0), 0) + go r.processFrom(stdout, time.Duration(0)) return nil, nil } @@ -621,7 +621,7 @@ func (r *Revid) setupInputForFile() (func() error, error) { // TODO(kortschak): Maybe we want a context.Context-aware parser that we can stop. r.wg.Add(1) - go r.processFrom(f, 0, 0) + go r.processFrom(f, 0) return func() error { return f.Close() }, nil } @@ -660,8 +660,9 @@ func (r *Revid) startAudioDevice() (func() error, error) { } // Process output from audio device. + r.config.ChunkSize = ai.ChunkSize() r.wg.Add(1) - go r.processFrom(ai, time.Duration(float64(time.Second)/r.config.WriteRate), ai.ChunkSize()) + go r.processFrom(ai, time.Duration(float64(time.Second)/r.config.WriteRate)) return func() error { ai.Stop() return nil @@ -732,7 +733,7 @@ func (r *Revid) startRTSPCamera() (func() error, error) { // Start reading data from the RTP client. 
r.wg.Add(1) - go r.processFrom(rtpClt, time.Second/time.Duration(r.config.FrameRate), 0) + go r.processFrom(rtpClt, time.Second/time.Duration(r.config.FrameRate)) return func() error { rtspClt.Close() @@ -770,9 +771,9 @@ func parseSvrRTCPPort(resp rtsp.Response) (int, error) { return 0, errors.New("SETUP response did not provide RTCP port") } -func (r *Revid) processFrom(read io.Reader, delay time.Duration, bufSize int) { +func (r *Revid) processFrom(read io.Reader, delay time.Duration) { r.config.Logger.Log(logger.Info, pkg+"reading input data") - r.err <- r.lexTo(r.encoders, read, delay, bufSize) + r.err <- r.lexTo(r.encoders, read, delay) r.config.Logger.Log(logger.Info, pkg+"finished reading input data") r.wg.Done() } From ba67d6d43d177d36314aa141ada20e763fecdfb0 Mon Sep 17 00:00:00 2001 From: Trek H Date: Tue, 18 Jun 2019 17:47:37 +0930 Subject: [PATCH 44/57] audio: updated tests to remove bufSize arg --- protocol/rtmp/rtmp_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/protocol/rtmp/rtmp_test.go b/protocol/rtmp/rtmp_test.go index cf5f505c..e1e79796 100644 --- a/protocol/rtmp/rtmp_test.go +++ b/protocol/rtmp/rtmp_test.go @@ -199,7 +199,7 @@ func TestFromFrame(t *testing.T) { if err != nil { t.Errorf("Failed to create flv encoder with error: %v", err) } - err = h264.Lex(flvEncoder, bytes.NewReader(videoData), time.Second/time.Duration(frameRate), 0) + err = h264.Lex(flvEncoder, bytes.NewReader(videoData), time.Second/time.Duration(frameRate)) if err != nil { t.Errorf("Lexing failed with error: %v", err) } @@ -251,7 +251,7 @@ func TestFromFile(t *testing.T) { if err != nil { t.Fatalf("failed to create encoder: %v", err) } - err = h264.Lex(flvEncoder, f, time.Second/time.Duration(25), 0) + err = h264.Lex(flvEncoder, f, time.Second/time.Duration(25)) if err != nil { t.Errorf("Lexing and encoding failed with error: %v", err) } From 2d5ba8cd8f38a5414660723b6dbb88556478d690 Mon Sep 17 00:00:00 2001 From: Trek H Date: Tue, 18 Jun 2019 
18:35:45 +0930 Subject: [PATCH 45/57] audio: added validate functions for device config --- codec/codecutil/list.go | 14 ++++++++++++++ input/audio/audio.go | 24 ++++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/codec/codecutil/list.go b/codec/codecutil/list.go index cd5685f6..ecba3659 100644 --- a/codec/codecutil/list.go +++ b/codec/codecutil/list.go @@ -24,7 +24,13 @@ LICENSE package codecutil +import "fmt" + +// numCodecs is the number of entries in the list of codecs. +const numCodecs = 5 + // A global list containing all available codecs for reference in any application. +// When adding or removing a codec from this list, the numCodecs const must be updated. const ( PCM = iota ADPCM @@ -32,3 +38,11 @@ const ( H265 MJPEG ) + +// Validate recieves an int representing a codec and checks if it is valid. +func Validate(codec uint8) error { + if codec < 0 || codec >= numCodecs { + return fmt.Errorf("invalid codec") + } + return nil +} diff --git a/input/audio/audio.go b/input/audio/audio.go index 9ca858f0..5d97d7c8 100644 --- a/input/audio/audio.go +++ b/input/audio/audio.go @@ -93,6 +93,9 @@ type OpenError error // NewDevice initializes and returns an Device which can be started, read from, and stopped. func NewDevice(cfg *Config, l Logger) (*Device, error) { + + validate(cfg) + d := &Device{ Config: cfg, l: l, @@ -169,6 +172,27 @@ func (d *Device) ChunkSize() int { return d.chunkSize } +// validate checks if Config parameters are valid and returns an error if they are not. 
+func validate(c *Config) error { + if c.SampleRate <= 0 { + return fmt.Errorf("invalid sample rate: %v", c.SampleRate) + } + if c.Channels <= 0 { + return fmt.Errorf("invalid number of channels: %v", c.Channels) + } + if c.BitDepth <= 0 { + return fmt.Errorf("invalid bitdepth: %v", c.BitDepth) + } + if c.RecPeriod <= 0 { + return fmt.Errorf("invalid recording period: %v", c.RecPeriod) + } + err := codecutil.Validate(c.Codec) + if err != nil { + return err + } + return nil +} + // open the recording device with the given name and prepare it to record. // If name is empty, the first recording device is used. func (d *Device) open() error { From 370aa19c23ff2fd0c11ad00e3adc6fa8cf2c205b Mon Sep 17 00:00:00 2001 From: Trek H Date: Tue, 18 Jun 2019 19:03:38 +0930 Subject: [PATCH 46/57] audio: style and doc --- input/audio/audio.go | 19 +++++++++---------- revid/revid.go | 1 + revid/revid_test.go | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/input/audio/audio.go b/input/audio/audio.go index 5d97d7c8..745b71fe 100644 --- a/input/audio/audio.go +++ b/input/audio/audio.go @@ -252,17 +252,19 @@ func (d *Device) open() error { // Eg. the audioinjector sound card is supposed to record at 8000Hz and 16000Hz but it can't due to a firmware issue, // a fix for this is to remove 8000 and 16000 from the rates slice. var rates = [8]int{8000, 16000, 32000, 44100, 48000, 88200, 96000, 192000} - foundRate := false + var rate int - for i := 0; i < len(rates) && !foundRate; i++ { - if rates[i] < d.SampleRate { + foundRate := false + for r := range rates { + if r < d.SampleRate { continue } - if rates[i]%d.SampleRate == 0 { - rate, err = d.dev.NegotiateRate(rates[i]) + if r%d.SampleRate == 0 { + rate, err = d.dev.NegotiateRate(r) if err == nil { foundRate = true d.l.Log(logger.Debug, pkg+"alsa device sample rate set", "rate", rate) + break } } } @@ -385,11 +387,7 @@ func (d *Device) Read(p []byte) (int, error) { } // Read from ring buffer. 
- n, err := d.rb.Read(p) - if err != nil { - return 0, err - } - return n, nil + return d.rb.Read(p) } // formatBuffer returns audio that has been converted to the desired format. @@ -440,6 +438,7 @@ func (d *Device) formatBuffer() alsa.Buffer { // nearestPowerOfTwo finds and returns the nearest power of two to the given integer. // If the lower and higher power of two are the same distance, it returns the higher power. // For negative values, 1 is returned. +// Source: https://stackoverflow.com/a/45859570 func nearestPowerOfTwo(n int) int { if n <= 0 { return 1 diff --git a/revid/revid.go b/revid/revid.go index f64f13fb..410db7a2 100644 --- a/revid/revid.go +++ b/revid/revid.go @@ -626,6 +626,7 @@ func (r *Revid) setupInputForFile() (func() error, error) { } // startAudioDevice is used to start capturing audio from an audio device and processing it. +// It returns a function that can be used to stop the device and any errors that occur. func (r *Revid) startAudioDevice() (func() error, error) { // Create audio device. ac := &audio.Config{ diff --git a/revid/revid_test.go b/revid/revid_test.go index 8622dbd9..8ab2e62f 100644 --- a/revid/revid_test.go +++ b/revid/revid_test.go @@ -41,7 +41,7 @@ import ( const raspividPath = "/usr/local/bin/raspivid" // Suppress all test logging, except for t.Errorf output. -var silent bool = true +var silent = true // TestRaspivid tests that raspivid starts correctly. // It is intended to be run on a Raspberry Pi. 
From 95fc69b3c5ce761446d3dd001be1366bf910bd56 Mon Sep 17 00:00:00 2001 From: Trek H Date: Thu, 20 Jun 2019 18:04:58 +0930 Subject: [PATCH 47/57] audio: syntax and error checking --- codec/codecutil/list.go | 11 +++-------- container/mts/encoder_test.go | 2 +- input/audio/audio.go | 7 +++++-- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/codec/codecutil/list.go b/codec/codecutil/list.go index ecba3659..c270eb6b 100644 --- a/codec/codecutil/list.go +++ b/codec/codecutil/list.go @@ -24,8 +24,6 @@ LICENSE package codecutil -import "fmt" - // numCodecs is the number of entries in the list of codecs. const numCodecs = 5 @@ -39,10 +37,7 @@ const ( MJPEG ) -// Validate recieves an int representing a codec and checks if it is valid. -func Validate(codec uint8) error { - if codec < 0 || codec >= numCodecs { - return fmt.Errorf("invalid codec") - } - return nil +// IsValid recieves an int representing a codec and checks if it is valid. +func IsValid(codec uint8) bool { + return 0 <= codec && codec < numCodecs } diff --git a/container/mts/encoder_test.go b/container/mts/encoder_test.go index 4443e8e8..1a1d20d7 100644 --- a/container/mts/encoder_test.go +++ b/container/mts/encoder_test.go @@ -197,7 +197,7 @@ func TestEncodePcm(t *testing.T) { for i+PacketSize <= len(clip) { // Check MTS packet - if !(pkt.PID() == AudioPid) { + if pkt.PID() != AudioPid { i += PacketSize if i+PacketSize <= len(clip) { copy(pkt[:], clip[i:i+PacketSize]) diff --git a/input/audio/audio.go b/input/audio/audio.go index 745b71fe..0f03a20e 100644 --- a/input/audio/audio.go +++ b/input/audio/audio.go @@ -94,7 +94,10 @@ type OpenError error // NewDevice initializes and returns an Device which can be started, read from, and stopped. 
func NewDevice(cfg *Config, l Logger) (*Device, error) { - validate(cfg) + err := validate(cfg) + if err != nil { + return nil, err + } d := &Device{ Config: cfg, @@ -102,7 +105,7 @@ func NewDevice(cfg *Config, l Logger) (*Device, error) { } // Open the requested audio device. - err := d.open() + err = d.open() if err != nil { d.l.Log(logger.Error, pkg+"failed to open device") return nil, err From d48a11794b305d8c03931e92a86b28cd435fb729 Mon Sep 17 00:00:00 2001 From: Trek H Date: Thu, 20 Jun 2019 18:29:08 +0930 Subject: [PATCH 48/57] audio: codec config validation --- input/audio/audio.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/input/audio/audio.go b/input/audio/audio.go index 0f03a20e..9c80237b 100644 --- a/input/audio/audio.go +++ b/input/audio/audio.go @@ -189,9 +189,8 @@ func validate(c *Config) error { if c.RecPeriod <= 0 { return fmt.Errorf("invalid recording period: %v", c.RecPeriod) } - err := codecutil.Validate(c.Codec) - if err != nil { - return err + if !codecutil.IsValid(c.Codec) { + return errors.New("invalid codec") } return nil } From be389fca6e004133576b37198324292e08f48a9f Mon Sep 17 00:00:00 2001 From: Trek H Date: Thu, 11 Jul 2019 14:54:06 +0930 Subject: [PATCH 49/57] syntax and style changes --- codec/codecutil/lex.go | 7 +++---- codec/codecutil/lex_test.go | 26 +++++++++++++------------- input/audio/audio.go | 2 +- 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/codec/codecutil/lex.go b/codec/codecutil/lex.go index e727e07d..3414ff45 100644 --- a/codec/codecutil/lex.go +++ b/codec/codecutil/lex.go @@ -52,17 +52,16 @@ func (l *ByteLexer) Lex(dst io.Writer, src io.Reader, t time.Duration) error { if t < 0 { return fmt.Errorf("invalid delay: %v", t) } - var tick <-chan time.Time + var ticker *time.Ticker if t > 0 { - ticker := time.NewTicker(t) + ticker = time.NewTicker(t) defer ticker.Stop() - tick = ticker.C } buf := make([]byte, bufSize) for { if t != 0 { - <-tick + <-ticker.C } off, err := 
src.Read(buf) if err != nil { diff --git a/codec/codecutil/lex_test.go b/codec/codecutil/lex_test.go index 63162abd..70fd3d39 100644 --- a/codec/codecutil/lex_test.go +++ b/codec/codecutil/lex_test.go @@ -33,18 +33,18 @@ import ( ) var lexTests = []struct { - data []byte - t time.Duration - n int - fail bool // Whether or not this test should fail. + data []byte + t time.Duration + n int + isValid bool // Whether or not this test should fail. }{ - {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Millisecond, 4, false}, - {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Millisecond, 3, false}, - {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Duration(0), 2, false}, - {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Duration(0), 1, false}, - {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Nanosecond, 0, true}, - {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Millisecond, -1, true}, - {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Millisecond, 15, false}, + {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Millisecond, 4, true}, + {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Millisecond, 3, true}, + {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, 0, 2, true}, + {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, 0, 1, true}, + {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Nanosecond, 0, false}, + {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Millisecond, -1, false}, + {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Millisecond, 15, true}, } func TestByteLexer(t *testing.T) { @@ -54,8 +54,8 @@ func TestByteLexer(t *testing.T) { l := NewByteLexer(&tt.n) err := l.Lex(dst, bytes.NewReader(tt.data), tt.t) if err != nil && err != io.EOF { - if !tt.fail { - t.Errorf("unexpected error: %v", err.Error()) + if tt.isValid { + t.Errorf("unexpected error: %v", err) } } else if 
!bytes.Equal(dst.Bytes(), tt.data) { t.Errorf("data before and after lex are not equal: want %v, got %v", tt.data, dst.Bytes()) diff --git a/input/audio/audio.go b/input/audio/audio.go index 9c80237b..54edb00c 100644 --- a/input/audio/audio.go +++ b/input/audio/audio.go @@ -54,7 +54,7 @@ const ( // "paused" means the input routine is sleeping until unpaused or stopped. // "stopped" means the input routine is stopped and the ALSA device is closed. const ( - running = iota + running = iota + 1 paused stopped ) From 72d56e31ee2717febb5ab694ffc6e25e5a1f3e9f Mon Sep 17 00:00:00 2001 From: Trek H Date: Mon, 15 Jul 2019 15:17:16 +0930 Subject: [PATCH 50/57] codecutil: updated logic for 0 delay lexing --- codec/codecutil/lex.go | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/codec/codecutil/lex.go b/codec/codecutil/lex.go index 3414ff45..3e1d862d 100644 --- a/codec/codecutil/lex.go +++ b/codec/codecutil/lex.go @@ -53,16 +53,19 @@ func (l *ByteLexer) Lex(dst io.Writer, src io.Reader, t time.Duration) error { return fmt.Errorf("invalid delay: %v", t) } var ticker *time.Ticker + var zeroTicks chan time.Time if t > 0 { ticker = time.NewTicker(t) - defer ticker.Stop() + } else { + zeroTicks = make(chan time.Time) + close(zeroTicks) + ticker = &time.Ticker{C: zeroTicks} } + defer ticker.Stop() buf := make([]byte, bufSize) for { - if t != 0 { - <-ticker.C - } + <-ticker.C off, err := src.Read(buf) if err != nil { return err @@ -73,3 +76,28 @@ func (l *ByteLexer) Lex(dst io.Writer, src io.Reader, t time.Duration) error { } } } + +func main() { + for _, delay := range []time.Duration{0, 10 * time.Millisecond} { + t := newTicker(delay) + for i := 0; i < 10; i++ { + <-t.C + fmt.Println(time.Now(), i) + } + fmt.Println() + } +} + +var zeroTicks chan time.Time + +func init() { + zeroTicks = make(chan time.Time) + close(zeroTicks) +} + +func newTicker(d time.Duration) *time.Ticker { + if d == 0 { + return &time.Ticker{C: zeroTicks} 
+ } + return time.NewTicker(d) +} From eb4a3259812f57b6309ad77f31016a736864b934 Mon Sep 17 00:00:00 2001 From: Trek H Date: Mon, 15 Jul 2019 15:32:42 +0930 Subject: [PATCH 51/57] codecutil: removed unused code --- codec/codecutil/lex.go | 27 ++------------------------- 1 file changed, 2 insertions(+), 25 deletions(-) diff --git a/codec/codecutil/lex.go b/codec/codecutil/lex.go index 3e1d862d..98953a93 100644 --- a/codec/codecutil/lex.go +++ b/codec/codecutil/lex.go @@ -52,6 +52,8 @@ func (l *ByteLexer) Lex(dst io.Writer, src io.Reader, t time.Duration) error { if t < 0 { return fmt.Errorf("invalid delay: %v", t) } + + // Set up delay, make loop instant if t is 0. var ticker *time.Ticker var zeroTicks chan time.Time if t > 0 { @@ -76,28 +78,3 @@ func (l *ByteLexer) Lex(dst io.Writer, src io.Reader, t time.Duration) error { } } } - -func main() { - for _, delay := range []time.Duration{0, 10 * time.Millisecond} { - t := newTicker(delay) - for i := 0; i < 10; i++ { - <-t.C - fmt.Println(time.Now(), i) - } - fmt.Println() - } -} - -var zeroTicks chan time.Time - -func init() { - zeroTicks = make(chan time.Time) - close(zeroTicks) -} - -func newTicker(d time.Duration) *time.Ticker { - if d == 0 { - return &time.Ticker{C: zeroTicks} - } - return time.NewTicker(d) -} From 8518d931c6a10876061d28e280a3ce14ad08f282 Mon Sep 17 00:00:00 2001 From: Trek H Date: Mon, 15 Jul 2019 17:55:42 +0930 Subject: [PATCH 52/57] codecutil: made zeroTicks global --- codec/codecutil/lex.go | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/codec/codecutil/lex.go b/codec/codecutil/lex.go index 98953a93..158839dd 100644 --- a/codec/codecutil/lex.go +++ b/codec/codecutil/lex.go @@ -40,8 +40,23 @@ func NewByteLexer(bufSize *int) *ByteLexer { return &ByteLexer{bufSize: bufSize} } -// Lex reads *l.bufSize bytes from src and writes them to dst every t seconds. 
-func (l *ByteLexer) Lex(dst io.Writer, src io.Reader, t time.Duration) error { +// zeroTicks can be used to create an instant ticker. +var zeroTicks chan time.Time + +func init() { + zeroTicks = make(chan time.Time) + close(zeroTicks) +} + +func newTicker(d time.Duration) *time.Ticker { + if d == 0 { + return &time.Ticker{C: zeroTicks} + } + return time.NewTicker(d) +} + +// Lex reads *l.bufSize bytes from src and writes them to dst every d seconds. +func (l *ByteLexer) Lex(dst io.Writer, src io.Reader, d time.Duration) error { if l.bufSize == nil { return fmt.Errorf("buffer size has not been set") } @@ -49,22 +64,11 @@ func (l *ByteLexer) Lex(dst io.Writer, src io.Reader, t time.Duration) error { if bufSize <= 0 { return fmt.Errorf("invalid buffer size: %v", bufSize) } - if t < 0 { - return fmt.Errorf("invalid delay: %v", t) + if d < 0 { + return fmt.Errorf("invalid delay: %v", d) } - // Set up delay, make loop instant if t is 0. - var ticker *time.Ticker - var zeroTicks chan time.Time - if t > 0 { - ticker = time.NewTicker(t) - } else { - zeroTicks = make(chan time.Time) - close(zeroTicks) - ticker = &time.Ticker{C: zeroTicks} - } - defer ticker.Stop() - + ticker := newTicker(d) buf := make([]byte, bufSize) for { <-ticker.C From a755ccfc587342372ec67ec9ebc8b70ddd1ddc1c Mon Sep 17 00:00:00 2001 From: Saxon Date: Sun, 21 Jul 2019 22:11:24 +0930 Subject: [PATCH 53/57] codec/h264/h264dec: separated VUI and HRD from SPS struct Took out all VUI and HRD parameters from the SPS struct and gave them their own structs - VUIParameters and HRDParameters, along with 'contructors' NewVUIParameters and NewHRDParameters to parse from a bits.BitReader and populate the fields of the struct. 
--- codec/h264/h264dec/cabac.go | 4 +- codec/h264/h264dec/pps.go | 5 +- codec/h264/h264dec/slice.go | 57 ++- codec/h264/h264dec/slice_test.go | 24 +- codec/h264/h264dec/sps.go | 807 ++++++++++++++++--------------- 5 files changed, 451 insertions(+), 446 deletions(-) diff --git a/codec/h264/h264dec/cabac.go b/codec/h264/h264dec/cabac.go index 47d90835..8b3a6e4c 100644 --- a/codec/h264/h264dec/cabac.go +++ b/codec/h264/h264dec/cabac.go @@ -35,14 +35,14 @@ func YOffset(yRefMin16, refMbH int) int { } func MbWidthC(sps *SPS) int { mbWidthC := 16 / SubWidthC(sps) - if sps.ChromaFormat == chromaMonochrome || sps.UseSeparateColorPlane { + if sps.ChromaFormatIDC == chromaMonochrome || sps.SeparateColorPlaneFlag { mbWidthC = 0 } return mbWidthC } func MbHeightC(sps *SPS) int { mbHeightC := 16 / SubHeightC(sps) - if sps.ChromaFormat == chromaMonochrome || sps.UseSeparateColorPlane { + if sps.ChromaFormatIDC == chromaMonochrome || sps.SeparateColorPlaneFlag { mbHeightC = 0 } return mbHeightC diff --git a/codec/h264/h264dec/pps.go b/codec/h264/h264dec/pps.go index 139306c6..1f618c2e 100644 --- a/codec/h264/h264dec/pps.go +++ b/codec/h264/h264dec/pps.go @@ -195,7 +195,7 @@ func NewPPS(sps *SPS, rbsp []byte, showPacket bool) (*PPS, error) { if pps.PicScalingMatrixPresent { v := 6 - if sps.ChromaFormat != chroma444 { + if sps.ChromaFormatIDC != chroma444 { v = 2 } for i := 0; i < 6+(v*pps.Transform8x8Mode); i++ { @@ -231,9 +231,6 @@ func NewPPS(sps *SPS, rbsp []byte, showPacket bool) (*PPS, error) { // rbspTrailingBits() } - if showPacket { - debugPacket("PPS", pps) - } return &pps, nil } diff --git a/codec/h264/h264dec/slice.go b/codec/h264/h264dec/slice.go index b25b4502..8192d465 100644 --- a/codec/h264/h264dec/slice.go +++ b/codec/h264/h264dec/slice.go @@ -145,13 +145,13 @@ func (d SliceData) ae(v int) int { // 8.2.2 func MbToSliceGroupMap(sps *SPS, pps *PPS, header *SliceHeader) []int { mbaffFrameFlag := 0 - if sps.MBAdaptiveFrameField && !header.FieldPic { + if 
sps.MBAdaptiveFrameFieldFlag && !header.FieldPic { mbaffFrameFlag = 1 } mapUnitToSliceGroupMap := MapUnitToSliceGroupMap(sps, pps, header) mbToSliceGroupMap := []int{} for i := 0; i <= PicSizeInMbs(sps, header)-1; i++ { - if sps.FrameMbsOnly || header.FieldPic { + if sps.FrameMBSOnlyFlag || header.FieldPic { mbToSliceGroupMap = append(mbToSliceGroupMap, mapUnitToSliceGroupMap[i]) continue } @@ -159,7 +159,7 @@ func MbToSliceGroupMap(sps *SPS, pps *PPS, header *SliceHeader) []int { mbToSliceGroupMap = append(mbToSliceGroupMap, mapUnitToSliceGroupMap[i/2]) continue } - if !sps.FrameMbsOnly && !sps.MBAdaptiveFrameField && !header.FieldPic { + if !sps.FrameMBSOnlyFlag && !sps.MBAdaptiveFrameFieldFlag && !header.FieldPic { mbToSliceGroupMap = append( mbToSliceGroupMap, mapUnitToSliceGroupMap[(i/(2*PicWidthInMbs(sps)))*PicWidthInMbs(sps)+(i%PicWidthInMbs(sps))]) @@ -169,7 +169,7 @@ func MbToSliceGroupMap(sps *SPS, pps *PPS, header *SliceHeader) []int { } func PicWidthInMbs(sps *SPS) int { - return sps.PicWidthInMbsMinus1 + 1 + return sps.PicWidthInMBSMinus1 + 1 } func PicHeightInMapUnits(sps *SPS) int { return sps.PicHeightInMapUnitsMinus1 + 1 @@ -178,7 +178,7 @@ func PicSizeInMapUnits(sps *SPS) int { return PicWidthInMbs(sps) * PicHeightInMapUnits(sps) } func FrameHeightInMbs(sps *SPS) int { - return (2 - flagVal(sps.FrameMbsOnly)) * PicHeightInMapUnits(sps) + return (2 - flagVal(sps.FrameMBSOnlyFlag)) * PicHeightInMapUnits(sps) } func PicHeightInMbs(sps *SPS, header *SliceHeader) int { return FrameHeightInMbs(sps) / (1 + flagVal(header.FieldPic)) @@ -190,13 +190,13 @@ func PicSizeInMbs(sps *SPS, header *SliceHeader) int { // table 6-1 func SubWidthC(sps *SPS) int { n := 17 - if sps.UseSeparateColorPlane { - if sps.ChromaFormat == chroma444 { + if sps.SeparateColorPlaneFlag { + if sps.ChromaFormatIDC == chroma444 { return n } } - switch sps.ChromaFormat { + switch sps.ChromaFormatIDC { case chromaMonochrome: return n case chroma420: @@ -211,12 +211,12 @@ func 
SubWidthC(sps *SPS) int { } func SubHeightC(sps *SPS) int { n := 17 - if sps.UseSeparateColorPlane { - if sps.ChromaFormat == chroma444 { + if sps.SeparateColorPlaneFlag { + if sps.ChromaFormatIDC == chroma444 { return n } } - switch sps.ChromaFormat { + switch sps.ChromaFormatIDC { case chromaMonochrome: return n case chroma420: @@ -578,17 +578,17 @@ func nextMbAddress(n int, sps *SPS, pps *PPS, header *SliceHeader) int { i := n + 1 // picSizeInMbs is the number of macroblocks in picture 0 // 7-13 - // PicWidthInMbs = sps.PicWidthInMbsMinus1 + 1 + // PicWidthInMbs = sps.PicWidthInMBSMinus1 + 1 // PicHeightInMapUnits = sps.PicHeightInMapUnitsMinus1 + 1 // 7-29 // picSizeInMbs = PicWidthInMbs * PicHeightInMbs // 7-26 // PicHeightInMbs = FrameHeightInMbs / (1 + header.fieldPicFlag) // 7-18 - // FrameHeightInMbs = (2 - ps.FrameMbsOnly) * PicHeightInMapUnits - picWidthInMbs := sps.PicWidthInMbsMinus1 + 1 + // FrameHeightInMbs = (2 - ps.FrameMBSOnlyFlag) * PicHeightInMapUnits + picWidthInMbs := sps.PicWidthInMBSMinus1 + 1 picHeightInMapUnits := sps.PicHeightInMapUnitsMinus1 + 1 - frameHeightInMbs := (2 - flagVal(sps.FrameMbsOnly)) * picHeightInMapUnits + frameHeightInMbs := (2 - flagVal(sps.FrameMBSOnlyFlag)) * picHeightInMapUnits picHeightInMbs := frameHeightInMbs / (1 + flagVal(header.FieldPic)) picSizeInMbs := picWidthInMbs * picHeightInMbs mbToSliceGroupMap := MbToSliceGroupMap(sps, pps, header) @@ -600,7 +600,7 @@ func nextMbAddress(n int, sps *SPS, pps *PPS, header *SliceHeader) int { func CurrMbAddr(sps *SPS, header *SliceHeader) int { mbaffFrameFlag := 0 - if sps.MBAdaptiveFrameField && !header.FieldPic { + if sps.MBAdaptiveFrameFieldFlag && !header.FieldPic { mbaffFrameFlag = 1 } @@ -608,7 +608,7 @@ func CurrMbAddr(sps *SPS, header *SliceHeader) int { } func MbaffFrameFlag(sps *SPS, header *SliceHeader) int { - if sps.MBAdaptiveFrameField && !header.FieldPic { + if sps.MBAdaptiveFrameFieldFlag && !header.FieldPic { return 1 } return 0 @@ -630,7 +630,7 @@ func 
NewSliceData(sliceContext *SliceContext, br *bits.BitReader) (*SliceData, e } } mbaffFrameFlag := 0 - if sliceContext.SPS.MBAdaptiveFrameField && !sliceContext.Slice.Header.FieldPic { + if sliceContext.SPS.MBAdaptiveFrameFieldFlag && !sliceContext.Slice.Header.FieldPic { mbaffFrameFlag = 1 } currMbAddr := sliceContext.Slice.Header.FirstMbInSlice * (1 * mbaffFrameFlag) @@ -791,7 +791,7 @@ func NewSliceData(sliceContext *SliceContext, br *bits.BitReader) (*SliceData, e mbWidthC := 16 / SubWidthC(sliceContext.SPS) mbHeightC := 16 / SubHeightC(sliceContext.SPS) // if monochrome - if sliceContext.SPS.ChromaFormat == chromaMonochrome || sliceContext.SPS.UseSeparateColorPlane { + if sliceContext.SPS.ChromaFormatIDC == chromaMonochrome || sliceContext.SPS.SeparateColorPlaneFlag { mbWidthC = 0 mbHeightC = 0 } @@ -824,7 +824,7 @@ func NewSliceData(sliceContext *SliceContext, br *bits.BitReader) (*SliceData, e if NumbSubMbPart(subMbType[mbPartIdx]) > 1 { noSubMbPartSizeLessThan8x8Flag = 0 } - } else if !sliceContext.SPS.Direct8x8Inference { + } else if !sliceContext.SPS.Direct8x8InferenceFlag { noSubMbPartSizeLessThan8x8Flag = 0 } } @@ -876,7 +876,7 @@ func NewSliceData(sliceContext *SliceContext, br *bits.BitReader) (*SliceData, e } // sliceContext.Slice.Data.CodedBlockPattern = me(v) | ae(v) - if CodedBlockPatternLuma(sliceContext.Slice.Data) > 0 && sliceContext.PPS.Transform8x8Mode == 1 && sliceContext.Slice.Data.MbTypeName != "I_NxN" && noSubMbPartSizeLessThan8x8Flag == 1 && (sliceContext.Slice.Data.MbTypeName != "B_Direct_16x16" || sliceContext.SPS.Direct8x8Inference) { + if CodedBlockPatternLuma(sliceContext.Slice.Data) > 0 && sliceContext.PPS.Transform8x8Mode == 1 && sliceContext.Slice.Data.MbTypeName != "I_NxN" && noSubMbPartSizeLessThan8x8Flag == 1 && (sliceContext.Slice.Data.MbTypeName != "B_Direct_16x16" || sliceContext.SPS.Direct8x8InferenceFlag) { // TODO: 1 bit or ae(v) if sliceContext.PPS.EntropyCodingMode == 1 { binarization := 
NewBinarization("Transform8x8Flag", sliceContext.Slice.Data) @@ -956,10 +956,10 @@ func NewSliceContext(videoStream *VideoStream, nalUnit *NalUnit, rbsp []byte, sh idrPic = true } header := SliceHeader{} - if sps.UseSeparateColorPlane { + if sps.SeparateColorPlaneFlag { header.ChromaArrayType = 0 } else { - header.ChromaArrayType = sps.ChromaFormat + header.ChromaArrayType = sps.ChromaFormatIDC } br := bits.NewBitReader(bytes.NewReader(rbsp)) @@ -980,7 +980,7 @@ func NewSliceContext(videoStream *VideoStream, nalUnit *NalUnit, rbsp []byte, sh return nil, errors.Wrap(err, "could not parse PPSID") } - if sps.UseSeparateColorPlane { + if sps.SeparateColorPlaneFlag { b, err := br.ReadBits(2) if err != nil { return nil, errors.Wrap(err, "could not read ColorPlaneID") @@ -989,7 +989,7 @@ func NewSliceContext(videoStream *VideoStream, nalUnit *NalUnit, rbsp []byte, sh } // TODO: See 7.4.3 // header.FrameNum = b.NextField("FrameNum", 0) - if !sps.FrameMbsOnly { + if !sps.FrameMBSOnlyFlag { b, err := br.ReadBits(1) if err != nil { return nil, errors.Wrap(err, "could not read FieldPic") @@ -1023,7 +1023,7 @@ func NewSliceContext(videoStream *VideoStream, nalUnit *NalUnit, rbsp []byte, sh } } } - if sps.PicOrderCountType == 1 && !sps.DeltaPicOrderAlwaysZero { + if sps.PicOrderCountType == 1 && !sps.DeltaPicOrderAlwaysZeroFlag { header.DeltaPicOrderCnt[0], err = readSe(br) if err != nil { return nil, errors.Wrap(err, "could not parse DeltaPicOrderCnt") @@ -1361,9 +1361,6 @@ func NewSliceContext(videoStream *VideoStream, nalUnit *NalUnit, rbsp []byte, sh if err != nil { return nil, errors.Wrap(err, "could not create slice data") } - if showPacket { - debugPacket("debug: Header", sliceContext.Slice.Header) - debugPacket("debug: Data", sliceContext.Slice.Data) - } + return sliceContext, nil } diff --git a/codec/h264/h264dec/slice_test.go b/codec/h264/h264dec/slice_test.go index e7988a7e..d1db5df1 100644 --- a/codec/h264/h264dec/slice_test.go +++ b/codec/h264/h264dec/slice_test.go 
@@ -7,12 +7,12 @@ var subWidthCTests = []struct { want int }{ {SPS{}, 17}, - {SPS{ChromaFormat: 0}, 17}, - {SPS{ChromaFormat: 1}, 2}, - {SPS{ChromaFormat: 2}, 2}, - {SPS{ChromaFormat: 3}, 1}, - {SPS{ChromaFormat: 3, UseSeparateColorPlane: true}, 17}, - {SPS{ChromaFormat: 999}, 17}, + {SPS{ChromaFormatIDC: 0}, 17}, + {SPS{ChromaFormatIDC: 1}, 2}, + {SPS{ChromaFormatIDC: 2}, 2}, + {SPS{ChromaFormatIDC: 3}, 1}, + {SPS{ChromaFormatIDC: 3, SeparateColorPlaneFlag: true}, 17}, + {SPS{ChromaFormatIDC: 999}, 17}, } // TestSubWidthC tests that the correct SubWidthC is returned given @@ -30,12 +30,12 @@ var subHeightCTests = []struct { want int }{ {SPS{}, 17}, - {SPS{ChromaFormat: 0}, 17}, - {SPS{ChromaFormat: 1}, 2}, - {SPS{ChromaFormat: 2}, 1}, - {SPS{ChromaFormat: 3}, 1}, - {SPS{ChromaFormat: 3, UseSeparateColorPlane: true}, 17}, - {SPS{ChromaFormat: 999}, 17}, + {SPS{ChromaFormatIDC: 0}, 17}, + {SPS{ChromaFormatIDC: 1}, 2}, + {SPS{ChromaFormatIDC: 2}, 1}, + {SPS{ChromaFormatIDC: 3}, 1}, + {SPS{ChromaFormatIDC: 3, SeparateColorPlaneFlag: true}, 17}, + {SPS{ChromaFormatIDC: 999}, 17}, } // TestSubHeightC tests that the correct SubHeightC is returned given diff --git a/codec/h264/h264dec/sps.go b/codec/h264/h264dec/sps.go index c4aabd22..bd3e1184 100644 --- a/codec/h264/h264dec/sps.go +++ b/codec/h264/h264dec/sps.go @@ -2,112 +2,11 @@ package h264dec import ( "bytes" - "fmt" - "strings" "bitbucket.org/ausocean/av/codec/h264/h264dec/bits" "github.com/pkg/errors" ) -// Specification Page 43 7.3.2.1.1 -// Range is always inclusive -// XRange is always exclusive -type SPS struct { - // 8 bits - Profile int - // 6 bits - Constraint0, Constraint1 int - Constraint2, Constraint3 int - Constraint4, Constraint5 int - // 2 bit reserved 0 bits - // 8 bits - Level int - // Range 0 - 31 ; 6 bits - ID int - ChromaFormat int - UseSeparateColorPlane bool - BitDepthLumaMinus8 int - BitDepthChromaMinus8 int - QPrimeYZeroTransformBypass bool - SeqScalingMatrixPresent bool - // Delta is (0-12)-1 
; 4 bits - SeqScalingList []bool // se - // Range 0 - 12; 4 bits - Log2MaxFrameNumMinus4 int - // Range 0 - 2; 2 bits - PicOrderCountType int - // Range 0 - 12; 4 bits - Log2MaxPicOrderCntLSBMin4 int - DeltaPicOrderAlwaysZero bool - // Range (-2^31)+1 to (2^31)-1 ; 31 bits - OffsetForNonRefPic int // Value - 1 (se) - // Range (-2^31)+1 to (2^31)-1 ; 31 bits - OffsetForTopToBottomField int // Value - 1 (se) - // Range 0 - 255 ; 8 bits - NumRefFramesInPicOrderCntCycle int - // Range (-2^31)+1 to (2^31)-1 ; 31 bits - OffsetForRefFrameList []int // Value - 1 ([]se) - // Range 0 - MaxDpbFrames - MaxNumRefFrames int - GapsInFrameNumValueAllowed bool - // Page 77 - PicWidthInMbsMinus1 int - // Page 77 - PicHeightInMapUnitsMinus1 int - FrameMbsOnly bool - MBAdaptiveFrameField bool - Direct8x8Inference bool - FrameCropping bool - FrameCropLeftOffset int - FrameCropRightOffset int - FrameCropTopOffset int - FrameCropBottomOffset int - VuiParametersPresent bool - VuiParameters []int - AspectRatioInfoPresent bool - AspectRatio int - SarWidth int - SarHeight int - OverscanInfoPresent bool - OverscanAppropriate bool - VideoSignalTypePresent bool - VideoFormat int - VideoFullRange bool - ColorDescriptionPresent bool - ColorPrimaries int - TransferCharacteristics int - MatrixCoefficients int - ChromaLocInfoPresent bool - ChromaSampleLocTypeTopField int - ChromaSampleLocTypeBottomField int - CpbCntMinus1 int - BitRateScale int - CpbSizeScale int - BitRateValueMinus1 []int - Cbr []bool - InitialCpbRemovalDelayLengthMinus1 int - CpbRemovalDelayLengthMinus1 int - CpbSizeValueMinus1 []int - DpbOutputDelayLengthMinus1 int - TimeOffsetLength int - TimingInfoPresent bool - NumUnitsInTick int - TimeScale int - NalHrdParametersPresent bool - FixedFrameRate bool - VclHrdParametersPresent bool - LowHrdDelay bool - PicStructPresent bool - BitstreamRestriction bool - MotionVectorsOverPicBoundaries bool - MaxBytesPerPicDenom int - MaxBitsPerMbDenom int - Log2MaxMvLengthHorizontal int - 
Log2MaxMvLengthVertical int - MaxDecFrameBuffering int - MaxNumReorderFrames int -} - var ( DefaultScalingMatrix4x4 = [][]int{ {6, 13, 20, 28, 13, 20, 28, 32, 20, 28, 32, 37, 28, 32, 37, 42}, @@ -161,98 +60,62 @@ var ( ScalingList8x8 = ScalingList4x4 ) -func isInList(l []int, term int) bool { - for _, m := range l { - if m == term { - return true - } - } - return false -} -func debugPacket(name string, packet interface{}) { - logger.Printf("debug: %s packet\n", name) - for _, line := range strings.Split(fmt.Sprintf("%+v", packet), " ") { - logger.Printf("debug: \t%#v\n", line) - } -} -func scalingList(br *bits.BitReader, scalingList []int, sizeOfScalingList int, defaultScalingMatrix []int) error { - lastScale := 8 - nextScale := 8 - for i := 0; i < sizeOfScalingList; i++ { - if nextScale != 0 { - deltaScale, err := readSe(br) - if err != nil { - return errors.Wrap(err, "could not parse deltaScale") - } - nextScale = (lastScale + deltaScale + 256) % 256 - if i == 0 && nextScale == 0 { - // Scaling list should use the default list for this point in the matrix - _ = defaultScalingMatrix - } - } - if nextScale == 0 { - scalingList[i] = lastScale - } else { - scalingList[i] = nextScale - } - lastScale = scalingList[i] - } - return nil +// SPS describes a sequence parameter set as defined by section 7.3.2.1.1 in +// the Specifications. 
+type SPS struct { + Profile int + Constraint0 int + Constraint1 int + Constraint2 int + Constraint3 int + Constraint4 int + Constraint5 int + LevelIDC int + SPSID int + ChromaFormatIDC int + SeparateColorPlaneFlag bool + BitDepthLumaMinus8 int + BitDepthChromaMinus8 int + QPPrimeYZeroTransformBypassFlag bool + SeqScalingMatrixPresentFlag bool + SeqScalingListPresentFlag []bool + ScalingList4x4 [][]int + UseDefaultScalingMatrix4x4Flag []bool + ScalingList8x8 [][]int + UseDefaultScalingMatrix8x8Flag []bool + Log2MaxFrameNumMinus4 int + PicOrderCountType int + Log2MaxPicOrderCntLSBMin4 int + DeltaPicOrderAlwaysZeroFlag bool + OffsetForNonRefPic int + OffsetForTopToBottomField int + NumRefFramesInPicOrderCntCycle int + OffsetForRefFrameList []int + MaxNumRefFrames int + GapsInFrameNumValueAllowed bool + PicWidthInMBSMinus1 int + PicHeightInMapUnitsMinus1 int + FrameMBSOnlyFlag bool + MBAdaptiveFrameFieldFlag bool + Direct8x8InferenceFlag bool + FrameCroppingFlag bool + FrameCropLeftOffset int + FrameCropRightOffset int + FrameCropTopOffset int + FrameCropBottomOffset int + VUIParametersPresentFlag bool + VUIParameters *VUIParameters } + +// NewSPS parses a sequence parameter set raw byte sequence from br following +// the syntax structure specified in section 7.3.2.1.1, and returns as a new +// SPS. 
func NewSPS(rbsp []byte, showPacket bool) (*SPS, error) { logger.Printf("debug: SPS RBSP %d bytes %d bits\n", len(rbsp), len(rbsp)*8) logger.Printf("debug: \t%#v\n", rbsp[0:8]) sps := SPS{} br := bits.NewBitReader(bytes.NewReader(rbsp)) var err error - hrdParameters := func() error { - sps.CpbCntMinus1, err = readUe(br) - if err != nil { - return errors.Wrap(err, "could not parse CpbCntMinus1") - } - - err := readFields(br, []field{ - {&sps.BitRateScale, "BitRateScale", 4}, - {&sps.CpbSizeScale, "CpbSizeScale", 4}, - }) - if err != nil { - return err - } - - // SchedSelIdx E1.2 - for sseli := 0; sseli <= sps.CpbCntMinus1; sseli++ { - ue, err := readUe(br) - if err != nil { - return errors.Wrap(err, "could not parse BitRateValueMinus1") - } - sps.BitRateValueMinus1 = append(sps.BitRateValueMinus1, ue) - - ue, err = readUe(br) - if err != nil { - return errors.Wrap(err, "could not parse CpbSizeValueMinus1") - } - sps.CpbSizeValueMinus1 = append(sps.CpbSizeValueMinus1, ue) - - if v, _ := br.ReadBits(1); v == 1 { - sps.Cbr = append(sps.Cbr, true) - } else { - sps.Cbr = append(sps.Cbr, false) - } - - err = readFields(br, - []field{ - {&sps.InitialCpbRemovalDelayLengthMinus1, "InitialCpbRemovalDelayLengthMinus1", 5}, - {&sps.CpbRemovalDelayLengthMinus1, "CpbRemovalDelayLengthMinus1", 5}, - {&sps.DpbOutputDelayLengthMinus1, "DpbOutputDelayLengthMinus1", 5}, - {&sps.TimeOffsetLength, "TimeOffsetLength", 5}, - }, - ) - if err != nil { - return err - } - } - return nil - } err = readFields(br, []field{ @@ -275,30 +138,30 @@ func NewSPS(rbsp []byte, showPacket bool) (*SPS, error) { if err != nil { return nil, errors.Wrap(err, "could not read Level") } - sps.Level = int(b) + sps.LevelIDC = int(b) // sps.ID = b.NextField("SPSID", 6) // proper - sps.ID, err = readUe(br) + sps.SPSID, err = readUe(br) if err != nil { return nil, errors.Wrap(err, "could not parse ID") } - sps.ChromaFormat, err = readUe(br) + sps.ChromaFormatIDC, err = readUe(br) if err != nil { - return nil, 
errors.Wrap(err, "could not parse ChromaFormat") + return nil, errors.Wrap(err, "could not parse ChromaFormatIDC") } // This should be done only for certain ProfileIDC: isProfileIDC := []int{100, 110, 122, 244, 44, 83, 86, 118, 128, 138, 139, 134, 135} // SpecialProfileCase1 if isInList(isProfileIDC, sps.Profile) { - if sps.ChromaFormat == chroma444 { + if sps.ChromaFormatIDC == chroma444 { // TODO: should probably deal with error here. b, err := br.ReadBits(1) if err != nil { return nil, errors.Wrap(err, "could not read UseSeparateColorPlaneFlag") } - sps.UseSeparateColorPlane = b == 1 + sps.SeparateColorPlaneFlag = b == 1 } sps.BitDepthLumaMinus8, err = readUe(br) @@ -315,17 +178,17 @@ func NewSPS(rbsp []byte, showPacket bool) (*SPS, error) { if err != nil { return nil, errors.Wrap(err, "could not read QPrimeYZeroTransformBypass") } - sps.QPrimeYZeroTransformBypass = b == 1 + sps.QPPrimeYZeroTransformBypassFlag = b == 1 b, err = br.ReadBits(1) if err != nil { return nil, errors.Wrap(err, "could not read SeqScalingMatrixPresent") } - sps.SeqScalingMatrixPresent = b == 1 + sps.SeqScalingMatrixPresentFlag = b == 1 - if sps.SeqScalingMatrixPresent { + if sps.SeqScalingMatrixPresentFlag { max := 12 - if sps.ChromaFormat != chroma444 { + if sps.ChromaFormatIDC != chroma444 { max = 8 } logger.Printf("debug: \tbuilding Scaling matrix for %d elements\n", max) @@ -334,9 +197,9 @@ func NewSPS(rbsp []byte, showPacket bool) (*SPS, error) { if err != nil { return nil, errors.Wrap(err, "could not read SeqScalingList") } - sps.SeqScalingList = append(sps.SeqScalingList, b == 1) + sps.SeqScalingListPresentFlag = append(sps.SeqScalingListPresentFlag, b == 1) - if sps.SeqScalingList[i] { + if sps.SeqScalingListPresentFlag[i] { if i < 6 { scalingList( br, @@ -380,7 +243,7 @@ func NewSPS(rbsp []byte, showPacket bool) (*SPS, error) { if err != nil { return nil, errors.Wrap(err, "could not read DeltaPicOrderAlwaysZero") } - sps.DeltaPicOrderAlwaysZero = b == 1 + 
sps.DeltaPicOrderAlwaysZeroFlag = b == 1 sps.OffsetForNonRefPic, err = readSe(br) if err != nil { @@ -420,7 +283,7 @@ func NewSPS(rbsp []byte, showPacket bool) (*SPS, error) { } sps.GapsInFrameNumValueAllowed = b == 1 - sps.PicWidthInMbsMinus1, err = readUe(br) + sps.PicWidthInMBSMinus1, err = readUe(br) if err != nil { return nil, errors.Wrap(err, "could not parse PicWidthInMbsMinus1") } @@ -434,25 +297,25 @@ func NewSPS(rbsp []byte, showPacket bool) (*SPS, error) { if err != nil { return nil, errors.Wrap(err, "could not read FrameMbsOnly") } - sps.FrameMbsOnly = b == 1 + sps.FrameMBSOnlyFlag = b == 1 - if !sps.FrameMbsOnly { + if !sps.FrameMBSOnlyFlag { b, err = br.ReadBits(1) if err != nil { return nil, errors.Wrap(err, "could not read MBAdaptiveFrameField") } - sps.MBAdaptiveFrameField = b == 1 + sps.MBAdaptiveFrameFieldFlag = b == 1 } err = readFlags(br, []flag{ - {&sps.Direct8x8Inference, "Direct8x8Inference"}, - {&sps.FrameCropping, "FrameCropping"}, + {&sps.Direct8x8InferenceFlag, "Direct8x8Inference"}, + {&sps.FrameCroppingFlag, "FrameCropping"}, }) if err != nil { return nil, err } - if sps.FrameCropping { + if sps.FrameCroppingFlag { sps.FrameCropLeftOffset, err = readUe(br) if err != nil { return nil, errors.Wrap(err, "could not parse FrameCropLeftOffset") @@ -478,213 +341,361 @@ func NewSPS(rbsp []byte, showPacket bool) (*SPS, error) { if err != nil { return nil, errors.Wrap(err, "could not read VuiParametersPresent") } - sps.VuiParametersPresent = b == 1 + sps.VUIParametersPresentFlag = b == 1 - if sps.VuiParametersPresent { - // vui_parameters + if sps.VUIParametersPresentFlag { + + } // End VuiParameters Annex E.1.1 + + return &sps, nil +} + +// SPS describes a sequence parameter set as defined by section E.1.1 in the +// Specifications. 
+type VUIParameters struct { + AspectRatioInfoPresentFlag bool + AspectRatioIDC int + SARWidth int + SARHeight int + OverscanInfoPresentFlag bool + OverscanAppropriateFlag bool + VideoSignalTypePresentFlag bool + VideoFormat int + VideoFullRangeFlag bool + ColorDescriptionPresentFlag bool + ColorPrimaries int + TransferCharacteristics int + MatrixCoefficients int + ChromaLocInfoPresentFlag bool + ChromaSampleLocTypeTopField int + ChromaSampleLocTypeBottomField int + TimingInfoPresentFlag bool + NumUnitsInTick int + TimeScale int + FixedFrameRateFlag bool + NALHRDParametersPresentFlag bool + NALHRDParameters *HRDParameters + VCLHRDParametersPresentFlag bool + VCLHRDParameters *HRDParameters + LowDelayHRDFlag bool + PicStructPresentFlag bool + BitstreamRestrictionFlag bool + MotionVectorsOverPicBoundariesFlag bool + MaxBytesPerPicDenom int + MaxBitsPerMBDenom int + Log2MaxMVLengthHorizontal int + Log2MaxMVLengthVertical int + MaxNumReorderFrames int + MaxDecFrameBuffering int +} + +// NewVUIParameters parses video usability information parameters from br +// following the syntax structure specified in section E.1.1, and returns as a +// new VUIParameters. 
+func NewVUIParameters(br *bits.BitReader) (*VUIParameters, error) { + p := &VUIParameters{} + + b, err := br.ReadBits(1) + if err != nil { + return nil, errors.Wrap(err, "could not read AspectRatioInfoPresent") + } + p.AspectRatioInfoPresentFlag = b == 1 + + if p.AspectRatioInfoPresentFlag { + b, err = br.ReadBits(8) + if err != nil { + return nil, errors.Wrap(err, "could not read AspectRatio") + } + p.AspectRatioIDC = int(b) + + EXTENDED_SAR := 999 + if p.AspectRatioIDC == EXTENDED_SAR { + b, err = br.ReadBits(16) + if err != nil { + return nil, errors.Wrap(err, "could not read SarWidth") + } + p.SARWidth = int(b) + + b, err = br.ReadBits(16) + if err != nil { + return nil, errors.Wrap(err, "could not read SarHeight") + } + p.SARHeight = int(b) + } + } + + b, err = br.ReadBits(1) + if err != nil { + return nil, errors.Wrap(err, "could not read OverscanInfoPresent") + } + p.OverscanInfoPresentFlag = b == 1 + + if p.OverscanInfoPresentFlag { b, err = br.ReadBits(1) if err != nil { - return nil, errors.Wrap(err, "could not read AspectRatioInfoPresent") + return nil, errors.Wrap(err, "could not read OverscanAppropriate") } - sps.AspectRatioInfoPresent = b == 1 + p.OverscanAppropriateFlag = b == 1 + } - if sps.AspectRatioInfoPresent { - b, err = br.ReadBits(8) - if err != nil { - return nil, errors.Wrap(err, "could not read AspectRatio") - } - sps.AspectRatio = int(b) + b, err = br.ReadBits(1) + if err != nil { + return nil, errors.Wrap(err, "could not read VideoSignalTypePresent") + } + p.VideoSignalTypePresentFlag = b == 1 - EXTENDED_SAR := 999 - if sps.AspectRatio == EXTENDED_SAR { - b, err = br.ReadBits(16) - if err != nil { - return nil, errors.Wrap(err, "could not read SarWidth") - } - sps.SarWidth = int(b) - - b, err = br.ReadBits(16) - if err != nil { - return nil, errors.Wrap(err, "could not read SarHeight") - } - sps.SarHeight = int(b) - } + if p.VideoSignalTypePresentFlag { + b, err = br.ReadBits(3) + if err != nil { + return nil, errors.Wrap(err, "could 
not read VideoFormat") } + p.VideoFormat = int(b) + } + + if p.VideoSignalTypePresentFlag { + b, err = br.ReadBits(1) + if err != nil { + return nil, errors.Wrap(err, "could not read VideoFullRange") + } + p.VideoFullRangeFlag = b == 1 b, err = br.ReadBits(1) if err != nil { - return nil, errors.Wrap(err, "could not read OverscanInfoPresent") + return nil, errors.Wrap(err, "could not read ColorDescriptionPresent") } - sps.OverscanInfoPresent = b == 1 + p.ColorDescriptionPresentFlag = b == 1 - if sps.OverscanInfoPresent { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read OverscanAppropriate") - } - sps.OverscanAppropriate = b == 1 - } - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read VideoSignalTypePresent") - } - sps.VideoSignalTypePresent = b == 1 - - if sps.VideoSignalTypePresent { - b, err = br.ReadBits(3) - if err != nil { - return nil, errors.Wrap(err, "could not read VideoFormat") - } - sps.VideoFormat = int(b) - } - - if sps.VideoSignalTypePresent { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read VideoFullRange") - } - sps.VideoFullRange = b == 1 - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read ColorDescriptionPresent") - } - sps.ColorDescriptionPresent = b == 1 - - if sps.ColorDescriptionPresent { - err = readFields(br, - []field{ - {&sps.ColorPrimaries, "ColorPrimaries", 8}, - {&sps.TransferCharacteristics, "TransferCharacteristics", 8}, - {&sps.MatrixCoefficients, "MatrixCoefficients", 8}, - }, - ) - if err != nil { - return nil, err - } - } - } - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read ChromaLocInfoPresent") - } - sps.ChromaLocInfoPresent = b == 1 - - if sps.ChromaLocInfoPresent { - sps.ChromaSampleLocTypeTopField, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse ChromaSampleLocTypeTopField") - } - - 
sps.ChromaSampleLocTypeBottomField, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse ChromaSampleLocTypeBottomField") - } - } - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read TimingInfoPresent") - } - sps.TimingInfoPresent = b == 1 - - if sps.TimingInfoPresent { - err := readFields(br, []field{ - {&sps.NumUnitsInTick, "NumUnitsInTick", 32}, - {&sps.TimeScale, "TimeScale", 32}, - }) + if p.ColorDescriptionPresentFlag { + err = readFields(br, + []field{ + {&p.ColorPrimaries, "ColorPrimaries", 8}, + {&p.TransferCharacteristics, "TransferCharacteristics", 8}, + {&p.MatrixCoefficients, "MatrixCoefficients", 8}, + }, + ) if err != nil { return nil, err } - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read FixedFrameRate") - } - sps.FixedFrameRate = b == 1 } - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read NalHrdParametersPresent") - } - sps.NalHrdParametersPresent = b == 1 - - if sps.NalHrdParametersPresent { - err = hrdParameters() - if err != nil { - return nil, errors.Wrap(err, "could not get hrdParameters") - } - } - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read VclHrdParametersPresent") - } - sps.VclHrdParametersPresent = b == 1 - - if sps.VclHrdParametersPresent { - err = hrdParameters() - if err != nil { - return nil, errors.Wrap(err, "could not get hrdParameters") - } - } - if sps.NalHrdParametersPresent || sps.VclHrdParametersPresent { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read LowHrdDelay") - } - sps.LowHrdDelay = b == 1 - } - - err := readFlags(br, []flag{ - {&sps.PicStructPresent, "PicStructPresent"}, - {&sps.BitstreamRestriction, "BitStreamRestriction"}, - }) - - if sps.BitstreamRestriction { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read 
MotionVectorsOverPicBoundaries") - } - sps.MotionVectorsOverPicBoundaries = b == 1 - - sps.MaxBytesPerPicDenom, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MaxBytesPerPicDenom") - } - - sps.MaxBitsPerMbDenom, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MaxBitsPerMbDenom") - } - - sps.Log2MaxMvLengthHorizontal, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse Log2MaxMvLengthHorizontal") - } - - sps.Log2MaxMvLengthVertical, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse Log2MaxMvLengthVertical") - } - - sps.MaxNumReorderFrames, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MaxNumReorderFrames") - } - - sps.MaxDecFrameBuffering, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MaxDecFrameBuffering") - } - } - - } // End VuiParameters Annex E.1.1 - if showPacket { - debugPacket("SPS", sps) } - return &sps, nil + + b, err = br.ReadBits(1) + if err != nil { + return nil, errors.Wrap(err, "could not read ChromaLocInfoPresent") + } + p.ChromaLocInfoPresentFlag = b == 1 + + if p.ChromaLocInfoPresentFlag { + p.ChromaSampleLocTypeTopField, err = readUe(br) + if err != nil { + return nil, errors.Wrap(err, "could not parse ChromaSampleLocTypeTopField") + } + + p.ChromaSampleLocTypeBottomField, err = readUe(br) + if err != nil { + return nil, errors.Wrap(err, "could not parse ChromaSampleLocTypeBottomField") + } + } + + b, err = br.ReadBits(1) + if err != nil { + return nil, errors.Wrap(err, "could not read TimingInfoPresent") + } + p.TimingInfoPresentFlag = b == 1 + + if p.TimingInfoPresentFlag { + err := readFields(br, []field{ + {&p.NumUnitsInTick, "NumUnitsInTick", 32}, + {&p.TimeScale, "TimeScale", 32}, + }) + if err != nil { + return nil, err + } + + b, err = br.ReadBits(1) + if err != nil { + return nil, errors.Wrap(err, "could not read FixedFrameRate") + } 
+ p.FixedFrameRateFlag = b == 1 + } + + b, err = br.ReadBits(1) + if err != nil { + return nil, errors.Wrap(err, "could not read NalHrdParametersPresent") + } + p.NALHRDParametersPresentFlag = b == 1 + + if p.NALHRDParametersPresentFlag { + p.NALHRDParameters, err = NewHRDParameters(br) + if err != nil { + return nil, errors.Wrap(err, "could not get hrdParameters") + } + } + + b, err = br.ReadBits(1) + if err != nil { + return nil, errors.Wrap(err, "could not read VclHrdParametersPresent") + } + p.VCLHRDParametersPresentFlag = b == 1 + + if p.VCLHRDParametersPresentFlag { + p.VCLHRDParameters, err = NewHRDParameters(br) + if err != nil { + return nil, errors.Wrap(err, "could not get hrdParameters") + } + } + if p.NALHRDParametersPresentFlag || p.VCLHRDParametersPresentFlag { + b, err = br.ReadBits(1) + if err != nil { + return nil, errors.Wrap(err, "could not read LowHrdDelay") + } + p.LowDelayHRDFlag = b == 1 + } + + err = readFlags(br, []flag{ + {&p.PicStructPresentFlag, "PicStructPresent"}, + {&p.BitstreamRestrictionFlag, "BitStreamRestriction"}, + }) + + if p.BitstreamRestrictionFlag { + b, err = br.ReadBits(1) + if err != nil { + return nil, errors.Wrap(err, "could not read MotionVectorsOverPicBoundaries") + } + p.MotionVectorsOverPicBoundariesFlag = b == 1 + + p.MaxBytesPerPicDenom, err = readUe(br) + if err != nil { + return nil, errors.Wrap(err, "could not parse MaxBytesPerPicDenom") + } + + p.MaxBitsPerMBDenom, err = readUe(br) + if err != nil { + return nil, errors.Wrap(err, "could not parse MaxBitsPerMbDenom") + } + + p.Log2MaxMVLengthHorizontal, err = readUe(br) + if err != nil { + return nil, errors.Wrap(err, "could not parse Log2MaxMvLengthHorizontal") + } + + p.Log2MaxMVLengthVertical, err = readUe(br) + if err != nil { + return nil, errors.Wrap(err, "could not parse Log2MaxMvLengthVertical") + } + + p.MaxNumReorderFrames, err = readUe(br) + if err != nil { + return nil, errors.Wrap(err, "could not parse MaxNumReorderFrames") + } + + 
p.MaxDecFrameBuffering, err = readUe(br) + if err != nil { + return nil, errors.Wrap(err, "could not parse MaxDecFrameBuffering") + } + } + return p, nil +} + +// HRDParameters describes hypothetical reference decoder parameters as defined +// by section E.1.2 in the specifications. +type HRDParameters struct { + CPBCntMinus1 int + BitRateScale int + CPBSizeScale int + BitRateValueMinus1 []int + CPBSizeValueMinus1 []int + CBRFlag []bool + InitialCPBRemovalDelayLenMinus1 int + CPBRemovalDelayLenMinus1 int + DPBOutputDelayLenMinus1 int + TimeOffsetLen int +} + +// NewHRDParameters parses hypothetical reference decoder parameter from br +// following the syntax structure specified in section E.1.2, and returns as a +// new HRDParameters. +func NewHRDParameters(br *bits.BitReader) (*HRDParameters, error) { + h := &HRDParameters{} + var err error + h.CPBCntMinus1, err = readUe(br) + if err != nil { + return nil, errors.Wrap(err, "could not parse CPBCntMinus1") + } + + err = readFields(br, []field{ + {&h.BitRateScale, "BitRateScale", 4}, + {&h.CPBSizeScale, "CPBSizeScale", 4}, + }) + if err != nil { + return nil, err + } + + // SchedSelIdx E1.2 + for sseli := 0; sseli <= h.CPBCntMinus1; sseli++ { + ue, err := readUe(br) + if err != nil { + return nil, errors.Wrap(err, "could not parse BitRateValueMinus1") + } + h.BitRateValueMinus1 = append(h.BitRateValueMinus1, ue) + + ue, err = readUe(br) + if err != nil { + return nil, errors.Wrap(err, "could not parse CPBSizeValueMinus1") + } + h.CPBSizeValueMinus1 = append(h.CPBSizeValueMinus1, ue) + + if v, _ := br.ReadBits(1); v == 1 { + h.CBRFlag = append(h.CBRFlag, true) + } else { + h.CBRFlag = append(h.CBRFlag, false) + } + + err = readFields(br, + []field{ + {&h.InitialCPBRemovalDelayLenMinus1, "InitialCPBRemovalDelayLenMinus1", 5}, + {&h.CPBRemovalDelayLenMinus1, "CPBRemovalDelayLenMinus1", 5}, + {&h.DPBOutputDelayLenMinus1, "DpbOutputDelayLenMinus1", 5}, + {&h.TimeOffsetLen, "TimeOffsetLen", 5}, + }, + ) + if err != nil { + 
return nil, err + } + } + return h, nil +} + +func isInList(l []int, term int) bool { + for _, m := range l { + if m == term { + return true + } + } + return false +} + +func scalingList(br *bits.BitReader, scalingList []int, sizeOfScalingList int, defaultScalingMatrix []int) error { + lastScale := 8 + nextScale := 8 + for i := 0; i < sizeOfScalingList; i++ { + if nextScale != 0 { + deltaScale, err := readSe(br) + if err != nil { + return errors.Wrap(err, "could not parse deltaScale") + } + nextScale = (lastScale + deltaScale + 256) % 256 + if i == 0 && nextScale == 0 { + // Scaling list should use the default list for this point in the matrix + _ = defaultScalingMatrix + } + } + if nextScale == 0 { + scalingList[i] = lastScale + } else { + scalingList[i] = nextScale + } + lastScale = scalingList[i] + } + return nil } From 1d6c501bb8c970eaf369796a79d87d7815a0b74d Mon Sep 17 00:00:00 2001 From: Saxon Date: Tue, 30 Jul 2019 10:16:08 +0930 Subject: [PATCH 54/57] codec/h264/h264dec: fixed field types in sps.go and corrected code after merge of master into branch --- codec/h264/h264dec/parse.go | 21 +- codec/h264/h264dec/pps.go | 161 ++--------- codec/h264/h264dec/slice.go | 187 +++--------- codec/h264/h264dec/sps.go | 552 ++++++++++-------------------------- 4 files changed, 230 insertions(+), 691 deletions(-) diff --git a/codec/h264/h264dec/parse.go b/codec/h264/h264dec/parse.go index 0763be27..b2981958 100644 --- a/codec/h264/h264dec/parse.go +++ b/codec/h264/h264dec/parse.go @@ -64,11 +64,11 @@ func (r fieldReader) readBits(n int) uint64 { // Exp-Golomb-coded element using method as specified in section 9.1 of ITU-T // H.264 and return as an int. The read does not happen if the fieldReader // has a non-nil error. 
-func (r fieldReader) readUe() int { +func (r fieldReader) readUe() uint64 { if r.e != nil { return 0 } - var i int + var i uint64 i, r.e = readUe(r.br) return i } @@ -77,11 +77,11 @@ func (r fieldReader) readUe() int { // Exp-Golomb-coded syntax element using method as specified in section 9.1 // and returns as an int. The read does not happen if the fieldReader // has a non-nil error. -func (r fieldReader) readTe(x uint) int { +func (r fieldReader) readTe(x uint) int64 { if r.e != nil { return 0 } - var i int + var i int64 i, r.e = readTe(r.br, x) return i } @@ -122,7 +122,7 @@ func (r fieldReader) err() error { // // TODO: this should return uint, but rest of code needs to be changed for this // to happen. -func readUe(r *bits.BitReader) (int, error) { +func readUe(r *bits.BitReader) (uint64, error) { nZeros := -1 var err error for b := uint64(0); b == 0; nZeros++ { @@ -135,7 +135,7 @@ func readUe(r *bits.BitReader) (int, error) { if err != nil { return 0, err } - return int(math.Pow(float64(2), float64(nZeros)) - 1 + float64(rem)), nil + return uint64(math.Pow(float64(2), float64(nZeros)) - 1 + float64(rem)), nil } // readTe parses a syntax element of te(v) descriptor i.e, truncated @@ -143,9 +143,10 @@ func readUe(r *bits.BitReader) (int, error) { // Rec. ITU-T H.264 (04/2017). // // TODO: this should also return uint. -func readTe(r *bits.BitReader, x uint) (int, error) { +func readTe(r *bits.BitReader, x uint) (int64, error) { if x > 1 { - return readUe(r) + ue, err := readUe(r) + return int64(ue), err } if x == 1 { @@ -181,7 +182,7 @@ func readSe(r *bits.BitReader) (int, error) { // in Rec. ITU-T H.264 (04/2017). func readMe(r *bits.BitReader, chromaArrayType uint, mpm mbPartPredMode) (uint, error) { // Indexes to codedBlockPattern map. - var i1, i2, i3 int + var i1, i2, i3 uint64 // ChromaArrayType selects first index. 
switch chromaArrayType { @@ -200,7 +201,7 @@ func readMe(r *bits.BitReader, chromaArrayType uint, mpm mbPartPredMode) (uint, } // Need to check that we won't go out of bounds with this index. - if i2 >= len(codedBlockPattern[i1]) { + if int(i2) >= len(codedBlockPattern[i1]) { return 0, errInvalidCodeNum } diff --git a/codec/h264/h264dec/pps.go b/codec/h264/h264dec/pps.go index 1f618c2e..2467a550 100644 --- a/codec/h264/h264dec/pps.go +++ b/codec/h264/h264dec/pps.go @@ -4,7 +4,6 @@ import ( "math" "bitbucket.org/ausocean/av/codec/h264/h264dec/bits" - "github.com/pkg/errors" ) // import "strings" @@ -47,151 +46,54 @@ func NewPPS(sps *SPS, rbsp []byte, showPacket bool) (*PPS, error) { pps := PPS{} // TODO: give this io.Reader br := bits.NewBitReader(nil) + r := newFieldReader(br) - var err error - pps.ID, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse ID") - } - - pps.SPSID, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse SPS ID") - } - - b, err := br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read EntropyCodingMode") - } - pps.EntropyCodingMode = int(b) - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read BottomFieldPicOrderInFramePresent") - } - pps.BottomFieldPicOrderInFramePresent = b == 1 - - pps.NumSliceGroupsMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse NumSliceGroupsMinus1") - } + pps.ID = int(r.readUe()) + pps.SPSID = int(r.readUe()) + pps.EntropyCodingMode = int(r.readBits(1)) + pps.BottomFieldPicOrderInFramePresent = r.readBits(1) == 1 + pps.NumSliceGroupsMinus1 = int(r.readUe()) if pps.NumSliceGroupsMinus1 > 0 { - pps.SliceGroupMapType, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse SliceGroupMapType") - } + pps.SliceGroupMapType = int(r.readUe()) if pps.SliceGroupMapType == 0 { for iGroup := 0; iGroup <= pps.NumSliceGroupsMinus1; iGroup++ 
{ - pps.RunLengthMinus1[iGroup], err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse RunLengthMinus1") - } + pps.RunLengthMinus1[iGroup] = int(r.readUe()) } } else if pps.SliceGroupMapType == 2 { for iGroup := 0; iGroup < pps.NumSliceGroupsMinus1; iGroup++ { - pps.TopLeft[iGroup], err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse TopLeft[iGroup]") - } - if err != nil { - return nil, errors.Wrap(err, "could not parse TopLeft[iGroup]") - } - - pps.BottomRight[iGroup], err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse BottomRight[iGroup]") - } + pps.TopLeft[iGroup] = int(r.readUe()) + pps.BottomRight[iGroup] = int(r.readUe()) } } else if pps.SliceGroupMapType > 2 && pps.SliceGroupMapType < 6 { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read SliceGroupChangeDirection") - } - pps.SliceGroupChangeDirection = b == 1 - - pps.SliceGroupChangeRateMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse SliceGroupChangeRateMinus1") - } + pps.SliceGroupChangeDirection = r.readBits(1) == 1 + pps.SliceGroupChangeRateMinus1 = int(r.readUe()) } else if pps.SliceGroupMapType == 6 { - pps.PicSizeInMapUnitsMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse PicSizeInMapUnitsMinus1") - } + pps.PicSizeInMapUnitsMinus1 = int(r.readUe()) for i := 0; i <= pps.PicSizeInMapUnitsMinus1; i++ { - b, err = br.ReadBits(int(math.Ceil(math.Log2(float64(pps.NumSliceGroupsMinus1 + 1))))) - if err != nil { - return nil, errors.Wrap(err, "coult not read SliceGroupId") - } - pps.SliceGroupId[i] = int(b) + pps.SliceGroupId[i] = int(r.readBits(int(math.Ceil(math.Log2(float64(pps.NumSliceGroupsMinus1 + 1)))))) } } } - pps.NumRefIdxL0DefaultActiveMinus1, err = readUe(br) - if err != nil { - return nil, errors.New("could not parse NumRefIdxL0DefaultActiveMinus1") - } - - 
pps.NumRefIdxL1DefaultActiveMinus1, err = readUe(br) - if err != nil { - return nil, errors.New("could not parse NumRefIdxL1DefaultActiveMinus1") - } - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read WeightedPred") - } - pps.WeightedPred = b == 1 - - b, err = br.ReadBits(2) - if err != nil { - return nil, errors.Wrap(err, "could not read WeightedBipred") - } - pps.WeightedBipred = int(b) - - pps.PicInitQpMinus26, err = readSe(br) - if err != nil { - return nil, errors.New("could not parse PicInitQpMinus26") - } - - pps.PicInitQsMinus26, err = readSe(br) - if err != nil { - return nil, errors.New("could not parse PicInitQsMinus26") - } - - pps.ChromaQpIndexOffset, err = readSe(br) - if err != nil { - return nil, errors.New("could not parse ChromaQpIndexOffset") - } - - err = readFlags(br, []flag{ - {&pps.DeblockingFilterControlPresent, "DeblockingFilterControlPresent"}, - {&pps.ConstrainedIntraPred, "ConstrainedIntraPred"}, - {&pps.RedundantPicCntPresent, "RedundantPicCntPresent"}, - }) - if err != nil { - return nil, err - } + pps.NumRefIdxL0DefaultActiveMinus1 = int(r.readUe()) + pps.NumRefIdxL1DefaultActiveMinus1 = int(r.readUe()) + pps.WeightedPred = r.readBits(1) == 1 + pps.WeightedBipred = int(r.readBits(2)) + pps.PicInitQpMinus26 = int(r.readSe()) + pps.PicInitQsMinus26 = int(r.readSe()) + pps.ChromaQpIndexOffset = int(r.readSe()) + pps.DeblockingFilterControlPresent = r.readBits(1) == 1 + pps.ConstrainedIntraPred = r.readBits(1) == 1 + pps.RedundantPicCntPresent = r.readBits(1) == 1 logger.Printf("debug: \tChecking for more PPS data") if moreRBSPData(br) { logger.Printf("debug: \tProcessing additional PPS data") - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read Transform8x8Mode") - } - pps.Transform8x8Mode = int(b) - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read PicScalingMatrixPresent") - } - pps.PicScalingMatrixPresent = b == 1 
+ pps.Transform8x8Mode = int(r.readBits(1)) + pps.PicScalingMatrixPresent = r.readBits(1) == 1 if pps.PicScalingMatrixPresent { v := 6 @@ -199,11 +101,7 @@ func NewPPS(sps *SPS, rbsp []byte, showPacket bool) (*PPS, error) { v = 2 } for i := 0; i < 6+(v*pps.Transform8x8Mode); i++ { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read PicScalingListPresent") - } - pps.PicScalingListPresent[i] = b == 1 + pps.PicScalingListPresent[i] = r.readBits(1) == 1 if pps.PicScalingListPresent[i] { if i < 6 { scalingList( @@ -222,10 +120,7 @@ func NewPPS(sps *SPS, rbsp []byte, showPacket bool) (*PPS, error) { } } } - pps.SecondChromaQpIndexOffset, err = readSe(br) - if err != nil { - return nil, errors.New("could not parse SecondChromaQpIndexOffset") - } + pps.SecondChromaQpIndexOffset = r.readSe() } moreRBSPData(br) // rbspTrailingBits() diff --git a/codec/h264/h264dec/slice.go b/codec/h264/h264dec/slice.go index 0a20c781..4b620cad 100644 --- a/codec/h264/h264dec/slice.go +++ b/codec/h264/h264dec/slice.go @@ -169,22 +169,22 @@ func MbToSliceGroupMap(sps *SPS, pps *PPS, header *SliceHeader) []int { } func PicWidthInMbs(sps *SPS) int { - return sps.PicWidthInMBSMinus1 + 1 + return int(sps.PicWidthInMBSMinus1 + 1) } func PicHeightInMapUnits(sps *SPS) int { - return sps.PicHeightInMapUnitsMinus1 + 1 + return int(sps.PicHeightInMapUnitsMinus1 + 1) } func PicSizeInMapUnits(sps *SPS) int { - return PicWidthInMbs(sps) * PicHeightInMapUnits(sps) + return int(PicWidthInMbs(sps) * PicHeightInMapUnits(sps)) } func FrameHeightInMbs(sps *SPS) int { - return (2 - flagVal(sps.FrameMBSOnlyFlag)) * PicHeightInMapUnits(sps) + return int((2 - flagVal(sps.FrameMBSOnlyFlag)) * PicHeightInMapUnits(sps)) } func PicHeightInMbs(sps *SPS, header *SliceHeader) int { - return FrameHeightInMbs(sps) / (1 + flagVal(header.FieldPic)) + return int(FrameHeightInMbs(sps) / (1 + flagVal(header.FieldPic))) } func PicSizeInMbs(sps *SPS, header *SliceHeader) int { - return 
PicWidthInMbs(sps) * PicHeightInMbs(sps, header) + return int(PicWidthInMbs(sps) * PicHeightInMbs(sps, header)) } // table 6-1 @@ -262,6 +262,7 @@ func NumMbPart(nalUnit *NALUnit, sps *SPS, header *SliceHeader, data *SliceData) } func MbPred(sliceContext *SliceContext, br *bits.BitReader, rbsp []byte) error { + r := newFieldReader(br) var cabac *CABAC sliceType := sliceTypeMap[sliceContext.Slice.Header.SliceType] mbPartPredMode, err := MbPartPredMode(sliceContext.Slice.Data, sliceType, sliceContext.Slice.Data.MbType, 0) @@ -372,11 +373,7 @@ func MbPred(sliceContext *SliceContext, br *bits.BitReader, rbsp []byte) error { logger.Printf("TODO: ae for IntraChromaPredMode\n") } else { - var err error - sliceContext.Slice.Data.IntraChromaPredMode, err = readUe(br) - if err != nil { - return errors.Wrap(err, "could not parse IntraChromaPredMode") - } + sliceContext.Slice.Data.IntraChromaPredMode = int(r.readUe()) } } @@ -405,14 +402,10 @@ func MbPred(sliceContext *SliceContext, br *bits.BitReader, rbsp []byte) error { // TODO: Only one reference picture is used for inter-prediction, // then the value should be 0 if MbaffFrameFlag(sliceContext.SPS, sliceContext.Slice.Header) == 0 || !sliceContext.Slice.Data.MbFieldDecodingFlag { - sliceContext.Slice.Data.RefIdxL0[mbPartIdx], _ = readTe( - br, - uint(sliceContext.Slice.Header.NumRefIdxL0ActiveMinus1)) + sliceContext.Slice.Data.RefIdxL0[mbPartIdx] = int(r.readTe(uint(sliceContext.Slice.Header.NumRefIdxL0ActiveMinus1))) } else { rangeMax := 2*sliceContext.Slice.Header.NumRefIdxL0ActiveMinus1 + 1 - sliceContext.Slice.Data.RefIdxL0[mbPartIdx], _ = readTe( - br, - uint(rangeMax)) + sliceContext.Slice.Data.RefIdxL0[mbPartIdx] = int(r.readTe(uint(rangeMax))) } } } @@ -588,9 +581,9 @@ func nextMbAddress(n int, sps *SPS, pps *PPS, header *SliceHeader) int { // FrameHeightInMbs = (2 - ps.FrameMBSOnlyFlag) * PicHeightInMapUnits picWidthInMbs := sps.PicWidthInMBSMinus1 + 1 picHeightInMapUnits := sps.PicHeightInMapUnitsMinus1 + 1 - 
frameHeightInMbs := (2 - flagVal(sps.FrameMBSOnlyFlag)) * picHeightInMapUnits + frameHeightInMbs := (2 - flagVal(sps.FrameMBSOnlyFlag)) * int(picHeightInMapUnits) picHeightInMbs := frameHeightInMbs / (1 + flagVal(header.FieldPic)) - picSizeInMbs := picWidthInMbs * picHeightInMbs + picSizeInMbs := int(picWidthInMbs) * picHeightInMbs mbToSliceGroupMap := MbToSliceGroupMap(sps, pps, header) for i < picSizeInMbs && mbToSliceGroupMap[i] != mbToSliceGroupMap[i] { i++ @@ -615,8 +608,8 @@ func MbaffFrameFlag(sps *SPS, header *SliceHeader) int { } func NewSliceData(sliceContext *SliceContext, br *bits.BitReader) (*SliceData, error) { + r := newFieldReader(br) var cabac *CABAC - var err error sliceContext.Slice.Data = &SliceData{BitReader: br} // TODO: Why is this being initialized here? // initCabac(sliceContext) @@ -645,10 +638,7 @@ func NewSliceData(sliceContext *SliceContext, br *bits.BitReader) (*SliceData, e if sliceContext.Slice.Data.SliceTypeName != "I" && sliceContext.Slice.Data.SliceTypeName != "SI" { logger.Printf("debug: \tNonI/SI slice, processing moreData\n") if sliceContext.PPS.EntropyCodingMode == 0 { - sliceContext.Slice.Data.MbSkipRun, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MbSkipRun") - } + sliceContext.Slice.Data.MbSkipRun = int(r.readUe()) if sliceContext.Slice.Data.MbSkipRun > 0 { prevMbSkipped = 1 @@ -762,10 +752,7 @@ func NewSliceData(sliceContext *SliceContext, br *bits.BitReader) (*SliceData, e logger.Printf("TODO: ae for MBType\n") } else { - sliceContext.Slice.Data.MbType, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MbType") - } + sliceContext.Slice.Data.MbType = int(r.readUe()) } if sliceContext.Slice.Data.MbTypeName == "I_PCM" { for !br.ByteAligned() { @@ -777,7 +764,7 @@ func NewSliceData(sliceContext *SliceContext, br *bits.BitReader) (*SliceData, e // 7-3 p95 bitDepthY := 8 + sliceContext.SPS.BitDepthLumaMinus8 for i := 0; i < 256; i++ { - s, err := 
br.ReadBits(bitDepthY) + s, err := br.ReadBits(int(bitDepthY)) if err != nil { return nil, errors.Wrap(err, fmt.Sprintf("could not read PcmSampleLuma[%d]", i)) } @@ -798,7 +785,7 @@ func NewSliceData(sliceContext *SliceContext, br *bits.BitReader) (*SliceData, e bitDepthC := 8 + sliceContext.SPS.BitDepthChromaMinus8 for i := 0; i < 2*mbWidthC*mbHeightC; i++ { - s, err := br.ReadBits(bitDepthC) + s, err := br.ReadBits(int(bitDepthC)) if err != nil { return nil, errors.Wrap(err, fmt.Sprintf("could not read PcmSampleChroma[%d]", i)) } @@ -959,27 +946,17 @@ func NewSliceContext(videoStream *VideoStream, nalUnit *NALUnit, rbsp []byte, sh if sps.SeparateColorPlaneFlag { header.ChromaArrayType = 0 } else { - header.ChromaArrayType = sps.ChromaFormatIDC + header.ChromaArrayType = int(sps.ChromaFormatIDC) } br := bits.NewBitReader(bytes.NewReader(rbsp)) + r := newFieldReader(br) - header.FirstMbInSlice, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse FirstMbInSlice") - } - - header.SliceType, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse SliceType") - } + header.FirstMbInSlice = int(r.readUe()) + header.SliceType = int(r.readUe()) sliceType := sliceTypeMap[header.SliceType] logger.Printf("debug: %s (%s) slice of %d bytes\n", NALUnitType[int(nalUnit.Type)], sliceType, len(rbsp)) - header.PPSID, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse PPSID") - } - + header.PPSID = int(r.readUe()) if sps.SeparateColorPlaneFlag { b, err := br.ReadBits(2) if err != nil { @@ -1004,13 +981,10 @@ func NewSliceContext(videoStream *VideoStream, nalUnit *NALUnit, rbsp []byte, sh } } if idrPic { - header.IDRPicID, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse IDRPicID") - } + header.IDRPicID = int(r.readUe()) } if sps.PicOrderCountType == 0 { - b, err := br.ReadBits(sps.Log2MaxPicOrderCntLSBMin4 + 4) + b, err := 
br.ReadBits(int(sps.Log2MaxPicOrderCntLSBMin4 + 4)) if err != nil { return nil, errors.Wrap(err, "could not read PicOrderCntLsb") } @@ -1037,10 +1011,7 @@ func NewSliceContext(videoStream *VideoStream, nalUnit *NALUnit, rbsp []byte, sh } } if pps.RedundantPicCntPresent { - header.RedundantPicCnt, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse RedundantPicCnt") - } + header.RedundantPicCnt = int(r.readUe()) } if sliceType == "B" { b, err := br.ReadBits(1) @@ -1057,15 +1028,9 @@ func NewSliceContext(videoStream *VideoStream, nalUnit *NALUnit, rbsp []byte, sh header.NumRefIdxActiveOverride = b == 1 if header.NumRefIdxActiveOverride { - header.NumRefIdxL0ActiveMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse NumRefIdxL0ActiveMinus1") - } + header.NumRefIdxL0ActiveMinus1 = int(r.readUe()) if sliceType == "B" { - header.NumRefIdxL1ActiveMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse NumRefIdxL1ActiveMinus1") - } + header.NumRefIdxL1ActiveMinus1 = int(r.readUe()) } } } @@ -1085,21 +1050,12 @@ func NewSliceContext(videoStream *VideoStream, nalUnit *NALUnit, rbsp []byte, sh if header.RefPicListModificationFlagL0 { for header.ModificationOfPicNums != 3 { - header.ModificationOfPicNums, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse ModificationOfPicNums") - } + header.ModificationOfPicNums = int(r.readUe()) if header.ModificationOfPicNums == 0 || header.ModificationOfPicNums == 1 { - header.AbsDiffPicNumMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse AbsDiffPicNumMinus1") - } + header.AbsDiffPicNumMinus1 = int(r.readUe()) } else if header.ModificationOfPicNums == 2 { - header.LongTermPicNum, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse LongTermPicNum") - } + header.LongTermPicNum = int(r.readUe()) } } } @@ -1114,21 +1070,12 @@ func 
NewSliceContext(videoStream *VideoStream, nalUnit *NALUnit, rbsp []byte, sh if header.RefPicListModificationFlagL1 { for header.ModificationOfPicNums != 3 { - header.ModificationOfPicNums, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse ModificationOfPicNums") - } + header.ModificationOfPicNums = int(r.readUe()) if header.ModificationOfPicNums == 0 || header.ModificationOfPicNums == 1 { - header.AbsDiffPicNumMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse AbsDiffPicNumMinus1") - } + header.AbsDiffPicNumMinus1 = int(r.readUe()) } else if header.ModificationOfPicNums == 2 { - header.LongTermPicNum, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse LongTermPicNum") - } + header.LongTermPicNum = int(r.readUe()) } } } @@ -1138,23 +1085,13 @@ func NewSliceContext(videoStream *VideoStream, nalUnit *NALUnit, rbsp []byte, sh if (pps.WeightedPred && (sliceType == "P" || sliceType == "SP")) || (pps.WeightedBipred == 1 && sliceType == "B") { // predWeightTable() - header.LumaLog2WeightDenom, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse LumaLog2WeightDenom") - } + header.LumaLog2WeightDenom = int(r.readUe()) if header.ChromaArrayType != 0 { - header.ChromaLog2WeightDenom, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse ChromaLog2WeightDenom") - } + header.ChromaLog2WeightDenom = int(r.readUe()) } for i := 0; i <= header.NumRefIdxL0ActiveMinus1; i++ { - b, err := br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read LumaWeightL0Flag") - } - header.LumaWeightL0Flag = b == 1 + header.LumaWeightL0Flag = r.readBits(1) == 1 if header.LumaWeightL0Flag { se, err := readSe(br) @@ -1266,69 +1203,37 @@ func NewSliceContext(videoStream *VideoStream, nalUnit *NALUnit, rbsp []byte, sh header.AdaptiveRefPicMarkingModeFlag = b == 1 if header.AdaptiveRefPicMarkingModeFlag { - 
header.MemoryManagementControlOperation, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MemoryManagementControlOperation") - } + header.MemoryManagementControlOperation = int(r.readUe()) for header.MemoryManagementControlOperation != 0 { if header.MemoryManagementControlOperation == 1 || header.MemoryManagementControlOperation == 3 { - header.DifferenceOfPicNumsMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MemoryManagementControlOperation") - } + header.DifferenceOfPicNumsMinus1 = int(r.readUe()) } if header.MemoryManagementControlOperation == 2 { - header.LongTermPicNum, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse LongTermPicNum") - } + header.LongTermPicNum = int(r.readUe()) } if header.MemoryManagementControlOperation == 3 || header.MemoryManagementControlOperation == 6 { - header.LongTermFrameIdx, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse LongTermFrameIdx") - } + header.LongTermFrameIdx = int(r.readUe()) } if header.MemoryManagementControlOperation == 4 { - header.MaxLongTermFrameIdxPlus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MaxLongTermFrameIdxPlus1") - } + header.MaxLongTermFrameIdxPlus1 = int(r.readUe()) } } } } // end decRefPicMarking } if pps.EntropyCodingMode == 1 && sliceType != "I" && sliceType != "SI" { - header.CabacInit, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse CabacInit") - } - } - header.SliceQpDelta, err = readSe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse SliceQpDelta") + header.CabacInit = int(r.readUe()) } + header.SliceQpDelta = int(r.readSe()) if sliceType == "SP" || sliceType == "SI" { if sliceType == "SP" { - b, err := br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read SpForSwitch") - } - header.SpForSwitch = b == 1 - } - header.SliceQsDelta, 
err = readSe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse SliceQsDelta") + header.SpForSwitch = r.readBits(1) == 1 } + header.SliceQsDelta = int(r.readSe()) } if pps.DeblockingFilterControlPresent { - header.DisableDeblockingFilter, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse DisableDeblockingFilter") - } - + header.DisableDeblockingFilter = int(r.readUe()) if header.DisableDeblockingFilter != 1 { header.SliceAlphaC0OffsetDiv2, err = readSe(br) if err != nil { diff --git a/codec/h264/h264dec/sps.go b/codec/h264/h264dec/sps.go index bd3e1184..2c37746b 100644 --- a/codec/h264/h264dec/sps.go +++ b/codec/h264/h264dec/sps.go @@ -2,6 +2,7 @@ package h264dec import ( "bytes" + "fmt" "bitbucket.org/ausocean/av/codec/h264/h264dec/bits" "github.com/pkg/errors" @@ -63,46 +64,46 @@ var ( // SPS describes a sequence parameter set as defined by section 7.3.2.1.1 in // the Specifications. type SPS struct { - Profile int - Constraint0 int - Constraint1 int - Constraint2 int - Constraint3 int - Constraint4 int - Constraint5 int - LevelIDC int - SPSID int - ChromaFormatIDC int + Profile uint8 + Constraint0 bool + Constraint1 bool + Constraint2 bool + Constraint3 bool + Constraint4 bool + Constraint5 bool + LevelIDC uint8 + SPSID uint64 + ChromaFormatIDC uint64 SeparateColorPlaneFlag bool - BitDepthLumaMinus8 int - BitDepthChromaMinus8 int + BitDepthLumaMinus8 uint64 + BitDepthChromaMinus8 uint64 QPPrimeYZeroTransformBypassFlag bool SeqScalingMatrixPresentFlag bool SeqScalingListPresentFlag []bool - ScalingList4x4 [][]int + ScalingList4x4 [][]uint64 UseDefaultScalingMatrix4x4Flag []bool - ScalingList8x8 [][]int + ScalingList8x8 [][]uint64 UseDefaultScalingMatrix8x8Flag []bool - Log2MaxFrameNumMinus4 int - PicOrderCountType int - Log2MaxPicOrderCntLSBMin4 int + Log2MaxFrameNumMinus4 uint64 + PicOrderCountType uint64 + Log2MaxPicOrderCntLSBMin4 uint64 DeltaPicOrderAlwaysZeroFlag bool - OffsetForNonRefPic int - 
OffsetForTopToBottomField int - NumRefFramesInPicOrderCntCycle int + OffsetForNonRefPic int64 + OffsetForTopToBottomField int64 + NumRefFramesInPicOrderCntCycle uint64 OffsetForRefFrameList []int - MaxNumRefFrames int + MaxNumRefFrames uint64 GapsInFrameNumValueAllowed bool - PicWidthInMBSMinus1 int - PicHeightInMapUnitsMinus1 int + PicWidthInMBSMinus1 uint64 + PicHeightInMapUnitsMinus1 uint64 FrameMBSOnlyFlag bool MBAdaptiveFrameFieldFlag bool Direct8x8InferenceFlag bool FrameCroppingFlag bool - FrameCropLeftOffset int - FrameCropRightOffset int - FrameCropTopOffset int - FrameCropBottomOffset int + FrameCropLeftOffset uint64 + FrameCropRightOffset uint64 + FrameCropTopOffset uint64 + FrameCropBottomOffset uint64 VUIParametersPresentFlag bool VUIParameters *VUIParameters } @@ -115,76 +116,33 @@ func NewSPS(rbsp []byte, showPacket bool) (*SPS, error) { logger.Printf("debug: \t%#v\n", rbsp[0:8]) sps := SPS{} br := bits.NewBitReader(bytes.NewReader(rbsp)) - var err error + r := newFieldReader(br) - err = readFields(br, - []field{ - {&sps.Profile, "ProfileIDC", 8}, - {&sps.Constraint0, "Constraint0", 1}, - {&sps.Constraint1, "Constraint1", 1}, - {&sps.Constraint2, "Constraint2", 1}, - {&sps.Constraint3, "Constraint3", 1}, - {&sps.Constraint4, "Constraint4", 1}, - {&sps.Constraint5, "Constraint5", 1}, - }, - ) - - _, err = br.ReadBits(2) - if err != nil { - return nil, errors.Wrap(err, "could not read ReservedZeroBits") - } - - b, err := br.ReadBits(8) - if err != nil { - return nil, errors.Wrap(err, "could not read Level") - } - sps.LevelIDC = int(b) - - // sps.ID = b.NextField("SPSID", 6) // proper - sps.SPSID, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse ID") - } - - sps.ChromaFormatIDC, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse ChromaFormatIDC") - } + sps.Profile = uint8(r.readBits(8)) + sps.Constraint0 = r.readBits(1) == 1 + sps.Constraint1 = r.readBits(1) == 1 + sps.Constraint2 = 
r.readBits(1) == 1 + sps.Constraint3 = r.readBits(1) == 1 + sps.Constraint4 = r.readBits(1) == 1 + sps.Constraint5 = r.readBits(1) == 1 + r.readBits(2) // 2 reserved bits. + sps.LevelIDC = uint8(r.readBits(8)) + sps.SPSID = r.readUe() + sps.ChromaFormatIDC = r.readUe() // This should be done only for certain ProfileIDC: isProfileIDC := []int{100, 110, 122, 244, 44, 83, 86, 118, 128, 138, 139, 134, 135} // SpecialProfileCase1 - if isInList(isProfileIDC, sps.Profile) { + if isInList(isProfileIDC, int(sps.Profile)) { if sps.ChromaFormatIDC == chroma444 { // TODO: should probably deal with error here. - b, err := br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read UseSeparateColorPlaneFlag") - } - sps.SeparateColorPlaneFlag = b == 1 + sps.SeparateColorPlaneFlag = r.readBits(1) == 1 } - sps.BitDepthLumaMinus8, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse BitDepthLumaMinus8") - } - - sps.BitDepthChromaMinus8, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse BitDepthChromaMinus8") - } - - b, err := br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read QPrimeYZeroTransformBypass") - } - sps.QPPrimeYZeroTransformBypassFlag = b == 1 - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read SeqScalingMatrixPresent") - } - sps.SeqScalingMatrixPresentFlag = b == 1 + sps.BitDepthLumaMinus8 = r.readUe() + sps.BitDepthChromaMinus8 = r.readUe() + sps.QPPrimeYZeroTransformBypassFlag = r.readBits(1) == 1 + sps.SeqScalingMatrixPresentFlag = r.readBits(1) == 1 if sps.SeqScalingMatrixPresentFlag { max := 12 @@ -193,11 +151,7 @@ func NewSPS(rbsp []byte, showPacket bool) (*SPS, error) { } logger.Printf("debug: \tbuilding Scaling matrix for %d elements\n", max) for i := 0; i < max; i++ { - b, err := br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read SeqScalingList") - } - sps.SeqScalingListPresentFlag = 
append(sps.SeqScalingListPresentFlag, b == 1) + sps.SeqScalingListPresentFlag = append(sps.SeqScalingListPresentFlag, r.readBits(1) == 1) if sps.SeqScalingListPresentFlag[i] { if i < 6 { @@ -223,125 +177,44 @@ func NewSPS(rbsp []byte, showPacket bool) (*SPS, error) { // showSPS() // return sps // Possibly wrong due to no scaling list being built - sps.Log2MaxFrameNumMinus4, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse Log2MaxFrameNumMinus4") - } - - sps.PicOrderCountType, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse PicOrderCountType") - } + sps.Log2MaxFrameNumMinus4 = r.readUe() + sps.PicOrderCountType = r.readUe() if sps.PicOrderCountType == 0 { - sps.Log2MaxPicOrderCntLSBMin4, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse Log2MaxPicOrderCntLSBMin4") - } + sps.Log2MaxPicOrderCntLSBMin4 = r.readUe() } else if sps.PicOrderCountType == 1 { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read DeltaPicOrderAlwaysZero") - } - sps.DeltaPicOrderAlwaysZeroFlag = b == 1 + sps.DeltaPicOrderAlwaysZeroFlag = r.readBits(1) == 1 + sps.OffsetForNonRefPic = int64(r.readSe()) + sps.OffsetForTopToBottomField = int64(r.readSe()) + sps.NumRefFramesInPicOrderCntCycle = r.readUe() - sps.OffsetForNonRefPic, err = readSe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse OffsetForNonRefPic") - } - - sps.OffsetForTopToBottomField, err = readSe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse OffsetForTopToBottomField") - } - - sps.NumRefFramesInPicOrderCntCycle, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse NumRefFramesInPicOrderCntCycle") - } - - for i := 0; i < sps.NumRefFramesInPicOrderCntCycle; i++ { - se, err := readSe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse OffsetForRefFrameList") - } - sps.OffsetForRefFrameList = append( - 
sps.OffsetForRefFrameList, - se) + for i := 0; i < int(sps.NumRefFramesInPicOrderCntCycle); i++ { + sps.OffsetForRefFrameList = append(sps.OffsetForRefFrameList, r.readSe()) } } - sps.MaxNumRefFrames, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MaxNumRefFrames") - } - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read GapsInFrameNumValueAllowed") - } - sps.GapsInFrameNumValueAllowed = b == 1 - - sps.PicWidthInMBSMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse PicWidthInMbsMinus1") - } - - sps.PicHeightInMapUnitsMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse PicHeightInMapUnitsMinus1") - } - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read FrameMbsOnly") - } - sps.FrameMBSOnlyFlag = b == 1 + sps.MaxNumRefFrames = r.readUe() + sps.GapsInFrameNumValueAllowed = r.readBits(1) == 1 + sps.PicWidthInMBSMinus1 = r.readUe() + sps.PicHeightInMapUnitsMinus1 = r.readUe() + sps.FrameMBSOnlyFlag = r.readBits(1) == 1 if !sps.FrameMBSOnlyFlag { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read MBAdaptiveFrameField") - } - sps.MBAdaptiveFrameFieldFlag = b == 1 + sps.MBAdaptiveFrameFieldFlag = r.readBits(1) == 1 } - err = readFlags(br, []flag{ - {&sps.Direct8x8InferenceFlag, "Direct8x8Inference"}, - {&sps.FrameCroppingFlag, "FrameCropping"}, - }) - if err != nil { - return nil, err - } + sps.Direct8x8InferenceFlag = r.readBits(1) == 1 + sps.FrameCroppingFlag = r.readBits(1) == 1 if sps.FrameCroppingFlag { - sps.FrameCropLeftOffset, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse FrameCropLeftOffset") - } - - sps.FrameCropRightOffset, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse FrameCropRightOffset") - } - - sps.FrameCropTopOffset, err = readUe(br) - if err != nil { - 
return nil, errors.Wrap(err, "could not parse FrameCropTopOffset") - } - - sps.FrameCropBottomOffset, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse FrameCropBottomOffset") - } + sps.FrameCropLeftOffset = r.readUe() + sps.FrameCropRightOffset = r.readUe() + sps.FrameCropTopOffset = r.readUe() + sps.FrameCropBottomOffset = r.readUe() } - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read VuiParametersPresent") - } - sps.VUIParametersPresentFlag = b == 1 + sps.VUIParametersPresentFlag = r.readBits(1) == 1 if sps.VUIParametersPresentFlag { @@ -354,24 +227,24 @@ func NewSPS(rbsp []byte, showPacket bool) (*SPS, error) { // Specifications. type VUIParameters struct { AspectRatioInfoPresentFlag bool - AspectRatioIDC int - SARWidth int - SARHeight int + AspectRatioIDC uint8 + SARWidth uint32 + SARHeight uint32 OverscanInfoPresentFlag bool OverscanAppropriateFlag bool VideoSignalTypePresentFlag bool - VideoFormat int + VideoFormat uint8 VideoFullRangeFlag bool ColorDescriptionPresentFlag bool - ColorPrimaries int - TransferCharacteristics int - MatrixCoefficients int + ColorPrimaries uint8 + TransferCharacteristics uint8 + MatrixCoefficients uint8 ChromaLocInfoPresentFlag bool - ChromaSampleLocTypeTopField int - ChromaSampleLocTypeBottomField int + ChromaSampleLocTypeTopField uint64 + ChromaSampleLocTypeBottomField uint64 TimingInfoPresentFlag bool - NumUnitsInTick int - TimeScale int + NumUnitsInTick uint32 + TimeScale uint32 FixedFrameRateFlag bool NALHRDParametersPresentFlag bool NALHRDParameters *HRDParameters @@ -381,12 +254,12 @@ type VUIParameters struct { PicStructPresentFlag bool BitstreamRestrictionFlag bool MotionVectorsOverPicBoundariesFlag bool - MaxBytesPerPicDenom int - MaxBitsPerMBDenom int - Log2MaxMVLengthHorizontal int - Log2MaxMVLengthVertical int - MaxNumReorderFrames int - MaxDecFrameBuffering int + MaxBytesPerPicDenom uint64 + MaxBitsPerMBDenom uint64 + Log2MaxMVLengthHorizontal 
uint64 + Log2MaxMVLengthVertical uint64 + MaxNumReorderFrames uint64 + MaxDecFrameBuffering uint64 } // NewVUIParameters parses video usability information parameters from br @@ -394,137 +267,60 @@ type VUIParameters struct { // new VUIParameters. func NewVUIParameters(br *bits.BitReader) (*VUIParameters, error) { p := &VUIParameters{} + r := newFieldReader(br) - b, err := br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read AspectRatioInfoPresent") - } - p.AspectRatioInfoPresentFlag = b == 1 + p.AspectRatioInfoPresentFlag = r.readBits(1) == 1 if p.AspectRatioInfoPresentFlag { - b, err = br.ReadBits(8) - if err != nil { - return nil, errors.Wrap(err, "could not read AspectRatio") - } - p.AspectRatioIDC = int(b) + p.AspectRatioIDC = uint8(r.readBits(8)) EXTENDED_SAR := 999 - if p.AspectRatioIDC == EXTENDED_SAR { - b, err = br.ReadBits(16) - if err != nil { - return nil, errors.Wrap(err, "could not read SarWidth") - } - p.SARWidth = int(b) - - b, err = br.ReadBits(16) - if err != nil { - return nil, errors.Wrap(err, "could not read SarHeight") - } - p.SARHeight = int(b) + if int(p.AspectRatioIDC) == EXTENDED_SAR { + p.SARWidth = uint32(r.readBits(16)) + p.SARHeight = uint32(r.readBits(16)) } } - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read OverscanInfoPresent") - } - p.OverscanInfoPresentFlag = b == 1 + p.OverscanInfoPresentFlag = r.readBits(1) == 1 if p.OverscanInfoPresentFlag { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read OverscanAppropriate") - } - p.OverscanAppropriateFlag = b == 1 + p.OverscanAppropriateFlag = r.readBits(1) == 1 } - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read VideoSignalTypePresent") - } - p.VideoSignalTypePresentFlag = b == 1 + p.VideoSignalTypePresentFlag = r.readBits(1) == 1 if p.VideoSignalTypePresentFlag { - b, err = br.ReadBits(3) - if err != nil { - return nil, errors.Wrap(err, 
"could not read VideoFormat") - } - p.VideoFormat = int(b) + p.VideoFormat = uint8(r.readBits(3)) } if p.VideoSignalTypePresentFlag { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read VideoFullRange") - } - p.VideoFullRangeFlag = b == 1 - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read ColorDescriptionPresent") - } - p.ColorDescriptionPresentFlag = b == 1 + p.VideoFullRangeFlag = r.readBits(1) == 1 + p.ColorDescriptionPresentFlag = r.readBits(1) == 1 if p.ColorDescriptionPresentFlag { - err = readFields(br, - []field{ - {&p.ColorPrimaries, "ColorPrimaries", 8}, - {&p.TransferCharacteristics, "TransferCharacteristics", 8}, - {&p.MatrixCoefficients, "MatrixCoefficients", 8}, - }, - ) - if err != nil { - return nil, err - } + p.ColorPrimaries = uint8(r.readBits(8)) + p.TransferCharacteristics = uint8(r.readBits(8)) + p.MatrixCoefficients = uint8(r.readBits(8)) } } - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read ChromaLocInfoPresent") - } - p.ChromaLocInfoPresentFlag = b == 1 + p.ChromaLocInfoPresentFlag = r.readBits(1) == 1 if p.ChromaLocInfoPresentFlag { - p.ChromaSampleLocTypeTopField, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse ChromaSampleLocTypeTopField") - } - - p.ChromaSampleLocTypeBottomField, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse ChromaSampleLocTypeBottomField") - } + p.ChromaSampleLocTypeTopField = uint64(r.readUe()) + p.ChromaSampleLocTypeBottomField = uint64(r.readUe()) } - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read TimingInfoPresent") - } - p.TimingInfoPresentFlag = b == 1 + p.TimingInfoPresentFlag = r.readBits(1) == 1 if p.TimingInfoPresentFlag { - err := readFields(br, []field{ - {&p.NumUnitsInTick, "NumUnitsInTick", 32}, - {&p.TimeScale, "TimeScale", 32}, - }) - if err != nil { - return nil, err - 
} - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read FixedFrameRate") - } - p.FixedFrameRateFlag = b == 1 + p.NumUnitsInTick = uint32(r.readBits(32)) + p.TimeScale = uint32(r.readBits(32)) + p.FixedFrameRateFlag = r.readBits(1) == 1 } - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read NalHrdParametersPresent") - } - p.NALHRDParametersPresentFlag = b == 1 + p.NALHRDParametersPresentFlag = r.readBits(1) == 1 + var err error if p.NALHRDParametersPresentFlag { p.NALHRDParameters, err = NewHRDParameters(br) if err != nil { @@ -532,11 +328,7 @@ func NewVUIParameters(br *bits.BitReader) (*VUIParameters, error) { } } - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read VclHrdParametersPresent") - } - p.VCLHRDParametersPresentFlag = b == 1 + p.VCLHRDParametersPresentFlag = r.readBits(1) == 1 if p.VCLHRDParametersPresentFlag { p.VCLHRDParameters, err = NewHRDParameters(br) @@ -545,54 +337,20 @@ func NewVUIParameters(br *bits.BitReader) (*VUIParameters, error) { } } if p.NALHRDParametersPresentFlag || p.VCLHRDParametersPresentFlag { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read LowHrdDelay") - } - p.LowDelayHRDFlag = b == 1 + p.LowDelayHRDFlag = r.readBits(1) == 1 } - err = readFlags(br, []flag{ - {&p.PicStructPresentFlag, "PicStructPresent"}, - {&p.BitstreamRestrictionFlag, "BitStreamRestriction"}, - }) + p.PicStructPresentFlag = r.readBits(1) == 1 + p.BitstreamRestrictionFlag = r.readBits(1) == 1 if p.BitstreamRestrictionFlag { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read MotionVectorsOverPicBoundaries") - } - p.MotionVectorsOverPicBoundariesFlag = b == 1 - - p.MaxBytesPerPicDenom, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MaxBytesPerPicDenom") - } - - p.MaxBitsPerMBDenom, err = readUe(br) - if err != nil { - return nil, 
errors.Wrap(err, "could not parse MaxBitsPerMbDenom") - } - - p.Log2MaxMVLengthHorizontal, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse Log2MaxMvLengthHorizontal") - } - - p.Log2MaxMVLengthVertical, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse Log2MaxMvLengthVertical") - } - - p.MaxNumReorderFrames, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MaxNumReorderFrames") - } - - p.MaxDecFrameBuffering, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MaxDecFrameBuffering") - } + p.MotionVectorsOverPicBoundariesFlag = r.readBits(1) == 1 + p.MaxBytesPerPicDenom = r.readUe() + p.MaxBitsPerMBDenom = r.readUe() + p.Log2MaxMVLengthHorizontal = r.readUe() + p.Log2MaxMVLengthVertical = r.readUe() + p.MaxNumReorderFrames = r.readUe() + p.MaxDecFrameBuffering = r.readUe() } return p, nil } @@ -600,16 +358,16 @@ func NewVUIParameters(br *bits.BitReader) (*VUIParameters, error) { // HRDParameters describes hypothetical reference decoder parameters as defined // by section E.1.2 in the specifications. type HRDParameters struct { - CPBCntMinus1 int - BitRateScale int - CPBSizeScale int - BitRateValueMinus1 []int - CPBSizeValueMinus1 []int + CPBCntMinus1 uint64 + BitRateScale uint8 + CPBSizeScale uint8 + BitRateValueMinus1 []uint64 + CPBSizeValueMinus1 []uint64 CBRFlag []bool - InitialCPBRemovalDelayLenMinus1 int - CPBRemovalDelayLenMinus1 int - DPBOutputDelayLenMinus1 int - TimeOffsetLen int + InitialCPBRemovalDelayLenMinus1 uint8 + CPBRemovalDelayLenMinus1 uint8 + DPBOutputDelayLenMinus1 uint8 + TimeOffsetLen uint8 } // NewHRDParameters parses hypothetical reference decoder parameter from br @@ -617,33 +375,16 @@ type HRDParameters struct { // new HRDParameters. 
func NewHRDParameters(br *bits.BitReader) (*HRDParameters, error) { h := &HRDParameters{} - var err error - h.CPBCntMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse CPBCntMinus1") - } + r := newFieldReader(br) - err = readFields(br, []field{ - {&h.BitRateScale, "BitRateScale", 4}, - {&h.CPBSizeScale, "CPBSizeScale", 4}, - }) - if err != nil { - return nil, err - } + h.CPBCntMinus1 = r.readUe() + h.BitRateScale = uint8(r.readBits(4)) + h.CPBSizeScale = uint8(r.readBits(4)) // SchedSelIdx E1.2 - for sseli := 0; sseli <= h.CPBCntMinus1; sseli++ { - ue, err := readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse BitRateValueMinus1") - } - h.BitRateValueMinus1 = append(h.BitRateValueMinus1, ue) - - ue, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse CPBSizeValueMinus1") - } - h.CPBSizeValueMinus1 = append(h.CPBSizeValueMinus1, ue) + for sseli := 0; sseli <= int(h.CPBCntMinus1); sseli++ { + h.BitRateValueMinus1 = append(h.BitRateValueMinus1, r.readUe()) + h.CPBSizeValueMinus1 = append(h.CPBSizeValueMinus1, r.readUe()) if v, _ := br.ReadBits(1); v == 1 { h.CBRFlag = append(h.CBRFlag, true) @@ -651,17 +392,14 @@ func NewHRDParameters(br *bits.BitReader) (*HRDParameters, error) { h.CBRFlag = append(h.CBRFlag, false) } - err = readFields(br, - []field{ - {&h.InitialCPBRemovalDelayLenMinus1, "InitialCPBRemovalDelayLenMinus1", 5}, - {&h.CPBRemovalDelayLenMinus1, "CPBRemovalDelayLenMinus1", 5}, - {&h.DPBOutputDelayLenMinus1, "DpbOutputDelayLenMinus1", 5}, - {&h.TimeOffsetLen, "TimeOffsetLen", 5}, - }, - ) - if err != nil { - return nil, err - } + h.InitialCPBRemovalDelayLenMinus1 = uint8(r.readBits(5)) + h.CPBRemovalDelayLenMinus1 = uint8(r.readBits(5)) + h.DPBOutputDelayLenMinus1 = uint8(r.readBits(5)) + h.TimeOffsetLen = uint8(r.readBits(5)) + } + + if r.err() != nil { + return nil, fmt.Errorf("error from fieldReader: %v", r.err()) } return h, nil } From 
7a4ee894d4e2e9efe228196b5c06a810fcf67ba2 Mon Sep 17 00:00:00 2001 From: Saxon Date: Wed, 31 Jul 2019 20:25:38 +0930 Subject: [PATCH 55/57] codec/h264/h264dec/sps.go: commented fields of syntax structures --- codec/h264/h264dec/sps.go | 425 ++++++++++++++++++++++++++++++-------- 1 file changed, 342 insertions(+), 83 deletions(-) diff --git a/codec/h264/h264dec/sps.go b/codec/h264/h264dec/sps.go index 2c37746b..4da3fc69 100644 --- a/codec/h264/h264dec/sps.go +++ b/codec/h264/h264dec/sps.go @@ -63,49 +63,156 @@ var ( // SPS describes a sequence parameter set as defined by section 7.3.2.1.1 in // the Specifications. +// For semantics see section 7.4.2.1. Comments for fields are excerpts from +// section 7.4.2.1. type SPS struct { - Profile uint8 - Constraint0 bool - Constraint1 bool - Constraint2 bool - Constraint3 bool - Constraint4 bool - Constraint5 bool - LevelIDC uint8 - SPSID uint64 - ChromaFormatIDC uint64 - SeparateColorPlaneFlag bool - BitDepthLumaMinus8 uint64 - BitDepthChromaMinus8 uint64 + // profile_idc and level_idc indicate the profile and level to which the + // coded video sequence conforms. + Profile, LevelIDC uint8 + + // The constraint_setx_flag flags specify the constraints defined in A.2 to + // which this stream conforms. + Constraint0 bool + Constraint1 bool + Constraint2 bool + Constraint3 bool + Constraint4 bool + Constraint5 bool + + // seq_parameter_set_id identifies this sequence parameter set, and can then + // be referenced by the picture parameter set. The seq_parameter_set_id is + // in the range of 0 to 30 inclusive. + SPSID uint64 + + // chroma_format_idc specifies the chroma sampling relative to the luma + // sampling as specified in clause 6.2. Range of chroma_format_idc is + // from 0 to 3 inclusive. + ChromaFormatIDC uint64 + + // separate_color_plane_flag if true specifies that the three components of + // the 4:4:4 chroma format are coded separately.
+ SeparateColorPlaneFlag bool + + // bit_depth_luma_minus8 specifies the luma array sample bit depth and the + // luma quantisation parameter range offset QpBdOffset_y (eq 7-3 and 7-4). + BitDepthLumaMinus8 uint64 + + // bit_depth_chroma_minus8 specifies the chroma array sample bit depth and the + // chroma quantisation parameter range offset QpBdOffset_c (eq 7-3 and 7-4). + BitDepthChromaMinus8 uint64 + + // qpprime_y_zero_transform_bypass_flag equal to 1 specifies that, when QP′_Y + // is equal to 0, a transform bypass operation for the transform coefficient + // decoding process and picture construction process prior to deblocking + // filter process as specified in clause 8.5 shall be applied. QPPrimeYZeroTransformBypassFlag bool - SeqScalingMatrixPresentFlag bool - SeqScalingListPresentFlag []bool - ScalingList4x4 [][]uint64 - UseDefaultScalingMatrix4x4Flag []bool - ScalingList8x8 [][]uint64 - UseDefaultScalingMatrix8x8Flag []bool - Log2MaxFrameNumMinus4 uint64 - PicOrderCountType uint64 - Log2MaxPicOrderCntLSBMin4 uint64 - DeltaPicOrderAlwaysZeroFlag bool - OffsetForNonRefPic int64 - OffsetForTopToBottomField int64 - NumRefFramesInPicOrderCntCycle uint64 - OffsetForRefFrameList []int - MaxNumRefFrames uint64 - GapsInFrameNumValueAllowed bool - PicWidthInMBSMinus1 uint64 - PicHeightInMapUnitsMinus1 uint64 - FrameMBSOnlyFlag bool - MBAdaptiveFrameFieldFlag bool - Direct8x8InferenceFlag bool - FrameCroppingFlag bool - FrameCropLeftOffset uint64 - FrameCropRightOffset uint64 - FrameCropTopOffset uint64 - FrameCropBottomOffset uint64 - VUIParametersPresentFlag bool - VUIParameters *VUIParameters + + // seq_scaling_matrix_present_flag equal to 1 specifies that + // seq_scaling_list_present_flag[ i ] are present. When 0 they are not present + // and the sequence-level scaling lists specified by Flat_4x4_16 and + // Flat_8x8_16 shall be inferred.
+ SeqScalingMatrixPresentFlag bool + + // seq_scaling_list_present_flag[i] specifies whether the syntax structure for + // scaling list i is present. If 1 then present, otherwise not, and scaling + // list for i is inferred as per rule set A in table 7-2. + SeqScalingListPresentFlag []bool + + // The 4x4 sequence scaling lists for each i. + ScalingList4x4 [][]uint64 + + // Flag to indicate for a 4x4 scaling list, if we use the default. + UseDefaultScalingMatrix4x4Flag []bool + + // The 8x8 sequence scaling lists for each i. + ScalingList8x8 [][]uint64 + + // Flag to indicate for an 8x8 scaling list, if we use the default. + UseDefaultScalingMatrix8x8Flag []bool + + // log2_max_frame_num_minus4 allows for derivation of MaxFrameNum using eq 7-10. + Log2MaxFrameNumMinus4 uint64 + + // pic_order_cnt_type specifies the method to decode picture order count. + PicOrderCountType uint64 + + // log2_max_pic_order_cnt_lsb_minus4 allows for the derivation of + // MaxPicOrderCntLsb using eq 7-11. + Log2MaxPicOrderCntLSBMin4 uint64 + + // delta_pic_order_always_zero_flag if true indicates delta_pic_order_cnt[0] + // and delta_pic_order_cnt[1] are not present and are inferred to be 0. + DeltaPicOrderAlwaysZeroFlag bool + + // offset_for_non_ref_pic is used to calculate the picture order count of a + // non-reference picture as specified in clause 8.2.1. + OffsetForNonRefPic int64 + + // offset_for_top_to_bottom_field is used to calculate the picture order count + // of a bottom field as specified in clause 8.2.1. + OffsetForTopToBottomField int64 + + // num_ref_frames_in_pic_order_cnt_cycle is used in the decoding process for + // picture order count as specified in clause 8.2.1. + NumRefFramesInPicOrderCntCycle uint64 + + // offset_for_ref_frame[ i ] is an element of a list of + // num_ref_frames_in_pic_order_cnt_cycle values used in the decoding process + // for picture order count as specified in clause 8.2.1.
+ OffsetForRefFrameList []int + + // max_num_ref_frames specifies the max number of short-term and long-term + // reference frames, complementary reference field pairs, and non-paired + // reference fields that may be used by the decoding process for inter prediction. + MaxNumRefFrames uint64 + + // gaps_in_frame_num_value_allowed_flag specifies the allowed values of + // frame_num as specified in clause 7.4.3 and the decoding process in case of + // an inferred gap between values of frame_num as specified in clause 8.2.5.2. + GapsInFrameNumValueAllowed bool + + // pic_width_in_mbs_minus1 plus 1 specifies the width of each decoded picture + // in units of macroblocks. See eq 7-13. + PicWidthInMBSMinus1 uint64 + + // pic_height_in_map_units_minus1 plus 1 specifies the height in slice group + // map units of a decoded frame or field. See eq 7-16. + PicHeightInMapUnitsMinus1 uint64 + + // frame_mbs_only_flag if 0 coded pictures of the coded video sequence may be + // coded fields or coded frames. If 1 every coded picture of the coded video + // sequence is a coded frame containing only frame macroblocks. + FrameMBSOnlyFlag bool + + // mb_adaptive_frame_field_flag if 0 specifies no switching between + // frame and field macroblocks within a picture. If 1 specifies the possible + // use of switching between frame and field macroblocks within frames. + MBAdaptiveFrameFieldFlag bool + + // direct_8x8_inference_flag specifies the method used in the derivation + // process for luma motion vectors for B_Skip, B_Direct_16x16 and B_Direct_8x8 + // as specified in clause 8.4.1.2. + Direct8x8InferenceFlag bool + + // frame_cropping_flag if 1 then frame cropping offset parameters are next in + // the sequence parameter set. If 0 they are not.
+ FrameCroppingFlag bool + + // frame_crop_left_offset, frame_crop_right_offset, frame_crop_top_offset, + // frame_crop_bottom_offset specify the samples of the pictures in the coded + // video sequence that are output from the decoding process, in terms of a + // rectangular region specified in frame coordinates for output. + FrameCropLeftOffset uint64 + FrameCropRightOffset uint64 + FrameCropTopOffset uint64 + FrameCropBottomOffset uint64 + + // vui_parameters_present_flag if 1 the vui_parameters() syntax structure is + // present, otherwise it is not. + VUIParametersPresentFlag bool + + // The vui_parameters() syntax structure specified in appendix E. + VUIParameters *VUIParameters } // NewSPS parses a sequence parameter set raw byte sequence from br following @@ -225,41 +332,158 @@ func NewSPS(rbsp []byte, showPacket bool) (*SPS, error) { // SPS describes a sequence parameter set as defined by section E.1.1 in the // Specifications. +// Semantics for fields are defined in section E.2.1. Comments on fields are +// excerpts from this section. type VUIParameters struct { - AspectRatioInfoPresentFlag bool - AspectRatioIDC uint8 - SARWidth uint32 - SARHeight uint32 - OverscanInfoPresentFlag bool - OverscanAppropriateFlag bool - VideoSignalTypePresentFlag bool - VideoFormat uint8 - VideoFullRangeFlag bool - ColorDescriptionPresentFlag bool - ColorPrimaries uint8 - TransferCharacteristics uint8 - MatrixCoefficients uint8 - ChromaLocInfoPresentFlag bool - ChromaSampleLocTypeTopField uint64 - ChromaSampleLocTypeBottomField uint64 - TimingInfoPresentFlag bool - NumUnitsInTick uint32 - TimeScale uint32 - FixedFrameRateFlag bool - NALHRDParametersPresentFlag bool - NALHRDParameters *HRDParameters - VCLHRDParametersPresentFlag bool - VCLHRDParameters *HRDParameters - LowDelayHRDFlag bool - PicStructPresentFlag bool - BitstreamRestrictionFlag bool + // aspect_ratio_info_present_flag if 1 then aspect_ratio_idc is present, + // otherwise it is not.
+ AspectRatioInfoPresentFlag bool + + // aspect_ratio_idc specifies the value of sample aspect ratio of the luma samples. + AspectRatioIDC uint8 + + // sar_width indicates the horizontal size of the sample aspect ratio (in + // arbitrary units). + SARWidth uint32 + + // sar_height indicates the vertical size of the sample aspect ratio (in the + // same arbitrary units as sar_width). + SARHeight uint32 + + // overscan_info_present_flag if 1 then overscan_appropriate_flag is present, + // otherwise if 0, then the display method for the video signal is unspecified. + OverscanInfoPresentFlag bool + + // overscan_appropriate_flag if 1 then the cropped decoded pictures output + // are suitable for display using overscan, otherwise if 0, then the cropped + // decoded pictures output should not be displayed using overscan. + OverscanAppropriateFlag bool + + // video_signal_type_present_flag equal to 1 specifies that video_format, + // video_full_range_flag and colour_description_present_flag are present, + // otherwise if 0, then they are not present. + VideoSignalTypePresentFlag bool + + // video_format indicates the representation of the pictures as specified in + // Table E-2, before being coded in accordance with this Recommendation | + // International Standard. + VideoFormat uint8 + + // video_full_range_flag indicates the black level and range of the luma and + // chroma signals as derived from E′_Y, E′_PB, and E′_PR or E′_R, E′_G, + // and E′_B real-valued component signals. + VideoFullRangeFlag bool + + // colour_description_present_flag if 1 specifies that colour_primaries, + // transfer_characteristics and matrix_coefficients are present, otherwise if + // 0 then they are not present. + ColorDescriptionPresentFlag bool + + // colour_primaries indicates the chromaticity coordinates of the source + // primaries as specified in Table E-3 in terms of the CIE 1931 definition of + // x and y as specified by ISO 11664-1.
+ ColorPrimaries uint8 + + // transfer_characteristics either indicates the reference opto-electronic + // transfer characteristic function of the source picture, or indicates the + // inverse of the reference electro-optical transfer characteristic function. + TransferCharacteristics uint8 + + // matrix_coefficients describes the matrix coefficients used in deriving luma + // and chroma signals from the green, blue, and red, or Y, Z, and X primaries, + // as specified in Table E-5. + MatrixCoefficients uint8 + + // chroma_loc_info_present_flag if 1 specifies that chroma_sample_loc_type_top_field + // and chroma_sample_loc_type_bottom_field are present, otherwise if 0, + // they are not present. + ChromaLocInfoPresentFlag bool + + // chroma_sample_loc_type_top_field and chroma_sample_loc_type_bottom_field + // specify the location of chroma samples. + ChromaSampleLocTypeTopField, ChromaSampleLocTypeBottomField uint64 + + // timing_info_present_flag if 1 specifies that num_units_in_tick, time_scale + // and fixed_frame_rate_flag are present in the bitstream, otherwise if 0, + // they are not present. + TimingInfoPresentFlag bool + + // num_units_in_tick is the number of time units of a clock operating at the + // frequency time_scale Hz that corresponds to one increment (called a clock + // tick) of a clock tick counter. + NumUnitsInTick uint32 + + // time_scale is the number of time units that pass in one second. + TimeScale uint32 + + // fixed_frame_rate_flag if 1 indicates that the temporal distance + // between the HRD output times of any two consecutive pictures in output + // order is constrained as follows. fixed_frame_rate_flag equal to 0 indicates + // that no such constraints apply to the temporal distance between the HRD + // output times of any two consecutive pictures in output order. 
+ FixedFrameRateFlag bool + + // nal_hrd_parameters_present_flag if 1 then NAL HRD parameters (pertaining to + // Type II bitstream conformance) are present, otherwise if 0, then they + // are not present. + NALHRDParametersPresentFlag bool + + // The nal_hrd_parameters() syntax structure as specified in section E.1.2. + NALHRDParameters *HRDParameters + + // vcl_hrd_parameters_present_flag if 1 specifies that VCL HRD parameters + // (pertaining to all bitstream conformance) are present, otherwise if 0, then + // they are not present. + VCLHRDParametersPresentFlag bool + + // The vcl_hrd_parameters() syntax structure as specified in section E.1.2. + VCLHRDParameters *HRDParameters + + // low_delay_hrd_flag specifies the HRD operational mode as specified in Annex C. + LowDelayHRDFlag bool + + // pic_struct_present_flag if 1 then picture timing SEI messages (clause D.2.3) + // are present that include the pic_struct syntax element, otherwise if 0, then + // not present. + PicStructPresentFlag bool + + // bitstream_restriction_flag if 1, then the following coded video sequence + // bitstream restriction parameters are present, otherwise if 0, then they are + // not present. + BitstreamRestrictionFlag bool + + // motion_vectors_over_pic_boundaries_flag if 0 then no sample outside the + // picture boundaries and no sample at a fractional sample position for which + // the sample value is derived using one or more samples outside the picture + // boundaries is used for inter prediction of any sample, otherwise if 1, + // indicates that one or more samples outside picture boundaries may be used + // in inter prediction.
MotionVectorsOverPicBoundariesFlag bool - MaxBytesPerPicDenom uint64 - MaxBitsPerMBDenom uint64 - Log2MaxMVLengthHorizontal uint64 - Log2MaxMVLengthVertical uint64 - MaxNumReorderFrames uint64 - MaxDecFrameBuffering uint64 + + // max_bytes_per_pic_denom indicates a number of bytes not exceeded by the sum + // of the sizes of the VCL NAL units associated with any coded picture in the + // coded video sequence. + MaxBytesPerPicDenom uint64 + + // max_bits_per_mb_denom indicates an upper bound for the number of coded bits + // of macroblock_layer() data for any macroblock in any picture of the coded + // video sequence. + MaxBitsPerMBDenom uint64 + + // log2_max_mv_length_horizontal and log2_max_mv_length_vertical indicate the + // maximum absolute value of a decoded horizontal and vertical motion vector + // component, respectively, in 1⁄4 luma sample units, for all pictures in the + // coded video sequence. + Log2MaxMVLengthHorizontal, Log2MaxMVLengthVertical uint64 + + // max_num_reorder_frames indicates an upper bound for the number of frames + // buffers, in the decoded picture buffer (DPB), that are required for storing + // frames, complementary field pairs, and non-paired fields before output. + MaxNumReorderFrames uint64 + + // max_dec_frame_buffering specifies the required size of the HRD decoded + // picture buffer (DPB) in units of frame buffers. + MaxDecFrameBuffering uint64 } // NewVUIParameters parses video usability information parameters from br @@ -357,17 +581,52 @@ func NewVUIParameters(br *bits.BitReader) (*VUIParameters, error) { // HRDParameters describes hypothetical reference decoder parameters as defined // by section E.1.2 in the specifications. +// Field semantics are defined in section E.2.2. Comments on fields are excerpts +// from section E.2.2. 
type HRDParameters struct { - CPBCntMinus1 uint64 - BitRateScale uint8 - CPBSizeScale uint8 - BitRateValueMinus1 []uint64 - CPBSizeValueMinus1 []uint64 - CBRFlag []bool + // cpb_cnt_minus1 plus 1 specifies the number of alternative CPB specifications + // in the bitstream. + CPBCntMinus1 uint64 + + // bit_rate_scale (together with bit_rate_value_minus1[ SchedSelIdx ]) + // specifies the maximum input bit rate of the SchedSelIdx-th CPB. + BitRateScale uint8 + + // cpb_size_scale (together with cpb_size_value_minus1[ SchedSelIdx ]) + // specifies the CPB size of the SchedSelIdx-th CPB. + CPBSizeScale uint8 + + // bit_rate_value_minus1[ SchedSelIdx ] (together with bit_rate_scale) + // specifies the maximum input bit rate for the SchedSelIdx-th CPB. + BitRateValueMinus1 []uint64 + + // cpb_size_value_minus1[ SchedSelIdx ] is used together with cpb_size_scale + // to specify the SchedSelIdx-th CPB size. + CPBSizeValueMinus1 []uint64 + + // cbr_flag[ SchedSelIdx ] equal to 0 specifies that to decode this bitstream + // by the HRD using the SchedSelIdx-th CPB specification, the hypothetical + // stream delivery scheduler (HSS) operates in an intermittent bit rate mode, + // otherwise if 1 specifies that the HSS operates in a constant bit rate mode. + CBRFlag []bool + + // initial_cpb_removal_delay_length_minus1 specifies the length in bits of the + // initial_cpb_removal_delay[ SchedSelIdx ] and + // initial_cpb_removal_delay_offset[ SchedSelIdx ] syntax elements of the + // buffering period SEI message. InitialCPBRemovalDelayLenMinus1 uint8 - CPBRemovalDelayLenMinus1 uint8 - DPBOutputDelayLenMinus1 uint8 - TimeOffsetLen uint8 + + // cpb_removal_delay_length_minus1 specifies the length in bits of the + // cpb_removal_delay syntax element. + CPBRemovalDelayLenMinus1 uint8 + + // dpb_output_delay_length_minus1 specifies the length in bits of the + // dpb_output_delay syntax element.
+ DPBOutputDelayLenMinus1 uint8 + + // time_offset_length greater than 0 specifies the length in bits of the + // time_offset syntax element. + TimeOffsetLen uint8 } // NewHRDParameters parses hypothetical reference decoder parameter from br From e63b51e24f9b65d2c2d813f4f462573f7bfa23a8 Mon Sep 17 00:00:00 2001 From: Trek H Date: Fri, 2 Aug 2019 23:26:15 +0930 Subject: [PATCH 56/57] codec-util: removed newticker helper --- codec/codecutil/lex.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/codec/codecutil/lex.go b/codec/codecutil/lex.go index 158839dd..e1498b96 100644 --- a/codec/codecutil/lex.go +++ b/codec/codecutil/lex.go @@ -48,13 +48,6 @@ func init() { close(zeroTicks) } -func newTicker(d time.Duration) *time.Ticker { - if d == 0 { - return &time.Ticker{C: zeroTicks} - } - return time.NewTicker(d) -} - // Lex reads *l.bufSize bytes from src and writes them to dst every d seconds. func (l *ByteLexer) Lex(dst io.Writer, src io.Reader, d time.Duration) error { if l.bufSize == nil { @@ -68,7 +61,14 @@ func (l *ByteLexer) Lex(dst io.Writer, src io.Reader, d time.Duration) error { return fmt.Errorf("invalid delay: %v", d) } - ticker := newTicker(d) + var ticker *time.Ticker + if d == 0 { + ticker = &time.Ticker{C: zeroTicks} + } else { + ticker = time.NewTicker(d) + defer ticker.Stop() + } + buf := make([]byte, bufSize) for { <-ticker.C From 2473b9ee0aa1e38b5fb1cfd398175f53df78c164 Mon Sep 17 00:00:00 2001 From: Saxon Date: Mon, 5 Aug 2019 13:59:08 +0930 Subject: [PATCH 57/57] codec/h264/h264dec: fixed some bugs found by testing --- codec/h264/h264dec/pps.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/codec/h264/h264dec/pps.go b/codec/h264/h264dec/pps.go index aeea8a60..67b1abf2 100644 --- a/codec/h264/h264dec/pps.go +++ b/codec/h264/h264dec/pps.go @@ -54,7 +54,7 @@ func NewPPS(br *bits.BitReader, chromaFormat int) (*PPS, error) { if pps.SliceGroupMapType == 0 { for iGroup := 0; iGroup <= 
pps.NumSliceGroupsMinus1; iGroup++ { - pps.RunLengthMinus1[iGroup] = int(r.readUe()) + pps.RunLengthMinus1 = append(pps.RunLengthMinus1, int(r.readUe())) } } else if pps.SliceGroupMapType == 2 { for iGroup := 0; iGroup < pps.NumSliceGroupsMinus1; iGroup++ { @@ -115,8 +115,8 @@ func NewPPS(br *bits.BitReader, chromaFormat int) (*PPS, error) { } } } - pps.SecondChromaQpIndexOffset = r.readSe() } + pps.SecondChromaQpIndexOffset = r.readSe() moreRBSPData(br) // rbspTrailingBits() }