/* NAME audio-input.go AUTHOR Trek Hopton LICENSE audio-input.go is Copyright (C) 2019 the Australian Ocean Lab (AusOcean) It is free software: you can redistribute it and/or modify them under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. It is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License in gpl.txt. If not, see [GNU licenses](http://www.gnu.org/licenses). */ package revid import ( "errors" "fmt" "io" "sync" "time" "github.com/yobert/alsa" "bitbucket.org/ausocean/av/codec/pcm" "bitbucket.org/ausocean/iot/pi/smartlogger" "bitbucket.org/ausocean/utils/logger" "bitbucket.org/ausocean/utils/ring" ) const ( logPath = "/var/log/netsender" rbTimeout = 100 * time.Millisecond rbNextTimeout = 100 * time.Millisecond rbLen = 200 ) const ( running = iota paused stopped ) // Rates contains the audio sample rates used by revid. var Rates = [8]int{8000, 16000, 32000, 44100, 48000, 88200, 96000, 192000} // AudioDevice holds everything we need to know about the audio input stream. // Note: At 44100 Hz sample rate, 2 channels and 16-bit samples, a period of 5 seconds // results in PCM data chunks of 882000 bytes. A longer period exceeds datastore's 1MB blob limit. type AudioDevice struct { l Logger mu sync.Mutex source string // Name of audio source, or empty for the default source. // Operating mode, either running, paused, or stopped. // "running" means the input goroutine is reading from the ALSA device and writing to the ringbuffer. // "paused" means the input routine is sleeping until unpaused or stopped. // "stopped" means the input routine is stopped and the ALSA device is closed. mode uint8 dev *alsa.Device // Audio input device. ab alsa.Buffer // ALSA's buffer. rb *ring.Buffer // Our buffer. chunkSize int // This is the number of bytes that will be stored at a time. *AudioConfig } // AudioConfig provides parameters used by AudioDevice. type AudioConfig struct { SampleRate int Channels int BitDepth int RecPeriod float64 Codec uint8 } // NewAudioDevice initializes and returns an AudioDevice struct which can be started, read from, and stopped. func NewAudioDevice(cfg *AudioConfig) (*AudioDevice, error) { a := &AudioDevice{} a.AudioConfig = cfg // Initialize logger. logLevel := int(logger.Debug) validLogLevel := true if logLevel < int(logger.Debug) || logLevel > int(logger.Fatal) { logLevel = int(logger.Info) validLogLevel = false } logSender := smartlogger.New(logPath) a.l = logger.New(int8(logLevel), &logSender.LogRoller) a.l.Log(logger.Info, "log-netsender: Logger Initialized") if !validLogLevel { a.l.Log(logger.Error, "Invalid log level was defaulted to Info") } // Open the requested audio device. err := a.open() if err != nil { a.l.Log(logger.Error, "failed to open audio device", "error", err.Error()) return nil, errors.New("failed to open audio device") } // Setup ring buffer to capture audio in periods of a.RecPeriod seconds and buffer rbDuration seconds in total. a.ab = a.dev.NewBufferDuration(time.Duration(a.RecPeriod * float64(time.Second))) cs := (float64((len(a.ab.Data)/a.dev.BufferFormat().Channels)*a.Channels) / float64(a.dev.BufferFormat().Rate)) * float64(a.SampleRate) if cs < 1 { a.l.Log(logger.Error, "given AudioConfig parameters are too small", "error", err.Error()) return nil, errors.New("given AudioConfig parameters are too small") } a.chunkSize = int(cs) a.rb = ring.NewBuffer(rbLen, a.chunkSize, rbTimeout) a.mode = paused go a.input() return a, nil } // Start will start recording audio and writing to the ringbuffer. func (a *AudioDevice) Start() error { a.mu.Lock() mode := a.mode a.mu.Unlock() switch mode { case paused: a.mu.Lock() a.mode = running a.mu.Unlock() return nil case stopped: // TODO(Trek): Make this reopen device and start recording. return errors.New("device is stopped") case running: return nil default: return errors.New("invalid mode") } return nil } // Stop will stop recording audio and close the device. func (a *AudioDevice) Stop() { a.mu.Lock() a.mode = stopped a.mu.Unlock() } // ChunkSize returns the number of bytes written to the ringbuffer per a.RecPeriod. func (a *AudioDevice) ChunkSize() int { return a.chunkSize } // open the recording device with the given name and prepare it to record. // If name is empty, the first recording device is used. func (a *AudioDevice) open() error { // Close any existing device. if a.dev != nil { a.l.Log(logger.Debug, "closing device", "source", a.source) a.dev.Close() a.dev = nil } // Open sound card and open recording device. a.l.Log(logger.Debug, "opening sound card") cards, err := alsa.OpenCards() if err != nil { a.l.Log(logger.Debug, "failed to open sound card") return err } defer alsa.CloseCards(cards) a.l.Log(logger.Debug, "finding audio device") for _, card := range cards { devices, err := card.Devices() if err != nil { continue } for _, dev := range devices { if dev.Type != alsa.PCM || !dev.Record { continue } if dev.Title == a.source || a.source == "" { a.dev = dev break } } } if a.dev == nil { a.l.Log(logger.Debug, "failed to find audio device") return errors.New("no audio device found") } a.l.Log(logger.Debug, "opening audio device", "source", a.dev.Title) err = a.dev.Open() if err != nil { a.l.Log(logger.Debug, "failed to open audio device") return err } // 2 channels is what most devices need to record in. If mono is requested, // the recording will be converted in formatBuffer(). _, err = a.dev.NegotiateChannels(2) if err != nil { return err } // Try to negotiate a rate to record in that is divisible by the wanted rate // so that it can be easily downsampled to the wanted rate. // Note: if a card thinks it can record at a rate but can't actually, this can cause a failure. // Eg. the audioinjector sound card is supposed to record at 8000Hz and 16000Hz but it can't due to a firmware issue, // a fix for this is to remove 8000 and 16000 from the Rates slice. foundRate := false for i := 0; i < len(Rates) && !foundRate; i++ { if Rates[i] < a.SampleRate { continue } if Rates[i]%a.SampleRate == 0 { _, err = a.dev.NegotiateRate(Rates[i]) if err == nil { foundRate = true a.l.Log(logger.Debug, "Sample rate set", "rate", Rates[i]) } } } // If no easily divisible rate is found, then use the default rate. if !foundRate { a.l.Log(logger.Warning, "Unable to sample at requested rate, default used.", "rateRequested", a.SampleRate) _, err = a.dev.NegotiateRate(defaultSampleRate) if err != nil { return err } a.l.Log(logger.Debug, "Sample rate set", "rate", defaultSampleRate) } var aFmt alsa.FormatType switch a.BitDepth { case 16: aFmt = alsa.S16_LE case 32: aFmt = alsa.S32_LE default: return fmt.Errorf("unsupported sample bits %v", a.BitDepth) } _, err = a.dev.NegotiateFormat(aFmt) if err != nil { return err } // Either 8192 or 16384 bytes is a reasonable ALSA buffer size. _, err = a.dev.NegotiateBufferSize(8192, 16384) if err != nil { return err } if err = a.dev.Prepare(); err != nil { return err } a.l.Log(logger.Debug, "Successfully negotiated ALSA params") return nil } // input continously records audio and writes it to the ringbuffer. // Re-opens the device and tries again if ASLA returns an error. func (a *AudioDevice) input() { for { // Check mode. a.mu.Lock() mode := a.mode a.mu.Unlock() switch mode { case paused: time.Sleep(time.Duration(a.RecPeriod) * time.Second) continue case stopped: if a.dev != nil { a.l.Log(logger.Debug, "closing audio device", "source", a.source) a.dev.Close() a.dev = nil } return } // Read from audio device. a.l.Log(logger.Debug, "recording audio for period", "seconds", a.RecPeriod) err := a.dev.Read(a.ab.Data) if err != nil { a.l.Log(logger.Debug, "read failed", "error", err.Error()) err = a.open() // re-open if err != nil { a.l.Log(logger.Fatal, "reopening device failed", "error", err.Error()) return } continue } // Process audio. a.l.Log(logger.Debug, "processing audio") toWrite := a.formatBuffer() // Write audio to ringbuffer. n, err := a.rb.Write(toWrite.Data) switch err { case nil: a.l.Log(logger.Debug, "wrote audio to ringbuffer", "length", n) case ring.ErrDropped: a.l.Log(logger.Warning, "old audio data overwritten") default: a.l.Log(logger.Error, "unexpected ringbuffer error", "error", err.Error()) return } } } // Read reads a full PCM chunk from the ringbuffer, returning the number of bytes read upon success. // Any errors returned are unexpected and should be considered fatal. func (a *AudioDevice) Read(p []byte) (n int, err error) { // Ready ringbuffer for read. _, err = a.rb.Next(rbNextTimeout) switch err { case nil: case ring.ErrTimeout: return 0, nil default: return 0, err } // Read from ring buffer. n, err = a.rb.Read(p) switch err { case nil: case io.EOF: return 0, nil default: return 0, err } return n, nil } // formatBuffer returns an ALSA buffer that has the recording data from the ac's original ALSA buffer but stored // in the desired format specified by the ac's parameters. func (a *AudioDevice) formatBuffer() alsa.Buffer { var err error // If nothing needs to be changed, return the original. if a.ab.Format.Channels == a.Channels && a.ab.Format.Rate == a.SampleRate { return a.ab } formatted := alsa.Buffer{Format: a.ab.Format} bufCopied := false if a.ab.Format.Channels != a.Channels { // Convert channels. // TODO(Trek): Make this work for conversions other than stereo to mono. if a.ab.Format.Channels == 2 && a.Channels == 1 { formatted.Data, err = pcm.StereoToMono(a.ab) if err != nil { a.l.Log(logger.Warning, "channel conversion failed, audio has remained stereo", "error", err.Error()) } else { formatted.Format.Channels = 1 } bufCopied = true } } if a.ab.Format.Rate != a.SampleRate { // Convert rate. if bufCopied { formatted.Data, err = pcm.Resample(formatted, a.SampleRate) } else { formatted.Data, err = pcm.Resample(a.ab, a.SampleRate) } if err != nil { a.l.Log(logger.Warning, "rate conversion failed, audio has remained original rate", "error", err.Error()) } else { formatted.Format.Rate = a.SampleRate } } switch a.Codec { case PCM: case ADPCM: // TODO(Trek):Add ADPCM conversion. default: a.l.Log(logger.Error, "codec conversion failed, audio has remained original codec", "error", err.Error()) } return formatted }