mirror of https://bitbucket.org/ausocean/av.git
403 lines
11 KiB
Go
403 lines
11 KiB
Go
/*
|
|
NAME
|
|
audio-input.go
|
|
|
|
AUTHOR
|
|
Trek Hopton <trek@ausocean.org>
|
|
|
|
LICENSE
|
|
audio-input.go is Copyright (C) 2019 the Australian Ocean Lab (AusOcean)
|
|
|
|
It is free software: you can redistribute it and/or modify it
|
|
under the terms of the GNU General Public License as published by the
|
|
Free Software Foundation, either version 3 of the License, or (at your
|
|
option) any later version.
|
|
|
|
It is distributed in the hope that it will be useful, but WITHOUT
|
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
for more details.
|
|
|
|
You should have received a copy of the GNU General Public License in gpl.txt.
|
|
If not, see [GNU licenses](http://www.gnu.org/licenses).
|
|
*/
|
|
|
|
package revid
|
|
|
|
import (
|
|
"bytes"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/yobert/alsa"
|
|
|
|
"bitbucket.org/ausocean/av/codec/adpcm"
|
|
"bitbucket.org/ausocean/av/codec/pcm"
|
|
"bitbucket.org/ausocean/iot/pi/smartlogger"
|
|
"bitbucket.org/ausocean/utils/logger"
|
|
"bitbucket.org/ausocean/utils/ring"
|
|
)
|
|
|
|
// Logging and ring buffer parameters.
const (
	// logPath is the path handed to smartlogger.New.
	logPath = "/var/log/netsender"

	// rbTimeout is the timeout given to the ring buffer on creation.
	rbTimeout = 100 * time.Millisecond

	// rbNextTimeout is the timeout used when waiting for the next ring buffer chunk.
	rbNextTimeout = 100 * time.Millisecond

	// rbLen is the number of chunks the ring buffer holds.
	rbLen = 200
)
|
|
|
|
// Operating modes of an AudioDevice.
const (
	running = iota // The input routine is recording.
	paused         // The input routine is idle until unpaused or stopped.
	stopped        // The input routine has stopped.
)
|
|
|
|
// Rates lists, in ascending order, the audio sample rates (in Hz) used by revid.
var Rates = [...]int{
	8000,
	16000,
	32000,
	44100,
	48000,
	88200,
	96000,
	192000,
}
|
|
|
|
// AudioDevice holds everything we need to know about the audio input stream.
|
|
// Note: At 44100 Hz sample rate, 2 channels and 16-bit samples, a period of 5 seconds
|
|
// results in PCM data chunks of 882000 bytes. A longer period exceeds datastore's 1MB blob limit.
|
|
type AudioDevice struct {
|
|
l Logger
|
|
mu sync.Mutex
|
|
source string // Name of audio source, or empty for the default source.
|
|
// Operating mode, either running, paused, or stopped.
|
|
// "running" means the input goroutine is reading from the ALSA device and writing to the ringbuffer.
|
|
// "paused" means the input routine is sleeping until unpaused or stopped.
|
|
// "stopped" means the input routine is stopped and the ALSA device is closed.
|
|
mode uint8
|
|
|
|
dev *alsa.Device // Audio input device.
|
|
ab alsa.Buffer // ALSA's buffer.
|
|
rb *ring.Buffer // Our buffer.
|
|
chunkSize int // This is the number of bytes that will be stored at a time.
|
|
|
|
*AudioConfig
|
|
}
|
|
|
|
// AudioConfig describes the audio output wanted from an AudioDevice.
type AudioConfig struct {
	SampleRate int     // Wanted sample rate in Hz.
	Channels   int     // Wanted number of channels.
	BitDepth   int     // Bits per sample; open accepts 16 or 32.
	RecPeriod  float64 // Length of each recorded chunk in seconds.
	Codec      uint8   // Output codec, compared against the PCM and ADPCM constants.
}
|
|
|
|
// NewAudioDevice initializes and returns an AudioDevice struct which can be started, read from, and stopped.
|
|
func NewAudioDevice(cfg *AudioConfig) (*AudioDevice, error) {
|
|
a := &AudioDevice{}
|
|
a.AudioConfig = cfg
|
|
|
|
// Initialize logger.
|
|
logLevel := int(logger.Debug)
|
|
validLogLevel := true
|
|
if logLevel < int(logger.Debug) || logLevel > int(logger.Fatal) {
|
|
logLevel = int(logger.Info)
|
|
validLogLevel = false
|
|
}
|
|
logSender := smartlogger.New(logPath)
|
|
a.l = logger.New(int8(logLevel), &logSender.LogRoller)
|
|
a.l.Log(logger.Info, "log-netsender: Logger Initialized")
|
|
if !validLogLevel {
|
|
a.l.Log(logger.Error, "Invalid log level was defaulted to Info")
|
|
}
|
|
|
|
// Open the requested audio device.
|
|
err := a.open()
|
|
if err != nil {
|
|
a.l.Log(logger.Error, "failed to open audio device", "error", err.Error())
|
|
return nil, errors.New("failed to open audio device")
|
|
}
|
|
|
|
// Setup ring buffer to capture audio in periods of a.RecPeriod seconds and buffer rbDuration seconds in total.
|
|
a.ab = a.dev.NewBufferDuration(time.Duration(a.RecPeriod * float64(time.Second)))
|
|
cs := (float64((len(a.ab.Data)/a.dev.BufferFormat().Channels)*a.Channels) / float64(a.dev.BufferFormat().Rate)) * float64(a.SampleRate)
|
|
if cs < 1 {
|
|
a.l.Log(logger.Error, "given AudioConfig parameters are too small", "error", err.Error())
|
|
return nil, errors.New("given AudioConfig parameters are too small")
|
|
}
|
|
if a.Codec == ADPCM {
|
|
a.chunkSize = adpcm.EncBytes(int(cs))
|
|
} else {
|
|
a.chunkSize = int(cs)
|
|
}
|
|
a.rb = ring.NewBuffer(rbLen, a.chunkSize, rbTimeout)
|
|
|
|
a.mode = paused
|
|
go a.input()
|
|
|
|
return a, nil
|
|
}
|
|
|
|
// Start will start recording audio and writing to the ringbuffer.
|
|
func (a *AudioDevice) Start() error {
|
|
a.mu.Lock()
|
|
mode := a.mode
|
|
a.mu.Unlock()
|
|
switch mode {
|
|
case paused:
|
|
a.mu.Lock()
|
|
a.mode = running
|
|
a.mu.Unlock()
|
|
return nil
|
|
case stopped:
|
|
// TODO(Trek): Make this reopen device and start recording.
|
|
return errors.New("device is stopped")
|
|
case running:
|
|
return nil
|
|
default:
|
|
return errors.New("invalid mode")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Stop will stop recording audio and close the device.
|
|
func (a *AudioDevice) Stop() {
|
|
a.mu.Lock()
|
|
a.mode = stopped
|
|
a.mu.Unlock()
|
|
}
|
|
|
|
// ChunkSize returns the number of bytes written to the ringbuffer per a.RecPeriod.
|
|
func (a *AudioDevice) ChunkSize() int {
|
|
return a.chunkSize
|
|
}
|
|
|
|
// open the recording device with the given name and prepare it to record.
|
|
// If name is empty, the first recording device is used.
|
|
func (a *AudioDevice) open() error {
|
|
// Close any existing device.
|
|
if a.dev != nil {
|
|
a.l.Log(logger.Debug, "closing device", "source", a.source)
|
|
a.dev.Close()
|
|
a.dev = nil
|
|
}
|
|
|
|
// Open sound card and open recording device.
|
|
a.l.Log(logger.Debug, "opening sound card")
|
|
cards, err := alsa.OpenCards()
|
|
if err != nil {
|
|
a.l.Log(logger.Debug, "failed to open sound card")
|
|
return err
|
|
}
|
|
defer alsa.CloseCards(cards)
|
|
|
|
a.l.Log(logger.Debug, "finding audio device")
|
|
for _, card := range cards {
|
|
devices, err := card.Devices()
|
|
if err != nil {
|
|
continue
|
|
}
|
|
for _, dev := range devices {
|
|
if dev.Type != alsa.PCM || !dev.Record {
|
|
continue
|
|
}
|
|
if dev.Title == a.source || a.source == "" {
|
|
a.dev = dev
|
|
break
|
|
}
|
|
}
|
|
}
|
|
if a.dev == nil {
|
|
a.l.Log(logger.Debug, "failed to find audio device")
|
|
return errors.New("no audio device found")
|
|
}
|
|
|
|
a.l.Log(logger.Debug, "opening audio device", "source", a.dev.Title)
|
|
err = a.dev.Open()
|
|
if err != nil {
|
|
a.l.Log(logger.Debug, "failed to open audio device")
|
|
return err
|
|
}
|
|
|
|
// 2 channels is what most devices need to record in. If mono is requested,
|
|
// the recording will be converted in formatBuffer().
|
|
_, err = a.dev.NegotiateChannels(2)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Try to negotiate a rate to record in that is divisible by the wanted rate
|
|
// so that it can be easily downsampled to the wanted rate.
|
|
// Note: if a card thinks it can record at a rate but can't actually, this can cause a failure.
|
|
// Eg. the audioinjector sound card is supposed to record at 8000Hz and 16000Hz but it can't due to a firmware issue,
|
|
// a fix for this is to remove 8000 and 16000 from the Rates slice.
|
|
foundRate := false
|
|
for i := 0; i < len(Rates) && !foundRate; i++ {
|
|
if Rates[i] < a.SampleRate {
|
|
continue
|
|
}
|
|
if Rates[i]%a.SampleRate == 0 {
|
|
_, err = a.dev.NegotiateRate(Rates[i])
|
|
if err == nil {
|
|
foundRate = true
|
|
a.l.Log(logger.Debug, "Sample rate set", "rate", Rates[i])
|
|
}
|
|
}
|
|
}
|
|
|
|
// If no easily divisible rate is found, then use the default rate.
|
|
if !foundRate {
|
|
a.l.Log(logger.Warning, "Unable to sample at requested rate, default used.", "rateRequested", a.SampleRate)
|
|
_, err = a.dev.NegotiateRate(defaultSampleRate)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
a.l.Log(logger.Debug, "Sample rate set", "rate", defaultSampleRate)
|
|
}
|
|
|
|
var aFmt alsa.FormatType
|
|
switch a.BitDepth {
|
|
case 16:
|
|
aFmt = alsa.S16_LE
|
|
case 32:
|
|
aFmt = alsa.S32_LE
|
|
default:
|
|
return fmt.Errorf("unsupported sample bits %v", a.BitDepth)
|
|
}
|
|
_, err = a.dev.NegotiateFormat(aFmt)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Either 8192 or 16384 bytes is a reasonable ALSA buffer size.
|
|
_, err = a.dev.NegotiateBufferSize(8192, 16384)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if err = a.dev.Prepare(); err != nil {
|
|
return err
|
|
}
|
|
a.l.Log(logger.Debug, "Successfully negotiated ALSA params")
|
|
return nil
|
|
}
|
|
|
|
// input continously records audio and writes it to the ringbuffer.
|
|
// Re-opens the device and tries again if ASLA returns an error.
|
|
func (a *AudioDevice) input() {
|
|
for {
|
|
// Check mode.
|
|
a.mu.Lock()
|
|
mode := a.mode
|
|
a.mu.Unlock()
|
|
switch mode {
|
|
case paused:
|
|
time.Sleep(time.Duration(a.RecPeriod) * time.Second)
|
|
continue
|
|
case stopped:
|
|
if a.dev != nil {
|
|
a.l.Log(logger.Debug, "closing audio device", "source", a.source)
|
|
a.dev.Close()
|
|
a.dev = nil
|
|
}
|
|
return
|
|
}
|
|
|
|
// Read from audio device.
|
|
a.l.Log(logger.Debug, "recording audio for period", "seconds", a.RecPeriod)
|
|
err := a.dev.Read(a.ab.Data)
|
|
if err != nil {
|
|
a.l.Log(logger.Debug, "read failed", "error", err.Error())
|
|
err = a.open() // re-open
|
|
if err != nil {
|
|
a.l.Log(logger.Fatal, "reopening device failed", "error", err.Error())
|
|
return
|
|
}
|
|
continue
|
|
}
|
|
|
|
// Process audio.
|
|
a.l.Log(logger.Debug, "processing audio")
|
|
toWrite := a.formatBuffer()
|
|
|
|
// Write audio to ringbuffer.
|
|
n, err := a.rb.Write(toWrite.Data)
|
|
switch err {
|
|
case nil:
|
|
a.l.Log(logger.Debug, "wrote audio to ringbuffer", "length", n)
|
|
case ring.ErrDropped:
|
|
a.l.Log(logger.Warning, "old audio data overwritten")
|
|
default:
|
|
a.l.Log(logger.Error, "unexpected ringbuffer error", "error", err.Error())
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// Read reads a full PCM chunk from the ringbuffer, returning the number of bytes read upon success.
|
|
// Any errors returned are unexpected and should be considered fatal.
|
|
func (a *AudioDevice) Read(p []byte) (n int, err error) {
|
|
// Ready ringbuffer for read.
|
|
_, err = a.rb.Next(rbNextTimeout)
|
|
switch err {
|
|
case nil:
|
|
case ring.ErrTimeout:
|
|
return 0, nil
|
|
default:
|
|
return 0, err
|
|
}
|
|
|
|
// Read from ring buffer.
|
|
n, err = a.rb.Read(p)
|
|
switch err {
|
|
case nil:
|
|
case io.EOF:
|
|
return 0, nil
|
|
default:
|
|
return 0, err
|
|
}
|
|
|
|
return n, nil
|
|
}
|
|
|
|
// formatBuffer returns audio that has been converted to the desired format.
|
|
func (a *AudioDevice) formatBuffer() alsa.Buffer {
|
|
var err error
|
|
|
|
// If nothing needs to be changed, return the original.
|
|
if a.ab.Format.Channels == a.Channels && a.ab.Format.Rate == a.SampleRate {
|
|
return a.ab
|
|
}
|
|
|
|
formatted := alsa.Buffer{Format: a.ab.Format, Data: a.ab.Data}
|
|
if a.ab.Format.Channels != a.Channels {
|
|
// Convert channels.
|
|
// TODO(Trek): Make this work for conversions other than stereo to mono.
|
|
if a.ab.Format.Channels == 2 && a.Channels == 1 {
|
|
formatted.Data, err = pcm.StereoToMono(a.ab)
|
|
if err != nil {
|
|
a.l.Log(logger.Fatal, "channel conversion failed", "error", err.Error())
|
|
}
|
|
}
|
|
}
|
|
|
|
if a.ab.Format.Rate != a.SampleRate {
|
|
// Convert rate.
|
|
formatted.Data, err = pcm.Resample(formatted, a.SampleRate)
|
|
if err != nil {
|
|
a.l.Log(logger.Fatal, "rate conversion failed", "error", err.Error())
|
|
}
|
|
}
|
|
|
|
switch a.Codec {
|
|
case PCM:
|
|
case ADPCM:
|
|
b := bytes.NewBuffer(make([]byte, 0, adpcm.EncBytes(len(formatted.Data))))
|
|
enc := adpcm.NewEncoder(b)
|
|
_, err = enc.Write(formatted.Data)
|
|
if err != nil {
|
|
a.l.Log(logger.Fatal, "unable to encode", "error", err.Error())
|
|
}
|
|
formatted.Data = b.Bytes()
|
|
default:
|
|
a.l.Log(logger.Error, "codec conversion failed, audio has remained original codec", "error", err.Error())
|
|
}
|
|
|
|
return formatted
|
|
}
|