av/input/audio/audio.go

397 lines
10 KiB
Go

/*
NAME
audio.go
AUTHOR
Alan Noble <alan@ausocean.org>
Trek Hopton <trek@ausocean.org>
LICENSE
This file is Copyright (C) 2019 the Australian Ocean Lab (AusOcean)
It is free software: you can redistribute it and/or modify them
under the terms of the GNU General Public License as published by the
Free Software Foundation, either version 3 of the License, or (at your
option) any later version.
It is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License in gpl.txt.
If not, see [GNU licenses](http://www.gnu.org/licenses).
*/
// Package audio provides access to input from audio devices.
package audio
import (
"bytes"
"errors"
"fmt"
"io"
"sync"
"time"
"github.com/yobert/alsa"
"bitbucket.org/ausocean/av/codec/adpcm"
"bitbucket.org/ausocean/av/codec/codecutil"
"bitbucket.org/ausocean/av/codec/pcm"
"bitbucket.org/ausocean/utils/logger"
"bitbucket.org/ausocean/utils/ring"
)
// Logging and buffering parameters used throughout this file.
const (
// pkg is the prefix prepended to the log messages emitted by this file.
pkg = "pkg: "
// rbTimeout is the timeout handed to ring.NewBuffer when the ring buffer is created.
rbTimeout = 100 * time.Millisecond
// rbNextTimeout is the timeout handed to the ring buffer's Next method by Read.
rbNextTimeout = 100 * time.Millisecond
// rbLen is the first argument given to ring.NewBuffer; presumably the number of
// chunk-sized elements held by the ring buffer (confirm against the ring package).
rbLen = 200
// defaultSampleRate is the rate open falls back to when no rate in Rates
// can be negotiated for the requested sample rate.
defaultSampleRate = 48000
)
// Operating modes stored in Device.mode.
const (
// running: the input goroutine reads from the ALSA device and writes to the ring buffer.
running = iota
// paused: the input goroutine sleeps until the mode changes.
paused
// stopped: the input goroutine closes the ALSA device and exits.
stopped
)
// Rates lists, in ascending order, the sample rates that open attempts to
// negotiate with the ALSA device. open picks the first entry that is not
// smaller than, and is a whole multiple of, the requested sample rate.
var Rates = [8]int{8000, 16000, 32000, 44100, 48000, 88200, 96000, 192000}
// Device holds everything we need to know about the audio input stream.
// A Device is created with NewDevice, which also launches the input goroutine
// that fills the ring buffer drained by Read.
type Device struct {
l Logger // Destination for all log output.
// Operating mode, either running, paused, or stopped.
// "running" means the input goroutine is reading from the ALSA device and writing to the ringbuffer.
// "paused" means the input routine is sleeping until unpaused or stopped.
// "stopped" means the input routine is stopped and the ALSA device is closed.
mode uint8
mu sync.Mutex // Held by Start, Stop and input while reading or writing mode.
title string // Title of the ALSA device to open; when empty, any PCM recording device is accepted (see open).
dev *alsa.Device // Audio input device.
ab alsa.Buffer // ALSA's buffer, filled by each read from dev.
rb *ring.Buffer // Our buffer, written by input and read by Read.
chunkSize int // This is the number of bytes that will be stored at a time.
*Config // Requested output parameters; the pointer passed to NewDevice is stored, not copied.
}
// Config provides parameters used by Device.
type Config struct {
// SampleRate is the wanted sample rate of the audio returned by Read.
// The device may record at a higher rate from Rates, in which case the audio is resampled.
SampleRate int
// Channels is the wanted number of channels. The device is always asked to record
// 2 channels; the only conversion implemented is stereo to mono.
Channels int
// BitDepth is the number of bits per sample; open accepts only 16 or 32.
BitDepth int
// RecPeriod is the duration, in seconds, of audio captured per read from the device.
RecPeriod float64
// Codec selects the output encoding; codecutil.PCM and codecutil.ADPCM are handled.
Codec uint8
}
// Logger enables any implementation of a logger to be used.
// Only Log is called from within this file; the level passed is one of the
// logger package's level constants (e.g. logger.Debug, logger.Error), and
// params are passed as alternating key/value pairs.
// TODO: Make this part of the logger package.
type Logger interface {
SetLevel(int8)
Log(level int8, message string, params ...interface{})
}
// NewDevice initializes and returns a Device which can be started, read from, and stopped.
//
// It opens and configures the ALSA recording device, allocates an ALSA buffer
// holding cfg.RecPeriod seconds of audio, works out how many bytes that audio
// occupies once converted to the requested channels, sample rate and codec
// (the chunk size), and creates a ring buffer from that chunk size. The input
// goroutine is launched in the paused mode; call Start to begin recording.
//
// cfg is stored by pointer, not copied, and l receives all log output. An
// error is returned if cfg is nil, if the device cannot be opened and
// configured, or if the given parameters result in an empty chunk.
func NewDevice(cfg *Config, l Logger) (*Device, error) {
	if cfg == nil {
		return nil, errors.New("nil Config given")
	}
	a := &Device{Config: cfg, l: l}

	// Open the requested audio device.
	if err := a.open(); err != nil {
		a.l.Log(logger.Error, pkg+"failed to open audio device", "error", err.Error())
		return nil, fmt.Errorf("failed to open audio device: %v", err)
	}

	// Set up the ALSA buffer to capture audio in periods of a.RecPeriod seconds.
	a.ab = a.dev.NewBufferDuration(time.Duration(a.RecPeriod * float64(time.Second)))

	// Scale the ALSA buffer length by the channel ratio and then by the sample
	// rate ratio to get the number of bytes left after conversion.
	devFmt := a.dev.BufferFormat()
	cs := (float64((len(a.ab.Data)/devFmt.Channels)*a.Channels) / float64(devFmt.Rate)) * float64(a.SampleRate)
	if cs < 1 {
		// There is no error value on this path, so only the offending
		// parameters are logged.
		a.l.Log(logger.Error, pkg+"given Config parameters are too small",
			"recPeriod", a.RecPeriod, "sampleRate", a.SampleRate, "channels", a.Channels)

		// The device was opened successfully above; release it since no
		// Device is returned to the caller.
		a.dev.Close()
		a.dev = nil
		return nil, errors.New("given Config parameters are too small")
	}

	if a.Codec == codecutil.ADPCM {
		a.chunkSize = adpcm.EncBytes(int(cs))
	} else {
		a.chunkSize = int(cs)
	}

	a.rb = ring.NewBuffer(rbLen, a.chunkSize, rbTimeout)

	// Start the input goroutine paused; it idles until Start is called.
	a.mode = paused
	go a.input()

	return a, nil
}
// Start will start recording audio and writing to the ringbuffer.
//
// A paused device is switched to running and a running device is left as is;
// both return nil. A stopped device cannot be restarted and yields an error.
// The mode is checked and updated under a single lock acquisition so that a
// concurrent Stop cannot be overwritten between the check and the update.
func (a *Device) Start() error {
	a.mu.Lock()
	defer a.mu.Unlock()

	switch a.mode {
	case paused:
		a.mode = running
		return nil
	case running:
		return nil
	case stopped:
		// TODO(Trek): Make this reopen device and start recording.
		return errors.New("device is stopped")
	default:
		return errors.New("invalid mode")
	}
}
// Stop marks the device as stopped. The input goroutine closes the ALSA
// device and exits the next time it checks the mode; Stop itself does not
// wait for that to happen.
func (a *Device) Stop() {
	a.mu.Lock()
	defer a.mu.Unlock()
	a.mode = stopped
}
// ChunkSize reports the chunk size, in bytes, that NewDevice computed for one
// a.RecPeriod of converted audio and used when creating the ringbuffer.
func (a *Device) ChunkSize() int {
	size := a.chunkSize
	return size
}
// open finds the recording device whose title matches a.title, opens it and
// prepares it to record. If a.title is empty, the first PCM recording device
// found is used. Any device that is already open is closed first.
//
// On success a.dev refers to the opened and prepared device. On failure an
// error is returned, a.dev is nil, and a device that was opened along the way
// has been closed again.
func (a *Device) open() error {
	// Close any existing device.
	if a.dev != nil {
		a.l.Log(logger.Debug, pkg+"closing device", "title", a.title)
		a.dev.Close()
		a.dev = nil
	}

	// Open sound card and open recording device.
	a.l.Log(logger.Debug, pkg+"opening sound card")
	cards, err := alsa.OpenCards()
	if err != nil {
		a.l.Log(logger.Debug, pkg+"failed to open sound card")
		return err
	}
	defer alsa.CloseCards(cards)

	a.l.Log(logger.Debug, pkg+"finding audio device")
search:
	for _, card := range cards {
		devices, err := card.Devices()
		if err != nil {
			continue
		}
		for _, dev := range devices {
			if dev.Type != alsa.PCM || !dev.Record {
				continue
			}
			if dev.Title == a.title || a.title == "" {
				a.dev = dev
				// Leave both loops so that a device on a later card cannot
				// replace the first match.
				break search
			}
		}
	}
	if a.dev == nil {
		a.l.Log(logger.Debug, pkg+"failed to find audio device")
		return errors.New("no audio device found")
	}

	a.l.Log(logger.Debug, pkg+"opening audio device", "title", a.dev.Title)
	if err = a.dev.Open(); err != nil {
		a.l.Log(logger.Debug, pkg+"failed to open audio device")
		a.dev = nil
		return err
	}

	// The device is open from here on; release it again if it cannot be
	// configured so that a failed open does not leak it.
	if err = a.negotiateParams(); err != nil {
		a.dev.Close()
		a.dev = nil
		return err
	}

	a.l.Log(logger.Debug, pkg+"Successfully negotiated ALSA params")
	return nil
}

// negotiateParams negotiates channels, sample rate, sample format and buffer
// size with the already opened a.dev and then prepares it for recording.
func (a *Device) negotiateParams() error {
	// 2 channels is what most devices need to record in. If mono is requested,
	// the recording will be converted in formatBuffer().
	if _, err := a.dev.NegotiateChannels(2); err != nil {
		return err
	}

	// A zero rate would cause a division by zero in the divisibility check
	// below, and a negative rate is meaningless.
	if a.SampleRate <= 0 {
		return fmt.Errorf("unsupported sample rate %v", a.SampleRate)
	}

	// Try to negotiate a rate to record in that is divisible by the wanted rate
	// so that it can be easily downsampled to the wanted rate.
	// Note: if a card thinks it can record at a rate but can't actually, this can cause a failure.
	// Eg. the audioinjector sound card is supposed to record at 8000Hz and 16000Hz but it can't due to a firmware issue,
	// a fix for this is to remove 8000 and 16000 from Rates.
	foundRate := false
	for _, rate := range Rates {
		if rate < a.SampleRate || rate%a.SampleRate != 0 {
			continue
		}
		if _, err := a.dev.NegotiateRate(rate); err != nil {
			continue
		}
		foundRate = true
		a.l.Log(logger.Debug, pkg+"Sample rate set", "rate", rate)
		break
	}

	// If no easily divisible rate is found, then use the default rate.
	if !foundRate {
		a.l.Log(logger.Warning, pkg+"Unable to sample at requested rate, default used.", "rateRequested", a.SampleRate)
		if _, err := a.dev.NegotiateRate(defaultSampleRate); err != nil {
			return err
		}
		a.l.Log(logger.Debug, pkg+"Sample rate set", "rate", defaultSampleRate)
	}

	var aFmt alsa.FormatType
	switch a.BitDepth {
	case 16:
		aFmt = alsa.S16_LE
	case 32:
		aFmt = alsa.S32_LE
	default:
		return fmt.Errorf("unsupported sample bits %v", a.BitDepth)
	}
	if _, err := a.dev.NegotiateFormat(aFmt); err != nil {
		return err
	}

	// Either 8192 or 16384 bytes is a reasonable ALSA buffer size.
	if _, err := a.dev.NegotiateBufferSize(8192, 16384); err != nil {
		return err
	}

	return a.dev.Prepare()
}
// input continuously records audio and writes it to the ringbuffer.
// Re-opens the device and tries again if ALSA returns an error.
//
// It runs as its own goroutine (launched by NewDevice) and on every iteration
// consults the mode: while paused it sleeps for a.RecPeriod seconds, and when
// stopped it closes the device and returns. It also returns if the device
// cannot be re-opened or if the ringbuffer reports an unexpected error.
func (a *Device) input() {
	for {
		// Check mode.
		a.mu.Lock()
		mode := a.mode
		a.mu.Unlock()

		switch mode {
		case paused:
			// Scale before converting to a Duration; converting first would
			// truncate a fractional RecPeriod (e.g. 0.5 becomes 0) and turn
			// this loop into a busy-wait.
			time.Sleep(time.Duration(a.RecPeriod * float64(time.Second)))
			continue
		case stopped:
			if a.dev != nil {
				a.l.Log(logger.Debug, pkg+"closing audio device", "title", a.title)
				a.dev.Close()
				a.dev = nil
			}
			return
		}

		// Read from audio device.
		a.l.Log(logger.Debug, pkg+"recording audio for period", "seconds", a.RecPeriod)
		err := a.dev.Read(a.ab.Data)
		if err != nil {
			a.l.Log(logger.Debug, pkg+"read failed", "error", err.Error())
			err = a.open() // re-open
			if err != nil {
				a.l.Log(logger.Fatal, pkg+"reopening device failed", "error", err.Error())
				return
			}
			continue
		}

		// Process audio.
		a.l.Log(logger.Debug, "processing audio")
		toWrite := a.formatBuffer()

		// Write audio to ringbuffer.
		n, err := a.rb.Write(toWrite.Data)
		switch err {
		case nil:
			a.l.Log(logger.Debug, pkg+"wrote audio to ringbuffer", "length", n)
		case ring.ErrDropped:
			a.l.Log(logger.Warning, pkg+"old audio data overwritten")
		default:
			a.l.Log(logger.Error, pkg+"unexpected ringbuffer error", "error", err.Error())
			return
		}
	}
}
// Read copies the next available chunk from the ringbuffer into p and returns
// the number of bytes copied. When no chunk becomes available within
// rbNextTimeout, or the ringbuffer read reports io.EOF, it returns 0 and a nil
// error. Any other ringbuffer error is returned with a count of 0.
func (a *Device) Read(p []byte) (n int, err error) {
	// Advance the ringbuffer to the next readable chunk.
	if _, err = a.rb.Next(rbNextTimeout); err != nil {
		if err == ring.ErrTimeout {
			return 0, nil
		}
		return 0, err
	}

	// Drain the chunk into p.
	n, err = a.rb.Read(p)
	if err == nil {
		return n, nil
	}
	if err == io.EOF {
		return 0, nil
	}
	return 0, err
}
// formatBuffer returns audio that has been converted to the desired format.
//
// Starting from the most recently captured ALSA buffer (a.ab), it converts
// stereo to mono when one channel is wanted, resamples when the recorded rate
// differs from a.SampleRate, and finally applies the configured codec. The
// codec step always runs, including when no channel or rate conversion is
// needed. The returned buffer's Format is kept in step with its Data after
// each successful conversion, so that the resampler is told the channel count
// of the data it is actually given. a.ab itself is not modified by this
// function; when no conversion takes place the returned Data aliases a.ab.Data.
//
// Conversion and encoding failures are logged; the function has no error result.
func (a *Device) formatBuffer() alsa.Buffer {
	var err error
	formatted := alsa.Buffer{Format: a.ab.Format, Data: a.ab.Data}

	// Convert channels.
	// TODO(Trek): Make this work for conversions other than stereo to mono.
	if formatted.Format.Channels == 2 && a.Channels == 1 {
		formatted.Data, err = pcm.StereoToMono(a.ab)
		if err != nil {
			a.l.Log(logger.Fatal, pkg+"channel conversion failed", "error", err.Error())
		} else {
			formatted.Format.Channels = a.Channels
		}
	}

	// Convert rate.
	if formatted.Format.Rate != a.SampleRate {
		formatted.Data, err = pcm.Resample(formatted, a.SampleRate)
		if err != nil {
			a.l.Log(logger.Fatal, pkg+"rate conversion failed", "error", err.Error())
		} else {
			formatted.Format.Rate = a.SampleRate
		}
	}

	// Encode.
	switch a.Codec {
	case codecutil.PCM:
		// Already PCM; nothing to do.
	case codecutil.ADPCM:
		b := bytes.NewBuffer(make([]byte, 0, adpcm.EncBytes(len(formatted.Data))))
		enc := adpcm.NewEncoder(b)
		_, err = enc.Write(formatted.Data)
		if err != nil {
			a.l.Log(logger.Fatal, pkg+"unable to encode", "error", err.Error())
		}
		formatted.Data = b.Bytes()
	default:
		a.l.Log(logger.Error, pkg+"unhandled audio codec")
	}

	return formatted
}