mirror of https://bitbucket.org/ausocean/av.git
491 lines
14 KiB
Go
491 lines
14 KiB
Go
|
/*
|
||
|
NAME
|
||
|
audio.go
|
||
|
|
||
|
AUTHOR
|
||
|
Alan Noble <alan@ausocean.org>
|
||
|
Trek Hopton <trek@ausocean.org>
|
||
|
|
||
|
LICENSE
|
||
|
This file is Copyright (C) 2019 the Australian Ocean Lab (AusOcean)
|
||
|
|
||
|
It is free software: you can redistribute it and/or modify it
|
||
|
under the terms of the GNU General Public License as published by the
|
||
|
Free Software Foundation, either version 3 of the License, or (at your
|
||
|
option) any later version.
|
||
|
|
||
|
It is distributed in the hope that it will be useful, but WITHOUT
|
||
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||
|
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||
|
for more details.
|
||
|
|
||
|
You should have received a copy of the GNU General Public License in gpl.txt.
|
||
|
If not, see [GNU licenses](http://www.gnu.org/licenses).
|
||
|
*/
|
||
|
|
||
|
// Package audio provides access to input from audio devices.
|
||
|
package audio
|
||
|
|
||
|
import (
|
||
|
"bytes"
|
||
|
"errors"
|
||
|
"fmt"
|
||
|
"sync"
|
||
|
"time"
|
||
|
|
||
|
"github.com/yobert/alsa"
|
||
|
|
||
|
"bitbucket.org/ausocean/av/codec/adpcm"
|
||
|
"bitbucket.org/ausocean/av/codec/codecutil"
|
||
|
"bitbucket.org/ausocean/av/codec/pcm"
|
||
|
"bitbucket.org/ausocean/av/device"
|
||
|
"bitbucket.org/ausocean/av/revid/config"
|
||
|
"bitbucket.org/ausocean/utils/logger"
|
||
|
"bitbucket.org/ausocean/utils/ring"
|
||
|
)
|
||
|
|
||
|
const (
	// pkg is prepended to the log messages written by this package.
	pkg = "audio: "
	// rbTimeout is the timeout handed to ring.NewBuffer when the ring buffer is created in Set.
	rbTimeout = 100 * time.Millisecond
	// rbNextTimeout is the timeout handed to the ring buffer's Next call in Read.
	rbNextTimeout = 100 * time.Millisecond
	// rbLen is the length handed to ring.NewBuffer in Set.
	// NOTE(review): presumably the number of chunks the ring buffer holds - confirm against the ring package.
	rbLen = 200
	// defaultSampleRate is the rate negotiated with the ALSA device when no
	// rate compatible with the requested sample rate could be negotiated.
	defaultSampleRate = 48000
)
|
||
|
|
||
|
// "running" means the input goroutine is reading from the ALSA device and writing to the ringbuffer.
|
||
|
// "paused" means the input routine is sleeping until unpaused or stopped.
|
||
|
// "stopped" means the input routine is stopped and the ALSA device is closed.
|
||
|
const (
	// Modes start at 1, so the zero value of Device.mode (a Device that has
	// not been through Set) is not a valid mode; Start reports it as invalid.
	running = iota + 1
	paused
	stopped
)
|
||
|
|
||
|
// Device holds everything we need to know about the audio input stream and implements io.Reader.
// A Device is created with NewDevice, configured with Set, and then controlled with Start and Stop.
type Device struct {
	l         Logger       // Logger for device's routines to log to.
	mode      uint8        // Operating mode, either running, paused, or stopped.
	mu        sync.Mutex   // Provides synchronisation when changing modes concurrently.
	title     string       // Title of the ALSA device to open, or empty to accept any recording device.
	dev       *alsa.Device // ALSA's Audio input device.
	ab        alsa.Buffer  // ALSA's buffer, filled by each read from dev.
	rb        *ring.Buffer // Our buffer, written by the input routine and drained by Read.
	chunkSize int          // This is the number of bytes that will be stored in rb at a time.
	Config                 // Configuration parameters for this device.
}
|
||
|
|
||
|
// Config provides parameters used by Device.
type Config struct {
	SampleRate int     // Wanted sample rate of the output audio; must be positive.
	Channels   int     // Wanted number of channels of the output audio; must be positive.
	BitDepth   int     // Wanted sample size in bits; open only supports 16 and 32.
	RecPeriod  float64 // Length of each recorded chunk in seconds; must be positive.
	Codec      uint8   // Output codec; formatBuffer handles codecutil.PCM and codecutil.ADPCM.
}
|
||
|
|
||
|
// Logger enables any implementation of a logger to be used.
// Within this package only Log is called, with the level constants of the
// logger package, a message, and alternating key/value parameters.
// TODO: Make this part of the logger package.
type Logger interface {
	SetLevel(int8)
	Log(level int8, message string, params ...interface{})
}
|
||
|
|
||
|
// OpenError is used to determine whether an error has originated from attempting to open a device.
// The open method converts the errors it returns to this type.
type OpenError error
|
||
|
|
||
|
// NewDevice initializes and returns a Device which has its logger set as the given logger.
|
||
|
func NewDevice(l Logger) *Device {
|
||
|
d := &Device{
|
||
|
l: l,
|
||
|
}
|
||
|
return d
|
||
|
}
|
||
|
|
||
|
// Set will take a Config struct, check the validity of the relevant fields
|
||
|
// and then performs any configuration necessary. If fields are not valid,
|
||
|
// an error is added to the multiError and a default value is used.
|
||
|
// It then initialises the Device which can then be started, read from, and stopped.
|
||
|
func (d *Device) Set(c config.Config) error {
|
||
|
var errs device.MultiError
|
||
|
if c.SampleRate <= 0 {
|
||
|
errs = append(errs, fmt.Errorf("invalid sample rate: %v", c.SampleRate))
|
||
|
}
|
||
|
if c.Channels <= 0 {
|
||
|
errs = append(errs, fmt.Errorf("invalid number of channels: %v", c.Channels))
|
||
|
}
|
||
|
if c.BitDepth <= 0 {
|
||
|
errs = append(errs, fmt.Errorf("invalid bitdepth: %v", c.BitDepth))
|
||
|
}
|
||
|
if c.RecPeriod <= 0 {
|
||
|
errs = append(errs, fmt.Errorf("invalid recording period: %v", c.RecPeriod))
|
||
|
}
|
||
|
if !codecutil.IsValid(c.InputCodec) {
|
||
|
errs = append(errs, errors.New("invalid codec"))
|
||
|
}
|
||
|
d.Config = Config{
|
||
|
SampleRate: c.SampleRate,
|
||
|
Channels: c.Channels,
|
||
|
BitDepth: c.BitDepth,
|
||
|
RecPeriod: c.RecPeriod,
|
||
|
Codec: c.InputCodec,
|
||
|
}
|
||
|
|
||
|
// Open the requested audio device.
|
||
|
err := d.open()
|
||
|
if err != nil {
|
||
|
d.l.Log(logger.Error, pkg+"failed to open device")
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
// Setup the device to record with desired period.
|
||
|
d.ab = d.dev.NewBufferDuration(time.Duration(d.RecPeriod * float64(time.Second)))
|
||
|
|
||
|
// Account for channel conversion.
|
||
|
chunkSize := float64(len(d.ab.Data) / d.dev.BufferFormat().Channels * d.Channels)
|
||
|
|
||
|
// Account for resampling.
|
||
|
chunkSize = (chunkSize / float64(d.dev.BufferFormat().Rate)) * float64(d.SampleRate)
|
||
|
if chunkSize < 1 {
|
||
|
return errors.New("given Config parameters are too small")
|
||
|
}
|
||
|
|
||
|
// Account for codec conversion.
|
||
|
if d.Codec == codecutil.ADPCM {
|
||
|
d.chunkSize = adpcm.EncBytes(int(chunkSize))
|
||
|
} else {
|
||
|
d.chunkSize = int(chunkSize)
|
||
|
}
|
||
|
|
||
|
// Create ring buffer with appropriate chunk size.
|
||
|
d.rb = ring.NewBuffer(rbLen, d.chunkSize, rbTimeout)
|
||
|
|
||
|
// Start device in paused mode.
|
||
|
d.mode = paused
|
||
|
go d.input()
|
||
|
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
// Start will start recording audio and writing to the ringbuffer.
|
||
|
// Once a Device has been stopped it cannot be started again. This is likely to change in future.
|
||
|
func (d *Device) Start() error {
|
||
|
d.mu.Lock()
|
||
|
mode := d.mode
|
||
|
d.mu.Unlock()
|
||
|
switch mode {
|
||
|
case paused:
|
||
|
d.mu.Lock()
|
||
|
d.mode = running
|
||
|
d.mu.Unlock()
|
||
|
return nil
|
||
|
case stopped:
|
||
|
// TODO(Trek): Make this reopen device and start recording.
|
||
|
return errors.New("device is stopped")
|
||
|
case running:
|
||
|
return nil
|
||
|
default:
|
||
|
return fmt.Errorf("invalid mode: %d", mode)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Stop will stop recording audio and close the device.
|
||
|
// Once a Device has been stopped it cannot be started again. This is likely to change in future.
|
||
|
func (d *Device) Stop() {
|
||
|
d.mu.Lock()
|
||
|
d.mode = stopped
|
||
|
d.mu.Unlock()
|
||
|
}
|
||
|
|
||
|
// ChunkSize returns the number of bytes written to the ringbuffer per d.RecPeriod.
|
||
|
func (d *Device) ChunkSize() int {
|
||
|
return d.chunkSize
|
||
|
}
|
||
|
|
||
|
// validate checks if Config parameters are valid and returns an error if they are not.
|
||
|
func validate(c *Config) error {
|
||
|
if c.SampleRate <= 0 {
|
||
|
return fmt.Errorf("invalid sample rate: %v", c.SampleRate)
|
||
|
}
|
||
|
if c.Channels <= 0 {
|
||
|
return fmt.Errorf("invalid number of channels: %v", c.Channels)
|
||
|
}
|
||
|
if c.BitDepth <= 0 {
|
||
|
return fmt.Errorf("invalid bitdepth: %v", c.BitDepth)
|
||
|
}
|
||
|
if c.RecPeriod <= 0 {
|
||
|
return fmt.Errorf("invalid recording period: %v", c.RecPeriod)
|
||
|
}
|
||
|
if !codecutil.IsValid(c.Codec) {
|
||
|
return errors.New("invalid codec")
|
||
|
}
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
// open the recording device with the given name and prepare it to record.
|
||
|
// If name is empty, the first recording device is used.
|
||
|
func (d *Device) open() error {
|
||
|
// Close any existing device.
|
||
|
if d.dev != nil {
|
||
|
d.l.Log(logger.Debug, pkg+"closing device", "title", d.title)
|
||
|
d.dev.Close()
|
||
|
d.dev = nil
|
||
|
}
|
||
|
|
||
|
// Open sound card and open recording device.
|
||
|
d.l.Log(logger.Debug, pkg+"opening sound card")
|
||
|
cards, err := alsa.OpenCards()
|
||
|
if err != nil {
|
||
|
return OpenError(err)
|
||
|
}
|
||
|
defer alsa.CloseCards(cards)
|
||
|
|
||
|
d.l.Log(logger.Debug, pkg+"finding audio device")
|
||
|
for _, card := range cards {
|
||
|
devices, err := card.Devices()
|
||
|
if err != nil {
|
||
|
continue
|
||
|
}
|
||
|
for _, dev := range devices {
|
||
|
if dev.Type != alsa.PCM || !dev.Record {
|
||
|
continue
|
||
|
}
|
||
|
if dev.Title == d.title || d.title == "" {
|
||
|
d.dev = dev
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
if d.dev == nil {
|
||
|
return OpenError(errors.New("no audio device found"))
|
||
|
}
|
||
|
|
||
|
d.l.Log(logger.Debug, pkg+"opening audio device", "title", d.dev.Title)
|
||
|
err = d.dev.Open()
|
||
|
if err != nil {
|
||
|
return OpenError(err)
|
||
|
}
|
||
|
|
||
|
// 2 channels is what most devices need to record in. If mono is requested,
|
||
|
// the recording will be converted in formatBuffer().
|
||
|
channels, err := d.dev.NegotiateChannels(2)
|
||
|
if err != nil {
|
||
|
return OpenError(err)
|
||
|
}
|
||
|
d.l.Log(logger.Debug, pkg+"alsa device channels set", "channels", channels)
|
||
|
|
||
|
// Try to negotiate a rate to record in that is divisible by the wanted rate
|
||
|
// so that it can be easily downsampled to the wanted rate.
|
||
|
// rates is a slice of common sample rates including the standard for CD (44100Hz) and standard for professional audio recording (48000Hz).
|
||
|
// Note: if a card thinks it can record at a rate but can't actually, this can cause a failure.
|
||
|
// Eg. the audioinjector sound card is supposed to record at 8000Hz and 16000Hz but it can't due to a firmware issue,
|
||
|
// a fix for this is to remove 8000 and 16000 from the rates slice.
|
||
|
var rates = [8]int{8000, 16000, 32000, 44100, 48000, 88200, 96000, 192000}
|
||
|
|
||
|
var rate int
|
||
|
foundRate := false
|
||
|
for r := range rates {
|
||
|
if r < d.SampleRate {
|
||
|
continue
|
||
|
}
|
||
|
if r%d.SampleRate == 0 {
|
||
|
rate, err = d.dev.NegotiateRate(r)
|
||
|
if err == nil {
|
||
|
foundRate = true
|
||
|
d.l.Log(logger.Debug, pkg+"alsa device sample rate set", "rate", rate)
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// If no easily divisible rate is found, then use the default rate.
|
||
|
if !foundRate {
|
||
|
d.l.Log(logger.Warning, pkg+"Unable to sample at requested rate, default used.", "rateRequested", d.SampleRate)
|
||
|
rate, err = d.dev.NegotiateRate(defaultSampleRate)
|
||
|
if err != nil {
|
||
|
return OpenError(err)
|
||
|
}
|
||
|
d.l.Log(logger.Debug, pkg+"alsa device sample rate set", "rate", rate)
|
||
|
}
|
||
|
|
||
|
var aFmt alsa.FormatType
|
||
|
switch d.BitDepth {
|
||
|
case 16:
|
||
|
aFmt = alsa.S16_LE
|
||
|
case 32:
|
||
|
aFmt = alsa.S32_LE
|
||
|
default:
|
||
|
return OpenError(fmt.Errorf("unsupported sample bits %v", d.BitDepth))
|
||
|
}
|
||
|
devFmt, err := d.dev.NegotiateFormat(aFmt)
|
||
|
if err != nil {
|
||
|
return err
|
||
|
}
|
||
|
var bitdepth int
|
||
|
switch devFmt {
|
||
|
case alsa.S16_LE:
|
||
|
bitdepth = 16
|
||
|
case alsa.S32_LE:
|
||
|
bitdepth = 32
|
||
|
default:
|
||
|
return OpenError(fmt.Errorf("unsupported sample bits %v", d.BitDepth))
|
||
|
}
|
||
|
d.l.Log(logger.Debug, pkg+"alsa device bit depth set", "bitdepth", bitdepth)
|
||
|
|
||
|
// A 50ms period is a sensible value for low-ish latency. (this could be made configurable if needed)
|
||
|
// Some devices only accept even period sizes while others want powers of 2.
|
||
|
// So we will find the closest power of 2 to the desired period size.
|
||
|
const wantPeriod = 0.05 //seconds
|
||
|
bytesPerSecond := rate * channels * (bitdepth / 8)
|
||
|
wantPeriodSize := int(float64(bytesPerSecond) * wantPeriod)
|
||
|
nearWantPeriodSize := nearestPowerOfTwo(wantPeriodSize)
|
||
|
|
||
|
// At least two period sizes should fit within the buffer.
|
||
|
bufSize, err := d.dev.NegotiateBufferSize(nearWantPeriodSize * 2)
|
||
|
if err != nil {
|
||
|
return OpenError(err)
|
||
|
}
|
||
|
d.l.Log(logger.Debug, pkg+"alsa device buffer size set", "buffersize", bufSize)
|
||
|
|
||
|
if err = d.dev.Prepare(); err != nil {
|
||
|
return OpenError(err)
|
||
|
}
|
||
|
|
||
|
d.l.Log(logger.Debug, pkg+"successfully negotiated ALSA params")
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
// input continously records audio and writes it to the ringbuffer.
|
||
|
// Re-opens the device and tries again if ASLA returns an error.
|
||
|
func (d *Device) input() {
|
||
|
for {
|
||
|
// Check mode.
|
||
|
d.mu.Lock()
|
||
|
mode := d.mode
|
||
|
d.mu.Unlock()
|
||
|
switch mode {
|
||
|
case paused:
|
||
|
time.Sleep(time.Duration(d.RecPeriod) * time.Second)
|
||
|
continue
|
||
|
case stopped:
|
||
|
if d.dev != nil {
|
||
|
d.l.Log(logger.Debug, pkg+"closing audio device", "title", d.title)
|
||
|
d.dev.Close()
|
||
|
d.dev = nil
|
||
|
}
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// Read from audio device.
|
||
|
d.l.Log(logger.Debug, pkg+"recording audio for period", "seconds", d.RecPeriod)
|
||
|
err := d.dev.Read(d.ab.Data)
|
||
|
if err != nil {
|
||
|
d.l.Log(logger.Debug, pkg+"read failed", "error", err.Error())
|
||
|
err = d.open() // re-open
|
||
|
if err != nil {
|
||
|
d.l.Log(logger.Fatal, pkg+"reopening device failed", "error", err.Error())
|
||
|
return
|
||
|
}
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
// Process audio.
|
||
|
d.l.Log(logger.Debug, pkg+"processing audio")
|
||
|
toWrite := d.formatBuffer()
|
||
|
|
||
|
// Write audio to ringbuffer.
|
||
|
n, err := d.rb.Write(toWrite.Data)
|
||
|
switch err {
|
||
|
case nil:
|
||
|
d.l.Log(logger.Debug, pkg+"wrote audio to ringbuffer", "length", n)
|
||
|
case ring.ErrDropped:
|
||
|
d.l.Log(logger.Warning, pkg+"old audio data overwritten")
|
||
|
default:
|
||
|
d.l.Log(logger.Error, pkg+"unexpected ringbuffer error", "error", err.Error())
|
||
|
return
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Read reads from the ringbuffer, returning the number of bytes read upon success.
|
||
|
func (d *Device) Read(p []byte) (int, error) {
|
||
|
// Ready ringbuffer for read.
|
||
|
_, err := d.rb.Next(rbNextTimeout)
|
||
|
if err != nil {
|
||
|
return 0, err
|
||
|
}
|
||
|
|
||
|
// Read from ring buffer.
|
||
|
return d.rb.Read(p)
|
||
|
}
|
||
|
|
||
|
// formatBuffer returns audio that has been converted to the desired format.
|
||
|
func (d *Device) formatBuffer() alsa.Buffer {
|
||
|
var err error
|
||
|
|
||
|
// If nothing needs to be changed, return the original.
|
||
|
if d.ab.Format.Channels == d.Channels && d.ab.Format.Rate == d.SampleRate {
|
||
|
return d.ab
|
||
|
}
|
||
|
var formatted alsa.Buffer
|
||
|
if d.ab.Format.Channels != d.Channels {
|
||
|
// Convert channels.
|
||
|
// TODO(Trek): Make this work for conversions other than stereo to mono.
|
||
|
if d.ab.Format.Channels == 2 && d.Channels == 1 {
|
||
|
formatted, err = pcm.StereoToMono(d.ab)
|
||
|
if err != nil {
|
||
|
d.l.Log(logger.Fatal, pkg+"channel conversion failed", "error", err.Error())
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if d.ab.Format.Rate != d.SampleRate {
|
||
|
// Convert rate.
|
||
|
formatted, err = pcm.Resample(formatted, d.SampleRate)
|
||
|
if err != nil {
|
||
|
d.l.Log(logger.Fatal, pkg+"rate conversion failed", "error", err.Error())
|
||
|
}
|
||
|
}
|
||
|
|
||
|
switch d.Codec {
|
||
|
case codecutil.PCM:
|
||
|
case codecutil.ADPCM:
|
||
|
b := bytes.NewBuffer(make([]byte, 0, adpcm.EncBytes(len(formatted.Data))))
|
||
|
enc := adpcm.NewEncoder(b)
|
||
|
_, err = enc.Write(formatted.Data)
|
||
|
if err != nil {
|
||
|
d.l.Log(logger.Fatal, pkg+"unable to encode", "error", err.Error())
|
||
|
}
|
||
|
formatted.Data = b.Bytes()
|
||
|
default:
|
||
|
d.l.Log(logger.Error, pkg+"unhandled audio codec")
|
||
|
}
|
||
|
|
||
|
return formatted
|
||
|
}
|
||
|
|
||
|
// nearestPowerOfTwo finds and returns the nearest power of two to the given integer.
// If the lower and higher power of two are the same distance, it returns the higher power.
// For zero and negative values, 1 is returned, and for 1, 2 is returned.
// If the higher power of two cannot be represented by an int, the lower power is returned.
func nearestPowerOfTwo(n int) int {
	if n <= 0 {
		return 1
	}
	if n == 1 {
		return 2
	}

	// Find the largest power of two that does not exceed n. Doubling in a
	// loop works for any width of int, and comparing against n/2 avoids
	// overflowing lower.
	lower := 1
	for lower <= n/2 {
		lower <<= 1
	}
	if lower == n {
		return n
	}

	upper := lower << 1
	if upper <= 0 {
		// The higher power overflowed int, so only the lower one is usable.
		return lower
	}
	if upper-n > n-lower {
		return lower
	}
	return upper
}
|