av/device/alsa/alsa.go

465 lines
13 KiB
Go

/*
NAME
alsa.go
AUTHOR
Alan Noble <alan@ausocean.org>
Trek Hopton <trek@ausocean.org>
LICENSE
This file is Copyright (C) 2019 the Australian Ocean Lab (AusOcean)
It is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation, either version 3 of the License, or (at your
option) any later version.
It is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License in gpl.txt.
If not, see http://www.gnu.org/licenses.
*/
// Package alsa provides access to input from ALSA audio devices.
package alsa
import (
"bytes"
"errors"
"fmt"
"sync"
"time"
yalsa "github.com/yobert/alsa"
"bitbucket.org/ausocean/av/codec/adpcm"
"bitbucket.org/ausocean/av/codec/codecutil"
"bitbucket.org/ausocean/av/codec/pcm"
"bitbucket.org/ausocean/av/device"
"bitbucket.org/ausocean/av/revid/config"
"bitbucket.org/ausocean/utils/logger"
"bitbucket.org/ausocean/utils/ring"
)
// Constants shared by the routines in this file.
const (
pkg = "alsa: " // Prefix prepended to every message this file logs.
rbTimeout = 100 * time.Millisecond // Timeout handed to ring.NewBuffer when Set creates the ring buffer.
rbNextTimeout = 2000 * time.Millisecond // Timeout handed to the ring buffer's Next call in Read.
rbLen = 200 // First argument to ring.NewBuffer in Set; presumably the number of chunks held (confirm against the ring package).
defaultSampleRate = 48000 // Rate in Hz that open negotiates when no rate derived from the requested one could be negotiated.
)
// Operating modes of an ALSA device, stored in ALSA.mode.
// "running" means the input goroutine is reading from the ALSA device and writing to the ringbuffer.
// "paused" means the input routine is sleeping until unpaused or stopped.
// "stopped" means the input routine is stopped and the ALSA device is closed.
// The values start at 1, so the zero value of ALSA.mode matches none of them;
// Start reports that case as an invalid mode.
const (
running = iota + 1
paused
stopped
)
// ALSA holds the state of one audio input stream: its logger, operating mode,
// the underlying ALSA device and the buffers that audio passes through.
// It provides Read, so it can be used as an io.Reader.
// NOTE(review): it is also meant to implement device.AVDevice; confirm against that interface.
type ALSA struct {
l Logger // Logger used by the device's routines.
mode uint8 // Operating mode: running, paused or stopped. Set assigns paused; the zero value is none of these.
mu sync.Mutex // Guards mode, which is read by the input goroutine and written by Start and Stop.
title string // Title of the ALSA device to open; when empty, open accepts any recording-capable PCM device.
dev *yalsa.Device // The ALSA recording device selected by open; nil while no device is held.
pb pcm.Buffer // Receives the raw audio read directly from dev.
rb *ring.Buffer // Ring buffer of processed audio, written by input and drained by Read.
Config // Embedded audio parameters (SampleRate, Channels, BitDepth, RecPeriod, Codec).
}
// Config provides parameters used by the ALSA device.
// Set fills it from the corresponding fields of a config.Config.
type Config struct {
SampleRate int // Wanted output sample rate; formatBuffer resamples to it when the device rate differs.
Channels int // Wanted number of output channels; formatBuffer only converts stereo to mono.
BitDepth int // Sample size in bits; open accepts only 16 or 32.
RecPeriod float64 // Length of each recorded chunk, in seconds.
Codec uint8 // Output codec; formatBuffer compares it against codecutil.PCM and codecutil.ADPCM.
}
// Logger enables any implementation of a logger to be used.
// TODO: Make this part of the logger package.
type Logger interface {
// SetLevel sets a log level.
// NOTE(review): presumably the minimum level that gets logged; confirm with the logger implementation.
SetLevel(int8)
// Log records message at the given level. Callers in this file pass params
// as alternating key, value pairs (e.g. "title", d.title).
Log(level int8, message string, params ...interface{})
}
// OpenError is used to determine whether an error has originated from attempting to open a device.
// open converts the errors it returns to this type.
// NOTE(review): because OpenError is itself an interface type with the same
// method set as error, every non-nil error satisfies it, so a type assertion
// to OpenError cannot tell open failures apart from other errors.
type OpenError error
// New allocates an ALSA device that logs through l and returns a pointer to it.
// Every other field is left at its zero value; call Set before using the device.
func New(l Logger) *ALSA {
	d := new(ALSA)
	d.l = l
	return d
}
// Name reports the name of this kind of device, which is always "ALSA".
func (d *ALSA) Name() string {
	const name = "ALSA"
	return name
}
// Set validates the audio fields of c (SampleRate, Channels, BitDepth,
// RecPeriod and InputCodec), copies them into d.Config, opens and configures
// the ALSA device, allocates the PCM and ring buffers, and launches the input
// goroutine in paused mode. Call Start to begin recording.
//
// If any field is invalid, all problems found are returned together as a
// device.MultiError and the device is left untouched: nothing is opened and
// no goroutine is started. Errors from opening the device or from mapping its
// sample format are logged and returned as they are.
func (d *ALSA) Set(c config.Config) error {
	var errs device.MultiError
	if c.SampleRate <= 0 {
		errs = append(errs, fmt.Errorf("invalid sample rate: %v", c.SampleRate))
	}
	if c.Channels <= 0 {
		errs = append(errs, fmt.Errorf("invalid number of channels: %v", c.Channels))
	}
	if c.BitDepth <= 0 {
		errs = append(errs, fmt.Errorf("invalid bitdepth: %v", c.BitDepth))
	}
	if c.RecPeriod <= 0 {
		errs = append(errs, fmt.Errorf("invalid recording period: %v", c.RecPeriod))
	}
	if !codecutil.IsValid(c.InputCodec) {
		errs = append(errs, errors.New("invalid codec"))
	}

	// Report validation failures instead of silently carrying on with values
	// that are known to be unusable (a zero sample rate, for instance, would
	// make open divide by zero). The length check also avoids returning an
	// empty MultiError wrapped in a non-nil error interface.
	if len(errs) != 0 {
		return errs
	}

	d.Config = Config{
		SampleRate: c.SampleRate,
		Channels:   c.Channels,
		BitDepth:   c.BitDepth,
		RecPeriod:  c.RecPeriod,
		Codec:      c.InputCodec,
	}

	// Open the requested audio device.
	err := d.open()
	if err != nil {
		d.l.Log(logger.Error, pkg+"failed to open device")
		return err
	}

	// Setup the device to record with desired period.
	ab := d.dev.NewBufferDuration(time.Duration(d.RecPeriod * float64(time.Second)))
	sf, err := pcm.SFFromString(ab.Format.SampleFormat.String())
	if err != nil {
		d.l.Log(logger.Error, pkg+err.Error())
		return err
	}
	cf := pcm.BufferFormat{
		SFormat:  sf,
		Channels: ab.Format.Channels,
		Rate:     ab.Format.Rate,
	}
	d.pb = pcm.Buffer{
		Format: cf,
		Data:   ab.Data,
	}

	// Create ring buffer with appropriate chunk size.
	cs := pcm.DataSize(d.SampleRate, d.Channels, d.BitDepth, d.RecPeriod, 0)
	d.rb = ring.NewBuffer(rbLen, cs, rbTimeout)

	// Start device in paused mode. The mode is written under the mutex, as in
	// Start and Stop, because the input goroutine reads it concurrently.
	d.mu.Lock()
	d.mode = paused
	d.mu.Unlock()
	go d.input()
	return nil
}
// Start switches a paused device to running, so that the input goroutine
// begins recording audio and writing it to the ringbuffer. Starting a device
// that is already running is a no-op. An error is returned if the device has
// been stopped, or if its mode was never initialised by Set.
// Once an ALSA device has been stopped it cannot be started again. This is likely to change in future.
func (d *ALSA) Start() error {
	// Hold the lock for the whole decision so that a concurrent Stop cannot
	// slip in between reading the mode and writing it, which would let Start
	// overwrite stopped with running.
	d.mu.Lock()
	defer d.mu.Unlock()

	switch d.mode {
	case paused:
		d.mode = running
		return nil
	case stopped:
		// TODO(Trek): Make this reopen device and start recording.
		return errors.New("device is stopped")
	case running:
		return nil
	default:
		return fmt.Errorf("invalid mode: %d", d.mode)
	}
}
// Stop marks the device as stopped and always returns nil. The input
// goroutine notices the new mode on its next pass, closes the ALSA device
// and exits.
// A stopped ALSA device cannot currently be started again; this is likely to change in future.
func (d *ALSA) Stop() error {
	d.mu.Lock()
	defer d.mu.Unlock()
	d.mode = stopped
	return nil
}
// open selects a recording-capable ALSA PCM device, opens it and negotiates
// its channel count, sample rate, sample format and buffer size, leaving it
// prepared for reading. Any device already held by d is closed first.
// The first device whose title equals d.title is used; if d.title is empty,
// the first recording device found is used.
// Every failure is returned as an OpenError.
func (d *ALSA) open() error {
	// Close any existing device.
	if d.dev != nil {
		d.l.Log(logger.Debug, pkg+"closing device", "title", d.title)
		d.dev.Close()
		d.dev = nil
	}

	// Open sound card and open recording device.
	d.l.Log(logger.Debug, pkg+"opening sound card")
	cards, err := yalsa.OpenCards()
	if err != nil {
		return OpenError(err)
	}
	defer yalsa.CloseCards(cards)

	d.l.Log(logger.Debug, pkg+"finding audio device")
	// The labelled break leaves both loops at the first match; a plain break
	// would only leave the inner loop and let a later card replace the choice.
search:
	for _, card := range cards {
		devices, err := card.Devices()
		if err != nil {
			continue
		}
		for _, dev := range devices {
			if dev.Type != yalsa.PCM || !dev.Record {
				continue
			}
			if dev.Title == d.title || d.title == "" {
				d.dev = dev
				break search
			}
		}
	}
	if d.dev == nil {
		return OpenError(errors.New("no ALSA device found"))
	}

	d.l.Log(logger.Debug, pkg+"opening ALSA device", "title", d.dev.Title)
	err = d.dev.Open()
	if err != nil {
		return OpenError(err)
	}

	// 2 channels is what most devices need to record in. If mono is requested,
	// the recording will be converted in formatBuffer().
	channels, err := d.dev.NegotiateChannels(2)
	if err != nil {
		return OpenError(err)
	}
	d.l.Log(logger.Debug, pkg+"alsa device channels set", "channels", channels)

	// Try to negotiate a rate to record in that is divisible by the wanted rate
	// so that it can be easily downsampled to the wanted rate.
	// rates holds common sample rates including the standard for CD (44100Hz)
	// and the standard for professional audio recording (48000Hz).
	// Note: if a card thinks it can record at a rate but can't actually, this can cause a failure.
	// Eg. the audioinjector sound card is supposed to record at 8000Hz and 16000Hz but it can't due to a firmware issue,
	// a fix for this is to remove 8000 and 16000 from the rates array.
	rates := [...]int{8000, 16000, 32000, 44100, 48000, 88200, 96000, 192000}
	var rate int
	foundRate := false
	// A non-positive wanted rate can match nothing, and a zero one would make
	// the modulo below panic, so go straight to the default in that case.
	if d.SampleRate > 0 {
		// Range over the rate values; the index is of no interest here.
		for _, r := range rates {
			if r < d.SampleRate || r%d.SampleRate != 0 {
				continue
			}
			rate, err = d.dev.NegotiateRate(r)
			if err == nil {
				foundRate = true
				d.l.Log(logger.Debug, pkg+"alsa device sample rate set", "rate", rate)
				break
			}
		}
	}

	// If no easily divisible rate is found, then use the default rate.
	if !foundRate {
		d.l.Log(logger.Warning, pkg+"Unable to sample at requested rate, default used.", "rateRequested", d.SampleRate)
		rate, err = d.dev.NegotiateRate(defaultSampleRate)
		if err != nil {
			return OpenError(err)
		}
		d.l.Log(logger.Debug, pkg+"alsa device sample rate set", "rate", rate)
	}

	var aFmt yalsa.FormatType
	switch d.BitDepth {
	case 16:
		aFmt = yalsa.S16_LE
	case 32:
		aFmt = yalsa.S32_LE
	default:
		return OpenError(fmt.Errorf("unsupported sample bits %v", d.BitDepth))
	}
	devFmt, err := d.dev.NegotiateFormat(aFmt)
	if err != nil {
		return OpenError(err)
	}
	var bitdepth int
	switch devFmt {
	case yalsa.S16_LE:
		bitdepth = 16
	case yalsa.S32_LE:
		bitdepth = 32
	default:
		// The requested depth was valid; it is the format the device settled
		// on that cannot be handled, so report that one.
		return OpenError(fmt.Errorf("unsupported sample format %v", devFmt))
	}
	d.l.Log(logger.Debug, pkg+"alsa device bit depth set", "bitdepth", bitdepth)

	// A 50ms period is a sensible value for low-ish latency. (this could be made configurable if needed)
	// Some devices only accept even period sizes while others want powers of 2.
	// So we will find the closest power of 2 to the desired period size.
	const wantPeriod = 0.05 // seconds
	bytesPerSecond := rate * channels * (bitdepth / 8)
	wantPeriodSize := int(float64(bytesPerSecond) * wantPeriod)
	nearWantPeriodSize := nearestPowerOfTwo(wantPeriodSize)

	// At least two period sizes should fit within the buffer.
	bufSize, err := d.dev.NegotiateBufferSize(nearWantPeriodSize * 2)
	if err != nil {
		return OpenError(err)
	}
	d.l.Log(logger.Debug, pkg+"alsa device buffer size set", "buffersize", bufSize)

	if err = d.dev.Prepare(); err != nil {
		return OpenError(err)
	}

	d.l.Log(logger.Debug, pkg+"successfully negotiated device params")
	return nil
}
// input continuously records audio and writes it to the ringbuffer. It runs
// as a goroutine started by Set and loops until the mode becomes stopped, the
// device cannot be re-opened, or the ringbuffer reports an unexpected error.
// While paused it sleeps for one recording period between mode checks.
// If the ALSA device returns a read error, the device is re-opened and the
// read is tried again.
func (d *ALSA) input() {
	for {
		// Check mode.
		d.mu.Lock()
		mode := d.mode
		d.mu.Unlock()
		switch mode {
		case paused:
			// Scale in floating point before converting to a Duration.
			// Converting RecPeriod first would truncate any period shorter
			// than one second to zero and turn this loop into a busy spin.
			time.Sleep(time.Duration(d.RecPeriod * float64(time.Second)))
			continue
		case stopped:
			if d.dev != nil {
				d.l.Log(logger.Debug, pkg+"closing ALSA device", "title", d.title)
				d.dev.Close()
				d.dev = nil
			}
			return
		}

		// Read from audio device.
		d.l.Log(logger.Debug, pkg+"recording audio for period", "seconds", d.RecPeriod)
		err := d.dev.Read(d.pb.Data)
		if err != nil {
			d.l.Log(logger.Debug, pkg+"read failed", "error", err.Error())
			err = d.open() // re-open
			if err != nil {
				d.l.Log(logger.Fatal, pkg+"reopening device failed", "error", err.Error())
				return
			}
			continue
		}

		// Process audio.
		d.l.Log(logger.Debug, pkg+"processing audio")
		toWrite := d.formatBuffer()

		// Write audio to ringbuffer.
		n, err := d.rb.Write(toWrite.Data)
		switch err {
		case nil:
			d.l.Log(logger.Debug, pkg+"wrote audio to ringbuffer", "length", n)
		case ring.ErrDropped:
			d.l.Log(logger.Warning, pkg+"old audio data overwritten")
		default:
			d.l.Log(logger.Error, pkg+"unexpected ringbuffer error", "error", err.Error())
			return
		}
	}
}
// Read copies the next chunk of processed audio from the ringbuffer into p
// and returns the number of bytes read. It first asks the ringbuffer for its
// next chunk, waiting up to rbNextTimeout; if that fails, 0 and the error are
// returned without reading.
func (d *ALSA) Read(p []byte) (int, error) {
	// Advance the ringbuffer to the next readable chunk.
	if _, err := d.rb.Next(rbNextTimeout); err != nil {
		return 0, err
	}

	// Hand the chunk's contents to the caller.
	return d.rb.Read(p)
}
// formatBuffer returns the most recently captured audio (d.pb) converted to
// the desired format: channels are reduced from stereo to mono if requested,
// the sample rate is converted to d.SampleRate if it differs, and the result
// is encoded according to d.Codec. d.pb itself is not modified.
// Conversion and encoding failures are logged rather than returned.
func (d *ALSA) formatBuffer() pcm.Buffer {
	var err error

	// Start from the captured audio so that every later step, including the
	// codec step, works on real data even when an earlier step is skipped.
	formatted := d.pb

	if d.pb.Format.Channels != d.Channels {
		// Convert channels.
		// TODO(Trek): Make this work for conversions other than stereo to mono.
		if d.pb.Format.Channels == 2 && d.Channels == 1 {
			formatted, err = pcm.StereoToMono(formatted)
			if err != nil {
				d.l.Log(logger.Fatal, pkg+"channel conversion failed", "error", err.Error())
			}
		}
	}

	if d.pb.Format.Rate != d.SampleRate {
		// Convert rate.
		formatted, err = pcm.Resample(formatted, d.SampleRate)
		if err != nil {
			d.l.Log(logger.Fatal, pkg+"rate conversion failed", "error", err.Error())
		}
	}

	switch d.Codec {
	case codecutil.PCM:
		// Already PCM; nothing to encode.
	case codecutil.ADPCM:
		b := bytes.NewBuffer(make([]byte, 0, adpcm.EncBytes(len(formatted.Data))))
		enc := adpcm.NewEncoder(b)
		_, err = enc.Write(formatted.Data)
		if err != nil {
			d.l.Log(logger.Fatal, pkg+"unable to encode", "error", err.Error())
		}
		// Only the local copy's Data is replaced; d.pb keeps the raw capture.
		formatted.Data = b.Bytes()
	default:
		d.l.Log(logger.Error, pkg+"unhandled audio codec")
	}

	return formatted
}
// nearestPowerOfTwo finds and returns the nearest power of two to the given integer.
// If the lower and higher power of two are the same distance, it returns the higher power.
// For zero and negative values, 1 is returned, and for 1 the result is 2.
// If the higher power of two does not fit in an int, the lower one is returned.
// Source: https://stackoverflow.com/a/45859570
func nearestPowerOfTwo(n int) int {
	if n <= 0 {
		return 1
	}
	if n == 1 {
		return 2
	}

	// Copy the highest set bit of n-1 into every lower position, which gives
	// 2^k - 1 where 2^k is the smallest power of two that is >= n.
	v := n - 1
	v |= v >> 1
	v |= v >> 2
	v |= v >> 4
	v |= v >> 8
	v |= v >> 16
	v |= v >> 32 // Covers the upper half of a 64-bit int; a no-op when int is 32 bits.

	lower := v>>1 + 1 // 2^(k-1)
	higher := v + 1   // 2^k, which wraps to a negative value if it does not fit.
	if higher <= 0 {
		return lower
	}
	if higher-n > n-lower {
		return lower
	}
	return higher
}