diff --git a/device/audio/audio.go b/device/audio/audio.go new file mode 100644 index 00000000..a356ea48 --- /dev/null +++ b/device/audio/audio.go @@ -0,0 +1,490 @@ +/* +NAME + audio.go + +AUTHOR + Alan Noble + Trek Hopton + +LICENSE + This file is Copyright (C) 2019 the Australian Ocean Lab (AusOcean) + + It is free software: you can redistribute it and/or modify them + under the terms of the GNU General Public License as published by the + Free Software Foundation, either version 3 of the License, or (at your + option) any later version. + + It is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License in gpl.txt. + If not, see [GNU licenses](http://www.gnu.org/licenses). +*/ + +// Package audio provides access to input from audio devices. +package audio + +import ( + "bytes" + "errors" + "fmt" + "sync" + "time" + + "github.com/yobert/alsa" + + "bitbucket.org/ausocean/av/codec/adpcm" + "bitbucket.org/ausocean/av/codec/codecutil" + "bitbucket.org/ausocean/av/codec/pcm" + "bitbucket.org/ausocean/av/device" + "bitbucket.org/ausocean/av/revid/config" + "bitbucket.org/ausocean/utils/logger" + "bitbucket.org/ausocean/utils/ring" +) + +const ( + pkg = "audio: " + rbTimeout = 100 * time.Millisecond + rbNextTimeout = 100 * time.Millisecond + rbLen = 200 + defaultSampleRate = 48000 +) + +// "running" means the input goroutine is reading from the ALSA device and writing to the ringbuffer. +// "paused" means the input routine is sleeping until unpaused or stopped. +// "stopped" means the input routine is stopped and the ALSA device is closed. +const ( + running = iota + 1 + paused + stopped +) + +// Device holds everything we need to know about the audio input stream and implements io.Reader. +type Device struct { + l Logger // Logger for device's routines to log to. + mode uint8 // Operating mode, either running, paused, or stopped. + mu sync.Mutex // Provides synchronisation when changing modes concurrently. + title string // Name of audio title, or empty for the default title. + dev *alsa.Device // ALSA's Audio input device. + ab alsa.Buffer // ALSA's buffer. + rb *ring.Buffer // Our buffer. + chunkSize int // This is the number of bytes that will be stored in rb at a time. + Config // Configuration parameters for this device. +} + +// Config provides parameters used by Device. +type Config struct { + SampleRate int + Channels int + BitDepth int + RecPeriod float64 + Codec uint8 +} + +// Logger enables any implementation of a logger to be used. +// TODO: Make this part of the logger package. +type Logger interface { + SetLevel(int8) + Log(level int8, message string, params ...interface{}) +} + +// OpenError is used to determine whether an error has originated from attempting to open a device. +type OpenError error + +// NewDevice initializes and returns a Device which has its logger set as the given logger. +func NewDevice(l Logger) *Device { + d := &Device{ + l: l, + } + return d +} + +// Set will take a Config struct, check the validity of the relevant fields +// and then performs any configuration necessary. If fields are not valid, +// an error is added to the multiError and a default value is used. +// It then initialises the Device which can then be started, read from, and stopped. +func (d *Device) Set(c config.Config) error { + var errs device.MultiError + if c.SampleRate <= 0 { + errs = append(errs, fmt.Errorf("invalid sample rate: %v", c.SampleRate)) + } + if c.Channels <= 0 { + errs = append(errs, fmt.Errorf("invalid number of channels: %v", c.Channels)) + } + if c.BitDepth <= 0 { + errs = append(errs, fmt.Errorf("invalid bitdepth: %v", c.BitDepth)) + } + if c.RecPeriod <= 0 { + errs = append(errs, fmt.Errorf("invalid recording period: %v", c.RecPeriod)) + } + if !codecutil.IsValid(c.InputCodec) { + errs = append(errs, errors.New("invalid codec")) + } + d.Config = Config{ + SampleRate: c.SampleRate, + Channels: c.Channels, + BitDepth: c.BitDepth, + RecPeriod: c.RecPeriod, + Codec: c.InputCodec, + } + + // Open the requested audio device. + err := d.open() + if err != nil { + d.l.Log(logger.Error, pkg+"failed to open device") + return err + } + + // Setup the device to record with desired period. + d.ab = d.dev.NewBufferDuration(time.Duration(d.RecPeriod * float64(time.Second))) + + // Account for channel conversion. + chunkSize := float64(len(d.ab.Data) / d.dev.BufferFormat().Channels * d.Channels) + + // Account for resampling. + chunkSize = (chunkSize / float64(d.dev.BufferFormat().Rate)) * float64(d.SampleRate) + if chunkSize < 1 { + return errors.New("given Config parameters are too small") + } + + // Account for codec conversion. + if d.Codec == codecutil.ADPCM { + d.chunkSize = adpcm.EncBytes(int(chunkSize)) + } else { + d.chunkSize = int(chunkSize) + } + + // Create ring buffer with appropriate chunk size. + d.rb = ring.NewBuffer(rbLen, d.chunkSize, rbTimeout) + + // Start device in paused mode. + d.mode = paused + go d.input() + + return nil +} + +// Start will start recording audio and writing to the ringbuffer. +// Once a Device has been stopped it cannot be started again. This is likely to change in future. +func (d *Device) Start() error { + d.mu.Lock() + mode := d.mode + d.mu.Unlock() + switch mode { + case paused: + d.mu.Lock() + d.mode = running + d.mu.Unlock() + return nil + case stopped: + // TODO(Trek): Make this reopen device and start recording. + return errors.New("device is stopped") + case running: + return nil + default: + return fmt.Errorf("invalid mode: %d", mode) + } +} + +// Stop will stop recording audio and close the device. +// Once a Device has been stopped it cannot be started again. This is likely to change in future. +func (d *Device) Stop() { + d.mu.Lock() + d.mode = stopped + d.mu.Unlock() +} + +// ChunkSize returns the number of bytes written to the ringbuffer per d.RecPeriod. +func (d *Device) ChunkSize() int { + return d.chunkSize +} + +// validate checks if Config parameters are valid and returns an error if they are not. +func validate(c *Config) error { + if c.SampleRate <= 0 { + return fmt.Errorf("invalid sample rate: %v", c.SampleRate) + } + if c.Channels <= 0 { + return fmt.Errorf("invalid number of channels: %v", c.Channels) + } + if c.BitDepth <= 0 { + return fmt.Errorf("invalid bitdepth: %v", c.BitDepth) + } + if c.RecPeriod <= 0 { + return fmt.Errorf("invalid recording period: %v", c.RecPeriod) + } + if !codecutil.IsValid(c.Codec) { + return errors.New("invalid codec") + } + return nil +} + +// open the recording device with the given name and prepare it to record. +// If name is empty, the first recording device is used. +func (d *Device) open() error { + // Close any existing device. + if d.dev != nil { + d.l.Log(logger.Debug, pkg+"closing device", "title", d.title) + d.dev.Close() + d.dev = nil + } + + // Open sound card and open recording device. + d.l.Log(logger.Debug, pkg+"opening sound card") + cards, err := alsa.OpenCards() + if err != nil { + return OpenError(err) + } + defer alsa.CloseCards(cards) + + d.l.Log(logger.Debug, pkg+"finding audio device") + for _, card := range cards { + devices, err := card.Devices() + if err != nil { + continue + } + for _, dev := range devices { + if dev.Type != alsa.PCM || !dev.Record { + continue + } + if dev.Title == d.title || d.title == "" { + d.dev = dev + break + } + } + } + if d.dev == nil { + return OpenError(errors.New("no audio device found")) + } + + d.l.Log(logger.Debug, pkg+"opening audio device", "title", d.dev.Title) + err = d.dev.Open() + if err != nil { + return OpenError(err) + } + + // 2 channels is what most devices need to record in. If mono is requested, + // the recording will be converted in formatBuffer(). + channels, err := d.dev.NegotiateChannels(2) + if err != nil { + return OpenError(err) + } + d.l.Log(logger.Debug, pkg+"alsa device channels set", "channels", channels) + + // Try to negotiate a rate to record in that is divisible by the wanted rate + // so that it can be easily downsampled to the wanted rate. + // rates is a slice of common sample rates including the standard for CD (44100Hz) and standard for professional audio recording (48000Hz). + // Note: if a card thinks it can record at a rate but can't actually, this can cause a failure. + // Eg. the audioinjector sound card is supposed to record at 8000Hz and 16000Hz but it can't due to a firmware issue, + // a fix for this is to remove 8000 and 16000 from the rates slice. + var rates = [8]int{8000, 16000, 32000, 44100, 48000, 88200, 96000, 192000} + + var rate int + foundRate := false + for r := range rates { + if r < d.SampleRate { + continue + } + if r%d.SampleRate == 0 { + rate, err = d.dev.NegotiateRate(r) + if err == nil { + foundRate = true + d.l.Log(logger.Debug, pkg+"alsa device sample rate set", "rate", rate) + break + } + } + } + + // If no easily divisible rate is found, then use the default rate. + if !foundRate { + d.l.Log(logger.Warning, pkg+"Unable to sample at requested rate, default used.", "rateRequested", d.SampleRate) + rate, err = d.dev.NegotiateRate(defaultSampleRate) + if err != nil { + return OpenError(err) + } + d.l.Log(logger.Debug, pkg+"alsa device sample rate set", "rate", rate) + } + + var aFmt alsa.FormatType + switch d.BitDepth { + case 16: + aFmt = alsa.S16_LE + case 32: + aFmt = alsa.S32_LE + default: + return OpenError(fmt.Errorf("unsupported sample bits %v", d.BitDepth)) + } + devFmt, err := d.dev.NegotiateFormat(aFmt) + if err != nil { + return err + } + var bitdepth int + switch devFmt { + case alsa.S16_LE: + bitdepth = 16 + case alsa.S32_LE: + bitdepth = 32 + default: + return OpenError(fmt.Errorf("unsupported sample bits %v", d.BitDepth)) + } + d.l.Log(logger.Debug, pkg+"alsa device bit depth set", "bitdepth", bitdepth) + + // A 50ms period is a sensible value for low-ish latency. (this could be made configurable if needed) + // Some devices only accept even period sizes while others want powers of 2. + // So we will find the closest power of 2 to the desired period size. + const wantPeriod = 0.05 //seconds + bytesPerSecond := rate * channels * (bitdepth / 8) + wantPeriodSize := int(float64(bytesPerSecond) * wantPeriod) + nearWantPeriodSize := nearestPowerOfTwo(wantPeriodSize) + + // At least two period sizes should fit within the buffer. + bufSize, err := d.dev.NegotiateBufferSize(nearWantPeriodSize * 2) + if err != nil { + return OpenError(err) + } + d.l.Log(logger.Debug, pkg+"alsa device buffer size set", "buffersize", bufSize) + + if err = d.dev.Prepare(); err != nil { + return OpenError(err) + } + + d.l.Log(logger.Debug, pkg+"successfully negotiated ALSA params") + return nil +} + +// input continously records audio and writes it to the ringbuffer. +// Re-opens the device and tries again if ASLA returns an error. +func (d *Device) input() { + for { + // Check mode. + d.mu.Lock() + mode := d.mode + d.mu.Unlock() + switch mode { + case paused: + time.Sleep(time.Duration(d.RecPeriod) * time.Second) + continue + case stopped: + if d.dev != nil { + d.l.Log(logger.Debug, pkg+"closing audio device", "title", d.title) + d.dev.Close() + d.dev = nil + } + return + } + + // Read from audio device. + d.l.Log(logger.Debug, pkg+"recording audio for period", "seconds", d.RecPeriod) + err := d.dev.Read(d.ab.Data) + if err != nil { + d.l.Log(logger.Debug, pkg+"read failed", "error", err.Error()) + err = d.open() // re-open + if err != nil { + d.l.Log(logger.Fatal, pkg+"reopening device failed", "error", err.Error()) + return + } + continue + } + + // Process audio. + d.l.Log(logger.Debug, pkg+"processing audio") + toWrite := d.formatBuffer() + + // Write audio to ringbuffer. + n, err := d.rb.Write(toWrite.Data) + switch err { + case nil: + d.l.Log(logger.Debug, pkg+"wrote audio to ringbuffer", "length", n) + case ring.ErrDropped: + d.l.Log(logger.Warning, pkg+"old audio data overwritten") + default: + d.l.Log(logger.Error, pkg+"unexpected ringbuffer error", "error", err.Error()) + return + } + } +} + +// Read reads from the ringbuffer, returning the number of bytes read upon success. +func (d *Device) Read(p []byte) (int, error) { + // Ready ringbuffer for read. + _, err := d.rb.Next(rbNextTimeout) + if err != nil { + return 0, err + } + + // Read from ring buffer. + return d.rb.Read(p) +} + +// formatBuffer returns audio that has been converted to the desired format. +func (d *Device) formatBuffer() alsa.Buffer { + var err error + + // If nothing needs to be changed, return the original. + if d.ab.Format.Channels == d.Channels && d.ab.Format.Rate == d.SampleRate { + return d.ab + } + var formatted alsa.Buffer + if d.ab.Format.Channels != d.Channels { + // Convert channels. + // TODO(Trek): Make this work for conversions other than stereo to mono. + if d.ab.Format.Channels == 2 && d.Channels == 1 { + formatted, err = pcm.StereoToMono(d.ab) + if err != nil { + d.l.Log(logger.Fatal, pkg+"channel conversion failed", "error", err.Error()) + } + } + } + + if d.ab.Format.Rate != d.SampleRate { + // Convert rate. + formatted, err = pcm.Resample(formatted, d.SampleRate) + if err != nil { + d.l.Log(logger.Fatal, pkg+"rate conversion failed", "error", err.Error()) + } + } + + switch d.Codec { + case codecutil.PCM: + case codecutil.ADPCM: + b := bytes.NewBuffer(make([]byte, 0, adpcm.EncBytes(len(formatted.Data)))) + enc := adpcm.NewEncoder(b) + _, err = enc.Write(formatted.Data) + if err != nil { + d.l.Log(logger.Fatal, pkg+"unable to encode", "error", err.Error()) + } + formatted.Data = b.Bytes() + default: + d.l.Log(logger.Error, pkg+"unhandled audio codec") + } + + return formatted +} + +// nearestPowerOfTwo finds and returns the nearest power of two to the given integer. +// If the lower and higher power of two are the same distance, it returns the higher power. +// For negative values, 1 is returned. +// Source: https://stackoverflow.com/a/45859570 +func nearestPowerOfTwo(n int) int { + if n <= 0 { + return 1 + } + if n == 1 { + return 2 + } + v := n + v-- + v |= v >> 1 + v |= v >> 2 + v |= v >> 4 + v |= v >> 8 + v |= v >> 16 + v++ // higher power of 2 + x := v >> 1 // lower power of 2 + if (v - n) > (n - x) { + return x + } + return v +} diff --git a/device/audio/audio_test.go b/device/audio/audio_test.go new file mode 100644 index 00000000..f27a5659 --- /dev/null +++ b/device/audio/audio_test.go @@ -0,0 +1,107 @@ +/* +NAME + audio_test.go + +AUTHOR + Trek Hopton + +LICENSE + This file is Copyright (C) 2019 the Australian Ocean Lab (AusOcean) + + It is free software: you can redistribute it and/or modify them + under the terms of the GNU General Public License as published by the + Free Software Foundation, either version 3 of the License, or (at your + option) any later version. + + It is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License in gpl.txt. + If not, see [GNU licenses](http://www.gnu.org/licenses). +*/ + +package audio + +import ( + "io/ioutil" + "os" + "strconv" + "testing" + "time" + + "bitbucket.org/ausocean/av/codec/codecutil" + "bitbucket.org/ausocean/av/revid/config" + "bitbucket.org/ausocean/utils/logger" +) + +func TestDevice(t *testing.T) { + // We want to open a device with a standard configuration. + c := config.Config{ + SampleRate: 8000, + Channels: 1, + RecPeriod: 0.3, + BitDepth: 16, + InputCodec: codecutil.ADPCM, + } + n := 2 // Number of periods to wait while recording. + + // Create a new audio Device, start, read/lex, and then stop it. + l := logger.New(logger.Debug, os.Stderr, true) + ai := NewDevice(l) + err := ai.Set(c) + // If there was an error opening the device, skip this test. + if _, ok := err.(OpenError); ok { + t.Skip(err) + } + // For any other error, report it. + if err != nil { + t.Error(err) + } + err = ai.Start() + if err != nil { + t.Error(err) + } + chunkSize := ai.ChunkSize() + lexer := codecutil.NewByteLexer(&chunkSize) + go lexer.Lex(ioutil.Discard, ai, time.Duration(c.RecPeriod*float64(time.Second))) + time.Sleep(time.Duration(c.RecPeriod*float64(time.Second)) * time.Duration(n)) + ai.Stop() +} + +var powerTests = []struct { + in int + out int +}{ + {36, 32}, + {47, 32}, + {3, 4}, + {46, 32}, + {7, 8}, + {2, 2}, + {36, 32}, + {757, 512}, + {2464, 2048}, + {18980, 16384}, + {70000, 65536}, + {8192, 8192}, + {2048, 2048}, + {65536, 65536}, + {-2048, 1}, + {-127, 1}, + {-1, 1}, + {0, 1}, + {1, 2}, +} + +func TestNearestPowerOfTwo(t *testing.T) { + for _, tt := range powerTests { + t.Run(strconv.Itoa(tt.in), func(t *testing.T) { + v := nearestPowerOfTwo(tt.in) + if v != tt.out { + t.Errorf("got %v, want %v", v, tt.out) + } + }) + } +}