/*
NAME
  alsa.go

AUTHOR
  Alan Noble
  Trek Hopton

LICENSE
  This file is Copyright (C) 2019 the Australian Ocean Lab (AusOcean)

  It is free software: you can redistribute it and/or modify them
  under the terms of the GNU General Public License as published by the
  Free Software Foundation, either version 3 of the License, or (at your
  option) any later version.

  It is distributed in the hope that it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  for more details.

  You should have received a copy of the GNU General Public License in gpl.txt.
  If not, see http://www.gnu.org/licenses.
*/

// Package alsa provides access to input from ALSA audio devices.
package alsa

import (
	"bytes"
	"errors"
	"fmt"
	"sync"
	"time"

	yalsa "github.com/yobert/alsa"

	"bitbucket.org/ausocean/av/codec/adpcm"
	"bitbucket.org/ausocean/av/codec/codecutil"
	"bitbucket.org/ausocean/av/codec/pcm"
	"bitbucket.org/ausocean/av/device"
	"bitbucket.org/ausocean/av/revid/config"
	"bitbucket.org/ausocean/utils/logger"
	"bitbucket.org/ausocean/utils/pool"
)

// Ring buffer parameters.
//
// rbLen and rbTimeout are handed to pool.NewBuffer when Setup creates the
// ring buffer (presumably the number of chunks and the write timeout
// respectively; confirm against the pool package). rbNextTimeout is the
// timeout Read passes to the ring buffer's Next method.
// NOTE(review): pkg looks like a message prefix for this package but is not
// referenced by any code visible in this file.
const (
	pkg           = "alsa: "
	rbTimeout     = 100 * time.Millisecond
	rbNextTimeout = 2000 * time.Millisecond
	rbLen         = 200
)

// Operating modes of the device, stored in ALSA.mode.
//
// "running" means the input goroutine is reading from the ALSA device and writing to the ringbuffer.
// "paused" means the input routine is sleeping until unpaused or stopped.
// "stopped" means the input routine is stopped and the ALSA device is closed.
//
// The values start at 1, so the zero value of the mode field is none of these.
const (
	running = iota + 1
	paused
	stopped
)

// Defaults that Setup substitutes for configuration fields that fail
// validation. defaultRecPeriod is in seconds (Setup multiplies the record
// period by time.Second).
const (
	defaultSampleRate = 48000
	defaultBitDepth   = 16
	defaultChannels   = 1
	defaultRecPeriod  = 1.0
	defaultCodec      = codecutil.PCM
)

// Configuration field errors.
var ( errInvalidSampleRate = errors.New("invalid sample rate, defaulting") errInvalidChannels = errors.New("invalid number of channels, defaulting") errInvalidBitDepth = errors.New("invalid bitdepth, defaulting") errInvalidRecPeriod = errors.New("invalid record period, defaulting") errInvalidCodec = errors.New("invalid audio codec, defaulting") ) // An ALSA device holds everything we need to know about the audio input stream and implements io.Reader and device.AVDevice. type ALSA struct { l Logger // Logger for device's routines to log to. mode uint8 // Operating mode, either running, paused, or stopped. mu sync.Mutex // Provides synchronisation when changing modes concurrently. title string // Name of audio title, or empty for the default title. dev *yalsa.Device // ALSA device's Audio input device. pb pcm.Buffer // Buffer to contain the direct audio from ALSA. buf *pool.Buffer // Ring buffer to contain processed audio ready to be read. Config // Configuration parameters for this device. } // Config provides parameters used by the ALSA device. type Config struct { SampleRate uint Channels uint BitDepth uint RecPeriod float64 Codec string } // Logger enables any implementation of a logger to be used. // TODO: Make this part of the logger package. type Logger interface { SetLevel(int8) Log(level int8, message string, params ...interface{}) } // New initializes and returns an ALSA device which has its logger set as the given logger. func New(l Logger) *ALSA { return &ALSA{l: l} } // Name returns the name of the device. func (d *ALSA) Name() string { return "ALSA" } // Setup will take a Config struct, check the validity of the relevant fields // and then perform any configuration necessary. If fields are not valid, // an error is added to the multiError and a default value is used. // It then initialises the ALSA device which can then be started, read from, and stopped. 
func (d *ALSA) Setup(c config.Config) error { var errs device.MultiError if c.SampleRate <= 0 { errs = append(errs, errInvalidSampleRate) c.SampleRate = defaultSampleRate } if c.Channels <= 0 { errs = append(errs, errInvalidChannels) c.Channels = defaultChannels } if c.BitDepth <= 0 { errs = append(errs, errInvalidBitDepth) c.BitDepth = defaultBitDepth } if c.RecPeriod <= 0 { errs = append(errs, errInvalidRecPeriod) c.RecPeriod = defaultRecPeriod } if c.InputCodec != codecutil.ADPCM && c.InputCodec != codecutil.PCM { errs = append(errs, errInvalidCodec) c.InputCodec = defaultCodec } d.Config = Config{ SampleRate: c.SampleRate, Channels: c.Channels, BitDepth: c.BitDepth, RecPeriod: c.RecPeriod, Codec: c.InputCodec, } // Open the requested audio device. err := d.open() if err != nil { return fmt.Errorf("failed to open device: %w", err) } // Setup the device to record with desired period. ab := d.dev.NewBufferDuration(time.Duration(d.RecPeriod * float64(time.Second))) sf, err := pcm.SFFromString(ab.Format.SampleFormat.String()) if err != nil { return fmt.Errorf("unable to get sample format from string: %w", err) } cf := pcm.BufferFormat{ SFormat: sf, Channels: uint(ab.Format.Channels), Rate: uint(ab.Format.Rate), } d.pb = pcm.Buffer{ Format: cf, Data: ab.Data, } // Create pool buffer with appropriate chunk size. cs := d.DataSize() d.buf = pool.NewBuffer(rbLen, cs, rbTimeout) // Start device in paused mode. d.mode = paused go d.input() if len(errs) != 0 { return errs } return nil } // Set exists to satisfy the implementation of the Device interface that revid uses. // Everything that would usually be in Set is in the Setup function. // This is because an ALSA device is different to other devices in that it // outputs binary non-packetised data and it requires a different configuration procedure. func (d *ALSA) Set(c config.Config) error { return nil } // Start will start recording audio and writing to the ringbuffer. 
// Once an ALSA device has been stopped it cannot be started again. This is likely to change in future. func (d *ALSA) Start() error { d.mu.Lock() mode := d.mode d.mu.Unlock() switch mode { case paused: d.mu.Lock() d.mode = running d.mu.Unlock() return nil case stopped: // TODO(Trek): Make this reopen device and start recording. return errors.New("device is stopped") case running: return nil default: return fmt.Errorf("invalid mode: %d", mode) } } // Stop will stop recording audio and close the device. // Once an ALSA device has been stopped it cannot be started again. This is likely to change in future. func (d *ALSA) Stop() error { d.mu.Lock() d.mode = stopped d.mu.Unlock() return nil } // open the recording device with the given name and prepare it to record. // If name is empty, the first recording device is used. func (d *ALSA) open() error { // Close any existing device. if d.dev != nil { d.l.Log(logger.Debug, "closing device", "title", d.title) d.dev.Close() d.dev = nil } // Open sound card and open recording device. d.l.Log(logger.Debug, "opening sound card") cards, err := yalsa.OpenCards() if err != nil { return err } defer yalsa.CloseCards(cards) d.l.Log(logger.Debug, "finding audio device") for _, card := range cards { devices, err := card.Devices() if err != nil { continue } for _, dev := range devices { if dev.Type != yalsa.PCM || !dev.Record { continue } if dev.Title == d.title || d.title == "" { d.dev = dev break } } } if d.dev == nil { return errors.New("no ALSA device found") } d.l.Log(logger.Debug, "opening ALSA device", "title", d.dev.Title) err = d.dev.Open() if err != nil { return err } // Try to configure device with chosen channels. 
channels, err := d.dev.NegotiateChannels(int(d.Channels)) if err != nil && d.Channels == 1 { d.l.Log(logger.Info, "device is unable to record in mono, trying stereo", "error", err) channels, err = d.dev.NegotiateChannels(2) } if err != nil { return fmt.Errorf("device is unable to record with requested number of channels: %w", err) } d.l.Log(logger.Debug, "alsa device channels set", "channels", channels) // Try to negotiate a rate to record in that is divisible by the wanted rate // so that it can be easily downsampled to the wanted rate. // rates is a slice of common sample rates including the standard for CD (44100Hz) and standard for professional audio recording (48000Hz). // Note: if a card thinks it can record at a rate but can't actually, this can cause a failure. // Eg. the audioinjector sound card is supposed to record at 8000Hz and 16000Hz but it can't due to a firmware issue, // a fix for this is to remove 8000 and 16000 from the rates slice. var rates = [8]int{8000, 16000, 32000, 44100, 48000, 88200, 96000, 192000} var rate int foundRate := false for r := range rates { if r < int(d.SampleRate) { continue } if r%int(d.SampleRate) == 0 { rate, err = d.dev.NegotiateRate(r) if err == nil { foundRate = true d.l.Log(logger.Debug, "alsa device sample rate set", "rate", rate) break } } } // If no easily divisible rate is found, then use the default rate. 
if !foundRate { d.l.Log(logger.Warning, "unable to sample at requested rate, default used.", "rateRequested", d.SampleRate) rate, err = d.dev.NegotiateRate(defaultSampleRate) if err != nil { return err } d.l.Log(logger.Debug, "alsa device sample rate set", "rate", rate) } var aFmt yalsa.FormatType switch d.BitDepth { case 16: aFmt = yalsa.S16_LE case 32: aFmt = yalsa.S32_LE default: return fmt.Errorf("unsupported sample bits %v", d.BitDepth) } devFmt, err := d.dev.NegotiateFormat(aFmt) if err != nil { return err } var bitdepth int switch devFmt { case yalsa.S16_LE: bitdepth = 16 case yalsa.S32_LE: bitdepth = 32 default: return fmt.Errorf("unsupported sample bits %v", d.BitDepth) } d.l.Log(logger.Debug, "alsa device bit depth set", "bitdepth", bitdepth) // A 50ms period is a sensible value for low-ish latency. (this could be made configurable if needed) // Some devices only accept even period sizes while others want powers of 2. // So we will find the closest power of 2 to the desired period size. const wantPeriod = 0.05 //seconds bytesPerSecond := rate * channels * (bitdepth / 8) wantPeriodSize := int(float64(bytesPerSecond) * wantPeriod) nearWantPeriodSize := nearestPowerOfTwo(wantPeriodSize) // At least two period sizes should fit within the buffer. bufSize, err := d.dev.NegotiateBufferSize(nearWantPeriodSize * 2) if err != nil { return err } d.l.Log(logger.Debug, "alsa device buffer size set", "buffersize", bufSize) if err = d.dev.Prepare(); err != nil { return err } d.l.Log(logger.Debug, "successfully negotiated device params") return nil } // input continously records audio and writes it to the ringbuffer. // Re-opens the device and tries again if the ASLA device returns an error. func (d *ALSA) input() { for { // Check mode. 
d.mu.Lock() mode := d.mode d.mu.Unlock() switch mode { case paused: time.Sleep(time.Duration(d.RecPeriod) * time.Second) continue case stopped: if d.dev != nil { d.l.Log(logger.Debug, "closing ALSA device", "title", d.title) d.dev.Close() d.dev = nil } return } // Read from audio device. d.l.Log(logger.Debug, "recording audio for period", "seconds", d.RecPeriod) err := d.dev.Read(d.pb.Data) if err != nil { d.l.Log(logger.Debug, "read failed", "error", err.Error()) err = d.open() // re-open if err != nil { d.l.Log(logger.Fatal, "reopening device failed", "error", err.Error()) return } continue } // Process audio. d.l.Log(logger.Debug, "processing audio") toWrite := d.formatBuffer() // Write audio to ringbuffer. n, err := d.buf.Write(toWrite.Data) switch err { case nil: d.l.Log(logger.Debug, "wrote audio to ringbuffer", "length", n) case pool.ErrDropped: d.l.Log(logger.Warning, "old audio data overwritten") default: d.l.Log(logger.Error, "unexpected ringbuffer error", "error", err.Error()) return } } } // Read reads from the ringbuffer, returning the number of bytes read upon success. func (d *ALSA) Read(p []byte) (int, error) { // Ready ringbuffer for read. _, err := d.buf.Next(rbNextTimeout) if err != nil { return 0, err } // Read from pool buffer. return d.buf.Read(p) } // formatBuffer returns audio that has been converted to the desired format. func (d *ALSA) formatBuffer() pcm.Buffer { var err error // If nothing needs to be changed, return the original. if d.pb.Format.Channels == d.Channels && d.pb.Format.Rate == d.SampleRate { return d.pb } var formatted pcm.Buffer if d.pb.Format.Channels != d.Channels { // Convert channels. // TODO(Trek): Make this work for conversions other than stereo to mono. if d.pb.Format.Channels == 2 && d.Channels == 1 { formatted, err = pcm.StereoToMono(d.pb) if err != nil { d.l.Log(logger.Fatal, "channel conversion failed", "error", err.Error()) } } } if d.pb.Format.Rate != d.SampleRate { // Convert rate. 
formatted, err = pcm.Resample(formatted, d.SampleRate) if err != nil { d.l.Log(logger.Fatal, "rate conversion failed", "error", err.Error()) } } switch d.Codec { case codecutil.PCM: case codecutil.ADPCM: b := bytes.NewBuffer(make([]byte, 0, adpcm.EncBytes(len(formatted.Data)))) enc := adpcm.NewEncoder(b) _, err = enc.Write(formatted.Data) if err != nil { d.l.Log(logger.Fatal, "unable to encode", "error", err.Error()) } formatted.Data = b.Bytes() default: d.l.Log(logger.Error, "unhandled audio codec") } return formatted } // DataSize returns the size in bytes of the data ALSA device d will // output in the duration of a single recording period. func (d *ALSA) DataSize() int { s := pcm.DataSize(d.SampleRate, d.Channels, d.BitDepth, d.RecPeriod) if d.Codec == codecutil.ADPCM { s = adpcm.EncBytes(s) } return s } // nearestPowerOfTwo finds and returns the nearest power of two to the given integer. // If the lower and higher power of two are the same distance, it returns the higher power. // For negative values, 1 is returned. // Source: https://stackoverflow.com/a/45859570 func nearestPowerOfTwo(n int) int { if n <= 0 { return 1 } if n == 1 { return 2 } v := n v-- v |= v >> 1 v |= v >> 2 v |= v >> 4 v |= v >> 8 v |= v >> 16 v++ // higher power of 2 x := v >> 1 // lower power of 2 if (v - n) > (n - x) { return x } return v } // IsRunning is used to determine if the ALSA device is running. func (d *ALSA) IsRunning() bool { return d.mode == running }