From 9fe09255bef40873d1769cef646099c4128c25a8 Mon Sep 17 00:00:00 2001 From: Trek H Date: Thu, 13 Jun 2019 23:35:52 +0930 Subject: [PATCH] audio and revid: changes for pr added license to lex.go changed pcm functions to return alsa.Buffers style, syntax and clarification added to audio.go new method of finding buffersize in audio.go uses a new function called nearestPowerOfTwo --- codec/codecutil/lex.go | 24 +++++++ codec/pcm/pcm.go | 71 ++++++++++++++------- codec/pcm/pcm_test.go | 4 +- container/mts/pes/pes.go | 2 +- input/audio/audio.go | 127 ++++++++++++++++++++++++++------------ input/audio/audio_test.go | 10 +++ 6 files changed, 173 insertions(+), 65 deletions(-) diff --git a/codec/codecutil/lex.go b/codec/codecutil/lex.go index 8e8c36f6..3423e1ea 100644 --- a/codec/codecutil/lex.go +++ b/codec/codecutil/lex.go @@ -1,3 +1,27 @@ +/* +NAME + lex.go + +AUTHOR + Trek Hopton + +LICENSE + This file is Copyright (C) 2019 the Australian Ocean Lab (AusOcean) + + It is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation, either version 3 of the License, or (at your + option) any later version. + + It is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License in gpl.txt. + If not, see [GNU licenses](http://www.gnu.org/licenses). +*/ + package codecutil import ( diff --git a/codec/pcm/pcm.go b/codec/pcm/pcm.go index bb200d50..4882ffc3 100644 --- a/codec/pcm/pcm.go +++ b/codec/pcm/pcm.go @@ -35,20 +35,21 @@ import ( "github.com/yobert/alsa" ) -// Resample takes an alsa.Buffer (b) and resamples the pcm audio data to 'rate' Hz and returns the resulting pcm. 
-// If an error occurs, an error will be returned along with the original b's data. +// Resample takes alsa.Buffer b and resamples the pcm audio data to 'rate' Hz and returns an alsa.Buffer with the resampled data. // Notes: // - Currently only downsampling is implemented and b's rate must be divisible by 'rate' or an error will occur. // - If the number of bytes in b.Data is not divisible by the decimation factor (ratioFrom), the remaining bytes will // not be included in the result. Eg. input of length 480002 downsampling 6:1 will result in output length 80000. -func Resample(b alsa.Buffer, rate int) ([]byte, error) { - fromRate := b.Format.Rate - if fromRate == rate { - return b.Data, nil - } else if fromRate < 0 { - return nil, fmt.Errorf("Unable to convert from: %v Hz", fromRate) - } else if rate < 0 { - return nil, fmt.Errorf("Unable to convert to: %v Hz", rate) +func Resample(b alsa.Buffer, rate int) (alsa.Buffer, error) { + var newBuf alsa.Buffer + if b.Format.Rate == rate { + return newBuf, nil + } + if b.Format.Rate < 0 { + return newBuf, fmt.Errorf("Unable to convert from: %v Hz", b.Format.Rate) + } + if rate < 0 { + return newBuf, fmt.Errorf("Unable to convert to: %v Hz", rate) } // The number of bytes in a sample. @@ -59,22 +60,22 @@ func Resample(b alsa.Buffer, rate int) ([]byte, error) { case alsa.S16_LE: sampleLen = 2 * b.Format.Channels default: - return nil, fmt.Errorf("Unhandled ALSA format: %v", b.Format.SampleFormat) + return newBuf, fmt.Errorf("Unhandled ALSA format: %v", b.Format.SampleFormat) } inPcmLen := len(b.Data) // Calculate sample rate ratio ratioFrom:ratioTo. - rateGcd := gcd(rate, fromRate) - ratioFrom := fromRate / rateGcd + rateGcd := gcd(rate, b.Format.Rate) + ratioFrom := b.Format.Rate / rateGcd ratioTo := rate / rateGcd // ratioTo = 1 is the only number that will result in an even sampling. 
if ratioTo != 1 { - return nil, fmt.Errorf("unhandled from:to rate ratio %v:%v: 'to' must be 1", ratioFrom, ratioTo) + return newBuf, fmt.Errorf("unhandled from:to rate ratio %v:%v: 'to' must be 1", ratioFrom, ratioTo) } newLen := inPcmLen / ratioFrom - result := make([]byte, 0, newLen) + resampled := make([]byte, 0, newLen) // For each new sample to be generated, loop through the respective 'ratioFrom' samples in 'b.Data' to add them // up and average them. The result is the new sample. @@ -96,19 +97,31 @@ func Resample(b alsa.Buffer, rate int) ([]byte, error) { case alsa.S16_LE: binary.LittleEndian.PutUint16(bAvg, uint16(avg)) } - result = append(result, bAvg...) + resampled = append(resampled, bAvg...) } - return result, nil + + // Create new alsa.Buffer with resampled data. + newBuf = alsa.Buffer{ + Format: alsa.BufferFormat{ + Channels: b.Format.Channels, + SampleFormat: b.Format.SampleFormat, + Rate: rate, + }, + Data: resampled, + } + + return newBuf, nil } // StereoToMono returns raw mono audio data generated from only the left channel from // the given stereo recording (ALSA buffer) -// if an error occurs, an error will be returned along with the original stereo data. 
-func StereoToMono(b alsa.Buffer) ([]byte, error) { +func StereoToMono(b alsa.Buffer) (alsa.Buffer, error) { + var newBuf alsa.Buffer if b.Format.Channels == 1 { - return b.Data, nil - } else if b.Format.Channels != 2 { - return nil, fmt.Errorf("Audio is not stereo or mono, it has %v channels", b.Format.Channels) + return b, nil + } + if b.Format.Channels != 2 { + return newBuf, fmt.Errorf("Audio is not stereo or mono, it has %v channels", b.Format.Channels) } var stereoSampleBytes int @@ -118,7 +131,7 @@ func StereoToMono(b alsa.Buffer) ([]byte, error) { case alsa.S16_LE: stereoSampleBytes = 4 default: - return nil, fmt.Errorf("Unhandled ALSA format %v", b.Format.SampleFormat) + return newBuf, fmt.Errorf("Unhandled ALSA format %v", b.Format.SampleFormat) } recLength := len(b.Data) @@ -134,7 +147,17 @@ func StereoToMono(b alsa.Buffer) ([]byte, error) { } } - return mono, nil + // Create new alsa.Buffer with mono data. + newBuf = alsa.Buffer{ + Format: alsa.BufferFormat{ + Channels: 1, + SampleFormat: b.Format.SampleFormat, + Rate: b.Format.Rate, + }, + Data: mono, + } + + return newBuf, nil } // gcd is used for calculating the greatest common divisor of two positive integers, a and b. diff --git a/codec/pcm/pcm_test.go b/codec/pcm/pcm_test.go index 713d01d8..1aa1b9d2 100644 --- a/codec/pcm/pcm_test.go +++ b/codec/pcm/pcm_test.go @@ -71,7 +71,7 @@ func TestResample(t *testing.T) { } // Compare result with expected. - if !bytes.Equal(resampled, exp) { + if !bytes.Equal(resampled.Data, exp) { t.Error("Resampled data does not match expected result.") } } @@ -112,7 +112,7 @@ func TestStereoToMono(t *testing.T) { } // Compare result with expected. 
- if !bytes.Equal(mono, exp) { + if !bytes.Equal(mono.Data, exp) { t.Error("Converted data does not match expected result.") } } diff --git a/container/mts/pes/pes.go b/container/mts/pes/pes.go index 16382d84..5b5cb612 100644 --- a/container/mts/pes/pes.go +++ b/container/mts/pes/pes.go @@ -28,7 +28,7 @@ package pes import "github.com/Comcast/gots" -const MaxPesSize = 64 * 1 << 10 // 65536 +const MaxPesSize = 64 * 1 << 10 /* The below data struct encapsulates the fields of an PES packet. Below is diff --git a/input/audio/audio.go b/input/audio/audio.go index 00bc6ef9..25fad00b 100644 --- a/input/audio/audio.go +++ b/input/audio/audio.go @@ -30,7 +30,6 @@ import ( "bytes" "errors" "fmt" - "io" "sync" "time" @@ -57,7 +56,7 @@ const ( stopped ) -// Rates contains the audio sample rates used by audio. +// Rates contains the standard audio sample rates used by package audio. var Rates = [8]int{8000, 16000, 32000, 44100, 48000, 88200, 96000, 192000} // Device holds everything we need to know about the audio input stream. @@ -98,31 +97,41 @@ type Logger interface { // NewDevice initializes and returns an Device which can be started, read from, and stopped. func NewDevice(cfg *Config, l Logger) (*Device, error) { - d := &Device{} - d.Config = cfg - d.l = l + d := &Device{ + Config: cfg, + l: l, + } // Open the requested audio device. err := d.open() if err != nil { - d.l.Log(logger.Error, pkg+"failed to open audio device", "error", err.Error()) - return nil, errors.New("failed to open audio device") + d.l.Log(logger.Error, pkg+"failed to open device") + return nil, err } - // Setup ring buffer to capture audio in periods of d.RecPeriod seconds and buffer rbDuration seconds in total. + // Setup the device to record with desired period. 
d.ab = d.dev.NewBufferDuration(time.Duration(d.RecPeriod * float64(time.Second))) - cs := (float64((len(d.ab.Data)/d.dev.BufferFormat().Channels)*d.Channels) / float64(d.dev.BufferFormat().Rate)) * float64(d.SampleRate) - if cs < 1 { - d.l.Log(logger.Error, pkg+"given Config parameters are too small", "error", err.Error()) + + // Account for channel conversion. + chunkSize := float64(len(d.ab.Data) / d.dev.BufferFormat().Channels * d.Channels) + + // Account for resampling. + chunkSize = (chunkSize / float64(d.dev.BufferFormat().Rate)) * float64(d.SampleRate) + if chunkSize < 1 { return nil, errors.New("given Config parameters are too small") } + + // Account for codec conversion. if d.Codec == codecutil.ADPCM { - d.chunkSize = adpcm.EncBytes(int(cs)) + d.chunkSize = adpcm.EncBytes(int(chunkSize)) } else { - d.chunkSize = int(cs) + d.chunkSize = int(chunkSize) } + + // Create ring buffer with appropriate chunk size. d.rb = ring.NewBuffer(rbLen, d.chunkSize, rbTimeout) + // Start device in paused mode. d.mode = paused go d.input() @@ -211,10 +220,11 @@ func (d *Device) open() error { // 2 channels is what most devices need to record in. If mono is requested, // the recording will be converted in formatBuffer(). - _, err = d.dev.NegotiateChannels(2) + devChan, err := d.dev.NegotiateChannels(2) if err != nil { return err } + d.l.Log(logger.Debug, pkg+"alsa device channels set", "channels", devChan) // Try to negotiate a rate to record in that is divisible by the wanted rate // so that it can be easily downsampled to the wanted rate. @@ -222,15 +232,16 @@ func (d *Device) open() error { // Eg. the audioinjector sound card is supposed to record at 8000Hz and 16000Hz but it can't due to a firmware issue, // a fix for this is to remove 8000 and 16000 from the Rates slice. 
foundRate := false + var devRate int for i := 0; i < len(Rates) && !foundRate; i++ { if Rates[i] < d.SampleRate { continue } if Rates[i]%d.SampleRate == 0 { - _, err = d.dev.NegotiateRate(Rates[i]) + devRate, err = d.dev.NegotiateRate(Rates[i]) if err == nil { foundRate = true - d.l.Log(logger.Debug, pkg+"Sample rate set", "rate", Rates[i]) + d.l.Log(logger.Debug, pkg+"alsa device sample rate set", "rate", devRate) } } } @@ -238,11 +249,11 @@ func (d *Device) open() error { // If no easily divisible rate is found, then use the default rate. if !foundRate { d.l.Log(logger.Warning, pkg+"Unable to sample at requested rate, default used.", "rateRequested", d.SampleRate) - _, err = d.dev.NegotiateRate(defaultSampleRate) + devRate, err = d.dev.NegotiateRate(defaultSampleRate) if err != nil { return err } - d.l.Log(logger.Debug, pkg+"Sample rate set", "rate", defaultSampleRate) + d.l.Log(logger.Debug, pkg+"alsa device sample rate set", "rate", devRate) } var aFmt alsa.FormatType @@ -254,21 +265,46 @@ func (d *Device) open() error { default: return fmt.Errorf("unsupported sample bits %v", d.BitDepth) } - _, err = d.dev.NegotiateFormat(aFmt) + devFmt, err := d.dev.NegotiateFormat(aFmt) if err != nil { return err } + var devBits int + switch devFmt { + case alsa.S16_LE: + devBits = 16 + case alsa.S32_LE: + devBits = 32 + default: + return fmt.Errorf("unsupported sample bits %v", d.BitDepth) + } + d.l.Log(logger.Debug, pkg+"alsa device bit depth set", "bitdepth", devBits) - // Either 8192 or 16384 bytes is a reasonable ALSA buffer size. - _, err = d.dev.NegotiateBufferSize(8192, 16384) + // A 50ms period is a sensible value for low-ish latency. (this could be made configurable if needed) + // Some devices only accept even period sizes while others want powers of 2. + // So we will find the closest power of 2 to the desired period size. 
+ const wantPeriod = 0.05 //seconds + secondSize := devRate * devChan * (devBits / 8) + wantPeriodSize := int(float64(secondSize) * wantPeriod) + nearWantPeriodSize := nearestPowerOfTwo(wantPeriodSize) + + devPeriodSize, err := d.dev.NegotiatePeriodSize(nearWantPeriodSize) if err != nil { return err } + d.l.Log(logger.Debug, pkg+"alsa device period size set", "periodsize", devPeriodSize) + + devBufferSize, err := d.dev.NegotiateBufferSize(devPeriodSize * 2) + if err != nil { + return err + } + d.l.Log(logger.Debug, pkg+"alsa device buffer size set", "buffersize", devBufferSize) if err = d.dev.Prepare(); err != nil { return err } - d.l.Log(logger.Debug, pkg+"Successfully negotiated ALSA params") + + d.l.Log(logger.Debug, pkg+"successfully negotiated ALSA params") return nil } @@ -307,7 +343,7 @@ func (d *Device) input() { } // Process audio. - d.l.Log(logger.Debug, "processing audio") + d.l.Log(logger.Debug, pkg+"processing audio") toWrite := d.formatBuffer() // Write audio to ringbuffer. @@ -328,24 +364,15 @@ func (d *Device) input() { func (d *Device) Read(p []byte) (n int, err error) { // Ready ringbuffer for read. _, err = d.rb.Next(rbNextTimeout) - switch err { - case nil: - case ring.ErrTimeout: - return 0, nil - default: + if err != nil { return 0, err } // Read from ring buffer. n, err = d.rb.Read(p) - switch err { - case nil: - case io.EOF: - return 0, nil - default: + if err != nil { return 0, err } - return n, nil } @@ -357,13 +384,12 @@ func (d *Device) formatBuffer() alsa.Buffer { if d.ab.Format.Channels == d.Channels && d.ab.Format.Rate == d.SampleRate { return d.ab } - - formatted := alsa.Buffer{Format: d.ab.Format, Data: d.ab.Data} + var formatted alsa.Buffer if d.ab.Format.Channels != d.Channels { // Convert channels. // TODO(Trek): Make this work for conversions other than stereo to mono. 
if d.ab.Format.Channels == 2 && d.Channels == 1 { - formatted.Data, err = pcm.StereoToMono(d.ab) + formatted, err = pcm.StereoToMono(d.ab) if err != nil { d.l.Log(logger.Fatal, pkg+"channel conversion failed", "error", err.Error()) } @@ -372,7 +398,7 @@ func (d *Device) formatBuffer() alsa.Buffer { if d.ab.Format.Rate != d.SampleRate { // Convert rate. - formatted.Data, err = pcm.Resample(formatted, d.SampleRate) + formatted, err = pcm.Resample(formatted, d.SampleRate) if err != nil { d.l.Log(logger.Fatal, pkg+"rate conversion failed", "error", err.Error()) } @@ -394,3 +420,28 @@ func (d *Device) formatBuffer() alsa.Buffer { return formatted } + +// nearestPowerOfTwo finds and returns the nearest power of two to the given integer. +// If the lower and higher power of two are the same distance, it returns the higher power. +// For zero and negative values, 1 is returned; for 1, 2 is returned. +func nearestPowerOfTwo(n int) int { + if n <= 0 { + return 1 + } + if n == 1 { + return 2 + } + v := n + v-- + v |= v >> 1 + v |= v >> 2 + v |= v >> 4 + v |= v >> 8 + v |= v >> 16 + v++ // higher power of 2 + x := v >> 1 // lower power of 2 + if (v - n) > (n - x) { + return x + } + return v +} diff --git a/input/audio/audio_test.go b/input/audio/audio_test.go index 02bf7e0a..fff51b07 100644 --- a/input/audio/audio_test.go +++ b/input/audio/audio_test.go @@ -144,3 +144,13 @@ func TestDevice(t *testing.T) { time.Sleep(time.Duration(ac.RecPeriod*float64(time.Second)) * time.Duration(n)) ai.Stop() } + +func TestNearestPowerOfTwo(t *testing.T) { + testValues := []int{36, 47, 3, 46, 7, 2, 36, 757, 2464, 18980, 70000, 8192, 2048, 65536, -2048, -127, -1, 0, 1} + testAnswers := []int{32, 32, 4, 32, 8, 2, 32, 512, 2048, 16384, 65536, 8192, 2048, 65536, 1, 1, 1, 1, 2} + for i, v := range testValues { + if r := nearestPowerOfTwo(v); testAnswers[i] != r { + t.Errorf("test %v gave incorrect result: %v, should be %v", i, r, testAnswers[i]) + } + } +}