From 9fe09255bef40873d1769cef646099c4128c25a8 Mon Sep 17 00:00:00 2001
From: Trek H <trek.hopton@gmail.com>
Date: Thu, 13 Jun 2019 23:35:52 +0930
Subject: [PATCH] audio and revid: changes for pr

Added the license header to lex.go.
Changed the pcm functions to return alsa.Buffers instead of byte slices.
Made style and syntax fixes and clarified comments in audio.go.
The ALSA period size (and from it the buffer size) in audio.go is now found using a new function called nearestPowerOfTwo.
---
 codec/codecutil/lex.go    |  24 +++++++
 codec/pcm/pcm.go          |  71 ++++++++++++++-------
 codec/pcm/pcm_test.go     |   4 +-
 container/mts/pes/pes.go  |   2 +-
 input/audio/audio.go      | 127 ++++++++++++++++++++++++++------------
 input/audio/audio_test.go |  10 +++
 6 files changed, 173 insertions(+), 65 deletions(-)

diff --git a/codec/codecutil/lex.go b/codec/codecutil/lex.go
index 8e8c36f6..3423e1ea 100644
--- a/codec/codecutil/lex.go
+++ b/codec/codecutil/lex.go
@@ -1,3 +1,27 @@
+/*
+NAME
+  lex.go
+
+AUTHOR
+  Trek Hopton <trek@ausocean.org>
+
+LICENSE
+  This file is Copyright (C) 2019 the Australian Ocean Lab (AusOcean)
+
+  It is free software: you can redistribute it and/or modify it
+  under the terms of the GNU General Public License as published by the
+  Free Software Foundation, either version 3 of the License, or (at your
+  option) any later version.
+
+  It is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+  for more details.
+
+  You should have received a copy of the GNU General Public License in gpl.txt.
+  If not, see [GNU licenses](http://www.gnu.org/licenses).
+*/
+
 package codecutil
 
 import (
diff --git a/codec/pcm/pcm.go b/codec/pcm/pcm.go
index bb200d50..4882ffc3 100644
--- a/codec/pcm/pcm.go
+++ b/codec/pcm/pcm.go
@@ -35,20 +35,21 @@ import (
 	"github.com/yobert/alsa"
 )
 
-// Resample takes an alsa.Buffer (b) and resamples the pcm audio data to 'rate' Hz and returns the resulting pcm.
-// If an error occurs, an error will be returned along with the original b's data.
+// Resample takes alsa.Buffer b and resamples the pcm audio data to 'rate' Hz and returns an alsa.Buffer with the resampled data.
 // Notes:
 // 	- Currently only downsampling is implemented and b's rate must be divisible by 'rate' or an error will occur.
 // 	- If the number of bytes in b.Data is not divisible by the decimation factor (ratioFrom), the remaining bytes will
 // 	  not be included in the result. Eg. input of length 480002 downsampling 6:1 will result in output length 80000.
-func Resample(b alsa.Buffer, rate int) ([]byte, error) {
-	fromRate := b.Format.Rate
-	if fromRate == rate {
-		return b.Data, nil
-	} else if fromRate < 0 {
-		return nil, fmt.Errorf("Unable to convert from: %v Hz", fromRate)
-	} else if rate < 0 {
-		return nil, fmt.Errorf("Unable to convert to: %v Hz", rate)
+func Resample(b alsa.Buffer, rate int) (alsa.Buffer, error) {
+	var newBuf alsa.Buffer
+	if b.Format.Rate == rate {
+		return newBuf, nil
+	}
+	if b.Format.Rate < 0 {
+		return newBuf, fmt.Errorf("Unable to convert from: %v Hz", b.Format.Rate)
+	}
+	if rate < 0 {
+		return newBuf, fmt.Errorf("Unable to convert to: %v Hz", rate)
 	}
 
 	// The number of bytes in a sample.
@@ -59,22 +60,22 @@ func Resample(b alsa.Buffer, rate int) ([]byte, error) {
 	case alsa.S16_LE:
 		sampleLen = 2 * b.Format.Channels
 	default:
-		return nil, fmt.Errorf("Unhandled ALSA format: %v", b.Format.SampleFormat)
+		return newBuf, fmt.Errorf("Unhandled ALSA format: %v", b.Format.SampleFormat)
 	}
 	inPcmLen := len(b.Data)
 
 	// Calculate sample rate ratio ratioFrom:ratioTo.
-	rateGcd := gcd(rate, fromRate)
-	ratioFrom := fromRate / rateGcd
+	rateGcd := gcd(rate, b.Format.Rate)
+	ratioFrom := b.Format.Rate / rateGcd
 	ratioTo := rate / rateGcd
 
 	// ratioTo = 1 is the only number that will result in an even sampling.
 	if ratioTo != 1 {
-		return nil, fmt.Errorf("unhandled from:to rate ratio %v:%v: 'to' must be 1", ratioFrom, ratioTo)
+		return newBuf, fmt.Errorf("unhandled from:to rate ratio %v:%v: 'to' must be 1", ratioFrom, ratioTo)
 	}
 
 	newLen := inPcmLen / ratioFrom
-	result := make([]byte, 0, newLen)
+	resampled := make([]byte, 0, newLen)
 
 	// For each new sample to be generated, loop through the respective 'ratioFrom' samples in 'b.Data' to add them
 	// up and average them. The result is the new sample.
@@ -96,19 +97,31 @@ func Resample(b alsa.Buffer, rate int) ([]byte, error) {
 		case alsa.S16_LE:
 			binary.LittleEndian.PutUint16(bAvg, uint16(avg))
 		}
-		result = append(result, bAvg...)
+		resampled = append(resampled, bAvg...)
 	}
-	return result, nil
+
+	// Create new alsa.Buffer with resampled data.
+	newBuf = alsa.Buffer{
+		Format: alsa.BufferFormat{
+			Channels:     b.Format.Channels,
+			SampleFormat: b.Format.SampleFormat,
+			Rate:         rate,
+		},
+		Data: resampled,
+	}
+
+	return newBuf, nil
 }
 
 // StereoToMono returns raw mono audio data generated from only the left channel from
 // the given stereo recording (ALSA buffer)
-// if an error occurs, an error will be returned along with the original stereo data.
-func StereoToMono(b alsa.Buffer) ([]byte, error) {
+func StereoToMono(b alsa.Buffer) (alsa.Buffer, error) {
+	var newBuf alsa.Buffer
 	if b.Format.Channels == 1 {
-		return b.Data, nil
-	} else if b.Format.Channels != 2 {
-		return nil, fmt.Errorf("Audio is not stereo or mono, it has %v channels", b.Format.Channels)
+		return b, nil
+	}
+	if b.Format.Channels != 2 {
+		return newBuf, fmt.Errorf("Audio is not stereo or mono, it has %v channels", b.Format.Channels)
 	}
 
 	var stereoSampleBytes int
@@ -118,7 +131,7 @@ func StereoToMono(b alsa.Buffer) ([]byte, error) {
 	case alsa.S16_LE:
 		stereoSampleBytes = 4
 	default:
-		return nil, fmt.Errorf("Unhandled ALSA format %v", b.Format.SampleFormat)
+		return newBuf, fmt.Errorf("Unhandled ALSA format %v", b.Format.SampleFormat)
 	}
 
 	recLength := len(b.Data)
@@ -134,7 +147,17 @@ func StereoToMono(b alsa.Buffer) ([]byte, error) {
 		}
 	}
 
-	return mono, nil
+	// Create new alsa.Buffer with mono data.
+	newBuf = alsa.Buffer{
+		Format: alsa.BufferFormat{
+			Channels:     1,
+			SampleFormat: b.Format.SampleFormat,
+			Rate:         b.Format.Rate,
+		},
+		Data: mono,
+	}
+
+	return newBuf, nil
 }
 
 // gcd is used for calculating the greatest common divisor of two positive integers, a and b.
diff --git a/codec/pcm/pcm_test.go b/codec/pcm/pcm_test.go
index 713d01d8..1aa1b9d2 100644
--- a/codec/pcm/pcm_test.go
+++ b/codec/pcm/pcm_test.go
@@ -71,7 +71,7 @@ func TestResample(t *testing.T) {
 	}
 
 	// Compare result with expected.
-	if !bytes.Equal(resampled, exp) {
+	if !bytes.Equal(resampled.Data, exp) {
 		t.Error("Resampled data does not match expected result.")
 	}
 }
@@ -112,7 +112,7 @@ func TestStereoToMono(t *testing.T) {
 	}
 
 	// Compare result with expected.
-	if !bytes.Equal(mono, exp) {
+	if !bytes.Equal(mono.Data, exp) {
 		t.Error("Converted data does not match expected result.")
 	}
 }
diff --git a/container/mts/pes/pes.go b/container/mts/pes/pes.go
index 16382d84..5b5cb612 100644
--- a/container/mts/pes/pes.go
+++ b/container/mts/pes/pes.go
@@ -28,7 +28,7 @@ package pes
 
 import "github.com/Comcast/gots"
 
-const MaxPesSize = 64 * 1 << 10 // 65536
+const MaxPesSize = 64 * 1 << 10
 
 /*
 The below data struct encapsulates the fields of an PES packet. Below is
diff --git a/input/audio/audio.go b/input/audio/audio.go
index 00bc6ef9..25fad00b 100644
--- a/input/audio/audio.go
+++ b/input/audio/audio.go
@@ -30,7 +30,6 @@ import (
 	"bytes"
 	"errors"
 	"fmt"
-	"io"
 	"sync"
 	"time"
 
@@ -57,7 +56,7 @@ const (
 	stopped
 )
 
-// Rates contains the audio sample rates used by audio.
+// Rates contains the standard audio sample rates used by package audio.
 var Rates = [8]int{8000, 16000, 32000, 44100, 48000, 88200, 96000, 192000}
 
 // Device holds everything we need to know about the audio input stream.
@@ -98,31 +97,41 @@ type Logger interface {
 
 // NewDevice initializes and returns an Device which can be started, read from, and stopped.
 func NewDevice(cfg *Config, l Logger) (*Device, error) {
-	d := &Device{}
-	d.Config = cfg
-	d.l = l
+	d := &Device{
+		Config: cfg,
+		l:      l,
+	}
 
 	// Open the requested audio device.
 	err := d.open()
 	if err != nil {
-		d.l.Log(logger.Error, pkg+"failed to open audio device", "error", err.Error())
-		return nil, errors.New("failed to open audio device")
+		d.l.Log(logger.Error, pkg+"failed to open device")
+		return nil, err
 	}
 
-	// Setup ring buffer to capture audio in periods of d.RecPeriod seconds and buffer rbDuration seconds in total.
+	// Setup the device to record with desired period.
 	d.ab = d.dev.NewBufferDuration(time.Duration(d.RecPeriod * float64(time.Second)))
-	cs := (float64((len(d.ab.Data)/d.dev.BufferFormat().Channels)*d.Channels) / float64(d.dev.BufferFormat().Rate)) * float64(d.SampleRate)
-	if cs < 1 {
-		d.l.Log(logger.Error, pkg+"given Config parameters are too small", "error", err.Error())
+
+	// Account for channel conversion.
+	chunkSize := float64(len(d.ab.Data) / d.dev.BufferFormat().Channels * d.Channels)
+
+	// Account for resampling.
+	chunkSize = (chunkSize / float64(d.dev.BufferFormat().Rate)) * float64(d.SampleRate)
+	if chunkSize < 1 {
 		return nil, errors.New("given Config parameters are too small")
 	}
+
+	// Account for codec conversion.
 	if d.Codec == codecutil.ADPCM {
-		d.chunkSize = adpcm.EncBytes(int(cs))
+		d.chunkSize = adpcm.EncBytes(int(chunkSize))
 	} else {
-		d.chunkSize = int(cs)
+		d.chunkSize = int(chunkSize)
 	}
+
+	// Create ring buffer with appropriate chunk size.
 	d.rb = ring.NewBuffer(rbLen, d.chunkSize, rbTimeout)
 
+	// Start device in paused mode.
 	d.mode = paused
 	go d.input()
 
@@ -211,10 +220,11 @@ func (d *Device) open() error {
 
 	// 2 channels is what most devices need to record in. If mono is requested,
 	// the recording will be converted in formatBuffer().
-	_, err = d.dev.NegotiateChannels(2)
+	devChan, err := d.dev.NegotiateChannels(2)
 	if err != nil {
 		return err
 	}
+	d.l.Log(logger.Debug, pkg+"alsa device channels set", "channels", devChan)
 
 	// Try to negotiate a rate to record in that is divisible by the wanted rate
 	// so that it can be easily downsampled to the wanted rate.
@@ -222,15 +232,16 @@ func (d *Device) open() error {
 	// Eg. the audioinjector sound card is supposed to record at 8000Hz and 16000Hz but it can't due to a firmware issue,
 	// a fix for this is to remove 8000 and 16000 from the Rates slice.
 	foundRate := false
+	var devRate int
 	for i := 0; i < len(Rates) && !foundRate; i++ {
 		if Rates[i] < d.SampleRate {
 			continue
 		}
 		if Rates[i]%d.SampleRate == 0 {
-			_, err = d.dev.NegotiateRate(Rates[i])
+			devRate, err = d.dev.NegotiateRate(Rates[i])
 			if err == nil {
 				foundRate = true
-				d.l.Log(logger.Debug, pkg+"Sample rate set", "rate", Rates[i])
+				d.l.Log(logger.Debug, pkg+"alsa device sample rate set", "rate", devRate)
 			}
 		}
 	}
@@ -238,11 +249,11 @@ func (d *Device) open() error {
 	// If no easily divisible rate is found, then use the default rate.
 	if !foundRate {
 		d.l.Log(logger.Warning, pkg+"Unable to sample at requested rate, default used.", "rateRequested", d.SampleRate)
-		_, err = d.dev.NegotiateRate(defaultSampleRate)
+		devRate, err = d.dev.NegotiateRate(defaultSampleRate)
 		if err != nil {
 			return err
 		}
-		d.l.Log(logger.Debug, pkg+"Sample rate set", "rate", defaultSampleRate)
+		d.l.Log(logger.Debug, pkg+"alsa device sample rate set", "rate", devRate)
 	}
 
 	var aFmt alsa.FormatType
@@ -254,21 +265,46 @@ func (d *Device) open() error {
 	default:
 		return fmt.Errorf("unsupported sample bits %v", d.BitDepth)
 	}
-	_, err = d.dev.NegotiateFormat(aFmt)
+	devFmt, err := d.dev.NegotiateFormat(aFmt)
 	if err != nil {
 		return err
 	}
+	var devBits int
+	switch devFmt {
+	case alsa.S16_LE:
+		devBits = 16
+	case alsa.S32_LE:
+		devBits = 32
+	default:
+		return fmt.Errorf("unsupported sample bits %v", d.BitDepth)
+	}
+	d.l.Log(logger.Debug, pkg+"alsa device bit depth set", "bitdepth", devBits)
 
-	// Either 8192 or 16384 bytes is a reasonable ALSA buffer size.
-	_, err = d.dev.NegotiateBufferSize(8192, 16384)
+	// A 50ms period is a sensible value for low-ish latency. (this could be made configurable if needed)
+	// Some devices only accept even period sizes while others want powers of 2.
+	// So we will find the closest power of 2 to the desired period size.
+	const wantPeriod = 0.05 //seconds
+	secondSize := devRate * devChan * (devBits / 8)
+	wantPeriodSize := int(float64(secondSize) * wantPeriod)
+	nearWantPeriodSize := nearestPowerOfTwo(wantPeriodSize)
+
+	devPeriodSize, err := d.dev.NegotiatePeriodSize(nearWantPeriodSize)
 	if err != nil {
 		return err
 	}
+	d.l.Log(logger.Debug, pkg+"alsa device period size set", "periodsize", devPeriodSize)
+
+	devBufferSize, err := d.dev.NegotiateBufferSize(devPeriodSize * 2)
+	if err != nil {
+		return err
+	}
+	d.l.Log(logger.Debug, pkg+"alsa device buffer size set", "buffersize", devBufferSize)
 
 	if err = d.dev.Prepare(); err != nil {
 		return err
 	}
-	d.l.Log(logger.Debug, pkg+"Successfully negotiated ALSA params")
+
+	d.l.Log(logger.Debug, pkg+"successfully negotiated ALSA params")
 	return nil
 }
 
@@ -307,7 +343,7 @@ func (d *Device) input() {
 		}
 
 		// Process audio.
-		d.l.Log(logger.Debug, "processing audio")
+		d.l.Log(logger.Debug, pkg+"processing audio")
 		toWrite := d.formatBuffer()
 
 		// Write audio to ringbuffer.
@@ -328,24 +364,15 @@ func (d *Device) input() {
 func (d *Device) Read(p []byte) (n int, err error) {
 	// Ready ringbuffer for read.
 	_, err = d.rb.Next(rbNextTimeout)
-	switch err {
-	case nil:
-	case ring.ErrTimeout:
-		return 0, nil
-	default:
+	if err != nil {
 		return 0, err
 	}
 
 	// Read from ring buffer.
 	n, err = d.rb.Read(p)
-	switch err {
-	case nil:
-	case io.EOF:
-		return 0, nil
-	default:
+	if err != nil {
 		return 0, err
 	}
-
 	return n, nil
 }
 
@@ -357,13 +384,12 @@ func (d *Device) formatBuffer() alsa.Buffer {
 	if d.ab.Format.Channels == d.Channels && d.ab.Format.Rate == d.SampleRate {
 		return d.ab
 	}
-
-	formatted := alsa.Buffer{Format: d.ab.Format, Data: d.ab.Data}
+	var formatted alsa.Buffer
 	if d.ab.Format.Channels != d.Channels {
 		// Convert channels.
 		// TODO(Trek): Make this work for conversions other than stereo to mono.
 		if d.ab.Format.Channels == 2 && d.Channels == 1 {
-			formatted.Data, err = pcm.StereoToMono(d.ab)
+			formatted, err = pcm.StereoToMono(d.ab)
 			if err != nil {
 				d.l.Log(logger.Fatal, pkg+"channel conversion failed", "error", err.Error())
 			}
@@ -372,7 +398,7 @@ func (d *Device) formatBuffer() alsa.Buffer {
 
 	if d.ab.Format.Rate != d.SampleRate {
 		// Convert rate.
-		formatted.Data, err = pcm.Resample(formatted, d.SampleRate)
+		formatted, err = pcm.Resample(formatted, d.SampleRate)
 		if err != nil {
 			d.l.Log(logger.Fatal, pkg+"rate conversion failed", "error", err.Error())
 		}
@@ -394,3 +420,28 @@ func (d *Device) formatBuffer() alsa.Buffer {
 
 	return formatted
 }
+
+// nearestPowerOfTwo finds and returns the nearest power of two to the given integer.
+// If the lower and higher power of two are the same distance, it returns the higher power.
+// For n <= 0, 1 is returned, and 1 maps to 2. The lower power is used if the higher overflows int.
+func nearestPowerOfTwo(n int) int {
+	if n <= 0 {
+		return 1
+	}
+	if n == 1 {
+		return 2
+	}
+	// Smear the top set bit of n-1 into every lower bit, giving 2^k - 1 where 2^k >= n.
+	// Shifts of 1, 2, 4, ..., 32 cover every bit of a 32 or 64 bit int.
+	v := n - 1
+	for s := uint(1); s < 64; s <<= 1 {
+		v |= v >> s
+	}
+	lower := v - v>>1    // 2^(k-1)
+	higher := lower << 1 // 2^k; wraps to a negative value if it does not fit in int.
+	// Take the lower power when the higher one overflowed or is strictly further from n.
+	if higher <= 0 || higher-n > n-lower {
+		return lower
+	}
+	return higher
+}
diff --git a/input/audio/audio_test.go b/input/audio/audio_test.go
index 02bf7e0a..fff51b07 100644
--- a/input/audio/audio_test.go
+++ b/input/audio/audio_test.go
@@ -144,3 +144,13 @@ func TestDevice(t *testing.T) {
 	time.Sleep(time.Duration(ac.RecPeriod*float64(time.Second)) * time.Duration(n))
 	ai.Stop()
 }
+
+func TestNearestPowerOfTwo(t *testing.T) {
+	// Each pair is {input, want}: ties resolve upward, inputs below 1 give 1, and 1 gives 2.
+	tests := [][2]int{{36, 32}, {47, 32}, {3, 4}, {46, 32}, {7, 8}, {2, 2}, {757, 512}, {2464, 2048}, {18980, 16384}, {70000, 65536}, {8192, 8192}, {2048, 2048}, {65536, 65536}, {-2048, 1}, {-127, 1}, {-1, 1}, {0, 1}, {1, 2}}
+	for _, tc := range tests {
+		if got := nearestPowerOfTwo(tc[0]); got != tc[1] {
+			t.Errorf("nearestPowerOfTwo(%v) = %v, want %v", tc[0], got, tc[1])
+		}
+	}
+}