mirror of https://bitbucket.org/ausocean/av.git
audio and revid: changes for pr
added license to lex.go changed pcm functions to return alsa.Buffers style, syntax and clarification added to audio.go new method of finding buffersize in audio.go uses a new function called nearestPowerOfTwo
This commit is contained in:
parent
d23f40c85d
commit
9fe09255be
|
@ -1,3 +1,27 @@
|
|||
/*
|
||||
NAME
|
||||
lex.go
|
||||
|
||||
AUTHOR
|
||||
Trek Hopton <trek@ausocean.org>
|
||||
|
||||
LICENSE
|
||||
This file is Copyright (C) 2019 the Australian Ocean Lab (AusOcean)
|
||||
|
||||
It is free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation, either version 3 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
It is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License in gpl.txt.
|
||||
If not, see [GNU licenses](http://www.gnu.org/licenses).
|
||||
*/
|
||||
|
||||
package codecutil
|
||||
|
||||
import (
|
||||
|
|
|
@ -35,20 +35,21 @@ import (
|
|||
"github.com/yobert/alsa"
|
||||
)
|
||||
|
||||
// Resample takes an alsa.Buffer (b) and resamples the pcm audio data to 'rate' Hz and returns the resulting pcm.
|
||||
// If an error occurs, an error will be returned along with the original b's data.
|
||||
// Resample takes alsa.Buffer b and resamples the pcm audio data to 'rate' Hz and returns an alsa.Buffer with the resampled data.
|
||||
// Notes:
|
||||
// - Currently only downsampling is implemented and b's rate must be divisible by 'rate' or an error will occur.
|
||||
// - If the number of bytes in b.Data is not divisible by the decimation factor (ratioFrom), the remaining bytes will
|
||||
// not be included in the result. Eg. input of length 480002 downsampling 6:1 will result in output length 80000.
|
||||
func Resample(b alsa.Buffer, rate int) ([]byte, error) {
|
||||
fromRate := b.Format.Rate
|
||||
if fromRate == rate {
|
||||
return b.Data, nil
|
||||
} else if fromRate < 0 {
|
||||
return nil, fmt.Errorf("Unable to convert from: %v Hz", fromRate)
|
||||
} else if rate < 0 {
|
||||
return nil, fmt.Errorf("Unable to convert to: %v Hz", rate)
|
||||
func Resample(b alsa.Buffer, rate int) (alsa.Buffer, error) {
|
||||
var newBuf alsa.Buffer
|
||||
if b.Format.Rate == rate {
|
||||
return newBuf, nil
|
||||
}
|
||||
if b.Format.Rate < 0 {
|
||||
return newBuf, fmt.Errorf("Unable to convert from: %v Hz", b.Format.Rate)
|
||||
}
|
||||
if rate < 0 {
|
||||
return newBuf, fmt.Errorf("Unable to convert to: %v Hz", rate)
|
||||
}
|
||||
|
||||
// The number of bytes in a sample.
|
||||
|
@ -59,22 +60,22 @@ func Resample(b alsa.Buffer, rate int) ([]byte, error) {
|
|||
case alsa.S16_LE:
|
||||
sampleLen = 2 * b.Format.Channels
|
||||
default:
|
||||
return nil, fmt.Errorf("Unhandled ALSA format: %v", b.Format.SampleFormat)
|
||||
return newBuf, fmt.Errorf("Unhandled ALSA format: %v", b.Format.SampleFormat)
|
||||
}
|
||||
inPcmLen := len(b.Data)
|
||||
|
||||
// Calculate sample rate ratio ratioFrom:ratioTo.
|
||||
rateGcd := gcd(rate, fromRate)
|
||||
ratioFrom := fromRate / rateGcd
|
||||
rateGcd := gcd(rate, b.Format.Rate)
|
||||
ratioFrom := b.Format.Rate / rateGcd
|
||||
ratioTo := rate / rateGcd
|
||||
|
||||
// ratioTo = 1 is the only number that will result in an even sampling.
|
||||
if ratioTo != 1 {
|
||||
return nil, fmt.Errorf("unhandled from:to rate ratio %v:%v: 'to' must be 1", ratioFrom, ratioTo)
|
||||
return newBuf, fmt.Errorf("unhandled from:to rate ratio %v:%v: 'to' must be 1", ratioFrom, ratioTo)
|
||||
}
|
||||
|
||||
newLen := inPcmLen / ratioFrom
|
||||
result := make([]byte, 0, newLen)
|
||||
resampled := make([]byte, 0, newLen)
|
||||
|
||||
// For each new sample to be generated, loop through the respective 'ratioFrom' samples in 'b.Data' to add them
|
||||
// up and average them. The result is the new sample.
|
||||
|
@ -96,19 +97,31 @@ func Resample(b alsa.Buffer, rate int) ([]byte, error) {
|
|||
case alsa.S16_LE:
|
||||
binary.LittleEndian.PutUint16(bAvg, uint16(avg))
|
||||
}
|
||||
result = append(result, bAvg...)
|
||||
resampled = append(resampled, bAvg...)
|
||||
}
|
||||
return result, nil
|
||||
|
||||
// Create new alsa.Buffer with resampled data.
|
||||
newBuf = alsa.Buffer{
|
||||
Format: alsa.BufferFormat{
|
||||
Channels: b.Format.Channels,
|
||||
SampleFormat: b.Format.SampleFormat,
|
||||
Rate: rate,
|
||||
},
|
||||
Data: resampled,
|
||||
}
|
||||
|
||||
return newBuf, nil
|
||||
}
|
||||
|
||||
// StereoToMono returns raw mono audio data generated from only the left channel from
|
||||
// the given stereo recording (ALSA buffer)
|
||||
// If an error occurs, it is returned along with an empty alsa.Buffer.
|
||||
func StereoToMono(b alsa.Buffer) ([]byte, error) {
|
||||
func StereoToMono(b alsa.Buffer) (alsa.Buffer, error) {
|
||||
var newBuf alsa.Buffer
|
||||
if b.Format.Channels == 1 {
|
||||
return b.Data, nil
|
||||
} else if b.Format.Channels != 2 {
|
||||
return nil, fmt.Errorf("Audio is not stereo or mono, it has %v channels", b.Format.Channels)
|
||||
return b, nil
|
||||
}
|
||||
if b.Format.Channels != 2 {
|
||||
return newBuf, fmt.Errorf("Audio is not stereo or mono, it has %v channels", b.Format.Channels)
|
||||
}
|
||||
|
||||
var stereoSampleBytes int
|
||||
|
@ -118,7 +131,7 @@ func StereoToMono(b alsa.Buffer) ([]byte, error) {
|
|||
case alsa.S16_LE:
|
||||
stereoSampleBytes = 4
|
||||
default:
|
||||
return nil, fmt.Errorf("Unhandled ALSA format %v", b.Format.SampleFormat)
|
||||
return newBuf, fmt.Errorf("Unhandled ALSA format %v", b.Format.SampleFormat)
|
||||
}
|
||||
|
||||
recLength := len(b.Data)
|
||||
|
@ -134,7 +147,17 @@ func StereoToMono(b alsa.Buffer) ([]byte, error) {
|
|||
}
|
||||
}
|
||||
|
||||
return mono, nil
|
||||
// Create new alsa.Buffer with mono data.
|
||||
newBuf = alsa.Buffer{
|
||||
Format: alsa.BufferFormat{
|
||||
Channels: 1,
|
||||
SampleFormat: b.Format.SampleFormat,
|
||||
Rate: b.Format.Rate,
|
||||
},
|
||||
Data: mono,
|
||||
}
|
||||
|
||||
return newBuf, nil
|
||||
}
|
||||
|
||||
// gcd is used for calculating the greatest common divisor of two positive integers, a and b.
|
||||
|
|
|
@ -71,7 +71,7 @@ func TestResample(t *testing.T) {
|
|||
}
|
||||
|
||||
// Compare result with expected.
|
||||
if !bytes.Equal(resampled, exp) {
|
||||
if !bytes.Equal(resampled.Data, exp) {
|
||||
t.Error("Resampled data does not match expected result.")
|
||||
}
|
||||
}
|
||||
|
@ -112,7 +112,7 @@ func TestStereoToMono(t *testing.T) {
|
|||
}
|
||||
|
||||
// Compare result with expected.
|
||||
if !bytes.Equal(mono, exp) {
|
||||
if !bytes.Equal(mono.Data, exp) {
|
||||
t.Error("Converted data does not match expected result.")
|
||||
}
|
||||
}
|
||||
|
|
|
@ -28,7 +28,7 @@ package pes
|
|||
|
||||
import "github.com/Comcast/gots"
|
||||
|
||||
const MaxPesSize = 64 * 1 << 10 // 65536
|
||||
const MaxPesSize = 64 * 1 << 10
|
||||
|
||||
/*
|
||||
The below data struct encapsulates the fields of a PES packet. Below is
|
||||
|
|
|
@ -30,7 +30,6 @@ import (
|
|||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
|
@ -57,7 +56,7 @@ const (
|
|||
stopped
|
||||
)
|
||||
|
||||
// Rates contains the audio sample rates used by audio.
|
||||
// Rates contains the standard audio sample rates used by package audio.
|
||||
var Rates = [8]int{8000, 16000, 32000, 44100, 48000, 88200, 96000, 192000}
|
||||
|
||||
// Device holds everything we need to know about the audio input stream.
|
||||
|
@ -98,31 +97,41 @@ type Logger interface {
|
|||
|
||||
// NewDevice initializes and returns a Device which can be started, read from, and stopped.
|
||||
func NewDevice(cfg *Config, l Logger) (*Device, error) {
|
||||
d := &Device{}
|
||||
d.Config = cfg
|
||||
d.l = l
|
||||
d := &Device{
|
||||
Config: cfg,
|
||||
l: l,
|
||||
}
|
||||
|
||||
// Open the requested audio device.
|
||||
err := d.open()
|
||||
if err != nil {
|
||||
d.l.Log(logger.Error, pkg+"failed to open audio device", "error", err.Error())
|
||||
return nil, errors.New("failed to open audio device")
|
||||
d.l.Log(logger.Error, pkg+"failed to open device")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Setup ring buffer to capture audio in periods of d.RecPeriod seconds and buffer rbDuration seconds in total.
|
||||
// Setup the device to record with desired period.
|
||||
d.ab = d.dev.NewBufferDuration(time.Duration(d.RecPeriod * float64(time.Second)))
|
||||
cs := (float64((len(d.ab.Data)/d.dev.BufferFormat().Channels)*d.Channels) / float64(d.dev.BufferFormat().Rate)) * float64(d.SampleRate)
|
||||
if cs < 1 {
|
||||
d.l.Log(logger.Error, pkg+"given Config parameters are too small", "error", err.Error())
|
||||
|
||||
// Account for channel conversion.
|
||||
chunkSize := float64(len(d.ab.Data) / d.dev.BufferFormat().Channels * d.Channels)
|
||||
|
||||
// Account for resampling.
|
||||
chunkSize = (chunkSize / float64(d.dev.BufferFormat().Rate)) * float64(d.SampleRate)
|
||||
if chunkSize < 1 {
|
||||
return nil, errors.New("given Config parameters are too small")
|
||||
}
|
||||
|
||||
// Account for codec conversion.
|
||||
if d.Codec == codecutil.ADPCM {
|
||||
d.chunkSize = adpcm.EncBytes(int(cs))
|
||||
d.chunkSize = adpcm.EncBytes(int(chunkSize))
|
||||
} else {
|
||||
d.chunkSize = int(cs)
|
||||
d.chunkSize = int(chunkSize)
|
||||
}
|
||||
|
||||
// Create ring buffer with appropriate chunk size.
|
||||
d.rb = ring.NewBuffer(rbLen, d.chunkSize, rbTimeout)
|
||||
|
||||
// Start device in paused mode.
|
||||
d.mode = paused
|
||||
go d.input()
|
||||
|
||||
|
@ -211,10 +220,11 @@ func (d *Device) open() error {
|
|||
|
||||
// 2 channels is what most devices need to record in. If mono is requested,
|
||||
// the recording will be converted in formatBuffer().
|
||||
_, err = d.dev.NegotiateChannels(2)
|
||||
devChan, err := d.dev.NegotiateChannels(2)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
d.l.Log(logger.Debug, pkg+"alsa device channels set", "channels", devChan)
|
||||
|
||||
// Try to negotiate a rate to record in that is divisible by the wanted rate
|
||||
// so that it can be easily downsampled to the wanted rate.
|
||||
|
@ -222,15 +232,16 @@ func (d *Device) open() error {
|
|||
// Eg. the audioinjector sound card is supposed to record at 8000Hz and 16000Hz but it can't due to a firmware issue,
|
||||
// a fix for this is to remove 8000 and 16000 from the Rates slice.
|
||||
foundRate := false
|
||||
var devRate int
|
||||
for i := 0; i < len(Rates) && !foundRate; i++ {
|
||||
if Rates[i] < d.SampleRate {
|
||||
continue
|
||||
}
|
||||
if Rates[i]%d.SampleRate == 0 {
|
||||
_, err = d.dev.NegotiateRate(Rates[i])
|
||||
devRate, err = d.dev.NegotiateRate(Rates[i])
|
||||
if err == nil {
|
||||
foundRate = true
|
||||
d.l.Log(logger.Debug, pkg+"Sample rate set", "rate", Rates[i])
|
||||
d.l.Log(logger.Debug, pkg+"alsa device sample rate set", "rate", devRate)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -238,11 +249,11 @@ func (d *Device) open() error {
|
|||
// If no easily divisible rate is found, then use the default rate.
|
||||
if !foundRate {
|
||||
d.l.Log(logger.Warning, pkg+"Unable to sample at requested rate, default used.", "rateRequested", d.SampleRate)
|
||||
_, err = d.dev.NegotiateRate(defaultSampleRate)
|
||||
devRate, err = d.dev.NegotiateRate(defaultSampleRate)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
d.l.Log(logger.Debug, pkg+"Sample rate set", "rate", defaultSampleRate)
|
||||
d.l.Log(logger.Debug, pkg+"alsa device sample rate set", "rate", devRate)
|
||||
}
|
||||
|
||||
var aFmt alsa.FormatType
|
||||
|
@ -254,21 +265,46 @@ func (d *Device) open() error {
|
|||
default:
|
||||
return fmt.Errorf("unsupported sample bits %v", d.BitDepth)
|
||||
}
|
||||
_, err = d.dev.NegotiateFormat(aFmt)
|
||||
devFmt, err := d.dev.NegotiateFormat(aFmt)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var devBits int
|
||||
switch devFmt {
|
||||
case alsa.S16_LE:
|
||||
devBits = 16
|
||||
case alsa.S32_LE:
|
||||
devBits = 32
|
||||
default:
|
||||
return fmt.Errorf("unsupported sample bits %v", d.BitDepth)
|
||||
}
|
||||
d.l.Log(logger.Debug, pkg+"alsa device bit depth set", "bitdepth", devBits)
|
||||
|
||||
// Either 8192 or 16384 bytes is a reasonable ALSA buffer size.
|
||||
_, err = d.dev.NegotiateBufferSize(8192, 16384)
|
||||
// A 50ms period is a sensible value for low-ish latency. (this could be made configurable if needed)
|
||||
// Some devices only accept even period sizes while others want powers of 2.
|
||||
// So we will find the closest power of 2 to the desired period size.
|
||||
const wantPeriod = 0.05 //seconds
|
||||
secondSize := devRate * devChan * (devBits / 8)
|
||||
wantPeriodSize := int(float64(secondSize) * wantPeriod)
|
||||
nearWantPeriodSize := nearestPowerOfTwo(wantPeriodSize)
|
||||
|
||||
devPeriodSize, err := d.dev.NegotiatePeriodSize(nearWantPeriodSize)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
d.l.Log(logger.Debug, pkg+"alsa device period size set", "periodsize", devPeriodSize)
|
||||
|
||||
devBufferSize, err := d.dev.NegotiateBufferSize(devPeriodSize * 2)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
d.l.Log(logger.Debug, pkg+"alsa device buffer size set", "buffersize", devBufferSize)
|
||||
|
||||
if err = d.dev.Prepare(); err != nil {
|
||||
return err
|
||||
}
|
||||
d.l.Log(logger.Debug, pkg+"Successfully negotiated ALSA params")
|
||||
|
||||
d.l.Log(logger.Debug, pkg+"successfully negotiated ALSA params")
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -307,7 +343,7 @@ func (d *Device) input() {
|
|||
}
|
||||
|
||||
// Process audio.
|
||||
d.l.Log(logger.Debug, "processing audio")
|
||||
d.l.Log(logger.Debug, pkg+"processing audio")
|
||||
toWrite := d.formatBuffer()
|
||||
|
||||
// Write audio to ringbuffer.
|
||||
|
@ -328,24 +364,15 @@ func (d *Device) input() {
|
|||
func (d *Device) Read(p []byte) (n int, err error) {
|
||||
// Ready ringbuffer for read.
|
||||
_, err = d.rb.Next(rbNextTimeout)
|
||||
switch err {
|
||||
case nil:
|
||||
case ring.ErrTimeout:
|
||||
return 0, nil
|
||||
default:
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// Read from ring buffer.
|
||||
n, err = d.rb.Read(p)
|
||||
switch err {
|
||||
case nil:
|
||||
case io.EOF:
|
||||
return 0, nil
|
||||
default:
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return n, nil
|
||||
}
|
||||
|
||||
|
@ -357,13 +384,12 @@ func (d *Device) formatBuffer() alsa.Buffer {
|
|||
if d.ab.Format.Channels == d.Channels && d.ab.Format.Rate == d.SampleRate {
|
||||
return d.ab
|
||||
}
|
||||
|
||||
formatted := alsa.Buffer{Format: d.ab.Format, Data: d.ab.Data}
|
||||
var formatted alsa.Buffer
|
||||
if d.ab.Format.Channels != d.Channels {
|
||||
// Convert channels.
|
||||
// TODO(Trek): Make this work for conversions other than stereo to mono.
|
||||
if d.ab.Format.Channels == 2 && d.Channels == 1 {
|
||||
formatted.Data, err = pcm.StereoToMono(d.ab)
|
||||
formatted, err = pcm.StereoToMono(d.ab)
|
||||
if err != nil {
|
||||
d.l.Log(logger.Fatal, pkg+"channel conversion failed", "error", err.Error())
|
||||
}
|
||||
|
@ -372,7 +398,7 @@ func (d *Device) formatBuffer() alsa.Buffer {
|
|||
|
||||
if d.ab.Format.Rate != d.SampleRate {
|
||||
// Convert rate.
|
||||
formatted.Data, err = pcm.Resample(formatted, d.SampleRate)
|
||||
formatted, err = pcm.Resample(formatted, d.SampleRate)
|
||||
if err != nil {
|
||||
d.l.Log(logger.Fatal, pkg+"rate conversion failed", "error", err.Error())
|
||||
}
|
||||
|
@ -394,3 +420,28 @@ func (d *Device) formatBuffer() alsa.Buffer {
|
|||
|
||||
return formatted
|
||||
}
|
||||
|
||||
// nearestPowerOfTwo finds and returns the power of two nearest to the given integer.
// If the lower and higher powers of two are the same distance away, the higher power is returned.
// For values less than 1 (zero and negatives), 1 is returned.
// Note that n == 1 yields 2 rather than 1; this existing behaviour is preserved.
//
// The result is computed without assuming a particular width of int, so it is
// correct for the full positive range on both 32-bit and 64-bit platforms. If the
// higher power of two is not representable as an int, the lower power is returned.
func nearestPowerOfTwo(n int) int {
	if n <= 0 {
		return 1
	}
	if n == 1 {
		return 2
	}

	// Find the largest power of two that does not exceed n. Comparing against
	// n>>1 guarantees that doubling lower never overshoots n, so it cannot overflow.
	lower := 1
	for lower <= n>>1 {
		lower <<= 1
	}
	if lower == n {
		// n is already a power of two.
		return n
	}

	// The next power up may not fit in an int; fall back to the lower power if so.
	higher := lower << 1
	if higher <= 0 {
		return lower
	}

	// Ties go to the higher power.
	if higher-n > n-lower {
		return lower
	}
	return higher
}
|
||||
|
|
|
@ -144,3 +144,13 @@ func TestDevice(t *testing.T) {
|
|||
time.Sleep(time.Duration(ac.RecPeriod*float64(time.Second)) * time.Duration(n))
|
||||
ai.Stop()
|
||||
}
|
||||
|
||||
func TestNearestPowerOfTwo(t *testing.T) {
|
||||
testValues := []int{36, 47, 3, 46, 7, 2, 36, 757, 2464, 18980, 70000, 8192, 2048, 65536, -2048, -127, -1, 0, 1}
|
||||
testAnswers := []int{32, 32, 4, 32, 8, 2, 32, 512, 2048, 16384, 65536, 8192, 2048, 65536, 1, 1, 1, 1, 2}
|
||||
for i, v := range testValues {
|
||||
if r := nearestPowerOfTwo(v); testAnswers[i] != r {
|
||||
t.Errorf("test %v gave incorrect result: %v, should be %v", i, r, testAnswers[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue