Merged in general-pcm (pull request #279)

General pcm

Approved-by: Alan Noble <anoble@gmail.com>
This commit is contained in:
Trek Hopton 2019-11-16 13:32:39 +00:00
commit 6e3f0f2a61
6 changed files with 206 additions and 128 deletions

View File

@ -46,7 +46,7 @@ import (
"sync"
"time"
"github.com/yobert/alsa"
yalsa "github.com/yobert/alsa"
"bitbucket.org/ausocean/av/codec/pcm"
"bitbucket.org/ausocean/iot/pi/netsender"
@ -78,9 +78,9 @@ type audioClient struct {
parameters
// internals
dev *alsa.Device // audio input device
ab alsa.Buffer // ALSA's buffer
rb *ring.Buffer // our buffer
dev *yalsa.Device // audio input device
pb pcm.Buffer // Buffer to contain the direct audio from ALSA.
rb *ring.Buffer // Ring buffer to contain processed audio ready to be read.
ns *netsender.Sender // our NetSender
vs int // our "var sum" to track var changes
}
@ -132,12 +132,26 @@ func main() {
// Open the requested audio device.
err = ac.open()
if err != nil {
log.Log(logger.Fatal, "alsa.open failed", "error", err.Error())
log.Log(logger.Fatal, "yalsa.open failed", "error", err.Error())
}
// Capture audio in periods of ac.period seconds, and buffer rbDuration seconds in total.
ac.ab = ac.dev.NewBufferDuration(time.Second * time.Duration(ac.period))
recSize := (((len(ac.ab.Data) / ac.dev.BufferFormat().Channels) * ac.channels) / ac.dev.BufferFormat().Rate) * ac.rate
ab := ac.dev.NewBufferDuration(time.Second * time.Duration(ac.period))
sf, err := pcm.SFFromString(ab.Format.SampleFormat.String())
if err != nil {
log.Log(logger.Error, err.Error())
}
cf := pcm.BufferFormat{
SFormat: sf,
Channels: ab.Format.Channels,
Rate: ab.Format.Rate,
}
ac.pb = pcm.Buffer{
Format: cf,
Data: ab.Data,
}
recSize := (((len(ac.pb.Data) / ac.dev.BufferFormat().Channels) * ac.channels) / ac.dev.BufferFormat().Rate) * ac.rate
rbLen := rbDuration / ac.period
ac.rb = ring.NewBuffer(rbLen, recSize, rbTimeout)
@ -217,11 +231,11 @@ func (ac *audioClient) open() error {
}
log.Log(logger.Debug, "opening", "source", ac.source)
cards, err := alsa.OpenCards()
cards, err := yalsa.OpenCards()
if err != nil {
return err
}
defer alsa.CloseCards(cards)
defer yalsa.CloseCards(cards)
for _, card := range cards {
devices, err := card.Devices()
@ -229,7 +243,7 @@ func (ac *audioClient) open() error {
return err
}
for _, dev := range devices {
if dev.Type != alsa.PCM || !dev.Record {
if dev.Type != yalsa.PCM || !dev.Record {
continue
}
if dev.Title == ac.source || ac.source == "" {
@ -287,12 +301,12 @@ func (ac *audioClient) open() error {
log.Log(logger.Debug, "sample rate set", "rate", defaultFrameRate)
}
var fmt alsa.FormatType
var fmt yalsa.FormatType
switch ac.bits {
case 16:
fmt = alsa.S16_LE
fmt = yalsa.S16_LE
case 32:
fmt = alsa.S32_LE
fmt = yalsa.S32_LE
default:
return errors.New("unsupported sample bits")
}
@ -318,7 +332,7 @@ func (ac *audioClient) open() error {
// Re-opens the device and tries again if ALSA returns an error.
// Spends a lot of time sleeping in Paused mode.
// ToDo: Currently, reading audio and writing to the ringbuffer are synchronous.
// Need a way to asynchronously read from the ALSA buffer, i.e., _while_ it is recording to avoid any gaps.
// Need a way to asynchronously read from the buf, i.e., _while_ it is recording to avoid any gaps.
func (ac *audioClient) input() {
for {
ac.mu.Lock()
@ -330,14 +344,14 @@ func (ac *audioClient) input() {
}
log.Log(logger.Debug, "recording audio for period", "seconds", ac.period)
ac.mu.Lock()
err := ac.dev.Read(ac.ab.Data)
err := ac.dev.Read(ac.pb.Data)
ac.mu.Unlock()
if err != nil {
log.Log(logger.Debug, "device.Read failed", "error", err.Error())
ac.mu.Lock()
err = ac.open() // re-open
if err != nil {
log.Log(logger.Fatal, "alsa.open failed", "error", err.Error())
log.Log(logger.Fatal, "yalsa.open failed", "error", err.Error())
}
ac.mu.Unlock()
continue
@ -372,7 +386,7 @@ func (ac *audioClient) input() {
// This function also handles NetReceiver configuration requests and updating of NetReceiver vars.
func (ac *audioClient) output() {
// Calculate the size of the output data based on wanted channels and rate.
outLen := (((len(ac.ab.Data) / ac.ab.Format.Channels) * ac.channels) / ac.ab.Format.Rate) * ac.rate
outLen := (((len(ac.pb.Data) / ac.pb.Format.Channels) * ac.channels) / ac.pb.Format.Rate) * ac.rate
buf := make([]byte, outLen)
mime := "audio/x-wav;codec=pcm;rate=" + strconv.Itoa(ac.rate) + ";channels=" + strconv.Itoa(ac.channels) + ";bits=" + strconv.Itoa(ac.bits)
@ -509,9 +523,9 @@ func read(rb *ring.Buffer, buf []byte) (int, error) {
return n, nil
}
// formatBuffer returns an ALSA buffer that has the recording data from the ac's original ALSA buffer but stored
// formatBuffer returns a Buffer that has the recording data from the ac's original Buffer but stored
// in the desired format specified by the ac's parameters.
func (ac *audioClient) formatBuffer() alsa.Buffer {
func (ac *audioClient) formatBuffer() pcm.Buffer {
var err error
ac.mu.Lock()
wantChannels := ac.channels
@ -519,17 +533,17 @@ func (ac *audioClient) formatBuffer() alsa.Buffer {
ac.mu.Unlock()
// If nothing needs to be changed, return the original.
if ac.ab.Format.Channels == wantChannels && ac.ab.Format.Rate == wantRate {
return ac.ab
if ac.pb.Format.Channels == wantChannels && ac.pb.Format.Rate == wantRate {
return ac.pb
}
formatted := alsa.Buffer{Format: ac.ab.Format}
formatted := pcm.Buffer{Format: ac.pb.Format}
bufCopied := false
if ac.ab.Format.Channels != wantChannels {
if ac.pb.Format.Channels != wantChannels {
// Convert channels.
if ac.ab.Format.Channels == 2 && wantChannels == 1 {
if formatted, err = pcm.StereoToMono(ac.ab); err != nil {
if ac.pb.Format.Channels == 2 && wantChannels == 1 {
if formatted, err = pcm.StereoToMono(ac.pb); err != nil {
log.Log(logger.Warning, "channel conversion failed, audio has remained stereo", "error", err.Error())
} else {
formatted.Format.Channels = 1
@ -538,13 +552,13 @@ func (ac *audioClient) formatBuffer() alsa.Buffer {
}
}
if ac.ab.Format.Rate != wantRate {
if ac.pb.Format.Rate != wantRate {
// Convert rate.
if bufCopied {
formatted, err = pcm.Resample(formatted, wantRate)
} else {
formatted, err = pcm.Resample(ac.ab, wantRate)
formatted, err = pcm.Resample(ac.pb, wantRate)
}
if err != nil {
log.Log(logger.Warning, "rate conversion failed, audio has remained original rate", "error", err.Error())

View File

@ -32,105 +32,135 @@ import (
"encoding/binary"
"fmt"
"github.com/yobert/alsa"
"github.com/pkg/errors"
)
// Resample takes alsa.Buffer b and resamples the pcm audio data to 'rate' Hz and returns an alsa.Buffer with the resampled data.
// SampleFormat is the format that a PCM Buffer's samples can be in.
// It is an integer enumeration; use String and SFFromString to convert
// to and from the textual names (e.g. "S16_LE").
type SampleFormat int
// Unknown is used to represent an unknown format. It is the value that
// SFFromString returns alongside an error when the name is not recognised.
const (
Unknown SampleFormat = -1
)
// Sample formats that we use.
// These live in their own const block so that iota starts from zero at S16_LE;
// merging this block with the one above would change the numeric values.
const (
S16_LE SampleFormat = iota // Signed 16-bit samples, little-endian byte order.
S32_LE // Signed 32-bit samples, little-endian byte order.
// There are many more:
// https://linux.die.net/man/1/arecord
// https://trac.ffmpeg.org/wiki/audio%20types
)
// BufferFormat contains the format for a PCM Buffer: how each sample is
// encoded, how many samples per second there are, and how many channels
// the audio has.
type BufferFormat struct {
SFormat SampleFormat // Encoding of each sample, e.g. S16_LE.
Rate int // Sample rate in Hz.
Channels int // Number of audio channels, e.g. 1 for mono and 2 for stereo.
}
// Buffer contains a buffer of PCM data and the format that it is in.
// It is the type accepted and returned by Resample and StereoToMono.
type Buffer struct {
Format BufferFormat // Describes how the bytes in Data are to be interpreted.
Data []byte // Raw PCM audio bytes.
}
// Resample takes Buffer c and resamples the pcm audio data to 'rate' Hz and returns a Buffer with the resampled data.
// Notes:
// - Currently only downsampling is implemented and b's rate must be divisible by 'rate' or an error will occur.
// - If the number of bytes in b.Data is not divisible by the decimation factor (ratioFrom), the remaining bytes will
// - Currently only downsampling is implemented and c's rate must be divisible by 'rate' or an error will occur.
// - If the number of bytes in c.Data is not divisible by the decimation factor (ratioFrom), the remaining bytes will
// not be included in the result. Eg. input of length 480002 downsampling 6:1 will result in output length 80000.
func Resample(b alsa.Buffer, rate int) (alsa.Buffer, error) {
if b.Format.Rate == rate {
return b, nil
func Resample(c Buffer, rate int) (Buffer, error) {
if c.Format.Rate == rate {
return c, nil
}
if b.Format.Rate < 0 {
return alsa.Buffer{}, fmt.Errorf("Unable to convert from: %v Hz", b.Format.Rate)
if c.Format.Rate < 0 {
return Buffer{}, fmt.Errorf("Unable to convert from: %v Hz", c.Format.Rate)
}
if rate < 0 {
return alsa.Buffer{}, fmt.Errorf("Unable to convert to: %v Hz", rate)
return Buffer{}, fmt.Errorf("Unable to convert to: %v Hz", rate)
}
// The number of bytes in one frame, i.e. one sample for every channel.
var sampleLen int
switch b.Format.SampleFormat {
case alsa.S32_LE:
sampleLen = 4 * b.Format.Channels
case alsa.S16_LE:
sampleLen = 2 * b.Format.Channels
switch c.Format.SFormat {
case S32_LE:
sampleLen = 4 * c.Format.Channels
case S16_LE:
sampleLen = 2 * c.Format.Channels
default:
return alsa.Buffer{}, fmt.Errorf("Unhandled ALSA format: %v", b.Format.SampleFormat)
return Buffer{}, fmt.Errorf("Unhandled ALSA format: %v", c.Format.SFormat)
}
inPcmLen := len(b.Data)
inPcmLen := len(c.Data)
// Calculate sample rate ratio ratioFrom:ratioTo.
rateGcd := gcd(rate, b.Format.Rate)
ratioFrom := b.Format.Rate / rateGcd
rateGcd := gcd(rate, c.Format.Rate)
ratioFrom := c.Format.Rate / rateGcd
ratioTo := rate / rateGcd
// ratioTo = 1 is the only number that will result in an even sampling.
if ratioTo != 1 {
return alsa.Buffer{}, fmt.Errorf("unhandled from:to rate ratio %v:%v: 'to' must be 1", ratioFrom, ratioTo)
return Buffer{}, fmt.Errorf("unhandled from:to rate ratio %v:%v: 'to' must be 1", ratioFrom, ratioTo)
}
newLen := inPcmLen / ratioFrom
resampled := make([]byte, 0, newLen)
// For each new sample to be generated, loop through the respective 'ratioFrom' samples in 'b.Data' to add them
// For each new sample to be generated, loop through the respective 'ratioFrom' samples in 'c.Data' to add them
// up and average them. The result is the new sample.
bAvg := make([]byte, sampleLen)
for i := 0; i < newLen/sampleLen; i++ {
var sum int
for j := 0; j < ratioFrom; j++ {
switch b.Format.SampleFormat {
case alsa.S32_LE:
sum += int(int32(binary.LittleEndian.Uint32(b.Data[(i*ratioFrom*sampleLen)+(j*sampleLen) : (i*ratioFrom*sampleLen)+((j+1)*sampleLen)])))
case alsa.S16_LE:
sum += int(int16(binary.LittleEndian.Uint16(b.Data[(i*ratioFrom*sampleLen)+(j*sampleLen) : (i*ratioFrom*sampleLen)+((j+1)*sampleLen)])))
switch c.Format.SFormat {
case S32_LE:
sum += int(int32(binary.LittleEndian.Uint32(c.Data[(i*ratioFrom*sampleLen)+(j*sampleLen) : (i*ratioFrom*sampleLen)+((j+1)*sampleLen)])))
case S16_LE:
sum += int(int16(binary.LittleEndian.Uint16(c.Data[(i*ratioFrom*sampleLen)+(j*sampleLen) : (i*ratioFrom*sampleLen)+((j+1)*sampleLen)])))
}
}
avg := sum / ratioFrom
switch b.Format.SampleFormat {
case alsa.S32_LE:
switch c.Format.SFormat {
case S32_LE:
binary.LittleEndian.PutUint32(bAvg, uint32(avg))
case alsa.S16_LE:
case S16_LE:
binary.LittleEndian.PutUint16(bAvg, uint16(avg))
}
resampled = append(resampled, bAvg...)
}
// Return a new alsa.Buffer with resampled data.
return alsa.Buffer{
Format: alsa.BufferFormat{
Channels: b.Format.Channels,
SampleFormat: b.Format.SampleFormat,
Rate: rate,
// Return a new Buffer with resampled data.
return Buffer{
Format: BufferFormat{
Channels: c.Format.Channels,
SFormat: c.Format.SFormat,
Rate: rate,
},
Data: resampled,
}, nil
}
// StereoToMono returns raw mono audio data generated from only the left channel from
// the given stereo recording (ALSA buffer)
func StereoToMono(b alsa.Buffer) (alsa.Buffer, error) {
if b.Format.Channels == 1 {
return b, nil
// the given stereo Buffer.
func StereoToMono(c Buffer) (Buffer, error) {
if c.Format.Channels == 1 {
return c, nil
}
if b.Format.Channels != 2 {
return alsa.Buffer{}, fmt.Errorf("Audio is not stereo or mono, it has %v channels", b.Format.Channels)
if c.Format.Channels != 2 {
return Buffer{}, fmt.Errorf("Audio is not stereo or mono, it has %v channels", c.Format.Channels)
}
var stereoSampleBytes int
switch b.Format.SampleFormat {
case alsa.S32_LE:
switch c.Format.SFormat {
case S32_LE:
stereoSampleBytes = 8
case alsa.S16_LE:
case S16_LE:
stereoSampleBytes = 4
default:
return alsa.Buffer{}, fmt.Errorf("Unhandled ALSA format %v", b.Format.SampleFormat)
return Buffer{}, fmt.Errorf("Unhandled sample format %v", c.Format.SFormat)
}
recLength := len(b.Data)
recLength := len(c.Data)
mono := make([]byte, recLength/2)
// Convert to mono: for each byte in the stereo recording, if it's in the first half of a stereo sample
@ -138,17 +168,17 @@ func StereoToMono(b alsa.Buffer) (alsa.Buffer, error) {
var inc int
for i := 0; i < recLength; i++ {
if i%stereoSampleBytes < stereoSampleBytes/2 {
mono[inc] = b.Data[i]
mono[inc] = c.Data[i]
inc++
}
}
// Return a new alsa.Buffer with resampled data.
return alsa.Buffer{
Format: alsa.BufferFormat{
Channels: 1,
SampleFormat: b.Format.SampleFormat,
Rate: b.Format.Rate,
// Return a new Buffer with the mono data.
return Buffer{
Format: BufferFormat{
Channels: 1,
SFormat: c.Format.SFormat,
Rate: c.Format.Rate,
},
Data: mono,
}, nil
@ -162,3 +192,27 @@ func gcd(a, b int) int {
}
return a
}
// String returns the string representation of a SampleFormat.
// The supported formats yield their constant names ("S16_LE", "S32_LE");
// every other value, including Unknown, yields "Unknown".
func (f SampleFormat) String() string {
	if f == S16_LE {
		return "S16_LE"
	}
	if f == S32_LE {
		return "S32_LE"
	}
	return "Unknown"
}
// SFFromString takes a string representing a sample format and returns the corresponding SampleFormat.
// Only the exact names "S16_LE" and "S32_LE" are recognised; for any other
// input it returns Unknown together with a non-nil error naming the input.
func SFFromString(s string) (SampleFormat, error) {
	if s == "S16_LE" {
		return S16_LE, nil
	}
	if s == "S32_LE" {
		return S32_LE, nil
	}
	return Unknown, errors.Errorf("unknown sample format (%s)", s)
}

View File

@ -31,8 +31,6 @@ import (
"io/ioutil"
"log"
"testing"
"github.com/yobert/alsa"
)
// TestResample tests the Resample function using a pcm file that contains audio of a freq. sweep.
@ -47,13 +45,13 @@ func TestResample(t *testing.T) {
log.Fatal(err)
}
format := alsa.BufferFormat{
Channels: 1,
Rate: 48000,
SampleFormat: alsa.S16_LE,
format := BufferFormat{
Channels: 1,
Rate: 48000,
SFormat: S16_LE,
}
buf := alsa.Buffer{
buf := Buffer{
Format: format,
Data: inPcm,
}
@ -88,13 +86,13 @@ func TestStereoToMono(t *testing.T) {
log.Fatal(err)
}
format := alsa.BufferFormat{
Channels: 2,
Rate: 44100,
SampleFormat: alsa.S16_LE,
format := BufferFormat{
Channels: 2,
Rate: 44100,
SFormat: S16_LE,
}
buf := alsa.Buffer{
buf := Buffer{
Format: format,
Data: inPcm,
}

View File

@ -68,8 +68,8 @@ type ALSA struct {
mu sync.Mutex // Provides synchronisation when changing modes concurrently.
title string // Name of audio title, or empty for the default title.
dev *yalsa.Device // ALSA device's Audio input device.
ab yalsa.Buffer // ALSA device's buffer.
rb *ring.Buffer // Our buffer.
pb pcm.Buffer // Buffer to contain the direct audio from ALSA.
rb *ring.Buffer // Ring buffer to contain processed audio ready to be read.
chunkSize int // This is the number of bytes that will be stored in rb at a time.
Config // Configuration parameters for this device.
}
@ -133,10 +133,24 @@ func (d *ALSA) Set(c config.Config) error {
}
// Setup the device to record with desired period.
d.ab = d.dev.NewBufferDuration(time.Duration(d.RecPeriod * float64(time.Second)))
ab := d.dev.NewBufferDuration(time.Duration(d.RecPeriod * float64(time.Second)))
sf, err := pcm.SFFromString(ab.Format.SampleFormat.String())
if err != nil {
d.l.Log(logger.Error, pkg+err.Error())
return err
}
cf := pcm.BufferFormat{
SFormat: sf,
Channels: ab.Format.Channels,
Rate: ab.Format.Rate,
}
d.pb = pcm.Buffer{
Format: cf,
Data: ab.Data,
}
// Account for channel conversion.
chunkSize := float64(len(d.ab.Data) / d.dev.BufferFormat().Channels * d.Channels)
chunkSize := float64(len(d.pb.Data) / d.dev.BufferFormat().Channels * d.Channels)
// Account for resampling.
chunkSize = (chunkSize / float64(d.dev.BufferFormat().Rate)) * float64(d.SampleRate)
@ -373,7 +387,7 @@ func (d *ALSA) input() {
// Read from audio device.
d.l.Log(logger.Debug, pkg+"recording audio for period", "seconds", d.RecPeriod)
err := d.dev.Read(d.ab.Data)
err := d.dev.Read(d.pb.Data)
if err != nil {
d.l.Log(logger.Debug, pkg+"read failed", "error", err.Error())
err = d.open() // re-open
@ -415,26 +429,26 @@ func (d *ALSA) Read(p []byte) (int, error) {
}
// formatBuffer returns audio that has been converted to the desired format.
func (d *ALSA) formatBuffer() yalsa.Buffer {
func (d *ALSA) formatBuffer() pcm.Buffer {
var err error
// If nothing needs to be changed, return the original.
if d.ab.Format.Channels == d.Channels && d.ab.Format.Rate == d.SampleRate {
return d.ab
if d.pb.Format.Channels == d.Channels && d.pb.Format.Rate == d.SampleRate {
return d.pb
}
var formatted yalsa.Buffer
if d.ab.Format.Channels != d.Channels {
var formatted pcm.Buffer
if d.pb.Format.Channels != d.Channels {
// Convert channels.
// TODO(Trek): Make this work for conversions other than stereo to mono.
if d.ab.Format.Channels == 2 && d.Channels == 1 {
formatted, err = pcm.StereoToMono(d.ab)
if d.pb.Format.Channels == 2 && d.Channels == 1 {
formatted, err = pcm.StereoToMono(d.pb)
if err != nil {
d.l.Log(logger.Fatal, pkg+"channel conversion failed", "error", err.Error())
}
}
}
if d.ab.Format.Rate != d.SampleRate {
if d.pb.Format.Rate != d.SampleRate {
// Convert rate.
formatted, err = pcm.Resample(formatted, d.SampleRate)
if err != nil {

View File

@ -32,7 +32,6 @@ import (
"log"
"bitbucket.org/ausocean/av/codec/pcm"
"github.com/yobert/alsa"
)
// This program accepts an input pcm file and outputs a resampled pcm file.
@ -43,7 +42,7 @@ func main() {
var from = *flag.Int("from", 48000, "sample rate of input file")
var to = *flag.Int("to", 8000, "sample rate of output file")
var channels = *flag.Int("ch", 1, "number of channels in input file")
var sf = *flag.String("sf", "S16_LE", "sample format of input audio, eg. S16_LE")
var SFString = *flag.String("sf", "S16_LE", "sample format of input audio, eg. S16_LE")
flag.Parse()
// Read pcm.
@ -53,23 +52,23 @@ func main() {
}
fmt.Println("Read", len(inPcm), "bytes from file", inPath)
var sampleFormat alsa.FormatType
switch sf {
var sf pcm.SampleFormat
switch SFString {
case "S32_LE":
sampleFormat = alsa.S32_LE
sf = pcm.S32_LE
case "S16_LE":
sampleFormat = alsa.S16_LE
sf = pcm.S16_LE
default:
log.Fatalf("Unhandled ALSA format: %v", sf)
log.Fatalf("Unhandled ALSA format: %v", SFString)
}
format := alsa.BufferFormat{
Channels: channels,
Rate: from,
SampleFormat: sampleFormat,
format := pcm.BufferFormat{
Channels: channels,
Rate: from,
SFormat: sf,
}
buf := alsa.Buffer{
buf := pcm.Buffer{
Format: format,
Data: inPcm,
}

View File

@ -32,7 +32,6 @@ import (
"log"
"bitbucket.org/ausocean/av/codec/pcm"
"github.com/yobert/alsa"
)
// This program accepts an input stereo pcm file and outputs a mono pcm file.
@ -40,7 +39,7 @@ import (
func main() {
var inPath = *flag.String("in", "data.pcm", "file path of input data")
var outPath = *flag.String("out", "mono.pcm", "file path of output")
var sf = *flag.String("sf", "S16_LE", "sample format of input audio, eg. S16_LE")
var SFString = *flag.String("sf", "S16_LE", "sample format of input audio, eg. S16_LE")
flag.Parse()
// Read pcm.
@ -50,22 +49,22 @@ func main() {
}
fmt.Println("Read", len(inPcm), "bytes from file", inPath)
var sampleFormat alsa.FormatType
switch sf {
var sf pcm.SampleFormat
switch SFString {
case "S32_LE":
sampleFormat = alsa.S32_LE
sf = pcm.S32_LE
case "S16_LE":
sampleFormat = alsa.S16_LE
sf = pcm.S16_LE
default:
log.Fatalf("Unhandled ALSA format: %v", sf)
log.Fatalf("Unhandled sample format: %v", SFString)
}
format := alsa.BufferFormat{
Channels: 2,
SampleFormat: sampleFormat,
format := pcm.BufferFormat{
Channels: 2,
SFormat: sf,
}
buf := alsa.Buffer{
buf := pcm.Buffer{
Format: format,
Data: inPcm,
}