pcm: Added test and command for mono conversion. Also made resampling use alsa.Buffer

This commit is contained in:
Trek H 2019-03-13 13:19:53 +10:30
parent e9d4fb47fc
commit d04dc217ec
4 changed files with 176 additions and 42 deletions

View File

@ -7,35 +7,33 @@ import (
"github.com/yobert/alsa" "github.com/yobert/alsa"
) )
// Resample resamples pcm data (inPcm) from 'fromRate' Hz to 'toRate' Hz and returns the resulting pcm. // Resample resamples pcm data from fromBuf to 'toRate' Hz and returns the resulting pcm.
// If an error occurs, an error will be returned along with the original audio data // If an error occurs, an error will be returned along with the original fromBuf's data
// - channels: number of channels
// - bitDepth: number of bits in single sample
// Notes: // Notes:
// - Input and output is assumed to be Little Endian. // - Currently only downsampling is implemented and fromBuf's rate must be divisible by toRate or an error will occur.
// - Currently only downsampling is possible and fromRate must be divisible by toRate or an error will occur. // - If the number of bytes in fromBuf.Data is not divisible by the decimation factor (ratioFrom), the remaining bytes will
// - If the number of bytes in 'inPcm' is not divisible by the decimation factor (ratioFrom), the remaining bytes will
// not be included in the result. Eg. input of length 480002 downsampling 6:1 will result in output length 80000. // not be included in the result. Eg. input of length 480002 downsampling 6:1 will result in output length 80000.
func Resample(inPcm []byte, fromRate, toRate, channels, bitDepth int) ([]byte, error) { func Resample(fromBuf alsa.Buffer, toRate int) ([]byte, error) {
fromRate := fromBuf.Format.Rate
if fromRate == toRate { if fromRate == toRate {
return inPcm, nil return fromBuf.Data, nil
} else if fromRate < 0 { } else if fromRate < 0 {
return inPcm, fmt.Errorf("Unable to convert from: %v Hz", fromRate) return fromBuf.Data, fmt.Errorf("Unable to convert from: %v Hz", fromRate)
} else if toRate < 0 { } else if toRate < 0 {
return inPcm, fmt.Errorf("Unable to convert to: %v Hz", toRate) return fromBuf.Data, fmt.Errorf("Unable to convert to: %v Hz", toRate)
} }
// The number of bytes in a sample. // The number of bytes in a sample.
var sampleLen int var sampleLen int
switch bitDepth { switch fromBuf.Format.SampleFormat {
case 32: case alsa.S32_LE:
sampleLen = 4 * channels sampleLen = 4 * fromBuf.Format.Channels
case 16: case alsa.S16_LE:
sampleLen = 2 * channels sampleLen = 2 * fromBuf.Format.Channels
default: default:
return inPcm, fmt.Errorf("Unhandled bitDepth: %v, must be 16 or 32", bitDepth) return fromBuf.Data, fmt.Errorf("Unhandled ALSA format: %v", fromBuf.Format.SampleFormat)
} }
inPcmLen := len(inPcm) inPcmLen := len(fromBuf.Data)
// Calculate sample rate ratio ratioFrom:ratioTo. // Calculate sample rate ratio ratioFrom:ratioTo.
rateGcd := gcd(toRate, fromRate) rateGcd := gcd(toRate, fromRate)
@ -44,32 +42,30 @@ func Resample(inPcm []byte, fromRate, toRate, channels, bitDepth int) ([]byte, e
// ratioTo = 1 is the only number that will result in an even sampling. // ratioTo = 1 is the only number that will result in an even sampling.
if ratioTo != 1 { if ratioTo != 1 {
return inPcm, fmt.Errorf("%v:%v is an unhandled from:to rate ratio. must be n:1 for some rate n", ratioFrom, ratioTo) return fromBuf.Data, fmt.Errorf("%v:%v is an unhandled from:to rate ratio. must be n:1 for some rate n", ratioFrom, ratioTo)
} }
newLen := inPcmLen / ratioFrom newLen := inPcmLen / ratioFrom
result := make([]byte, 0, newLen) result := make([]byte, 0, newLen)
// For each new sample to be generated, loop through the respective 'ratioFrom' samples in 'inPcm' to add them // For each new sample to be generated, loop through the respective 'ratioFrom' samples in 'fromBuf.Data' to add them
// up and average them. The result is the new sample. // up and average them. The result is the new sample.
for i := 0; i < newLen/sampleLen; i++ { for i := 0; i < newLen/sampleLen; i++ {
var sum int var sum int
for j := 0; j < ratioFrom; j++ { for j := 0; j < ratioFrom; j++ {
switch bitDepth { switch fromBuf.Format.SampleFormat {
case 32: case alsa.S32_LE:
sum += int(int32(binary.LittleEndian.Uint32(inPcm[(i*ratioFrom*sampleLen)+(j*sampleLen) : (i*ratioFrom*sampleLen)+((j+1)*sampleLen)]))) sum += int(int32(binary.LittleEndian.Uint32(fromBuf.Data[(i*ratioFrom*sampleLen)+(j*sampleLen) : (i*ratioFrom*sampleLen)+((j+1)*sampleLen)])))
case 16: case alsa.S16_LE:
sum += int(int16(binary.LittleEndian.Uint16(inPcm[(i*ratioFrom*sampleLen)+(j*sampleLen) : (i*ratioFrom*sampleLen)+((j+1)*sampleLen)]))) sum += int(int16(binary.LittleEndian.Uint16(fromBuf.Data[(i*ratioFrom*sampleLen)+(j*sampleLen) : (i*ratioFrom*sampleLen)+((j+1)*sampleLen)])))
default:
return inPcm, fmt.Errorf("Unhandled bitDepth: %v, must be 16 or 32", bitDepth)
} }
} }
avg := sum / ratioFrom avg := sum / ratioFrom
bAvg := make([]byte, sampleLen) bAvg := make([]byte, sampleLen)
switch bitDepth { switch fromBuf.Format.SampleFormat {
case 32: case alsa.S32_LE:
binary.LittleEndian.PutUint32(bAvg, uint32(avg)) binary.LittleEndian.PutUint32(bAvg, uint32(avg))
case 16: case alsa.S16_LE:
binary.LittleEndian.PutUint16(bAvg, uint16(avg)) binary.LittleEndian.PutUint16(bAvg, uint16(avg))
} }
result = append(result, bAvg...) result = append(result, bAvg...)
@ -81,11 +77,10 @@ func Resample(inPcm []byte, fromRate, toRate, channels, bitDepth int) ([]byte, e
// the given stereo recording (ALSA buffer) // the given stereo recording (ALSA buffer)
// if an error occurs, an error will be returned along with the original stereo data. // if an error occurs, an error will be returned along with the original stereo data.
func StereoToMono(stereoBuf alsa.Buffer) ([]byte, error) { func StereoToMono(stereoBuf alsa.Buffer) ([]byte, error) {
bufChannels := stereoBuf.Format.Channels if stereoBuf.Format.Channels == 1 {
if bufChannels == 1 {
return stereoBuf.Data, nil return stereoBuf.Data, nil
} else if bufChannels != 2 { } else if stereoBuf.Format.Channels != 2 {
return stereoBuf.Data, fmt.Errorf("Audio is not stereo or mono, it has %v channels", bufChannels) return stereoBuf.Data, fmt.Errorf("Audio is not stereo or mono, it has %v channels", stereoBuf.Format.Channels)
} }
var stereoSampleBytes int var stereoSampleBytes int

View File

@ -5,10 +5,12 @@ import (
"io/ioutil" "io/ioutil"
"log" "log"
"testing" "testing"
"github.com/yobert/alsa"
) )
// TestResample accepts an input pcm file (assumed to be mono and using 16-bit samples) and outputs a resampled pcm file. // TestResample tests the Resample function using a pcm file that contains audio of a freq. sweep.
// Input and output file names can be specified as arguments. // The output of the Resample function is compared with a file containing the expected result.
func TestResample(t *testing.T) { func TestResample(t *testing.T) {
inPath := "../../../test/test-data/av/input/sweep_400Hz_20000Hz_-3dBFS_5s_48khz.pcm" inPath := "../../../test/test-data/av/input/sweep_400Hz_20000Hz_-3dBFS_5s_48khz.pcm"
expPath := "../../../test/test-data/av/output/sweep_400Hz_20000Hz_resampled_48to8kHz.pcm" expPath := "../../../test/test-data/av/output/sweep_400Hz_20000Hz_resampled_48to8kHz.pcm"
@ -19,8 +21,19 @@ func TestResample(t *testing.T) {
log.Fatal(err) log.Fatal(err)
} }
format := alsa.BufferFormat{
Channels: 1,
Rate: 48000,
SampleFormat: alsa.S16_LE,
}
buf := alsa.Buffer{
Format: format,
Data: inPcm,
}
// Resample pcm. // Resample pcm.
resampled, err := Resample(inPcm, 48000, 8000, 1, 16) resampled, err := Resample(buf, 8000)
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }
@ -36,3 +49,44 @@ func TestResample(t *testing.T) {
t.Error("Resampled data does not match expected result.") t.Error("Resampled data does not match expected result.")
} }
} }
// TestStereoToMono tests the StereoToMono function using a pcm file that contains stereo audio.
// The output of the StereoToMono function is compared with a file containing the expected mono audio.
func TestStereoToMono(t *testing.T) {
inPath := "../../../test/test-data/av/input/stereo_DTMF_tones.pcm"
expPath := "../../../test/test-data/av/output/mono_DTMF_tones.pcm"
// Read input pcm.
inPcm, err := ioutil.ReadFile(inPath)
if err != nil {
log.Fatal(err)
}
format := alsa.BufferFormat{
Channels: 2,
Rate: 44100,
SampleFormat: alsa.S16_LE,
}
buf := alsa.Buffer{
Format: format,
Data: inPcm,
}
// Convert audio.
mono, err := StereoToMono(buf)
if err != nil {
log.Fatal(err)
}
// Read expected mono pcm.
exp, err := ioutil.ReadFile(expPath)
if err != nil {
log.Fatal(err)
}
// Compare result with expected.
if !bytes.Equal(mono, exp) {
t.Error("Converted data does not match expected result.")
}
}

View File

@ -7,23 +7,24 @@ import (
"log" "log"
"bitbucket.org/ausocean/av/audio/pcm" "bitbucket.org/ausocean/av/audio/pcm"
"github.com/yobert/alsa"
) )
// This program accepts an input pcm file and outputs a resampled pcm file. // This program accepts an input pcm file and outputs a resampled pcm file.
// Input and output file names, to and from sample rates, channels and bit-depth can be specified as arguments. // Input and output file names, to and from sample rates, channels and sample format can be specified as arguments.
func main() { func main() {
var inPath string var inPath string
var outPath string var outPath string
var from int var from int
var to int var to int
var channels int var channels int
var bitDepth int var sf string
flag.StringVar(&inPath, "in", "data.pcm", "file path of input data") flag.StringVar(&inPath, "in", "data.pcm", "file path of input data")
flag.StringVar(&outPath, "out", "resampled.pcm", "file path of output") flag.StringVar(&outPath, "out", "resampled.pcm", "file path of output")
flag.IntVar(&from, "from", 48000, "sample rate of input file") flag.IntVar(&from, "from", 48000, "sample rate of input file")
flag.IntVar(&to, "to", 8000, "sample rate of output file") flag.IntVar(&to, "to", 8000, "sample rate of output file")
flag.IntVar(&channels, "ch", 1, "number of channels in input file") flag.IntVar(&channels, "ch", 1, "number of channels in input file")
flag.IntVar(&bitDepth, "bd", 16, "bit depth of input file") flag.StringVar(&sf, "sf", "S16_LE", "sample format of input audio, eg. S16_LE")
flag.Parse() flag.Parse()
// Read pcm. // Read pcm.
@ -33,8 +34,29 @@ func main() {
} }
fmt.Println("Read", len(inPcm), "bytes from file", inPath) fmt.Println("Read", len(inPcm), "bytes from file", inPath)
// Resample pcm. var sampleFormat alsa.FormatType
resampled, err := pcm.Resample(inPcm, from, to, channels, bitDepth) switch sf {
case "S32_LE":
sampleFormat = alsa.S32_LE
case "S16_LE":
sampleFormat = alsa.S16_LE
default:
log.Fatalf("Unhandled ALSA format: %v", sf)
}
format := alsa.BufferFormat{
Channels: channels,
Rate: from,
SampleFormat: sampleFormat,
}
buf := alsa.Buffer{
Format: format,
Data: inPcm,
}
// Resample audio.
resampled, err := pcm.Resample(buf, to)
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }

View File

@ -0,0 +1,63 @@
package main
import (
"flag"
"fmt"
"io/ioutil"
"log"
"bitbucket.org/ausocean/av/audio/pcm"
"github.com/yobert/alsa"
)
// This program accepts an input stereo pcm file and outputs a mono pcm file.
// Input and output file names and the sample format of the input can be specified
// as arguments (-in, -out and -sf). The input is always described as having 2 channels.
func main() {
	var inPath string
	var outPath string
	var sf string
	flag.StringVar(&inPath, "in", "data.pcm", "file path of input data")
	flag.StringVar(&outPath, "out", "mono.pcm", "file path of output")
	flag.StringVar(&sf, "sf", "S16_LE", "sample format of input audio, eg. S16_LE")
	flag.Parse()

	// Validate the sample format before touching the filesystem so that a bad
	// flag is reported immediately rather than after the input has been read.
	var sampleFormat alsa.FormatType
	switch sf {
	case "S32_LE":
		sampleFormat = alsa.S32_LE
	case "S16_LE":
		sampleFormat = alsa.S16_LE
	default:
		log.Fatalf("Unhandled ALSA format: %v", sf)
	}

	// Read pcm.
	inPcm, err := ioutil.ReadFile(inPath)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("Read", len(inPcm), "bytes from file", inPath)

	// Describe the raw bytes as a stereo ALSA buffer. The sample rate is left
	// unset, as before; presumably it is not needed for channel conversion
	// (NOTE(review): confirm against pcm.StereoToMono).
	buf := alsa.Buffer{
		Format: alsa.BufferFormat{
			Channels:     2,
			SampleFormat: sampleFormat,
		},
		Data: inPcm,
	}

	// Convert audio.
	mono, err := pcm.StereoToMono(buf)
	if err != nil {
		log.Fatal(err)
	}

	// Save mono to file.
	err = ioutil.WriteFile(outPath, mono, 0644)
	if err != nil {
		log.Fatal(err)
	}
	// Report the conversion; nothing is encoded by this program.
	fmt.Println("Converted and wrote", len(mono), "bytes to file", outPath)
}