From ad1e11ea5149ec8f4ad0850582e7892e1652e34b Mon Sep 17 00:00:00 2001 From: Trek H Date: Tue, 12 Mar 2019 16:22:44 +1030 Subject: [PATCH 1/7] pcm: Resampling restructured to be in pcm package --- audio/pcm/pcm.go | 124 ++++++++++++++++++++++++++++++++++++++++++ audio/pcm/pcm_test.go | 38 +++++++++++++ 2 files changed, 162 insertions(+) create mode 100644 audio/pcm/pcm.go create mode 100644 audio/pcm/pcm_test.go diff --git a/audio/pcm/pcm.go b/audio/pcm/pcm.go new file mode 100644 index 00000000..73342b1b --- /dev/null +++ b/audio/pcm/pcm.go @@ -0,0 +1,124 @@ +package pcm + +import ( + "encoding/binary" + "fmt" + + "github.com/yobert/alsa" +) + +// Resample resamples pcm data (inPcm) from 'fromRate' Hz to 'toRate' Hz and returns the resulting pcm. +// If an error occurs, an error will be returned along with the original audio data +// - channels: number of channels +// - bitDepth: number of bits in single sample +// Notes: +// - Input and output is assumed to be Little Endian. +// - Currently only downsampling is possible and fromRate must be divisible by toRate or an error will occur. +// - If the number of bytes in 'inPcm' is not divisible by the decimation factor (ratioFrom), the remaining bytes will +// not be included in the result. Eg. input of length 480002 downsampling 6:1 will result in output length 80000. +func Resample(inPcm []byte, fromRate, toRate, channels, bitDepth int) ([]byte, error) { + if fromRate == toRate { + return inPcm, nil + } else if fromRate < 0 { + return inPcm, fmt.Errorf("Unable to convert from: %v Hz", fromRate) + } else if toRate < 0 { + return inPcm, fmt.Errorf("Unable to convert to: %v Hz", toRate) + } + + // The number of bytes in a sample. + var sampleLen int + switch bitDepth { + case 32: + sampleLen = 4 * channels + case 16: + sampleLen = 2 * channels + default: + return inPcm, fmt.Errorf("Unhandled bitDepth: %v, must be 16 or 32", bitDepth) + } + inPcmLen := len(inPcm) + + // Calculate sample rate ratio ratioFrom:ratioTo. 
+ rateGcd := gcd(toRate, fromRate) + ratioFrom := fromRate / rateGcd + ratioTo := toRate / rateGcd + + // ratioTo = 1 is the only number that will result in an even sampling. + if ratioTo != 1 { + return inPcm, fmt.Errorf("%v:%v is an unhandled from:to rate ratio. must be n:1 for some rate n", ratioFrom, ratioTo) + } + + newLen := inPcmLen / ratioFrom + result := make([]byte, 0, newLen) + + // For each new sample to be generated, loop through the respective 'ratioFrom' samples in 'inPcm' to add them + // up and average them. The result is the new sample. + for i := 0; i < newLen/sampleLen; i++ { + var sum int + for j := 0; j < ratioFrom; j++ { + switch bitDepth { + case 32: + sum += int(int32(binary.LittleEndian.Uint32(inPcm[(i*ratioFrom*sampleLen)+(j*sampleLen) : (i*ratioFrom*sampleLen)+((j+1)*sampleLen)]))) + case 16: + sum += int(int16(binary.LittleEndian.Uint16(inPcm[(i*ratioFrom*sampleLen)+(j*sampleLen) : (i*ratioFrom*sampleLen)+((j+1)*sampleLen)]))) + default: + return inPcm, fmt.Errorf("Unhandled bitDepth: %v, must be 16 or 32", bitDepth) + } + } + avg := sum / ratioFrom + bAvg := make([]byte, sampleLen) + switch bitDepth { + case 32: + binary.LittleEndian.PutUint32(bAvg, uint32(avg)) + case 16: + binary.LittleEndian.PutUint16(bAvg, uint16(avg)) + } + result = append(result, bAvg...) + } + return result, nil +} + +// StereoToMono returns raw mono audio data generated from only the left channel from +// the given stereo recording (ALSA buffer) +// if an error occurs, an error will be returned along with the original stereo data. 
+func StereoToMono(stereoBuf alsa.Buffer) ([]byte, error) { + bufChannels := stereoBuf.Format.Channels + if bufChannels == 1 { + return stereoBuf.Data, nil + } else if bufChannels != 2 { + return stereoBuf.Data, fmt.Errorf("Audio is not stereo or mono, it has %v channels", bufChannels) + } + + var stereoSampleBytes int + switch stereoBuf.Format.SampleFormat { + case alsa.S32_LE: + stereoSampleBytes = 8 + case alsa.S16_LE: + stereoSampleBytes = 4 + default: + return stereoBuf.Data, fmt.Errorf("Unhandled ALSA format %v", stereoBuf.Format.SampleFormat) + } + + recLength := len(stereoBuf.Data) + mono := make([]byte, recLength/2) + + // Convert to mono: for each byte in the stereo recording, if it's in the first half of a stereo sample + // (left channel), add it to the new mono audio data. + var inc int + for i := 0; i < recLength; i++ { + if i%stereoSampleBytes < stereoSampleBytes/2 { + mono[inc] = stereoBuf.Data[i] + inc++ + } + } + + return mono, nil +} + +// gcd is used for calculating the greatest common divisor of two positive integers, a and b. +// assumes given a and b are positive. +func gcd(a, b int) int { + if b != 0 { + return gcd(b, a%b) + } + return a +} diff --git a/audio/pcm/pcm_test.go b/audio/pcm/pcm_test.go new file mode 100644 index 00000000..21e66b25 --- /dev/null +++ b/audio/pcm/pcm_test.go @@ -0,0 +1,38 @@ +package pcm + +import ( + "bytes" + "io/ioutil" + "log" + "testing" +) + +// TestResample accepts an input pcm file (assumed to be mono and using 16-bit samples) and outputs a resampled pcm file. +// Input and output file names can be specified as arguments. +func TestResample(t *testing.T) { + inPath := "../../../test/test-data/av/input/sweep_400Hz_20000Hz_-3dBFS_5s_48khz.pcm" + expPath := "../../../test/test-data/av/output/sweep_400Hz_20000Hz_resampled_48to8kHz.pcm" + + // Read input pcm. + inPcm, err := ioutil.ReadFile(inPath) + if err != nil { + log.Fatal(err) + } + + // Resample pcm. 
+ resampled, err := Resample(inPcm, 48000, 8000, 1, 16) + if err != nil { + log.Fatal(err) + } + + // Read expected resampled pcm. + exp, err := ioutil.ReadFile(expPath) + if err != nil { + log.Fatal(err) + } + + // Compare result with expected. + if !bytes.Equal(resampled, exp) { + t.Error("Resampled data does not match expected result.") + } +} From e9d4fb47fc77359d83ce2e0d08ec99dbaf939331 Mon Sep 17 00:00:00 2001 From: Trek H Date: Tue, 12 Mar 2019 18:53:08 +1030 Subject: [PATCH 2/7] pcm: added to exp a program that resamples pcm files --- exp/pcm/resample-pcm.go | 48 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 exp/pcm/resample-pcm.go diff --git a/exp/pcm/resample-pcm.go b/exp/pcm/resample-pcm.go new file mode 100644 index 00000000..2c575722 --- /dev/null +++ b/exp/pcm/resample-pcm.go @@ -0,0 +1,48 @@ +package main + +import ( + "flag" + "fmt" + "io/ioutil" + "log" + + "bitbucket.org/ausocean/av/audio/pcm" +) + +// This program accepts an input pcm file and outputs a resampled pcm file. +// Input and output file names, to and from sample rates, channels and bit-depth can be specified as arguments. +func main() { + var inPath string + var outPath string + var from int + var to int + var channels int + var bitDepth int + flag.StringVar(&inPath, "in", "data.pcm", "file path of input data") + flag.StringVar(&outPath, "out", "resampled.pcm", "file path of output") + flag.IntVar(&from, "from", 48000, "sample rate of input file") + flag.IntVar(&to, "to", 8000, "sample rate of output file") + flag.IntVar(&channels, "ch", 1, "number of channels in input file") + flag.IntVar(&bitDepth, "bd", 16, "bit depth of input file") + flag.Parse() + + // Read pcm. + inPcm, err := ioutil.ReadFile(inPath) + if err != nil { + log.Fatal(err) + } + fmt.Println("Read", len(inPcm), "bytes from file", inPath) + + // Resample pcm. 
+ resampled, err := pcm.Resample(inPcm, from, to, channels, bitDepth) + if err != nil { + log.Fatal(err) + } + + // Save resampled to file. + err = ioutil.WriteFile(outPath, resampled, 0644) + if err != nil { + log.Fatal(err) + } + fmt.Println("Encoded and wrote", len(resampled), "bytes to file", outPath) +} From d04dc217ecef60f0417205fb9611ce5867f38c58 Mon Sep 17 00:00:00 2001 From: Trek H Date: Wed, 13 Mar 2019 13:19:53 +1030 Subject: [PATCH 3/7] pcm: Added test and command for mono conversion. Also made resampling use alsa.Buffer --- audio/pcm/pcm.go | 63 +++++++++---------- audio/pcm/pcm_test.go | 60 +++++++++++++++++- .../{resample-pcm.go => resample/resample.go} | 32 ++++++++-- exp/pcm/stereo-to-mono/stereo-to-mono.go | 63 +++++++++++++++++++ 4 files changed, 176 insertions(+), 42 deletions(-) rename exp/pcm/{resample-pcm.go => resample/resample.go} (63%) create mode 100644 exp/pcm/stereo-to-mono/stereo-to-mono.go diff --git a/audio/pcm/pcm.go b/audio/pcm/pcm.go index 73342b1b..8e951120 100644 --- a/audio/pcm/pcm.go +++ b/audio/pcm/pcm.go @@ -7,35 +7,33 @@ import ( "github.com/yobert/alsa" ) -// Resample resamples pcm data (inPcm) from 'fromRate' Hz to 'toRate' Hz and returns the resulting pcm. -// If an error occurs, an error will be returned along with the original audio data -// - channels: number of channels -// - bitDepth: number of bits in single sample -// Notes: -// - Input and output is assumed to be Little Endian. -// - Currently only downsampling is possible and fromRate must be divisible by toRate or an error will occur. -// - If the number of bytes in 'inPcm' is not divisible by the decimation factor (ratioFrom), the remaining bytes will +// Resample resamples pcm data from fromBuf to 'toRate' Hz and returns the resulting pcm. +// If an error occurs, an error will be returned along with the original fromBuf's data // Notes: +// - Currently only downsampling is implemented and fromBuf's rate must be divisible by toRate or an error will occur. 
+// - If the number of bytes in fromBuf.Data is not divisible by the decimation factor (ratioFrom), the remaining bytes will // not be included in the result. Eg. input of length 480002 downsampling 6:1 will result in output length 80000. -func Resample(inPcm []byte, fromRate, toRate, channels, bitDepth int) ([]byte, error) { +func Resample(fromBuf alsa.Buffer, toRate int) ([]byte, error) { + fromRate := fromBuf.Format.Rate if fromRate == toRate { - return inPcm, nil + return fromBuf.Data, nil } else if fromRate < 0 { - return inPcm, fmt.Errorf("Unable to convert from: %v Hz", fromRate) + return fromBuf.Data, fmt.Errorf("Unable to convert from: %v Hz", fromRate) } else if toRate < 0 { - return inPcm, fmt.Errorf("Unable to convert to: %v Hz", toRate) + return fromBuf.Data, fmt.Errorf("Unable to convert to: %v Hz", toRate) } // The number of bytes in a sample. var sampleLen int - switch bitDepth { - case 32: - sampleLen = 4 * channels - case 16: - sampleLen = 2 * channels + switch fromBuf.Format.SampleFormat { + case alsa.S32_LE: + sampleLen = 4 * fromBuf.Format.Channels + case alsa.S16_LE: + sampleLen = 2 * fromBuf.Format.Channels default: - return inPcm, fmt.Errorf("Unhandled bitDepth: %v, must be 16 or 32", bitDepth) + return fromBuf.Data, fmt.Errorf("Unhandled ALSA format: %v", fromBuf.Format.SampleFormat) } - inPcmLen := len(inPcm) + inPcmLen := len(fromBuf.Data) // Calculate sample rate ratio ratioFrom:ratioTo. rateGcd := gcd(toRate, fromRate) @@ -44,32 +42,30 @@ func Resample(inPcm []byte, fromRate, toRate, channels, bitDepth int) ([]byte, e // ratioTo = 1 is the only number that will result in an even sampling. if ratioTo != 1 { - return inPcm, fmt.Errorf("%v:%v is an unhandled from:to rate ratio. must be n:1 for some rate n", ratioFrom, ratioTo) + return fromBuf.Data, fmt.Errorf("%v:%v is an unhandled from:to rate ratio. 
must be n:1 for some rate n", ratioFrom, ratioTo) } newLen := inPcmLen / ratioFrom result := make([]byte, 0, newLen) - // For each new sample to be generated, loop through the respective 'ratioFrom' samples in 'inPcm' to add them + // For each new sample to be generated, loop through the respective 'ratioFrom' samples in 'fromBuf.Data' to add them // up and average them. The result is the new sample. for i := 0; i < newLen/sampleLen; i++ { var sum int for j := 0; j < ratioFrom; j++ { - switch bitDepth { - case 32: - sum += int(int32(binary.LittleEndian.Uint32(inPcm[(i*ratioFrom*sampleLen)+(j*sampleLen) : (i*ratioFrom*sampleLen)+((j+1)*sampleLen)]))) - case 16: - sum += int(int16(binary.LittleEndian.Uint16(inPcm[(i*ratioFrom*sampleLen)+(j*sampleLen) : (i*ratioFrom*sampleLen)+((j+1)*sampleLen)]))) - default: - return inPcm, fmt.Errorf("Unhandled bitDepth: %v, must be 16 or 32", bitDepth) + switch fromBuf.Format.SampleFormat { + case alsa.S32_LE: + sum += int(int32(binary.LittleEndian.Uint32(fromBuf.Data[(i*ratioFrom*sampleLen)+(j*sampleLen) : (i*ratioFrom*sampleLen)+((j+1)*sampleLen)]))) + case alsa.S16_LE: + sum += int(int16(binary.LittleEndian.Uint16(fromBuf.Data[(i*ratioFrom*sampleLen)+(j*sampleLen) : (i*ratioFrom*sampleLen)+((j+1)*sampleLen)]))) } } avg := sum / ratioFrom bAvg := make([]byte, sampleLen) - switch bitDepth { - case 32: + switch fromBuf.Format.SampleFormat { + case alsa.S32_LE: binary.LittleEndian.PutUint32(bAvg, uint32(avg)) - case 16: + case alsa.S16_LE: binary.LittleEndian.PutUint16(bAvg, uint16(avg)) } result = append(result, bAvg...) @@ -81,11 +77,10 @@ func Resample(inPcm []byte, fromRate, toRate, channels, bitDepth int) ([]byte, e // the given stereo recording (ALSA buffer) // if an error occurs, an error will be returned along with the original stereo data. 
func StereoToMono(stereoBuf alsa.Buffer) ([]byte, error) { - bufChannels := stereoBuf.Format.Channels - if bufChannels == 1 { + if stereoBuf.Format.Channels == 1 { return stereoBuf.Data, nil - } else if bufChannels != 2 { - return stereoBuf.Data, fmt.Errorf("Audio is not stereo or mono, it has %v channels", bufChannels) + } else if stereoBuf.Format.Channels != 2 { + return stereoBuf.Data, fmt.Errorf("Audio is not stereo or mono, it has %v channels", stereoBuf.Format.Channels) } var stereoSampleBytes int diff --git a/audio/pcm/pcm_test.go b/audio/pcm/pcm_test.go index 21e66b25..b543b823 100644 --- a/audio/pcm/pcm_test.go +++ b/audio/pcm/pcm_test.go @@ -5,10 +5,12 @@ import ( "io/ioutil" "log" "testing" + + "github.com/yobert/alsa" ) -// TestResample accepts an input pcm file (assumed to be mono and using 16-bit samples) and outputs a resampled pcm file. -// Input and output file names can be specified as arguments. +// TestResample tests the Resample function using a pcm file that contains audio of a freq. sweep. +// The output of the Resample function is compared with a file containing the expected result. func TestResample(t *testing.T) { inPath := "../../../test/test-data/av/input/sweep_400Hz_20000Hz_-3dBFS_5s_48khz.pcm" expPath := "../../../test/test-data/av/output/sweep_400Hz_20000Hz_resampled_48to8kHz.pcm" @@ -19,8 +21,19 @@ func TestResample(t *testing.T) { log.Fatal(err) } + format := alsa.BufferFormat{ + Channels: 1, + Rate: 48000, + SampleFormat: alsa.S16_LE, + } + + buf := alsa.Buffer{ + Format: format, + Data: inPcm, + } + // Resample pcm. - resampled, err := Resample(inPcm, 48000, 8000, 1, 16) + resampled, err := Resample(buf, 8000) if err != nil { log.Fatal(err) } @@ -36,3 +49,44 @@ func TestResample(t *testing.T) { t.Error("Resampled data does not match expected result.") } } + +// TestStereoToMono tests the StereoToMono function using a pcm file that contains stereo audio. 
+// The output of the StereoToMono function is compared with a file containing the expected mono audio. +func TestStereoToMono(t *testing.T) { + inPath := "../../../test/test-data/av/input/stereo_DTMF_tones.pcm" + expPath := "../../../test/test-data/av/output/mono_DTMF_tones.pcm" + + // Read input pcm. + inPcm, err := ioutil.ReadFile(inPath) + if err != nil { + log.Fatal(err) + } + + format := alsa.BufferFormat{ + Channels: 2, + Rate: 44100, + SampleFormat: alsa.S16_LE, + } + + buf := alsa.Buffer{ + Format: format, + Data: inPcm, + } + + // Convert audio. + mono, err := StereoToMono(buf) + if err != nil { + log.Fatal(err) + } + + // Read expected mono pcm. + exp, err := ioutil.ReadFile(expPath) + if err != nil { + log.Fatal(err) + } + + // Compare result with expected. + if !bytes.Equal(mono, exp) { + t.Error("Converted data does not match expected result.") + } +} diff --git a/exp/pcm/resample-pcm.go b/exp/pcm/resample/resample.go similarity index 63% rename from exp/pcm/resample-pcm.go rename to exp/pcm/resample/resample.go index 2c575722..b896fafb 100644 --- a/exp/pcm/resample-pcm.go +++ b/exp/pcm/resample/resample.go @@ -7,23 +7,24 @@ import ( "log" "bitbucket.org/ausocean/av/audio/pcm" + "github.com/yobert/alsa" ) // This program accepts an input pcm file and outputs a resampled pcm file. -// Input and output file names, to and from sample rates, channels and bit-depth can be specified as arguments. +// Input and output file names, to and from sample rates, channels and sample format can be specified as arguments. 
func main() { var inPath string var outPath string var from int var to int var channels int - var bitDepth int + var sf string flag.StringVar(&inPath, "in", "data.pcm", "file path of input data") flag.StringVar(&outPath, "out", "resampled.pcm", "file path of output") flag.IntVar(&from, "from", 48000, "sample rate of input file") flag.IntVar(&to, "to", 8000, "sample rate of output file") flag.IntVar(&channels, "ch", 1, "number of channels in input file") - flag.IntVar(&bitDepth, "bd", 16, "bit depth of input file") + flag.StringVar(&sf, "sf", "S16_LE", "sample format of input audio, eg. S16_LE") flag.Parse() // Read pcm. @@ -33,8 +34,29 @@ func main() { } fmt.Println("Read", len(inPcm), "bytes from file", inPath) - // Resample pcm. - resampled, err := pcm.Resample(inPcm, from, to, channels, bitDepth) + var sampleFormat alsa.FormatType + switch sf { + case "S32_LE": + sampleFormat = alsa.S32_LE + case "S16_LE": + sampleFormat = alsa.S16_LE + default: + log.Fatalf("Unhandled ALSA format: %v", sf) + } + + format := alsa.BufferFormat{ + Channels: channels, + Rate: from, + SampleFormat: sampleFormat, + } + + buf := alsa.Buffer{ + Format: format, + Data: inPcm, + } + + // Resample audio. + resampled, err := pcm.Resample(buf, to) if err != nil { log.Fatal(err) } diff --git a/exp/pcm/stereo-to-mono/stereo-to-mono.go b/exp/pcm/stereo-to-mono/stereo-to-mono.go new file mode 100644 index 00000000..b16f1ab3 --- /dev/null +++ b/exp/pcm/stereo-to-mono/stereo-to-mono.go @@ -0,0 +1,63 @@ +package main + +import ( + "flag" + "fmt" + "io/ioutil" + "log" + + "bitbucket.org/ausocean/av/audio/pcm" + "github.com/yobert/alsa" +) + +// This program accepts an input pcm file and outputs a resampled pcm file. +// Input and output file names, to and from sample rates, channels and sample format can be specified as arguments. 
+func main() { + var inPath string + var outPath string + var sf string + flag.StringVar(&inPath, "in", "data.pcm", "file path of input data") + flag.StringVar(&outPath, "out", "mono.pcm", "file path of output") + flag.StringVar(&sf, "sf", "S16_LE", "sample format of input audio, eg. S16_LE") + flag.Parse() + + // Read pcm. + inPcm, err := ioutil.ReadFile(inPath) + if err != nil { + log.Fatal(err) + } + fmt.Println("Read", len(inPcm), "bytes from file", inPath) + + var sampleFormat alsa.FormatType + switch sf { + case "S32_LE": + sampleFormat = alsa.S32_LE + case "S16_LE": + sampleFormat = alsa.S16_LE + default: + log.Fatalf("Unhandled ALSA format: %v", sf) + } + + format := alsa.BufferFormat{ + Channels: 2, + SampleFormat: sampleFormat, + } + + buf := alsa.Buffer{ + Format: format, + Data: inPcm, + } + + // Convert audio. + mono, err := pcm.StereoToMono(buf) + if err != nil { + log.Fatal(err) + } + + // Save mono to file. + err = ioutil.WriteFile(outPath, mono, 0644) + if err != nil { + log.Fatal(err) + } + fmt.Println("Encoded and wrote", len(mono), "bytes to file", outPath) +} From 927194de4cadba887df5b092b32d7150e7a55370 Mon Sep 17 00:00:00 2001 From: Trek H Date: Wed, 13 Mar 2019 13:59:21 +1030 Subject: [PATCH 4/7] pcm: added file headers --- audio/pcm/pcm.go | 26 ++++++++++++++++++++++++ audio/pcm/pcm_test.go | 26 ++++++++++++++++++++++++ exp/pcm/resample/resample.go | 26 ++++++++++++++++++++++++ exp/pcm/stereo-to-mono/stereo-to-mono.go | 26 ++++++++++++++++++++++++ 4 files changed, 104 insertions(+) diff --git a/audio/pcm/pcm.go b/audio/pcm/pcm.go index 8e951120..76594b88 100644 --- a/audio/pcm/pcm.go +++ b/audio/pcm/pcm.go @@ -1,3 +1,29 @@ +/* +NAME + pcm.go + +DESCRIPTION + pcm.go contains functions for processing pcm. 
+ +AUTHOR + Trek Hopton + +LICENSE + pcm.go is Copyright (C) 2018 the Australian Ocean Lab (AusOcean) + + It is free software: you can redistribute it and/or modify them + under the terms of the GNU General Public License as published by the + Free Software Foundation, either version 3 of the License, or (at your + option) any later version. + + It is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License in gpl.txt. + If not, see [GNU licenses](http://www.gnu.org/licenses). +*/ package pcm import ( diff --git a/audio/pcm/pcm_test.go b/audio/pcm/pcm_test.go index b543b823..5abcd1a8 100644 --- a/audio/pcm/pcm_test.go +++ b/audio/pcm/pcm_test.go @@ -1,3 +1,29 @@ +/* +NAME + pcm_test.go + +DESCRIPTION + pcm_test.go contains functions for testing the pcm package. + +AUTHOR + Trek Hopton + +LICENSE + pcm_test.go is Copyright (C) 2018 the Australian Ocean Lab (AusOcean) + + It is free software: you can redistribute it and/or modify them + under the terms of the GNU General Public License as published by the + Free Software Foundation, either version 3 of the License, or (at your + option) any later version. + + It is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License in gpl.txt. + If not, see [GNU licenses](http://www.gnu.org/licenses). 
+*/ package pcm import ( diff --git a/exp/pcm/resample/resample.go b/exp/pcm/resample/resample.go index b896fafb..2fef9f7c 100644 --- a/exp/pcm/resample/resample.go +++ b/exp/pcm/resample/resample.go @@ -1,3 +1,29 @@ +/* +NAME + resample.go + +DESCRIPTION + resample.go is a program for resampling a pcm file. + +AUTHOR + Trek Hopton + +LICENSE + resample.go is Copyright (C) 2018 the Australian Ocean Lab (AusOcean) + + It is free software: you can redistribute it and/or modify them + under the terms of the GNU General Public License as published by the + Free Software Foundation, either version 3 of the License, or (at your + option) any later version. + + It is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License in gpl.txt. + If not, see [GNU licenses](http://www.gnu.org/licenses). +*/ package main import ( diff --git a/exp/pcm/stereo-to-mono/stereo-to-mono.go b/exp/pcm/stereo-to-mono/stereo-to-mono.go index b16f1ab3..69bc081b 100644 --- a/exp/pcm/stereo-to-mono/stereo-to-mono.go +++ b/exp/pcm/stereo-to-mono/stereo-to-mono.go @@ -1,3 +1,29 @@ +/* +NAME + stereo-to-mono.go + +DESCRIPTION + stereo-to-mono.go is a program for converting a stereo pcm file to a mono pcm file. + +AUTHOR + Trek Hopton + +LICENSE + stereo-to-mono.go is Copyright (C) 2018 the Australian Ocean Lab (AusOcean) + + It is free software: you can redistribute it and/or modify them + under the terms of the GNU General Public License as published by the + Free Software Foundation, either version 3 of the License, or (at your + option) any later version. + + It is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License in gpl.txt. + If not, see [GNU licenses](http://www.gnu.org/licenses). +*/ package main import ( From b160e6a5e212b49095a61feaf1d5c1e18c9ee241 Mon Sep 17 00:00:00 2001 From: Trek H Date: Wed, 13 Mar 2019 16:29:44 +1030 Subject: [PATCH 5/7] pcm: comment fix --- audio/pcm/pcm.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/audio/pcm/pcm.go b/audio/pcm/pcm.go index 76594b88..5913fa0b 100644 --- a/audio/pcm/pcm.go +++ b/audio/pcm/pcm.go @@ -33,8 +33,8 @@ import ( "github.com/yobert/alsa" ) -// Resample resamples pcm data from fromBuf to 'toRate' Hz and returns the resulting pcm. -// If an error occurs, an error will be returned along with the original fromBuf's data +// Resample takes an alsa.Buffer (fromBuf) and resamples the pcm audio data to 'toRate' Hz and returns the resulting pcm. +// If an error occurs, an error will be returned along with the original fromBuf's data. // Notes: // - Currently only downsampling is implemented and fromBuf's rate must be divisible by toRate or an error will occur. 
// - If the number of bytes in fromBuf.Data is not divisible by the decimation factor (ratioFrom), the remaining bytes will From be29668c5d333fda2b03b308d2fe6f3ac1033259 Mon Sep 17 00:00:00 2001 From: Trek H Date: Fri, 15 Mar 2019 18:17:32 +1030 Subject: [PATCH 6/7] pcm: updated file header year --- audio/pcm/pcm.go | 2 +- audio/pcm/pcm_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/audio/pcm/pcm.go b/audio/pcm/pcm.go index 5913fa0b..b9ec7310 100644 --- a/audio/pcm/pcm.go +++ b/audio/pcm/pcm.go @@ -9,7 +9,7 @@ AUTHOR Trek Hopton LICENSE - pcm.go is Copyright (C) 2018 the Australian Ocean Lab (AusOcean) + pcm.go is Copyright (C) 2019 the Australian Ocean Lab (AusOcean) It is free software: you can redistribute it and/or modify them under the terms of the GNU General Public License as published by the diff --git a/audio/pcm/pcm_test.go b/audio/pcm/pcm_test.go index 5abcd1a8..713d01d8 100644 --- a/audio/pcm/pcm_test.go +++ b/audio/pcm/pcm_test.go @@ -9,7 +9,7 @@ AUTHOR Trek Hopton LICENSE - pcm_test.go is Copyright (C) 2018 the Australian Ocean Lab (AusOcean) + pcm_test.go is Copyright (C) 2019 the Australian Ocean Lab (AusOcean) It is free software: you can redistribute it and/or modify them under the terms of the GNU General Public License as published by the From 9a9a38dbe1902edf77c50e59e205b541127fd6b6 Mon Sep 17 00:00:00 2001 From: Trek H Date: Tue, 26 Mar 2019 15:43:11 +1030 Subject: [PATCH 7/7] pcm: simplified and improved efficiency of code. --- audio/pcm/pcm.go | 72 ++++++++++++------------ exp/pcm/resample/resample.go | 18 ++---- exp/pcm/stereo-to-mono/stereo-to-mono.go | 9 +-- 3 files changed, 45 insertions(+), 54 deletions(-) diff --git a/audio/pcm/pcm.go b/audio/pcm/pcm.go index b9ec7310..5ead3143 100644 --- a/audio/pcm/pcm.go +++ b/audio/pcm/pcm.go @@ -33,62 +33,62 @@ import ( "github.com/yobert/alsa" ) -// Resample takes an alsa.Buffer (fromBuf) and resamples the pcm audio data to 'toRate' Hz and returns the resulting pcm. 
-// If an error occurs, an error will be returned along with the original fromBuf's data. +// Resample takes an alsa.Buffer (b) and resamples the pcm audio data to 'rate' Hz and returns the resulting pcm. +// If an error occurs, nil will be returned along with the error. // Notes: -// - Currently only downsampling is implemented and fromBuf's rate must be divisible by toRate or an error will occur. -// - If the number of bytes in fromBuf.Data is not divisible by the decimation factor (ratioFrom), the remaining bytes will +// - Currently only downsampling is implemented and b's rate must be divisible by 'rate' or an error will occur. +// - If the number of bytes in b.Data is not divisible by the decimation factor (ratioFrom), the remaining bytes will // not be included in the result. Eg. input of length 480002 downsampling 6:1 will result in output length 80000. -func Resample(fromBuf alsa.Buffer, toRate int) ([]byte, error) { - fromRate := fromBuf.Format.Rate - if fromRate == toRate { - return fromBuf.Data, nil +func Resample(b alsa.Buffer, rate int) ([]byte, error) { + fromRate := b.Format.Rate + if fromRate == rate { + return b.Data, nil } else if fromRate < 0 { - return fromBuf.Data, fmt.Errorf("Unable to convert from: %v Hz", fromRate) - } else if toRate < 0 { - return fromBuf.Data, fmt.Errorf("Unable to convert to: %v Hz", toRate) + return nil, fmt.Errorf("Unable to convert from: %v Hz", fromRate) + } else if rate < 0 { + return nil, fmt.Errorf("Unable to convert to: %v Hz", rate) } // The number of bytes in a sample. 
var sampleLen int - switch fromBuf.Format.SampleFormat { + switch b.Format.SampleFormat { case alsa.S32_LE: - sampleLen = 4 * fromBuf.Format.Channels + sampleLen = 4 * b.Format.Channels case alsa.S16_LE: - sampleLen = 2 * fromBuf.Format.Channels + sampleLen = 2 * b.Format.Channels default: - return fromBuf.Data, fmt.Errorf("Unhandled ALSA format: %v", fromBuf.Format.SampleFormat) + return nil, fmt.Errorf("Unhandled ALSA format: %v", b.Format.SampleFormat) } - inPcmLen := len(fromBuf.Data) + inPcmLen := len(b.Data) // Calculate sample rate ratio ratioFrom:ratioTo. - rateGcd := gcd(toRate, fromRate) + rateGcd := gcd(rate, fromRate) ratioFrom := fromRate / rateGcd - ratioTo := toRate / rateGcd + ratioTo := rate / rateGcd // ratioTo = 1 is the only number that will result in an even sampling. if ratioTo != 1 { - return fromBuf.Data, fmt.Errorf("%v:%v is an unhandled from:to rate ratio. must be n:1 for some rate n", ratioFrom, ratioTo) + return nil, fmt.Errorf("unhandled from:to rate ratio %v:%v: 'to' must be 1", ratioFrom, ratioTo) } newLen := inPcmLen / ratioFrom result := make([]byte, 0, newLen) - // For each new sample to be generated, loop through the respective 'ratioFrom' samples in 'fromBuf.Data' to add them + // For each new sample to be generated, loop through the respective 'ratioFrom' samples in 'b.Data' to add them // up and average them. The result is the new sample. 
+ bAvg := make([]byte, sampleLen) for i := 0; i < newLen/sampleLen; i++ { var sum int for j := 0; j < ratioFrom; j++ { - switch fromBuf.Format.SampleFormat { + switch b.Format.SampleFormat { case alsa.S32_LE: - sum += int(int32(binary.LittleEndian.Uint32(fromBuf.Data[(i*ratioFrom*sampleLen)+(j*sampleLen) : (i*ratioFrom*sampleLen)+((j+1)*sampleLen)]))) + sum += int(int32(binary.LittleEndian.Uint32(b.Data[(i*ratioFrom*sampleLen)+(j*sampleLen) : (i*ratioFrom*sampleLen)+((j+1)*sampleLen)]))) case alsa.S16_LE: - sum += int(int16(binary.LittleEndian.Uint16(fromBuf.Data[(i*ratioFrom*sampleLen)+(j*sampleLen) : (i*ratioFrom*sampleLen)+((j+1)*sampleLen)]))) + sum += int(int16(binary.LittleEndian.Uint16(b.Data[(i*ratioFrom*sampleLen)+(j*sampleLen) : (i*ratioFrom*sampleLen)+((j+1)*sampleLen)]))) } } avg := sum / ratioFrom - bAvg := make([]byte, sampleLen) - switch fromBuf.Format.SampleFormat { + switch b.Format.SampleFormat { case alsa.S32_LE: binary.LittleEndian.PutUint32(bAvg, uint32(avg)) case alsa.S16_LE: @@ -102,24 +102,24 @@ func Resample(fromBuf alsa.Buffer, toRate int) ([]byte, error) { // StereoToMono returns raw mono audio data generated from only the left channel from // the given stereo recording (ALSA buffer) // if an error occurs, an error will be returned along with the original stereo data. 
-func StereoToMono(stereoBuf alsa.Buffer) ([]byte, error) { - if stereoBuf.Format.Channels == 1 { - return stereoBuf.Data, nil - } else if stereoBuf.Format.Channels != 2 { - return stereoBuf.Data, fmt.Errorf("Audio is not stereo or mono, it has %v channels", stereoBuf.Format.Channels) +func StereoToMono(b alsa.Buffer) ([]byte, error) { + if b.Format.Channels == 1 { + return b.Data, nil + } else if b.Format.Channels != 2 { + return nil, fmt.Errorf("Audio is not stereo or mono, it has %v channels", b.Format.Channels) } var stereoSampleBytes int - switch stereoBuf.Format.SampleFormat { + switch b.Format.SampleFormat { case alsa.S32_LE: stereoSampleBytes = 8 case alsa.S16_LE: stereoSampleBytes = 4 default: - return stereoBuf.Data, fmt.Errorf("Unhandled ALSA format %v", stereoBuf.Format.SampleFormat) + return nil, fmt.Errorf("Unhandled ALSA format %v", b.Format.SampleFormat) } - recLength := len(stereoBuf.Data) + recLength := len(b.Data) mono := make([]byte, recLength/2) // Convert to mono: for each byte in the stereo recording, if it's in the first half of a stereo sample @@ -127,7 +127,7 @@ func StereoToMono(stereoBuf alsa.Buffer) ([]byte, error) { var inc int for i := 0; i < recLength; i++ { if i%stereoSampleBytes < stereoSampleBytes/2 { - mono[inc] = stereoBuf.Data[i] + mono[inc] = b.Data[i] inc++ } } @@ -138,8 +138,8 @@ func StereoToMono(stereoBuf alsa.Buffer) ([]byte, error) { // gcd is used for calculating the greatest common divisor of two positive integers, a and b. // assumes given a and b are positive. func gcd(a, b int) int { - if b != 0 { - return gcd(b, a%b) + for b != 0 { + a, b = b, a%b } return a } diff --git a/exp/pcm/resample/resample.go b/exp/pcm/resample/resample.go index 2fef9f7c..aaa8f77c 100644 --- a/exp/pcm/resample/resample.go +++ b/exp/pcm/resample/resample.go @@ -39,18 +39,12 @@ import ( // This program accepts an input pcm file and outputs a resampled pcm file. 
// Input and output file names, to and from sample rates, channels and sample format can be specified as arguments. func main() { - var inPath string - var outPath string - var from int - var to int - var channels int - var sf string - flag.StringVar(&inPath, "in", "data.pcm", "file path of input data") - flag.StringVar(&outPath, "out", "resampled.pcm", "file path of output") - flag.IntVar(&from, "from", 48000, "sample rate of input file") - flag.IntVar(&to, "to", 8000, "sample rate of output file") - flag.IntVar(&channels, "ch", 1, "number of channels in input file") - flag.StringVar(&sf, "sf", "S16_LE", "sample format of input audio, eg. S16_LE") + var inPath = *flag.String("in", "data.pcm", "file path of input data") + var outPath = *flag.String("out", "resampled.pcm", "file path of output") + var from = *flag.Int("from", 48000, "sample rate of input file") + var to = *flag.Int("to", 8000, "sample rate of output file") + var channels = *flag.Int("ch", 1, "number of channels in input file") + var sf = *flag.String("sf", "S16_LE", "sample format of input audio, eg. S16_LE") flag.Parse() // Read pcm. diff --git a/exp/pcm/stereo-to-mono/stereo-to-mono.go b/exp/pcm/stereo-to-mono/stereo-to-mono.go index 69bc081b..231591f0 100644 --- a/exp/pcm/stereo-to-mono/stereo-to-mono.go +++ b/exp/pcm/stereo-to-mono/stereo-to-mono.go @@ -39,12 +39,9 @@ import ( // This program accepts an input pcm file and outputs a resampled pcm file. // Input and output file names, to and from sample rates, channels and sample format can be specified as arguments. func main() { - var inPath string - var outPath string - var sf string - flag.StringVar(&inPath, "in", "data.pcm", "file path of input data") - flag.StringVar(&outPath, "out", "mono.pcm", "file path of output") - flag.StringVar(&sf, "sf", "S16_LE", "sample format of input audio, eg. 
S16_LE") + var inPath = *flag.String("in", "data.pcm", "file path of input data") + var outPath = *flag.String("out", "mono.pcm", "file path of output") + var sf = *flag.String("sf", "S16_LE", "sample format of input audio, eg. S16_LE") flag.Parse() // Read pcm.