diff --git a/codec/pcm/pcm.go b/codec/pcm/pcm.go new file mode 100644 index 00000000..5ead3143 --- /dev/null +++ b/codec/pcm/pcm.go @@ -0,0 +1,145 @@ +/* +NAME + pcm.go + +DESCRIPTION + pcm.go contains functions for processing pcm. + +AUTHOR + Trek Hopton + +LICENSE + pcm.go is Copyright (C) 2019 the Australian Ocean Lab (AusOcean) + + It is free software: you can redistribute it and/or modify them + under the terms of the GNU General Public License as published by the + Free Software Foundation, either version 3 of the License, or (at your + option) any later version. + + It is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License in gpl.txt. + If not, see [GNU licenses](http://www.gnu.org/licenses). +*/ +package pcm + +import ( + "encoding/binary" + "fmt" + + "github.com/yobert/alsa" +) + +// Resample takes an alsa.Buffer (b) and resamples the pcm audio data to 'rate' Hz and returns the resulting pcm. +// If an error occurs, an error will be returned along with the original b's data. +// Notes: +// - Currently only downsampling is implemented and b's rate must be divisible by 'rate' or an error will occur. +// - If the number of bytes in b.Data is not divisible by the decimation factor (ratioFrom), the remaining bytes will +// not be included in the result. Eg. input of length 480002 downsampling 6:1 will result in output length 80000. +func Resample(b alsa.Buffer, rate int) ([]byte, error) { + fromRate := b.Format.Rate + if fromRate == rate { + return b.Data, nil + } else if fromRate < 0 { + return nil, fmt.Errorf("Unable to convert from: %v Hz", fromRate) + } else if rate < 0 { + return nil, fmt.Errorf("Unable to convert to: %v Hz", rate) + } + + // The number of bytes in a sample. + var sampleLen int + switch b.Format.SampleFormat { + case alsa.S32_LE: + sampleLen = 4 * b.Format.Channels + case alsa.S16_LE: + sampleLen = 2 * b.Format.Channels + default: + return nil, fmt.Errorf("Unhandled ALSA format: %v", b.Format.SampleFormat) + } + inPcmLen := len(b.Data) + + // Calculate sample rate ratio ratioFrom:ratioTo. + rateGcd := gcd(rate, fromRate) + ratioFrom := fromRate / rateGcd + ratioTo := rate / rateGcd + + // ratioTo = 1 is the only number that will result in an even sampling. + if ratioTo != 1 { + return nil, fmt.Errorf("unhandled from:to rate ratio %v:%v: 'to' must be 1", ratioFrom, ratioTo) + } + + newLen := inPcmLen / ratioFrom + result := make([]byte, 0, newLen) + + // For each new sample to be generated, loop through the respective 'ratioFrom' samples in 'b.Data' to add them + // up and average them. The result is the new sample. + bAvg := make([]byte, sampleLen) + for i := 0; i < newLen/sampleLen; i++ { + var sum int + for j := 0; j < ratioFrom; j++ { + switch b.Format.SampleFormat { + case alsa.S32_LE: + sum += int(int32(binary.LittleEndian.Uint32(b.Data[(i*ratioFrom*sampleLen)+(j*sampleLen) : (i*ratioFrom*sampleLen)+((j+1)*sampleLen)]))) + case alsa.S16_LE: + sum += int(int16(binary.LittleEndian.Uint16(b.Data[(i*ratioFrom*sampleLen)+(j*sampleLen) : (i*ratioFrom*sampleLen)+((j+1)*sampleLen)]))) + } + } + avg := sum / ratioFrom + switch b.Format.SampleFormat { + case alsa.S32_LE: + binary.LittleEndian.PutUint32(bAvg, uint32(avg)) + case alsa.S16_LE: + binary.LittleEndian.PutUint16(bAvg, uint16(avg)) + } + result = append(result, bAvg...) + } + return result, nil +} + +// StereoToMono returns raw mono audio data generated from only the left channel from +// the given stereo recording (ALSA buffer) +// if an error occurs, an error will be returned along with the original stereo data. +func StereoToMono(b alsa.Buffer) ([]byte, error) { + if b.Format.Channels == 1 { + return b.Data, nil + } else if b.Format.Channels != 2 { + return nil, fmt.Errorf("Audio is not stereo or mono, it has %v channels", b.Format.Channels) + } + + var stereoSampleBytes int + switch b.Format.SampleFormat { + case alsa.S32_LE: + stereoSampleBytes = 8 + case alsa.S16_LE: + stereoSampleBytes = 4 + default: + return nil, fmt.Errorf("Unhandled ALSA format %v", b.Format.SampleFormat) + } + + recLength := len(b.Data) + mono := make([]byte, recLength/2) + + // Convert to mono: for each byte in the stereo recording, if it's in the first half of a stereo sample + // (left channel), add it to the new mono audio data. + var inc int + for i := 0; i < recLength; i++ { + if i%stereoSampleBytes < stereoSampleBytes/2 { + mono[inc] = b.Data[i] + inc++ + } + } + + return mono, nil +} + +// gcd is used for calculating the greatest common divisor of two positive integers, a and b. +// assumes given a and b are positive. +func gcd(a, b int) int { + for b != 0 { + a, b = b, a%b + } + return a +} diff --git a/codec/pcm/pcm_test.go b/codec/pcm/pcm_test.go new file mode 100644 index 00000000..713d01d8 --- /dev/null +++ b/codec/pcm/pcm_test.go @@ -0,0 +1,118 @@ +/* +NAME + pcm_test.go + +DESCRIPTION + pcm_test.go contains functions for testing the pcm package. + +AUTHOR + Trek Hopton + +LICENSE + pcm_test.go is Copyright (C) 2019 the Australian Ocean Lab (AusOcean) + + It is free software: you can redistribute it and/or modify them + under the terms of the GNU General Public License as published by the + Free Software Foundation, either version 3 of the License, or (at your + option) any later version. + + It is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License in gpl.txt. + If not, see [GNU licenses](http://www.gnu.org/licenses). +*/ +package pcm + +import ( + "bytes" + "io/ioutil" + "log" + "testing" + + "github.com/yobert/alsa" +) + +// TestResample tests the Resample function using a pcm file that contains audio of a freq. sweep. +// The output of the Resample function is compared with a file containing the expected result. +func TestResample(t *testing.T) { + inPath := "../../../test/test-data/av/input/sweep_400Hz_20000Hz_-3dBFS_5s_48khz.pcm" + expPath := "../../../test/test-data/av/output/sweep_400Hz_20000Hz_resampled_48to8kHz.pcm" + + // Read input pcm. + inPcm, err := ioutil.ReadFile(inPath) + if err != nil { + log.Fatal(err) + } + + format := alsa.BufferFormat{ + Channels: 1, + Rate: 48000, + SampleFormat: alsa.S16_LE, + } + + buf := alsa.Buffer{ + Format: format, + Data: inPcm, + } + + // Resample pcm. + resampled, err := Resample(buf, 8000) + if err != nil { + log.Fatal(err) + } + + // Read expected resampled pcm. + exp, err := ioutil.ReadFile(expPath) + if err != nil { + log.Fatal(err) + } + + // Compare result with expected. + if !bytes.Equal(resampled, exp) { + t.Error("Resampled data does not match expected result.") + } +} + +// TestStereoToMono tests the StereoToMono function using a pcm file that contains stereo audio. +// The output of the StereoToMono function is compared with a file containing the expected mono audio. +func TestStereoToMono(t *testing.T) { + inPath := "../../../test/test-data/av/input/stereo_DTMF_tones.pcm" + expPath := "../../../test/test-data/av/output/mono_DTMF_tones.pcm" + + // Read input pcm. + inPcm, err := ioutil.ReadFile(inPath) + if err != nil { + log.Fatal(err) + } + + format := alsa.BufferFormat{ + Channels: 2, + Rate: 44100, + SampleFormat: alsa.S16_LE, + } + + buf := alsa.Buffer{ + Format: format, + Data: inPcm, + } + + // Convert audio. + mono, err := StereoToMono(buf) + if err != nil { + log.Fatal(err) + } + + // Read expected mono pcm. + exp, err := ioutil.ReadFile(expPath) + if err != nil { + log.Fatal(err) + } + + // Compare result with expected. + if !bytes.Equal(mono, exp) { + t.Error("Converted data does not match expected result.") + } +}