From 9a9a38dbe1902edf77c50e59e205b541127fd6b6 Mon Sep 17 00:00:00 2001 From: Trek H Date: Tue, 26 Mar 2019 15:43:11 +1030 Subject: [PATCH] pcm: simplified and improved efficiency of code. --- audio/pcm/pcm.go | 72 ++++++++++++------------ exp/pcm/resample/resample.go | 18 ++---- exp/pcm/stereo-to-mono/stereo-to-mono.go | 9 +-- 3 files changed, 45 insertions(+), 54 deletions(-) diff --git a/audio/pcm/pcm.go b/audio/pcm/pcm.go index b9ec7310..5ead3143 100644 --- a/audio/pcm/pcm.go +++ b/audio/pcm/pcm.go @@ -33,62 +33,62 @@ import ( "github.com/yobert/alsa" ) -// Resample takes an alsa.Buffer (fromBuf) and resamples the pcm audio data to 'toRate' Hz and returns the resulting pcm. -// If an error occurs, an error will be returned along with the original fromBuf's data. +// Resample takes an alsa.Buffer (b) and resamples the pcm audio data to 'rate' Hz and returns the resulting pcm. +// If an error occurs, an error will be returned along with the original b's data. // Notes: -// - Currently only downsampling is implemented and fromBuf's rate must be divisible by toRate or an error will occur. -// - If the number of bytes in fromBuf.Data is not divisible by the decimation factor (ratioFrom), the remaining bytes will +// - Currently only downsampling is implemented and b's rate must be divisible by 'rate' or an error will occur. +// - If the number of bytes in b.Data is not divisible by the decimation factor (ratioFrom), the remaining bytes will // not be included in the result. Eg. input of length 480002 downsampling 6:1 will result in output length 80000. -func Resample(fromBuf alsa.Buffer, toRate int) ([]byte, error) { - fromRate := fromBuf.Format.Rate - if fromRate == toRate { - return fromBuf.Data, nil +func Resample(b alsa.Buffer, rate int) ([]byte, error) { + fromRate := b.Format.Rate + if fromRate == rate { + return b.Data, nil } else if fromRate < 0 { - return fromBuf.Data, fmt.Errorf("Unable to convert from: %v Hz", fromRate) - } else if toRate < 0 { - return fromBuf.Data, fmt.Errorf("Unable to convert to: %v Hz", toRate) + return nil, fmt.Errorf("Unable to convert from: %v Hz", fromRate) + } else if rate < 0 { + return nil, fmt.Errorf("Unable to convert to: %v Hz", rate) } // The number of bytes in a sample. var sampleLen int - switch fromBuf.Format.SampleFormat { + switch b.Format.SampleFormat { case alsa.S32_LE: - sampleLen = 4 * fromBuf.Format.Channels + sampleLen = 4 * b.Format.Channels case alsa.S16_LE: - sampleLen = 2 * fromBuf.Format.Channels + sampleLen = 2 * b.Format.Channels default: - return fromBuf.Data, fmt.Errorf("Unhandled ALSA format: %v", fromBuf.Format.SampleFormat) + return nil, fmt.Errorf("Unhandled ALSA format: %v", b.Format.SampleFormat) } - inPcmLen := len(fromBuf.Data) + inPcmLen := len(b.Data) // Calculate sample rate ratio ratioFrom:ratioTo. - rateGcd := gcd(toRate, fromRate) + rateGcd := gcd(rate, fromRate) ratioFrom := fromRate / rateGcd - ratioTo := toRate / rateGcd + ratioTo := rate / rateGcd // ratioTo = 1 is the only number that will result in an even sampling. if ratioTo != 1 { - return fromBuf.Data, fmt.Errorf("%v:%v is an unhandled from:to rate ratio. must be n:1 for some rate n", ratioFrom, ratioTo) + return nil, fmt.Errorf("unhandled from:to rate ratio %v:%v: 'to' must be 1", ratioFrom, ratioTo) } newLen := inPcmLen / ratioFrom result := make([]byte, 0, newLen) - // For each new sample to be generated, loop through the respective 'ratioFrom' samples in 'fromBuf.Data' to add them + // For each new sample to be generated, loop through the respective 'ratioFrom' samples in 'b.Data' to add them // up and average them. The result is the new sample. + bAvg := make([]byte, sampleLen) for i := 0; i < newLen/sampleLen; i++ { var sum int for j := 0; j < ratioFrom; j++ { - switch fromBuf.Format.SampleFormat { + switch b.Format.SampleFormat { case alsa.S32_LE: - sum += int(int32(binary.LittleEndian.Uint32(fromBuf.Data[(i*ratioFrom*sampleLen)+(j*sampleLen) : (i*ratioFrom*sampleLen)+((j+1)*sampleLen)]))) + sum += int(int32(binary.LittleEndian.Uint32(b.Data[(i*ratioFrom*sampleLen)+(j*sampleLen) : (i*ratioFrom*sampleLen)+((j+1)*sampleLen)]))) case alsa.S16_LE: - sum += int(int16(binary.LittleEndian.Uint16(fromBuf.Data[(i*ratioFrom*sampleLen)+(j*sampleLen) : (i*ratioFrom*sampleLen)+((j+1)*sampleLen)]))) + sum += int(int16(binary.LittleEndian.Uint16(b.Data[(i*ratioFrom*sampleLen)+(j*sampleLen) : (i*ratioFrom*sampleLen)+((j+1)*sampleLen)]))) } } avg := sum / ratioFrom - bAvg := make([]byte, sampleLen) - switch fromBuf.Format.SampleFormat { + switch b.Format.SampleFormat { case alsa.S32_LE: binary.LittleEndian.PutUint32(bAvg, uint32(avg)) case alsa.S16_LE: @@ -102,24 +102,24 @@ func Resample(fromBuf alsa.Buffer, toRate int) ([]byte, error) { // StereoToMono returns raw mono audio data generated from only the left channel from // the given stereo recording (ALSA buffer) // if an error occurs, an error will be returned along with the original stereo data. -func StereoToMono(stereoBuf alsa.Buffer) ([]byte, error) { - if stereoBuf.Format.Channels == 1 { - return stereoBuf.Data, nil - } else if stereoBuf.Format.Channels != 2 { - return stereoBuf.Data, fmt.Errorf("Audio is not stereo or mono, it has %v channels", stereoBuf.Format.Channels) +func StereoToMono(b alsa.Buffer) ([]byte, error) { + if b.Format.Channels == 1 { + return b.Data, nil + } else if b.Format.Channels != 2 { + return nil, fmt.Errorf("Audio is not stereo or mono, it has %v channels", b.Format.Channels) } var stereoSampleBytes int - switch stereoBuf.Format.SampleFormat { + switch b.Format.SampleFormat { case alsa.S32_LE: stereoSampleBytes = 8 case alsa.S16_LE: stereoSampleBytes = 4 default: - return stereoBuf.Data, fmt.Errorf("Unhandled ALSA format %v", stereoBuf.Format.SampleFormat) + return nil, fmt.Errorf("Unhandled ALSA format %v", b.Format.SampleFormat) } - recLength := len(stereoBuf.Data) + recLength := len(b.Data) mono := make([]byte, recLength/2) // Convert to mono: for each byte in the stereo recording, if it's in the first half of a stereo sample @@ -127,7 +127,7 @@ func StereoToMono(stereoBuf alsa.Buffer) ([]byte, error) { var inc int for i := 0; i < recLength; i++ { if i%stereoSampleBytes < stereoSampleBytes/2 { - mono[inc] = stereoBuf.Data[i] + mono[inc] = b.Data[i] inc++ } } @@ -138,8 +138,8 @@ func StereoToMono(stereoBuf alsa.Buffer) ([]byte, error) { // gcd is used for calculating the greatest common divisor of two positive integers, a and b. // assumes given a and b are positive. func gcd(a, b int) int { - if b != 0 { - return gcd(b, a%b) + for b != 0 { + a, b = b, a%b } return a } diff --git a/exp/pcm/resample/resample.go b/exp/pcm/resample/resample.go index 2fef9f7c..aaa8f77c 100644 --- a/exp/pcm/resample/resample.go +++ b/exp/pcm/resample/resample.go @@ -39,18 +39,12 @@ import ( // This program accepts an input pcm file and outputs a resampled pcm file. // Input and output file names, to and from sample rates, channels and sample format can be specified as arguments. func main() { - var inPath string - var outPath string - var from int - var to int - var channels int - var sf string - flag.StringVar(&inPath, "in", "data.pcm", "file path of input data") - flag.StringVar(&outPath, "out", "resampled.pcm", "file path of output") - flag.IntVar(&from, "from", 48000, "sample rate of input file") - flag.IntVar(&to, "to", 8000, "sample rate of output file") - flag.IntVar(&channels, "ch", 1, "number of channels in input file") - flag.StringVar(&sf, "sf", "S16_LE", "sample format of input audio, eg. S16_LE") + var inPath = *flag.String("in", "data.pcm", "file path of input data") + var outPath = *flag.String("out", "resampled.pcm", "file path of output") + var from = *flag.Int("from", 48000, "sample rate of input file") + var to = *flag.Int("to", 8000, "sample rate of output file") + var channels = *flag.Int("ch", 1, "number of channels in input file") + var sf = *flag.String("sf", "S16_LE", "sample format of input audio, eg. S16_LE") flag.Parse() // Read pcm. diff --git a/exp/pcm/stereo-to-mono/stereo-to-mono.go b/exp/pcm/stereo-to-mono/stereo-to-mono.go index 69bc081b..231591f0 100644 --- a/exp/pcm/stereo-to-mono/stereo-to-mono.go +++ b/exp/pcm/stereo-to-mono/stereo-to-mono.go @@ -39,12 +39,9 @@ import ( // This program accepts an input pcm file and outputs a resampled pcm file. // Input and output file names, to and from sample rates, channels and sample format can be specified as arguments. func main() { - var inPath string - var outPath string - var sf string - flag.StringVar(&inPath, "in", "data.pcm", "file path of input data") - flag.StringVar(&outPath, "out", "mono.pcm", "file path of output") - flag.StringVar(&sf, "sf", "S16_LE", "sample format of input audio, eg. S16_LE") + var inPath = *flag.String("in", "data.pcm", "file path of input data") + var outPath = *flag.String("out", "mono.pcm", "file path of output") + var sf = *flag.String("sf", "S16_LE", "sample format of input audio, eg. S16_LE") flag.Parse() // Read pcm.