av/audio/pcm/pcm.go

125 lines
4.0 KiB
Go

package pcm
import (
"encoding/binary"
"fmt"
"github.com/yobert/alsa"
)
// Resample resamples pcm data (inPcm) from 'fromRate' Hz to 'toRate' Hz and returns the resulting pcm.
// If an error occurs, an error will be returned along with the original audio data.
//   - channels: number of interleaved channels in inPcm, must be at least 1.
//   - bitDepth: number of bits in a single sample, must be 16 or 32.
//
// Notes:
//   - Samples are read and written as signed little endian integers.
//   - If fromRate equals toRate the input is returned unchanged, without any further validation.
//   - Currently only downsampling is possible and fromRate must be divisible by toRate or an error will occur.
//   - Every channel is decimated independently: each output sample is the average (truncated towards zero)
//     of the 'ratioFrom' consecutive input samples of that same channel.
//   - If the number of bytes in 'inPcm' does not hold a whole number of decimation groups, the remaining bytes
//     will not be included in the result. Eg. 16 bit mono input of length 480002 downsampling 6:1 will result
//     in output length 80000.
func Resample(inPcm []byte, fromRate, toRate, channels, bitDepth int) ([]byte, error) {
	switch {
	case fromRate == toRate:
		return inPcm, nil
	case fromRate <= 0:
		// A zero rate would otherwise lead to a division by zero further down.
		return inPcm, fmt.Errorf("Unable to convert from: %v Hz", fromRate)
	case toRate <= 0:
		return inPcm, fmt.Errorf("Unable to convert to: %v Hz", toRate)
	case channels <= 0:
		return inPcm, fmt.Errorf("Unhandled number of channels: %v, must be at least 1", channels)
	}

	// The number of bytes in one sample of one channel.
	var bytesPerSample int
	switch bitDepth {
	case 32:
		bytesPerSample = 4
	case 16:
		bytesPerSample = 2
	default:
		return inPcm, fmt.Errorf("Unhandled bitDepth: %v, must be 16 or 32", bitDepth)
	}

	// The number of bytes in one frame (one sample for every channel).
	frameLen := bytesPerSample * channels

	// Calculate sample rate ratio ratioFrom:ratioTo.
	rateGcd := gcd(toRate, fromRate)
	ratioFrom := fromRate / rateGcd
	ratioTo := toRate / rateGcd

	// ratioTo = 1 is the only number that will result in an even sampling.
	if ratioTo != 1 {
		return inPcm, fmt.Errorf("%v:%v is an unhandled from:to rate ratio. must be n:1 for some rate n", ratioFrom, ratioTo)
	}

	// Only whole groups of 'ratioFrom' input frames produce an output frame; trailing bytes are dropped.
	outFrames := len(inPcm) / ratioFrom / frameLen
	groupLen := ratioFrom * frameLen
	result := make([]byte, outFrames*frameLen)

	for i := 0; i < outFrames; i++ {
		group := inPcm[i*groupLen : (i+1)*groupLen]
		out := result[i*frameLen : (i+1)*frameLen]

		for c := 0; c < channels; c++ {
			off := c * bytesPerSample

			// Accumulate in 64 bits so that summing 32 bit samples cannot overflow on 32 bit platforms.
			var sum int64
			for j := 0; j < ratioFrom; j++ {
				sample := group[j*frameLen+off:]
				if bytesPerSample == 4 {
					sum += int64(int32(binary.LittleEndian.Uint32(sample)))
				} else {
					sum += int64(int16(binary.LittleEndian.Uint16(sample)))
				}
			}

			avg := sum / int64(ratioFrom)
			if bytesPerSample == 4 {
				binary.LittleEndian.PutUint32(out[off:], uint32(avg))
			} else {
				binary.LittleEndian.PutUint16(out[off:], uint16(avg))
			}
		}
	}
	return result, nil
}
// StereoToMono returns raw mono audio data generated from only the left channel of
// the given stereo recording (ALSA buffer).
// If the buffer is already mono its data is returned as is.
// If an error occurs, an error will be returned along with the original stereo data.
//
// Notes:
//   - Only the S16_LE and S32_LE sample formats are handled.
//   - The first sample of every stereo frame is taken to be the left channel.
//   - Trailing bytes that do not make up a whole stereo frame are not included in the result.
func StereoToMono(stereoBuf alsa.Buffer) ([]byte, error) {
	bufChannels := stereoBuf.Format.Channels
	if bufChannels == 1 {
		return stereoBuf.Data, nil
	}
	if bufChannels != 2 {
		return stereoBuf.Data, fmt.Errorf("Audio is not stereo or mono, it has %v channels", bufChannels)
	}

	// The number of bytes in one stereo frame (left sample followed by right sample).
	var stereoSampleBytes int
	switch stereoBuf.Format.SampleFormat {
	case alsa.S32_LE:
		stereoSampleBytes = 8
	case alsa.S16_LE:
		stereoSampleBytes = 4
	default:
		return stereoBuf.Data, fmt.Errorf("Unhandled ALSA format %v", stereoBuf.Format.SampleFormat)
	}
	monoSampleBytes := stereoSampleBytes / 2

	// Work in whole frames so that a truncated final frame can never write past the end of the mono data.
	frames := len(stereoBuf.Data) / stereoSampleBytes
	mono := make([]byte, frames*monoSampleBytes)

	// Convert to mono: copy the first half (left channel) of every stereo frame.
	for f := 0; f < frames; f++ {
		src := f * stereoSampleBytes
		dst := f * monoSampleBytes
		copy(mono[dst:dst+monoSampleBytes], stereoBuf.Data[src:src+monoSampleBytes])
	}
	return mono, nil
}
// gcd returns the greatest common divisor of a and b using the Euclidean algorithm.
// Both a and b are expected to be positive.
func gcd(a, b int) int {
	// Repeatedly replace (a, b) with (b, a mod b) until the remainder is zero.
	for b != 0 {
		a, b = b, a%b
	}
	return a
}