mirror of https://bitbucket.org/ausocean/av.git
Merged in channel-and-rate-conversion (pull request #169)
Channel and rate conversion Approved-by: kortschak <dan@kortschak.io>
This commit is contained in:
commit
d491e1fb6d
|
@ -0,0 +1,145 @@
|
||||||
|
/*
|
||||||
|
NAME
|
||||||
|
pcm.go
|
||||||
|
|
||||||
|
DESCRIPTION
|
||||||
|
pcm.go contains functions for processing pcm.
|
||||||
|
|
||||||
|
AUTHOR
|
||||||
|
Trek Hopton <trek@ausocean.org>
|
||||||
|
|
||||||
|
LICENSE
|
||||||
|
pcm.go is Copyright (C) 2019 the Australian Ocean Lab (AusOcean)
|
||||||
|
|
||||||
|
It is free software: you can redistribute it and/or modify it
|
||||||
|
under the terms of the GNU General Public License as published by the
|
||||||
|
Free Software Foundation, either version 3 of the License, or (at your
|
||||||
|
option) any later version.
|
||||||
|
|
||||||
|
It is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License in gpl.txt.
|
||||||
|
If not, see [GNU licenses](http://www.gnu.org/licenses).
|
||||||
|
*/
|
||||||
|
package pcm
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/binary"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/yobert/alsa"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Resample takes an alsa.Buffer (b) and resamples the pcm audio data to 'rate' Hz and returns the resulting pcm.
|
||||||
|
// If an error occurs, an error will be returned along with the original b's data.
|
||||||
|
// Notes:
|
||||||
|
// - Currently only downsampling is implemented and b's rate must be divisible by 'rate' or an error will occur.
|
||||||
|
// - If the number of bytes in b.Data is not divisible by the decimation factor (ratioFrom), the remaining bytes will
|
||||||
|
// not be included in the result. Eg. input of length 480002 downsampling 6:1 will result in output length 80000.
|
||||||
|
func Resample(b alsa.Buffer, rate int) ([]byte, error) {
|
||||||
|
fromRate := b.Format.Rate
|
||||||
|
if fromRate == rate {
|
||||||
|
return b.Data, nil
|
||||||
|
} else if fromRate < 0 {
|
||||||
|
return nil, fmt.Errorf("Unable to convert from: %v Hz", fromRate)
|
||||||
|
} else if rate < 0 {
|
||||||
|
return nil, fmt.Errorf("Unable to convert to: %v Hz", rate)
|
||||||
|
}
|
||||||
|
|
||||||
|
// The number of bytes in a sample.
|
||||||
|
var sampleLen int
|
||||||
|
switch b.Format.SampleFormat {
|
||||||
|
case alsa.S32_LE:
|
||||||
|
sampleLen = 4 * b.Format.Channels
|
||||||
|
case alsa.S16_LE:
|
||||||
|
sampleLen = 2 * b.Format.Channels
|
||||||
|
default:
|
||||||
|
return nil, fmt.Errorf("Unhandled ALSA format: %v", b.Format.SampleFormat)
|
||||||
|
}
|
||||||
|
inPcmLen := len(b.Data)
|
||||||
|
|
||||||
|
// Calculate sample rate ratio ratioFrom:ratioTo.
|
||||||
|
rateGcd := gcd(rate, fromRate)
|
||||||
|
ratioFrom := fromRate / rateGcd
|
||||||
|
ratioTo := rate / rateGcd
|
||||||
|
|
||||||
|
// ratioTo = 1 is the only number that will result in an even sampling.
|
||||||
|
if ratioTo != 1 {
|
||||||
|
return nil, fmt.Errorf("unhandled from:to rate ratio %v:%v: 'to' must be 1", ratioFrom, ratioTo)
|
||||||
|
}
|
||||||
|
|
||||||
|
newLen := inPcmLen / ratioFrom
|
||||||
|
result := make([]byte, 0, newLen)
|
||||||
|
|
||||||
|
// For each new sample to be generated, loop through the respective 'ratioFrom' samples in 'b.Data' to add them
|
||||||
|
// up and average them. The result is the new sample.
|
||||||
|
bAvg := make([]byte, sampleLen)
|
||||||
|
for i := 0; i < newLen/sampleLen; i++ {
|
||||||
|
var sum int
|
||||||
|
for j := 0; j < ratioFrom; j++ {
|
||||||
|
switch b.Format.SampleFormat {
|
||||||
|
case alsa.S32_LE:
|
||||||
|
sum += int(int32(binary.LittleEndian.Uint32(b.Data[(i*ratioFrom*sampleLen)+(j*sampleLen) : (i*ratioFrom*sampleLen)+((j+1)*sampleLen)])))
|
||||||
|
case alsa.S16_LE:
|
||||||
|
sum += int(int16(binary.LittleEndian.Uint16(b.Data[(i*ratioFrom*sampleLen)+(j*sampleLen) : (i*ratioFrom*sampleLen)+((j+1)*sampleLen)])))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
avg := sum / ratioFrom
|
||||||
|
switch b.Format.SampleFormat {
|
||||||
|
case alsa.S32_LE:
|
||||||
|
binary.LittleEndian.PutUint32(bAvg, uint32(avg))
|
||||||
|
case alsa.S16_LE:
|
||||||
|
binary.LittleEndian.PutUint16(bAvg, uint16(avg))
|
||||||
|
}
|
||||||
|
result = append(result, bAvg...)
|
||||||
|
}
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// StereoToMono returns raw mono audio data generated from only the left channel from
|
||||||
|
// the given stereo recording (ALSA buffer)
|
||||||
|
// if an error occurs, an error will be returned along with the original stereo data.
|
||||||
|
func StereoToMono(b alsa.Buffer) ([]byte, error) {
|
||||||
|
if b.Format.Channels == 1 {
|
||||||
|
return b.Data, nil
|
||||||
|
} else if b.Format.Channels != 2 {
|
||||||
|
return nil, fmt.Errorf("Audio is not stereo or mono, it has %v channels", b.Format.Channels)
|
||||||
|
}
|
||||||
|
|
||||||
|
var stereoSampleBytes int
|
||||||
|
switch b.Format.SampleFormat {
|
||||||
|
case alsa.S32_LE:
|
||||||
|
stereoSampleBytes = 8
|
||||||
|
case alsa.S16_LE:
|
||||||
|
stereoSampleBytes = 4
|
||||||
|
default:
|
||||||
|
return nil, fmt.Errorf("Unhandled ALSA format %v", b.Format.SampleFormat)
|
||||||
|
}
|
||||||
|
|
||||||
|
recLength := len(b.Data)
|
||||||
|
mono := make([]byte, recLength/2)
|
||||||
|
|
||||||
|
// Convert to mono: for each byte in the stereo recording, if it's in the first half of a stereo sample
|
||||||
|
// (left channel), add it to the new mono audio data.
|
||||||
|
var inc int
|
||||||
|
for i := 0; i < recLength; i++ {
|
||||||
|
if i%stereoSampleBytes < stereoSampleBytes/2 {
|
||||||
|
mono[inc] = b.Data[i]
|
||||||
|
inc++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return mono, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// gcd returns the greatest common divisor of a and b using Euclid's algorithm.
// Both a and b are assumed to be positive.
func gcd(a, b int) int {
	if b == 0 {
		return a
	}
	return gcd(b, a%b)
}
|
|
@ -0,0 +1,118 @@
|
||||||
|
/*
|
||||||
|
NAME
|
||||||
|
pcm_test.go
|
||||||
|
|
||||||
|
DESCRIPTION
|
||||||
|
pcm_test.go contains functions for testing the pcm package.
|
||||||
|
|
||||||
|
AUTHOR
|
||||||
|
Trek Hopton <trek@ausocean.org>
|
||||||
|
|
||||||
|
LICENSE
|
||||||
|
pcm_test.go is Copyright (C) 2019 the Australian Ocean Lab (AusOcean)
|
||||||
|
|
||||||
|
It is free software: you can redistribute it and/or modify it
|
||||||
|
under the terms of the GNU General Public License as published by the
|
||||||
|
Free Software Foundation, either version 3 of the License, or (at your
|
||||||
|
option) any later version.
|
||||||
|
|
||||||
|
It is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License in gpl.txt.
|
||||||
|
If not, see [GNU licenses](http://www.gnu.org/licenses).
|
||||||
|
*/
|
||||||
|
package pcm
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"io/ioutil"
|
||||||
|
"log"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/yobert/alsa"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestResample tests the Resample function using a pcm file that contains audio of a freq. sweep.
|
||||||
|
// The output of the Resample function is compared with a file containing the expected result.
|
||||||
|
func TestResample(t *testing.T) {
|
||||||
|
inPath := "../../../test/test-data/av/input/sweep_400Hz_20000Hz_-3dBFS_5s_48khz.pcm"
|
||||||
|
expPath := "../../../test/test-data/av/output/sweep_400Hz_20000Hz_resampled_48to8kHz.pcm"
|
||||||
|
|
||||||
|
// Read input pcm.
|
||||||
|
inPcm, err := ioutil.ReadFile(inPath)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
format := alsa.BufferFormat{
|
||||||
|
Channels: 1,
|
||||||
|
Rate: 48000,
|
||||||
|
SampleFormat: alsa.S16_LE,
|
||||||
|
}
|
||||||
|
|
||||||
|
buf := alsa.Buffer{
|
||||||
|
Format: format,
|
||||||
|
Data: inPcm,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resample pcm.
|
||||||
|
resampled, err := Resample(buf, 8000)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read expected resampled pcm.
|
||||||
|
exp, err := ioutil.ReadFile(expPath)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compare result with expected.
|
||||||
|
if !bytes.Equal(resampled, exp) {
|
||||||
|
t.Error("Resampled data does not match expected result.")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestStereoToMono tests the StereoToMono function using a pcm file that contains stereo audio.
|
||||||
|
// The output of the StereoToMono function is compared with a file containing the expected mono audio.
|
||||||
|
func TestStereoToMono(t *testing.T) {
|
||||||
|
inPath := "../../../test/test-data/av/input/stereo_DTMF_tones.pcm"
|
||||||
|
expPath := "../../../test/test-data/av/output/mono_DTMF_tones.pcm"
|
||||||
|
|
||||||
|
// Read input pcm.
|
||||||
|
inPcm, err := ioutil.ReadFile(inPath)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
format := alsa.BufferFormat{
|
||||||
|
Channels: 2,
|
||||||
|
Rate: 44100,
|
||||||
|
SampleFormat: alsa.S16_LE,
|
||||||
|
}
|
||||||
|
|
||||||
|
buf := alsa.Buffer{
|
||||||
|
Format: format,
|
||||||
|
Data: inPcm,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert audio.
|
||||||
|
mono, err := StereoToMono(buf)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read expected mono pcm.
|
||||||
|
exp, err := ioutil.ReadFile(expPath)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compare result with expected.
|
||||||
|
if !bytes.Equal(mono, exp) {
|
||||||
|
t.Error("Converted data does not match expected result.")
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,90 @@
|
||||||
|
/*
|
||||||
|
NAME
|
||||||
|
resample.go
|
||||||
|
|
||||||
|
DESCRIPTION
|
||||||
|
resample.go is a program for resampling a pcm file.
|
||||||
|
|
||||||
|
AUTHOR
|
||||||
|
Trek Hopton <trek@ausocean.org>
|
||||||
|
|
||||||
|
LICENSE
|
||||||
|
resample.go is Copyright (C) 2018 the Australian Ocean Lab (AusOcean)
|
||||||
|
|
||||||
|
It is free software: you can redistribute it and/or modify it
|
||||||
|
under the terms of the GNU General Public License as published by the
|
||||||
|
Free Software Foundation, either version 3 of the License, or (at your
|
||||||
|
option) any later version.
|
||||||
|
|
||||||
|
It is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License in gpl.txt.
|
||||||
|
If not, see [GNU licenses](http://www.gnu.org/licenses).
|
||||||
|
*/
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
|
"log"
|
||||||
|
|
||||||
|
"bitbucket.org/ausocean/av/audio/pcm"
|
||||||
|
"github.com/yobert/alsa"
|
||||||
|
)
|
||||||
|
|
||||||
|
// This program accepts an input pcm file and outputs a resampled pcm file.
|
||||||
|
// Input and output file names, to and from sample rates, channels and sample format can be specified as arguments.
|
||||||
|
func main() {
|
||||||
|
var inPath = *flag.String("in", "data.pcm", "file path of input data")
|
||||||
|
var outPath = *flag.String("out", "resampled.pcm", "file path of output")
|
||||||
|
var from = *flag.Int("from", 48000, "sample rate of input file")
|
||||||
|
var to = *flag.Int("to", 8000, "sample rate of output file")
|
||||||
|
var channels = *flag.Int("ch", 1, "number of channels in input file")
|
||||||
|
var sf = *flag.String("sf", "S16_LE", "sample format of input audio, eg. S16_LE")
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
// Read pcm.
|
||||||
|
inPcm, err := ioutil.ReadFile(inPath)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
fmt.Println("Read", len(inPcm), "bytes from file", inPath)
|
||||||
|
|
||||||
|
var sampleFormat alsa.FormatType
|
||||||
|
switch sf {
|
||||||
|
case "S32_LE":
|
||||||
|
sampleFormat = alsa.S32_LE
|
||||||
|
case "S16_LE":
|
||||||
|
sampleFormat = alsa.S16_LE
|
||||||
|
default:
|
||||||
|
log.Fatalf("Unhandled ALSA format: %v", sf)
|
||||||
|
}
|
||||||
|
|
||||||
|
format := alsa.BufferFormat{
|
||||||
|
Channels: channels,
|
||||||
|
Rate: from,
|
||||||
|
SampleFormat: sampleFormat,
|
||||||
|
}
|
||||||
|
|
||||||
|
buf := alsa.Buffer{
|
||||||
|
Format: format,
|
||||||
|
Data: inPcm,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resample audio.
|
||||||
|
resampled, err := pcm.Resample(buf, to)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save resampled to file.
|
||||||
|
err = ioutil.WriteFile(outPath, resampled, 0644)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
fmt.Println("Encoded and wrote", len(resampled), "bytes to file", outPath)
|
||||||
|
}
|
|
@ -0,0 +1,86 @@
|
||||||
|
/*
|
||||||
|
NAME
|
||||||
|
stereo-to-mono.go
|
||||||
|
|
||||||
|
DESCRIPTION
|
||||||
|
stereo-to-mono.go is a program for converting a stereo pcm file to a mono pcm file.
|
||||||
|
|
||||||
|
AUTHOR
|
||||||
|
Trek Hopton <trek@ausocean.org>
|
||||||
|
|
||||||
|
LICENSE
|
||||||
|
stereo-to-mono.go is Copyright (C) 2018 the Australian Ocean Lab (AusOcean)
|
||||||
|
|
||||||
|
It is free software: you can redistribute it and/or modify it
|
||||||
|
under the terms of the GNU General Public License as published by the
|
||||||
|
Free Software Foundation, either version 3 of the License, or (at your
|
||||||
|
option) any later version.
|
||||||
|
|
||||||
|
It is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License in gpl.txt.
|
||||||
|
If not, see [GNU licenses](http://www.gnu.org/licenses).
|
||||||
|
*/
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
|
"log"
|
||||||
|
|
||||||
|
"bitbucket.org/ausocean/av/audio/pcm"
|
||||||
|
"github.com/yobert/alsa"
|
||||||
|
)
|
||||||
|
|
||||||
|
// This program accepts an input pcm file and outputs a resampled pcm file.
|
||||||
|
// Input and output file names, to and from sample rates, channels and sample format can be specified as arguments.
|
||||||
|
func main() {
|
||||||
|
var inPath = *flag.String("in", "data.pcm", "file path of input data")
|
||||||
|
var outPath = *flag.String("out", "mono.pcm", "file path of output")
|
||||||
|
var sf = *flag.String("sf", "S16_LE", "sample format of input audio, eg. S16_LE")
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
// Read pcm.
|
||||||
|
inPcm, err := ioutil.ReadFile(inPath)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
fmt.Println("Read", len(inPcm), "bytes from file", inPath)
|
||||||
|
|
||||||
|
var sampleFormat alsa.FormatType
|
||||||
|
switch sf {
|
||||||
|
case "S32_LE":
|
||||||
|
sampleFormat = alsa.S32_LE
|
||||||
|
case "S16_LE":
|
||||||
|
sampleFormat = alsa.S16_LE
|
||||||
|
default:
|
||||||
|
log.Fatalf("Unhandled ALSA format: %v", sf)
|
||||||
|
}
|
||||||
|
|
||||||
|
format := alsa.BufferFormat{
|
||||||
|
Channels: 2,
|
||||||
|
SampleFormat: sampleFormat,
|
||||||
|
}
|
||||||
|
|
||||||
|
buf := alsa.Buffer{
|
||||||
|
Format: format,
|
||||||
|
Data: inPcm,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert audio.
|
||||||
|
mono, err := pcm.StereoToMono(buf)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save mono to file.
|
||||||
|
err = ioutil.WriteFile(outPath, mono, 0644)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
fmt.Println("Encoded and wrote", len(mono), "bytes to file", outPath)
|
||||||
|
}
|
Loading…
Reference in New Issue