mirror of https://bitbucket.org/ausocean/av.git
299 lines
7.1 KiB
Go
299 lines
7.1 KiB
Go
/*
NAME
  pcm.go

DESCRIPTION
  pcm.go contains functions for processing pcm.

AUTHOR
  Trek Hopton <trek@ausocean.org>

LICENSE
  pcm.go is Copyright (C) 2019 the Australian Ocean Lab (AusOcean)

  It is free software: you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the
  Free Software Foundation, either version 3 of the License, or (at your
  option) any later version.

  It is distributed in the hope that it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  for more details.

  You should have received a copy of the GNU General Public License in gpl.txt.
  If not, see [GNU licenses](http://www.gnu.org/licenses).
*/
|
|
|
|
// Package pcm provides functions for processing and converting pcm audio.
|
|
package pcm
|
|
|
|
import (
|
|
"encoding/binary"
|
|
"fmt"
|
|
|
|
"github.com/pkg/errors"
|
|
)
|
|
|
|
// SampleFormat identifies the format that the samples of a PCM Clip are stored in.
type SampleFormat int

// Unknown is the SampleFormat value used to represent an unknown format.
const Unknown SampleFormat = -1

// Common sample formats that are used. The values are consecutive, starting
// at zero with S8.
//
// There are many more formats than the ones listed here, see:
// https://linux.die.net/man/1/arecord
// https://trac.ffmpeg.org/wiki/audio%20types
const (
	S8 SampleFormat = iota
	U8
	S16_LE
	S16_BE
	U16_LE
	U16_BE
	S24_LE
	S24_BE
	U24_LE
	U24_BE
	S32_LE
	S32_BE
	U32_LE
	U32_BE
	FLOAT_LE
	FLOAT_BE
	FLOAT64_LE
	FLOAT64_BE
)
|
|
|
|
// ClipFormat contains the format for a PCM Clip.
|
|
type ClipFormat struct {
|
|
SFormat SampleFormat
|
|
Rate int
|
|
Channels int
|
|
}
|
|
|
|
// Clip contains a clip of PCM data and the format that it is in.
|
|
type Clip struct {
|
|
Format ClipFormat
|
|
Data []byte
|
|
}
|
|
|
|
// Resample takes Clip c and resamples the pcm audio data to 'rate' Hz and returns a Clip with the resampled data.
|
|
// Notes:
|
|
// - Currently only downsampling is implemented and c's rate must be divisible by 'rate' or an error will occur.
|
|
// - If the number of bytes in c.Data is not divisible by the decimation factor (ratioFrom), the remaining bytes will
|
|
// not be included in the result. Eg. input of length 480002 downsampling 6:1 will result in output length 80000.
|
|
func Resample(c Clip, rate int) (Clip, error) {
|
|
if c.Format.Rate == rate {
|
|
return c, nil
|
|
}
|
|
if c.Format.Rate < 0 {
|
|
return Clip{}, fmt.Errorf("Unable to convert from: %v Hz", c.Format.Rate)
|
|
}
|
|
if rate < 0 {
|
|
return Clip{}, fmt.Errorf("Unable to convert to: %v Hz", rate)
|
|
}
|
|
|
|
// The number of bytes in a sample.
|
|
var sampleLen int
|
|
switch c.Format.SFormat {
|
|
case S32_LE:
|
|
sampleLen = 4 * c.Format.Channels
|
|
case S16_LE:
|
|
sampleLen = 2 * c.Format.Channels
|
|
default:
|
|
return Clip{}, fmt.Errorf("Unhandled ALSA format: %v", c.Format.SFormat)
|
|
}
|
|
inPcmLen := len(c.Data)
|
|
|
|
// Calculate sample rate ratio ratioFrom:ratioTo.
|
|
rateGcd := gcd(rate, c.Format.Rate)
|
|
ratioFrom := c.Format.Rate / rateGcd
|
|
ratioTo := rate / rateGcd
|
|
|
|
// ratioTo = 1 is the only number that will result in an even sampling.
|
|
if ratioTo != 1 {
|
|
return Clip{}, fmt.Errorf("unhandled from:to rate ratio %v:%v: 'to' must be 1", ratioFrom, ratioTo)
|
|
}
|
|
|
|
newLen := inPcmLen / ratioFrom
|
|
resampled := make([]byte, 0, newLen)
|
|
|
|
// For each new sample to be generated, loop through the respective 'ratioFrom' samples in 'c.Data' to add them
|
|
// up and average them. The result is the new sample.
|
|
bAvg := make([]byte, sampleLen)
|
|
for i := 0; i < newLen/sampleLen; i++ {
|
|
var sum int
|
|
for j := 0; j < ratioFrom; j++ {
|
|
switch c.Format.SFormat {
|
|
case S32_LE:
|
|
sum += int(int32(binary.LittleEndian.Uint32(c.Data[(i*ratioFrom*sampleLen)+(j*sampleLen) : (i*ratioFrom*sampleLen)+((j+1)*sampleLen)])))
|
|
case S16_LE:
|
|
sum += int(int16(binary.LittleEndian.Uint16(c.Data[(i*ratioFrom*sampleLen)+(j*sampleLen) : (i*ratioFrom*sampleLen)+((j+1)*sampleLen)])))
|
|
}
|
|
}
|
|
avg := sum / ratioFrom
|
|
switch c.Format.SFormat {
|
|
case S32_LE:
|
|
binary.LittleEndian.PutUint32(bAvg, uint32(avg))
|
|
case S16_LE:
|
|
binary.LittleEndian.PutUint16(bAvg, uint16(avg))
|
|
}
|
|
resampled = append(resampled, bAvg...)
|
|
}
|
|
|
|
// Return a new Clip with resampled data.
|
|
return Clip{
|
|
Format: ClipFormat{
|
|
Channels: c.Format.Channels,
|
|
SFormat: c.Format.SFormat,
|
|
Rate: rate,
|
|
},
|
|
Data: resampled,
|
|
}, nil
|
|
}
|
|
|
|
// StereoToMono returns raw mono audio data generated from only the left channel from
|
|
// the given stereo Clip
|
|
func StereoToMono(c Clip) (Clip, error) {
|
|
if c.Format.Channels == 1 {
|
|
return c, nil
|
|
}
|
|
if c.Format.Channels != 2 {
|
|
return Clip{}, fmt.Errorf("Audio is not stereo or mono, it has %v channels", c.Format.Channels)
|
|
}
|
|
|
|
var stereoSampleBytes int
|
|
switch c.Format.SFormat {
|
|
case S32_LE:
|
|
stereoSampleBytes = 8
|
|
case S16_LE:
|
|
stereoSampleBytes = 4
|
|
default:
|
|
return Clip{}, fmt.Errorf("Unhandled sample format %v", c.Format.SFormat)
|
|
}
|
|
|
|
recLength := len(c.Data)
|
|
mono := make([]byte, recLength/2)
|
|
|
|
// Convert to mono: for each byte in the stereo recording, if it's in the first half of a stereo sample
|
|
// (left channel), add it to the new mono audio data.
|
|
var inc int
|
|
for i := 0; i < recLength; i++ {
|
|
if i%stereoSampleBytes < stereoSampleBytes/2 {
|
|
mono[inc] = c.Data[i]
|
|
inc++
|
|
}
|
|
}
|
|
|
|
// Return a new Clip with resampled data.
|
|
return Clip{
|
|
Format: ClipFormat{
|
|
Channels: 1,
|
|
SFormat: c.Format.SFormat,
|
|
Rate: c.Format.Rate,
|
|
},
|
|
Data: mono,
|
|
}, nil
|
|
}
|
|
|
|
// gcd returns the greatest common divisor of a and b, found by repeatedly
// replacing the pair (a, b) with (b, a mod b) until b is zero.
// a and b are assumed to be positive.
func gcd(a, b int) int {
	for {
		if b == 0 {
			return a
		}
		rem := a % b
		a = b
		b = rem
	}
}
|
|
|
|
// String returns the string representation of a SampleFormat.
|
|
func (f SampleFormat) String() string {
|
|
switch f {
|
|
case S8:
|
|
return "S8"
|
|
case U8:
|
|
return "U8"
|
|
case S16_LE:
|
|
return "S16_LE"
|
|
case S16_BE:
|
|
return "S16_BE"
|
|
case U16_LE:
|
|
return "U16_LE"
|
|
case U16_BE:
|
|
return "U16_BE"
|
|
case S24_LE:
|
|
return "S24_LE"
|
|
case S24_BE:
|
|
return "S24_BE"
|
|
case U24_LE:
|
|
return "U24_LE"
|
|
case U24_BE:
|
|
return "U24_BE"
|
|
case S32_LE:
|
|
return "S32_LE"
|
|
case S32_BE:
|
|
return "S32_BE"
|
|
case U32_LE:
|
|
return "U32_LE"
|
|
case U32_BE:
|
|
return "U32_BE"
|
|
case FLOAT_LE:
|
|
return "FLOAT_LE"
|
|
case FLOAT_BE:
|
|
return "FLOAT_BE"
|
|
case FLOAT64_LE:
|
|
return "FLOAT64_LE"
|
|
case FLOAT64_BE:
|
|
return "FLOAT64_BE"
|
|
default:
|
|
return fmt.Sprintf("Invalid FormatType (%d)", f)
|
|
}
|
|
}
|
|
|
|
// SFFromString takes a string representing a sample format and returns the corresponding SampleFormat.
|
|
func SFFromString(s string) (SampleFormat, error) {
|
|
switch s {
|
|
case "S8":
|
|
return S8, nil
|
|
case "U8":
|
|
return U8, nil
|
|
case "S16_LE":
|
|
return S16_LE, nil
|
|
case "S16_BE":
|
|
return S16_BE, nil
|
|
case "U16_LE":
|
|
return U16_LE, nil
|
|
case "U16_BE":
|
|
return U16_BE, nil
|
|
case "S24_LE":
|
|
return S24_LE, nil
|
|
case "S24_BE":
|
|
return S24_BE, nil
|
|
case "U24_LE":
|
|
return U24_LE, nil
|
|
case "U24_BE":
|
|
return U24_BE, nil
|
|
case "S32_LE":
|
|
return S32_LE, nil
|
|
case "S32_BE":
|
|
return S32_BE, nil
|
|
case "U32_LE":
|
|
return U32_LE, nil
|
|
case "U32_BE":
|
|
return U32_BE, nil
|
|
case "FLOAT_LE":
|
|
return FLOAT_LE, nil
|
|
case "FLOAT_BE":
|
|
return FLOAT_BE, nil
|
|
case "FLOAT64_LE":
|
|
return FLOAT64_LE, nil
|
|
case "FLOAT64_BE":
|
|
return FLOAT64_BE, nil
|
|
default:
|
|
return Unknown, errors.Errorf("Unknown FormatType (%d)", s)
|
|
}
|
|
}
|