diff --git a/cmd/audio-netsender/main.go b/cmd/audio-netsender/main.go index ac9e662d..ade701aa 100644 --- a/cmd/audio-netsender/main.go +++ b/cmd/audio-netsender/main.go @@ -46,7 +46,7 @@ import ( "sync" "time" - "github.com/yobert/alsa" + yalsa "github.com/yobert/alsa" "bitbucket.org/ausocean/av/codec/pcm" "bitbucket.org/ausocean/iot/pi/netsender" @@ -78,9 +78,9 @@ type audioClient struct { parameters // internals - dev *alsa.Device // audio input device - ab alsa.Buffer // ALSA's buffer - rb *ring.Buffer // our buffer + dev *yalsa.Device // audio input device + pb pcm.Buffer // Buffer to contain the direct audio from ALSA. + rb *ring.Buffer // Ring buffer to contain processed audio ready to be read. ns *netsender.Sender // our NetSender vs int // our "var sum" to track var changes } @@ -132,12 +132,26 @@ func main() { // Open the requested audio device. err = ac.open() if err != nil { - log.Log(logger.Fatal, "alsa.open failed", "error", err.Error()) + log.Log(logger.Fatal, "yalsa.open failed", "error", err.Error()) } // Capture audio in periods of ac.period seconds, and buffer rbDuration seconds in total. 
- ac.ab = ac.dev.NewBufferDuration(time.Second * time.Duration(ac.period)) - recSize := (((len(ac.ab.Data) / ac.dev.BufferFormat().Channels) * ac.channels) / ac.dev.BufferFormat().Rate) * ac.rate + ab := ac.dev.NewBufferDuration(time.Second * time.Duration(ac.period)) + sf, err := pcm.SFFromString(ab.Format.SampleFormat.String()) + if err != nil { + log.Log(logger.Error, err.Error()) + } + cf := pcm.BufferFormat{ + SFormat: sf, + Channels: ab.Format.Channels, + Rate: ab.Format.Rate, + } + ac.pb = pcm.Buffer{ + Format: cf, + Data: ab.Data, + } + + recSize := (((len(ac.pb.Data) / ac.dev.BufferFormat().Channels) * ac.channels) / ac.dev.BufferFormat().Rate) * ac.rate rbLen := rbDuration / ac.period ac.rb = ring.NewBuffer(rbLen, recSize, rbTimeout) @@ -217,11 +231,11 @@ func (ac *audioClient) open() error { } log.Log(logger.Debug, "opening", "source", ac.source) - cards, err := alsa.OpenCards() + cards, err := yalsa.OpenCards() if err != nil { return err } - defer alsa.CloseCards(cards) + defer yalsa.CloseCards(cards) for _, card := range cards { devices, err := card.Devices() @@ -229,7 +243,7 @@ func (ac *audioClient) open() error { return err } for _, dev := range devices { - if dev.Type != alsa.PCM || !dev.Record { + if dev.Type != yalsa.PCM || !dev.Record { continue } if dev.Title == ac.source || ac.source == "" { @@ -287,12 +301,12 @@ func (ac *audioClient) open() error { log.Log(logger.Debug, "sample rate set", "rate", defaultFrameRate) } - var fmt alsa.FormatType + var fmt yalsa.FormatType switch ac.bits { case 16: - fmt = alsa.S16_LE + fmt = yalsa.S16_LE case 32: - fmt = alsa.S32_LE + fmt = yalsa.S32_LE default: return errors.New("unsupported sample bits") } @@ -318,7 +332,7 @@ func (ac *audioClient) open() error { // Re-opens the device and tries again if ASLA returns an error. // Spends a lot of time sleeping in Paused mode. // ToDo: Currently, reading audio and writing to the ringbuffer are synchronous. 
-// Need a way to asynchronously read from the ALSA buffer, i.e., _while_ it is recording to avoid any gaps. +// Need a way to asynchronously read from the buf, i.e., _while_ it is recording to avoid any gaps. func (ac *audioClient) input() { for { ac.mu.Lock() @@ -330,14 +344,14 @@ func (ac *audioClient) input() { } log.Log(logger.Debug, "recording audio for period", "seconds", ac.period) ac.mu.Lock() - err := ac.dev.Read(ac.ab.Data) + err := ac.dev.Read(ac.pb.Data) ac.mu.Unlock() if err != nil { log.Log(logger.Debug, "device.Read failed", "error", err.Error()) ac.mu.Lock() err = ac.open() // re-open if err != nil { - log.Log(logger.Fatal, "alsa.open failed", "error", err.Error()) + log.Log(logger.Fatal, "yalsa.open failed", "error", err.Error()) } ac.mu.Unlock() continue @@ -372,7 +386,7 @@ func (ac *audioClient) input() { // This function also handles NetReceiver configuration requests and updating of NetReceiver vars. func (ac *audioClient) output() { // Calculate the size of the output data based on wanted channels and rate. - outLen := (((len(ac.ab.Data) / ac.ab.Format.Channels) * ac.channels) / ac.ab.Format.Rate) * ac.rate + outLen := (((len(ac.pb.Data) / ac.pb.Format.Channels) * ac.channels) / ac.pb.Format.Rate) * ac.rate buf := make([]byte, outLen) mime := "audio/x-wav;codec=pcm;rate=" + strconv.Itoa(ac.rate) + ";channels=" + strconv.Itoa(ac.channels) + ";bits=" + strconv.Itoa(ac.bits) @@ -509,9 +523,9 @@ func read(rb *ring.Buffer, buf []byte) (int, error) { return n, nil } -// formatBuffer returns an ALSA buffer that has the recording data from the ac's original ALSA buffer but stored +// formatBuffer returns a Buffer that has the recording data from the ac's original Buffer but stored // in the desired format specified by the ac's parameters. 
-func (ac *audioClient) formatBuffer() alsa.Buffer { +func (ac *audioClient) formatBuffer() pcm.Buffer { var err error ac.mu.Lock() wantChannels := ac.channels @@ -519,17 +533,17 @@ func (ac *audioClient) formatBuffer() alsa.Buffer { ac.mu.Unlock() // If nothing needs to be changed, return the original. - if ac.ab.Format.Channels == wantChannels && ac.ab.Format.Rate == wantRate { - return ac.ab + if ac.pb.Format.Channels == wantChannels && ac.pb.Format.Rate == wantRate { + return ac.pb } - formatted := alsa.Buffer{Format: ac.ab.Format} + formatted := pcm.Buffer{Format: ac.pb.Format} bufCopied := false - if ac.ab.Format.Channels != wantChannels { + if ac.pb.Format.Channels != wantChannels { // Convert channels. - if ac.ab.Format.Channels == 2 && wantChannels == 1 { - if formatted, err = pcm.StereoToMono(ac.ab); err != nil { + if ac.pb.Format.Channels == 2 && wantChannels == 1 { + if formatted, err = pcm.StereoToMono(ac.pb); err != nil { log.Log(logger.Warning, "channel conversion failed, audio has remained stereo", "error", err.Error()) } else { formatted.Format.Channels = 1 @@ -538,13 +552,13 @@ func (ac *audioClient) formatBuffer() alsa.Buffer { } } - if ac.ab.Format.Rate != wantRate { + if ac.pb.Format.Rate != wantRate { // Convert rate. if bufCopied { formatted, err = pcm.Resample(formatted, wantRate) } else { - formatted, err = pcm.Resample(ac.ab, wantRate) + formatted, err = pcm.Resample(ac.pb, wantRate) } if err != nil { log.Log(logger.Warning, "rate conversion failed, audio has remained original rate", "error", err.Error()) diff --git a/codec/pcm/pcm.go b/codec/pcm/pcm.go index 8093401e..9c0b8c6e 100644 --- a/codec/pcm/pcm.go +++ b/codec/pcm/pcm.go @@ -32,105 +32,135 @@ import ( "encoding/binary" "fmt" - "github.com/yobert/alsa" + "github.com/pkg/errors" ) -// Resample takes alsa.Buffer b and resamples the pcm audio data to 'rate' Hz and returns an alsa.Buffer with the resampled data. +// SampleFormat is the format that a PCM Buffer's samples can be in. 
// SampleFormat values identify how each sample in a PCM Buffer is encoded.
// Note that S16_LE is the zero value of the type, so a BufferFormat that never
// had its SFormat set reports S16_LE rather than Unknown.
type SampleFormat int

// Unknown is returned when a sample format is not recognised by this package.
const (
	Unknown SampleFormat = -1
)

// Sample formats that we use.
const (
	S16_LE SampleFormat = iota
	S32_LE
	// There are many more:
	// https://linux.die.net/man/1/arecord
	// https://trac.ffmpeg.org/wiki/audio%20types
)

// BufferFormat contains the format for a PCM Buffer.
type BufferFormat struct {
	SFormat  SampleFormat // Encoding of each individual sample.
	Rate     int          // Sample rate in Hz.
	Channels int          // Number of interleaved channels.
}

// Buffer contains a buffer of PCM data and the format that it is in.
type Buffer struct {
	Format BufferFormat
	Data   []byte
}

// bytesPerSample returns the size in bytes of one sample (of one channel) in
// format f, or an error if the format is not handled by this package.
func bytesPerSample(f SampleFormat) (int, error) {
	switch f {
	case S32_LE:
		return 4, nil
	case S16_LE:
		return 2, nil
	default:
		return 0, fmt.Errorf("Unhandled sample format %v", f)
	}
}

// Resample takes Buffer c and resamples the pcm audio data to 'rate' Hz and returns a Buffer with the resampled data.
// Notes:
//   - Currently only downsampling is implemented and c's rate must be divisible by 'rate' or an error will occur.
//   - If c is already at 'rate' Hz it is returned unchanged.
//   - Rates and channel counts that are zero or negative are rejected with an error (they would otherwise
//     lead to a division by zero).
//   - Each channel of interleaved multi-channel audio is averaged independently.
//   - If the number of bytes in c.Data is not divisible by the decimation factor (ratioFrom), the remaining bytes will
//     not be included in the result. Eg. input of length 480002 downsampling 6:1 will result in output length 80000.
func Resample(c Buffer, rate int) (Buffer, error) {
	if c.Format.Rate == rate {
		return c, nil
	}
	if c.Format.Rate <= 0 {
		return Buffer{}, fmt.Errorf("Unable to convert from: %v Hz", c.Format.Rate)
	}
	if rate <= 0 {
		return Buffer{}, fmt.Errorf("Unable to convert to: %v Hz", rate)
	}
	if c.Format.Channels <= 0 {
		return Buffer{}, fmt.Errorf("Unable to resample audio with %v channels", c.Format.Channels)
	}

	// The number of bytes in a single channel's sample.
	sampleLen, err := bytesPerSample(c.Format.SFormat)
	if err != nil {
		return Buffer{}, err
	}

	// Calculate sample rate ratio ratioFrom:ratioTo.
	rateGcd := gcd(rate, c.Format.Rate)
	ratioFrom := c.Format.Rate / rateGcd
	ratioTo := rate / rateGcd

	// ratioTo = 1 is the only number that will result in an even sampling.
	if ratioTo != 1 {
		return Buffer{}, fmt.Errorf("unhandled from:to rate ratio %v:%v: 'to' must be 1", ratioFrom, ratioTo)
	}

	// A frame holds one sample for every channel.
	frameLen := sampleLen * c.Format.Channels
	nFrames := (len(c.Data) / ratioFrom) / frameLen
	resampled := make([]byte, nFrames*frameLen)

	// For each output frame, average the corresponding 'ratioFrom' input frames,
	// channel by channel. The sum is kept in an int64 so that adding 32-bit
	// samples cannot overflow on platforms where int is 32 bits wide.
	for i := 0; i < nFrames; i++ {
		in := c.Data[i*ratioFrom*frameLen:]
		out := resampled[i*frameLen:]
		for ch := 0; ch < c.Format.Channels; ch++ {
			var sum int64
			for j := 0; j < ratioFrom; j++ {
				off := j*frameLen + ch*sampleLen
				switch c.Format.SFormat {
				case S32_LE:
					sum += int64(int32(binary.LittleEndian.Uint32(in[off:])))
				case S16_LE:
					sum += int64(int16(binary.LittleEndian.Uint16(in[off:])))
				}
			}
			avg := sum / int64(ratioFrom)
			switch c.Format.SFormat {
			case S32_LE:
				binary.LittleEndian.PutUint32(out[ch*sampleLen:], uint32(avg))
			case S16_LE:
				binary.LittleEndian.PutUint16(out[ch*sampleLen:], uint16(avg))
			}
		}
	}

	// Return a new Buffer with resampled data.
	return Buffer{
		Format: BufferFormat{
			Channels: c.Format.Channels,
			SFormat:  c.Format.SFormat,
			Rate:     rate,
		},
		Data: resampled,
	}, nil
}

// StereoToMono returns raw mono audio data generated from only the left channel from
// the given stereo Buffer. A Buffer that is already mono is returned unchanged, and any
// other channel count or an unhandled sample format results in an error. Trailing bytes
// that do not make up a whole stereo frame are not included in the result.
func StereoToMono(c Buffer) (Buffer, error) {
	if c.Format.Channels == 1 {
		return c, nil
	}
	if c.Format.Channels != 2 {
		return Buffer{}, fmt.Errorf("Audio is not stereo or mono, it has %v channels", c.Format.Channels)
	}

	sampleLen, err := bytesPerSample(c.Format.SFormat)
	if err != nil {
		return Buffer{}, err
	}

	// Only whole frames are converted; sizing the output from len(c.Data)/2 would
	// let a partial trailing frame write past the end of the mono slice.
	stereoSampleBytes := 2 * sampleLen
	nFrames := len(c.Data) / stereoSampleBytes
	mono := make([]byte, nFrames*sampleLen)

	// Convert to mono: the first half of every stereo frame is the left channel's
	// sample, so copy just that half into the mono recording.
	for i := 0; i < nFrames; i++ {
		start := i * stereoSampleBytes
		copy(mono[i*sampleLen:(i+1)*sampleLen], c.Data[start:start+sampleLen])
	}

	// Return a new Buffer with the mono data.
	return Buffer{
		Format: BufferFormat{
			Channels: 1,
			SFormat:  c.Format.SFormat,
			Rate:     c.Format.Rate,
		},
		Data: mono,
	}, nil
}

// gcd returns the greatest common divisor of a and b.
// NOTE(review): only the tail of this helper is visible in the diff; the loop
// below is the standard Euclidean form - confirm it matches the existing code.
func gcd(a, b int) int {
	for b != 0 {
		a, b = b, a%b
	}
	return a
}

// String returns the string representation of a SampleFormat. Any value other
// than S16_LE or S32_LE is reported as "Unknown".
func (f SampleFormat) String() string {
	switch f {
	case S16_LE:
		return "S16_LE"
	case S32_LE:
		return "S32_LE"
	default:
		return "Unknown"
	}
}

// SFFromString takes a string representing a sample format and returns the corresponding SampleFormat.
// It returns Unknown and a non-nil error if the string does not name a handled format.
func SFFromString(s string) (SampleFormat, error) {
	switch s {
	case "S16_LE":
		return S16_LE, nil
	case "S32_LE":
		return S32_LE, nil
	default:
		// errors.New is used (rather than a formatting helper) because it is
		// provided by both the standard library and github.com/pkg/errors.
		return Unknown, errors.New("unknown sample format (" + s + ")")
	}
}
title string // Name of audio title, or empty for the default title. dev *yalsa.Device // ALSA device's Audio input device. - ab yalsa.Buffer // ALSA device's buffer. - rb *ring.Buffer // Our buffer. + pb pcm.Buffer // Buffer to contain the direct audio from ALSA. + rb *ring.Buffer // Ring buffer to contain processed audio ready to be read. chunkSize int // This is the number of bytes that will be stored in rb at a time. Config // Configuration parameters for this device. } @@ -133,10 +133,24 @@ func (d *ALSA) Set(c config.Config) error { } // Setup the device to record with desired period. - d.ab = d.dev.NewBufferDuration(time.Duration(d.RecPeriod * float64(time.Second))) + ab := d.dev.NewBufferDuration(time.Duration(d.RecPeriod * float64(time.Second))) + sf, err := pcm.SFFromString(ab.Format.SampleFormat.String()) + if err != nil { + d.l.Log(logger.Error, pkg+err.Error()) + return err + } + cf := pcm.BufferFormat{ + SFormat: sf, + Channels: ab.Format.Channels, + Rate: ab.Format.Rate, + } + d.pb = pcm.Buffer{ + Format: cf, + Data: ab.Data, + } // Account for channel conversion. - chunkSize := float64(len(d.ab.Data) / d.dev.BufferFormat().Channels * d.Channels) + chunkSize := float64(len(d.pb.Data) / d.dev.BufferFormat().Channels * d.Channels) // Account for resampling. chunkSize = (chunkSize / float64(d.dev.BufferFormat().Rate)) * float64(d.SampleRate) @@ -373,7 +387,7 @@ func (d *ALSA) input() { // Read from audio device. d.l.Log(logger.Debug, pkg+"recording audio for period", "seconds", d.RecPeriod) - err := d.dev.Read(d.ab.Data) + err := d.dev.Read(d.pb.Data) if err != nil { d.l.Log(logger.Debug, pkg+"read failed", "error", err.Error()) err = d.open() // re-open @@ -415,26 +429,26 @@ func (d *ALSA) Read(p []byte) (int, error) { } // formatBuffer returns audio that has been converted to the desired format. 
-func (d *ALSA) formatBuffer() yalsa.Buffer { +func (d *ALSA) formatBuffer() pcm.Buffer { var err error // If nothing needs to be changed, return the original. - if d.ab.Format.Channels == d.Channels && d.ab.Format.Rate == d.SampleRate { - return d.ab + if d.pb.Format.Channels == d.Channels && d.pb.Format.Rate == d.SampleRate { + return d.pb } - var formatted yalsa.Buffer - if d.ab.Format.Channels != d.Channels { + var formatted pcm.Buffer + if d.pb.Format.Channels != d.Channels { // Convert channels. // TODO(Trek): Make this work for conversions other than stereo to mono. - if d.ab.Format.Channels == 2 && d.Channels == 1 { - formatted, err = pcm.StereoToMono(d.ab) + if d.pb.Format.Channels == 2 && d.Channels == 1 { + formatted, err = pcm.StereoToMono(d.pb) if err != nil { d.l.Log(logger.Fatal, pkg+"channel conversion failed", "error", err.Error()) } } } - if d.ab.Format.Rate != d.SampleRate { + if d.pb.Format.Rate != d.SampleRate { // Convert rate. formatted, err = pcm.Resample(formatted, d.SampleRate) if err != nil { diff --git a/exp/pcm/resample/resample.go b/exp/pcm/resample/resample.go index f7f5342e..6c7106b6 100644 --- a/exp/pcm/resample/resample.go +++ b/exp/pcm/resample/resample.go @@ -32,7 +32,6 @@ import ( "log" "bitbucket.org/ausocean/av/codec/pcm" - "github.com/yobert/alsa" ) // This program accepts an input pcm file and outputs a resampled pcm file. @@ -43,7 +42,7 @@ func main() { var from = *flag.Int("from", 48000, "sample rate of input file") var to = *flag.Int("to", 8000, "sample rate of output file") var channels = *flag.Int("ch", 1, "number of channels in input file") - var sf = *flag.String("sf", "S16_LE", "sample format of input audio, eg. S16_LE") + var SFString = *flag.String("sf", "S16_LE", "sample format of input audio, eg. S16_LE") flag.Parse() // Read pcm. 
@@ -53,23 +52,23 @@ func main() { } fmt.Println("Read", len(inPcm), "bytes from file", inPath) - var sampleFormat alsa.FormatType - switch sf { + var sf pcm.SampleFormat + switch SFString { case "S32_LE": - sampleFormat = alsa.S32_LE + sf = pcm.S32_LE case "S16_LE": - sampleFormat = alsa.S16_LE + sf = pcm.S16_LE default: - log.Fatalf("Unhandled ALSA format: %v", sf) + log.Fatalf("Unhandled ALSA format: %v", SFString) } - format := alsa.BufferFormat{ - Channels: channels, - Rate: from, - SampleFormat: sampleFormat, + format := pcm.BufferFormat{ + Channels: channels, + Rate: from, + SFormat: sf, } - buf := alsa.Buffer{ + buf := pcm.Buffer{ Format: format, Data: inPcm, } diff --git a/exp/pcm/stereo-to-mono/stereo-to-mono.go b/exp/pcm/stereo-to-mono/stereo-to-mono.go index 729caa96..84700737 100644 --- a/exp/pcm/stereo-to-mono/stereo-to-mono.go +++ b/exp/pcm/stereo-to-mono/stereo-to-mono.go @@ -32,7 +32,6 @@ import ( "log" "bitbucket.org/ausocean/av/codec/pcm" - "github.com/yobert/alsa" ) // This program accepts an input pcm file and outputs a resampled pcm file. @@ -40,7 +39,7 @@ import ( func main() { var inPath = *flag.String("in", "data.pcm", "file path of input data") var outPath = *flag.String("out", "mono.pcm", "file path of output") - var sf = *flag.String("sf", "S16_LE", "sample format of input audio, eg. S16_LE") + var SFString = *flag.String("sf", "S16_LE", "sample format of input audio, eg. S16_LE") flag.Parse() // Read pcm. 
@@ -50,22 +49,22 @@ func main() { } fmt.Println("Read", len(inPcm), "bytes from file", inPath) - var sampleFormat alsa.FormatType - switch sf { + var sf pcm.SampleFormat + switch SFString { case "S32_LE": - sampleFormat = alsa.S32_LE + sf = pcm.S32_LE case "S16_LE": - sampleFormat = alsa.S16_LE + sf = pcm.S16_LE default: - log.Fatalf("Unhandled ALSA format: %v", sf) + log.Fatalf("Unhandled sample format: %v", SFString) } - format := alsa.BufferFormat{ - Channels: 2, - SampleFormat: sampleFormat, + format := pcm.BufferFormat{ + Channels: 2, + SFormat: sf, } - buf := alsa.Buffer{ + buf := pcm.Buffer{ Format: format, Data: inPcm, }