Merged in revid-audio (pull request #204)

Revid audio

Approved-by: kortschak <dan@kortschak.io>
This commit is contained in:
Trek Hopton 2019-08-05 01:57:35 +00:00
commit db659ab051
18 changed files with 970 additions and 73 deletions

View File

@ -8,6 +8,7 @@ DESCRIPTION
AUTHORS AUTHORS
Saxon A. Nelson-Milton <saxon@ausocean.org> Saxon A. Nelson-Milton <saxon@ausocean.org>
Jack Richardson <jack@ausocean.org> Jack Richardson <jack@ausocean.org>
Trek Hopton <trek@ausocean.org>
LICENSE LICENSE
revid-cli is Copyright (C) 2017-2018 the Australian Ocean Lab (AusOcean) revid-cli is Copyright (C) 2017-2018 the Australian Ocean Lab (AusOcean)
@ -26,6 +27,7 @@ LICENSE
along with revid in gpl.txt. If not, see http://www.gnu.org/licenses. along with revid in gpl.txt. If not, see http://www.gnu.org/licenses.
*/ */
// revid-cli is a command line interface for revid.
package main package main
import ( import (
@ -36,6 +38,7 @@ import (
"strings" "strings"
"time" "time"
"bitbucket.org/ausocean/av/codec/codecutil"
"bitbucket.org/ausocean/av/container/mts" "bitbucket.org/ausocean/av/container/mts"
"bitbucket.org/ausocean/av/container/mts/meta" "bitbucket.org/ausocean/av/container/mts/meta"
"bitbucket.org/ausocean/av/revid" "bitbucket.org/ausocean/av/revid"
@ -105,9 +108,9 @@ func handleFlags() revid.Config {
var ( var (
cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`") cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`")
inputPtr = flag.String("Input", "", "The input type: Raspivid, File, Webcam, RTSP") inputCodecPtr = flag.String("InputCodec", "", "The codec of the input: H264, Mjpeg, PCM, ADPCM")
inputPtr = flag.String("Input", "", "The input type: Raspivid, File, v4l, Audio, RTSP")
rtspURLPtr = flag.String("RTSPURL", "", "The URL for an RTSP server.") rtspURLPtr = flag.String("RTSPURL", "", "The URL for an RTSP server.")
inputCodecPtr = flag.String("InputCodec", "", "The codec of the input: H264, Mjpeg")
quantizePtr = flag.Bool("Quantize", false, "Quantize input (non-variable bitrate)") quantizePtr = flag.Bool("Quantize", false, "Quantize input (non-variable bitrate)")
verbosityPtr = flag.String("Verbosity", "Info", "Verbosity: Debug, Info, Warning, Error, Fatal") verbosityPtr = flag.String("Verbosity", "Info", "Verbosity: Debug, Info, Warning, Error, Fatal")
rtpAddrPtr = flag.String("RtpAddr", "", "Rtp destination address: <IP>:<port> (port is generally 6970-6999)") rtpAddrPtr = flag.String("RtpAddr", "", "Rtp destination address: <IP>:<port> (port is generally 6970-6999)")
@ -131,6 +134,12 @@ func handleFlags() revid.Config {
saturationPtr = flag.Int("Saturation", 0, "Set Saturation. (100-100)") saturationPtr = flag.Int("Saturation", 0, "Set Saturation. (100-100)")
exposurePtr = flag.String("Exposure", "auto", "Set exposure mode. ("+strings.Join(revid.ExposureModes[:], ",")+")") exposurePtr = flag.String("Exposure", "auto", "Set exposure mode. ("+strings.Join(revid.ExposureModes[:], ",")+")")
autoWhiteBalancePtr = flag.String("Awb", "auto", "Set automatic white balance mode. ("+strings.Join(revid.AutoWhiteBalanceModes[:], ",")+")") autoWhiteBalancePtr = flag.String("Awb", "auto", "Set automatic white balance mode. ("+strings.Join(revid.AutoWhiteBalanceModes[:], ",")+")")
// Audio specific flags.
sampleRatePtr = flag.Int("SampleRate", 48000, "Sample rate of recorded audio")
channelsPtr = flag.Int("Channels", 1, "Record in Mono or Stereo (1 or 2)")
recPeriodPtr = flag.Float64("recPeriod", 1, "How many seconds to record at a time")
bitDepthPtr = flag.Int("bitDepth", 16, "Bit Depth to record audio at.")
) )
var outputs flagStrings var outputs flagStrings
@ -179,6 +188,8 @@ func handleFlags() revid.Config {
cfg.Input = revid.V4L cfg.Input = revid.V4L
case "File": case "File":
cfg.Input = revid.File cfg.Input = revid.File
case "Audio":
cfg.Input = revid.Audio
case "RTSP": case "RTSP":
cfg.Input = revid.RTSP cfg.Input = revid.RTSP
case "": case "":
@ -188,12 +199,23 @@ func handleFlags() revid.Config {
switch *inputCodecPtr { switch *inputCodecPtr {
case "H264": case "H264":
cfg.InputCodec = revid.H264 cfg.InputCodec = codecutil.H264
case "PCM":
cfg.InputCodec = codecutil.PCM
case "ADPCM":
cfg.InputCodec = codecutil.ADPCM
case "": case "":
default: default:
log.Log(logger.Error, pkg+"bad input codec argument") log.Log(logger.Error, pkg+"bad input codec argument")
} }
switch *inputPtr {
case "Audio":
cfg.WriteRate = 1.0 / (*recPeriodPtr)
default:
cfg.WriteRate = float64(*frameRatePtr)
}
for _, o := range outputs { for _, o := range outputs {
switch o { switch o {
case "File": case "File":
@ -235,6 +257,10 @@ func handleFlags() revid.Config {
cfg.Saturation = *saturationPtr cfg.Saturation = *saturationPtr
cfg.Exposure = *exposurePtr cfg.Exposure = *exposurePtr
cfg.AutoWhiteBalance = *autoWhiteBalancePtr cfg.AutoWhiteBalance = *autoWhiteBalancePtr
cfg.SampleRate = *sampleRatePtr
cfg.Channels = *channelsPtr
cfg.RecPeriod = *recPeriodPtr
cfg.BitDepth = *bitDepthPtr
return cfg return cfg
} }

84
codec/codecutil/lex.go Normal file
View File

@ -0,0 +1,84 @@
/*
NAME
lex.go
AUTHOR
Trek Hopton <trek@ausocean.org>
LICENSE
This file is Copyright (C) 2019 the Australian Ocean Lab (AusOcean)
It is free software: you can redistribute it and/or modify them
under the terms of the GNU General Public License as published by the
Free Software Foundation, either version 3 of the License, or (at your
option) any later version.
It is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License in gpl.txt.
If not, see [GNU licenses](http://www.gnu.org/licenses).
*/
package codecutil
import (
"fmt"
"io"
"time"
)
// ByteLexer is used to lex bytes using a buffer size which is configured upon construction.
type ByteLexer struct {
bufSize *int
}
// NewByteLexer returns a pointer to a ByteLexer with the given buffer size.
func NewByteLexer(bufSize *int) *ByteLexer {
return &ByteLexer{bufSize: bufSize}
}
// zeroTicks can be used to create an instant ticker.
var zeroTicks chan time.Time
func init() {
zeroTicks = make(chan time.Time)
close(zeroTicks)
}
// Lex reads *l.bufSize bytes from src and writes them to dst every d seconds.
func (l *ByteLexer) Lex(dst io.Writer, src io.Reader, d time.Duration) error {
if l.bufSize == nil {
return fmt.Errorf("buffer size has not been set")
}
bufSize := *l.bufSize
if bufSize <= 0 {
return fmt.Errorf("invalid buffer size: %v", bufSize)
}
if d < 0 {
return fmt.Errorf("invalid delay: %v", d)
}
var ticker *time.Ticker
if d == 0 {
ticker = &time.Ticker{C: zeroTicks}
} else {
ticker = time.NewTicker(d)
defer ticker.Stop()
}
buf := make([]byte, bufSize)
for {
<-ticker.C
off, err := src.Read(buf)
if err != nil {
return err
}
_, err = dst.Write(buf[:off])
if err != nil {
return err
}
}
}

View File

@ -0,0 +1,65 @@
/*
NAME
lex_test.go
AUTHOR
Trek Hopton <trek@ausocean.org>
LICENSE
This file is Copyright (C) 2019 the Australian Ocean Lab (AusOcean)
It is free software: you can redistribute it and/or modify them
under the terms of the GNU General Public License as published by the
Free Software Foundation, either version 3 of the License, or (at your
option) any later version.
It is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License in gpl.txt.
If not, see [GNU licenses](http://www.gnu.org/licenses).
*/
package codecutil
import (
"bytes"
"io"
"strconv"
"testing"
"time"
)
var lexTests = []struct {
data []byte
t time.Duration
n int
isValid bool // Whether or not this test should fail.
}{
{[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Millisecond, 4, true},
{[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Millisecond, 3, true},
{[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, 0, 2, true},
{[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, 0, 1, true},
{[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Nanosecond, 0, false},
{[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Millisecond, -1, false},
{[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Millisecond, 15, true},
}
func TestByteLexer(t *testing.T) {
for i, tt := range lexTests {
t.Run(strconv.Itoa(i), func(t *testing.T) {
dst := bytes.NewBuffer([]byte{})
l := NewByteLexer(&tt.n)
err := l.Lex(dst, bytes.NewReader(tt.data), tt.t)
if err != nil && err != io.EOF {
if tt.isValid {
t.Errorf("unexpected error: %v", err)
}
} else if !bytes.Equal(dst.Bytes(), tt.data) {
t.Errorf("data before and after lex are not equal: want %v, got %v", tt.data, dst.Bytes())
}
})
}
}

43
codec/codecutil/list.go Normal file
View File

@ -0,0 +1,43 @@
/*
NAME
list.go
AUTHOR
Trek Hopton <trek@ausocean.org>
LICENSE
This file is Copyright (C) 2019 the Australian Ocean Lab (AusOcean)
It is free software: you can redistribute it and/or modify them
under the terms of the GNU General Public License as published by the
Free Software Foundation, either version 3 of the License, or (at your
option) any later version.
It is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License in gpl.txt.
If not, see [GNU licenses](http://www.gnu.org/licenses).
*/
package codecutil
// numCodecs is the number of entries in the list of codecs.
const numCodecs = 5
// A global list containing all available codecs for reference in any application.
// When adding or removing a codec from this list, the numCodecs const must be updated.
const (
PCM = iota
ADPCM
H264
H265
MJPEG
)
// IsValid recieves an int representing a codec and checks if it is valid.
func IsValid(codec uint8) bool {
return 0 <= codec && codec < numCodecs
}

View File

@ -35,20 +35,20 @@ import (
"github.com/yobert/alsa" "github.com/yobert/alsa"
) )
// Resample takes an alsa.Buffer (b) and resamples the pcm audio data to 'rate' Hz and returns the resulting pcm. // Resample takes alsa.Buffer b and resamples the pcm audio data to 'rate' Hz and returns an alsa.Buffer with the resampled data.
// If an error occurs, an error will be returned along with the original b's data.
// Notes: // Notes:
// - Currently only downsampling is implemented and b's rate must be divisible by 'rate' or an error will occur. // - Currently only downsampling is implemented and b's rate must be divisible by 'rate' or an error will occur.
// - If the number of bytes in b.Data is not divisible by the decimation factor (ratioFrom), the remaining bytes will // - If the number of bytes in b.Data is not divisible by the decimation factor (ratioFrom), the remaining bytes will
// not be included in the result. Eg. input of length 480002 downsampling 6:1 will result in output length 80000. // not be included in the result. Eg. input of length 480002 downsampling 6:1 will result in output length 80000.
func Resample(b alsa.Buffer, rate int) ([]byte, error) { func Resample(b alsa.Buffer, rate int) (alsa.Buffer, error) {
fromRate := b.Format.Rate if b.Format.Rate == rate {
if fromRate == rate { return b, nil
return b.Data, nil }
} else if fromRate < 0 { if b.Format.Rate < 0 {
return nil, fmt.Errorf("Unable to convert from: %v Hz", fromRate) return alsa.Buffer{}, fmt.Errorf("Unable to convert from: %v Hz", b.Format.Rate)
} else if rate < 0 { }
return nil, fmt.Errorf("Unable to convert to: %v Hz", rate) if rate < 0 {
return alsa.Buffer{}, fmt.Errorf("Unable to convert to: %v Hz", rate)
} }
// The number of bytes in a sample. // The number of bytes in a sample.
@ -59,22 +59,22 @@ func Resample(b alsa.Buffer, rate int) ([]byte, error) {
case alsa.S16_LE: case alsa.S16_LE:
sampleLen = 2 * b.Format.Channels sampleLen = 2 * b.Format.Channels
default: default:
return nil, fmt.Errorf("Unhandled ALSA format: %v", b.Format.SampleFormat) return alsa.Buffer{}, fmt.Errorf("Unhandled ALSA format: %v", b.Format.SampleFormat)
} }
inPcmLen := len(b.Data) inPcmLen := len(b.Data)
// Calculate sample rate ratio ratioFrom:ratioTo. // Calculate sample rate ratio ratioFrom:ratioTo.
rateGcd := gcd(rate, fromRate) rateGcd := gcd(rate, b.Format.Rate)
ratioFrom := fromRate / rateGcd ratioFrom := b.Format.Rate / rateGcd
ratioTo := rate / rateGcd ratioTo := rate / rateGcd
// ratioTo = 1 is the only number that will result in an even sampling. // ratioTo = 1 is the only number that will result in an even sampling.
if ratioTo != 1 { if ratioTo != 1 {
return nil, fmt.Errorf("unhandled from:to rate ratio %v:%v: 'to' must be 1", ratioFrom, ratioTo) return alsa.Buffer{}, fmt.Errorf("unhandled from:to rate ratio %v:%v: 'to' must be 1", ratioFrom, ratioTo)
} }
newLen := inPcmLen / ratioFrom newLen := inPcmLen / ratioFrom
result := make([]byte, 0, newLen) resampled := make([]byte, 0, newLen)
// For each new sample to be generated, loop through the respective 'ratioFrom' samples in 'b.Data' to add them // For each new sample to be generated, loop through the respective 'ratioFrom' samples in 'b.Data' to add them
// up and average them. The result is the new sample. // up and average them. The result is the new sample.
@ -96,19 +96,28 @@ func Resample(b alsa.Buffer, rate int) ([]byte, error) {
case alsa.S16_LE: case alsa.S16_LE:
binary.LittleEndian.PutUint16(bAvg, uint16(avg)) binary.LittleEndian.PutUint16(bAvg, uint16(avg))
} }
result = append(result, bAvg...) resampled = append(resampled, bAvg...)
} }
return result, nil
// Return a new alsa.Buffer with resampled data.
return alsa.Buffer{
Format: alsa.BufferFormat{
Channels: b.Format.Channels,
SampleFormat: b.Format.SampleFormat,
Rate: rate,
},
Data: resampled,
}, nil
} }
// StereoToMono returns raw mono audio data generated from only the left channel from // StereoToMono returns raw mono audio data generated from only the left channel from
// the given stereo recording (ALSA buffer) // the given stereo recording (ALSA buffer)
// if an error occurs, an error will be returned along with the original stereo data. func StereoToMono(b alsa.Buffer) (alsa.Buffer, error) {
func StereoToMono(b alsa.Buffer) ([]byte, error) {
if b.Format.Channels == 1 { if b.Format.Channels == 1 {
return b.Data, nil return b, nil
} else if b.Format.Channels != 2 { }
return nil, fmt.Errorf("Audio is not stereo or mono, it has %v channels", b.Format.Channels) if b.Format.Channels != 2 {
return alsa.Buffer{}, fmt.Errorf("Audio is not stereo or mono, it has %v channels", b.Format.Channels)
} }
var stereoSampleBytes int var stereoSampleBytes int
@ -118,7 +127,7 @@ func StereoToMono(b alsa.Buffer) ([]byte, error) {
case alsa.S16_LE: case alsa.S16_LE:
stereoSampleBytes = 4 stereoSampleBytes = 4
default: default:
return nil, fmt.Errorf("Unhandled ALSA format %v", b.Format.SampleFormat) return alsa.Buffer{}, fmt.Errorf("Unhandled ALSA format %v", b.Format.SampleFormat)
} }
recLength := len(b.Data) recLength := len(b.Data)
@ -134,7 +143,15 @@ func StereoToMono(b alsa.Buffer) ([]byte, error) {
} }
} }
return mono, nil // Return a new alsa.Buffer with resampled data.
return alsa.Buffer{
Format: alsa.BufferFormat{
Channels: 1,
SampleFormat: b.Format.SampleFormat,
Rate: b.Format.Rate,
},
Data: mono,
}, nil
} }
// gcd is used for calculating the greatest common divisor of two positive integers, a and b. // gcd is used for calculating the greatest common divisor of two positive integers, a and b.

View File

@ -71,7 +71,7 @@ func TestResample(t *testing.T) {
} }
// Compare result with expected. // Compare result with expected.
if !bytes.Equal(resampled, exp) { if !bytes.Equal(resampled.Data, exp) {
t.Error("Resampled data does not match expected result.") t.Error("Resampled data does not match expected result.")
} }
} }
@ -112,7 +112,7 @@ func TestStereoToMono(t *testing.T) {
} }
// Compare result with expected. // Compare result with expected.
if !bytes.Equal(mono, exp) { if !bytes.Equal(mono.Data, exp) {
t.Error("Converted data does not match expected result.") t.Error("Converted data does not match expected result.")
} }
} }

View File

@ -89,11 +89,6 @@ var (
) )
const ( const (
sdtPid = 17
patPid = 0
pmtPid = 4096
videoPid = 256
audioPid = 210
H264ID = 27 H264ID = 27
H265ID = 36 H265ID = 36
audioStreamID = 0xc0 // First audio stream ID. audioStreamID = 0xc0 // First audio stream ID.
@ -151,13 +146,13 @@ func NewEncoder(dst io.WriteCloser, rate float64, mediaType int) *Encoder {
var sid byte var sid byte
switch mediaType { switch mediaType {
case EncodeAudio: case EncodeAudio:
mPid = audioPid mPid = AudioPid
sid = audioStreamID sid = audioStreamID
case EncodeH265: case EncodeH265:
mPid = videoPid mPid = VideoPid
sid = H265ID sid = H265ID
case EncodeH264: case EncodeH264:
mPid = videoPid mPid = VideoPid
sid = H264ID sid = H264ID
} }
@ -187,8 +182,8 @@ func NewEncoder(dst io.WriteCloser, rate float64, mediaType int) *Encoder {
streamID: sid, streamID: sid,
continuity: map[int]byte{ continuity: map[int]byte{
patPid: 0, PatPid: 0,
pmtPid: 0, PmtPid: 0,
mPid: 0, mPid: 0,
}, },
} }
@ -214,7 +209,7 @@ func (e *Encoder) TimeBasedPsi(b bool, sendCount int) {
e.pktCount = e.psiSendCount e.pktCount = e.psiSendCount
} }
// Write implements io.Writer. Write takes raw h264 and encodes into MPEG-TS, // Write implements io.Writer. Write takes raw video or audio data and encodes into MPEG-TS,
// then sending it to the encoder's io.Writer destination. // then sending it to the encoder's io.Writer destination.
func (e *Encoder) Write(data []byte) (int, error) { func (e *Encoder) Write(data []byte) (int, error) {
now := time.Now() now := time.Now()

View File

@ -199,7 +199,7 @@ func TestEncodePcm(t *testing.T) {
for i+PacketSize <= len(clip) { for i+PacketSize <= len(clip) {
// Check MTS packet // Check MTS packet
if !(pkt.PID() == audioPid) { if pkt.PID() != AudioPid {
i += PacketSize i += PacketSize
if i+PacketSize <= len(clip) { if i+PacketSize <= len(clip) {
copy(pkt[:], clip[i:i+PacketSize]) copy(pkt[:], clip[i:i+PacketSize])

View File

@ -47,6 +47,7 @@ const (
PatPid = 0 PatPid = 0
PmtPid = 4096 PmtPid = 4096
VideoPid = 256 VideoPid = 256
AudioPid = 210
) )
// StreamID is the id of the first stream. // StreamID is the id of the first stream.

View File

@ -82,7 +82,7 @@ func TestGetPTSRange1(t *testing.T) {
curTime += interval curTime += interval
} }
got, err := GetPTSRange(clip.Bytes(), videoPid) got, err := GetPTSRange(clip.Bytes(), VideoPid)
if err != nil { if err != nil {
t.Fatalf("did not expect error getting PTS range: %v", err) t.Fatalf("did not expect error getting PTS range: %v", err)
} }
@ -142,7 +142,7 @@ func writeFrame(b *bytes.Buffer, frame []byte, pts uint64) error {
for len(buf) != 0 { for len(buf) != 0 {
pkt := Packet{ pkt := Packet{
PUSI: pusi, PUSI: pusi,
PID: videoPid, PID: VideoPid,
RAI: pusi, RAI: pusi,
CC: 0, CC: 0,
AFC: hasAdaptationField | hasPayload, AFC: hasAdaptationField | hasPayload,

View File

@ -81,9 +81,9 @@ func main() {
} }
// Save resampled to file. // Save resampled to file.
err = ioutil.WriteFile(outPath, resampled, 0644) err = ioutil.WriteFile(outPath, resampled.Data, 0644)
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }
fmt.Println("Encoded and wrote", len(resampled), "bytes to file", outPath) fmt.Println("Encoded and wrote", len(resampled.Data), "bytes to file", outPath)
} }

View File

@ -77,9 +77,9 @@ func main() {
} }
// Save mono to file. // Save mono to file.
err = ioutil.WriteFile(outPath, mono, 0644) err = ioutil.WriteFile(outPath, mono.Data, 0644)
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }
fmt.Println("Encoded and wrote", len(mono), "bytes to file", outPath) fmt.Println("Encoded and wrote", len(mono.Data), "bytes to file", outPath)
} }

464
input/audio/audio.go Normal file
View File

@ -0,0 +1,464 @@
/*
NAME
audio.go
AUTHOR
Alan Noble <alan@ausocean.org>
Trek Hopton <trek@ausocean.org>
LICENSE
This file is Copyright (C) 2019 the Australian Ocean Lab (AusOcean)
It is free software: you can redistribute it and/or modify them
under the terms of the GNU General Public License as published by the
Free Software Foundation, either version 3 of the License, or (at your
option) any later version.
It is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License in gpl.txt.
If not, see [GNU licenses](http://www.gnu.org/licenses).
*/
// Package audio provides access to input from audio devices.
package audio
import (
"bytes"
"errors"
"fmt"
"sync"
"time"
"github.com/yobert/alsa"
"bitbucket.org/ausocean/av/codec/adpcm"
"bitbucket.org/ausocean/av/codec/codecutil"
"bitbucket.org/ausocean/av/codec/pcm"
"bitbucket.org/ausocean/utils/logger"
"bitbucket.org/ausocean/utils/ring"
)
const (
pkg = "audio: "
rbTimeout = 100 * time.Millisecond
rbNextTimeout = 100 * time.Millisecond
rbLen = 200
defaultSampleRate = 48000
)
// "running" means the input goroutine is reading from the ALSA device and writing to the ringbuffer.
// "paused" means the input routine is sleeping until unpaused or stopped.
// "stopped" means the input routine is stopped and the ALSA device is closed.
const (
running = iota + 1
paused
stopped
)
// Device holds everything we need to know about the audio input stream and implements io.Reader.
type Device struct {
l Logger // Logger for device's routines to log to.
mode uint8 // Operating mode, either running, paused, or stopped.
mu sync.Mutex // Provides synchronisation when changing modes concurrently.
title string // Name of audio title, or empty for the default title.
dev *alsa.Device // ALSA's Audio input device.
ab alsa.Buffer // ALSA's buffer.
rb *ring.Buffer // Our buffer.
chunkSize int // This is the number of bytes that will be stored in rb at a time.
*Config // Configuration parameters for this device.
}
// Config provides parameters used by Device.
type Config struct {
SampleRate int
Channels int
BitDepth int
RecPeriod float64
Codec uint8
}
// Logger enables any implementation of a logger to be used.
// TODO: Make this part of the logger package.
type Logger interface {
SetLevel(int8)
Log(level int8, message string, params ...interface{})
}
// OpenError is used to determine whether an error has originated from attempting to open a device.
type OpenError error
// NewDevice initializes and returns an Device which can be started, read from, and stopped.
func NewDevice(cfg *Config, l Logger) (*Device, error) {
err := validate(cfg)
if err != nil {
return nil, err
}
d := &Device{
Config: cfg,
l: l,
}
// Open the requested audio device.
err = d.open()
if err != nil {
d.l.Log(logger.Error, pkg+"failed to open device")
return nil, err
}
// Setup the device to record with desired period.
d.ab = d.dev.NewBufferDuration(time.Duration(d.RecPeriod * float64(time.Second)))
// Account for channel conversion.
chunkSize := float64(len(d.ab.Data) / d.dev.BufferFormat().Channels * d.Channels)
// Account for resampling.
chunkSize = (chunkSize / float64(d.dev.BufferFormat().Rate)) * float64(d.SampleRate)
if chunkSize < 1 {
return nil, errors.New("given Config parameters are too small")
}
// Account for codec conversion.
if d.Codec == codecutil.ADPCM {
d.chunkSize = adpcm.EncBytes(int(chunkSize))
} else {
d.chunkSize = int(chunkSize)
}
// Create ring buffer with appropriate chunk size.
d.rb = ring.NewBuffer(rbLen, d.chunkSize, rbTimeout)
// Start device in paused mode.
d.mode = paused
go d.input()
return d, nil
}
// Start will start recording audio and writing to the ringbuffer.
// Once a Device has been stopped it cannot be started again. This is likely to change in future.
func (d *Device) Start() error {
d.mu.Lock()
mode := d.mode
d.mu.Unlock()
switch mode {
case paused:
d.mu.Lock()
d.mode = running
d.mu.Unlock()
return nil
case stopped:
// TODO(Trek): Make this reopen device and start recording.
return errors.New("device is stopped")
case running:
return nil
default:
return fmt.Errorf("invalid mode: %d", mode)
}
}
// Stop will stop recording audio and close the device.
// Once a Device has been stopped it cannot be started again. This is likely to change in future.
func (d *Device) Stop() {
d.mu.Lock()
d.mode = stopped
d.mu.Unlock()
}
// ChunkSize returns the number of bytes written to the ringbuffer per d.RecPeriod.
func (d *Device) ChunkSize() int {
return d.chunkSize
}
// validate checks if Config parameters are valid and returns an error if they are not.
func validate(c *Config) error {
if c.SampleRate <= 0 {
return fmt.Errorf("invalid sample rate: %v", c.SampleRate)
}
if c.Channels <= 0 {
return fmt.Errorf("invalid number of channels: %v", c.Channels)
}
if c.BitDepth <= 0 {
return fmt.Errorf("invalid bitdepth: %v", c.BitDepth)
}
if c.RecPeriod <= 0 {
return fmt.Errorf("invalid recording period: %v", c.RecPeriod)
}
if !codecutil.IsValid(c.Codec) {
return errors.New("invalid codec")
}
return nil
}
// open the recording device with the given name and prepare it to record.
// If name is empty, the first recording device is used.
func (d *Device) open() error {
// Close any existing device.
if d.dev != nil {
d.l.Log(logger.Debug, pkg+"closing device", "title", d.title)
d.dev.Close()
d.dev = nil
}
// Open sound card and open recording device.
d.l.Log(logger.Debug, pkg+"opening sound card")
cards, err := alsa.OpenCards()
if err != nil {
return OpenError(err)
}
defer alsa.CloseCards(cards)
d.l.Log(logger.Debug, pkg+"finding audio device")
for _, card := range cards {
devices, err := card.Devices()
if err != nil {
continue
}
for _, dev := range devices {
if dev.Type != alsa.PCM || !dev.Record {
continue
}
if dev.Title == d.title || d.title == "" {
d.dev = dev
break
}
}
}
if d.dev == nil {
return OpenError(errors.New("no audio device found"))
}
d.l.Log(logger.Debug, pkg+"opening audio device", "title", d.dev.Title)
err = d.dev.Open()
if err != nil {
return OpenError(err)
}
// 2 channels is what most devices need to record in. If mono is requested,
// the recording will be converted in formatBuffer().
channels, err := d.dev.NegotiateChannels(2)
if err != nil {
return OpenError(err)
}
d.l.Log(logger.Debug, pkg+"alsa device channels set", "channels", channels)
// Try to negotiate a rate to record in that is divisible by the wanted rate
// so that it can be easily downsampled to the wanted rate.
// rates is a slice of common sample rates including the standard for CD (44100Hz) and standard for professional audio recording (48000Hz).
// Note: if a card thinks it can record at a rate but can't actually, this can cause a failure.
// Eg. the audioinjector sound card is supposed to record at 8000Hz and 16000Hz but it can't due to a firmware issue,
// a fix for this is to remove 8000 and 16000 from the rates slice.
var rates = [8]int{8000, 16000, 32000, 44100, 48000, 88200, 96000, 192000}
var rate int
foundRate := false
for r := range rates {
if r < d.SampleRate {
continue
}
if r%d.SampleRate == 0 {
rate, err = d.dev.NegotiateRate(r)
if err == nil {
foundRate = true
d.l.Log(logger.Debug, pkg+"alsa device sample rate set", "rate", rate)
break
}
}
}
// If no easily divisible rate is found, then use the default rate.
if !foundRate {
d.l.Log(logger.Warning, pkg+"Unable to sample at requested rate, default used.", "rateRequested", d.SampleRate)
rate, err = d.dev.NegotiateRate(defaultSampleRate)
if err != nil {
return OpenError(err)
}
d.l.Log(logger.Debug, pkg+"alsa device sample rate set", "rate", rate)
}
var aFmt alsa.FormatType
switch d.BitDepth {
case 16:
aFmt = alsa.S16_LE
case 32:
aFmt = alsa.S32_LE
default:
return OpenError(fmt.Errorf("unsupported sample bits %v", d.BitDepth))
}
devFmt, err := d.dev.NegotiateFormat(aFmt)
if err != nil {
return err
}
var bitdepth int
switch devFmt {
case alsa.S16_LE:
bitdepth = 16
case alsa.S32_LE:
bitdepth = 32
default:
return OpenError(fmt.Errorf("unsupported sample bits %v", d.BitDepth))
}
d.l.Log(logger.Debug, pkg+"alsa device bit depth set", "bitdepth", bitdepth)
// A 50ms period is a sensible value for low-ish latency. (this could be made configurable if needed)
// Some devices only accept even period sizes while others want powers of 2.
// So we will find the closest power of 2 to the desired period size.
const wantPeriod = 0.05 //seconds
bytesPerSecond := rate * channels * (bitdepth / 8)
wantPeriodSize := int(float64(bytesPerSecond) * wantPeriod)
nearWantPeriodSize := nearestPowerOfTwo(wantPeriodSize)
// At least two period sizes should fit within the buffer.
bufSize, err := d.dev.NegotiateBufferSize(nearWantPeriodSize * 2)
if err != nil {
return OpenError(err)
}
d.l.Log(logger.Debug, pkg+"alsa device buffer size set", "buffersize", bufSize)
if err = d.dev.Prepare(); err != nil {
return OpenError(err)
}
d.l.Log(logger.Debug, pkg+"successfully negotiated ALSA params")
return nil
}
// input continously records audio and writes it to the ringbuffer.
// Re-opens the device and tries again if ASLA returns an error.
func (d *Device) input() {
for {
// Check mode.
d.mu.Lock()
mode := d.mode
d.mu.Unlock()
switch mode {
case paused:
time.Sleep(time.Duration(d.RecPeriod) * time.Second)
continue
case stopped:
if d.dev != nil {
d.l.Log(logger.Debug, pkg+"closing audio device", "title", d.title)
d.dev.Close()
d.dev = nil
}
return
}
// Read from audio device.
d.l.Log(logger.Debug, pkg+"recording audio for period", "seconds", d.RecPeriod)
err := d.dev.Read(d.ab.Data)
if err != nil {
d.l.Log(logger.Debug, pkg+"read failed", "error", err.Error())
err = d.open() // re-open
if err != nil {
d.l.Log(logger.Fatal, pkg+"reopening device failed", "error", err.Error())
return
}
continue
}
// Process audio.
d.l.Log(logger.Debug, pkg+"processing audio")
toWrite := d.formatBuffer()
// Write audio to ringbuffer.
n, err := d.rb.Write(toWrite.Data)
switch err {
case nil:
d.l.Log(logger.Debug, pkg+"wrote audio to ringbuffer", "length", n)
case ring.ErrDropped:
d.l.Log(logger.Warning, pkg+"old audio data overwritten")
default:
d.l.Log(logger.Error, pkg+"unexpected ringbuffer error", "error", err.Error())
return
}
}
}
// Read reads from the ringbuffer, returning the number of bytes read upon success.
func (d *Device) Read(p []byte) (int, error) {
// Ready ringbuffer for read.
_, err := d.rb.Next(rbNextTimeout)
if err != nil {
return 0, err
}
// Read from ring buffer.
return d.rb.Read(p)
}
// formatBuffer returns audio that has been converted to the desired format.
func (d *Device) formatBuffer() alsa.Buffer {
var err error
// If nothing needs to be changed, return the original.
if d.ab.Format.Channels == d.Channels && d.ab.Format.Rate == d.SampleRate {
return d.ab
}
var formatted alsa.Buffer
if d.ab.Format.Channels != d.Channels {
// Convert channels.
// TODO(Trek): Make this work for conversions other than stereo to mono.
if d.ab.Format.Channels == 2 && d.Channels == 1 {
formatted, err = pcm.StereoToMono(d.ab)
if err != nil {
d.l.Log(logger.Fatal, pkg+"channel conversion failed", "error", err.Error())
}
}
}
if d.ab.Format.Rate != d.SampleRate {
// Convert rate.
formatted, err = pcm.Resample(formatted, d.SampleRate)
if err != nil {
d.l.Log(logger.Fatal, pkg+"rate conversion failed", "error", err.Error())
}
}
switch d.Codec {
case codecutil.PCM:
case codecutil.ADPCM:
b := bytes.NewBuffer(make([]byte, 0, adpcm.EncBytes(len(formatted.Data))))
enc := adpcm.NewEncoder(b)
_, err = enc.Write(formatted.Data)
if err != nil {
d.l.Log(logger.Fatal, pkg+"unable to encode", "error", err.Error())
}
formatted.Data = b.Bytes()
default:
d.l.Log(logger.Error, pkg+"unhandled audio codec")
}
return formatted
}
// nearestPowerOfTwo finds and returns the nearest power of two to the given integer.
// If the lower and higher power of two are the same distance, it returns the higher power.
// For negative values, 1 is returned.
// Source: https://stackoverflow.com/a/45859570
func nearestPowerOfTwo(n int) int {
if n <= 0 {
return 1
}
if n == 1 {
return 2
}
v := n
v--
v |= v >> 1
v |= v >> 2
v |= v >> 4
v |= v >> 8
v |= v >> 16
v++ // higher power of 2
x := v >> 1 // lower power of 2
if (v - n) > (n - x) {
return x
}
return v
}

105
input/audio/audio_test.go Normal file
View File

@ -0,0 +1,105 @@
/*
NAME
audio_test.go
AUTHOR
Trek Hopton <trek@ausocean.org>
LICENSE
This file is Copyright (C) 2019 the Australian Ocean Lab (AusOcean)
It is free software: you can redistribute it and/or modify them
under the terms of the GNU General Public License as published by the
Free Software Foundation, either version 3 of the License, or (at your
option) any later version.
It is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License in gpl.txt.
If not, see [GNU licenses](http://www.gnu.org/licenses).
*/
package audio
import (
"io/ioutil"
"os"
"strconv"
"testing"
"time"
"bitbucket.org/ausocean/av/codec/codecutil"
"bitbucket.org/ausocean/utils/logger"
)
func TestDevice(t *testing.T) {
// We want to open a device with a standard configuration.
ac := &Config{
SampleRate: 8000,
Channels: 1,
RecPeriod: 0.3,
BitDepth: 16,
Codec: codecutil.ADPCM,
}
n := 2 // Number of periods to wait while recording.
// Create a new audio Device, start, read/lex, and then stop it.
l := logger.New(logger.Debug, os.Stderr)
ai, err := NewDevice(ac, l)
// If there was an error opening the device, skip this test.
if _, ok := err.(OpenError); ok {
t.Skip(err)
}
// For any other error, report it.
if err != nil {
t.Error(err)
}
err = ai.Start()
if err != nil {
t.Error(err)
}
chunkSize := ai.ChunkSize()
lexer := codecutil.NewByteLexer(&chunkSize)
go lexer.Lex(ioutil.Discard, ai, time.Duration(ac.RecPeriod*float64(time.Second)))
time.Sleep(time.Duration(ac.RecPeriod*float64(time.Second)) * time.Duration(n))
ai.Stop()
}
var powerTests = []struct {
in int
out int
}{
{36, 32},
{47, 32},
{3, 4},
{46, 32},
{7, 8},
{2, 2},
{36, 32},
{757, 512},
{2464, 2048},
{18980, 16384},
{70000, 65536},
{8192, 8192},
{2048, 2048},
{65536, 65536},
{-2048, 1},
{-127, 1},
{-1, 1},
{0, 1},
{1, 2},
}
func TestNearestPowerOfTwo(t *testing.T) {
for _, tt := range powerTests {
t.Run(strconv.Itoa(tt.in), func(t *testing.T) {
v := nearestPowerOfTwo(tt.in)
if v != tt.out {
t.Errorf("got %v, want %v", v, tt.out)
}
})
}
}

View File

@ -2,11 +2,9 @@
NAME NAME
Config.go Config.go
DESCRIPTION
See Readme.md
AUTHORS AUTHORS
Saxon A. Nelson-Milton <saxon@ausocean.org> Saxon A. Nelson-Milton <saxon@ausocean.org>
Trek Hopton <trek@ausocean.org>
LICENSE LICENSE
Config.go is Copyright (C) 2017-2018 the Australian Ocean Lab (AusOcean) Config.go is Copyright (C) 2017-2018 the Australian Ocean Lab (AusOcean)
@ -30,6 +28,7 @@ package revid
import ( import (
"errors" "errors"
"bitbucket.org/ausocean/av/codec/codecutil"
"bitbucket.org/ausocean/utils/logger" "bitbucket.org/ausocean/utils/logger"
) )
@ -75,6 +74,7 @@ const (
Raspivid Raspivid
V4L V4L
RTSP RTSP
Audio
// Outputs. // Outputs.
RTMP RTMP
@ -93,6 +93,7 @@ const (
defaultInput = Raspivid defaultInput = Raspivid
defaultOutput = HTTP defaultOutput = HTTP
defaultFrameRate = 25 defaultFrameRate = 25
defaultWriteRate = 25
defaultWidth = 1280 defaultWidth = 1280
defaultHeight = 720 defaultHeight = 720
defaultIntraRefreshPeriod = 100 defaultIntraRefreshPeriod = 100
@ -101,7 +102,7 @@ const (
defaultBitrate = 400000 defaultBitrate = 400000
defaultFramesPerClip = 1 defaultFramesPerClip = 1
httpFramesPerClip = 560 httpFramesPerClip = 560
defaultInputCodec = H264 defaultInputCodec = codecutil.H264
defaultVerbosity = logger.Error defaultVerbosity = logger.Error
defaultRtpAddr = "localhost:6970" defaultRtpAddr = "localhost:6970"
defaultBurstPeriod = 10 // Seconds defaultBurstPeriod = 10 // Seconds
@ -109,6 +110,12 @@ const (
defaultBrightness = 50 defaultBrightness = 50
defaultExposure = "auto" defaultExposure = "auto"
defaultAutoWhiteBalance = "auto" defaultAutoWhiteBalance = "auto"
defaultAudioInputCodec = codecutil.ADPCM
defaultSampleRate = 48000
defaultBitDepth = 16
defaultChannels = 1
defaultRecPeriod = 1.0
) )
// Config provides parameters relevant to a revid instance. A new config must // Config provides parameters relevant to a revid instance. A new config must
@ -183,6 +190,9 @@ type Config struct {
// Raspivid input supports custom framerate. // Raspivid input supports custom framerate.
FrameRate uint FrameRate uint
// WriteRate is how many times a second revid encoders will be written to.
WriteRate float64
// HTTPAddress defines a custom HTTP destination if we do not wish to use that // HTTPAddress defines a custom HTTP destination if we do not wish to use that
// defined in /etc/netsender.conf. // defined in /etc/netsender.conf.
HTTPAddress string HTTPAddress string
@ -215,6 +225,13 @@ type Config struct {
// defined at the start of the file. // defined at the start of the file.
AutoWhiteBalance string AutoWhiteBalance string
// Audio
SampleRate int // Samples a second (Hz).
RecPeriod float64 // How many seconds to record at a time.
Channels int // Number of audio channels, 1 for mono, 2 for stereo.
BitDepth int // Sample bit depth.
ChunkSize int // ChunkSize is the size of the chunks in the audio.Device's ringbuffer.
RTPAddress string // RTPAddress defines the RTP output destination. RTPAddress string // RTPAddress defines the RTP output destination.
BurstPeriod uint // BurstPeriod defines the revid burst period in seconds. BurstPeriod uint // BurstPeriod defines the revid burst period in seconds.
Rotation uint // Rotation defines the video rotation angle in degrees Raspivid input. Rotation uint // Rotation defines the video rotation angle in degrees Raspivid input.
@ -240,7 +257,7 @@ func (c *Config) Validate(r *Revid) error {
} }
switch c.Input { switch c.Input {
case Raspivid, V4L, File, RTSP: case Raspivid, V4L, File, Audio, RTSP:
case NothingDefined: case NothingDefined:
c.Logger.Log(logger.Info, pkg+"no input type defined, defaulting", "input", defaultInput) c.Logger.Log(logger.Info, pkg+"no input type defined, defaulting", "input", defaultInput)
c.Input = defaultInput c.Input = defaultInput
@ -249,7 +266,7 @@ func (c *Config) Validate(r *Revid) error {
} }
switch c.InputCodec { switch c.InputCodec {
case H264: case codecutil.H264:
// FIXME(kortschak): This is not really what we want. // FIXME(kortschak): This is not really what we want.
// Configuration really needs to be rethought here. // Configuration really needs to be rethought here.
if c.Quantize && c.Quantization == 0 { if c.Quantize && c.Quantization == 0 {
@ -260,18 +277,22 @@ func (c *Config) Validate(r *Revid) error {
return errors.New("bad bitrate and quantization combination for H264 input") return errors.New("bad bitrate and quantization combination for H264 input")
} }
case MJPEG: case codecutil.MJPEG:
if c.Quantization > 0 || c.Bitrate == 0 { if c.Quantization > 0 || c.Bitrate == 0 {
return errors.New("bad bitrate or quantization for mjpeg input") return errors.New("bad bitrate or quantization for mjpeg input")
} }
case codecutil.PCM, codecutil.ADPCM:
case NothingDefined:
c.Logger.Log(logger.Info, pkg+"no input codec defined, defaulting", "inputCodec", defaultInputCodec)
c.InputCodec = defaultInputCodec
c.Logger.Log(logger.Info, pkg+"defaulting quantization", "quantization", defaultQuantization)
c.Quantization = defaultQuantization
default: default:
return errors.New("bad input codec defined in config") switch c.Input {
case Audio:
c.Logger.Log(logger.Info, pkg+"input is audio but no codec defined, defaulting", "inputCodec", defaultAudioInputCodec)
c.InputCodec = defaultAudioInputCodec
default:
c.Logger.Log(logger.Info, pkg+"no input codec defined, defaulting", "inputCodec", defaultInputCodec)
c.InputCodec = defaultInputCodec
c.Logger.Log(logger.Info, pkg+"defaulting quantization", "quantization", defaultQuantization)
c.Quantization = defaultQuantization
}
} }
if c.Outputs == nil { if c.Outputs == nil {
@ -330,6 +351,31 @@ func (c *Config) Validate(r *Revid) error {
c.FrameRate = defaultFrameRate c.FrameRate = defaultFrameRate
} }
if c.SampleRate == 0 {
c.Logger.Log(logger.Info, pkg+"no sample rate defined, defaulting", "sampleRate", defaultSampleRate)
c.SampleRate = defaultSampleRate
}
if c.Channels == 0 {
c.Logger.Log(logger.Info, pkg+"no number of channels defined, defaulting", "Channels", defaultChannels)
c.Channels = defaultChannels
}
if c.BitDepth == 0 {
c.Logger.Log(logger.Info, pkg+"no bit depth defined, defaulting", "BitDepth", defaultBitDepth)
c.BitDepth = defaultBitDepth
}
if c.RecPeriod == 0 {
c.Logger.Log(logger.Info, pkg+"no record period defined, defaulting", "recPeriod", defaultRecPeriod)
c.RecPeriod = defaultRecPeriod
}
if c.WriteRate == 0 {
c.Logger.Log(logger.Info, pkg+"no write rate defined, defaulting", "writeRate", defaultWriteRate)
c.WriteRate = defaultWriteRate
}
if c.Bitrate == 0 { if c.Bitrate == 0 {
c.Logger.Log(logger.Info, pkg+"no bitrate defined, defaulting", "bitrate", defaultBitrate) c.Logger.Log(logger.Info, pkg+"no bitrate defined, defaulting", "bitrate", defaultBitrate)
c.Bitrate = defaultBitrate c.Bitrate = defaultBitrate

View File

@ -2,13 +2,11 @@
NAME NAME
revid.go revid.go
DESCRIPTION
See Readme.md
AUTHORS AUTHORS
Saxon A. Nelson-Milton <saxon@ausocean.org> Saxon A. Nelson-Milton <saxon@ausocean.org>
Alan Noble <alan@ausocean.org> Alan Noble <alan@ausocean.org>
Dan Kortschak <dan@ausocean.org> Dan Kortschak <dan@ausocean.org>
Trek Hopton <trek@ausocean.org>
LICENSE LICENSE
revid is Copyright (C) 2017-2018 the Australian Ocean Lab (AusOcean) revid is Copyright (C) 2017-2018 the Australian Ocean Lab (AusOcean)
@ -27,6 +25,7 @@ LICENSE
in gpl.txt. If not, see http://www.gnu.org/licenses. in gpl.txt. If not, see http://www.gnu.org/licenses.
*/ */
// Package revid provides an API for reading, transcoding, and writing audio/video streams and files.
package revid package revid
import ( import (
@ -41,10 +40,12 @@ import (
"sync" "sync"
"time" "time"
"bitbucket.org/ausocean/av/codec/codecutil"
"bitbucket.org/ausocean/av/codec/h264" "bitbucket.org/ausocean/av/codec/h264"
"bitbucket.org/ausocean/av/codec/h265" "bitbucket.org/ausocean/av/codec/h265"
"bitbucket.org/ausocean/av/container/flv" "bitbucket.org/ausocean/av/container/flv"
"bitbucket.org/ausocean/av/container/mts" "bitbucket.org/ausocean/av/container/mts"
"bitbucket.org/ausocean/av/input/audio"
"bitbucket.org/ausocean/av/protocol/rtcp" "bitbucket.org/ausocean/av/protocol/rtcp"
"bitbucket.org/ausocean/av/protocol/rtp" "bitbucket.org/ausocean/av/protocol/rtp"
"bitbucket.org/ausocean/av/protocol/rtsp" "bitbucket.org/ausocean/av/protocol/rtsp"
@ -173,13 +174,15 @@ func (r *Revid) reset(config Config) error {
r.config.Logger.SetLevel(config.LogLevel) r.config.Logger.SetLevel(config.LogLevel)
err = r.setupPipeline( err = r.setupPipeline(
func(dst io.WriteCloser, fps int) (io.WriteCloser, error) { func(dst io.WriteCloser, fps float64) (io.WriteCloser, error) {
var st int var st int
switch r.config.Input { switch r.config.Input {
case Raspivid, File, V4L: case Raspivid, File, V4L:
st = mts.EncodeH264 st = mts.EncodeH264
case RTSP: case RTSP:
st = mts.EncodeH265 st = mts.EncodeH265
case Audio:
st = mts.EncodeAudio
} }
e := mts.NewEncoder(dst, float64(fps), st) e := mts.NewEncoder(dst, float64(fps), st)
return e, nil return e, nil
@ -215,7 +218,7 @@ func (r *Revid) setConfig(config Config) error {
// mtsEnc and flvEnc will be called to obtain an mts encoder and flv encoder // mtsEnc and flvEnc will be called to obtain an mts encoder and flv encoder
// respectively. multiWriter will be used to create an ioext.multiWriteCloser // respectively. multiWriter will be used to create an ioext.multiWriteCloser
// so that encoders can write to multiple senders. // so that encoders can write to multiple senders.
func (r *Revid) setupPipeline(mtsEnc, flvEnc func(dst io.WriteCloser, rate int) (io.WriteCloser, error), multiWriter func(...io.WriteCloser) io.WriteCloser) error { func (r *Revid) setupPipeline(mtsEnc func(dst io.WriteCloser, rate float64) (io.WriteCloser, error), flvEnc func(dst io.WriteCloser, rate int) (io.WriteCloser, error), multiWriter func(...io.WriteCloser) io.WriteCloser) error {
// encoders will hold the encoders that are required for revid's current // encoders will hold the encoders that are required for revid's current
// configuration. // configuration.
var encoders []io.WriteCloser var encoders []io.WriteCloser
@ -259,7 +262,7 @@ func (r *Revid) setupPipeline(mtsEnc, flvEnc func(dst io.WriteCloser, rate int)
// as a destination. // as a destination.
if len(mtsSenders) != 0 { if len(mtsSenders) != 0 {
mw := multiWriter(mtsSenders...) mw := multiWriter(mtsSenders...)
e, _ := mtsEnc(mw, int(r.config.FrameRate)) e, _ := mtsEnc(mw, r.config.WriteRate)
encoders = append(encoders, e) encoders = append(encoders, e)
} }
@ -289,6 +292,9 @@ func (r *Revid) setupPipeline(mtsEnc, flvEnc func(dst io.WriteCloser, rate int)
case RTSP: case RTSP:
r.setupInput = r.startRTSPCamera r.setupInput = r.startRTSPCamera
r.lexTo = h265.NewLexer(false).Lex r.lexTo = h265.NewLexer(false).Lex
case Audio:
r.setupInput = r.startAudioDevice
r.lexTo = codecutil.NewByteLexer(&r.config.ChunkSize).Lex
} }
return nil return nil
@ -533,7 +539,7 @@ func (r *Revid) startRaspivid() (func() error, error) {
switch r.config.InputCodec { switch r.config.InputCodec {
default: default:
return nil, fmt.Errorf("revid: invalid input codec: %v", r.config.InputCodec) return nil, fmt.Errorf("revid: invalid input codec: %v", r.config.InputCodec)
case H264: case codecutil.H264:
args = append(args, args = append(args,
"--codec", "H264", "--codec", "H264",
"--inline", "--inline",
@ -542,7 +548,7 @@ func (r *Revid) startRaspivid() (func() error, error) {
if r.config.Quantize { if r.config.Quantize {
args = append(args, "-qp", fmt.Sprint(r.config.Quantization)) args = append(args, "-qp", fmt.Sprint(r.config.Quantization))
} }
case MJPEG: case codecutil.MJPEG:
args = append(args, "--codec", "MJPEG") args = append(args, "--codec", "MJPEG")
} }
r.config.Logger.Log(logger.Info, pkg+"raspivid args", "raspividArgs", strings.Join(args, " ")) r.config.Logger.Log(logger.Info, pkg+"raspivid args", "raspividArgs", strings.Join(args, " "))
@ -615,10 +621,55 @@ func (r *Revid) setupInputForFile() (func() error, error) {
// TODO(kortschak): Maybe we want a context.Context-aware parser that we can stop. // TODO(kortschak): Maybe we want a context.Context-aware parser that we can stop.
r.wg.Add(1) r.wg.Add(1)
go r.processFrom(f, time.Second/time.Duration(r.config.FrameRate)) go r.processFrom(f, 0)
return func() error { return f.Close() }, nil return func() error { return f.Close() }, nil
} }
// startAudioDevice is used to start capturing audio from an audio device and processing it.
// It returns a function that can be used to stop the device and any errors that occur.
func (r *Revid) startAudioDevice() (func() error, error) {
// Create audio device.
ac := &audio.Config{
SampleRate: r.config.SampleRate,
Channels: r.config.Channels,
RecPeriod: r.config.RecPeriod,
BitDepth: r.config.BitDepth,
Codec: r.config.InputCodec,
}
mts.Meta.Add("sampleRate", strconv.Itoa(r.config.SampleRate))
mts.Meta.Add("channels", strconv.Itoa(r.config.Channels))
mts.Meta.Add("period", fmt.Sprintf("%.6f", r.config.RecPeriod))
mts.Meta.Add("bitDepth", strconv.Itoa(r.config.BitDepth))
switch r.config.InputCodec {
case codecutil.PCM:
mts.Meta.Add("codec", "pcm")
case codecutil.ADPCM:
mts.Meta.Add("codec", "adpcm")
default:
r.config.Logger.Log(logger.Fatal, pkg+"no audio codec set in config")
}
ai, err := audio.NewDevice(ac, r.config.Logger)
if err != nil {
r.config.Logger.Log(logger.Fatal, pkg+"failed to create audio device", "error", err.Error())
}
// Start audio device
err = ai.Start()
if err != nil {
r.config.Logger.Log(logger.Fatal, pkg+"failed to start audio device", "error", err.Error())
}
// Process output from audio device.
r.config.ChunkSize = ai.ChunkSize()
r.wg.Add(1)
go r.processFrom(ai, time.Duration(float64(time.Second)/r.config.WriteRate))
return func() error {
ai.Stop()
return nil
}, nil
}
// startRTSPCamera uses RTSP to request an RTP stream from an IP camera. An RTP // startRTSPCamera uses RTSP to request an RTP stream from an IP camera. An RTP
// client is created from which RTP packets containing either h264/h265 can read // client is created from which RTP packets containing either h264/h265 can read
// by the selected lexer. // by the selected lexer.

View File

@ -41,7 +41,7 @@ import (
const raspividPath = "/usr/local/bin/raspivid" const raspividPath = "/usr/local/bin/raspivid"
// Suppress all test logging, except for t.Errorf output. // Suppress all test logging, except for t.Errorf output.
var silent bool var silent = true
// TestRaspivid tests that raspivid starts correctly. // TestRaspivid tests that raspivid starts correctly.
// It is intended to be run on a Raspberry Pi. // It is intended to be run on a Raspberry Pi.
@ -232,7 +232,7 @@ func TestResetEncoderSenderSetup(t *testing.T) {
// This logic is what we want to check. // This logic is what we want to check.
err = rv.setupPipeline( err = rv.setupPipeline(
func(dst io.WriteCloser, rate int) (io.WriteCloser, error) { func(dst io.WriteCloser, rate float64) (io.WriteCloser, error) {
return &tstMtsEncoder{dst: dst}, nil return &tstMtsEncoder{dst: dst}, nil
}, },
func(dst io.WriteCloser, rate int) (io.WriteCloser, error) { func(dst io.WriteCloser, rate int) (io.WriteCloser, error) {

View File

@ -57,7 +57,7 @@ type httpSender struct {
log func(lvl int8, msg string, args ...interface{}) log func(lvl int8, msg string, args ...interface{})
} }
// newMinimalHttpSender returns a pointer to a new minimalHttpSender. // newHttpSender returns a pointer to a new httpSender.
func newHttpSender(ns *netsender.Sender, log func(lvl int8, msg string, args ...interface{})) *httpSender { func newHttpSender(ns *netsender.Sender, log func(lvl int8, msg string, args ...interface{})) *httpSender {
return &httpSender{ return &httpSender{
client: ns, client: ns,