mirror of https://bitbucket.org/ausocean/av.git
codec/h264/h264dec: merged in master and fixed resultant problems
This commit is contained in:
commit
150492a5bd
|
@ -8,6 +8,7 @@ DESCRIPTION
|
|||
AUTHORS
|
||||
Saxon A. Nelson-Milton <saxon@ausocean.org>
|
||||
Jack Richardson <jack@ausocean.org>
|
||||
Trek Hopton <trek@ausocean.org>
|
||||
|
||||
LICENSE
|
||||
revid-cli is Copyright (C) 2017-2018 the Australian Ocean Lab (AusOcean)
|
||||
|
@ -26,6 +27,7 @@ LICENSE
|
|||
along with revid in gpl.txt. If not, see http://www.gnu.org/licenses.
|
||||
*/
|
||||
|
||||
// revid-cli is a command line interface for revid.
|
||||
package main
|
||||
|
||||
import (
|
||||
|
@ -36,6 +38,7 @@ import (
|
|||
"strings"
|
||||
"time"
|
||||
|
||||
"bitbucket.org/ausocean/av/codec/codecutil"
|
||||
"bitbucket.org/ausocean/av/container/mts"
|
||||
"bitbucket.org/ausocean/av/container/mts/meta"
|
||||
"bitbucket.org/ausocean/av/revid"
|
||||
|
@ -105,9 +108,9 @@ func handleFlags() revid.Config {
|
|||
var (
|
||||
cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`")
|
||||
|
||||
inputPtr = flag.String("Input", "", "The input type: Raspivid, File, Webcam, RTSP")
|
||||
inputCodecPtr = flag.String("InputCodec", "", "The codec of the input: H264, Mjpeg, PCM, ADPCM")
|
||||
inputPtr = flag.String("Input", "", "The input type: Raspivid, File, v4l, Audio, RTSP")
|
||||
rtspURLPtr = flag.String("RTSPURL", "", "The URL for an RTSP server.")
|
||||
inputCodecPtr = flag.String("InputCodec", "", "The codec of the input: H264, Mjpeg")
|
||||
quantizePtr = flag.Bool("Quantize", false, "Quantize input (non-variable bitrate)")
|
||||
verbosityPtr = flag.String("Verbosity", "Info", "Verbosity: Debug, Info, Warning, Error, Fatal")
|
||||
rtpAddrPtr = flag.String("RtpAddr", "", "Rtp destination address: <IP>:<port> (port is generally 6970-6999)")
|
||||
|
@ -131,6 +134,12 @@ func handleFlags() revid.Config {
|
|||
saturationPtr = flag.Int("Saturation", 0, "Set Saturation. (100-100)")
|
||||
exposurePtr = flag.String("Exposure", "auto", "Set exposure mode. ("+strings.Join(revid.ExposureModes[:], ",")+")")
|
||||
autoWhiteBalancePtr = flag.String("Awb", "auto", "Set automatic white balance mode. ("+strings.Join(revid.AutoWhiteBalanceModes[:], ",")+")")
|
||||
|
||||
// Audio specific flags.
|
||||
sampleRatePtr = flag.Int("SampleRate", 48000, "Sample rate of recorded audio")
|
||||
channelsPtr = flag.Int("Channels", 1, "Record in Mono or Stereo (1 or 2)")
|
||||
recPeriodPtr = flag.Float64("recPeriod", 1, "How many seconds to record at a time")
|
||||
bitDepthPtr = flag.Int("bitDepth", 16, "Bit Depth to record audio at.")
|
||||
)
|
||||
|
||||
var outputs flagStrings
|
||||
|
@ -179,6 +188,8 @@ func handleFlags() revid.Config {
|
|||
cfg.Input = revid.V4L
|
||||
case "File":
|
||||
cfg.Input = revid.File
|
||||
case "Audio":
|
||||
cfg.Input = revid.Audio
|
||||
case "RTSP":
|
||||
cfg.Input = revid.RTSP
|
||||
case "":
|
||||
|
@ -188,12 +199,23 @@ func handleFlags() revid.Config {
|
|||
|
||||
switch *inputCodecPtr {
|
||||
case "H264":
|
||||
cfg.InputCodec = revid.H264
|
||||
cfg.InputCodec = codecutil.H264
|
||||
case "PCM":
|
||||
cfg.InputCodec = codecutil.PCM
|
||||
case "ADPCM":
|
||||
cfg.InputCodec = codecutil.ADPCM
|
||||
case "":
|
||||
default:
|
||||
log.Log(logger.Error, pkg+"bad input codec argument")
|
||||
}
|
||||
|
||||
switch *inputPtr {
|
||||
case "Audio":
|
||||
cfg.WriteRate = 1.0 / (*recPeriodPtr)
|
||||
default:
|
||||
cfg.WriteRate = float64(*frameRatePtr)
|
||||
}
|
||||
|
||||
for _, o := range outputs {
|
||||
switch o {
|
||||
case "File":
|
||||
|
@ -235,6 +257,10 @@ func handleFlags() revid.Config {
|
|||
cfg.Saturation = *saturationPtr
|
||||
cfg.Exposure = *exposurePtr
|
||||
cfg.AutoWhiteBalance = *autoWhiteBalancePtr
|
||||
cfg.SampleRate = *sampleRatePtr
|
||||
cfg.Channels = *channelsPtr
|
||||
cfg.RecPeriod = *recPeriodPtr
|
||||
cfg.BitDepth = *bitDepthPtr
|
||||
|
||||
return cfg
|
||||
}
|
||||
|
|
|
@ -0,0 +1,84 @@
|
|||
/*
|
||||
NAME
|
||||
lex.go
|
||||
|
||||
AUTHOR
|
||||
Trek Hopton <trek@ausocean.org>
|
||||
|
||||
LICENSE
|
||||
This file is Copyright (C) 2019 the Australian Ocean Lab (AusOcean)
|
||||
|
||||
It is free software: you can redistribute it and/or modify them
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation, either version 3 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
It is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License in gpl.txt.
|
||||
If not, see [GNU licenses](http://www.gnu.org/licenses).
|
||||
*/
|
||||
|
||||
package codecutil
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"time"
|
||||
)
|
||||
|
||||
// ByteLexer is used to lex bytes using a buffer size which is configured upon construction.
|
||||
type ByteLexer struct {
|
||||
bufSize *int
|
||||
}
|
||||
|
||||
// NewByteLexer returns a pointer to a ByteLexer with the given buffer size.
|
||||
func NewByteLexer(bufSize *int) *ByteLexer {
|
||||
return &ByteLexer{bufSize: bufSize}
|
||||
}
|
||||
|
||||
// zeroTicks can be used to create an instant ticker.
|
||||
var zeroTicks chan time.Time
|
||||
|
||||
func init() {
|
||||
zeroTicks = make(chan time.Time)
|
||||
close(zeroTicks)
|
||||
}
|
||||
|
||||
// Lex reads *l.bufSize bytes from src and writes them to dst every d seconds.
|
||||
func (l *ByteLexer) Lex(dst io.Writer, src io.Reader, d time.Duration) error {
|
||||
if l.bufSize == nil {
|
||||
return fmt.Errorf("buffer size has not been set")
|
||||
}
|
||||
bufSize := *l.bufSize
|
||||
if bufSize <= 0 {
|
||||
return fmt.Errorf("invalid buffer size: %v", bufSize)
|
||||
}
|
||||
if d < 0 {
|
||||
return fmt.Errorf("invalid delay: %v", d)
|
||||
}
|
||||
|
||||
var ticker *time.Ticker
|
||||
if d == 0 {
|
||||
ticker = &time.Ticker{C: zeroTicks}
|
||||
} else {
|
||||
ticker = time.NewTicker(d)
|
||||
defer ticker.Stop()
|
||||
}
|
||||
|
||||
buf := make([]byte, bufSize)
|
||||
for {
|
||||
<-ticker.C
|
||||
off, err := src.Read(buf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = dst.Write(buf[:off])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
/*
|
||||
NAME
|
||||
lex_test.go
|
||||
|
||||
AUTHOR
|
||||
Trek Hopton <trek@ausocean.org>
|
||||
|
||||
LICENSE
|
||||
This file is Copyright (C) 2019 the Australian Ocean Lab (AusOcean)
|
||||
|
||||
It is free software: you can redistribute it and/or modify them
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation, either version 3 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
It is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License in gpl.txt.
|
||||
If not, see [GNU licenses](http://www.gnu.org/licenses).
|
||||
*/
|
||||
|
||||
package codecutil
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"strconv"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
var lexTests = []struct {
|
||||
data []byte
|
||||
t time.Duration
|
||||
n int
|
||||
isValid bool // Whether or not this test should fail.
|
||||
}{
|
||||
{[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Millisecond, 4, true},
|
||||
{[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Millisecond, 3, true},
|
||||
{[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, 0, 2, true},
|
||||
{[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, 0, 1, true},
|
||||
{[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Nanosecond, 0, false},
|
||||
{[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Millisecond, -1, false},
|
||||
{[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Millisecond, 15, true},
|
||||
}
|
||||
|
||||
func TestByteLexer(t *testing.T) {
|
||||
for i, tt := range lexTests {
|
||||
t.Run(strconv.Itoa(i), func(t *testing.T) {
|
||||
dst := bytes.NewBuffer([]byte{})
|
||||
l := NewByteLexer(&tt.n)
|
||||
err := l.Lex(dst, bytes.NewReader(tt.data), tt.t)
|
||||
if err != nil && err != io.EOF {
|
||||
if tt.isValid {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
} else if !bytes.Equal(dst.Bytes(), tt.data) {
|
||||
t.Errorf("data before and after lex are not equal: want %v, got %v", tt.data, dst.Bytes())
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
/*
|
||||
NAME
|
||||
list.go
|
||||
|
||||
AUTHOR
|
||||
Trek Hopton <trek@ausocean.org>
|
||||
|
||||
LICENSE
|
||||
This file is Copyright (C) 2019 the Australian Ocean Lab (AusOcean)
|
||||
|
||||
It is free software: you can redistribute it and/or modify them
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation, either version 3 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
It is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License in gpl.txt.
|
||||
If not, see [GNU licenses](http://www.gnu.org/licenses).
|
||||
*/
|
||||
|
||||
package codecutil
|
||||
|
||||
// numCodecs is the number of entries in the list of codecs.
|
||||
const numCodecs = 5
|
||||
|
||||
// A global list containing all available codecs for reference in any application.
|
||||
// When adding or removing a codec from this list, the numCodecs const must be updated.
|
||||
const (
|
||||
PCM = iota
|
||||
ADPCM
|
||||
H264
|
||||
H265
|
||||
MJPEG
|
||||
)
|
||||
|
||||
// IsValid recieves an int representing a codec and checks if it is valid.
|
||||
func IsValid(codec uint8) bool {
|
||||
return 0 <= codec && codec < numCodecs
|
||||
}
|
|
@ -35,14 +35,14 @@ func YOffset(yRefMin16, refMbH int) int {
|
|||
}
|
||||
func MbWidthC(sps *SPS) int {
|
||||
mbWidthC := 16 / SubWidthC(sps)
|
||||
if sps.ChromaFormat == chromaMonochrome || sps.UseSeparateColorPlane {
|
||||
if sps.ChromaFormatIDC == chromaMonochrome || sps.SeparateColorPlaneFlag {
|
||||
mbWidthC = 0
|
||||
}
|
||||
return mbWidthC
|
||||
}
|
||||
func MbHeightC(sps *SPS) int {
|
||||
mbHeightC := 16 / SubHeightC(sps)
|
||||
if sps.ChromaFormat == chromaMonochrome || sps.UseSeparateColorPlane {
|
||||
if sps.ChromaFormatIDC == chromaMonochrome || sps.SeparateColorPlaneFlag {
|
||||
mbHeightC = 0
|
||||
}
|
||||
return mbHeightC
|
||||
|
|
|
@ -64,11 +64,11 @@ func (r fieldReader) readBits(n int) uint64 {
|
|||
// Exp-Golomb-coded element using method as specified in section 9.1 of ITU-T
|
||||
// H.264 and return as an int. The read does not happen if the fieldReader
|
||||
// has a non-nil error.
|
||||
func (r fieldReader) readUe() int {
|
||||
func (r fieldReader) readUe() uint64 {
|
||||
if r.e != nil {
|
||||
return 0
|
||||
}
|
||||
var i int
|
||||
var i uint64
|
||||
i, r.e = readUe(r.br)
|
||||
return i
|
||||
}
|
||||
|
@ -77,11 +77,11 @@ func (r fieldReader) readUe() int {
|
|||
// Exp-Golomb-coded syntax element using method as specified in section 9.1
|
||||
// and returns as an int. The read does not happen if the fieldReader
|
||||
// has a non-nil error.
|
||||
func (r fieldReader) readTe(x uint) int {
|
||||
func (r fieldReader) readTe(x uint) int64 {
|
||||
if r.e != nil {
|
||||
return 0
|
||||
}
|
||||
var i int
|
||||
var i int64
|
||||
i, r.e = readTe(r.br, x)
|
||||
return i
|
||||
}
|
||||
|
@ -122,7 +122,7 @@ func (r fieldReader) err() error {
|
|||
//
|
||||
// TODO: this should return uint, but rest of code needs to be changed for this
|
||||
// to happen.
|
||||
func readUe(r *bits.BitReader) (int, error) {
|
||||
func readUe(r *bits.BitReader) (uint64, error) {
|
||||
nZeros := -1
|
||||
var err error
|
||||
for b := uint64(0); b == 0; nZeros++ {
|
||||
|
@ -135,7 +135,7 @@ func readUe(r *bits.BitReader) (int, error) {
|
|||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return int(math.Pow(float64(2), float64(nZeros)) - 1 + float64(rem)), nil
|
||||
return uint64(math.Pow(float64(2), float64(nZeros)) - 1 + float64(rem)), nil
|
||||
}
|
||||
|
||||
// readTe parses a syntax element of te(v) descriptor i.e, truncated
|
||||
|
@ -143,9 +143,10 @@ func readUe(r *bits.BitReader) (int, error) {
|
|||
// Rec. ITU-T H.264 (04/2017).
|
||||
//
|
||||
// TODO: this should also return uint.
|
||||
func readTe(r *bits.BitReader, x uint) (int, error) {
|
||||
func readTe(r *bits.BitReader, x uint) (int64, error) {
|
||||
if x > 1 {
|
||||
return readUe(r)
|
||||
ue, err := readUe(r)
|
||||
return int64(ue), err
|
||||
}
|
||||
|
||||
if x == 1 {
|
||||
|
@ -181,7 +182,7 @@ func readSe(r *bits.BitReader) (int, error) {
|
|||
// in Rec. ITU-T H.264 (04/2017).
|
||||
func readMe(r *bits.BitReader, chromaArrayType uint, mpm mbPartPredMode) (uint, error) {
|
||||
// Indexes to codedBlockPattern map.
|
||||
var i1, i2, i3 int
|
||||
var i1, i2, i3 uint64
|
||||
|
||||
// ChromaArrayType selects first index.
|
||||
switch chromaArrayType {
|
||||
|
@ -200,7 +201,7 @@ func readMe(r *bits.BitReader, chromaArrayType uint, mpm mbPartPredMode) (uint,
|
|||
}
|
||||
|
||||
// Need to check that we won't go out of bounds with this index.
|
||||
if i2 >= len(codedBlockPattern[i1]) {
|
||||
if int(i2) >= len(codedBlockPattern[i1]) {
|
||||
return 0, errInvalidCodeNum
|
||||
}
|
||||
|
||||
|
|
|
@ -4,7 +4,6 @@ import (
|
|||
"math"
|
||||
|
||||
"bitbucket.org/ausocean/av/codec/h264/h264dec/bits"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// import "strings"
|
||||
|
@ -42,152 +41,54 @@ type PPS struct {
|
|||
|
||||
func NewPPS(br *bits.BitReader, chromaFormat int) (*PPS, error) {
|
||||
pps := PPS{}
|
||||
var err error
|
||||
r := newFieldReader(br)
|
||||
|
||||
pps.ID, err = readUe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse ID")
|
||||
}
|
||||
|
||||
pps.SPSID, err = readUe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse SPS ID")
|
||||
}
|
||||
|
||||
b, err := br.ReadBits(1)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not read EntropyCodingMode")
|
||||
}
|
||||
pps.EntropyCodingMode = int(b)
|
||||
|
||||
b, err = br.ReadBits(1)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not read BottomFieldPicOrderInFramePresent")
|
||||
}
|
||||
pps.BottomFieldPicOrderInFramePresent = b == 1
|
||||
|
||||
pps.NumSliceGroupsMinus1, err = readUe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse NumSliceGroupsMinus1")
|
||||
}
|
||||
pps.ID = int(r.readUe())
|
||||
pps.SPSID = int(r.readUe())
|
||||
pps.EntropyCodingMode = int(r.readBits(1))
|
||||
pps.BottomFieldPicOrderInFramePresent = r.readBits(1) == 1
|
||||
pps.NumSliceGroupsMinus1 = int(r.readUe())
|
||||
|
||||
if pps.NumSliceGroupsMinus1 > 0 {
|
||||
pps.SliceGroupMapType, err = readUe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse SliceGroupMapType")
|
||||
}
|
||||
pps.SliceGroupMapType = int(r.readUe())
|
||||
|
||||
if pps.SliceGroupMapType == 0 {
|
||||
for iGroup := 0; iGroup <= pps.NumSliceGroupsMinus1; iGroup++ {
|
||||
b, err := readUe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse RunLengthMinus1")
|
||||
}
|
||||
pps.RunLengthMinus1 = append(pps.RunLengthMinus1, b)
|
||||
pps.RunLengthMinus1 = append(pps.RunLengthMinus1, int(r.readUe()))
|
||||
}
|
||||
} else if pps.SliceGroupMapType == 2 {
|
||||
for iGroup := 0; iGroup < pps.NumSliceGroupsMinus1; iGroup++ {
|
||||
pps.TopLeft[iGroup], err = readUe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse TopLeft[iGroup]")
|
||||
}
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse TopLeft[iGroup]")
|
||||
}
|
||||
|
||||
pps.BottomRight[iGroup], err = readUe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse BottomRight[iGroup]")
|
||||
}
|
||||
pps.TopLeft[iGroup] = int(r.readUe())
|
||||
pps.BottomRight[iGroup] = int(r.readUe())
|
||||
}
|
||||
} else if pps.SliceGroupMapType > 2 && pps.SliceGroupMapType < 6 {
|
||||
b, err = br.ReadBits(1)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not read SliceGroupChangeDirection")
|
||||
}
|
||||
pps.SliceGroupChangeDirection = b == 1
|
||||
|
||||
pps.SliceGroupChangeRateMinus1, err = readUe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse SliceGroupChangeRateMinus1")
|
||||
}
|
||||
pps.SliceGroupChangeDirection = r.readBits(1) == 1
|
||||
pps.SliceGroupChangeRateMinus1 = int(r.readUe())
|
||||
} else if pps.SliceGroupMapType == 6 {
|
||||
pps.PicSizeInMapUnitsMinus1, err = readUe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse PicSizeInMapUnitsMinus1")
|
||||
}
|
||||
pps.PicSizeInMapUnitsMinus1 = int(r.readUe())
|
||||
|
||||
for i := 0; i <= pps.PicSizeInMapUnitsMinus1; i++ {
|
||||
b, err = br.ReadBits(int(math.Ceil(math.Log2(float64(pps.NumSliceGroupsMinus1 + 1)))))
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "coult not read SliceGroupId")
|
||||
}
|
||||
pps.SliceGroupId[i] = int(b)
|
||||
pps.SliceGroupId[i] = int(r.readBits(int(math.Ceil(math.Log2(float64(pps.NumSliceGroupsMinus1 + 1))))))
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
pps.NumRefIdxL0DefaultActiveMinus1, err = readUe(br)
|
||||
if err != nil {
|
||||
return nil, errors.New("could not parse NumRefIdxL0DefaultActiveMinus1")
|
||||
}
|
||||
|
||||
pps.NumRefIdxL1DefaultActiveMinus1, err = readUe(br)
|
||||
if err != nil {
|
||||
return nil, errors.New("could not parse NumRefIdxL1DefaultActiveMinus1")
|
||||
}
|
||||
|
||||
b, err = br.ReadBits(1)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not read WeightedPred")
|
||||
}
|
||||
pps.WeightedPred = b == 1
|
||||
|
||||
b, err = br.ReadBits(2)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not read WeightedBipred")
|
||||
}
|
||||
pps.WeightedBipred = int(b)
|
||||
|
||||
pps.PicInitQpMinus26, err = readSe(br)
|
||||
if err != nil {
|
||||
return nil, errors.New("could not parse PicInitQpMinus26")
|
||||
}
|
||||
|
||||
pps.PicInitQsMinus26, err = readSe(br)
|
||||
if err != nil {
|
||||
return nil, errors.New("could not parse PicInitQsMinus26")
|
||||
}
|
||||
|
||||
pps.ChromaQpIndexOffset, err = readSe(br)
|
||||
if err != nil {
|
||||
return nil, errors.New("could not parse ChromaQpIndexOffset")
|
||||
}
|
||||
|
||||
err = readFlags(br, []flag{
|
||||
{&pps.DeblockingFilterControlPresent, "DeblockingFilterControlPresent"},
|
||||
{&pps.ConstrainedIntraPred, "ConstrainedIntraPred"},
|
||||
{&pps.RedundantPicCntPresent, "RedundantPicCntPresent"},
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
pps.NumRefIdxL0DefaultActiveMinus1 = int(r.readUe())
|
||||
pps.NumRefIdxL1DefaultActiveMinus1 = int(r.readUe())
|
||||
pps.WeightedPred = r.readBits(1) == 1
|
||||
pps.WeightedBipred = int(r.readBits(2))
|
||||
pps.PicInitQpMinus26 = int(r.readSe())
|
||||
pps.PicInitQsMinus26 = int(r.readSe())
|
||||
pps.ChromaQpIndexOffset = int(r.readSe())
|
||||
pps.DeblockingFilterControlPresent = r.readBits(1) == 1
|
||||
pps.ConstrainedIntraPred = r.readBits(1) == 1
|
||||
pps.RedundantPicCntPresent = r.readBits(1) == 1
|
||||
|
||||
logger.Printf("debug: \tChecking for more PPS data")
|
||||
if moreRBSPData(br) {
|
||||
logger.Printf("debug: \tProcessing additional PPS data")
|
||||
|
||||
b, err = br.ReadBits(1)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not read Transform8x8Mode")
|
||||
}
|
||||
pps.Transform8x8Mode = int(b)
|
||||
|
||||
b, err = br.ReadBits(1)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not read PicScalingMatrixPresent")
|
||||
}
|
||||
pps.PicScalingMatrixPresent = b == 1
|
||||
pps.Transform8x8Mode = int(r.readBits(1))
|
||||
pps.PicScalingMatrixPresent = r.readBits(1) == 1
|
||||
|
||||
if pps.PicScalingMatrixPresent {
|
||||
v := 6
|
||||
|
@ -195,11 +96,7 @@ func NewPPS(br *bits.BitReader, chromaFormat int) (*PPS, error) {
|
|||
v = 2
|
||||
}
|
||||
for i := 0; i < 6+(v*pps.Transform8x8Mode); i++ {
|
||||
b, err = br.ReadBits(1)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not read PicScalingListPresent")
|
||||
}
|
||||
pps.PicScalingListPresent[i] = b == 1
|
||||
pps.PicScalingListPresent[i] = r.readBits(1) == 1
|
||||
if pps.PicScalingListPresent[i] {
|
||||
if i < 6 {
|
||||
scalingList(
|
||||
|
@ -219,11 +116,9 @@ func NewPPS(br *bits.BitReader, chromaFormat int) (*PPS, error) {
|
|||
}
|
||||
}
|
||||
}
|
||||
pps.SecondChromaQpIndexOffset, err = readSe(br)
|
||||
if err != nil {
|
||||
return nil, errors.New("could not parse SecondChromaQpIndexOffset")
|
||||
}
|
||||
}
|
||||
pps.SecondChromaQpIndexOffset = r.readSe()
|
||||
moreRBSPData(br)
|
||||
// rbspTrailingBits()
|
||||
}
|
||||
return &pps, nil
|
||||
}
|
||||
|
|
|
@ -71,7 +71,8 @@ func (h *H264Reader) Start() {
|
|||
case naluTypePPS:
|
||||
videoStream := h.VideoStreams[len(h.VideoStreams)-1]
|
||||
// TODO: handle this error
|
||||
videoStream.PPS, _ = NewPPS(nil, videoStream.SPS.ChromaFormat)
|
||||
// TODO: fix chromaFormat
|
||||
videoStream.PPS, _ = NewPPS(nil, 0)
|
||||
case naluTypeSliceIDRPicture:
|
||||
fallthrough
|
||||
case naluTypeSliceNonIDRPicture:
|
||||
|
|
|
@ -81,12 +81,12 @@ func NewRefPicListModification(br *bits.BitReader, p *PPS, s *SliceHeader) (*Ref
|
|||
|
||||
if r.RefPicListModificationFlag[0] {
|
||||
for i := 0; ; i++ {
|
||||
r.ModificationOfPicNums[0][i] = fr.readUe()
|
||||
r.ModificationOfPicNums[0][i] = int(fr.readUe())
|
||||
|
||||
if r.ModificationOfPicNums[0][i] == 0 || r.ModificationOfPicNums[0][i] == 1 {
|
||||
r.AbsDiffPicNumMinus1[0][i] = fr.readUe()
|
||||
r.AbsDiffPicNumMinus1[0][i] = int(fr.readUe())
|
||||
} else if r.ModificationOfPicNums[0][i] == 2 {
|
||||
r.LongTermPicNum[0][i] = fr.readUe()
|
||||
r.LongTermPicNum[0][i] = int(fr.readUe())
|
||||
}
|
||||
|
||||
if r.ModificationOfPicNums[0][i] == 3 {
|
||||
|
@ -101,12 +101,12 @@ func NewRefPicListModification(br *bits.BitReader, p *PPS, s *SliceHeader) (*Ref
|
|||
|
||||
if r.RefPicListModificationFlag[1] {
|
||||
for i := 0; ; i++ {
|
||||
r.ModificationOfPicNums[1][i] = fr.readUe()
|
||||
r.ModificationOfPicNums[1][i] = int(fr.readUe())
|
||||
|
||||
if r.ModificationOfPicNums[1][i] == 0 || r.ModificationOfPicNums[1][i] == 1 {
|
||||
r.AbsDiffPicNumMinus1[1][i] = fr.readUe()
|
||||
r.AbsDiffPicNumMinus1[1][i] = int(fr.readUe())
|
||||
} else if r.ModificationOfPicNums[1][i] == 2 {
|
||||
r.LongTermPicNum[1][i] = fr.readUe()
|
||||
r.LongTermPicNum[1][i] = int(fr.readUe())
|
||||
}
|
||||
|
||||
if r.ModificationOfPicNums[1][i] == 3 {
|
||||
|
@ -142,25 +142,15 @@ type PredWeightTable struct {
|
|||
// PredWeightTable.
|
||||
func NewPredWeightTable(br *bits.BitReader, h *SliceHeader, chromaArrayType int) (*PredWeightTable, error) {
|
||||
p := &PredWeightTable{}
|
||||
var err error
|
||||
r := newFieldReader(br)
|
||||
|
||||
p.LumaLog2WeightDenom, err = readUe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse LumaLog2WeightDenom")
|
||||
}
|
||||
p.LumaLog2WeightDenom = int(r.readUe())
|
||||
|
||||
if chromaArrayType != 0 {
|
||||
p.ChromaLog2WeightDenom, err = readUe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse ChromaLog2WeightDenom")
|
||||
}
|
||||
p.ChromaLog2WeightDenom = int(r.readUe())
|
||||
}
|
||||
for i := 0; i <= h.NumRefIdxL0ActiveMinus1; i++ {
|
||||
b, err := br.ReadBits(1)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not read LumaWeightL0Flag")
|
||||
}
|
||||
p.LumaWeightL0Flag = b == 1
|
||||
p.LumaWeightL0Flag = r.readBits(1) == 1
|
||||
|
||||
if p.LumaWeightL0Flag {
|
||||
se, err := readSe(br)
|
||||
|
@ -274,6 +264,7 @@ type drpmElement struct {
|
|||
// DecRefPicMarking.
|
||||
func NewDecRefPicMarking(br *bits.BitReader, idrPic bool) (*DecRefPicMarking, error) {
|
||||
d := &DecRefPicMarking{}
|
||||
r := newFieldReader(br)
|
||||
if idrPic {
|
||||
b, err := br.ReadBits(1)
|
||||
if err != nil {
|
||||
|
@ -295,36 +286,21 @@ func NewDecRefPicMarking(br *bits.BitReader, idrPic bool) (*DecRefPicMarking, er
|
|||
|
||||
if d.AdaptiveRefPicMarkingModeFlag {
|
||||
for i := 0; ; i++ {
|
||||
d.elements = append(d.elements,drpmElement{})
|
||||
d.elements = append(d.elements, drpmElement{})
|
||||
|
||||
d.elements[i].MemoryManagementControlOperation, err = readUe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse MemoryManagementControlOperation")
|
||||
}
|
||||
d.elements[i].MemoryManagementControlOperation = int(r.readUe())
|
||||
|
||||
if d.elements[i].MemoryManagementControlOperation == 1 || d.elements[i].MemoryManagementControlOperation == 3 {
|
||||
d.elements[i].DifferenceOfPicNumsMinus1, err = readUe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse MemoryManagementControlOperation")
|
||||
}
|
||||
d.elements[i].DifferenceOfPicNumsMinus1 = int(r.readUe())
|
||||
}
|
||||
if d.elements[i].MemoryManagementControlOperation == 2 {
|
||||
d.elements[i].LongTermPicNum, err = readUe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse LongTermPicNum")
|
||||
}
|
||||
d.elements[i].LongTermPicNum = int(r.readUe())
|
||||
}
|
||||
if d.elements[i].MemoryManagementControlOperation == 3 || d.elements[i].MemoryManagementControlOperation == 6 {
|
||||
d.elements[i].LongTermFrameIdx, err = readUe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse LongTermFrameIdx")
|
||||
}
|
||||
d.elements[i].LongTermFrameIdx = int(r.readUe())
|
||||
}
|
||||
if d.elements[i].MemoryManagementControlOperation == 4 {
|
||||
d.elements[i].MaxLongTermFrameIdxPlus1, err = readUe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse MaxLongTermFrameIdxPlus1")
|
||||
}
|
||||
d.elements[i].MaxLongTermFrameIdxPlus1 = int(r.readUe())
|
||||
}
|
||||
|
||||
if d.elements[i].MemoryManagementControlOperation == 0 {
|
||||
|
@ -425,13 +401,13 @@ func (d SliceData) ae(v int) int {
|
|||
// 8.2.2
|
||||
func MbToSliceGroupMap(sps *SPS, pps *PPS, header *SliceHeader) []int {
|
||||
mbaffFrameFlag := 0
|
||||
if sps.MBAdaptiveFrameField && !header.FieldPic {
|
||||
if sps.MBAdaptiveFrameFieldFlag && !header.FieldPic {
|
||||
mbaffFrameFlag = 1
|
||||
}
|
||||
mapUnitToSliceGroupMap := MapUnitToSliceGroupMap(sps, pps, header)
|
||||
mbToSliceGroupMap := []int{}
|
||||
for i := 0; i <= PicSizeInMbs(sps, header)-1; i++ {
|
||||
if sps.FrameMbsOnly || header.FieldPic {
|
||||
if sps.FrameMBSOnlyFlag || header.FieldPic {
|
||||
mbToSliceGroupMap = append(mbToSliceGroupMap, mapUnitToSliceGroupMap[i])
|
||||
continue
|
||||
}
|
||||
|
@ -439,7 +415,7 @@ func MbToSliceGroupMap(sps *SPS, pps *PPS, header *SliceHeader) []int {
|
|||
mbToSliceGroupMap = append(mbToSliceGroupMap, mapUnitToSliceGroupMap[i/2])
|
||||
continue
|
||||
}
|
||||
if !sps.FrameMbsOnly && !sps.MBAdaptiveFrameField && !header.FieldPic {
|
||||
if !sps.FrameMBSOnlyFlag && !sps.MBAdaptiveFrameFieldFlag && !header.FieldPic {
|
||||
mbToSliceGroupMap = append(
|
||||
mbToSliceGroupMap,
|
||||
mapUnitToSliceGroupMap[(i/(2*PicWidthInMbs(sps)))*PicWidthInMbs(sps)+(i%PicWidthInMbs(sps))])
|
||||
|
@ -449,34 +425,34 @@ func MbToSliceGroupMap(sps *SPS, pps *PPS, header *SliceHeader) []int {
|
|||
|
||||
}
|
||||
func PicWidthInMbs(sps *SPS) int {
|
||||
return sps.PicWidthInMbsMinus1 + 1
|
||||
return int(sps.PicWidthInMBSMinus1 + 1)
|
||||
}
|
||||
func PicHeightInMapUnits(sps *SPS) int {
|
||||
return sps.PicHeightInMapUnitsMinus1 + 1
|
||||
return int(sps.PicHeightInMapUnitsMinus1 + 1)
|
||||
}
|
||||
func PicSizeInMapUnits(sps *SPS) int {
|
||||
return PicWidthInMbs(sps) * PicHeightInMapUnits(sps)
|
||||
return int(PicWidthInMbs(sps) * PicHeightInMapUnits(sps))
|
||||
}
|
||||
func FrameHeightInMbs(sps *SPS) int {
|
||||
return (2 - flagVal(sps.FrameMbsOnly)) * PicHeightInMapUnits(sps)
|
||||
return int((2 - flagVal(sps.FrameMBSOnlyFlag)) * PicHeightInMapUnits(sps))
|
||||
}
|
||||
func PicHeightInMbs(sps *SPS, header *SliceHeader) int {
|
||||
return FrameHeightInMbs(sps) / (1 + flagVal(header.FieldPic))
|
||||
return int(FrameHeightInMbs(sps) / (1 + flagVal(header.FieldPic)))
|
||||
}
|
||||
func PicSizeInMbs(sps *SPS, header *SliceHeader) int {
|
||||
return PicWidthInMbs(sps) * PicHeightInMbs(sps, header)
|
||||
return int(PicWidthInMbs(sps) * PicHeightInMbs(sps, header))
|
||||
}
|
||||
|
||||
// table 6-1
|
||||
func SubWidthC(sps *SPS) int {
|
||||
n := 17
|
||||
if sps.UseSeparateColorPlane {
|
||||
if sps.ChromaFormat == chroma444 {
|
||||
if sps.SeparateColorPlaneFlag {
|
||||
if sps.ChromaFormatIDC == chroma444 {
|
||||
return n
|
||||
}
|
||||
}
|
||||
|
||||
switch sps.ChromaFormat {
|
||||
switch sps.ChromaFormatIDC {
|
||||
case chromaMonochrome:
|
||||
return n
|
||||
case chroma420:
|
||||
|
@ -491,12 +467,12 @@ func SubWidthC(sps *SPS) int {
|
|||
}
|
||||
func SubHeightC(sps *SPS) int {
|
||||
n := 17
|
||||
if sps.UseSeparateColorPlane {
|
||||
if sps.ChromaFormat == chroma444 {
|
||||
if sps.SeparateColorPlaneFlag {
|
||||
if sps.ChromaFormatIDC == chroma444 {
|
||||
return n
|
||||
}
|
||||
}
|
||||
switch sps.ChromaFormat {
|
||||
switch sps.ChromaFormatIDC {
|
||||
case chromaMonochrome:
|
||||
return n
|
||||
case chroma420:
|
||||
|
@ -543,6 +519,8 @@ func NumMbPart(nalUnit *NALUnit, sps *SPS, header *SliceHeader, data *SliceData)
|
|||
|
||||
func MbPred(chromaArrayType int, sliceContext *SliceContext, br *bits.BitReader, rbsp []byte) error {
|
||||
var cabac *CABAC
|
||||
r := newFieldReader(br)
|
||||
|
||||
sliceType := sliceTypeMap[sliceContext.Slice.Header.SliceType]
|
||||
mbPartPredMode, err := MbPartPredMode(sliceContext.Slice.Data, sliceType, sliceContext.Slice.Data.MbType, 0)
|
||||
if err != nil {
|
||||
|
@ -652,11 +630,7 @@ func MbPred(chromaArrayType int, sliceContext *SliceContext, br *bits.BitReader,
|
|||
|
||||
logger.Printf("TODO: ae for IntraChromaPredMode\n")
|
||||
} else {
|
||||
var err error
|
||||
sliceContext.Slice.Data.IntraChromaPredMode, err = readUe(br)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "could not parse IntraChromaPredMode")
|
||||
}
|
||||
sliceContext.Slice.Data.IntraChromaPredMode = int(r.readUe())
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -685,14 +659,10 @@ func MbPred(chromaArrayType int, sliceContext *SliceContext, br *bits.BitReader,
|
|||
// TODO: Only one reference picture is used for inter-prediction,
|
||||
// then the value should be 0
|
||||
if MbaffFrameFlag(sliceContext.SPS, sliceContext.Slice.Header) == 0 || !sliceContext.Slice.Data.MbFieldDecodingFlag {
|
||||
sliceContext.Slice.Data.RefIdxL0[mbPartIdx], _ = readTe(
|
||||
br,
|
||||
uint(sliceContext.Slice.Header.NumRefIdxL0ActiveMinus1))
|
||||
sliceContext.Slice.Data.RefIdxL0[mbPartIdx] = int(r.readTe(uint(sliceContext.Slice.Header.NumRefIdxL0ActiveMinus1)))
|
||||
} else {
|
||||
rangeMax := 2*sliceContext.Slice.Header.NumRefIdxL0ActiveMinus1 + 1
|
||||
sliceContext.Slice.Data.RefIdxL0[mbPartIdx], _ = readTe(
|
||||
br,
|
||||
uint(rangeMax))
|
||||
sliceContext.Slice.Data.RefIdxL0[mbPartIdx] = int(r.readTe(uint(rangeMax)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -858,19 +828,19 @@ func nextMbAddress(n int, sps *SPS, pps *PPS, header *SliceHeader) int {
|
|||
i := n + 1
|
||||
// picSizeInMbs is the number of macroblocks in picture 0
|
||||
// 7-13
|
||||
// PicWidthInMbs = sps.PicWidthInMbsMinus1 + 1
|
||||
// PicWidthInMbs = sps.PicWidthInMBSMinus1 + 1
|
||||
// PicHeightInMapUnits = sps.PicHeightInMapUnitsMinus1 + 1
|
||||
// 7-29
|
||||
// picSizeInMbs = PicWidthInMbs * PicHeightInMbs
|
||||
// 7-26
|
||||
// PicHeightInMbs = FrameHeightInMbs / (1 + header.fieldPicFlag)
|
||||
// 7-18
|
||||
// FrameHeightInMbs = (2 - ps.FrameMbsOnly) * PicHeightInMapUnits
|
||||
picWidthInMbs := sps.PicWidthInMbsMinus1 + 1
|
||||
// FrameHeightInMbs = (2 - ps.FrameMBSOnlyFlag) * PicHeightInMapUnits
|
||||
picWidthInMbs := sps.PicWidthInMBSMinus1 + 1
|
||||
picHeightInMapUnits := sps.PicHeightInMapUnitsMinus1 + 1
|
||||
frameHeightInMbs := (2 - flagVal(sps.FrameMbsOnly)) * picHeightInMapUnits
|
||||
frameHeightInMbs := (2 - flagVal(sps.FrameMBSOnlyFlag)) * int(picHeightInMapUnits)
|
||||
picHeightInMbs := frameHeightInMbs / (1 + flagVal(header.FieldPic))
|
||||
picSizeInMbs := picWidthInMbs * picHeightInMbs
|
||||
picSizeInMbs := int(picWidthInMbs) * picHeightInMbs
|
||||
mbToSliceGroupMap := MbToSliceGroupMap(sps, pps, header)
|
||||
for i < picSizeInMbs && mbToSliceGroupMap[i] != mbToSliceGroupMap[i] {
|
||||
i++
|
||||
|
@ -880,7 +850,7 @@ func nextMbAddress(n int, sps *SPS, pps *PPS, header *SliceHeader) int {
|
|||
|
||||
func CurrMbAddr(sps *SPS, header *SliceHeader) int {
|
||||
mbaffFrameFlag := 0
|
||||
if sps.MBAdaptiveFrameField && !header.FieldPic {
|
||||
if sps.MBAdaptiveFrameFieldFlag && !header.FieldPic {
|
||||
mbaffFrameFlag = 1
|
||||
}
|
||||
|
||||
|
@ -888,15 +858,15 @@ func CurrMbAddr(sps *SPS, header *SliceHeader) int {
|
|||
}
|
||||
|
||||
func MbaffFrameFlag(sps *SPS, header *SliceHeader) int {
|
||||
if sps.MBAdaptiveFrameField && !header.FieldPic {
|
||||
if sps.MBAdaptiveFrameFieldFlag && !header.FieldPic {
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func NewSliceData(chromaArrayType int, sliceContext *SliceContext, br *bits.BitReader) (*SliceData, error) {
|
||||
r := newFieldReader(br)
|
||||
var cabac *CABAC
|
||||
var err error
|
||||
sliceContext.Slice.Data = &SliceData{BitReader: br}
|
||||
// TODO: Why is this being initialized here?
|
||||
// initCabac(sliceContext)
|
||||
|
@ -910,7 +880,7 @@ func NewSliceData(chromaArrayType int, sliceContext *SliceContext, br *bits.BitR
|
|||
}
|
||||
}
|
||||
mbaffFrameFlag := 0
|
||||
if sliceContext.SPS.MBAdaptiveFrameField && !sliceContext.Slice.Header.FieldPic {
|
||||
if sliceContext.SPS.MBAdaptiveFrameFieldFlag && !sliceContext.Slice.Header.FieldPic {
|
||||
mbaffFrameFlag = 1
|
||||
}
|
||||
currMbAddr := sliceContext.Slice.Header.FirstMbInSlice * (1 * mbaffFrameFlag)
|
||||
|
@ -925,10 +895,7 @@ func NewSliceData(chromaArrayType int, sliceContext *SliceContext, br *bits.BitR
|
|||
if sliceContext.Slice.Data.SliceTypeName != "I" && sliceContext.Slice.Data.SliceTypeName != "SI" {
|
||||
logger.Printf("debug: \tNonI/SI slice, processing moreData\n")
|
||||
if sliceContext.PPS.EntropyCodingMode == 0 {
|
||||
sliceContext.Slice.Data.MbSkipRun, err = readUe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse MbSkipRun")
|
||||
}
|
||||
sliceContext.Slice.Data.MbSkipRun = int(r.readUe())
|
||||
|
||||
if sliceContext.Slice.Data.MbSkipRun > 0 {
|
||||
prevMbSkipped = 1
|
||||
|
@ -1042,10 +1009,7 @@ func NewSliceData(chromaArrayType int, sliceContext *SliceContext, br *bits.BitR
|
|||
|
||||
logger.Printf("TODO: ae for MBType\n")
|
||||
} else {
|
||||
sliceContext.Slice.Data.MbType, err = readUe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse MbType")
|
||||
}
|
||||
sliceContext.Slice.Data.MbType = int(r.readUe())
|
||||
}
|
||||
if sliceContext.Slice.Data.MbTypeName == "I_PCM" {
|
||||
for !br.ByteAligned() {
|
||||
|
@ -1057,7 +1021,7 @@ func NewSliceData(chromaArrayType int, sliceContext *SliceContext, br *bits.BitR
|
|||
// 7-3 p95
|
||||
bitDepthY := 8 + sliceContext.SPS.BitDepthLumaMinus8
|
||||
for i := 0; i < 256; i++ {
|
||||
s, err := br.ReadBits(bitDepthY)
|
||||
s, err := br.ReadBits(int(bitDepthY))
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, fmt.Sprintf("could not read PcmSampleLuma[%d]", i))
|
||||
}
|
||||
|
@ -1071,14 +1035,14 @@ func NewSliceData(chromaArrayType int, sliceContext *SliceContext, br *bits.BitR
|
|||
mbWidthC := 16 / SubWidthC(sliceContext.SPS)
|
||||
mbHeightC := 16 / SubHeightC(sliceContext.SPS)
|
||||
// if monochrome
|
||||
if sliceContext.SPS.ChromaFormat == chromaMonochrome || sliceContext.SPS.UseSeparateColorPlane {
|
||||
if sliceContext.SPS.ChromaFormatIDC == chromaMonochrome || sliceContext.SPS.SeparateColorPlaneFlag {
|
||||
mbWidthC = 0
|
||||
mbHeightC = 0
|
||||
}
|
||||
|
||||
bitDepthC := 8 + sliceContext.SPS.BitDepthChromaMinus8
|
||||
for i := 0; i < 2*mbWidthC*mbHeightC; i++ {
|
||||
s, err := br.ReadBits(bitDepthC)
|
||||
s, err := br.ReadBits(int(bitDepthC))
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, fmt.Sprintf("could not read PcmSampleChroma[%d]", i))
|
||||
}
|
||||
|
@ -1104,7 +1068,7 @@ func NewSliceData(chromaArrayType int, sliceContext *SliceContext, br *bits.BitR
|
|||
if NumbSubMbPart(subMbType[mbPartIdx]) > 1 {
|
||||
noSubMbPartSizeLessThan8x8Flag = 0
|
||||
}
|
||||
} else if !sliceContext.SPS.Direct8x8Inference {
|
||||
} else if !sliceContext.SPS.Direct8x8InferenceFlag {
|
||||
noSubMbPartSizeLessThan8x8Flag = 0
|
||||
}
|
||||
}
|
||||
|
@ -1156,7 +1120,7 @@ func NewSliceData(chromaArrayType int, sliceContext *SliceContext, br *bits.BitR
|
|||
}
|
||||
|
||||
// sliceContext.Slice.Data.CodedBlockPattern = me(v) | ae(v)
|
||||
if CodedBlockPatternLuma(sliceContext.Slice.Data) > 0 && sliceContext.PPS.Transform8x8Mode == 1 && sliceContext.Slice.Data.MbTypeName != "I_NxN" && noSubMbPartSizeLessThan8x8Flag == 1 && (sliceContext.Slice.Data.MbTypeName != "B_Direct_16x16" || sliceContext.SPS.Direct8x8Inference) {
|
||||
if CodedBlockPatternLuma(sliceContext.Slice.Data) > 0 && sliceContext.PPS.Transform8x8Mode == 1 && sliceContext.Slice.Data.MbTypeName != "I_NxN" && noSubMbPartSizeLessThan8x8Flag == 1 && (sliceContext.Slice.Data.MbTypeName != "B_Direct_16x16" || sliceContext.SPS.Direct8x8InferenceFlag) {
|
||||
// TODO: 1 bit or ae(v)
|
||||
if sliceContext.PPS.EntropyCodingMode == 1 {
|
||||
binarization := NewBinarization("Transform8x8Flag", sliceContext.Slice.Data)
|
||||
|
@ -1236,31 +1200,21 @@ func NewSliceContext(vid *VideoStream, nalUnit *NALUnit, rbsp []byte, showPacket
|
|||
idrPic = true
|
||||
}
|
||||
header := SliceHeader{}
|
||||
if sps.UseSeparateColorPlane {
|
||||
if sps.SeparateColorPlaneFlag {
|
||||
vid.ChromaArrayType = 0
|
||||
} else {
|
||||
vid.ChromaArrayType = sps.ChromaFormat
|
||||
vid.ChromaArrayType = int(sps.ChromaFormatIDC)
|
||||
}
|
||||
br := bits.NewBitReader(bytes.NewReader(rbsp))
|
||||
r := newFieldReader(br)
|
||||
|
||||
header.FirstMbInSlice, err = readUe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse FirstMbInSlice")
|
||||
}
|
||||
|
||||
header.SliceType, err = readUe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse SliceType")
|
||||
}
|
||||
header.FirstMbInSlice = int(r.readUe())
|
||||
header.SliceType = int(r.readUe())
|
||||
|
||||
sliceType := sliceTypeMap[header.SliceType]
|
||||
logger.Printf("debug: %s (%s) slice of %d bytes\n", NALUnitType[int(nalUnit.Type)], sliceType, len(rbsp))
|
||||
header.PPSID, err = readUe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse PPSID")
|
||||
}
|
||||
|
||||
if sps.UseSeparateColorPlane {
|
||||
header.PPSID = int(r.readUe())
|
||||
if sps.SeparateColorPlaneFlag {
|
||||
b, err := br.ReadBits(2)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not read ColorPlaneID")
|
||||
|
@ -1269,7 +1223,7 @@ func NewSliceContext(vid *VideoStream, nalUnit *NALUnit, rbsp []byte, showPacket
|
|||
}
|
||||
// TODO: See 7.4.3
|
||||
// header.FrameNum = b.NextField("FrameNum", 0)
|
||||
if !sps.FrameMbsOnly {
|
||||
if !sps.FrameMBSOnlyFlag {
|
||||
b, err := br.ReadBits(1)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not read FieldPic")
|
||||
|
@ -1284,13 +1238,10 @@ func NewSliceContext(vid *VideoStream, nalUnit *NALUnit, rbsp []byte, showPacket
|
|||
}
|
||||
}
|
||||
if idrPic {
|
||||
header.IDRPicID, err = readUe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse IDRPicID")
|
||||
}
|
||||
header.IDRPicID = int(r.readUe())
|
||||
}
|
||||
if sps.PicOrderCountType == 0 {
|
||||
b, err := br.ReadBits(sps.Log2MaxPicOrderCntLSBMin4 + 4)
|
||||
b, err := br.ReadBits(int(sps.Log2MaxPicOrderCntLSBMin4 + 4))
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not read PicOrderCntLsb")
|
||||
}
|
||||
|
@ -1303,7 +1254,7 @@ func NewSliceContext(vid *VideoStream, nalUnit *NALUnit, rbsp []byte, showPacket
|
|||
}
|
||||
}
|
||||
}
|
||||
if sps.PicOrderCountType == 1 && !sps.DeltaPicOrderAlwaysZero {
|
||||
if sps.PicOrderCountType == 1 && !sps.DeltaPicOrderAlwaysZeroFlag {
|
||||
header.DeltaPicOrderCnt[0], err = readSe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse DeltaPicOrderCnt")
|
||||
|
@ -1317,10 +1268,7 @@ func NewSliceContext(vid *VideoStream, nalUnit *NALUnit, rbsp []byte, showPacket
|
|||
}
|
||||
}
|
||||
if pps.RedundantPicCntPresent {
|
||||
header.RedundantPicCnt, err = readUe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse RedundantPicCnt")
|
||||
}
|
||||
header.RedundantPicCnt = int(r.readUe())
|
||||
}
|
||||
if sliceType == "B" {
|
||||
b, err := br.ReadBits(1)
|
||||
|
@ -1337,15 +1285,9 @@ func NewSliceContext(vid *VideoStream, nalUnit *NALUnit, rbsp []byte, showPacket
|
|||
header.NumRefIdxActiveOverride = b == 1
|
||||
|
||||
if header.NumRefIdxActiveOverride {
|
||||
header.NumRefIdxL0ActiveMinus1, err = readUe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse NumRefIdxL0ActiveMinus1")
|
||||
}
|
||||
header.NumRefIdxL0ActiveMinus1 = int(r.readUe())
|
||||
if sliceType == "B" {
|
||||
header.NumRefIdxL1ActiveMinus1, err = readUe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse NumRefIdxL1ActiveMinus1")
|
||||
}
|
||||
header.NumRefIdxL1ActiveMinus1 = int(r.readUe())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1375,35 +1317,18 @@ func NewSliceContext(vid *VideoStream, nalUnit *NALUnit, rbsp []byte, showPacket
|
|||
}
|
||||
}
|
||||
if pps.EntropyCodingMode == 1 && sliceType != "I" && sliceType != "SI" {
|
||||
header.CabacInit, err = readUe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse CabacInit")
|
||||
}
|
||||
}
|
||||
header.SliceQpDelta, err = readSe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse SliceQpDelta")
|
||||
header.CabacInit = int(r.readUe())
|
||||
}
|
||||
header.SliceQpDelta = int(r.readSe())
|
||||
|
||||
if sliceType == "SP" || sliceType == "SI" {
|
||||
if sliceType == "SP" {
|
||||
b, err := br.ReadBits(1)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not read SpForSwitch")
|
||||
}
|
||||
header.SpForSwitch = b == 1
|
||||
}
|
||||
header.SliceQsDelta, err = readSe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse SliceQsDelta")
|
||||
header.SpForSwitch = r.readBits(1) == 1
|
||||
}
|
||||
header.SliceQsDelta = int(r.readSe())
|
||||
}
|
||||
if pps.DeblockingFilterControlPresent {
|
||||
header.DisableDeblockingFilter, err = readUe(br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not parse DisableDeblockingFilter")
|
||||
}
|
||||
|
||||
header.DisableDeblockingFilter = int(r.readUe())
|
||||
if header.DisableDeblockingFilter != 1 {
|
||||
header.SliceAlphaC0OffsetDiv2, err = readSe(br)
|
||||
if err != nil {
|
||||
|
@ -1432,13 +1357,10 @@ func NewSliceContext(vid *VideoStream, nalUnit *NALUnit, rbsp []byte, showPacket
|
|||
Header: &header,
|
||||
},
|
||||
}
|
||||
sliceContext.Slice.Data, err = NewSliceData(vid.ChromaArrayType,sliceContext, br)
|
||||
sliceContext.Slice.Data, err = NewSliceData(vid.ChromaArrayType, sliceContext, br)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "could not create slice data")
|
||||
}
|
||||
if showPacket {
|
||||
debugPacket("debug: Header", sliceContext.Slice.Header)
|
||||
debugPacket("debug: Data", sliceContext.Slice.Data)
|
||||
}
|
||||
|
||||
return sliceContext, nil
|
||||
}
|
||||
|
|
|
@ -22,12 +22,12 @@ var subWidthCTests = []struct {
|
|||
want int
|
||||
}{
|
||||
{SPS{}, 17},
|
||||
{SPS{ChromaFormat: 0}, 17},
|
||||
{SPS{ChromaFormat: 1}, 2},
|
||||
{SPS{ChromaFormat: 2}, 2},
|
||||
{SPS{ChromaFormat: 3}, 1},
|
||||
{SPS{ChromaFormat: 3, UseSeparateColorPlane: true}, 17},
|
||||
{SPS{ChromaFormat: 999}, 17},
|
||||
{SPS{ChromaFormatIDC: 0}, 17},
|
||||
{SPS{ChromaFormatIDC: 1}, 2},
|
||||
{SPS{ChromaFormatIDC: 2}, 2},
|
||||
{SPS{ChromaFormatIDC: 3}, 1},
|
||||
{SPS{ChromaFormatIDC: 3, SeparateColorPlaneFlag: true}, 17},
|
||||
{SPS{ChromaFormatIDC: 999}, 17},
|
||||
}
|
||||
|
||||
// TestSubWidthC tests that the correct SubWidthC is returned given
|
||||
|
@ -45,12 +45,12 @@ var subHeightCTests = []struct {
|
|||
want int
|
||||
}{
|
||||
{SPS{}, 17},
|
||||
{SPS{ChromaFormat: 0}, 17},
|
||||
{SPS{ChromaFormat: 1}, 2},
|
||||
{SPS{ChromaFormat: 2}, 1},
|
||||
{SPS{ChromaFormat: 3}, 1},
|
||||
{SPS{ChromaFormat: 3, UseSeparateColorPlane: true}, 17},
|
||||
{SPS{ChromaFormat: 999}, 17},
|
||||
{SPS{ChromaFormatIDC: 0}, 17},
|
||||
{SPS{ChromaFormatIDC: 1}, 2},
|
||||
{SPS{ChromaFormatIDC: 2}, 1},
|
||||
{SPS{ChromaFormatIDC: 3}, 1},
|
||||
{SPS{ChromaFormatIDC: 3, SeparateColorPlaneFlag: true}, 17},
|
||||
{SPS{ChromaFormatIDC: 999}, 17},
|
||||
}
|
||||
|
||||
// TestSubHeightC tests that the correct SubHeightC is returned given
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -35,20 +35,20 @@ import (
|
|||
"github.com/yobert/alsa"
|
||||
)
|
||||
|
||||
// Resample takes an alsa.Buffer (b) and resamples the pcm audio data to 'rate' Hz and returns the resulting pcm.
|
||||
// If an error occurs, an error will be returned along with the original b's data.
|
||||
// Resample takes alsa.Buffer b and resamples the pcm audio data to 'rate' Hz and returns an alsa.Buffer with the resampled data.
|
||||
// Notes:
|
||||
// - Currently only downsampling is implemented and b's rate must be divisible by 'rate' or an error will occur.
|
||||
// - If the number of bytes in b.Data is not divisible by the decimation factor (ratioFrom), the remaining bytes will
|
||||
// not be included in the result. Eg. input of length 480002 downsampling 6:1 will result in output length 80000.
|
||||
func Resample(b alsa.Buffer, rate int) ([]byte, error) {
|
||||
fromRate := b.Format.Rate
|
||||
if fromRate == rate {
|
||||
return b.Data, nil
|
||||
} else if fromRate < 0 {
|
||||
return nil, fmt.Errorf("Unable to convert from: %v Hz", fromRate)
|
||||
} else if rate < 0 {
|
||||
return nil, fmt.Errorf("Unable to convert to: %v Hz", rate)
|
||||
func Resample(b alsa.Buffer, rate int) (alsa.Buffer, error) {
|
||||
if b.Format.Rate == rate {
|
||||
return b, nil
|
||||
}
|
||||
if b.Format.Rate < 0 {
|
||||
return alsa.Buffer{}, fmt.Errorf("Unable to convert from: %v Hz", b.Format.Rate)
|
||||
}
|
||||
if rate < 0 {
|
||||
return alsa.Buffer{}, fmt.Errorf("Unable to convert to: %v Hz", rate)
|
||||
}
|
||||
|
||||
// The number of bytes in a sample.
|
||||
|
@ -59,22 +59,22 @@ func Resample(b alsa.Buffer, rate int) ([]byte, error) {
|
|||
case alsa.S16_LE:
|
||||
sampleLen = 2 * b.Format.Channels
|
||||
default:
|
||||
return nil, fmt.Errorf("Unhandled ALSA format: %v", b.Format.SampleFormat)
|
||||
return alsa.Buffer{}, fmt.Errorf("Unhandled ALSA format: %v", b.Format.SampleFormat)
|
||||
}
|
||||
inPcmLen := len(b.Data)
|
||||
|
||||
// Calculate sample rate ratio ratioFrom:ratioTo.
|
||||
rateGcd := gcd(rate, fromRate)
|
||||
ratioFrom := fromRate / rateGcd
|
||||
rateGcd := gcd(rate, b.Format.Rate)
|
||||
ratioFrom := b.Format.Rate / rateGcd
|
||||
ratioTo := rate / rateGcd
|
||||
|
||||
// ratioTo = 1 is the only number that will result in an even sampling.
|
||||
if ratioTo != 1 {
|
||||
return nil, fmt.Errorf("unhandled from:to rate ratio %v:%v: 'to' must be 1", ratioFrom, ratioTo)
|
||||
return alsa.Buffer{}, fmt.Errorf("unhandled from:to rate ratio %v:%v: 'to' must be 1", ratioFrom, ratioTo)
|
||||
}
|
||||
|
||||
newLen := inPcmLen / ratioFrom
|
||||
result := make([]byte, 0, newLen)
|
||||
resampled := make([]byte, 0, newLen)
|
||||
|
||||
// For each new sample to be generated, loop through the respective 'ratioFrom' samples in 'b.Data' to add them
|
||||
// up and average them. The result is the new sample.
|
||||
|
@ -96,19 +96,28 @@ func Resample(b alsa.Buffer, rate int) ([]byte, error) {
|
|||
case alsa.S16_LE:
|
||||
binary.LittleEndian.PutUint16(bAvg, uint16(avg))
|
||||
}
|
||||
result = append(result, bAvg...)
|
||||
resampled = append(resampled, bAvg...)
|
||||
}
|
||||
return result, nil
|
||||
|
||||
// Return a new alsa.Buffer with resampled data.
|
||||
return alsa.Buffer{
|
||||
Format: alsa.BufferFormat{
|
||||
Channels: b.Format.Channels,
|
||||
SampleFormat: b.Format.SampleFormat,
|
||||
Rate: rate,
|
||||
},
|
||||
Data: resampled,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// StereoToMono returns raw mono audio data generated from only the left channel from
|
||||
// the given stereo recording (ALSA buffer)
|
||||
// if an error occurs, an error will be returned along with the original stereo data.
|
||||
func StereoToMono(b alsa.Buffer) ([]byte, error) {
|
||||
func StereoToMono(b alsa.Buffer) (alsa.Buffer, error) {
|
||||
if b.Format.Channels == 1 {
|
||||
return b.Data, nil
|
||||
} else if b.Format.Channels != 2 {
|
||||
return nil, fmt.Errorf("Audio is not stereo or mono, it has %v channels", b.Format.Channels)
|
||||
return b, nil
|
||||
}
|
||||
if b.Format.Channels != 2 {
|
||||
return alsa.Buffer{}, fmt.Errorf("Audio is not stereo or mono, it has %v channels", b.Format.Channels)
|
||||
}
|
||||
|
||||
var stereoSampleBytes int
|
||||
|
@ -118,7 +127,7 @@ func StereoToMono(b alsa.Buffer) ([]byte, error) {
|
|||
case alsa.S16_LE:
|
||||
stereoSampleBytes = 4
|
||||
default:
|
||||
return nil, fmt.Errorf("Unhandled ALSA format %v", b.Format.SampleFormat)
|
||||
return alsa.Buffer{}, fmt.Errorf("Unhandled ALSA format %v", b.Format.SampleFormat)
|
||||
}
|
||||
|
||||
recLength := len(b.Data)
|
||||
|
@ -134,7 +143,15 @@ func StereoToMono(b alsa.Buffer) ([]byte, error) {
|
|||
}
|
||||
}
|
||||
|
||||
return mono, nil
|
||||
// Return a new alsa.Buffer with resampled data.
|
||||
return alsa.Buffer{
|
||||
Format: alsa.BufferFormat{
|
||||
Channels: 1,
|
||||
SampleFormat: b.Format.SampleFormat,
|
||||
Rate: b.Format.Rate,
|
||||
},
|
||||
Data: mono,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// gcd is used for calculating the greatest common divisor of two positive integers, a and b.
|
||||
|
|
|
@ -71,7 +71,7 @@ func TestResample(t *testing.T) {
|
|||
}
|
||||
|
||||
// Compare result with expected.
|
||||
if !bytes.Equal(resampled, exp) {
|
||||
if !bytes.Equal(resampled.Data, exp) {
|
||||
t.Error("Resampled data does not match expected result.")
|
||||
}
|
||||
}
|
||||
|
@ -112,7 +112,7 @@ func TestStereoToMono(t *testing.T) {
|
|||
}
|
||||
|
||||
// Compare result with expected.
|
||||
if !bytes.Equal(mono, exp) {
|
||||
if !bytes.Equal(mono.Data, exp) {
|
||||
t.Error("Converted data does not match expected result.")
|
||||
}
|
||||
}
|
||||
|
|
|
@ -89,11 +89,6 @@ var (
|
|||
)
|
||||
|
||||
const (
|
||||
sdtPid = 17
|
||||
patPid = 0
|
||||
pmtPid = 4096
|
||||
videoPid = 256
|
||||
audioPid = 210
|
||||
H264ID = 27
|
||||
H265ID = 36
|
||||
audioStreamID = 0xc0 // First audio stream ID.
|
||||
|
@ -151,13 +146,13 @@ func NewEncoder(dst io.WriteCloser, rate float64, mediaType int) *Encoder {
|
|||
var sid byte
|
||||
switch mediaType {
|
||||
case EncodeAudio:
|
||||
mPid = audioPid
|
||||
mPid = AudioPid
|
||||
sid = audioStreamID
|
||||
case EncodeH265:
|
||||
mPid = videoPid
|
||||
mPid = VideoPid
|
||||
sid = H265ID
|
||||
case EncodeH264:
|
||||
mPid = videoPid
|
||||
mPid = VideoPid
|
||||
sid = H264ID
|
||||
}
|
||||
|
||||
|
@ -187,8 +182,8 @@ func NewEncoder(dst io.WriteCloser, rate float64, mediaType int) *Encoder {
|
|||
streamID: sid,
|
||||
|
||||
continuity: map[int]byte{
|
||||
patPid: 0,
|
||||
pmtPid: 0,
|
||||
PatPid: 0,
|
||||
PmtPid: 0,
|
||||
mPid: 0,
|
||||
},
|
||||
}
|
||||
|
@ -214,7 +209,7 @@ func (e *Encoder) TimeBasedPsi(b bool, sendCount int) {
|
|||
e.pktCount = e.psiSendCount
|
||||
}
|
||||
|
||||
// Write implements io.Writer. Write takes raw h264 and encodes into MPEG-TS,
|
||||
// Write implements io.Writer. Write takes raw video or audio data and encodes into MPEG-TS,
|
||||
// then sending it to the encoder's io.Writer destination.
|
||||
func (e *Encoder) Write(data []byte) (int, error) {
|
||||
now := time.Now()
|
||||
|
|
|
@ -199,7 +199,7 @@ func TestEncodePcm(t *testing.T) {
|
|||
for i+PacketSize <= len(clip) {
|
||||
|
||||
// Check MTS packet
|
||||
if !(pkt.PID() == audioPid) {
|
||||
if pkt.PID() != AudioPid {
|
||||
i += PacketSize
|
||||
if i+PacketSize <= len(clip) {
|
||||
copy(pkt[:], clip[i:i+PacketSize])
|
||||
|
|
|
@ -47,6 +47,7 @@ const (
|
|||
PatPid = 0
|
||||
PmtPid = 4096
|
||||
VideoPid = 256
|
||||
AudioPid = 210
|
||||
)
|
||||
|
||||
// StreamID is the id of the first stream.
|
||||
|
|
|
@ -82,7 +82,7 @@ func TestGetPTSRange1(t *testing.T) {
|
|||
curTime += interval
|
||||
}
|
||||
|
||||
got, err := GetPTSRange(clip.Bytes(), videoPid)
|
||||
got, err := GetPTSRange(clip.Bytes(), VideoPid)
|
||||
if err != nil {
|
||||
t.Fatalf("did not expect error getting PTS range: %v", err)
|
||||
}
|
||||
|
@ -142,7 +142,7 @@ func writeFrame(b *bytes.Buffer, frame []byte, pts uint64) error {
|
|||
for len(buf) != 0 {
|
||||
pkt := Packet{
|
||||
PUSI: pusi,
|
||||
PID: videoPid,
|
||||
PID: VideoPid,
|
||||
RAI: pusi,
|
||||
CC: 0,
|
||||
AFC: hasAdaptationField | hasPayload,
|
||||
|
|
|
@ -81,9 +81,9 @@ func main() {
|
|||
}
|
||||
|
||||
// Save resampled to file.
|
||||
err = ioutil.WriteFile(outPath, resampled, 0644)
|
||||
err = ioutil.WriteFile(outPath, resampled.Data, 0644)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
fmt.Println("Encoded and wrote", len(resampled), "bytes to file", outPath)
|
||||
fmt.Println("Encoded and wrote", len(resampled.Data), "bytes to file", outPath)
|
||||
}
|
||||
|
|
|
@ -77,9 +77,9 @@ func main() {
|
|||
}
|
||||
|
||||
// Save mono to file.
|
||||
err = ioutil.WriteFile(outPath, mono, 0644)
|
||||
err = ioutil.WriteFile(outPath, mono.Data, 0644)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
fmt.Println("Encoded and wrote", len(mono), "bytes to file", outPath)
|
||||
fmt.Println("Encoded and wrote", len(mono.Data), "bytes to file", outPath)
|
||||
}
|
||||
|
|
|
@ -0,0 +1,464 @@
|
|||
/*
|
||||
NAME
|
||||
audio.go
|
||||
|
||||
AUTHOR
|
||||
Alan Noble <alan@ausocean.org>
|
||||
Trek Hopton <trek@ausocean.org>
|
||||
|
||||
LICENSE
|
||||
This file is Copyright (C) 2019 the Australian Ocean Lab (AusOcean)
|
||||
|
||||
It is free software: you can redistribute it and/or modify them
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation, either version 3 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
It is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License in gpl.txt.
|
||||
If not, see [GNU licenses](http://www.gnu.org/licenses).
|
||||
*/
|
||||
|
||||
// Package audio provides access to input from audio devices.
|
||||
package audio
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/yobert/alsa"
|
||||
|
||||
"bitbucket.org/ausocean/av/codec/adpcm"
|
||||
"bitbucket.org/ausocean/av/codec/codecutil"
|
||||
"bitbucket.org/ausocean/av/codec/pcm"
|
||||
"bitbucket.org/ausocean/utils/logger"
|
||||
"bitbucket.org/ausocean/utils/ring"
|
||||
)
|
||||
|
||||
const (
|
||||
pkg = "audio: "
|
||||
rbTimeout = 100 * time.Millisecond
|
||||
rbNextTimeout = 100 * time.Millisecond
|
||||
rbLen = 200
|
||||
defaultSampleRate = 48000
|
||||
)
|
||||
|
||||
// "running" means the input goroutine is reading from the ALSA device and writing to the ringbuffer.
|
||||
// "paused" means the input routine is sleeping until unpaused or stopped.
|
||||
// "stopped" means the input routine is stopped and the ALSA device is closed.
|
||||
const (
|
||||
running = iota + 1
|
||||
paused
|
||||
stopped
|
||||
)
|
||||
|
||||
// Device holds everything we need to know about the audio input stream and implements io.Reader.
|
||||
type Device struct {
|
||||
l Logger // Logger for device's routines to log to.
|
||||
mode uint8 // Operating mode, either running, paused, or stopped.
|
||||
mu sync.Mutex // Provides synchronisation when changing modes concurrently.
|
||||
title string // Name of audio title, or empty for the default title.
|
||||
dev *alsa.Device // ALSA's Audio input device.
|
||||
ab alsa.Buffer // ALSA's buffer.
|
||||
rb *ring.Buffer // Our buffer.
|
||||
chunkSize int // This is the number of bytes that will be stored in rb at a time.
|
||||
*Config // Configuration parameters for this device.
|
||||
}
|
||||
|
||||
// Config provides parameters used by Device.
|
||||
type Config struct {
|
||||
SampleRate int
|
||||
Channels int
|
||||
BitDepth int
|
||||
RecPeriod float64
|
||||
Codec uint8
|
||||
}
|
||||
|
||||
// Logger enables any implementation of a logger to be used.
|
||||
// TODO: Make this part of the logger package.
|
||||
type Logger interface {
|
||||
SetLevel(int8)
|
||||
Log(level int8, message string, params ...interface{})
|
||||
}
|
||||
|
||||
// OpenError is used to determine whether an error has originated from attempting to open a device.
|
||||
type OpenError error
|
||||
|
||||
// NewDevice initializes and returns an Device which can be started, read from, and stopped.
|
||||
func NewDevice(cfg *Config, l Logger) (*Device, error) {
|
||||
|
||||
err := validate(cfg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
d := &Device{
|
||||
Config: cfg,
|
||||
l: l,
|
||||
}
|
||||
|
||||
// Open the requested audio device.
|
||||
err = d.open()
|
||||
if err != nil {
|
||||
d.l.Log(logger.Error, pkg+"failed to open device")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Setup the device to record with desired period.
|
||||
d.ab = d.dev.NewBufferDuration(time.Duration(d.RecPeriod * float64(time.Second)))
|
||||
|
||||
// Account for channel conversion.
|
||||
chunkSize := float64(len(d.ab.Data) / d.dev.BufferFormat().Channels * d.Channels)
|
||||
|
||||
// Account for resampling.
|
||||
chunkSize = (chunkSize / float64(d.dev.BufferFormat().Rate)) * float64(d.SampleRate)
|
||||
if chunkSize < 1 {
|
||||
return nil, errors.New("given Config parameters are too small")
|
||||
}
|
||||
|
||||
// Account for codec conversion.
|
||||
if d.Codec == codecutil.ADPCM {
|
||||
d.chunkSize = adpcm.EncBytes(int(chunkSize))
|
||||
} else {
|
||||
d.chunkSize = int(chunkSize)
|
||||
}
|
||||
|
||||
// Create ring buffer with appropriate chunk size.
|
||||
d.rb = ring.NewBuffer(rbLen, d.chunkSize, rbTimeout)
|
||||
|
||||
// Start device in paused mode.
|
||||
d.mode = paused
|
||||
go d.input()
|
||||
|
||||
return d, nil
|
||||
}
|
||||
|
||||
// Start will start recording audio and writing to the ringbuffer.
|
||||
// Once a Device has been stopped it cannot be started again. This is likely to change in future.
|
||||
func (d *Device) Start() error {
|
||||
d.mu.Lock()
|
||||
mode := d.mode
|
||||
d.mu.Unlock()
|
||||
switch mode {
|
||||
case paused:
|
||||
d.mu.Lock()
|
||||
d.mode = running
|
||||
d.mu.Unlock()
|
||||
return nil
|
||||
case stopped:
|
||||
// TODO(Trek): Make this reopen device and start recording.
|
||||
return errors.New("device is stopped")
|
||||
case running:
|
||||
return nil
|
||||
default:
|
||||
return fmt.Errorf("invalid mode: %d", mode)
|
||||
}
|
||||
}
|
||||
|
||||
// Stop will stop recording audio and close the device.
|
||||
// Once a Device has been stopped it cannot be started again. This is likely to change in future.
|
||||
func (d *Device) Stop() {
|
||||
d.mu.Lock()
|
||||
d.mode = stopped
|
||||
d.mu.Unlock()
|
||||
}
|
||||
|
||||
// ChunkSize returns the number of bytes written to the ringbuffer per d.RecPeriod.
|
||||
func (d *Device) ChunkSize() int {
|
||||
return d.chunkSize
|
||||
}
|
||||
|
||||
// validate checks if Config parameters are valid and returns an error if they are not.
|
||||
func validate(c *Config) error {
|
||||
if c.SampleRate <= 0 {
|
||||
return fmt.Errorf("invalid sample rate: %v", c.SampleRate)
|
||||
}
|
||||
if c.Channels <= 0 {
|
||||
return fmt.Errorf("invalid number of channels: %v", c.Channels)
|
||||
}
|
||||
if c.BitDepth <= 0 {
|
||||
return fmt.Errorf("invalid bitdepth: %v", c.BitDepth)
|
||||
}
|
||||
if c.RecPeriod <= 0 {
|
||||
return fmt.Errorf("invalid recording period: %v", c.RecPeriod)
|
||||
}
|
||||
if !codecutil.IsValid(c.Codec) {
|
||||
return errors.New("invalid codec")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// open the recording device with the given name and prepare it to record.
|
||||
// If name is empty, the first recording device is used.
|
||||
func (d *Device) open() error {
|
||||
// Close any existing device.
|
||||
if d.dev != nil {
|
||||
d.l.Log(logger.Debug, pkg+"closing device", "title", d.title)
|
||||
d.dev.Close()
|
||||
d.dev = nil
|
||||
}
|
||||
|
||||
// Open sound card and open recording device.
|
||||
d.l.Log(logger.Debug, pkg+"opening sound card")
|
||||
cards, err := alsa.OpenCards()
|
||||
if err != nil {
|
||||
return OpenError(err)
|
||||
}
|
||||
defer alsa.CloseCards(cards)
|
||||
|
||||
d.l.Log(logger.Debug, pkg+"finding audio device")
|
||||
for _, card := range cards {
|
||||
devices, err := card.Devices()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
for _, dev := range devices {
|
||||
if dev.Type != alsa.PCM || !dev.Record {
|
||||
continue
|
||||
}
|
||||
if dev.Title == d.title || d.title == "" {
|
||||
d.dev = dev
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if d.dev == nil {
|
||||
return OpenError(errors.New("no audio device found"))
|
||||
}
|
||||
|
||||
d.l.Log(logger.Debug, pkg+"opening audio device", "title", d.dev.Title)
|
||||
err = d.dev.Open()
|
||||
if err != nil {
|
||||
return OpenError(err)
|
||||
}
|
||||
|
||||
// 2 channels is what most devices need to record in. If mono is requested,
|
||||
// the recording will be converted in formatBuffer().
|
||||
channels, err := d.dev.NegotiateChannels(2)
|
||||
if err != nil {
|
||||
return OpenError(err)
|
||||
}
|
||||
d.l.Log(logger.Debug, pkg+"alsa device channels set", "channels", channels)
|
||||
|
||||
// Try to negotiate a rate to record in that is divisible by the wanted rate
|
||||
// so that it can be easily downsampled to the wanted rate.
|
||||
// rates is a slice of common sample rates including the standard for CD (44100Hz) and standard for professional audio recording (48000Hz).
|
||||
// Note: if a card thinks it can record at a rate but can't actually, this can cause a failure.
|
||||
// Eg. the audioinjector sound card is supposed to record at 8000Hz and 16000Hz but it can't due to a firmware issue,
|
||||
// a fix for this is to remove 8000 and 16000 from the rates slice.
|
||||
var rates = [8]int{8000, 16000, 32000, 44100, 48000, 88200, 96000, 192000}
|
||||
|
||||
var rate int
|
||||
foundRate := false
|
||||
for r := range rates {
|
||||
if r < d.SampleRate {
|
||||
continue
|
||||
}
|
||||
if r%d.SampleRate == 0 {
|
||||
rate, err = d.dev.NegotiateRate(r)
|
||||
if err == nil {
|
||||
foundRate = true
|
||||
d.l.Log(logger.Debug, pkg+"alsa device sample rate set", "rate", rate)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If no easily divisible rate is found, then use the default rate.
|
||||
if !foundRate {
|
||||
d.l.Log(logger.Warning, pkg+"Unable to sample at requested rate, default used.", "rateRequested", d.SampleRate)
|
||||
rate, err = d.dev.NegotiateRate(defaultSampleRate)
|
||||
if err != nil {
|
||||
return OpenError(err)
|
||||
}
|
||||
d.l.Log(logger.Debug, pkg+"alsa device sample rate set", "rate", rate)
|
||||
}
|
||||
|
||||
var aFmt alsa.FormatType
|
||||
switch d.BitDepth {
|
||||
case 16:
|
||||
aFmt = alsa.S16_LE
|
||||
case 32:
|
||||
aFmt = alsa.S32_LE
|
||||
default:
|
||||
return OpenError(fmt.Errorf("unsupported sample bits %v", d.BitDepth))
|
||||
}
|
||||
devFmt, err := d.dev.NegotiateFormat(aFmt)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var bitdepth int
|
||||
switch devFmt {
|
||||
case alsa.S16_LE:
|
||||
bitdepth = 16
|
||||
case alsa.S32_LE:
|
||||
bitdepth = 32
|
||||
default:
|
||||
return OpenError(fmt.Errorf("unsupported sample bits %v", d.BitDepth))
|
||||
}
|
||||
d.l.Log(logger.Debug, pkg+"alsa device bit depth set", "bitdepth", bitdepth)
|
||||
|
||||
// A 50ms period is a sensible value for low-ish latency. (this could be made configurable if needed)
|
||||
// Some devices only accept even period sizes while others want powers of 2.
|
||||
// So we will find the closest power of 2 to the desired period size.
|
||||
const wantPeriod = 0.05 //seconds
|
||||
bytesPerSecond := rate * channels * (bitdepth / 8)
|
||||
wantPeriodSize := int(float64(bytesPerSecond) * wantPeriod)
|
||||
nearWantPeriodSize := nearestPowerOfTwo(wantPeriodSize)
|
||||
|
||||
// At least two period sizes should fit within the buffer.
|
||||
bufSize, err := d.dev.NegotiateBufferSize(nearWantPeriodSize * 2)
|
||||
if err != nil {
|
||||
return OpenError(err)
|
||||
}
|
||||
d.l.Log(logger.Debug, pkg+"alsa device buffer size set", "buffersize", bufSize)
|
||||
|
||||
if err = d.dev.Prepare(); err != nil {
|
||||
return OpenError(err)
|
||||
}
|
||||
|
||||
d.l.Log(logger.Debug, pkg+"successfully negotiated ALSA params")
|
||||
return nil
|
||||
}
|
||||
|
||||
// input continously records audio and writes it to the ringbuffer.
|
||||
// Re-opens the device and tries again if ASLA returns an error.
|
||||
func (d *Device) input() {
|
||||
for {
|
||||
// Check mode.
|
||||
d.mu.Lock()
|
||||
mode := d.mode
|
||||
d.mu.Unlock()
|
||||
switch mode {
|
||||
case paused:
|
||||
time.Sleep(time.Duration(d.RecPeriod) * time.Second)
|
||||
continue
|
||||
case stopped:
|
||||
if d.dev != nil {
|
||||
d.l.Log(logger.Debug, pkg+"closing audio device", "title", d.title)
|
||||
d.dev.Close()
|
||||
d.dev = nil
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Read from audio device.
|
||||
d.l.Log(logger.Debug, pkg+"recording audio for period", "seconds", d.RecPeriod)
|
||||
err := d.dev.Read(d.ab.Data)
|
||||
if err != nil {
|
||||
d.l.Log(logger.Debug, pkg+"read failed", "error", err.Error())
|
||||
err = d.open() // re-open
|
||||
if err != nil {
|
||||
d.l.Log(logger.Fatal, pkg+"reopening device failed", "error", err.Error())
|
||||
return
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Process audio.
|
||||
d.l.Log(logger.Debug, pkg+"processing audio")
|
||||
toWrite := d.formatBuffer()
|
||||
|
||||
// Write audio to ringbuffer.
|
||||
n, err := d.rb.Write(toWrite.Data)
|
||||
switch err {
|
||||
case nil:
|
||||
d.l.Log(logger.Debug, pkg+"wrote audio to ringbuffer", "length", n)
|
||||
case ring.ErrDropped:
|
||||
d.l.Log(logger.Warning, pkg+"old audio data overwritten")
|
||||
default:
|
||||
d.l.Log(logger.Error, pkg+"unexpected ringbuffer error", "error", err.Error())
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Read reads from the ringbuffer, returning the number of bytes read upon success.
|
||||
func (d *Device) Read(p []byte) (int, error) {
|
||||
// Ready ringbuffer for read.
|
||||
_, err := d.rb.Next(rbNextTimeout)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// Read from ring buffer.
|
||||
return d.rb.Read(p)
|
||||
}
|
||||
|
||||
// formatBuffer returns audio that has been converted to the desired format.
|
||||
func (d *Device) formatBuffer() alsa.Buffer {
|
||||
var err error
|
||||
|
||||
// If nothing needs to be changed, return the original.
|
||||
if d.ab.Format.Channels == d.Channels && d.ab.Format.Rate == d.SampleRate {
|
||||
return d.ab
|
||||
}
|
||||
var formatted alsa.Buffer
|
||||
if d.ab.Format.Channels != d.Channels {
|
||||
// Convert channels.
|
||||
// TODO(Trek): Make this work for conversions other than stereo to mono.
|
||||
if d.ab.Format.Channels == 2 && d.Channels == 1 {
|
||||
formatted, err = pcm.StereoToMono(d.ab)
|
||||
if err != nil {
|
||||
d.l.Log(logger.Fatal, pkg+"channel conversion failed", "error", err.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if d.ab.Format.Rate != d.SampleRate {
|
||||
// Convert rate.
|
||||
formatted, err = pcm.Resample(formatted, d.SampleRate)
|
||||
if err != nil {
|
||||
d.l.Log(logger.Fatal, pkg+"rate conversion failed", "error", err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
switch d.Codec {
|
||||
case codecutil.PCM:
|
||||
case codecutil.ADPCM:
|
||||
b := bytes.NewBuffer(make([]byte, 0, adpcm.EncBytes(len(formatted.Data))))
|
||||
enc := adpcm.NewEncoder(b)
|
||||
_, err = enc.Write(formatted.Data)
|
||||
if err != nil {
|
||||
d.l.Log(logger.Fatal, pkg+"unable to encode", "error", err.Error())
|
||||
}
|
||||
formatted.Data = b.Bytes()
|
||||
default:
|
||||
d.l.Log(logger.Error, pkg+"unhandled audio codec")
|
||||
}
|
||||
|
||||
return formatted
|
||||
}
|
||||
|
||||
// nearestPowerOfTwo finds and returns the nearest power of two to the given integer.
|
||||
// If the lower and higher power of two are the same distance, it returns the higher power.
|
||||
// For negative values, 1 is returned.
|
||||
// Source: https://stackoverflow.com/a/45859570
|
||||
func nearestPowerOfTwo(n int) int {
|
||||
if n <= 0 {
|
||||
return 1
|
||||
}
|
||||
if n == 1 {
|
||||
return 2
|
||||
}
|
||||
v := n
|
||||
v--
|
||||
v |= v >> 1
|
||||
v |= v >> 2
|
||||
v |= v >> 4
|
||||
v |= v >> 8
|
||||
v |= v >> 16
|
||||
v++ // higher power of 2
|
||||
x := v >> 1 // lower power of 2
|
||||
if (v - n) > (n - x) {
|
||||
return x
|
||||
}
|
||||
return v
|
||||
}
|
|
@ -0,0 +1,105 @@
|
|||
/*
|
||||
NAME
|
||||
audio_test.go
|
||||
|
||||
AUTHOR
|
||||
Trek Hopton <trek@ausocean.org>
|
||||
|
||||
LICENSE
|
||||
This file is Copyright (C) 2019 the Australian Ocean Lab (AusOcean)
|
||||
|
||||
It is free software: you can redistribute it and/or modify them
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation, either version 3 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
It is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License in gpl.txt.
|
||||
If not, see [GNU licenses](http://www.gnu.org/licenses).
|
||||
*/
|
||||
|
||||
package audio
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"strconv"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"bitbucket.org/ausocean/av/codec/codecutil"
|
||||
"bitbucket.org/ausocean/utils/logger"
|
||||
)
|
||||
|
||||
func TestDevice(t *testing.T) {
|
||||
// We want to open a device with a standard configuration.
|
||||
ac := &Config{
|
||||
SampleRate: 8000,
|
||||
Channels: 1,
|
||||
RecPeriod: 0.3,
|
||||
BitDepth: 16,
|
||||
Codec: codecutil.ADPCM,
|
||||
}
|
||||
n := 2 // Number of periods to wait while recording.
|
||||
|
||||
// Create a new audio Device, start, read/lex, and then stop it.
|
||||
l := logger.New(logger.Debug, os.Stderr)
|
||||
ai, err := NewDevice(ac, l)
|
||||
// If there was an error opening the device, skip this test.
|
||||
if _, ok := err.(OpenError); ok {
|
||||
t.Skip(err)
|
||||
}
|
||||
// For any other error, report it.
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
err = ai.Start()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
chunkSize := ai.ChunkSize()
|
||||
lexer := codecutil.NewByteLexer(&chunkSize)
|
||||
go lexer.Lex(ioutil.Discard, ai, time.Duration(ac.RecPeriod*float64(time.Second)))
|
||||
time.Sleep(time.Duration(ac.RecPeriod*float64(time.Second)) * time.Duration(n))
|
||||
ai.Stop()
|
||||
}
|
||||
|
||||
var powerTests = []struct {
|
||||
in int
|
||||
out int
|
||||
}{
|
||||
{36, 32},
|
||||
{47, 32},
|
||||
{3, 4},
|
||||
{46, 32},
|
||||
{7, 8},
|
||||
{2, 2},
|
||||
{36, 32},
|
||||
{757, 512},
|
||||
{2464, 2048},
|
||||
{18980, 16384},
|
||||
{70000, 65536},
|
||||
{8192, 8192},
|
||||
{2048, 2048},
|
||||
{65536, 65536},
|
||||
{-2048, 1},
|
||||
{-127, 1},
|
||||
{-1, 1},
|
||||
{0, 1},
|
||||
{1, 2},
|
||||
}
|
||||
|
||||
func TestNearestPowerOfTwo(t *testing.T) {
|
||||
for _, tt := range powerTests {
|
||||
t.Run(strconv.Itoa(tt.in), func(t *testing.T) {
|
||||
v := nearestPowerOfTwo(tt.in)
|
||||
if v != tt.out {
|
||||
t.Errorf("got %v, want %v", v, tt.out)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
|
@ -2,11 +2,9 @@
|
|||
NAME
|
||||
Config.go
|
||||
|
||||
DESCRIPTION
|
||||
See Readme.md
|
||||
|
||||
AUTHORS
|
||||
Saxon A. Nelson-Milton <saxon@ausocean.org>
|
||||
Trek Hopton <trek@ausocean.org>
|
||||
|
||||
LICENSE
|
||||
Config.go is Copyright (C) 2017-2018 the Australian Ocean Lab (AusOcean)
|
||||
|
@ -30,6 +28,7 @@ package revid
|
|||
import (
|
||||
"errors"
|
||||
|
||||
"bitbucket.org/ausocean/av/codec/codecutil"
|
||||
"bitbucket.org/ausocean/utils/logger"
|
||||
)
|
||||
|
||||
|
@ -75,6 +74,7 @@ const (
|
|||
Raspivid
|
||||
V4L
|
||||
RTSP
|
||||
Audio
|
||||
|
||||
// Outputs.
|
||||
RTMP
|
||||
|
@ -93,6 +93,7 @@ const (
|
|||
defaultInput = Raspivid
|
||||
defaultOutput = HTTP
|
||||
defaultFrameRate = 25
|
||||
defaultWriteRate = 25
|
||||
defaultWidth = 1280
|
||||
defaultHeight = 720
|
||||
defaultIntraRefreshPeriod = 100
|
||||
|
@ -101,7 +102,7 @@ const (
|
|||
defaultBitrate = 400000
|
||||
defaultFramesPerClip = 1
|
||||
httpFramesPerClip = 560
|
||||
defaultInputCodec = H264
|
||||
defaultInputCodec = codecutil.H264
|
||||
defaultVerbosity = logger.Error
|
||||
defaultRtpAddr = "localhost:6970"
|
||||
defaultBurstPeriod = 10 // Seconds
|
||||
|
@ -109,6 +110,12 @@ const (
|
|||
defaultBrightness = 50
|
||||
defaultExposure = "auto"
|
||||
defaultAutoWhiteBalance = "auto"
|
||||
|
||||
defaultAudioInputCodec = codecutil.ADPCM
|
||||
defaultSampleRate = 48000
|
||||
defaultBitDepth = 16
|
||||
defaultChannels = 1
|
||||
defaultRecPeriod = 1.0
|
||||
)
|
||||
|
||||
// Config provides parameters relevant to a revid instance. A new config must
|
||||
|
@ -183,6 +190,9 @@ type Config struct {
|
|||
// Raspivid input supports custom framerate.
|
||||
FrameRate uint
|
||||
|
||||
// WriteRate is how many times a second revid encoders will be written to.
|
||||
WriteRate float64
|
||||
|
||||
// HTTPAddress defines a custom HTTP destination if we do not wish to use that
|
||||
// defined in /etc/netsender.conf.
|
||||
HTTPAddress string
|
||||
|
@ -215,6 +225,13 @@ type Config struct {
|
|||
// defined at the start of the file.
|
||||
AutoWhiteBalance string
|
||||
|
||||
// Audio
|
||||
SampleRate int // Samples a second (Hz).
|
||||
RecPeriod float64 // How many seconds to record at a time.
|
||||
Channels int // Number of audio channels, 1 for mono, 2 for stereo.
|
||||
BitDepth int // Sample bit depth.
|
||||
ChunkSize int // ChunkSize is the size of the chunks in the audio.Device's ringbuffer.
|
||||
|
||||
RTPAddress string // RTPAddress defines the RTP output destination.
|
||||
BurstPeriod uint // BurstPeriod defines the revid burst period in seconds.
|
||||
Rotation uint // Rotation defines the video rotation angle in degrees Raspivid input.
|
||||
|
@ -240,7 +257,7 @@ func (c *Config) Validate(r *Revid) error {
|
|||
}
|
||||
|
||||
switch c.Input {
|
||||
case Raspivid, V4L, File, RTSP:
|
||||
case Raspivid, V4L, File, Audio, RTSP:
|
||||
case NothingDefined:
|
||||
c.Logger.Log(logger.Info, pkg+"no input type defined, defaulting", "input", defaultInput)
|
||||
c.Input = defaultInput
|
||||
|
@ -249,7 +266,7 @@ func (c *Config) Validate(r *Revid) error {
|
|||
}
|
||||
|
||||
switch c.InputCodec {
|
||||
case H264:
|
||||
case codecutil.H264:
|
||||
// FIXME(kortschak): This is not really what we want.
|
||||
// Configuration really needs to be rethought here.
|
||||
if c.Quantize && c.Quantization == 0 {
|
||||
|
@ -260,18 +277,22 @@ func (c *Config) Validate(r *Revid) error {
|
|||
return errors.New("bad bitrate and quantization combination for H264 input")
|
||||
}
|
||||
|
||||
case MJPEG:
|
||||
case codecutil.MJPEG:
|
||||
if c.Quantization > 0 || c.Bitrate == 0 {
|
||||
return errors.New("bad bitrate or quantization for mjpeg input")
|
||||
}
|
||||
|
||||
case NothingDefined:
|
||||
case codecutil.PCM, codecutil.ADPCM:
|
||||
default:
|
||||
switch c.Input {
|
||||
case Audio:
|
||||
c.Logger.Log(logger.Info, pkg+"input is audio but no codec defined, defaulting", "inputCodec", defaultAudioInputCodec)
|
||||
c.InputCodec = defaultAudioInputCodec
|
||||
default:
|
||||
c.Logger.Log(logger.Info, pkg+"no input codec defined, defaulting", "inputCodec", defaultInputCodec)
|
||||
c.InputCodec = defaultInputCodec
|
||||
c.Logger.Log(logger.Info, pkg+"defaulting quantization", "quantization", defaultQuantization)
|
||||
c.Quantization = defaultQuantization
|
||||
default:
|
||||
return errors.New("bad input codec defined in config")
|
||||
}
|
||||
}
|
||||
|
||||
if c.Outputs == nil {
|
||||
|
@ -330,6 +351,31 @@ func (c *Config) Validate(r *Revid) error {
|
|||
c.FrameRate = defaultFrameRate
|
||||
}
|
||||
|
||||
if c.SampleRate == 0 {
|
||||
c.Logger.Log(logger.Info, pkg+"no sample rate defined, defaulting", "sampleRate", defaultSampleRate)
|
||||
c.SampleRate = defaultSampleRate
|
||||
}
|
||||
|
||||
if c.Channels == 0 {
|
||||
c.Logger.Log(logger.Info, pkg+"no number of channels defined, defaulting", "Channels", defaultChannels)
|
||||
c.Channels = defaultChannels
|
||||
}
|
||||
|
||||
if c.BitDepth == 0 {
|
||||
c.Logger.Log(logger.Info, pkg+"no bit depth defined, defaulting", "BitDepth", defaultBitDepth)
|
||||
c.BitDepth = defaultBitDepth
|
||||
}
|
||||
|
||||
if c.RecPeriod == 0 {
|
||||
c.Logger.Log(logger.Info, pkg+"no record period defined, defaulting", "recPeriod", defaultRecPeriod)
|
||||
c.RecPeriod = defaultRecPeriod
|
||||
}
|
||||
|
||||
if c.WriteRate == 0 {
|
||||
c.Logger.Log(logger.Info, pkg+"no write rate defined, defaulting", "writeRate", defaultWriteRate)
|
||||
c.WriteRate = defaultWriteRate
|
||||
}
|
||||
|
||||
if c.Bitrate == 0 {
|
||||
c.Logger.Log(logger.Info, pkg+"no bitrate defined, defaulting", "bitrate", defaultBitrate)
|
||||
c.Bitrate = defaultBitrate
|
||||
|
|
|
@ -2,13 +2,11 @@
|
|||
NAME
|
||||
revid.go
|
||||
|
||||
DESCRIPTION
|
||||
See Readme.md
|
||||
|
||||
AUTHORS
|
||||
Saxon A. Nelson-Milton <saxon@ausocean.org>
|
||||
Alan Noble <alan@ausocean.org>
|
||||
Dan Kortschak <dan@ausocean.org>
|
||||
Trek Hopton <trek@ausocean.org>
|
||||
|
||||
LICENSE
|
||||
revid is Copyright (C) 2017-2018 the Australian Ocean Lab (AusOcean)
|
||||
|
@ -27,6 +25,7 @@ LICENSE
|
|||
in gpl.txt. If not, see http://www.gnu.org/licenses.
|
||||
*/
|
||||
|
||||
// Package revid provides an API for reading, transcoding, and writing audio/video streams and files.
|
||||
package revid
|
||||
|
||||
import (
|
||||
|
@ -41,10 +40,12 @@ import (
|
|||
"sync"
|
||||
"time"
|
||||
|
||||
"bitbucket.org/ausocean/av/codec/codecutil"
|
||||
"bitbucket.org/ausocean/av/codec/h264"
|
||||
"bitbucket.org/ausocean/av/codec/h265"
|
||||
"bitbucket.org/ausocean/av/container/flv"
|
||||
"bitbucket.org/ausocean/av/container/mts"
|
||||
"bitbucket.org/ausocean/av/input/audio"
|
||||
"bitbucket.org/ausocean/av/protocol/rtcp"
|
||||
"bitbucket.org/ausocean/av/protocol/rtp"
|
||||
"bitbucket.org/ausocean/av/protocol/rtsp"
|
||||
|
@ -173,13 +174,15 @@ func (r *Revid) reset(config Config) error {
|
|||
r.config.Logger.SetLevel(config.LogLevel)
|
||||
|
||||
err = r.setupPipeline(
|
||||
func(dst io.WriteCloser, fps int) (io.WriteCloser, error) {
|
||||
func(dst io.WriteCloser, fps float64) (io.WriteCloser, error) {
|
||||
var st int
|
||||
switch r.config.Input {
|
||||
case Raspivid, File, V4L:
|
||||
st = mts.EncodeH264
|
||||
case RTSP:
|
||||
st = mts.EncodeH265
|
||||
case Audio:
|
||||
st = mts.EncodeAudio
|
||||
}
|
||||
e := mts.NewEncoder(dst, float64(fps), st)
|
||||
return e, nil
|
||||
|
@ -215,7 +218,7 @@ func (r *Revid) setConfig(config Config) error {
|
|||
// mtsEnc and flvEnc will be called to obtain an mts encoder and flv encoder
|
||||
// respectively. multiWriter will be used to create an ioext.multiWriteCloser
|
||||
// so that encoders can write to multiple senders.
|
||||
func (r *Revid) setupPipeline(mtsEnc, flvEnc func(dst io.WriteCloser, rate int) (io.WriteCloser, error), multiWriter func(...io.WriteCloser) io.WriteCloser) error {
|
||||
func (r *Revid) setupPipeline(mtsEnc func(dst io.WriteCloser, rate float64) (io.WriteCloser, error), flvEnc func(dst io.WriteCloser, rate int) (io.WriteCloser, error), multiWriter func(...io.WriteCloser) io.WriteCloser) error {
|
||||
// encoders will hold the encoders that are required for revid's current
|
||||
// configuration.
|
||||
var encoders []io.WriteCloser
|
||||
|
@ -259,7 +262,7 @@ func (r *Revid) setupPipeline(mtsEnc, flvEnc func(dst io.WriteCloser, rate int)
|
|||
// as a destination.
|
||||
if len(mtsSenders) != 0 {
|
||||
mw := multiWriter(mtsSenders...)
|
||||
e, _ := mtsEnc(mw, int(r.config.FrameRate))
|
||||
e, _ := mtsEnc(mw, r.config.WriteRate)
|
||||
encoders = append(encoders, e)
|
||||
}
|
||||
|
||||
|
@ -289,6 +292,9 @@ func (r *Revid) setupPipeline(mtsEnc, flvEnc func(dst io.WriteCloser, rate int)
|
|||
case RTSP:
|
||||
r.setupInput = r.startRTSPCamera
|
||||
r.lexTo = h265.NewLexer(false).Lex
|
||||
case Audio:
|
||||
r.setupInput = r.startAudioDevice
|
||||
r.lexTo = codecutil.NewByteLexer(&r.config.ChunkSize).Lex
|
||||
}
|
||||
|
||||
return nil
|
||||
|
@ -533,7 +539,7 @@ func (r *Revid) startRaspivid() (func() error, error) {
|
|||
switch r.config.InputCodec {
|
||||
default:
|
||||
return nil, fmt.Errorf("revid: invalid input codec: %v", r.config.InputCodec)
|
||||
case H264:
|
||||
case codecutil.H264:
|
||||
args = append(args,
|
||||
"--codec", "H264",
|
||||
"--inline",
|
||||
|
@ -542,7 +548,7 @@ func (r *Revid) startRaspivid() (func() error, error) {
|
|||
if r.config.Quantize {
|
||||
args = append(args, "-qp", fmt.Sprint(r.config.Quantization))
|
||||
}
|
||||
case MJPEG:
|
||||
case codecutil.MJPEG:
|
||||
args = append(args, "--codec", "MJPEG")
|
||||
}
|
||||
r.config.Logger.Log(logger.Info, pkg+"raspivid args", "raspividArgs", strings.Join(args, " "))
|
||||
|
@ -615,10 +621,55 @@ func (r *Revid) setupInputForFile() (func() error, error) {
|
|||
|
||||
// TODO(kortschak): Maybe we want a context.Context-aware parser that we can stop.
|
||||
r.wg.Add(1)
|
||||
go r.processFrom(f, time.Second/time.Duration(r.config.FrameRate))
|
||||
go r.processFrom(f, 0)
|
||||
return func() error { return f.Close() }, nil
|
||||
}
|
||||
|
||||
// startAudioDevice is used to start capturing audio from an audio device and processing it.
|
||||
// It returns a function that can be used to stop the device and any errors that occur.
|
||||
func (r *Revid) startAudioDevice() (func() error, error) {
|
||||
// Create audio device.
|
||||
ac := &audio.Config{
|
||||
SampleRate: r.config.SampleRate,
|
||||
Channels: r.config.Channels,
|
||||
RecPeriod: r.config.RecPeriod,
|
||||
BitDepth: r.config.BitDepth,
|
||||
Codec: r.config.InputCodec,
|
||||
}
|
||||
mts.Meta.Add("sampleRate", strconv.Itoa(r.config.SampleRate))
|
||||
mts.Meta.Add("channels", strconv.Itoa(r.config.Channels))
|
||||
mts.Meta.Add("period", fmt.Sprintf("%.6f", r.config.RecPeriod))
|
||||
mts.Meta.Add("bitDepth", strconv.Itoa(r.config.BitDepth))
|
||||
switch r.config.InputCodec {
|
||||
case codecutil.PCM:
|
||||
mts.Meta.Add("codec", "pcm")
|
||||
case codecutil.ADPCM:
|
||||
mts.Meta.Add("codec", "adpcm")
|
||||
default:
|
||||
r.config.Logger.Log(logger.Fatal, pkg+"no audio codec set in config")
|
||||
}
|
||||
|
||||
ai, err := audio.NewDevice(ac, r.config.Logger)
|
||||
if err != nil {
|
||||
r.config.Logger.Log(logger.Fatal, pkg+"failed to create audio device", "error", err.Error())
|
||||
}
|
||||
|
||||
// Start audio device
|
||||
err = ai.Start()
|
||||
if err != nil {
|
||||
r.config.Logger.Log(logger.Fatal, pkg+"failed to start audio device", "error", err.Error())
|
||||
}
|
||||
|
||||
// Process output from audio device.
|
||||
r.config.ChunkSize = ai.ChunkSize()
|
||||
r.wg.Add(1)
|
||||
go r.processFrom(ai, time.Duration(float64(time.Second)/r.config.WriteRate))
|
||||
return func() error {
|
||||
ai.Stop()
|
||||
return nil
|
||||
}, nil
|
||||
}
|
||||
|
||||
// startRTSPCamera uses RTSP to request an RTP stream from an IP camera. An RTP
|
||||
// client is created from which RTP packets containing either h264/h265 can read
|
||||
// by the selected lexer.
|
||||
|
|
|
@ -41,7 +41,7 @@ import (
|
|||
const raspividPath = "/usr/local/bin/raspivid"
|
||||
|
||||
// Suppress all test logging, except for t.Errorf output.
|
||||
var silent bool
|
||||
var silent = true
|
||||
|
||||
// TestRaspivid tests that raspivid starts correctly.
|
||||
// It is intended to be run on a Raspberry Pi.
|
||||
|
@ -232,7 +232,7 @@ func TestResetEncoderSenderSetup(t *testing.T) {
|
|||
|
||||
// This logic is what we want to check.
|
||||
err = rv.setupPipeline(
|
||||
func(dst io.WriteCloser, rate int) (io.WriteCloser, error) {
|
||||
func(dst io.WriteCloser, rate float64) (io.WriteCloser, error) {
|
||||
return &tstMtsEncoder{dst: dst}, nil
|
||||
},
|
||||
func(dst io.WriteCloser, rate int) (io.WriteCloser, error) {
|
||||
|
|
|
@ -57,7 +57,7 @@ type httpSender struct {
|
|||
log func(lvl int8, msg string, args ...interface{})
|
||||
}
|
||||
|
||||
// newMinimalHttpSender returns a pointer to a new minimalHttpSender.
|
||||
// newHttpSender returns a pointer to a new httpSender.
|
||||
func newHttpSender(ns *netsender.Sender, log func(lvl int8, msg string, args ...interface{})) *httpSender {
|
||||
return &httpSender{
|
||||
client: ns,
|
||||
|
|
Loading…
Reference in New Issue