diff --git a/cmd/revid-cli/main.go b/cmd/revid-cli/main.go index cf2ee174..0c8a6af9 100644 --- a/cmd/revid-cli/main.go +++ b/cmd/revid-cli/main.go @@ -8,6 +8,7 @@ DESCRIPTION AUTHORS Saxon A. Nelson-Milton Jack Richardson + Trek Hopton LICENSE revid-cli is Copyright (C) 2017-2018 the Australian Ocean Lab (AusOcean) @@ -26,6 +27,7 @@ LICENSE along with revid in gpl.txt. If not, see http://www.gnu.org/licenses. */ +// revid-cli is a command line interface for revid. package main import ( @@ -36,6 +38,7 @@ import ( "strings" "time" + "bitbucket.org/ausocean/av/codec/codecutil" "bitbucket.org/ausocean/av/container/mts" "bitbucket.org/ausocean/av/container/mts/meta" "bitbucket.org/ausocean/av/revid" @@ -105,9 +108,9 @@ func handleFlags() revid.Config { var ( cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`") - inputPtr = flag.String("Input", "", "The input type: Raspivid, File, Webcam, RTSP") + inputCodecPtr = flag.String("InputCodec", "", "The codec of the input: H264, Mjpeg, PCM, ADPCM") + inputPtr = flag.String("Input", "", "The input type: Raspivid, File, v4l, Audio, RTSP") rtspURLPtr = flag.String("RTSPURL", "", "The URL for an RTSP server.") - inputCodecPtr = flag.String("InputCodec", "", "The codec of the input: H264, Mjpeg") quantizePtr = flag.Bool("Quantize", false, "Quantize input (non-variable bitrate)") verbosityPtr = flag.String("Verbosity", "Info", "Verbosity: Debug, Info, Warning, Error, Fatal") rtpAddrPtr = flag.String("RtpAddr", "", "Rtp destination address: : (port is generally 6970-6999)") @@ -131,6 +134,12 @@ func handleFlags() revid.Config { saturationPtr = flag.Int("Saturation", 0, "Set Saturation. (100-100)") exposurePtr = flag.String("Exposure", "auto", "Set exposure mode. ("+strings.Join(revid.ExposureModes[:], ",")+")") autoWhiteBalancePtr = flag.String("Awb", "auto", "Set automatic white balance mode. ("+strings.Join(revid.AutoWhiteBalanceModes[:], ",")+")") + + // Audio specific flags. 
+ sampleRatePtr = flag.Int("SampleRate", 48000, "Sample rate of recorded audio") + channelsPtr = flag.Int("Channels", 1, "Record in Mono or Stereo (1 or 2)") + recPeriodPtr = flag.Float64("recPeriod", 1, "How many seconds to record at a time") + bitDepthPtr = flag.Int("bitDepth", 16, "Bit Depth to record audio at.") ) var outputs flagStrings @@ -179,6 +188,8 @@ func handleFlags() revid.Config { cfg.Input = revid.V4L case "File": cfg.Input = revid.File + case "Audio": + cfg.Input = revid.Audio case "RTSP": cfg.Input = revid.RTSP case "": @@ -188,12 +199,23 @@ func handleFlags() revid.Config { switch *inputCodecPtr { case "H264": - cfg.InputCodec = revid.H264 + cfg.InputCodec = codecutil.H264 + case "PCM": + cfg.InputCodec = codecutil.PCM + case "ADPCM": + cfg.InputCodec = codecutil.ADPCM case "": default: log.Log(logger.Error, pkg+"bad input codec argument") } + switch *inputPtr { + case "Audio": + cfg.WriteRate = 1.0 / (*recPeriodPtr) + default: + cfg.WriteRate = float64(*frameRatePtr) + } + for _, o := range outputs { switch o { case "File": @@ -235,6 +257,10 @@ func handleFlags() revid.Config { cfg.Saturation = *saturationPtr cfg.Exposure = *exposurePtr cfg.AutoWhiteBalance = *autoWhiteBalancePtr + cfg.SampleRate = *sampleRatePtr + cfg.Channels = *channelsPtr + cfg.RecPeriod = *recPeriodPtr + cfg.BitDepth = *bitDepthPtr return cfg } diff --git a/codec/codecutil/lex.go b/codec/codecutil/lex.go new file mode 100644 index 00000000..e1498b96 --- /dev/null +++ b/codec/codecutil/lex.go @@ -0,0 +1,84 @@ +/* +NAME + lex.go + +AUTHOR + Trek Hopton + +LICENSE + This file is Copyright (C) 2019 the Australian Ocean Lab (AusOcean) + + It is free software: you can redistribute it and/or modify them + under the terms of the GNU General Public License as published by the + Free Software Foundation, either version 3 of the License, or (at your + option) any later version. 
// ByteLexer is used to lex bytes using a buffer size which is configured upon construction.
// The size is held by pointer and is dereferenced at the start of every call to Lex.
type ByteLexer struct {
	bufSize *int
}

// NewByteLexer returns a pointer to a ByteLexer with the given buffer size.
func NewByteLexer(bufSize *int) *ByteLexer {
	return &ByteLexer{bufSize: bufSize}
}

// zeroTicks can be used to create an instant ticker: it is closed in init, so
// receiving from it never blocks.
var zeroTicks chan time.Time

func init() {
	zeroTicks = make(chan time.Time)
	close(zeroTicks)
}

// Lex repeatedly reads up to *l.bufSize bytes from src and writes them to dst,
// waiting for a tick of period d before each read. A d of zero means reads
// happen back to back.
//
// Lex only returns on error: when the buffer size is unset or not positive,
// when d is negative, or when src or dst reports an error. The end of src is
// reported as io.EOF, exactly as returned by src.
func (l *ByteLexer) Lex(dst io.Writer, src io.Reader, d time.Duration) error {
	if l.bufSize == nil {
		return fmt.Errorf("buffer size has not been set")
	}
	bufSize := *l.bufSize
	if bufSize <= 0 {
		return fmt.Errorf("invalid buffer size: %v", bufSize)
	}
	if d < 0 {
		return fmt.Errorf("invalid delay: %v", d)
	}

	var ticker *time.Ticker
	if d == 0 {
		ticker = &time.Ticker{C: zeroTicks}
	} else {
		ticker = time.NewTicker(d)
		defer ticker.Stop()
	}

	buf := make([]byte, bufSize)
	for {
		<-ticker.C
		n, err := src.Read(buf)
		// A Reader may return data together with an error (typically the last
		// chunk alongside io.EOF), so the bytes are forwarded before the error
		// is considered; otherwise that data would be silently lost.
		if n > 0 {
			if _, werr := dst.Write(buf[:n]); werr != nil {
				return werr
			}
		}
		if err != nil {
			return err
		}
	}
}
redistribute it and/or modify them + under the terms of the GNU General Public License as published by the + Free Software Foundation, either version 3 of the License, or (at your + option) any later version. + + It is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License in gpl.txt. + If not, see [GNU licenses](http://www.gnu.org/licenses). +*/ + +package codecutil + +import ( + "bytes" + "io" + "strconv" + "testing" + "time" +) + +var lexTests = []struct { + data []byte + t time.Duration + n int + isValid bool // Whether or not this test should fail. +}{ + {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Millisecond, 4, true}, + {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Millisecond, 3, true}, + {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, 0, 2, true}, + {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, 0, 1, true}, + {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Nanosecond, 0, false}, + {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Millisecond, -1, false}, + {[]byte{0x10, 0x00, 0xf3, 0x45, 0xfe, 0xd2, 0xaa, 0x4e}, time.Millisecond, 15, true}, +} + +func TestByteLexer(t *testing.T) { + for i, tt := range lexTests { + t.Run(strconv.Itoa(i), func(t *testing.T) { + dst := bytes.NewBuffer([]byte{}) + l := NewByteLexer(&tt.n) + err := l.Lex(dst, bytes.NewReader(tt.data), tt.t) + if err != nil && err != io.EOF { + if tt.isValid { + t.Errorf("unexpected error: %v", err) + } + } else if !bytes.Equal(dst.Bytes(), tt.data) { + t.Errorf("data before and after lex are not equal: want %v, got %v", tt.data, dst.Bytes()) + } + }) + } +} diff --git a/codec/codecutil/list.go b/codec/codecutil/list.go new file mode 100644 index 00000000..c270eb6b --- /dev/null +++ 
// A global list containing all available codecs for reference in any application.
// New codecs must be added above numCodecs, which is kept as the final entry so
// that it always equals the number of codecs in the list.
const (
	PCM = iota
	ADPCM
	H264
	H265
	MJPEG

	// numCodecs is the number of entries in the list of codecs. It is derived
	// from iota, so it does not need to be updated by hand.
	numCodecs
)

// IsValid receives a uint8 representing a codec and checks if it is valid.
+func IsValid(codec uint8) bool { + return 0 <= codec && codec < numCodecs +} diff --git a/codec/h264/h264dec/cabac.go b/codec/h264/h264dec/cabac.go index 47d90835..8b3a6e4c 100644 --- a/codec/h264/h264dec/cabac.go +++ b/codec/h264/h264dec/cabac.go @@ -35,14 +35,14 @@ func YOffset(yRefMin16, refMbH int) int { } func MbWidthC(sps *SPS) int { mbWidthC := 16 / SubWidthC(sps) - if sps.ChromaFormat == chromaMonochrome || sps.UseSeparateColorPlane { + if sps.ChromaFormatIDC == chromaMonochrome || sps.SeparateColorPlaneFlag { mbWidthC = 0 } return mbWidthC } func MbHeightC(sps *SPS) int { mbHeightC := 16 / SubHeightC(sps) - if sps.ChromaFormat == chromaMonochrome || sps.UseSeparateColorPlane { + if sps.ChromaFormatIDC == chromaMonochrome || sps.SeparateColorPlaneFlag { mbHeightC = 0 } return mbHeightC diff --git a/codec/h264/h264dec/parse.go b/codec/h264/h264dec/parse.go index 0763be27..b2981958 100644 --- a/codec/h264/h264dec/parse.go +++ b/codec/h264/h264dec/parse.go @@ -64,11 +64,11 @@ func (r fieldReader) readBits(n int) uint64 { // Exp-Golomb-coded element using method as specified in section 9.1 of ITU-T // H.264 and return as an int. The read does not happen if the fieldReader // has a non-nil error. -func (r fieldReader) readUe() int { +func (r fieldReader) readUe() uint64 { if r.e != nil { return 0 } - var i int + var i uint64 i, r.e = readUe(r.br) return i } @@ -77,11 +77,11 @@ func (r fieldReader) readUe() int { // Exp-Golomb-coded syntax element using method as specified in section 9.1 // and returns as an int. The read does not happen if the fieldReader // has a non-nil error. -func (r fieldReader) readTe(x uint) int { +func (r fieldReader) readTe(x uint) int64 { if r.e != nil { return 0 } - var i int + var i int64 i, r.e = readTe(r.br, x) return i } @@ -122,7 +122,7 @@ func (r fieldReader) err() error { // // TODO: this should return uint, but rest of code needs to be changed for this // to happen. 
-func readUe(r *bits.BitReader) (int, error) { +func readUe(r *bits.BitReader) (uint64, error) { nZeros := -1 var err error for b := uint64(0); b == 0; nZeros++ { @@ -135,7 +135,7 @@ func readUe(r *bits.BitReader) (int, error) { if err != nil { return 0, err } - return int(math.Pow(float64(2), float64(nZeros)) - 1 + float64(rem)), nil + return uint64(math.Pow(float64(2), float64(nZeros)) - 1 + float64(rem)), nil } // readTe parses a syntax element of te(v) descriptor i.e, truncated @@ -143,9 +143,10 @@ func readUe(r *bits.BitReader) (int, error) { // Rec. ITU-T H.264 (04/2017). // // TODO: this should also return uint. -func readTe(r *bits.BitReader, x uint) (int, error) { +func readTe(r *bits.BitReader, x uint) (int64, error) { if x > 1 { - return readUe(r) + ue, err := readUe(r) + return int64(ue), err } if x == 1 { @@ -181,7 +182,7 @@ func readSe(r *bits.BitReader) (int, error) { // in Rec. ITU-T H.264 (04/2017). func readMe(r *bits.BitReader, chromaArrayType uint, mpm mbPartPredMode) (uint, error) { // Indexes to codedBlockPattern map. - var i1, i2, i3 int + var i1, i2, i3 uint64 // ChromaArrayType selects first index. switch chromaArrayType { @@ -200,7 +201,7 @@ func readMe(r *bits.BitReader, chromaArrayType uint, mpm mbPartPredMode) (uint, } // Need to check that we won't go out of bounds with this index. 
- if i2 >= len(codedBlockPattern[i1]) { + if int(i2) >= len(codedBlockPattern[i1]) { return 0, errInvalidCodeNum } diff --git a/codec/h264/h264dec/pps.go b/codec/h264/h264dec/pps.go index 51f508b5..67b1abf2 100644 --- a/codec/h264/h264dec/pps.go +++ b/codec/h264/h264dec/pps.go @@ -4,7 +4,6 @@ import ( "math" "bitbucket.org/ausocean/av/codec/h264/h264dec/bits" - "github.com/pkg/errors" ) // import "strings" @@ -42,152 +41,54 @@ type PPS struct { func NewPPS(br *bits.BitReader, chromaFormat int) (*PPS, error) { pps := PPS{} - var err error + r := newFieldReader(br) - pps.ID, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse ID") - } - - pps.SPSID, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse SPS ID") - } - - b, err := br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read EntropyCodingMode") - } - pps.EntropyCodingMode = int(b) - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read BottomFieldPicOrderInFramePresent") - } - pps.BottomFieldPicOrderInFramePresent = b == 1 - - pps.NumSliceGroupsMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse NumSliceGroupsMinus1") - } + pps.ID = int(r.readUe()) + pps.SPSID = int(r.readUe()) + pps.EntropyCodingMode = int(r.readBits(1)) + pps.BottomFieldPicOrderInFramePresent = r.readBits(1) == 1 + pps.NumSliceGroupsMinus1 = int(r.readUe()) if pps.NumSliceGroupsMinus1 > 0 { - pps.SliceGroupMapType, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse SliceGroupMapType") - } + pps.SliceGroupMapType = int(r.readUe()) if pps.SliceGroupMapType == 0 { for iGroup := 0; iGroup <= pps.NumSliceGroupsMinus1; iGroup++ { - b, err := readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse RunLengthMinus1") - } - pps.RunLengthMinus1 = append(pps.RunLengthMinus1, b) + pps.RunLengthMinus1 = append(pps.RunLengthMinus1, 
int(r.readUe())) } } else if pps.SliceGroupMapType == 2 { for iGroup := 0; iGroup < pps.NumSliceGroupsMinus1; iGroup++ { - pps.TopLeft[iGroup], err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse TopLeft[iGroup]") - } - if err != nil { - return nil, errors.Wrap(err, "could not parse TopLeft[iGroup]") - } - - pps.BottomRight[iGroup], err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse BottomRight[iGroup]") - } + pps.TopLeft[iGroup] = int(r.readUe()) + pps.BottomRight[iGroup] = int(r.readUe()) } } else if pps.SliceGroupMapType > 2 && pps.SliceGroupMapType < 6 { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read SliceGroupChangeDirection") - } - pps.SliceGroupChangeDirection = b == 1 - - pps.SliceGroupChangeRateMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse SliceGroupChangeRateMinus1") - } + pps.SliceGroupChangeDirection = r.readBits(1) == 1 + pps.SliceGroupChangeRateMinus1 = int(r.readUe()) } else if pps.SliceGroupMapType == 6 { - pps.PicSizeInMapUnitsMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse PicSizeInMapUnitsMinus1") - } + pps.PicSizeInMapUnitsMinus1 = int(r.readUe()) for i := 0; i <= pps.PicSizeInMapUnitsMinus1; i++ { - b, err = br.ReadBits(int(math.Ceil(math.Log2(float64(pps.NumSliceGroupsMinus1 + 1))))) - if err != nil { - return nil, errors.Wrap(err, "coult not read SliceGroupId") - } - pps.SliceGroupId[i] = int(b) + pps.SliceGroupId[i] = int(r.readBits(int(math.Ceil(math.Log2(float64(pps.NumSliceGroupsMinus1 + 1)))))) } } } - pps.NumRefIdxL0DefaultActiveMinus1, err = readUe(br) - if err != nil { - return nil, errors.New("could not parse NumRefIdxL0DefaultActiveMinus1") - } - - pps.NumRefIdxL1DefaultActiveMinus1, err = readUe(br) - if err != nil { - return nil, errors.New("could not parse NumRefIdxL1DefaultActiveMinus1") - } - - b, err = br.ReadBits(1) - if err != nil { 
- return nil, errors.Wrap(err, "could not read WeightedPred") - } - pps.WeightedPred = b == 1 - - b, err = br.ReadBits(2) - if err != nil { - return nil, errors.Wrap(err, "could not read WeightedBipred") - } - pps.WeightedBipred = int(b) - - pps.PicInitQpMinus26, err = readSe(br) - if err != nil { - return nil, errors.New("could not parse PicInitQpMinus26") - } - - pps.PicInitQsMinus26, err = readSe(br) - if err != nil { - return nil, errors.New("could not parse PicInitQsMinus26") - } - - pps.ChromaQpIndexOffset, err = readSe(br) - if err != nil { - return nil, errors.New("could not parse ChromaQpIndexOffset") - } - - err = readFlags(br, []flag{ - {&pps.DeblockingFilterControlPresent, "DeblockingFilterControlPresent"}, - {&pps.ConstrainedIntraPred, "ConstrainedIntraPred"}, - {&pps.RedundantPicCntPresent, "RedundantPicCntPresent"}, - }) - if err != nil { - return nil, err - } + pps.NumRefIdxL0DefaultActiveMinus1 = int(r.readUe()) + pps.NumRefIdxL1DefaultActiveMinus1 = int(r.readUe()) + pps.WeightedPred = r.readBits(1) == 1 + pps.WeightedBipred = int(r.readBits(2)) + pps.PicInitQpMinus26 = int(r.readSe()) + pps.PicInitQsMinus26 = int(r.readSe()) + pps.ChromaQpIndexOffset = int(r.readSe()) + pps.DeblockingFilterControlPresent = r.readBits(1) == 1 + pps.ConstrainedIntraPred = r.readBits(1) == 1 + pps.RedundantPicCntPresent = r.readBits(1) == 1 logger.Printf("debug: \tChecking for more PPS data") if moreRBSPData(br) { logger.Printf("debug: \tProcessing additional PPS data") - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read Transform8x8Mode") - } - pps.Transform8x8Mode = int(b) - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read PicScalingMatrixPresent") - } - pps.PicScalingMatrixPresent = b == 1 + pps.Transform8x8Mode = int(r.readBits(1)) + pps.PicScalingMatrixPresent = r.readBits(1) == 1 if pps.PicScalingMatrixPresent { v := 6 @@ -195,11 +96,7 @@ func NewPPS(br *bits.BitReader, 
chromaFormat int) (*PPS, error) { v = 2 } for i := 0; i < 6+(v*pps.Transform8x8Mode); i++ { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read PicScalingListPresent") - } - pps.PicScalingListPresent[i] = b == 1 + pps.PicScalingListPresent[i] = r.readBits(1) == 1 if pps.PicScalingListPresent[i] { if i < 6 { scalingList( @@ -219,11 +116,9 @@ func NewPPS(br *bits.BitReader, chromaFormat int) (*PPS, error) { } } } - pps.SecondChromaQpIndexOffset, err = readSe(br) - if err != nil { - return nil, errors.New("could not parse SecondChromaQpIndexOffset") - } + pps.SecondChromaQpIndexOffset = r.readSe() + moreRBSPData(br) + // rbspTrailingBits() } - moreRBSPData(br) return &pps, nil } diff --git a/codec/h264/h264dec/read.go b/codec/h264/h264dec/read.go index a13c42bc..c98d46ca 100644 --- a/codec/h264/h264dec/read.go +++ b/codec/h264/h264dec/read.go @@ -71,7 +71,8 @@ func (h *H264Reader) Start() { case naluTypePPS: videoStream := h.VideoStreams[len(h.VideoStreams)-1] // TODO: handle this error - videoStream.PPS, _ = NewPPS(nil, videoStream.SPS.ChromaFormat) + // TODO: fix chromaFormat + videoStream.PPS, _ = NewPPS(nil, 0) case naluTypeSliceIDRPicture: fallthrough case naluTypeSliceNonIDRPicture: diff --git a/codec/h264/h264dec/slice.go b/codec/h264/h264dec/slice.go index 5f819601..a46360c9 100644 --- a/codec/h264/h264dec/slice.go +++ b/codec/h264/h264dec/slice.go @@ -81,12 +81,12 @@ func NewRefPicListModification(br *bits.BitReader, p *PPS, s *SliceHeader) (*Ref if r.RefPicListModificationFlag[0] { for i := 0; ; i++ { - r.ModificationOfPicNums[0][i] = fr.readUe() + r.ModificationOfPicNums[0][i] = int(fr.readUe()) if r.ModificationOfPicNums[0][i] == 0 || r.ModificationOfPicNums[0][i] == 1 { - r.AbsDiffPicNumMinus1[0][i] = fr.readUe() + r.AbsDiffPicNumMinus1[0][i] = int(fr.readUe()) } else if r.ModificationOfPicNums[0][i] == 2 { - r.LongTermPicNum[0][i] = fr.readUe() + r.LongTermPicNum[0][i] = int(fr.readUe()) } if 
r.ModificationOfPicNums[0][i] == 3 { @@ -101,12 +101,12 @@ func NewRefPicListModification(br *bits.BitReader, p *PPS, s *SliceHeader) (*Ref if r.RefPicListModificationFlag[1] { for i := 0; ; i++ { - r.ModificationOfPicNums[1][i] = fr.readUe() + r.ModificationOfPicNums[1][i] = int(fr.readUe()) if r.ModificationOfPicNums[1][i] == 0 || r.ModificationOfPicNums[1][i] == 1 { - r.AbsDiffPicNumMinus1[1][i] = fr.readUe() + r.AbsDiffPicNumMinus1[1][i] = int(fr.readUe()) } else if r.ModificationOfPicNums[1][i] == 2 { - r.LongTermPicNum[1][i] = fr.readUe() + r.LongTermPicNum[1][i] = int(fr.readUe()) } if r.ModificationOfPicNums[1][i] == 3 { @@ -142,25 +142,15 @@ type PredWeightTable struct { // PredWeightTable. func NewPredWeightTable(br *bits.BitReader, h *SliceHeader, chromaArrayType int) (*PredWeightTable, error) { p := &PredWeightTable{} - var err error + r := newFieldReader(br) - p.LumaLog2WeightDenom, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse LumaLog2WeightDenom") - } + p.LumaLog2WeightDenom = int(r.readUe()) if chromaArrayType != 0 { - p.ChromaLog2WeightDenom, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse ChromaLog2WeightDenom") - } + p.ChromaLog2WeightDenom = int(r.readUe()) } for i := 0; i <= h.NumRefIdxL0ActiveMinus1; i++ { - b, err := br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read LumaWeightL0Flag") - } - p.LumaWeightL0Flag = b == 1 + p.LumaWeightL0Flag = r.readBits(1) == 1 if p.LumaWeightL0Flag { se, err := readSe(br) @@ -255,10 +245,10 @@ func NewPredWeightTable(br *bits.BitReader, h *SliceHeader, chromaArrayType int) // DecRefPicMarking provides elements of a dec_ref_pic_marking syntax structure // as defined in section 7.3.3.3 of the specifications. 
type DecRefPicMarking struct { - NoOutputOfPriorPicsFlag bool - LongTermReferenceFlag bool - AdaptiveRefPicMarkingModeFlag bool - elements []drpmElement + NoOutputOfPriorPicsFlag bool + LongTermReferenceFlag bool + AdaptiveRefPicMarkingModeFlag bool + elements []drpmElement } type drpmElement struct { @@ -274,6 +264,7 @@ type drpmElement struct { // DecRefPicMarking. func NewDecRefPicMarking(br *bits.BitReader, idrPic bool) (*DecRefPicMarking, error) { d := &DecRefPicMarking{} + r := newFieldReader(br) if idrPic { b, err := br.ReadBits(1) if err != nil { @@ -295,36 +286,21 @@ func NewDecRefPicMarking(br *bits.BitReader, idrPic bool) (*DecRefPicMarking, er if d.AdaptiveRefPicMarkingModeFlag { for i := 0; ; i++ { - d.elements = append(d.elements,drpmElement{}) + d.elements = append(d.elements, drpmElement{}) - d.elements[i].MemoryManagementControlOperation, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MemoryManagementControlOperation") - } + d.elements[i].MemoryManagementControlOperation = int(r.readUe()) if d.elements[i].MemoryManagementControlOperation == 1 || d.elements[i].MemoryManagementControlOperation == 3 { - d.elements[i].DifferenceOfPicNumsMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MemoryManagementControlOperation") - } + d.elements[i].DifferenceOfPicNumsMinus1 = int(r.readUe()) } if d.elements[i].MemoryManagementControlOperation == 2 { - d.elements[i].LongTermPicNum, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse LongTermPicNum") - } + d.elements[i].LongTermPicNum = int(r.readUe()) } if d.elements[i].MemoryManagementControlOperation == 3 || d.elements[i].MemoryManagementControlOperation == 6 { - d.elements[i].LongTermFrameIdx, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse LongTermFrameIdx") - } + d.elements[i].LongTermFrameIdx = int(r.readUe()) } if d.elements[i].MemoryManagementControlOperation == 4 
{ - d.elements[i].MaxLongTermFrameIdxPlus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MaxLongTermFrameIdxPlus1") - } + d.elements[i].MaxLongTermFrameIdxPlus1 = int(r.readUe()) } if d.elements[i].MemoryManagementControlOperation == 0 { @@ -425,13 +401,13 @@ func (d SliceData) ae(v int) int { // 8.2.2 func MbToSliceGroupMap(sps *SPS, pps *PPS, header *SliceHeader) []int { mbaffFrameFlag := 0 - if sps.MBAdaptiveFrameField && !header.FieldPic { + if sps.MBAdaptiveFrameFieldFlag && !header.FieldPic { mbaffFrameFlag = 1 } mapUnitToSliceGroupMap := MapUnitToSliceGroupMap(sps, pps, header) mbToSliceGroupMap := []int{} for i := 0; i <= PicSizeInMbs(sps, header)-1; i++ { - if sps.FrameMbsOnly || header.FieldPic { + if sps.FrameMBSOnlyFlag || header.FieldPic { mbToSliceGroupMap = append(mbToSliceGroupMap, mapUnitToSliceGroupMap[i]) continue } @@ -439,7 +415,7 @@ func MbToSliceGroupMap(sps *SPS, pps *PPS, header *SliceHeader) []int { mbToSliceGroupMap = append(mbToSliceGroupMap, mapUnitToSliceGroupMap[i/2]) continue } - if !sps.FrameMbsOnly && !sps.MBAdaptiveFrameField && !header.FieldPic { + if !sps.FrameMBSOnlyFlag && !sps.MBAdaptiveFrameFieldFlag && !header.FieldPic { mbToSliceGroupMap = append( mbToSliceGroupMap, mapUnitToSliceGroupMap[(i/(2*PicWidthInMbs(sps)))*PicWidthInMbs(sps)+(i%PicWidthInMbs(sps))]) @@ -449,34 +425,34 @@ func MbToSliceGroupMap(sps *SPS, pps *PPS, header *SliceHeader) []int { } func PicWidthInMbs(sps *SPS) int { - return sps.PicWidthInMbsMinus1 + 1 + return int(sps.PicWidthInMBSMinus1 + 1) } func PicHeightInMapUnits(sps *SPS) int { - return sps.PicHeightInMapUnitsMinus1 + 1 + return int(sps.PicHeightInMapUnitsMinus1 + 1) } func PicSizeInMapUnits(sps *SPS) int { - return PicWidthInMbs(sps) * PicHeightInMapUnits(sps) + return int(PicWidthInMbs(sps) * PicHeightInMapUnits(sps)) } func FrameHeightInMbs(sps *SPS) int { - return (2 - flagVal(sps.FrameMbsOnly)) * PicHeightInMapUnits(sps) + return int((2 - 
flagVal(sps.FrameMBSOnlyFlag)) * PicHeightInMapUnits(sps)) } func PicHeightInMbs(sps *SPS, header *SliceHeader) int { - return FrameHeightInMbs(sps) / (1 + flagVal(header.FieldPic)) + return int(FrameHeightInMbs(sps) / (1 + flagVal(header.FieldPic))) } func PicSizeInMbs(sps *SPS, header *SliceHeader) int { - return PicWidthInMbs(sps) * PicHeightInMbs(sps, header) + return int(PicWidthInMbs(sps) * PicHeightInMbs(sps, header)) } // table 6-1 func SubWidthC(sps *SPS) int { n := 17 - if sps.UseSeparateColorPlane { - if sps.ChromaFormat == chroma444 { + if sps.SeparateColorPlaneFlag { + if sps.ChromaFormatIDC == chroma444 { return n } } - switch sps.ChromaFormat { + switch sps.ChromaFormatIDC { case chromaMonochrome: return n case chroma420: @@ -491,12 +467,12 @@ func SubWidthC(sps *SPS) int { } func SubHeightC(sps *SPS) int { n := 17 - if sps.UseSeparateColorPlane { - if sps.ChromaFormat == chroma444 { + if sps.SeparateColorPlaneFlag { + if sps.ChromaFormatIDC == chroma444 { return n } } - switch sps.ChromaFormat { + switch sps.ChromaFormatIDC { case chromaMonochrome: return n case chroma420: @@ -543,6 +519,8 @@ func NumMbPart(nalUnit *NALUnit, sps *SPS, header *SliceHeader, data *SliceData) func MbPred(chromaArrayType int, sliceContext *SliceContext, br *bits.BitReader, rbsp []byte) error { var cabac *CABAC + r := newFieldReader(br) + sliceType := sliceTypeMap[sliceContext.Slice.Header.SliceType] mbPartPredMode, err := MbPartPredMode(sliceContext.Slice.Data, sliceType, sliceContext.Slice.Data.MbType, 0) if err != nil { @@ -652,11 +630,7 @@ func MbPred(chromaArrayType int, sliceContext *SliceContext, br *bits.BitReader, logger.Printf("TODO: ae for IntraChromaPredMode\n") } else { - var err error - sliceContext.Slice.Data.IntraChromaPredMode, err = readUe(br) - if err != nil { - return errors.Wrap(err, "could not parse IntraChromaPredMode") - } + sliceContext.Slice.Data.IntraChromaPredMode = int(r.readUe()) } } @@ -685,14 +659,10 @@ func MbPred(chromaArrayType int, 
sliceContext *SliceContext, br *bits.BitReader, // TODO: Only one reference picture is used for inter-prediction, // then the value should be 0 if MbaffFrameFlag(sliceContext.SPS, sliceContext.Slice.Header) == 0 || !sliceContext.Slice.Data.MbFieldDecodingFlag { - sliceContext.Slice.Data.RefIdxL0[mbPartIdx], _ = readTe( - br, - uint(sliceContext.Slice.Header.NumRefIdxL0ActiveMinus1)) + sliceContext.Slice.Data.RefIdxL0[mbPartIdx] = int(r.readTe(uint(sliceContext.Slice.Header.NumRefIdxL0ActiveMinus1))) } else { rangeMax := 2*sliceContext.Slice.Header.NumRefIdxL0ActiveMinus1 + 1 - sliceContext.Slice.Data.RefIdxL0[mbPartIdx], _ = readTe( - br, - uint(rangeMax)) + sliceContext.Slice.Data.RefIdxL0[mbPartIdx] = int(r.readTe(uint(rangeMax))) } } } @@ -858,19 +828,19 @@ func nextMbAddress(n int, sps *SPS, pps *PPS, header *SliceHeader) int { i := n + 1 // picSizeInMbs is the number of macroblocks in picture 0 // 7-13 - // PicWidthInMbs = sps.PicWidthInMbsMinus1 + 1 + // PicWidthInMbs = sps.PicWidthInMBSMinus1 + 1 // PicHeightInMapUnits = sps.PicHeightInMapUnitsMinus1 + 1 // 7-29 // picSizeInMbs = PicWidthInMbs * PicHeightInMbs // 7-26 // PicHeightInMbs = FrameHeightInMbs / (1 + header.fieldPicFlag) // 7-18 - // FrameHeightInMbs = (2 - ps.FrameMbsOnly) * PicHeightInMapUnits - picWidthInMbs := sps.PicWidthInMbsMinus1 + 1 + // FrameHeightInMbs = (2 - ps.FrameMBSOnlyFlag) * PicHeightInMapUnits + picWidthInMbs := sps.PicWidthInMBSMinus1 + 1 picHeightInMapUnits := sps.PicHeightInMapUnitsMinus1 + 1 - frameHeightInMbs := (2 - flagVal(sps.FrameMbsOnly)) * picHeightInMapUnits + frameHeightInMbs := (2 - flagVal(sps.FrameMBSOnlyFlag)) * int(picHeightInMapUnits) picHeightInMbs := frameHeightInMbs / (1 + flagVal(header.FieldPic)) - picSizeInMbs := picWidthInMbs * picHeightInMbs + picSizeInMbs := int(picWidthInMbs) * picHeightInMbs mbToSliceGroupMap := MbToSliceGroupMap(sps, pps, header) for i < picSizeInMbs && mbToSliceGroupMap[i] != mbToSliceGroupMap[i] { i++ @@ -880,7 +850,7 @@ func 
nextMbAddress(n int, sps *SPS, pps *PPS, header *SliceHeader) int { func CurrMbAddr(sps *SPS, header *SliceHeader) int { mbaffFrameFlag := 0 - if sps.MBAdaptiveFrameField && !header.FieldPic { + if sps.MBAdaptiveFrameFieldFlag && !header.FieldPic { mbaffFrameFlag = 1 } @@ -888,15 +858,15 @@ func CurrMbAddr(sps *SPS, header *SliceHeader) int { } func MbaffFrameFlag(sps *SPS, header *SliceHeader) int { - if sps.MBAdaptiveFrameField && !header.FieldPic { + if sps.MBAdaptiveFrameFieldFlag && !header.FieldPic { return 1 } return 0 } func NewSliceData(chromaArrayType int, sliceContext *SliceContext, br *bits.BitReader) (*SliceData, error) { + r := newFieldReader(br) var cabac *CABAC - var err error sliceContext.Slice.Data = &SliceData{BitReader: br} // TODO: Why is this being initialized here? // initCabac(sliceContext) @@ -910,7 +880,7 @@ func NewSliceData(chromaArrayType int, sliceContext *SliceContext, br *bits.BitR } } mbaffFrameFlag := 0 - if sliceContext.SPS.MBAdaptiveFrameField && !sliceContext.Slice.Header.FieldPic { + if sliceContext.SPS.MBAdaptiveFrameFieldFlag && !sliceContext.Slice.Header.FieldPic { mbaffFrameFlag = 1 } currMbAddr := sliceContext.Slice.Header.FirstMbInSlice * (1 * mbaffFrameFlag) @@ -925,10 +895,7 @@ func NewSliceData(chromaArrayType int, sliceContext *SliceContext, br *bits.BitR if sliceContext.Slice.Data.SliceTypeName != "I" && sliceContext.Slice.Data.SliceTypeName != "SI" { logger.Printf("debug: \tNonI/SI slice, processing moreData\n") if sliceContext.PPS.EntropyCodingMode == 0 { - sliceContext.Slice.Data.MbSkipRun, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MbSkipRun") - } + sliceContext.Slice.Data.MbSkipRun = int(r.readUe()) if sliceContext.Slice.Data.MbSkipRun > 0 { prevMbSkipped = 1 @@ -1042,10 +1009,7 @@ func NewSliceData(chromaArrayType int, sliceContext *SliceContext, br *bits.BitR logger.Printf("TODO: ae for MBType\n") } else { - sliceContext.Slice.Data.MbType, err = readUe(br) - if err != 
nil { - return nil, errors.Wrap(err, "could not parse MbType") - } + sliceContext.Slice.Data.MbType = int(r.readUe()) } if sliceContext.Slice.Data.MbTypeName == "I_PCM" { for !br.ByteAligned() { @@ -1057,7 +1021,7 @@ func NewSliceData(chromaArrayType int, sliceContext *SliceContext, br *bits.BitR // 7-3 p95 bitDepthY := 8 + sliceContext.SPS.BitDepthLumaMinus8 for i := 0; i < 256; i++ { - s, err := br.ReadBits(bitDepthY) + s, err := br.ReadBits(int(bitDepthY)) if err != nil { return nil, errors.Wrap(err, fmt.Sprintf("could not read PcmSampleLuma[%d]", i)) } @@ -1071,14 +1035,14 @@ func NewSliceData(chromaArrayType int, sliceContext *SliceContext, br *bits.BitR mbWidthC := 16 / SubWidthC(sliceContext.SPS) mbHeightC := 16 / SubHeightC(sliceContext.SPS) // if monochrome - if sliceContext.SPS.ChromaFormat == chromaMonochrome || sliceContext.SPS.UseSeparateColorPlane { + if sliceContext.SPS.ChromaFormatIDC == chromaMonochrome || sliceContext.SPS.SeparateColorPlaneFlag { mbWidthC = 0 mbHeightC = 0 } bitDepthC := 8 + sliceContext.SPS.BitDepthChromaMinus8 for i := 0; i < 2*mbWidthC*mbHeightC; i++ { - s, err := br.ReadBits(bitDepthC) + s, err := br.ReadBits(int(bitDepthC)) if err != nil { return nil, errors.Wrap(err, fmt.Sprintf("could not read PcmSampleChroma[%d]", i)) } @@ -1104,7 +1068,7 @@ func NewSliceData(chromaArrayType int, sliceContext *SliceContext, br *bits.BitR if NumbSubMbPart(subMbType[mbPartIdx]) > 1 { noSubMbPartSizeLessThan8x8Flag = 0 } - } else if !sliceContext.SPS.Direct8x8Inference { + } else if !sliceContext.SPS.Direct8x8InferenceFlag { noSubMbPartSizeLessThan8x8Flag = 0 } } @@ -1156,7 +1120,7 @@ func NewSliceData(chromaArrayType int, sliceContext *SliceContext, br *bits.BitR } // sliceContext.Slice.Data.CodedBlockPattern = me(v) | ae(v) - if CodedBlockPatternLuma(sliceContext.Slice.Data) > 0 && sliceContext.PPS.Transform8x8Mode == 1 && sliceContext.Slice.Data.MbTypeName != "I_NxN" && noSubMbPartSizeLessThan8x8Flag == 1 && 
(sliceContext.Slice.Data.MbTypeName != "B_Direct_16x16" || sliceContext.SPS.Direct8x8Inference) { + if CodedBlockPatternLuma(sliceContext.Slice.Data) > 0 && sliceContext.PPS.Transform8x8Mode == 1 && sliceContext.Slice.Data.MbTypeName != "I_NxN" && noSubMbPartSizeLessThan8x8Flag == 1 && (sliceContext.Slice.Data.MbTypeName != "B_Direct_16x16" || sliceContext.SPS.Direct8x8InferenceFlag) { // TODO: 1 bit or ae(v) if sliceContext.PPS.EntropyCodingMode == 1 { binarization := NewBinarization("Transform8x8Flag", sliceContext.Slice.Data) @@ -1236,31 +1200,21 @@ func NewSliceContext(vid *VideoStream, nalUnit *NALUnit, rbsp []byte, showPacket idrPic = true } header := SliceHeader{} - if sps.UseSeparateColorPlane { + if sps.SeparateColorPlaneFlag { vid.ChromaArrayType = 0 } else { - vid.ChromaArrayType = sps.ChromaFormat + vid.ChromaArrayType = int(sps.ChromaFormatIDC) } br := bits.NewBitReader(bytes.NewReader(rbsp)) + r := newFieldReader(br) - header.FirstMbInSlice, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse FirstMbInSlice") - } - - header.SliceType, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse SliceType") - } + header.FirstMbInSlice = int(r.readUe()) + header.SliceType = int(r.readUe()) sliceType := sliceTypeMap[header.SliceType] logger.Printf("debug: %s (%s) slice of %d bytes\n", NALUnitType[int(nalUnit.Type)], sliceType, len(rbsp)) - header.PPSID, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse PPSID") - } - - if sps.UseSeparateColorPlane { + header.PPSID = int(r.readUe()) + if sps.SeparateColorPlaneFlag { b, err := br.ReadBits(2) if err != nil { return nil, errors.Wrap(err, "could not read ColorPlaneID") @@ -1269,7 +1223,7 @@ func NewSliceContext(vid *VideoStream, nalUnit *NALUnit, rbsp []byte, showPacket } // TODO: See 7.4.3 // header.FrameNum = b.NextField("FrameNum", 0) - if !sps.FrameMbsOnly { + if !sps.FrameMBSOnlyFlag { b, err := br.ReadBits(1) if err != 
nil { return nil, errors.Wrap(err, "could not read FieldPic") @@ -1284,13 +1238,10 @@ func NewSliceContext(vid *VideoStream, nalUnit *NALUnit, rbsp []byte, showPacket } } if idrPic { - header.IDRPicID, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse IDRPicID") - } + header.IDRPicID = int(r.readUe()) } if sps.PicOrderCountType == 0 { - b, err := br.ReadBits(sps.Log2MaxPicOrderCntLSBMin4 + 4) + b, err := br.ReadBits(int(sps.Log2MaxPicOrderCntLSBMin4 + 4)) if err != nil { return nil, errors.Wrap(err, "could not read PicOrderCntLsb") } @@ -1303,7 +1254,7 @@ func NewSliceContext(vid *VideoStream, nalUnit *NALUnit, rbsp []byte, showPacket } } } - if sps.PicOrderCountType == 1 && !sps.DeltaPicOrderAlwaysZero { + if sps.PicOrderCountType == 1 && !sps.DeltaPicOrderAlwaysZeroFlag { header.DeltaPicOrderCnt[0], err = readSe(br) if err != nil { return nil, errors.Wrap(err, "could not parse DeltaPicOrderCnt") @@ -1317,10 +1268,7 @@ func NewSliceContext(vid *VideoStream, nalUnit *NALUnit, rbsp []byte, showPacket } } if pps.RedundantPicCntPresent { - header.RedundantPicCnt, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse RedundantPicCnt") - } + header.RedundantPicCnt = int(r.readUe()) } if sliceType == "B" { b, err := br.ReadBits(1) @@ -1337,15 +1285,9 @@ func NewSliceContext(vid *VideoStream, nalUnit *NALUnit, rbsp []byte, showPacket header.NumRefIdxActiveOverride = b == 1 if header.NumRefIdxActiveOverride { - header.NumRefIdxL0ActiveMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse NumRefIdxL0ActiveMinus1") - } + header.NumRefIdxL0ActiveMinus1 = int(r.readUe()) if sliceType == "B" { - header.NumRefIdxL1ActiveMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse NumRefIdxL1ActiveMinus1") - } + header.NumRefIdxL1ActiveMinus1 = int(r.readUe()) } } } @@ -1375,35 +1317,18 @@ func NewSliceContext(vid *VideoStream, nalUnit *NALUnit, rbsp 
[]byte, showPacket } } if pps.EntropyCodingMode == 1 && sliceType != "I" && sliceType != "SI" { - header.CabacInit, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse CabacInit") - } - } - header.SliceQpDelta, err = readSe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse SliceQpDelta") + header.CabacInit = int(r.readUe()) } + header.SliceQpDelta = int(r.readSe()) if sliceType == "SP" || sliceType == "SI" { if sliceType == "SP" { - b, err := br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read SpForSwitch") - } - header.SpForSwitch = b == 1 - } - header.SliceQsDelta, err = readSe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse SliceQsDelta") + header.SpForSwitch = r.readBits(1) == 1 } + header.SliceQsDelta = int(r.readSe()) } if pps.DeblockingFilterControlPresent { - header.DisableDeblockingFilter, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse DisableDeblockingFilter") - } - + header.DisableDeblockingFilter = int(r.readUe()) if header.DisableDeblockingFilter != 1 { header.SliceAlphaC0OffsetDiv2, err = readSe(br) if err != nil { @@ -1432,13 +1357,10 @@ func NewSliceContext(vid *VideoStream, nalUnit *NALUnit, rbsp []byte, showPacket Header: &header, }, } - sliceContext.Slice.Data, err = NewSliceData(vid.ChromaArrayType,sliceContext, br) + sliceContext.Slice.Data, err = NewSliceData(vid.ChromaArrayType, sliceContext, br) if err != nil { return nil, errors.Wrap(err, "could not create slice data") } - if showPacket { - debugPacket("debug: Header", sliceContext.Slice.Header) - debugPacket("debug: Data", sliceContext.Slice.Data) - } + return sliceContext, nil } diff --git a/codec/h264/h264dec/slice_test.go b/codec/h264/h264dec/slice_test.go index f0914a4c..55e249ea 100644 --- a/codec/h264/h264dec/slice_test.go +++ b/codec/h264/h264dec/slice_test.go @@ -22,12 +22,12 @@ var subWidthCTests = []struct { want int }{ {SPS{}, 17}, - 
{SPS{ChromaFormat: 0}, 17}, - {SPS{ChromaFormat: 1}, 2}, - {SPS{ChromaFormat: 2}, 2}, - {SPS{ChromaFormat: 3}, 1}, - {SPS{ChromaFormat: 3, UseSeparateColorPlane: true}, 17}, - {SPS{ChromaFormat: 999}, 17}, + {SPS{ChromaFormatIDC: 0}, 17}, + {SPS{ChromaFormatIDC: 1}, 2}, + {SPS{ChromaFormatIDC: 2}, 2}, + {SPS{ChromaFormatIDC: 3}, 1}, + {SPS{ChromaFormatIDC: 3, SeparateColorPlaneFlag: true}, 17}, + {SPS{ChromaFormatIDC: 999}, 17}, } // TestSubWidthC tests that the correct SubWidthC is returned given @@ -45,12 +45,12 @@ var subHeightCTests = []struct { want int }{ {SPS{}, 17}, - {SPS{ChromaFormat: 0}, 17}, - {SPS{ChromaFormat: 1}, 2}, - {SPS{ChromaFormat: 2}, 1}, - {SPS{ChromaFormat: 3}, 1}, - {SPS{ChromaFormat: 3, UseSeparateColorPlane: true}, 17}, - {SPS{ChromaFormat: 999}, 17}, + {SPS{ChromaFormatIDC: 0}, 17}, + {SPS{ChromaFormatIDC: 1}, 2}, + {SPS{ChromaFormatIDC: 2}, 1}, + {SPS{ChromaFormatIDC: 3}, 1}, + {SPS{ChromaFormatIDC: 3, SeparateColorPlaneFlag: true}, 17}, + {SPS{ChromaFormatIDC: 999}, 17}, } // TestSubHeightC tests that the correct SubHeightC is returned given diff --git a/codec/h264/h264dec/sps.go b/codec/h264/h264dec/sps.go index c4aabd22..4da3fc69 100644 --- a/codec/h264/h264dec/sps.go +++ b/codec/h264/h264dec/sps.go @@ -3,111 +3,11 @@ package h264dec import ( "bytes" "fmt" - "strings" "bitbucket.org/ausocean/av/codec/h264/h264dec/bits" "github.com/pkg/errors" ) -// Specification Page 43 7.3.2.1.1 -// Range is always inclusive -// XRange is always exclusive -type SPS struct { - // 8 bits - Profile int - // 6 bits - Constraint0, Constraint1 int - Constraint2, Constraint3 int - Constraint4, Constraint5 int - // 2 bit reserved 0 bits - // 8 bits - Level int - // Range 0 - 31 ; 6 bits - ID int - ChromaFormat int - UseSeparateColorPlane bool - BitDepthLumaMinus8 int - BitDepthChromaMinus8 int - QPrimeYZeroTransformBypass bool - SeqScalingMatrixPresent bool - // Delta is (0-12)-1 ; 4 bits - SeqScalingList []bool // se - // Range 0 - 12; 4 bits - 
Log2MaxFrameNumMinus4 int - // Range 0 - 2; 2 bits - PicOrderCountType int - // Range 0 - 12; 4 bits - Log2MaxPicOrderCntLSBMin4 int - DeltaPicOrderAlwaysZero bool - // Range (-2^31)+1 to (2^31)-1 ; 31 bits - OffsetForNonRefPic int // Value - 1 (se) - // Range (-2^31)+1 to (2^31)-1 ; 31 bits - OffsetForTopToBottomField int // Value - 1 (se) - // Range 0 - 255 ; 8 bits - NumRefFramesInPicOrderCntCycle int - // Range (-2^31)+1 to (2^31)-1 ; 31 bits - OffsetForRefFrameList []int // Value - 1 ([]se) - // Range 0 - MaxDpbFrames - MaxNumRefFrames int - GapsInFrameNumValueAllowed bool - // Page 77 - PicWidthInMbsMinus1 int - // Page 77 - PicHeightInMapUnitsMinus1 int - FrameMbsOnly bool - MBAdaptiveFrameField bool - Direct8x8Inference bool - FrameCropping bool - FrameCropLeftOffset int - FrameCropRightOffset int - FrameCropTopOffset int - FrameCropBottomOffset int - VuiParametersPresent bool - VuiParameters []int - AspectRatioInfoPresent bool - AspectRatio int - SarWidth int - SarHeight int - OverscanInfoPresent bool - OverscanAppropriate bool - VideoSignalTypePresent bool - VideoFormat int - VideoFullRange bool - ColorDescriptionPresent bool - ColorPrimaries int - TransferCharacteristics int - MatrixCoefficients int - ChromaLocInfoPresent bool - ChromaSampleLocTypeTopField int - ChromaSampleLocTypeBottomField int - CpbCntMinus1 int - BitRateScale int - CpbSizeScale int - BitRateValueMinus1 []int - Cbr []bool - InitialCpbRemovalDelayLengthMinus1 int - CpbRemovalDelayLengthMinus1 int - CpbSizeValueMinus1 []int - DpbOutputDelayLengthMinus1 int - TimeOffsetLength int - TimingInfoPresent bool - NumUnitsInTick int - TimeScale int - NalHrdParametersPresent bool - FixedFrameRate bool - VclHrdParametersPresent bool - LowHrdDelay bool - PicStructPresent bool - BitstreamRestriction bool - MotionVectorsOverPicBoundaries bool - MaxBytesPerPicDenom int - MaxBitsPerMbDenom int - Log2MaxMvLengthHorizontal int - Log2MaxMvLengthVertical int - MaxDecFrameBuffering int - MaxNumReorderFrames 
int -} - var ( DefaultScalingMatrix4x4 = [][]int{ {6, 13, 20, 28, 13, 20, 28, 32, 20, 28, 32, 37, 28, 32, 37, 42}, @@ -161,6 +61,608 @@ var ( ScalingList8x8 = ScalingList4x4 ) +// SPS describes a sequence parameter set as defined by section 7.3.2.1.1 in +// the Specifications. +// For semantics see section 7.4.2.1. Comments for fields are excerpts from +// section 7.4.2.1. +type SPS struct { + // pofile_idx and level_idc indicate the profile and level to which the + // coded video sequence conforms. + Profile, LevelIDC uint8 + + // The constraint_setx_flag flags specify the constraints defined in A.2 for + // which this stream conforms. + Constraint0 bool + Constraint1 bool + Constraint2 bool + Constraint3 bool + Constraint4 bool + Constraint5 bool + + // seq_parameter_set_id identifies this sequence parameter set, and can then + // be reference by the picture parameter set. The seq_parameter_set_id is + // in the range of 0 to 30 inclusive. + SPSID uint64 + + // chroma_format_idc specifies the chroma sampling relative to the luma + // sampling as specified in caluse 6.2. Range of chroma_format_idc is in + // from 0 to 3 inclusive. + ChromaFormatIDC uint64 + + // separate_color_plane_flag if true specifies that the three components of + // the 4:4:4 chroma formta are coded separately. + SeparateColorPlaneFlag bool + + // bit_depth_luma_minus8 specifies the luma array sample bit depth and the + // luma quantisation parameter range offset QpBdOffset_y (eq 7-3 and 7-4). + BitDepthLumaMinus8 uint64 + + // bit_depth_luma_minus8 specifies the chroma array sample bit depth and the + // chroma quantisation parameter range offset QpBdOffset_c (eq 7-3 and 7-4). 
+ BitDepthChromaMinus8 uint64 + + // qpprime_y_zero_transform_bypass_flag equal to 1 specifies that, when QP′ Y + // is equal to 0, a transform bypass operation for the transform coefficient + // decoding process and picture construction process prior to deblocking + // filter process as specified in clause 8.5 shall be applied. + QPPrimeYZeroTransformBypassFlag bool + + // seq_scaling_matrix_present_flag equal to 1 specifies that + // seq_scaling_list_present_flag[ i ] are present. When 0 they are not present + // and the sequence-level scaling lists specified by Flat_4x4_16 and + // Flat_8x8_16 shall be inferred. + SeqScalingMatrixPresentFlag bool + + // seq_scaling_list_present_flag[i] specifies whether the syntax structure for + // scaling list i is present. If 1 then present, otherwise not, and scaling + // list for i is inferred as per rule set A in table 7-2. + SeqScalingListPresentFlag []bool + + // The 4x4 sequence scaling lists for each i. + ScalingList4x4 [][]uint64 + + // Flag to indicate for a 4x4 scaling list, if we use the default. + UseDefaultScalingMatrix4x4Flag []bool + + // The 8x8 sequence scaling lists for each i. + ScalingList8x8 [][]uint64 + + // Flag to indicate for a 8x8 scaling list, if we use the default. + UseDefaultScalingMatrix8x8Flag []bool + + // log2_max_frame_num_minus4 allows for derivation of MaxFrameNum using eq 7-10. + Log2MaxFrameNumMinus4 uint64 + + // pic_order_cnt_type specifies the method to decode picture order count. + PicOrderCountType uint64 + + // log2_max_pic_order_cnt_lsb_minus4 allows for the derivation of + // MaxPicOrderCntLsb using eq 7-11. + Log2MaxPicOrderCntLSBMin4 uint64 + + // delta_pic_order_always_zero_flag if true indicates that delta_pic_order_cnt[0] + // and delta_pic_order_cnt[1] are not present in slice headers and are inferred to be 0. + DeltaPicOrderAlwaysZeroFlag bool + + // offset_for_non_ref_pic is used to calculate the picture order count of a + // non-reference picture as specified in clause 8.2.1. 
+ OffsetForNonRefPic int64 + + // offset_for_top_to_bottom_field is used to calculate the picture order count + // of a bottom field as specified in clause 8.2.1. + OffsetForTopToBottomField int64 + + // num_ref_frames_in_pic_order_cnt_cycle is used in the decoding process for + // picture order count as specified in clause 8.2.1. + NumRefFramesInPicOrderCntCycle uint64 + + // offset_for_ref_frame[ i ] is an element of a list of + // num_ref_frames_in_pic_order_cnt_cycle values used in the decoding process + // for picture order count as specified in clause 8.2.1. + OffsetForRefFrameList []int + + // max_num_ref_frames specifies the max number of short-term and long-term + // reference frames, complementary reference field pairs, and non-paired + // reference fields that may be used by the decoding process for inter prediction. + MaxNumRefFrames uint64 + + // gaps_in_frame_num_value_allowed_flag specifies the allowed values of + // frame_num as specified in clause 7.4.3 and the decoding process in case of + // an inferred gap between values of frame_num as specified in clause 8.2.5.2. + GapsInFrameNumValueAllowed bool + + // pic_width_in_mbs_minus1 plus 1 specifies the width of each decoded picture + // in units of macroblocks. See eq 7-13. + PicWidthInMBSMinus1 uint64 + + // pic_height_in_map_units_minus1 plus 1 specifies the height in slice group + // map units of a decoded frame or field. See eq 7-16. + PicHeightInMapUnitsMinus1 uint64 + + // frame_mbs_only_flag if 0 coded pictures of the coded video sequence may be + // coded fields or coded frames. If 1 every coded picture of the coded video + // sequence is a coded frame containing only frame macroblocks. + FrameMBSOnlyFlag bool + + // mb_adaptive_frame_field_flag if 0 specifies no switching between + // frame and field macroblocks within a picture. If 1 specifies the possible + // use of switching between frame and field macroblocks within frames. 
+ MBAdaptiveFrameFieldFlag bool + + // direct_8x8_inference_flag specifies the method used in the derivation + // process for luma motion vectors for B_Skip, B_Direct_16x16 and B_Direct_8x8 + // as specified in clause 8.4.1.2. + Direct8x8InferenceFlag bool + + // frame_cropping_flag if 1 then frame cropping offset parameters are next in + // the sequence parameter set. If 0 they are not. + FrameCroppingFlag bool + + // frame_crop_left_offset, frame_crop_right_offset, frame_crop_top_offset, + // frame_crop_bottom_offset specify the samples of the pictures in the coded + // video sequence that are output from the decoding process, in terms of a + // rectangular region specified in frame coordinates for output. + FrameCropLeftOffset uint64 + FrameCropRightOffset uint64 + FrameCropTopOffset uint64 + FrameCropBottomOffset uint64 + + // vui_parameters_present_flag if 1 the vui_parameters() syntax structure is + // present, otherwise it is not. + VUIParametersPresentFlag bool + + // The vui_parameters() syntax structure specified in appendix E. + VUIParameters *VUIParameters +} + +// NewSPS parses a sequence parameter set raw byte sequence from br following +// the syntax structure specified in section 7.3.2.1.1, and returns as a new +// SPS. +func NewSPS(rbsp []byte, showPacket bool) (*SPS, error) { + logger.Printf("debug: SPS RBSP %d bytes %d bits\n", len(rbsp), len(rbsp)*8) + logger.Printf("debug: \t%#v\n", rbsp[0:8]) + sps := SPS{} + br := bits.NewBitReader(bytes.NewReader(rbsp)) + r := newFieldReader(br) + + sps.Profile = uint8(r.readBits(8)) + sps.Constraint0 = r.readBits(1) == 1 + sps.Constraint1 = r.readBits(1) == 1 + sps.Constraint2 = r.readBits(1) == 1 + sps.Constraint3 = r.readBits(1) == 1 + sps.Constraint4 = r.readBits(1) == 1 + sps.Constraint5 = r.readBits(1) == 1 + r.readBits(2) // 2 reserved bits. 
+ sps.LevelIDC = uint8(r.readBits(8)) + sps.SPSID = r.readUe() + sps.ChromaFormatIDC = r.readUe() + + // This should be done only for certain ProfileIDC: + isProfileIDC := []int{100, 110, 122, 244, 44, 83, 86, 118, 128, 138, 139, 134, 135} + // SpecialProfileCase1 + if isInList(isProfileIDC, int(sps.Profile)) { + if sps.ChromaFormatIDC == chroma444 { + // TODO: should probably deal with error here. + sps.SeparateColorPlaneFlag = r.readBits(1) == 1 + } + + sps.BitDepthLumaMinus8 = r.readUe() + sps.BitDepthChromaMinus8 = r.readUe() + sps.QPPrimeYZeroTransformBypassFlag = r.readBits(1) == 1 + sps.SeqScalingMatrixPresentFlag = r.readBits(1) == 1 + + if sps.SeqScalingMatrixPresentFlag { + max := 12 + if sps.ChromaFormatIDC != chroma444 { + max = 8 + } + logger.Printf("debug: \tbuilding Scaling matrix for %d elements\n", max) + for i := 0; i < max; i++ { + sps.SeqScalingListPresentFlag = append(sps.SeqScalingListPresentFlag, r.readBits(1) == 1) + + if sps.SeqScalingListPresentFlag[i] { + if i < 6 { + scalingList( + br, + ScalingList4x4[i], + 16, + DefaultScalingMatrix4x4[i]) + // 4x4: Page 75 bottom + } else { + // 8x8 Page 76 top + scalingList( + br, + ScalingList8x8[i], + 64, + DefaultScalingMatrix8x8[i-6]) + } + } + } + } + } // End SpecialProfileCase1 + + // showSPS() + // return sps + // Possibly wrong due to no scaling list being built + sps.Log2MaxFrameNumMinus4 = r.readUe() + sps.PicOrderCountType = r.readUe() + + if sps.PicOrderCountType == 0 { + sps.Log2MaxPicOrderCntLSBMin4 = r.readUe() + } else if sps.PicOrderCountType == 1 { + sps.DeltaPicOrderAlwaysZeroFlag = r.readBits(1) == 1 + sps.OffsetForNonRefPic = int64(r.readSe()) + sps.OffsetForTopToBottomField = int64(r.readSe()) + sps.NumRefFramesInPicOrderCntCycle = r.readUe() + + for i := 0; i < int(sps.NumRefFramesInPicOrderCntCycle); i++ { + sps.OffsetForRefFrameList = append(sps.OffsetForRefFrameList, r.readSe()) + } + + } + + sps.MaxNumRefFrames = r.readUe() + sps.GapsInFrameNumValueAllowed = r.readBits(1) 
== 1 + sps.PicWidthInMBSMinus1 = r.readUe() + sps.PicHeightInMapUnitsMinus1 = r.readUe() + sps.FrameMBSOnlyFlag = r.readBits(1) == 1 + + if !sps.FrameMBSOnlyFlag { + sps.MBAdaptiveFrameFieldFlag = r.readBits(1) == 1 + } + + sps.Direct8x8InferenceFlag = r.readBits(1) == 1 + sps.FrameCroppingFlag = r.readBits(1) == 1 + + if sps.FrameCroppingFlag { + sps.FrameCropLeftOffset = r.readUe() + sps.FrameCropRightOffset = r.readUe() + sps.FrameCropTopOffset = r.readUe() + sps.FrameCropBottomOffset = r.readUe() + } + + sps.VUIParametersPresentFlag = r.readBits(1) == 1 + + if sps.VUIParametersPresentFlag { + + } // End VuiParameters Annex E.1.1 + + return &sps, nil +} + +// VUIParameters describes video usability information parameters as defined by section E.1.1 in the +// Specifications. +// Semantics for fields are defined in section E.2.1. Comments on fields are +// excerpts from this section. +type VUIParameters struct { + // aspect_ratio_info_present_flag if 1 then aspect_ratio_idc is present, + // otherwise it is not. + AspectRatioInfoPresentFlag bool + + // aspect_ratio_idc specifies the value of sample aspect ratio of the luma samples. + AspectRatioIDC uint8 + + // sar_width indicates the horizontal size of the sample aspect ratio (in + // arbitrary units). + SARWidth uint32 + + // sar_height indicates the vertical size of the sample aspect ratio (in the + // same arbitrary units as sar_width). + SARHeight uint32 + + // overscan_info_present_flag if 1 then overscan_appropriate_flag is present, + // otherwise if 0, then the display method for the video signal is unspecified. + OverscanInfoPresentFlag bool + + // overscan_appropriate_flag if 1 then the cropped decoded pictures output + // are suitable for display using overscan, otherwise if 0, then the cropped + // decoded pictures output should not be displayed using overscan. 
+ OverscanAppropriateFlag bool + + // video_signal_type_present_flag equal to 1 specifies that video_format, + // video_full_range_flag and colour_description_present_flag are present, + // otherwise if 0, then they are not present. + VideoSignalTypePresentFlag bool + + // video_format indicates the representation of the pictures as specified in + // Table E-2, before being coded in accordance with this Recommendation | + // International Standard. + VideoFormat uint8 + + // video_full_range_flag indicates the black level and range of the luma and + // chroma signals as derived from E′_Y, E′_PB, and E′_PR or E′_R, E′_G, + // and E′_B real-valued component signals. + VideoFullRangeFlag bool + + // colour_description_present_flag if 1 specifies that colour_primaries, + // transfer_characteristics and matrix_coefficients are present, otherwise if + // 0 then they are not present. + ColorDescriptionPresentFlag bool + + // colour_primaries indicates the chromaticity coordinates of the source + // primaries as specified in Table E-3 in terms of the CIE 1931 definition of + // x and y as specified by ISO 11664-1. + ColorPrimaries uint8 + + // transfer_characteristics either indicates the reference opto-electronic + // transfer characteristic function of the source picture, or indicates the + // inverse of the reference electro-optical transfer characteristic function. + TransferCharacteristics uint8 + + // matrix_coefficients describes the matrix coefficients used in deriving luma + // and chroma signals from the green, blue, and red, or Y, Z, and X primaries, + // as specified in Table E-5. + MatrixCoefficients uint8 + + // chroma_loc_info_present_flag if 1 specifies that chroma_sample_loc_type_top_field + // and chroma_sample_loc_type_bottom_field are present, otherwise if 0, + // they are not present. + ChromaLocInfoPresentFlag bool + + // chroma_sample_loc_type_top_field and chroma_sample_loc_type_bottom_field + // specify the location of chroma samples. 
+ ChromaSampleLocTypeTopField, ChromaSampleLocTypeBottomField uint64 + + // timing_info_present_flag if 1 specifies that num_units_in_tick, time_scale + // and fixed_frame_rate_flag are present in the bitstream, otherwise if 0, + // they are not present. + TimingInfoPresentFlag bool + + // num_units_in_tick is the number of time units of a clock operating at the + // frequency time_scale Hz that corresponds to one increment (called a clock + // tick) of a clock tick counter. + NumUnitsInTick uint32 + + // time_scale is the number of time units that pass in one second. + TimeScale uint32 + + // fixed_frame_rate_flag if 1 indicates that the temporal distance + // between the HRD output times of any two consecutive pictures in output + // order is constrained as follows. fixed_frame_rate_flag equal to 0 indicates + // that no such constraints apply to the temporal distance between the HRD + // output times of any two consecutive pictures in output order. + FixedFrameRateFlag bool + + // nal_hrd_parameters_present_flag if 1 then NAL HRD parameters (pertaining to + // Type II bitstream conformance) are present, otherwise if 0, then they + // are not present. + NALHRDParametersPresentFlag bool + + // The nal_hrd_parameters() syntax structure as specified in section E.1.2. + NALHRDParameters *HRDParameters + + // vcl_hrd_parameters_present_flag if 1 specifies that VCL HRD parameters + // (pertaining to all bitstream conformance) are present, otherwise if 0, then + // they are not present. + VCLHRDParametersPresentFlag bool + + // The vcl_nal_hrd_parameters() syntax structure as specified in section E.1.2. + VCLHRDParameters *HRDParameters + + // low_delay_hrd_flag specifies the HRD operational mode as specified in Annex C. + LowDelayHRDFlag bool + + // pic_struct_present_flag if 1 then picture timing SEI messages (clause D.2.3) + // are present that include the pic_struct syntax element, otherwise if 0, then + // not present. 
+ PicStructPresentFlag bool + + // bitstream_restriction_flag if 1, then the following coded video sequence + // bitstream restriction parameters are present, otherwise if 0, then they are + // not present. + BitstreamRestrictionFlag bool + + // motion_vectors_over_pic_boundaries_flag if 0 then no sample outside the + // picture boundaries and no sample at a fractional sample position for which + // the sample value is derived using one or more samples outside the picture + // boundaries is used for inter prediction of any sample, otherwise if 1, + // indicates that one or more samples outside picture boundaries may be used + // in inter prediction. + MotionVectorsOverPicBoundariesFlag bool + + // max_bytes_per_pic_denom indicates a number of bytes not exceeded by the sum + // of the sizes of the VCL NAL units associated with any coded picture in the + // coded video sequence. + MaxBytesPerPicDenom uint64 + + // max_bits_per_mb_denom indicates an upper bound for the number of coded bits + // of macroblock_layer() data for any macroblock in any picture of the coded + // video sequence. + MaxBitsPerMBDenom uint64 + + // log2_max_mv_length_horizontal and log2_max_mv_length_vertical indicate the + // maximum absolute value of a decoded horizontal and vertical motion vector + // component, respectively, in 1⁄4 luma sample units, for all pictures in the + // coded video sequence. + Log2MaxMVLengthHorizontal, Log2MaxMVLengthVertical uint64 + + // max_num_reorder_frames indicates an upper bound for the number of frames + // buffers, in the decoded picture buffer (DPB), that are required for storing + // frames, complementary field pairs, and non-paired fields before output. + MaxNumReorderFrames uint64 + + // max_dec_frame_buffering specifies the required size of the HRD decoded + // picture buffer (DPB) in units of frame buffers. 
+ MaxDecFrameBuffering uint64 +} + +// NewVUIParameters parses video usability information parameters from br +// following the syntax structure specified in section E.1.1, and returns as a +// new VUIParameters. +func NewVUIParameters(br *bits.BitReader) (*VUIParameters, error) { + p := &VUIParameters{} + r := newFieldReader(br) + + p.AspectRatioInfoPresentFlag = r.readBits(1) == 1 + + if p.AspectRatioInfoPresentFlag { + p.AspectRatioIDC = uint8(r.readBits(8)) + + EXTENDED_SAR := 999 + if int(p.AspectRatioIDC) == EXTENDED_SAR { + p.SARWidth = uint32(r.readBits(16)) + p.SARHeight = uint32(r.readBits(16)) + } + } + + p.OverscanInfoPresentFlag = r.readBits(1) == 1 + + if p.OverscanInfoPresentFlag { + p.OverscanAppropriateFlag = r.readBits(1) == 1 + } + + p.VideoSignalTypePresentFlag = r.readBits(1) == 1 + + if p.VideoSignalTypePresentFlag { + p.VideoFormat = uint8(r.readBits(3)) + } + + if p.VideoSignalTypePresentFlag { + p.VideoFullRangeFlag = r.readBits(1) == 1 + p.ColorDescriptionPresentFlag = r.readBits(1) == 1 + + if p.ColorDescriptionPresentFlag { + p.ColorPrimaries = uint8(r.readBits(8)) + p.TransferCharacteristics = uint8(r.readBits(8)) + p.MatrixCoefficients = uint8(r.readBits(8)) + } + } + p.ChromaLocInfoPresentFlag = r.readBits(1) == 1 + + if p.ChromaLocInfoPresentFlag { + p.ChromaSampleLocTypeTopField = uint64(r.readUe()) + p.ChromaSampleLocTypeBottomField = uint64(r.readUe()) + } + + p.TimingInfoPresentFlag = r.readBits(1) == 1 + + if p.TimingInfoPresentFlag { + p.NumUnitsInTick = uint32(r.readBits(32)) + p.TimeScale = uint32(r.readBits(32)) + p.FixedFrameRateFlag = r.readBits(1) == 1 + } + + p.NALHRDParametersPresentFlag = r.readBits(1) == 1 + + var err error + if p.NALHRDParametersPresentFlag { + p.NALHRDParameters, err = NewHRDParameters(br) + if err != nil { + return nil, errors.Wrap(err, "could not get hrdParameters") + } + } + + p.VCLHRDParametersPresentFlag = r.readBits(1) == 1 + + if p.VCLHRDParametersPresentFlag { + p.VCLHRDParameters, err = 
NewHRDParameters(br) + if err != nil { + return nil, errors.Wrap(err, "could not get hrdParameters") + } + } + if p.NALHRDParametersPresentFlag || p.VCLHRDParametersPresentFlag { + p.LowDelayHRDFlag = r.readBits(1) == 1 + } + + p.PicStructPresentFlag = r.readBits(1) == 1 + p.BitstreamRestrictionFlag = r.readBits(1) == 1 + + if p.BitstreamRestrictionFlag { + p.MotionVectorsOverPicBoundariesFlag = r.readBits(1) == 1 + p.MaxBytesPerPicDenom = r.readUe() + p.MaxBitsPerMBDenom = r.readUe() + p.Log2MaxMVLengthHorizontal = r.readUe() + p.Log2MaxMVLengthVertical = r.readUe() + p.MaxNumReorderFrames = r.readUe() + p.MaxDecFrameBuffering = r.readUe() + } + return p, nil +} + +// HRDParameters describes hypothetical reference decoder parameters as defined +// by section E.1.2 in the specifications. +// Field semantics are defined in section E.2.2. Comments on fields are excerpts +// from section E.2.2. +type HRDParameters struct { + // cpb_cnt_minus1 plus 1 specifies the number of alternative CPB specifications + // in the bitstream. + CPBCntMinus1 uint64 + + // bit_rate_scale (together with bit_rate_value_minus1[ SchedSelIdx ]) + // specifies the maximum input bit rate of the SchedSelIdx-th CPB. + BitRateScale uint8 + + // cpb_size_scale (together with cpb_size_value_minus1[ SchedSelIdx ]) + // specifies the CPB size of the SchedSelIdx-th CPB. + CPBSizeScale uint8 + + // bit_rate_value_minus1[ SchedSelIdx ] (together with bit_rate_scale) + //specifies the maximum input bit rate for the SchedSelIdx-th CPB. + BitRateValueMinus1 []uint64 + + // cpb_size_value_minus1[ SchedSelIdx ] is used together with cpb_size_scale + // to specify the SchedSelIdx-th CPB size. 
+ CPBSizeValueMinus1 []uint64 + + // cbr_flag[ SchedSelIdx ] equal to 0 specifies that to decode this bitstream + // by the HRD using the SchedSelIdx-th CPB specification, the hypothetical + // stream delivery scheduler (HSS) operates in an intermittent bit rate mode, + // otherwise if 1 specifies that the HSS operates in a constant bit rate mode. + CBRFlag []bool + + // initial_cpb_removal_delay_length_minus1 specifies the length in bits of the + // initial_cpb_removal_delay[ SchedSelIdx ] and + // initial_cpb_removal_delay_offset[ SchedSelIdx ] syntax elements of the + // buffering period SEI message. + InitialCPBRemovalDelayLenMinus1 uint8 + + // cpb_removal_delay_length_minus1 specifies the length in bits of the + // cpb_removal_delay syntax element. + CPBRemovalDelayLenMinus1 uint8 + + // dpb_output_delay_length_minus1 specifies the length in bits of the + // dpb_output_delay syntax element. + DPBOutputDelayLenMinus1 uint8 + + // time_offset_length greater than 0 specifies the length in bits of the + // time_offset syntax element. + TimeOffsetLen uint8 +} + +// NewHRDParameters parses hypothetical reference decoder parameter from br +// following the syntax structure specified in section E.1.2, and returns as a +// new HRDParameters. 
+func NewHRDParameters(br *bits.BitReader) (*HRDParameters, error) { + h := &HRDParameters{} + r := newFieldReader(br) + + h.CPBCntMinus1 = r.readUe() + h.BitRateScale = uint8(r.readBits(4)) + h.CPBSizeScale = uint8(r.readBits(4)) + + // SchedSelIdx E1.2 + for sseli := 0; sseli <= int(h.CPBCntMinus1); sseli++ { + h.BitRateValueMinus1 = append(h.BitRateValueMinus1, r.readUe()) + h.CPBSizeValueMinus1 = append(h.CPBSizeValueMinus1, r.readUe()) + + if v, _ := br.ReadBits(1); v == 1 { + h.CBRFlag = append(h.CBRFlag, true) + } else { + h.CBRFlag = append(h.CBRFlag, false) + } + + h.InitialCPBRemovalDelayLenMinus1 = uint8(r.readBits(5)) + h.CPBRemovalDelayLenMinus1 = uint8(r.readBits(5)) + h.DPBOutputDelayLenMinus1 = uint8(r.readBits(5)) + h.TimeOffsetLen = uint8(r.readBits(5)) + } + + if r.err() != nil { + return nil, fmt.Errorf("error from fieldReader: %v", r.err()) + } + return h, nil +} + func isInList(l []int, term int) bool { for _, m := range l { if m == term { @@ -169,12 +671,7 @@ func isInList(l []int, term int) bool { } return false } -func debugPacket(name string, packet interface{}) { - logger.Printf("debug: %s packet\n", name) - for _, line := range strings.Split(fmt.Sprintf("%+v", packet), " ") { - logger.Printf("debug: \t%#v\n", line) - } -} + func scalingList(br *bits.BitReader, scalingList []int, sizeOfScalingList int, defaultScalingMatrix []int) error { lastScale := 8 nextScale := 8 @@ -199,492 +696,3 @@ func scalingList(br *bits.BitReader, scalingList []int, sizeOfScalingList int, d } return nil } -func NewSPS(rbsp []byte, showPacket bool) (*SPS, error) { - logger.Printf("debug: SPS RBSP %d bytes %d bits\n", len(rbsp), len(rbsp)*8) - logger.Printf("debug: \t%#v\n", rbsp[0:8]) - sps := SPS{} - br := bits.NewBitReader(bytes.NewReader(rbsp)) - var err error - hrdParameters := func() error { - sps.CpbCntMinus1, err = readUe(br) - if err != nil { - return errors.Wrap(err, "could not parse CpbCntMinus1") - } - - err := readFields(br, []field{ - 
{&sps.BitRateScale, "BitRateScale", 4}, - {&sps.CpbSizeScale, "CpbSizeScale", 4}, - }) - if err != nil { - return err - } - - // SchedSelIdx E1.2 - for sseli := 0; sseli <= sps.CpbCntMinus1; sseli++ { - ue, err := readUe(br) - if err != nil { - return errors.Wrap(err, "could not parse BitRateValueMinus1") - } - sps.BitRateValueMinus1 = append(sps.BitRateValueMinus1, ue) - - ue, err = readUe(br) - if err != nil { - return errors.Wrap(err, "could not parse CpbSizeValueMinus1") - } - sps.CpbSizeValueMinus1 = append(sps.CpbSizeValueMinus1, ue) - - if v, _ := br.ReadBits(1); v == 1 { - sps.Cbr = append(sps.Cbr, true) - } else { - sps.Cbr = append(sps.Cbr, false) - } - - err = readFields(br, - []field{ - {&sps.InitialCpbRemovalDelayLengthMinus1, "InitialCpbRemovalDelayLengthMinus1", 5}, - {&sps.CpbRemovalDelayLengthMinus1, "CpbRemovalDelayLengthMinus1", 5}, - {&sps.DpbOutputDelayLengthMinus1, "DpbOutputDelayLengthMinus1", 5}, - {&sps.TimeOffsetLength, "TimeOffsetLength", 5}, - }, - ) - if err != nil { - return err - } - } - return nil - } - - err = readFields(br, - []field{ - {&sps.Profile, "ProfileIDC", 8}, - {&sps.Constraint0, "Constraint0", 1}, - {&sps.Constraint1, "Constraint1", 1}, - {&sps.Constraint2, "Constraint2", 1}, - {&sps.Constraint3, "Constraint3", 1}, - {&sps.Constraint4, "Constraint4", 1}, - {&sps.Constraint5, "Constraint5", 1}, - }, - ) - - _, err = br.ReadBits(2) - if err != nil { - return nil, errors.Wrap(err, "could not read ReservedZeroBits") - } - - b, err := br.ReadBits(8) - if err != nil { - return nil, errors.Wrap(err, "could not read Level") - } - sps.Level = int(b) - - // sps.ID = b.NextField("SPSID", 6) // proper - sps.ID, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse ID") - } - - sps.ChromaFormat, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse ChromaFormat") - } - - // This should be done only for certain ProfileIDC: - isProfileIDC := []int{100, 110, 122, 244, 44, 83, 86, 
118, 128, 138, 139, 134, 135} - // SpecialProfileCase1 - if isInList(isProfileIDC, sps.Profile) { - if sps.ChromaFormat == chroma444 { - // TODO: should probably deal with error here. - b, err := br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read UseSeparateColorPlaneFlag") - } - sps.UseSeparateColorPlane = b == 1 - } - - sps.BitDepthLumaMinus8, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse BitDepthLumaMinus8") - } - - sps.BitDepthChromaMinus8, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse BitDepthChromaMinus8") - } - - b, err := br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read QPrimeYZeroTransformBypass") - } - sps.QPrimeYZeroTransformBypass = b == 1 - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read SeqScalingMatrixPresent") - } - sps.SeqScalingMatrixPresent = b == 1 - - if sps.SeqScalingMatrixPresent { - max := 12 - if sps.ChromaFormat != chroma444 { - max = 8 - } - logger.Printf("debug: \tbuilding Scaling matrix for %d elements\n", max) - for i := 0; i < max; i++ { - b, err := br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read SeqScalingList") - } - sps.SeqScalingList = append(sps.SeqScalingList, b == 1) - - if sps.SeqScalingList[i] { - if i < 6 { - scalingList( - br, - ScalingList4x4[i], - 16, - DefaultScalingMatrix4x4[i]) - // 4x4: Page 75 bottom - } else { - // 8x8 Page 76 top - scalingList( - br, - ScalingList8x8[i], - 64, - DefaultScalingMatrix8x8[i-6]) - } - } - } - } - } // End SpecialProfileCase1 - - // showSPS() - // return sps - // Possibly wrong due to no scaling list being built - sps.Log2MaxFrameNumMinus4, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse Log2MaxFrameNumMinus4") - } - - sps.PicOrderCountType, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse PicOrderCountType") - } 
- - if sps.PicOrderCountType == 0 { - sps.Log2MaxPicOrderCntLSBMin4, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse Log2MaxPicOrderCntLSBMin4") - } - } else if sps.PicOrderCountType == 1 { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read DeltaPicOrderAlwaysZero") - } - sps.DeltaPicOrderAlwaysZero = b == 1 - - sps.OffsetForNonRefPic, err = readSe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse OffsetForNonRefPic") - } - - sps.OffsetForTopToBottomField, err = readSe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse OffsetForTopToBottomField") - } - - sps.NumRefFramesInPicOrderCntCycle, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse NumRefFramesInPicOrderCntCycle") - } - - for i := 0; i < sps.NumRefFramesInPicOrderCntCycle; i++ { - se, err := readSe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse OffsetForRefFrameList") - } - sps.OffsetForRefFrameList = append( - sps.OffsetForRefFrameList, - se) - } - - } - - sps.MaxNumRefFrames, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MaxNumRefFrames") - } - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read GapsInFrameNumValueAllowed") - } - sps.GapsInFrameNumValueAllowed = b == 1 - - sps.PicWidthInMbsMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse PicWidthInMbsMinus1") - } - - sps.PicHeightInMapUnitsMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse PicHeightInMapUnitsMinus1") - } - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read FrameMbsOnly") - } - sps.FrameMbsOnly = b == 1 - - if !sps.FrameMbsOnly { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read MBAdaptiveFrameField") - } - sps.MBAdaptiveFrameField = b == 
1 - } - - err = readFlags(br, []flag{ - {&sps.Direct8x8Inference, "Direct8x8Inference"}, - {&sps.FrameCropping, "FrameCropping"}, - }) - if err != nil { - return nil, err - } - - if sps.FrameCropping { - sps.FrameCropLeftOffset, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse FrameCropLeftOffset") - } - - sps.FrameCropRightOffset, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse FrameCropRightOffset") - } - - sps.FrameCropTopOffset, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse FrameCropTopOffset") - } - - sps.FrameCropBottomOffset, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse FrameCropBottomOffset") - } - } - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read VuiParametersPresent") - } - sps.VuiParametersPresent = b == 1 - - if sps.VuiParametersPresent { - // vui_parameters - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read AspectRatioInfoPresent") - } - sps.AspectRatioInfoPresent = b == 1 - - if sps.AspectRatioInfoPresent { - b, err = br.ReadBits(8) - if err != nil { - return nil, errors.Wrap(err, "could not read AspectRatio") - } - sps.AspectRatio = int(b) - - EXTENDED_SAR := 999 - if sps.AspectRatio == EXTENDED_SAR { - b, err = br.ReadBits(16) - if err != nil { - return nil, errors.Wrap(err, "could not read SarWidth") - } - sps.SarWidth = int(b) - - b, err = br.ReadBits(16) - if err != nil { - return nil, errors.Wrap(err, "could not read SarHeight") - } - sps.SarHeight = int(b) - } - } - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read OverscanInfoPresent") - } - sps.OverscanInfoPresent = b == 1 - - if sps.OverscanInfoPresent { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read OverscanAppropriate") - } - sps.OverscanAppropriate = b == 1 - } - - b, err = 
br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read VideoSignalTypePresent") - } - sps.VideoSignalTypePresent = b == 1 - - if sps.VideoSignalTypePresent { - b, err = br.ReadBits(3) - if err != nil { - return nil, errors.Wrap(err, "could not read VideoFormat") - } - sps.VideoFormat = int(b) - } - - if sps.VideoSignalTypePresent { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read VideoFullRange") - } - sps.VideoFullRange = b == 1 - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read ColorDescriptionPresent") - } - sps.ColorDescriptionPresent = b == 1 - - if sps.ColorDescriptionPresent { - err = readFields(br, - []field{ - {&sps.ColorPrimaries, "ColorPrimaries", 8}, - {&sps.TransferCharacteristics, "TransferCharacteristics", 8}, - {&sps.MatrixCoefficients, "MatrixCoefficients", 8}, - }, - ) - if err != nil { - return nil, err - } - } - } - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read ChromaLocInfoPresent") - } - sps.ChromaLocInfoPresent = b == 1 - - if sps.ChromaLocInfoPresent { - sps.ChromaSampleLocTypeTopField, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse ChromaSampleLocTypeTopField") - } - - sps.ChromaSampleLocTypeBottomField, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse ChromaSampleLocTypeBottomField") - } - } - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read TimingInfoPresent") - } - sps.TimingInfoPresent = b == 1 - - if sps.TimingInfoPresent { - err := readFields(br, []field{ - {&sps.NumUnitsInTick, "NumUnitsInTick", 32}, - {&sps.TimeScale, "TimeScale", 32}, - }) - if err != nil { - return nil, err - } - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read FixedFrameRate") - } - sps.FixedFrameRate = b == 1 - } - - b, err = br.ReadBits(1) - if err != nil { - 
return nil, errors.Wrap(err, "could not read NalHrdParametersPresent") - } - sps.NalHrdParametersPresent = b == 1 - - if sps.NalHrdParametersPresent { - err = hrdParameters() - if err != nil { - return nil, errors.Wrap(err, "could not get hrdParameters") - } - } - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read VclHrdParametersPresent") - } - sps.VclHrdParametersPresent = b == 1 - - if sps.VclHrdParametersPresent { - err = hrdParameters() - if err != nil { - return nil, errors.Wrap(err, "could not get hrdParameters") - } - } - if sps.NalHrdParametersPresent || sps.VclHrdParametersPresent { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read LowHrdDelay") - } - sps.LowHrdDelay = b == 1 - } - - err := readFlags(br, []flag{ - {&sps.PicStructPresent, "PicStructPresent"}, - {&sps.BitstreamRestriction, "BitStreamRestriction"}, - }) - - if sps.BitstreamRestriction { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read MotionVectorsOverPicBoundaries") - } - sps.MotionVectorsOverPicBoundaries = b == 1 - - sps.MaxBytesPerPicDenom, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MaxBytesPerPicDenom") - } - - sps.MaxBitsPerMbDenom, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MaxBitsPerMbDenom") - } - - sps.Log2MaxMvLengthHorizontal, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse Log2MaxMvLengthHorizontal") - } - - sps.Log2MaxMvLengthVertical, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse Log2MaxMvLengthVertical") - } - - sps.MaxNumReorderFrames, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MaxNumReorderFrames") - } - - sps.MaxDecFrameBuffering, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MaxDecFrameBuffering") - } - } - - } // End 
VuiParameters Annex E.1.1 - if showPacket { - debugPacket("SPS", sps) - } - return &sps, nil -} diff --git a/codec/pcm/pcm.go b/codec/pcm/pcm.go index bb200d50..8093401e 100644 --- a/codec/pcm/pcm.go +++ b/codec/pcm/pcm.go @@ -35,20 +35,20 @@ import ( "github.com/yobert/alsa" ) -// Resample takes an alsa.Buffer (b) and resamples the pcm audio data to 'rate' Hz and returns the resulting pcm. -// If an error occurs, an error will be returned along with the original b's data. +// Resample takes alsa.Buffer b and resamples the pcm audio data to 'rate' Hz and returns an alsa.Buffer with the resampled data. // Notes: // - Currently only downsampling is implemented and b's rate must be divisible by 'rate' or an error will occur. // - If the number of bytes in b.Data is not divisible by the decimation factor (ratioFrom), the remaining bytes will // not be included in the result. Eg. input of length 480002 downsampling 6:1 will result in output length 80000. -func Resample(b alsa.Buffer, rate int) ([]byte, error) { - fromRate := b.Format.Rate - if fromRate == rate { - return b.Data, nil - } else if fromRate < 0 { - return nil, fmt.Errorf("Unable to convert from: %v Hz", fromRate) - } else if rate < 0 { - return nil, fmt.Errorf("Unable to convert to: %v Hz", rate) +func Resample(b alsa.Buffer, rate int) (alsa.Buffer, error) { + if b.Format.Rate == rate { + return b, nil + } + if b.Format.Rate < 0 { + return alsa.Buffer{}, fmt.Errorf("Unable to convert from: %v Hz", b.Format.Rate) + } + if rate < 0 { + return alsa.Buffer{}, fmt.Errorf("Unable to convert to: %v Hz", rate) } // The number of bytes in a sample. 
@@ -59,22 +59,22 @@ func Resample(b alsa.Buffer, rate int) ([]byte, error) { case alsa.S16_LE: sampleLen = 2 * b.Format.Channels default: - return nil, fmt.Errorf("Unhandled ALSA format: %v", b.Format.SampleFormat) + return alsa.Buffer{}, fmt.Errorf("Unhandled ALSA format: %v", b.Format.SampleFormat) } inPcmLen := len(b.Data) // Calculate sample rate ratio ratioFrom:ratioTo. - rateGcd := gcd(rate, fromRate) - ratioFrom := fromRate / rateGcd + rateGcd := gcd(rate, b.Format.Rate) + ratioFrom := b.Format.Rate / rateGcd ratioTo := rate / rateGcd // ratioTo = 1 is the only number that will result in an even sampling. if ratioTo != 1 { - return nil, fmt.Errorf("unhandled from:to rate ratio %v:%v: 'to' must be 1", ratioFrom, ratioTo) + return alsa.Buffer{}, fmt.Errorf("unhandled from:to rate ratio %v:%v: 'to' must be 1", ratioFrom, ratioTo) } newLen := inPcmLen / ratioFrom - result := make([]byte, 0, newLen) + resampled := make([]byte, 0, newLen) // For each new sample to be generated, loop through the respective 'ratioFrom' samples in 'b.Data' to add them // up and average them. The result is the new sample. @@ -96,19 +96,28 @@ func Resample(b alsa.Buffer, rate int) ([]byte, error) { case alsa.S16_LE: binary.LittleEndian.PutUint16(bAvg, uint16(avg)) } - result = append(result, bAvg...) + resampled = append(resampled, bAvg...) } - return result, nil + + // Return a new alsa.Buffer with resampled data. + return alsa.Buffer{ + Format: alsa.BufferFormat{ + Channels: b.Format.Channels, + SampleFormat: b.Format.SampleFormat, + Rate: rate, + }, + Data: resampled, + }, nil } // StereoToMono returns raw mono audio data generated from only the left channel from // the given stereo recording (ALSA buffer) -// if an error occurs, an error will be returned along with the original stereo data. 
-func StereoToMono(b alsa.Buffer) ([]byte, error) { +func StereoToMono(b alsa.Buffer) (alsa.Buffer, error) { if b.Format.Channels == 1 { - return b.Data, nil - } else if b.Format.Channels != 2 { - return nil, fmt.Errorf("Audio is not stereo or mono, it has %v channels", b.Format.Channels) + return b, nil + } + if b.Format.Channels != 2 { + return alsa.Buffer{}, fmt.Errorf("Audio is not stereo or mono, it has %v channels", b.Format.Channels) } var stereoSampleBytes int @@ -118,7 +127,7 @@ func StereoToMono(b alsa.Buffer) ([]byte, error) { case alsa.S16_LE: stereoSampleBytes = 4 default: - return nil, fmt.Errorf("Unhandled ALSA format %v", b.Format.SampleFormat) + return alsa.Buffer{}, fmt.Errorf("Unhandled ALSA format %v", b.Format.SampleFormat) } recLength := len(b.Data) @@ -134,7 +143,15 @@ func StereoToMono(b alsa.Buffer) ([]byte, error) { } } - return mono, nil + // Return a new alsa.Buffer with resampled data. + return alsa.Buffer{ + Format: alsa.BufferFormat{ + Channels: 1, + SampleFormat: b.Format.SampleFormat, + Rate: b.Format.Rate, + }, + Data: mono, + }, nil } // gcd is used for calculating the greatest common divisor of two positive integers, a and b. diff --git a/codec/pcm/pcm_test.go b/codec/pcm/pcm_test.go index 713d01d8..1aa1b9d2 100644 --- a/codec/pcm/pcm_test.go +++ b/codec/pcm/pcm_test.go @@ -71,7 +71,7 @@ func TestResample(t *testing.T) { } // Compare result with expected. - if !bytes.Equal(resampled, exp) { + if !bytes.Equal(resampled.Data, exp) { t.Error("Resampled data does not match expected result.") } } @@ -112,7 +112,7 @@ func TestStereoToMono(t *testing.T) { } // Compare result with expected. 
- if !bytes.Equal(mono, exp) { + if !bytes.Equal(mono.Data, exp) { t.Error("Converted data does not match expected result.") } } diff --git a/container/mts/encoder.go b/container/mts/encoder.go index 883e3a5e..5d5533cb 100644 --- a/container/mts/encoder.go +++ b/container/mts/encoder.go @@ -89,11 +89,6 @@ var ( ) const ( - sdtPid = 17 - patPid = 0 - pmtPid = 4096 - videoPid = 256 - audioPid = 210 H264ID = 27 H265ID = 36 audioStreamID = 0xc0 // First audio stream ID. @@ -151,13 +146,13 @@ func NewEncoder(dst io.WriteCloser, rate float64, mediaType int) *Encoder { var sid byte switch mediaType { case EncodeAudio: - mPid = audioPid + mPid = AudioPid sid = audioStreamID case EncodeH265: - mPid = videoPid + mPid = VideoPid sid = H265ID case EncodeH264: - mPid = videoPid + mPid = VideoPid sid = H264ID } @@ -187,8 +182,8 @@ func NewEncoder(dst io.WriteCloser, rate float64, mediaType int) *Encoder { streamID: sid, continuity: map[int]byte{ - patPid: 0, - pmtPid: 0, + PatPid: 0, + PmtPid: 0, mPid: 0, }, } @@ -214,7 +209,7 @@ func (e *Encoder) TimeBasedPsi(b bool, sendCount int) { e.pktCount = e.psiSendCount } -// Write implements io.Writer. Write takes raw h264 and encodes into MPEG-TS, +// Write implements io.Writer. Write takes raw video or audio data and encodes into MPEG-TS, // then sending it to the encoder's io.Writer destination. 
func (e *Encoder) Write(data []byte) (int, error) { now := time.Now() diff --git a/container/mts/encoder_test.go b/container/mts/encoder_test.go index 2d4501d9..47e9a809 100644 --- a/container/mts/encoder_test.go +++ b/container/mts/encoder_test.go @@ -199,7 +199,7 @@ func TestEncodePcm(t *testing.T) { for i+PacketSize <= len(clip) { // Check MTS packet - if !(pkt.PID() == audioPid) { + if pkt.PID() != AudioPid { i += PacketSize if i+PacketSize <= len(clip) { copy(pkt[:], clip[i:i+PacketSize]) diff --git a/container/mts/mpegts.go b/container/mts/mpegts.go index 0b356770..f091b10e 100644 --- a/container/mts/mpegts.go +++ b/container/mts/mpegts.go @@ -47,6 +47,7 @@ const ( PatPid = 0 PmtPid = 4096 VideoPid = 256 + AudioPid = 210 ) // StreamID is the id of the first stream. diff --git a/container/mts/mpegts_test.go b/container/mts/mpegts_test.go index 9ce93b5b..1cd1f643 100644 --- a/container/mts/mpegts_test.go +++ b/container/mts/mpegts_test.go @@ -82,7 +82,7 @@ func TestGetPTSRange1(t *testing.T) { curTime += interval } - got, err := GetPTSRange(clip.Bytes(), videoPid) + got, err := GetPTSRange(clip.Bytes(), VideoPid) if err != nil { t.Fatalf("did not expect error getting PTS range: %v", err) } @@ -142,7 +142,7 @@ func writeFrame(b *bytes.Buffer, frame []byte, pts uint64) error { for len(buf) != 0 { pkt := Packet{ PUSI: pusi, - PID: videoPid, + PID: VideoPid, RAI: pusi, CC: 0, AFC: hasAdaptationField | hasPayload, diff --git a/exp/pcm/resample/resample.go b/exp/pcm/resample/resample.go index 3d595bb8..f7f5342e 100644 --- a/exp/pcm/resample/resample.go +++ b/exp/pcm/resample/resample.go @@ -81,9 +81,9 @@ func main() { } // Save resampled to file. 
- err = ioutil.WriteFile(outPath, resampled, 0644) + err = ioutil.WriteFile(outPath, resampled.Data, 0644) if err != nil { log.Fatal(err) } - fmt.Println("Encoded and wrote", len(resampled), "bytes to file", outPath) + fmt.Println("Encoded and wrote", len(resampled.Data), "bytes to file", outPath) } diff --git a/exp/pcm/stereo-to-mono/stereo-to-mono.go b/exp/pcm/stereo-to-mono/stereo-to-mono.go index 7dbfd9a5..729caa96 100644 --- a/exp/pcm/stereo-to-mono/stereo-to-mono.go +++ b/exp/pcm/stereo-to-mono/stereo-to-mono.go @@ -77,9 +77,9 @@ func main() { } // Save mono to file. - err = ioutil.WriteFile(outPath, mono, 0644) + err = ioutil.WriteFile(outPath, mono.Data, 0644) if err != nil { log.Fatal(err) } - fmt.Println("Encoded and wrote", len(mono), "bytes to file", outPath) + fmt.Println("Encoded and wrote", len(mono.Data), "bytes to file", outPath) } diff --git a/input/audio/audio.go b/input/audio/audio.go new file mode 100644 index 00000000..54edb00c --- /dev/null +++ b/input/audio/audio.go @@ -0,0 +1,464 @@ +/* +NAME + audio.go + +AUTHOR + Alan Noble + Trek Hopton + +LICENSE + This file is Copyright (C) 2019 the Australian Ocean Lab (AusOcean) + + It is free software: you can redistribute it and/or modify them + under the terms of the GNU General Public License as published by the + Free Software Foundation, either version 3 of the License, or (at your + option) any later version. + + It is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License in gpl.txt. + If not, see [GNU licenses](http://www.gnu.org/licenses). +*/ + +// Package audio provides access to input from audio devices. 
+package audio + +import ( + "bytes" + "errors" + "fmt" + "sync" + "time" + + "github.com/yobert/alsa" + + "bitbucket.org/ausocean/av/codec/adpcm" + "bitbucket.org/ausocean/av/codec/codecutil" + "bitbucket.org/ausocean/av/codec/pcm" + "bitbucket.org/ausocean/utils/logger" + "bitbucket.org/ausocean/utils/ring" +) + +const ( + pkg = "audio: " + rbTimeout = 100 * time.Millisecond + rbNextTimeout = 100 * time.Millisecond + rbLen = 200 + defaultSampleRate = 48000 +) + +// "running" means the input goroutine is reading from the ALSA device and writing to the ringbuffer. +// "paused" means the input routine is sleeping until unpaused or stopped. +// "stopped" means the input routine is stopped and the ALSA device is closed. +const ( + running = iota + 1 + paused + stopped +) + +// Device holds everything we need to know about the audio input stream and implements io.Reader. +type Device struct { + l Logger // Logger for device's routines to log to. + mode uint8 // Operating mode, either running, paused, or stopped. + mu sync.Mutex // Provides synchronisation when changing modes concurrently. + title string // Name of audio title, or empty for the default title. + dev *alsa.Device // ALSA's Audio input device. + ab alsa.Buffer // ALSA's buffer. + rb *ring.Buffer // Our buffer. + chunkSize int // This is the number of bytes that will be stored in rb at a time. + *Config // Configuration parameters for this device. +} + +// Config provides parameters used by Device. +type Config struct { + SampleRate int + Channels int + BitDepth int + RecPeriod float64 + Codec uint8 +} + +// Logger enables any implementation of a logger to be used. +// TODO: Make this part of the logger package. +type Logger interface { + SetLevel(int8) + Log(level int8, message string, params ...interface{}) +} + +// OpenError is used to determine whether an error has originated from attempting to open a device. 
+type OpenError error + +// NewDevice initializes and returns an Device which can be started, read from, and stopped. +func NewDevice(cfg *Config, l Logger) (*Device, error) { + + err := validate(cfg) + if err != nil { + return nil, err + } + + d := &Device{ + Config: cfg, + l: l, + } + + // Open the requested audio device. + err = d.open() + if err != nil { + d.l.Log(logger.Error, pkg+"failed to open device") + return nil, err + } + + // Setup the device to record with desired period. + d.ab = d.dev.NewBufferDuration(time.Duration(d.RecPeriod * float64(time.Second))) + + // Account for channel conversion. + chunkSize := float64(len(d.ab.Data) / d.dev.BufferFormat().Channels * d.Channels) + + // Account for resampling. + chunkSize = (chunkSize / float64(d.dev.BufferFormat().Rate)) * float64(d.SampleRate) + if chunkSize < 1 { + return nil, errors.New("given Config parameters are too small") + } + + // Account for codec conversion. + if d.Codec == codecutil.ADPCM { + d.chunkSize = adpcm.EncBytes(int(chunkSize)) + } else { + d.chunkSize = int(chunkSize) + } + + // Create ring buffer with appropriate chunk size. + d.rb = ring.NewBuffer(rbLen, d.chunkSize, rbTimeout) + + // Start device in paused mode. + d.mode = paused + go d.input() + + return d, nil +} + +// Start will start recording audio and writing to the ringbuffer. +// Once a Device has been stopped it cannot be started again. This is likely to change in future. +func (d *Device) Start() error { + d.mu.Lock() + mode := d.mode + d.mu.Unlock() + switch mode { + case paused: + d.mu.Lock() + d.mode = running + d.mu.Unlock() + return nil + case stopped: + // TODO(Trek): Make this reopen device and start recording. + return errors.New("device is stopped") + case running: + return nil + default: + return fmt.Errorf("invalid mode: %d", mode) + } +} + +// Stop will stop recording audio and close the device. +// Once a Device has been stopped it cannot be started again. This is likely to change in future. 
+func (d *Device) Stop() { + d.mu.Lock() + d.mode = stopped + d.mu.Unlock() +} + +// ChunkSize returns the number of bytes written to the ringbuffer per d.RecPeriod. +func (d *Device) ChunkSize() int { + return d.chunkSize +} + +// validate checks if Config parameters are valid and returns an error if they are not. +func validate(c *Config) error { + if c.SampleRate <= 0 { + return fmt.Errorf("invalid sample rate: %v", c.SampleRate) + } + if c.Channels <= 0 { + return fmt.Errorf("invalid number of channels: %v", c.Channels) + } + if c.BitDepth <= 0 { + return fmt.Errorf("invalid bitdepth: %v", c.BitDepth) + } + if c.RecPeriod <= 0 { + return fmt.Errorf("invalid recording period: %v", c.RecPeriod) + } + if !codecutil.IsValid(c.Codec) { + return errors.New("invalid codec") + } + return nil +} + +// open the recording device with the given name and prepare it to record. +// If name is empty, the first recording device is used. +func (d *Device) open() error { + // Close any existing device. + if d.dev != nil { + d.l.Log(logger.Debug, pkg+"closing device", "title", d.title) + d.dev.Close() + d.dev = nil + } + + // Open sound card and open recording device. + d.l.Log(logger.Debug, pkg+"opening sound card") + cards, err := alsa.OpenCards() + if err != nil { + return OpenError(err) + } + defer alsa.CloseCards(cards) + + d.l.Log(logger.Debug, pkg+"finding audio device") + for _, card := range cards { + devices, err := card.Devices() + if err != nil { + continue + } + for _, dev := range devices { + if dev.Type != alsa.PCM || !dev.Record { + continue + } + if dev.Title == d.title || d.title == "" { + d.dev = dev + break + } + } + } + if d.dev == nil { + return OpenError(errors.New("no audio device found")) + } + + d.l.Log(logger.Debug, pkg+"opening audio device", "title", d.dev.Title) + err = d.dev.Open() + if err != nil { + return OpenError(err) + } + + // 2 channels is what most devices need to record in. 
If mono is requested, + // the recording will be converted in formatBuffer(). + channels, err := d.dev.NegotiateChannels(2) + if err != nil { + return OpenError(err) + } + d.l.Log(logger.Debug, pkg+"alsa device channels set", "channels", channels) + + // Try to negotiate a rate to record in that is divisible by the wanted rate + // so that it can be easily downsampled to the wanted rate. + // rates is a slice of common sample rates including the standard for CD (44100Hz) and standard for professional audio recording (48000Hz). + // Note: if a card thinks it can record at a rate but can't actually, this can cause a failure. + // Eg. the audioinjector sound card is supposed to record at 8000Hz and 16000Hz but it can't due to a firmware issue, + // a fix for this is to remove 8000 and 16000 from the rates slice. + var rates = [8]int{8000, 16000, 32000, 44100, 48000, 88200, 96000, 192000} + + var rate int + foundRate := false + for r := range rates { + if r < d.SampleRate { + continue + } + if r%d.SampleRate == 0 { + rate, err = d.dev.NegotiateRate(r) + if err == nil { + foundRate = true + d.l.Log(logger.Debug, pkg+"alsa device sample rate set", "rate", rate) + break + } + } + } + + // If no easily divisible rate is found, then use the default rate. 
+ if !foundRate { + d.l.Log(logger.Warning, pkg+"Unable to sample at requested rate, default used.", "rateRequested", d.SampleRate) + rate, err = d.dev.NegotiateRate(defaultSampleRate) + if err != nil { + return OpenError(err) + } + d.l.Log(logger.Debug, pkg+"alsa device sample rate set", "rate", rate) + } + + var aFmt alsa.FormatType + switch d.BitDepth { + case 16: + aFmt = alsa.S16_LE + case 32: + aFmt = alsa.S32_LE + default: + return OpenError(fmt.Errorf("unsupported sample bits %v", d.BitDepth)) + } + devFmt, err := d.dev.NegotiateFormat(aFmt) + if err != nil { + return err + } + var bitdepth int + switch devFmt { + case alsa.S16_LE: + bitdepth = 16 + case alsa.S32_LE: + bitdepth = 32 + default: + return OpenError(fmt.Errorf("unsupported sample bits %v", d.BitDepth)) + } + d.l.Log(logger.Debug, pkg+"alsa device bit depth set", "bitdepth", bitdepth) + + // A 50ms period is a sensible value for low-ish latency. (this could be made configurable if needed) + // Some devices only accept even period sizes while others want powers of 2. + // So we will find the closest power of 2 to the desired period size. + const wantPeriod = 0.05 //seconds + bytesPerSecond := rate * channels * (bitdepth / 8) + wantPeriodSize := int(float64(bytesPerSecond) * wantPeriod) + nearWantPeriodSize := nearestPowerOfTwo(wantPeriodSize) + + // At least two period sizes should fit within the buffer. + bufSize, err := d.dev.NegotiateBufferSize(nearWantPeriodSize * 2) + if err != nil { + return OpenError(err) + } + d.l.Log(logger.Debug, pkg+"alsa device buffer size set", "buffersize", bufSize) + + if err = d.dev.Prepare(); err != nil { + return OpenError(err) + } + + d.l.Log(logger.Debug, pkg+"successfully negotiated ALSA params") + return nil +} + +// input continously records audio and writes it to the ringbuffer. +// Re-opens the device and tries again if ASLA returns an error. +func (d *Device) input() { + for { + // Check mode. 
+ d.mu.Lock() + mode := d.mode + d.mu.Unlock() + switch mode { + case paused: + time.Sleep(time.Duration(d.RecPeriod) * time.Second) + continue + case stopped: + if d.dev != nil { + d.l.Log(logger.Debug, pkg+"closing audio device", "title", d.title) + d.dev.Close() + d.dev = nil + } + return + } + + // Read from audio device. + d.l.Log(logger.Debug, pkg+"recording audio for period", "seconds", d.RecPeriod) + err := d.dev.Read(d.ab.Data) + if err != nil { + d.l.Log(logger.Debug, pkg+"read failed", "error", err.Error()) + err = d.open() // re-open + if err != nil { + d.l.Log(logger.Fatal, pkg+"reopening device failed", "error", err.Error()) + return + } + continue + } + + // Process audio. + d.l.Log(logger.Debug, pkg+"processing audio") + toWrite := d.formatBuffer() + + // Write audio to ringbuffer. + n, err := d.rb.Write(toWrite.Data) + switch err { + case nil: + d.l.Log(logger.Debug, pkg+"wrote audio to ringbuffer", "length", n) + case ring.ErrDropped: + d.l.Log(logger.Warning, pkg+"old audio data overwritten") + default: + d.l.Log(logger.Error, pkg+"unexpected ringbuffer error", "error", err.Error()) + return + } + } +} + +// Read reads from the ringbuffer, returning the number of bytes read upon success. +func (d *Device) Read(p []byte) (int, error) { + // Ready ringbuffer for read. + _, err := d.rb.Next(rbNextTimeout) + if err != nil { + return 0, err + } + + // Read from ring buffer. + return d.rb.Read(p) +} + +// formatBuffer returns audio that has been converted to the desired format. +func (d *Device) formatBuffer() alsa.Buffer { + var err error + + // If nothing needs to be changed, return the original. + if d.ab.Format.Channels == d.Channels && d.ab.Format.Rate == d.SampleRate { + return d.ab + } + var formatted alsa.Buffer + if d.ab.Format.Channels != d.Channels { + // Convert channels. + // TODO(Trek): Make this work for conversions other than stereo to mono. 
// nearestPowerOfTwo finds and returns the nearest power of two to the given integer.
// If the lower and higher power of two are the same distance, it returns the higher power.
// For zero and negative values, 1 is returned, and for 1 the result is 2.
// If n is so large that the next higher power of two does not fit in an int,
// the largest power of two that an int can hold is returned.
func nearestPowerOfTwo(n int) int {
	// maxPow is the largest power of two representable by int on this platform
	// (2^30 for 32-bit int, 2^62 for 64-bit int).
	const maxPow = int(^uint(0)>>2) + 1

	switch {
	case n <= 0:
		return 1
	case n == 1:
		return 2
	case n >= maxPow:
		// Doubling maxPow would overflow, so it is the only candidate left.
		return maxPow
	}

	// Find the largest power of two not exceeding n. Since n < maxPow,
	// lower<<1 never exceeds maxPow and therefore cannot overflow.
	lower := 1
	for lower<<1 <= n {
		lower <<= 1
	}
	if lower == n {
		return n
	}

	upper := lower << 1
	if upper-n > n-lower {
		return lower
	}
	return upper // also taken on a tie, favouring the higher power
}
+ + It is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License in gpl.txt. + If not, see [GNU licenses](http://www.gnu.org/licenses). +*/ + +package audio + +import ( + "io/ioutil" + "os" + "strconv" + "testing" + "time" + + "bitbucket.org/ausocean/av/codec/codecutil" + "bitbucket.org/ausocean/utils/logger" +) + +func TestDevice(t *testing.T) { + // We want to open a device with a standard configuration. + ac := &Config{ + SampleRate: 8000, + Channels: 1, + RecPeriod: 0.3, + BitDepth: 16, + Codec: codecutil.ADPCM, + } + n := 2 // Number of periods to wait while recording. + + // Create a new audio Device, start, read/lex, and then stop it. + l := logger.New(logger.Debug, os.Stderr) + ai, err := NewDevice(ac, l) + // If there was an error opening the device, skip this test. + if _, ok := err.(OpenError); ok { + t.Skip(err) + } + // For any other error, report it. 
+ if err != nil { + t.Error(err) + } + err = ai.Start() + if err != nil { + t.Error(err) + } + chunkSize := ai.ChunkSize() + lexer := codecutil.NewByteLexer(&chunkSize) + go lexer.Lex(ioutil.Discard, ai, time.Duration(ac.RecPeriod*float64(time.Second))) + time.Sleep(time.Duration(ac.RecPeriod*float64(time.Second)) * time.Duration(n)) + ai.Stop() +} + +var powerTests = []struct { + in int + out int +}{ + {36, 32}, + {47, 32}, + {3, 4}, + {46, 32}, + {7, 8}, + {2, 2}, + {36, 32}, + {757, 512}, + {2464, 2048}, + {18980, 16384}, + {70000, 65536}, + {8192, 8192}, + {2048, 2048}, + {65536, 65536}, + {-2048, 1}, + {-127, 1}, + {-1, 1}, + {0, 1}, + {1, 2}, +} + +func TestNearestPowerOfTwo(t *testing.T) { + for _, tt := range powerTests { + t.Run(strconv.Itoa(tt.in), func(t *testing.T) { + v := nearestPowerOfTwo(tt.in) + if v != tt.out { + t.Errorf("got %v, want %v", v, tt.out) + } + }) + } +} diff --git a/revid/config.go b/revid/config.go index 4046f320..cf108db6 100644 --- a/revid/config.go +++ b/revid/config.go @@ -2,11 +2,9 @@ NAME Config.go -DESCRIPTION - See Readme.md - AUTHORS Saxon A. Nelson-Milton + Trek Hopton LICENSE Config.go is Copyright (C) 2017-2018 the Australian Ocean Lab (AusOcean) @@ -30,6 +28,7 @@ package revid import ( "errors" + "bitbucket.org/ausocean/av/codec/codecutil" "bitbucket.org/ausocean/utils/logger" ) @@ -75,6 +74,7 @@ const ( Raspivid V4L RTSP + Audio // Outputs. 
RTMP @@ -93,6 +93,7 @@ const ( defaultInput = Raspivid defaultOutput = HTTP defaultFrameRate = 25 + defaultWriteRate = 25 defaultWidth = 1280 defaultHeight = 720 defaultIntraRefreshPeriod = 100 @@ -101,7 +102,7 @@ const ( defaultBitrate = 400000 defaultFramesPerClip = 1 httpFramesPerClip = 560 - defaultInputCodec = H264 + defaultInputCodec = codecutil.H264 defaultVerbosity = logger.Error defaultRtpAddr = "localhost:6970" defaultBurstPeriod = 10 // Seconds @@ -109,6 +110,12 @@ const ( defaultBrightness = 50 defaultExposure = "auto" defaultAutoWhiteBalance = "auto" + + defaultAudioInputCodec = codecutil.ADPCM + defaultSampleRate = 48000 + defaultBitDepth = 16 + defaultChannels = 1 + defaultRecPeriod = 1.0 ) // Config provides parameters relevant to a revid instance. A new config must @@ -183,6 +190,9 @@ type Config struct { // Raspivid input supports custom framerate. FrameRate uint + // WriteRate is how many times a second revid encoders will be written to. + WriteRate float64 + // HTTPAddress defines a custom HTTP destination if we do not wish to use that // defined in /etc/netsender.conf. HTTPAddress string @@ -215,6 +225,13 @@ type Config struct { // defined at the start of the file. AutoWhiteBalance string + // Audio + SampleRate int // Samples a second (Hz). + RecPeriod float64 // How many seconds to record at a time. + Channels int // Number of audio channels, 1 for mono, 2 for stereo. + BitDepth int // Sample bit depth. + ChunkSize int // ChunkSize is the size of the chunks in the audio.Device's ringbuffer. + RTPAddress string // RTPAddress defines the RTP output destination. BurstPeriod uint // BurstPeriod defines the revid burst period in seconds. Rotation uint // Rotation defines the video rotation angle in degrees Raspivid input. 
@@ -240,7 +257,7 @@ func (c *Config) Validate(r *Revid) error { } switch c.Input { - case Raspivid, V4L, File, RTSP: + case Raspivid, V4L, File, Audio, RTSP: case NothingDefined: c.Logger.Log(logger.Info, pkg+"no input type defined, defaulting", "input", defaultInput) c.Input = defaultInput @@ -249,7 +266,7 @@ func (c *Config) Validate(r *Revid) error { } switch c.InputCodec { - case H264: + case codecutil.H264: // FIXME(kortschak): This is not really what we want. // Configuration really needs to be rethought here. if c.Quantize && c.Quantization == 0 { @@ -260,18 +277,22 @@ func (c *Config) Validate(r *Revid) error { return errors.New("bad bitrate and quantization combination for H264 input") } - case MJPEG: + case codecutil.MJPEG: if c.Quantization > 0 || c.Bitrate == 0 { return errors.New("bad bitrate or quantization for mjpeg input") } - - case NothingDefined: - c.Logger.Log(logger.Info, pkg+"no input codec defined, defaulting", "inputCodec", defaultInputCodec) - c.InputCodec = defaultInputCodec - c.Logger.Log(logger.Info, pkg+"defaulting quantization", "quantization", defaultQuantization) - c.Quantization = defaultQuantization + case codecutil.PCM, codecutil.ADPCM: default: - return errors.New("bad input codec defined in config") + switch c.Input { + case Audio: + c.Logger.Log(logger.Info, pkg+"input is audio but no codec defined, defaulting", "inputCodec", defaultAudioInputCodec) + c.InputCodec = defaultAudioInputCodec + default: + c.Logger.Log(logger.Info, pkg+"no input codec defined, defaulting", "inputCodec", defaultInputCodec) + c.InputCodec = defaultInputCodec + c.Logger.Log(logger.Info, pkg+"defaulting quantization", "quantization", defaultQuantization) + c.Quantization = defaultQuantization + } } if c.Outputs == nil { @@ -330,6 +351,31 @@ func (c *Config) Validate(r *Revid) error { c.FrameRate = defaultFrameRate } + if c.SampleRate == 0 { + c.Logger.Log(logger.Info, pkg+"no sample rate defined, defaulting", "sampleRate", defaultSampleRate) + 
c.SampleRate = defaultSampleRate + } + + if c.Channels == 0 { + c.Logger.Log(logger.Info, pkg+"no number of channels defined, defaulting", "Channels", defaultChannels) + c.Channels = defaultChannels + } + + if c.BitDepth == 0 { + c.Logger.Log(logger.Info, pkg+"no bit depth defined, defaulting", "BitDepth", defaultBitDepth) + c.BitDepth = defaultBitDepth + } + + if c.RecPeriod == 0 { + c.Logger.Log(logger.Info, pkg+"no record period defined, defaulting", "recPeriod", defaultRecPeriod) + c.RecPeriod = defaultRecPeriod + } + + if c.WriteRate == 0 { + c.Logger.Log(logger.Info, pkg+"no write rate defined, defaulting", "writeRate", defaultWriteRate) + c.WriteRate = defaultWriteRate + } + if c.Bitrate == 0 { c.Logger.Log(logger.Info, pkg+"no bitrate defined, defaulting", "bitrate", defaultBitrate) c.Bitrate = defaultBitrate diff --git a/revid/revid.go b/revid/revid.go index c05d00a5..410db7a2 100644 --- a/revid/revid.go +++ b/revid/revid.go @@ -2,13 +2,11 @@ NAME revid.go -DESCRIPTION - See Readme.md - AUTHORS Saxon A. Nelson-Milton Alan Noble Dan Kortschak + Trek Hopton LICENSE revid is Copyright (C) 2017-2018 the Australian Ocean Lab (AusOcean) @@ -27,6 +25,7 @@ LICENSE in gpl.txt. If not, see http://www.gnu.org/licenses. */ +// Package revid provides an API for reading, transcoding, and writing audio/video streams and files. 
package revid import ( @@ -41,10 +40,12 @@ import ( "sync" "time" + "bitbucket.org/ausocean/av/codec/codecutil" "bitbucket.org/ausocean/av/codec/h264" "bitbucket.org/ausocean/av/codec/h265" "bitbucket.org/ausocean/av/container/flv" "bitbucket.org/ausocean/av/container/mts" + "bitbucket.org/ausocean/av/input/audio" "bitbucket.org/ausocean/av/protocol/rtcp" "bitbucket.org/ausocean/av/protocol/rtp" "bitbucket.org/ausocean/av/protocol/rtsp" @@ -173,13 +174,15 @@ func (r *Revid) reset(config Config) error { r.config.Logger.SetLevel(config.LogLevel) err = r.setupPipeline( - func(dst io.WriteCloser, fps int) (io.WriteCloser, error) { + func(dst io.WriteCloser, fps float64) (io.WriteCloser, error) { var st int switch r.config.Input { case Raspivid, File, V4L: st = mts.EncodeH264 case RTSP: st = mts.EncodeH265 + case Audio: + st = mts.EncodeAudio } e := mts.NewEncoder(dst, float64(fps), st) return e, nil @@ -215,7 +218,7 @@ func (r *Revid) setConfig(config Config) error { // mtsEnc and flvEnc will be called to obtain an mts encoder and flv encoder // respectively. multiWriter will be used to create an ioext.multiWriteCloser // so that encoders can write to multiple senders. -func (r *Revid) setupPipeline(mtsEnc, flvEnc func(dst io.WriteCloser, rate int) (io.WriteCloser, error), multiWriter func(...io.WriteCloser) io.WriteCloser) error { +func (r *Revid) setupPipeline(mtsEnc func(dst io.WriteCloser, rate float64) (io.WriteCloser, error), flvEnc func(dst io.WriteCloser, rate int) (io.WriteCloser, error), multiWriter func(...io.WriteCloser) io.WriteCloser) error { // encoders will hold the encoders that are required for revid's current // configuration. var encoders []io.WriteCloser @@ -259,7 +262,7 @@ func (r *Revid) setupPipeline(mtsEnc, flvEnc func(dst io.WriteCloser, rate int) // as a destination. if len(mtsSenders) != 0 { mw := multiWriter(mtsSenders...) 
- e, _ := mtsEnc(mw, int(r.config.FrameRate)) + e, _ := mtsEnc(mw, r.config.WriteRate) encoders = append(encoders, e) } @@ -289,6 +292,9 @@ func (r *Revid) setupPipeline(mtsEnc, flvEnc func(dst io.WriteCloser, rate int) case RTSP: r.setupInput = r.startRTSPCamera r.lexTo = h265.NewLexer(false).Lex + case Audio: + r.setupInput = r.startAudioDevice + r.lexTo = codecutil.NewByteLexer(&r.config.ChunkSize).Lex } return nil @@ -533,7 +539,7 @@ func (r *Revid) startRaspivid() (func() error, error) { switch r.config.InputCodec { default: return nil, fmt.Errorf("revid: invalid input codec: %v", r.config.InputCodec) - case H264: + case codecutil.H264: args = append(args, "--codec", "H264", "--inline", @@ -542,7 +548,7 @@ func (r *Revid) startRaspivid() (func() error, error) { if r.config.Quantize { args = append(args, "-qp", fmt.Sprint(r.config.Quantization)) } - case MJPEG: + case codecutil.MJPEG: args = append(args, "--codec", "MJPEG") } r.config.Logger.Log(logger.Info, pkg+"raspivid args", "raspividArgs", strings.Join(args, " ")) @@ -615,10 +621,55 @@ func (r *Revid) setupInputForFile() (func() error, error) { // TODO(kortschak): Maybe we want a context.Context-aware parser that we can stop. r.wg.Add(1) - go r.processFrom(f, time.Second/time.Duration(r.config.FrameRate)) + go r.processFrom(f, 0) return func() error { return f.Close() }, nil } +// startAudioDevice is used to start capturing audio from an audio device and processing it. +// It returns a function that can be used to stop the device and any errors that occur. +func (r *Revid) startAudioDevice() (func() error, error) { + // Create audio device. 
+ ac := &audio.Config{ + SampleRate: r.config.SampleRate, + Channels: r.config.Channels, + RecPeriod: r.config.RecPeriod, + BitDepth: r.config.BitDepth, + Codec: r.config.InputCodec, + } + mts.Meta.Add("sampleRate", strconv.Itoa(r.config.SampleRate)) + mts.Meta.Add("channels", strconv.Itoa(r.config.Channels)) + mts.Meta.Add("period", fmt.Sprintf("%.6f", r.config.RecPeriod)) + mts.Meta.Add("bitDepth", strconv.Itoa(r.config.BitDepth)) + switch r.config.InputCodec { + case codecutil.PCM: + mts.Meta.Add("codec", "pcm") + case codecutil.ADPCM: + mts.Meta.Add("codec", "adpcm") + default: + r.config.Logger.Log(logger.Fatal, pkg+"no audio codec set in config") + } + + ai, err := audio.NewDevice(ac, r.config.Logger) + if err != nil { + r.config.Logger.Log(logger.Fatal, pkg+"failed to create audio device", "error", err.Error()) + } + + // Start audio device + err = ai.Start() + if err != nil { + r.config.Logger.Log(logger.Fatal, pkg+"failed to start audio device", "error", err.Error()) + } + + // Process output from audio device. + r.config.ChunkSize = ai.ChunkSize() + r.wg.Add(1) + go r.processFrom(ai, time.Duration(float64(time.Second)/r.config.WriteRate)) + return func() error { + ai.Stop() + return nil + }, nil +} + // startRTSPCamera uses RTSP to request an RTP stream from an IP camera. An RTP // client is created from which RTP packets containing either h264/h265 can read // by the selected lexer. diff --git a/revid/revid_test.go b/revid/revid_test.go index 36cc913d..8ab2e62f 100644 --- a/revid/revid_test.go +++ b/revid/revid_test.go @@ -41,7 +41,7 @@ import ( const raspividPath = "/usr/local/bin/raspivid" // Suppress all test logging, except for t.Errorf output. -var silent bool +var silent = true // TestRaspivid tests that raspivid starts correctly. // It is intended to be run on a Raspberry Pi. @@ -232,7 +232,7 @@ func TestResetEncoderSenderSetup(t *testing.T) { // This logic is what we want to check. 
err = rv.setupPipeline( - func(dst io.WriteCloser, rate int) (io.WriteCloser, error) { + func(dst io.WriteCloser, rate float64) (io.WriteCloser, error) { return &tstMtsEncoder{dst: dst}, nil }, func(dst io.WriteCloser, rate int) (io.WriteCloser, error) { diff --git a/revid/senders.go b/revid/senders.go index 7ae3c769..c9c980c7 100644 --- a/revid/senders.go +++ b/revid/senders.go @@ -57,7 +57,7 @@ type httpSender struct { log func(lvl int8, msg string, args ...interface{}) } -// newMinimalHttpSender returns a pointer to a new minimalHttpSender. +// newHttpSender returns a pointer to a new httpSender. func newHttpSender(ns *netsender.Sender, log func(lvl int8, msg string, args ...interface{})) *httpSender { return &httpSender{ client: ns,