From a755ccfc587342372ec67ec9ebc8b70ddd1ddc1c Mon Sep 17 00:00:00 2001 From: Saxon Date: Sun, 21 Jul 2019 22:11:24 +0930 Subject: [PATCH 1/3] codec/h264/h264dec: separated VUI and HRD from SPS struct Took out all VUI and HRD parameters from the SPS struct and gave them their own structs - VUIParameters and HRDParameters, along with 'constructors' NewVUIParameters and NewHRDParameters to parse from a bits.BitReader and populate the fields of the struct. --- codec/h264/h264dec/cabac.go | 4 +- codec/h264/h264dec/pps.go | 5 +- codec/h264/h264dec/slice.go | 57 ++- codec/h264/h264dec/slice_test.go | 24 +- codec/h264/h264dec/sps.go | 807 ++++++++++++++++--------------- 5 files changed, 451 insertions(+), 446 deletions(-) diff --git a/codec/h264/h264dec/cabac.go b/codec/h264/h264dec/cabac.go index 47d90835..8b3a6e4c 100644 --- a/codec/h264/h264dec/cabac.go +++ b/codec/h264/h264dec/cabac.go @@ -35,14 +35,14 @@ func YOffset(yRefMin16, refMbH int) int { } func MbWidthC(sps *SPS) int { mbWidthC := 16 / SubWidthC(sps) - if sps.ChromaFormat == chromaMonochrome || sps.UseSeparateColorPlane { + if sps.ChromaFormatIDC == chromaMonochrome || sps.SeparateColorPlaneFlag { mbWidthC = 0 } return mbWidthC } func MbHeightC(sps *SPS) int { mbHeightC := 16 / SubHeightC(sps) - if sps.ChromaFormat == chromaMonochrome || sps.UseSeparateColorPlane { + if sps.ChromaFormatIDC == chromaMonochrome || sps.SeparateColorPlaneFlag { mbHeightC = 0 } return mbHeightC diff --git a/codec/h264/h264dec/pps.go b/codec/h264/h264dec/pps.go index 139306c6..1f618c2e 100644 --- a/codec/h264/h264dec/pps.go +++ b/codec/h264/h264dec/pps.go @@ -195,7 +195,7 @@ func NewPPS(sps *SPS, rbsp []byte, showPacket bool) (*PPS, error) { if pps.PicScalingMatrixPresent { v := 6 - if sps.ChromaFormat != chroma444 { + if sps.ChromaFormatIDC != chroma444 { v = 2 } for i := 0; i < 6+(v*pps.Transform8x8Mode); i++ { @@ -231,9 +231,6 @@ func NewPPS(sps *SPS, rbsp []byte, showPacket bool) (*PPS, error) { // rbspTrailingBits() } - if 
showPacket { - debugPacket("PPS", pps) - } return &pps, nil } diff --git a/codec/h264/h264dec/slice.go b/codec/h264/h264dec/slice.go index b25b4502..8192d465 100644 --- a/codec/h264/h264dec/slice.go +++ b/codec/h264/h264dec/slice.go @@ -145,13 +145,13 @@ func (d SliceData) ae(v int) int { // 8.2.2 func MbToSliceGroupMap(sps *SPS, pps *PPS, header *SliceHeader) []int { mbaffFrameFlag := 0 - if sps.MBAdaptiveFrameField && !header.FieldPic { + if sps.MBAdaptiveFrameFieldFlag && !header.FieldPic { mbaffFrameFlag = 1 } mapUnitToSliceGroupMap := MapUnitToSliceGroupMap(sps, pps, header) mbToSliceGroupMap := []int{} for i := 0; i <= PicSizeInMbs(sps, header)-1; i++ { - if sps.FrameMbsOnly || header.FieldPic { + if sps.FrameMBSOnlyFlag || header.FieldPic { mbToSliceGroupMap = append(mbToSliceGroupMap, mapUnitToSliceGroupMap[i]) continue } @@ -159,7 +159,7 @@ func MbToSliceGroupMap(sps *SPS, pps *PPS, header *SliceHeader) []int { mbToSliceGroupMap = append(mbToSliceGroupMap, mapUnitToSliceGroupMap[i/2]) continue } - if !sps.FrameMbsOnly && !sps.MBAdaptiveFrameField && !header.FieldPic { + if !sps.FrameMBSOnlyFlag && !sps.MBAdaptiveFrameFieldFlag && !header.FieldPic { mbToSliceGroupMap = append( mbToSliceGroupMap, mapUnitToSliceGroupMap[(i/(2*PicWidthInMbs(sps)))*PicWidthInMbs(sps)+(i%PicWidthInMbs(sps))]) @@ -169,7 +169,7 @@ func MbToSliceGroupMap(sps *SPS, pps *PPS, header *SliceHeader) []int { } func PicWidthInMbs(sps *SPS) int { - return sps.PicWidthInMbsMinus1 + 1 + return sps.PicWidthInMBSMinus1 + 1 } func PicHeightInMapUnits(sps *SPS) int { return sps.PicHeightInMapUnitsMinus1 + 1 @@ -178,7 +178,7 @@ func PicSizeInMapUnits(sps *SPS) int { return PicWidthInMbs(sps) * PicHeightInMapUnits(sps) } func FrameHeightInMbs(sps *SPS) int { - return (2 - flagVal(sps.FrameMbsOnly)) * PicHeightInMapUnits(sps) + return (2 - flagVal(sps.FrameMBSOnlyFlag)) * PicHeightInMapUnits(sps) } func PicHeightInMbs(sps *SPS, header *SliceHeader) int { return FrameHeightInMbs(sps) / (1 + 
flagVal(header.FieldPic)) @@ -190,13 +190,13 @@ func PicSizeInMbs(sps *SPS, header *SliceHeader) int { // table 6-1 func SubWidthC(sps *SPS) int { n := 17 - if sps.UseSeparateColorPlane { - if sps.ChromaFormat == chroma444 { + if sps.SeparateColorPlaneFlag { + if sps.ChromaFormatIDC == chroma444 { return n } } - switch sps.ChromaFormat { + switch sps.ChromaFormatIDC { case chromaMonochrome: return n case chroma420: @@ -211,12 +211,12 @@ func SubWidthC(sps *SPS) int { } func SubHeightC(sps *SPS) int { n := 17 - if sps.UseSeparateColorPlane { - if sps.ChromaFormat == chroma444 { + if sps.SeparateColorPlaneFlag { + if sps.ChromaFormatIDC == chroma444 { return n } } - switch sps.ChromaFormat { + switch sps.ChromaFormatIDC { case chromaMonochrome: return n case chroma420: @@ -578,17 +578,17 @@ func nextMbAddress(n int, sps *SPS, pps *PPS, header *SliceHeader) int { i := n + 1 // picSizeInMbs is the number of macroblocks in picture 0 // 7-13 - // PicWidthInMbs = sps.PicWidthInMbsMinus1 + 1 + // PicWidthInMbs = sps.PicWidthInMBSMinus1 + 1 // PicHeightInMapUnits = sps.PicHeightInMapUnitsMinus1 + 1 // 7-29 // picSizeInMbs = PicWidthInMbs * PicHeightInMbs // 7-26 // PicHeightInMbs = FrameHeightInMbs / (1 + header.fieldPicFlag) // 7-18 - // FrameHeightInMbs = (2 - ps.FrameMbsOnly) * PicHeightInMapUnits - picWidthInMbs := sps.PicWidthInMbsMinus1 + 1 + // FrameHeightInMbs = (2 - ps.FrameMBSOnlyFlag) * PicHeightInMapUnits + picWidthInMbs := sps.PicWidthInMBSMinus1 + 1 picHeightInMapUnits := sps.PicHeightInMapUnitsMinus1 + 1 - frameHeightInMbs := (2 - flagVal(sps.FrameMbsOnly)) * picHeightInMapUnits + frameHeightInMbs := (2 - flagVal(sps.FrameMBSOnlyFlag)) * picHeightInMapUnits picHeightInMbs := frameHeightInMbs / (1 + flagVal(header.FieldPic)) picSizeInMbs := picWidthInMbs * picHeightInMbs mbToSliceGroupMap := MbToSliceGroupMap(sps, pps, header) @@ -600,7 +600,7 @@ func nextMbAddress(n int, sps *SPS, pps *PPS, header *SliceHeader) int { func CurrMbAddr(sps *SPS, header 
*SliceHeader) int { mbaffFrameFlag := 0 - if sps.MBAdaptiveFrameField && !header.FieldPic { + if sps.MBAdaptiveFrameFieldFlag && !header.FieldPic { mbaffFrameFlag = 1 } @@ -608,7 +608,7 @@ func CurrMbAddr(sps *SPS, header *SliceHeader) int { } func MbaffFrameFlag(sps *SPS, header *SliceHeader) int { - if sps.MBAdaptiveFrameField && !header.FieldPic { + if sps.MBAdaptiveFrameFieldFlag && !header.FieldPic { return 1 } return 0 @@ -630,7 +630,7 @@ func NewSliceData(sliceContext *SliceContext, br *bits.BitReader) (*SliceData, e } } mbaffFrameFlag := 0 - if sliceContext.SPS.MBAdaptiveFrameField && !sliceContext.Slice.Header.FieldPic { + if sliceContext.SPS.MBAdaptiveFrameFieldFlag && !sliceContext.Slice.Header.FieldPic { mbaffFrameFlag = 1 } currMbAddr := sliceContext.Slice.Header.FirstMbInSlice * (1 * mbaffFrameFlag) @@ -791,7 +791,7 @@ func NewSliceData(sliceContext *SliceContext, br *bits.BitReader) (*SliceData, e mbWidthC := 16 / SubWidthC(sliceContext.SPS) mbHeightC := 16 / SubHeightC(sliceContext.SPS) // if monochrome - if sliceContext.SPS.ChromaFormat == chromaMonochrome || sliceContext.SPS.UseSeparateColorPlane { + if sliceContext.SPS.ChromaFormatIDC == chromaMonochrome || sliceContext.SPS.SeparateColorPlaneFlag { mbWidthC = 0 mbHeightC = 0 } @@ -824,7 +824,7 @@ func NewSliceData(sliceContext *SliceContext, br *bits.BitReader) (*SliceData, e if NumbSubMbPart(subMbType[mbPartIdx]) > 1 { noSubMbPartSizeLessThan8x8Flag = 0 } - } else if !sliceContext.SPS.Direct8x8Inference { + } else if !sliceContext.SPS.Direct8x8InferenceFlag { noSubMbPartSizeLessThan8x8Flag = 0 } } @@ -876,7 +876,7 @@ func NewSliceData(sliceContext *SliceContext, br *bits.BitReader) (*SliceData, e } // sliceContext.Slice.Data.CodedBlockPattern = me(v) | ae(v) - if CodedBlockPatternLuma(sliceContext.Slice.Data) > 0 && sliceContext.PPS.Transform8x8Mode == 1 && sliceContext.Slice.Data.MbTypeName != "I_NxN" && noSubMbPartSizeLessThan8x8Flag == 1 && (sliceContext.Slice.Data.MbTypeName != 
"B_Direct_16x16" || sliceContext.SPS.Direct8x8Inference) { + if CodedBlockPatternLuma(sliceContext.Slice.Data) > 0 && sliceContext.PPS.Transform8x8Mode == 1 && sliceContext.Slice.Data.MbTypeName != "I_NxN" && noSubMbPartSizeLessThan8x8Flag == 1 && (sliceContext.Slice.Data.MbTypeName != "B_Direct_16x16" || sliceContext.SPS.Direct8x8InferenceFlag) { // TODO: 1 bit or ae(v) if sliceContext.PPS.EntropyCodingMode == 1 { binarization := NewBinarization("Transform8x8Flag", sliceContext.Slice.Data) @@ -956,10 +956,10 @@ func NewSliceContext(videoStream *VideoStream, nalUnit *NalUnit, rbsp []byte, sh idrPic = true } header := SliceHeader{} - if sps.UseSeparateColorPlane { + if sps.SeparateColorPlaneFlag { header.ChromaArrayType = 0 } else { - header.ChromaArrayType = sps.ChromaFormat + header.ChromaArrayType = sps.ChromaFormatIDC } br := bits.NewBitReader(bytes.NewReader(rbsp)) @@ -980,7 +980,7 @@ func NewSliceContext(videoStream *VideoStream, nalUnit *NalUnit, rbsp []byte, sh return nil, errors.Wrap(err, "could not parse PPSID") } - if sps.UseSeparateColorPlane { + if sps.SeparateColorPlaneFlag { b, err := br.ReadBits(2) if err != nil { return nil, errors.Wrap(err, "could not read ColorPlaneID") @@ -989,7 +989,7 @@ func NewSliceContext(videoStream *VideoStream, nalUnit *NalUnit, rbsp []byte, sh } // TODO: See 7.4.3 // header.FrameNum = b.NextField("FrameNum", 0) - if !sps.FrameMbsOnly { + if !sps.FrameMBSOnlyFlag { b, err := br.ReadBits(1) if err != nil { return nil, errors.Wrap(err, "could not read FieldPic") @@ -1023,7 +1023,7 @@ func NewSliceContext(videoStream *VideoStream, nalUnit *NalUnit, rbsp []byte, sh } } } - if sps.PicOrderCountType == 1 && !sps.DeltaPicOrderAlwaysZero { + if sps.PicOrderCountType == 1 && !sps.DeltaPicOrderAlwaysZeroFlag { header.DeltaPicOrderCnt[0], err = readSe(br) if err != nil { return nil, errors.Wrap(err, "could not parse DeltaPicOrderCnt") @@ -1361,9 +1361,6 @@ func NewSliceContext(videoStream *VideoStream, nalUnit *NalUnit, rbsp []byte, 
sh if err != nil { return nil, errors.Wrap(err, "could not create slice data") } - if showPacket { - debugPacket("debug: Header", sliceContext.Slice.Header) - debugPacket("debug: Data", sliceContext.Slice.Data) - } + return sliceContext, nil } diff --git a/codec/h264/h264dec/slice_test.go b/codec/h264/h264dec/slice_test.go index e7988a7e..d1db5df1 100644 --- a/codec/h264/h264dec/slice_test.go +++ b/codec/h264/h264dec/slice_test.go @@ -7,12 +7,12 @@ var subWidthCTests = []struct { want int }{ {SPS{}, 17}, - {SPS{ChromaFormat: 0}, 17}, - {SPS{ChromaFormat: 1}, 2}, - {SPS{ChromaFormat: 2}, 2}, - {SPS{ChromaFormat: 3}, 1}, - {SPS{ChromaFormat: 3, UseSeparateColorPlane: true}, 17}, - {SPS{ChromaFormat: 999}, 17}, + {SPS{ChromaFormatIDC: 0}, 17}, + {SPS{ChromaFormatIDC: 1}, 2}, + {SPS{ChromaFormatIDC: 2}, 2}, + {SPS{ChromaFormatIDC: 3}, 1}, + {SPS{ChromaFormatIDC: 3, SeparateColorPlaneFlag: true}, 17}, + {SPS{ChromaFormatIDC: 999}, 17}, } // TestSubWidthC tests that the correct SubWidthC is returned given @@ -30,12 +30,12 @@ var subHeightCTests = []struct { want int }{ {SPS{}, 17}, - {SPS{ChromaFormat: 0}, 17}, - {SPS{ChromaFormat: 1}, 2}, - {SPS{ChromaFormat: 2}, 1}, - {SPS{ChromaFormat: 3}, 1}, - {SPS{ChromaFormat: 3, UseSeparateColorPlane: true}, 17}, - {SPS{ChromaFormat: 999}, 17}, + {SPS{ChromaFormatIDC: 0}, 17}, + {SPS{ChromaFormatIDC: 1}, 2}, + {SPS{ChromaFormatIDC: 2}, 1}, + {SPS{ChromaFormatIDC: 3}, 1}, + {SPS{ChromaFormatIDC: 3, SeparateColorPlaneFlag: true}, 17}, + {SPS{ChromaFormatIDC: 999}, 17}, } // TestSubHeightC tests that the correct SubHeightC is returned given diff --git a/codec/h264/h264dec/sps.go b/codec/h264/h264dec/sps.go index c4aabd22..bd3e1184 100644 --- a/codec/h264/h264dec/sps.go +++ b/codec/h264/h264dec/sps.go @@ -2,112 +2,11 @@ package h264dec import ( "bytes" - "fmt" - "strings" "bitbucket.org/ausocean/av/codec/h264/h264dec/bits" "github.com/pkg/errors" ) -// Specification Page 43 7.3.2.1.1 -// Range is always inclusive -// XRange is always 
exclusive -type SPS struct { - // 8 bits - Profile int - // 6 bits - Constraint0, Constraint1 int - Constraint2, Constraint3 int - Constraint4, Constraint5 int - // 2 bit reserved 0 bits - // 8 bits - Level int - // Range 0 - 31 ; 6 bits - ID int - ChromaFormat int - UseSeparateColorPlane bool - BitDepthLumaMinus8 int - BitDepthChromaMinus8 int - QPrimeYZeroTransformBypass bool - SeqScalingMatrixPresent bool - // Delta is (0-12)-1 ; 4 bits - SeqScalingList []bool // se - // Range 0 - 12; 4 bits - Log2MaxFrameNumMinus4 int - // Range 0 - 2; 2 bits - PicOrderCountType int - // Range 0 - 12; 4 bits - Log2MaxPicOrderCntLSBMin4 int - DeltaPicOrderAlwaysZero bool - // Range (-2^31)+1 to (2^31)-1 ; 31 bits - OffsetForNonRefPic int // Value - 1 (se) - // Range (-2^31)+1 to (2^31)-1 ; 31 bits - OffsetForTopToBottomField int // Value - 1 (se) - // Range 0 - 255 ; 8 bits - NumRefFramesInPicOrderCntCycle int - // Range (-2^31)+1 to (2^31)-1 ; 31 bits - OffsetForRefFrameList []int // Value - 1 ([]se) - // Range 0 - MaxDpbFrames - MaxNumRefFrames int - GapsInFrameNumValueAllowed bool - // Page 77 - PicWidthInMbsMinus1 int - // Page 77 - PicHeightInMapUnitsMinus1 int - FrameMbsOnly bool - MBAdaptiveFrameField bool - Direct8x8Inference bool - FrameCropping bool - FrameCropLeftOffset int - FrameCropRightOffset int - FrameCropTopOffset int - FrameCropBottomOffset int - VuiParametersPresent bool - VuiParameters []int - AspectRatioInfoPresent bool - AspectRatio int - SarWidth int - SarHeight int - OverscanInfoPresent bool - OverscanAppropriate bool - VideoSignalTypePresent bool - VideoFormat int - VideoFullRange bool - ColorDescriptionPresent bool - ColorPrimaries int - TransferCharacteristics int - MatrixCoefficients int - ChromaLocInfoPresent bool - ChromaSampleLocTypeTopField int - ChromaSampleLocTypeBottomField int - CpbCntMinus1 int - BitRateScale int - CpbSizeScale int - BitRateValueMinus1 []int - Cbr []bool - InitialCpbRemovalDelayLengthMinus1 int - CpbRemovalDelayLengthMinus1 
int - CpbSizeValueMinus1 []int - DpbOutputDelayLengthMinus1 int - TimeOffsetLength int - TimingInfoPresent bool - NumUnitsInTick int - TimeScale int - NalHrdParametersPresent bool - FixedFrameRate bool - VclHrdParametersPresent bool - LowHrdDelay bool - PicStructPresent bool - BitstreamRestriction bool - MotionVectorsOverPicBoundaries bool - MaxBytesPerPicDenom int - MaxBitsPerMbDenom int - Log2MaxMvLengthHorizontal int - Log2MaxMvLengthVertical int - MaxDecFrameBuffering int - MaxNumReorderFrames int -} - var ( DefaultScalingMatrix4x4 = [][]int{ {6, 13, 20, 28, 13, 20, 28, 32, 20, 28, 32, 37, 28, 32, 37, 42}, @@ -161,98 +60,62 @@ var ( ScalingList8x8 = ScalingList4x4 ) -func isInList(l []int, term int) bool { - for _, m := range l { - if m == term { - return true - } - } - return false -} -func debugPacket(name string, packet interface{}) { - logger.Printf("debug: %s packet\n", name) - for _, line := range strings.Split(fmt.Sprintf("%+v", packet), " ") { - logger.Printf("debug: \t%#v\n", line) - } -} -func scalingList(br *bits.BitReader, scalingList []int, sizeOfScalingList int, defaultScalingMatrix []int) error { - lastScale := 8 - nextScale := 8 - for i := 0; i < sizeOfScalingList; i++ { - if nextScale != 0 { - deltaScale, err := readSe(br) - if err != nil { - return errors.Wrap(err, "could not parse deltaScale") - } - nextScale = (lastScale + deltaScale + 256) % 256 - if i == 0 && nextScale == 0 { - // Scaling list should use the default list for this point in the matrix - _ = defaultScalingMatrix - } - } - if nextScale == 0 { - scalingList[i] = lastScale - } else { - scalingList[i] = nextScale - } - lastScale = scalingList[i] - } - return nil +// SPS describes a sequence parameter set as defined by section 7.3.2.1.1 in +// the Specifications. 
+type SPS struct { + Profile int + Constraint0 int + Constraint1 int + Constraint2 int + Constraint3 int + Constraint4 int + Constraint5 int + LevelIDC int + SPSID int + ChromaFormatIDC int + SeparateColorPlaneFlag bool + BitDepthLumaMinus8 int + BitDepthChromaMinus8 int + QPPrimeYZeroTransformBypassFlag bool + SeqScalingMatrixPresentFlag bool + SeqScalingListPresentFlag []bool + ScalingList4x4 [][]int + UseDefaultScalingMatrix4x4Flag []bool + ScalingList8x8 [][]int + UseDefaultScalingMatrix8x8Flag []bool + Log2MaxFrameNumMinus4 int + PicOrderCountType int + Log2MaxPicOrderCntLSBMin4 int + DeltaPicOrderAlwaysZeroFlag bool + OffsetForNonRefPic int + OffsetForTopToBottomField int + NumRefFramesInPicOrderCntCycle int + OffsetForRefFrameList []int + MaxNumRefFrames int + GapsInFrameNumValueAllowed bool + PicWidthInMBSMinus1 int + PicHeightInMapUnitsMinus1 int + FrameMBSOnlyFlag bool + MBAdaptiveFrameFieldFlag bool + Direct8x8InferenceFlag bool + FrameCroppingFlag bool + FrameCropLeftOffset int + FrameCropRightOffset int + FrameCropTopOffset int + FrameCropBottomOffset int + VUIParametersPresentFlag bool + VUIParameters *VUIParameters +} + +// NewSPS parses a sequence parameter set raw byte sequence from rbsp following +// the syntax structure specified in section 7.3.2.1.1, and returns it as a new +// SPS. 
func NewSPS(rbsp []byte, showPacket bool) (*SPS, error) { logger.Printf("debug: SPS RBSP %d bytes %d bits\n", len(rbsp), len(rbsp)*8) logger.Printf("debug: \t%#v\n", rbsp[0:8]) sps := SPS{} br := bits.NewBitReader(bytes.NewReader(rbsp)) var err error - hrdParameters := func() error { - sps.CpbCntMinus1, err = readUe(br) - if err != nil { - return errors.Wrap(err, "could not parse CpbCntMinus1") - } - - err := readFields(br, []field{ - {&sps.BitRateScale, "BitRateScale", 4}, - {&sps.CpbSizeScale, "CpbSizeScale", 4}, - }) - if err != nil { - return err - } - - // SchedSelIdx E1.2 - for sseli := 0; sseli <= sps.CpbCntMinus1; sseli++ { - ue, err := readUe(br) - if err != nil { - return errors.Wrap(err, "could not parse BitRateValueMinus1") - } - sps.BitRateValueMinus1 = append(sps.BitRateValueMinus1, ue) - - ue, err = readUe(br) - if err != nil { - return errors.Wrap(err, "could not parse CpbSizeValueMinus1") - } - sps.CpbSizeValueMinus1 = append(sps.CpbSizeValueMinus1, ue) - - if v, _ := br.ReadBits(1); v == 1 { - sps.Cbr = append(sps.Cbr, true) - } else { - sps.Cbr = append(sps.Cbr, false) - } - - err = readFields(br, - []field{ - {&sps.InitialCpbRemovalDelayLengthMinus1, "InitialCpbRemovalDelayLengthMinus1", 5}, - {&sps.CpbRemovalDelayLengthMinus1, "CpbRemovalDelayLengthMinus1", 5}, - {&sps.DpbOutputDelayLengthMinus1, "DpbOutputDelayLengthMinus1", 5}, - {&sps.TimeOffsetLength, "TimeOffsetLength", 5}, - }, - ) - if err != nil { - return err - } - } - return nil - } err = readFields(br, []field{ @@ -275,30 +138,30 @@ func NewSPS(rbsp []byte, showPacket bool) (*SPS, error) { if err != nil { return nil, errors.Wrap(err, "could not read Level") } - sps.Level = int(b) + sps.LevelIDC = int(b) // sps.ID = b.NextField("SPSID", 6) // proper - sps.ID, err = readUe(br) + sps.SPSID, err = readUe(br) if err != nil { return nil, errors.Wrap(err, "could not parse ID") } - sps.ChromaFormat, err = readUe(br) + sps.ChromaFormatIDC, err = readUe(br) if err != nil { - return nil, 
errors.Wrap(err, "could not parse ChromaFormat") + return nil, errors.Wrap(err, "could not parse ChromaFormatIDC") } // This should be done only for certain ProfileIDC: isProfileIDC := []int{100, 110, 122, 244, 44, 83, 86, 118, 128, 138, 139, 134, 135} // SpecialProfileCase1 if isInList(isProfileIDC, sps.Profile) { - if sps.ChromaFormat == chroma444 { + if sps.ChromaFormatIDC == chroma444 { // TODO: should probably deal with error here. b, err := br.ReadBits(1) if err != nil { return nil, errors.Wrap(err, "could not read UseSeparateColorPlaneFlag") } - sps.UseSeparateColorPlane = b == 1 + sps.SeparateColorPlaneFlag = b == 1 } sps.BitDepthLumaMinus8, err = readUe(br) @@ -315,17 +178,17 @@ func NewSPS(rbsp []byte, showPacket bool) (*SPS, error) { if err != nil { return nil, errors.Wrap(err, "could not read QPrimeYZeroTransformBypass") } - sps.QPrimeYZeroTransformBypass = b == 1 + sps.QPPrimeYZeroTransformBypassFlag = b == 1 b, err = br.ReadBits(1) if err != nil { return nil, errors.Wrap(err, "could not read SeqScalingMatrixPresent") } - sps.SeqScalingMatrixPresent = b == 1 + sps.SeqScalingMatrixPresentFlag = b == 1 - if sps.SeqScalingMatrixPresent { + if sps.SeqScalingMatrixPresentFlag { max := 12 - if sps.ChromaFormat != chroma444 { + if sps.ChromaFormatIDC != chroma444 { max = 8 } logger.Printf("debug: \tbuilding Scaling matrix for %d elements\n", max) @@ -334,9 +197,9 @@ func NewSPS(rbsp []byte, showPacket bool) (*SPS, error) { if err != nil { return nil, errors.Wrap(err, "could not read SeqScalingList") } - sps.SeqScalingList = append(sps.SeqScalingList, b == 1) + sps.SeqScalingListPresentFlag = append(sps.SeqScalingListPresentFlag, b == 1) - if sps.SeqScalingList[i] { + if sps.SeqScalingListPresentFlag[i] { if i < 6 { scalingList( br, @@ -380,7 +243,7 @@ func NewSPS(rbsp []byte, showPacket bool) (*SPS, error) { if err != nil { return nil, errors.Wrap(err, "could not read DeltaPicOrderAlwaysZero") } - sps.DeltaPicOrderAlwaysZero = b == 1 + 
sps.DeltaPicOrderAlwaysZeroFlag = b == 1 sps.OffsetForNonRefPic, err = readSe(br) if err != nil { @@ -420,7 +283,7 @@ func NewSPS(rbsp []byte, showPacket bool) (*SPS, error) { } sps.GapsInFrameNumValueAllowed = b == 1 - sps.PicWidthInMbsMinus1, err = readUe(br) + sps.PicWidthInMBSMinus1, err = readUe(br) if err != nil { return nil, errors.Wrap(err, "could not parse PicWidthInMbsMinus1") } @@ -434,25 +297,25 @@ func NewSPS(rbsp []byte, showPacket bool) (*SPS, error) { if err != nil { return nil, errors.Wrap(err, "could not read FrameMbsOnly") } - sps.FrameMbsOnly = b == 1 + sps.FrameMBSOnlyFlag = b == 1 - if !sps.FrameMbsOnly { + if !sps.FrameMBSOnlyFlag { b, err = br.ReadBits(1) if err != nil { return nil, errors.Wrap(err, "could not read MBAdaptiveFrameField") } - sps.MBAdaptiveFrameField = b == 1 + sps.MBAdaptiveFrameFieldFlag = b == 1 } err = readFlags(br, []flag{ - {&sps.Direct8x8Inference, "Direct8x8Inference"}, - {&sps.FrameCropping, "FrameCropping"}, + {&sps.Direct8x8InferenceFlag, "Direct8x8Inference"}, + {&sps.FrameCroppingFlag, "FrameCropping"}, }) if err != nil { return nil, err } - if sps.FrameCropping { + if sps.FrameCroppingFlag { sps.FrameCropLeftOffset, err = readUe(br) if err != nil { return nil, errors.Wrap(err, "could not parse FrameCropLeftOffset") @@ -478,213 +341,361 @@ func NewSPS(rbsp []byte, showPacket bool) (*SPS, error) { if err != nil { return nil, errors.Wrap(err, "could not read VuiParametersPresent") } - sps.VuiParametersPresent = b == 1 + sps.VUIParametersPresentFlag = b == 1 - if sps.VuiParametersPresent { - // vui_parameters + if sps.VUIParametersPresentFlag { + + } // End VuiParameters Annex E.1.1 + + return &sps, nil +} + +// VUIParameters describes video usability information parameters as defined +// by section E.1.1 in the Specifications. 
+type VUIParameters struct { + AspectRatioInfoPresentFlag bool + AspectRatioIDC int + SARWidth int + SARHeight int + OverscanInfoPresentFlag bool + OverscanAppropriateFlag bool + VideoSignalTypePresentFlag bool + VideoFormat int + VideoFullRangeFlag bool + ColorDescriptionPresentFlag bool + ColorPrimaries int + TransferCharacteristics int + MatrixCoefficients int + ChromaLocInfoPresentFlag bool + ChromaSampleLocTypeTopField int + ChromaSampleLocTypeBottomField int + TimingInfoPresentFlag bool + NumUnitsInTick int + TimeScale int + FixedFrameRateFlag bool + NALHRDParametersPresentFlag bool + NALHRDParameters *HRDParameters + VCLHRDParametersPresentFlag bool + VCLHRDParameters *HRDParameters + LowDelayHRDFlag bool + PicStructPresentFlag bool + BitstreamRestrictionFlag bool + MotionVectorsOverPicBoundariesFlag bool + MaxBytesPerPicDenom int + MaxBitsPerMBDenom int + Log2MaxMVLengthHorizontal int + Log2MaxMVLengthVertical int + MaxNumReorderFrames int + MaxDecFrameBuffering int +} + +// NewVUIParameters parses video usability information parameters from br +// following the syntax structure specified in section E.1.1, and returns as a +// new VUIParameters. 
+func NewVUIParameters(br *bits.BitReader) (*VUIParameters, error) { + p := &VUIParameters{} + + b, err := br.ReadBits(1) + if err != nil { + return nil, errors.Wrap(err, "could not read AspectRatioInfoPresent") + } + p.AspectRatioInfoPresentFlag = b == 1 + + if p.AspectRatioInfoPresentFlag { + b, err = br.ReadBits(8) + if err != nil { + return nil, errors.Wrap(err, "could not read AspectRatio") + } + p.AspectRatioIDC = int(b) + + EXTENDED_SAR := 999 + if p.AspectRatioIDC == EXTENDED_SAR { + b, err = br.ReadBits(16) + if err != nil { + return nil, errors.Wrap(err, "could not read SarWidth") + } + p.SARWidth = int(b) + + b, err = br.ReadBits(16) + if err != nil { + return nil, errors.Wrap(err, "could not read SarHeight") + } + p.SARHeight = int(b) + } + } + + b, err = br.ReadBits(1) + if err != nil { + return nil, errors.Wrap(err, "could not read OverscanInfoPresent") + } + p.OverscanInfoPresentFlag = b == 1 + + if p.OverscanInfoPresentFlag { b, err = br.ReadBits(1) if err != nil { - return nil, errors.Wrap(err, "could not read AspectRatioInfoPresent") + return nil, errors.Wrap(err, "could not read OverscanAppropriate") } - sps.AspectRatioInfoPresent = b == 1 + p.OverscanAppropriateFlag = b == 1 + } - if sps.AspectRatioInfoPresent { - b, err = br.ReadBits(8) - if err != nil { - return nil, errors.Wrap(err, "could not read AspectRatio") - } - sps.AspectRatio = int(b) + b, err = br.ReadBits(1) + if err != nil { + return nil, errors.Wrap(err, "could not read VideoSignalTypePresent") + } + p.VideoSignalTypePresentFlag = b == 1 - EXTENDED_SAR := 999 - if sps.AspectRatio == EXTENDED_SAR { - b, err = br.ReadBits(16) - if err != nil { - return nil, errors.Wrap(err, "could not read SarWidth") - } - sps.SarWidth = int(b) - - b, err = br.ReadBits(16) - if err != nil { - return nil, errors.Wrap(err, "could not read SarHeight") - } - sps.SarHeight = int(b) - } + if p.VideoSignalTypePresentFlag { + b, err = br.ReadBits(3) + if err != nil { + return nil, errors.Wrap(err, "could 
not read VideoFormat") } + p.VideoFormat = int(b) + } + + if p.VideoSignalTypePresentFlag { + b, err = br.ReadBits(1) + if err != nil { + return nil, errors.Wrap(err, "could not read VideoFullRange") + } + p.VideoFullRangeFlag = b == 1 b, err = br.ReadBits(1) if err != nil { - return nil, errors.Wrap(err, "could not read OverscanInfoPresent") + return nil, errors.Wrap(err, "could not read ColorDescriptionPresent") } - sps.OverscanInfoPresent = b == 1 + p.ColorDescriptionPresentFlag = b == 1 - if sps.OverscanInfoPresent { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read OverscanAppropriate") - } - sps.OverscanAppropriate = b == 1 - } - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read VideoSignalTypePresent") - } - sps.VideoSignalTypePresent = b == 1 - - if sps.VideoSignalTypePresent { - b, err = br.ReadBits(3) - if err != nil { - return nil, errors.Wrap(err, "could not read VideoFormat") - } - sps.VideoFormat = int(b) - } - - if sps.VideoSignalTypePresent { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read VideoFullRange") - } - sps.VideoFullRange = b == 1 - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read ColorDescriptionPresent") - } - sps.ColorDescriptionPresent = b == 1 - - if sps.ColorDescriptionPresent { - err = readFields(br, - []field{ - {&sps.ColorPrimaries, "ColorPrimaries", 8}, - {&sps.TransferCharacteristics, "TransferCharacteristics", 8}, - {&sps.MatrixCoefficients, "MatrixCoefficients", 8}, - }, - ) - if err != nil { - return nil, err - } - } - } - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read ChromaLocInfoPresent") - } - sps.ChromaLocInfoPresent = b == 1 - - if sps.ChromaLocInfoPresent { - sps.ChromaSampleLocTypeTopField, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse ChromaSampleLocTypeTopField") - } - - 
sps.ChromaSampleLocTypeBottomField, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse ChromaSampleLocTypeBottomField") - } - } - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read TimingInfoPresent") - } - sps.TimingInfoPresent = b == 1 - - if sps.TimingInfoPresent { - err := readFields(br, []field{ - {&sps.NumUnitsInTick, "NumUnitsInTick", 32}, - {&sps.TimeScale, "TimeScale", 32}, - }) + if p.ColorDescriptionPresentFlag { + err = readFields(br, + []field{ + {&p.ColorPrimaries, "ColorPrimaries", 8}, + {&p.TransferCharacteristics, "TransferCharacteristics", 8}, + {&p.MatrixCoefficients, "MatrixCoefficients", 8}, + }, + ) if err != nil { return nil, err } - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read FixedFrameRate") - } - sps.FixedFrameRate = b == 1 } - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read NalHrdParametersPresent") - } - sps.NalHrdParametersPresent = b == 1 - - if sps.NalHrdParametersPresent { - err = hrdParameters() - if err != nil { - return nil, errors.Wrap(err, "could not get hrdParameters") - } - } - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read VclHrdParametersPresent") - } - sps.VclHrdParametersPresent = b == 1 - - if sps.VclHrdParametersPresent { - err = hrdParameters() - if err != nil { - return nil, errors.Wrap(err, "could not get hrdParameters") - } - } - if sps.NalHrdParametersPresent || sps.VclHrdParametersPresent { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read LowHrdDelay") - } - sps.LowHrdDelay = b == 1 - } - - err := readFlags(br, []flag{ - {&sps.PicStructPresent, "PicStructPresent"}, - {&sps.BitstreamRestriction, "BitStreamRestriction"}, - }) - - if sps.BitstreamRestriction { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read 
MotionVectorsOverPicBoundaries") - } - sps.MotionVectorsOverPicBoundaries = b == 1 - - sps.MaxBytesPerPicDenom, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MaxBytesPerPicDenom") - } - - sps.MaxBitsPerMbDenom, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MaxBitsPerMbDenom") - } - - sps.Log2MaxMvLengthHorizontal, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse Log2MaxMvLengthHorizontal") - } - - sps.Log2MaxMvLengthVertical, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse Log2MaxMvLengthVertical") - } - - sps.MaxNumReorderFrames, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MaxNumReorderFrames") - } - - sps.MaxDecFrameBuffering, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MaxDecFrameBuffering") - } - } - - } // End VuiParameters Annex E.1.1 - if showPacket { - debugPacket("SPS", sps) } - return &sps, nil + + b, err = br.ReadBits(1) + if err != nil { + return nil, errors.Wrap(err, "could not read ChromaLocInfoPresent") + } + p.ChromaLocInfoPresentFlag = b == 1 + + if p.ChromaLocInfoPresentFlag { + p.ChromaSampleLocTypeTopField, err = readUe(br) + if err != nil { + return nil, errors.Wrap(err, "could not parse ChromaSampleLocTypeTopField") + } + + p.ChromaSampleLocTypeBottomField, err = readUe(br) + if err != nil { + return nil, errors.Wrap(err, "could not parse ChromaSampleLocTypeBottomField") + } + } + + b, err = br.ReadBits(1) + if err != nil { + return nil, errors.Wrap(err, "could not read TimingInfoPresent") + } + p.TimingInfoPresentFlag = b == 1 + + if p.TimingInfoPresentFlag { + err := readFields(br, []field{ + {&p.NumUnitsInTick, "NumUnitsInTick", 32}, + {&p.TimeScale, "TimeScale", 32}, + }) + if err != nil { + return nil, err + } + + b, err = br.ReadBits(1) + if err != nil { + return nil, errors.Wrap(err, "could not read FixedFrameRate") + } 
+ p.FixedFrameRateFlag = b == 1 + } + + b, err = br.ReadBits(1) + if err != nil { + return nil, errors.Wrap(err, "could not read NalHrdParametersPresent") + } + p.NALHRDParametersPresentFlag = b == 1 + + if p.NALHRDParametersPresentFlag { + p.NALHRDParameters, err = NewHRDParameters(br) + if err != nil { + return nil, errors.Wrap(err, "could not get hrdParameters") + } + } + + b, err = br.ReadBits(1) + if err != nil { + return nil, errors.Wrap(err, "could not read VclHrdParametersPresent") + } + p.VCLHRDParametersPresentFlag = b == 1 + + if p.VCLHRDParametersPresentFlag { + p.VCLHRDParameters, err = NewHRDParameters(br) + if err != nil { + return nil, errors.Wrap(err, "could not get hrdParameters") + } + } + if p.NALHRDParametersPresentFlag || p.VCLHRDParametersPresentFlag { + b, err = br.ReadBits(1) + if err != nil { + return nil, errors.Wrap(err, "could not read LowHrdDelay") + } + p.LowDelayHRDFlag = b == 1 + } + + err = readFlags(br, []flag{ + {&p.PicStructPresentFlag, "PicStructPresent"}, + {&p.BitstreamRestrictionFlag, "BitStreamRestriction"}, + }) + + if p.BitstreamRestrictionFlag { + b, err = br.ReadBits(1) + if err != nil { + return nil, errors.Wrap(err, "could not read MotionVectorsOverPicBoundaries") + } + p.MotionVectorsOverPicBoundariesFlag = b == 1 + + p.MaxBytesPerPicDenom, err = readUe(br) + if err != nil { + return nil, errors.Wrap(err, "could not parse MaxBytesPerPicDenom") + } + + p.MaxBitsPerMBDenom, err = readUe(br) + if err != nil { + return nil, errors.Wrap(err, "could not parse MaxBitsPerMbDenom") + } + + p.Log2MaxMVLengthHorizontal, err = readUe(br) + if err != nil { + return nil, errors.Wrap(err, "could not parse Log2MaxMvLengthHorizontal") + } + + p.Log2MaxMVLengthVertical, err = readUe(br) + if err != nil { + return nil, errors.Wrap(err, "could not parse Log2MaxMvLengthVertical") + } + + p.MaxNumReorderFrames, err = readUe(br) + if err != nil { + return nil, errors.Wrap(err, "could not parse MaxNumReorderFrames") + } + + 
p.MaxDecFrameBuffering, err = readUe(br) + if err != nil { + return nil, errors.Wrap(err, "could not parse MaxDecFrameBuffering") + } + } + return p, nil +} + +// HRDParameters describes hypothetical reference decoder parameters as defined +// by section E.1.2 in the specifications. +type HRDParameters struct { + CPBCntMinus1 int + BitRateScale int + CPBSizeScale int + BitRateValueMinus1 []int + CPBSizeValueMinus1 []int + CBRFlag []bool + InitialCPBRemovalDelayLenMinus1 int + CPBRemovalDelayLenMinus1 int + DPBOutputDelayLenMinus1 int + TimeOffsetLen int +} + +// NewHRDParameters parses hypothetical reference decoder parameter from br +// following the syntax structure specified in section E.1.2, and returns as a +// new HRDParameters. +func NewHRDParameters(br *bits.BitReader) (*HRDParameters, error) { + h := &HRDParameters{} + var err error + h.CPBCntMinus1, err = readUe(br) + if err != nil { + return nil, errors.Wrap(err, "could not parse CPBCntMinus1") + } + + err = readFields(br, []field{ + {&h.BitRateScale, "BitRateScale", 4}, + {&h.CPBSizeScale, "CPBSizeScale", 4}, + }) + if err != nil { + return nil, err + } + + // SchedSelIdx E1.2 + for sseli := 0; sseli <= h.CPBCntMinus1; sseli++ { + ue, err := readUe(br) + if err != nil { + return nil, errors.Wrap(err, "could not parse BitRateValueMinus1") + } + h.BitRateValueMinus1 = append(h.BitRateValueMinus1, ue) + + ue, err = readUe(br) + if err != nil { + return nil, errors.Wrap(err, "could not parse CPBSizeValueMinus1") + } + h.CPBSizeValueMinus1 = append(h.CPBSizeValueMinus1, ue) + + if v, _ := br.ReadBits(1); v == 1 { + h.CBRFlag = append(h.CBRFlag, true) + } else { + h.CBRFlag = append(h.CBRFlag, false) + } + + err = readFields(br, + []field{ + {&h.InitialCPBRemovalDelayLenMinus1, "InitialCPBRemovalDelayLenMinus1", 5}, + {&h.CPBRemovalDelayLenMinus1, "CPBRemovalDelayLenMinus1", 5}, + {&h.DPBOutputDelayLenMinus1, "DpbOutputDelayLenMinus1", 5}, + {&h.TimeOffsetLen, "TimeOffsetLen", 5}, + }, + ) + if err != nil { + 
return nil, err + } + } + return h, nil +} + +func isInList(l []int, term int) bool { + for _, m := range l { + if m == term { + return true + } + } + return false +} + +func scalingList(br *bits.BitReader, scalingList []int, sizeOfScalingList int, defaultScalingMatrix []int) error { + lastScale := 8 + nextScale := 8 + for i := 0; i < sizeOfScalingList; i++ { + if nextScale != 0 { + deltaScale, err := readSe(br) + if err != nil { + return errors.Wrap(err, "could not parse deltaScale") + } + nextScale = (lastScale + deltaScale + 256) % 256 + if i == 0 && nextScale == 0 { + // Scaling list should use the default list for this point in the matrix + _ = defaultScalingMatrix + } + } + if nextScale == 0 { + scalingList[i] = lastScale + } else { + scalingList[i] = nextScale + } + lastScale = scalingList[i] + } + return nil } From 1d6c501bb8c970eaf369796a79d87d7815a0b74d Mon Sep 17 00:00:00 2001 From: Saxon Date: Tue, 30 Jul 2019 10:16:08 +0930 Subject: [PATCH 2/3] codec/h264/h264dec: fixed field types in sps.go and corrected code after merge of master into branch --- codec/h264/h264dec/parse.go | 21 +- codec/h264/h264dec/pps.go | 161 ++--------- codec/h264/h264dec/slice.go | 187 +++--------- codec/h264/h264dec/sps.go | 552 ++++++++++-------------------------- 4 files changed, 230 insertions(+), 691 deletions(-) diff --git a/codec/h264/h264dec/parse.go b/codec/h264/h264dec/parse.go index 0763be27..b2981958 100644 --- a/codec/h264/h264dec/parse.go +++ b/codec/h264/h264dec/parse.go @@ -64,11 +64,11 @@ func (r fieldReader) readBits(n int) uint64 { // Exp-Golomb-coded element using method as specified in section 9.1 of ITU-T // H.264 and return as an int. The read does not happen if the fieldReader // has a non-nil error. 
-func (r fieldReader) readUe() int { +func (r fieldReader) readUe() uint64 { if r.e != nil { return 0 } - var i int + var i uint64 i, r.e = readUe(r.br) return i } @@ -77,11 +77,11 @@ func (r fieldReader) readUe() int { // Exp-Golomb-coded syntax element using method as specified in section 9.1 // and returns as an int. The read does not happen if the fieldReader // has a non-nil error. -func (r fieldReader) readTe(x uint) int { +func (r fieldReader) readTe(x uint) int64 { if r.e != nil { return 0 } - var i int + var i int64 i, r.e = readTe(r.br, x) return i } @@ -122,7 +122,7 @@ func (r fieldReader) err() error { // // TODO: this should return uint, but rest of code needs to be changed for this // to happen. -func readUe(r *bits.BitReader) (int, error) { +func readUe(r *bits.BitReader) (uint64, error) { nZeros := -1 var err error for b := uint64(0); b == 0; nZeros++ { @@ -135,7 +135,7 @@ func readUe(r *bits.BitReader) (int, error) { if err != nil { return 0, err } - return int(math.Pow(float64(2), float64(nZeros)) - 1 + float64(rem)), nil + return uint64(math.Pow(float64(2), float64(nZeros)) - 1 + float64(rem)), nil } // readTe parses a syntax element of te(v) descriptor i.e, truncated @@ -143,9 +143,10 @@ func readUe(r *bits.BitReader) (int, error) { // Rec. ITU-T H.264 (04/2017). // // TODO: this should also return uint. -func readTe(r *bits.BitReader, x uint) (int, error) { +func readTe(r *bits.BitReader, x uint) (int64, error) { if x > 1 { - return readUe(r) + ue, err := readUe(r) + return int64(ue), err } if x == 1 { @@ -181,7 +182,7 @@ func readSe(r *bits.BitReader) (int, error) { // in Rec. ITU-T H.264 (04/2017). func readMe(r *bits.BitReader, chromaArrayType uint, mpm mbPartPredMode) (uint, error) { // Indexes to codedBlockPattern map. - var i1, i2, i3 int + var i1, i2, i3 uint64 // ChromaArrayType selects first index. 
switch chromaArrayType { @@ -200,7 +201,7 @@ func readMe(r *bits.BitReader, chromaArrayType uint, mpm mbPartPredMode) (uint, } // Need to check that we won't go out of bounds with this index. - if i2 >= len(codedBlockPattern[i1]) { + if int(i2) >= len(codedBlockPattern[i1]) { return 0, errInvalidCodeNum } diff --git a/codec/h264/h264dec/pps.go b/codec/h264/h264dec/pps.go index 1f618c2e..2467a550 100644 --- a/codec/h264/h264dec/pps.go +++ b/codec/h264/h264dec/pps.go @@ -4,7 +4,6 @@ import ( "math" "bitbucket.org/ausocean/av/codec/h264/h264dec/bits" - "github.com/pkg/errors" ) // import "strings" @@ -47,151 +46,54 @@ func NewPPS(sps *SPS, rbsp []byte, showPacket bool) (*PPS, error) { pps := PPS{} // TODO: give this io.Reader br := bits.NewBitReader(nil) + r := newFieldReader(br) - var err error - pps.ID, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse ID") - } - - pps.SPSID, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse SPS ID") - } - - b, err := br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read EntropyCodingMode") - } - pps.EntropyCodingMode = int(b) - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read BottomFieldPicOrderInFramePresent") - } - pps.BottomFieldPicOrderInFramePresent = b == 1 - - pps.NumSliceGroupsMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse NumSliceGroupsMinus1") - } + pps.ID = int(r.readUe()) + pps.SPSID = int(r.readUe()) + pps.EntropyCodingMode = int(r.readBits(1)) + pps.BottomFieldPicOrderInFramePresent = r.readBits(1) == 1 + pps.NumSliceGroupsMinus1 = int(r.readUe()) if pps.NumSliceGroupsMinus1 > 0 { - pps.SliceGroupMapType, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse SliceGroupMapType") - } + pps.SliceGroupMapType = int(r.readUe()) if pps.SliceGroupMapType == 0 { for iGroup := 0; iGroup <= pps.NumSliceGroupsMinus1; iGroup++ 
{ - pps.RunLengthMinus1[iGroup], err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse RunLengthMinus1") - } + pps.RunLengthMinus1[iGroup] = int(r.readUe()) } } else if pps.SliceGroupMapType == 2 { for iGroup := 0; iGroup < pps.NumSliceGroupsMinus1; iGroup++ { - pps.TopLeft[iGroup], err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse TopLeft[iGroup]") - } - if err != nil { - return nil, errors.Wrap(err, "could not parse TopLeft[iGroup]") - } - - pps.BottomRight[iGroup], err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse BottomRight[iGroup]") - } + pps.TopLeft[iGroup] = int(r.readUe()) + pps.BottomRight[iGroup] = int(r.readUe()) } } else if pps.SliceGroupMapType > 2 && pps.SliceGroupMapType < 6 { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read SliceGroupChangeDirection") - } - pps.SliceGroupChangeDirection = b == 1 - - pps.SliceGroupChangeRateMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse SliceGroupChangeRateMinus1") - } + pps.SliceGroupChangeDirection = r.readBits(1) == 1 + pps.SliceGroupChangeRateMinus1 = int(r.readUe()) } else if pps.SliceGroupMapType == 6 { - pps.PicSizeInMapUnitsMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse PicSizeInMapUnitsMinus1") - } + pps.PicSizeInMapUnitsMinus1 = int(r.readUe()) for i := 0; i <= pps.PicSizeInMapUnitsMinus1; i++ { - b, err = br.ReadBits(int(math.Ceil(math.Log2(float64(pps.NumSliceGroupsMinus1 + 1))))) - if err != nil { - return nil, errors.Wrap(err, "coult not read SliceGroupId") - } - pps.SliceGroupId[i] = int(b) + pps.SliceGroupId[i] = int(r.readBits(int(math.Ceil(math.Log2(float64(pps.NumSliceGroupsMinus1 + 1)))))) } } } - pps.NumRefIdxL0DefaultActiveMinus1, err = readUe(br) - if err != nil { - return nil, errors.New("could not parse NumRefIdxL0DefaultActiveMinus1") - } - - 
pps.NumRefIdxL1DefaultActiveMinus1, err = readUe(br) - if err != nil { - return nil, errors.New("could not parse NumRefIdxL1DefaultActiveMinus1") - } - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read WeightedPred") - } - pps.WeightedPred = b == 1 - - b, err = br.ReadBits(2) - if err != nil { - return nil, errors.Wrap(err, "could not read WeightedBipred") - } - pps.WeightedBipred = int(b) - - pps.PicInitQpMinus26, err = readSe(br) - if err != nil { - return nil, errors.New("could not parse PicInitQpMinus26") - } - - pps.PicInitQsMinus26, err = readSe(br) - if err != nil { - return nil, errors.New("could not parse PicInitQsMinus26") - } - - pps.ChromaQpIndexOffset, err = readSe(br) - if err != nil { - return nil, errors.New("could not parse ChromaQpIndexOffset") - } - - err = readFlags(br, []flag{ - {&pps.DeblockingFilterControlPresent, "DeblockingFilterControlPresent"}, - {&pps.ConstrainedIntraPred, "ConstrainedIntraPred"}, - {&pps.RedundantPicCntPresent, "RedundantPicCntPresent"}, - }) - if err != nil { - return nil, err - } + pps.NumRefIdxL0DefaultActiveMinus1 = int(r.readUe()) + pps.NumRefIdxL1DefaultActiveMinus1 = int(r.readUe()) + pps.WeightedPred = r.readBits(1) == 1 + pps.WeightedBipred = int(r.readBits(2)) + pps.PicInitQpMinus26 = int(r.readSe()) + pps.PicInitQsMinus26 = int(r.readSe()) + pps.ChromaQpIndexOffset = int(r.readSe()) + pps.DeblockingFilterControlPresent = r.readBits(1) == 1 + pps.ConstrainedIntraPred = r.readBits(1) == 1 + pps.RedundantPicCntPresent = r.readBits(1) == 1 logger.Printf("debug: \tChecking for more PPS data") if moreRBSPData(br) { logger.Printf("debug: \tProcessing additional PPS data") - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read Transform8x8Mode") - } - pps.Transform8x8Mode = int(b) - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read PicScalingMatrixPresent") - } - pps.PicScalingMatrixPresent = b == 1 
+ pps.Transform8x8Mode = int(r.readBits(1)) + pps.PicScalingMatrixPresent = r.readBits(1) == 1 if pps.PicScalingMatrixPresent { v := 6 @@ -199,11 +101,7 @@ func NewPPS(sps *SPS, rbsp []byte, showPacket bool) (*PPS, error) { v = 2 } for i := 0; i < 6+(v*pps.Transform8x8Mode); i++ { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read PicScalingListPresent") - } - pps.PicScalingListPresent[i] = b == 1 + pps.PicScalingListPresent[i] = r.readBits(1) == 1 if pps.PicScalingListPresent[i] { if i < 6 { scalingList( @@ -222,10 +120,7 @@ func NewPPS(sps *SPS, rbsp []byte, showPacket bool) (*PPS, error) { } } } - pps.SecondChromaQpIndexOffset, err = readSe(br) - if err != nil { - return nil, errors.New("could not parse SecondChromaQpIndexOffset") - } + pps.SecondChromaQpIndexOffset = r.readSe() } moreRBSPData(br) // rbspTrailingBits() diff --git a/codec/h264/h264dec/slice.go b/codec/h264/h264dec/slice.go index 0a20c781..4b620cad 100644 --- a/codec/h264/h264dec/slice.go +++ b/codec/h264/h264dec/slice.go @@ -169,22 +169,22 @@ func MbToSliceGroupMap(sps *SPS, pps *PPS, header *SliceHeader) []int { } func PicWidthInMbs(sps *SPS) int { - return sps.PicWidthInMBSMinus1 + 1 + return int(sps.PicWidthInMBSMinus1 + 1) } func PicHeightInMapUnits(sps *SPS) int { - return sps.PicHeightInMapUnitsMinus1 + 1 + return int(sps.PicHeightInMapUnitsMinus1 + 1) } func PicSizeInMapUnits(sps *SPS) int { - return PicWidthInMbs(sps) * PicHeightInMapUnits(sps) + return int(PicWidthInMbs(sps) * PicHeightInMapUnits(sps)) } func FrameHeightInMbs(sps *SPS) int { - return (2 - flagVal(sps.FrameMBSOnlyFlag)) * PicHeightInMapUnits(sps) + return int((2 - flagVal(sps.FrameMBSOnlyFlag)) * PicHeightInMapUnits(sps)) } func PicHeightInMbs(sps *SPS, header *SliceHeader) int { - return FrameHeightInMbs(sps) / (1 + flagVal(header.FieldPic)) + return int(FrameHeightInMbs(sps) / (1 + flagVal(header.FieldPic))) } func PicSizeInMbs(sps *SPS, header *SliceHeader) int { - return 
PicWidthInMbs(sps) * PicHeightInMbs(sps, header) + return int(PicWidthInMbs(sps) * PicHeightInMbs(sps, header)) } // table 6-1 @@ -262,6 +262,7 @@ func NumMbPart(nalUnit *NALUnit, sps *SPS, header *SliceHeader, data *SliceData) } func MbPred(sliceContext *SliceContext, br *bits.BitReader, rbsp []byte) error { + r := newFieldReader(br) var cabac *CABAC sliceType := sliceTypeMap[sliceContext.Slice.Header.SliceType] mbPartPredMode, err := MbPartPredMode(sliceContext.Slice.Data, sliceType, sliceContext.Slice.Data.MbType, 0) @@ -372,11 +373,7 @@ func MbPred(sliceContext *SliceContext, br *bits.BitReader, rbsp []byte) error { logger.Printf("TODO: ae for IntraChromaPredMode\n") } else { - var err error - sliceContext.Slice.Data.IntraChromaPredMode, err = readUe(br) - if err != nil { - return errors.Wrap(err, "could not parse IntraChromaPredMode") - } + sliceContext.Slice.Data.IntraChromaPredMode = int(r.readUe()) } } @@ -405,14 +402,10 @@ func MbPred(sliceContext *SliceContext, br *bits.BitReader, rbsp []byte) error { // TODO: Only one reference picture is used for inter-prediction, // then the value should be 0 if MbaffFrameFlag(sliceContext.SPS, sliceContext.Slice.Header) == 0 || !sliceContext.Slice.Data.MbFieldDecodingFlag { - sliceContext.Slice.Data.RefIdxL0[mbPartIdx], _ = readTe( - br, - uint(sliceContext.Slice.Header.NumRefIdxL0ActiveMinus1)) + sliceContext.Slice.Data.RefIdxL0[mbPartIdx] = int(r.readTe(uint(sliceContext.Slice.Header.NumRefIdxL0ActiveMinus1))) } else { rangeMax := 2*sliceContext.Slice.Header.NumRefIdxL0ActiveMinus1 + 1 - sliceContext.Slice.Data.RefIdxL0[mbPartIdx], _ = readTe( - br, - uint(rangeMax)) + sliceContext.Slice.Data.RefIdxL0[mbPartIdx] = int(r.readTe(uint(rangeMax))) } } } @@ -588,9 +581,9 @@ func nextMbAddress(n int, sps *SPS, pps *PPS, header *SliceHeader) int { // FrameHeightInMbs = (2 - ps.FrameMBSOnlyFlag) * PicHeightInMapUnits picWidthInMbs := sps.PicWidthInMBSMinus1 + 1 picHeightInMapUnits := sps.PicHeightInMapUnitsMinus1 + 1 - 
frameHeightInMbs := (2 - flagVal(sps.FrameMBSOnlyFlag)) * picHeightInMapUnits + frameHeightInMbs := (2 - flagVal(sps.FrameMBSOnlyFlag)) * int(picHeightInMapUnits) picHeightInMbs := frameHeightInMbs / (1 + flagVal(header.FieldPic)) - picSizeInMbs := picWidthInMbs * picHeightInMbs + picSizeInMbs := int(picWidthInMbs) * picHeightInMbs mbToSliceGroupMap := MbToSliceGroupMap(sps, pps, header) for i < picSizeInMbs && mbToSliceGroupMap[i] != mbToSliceGroupMap[i] { i++ @@ -615,8 +608,8 @@ func MbaffFrameFlag(sps *SPS, header *SliceHeader) int { } func NewSliceData(sliceContext *SliceContext, br *bits.BitReader) (*SliceData, error) { + r := newFieldReader(br) var cabac *CABAC - var err error sliceContext.Slice.Data = &SliceData{BitReader: br} // TODO: Why is this being initialized here? // initCabac(sliceContext) @@ -645,10 +638,7 @@ func NewSliceData(sliceContext *SliceContext, br *bits.BitReader) (*SliceData, e if sliceContext.Slice.Data.SliceTypeName != "I" && sliceContext.Slice.Data.SliceTypeName != "SI" { logger.Printf("debug: \tNonI/SI slice, processing moreData\n") if sliceContext.PPS.EntropyCodingMode == 0 { - sliceContext.Slice.Data.MbSkipRun, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MbSkipRun") - } + sliceContext.Slice.Data.MbSkipRun = int(r.readUe()) if sliceContext.Slice.Data.MbSkipRun > 0 { prevMbSkipped = 1 @@ -762,10 +752,7 @@ func NewSliceData(sliceContext *SliceContext, br *bits.BitReader) (*SliceData, e logger.Printf("TODO: ae for MBType\n") } else { - sliceContext.Slice.Data.MbType, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MbType") - } + sliceContext.Slice.Data.MbType = int(r.readUe()) } if sliceContext.Slice.Data.MbTypeName == "I_PCM" { for !br.ByteAligned() { @@ -777,7 +764,7 @@ func NewSliceData(sliceContext *SliceContext, br *bits.BitReader) (*SliceData, e // 7-3 p95 bitDepthY := 8 + sliceContext.SPS.BitDepthLumaMinus8 for i := 0; i < 256; i++ { - s, err := 
br.ReadBits(bitDepthY) + s, err := br.ReadBits(int(bitDepthY)) if err != nil { return nil, errors.Wrap(err, fmt.Sprintf("could not read PcmSampleLuma[%d]", i)) } @@ -798,7 +785,7 @@ func NewSliceData(sliceContext *SliceContext, br *bits.BitReader) (*SliceData, e bitDepthC := 8 + sliceContext.SPS.BitDepthChromaMinus8 for i := 0; i < 2*mbWidthC*mbHeightC; i++ { - s, err := br.ReadBits(bitDepthC) + s, err := br.ReadBits(int(bitDepthC)) if err != nil { return nil, errors.Wrap(err, fmt.Sprintf("could not read PcmSampleChroma[%d]", i)) } @@ -959,27 +946,17 @@ func NewSliceContext(videoStream *VideoStream, nalUnit *NALUnit, rbsp []byte, sh if sps.SeparateColorPlaneFlag { header.ChromaArrayType = 0 } else { - header.ChromaArrayType = sps.ChromaFormatIDC + header.ChromaArrayType = int(sps.ChromaFormatIDC) } br := bits.NewBitReader(bytes.NewReader(rbsp)) + r := newFieldReader(br) - header.FirstMbInSlice, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse FirstMbInSlice") - } - - header.SliceType, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse SliceType") - } + header.FirstMbInSlice = int(r.readUe()) + header.SliceType = int(r.readUe()) sliceType := sliceTypeMap[header.SliceType] logger.Printf("debug: %s (%s) slice of %d bytes\n", NALUnitType[int(nalUnit.Type)], sliceType, len(rbsp)) - header.PPSID, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse PPSID") - } - + header.PPSID = int(r.readUe()) if sps.SeparateColorPlaneFlag { b, err := br.ReadBits(2) if err != nil { @@ -1004,13 +981,10 @@ func NewSliceContext(videoStream *VideoStream, nalUnit *NALUnit, rbsp []byte, sh } } if idrPic { - header.IDRPicID, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse IDRPicID") - } + header.IDRPicID = int(r.readUe()) } if sps.PicOrderCountType == 0 { - b, err := br.ReadBits(sps.Log2MaxPicOrderCntLSBMin4 + 4) + b, err := 
br.ReadBits(int(sps.Log2MaxPicOrderCntLSBMin4 + 4)) if err != nil { return nil, errors.Wrap(err, "could not read PicOrderCntLsb") } @@ -1037,10 +1011,7 @@ func NewSliceContext(videoStream *VideoStream, nalUnit *NALUnit, rbsp []byte, sh } } if pps.RedundantPicCntPresent { - header.RedundantPicCnt, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse RedundantPicCnt") - } + header.RedundantPicCnt = int(r.readUe()) } if sliceType == "B" { b, err := br.ReadBits(1) @@ -1057,15 +1028,9 @@ func NewSliceContext(videoStream *VideoStream, nalUnit *NALUnit, rbsp []byte, sh header.NumRefIdxActiveOverride = b == 1 if header.NumRefIdxActiveOverride { - header.NumRefIdxL0ActiveMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse NumRefIdxL0ActiveMinus1") - } + header.NumRefIdxL0ActiveMinus1 = int(r.readUe()) if sliceType == "B" { - header.NumRefIdxL1ActiveMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse NumRefIdxL1ActiveMinus1") - } + header.NumRefIdxL1ActiveMinus1 = int(r.readUe()) } } } @@ -1085,21 +1050,12 @@ func NewSliceContext(videoStream *VideoStream, nalUnit *NALUnit, rbsp []byte, sh if header.RefPicListModificationFlagL0 { for header.ModificationOfPicNums != 3 { - header.ModificationOfPicNums, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse ModificationOfPicNums") - } + header.ModificationOfPicNums = int(r.readUe()) if header.ModificationOfPicNums == 0 || header.ModificationOfPicNums == 1 { - header.AbsDiffPicNumMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse AbsDiffPicNumMinus1") - } + header.AbsDiffPicNumMinus1 = int(r.readUe()) } else if header.ModificationOfPicNums == 2 { - header.LongTermPicNum, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse LongTermPicNum") - } + header.LongTermPicNum = int(r.readUe()) } } } @@ -1114,21 +1070,12 @@ func 
NewSliceContext(videoStream *VideoStream, nalUnit *NALUnit, rbsp []byte, sh if header.RefPicListModificationFlagL1 { for header.ModificationOfPicNums != 3 { - header.ModificationOfPicNums, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse ModificationOfPicNums") - } + header.ModificationOfPicNums = int(r.readUe()) if header.ModificationOfPicNums == 0 || header.ModificationOfPicNums == 1 { - header.AbsDiffPicNumMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse AbsDiffPicNumMinus1") - } + header.AbsDiffPicNumMinus1 = int(r.readUe()) } else if header.ModificationOfPicNums == 2 { - header.LongTermPicNum, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse LongTermPicNum") - } + header.LongTermPicNum = int(r.readUe()) } } } @@ -1138,23 +1085,13 @@ func NewSliceContext(videoStream *VideoStream, nalUnit *NALUnit, rbsp []byte, sh if (pps.WeightedPred && (sliceType == "P" || sliceType == "SP")) || (pps.WeightedBipred == 1 && sliceType == "B") { // predWeightTable() - header.LumaLog2WeightDenom, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse LumaLog2WeightDenom") - } + header.LumaLog2WeightDenom = int(r.readUe()) if header.ChromaArrayType != 0 { - header.ChromaLog2WeightDenom, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse ChromaLog2WeightDenom") - } + header.ChromaLog2WeightDenom = int(r.readUe()) } for i := 0; i <= header.NumRefIdxL0ActiveMinus1; i++ { - b, err := br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read LumaWeightL0Flag") - } - header.LumaWeightL0Flag = b == 1 + header.LumaWeightL0Flag = r.readBits(1) == 1 if header.LumaWeightL0Flag { se, err := readSe(br) @@ -1266,69 +1203,37 @@ func NewSliceContext(videoStream *VideoStream, nalUnit *NALUnit, rbsp []byte, sh header.AdaptiveRefPicMarkingModeFlag = b == 1 if header.AdaptiveRefPicMarkingModeFlag { - 
header.MemoryManagementControlOperation, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MemoryManagementControlOperation") - } + header.MemoryManagementControlOperation = int(r.readUe()) for header.MemoryManagementControlOperation != 0 { if header.MemoryManagementControlOperation == 1 || header.MemoryManagementControlOperation == 3 { - header.DifferenceOfPicNumsMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MemoryManagementControlOperation") - } + header.DifferenceOfPicNumsMinus1 = int(r.readUe()) } if header.MemoryManagementControlOperation == 2 { - header.LongTermPicNum, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse LongTermPicNum") - } + header.LongTermPicNum = int(r.readUe()) } if header.MemoryManagementControlOperation == 3 || header.MemoryManagementControlOperation == 6 { - header.LongTermFrameIdx, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse LongTermFrameIdx") - } + header.LongTermFrameIdx = int(r.readUe()) } if header.MemoryManagementControlOperation == 4 { - header.MaxLongTermFrameIdxPlus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MaxLongTermFrameIdxPlus1") - } + header.MaxLongTermFrameIdxPlus1 = int(r.readUe()) } } } } // end decRefPicMarking } if pps.EntropyCodingMode == 1 && sliceType != "I" && sliceType != "SI" { - header.CabacInit, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse CabacInit") - } - } - header.SliceQpDelta, err = readSe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse SliceQpDelta") + header.CabacInit = int(r.readUe()) } + header.SliceQpDelta = int(r.readSe()) if sliceType == "SP" || sliceType == "SI" { if sliceType == "SP" { - b, err := br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read SpForSwitch") - } - header.SpForSwitch = b == 1 - } - header.SliceQsDelta, 
err = readSe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse SliceQsDelta") + header.SpForSwitch = r.readBits(1) == 1 } + header.SliceQsDelta = int(r.readSe()) } if pps.DeblockingFilterControlPresent { - header.DisableDeblockingFilter, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse DisableDeblockingFilter") - } - + header.DisableDeblockingFilter = int(r.readUe()) if header.DisableDeblockingFilter != 1 { header.SliceAlphaC0OffsetDiv2, err = readSe(br) if err != nil { diff --git a/codec/h264/h264dec/sps.go b/codec/h264/h264dec/sps.go index bd3e1184..2c37746b 100644 --- a/codec/h264/h264dec/sps.go +++ b/codec/h264/h264dec/sps.go @@ -2,6 +2,7 @@ package h264dec import ( "bytes" + "fmt" "bitbucket.org/ausocean/av/codec/h264/h264dec/bits" "github.com/pkg/errors" @@ -63,46 +64,46 @@ var ( // SPS describes a sequence parameter set as defined by section 7.3.2.1.1 in // the Specifications. type SPS struct { - Profile int - Constraint0 int - Constraint1 int - Constraint2 int - Constraint3 int - Constraint4 int - Constraint5 int - LevelIDC int - SPSID int - ChromaFormatIDC int + Profile uint8 + Constraint0 bool + Constraint1 bool + Constraint2 bool + Constraint3 bool + Constraint4 bool + Constraint5 bool + LevelIDC uint8 + SPSID uint64 + ChromaFormatIDC uint64 SeparateColorPlaneFlag bool - BitDepthLumaMinus8 int - BitDepthChromaMinus8 int + BitDepthLumaMinus8 uint64 + BitDepthChromaMinus8 uint64 QPPrimeYZeroTransformBypassFlag bool SeqScalingMatrixPresentFlag bool SeqScalingListPresentFlag []bool - ScalingList4x4 [][]int + ScalingList4x4 [][]uint64 UseDefaultScalingMatrix4x4Flag []bool - ScalingList8x8 [][]int + ScalingList8x8 [][]uint64 UseDefaultScalingMatrix8x8Flag []bool - Log2MaxFrameNumMinus4 int - PicOrderCountType int - Log2MaxPicOrderCntLSBMin4 int + Log2MaxFrameNumMinus4 uint64 + PicOrderCountType uint64 + Log2MaxPicOrderCntLSBMin4 uint64 DeltaPicOrderAlwaysZeroFlag bool - OffsetForNonRefPic int - 
OffsetForTopToBottomField int - NumRefFramesInPicOrderCntCycle int + OffsetForNonRefPic int64 + OffsetForTopToBottomField int64 + NumRefFramesInPicOrderCntCycle uint64 OffsetForRefFrameList []int - MaxNumRefFrames int + MaxNumRefFrames uint64 GapsInFrameNumValueAllowed bool - PicWidthInMBSMinus1 int - PicHeightInMapUnitsMinus1 int + PicWidthInMBSMinus1 uint64 + PicHeightInMapUnitsMinus1 uint64 FrameMBSOnlyFlag bool MBAdaptiveFrameFieldFlag bool Direct8x8InferenceFlag bool FrameCroppingFlag bool - FrameCropLeftOffset int - FrameCropRightOffset int - FrameCropTopOffset int - FrameCropBottomOffset int + FrameCropLeftOffset uint64 + FrameCropRightOffset uint64 + FrameCropTopOffset uint64 + FrameCropBottomOffset uint64 VUIParametersPresentFlag bool VUIParameters *VUIParameters } @@ -115,76 +116,33 @@ func NewSPS(rbsp []byte, showPacket bool) (*SPS, error) { logger.Printf("debug: \t%#v\n", rbsp[0:8]) sps := SPS{} br := bits.NewBitReader(bytes.NewReader(rbsp)) - var err error + r := newFieldReader(br) - err = readFields(br, - []field{ - {&sps.Profile, "ProfileIDC", 8}, - {&sps.Constraint0, "Constraint0", 1}, - {&sps.Constraint1, "Constraint1", 1}, - {&sps.Constraint2, "Constraint2", 1}, - {&sps.Constraint3, "Constraint3", 1}, - {&sps.Constraint4, "Constraint4", 1}, - {&sps.Constraint5, "Constraint5", 1}, - }, - ) - - _, err = br.ReadBits(2) - if err != nil { - return nil, errors.Wrap(err, "could not read ReservedZeroBits") - } - - b, err := br.ReadBits(8) - if err != nil { - return nil, errors.Wrap(err, "could not read Level") - } - sps.LevelIDC = int(b) - - // sps.ID = b.NextField("SPSID", 6) // proper - sps.SPSID, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse ID") - } - - sps.ChromaFormatIDC, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse ChromaFormatIDC") - } + sps.Profile = uint8(r.readBits(8)) + sps.Constraint0 = r.readBits(1) == 1 + sps.Constraint1 = r.readBits(1) == 1 + sps.Constraint2 = 
r.readBits(1) == 1 + sps.Constraint3 = r.readBits(1) == 1 + sps.Constraint4 = r.readBits(1) == 1 + sps.Constraint5 = r.readBits(1) == 1 + r.readBits(2) // 2 reserved bits. + sps.LevelIDC = uint8(r.readBits(8)) + sps.SPSID = r.readUe() + sps.ChromaFormatIDC = r.readUe() // This should be done only for certain ProfileIDC: isProfileIDC := []int{100, 110, 122, 244, 44, 83, 86, 118, 128, 138, 139, 134, 135} // SpecialProfileCase1 - if isInList(isProfileIDC, sps.Profile) { + if isInList(isProfileIDC, int(sps.Profile)) { if sps.ChromaFormatIDC == chroma444 { // TODO: should probably deal with error here. - b, err := br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read UseSeparateColorPlaneFlag") - } - sps.SeparateColorPlaneFlag = b == 1 + sps.SeparateColorPlaneFlag = r.readBits(1) == 1 } - sps.BitDepthLumaMinus8, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse BitDepthLumaMinus8") - } - - sps.BitDepthChromaMinus8, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse BitDepthChromaMinus8") - } - - b, err := br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read QPrimeYZeroTransformBypass") - } - sps.QPPrimeYZeroTransformBypassFlag = b == 1 - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read SeqScalingMatrixPresent") - } - sps.SeqScalingMatrixPresentFlag = b == 1 + sps.BitDepthLumaMinus8 = r.readUe() + sps.BitDepthChromaMinus8 = r.readUe() + sps.QPPrimeYZeroTransformBypassFlag = r.readBits(1) == 1 + sps.SeqScalingMatrixPresentFlag = r.readBits(1) == 1 if sps.SeqScalingMatrixPresentFlag { max := 12 @@ -193,11 +151,7 @@ func NewSPS(rbsp []byte, showPacket bool) (*SPS, error) { } logger.Printf("debug: \tbuilding Scaling matrix for %d elements\n", max) for i := 0; i < max; i++ { - b, err := br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read SeqScalingList") - } - sps.SeqScalingListPresentFlag = 
append(sps.SeqScalingListPresentFlag, b == 1) + sps.SeqScalingListPresentFlag = append(sps.SeqScalingListPresentFlag, r.readBits(1) == 1) if sps.SeqScalingListPresentFlag[i] { if i < 6 { @@ -223,125 +177,44 @@ func NewSPS(rbsp []byte, showPacket bool) (*SPS, error) { // showSPS() // return sps // Possibly wrong due to no scaling list being built - sps.Log2MaxFrameNumMinus4, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse Log2MaxFrameNumMinus4") - } - - sps.PicOrderCountType, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse PicOrderCountType") - } + sps.Log2MaxFrameNumMinus4 = r.readUe() + sps.PicOrderCountType = r.readUe() if sps.PicOrderCountType == 0 { - sps.Log2MaxPicOrderCntLSBMin4, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse Log2MaxPicOrderCntLSBMin4") - } + sps.Log2MaxPicOrderCntLSBMin4 = r.readUe() } else if sps.PicOrderCountType == 1 { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read DeltaPicOrderAlwaysZero") - } - sps.DeltaPicOrderAlwaysZeroFlag = b == 1 + sps.DeltaPicOrderAlwaysZeroFlag = r.readBits(1) == 1 + sps.OffsetForNonRefPic = int64(r.readSe()) + sps.OffsetForTopToBottomField = int64(r.readSe()) + sps.NumRefFramesInPicOrderCntCycle = r.readUe() - sps.OffsetForNonRefPic, err = readSe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse OffsetForNonRefPic") - } - - sps.OffsetForTopToBottomField, err = readSe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse OffsetForTopToBottomField") - } - - sps.NumRefFramesInPicOrderCntCycle, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse NumRefFramesInPicOrderCntCycle") - } - - for i := 0; i < sps.NumRefFramesInPicOrderCntCycle; i++ { - se, err := readSe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse OffsetForRefFrameList") - } - sps.OffsetForRefFrameList = append( - 
sps.OffsetForRefFrameList, - se) + for i := 0; i < int(sps.NumRefFramesInPicOrderCntCycle); i++ { + sps.OffsetForRefFrameList = append(sps.OffsetForRefFrameList, r.readSe()) } } - sps.MaxNumRefFrames, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MaxNumRefFrames") - } - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read GapsInFrameNumValueAllowed") - } - sps.GapsInFrameNumValueAllowed = b == 1 - - sps.PicWidthInMBSMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse PicWidthInMbsMinus1") - } - - sps.PicHeightInMapUnitsMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse PicHeightInMapUnitsMinus1") - } - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read FrameMbsOnly") - } - sps.FrameMBSOnlyFlag = b == 1 + sps.MaxNumRefFrames = r.readUe() + sps.GapsInFrameNumValueAllowed = r.readBits(1) == 1 + sps.PicWidthInMBSMinus1 = r.readUe() + sps.PicHeightInMapUnitsMinus1 = r.readUe() + sps.FrameMBSOnlyFlag = r.readBits(1) == 1 if !sps.FrameMBSOnlyFlag { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read MBAdaptiveFrameField") - } - sps.MBAdaptiveFrameFieldFlag = b == 1 + sps.MBAdaptiveFrameFieldFlag = r.readBits(1) == 1 } - err = readFlags(br, []flag{ - {&sps.Direct8x8InferenceFlag, "Direct8x8Inference"}, - {&sps.FrameCroppingFlag, "FrameCropping"}, - }) - if err != nil { - return nil, err - } + sps.Direct8x8InferenceFlag = r.readBits(1) == 1 + sps.FrameCroppingFlag = r.readBits(1) == 1 if sps.FrameCroppingFlag { - sps.FrameCropLeftOffset, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse FrameCropLeftOffset") - } - - sps.FrameCropRightOffset, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse FrameCropRightOffset") - } - - sps.FrameCropTopOffset, err = readUe(br) - if err != nil { - 
return nil, errors.Wrap(err, "could not parse FrameCropTopOffset") - } - - sps.FrameCropBottomOffset, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse FrameCropBottomOffset") - } + sps.FrameCropLeftOffset = r.readUe() + sps.FrameCropRightOffset = r.readUe() + sps.FrameCropTopOffset = r.readUe() + sps.FrameCropBottomOffset = r.readUe() } - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read VuiParametersPresent") - } - sps.VUIParametersPresentFlag = b == 1 + sps.VUIParametersPresentFlag = r.readBits(1) == 1 if sps.VUIParametersPresentFlag { @@ -354,24 +227,24 @@ func NewSPS(rbsp []byte, showPacket bool) (*SPS, error) { // Specifications. type VUIParameters struct { AspectRatioInfoPresentFlag bool - AspectRatioIDC int - SARWidth int - SARHeight int + AspectRatioIDC uint8 + SARWidth uint32 + SARHeight uint32 OverscanInfoPresentFlag bool OverscanAppropriateFlag bool VideoSignalTypePresentFlag bool - VideoFormat int + VideoFormat uint8 VideoFullRangeFlag bool ColorDescriptionPresentFlag bool - ColorPrimaries int - TransferCharacteristics int - MatrixCoefficients int + ColorPrimaries uint8 + TransferCharacteristics uint8 + MatrixCoefficients uint8 ChromaLocInfoPresentFlag bool - ChromaSampleLocTypeTopField int - ChromaSampleLocTypeBottomField int + ChromaSampleLocTypeTopField uint64 + ChromaSampleLocTypeBottomField uint64 TimingInfoPresentFlag bool - NumUnitsInTick int - TimeScale int + NumUnitsInTick uint32 + TimeScale uint32 FixedFrameRateFlag bool NALHRDParametersPresentFlag bool NALHRDParameters *HRDParameters @@ -381,12 +254,12 @@ type VUIParameters struct { PicStructPresentFlag bool BitstreamRestrictionFlag bool MotionVectorsOverPicBoundariesFlag bool - MaxBytesPerPicDenom int - MaxBitsPerMBDenom int - Log2MaxMVLengthHorizontal int - Log2MaxMVLengthVertical int - MaxNumReorderFrames int - MaxDecFrameBuffering int + MaxBytesPerPicDenom uint64 + MaxBitsPerMBDenom uint64 + Log2MaxMVLengthHorizontal 
uint64 + Log2MaxMVLengthVertical uint64 + MaxNumReorderFrames uint64 + MaxDecFrameBuffering uint64 } // NewVUIParameters parses video usability information parameters from br @@ -394,137 +267,60 @@ type VUIParameters struct { // new VUIParameters. func NewVUIParameters(br *bits.BitReader) (*VUIParameters, error) { p := &VUIParameters{} + r := newFieldReader(br) - b, err := br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read AspectRatioInfoPresent") - } - p.AspectRatioInfoPresentFlag = b == 1 + p.AspectRatioInfoPresentFlag = r.readBits(1) == 1 if p.AspectRatioInfoPresentFlag { - b, err = br.ReadBits(8) - if err != nil { - return nil, errors.Wrap(err, "could not read AspectRatio") - } - p.AspectRatioIDC = int(b) + p.AspectRatioIDC = uint8(r.readBits(8)) EXTENDED_SAR := 999 - if p.AspectRatioIDC == EXTENDED_SAR { - b, err = br.ReadBits(16) - if err != nil { - return nil, errors.Wrap(err, "could not read SarWidth") - } - p.SARWidth = int(b) - - b, err = br.ReadBits(16) - if err != nil { - return nil, errors.Wrap(err, "could not read SarHeight") - } - p.SARHeight = int(b) + if int(p.AspectRatioIDC) == EXTENDED_SAR { + p.SARWidth = uint32(r.readBits(16)) + p.SARHeight = uint32(r.readBits(16)) } } - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read OverscanInfoPresent") - } - p.OverscanInfoPresentFlag = b == 1 + p.OverscanInfoPresentFlag = r.readBits(1) == 1 if p.OverscanInfoPresentFlag { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read OverscanAppropriate") - } - p.OverscanAppropriateFlag = b == 1 + p.OverscanAppropriateFlag = r.readBits(1) == 1 } - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read VideoSignalTypePresent") - } - p.VideoSignalTypePresentFlag = b == 1 + p.VideoSignalTypePresentFlag = r.readBits(1) == 1 if p.VideoSignalTypePresentFlag { - b, err = br.ReadBits(3) - if err != nil { - return nil, errors.Wrap(err, 
"could not read VideoFormat") - } - p.VideoFormat = int(b) + p.VideoFormat = uint8(r.readBits(3)) } if p.VideoSignalTypePresentFlag { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read VideoFullRange") - } - p.VideoFullRangeFlag = b == 1 - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read ColorDescriptionPresent") - } - p.ColorDescriptionPresentFlag = b == 1 + p.VideoFullRangeFlag = r.readBits(1) == 1 + p.ColorDescriptionPresentFlag = r.readBits(1) == 1 if p.ColorDescriptionPresentFlag { - err = readFields(br, - []field{ - {&p.ColorPrimaries, "ColorPrimaries", 8}, - {&p.TransferCharacteristics, "TransferCharacteristics", 8}, - {&p.MatrixCoefficients, "MatrixCoefficients", 8}, - }, - ) - if err != nil { - return nil, err - } + p.ColorPrimaries = uint8(r.readBits(8)) + p.TransferCharacteristics = uint8(r.readBits(8)) + p.MatrixCoefficients = uint8(r.readBits(8)) } } - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read ChromaLocInfoPresent") - } - p.ChromaLocInfoPresentFlag = b == 1 + p.ChromaLocInfoPresentFlag = r.readBits(1) == 1 if p.ChromaLocInfoPresentFlag { - p.ChromaSampleLocTypeTopField, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse ChromaSampleLocTypeTopField") - } - - p.ChromaSampleLocTypeBottomField, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse ChromaSampleLocTypeBottomField") - } + p.ChromaSampleLocTypeTopField = uint64(r.readUe()) + p.ChromaSampleLocTypeBottomField = uint64(r.readUe()) } - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read TimingInfoPresent") - } - p.TimingInfoPresentFlag = b == 1 + p.TimingInfoPresentFlag = r.readBits(1) == 1 if p.TimingInfoPresentFlag { - err := readFields(br, []field{ - {&p.NumUnitsInTick, "NumUnitsInTick", 32}, - {&p.TimeScale, "TimeScale", 32}, - }) - if err != nil { - return nil, err - 
} - - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read FixedFrameRate") - } - p.FixedFrameRateFlag = b == 1 + p.NumUnitsInTick = uint32(r.readBits(32)) + p.TimeScale = uint32(r.readBits(32)) + p.FixedFrameRateFlag = r.readBits(1) == 1 } - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read NalHrdParametersPresent") - } - p.NALHRDParametersPresentFlag = b == 1 + p.NALHRDParametersPresentFlag = r.readBits(1) == 1 + var err error if p.NALHRDParametersPresentFlag { p.NALHRDParameters, err = NewHRDParameters(br) if err != nil { @@ -532,11 +328,7 @@ func NewVUIParameters(br *bits.BitReader) (*VUIParameters, error) { } } - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read VclHrdParametersPresent") - } - p.VCLHRDParametersPresentFlag = b == 1 + p.VCLHRDParametersPresentFlag = r.readBits(1) == 1 if p.VCLHRDParametersPresentFlag { p.VCLHRDParameters, err = NewHRDParameters(br) @@ -545,54 +337,20 @@ func NewVUIParameters(br *bits.BitReader) (*VUIParameters, error) { } } if p.NALHRDParametersPresentFlag || p.VCLHRDParametersPresentFlag { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read LowHrdDelay") - } - p.LowDelayHRDFlag = b == 1 + p.LowDelayHRDFlag = r.readBits(1) == 1 } - err = readFlags(br, []flag{ - {&p.PicStructPresentFlag, "PicStructPresent"}, - {&p.BitstreamRestrictionFlag, "BitStreamRestriction"}, - }) + p.PicStructPresentFlag = r.readBits(1) == 1 + p.BitstreamRestrictionFlag = r.readBits(1) == 1 if p.BitstreamRestrictionFlag { - b, err = br.ReadBits(1) - if err != nil { - return nil, errors.Wrap(err, "could not read MotionVectorsOverPicBoundaries") - } - p.MotionVectorsOverPicBoundariesFlag = b == 1 - - p.MaxBytesPerPicDenom, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MaxBytesPerPicDenom") - } - - p.MaxBitsPerMBDenom, err = readUe(br) - if err != nil { - return nil, 
errors.Wrap(err, "could not parse MaxBitsPerMbDenom") - } - - p.Log2MaxMVLengthHorizontal, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse Log2MaxMvLengthHorizontal") - } - - p.Log2MaxMVLengthVertical, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse Log2MaxMvLengthVertical") - } - - p.MaxNumReorderFrames, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MaxNumReorderFrames") - } - - p.MaxDecFrameBuffering, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse MaxDecFrameBuffering") - } + p.MotionVectorsOverPicBoundariesFlag = r.readBits(1) == 1 + p.MaxBytesPerPicDenom = r.readUe() + p.MaxBitsPerMBDenom = r.readUe() + p.Log2MaxMVLengthHorizontal = r.readUe() + p.Log2MaxMVLengthVertical = r.readUe() + p.MaxNumReorderFrames = r.readUe() + p.MaxDecFrameBuffering = r.readUe() } return p, nil } @@ -600,16 +358,16 @@ func NewVUIParameters(br *bits.BitReader) (*VUIParameters, error) { // HRDParameters describes hypothetical reference decoder parameters as defined // by section E.1.2 in the specifications. type HRDParameters struct { - CPBCntMinus1 int - BitRateScale int - CPBSizeScale int - BitRateValueMinus1 []int - CPBSizeValueMinus1 []int + CPBCntMinus1 uint64 + BitRateScale uint8 + CPBSizeScale uint8 + BitRateValueMinus1 []uint64 + CPBSizeValueMinus1 []uint64 CBRFlag []bool - InitialCPBRemovalDelayLenMinus1 int - CPBRemovalDelayLenMinus1 int - DPBOutputDelayLenMinus1 int - TimeOffsetLen int + InitialCPBRemovalDelayLenMinus1 uint8 + CPBRemovalDelayLenMinus1 uint8 + DPBOutputDelayLenMinus1 uint8 + TimeOffsetLen uint8 } // NewHRDParameters parses hypothetical reference decoder parameter from br @@ -617,33 +375,16 @@ type HRDParameters struct { // new HRDParameters. 
func NewHRDParameters(br *bits.BitReader) (*HRDParameters, error) { h := &HRDParameters{} - var err error - h.CPBCntMinus1, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse CPBCntMinus1") - } + r := newFieldReader(br) - err = readFields(br, []field{ - {&h.BitRateScale, "BitRateScale", 4}, - {&h.CPBSizeScale, "CPBSizeScale", 4}, - }) - if err != nil { - return nil, err - } + h.CPBCntMinus1 = r.readUe() + h.BitRateScale = uint8(r.readBits(4)) + h.CPBSizeScale = uint8(r.readBits(4)) // SchedSelIdx E1.2 - for sseli := 0; sseli <= h.CPBCntMinus1; sseli++ { - ue, err := readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse BitRateValueMinus1") - } - h.BitRateValueMinus1 = append(h.BitRateValueMinus1, ue) - - ue, err = readUe(br) - if err != nil { - return nil, errors.Wrap(err, "could not parse CPBSizeValueMinus1") - } - h.CPBSizeValueMinus1 = append(h.CPBSizeValueMinus1, ue) + for sseli := 0; sseli <= int(h.CPBCntMinus1); sseli++ { + h.BitRateValueMinus1 = append(h.BitRateValueMinus1, r.readUe()) + h.CPBSizeValueMinus1 = append(h.CPBSizeValueMinus1, r.readUe()) if v, _ := br.ReadBits(1); v == 1 { h.CBRFlag = append(h.CBRFlag, true) @@ -651,17 +392,14 @@ func NewHRDParameters(br *bits.BitReader) (*HRDParameters, error) { h.CBRFlag = append(h.CBRFlag, false) } - err = readFields(br, - []field{ - {&h.InitialCPBRemovalDelayLenMinus1, "InitialCPBRemovalDelayLenMinus1", 5}, - {&h.CPBRemovalDelayLenMinus1, "CPBRemovalDelayLenMinus1", 5}, - {&h.DPBOutputDelayLenMinus1, "DpbOutputDelayLenMinus1", 5}, - {&h.TimeOffsetLen, "TimeOffsetLen", 5}, - }, - ) - if err != nil { - return nil, err - } + h.InitialCPBRemovalDelayLenMinus1 = uint8(r.readBits(5)) + h.CPBRemovalDelayLenMinus1 = uint8(r.readBits(5)) + h.DPBOutputDelayLenMinus1 = uint8(r.readBits(5)) + h.TimeOffsetLen = uint8(r.readBits(5)) + } + + if r.err() != nil { + return nil, fmt.Errorf("error from fieldReader: %v", r.err()) } return h, nil } From 
7a4ee894d4e2e9efe228196b5c06a810fcf67ba2 Mon Sep 17 00:00:00 2001 From: Saxon Date: Wed, 31 Jul 2019 20:25:38 +0930 Subject: [PATCH 3/3] codec/h264/h264dec/sps.go: commented fields of syntax structures --- codec/h264/h264dec/sps.go | 425 ++++++++++++++++++++++++++++++-------- 1 file changed, 342 insertions(+), 83 deletions(-) diff --git a/codec/h264/h264dec/sps.go b/codec/h264/h264dec/sps.go index 2c37746b..4da3fc69 100644 --- a/codec/h264/h264dec/sps.go +++ b/codec/h264/h264dec/sps.go @@ -63,49 +63,156 @@ var ( // SPS describes a sequence parameter set as defined by section 7.3.2.1.1 in // the Specifications. +// For semantics see section 7.4.2.1. Comments for fields are excerpts from +// section 7.4.2.1. type SPS struct { - Profile uint8 - Constraint0 bool - Constraint1 bool - Constraint2 bool - Constraint3 bool - Constraint4 bool - Constraint5 bool - LevelIDC uint8 - SPSID uint64 - ChromaFormatIDC uint64 - SeparateColorPlaneFlag bool - BitDepthLumaMinus8 uint64 - BitDepthChromaMinus8 uint64 + // profile_idc and level_idc indicate the profile and level to which the + // coded video sequence conforms. + Profile, LevelIDC uint8 + + // The constraint_setx_flag flags specify the constraints defined in A.2 to + // which this stream conforms. + Constraint0 bool + Constraint1 bool + Constraint2 bool + Constraint3 bool + Constraint4 bool + Constraint5 bool + + // seq_parameter_set_id identifies this sequence parameter set, and can then + // be referenced by the picture parameter set. The seq_parameter_set_id is + // in the range of 0 to 30 inclusive. + SPSID uint64 + + // chroma_format_idc specifies the chroma sampling relative to the luma + // sampling as specified in clause 6.2. Range of chroma_format_idc is + // from 0 to 3 inclusive. + ChromaFormatIDC uint64 + + // separate_color_plane_flag if true specifies that the three components of + // the 4:4:4 chroma format are coded separately.
+ SeparateColorPlaneFlag bool + + // bit_depth_luma_minus8 specifies the luma array sample bit depth and the + // luma quantisation parameter range offset QpBdOffset_y (eq 7-3 and 7-4). + BitDepthLumaMinus8 uint64 + + // bit_depth_chroma_minus8 specifies the chroma array sample bit depth and the + // chroma quantisation parameter range offset QpBdOffset_c (eq 7-3 and 7-4). + BitDepthChromaMinus8 uint64 + + // qpprime_y_zero_transform_bypass_flag equal to 1 specifies that, when QP′ Y + // is equal to 0, a transform bypass operation for the transform coefficient + // decoding process and picture construction process prior to deblocking + // filter process as specified in clause 8.5 shall be applied. QPPrimeYZeroTransformBypassFlag bool - SeqScalingMatrixPresentFlag bool - SeqScalingListPresentFlag []bool - ScalingList4x4 [][]uint64 - UseDefaultScalingMatrix4x4Flag []bool - ScalingList8x8 [][]uint64 - UseDefaultScalingMatrix8x8Flag []bool - Log2MaxFrameNumMinus4 uint64 - PicOrderCountType uint64 - Log2MaxPicOrderCntLSBMin4 uint64 - DeltaPicOrderAlwaysZeroFlag bool - OffsetForNonRefPic int64 - OffsetForTopToBottomField int64 - NumRefFramesInPicOrderCntCycle uint64 - OffsetForRefFrameList []int - MaxNumRefFrames uint64 - GapsInFrameNumValueAllowed bool - PicWidthInMBSMinus1 uint64 - PicHeightInMapUnitsMinus1 uint64 - FrameMBSOnlyFlag bool - MBAdaptiveFrameFieldFlag bool - Direct8x8InferenceFlag bool - FrameCroppingFlag bool - FrameCropLeftOffset uint64 - FrameCropRightOffset uint64 - FrameCropTopOffset uint64 - FrameCropBottomOffset uint64 - VUIParametersPresentFlag bool - VUIParameters *VUIParameters + + // seq_scaling_matrix_present_flag equal to 1 specifies that + // seq_scaling_list_present_flag[ i ] are present. When 0 they are not present + // and the sequence-level scaling lists specified by Flat_4x4_16 and + // Flat_8x8_16 shall be inferred.
+ SeqScalingMatrixPresentFlag bool + + // seq_scaling_list_present_flag[i] specifies whether the syntax structure for + // scaling list i is present. If 1 then present, otherwise not, and scaling + // list for i is inferred as per rule set A in table 7-2. + SeqScalingListPresentFlag []bool + + // The 4x4 sequence scaling lists for each i. + ScalingList4x4 [][]uint64 + + // Flag to indicate for a 4x4 scaling list, if we use the default. + UseDefaultScalingMatrix4x4Flag []bool + + // The 8x8 sequence scaling lists for each i. + ScalingList8x8 [][]uint64 + + // Flag to indicate for a 8x8 scaling list, if we use the default. + UseDefaultScalingMatrix8x8Flag []bool + + // log2_max_frame_num_minus4 allows for derivation of MaxFrameNum using eq 7-10. + Log2MaxFrameNumMinus4 uint64 + + // pic_order_cnt_type specifies the method to decode picture order count. + PicOrderCountType uint64 + + // log2_max_pic_order_cnt_lsb_minus4 allows for the derivation of + // MaxPicOrderCntLsb using eq 7-11. + Log2MaxPicOrderCntLSBMin4 uint64 + + // delta_pic_order_always_zero_flag if true indicates delta_pic_order_cnt[0] + // and delta_pic_order_cnt[1] are absent from slice headers and inferred as 0. + DeltaPicOrderAlwaysZeroFlag bool + + // offset_for_non_ref_pic is used to calculate the picture order count of a + // non-reference picture as specified in clause 8.2.1. + OffsetForNonRefPic int64 + + // offset_for_top_to_bottom_field is used to calculate the picture order count + // of a bottom field as specified in clause 8.2.1. + OffsetForTopToBottomField int64 + + // num_ref_frames_in_pic_order_cnt_cycle is used in the decoding process for + // picture order count as specified in clause 8.2.1. + NumRefFramesInPicOrderCntCycle uint64 + + // offset_for_ref_frame[ i ] is an element of a list of + // num_ref_frames_in_pic_order_cnt_cycle values used in the decoding process + // for picture order count as specified in clause 8.2.1.
+ OffsetForRefFrameList []int + + // max_num_ref_frames specifies the max number of short-term and long-term + // reference frames, complementary reference field pairs, and non-paired + // reference fields that may be used by the decoding process for inter prediction. + MaxNumRefFrames uint64 + + // gaps_in_frame_num_value_allowed_flag specifies the allowed values of + // frame_num as specified in clause 7.4.3 and the decoding process in case of + // an inferred gap between values of frame_num as specified in clause 8.2.5.2. + GapsInFrameNumValueAllowed bool + + // pic_width_in_mbs_minus1 plus 1 specifies the width of each decoded picture + // in units of macroblocks. See eq 7-13. + PicWidthInMBSMinus1 uint64 + + // pic_height_in_map_units_minus1 plus 1 specifies the height in slice group + // map units of a decoded frame or field. See eq 7-16. + PicHeightInMapUnitsMinus1 uint64 + + // frame_mbs_only_flag if 0 coded pictures of the coded video sequence may be + // coded fields or coded frames. If 1 every coded picture of the coded video + // sequence is a coded frame containing only frame macroblocks. + FrameMBSOnlyFlag bool + + // mb_adaptive_frame_field_flag if 0 specifies no switching between + // frame and field macroblocks within a picture. If 1 specifies the possible + // use of switching between frame and field macroblocks within frames. + MBAdaptiveFrameFieldFlag bool + + // direct_8x8_inference_flag specifies the method used in the derivation + // process for luma motion vectors for B_Skip, B_Direct_16x16 and B_Direct_8x8 + // as specified in clause 8.4.1.2. + Direct8x8InferenceFlag bool + + // frame_cropping_flag if 1 then frame cropping offset parameters are next in + // the sequence parameter set. If 0 they are not.
+ FrameCroppingFlag bool + + // frame_crop_left_offset, frame_crop_right_offset, frame_crop_top_offset, + // frame_crop_bottom_offset specify the samples of the pictures in the coded + // video sequence that are output from the decoding process, in terms of a + // rectangular region specified in frame coordinates for output. + FrameCropLeftOffset uint64 + FrameCropRightOffset uint64 + FrameCropTopOffset uint64 + FrameCropBottomOffset uint64 + + // vui_parameters_present_flag if 1 the vui_parameters() syntax structure is + // present, otherwise it is not. + VUIParametersPresentFlag bool + + // The vui_parameters() syntax structure specified in appendix E. + VUIParameters *VUIParameters } // NewSPS parses a sequence parameter set raw byte sequence from br following @@ -225,41 +332,158 @@ func NewSPS(rbsp []byte, showPacket bool) (*SPS, error) { // SPS describes a sequence parameter set as defined by section E.1.1 in the // Specifications. +// Semantics for fields are defined in section E.2.1. Comments on fields are +// excerpts from this section. type VUIParameters struct { - AspectRatioInfoPresentFlag bool - AspectRatioIDC uint8 - SARWidth uint32 - SARHeight uint32 - OverscanInfoPresentFlag bool - OverscanAppropriateFlag bool - VideoSignalTypePresentFlag bool - VideoFormat uint8 - VideoFullRangeFlag bool - ColorDescriptionPresentFlag bool - ColorPrimaries uint8 - TransferCharacteristics uint8 - MatrixCoefficients uint8 - ChromaLocInfoPresentFlag bool - ChromaSampleLocTypeTopField uint64 - ChromaSampleLocTypeBottomField uint64 - TimingInfoPresentFlag bool - NumUnitsInTick uint32 - TimeScale uint32 - FixedFrameRateFlag bool - NALHRDParametersPresentFlag bool - NALHRDParameters *HRDParameters - VCLHRDParametersPresentFlag bool - VCLHRDParameters *HRDParameters - LowDelayHRDFlag bool - PicStructPresentFlag bool - BitstreamRestrictionFlag bool + // aspect_ratio_info_present_flag if 1 then aspect_ratio_idc is present, + // otherwise it is not.
+ AspectRatioInfoPresentFlag bool + + // aspect_ratio_idc specifies the value of sample aspect ratio of the luma samples. + AspectRatioIDC uint8 + + // sar_width indicates the horizontal size of the sample aspect ratio (in + // arbitrary units). + SARWidth uint32 + + // sar_height indicates the vertical size of the sample aspect ratio (in the + // same arbitrary units as sar_width). + SARHeight uint32 + + // overscan_info_present_flag if 1 then overscan_appropriate_flag is present, + // otherwise if 0, then the display method for the video signal is unspecified. + OverscanInfoPresentFlag bool + + // overscan_appropriate_flag if 1 then the cropped decoded pictures output + // are suitable for display using overscan, otherwise if 0, then the cropped + // decoded pictures output should not be displayed using overscan. + OverscanAppropriateFlag bool + + // video_signal_type_present_flag equal to 1 specifies that video_format, + // video_full_range_flag and colour_description_present_flag are present, + // otherwise if 0, then they are not present. + VideoSignalTypePresentFlag bool + + // video_format indicates the representation of the pictures as specified in + // Table E-2, before being coded in accordance with this Recommendation | + // International Standard. + VideoFormat uint8 + + // video_full_range_flag indicates the black level and range of the luma and + // chroma signals as derived from E′_Y, E′_PB, and E′_PR or E′_R, E′_G, + // and E′_B real-valued component signals. + VideoFullRangeFlag bool + + // colour_description_present_flag if 1 specifies that colour_primaries, + // transfer_characteristics and matrix_coefficients are present, otherwise if + // 0 then they are not present. + ColorDescriptionPresentFlag bool + + // colour_primaries indicates the chromaticity coordinates of the source + // primaries as specified in Table E-3 in terms of the CIE 1931 definition of + // x and y as specified by ISO 11664-1.
+ ColorPrimaries uint8 + + // transfer_characteristics either indicates the reference opto-electronic + // transfer characteristic function of the source picture, or indicates the + // inverse of the reference electro-optical transfer characteristic function. + TransferCharacteristics uint8 + + // matrix_coefficients describes the matrix coefficients used in deriving luma + // and chroma signals from the green, blue, and red, or Y, Z, and X primaries, + // as specified in Table E-5. + MatrixCoefficients uint8 + + // chroma_loc_info_present_flag if 1 specifies that chroma_sample_loc_type_top_field + // and chroma_sample_loc_type_bottom_field are present, otherwise if 0, + // they are not present. + ChromaLocInfoPresentFlag bool + + // chroma_sample_loc_type_top_field and chroma_sample_loc_type_bottom_field + // specify the location of chroma samples. + ChromaSampleLocTypeTopField, ChromaSampleLocTypeBottomField uint64 + + // timing_info_present_flag if 1 specifies that num_units_in_tick, time_scale + // and fixed_frame_rate_flag are present in the bitstream, otherwise if 0, + // they are not present. + TimingInfoPresentFlag bool + + // num_units_in_tick is the number of time units of a clock operating at the + // frequency time_scale Hz that corresponds to one increment (called a clock + // tick) of a clock tick counter. + NumUnitsInTick uint32 + + // time_scale is the number of time units that pass in one second. + TimeScale uint32 + + // fixed_frame_rate_flag if 1 indicates that the temporal distance + // between the HRD output times of any two consecutive pictures in output + // order is constrained as follows. fixed_frame_rate_flag equal to 0 indicates + // that no such constraints apply to the temporal distance between the HRD + // output times of any two consecutive pictures in output order. 
+ FixedFrameRateFlag bool + + // nal_hrd_parameters_present_flag if 1 then NAL HRD parameters (pertaining to + // Type II bitstream conformance) are present, otherwise if 0, then they + // are not present. + NALHRDParametersPresentFlag bool + + // The NAL hrd_parameters() syntax structure as specified in section E.1.2. + NALHRDParameters *HRDParameters + + // vcl_hrd_parameters_present_flag if 1 specifies that VCL HRD parameters + // (pertaining to all bitstream conformance) are present, otherwise if 0, then + // they are not present. + VCLHRDParametersPresentFlag bool + + // The VCL hrd_parameters() syntax structure as specified in section E.1.2. + VCLHRDParameters *HRDParameters + + // low_delay_hrd_flag specifies the HRD operational mode as specified in Annex C. + LowDelayHRDFlag bool + + // pic_struct_present_flag if 1 then picture timing SEI messages (clause D.2.3) + // are present that include the pic_struct syntax element, otherwise if 0, then + // not present. + PicStructPresentFlag bool + + // bitstream_restriction_flag if 1, then the following coded video sequence + // bitstream restriction parameters are present, otherwise if 0, then they are + // not present. + BitstreamRestrictionFlag bool + + // motion_vectors_over_pic_boundaries_flag if 0 then no sample outside the + // picture boundaries and no sample at a fractional sample position for which + // the sample value is derived using one or more samples outside the picture + // boundaries is used for inter prediction of any sample, otherwise if 1, + // indicates that one or more samples outside picture boundaries may be used + // in inter prediction.
MotionVectorsOverPicBoundariesFlag bool - MaxBytesPerPicDenom uint64 - MaxBitsPerMBDenom uint64 - Log2MaxMVLengthHorizontal uint64 - Log2MaxMVLengthVertical uint64 - MaxNumReorderFrames uint64 - MaxDecFrameBuffering uint64 + + // max_bytes_per_pic_denom indicates a number of bytes not exceeded by the sum + // of the sizes of the VCL NAL units associated with any coded picture in the + // coded video sequence. + MaxBytesPerPicDenom uint64 + + // max_bits_per_mb_denom indicates an upper bound for the number of coded bits + // of macroblock_layer() data for any macroblock in any picture of the coded + // video sequence. + MaxBitsPerMBDenom uint64 + + // log2_max_mv_length_horizontal and log2_max_mv_length_vertical indicate the + // maximum absolute value of a decoded horizontal and vertical motion vector + // component, respectively, in 1⁄4 luma sample units, for all pictures in the + // coded video sequence. + Log2MaxMVLengthHorizontal, Log2MaxMVLengthVertical uint64 + + // max_num_reorder_frames indicates an upper bound for the number of frames + // buffers, in the decoded picture buffer (DPB), that are required for storing + // frames, complementary field pairs, and non-paired fields before output. + MaxNumReorderFrames uint64 + + // max_dec_frame_buffering specifies the required size of the HRD decoded + // picture buffer (DPB) in units of frame buffers. + MaxDecFrameBuffering uint64 } // NewVUIParameters parses video usability information parameters from br @@ -357,17 +581,52 @@ func NewVUIParameters(br *bits.BitReader) (*VUIParameters, error) { // HRDParameters describes hypothetical reference decoder parameters as defined // by section E.1.2 in the specifications. +// Field semantics are defined in section E.2.2. Comments on fields are excerpts +// from section E.2.2. 
type HRDParameters struct { - CPBCntMinus1 uint64 - BitRateScale uint8 - CPBSizeScale uint8 - BitRateValueMinus1 []uint64 - CPBSizeValueMinus1 []uint64 - CBRFlag []bool + // cpb_cnt_minus1 plus 1 specifies the number of alternative CPB specifications + // in the bitstream. + CPBCntMinus1 uint64 + + // bit_rate_scale (together with bit_rate_value_minus1[ SchedSelIdx ]) + // specifies the maximum input bit rate of the SchedSelIdx-th CPB. + BitRateScale uint8 + + // cpb_size_scale (together with cpb_size_value_minus1[ SchedSelIdx ]) + // specifies the CPB size of the SchedSelIdx-th CPB. + CPBSizeScale uint8 + + // bit_rate_value_minus1[ SchedSelIdx ] (together with bit_rate_scale) + // specifies the maximum input bit rate for the SchedSelIdx-th CPB. + BitRateValueMinus1 []uint64 + + // cpb_size_value_minus1[ SchedSelIdx ] is used together with cpb_size_scale + // to specify the SchedSelIdx-th CPB size. + CPBSizeValueMinus1 []uint64 + + // cbr_flag[ SchedSelIdx ] equal to 0 specifies that to decode this bitstream + // by the HRD using the SchedSelIdx-th CPB specification, the hypothetical + // stream delivery scheduler (HSS) operates in an intermittent bit rate mode, + // otherwise if 1 specifies that the HSS operates in a constant bit rate mode. + CBRFlag []bool + + // initial_cpb_removal_delay_length_minus1 specifies the length in bits of the + // initial_cpb_removal_delay[ SchedSelIdx ] and + // initial_cpb_removal_delay_offset[ SchedSelIdx ] syntax elements of the + // buffering period SEI message. InitialCPBRemovalDelayLenMinus1 uint8 - CPBRemovalDelayLenMinus1 uint8 - DPBOutputDelayLenMinus1 uint8 - TimeOffsetLen uint8 + + // cpb_removal_delay_length_minus1 specifies the length in bits of the + // cpb_removal_delay syntax element. + CPBRemovalDelayLenMinus1 uint8 + + // dpb_output_delay_length_minus1 specifies the length in bits of the + // dpb_output_delay syntax element.
+ DPBOutputDelayLenMinus1 uint8 + + // time_offset_length greater than 0 specifies the length in bits of the + // time_offset syntax element. + TimeOffsetLen uint8 } // NewHRDParameters parses hypothetical reference decoder parameter from br