av/codec/h264/h264dec/sps.go

702 lines
20 KiB
Go
Raw Normal View History

package h264dec
import (
"bytes"
2019-07-19 09:14:45 +03:00
"bitbucket.org/ausocean/av/codec/h264/h264dec/bits"
"github.com/pkg/errors"
)
// Default scaling tables used when a stream does not transmit its own
// scaling lists (see Tables 7-2, 7-3 and 7-4 of the specifications).
var (
	// DefaultScalingMatrix4x4 holds the default 4x4 scaling values as
	// flattened 4x4 grids; entry 0 carries the same values as
	// Default4x4IntraList and entry 1 the same values as Default4x4InterList.
	DefaultScalingMatrix4x4 = [][]int{
		{6, 13, 20, 28, 13, 20, 28, 32, 20, 28, 32, 37, 28, 32, 37, 42},
		{10, 14, 20, 24, 14, 20, 24, 27, 20, 24, 27, 30, 24, 27, 30, 34},
	}

	// DefaultScalingMatrix8x8 holds the default 8x8 scaling values as
	// flattened 8x8 grids; entry 0 carries the same values as
	// Default8x8IntraList and entry 1 the same values as Default8x8InterList.
	DefaultScalingMatrix8x8 = [][]int{
		{6, 10, 13, 16, 18, 23, 25, 27,
			10, 11, 16, 18, 23, 25, 27, 29,
			13, 16, 18, 23, 25, 27, 29, 31,
			16, 18, 23, 25, 27, 29, 31, 33,
			18, 23, 25, 27, 29, 31, 33, 36,
			23, 25, 27, 29, 31, 33, 36, 38,
			25, 27, 29, 31, 33, 36, 38, 40,
			27, 29, 31, 33, 36, 38, 40, 42},
		{9, 13, 15, 17, 19, 21, 22, 24,
			13, 13, 17, 19, 21, 22, 24, 25,
			15, 17, 19, 21, 22, 24, 25, 27,
			17, 19, 21, 22, 24, 25, 27, 28,
			19, 21, 22, 24, 25, 27, 28, 30,
			21, 22, 24, 25, 27, 28, 30, 32,
			22, 24, 25, 27, 28, 30, 32, 33,
			24, 25, 27, 28, 30, 32, 33, 35},
	}

	// Default4x4IntraList is the default 4x4 intra scaling list
	// (Default_4x4_Intra in Table 7-3). Entries 6 to 9 are 28, matching the
	// four 28s of DefaultScalingMatrix4x4[0].
	Default4x4IntraList = []int{6, 13, 13, 20, 20, 20, 28, 28, 28, 28, 32, 32, 32, 37, 37, 42}

	// Default4x4InterList is the default 4x4 inter scaling list
	// (Default_4x4_Inter in Table 7-3).
	Default4x4InterList = []int{10, 14, 14, 20, 20, 20, 24, 24, 24, 24, 27, 27, 27, 30, 30, 34}

	// Default8x8IntraList is the default 8x8 intra scaling list
	// (Default_8x8_Intra in Table 7-4).
	Default8x8IntraList = []int{
		6, 10, 10, 13, 11, 13, 16, 16, 16, 16, 18, 18, 18, 18, 18, 23,
		23, 23, 23, 23, 23, 25, 25, 25, 25, 25, 25, 25, 27, 27, 27, 27,
		27, 27, 27, 27, 29, 29, 29, 29, 29, 29, 29, 31, 31, 31, 31, 31,
		31, 33, 33, 33, 33, 33, 36, 36, 36, 36, 38, 38, 38, 40, 40, 42}

	// Default8x8InterList is the default 8x8 inter scaling list
	// (Default_8x8_Inter in Table 7-4).
	Default8x8InterList = []int{
		9, 13, 13, 15, 13, 15, 17, 17, 17, 17, 19, 19, 19, 19, 19, 21,
		21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 24, 24, 24, 24,
		24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 27, 27, 27, 27, 27,
		27, 28, 28, 28, 28, 28, 30, 30, 30, 30, 32, 32, 32, 33, 33, 35}

	// ScalingList4x4 maps a scaling list index (0 to 11) to its default
	// list: 0-2 use the 4x4 intra list, 3-5 the 4x4 inter list, and indices
	// 6-11 alternate between the 8x8 intra and 8x8 inter lists.
	//
	// The map values alias the Default* slices above, so writing through a
	// map entry modifies the shared defaults.
	ScalingList4x4 = map[int][]int{
		0:  Default4x4IntraList,
		1:  Default4x4IntraList,
		2:  Default4x4IntraList,
		3:  Default4x4InterList,
		4:  Default4x4InterList,
		5:  Default4x4InterList,
		6:  Default8x8IntraList,
		7:  Default8x8InterList,
		8:  Default8x8IntraList,
		9:  Default8x8InterList,
		10: Default8x8IntraList,
		11: Default8x8InterList,
	}

	// ScalingList8x8 refers to the very same map as ScalingList4x4; the 8x8
	// defaults live at indices 6 to 11.
	ScalingList8x8 = ScalingList4x4
)
// SPS describes a sequence parameter set as defined by section 7.3.2.1.1 in
// the Specifications. Field names follow the syntax element names of that
// section; fields ending in Flag hold single-bit syntax elements.
type SPS struct {
// Profile is read from the first 8 bits of the payload, followed by six
// single-bit constraint flags.
Profile int
Constraint0 int
Constraint1 int
Constraint2 int
Constraint3 int
Constraint4 int
Constraint5 int
// LevelIDC is an 8-bit value read after two reserved bits.
LevelIDC int
// SPSID identifies this sequence parameter set.
SPSID int
ChromaFormatIDC int
// The fields from SeparateColorPlaneFlag to UseDefaultScalingMatrix8x8Flag
// are only present in the bitstream for a fixed set of profiles;
// SeparateColorPlaneFlag is additionally only present for 4:4:4 chroma.
SeparateColorPlaneFlag bool
BitDepthLumaMinus8 int
BitDepthChromaMinus8 int
QPPrimeYZeroTransformBypassFlag bool
SeqScalingMatrixPresentFlag bool
// SeqScalingListPresentFlag has one entry per scaling list signalled in
// the stream (8 lists, or 12 for 4:4:4 chroma).
SeqScalingListPresentFlag []bool
// ScalingList4x4 and ScalingList8x8 are intended to hold the sequence
// scaling lists; the UseDefault... slices are intended to record which
// lists fall back to the default tables.
ScalingList4x4 [][]int
UseDefaultScalingMatrix4x4Flag []bool
ScalingList8x8 [][]int
UseDefaultScalingMatrix8x8Flag []bool
Log2MaxFrameNumMinus4 int
// PicOrderCountType selects which of the following picture order count
// fields are present: Log2MaxPicOrderCntLSBMin4 for type 0, and the fields
// from DeltaPicOrderAlwaysZeroFlag to OffsetForRefFrameList for type 1.
PicOrderCountType int
Log2MaxPicOrderCntLSBMin4 int
DeltaPicOrderAlwaysZeroFlag bool
OffsetForNonRefPic int
OffsetForTopToBottomField int
NumRefFramesInPicOrderCntCycle int
// OffsetForRefFrameList has NumRefFramesInPicOrderCntCycle entries.
OffsetForRefFrameList []int
MaxNumRefFrames int
GapsInFrameNumValueAllowed bool
PicWidthInMBSMinus1 int
PicHeightInMapUnitsMinus1 int
FrameMBSOnlyFlag bool
// MBAdaptiveFrameFieldFlag is only present when FrameMBSOnlyFlag is false.
MBAdaptiveFrameFieldFlag bool
Direct8x8InferenceFlag bool
// The four FrameCrop... offsets are only present when FrameCroppingFlag is
// true.
FrameCroppingFlag bool
FrameCropLeftOffset int
FrameCropRightOffset int
FrameCropTopOffset int
FrameCropBottomOffset int
// VUIParameters is meaningful only when VUIParametersPresentFlag is true.
VUIParametersPresentFlag bool
VUIParameters *VUIParameters
}
// NewSPS parses a sequence parameter set raw byte sequence payload from rbsp
// following the syntax structure specified in section 7.3.2.1.1, and returns
// it as a new SPS.
//
// showPacket is kept for interface compatibility and is currently unused.
//
// Scaling lists transmitted in the stream are stored in the returned SPS
// (ScalingList4x4 / ScalingList8x8, indexed by list number); entries for
// lists that were not transmitted are left nil. When the VUI present flag is
// set the VUI parameters are parsed into SPS.VUIParameters.
//
// A non-nil error is returned, and no SPS, if any syntax element cannot be
// read.
func NewSPS(rbsp []byte, showPacket bool) (*SPS, error) {
	logger.Printf("debug: SPS RBSP %d bytes %d bits\n", len(rbsp), len(rbsp)*8)

	// Log at most the first eight bytes; shorter payloads must not be sliced
	// past their length.
	head := rbsp
	if len(head) > 8 {
		head = head[:8]
	}
	logger.Printf("debug: \t%#v\n", head)

	sps := SPS{}
	br := bits.NewBitReader(bytes.NewReader(rbsp))

	// readFlag reads one bit and reports whether it is set.
	readFlag := func(name string) (bool, error) {
		v, err := br.ReadBits(1)
		if err != nil {
			return false, errors.Wrap(err, "could not read "+name)
		}
		return v == 1, nil
	}

	// parseUe and parseSe read one unsigned/signed value via readUe/readSe
	// into dst.
	parseUe := func(dst *int, name string) error {
		v, err := readUe(br)
		if err != nil {
			return errors.Wrap(err, "could not parse "+name)
		}
		*dst = v
		return nil
	}
	parseSe := func(dst *int, name string) error {
		v, err := readSe(br)
		if err != nil {
			return errors.Wrap(err, "could not parse "+name)
		}
		*dst = v
		return nil
	}

	err := readFields(br,
		[]field{
			{&sps.Profile, "ProfileIDC", 8},
			{&sps.Constraint0, "Constraint0", 1},
			{&sps.Constraint1, "Constraint1", 1},
			{&sps.Constraint2, "Constraint2", 1},
			{&sps.Constraint3, "Constraint3", 1},
			{&sps.Constraint4, "Constraint4", 1},
			{&sps.Constraint5, "Constraint5", 1},
		},
	)
	if err != nil {
		return nil, err
	}

	if _, err = br.ReadBits(2); err != nil {
		return nil, errors.Wrap(err, "could not read ReservedZeroBits")
	}

	b, err := br.ReadBits(8)
	if err != nil {
		return nil, errors.Wrap(err, "could not read Level")
	}
	sps.LevelIDC = int(b)

	if err = parseUe(&sps.SPSID, "ID"); err != nil {
		return nil, err
	}
	if err = parseUe(&sps.ChromaFormatIDC, "ChromaFormatIDC"); err != nil {
		return nil, err
	}

	// The chroma/bit-depth/scaling-matrix section is only present for these
	// profile_idc values.
	isProfileIDC := []int{100, 110, 122, 244, 44, 83, 86, 118, 128, 138, 139, 134, 135}
	if isInList(isProfileIDC, sps.Profile) {
		if sps.ChromaFormatIDC == chroma444 {
			sps.SeparateColorPlaneFlag, err = readFlag("UseSeparateColorPlaneFlag")
			if err != nil {
				return nil, err
			}
		}
		if err = parseUe(&sps.BitDepthLumaMinus8, "BitDepthLumaMinus8"); err != nil {
			return nil, err
		}
		if err = parseUe(&sps.BitDepthChromaMinus8, "BitDepthChromaMinus8"); err != nil {
			return nil, err
		}
		sps.QPPrimeYZeroTransformBypassFlag, err = readFlag("QPrimeYZeroTransformBypass")
		if err != nil {
			return nil, err
		}
		sps.SeqScalingMatrixPresentFlag, err = readFlag("SeqScalingMatrixPresent")
		if err != nil {
			return nil, err
		}

		if sps.SeqScalingMatrixPresentFlag {
			numLists := 12
			if sps.ChromaFormatIDC != chroma444 {
				numLists = 8
			}
			logger.Printf("debug: \tbuilding Scaling matrix for %d elements\n", numLists)

			sps.SeqScalingListPresentFlag = make([]bool, numLists)
			sps.ScalingList4x4 = make([][]int, 6)
			sps.ScalingList8x8 = make([][]int, numLists-6)

			for i := 0; i < numLists; i++ {
				present, err := readFlag("SeqScalingList")
				if err != nil {
					return nil, err
				}
				sps.SeqScalingListPresentFlag[i] = present
				if !present {
					continue
				}

				// Parse into a fresh slice owned by this SPS so that the
				// package-level default tables are never overwritten. The
				// package-level map supplies the default list for index i.
				if i < 6 {
					list := make([]int, 16)
					if err := scalingList(br, list, 16, ScalingList4x4[i]); err != nil {
						return nil, errors.Wrap(err, "could not parse 4x4 scaling list")
					}
					sps.ScalingList4x4[i] = list
				} else {
					list := make([]int, 64)
					if err := scalingList(br, list, 64, ScalingList8x8[i]); err != nil {
						return nil, errors.Wrap(err, "could not parse 8x8 scaling list")
					}
					sps.ScalingList8x8[i-6] = list
				}
			}
		}
	}

	if err = parseUe(&sps.Log2MaxFrameNumMinus4, "Log2MaxFrameNumMinus4"); err != nil {
		return nil, err
	}
	if err = parseUe(&sps.PicOrderCountType, "PicOrderCountType"); err != nil {
		return nil, err
	}

	switch sps.PicOrderCountType {
	case 0:
		if err = parseUe(&sps.Log2MaxPicOrderCntLSBMin4, "Log2MaxPicOrderCntLSBMin4"); err != nil {
			return nil, err
		}
	case 1:
		sps.DeltaPicOrderAlwaysZeroFlag, err = readFlag("DeltaPicOrderAlwaysZero")
		if err != nil {
			return nil, err
		}
		if err = parseSe(&sps.OffsetForNonRefPic, "OffsetForNonRefPic"); err != nil {
			return nil, err
		}
		if err = parseSe(&sps.OffsetForTopToBottomField, "OffsetForTopToBottomField"); err != nil {
			return nil, err
		}
		if err = parseUe(&sps.NumRefFramesInPicOrderCntCycle, "NumRefFramesInPicOrderCntCycle"); err != nil {
			return nil, err
		}
		for i := 0; i < sps.NumRefFramesInPicOrderCntCycle; i++ {
			var offset int
			if err = parseSe(&offset, "OffsetForRefFrameList"); err != nil {
				return nil, err
			}
			sps.OffsetForRefFrameList = append(sps.OffsetForRefFrameList, offset)
		}
	}

	if err = parseUe(&sps.MaxNumRefFrames, "MaxNumRefFrames"); err != nil {
		return nil, err
	}
	sps.GapsInFrameNumValueAllowed, err = readFlag("GapsInFrameNumValueAllowed")
	if err != nil {
		return nil, err
	}
	if err = parseUe(&sps.PicWidthInMBSMinus1, "PicWidthInMbsMinus1"); err != nil {
		return nil, err
	}
	if err = parseUe(&sps.PicHeightInMapUnitsMinus1, "PicHeightInMapUnitsMinus1"); err != nil {
		return nil, err
	}

	sps.FrameMBSOnlyFlag, err = readFlag("FrameMbsOnly")
	if err != nil {
		return nil, err
	}
	if !sps.FrameMBSOnlyFlag {
		sps.MBAdaptiveFrameFieldFlag, err = readFlag("MBAdaptiveFrameField")
		if err != nil {
			return nil, err
		}
	}

	err = readFlags(br, []flag{
		{&sps.Direct8x8InferenceFlag, "Direct8x8Inference"},
		{&sps.FrameCroppingFlag, "FrameCropping"},
	})
	if err != nil {
		return nil, err
	}

	if sps.FrameCroppingFlag {
		if err = parseUe(&sps.FrameCropLeftOffset, "FrameCropLeftOffset"); err != nil {
			return nil, err
		}
		if err = parseUe(&sps.FrameCropRightOffset, "FrameCropRightOffset"); err != nil {
			return nil, err
		}
		if err = parseUe(&sps.FrameCropTopOffset, "FrameCropTopOffset"); err != nil {
			return nil, err
		}
		if err = parseUe(&sps.FrameCropBottomOffset, "FrameCropBottomOffset"); err != nil {
			return nil, err
		}
	}

	sps.VUIParametersPresentFlag, err = readFlag("VuiParametersPresent")
	if err != nil {
		return nil, err
	}
	if sps.VUIParametersPresentFlag {
		// VUI parameters, Annex E.1.1.
		sps.VUIParameters, err = NewVUIParameters(br)
		if err != nil {
			return nil, errors.Wrap(err, "could not parse VUI parameters")
		}
	}

	return &sps, nil
}
// VUIParameters describes video usability information parameters as defined
// by section E.1.1 in the Specifications. Fields ending in Flag hold
// single-bit syntax elements.
type VUIParameters struct {
// AspectRatioIDC is an 8-bit value present only when
// AspectRatioInfoPresentFlag is set. SARWidth and SARHeight are 16-bit
// values present only when AspectRatioIDC signals an extended sample aspect
// ratio.
AspectRatioInfoPresentFlag bool
AspectRatioIDC int
SARWidth int
SARHeight int
// OverscanAppropriateFlag is present only when OverscanInfoPresentFlag is
// set.
OverscanInfoPresentFlag bool
OverscanAppropriateFlag bool
// VideoFormat (3 bits), VideoFullRangeFlag and ColorDescriptionPresentFlag
// are present only when VideoSignalTypePresentFlag is set; the three 8-bit
// colour fields additionally require ColorDescriptionPresentFlag.
VideoSignalTypePresentFlag bool
VideoFormat int
VideoFullRangeFlag bool
ColorDescriptionPresentFlag bool
ColorPrimaries int
TransferCharacteristics int
MatrixCoefficients int
// The two chroma sample location types are present only when
// ChromaLocInfoPresentFlag is set.
ChromaLocInfoPresentFlag bool
ChromaSampleLocTypeTopField int
ChromaSampleLocTypeBottomField int
// NumUnitsInTick and TimeScale (32 bits each) and FixedFrameRateFlag are
// present only when TimingInfoPresentFlag is set.
TimingInfoPresentFlag bool
NumUnitsInTick int
TimeScale int
FixedFrameRateFlag bool
// NALHRDParameters and VCLHRDParameters are parsed only when their
// respective present flag is set; LowDelayHRDFlag is present when either
// flag is set.
NALHRDParametersPresentFlag bool
NALHRDParameters *HRDParameters
VCLHRDParametersPresentFlag bool
VCLHRDParameters *HRDParameters
LowDelayHRDFlag bool
PicStructPresentFlag bool
// The remaining fields are present only when BitstreamRestrictionFlag is
// set.
BitstreamRestrictionFlag bool
MotionVectorsOverPicBoundariesFlag bool
MaxBytesPerPicDenom int
MaxBitsPerMBDenom int
Log2MaxMVLengthHorizontal int
Log2MaxMVLengthVertical int
MaxNumReorderFrames int
MaxDecFrameBuffering int
}
// NewVUIParameters parses video usability information parameters from br
// following the syntax structure specified in section E.1.1, and returns them
// as a new VUIParameters.
//
// A non-nil error is returned, and no VUIParameters, if any syntax element
// cannot be read.
func NewVUIParameters(br *bits.BitReader) (*VUIParameters, error) {
	// extendedSAR is the aspect_ratio_idc value (Extended_SAR in Table E-1)
	// signalling that explicit sar_width and sar_height fields follow. The
	// field is 8 bits wide, so the value must fit in a byte.
	const extendedSAR = 255

	// readFlag reads one bit and reports whether it is set.
	readFlag := func(name string) (bool, error) {
		v, err := br.ReadBits(1)
		if err != nil {
			return false, errors.Wrap(err, "could not read "+name)
		}
		return v == 1, nil
	}

	// parseUe reads one value via readUe into dst.
	parseUe := func(dst *int, name string) error {
		v, err := readUe(br)
		if err != nil {
			return errors.Wrap(err, "could not parse "+name)
		}
		*dst = v
		return nil
	}

	p := &VUIParameters{}
	var err error

	p.AspectRatioInfoPresentFlag, err = readFlag("AspectRatioInfoPresent")
	if err != nil {
		return nil, err
	}
	if p.AspectRatioInfoPresentFlag {
		b, err := br.ReadBits(8)
		if err != nil {
			return nil, errors.Wrap(err, "could not read AspectRatio")
		}
		p.AspectRatioIDC = int(b)

		if p.AspectRatioIDC == extendedSAR {
			b, err = br.ReadBits(16)
			if err != nil {
				return nil, errors.Wrap(err, "could not read SarWidth")
			}
			p.SARWidth = int(b)

			b, err = br.ReadBits(16)
			if err != nil {
				return nil, errors.Wrap(err, "could not read SarHeight")
			}
			p.SARHeight = int(b)
		}
	}

	p.OverscanInfoPresentFlag, err = readFlag("OverscanInfoPresent")
	if err != nil {
		return nil, err
	}
	if p.OverscanInfoPresentFlag {
		p.OverscanAppropriateFlag, err = readFlag("OverscanAppropriate")
		if err != nil {
			return nil, err
		}
	}

	p.VideoSignalTypePresentFlag, err = readFlag("VideoSignalTypePresent")
	if err != nil {
		return nil, err
	}
	if p.VideoSignalTypePresentFlag {
		b, err := br.ReadBits(3)
		if err != nil {
			return nil, errors.Wrap(err, "could not read VideoFormat")
		}
		p.VideoFormat = int(b)

		p.VideoFullRangeFlag, err = readFlag("VideoFullRange")
		if err != nil {
			return nil, err
		}
		p.ColorDescriptionPresentFlag, err = readFlag("ColorDescriptionPresent")
		if err != nil {
			return nil, err
		}
		if p.ColorDescriptionPresentFlag {
			err = readFields(br,
				[]field{
					{&p.ColorPrimaries, "ColorPrimaries", 8},
					{&p.TransferCharacteristics, "TransferCharacteristics", 8},
					{&p.MatrixCoefficients, "MatrixCoefficients", 8},
				},
			)
			if err != nil {
				return nil, err
			}
		}
	}

	p.ChromaLocInfoPresentFlag, err = readFlag("ChromaLocInfoPresent")
	if err != nil {
		return nil, err
	}
	if p.ChromaLocInfoPresentFlag {
		if err = parseUe(&p.ChromaSampleLocTypeTopField, "ChromaSampleLocTypeTopField"); err != nil {
			return nil, err
		}
		if err = parseUe(&p.ChromaSampleLocTypeBottomField, "ChromaSampleLocTypeBottomField"); err != nil {
			return nil, err
		}
	}

	p.TimingInfoPresentFlag, err = readFlag("TimingInfoPresent")
	if err != nil {
		return nil, err
	}
	if p.TimingInfoPresentFlag {
		err = readFields(br, []field{
			{&p.NumUnitsInTick, "NumUnitsInTick", 32},
			{&p.TimeScale, "TimeScale", 32},
		})
		if err != nil {
			return nil, err
		}
		p.FixedFrameRateFlag, err = readFlag("FixedFrameRate")
		if err != nil {
			return nil, err
		}
	}

	p.NALHRDParametersPresentFlag, err = readFlag("NalHrdParametersPresent")
	if err != nil {
		return nil, err
	}
	if p.NALHRDParametersPresentFlag {
		p.NALHRDParameters, err = NewHRDParameters(br)
		if err != nil {
			return nil, errors.Wrap(err, "could not get hrdParameters")
		}
	}

	p.VCLHRDParametersPresentFlag, err = readFlag("VclHrdParametersPresent")
	if err != nil {
		return nil, err
	}
	if p.VCLHRDParametersPresentFlag {
		p.VCLHRDParameters, err = NewHRDParameters(br)
		if err != nil {
			return nil, errors.Wrap(err, "could not get hrdParameters")
		}
	}

	if p.NALHRDParametersPresentFlag || p.VCLHRDParametersPresentFlag {
		p.LowDelayHRDFlag, err = readFlag("LowHrdDelay")
		if err != nil {
			return nil, err
		}
	}

	err = readFlags(br, []flag{
		{&p.PicStructPresentFlag, "PicStructPresent"},
		{&p.BitstreamRestrictionFlag, "BitStreamRestriction"},
	})
	if err != nil {
		return nil, err
	}

	if p.BitstreamRestrictionFlag {
		p.MotionVectorsOverPicBoundariesFlag, err = readFlag("MotionVectorsOverPicBoundaries")
		if err != nil {
			return nil, err
		}
		if err = parseUe(&p.MaxBytesPerPicDenom, "MaxBytesPerPicDenom"); err != nil {
			return nil, err
		}
		if err = parseUe(&p.MaxBitsPerMBDenom, "MaxBitsPerMbDenom"); err != nil {
			return nil, err
		}
		if err = parseUe(&p.Log2MaxMVLengthHorizontal, "Log2MaxMvLengthHorizontal"); err != nil {
			return nil, err
		}
		if err = parseUe(&p.Log2MaxMVLengthVertical, "Log2MaxMvLengthVertical"); err != nil {
			return nil, err
		}
		if err = parseUe(&p.MaxNumReorderFrames, "MaxNumReorderFrames"); err != nil {
			return nil, err
		}
		if err = parseUe(&p.MaxDecFrameBuffering, "MaxDecFrameBuffering"); err != nil {
			return nil, err
		}
	}

	return p, nil
}
// HRDParameters describes hypothetical reference decoder parameters as defined
// by section E.1.2 in the specifications.
type HRDParameters struct {
// CPBCntMinus1 plus one is the number of entries parsed into each of
// BitRateValueMinus1, CPBSizeValueMinus1 and CBRFlag.
CPBCntMinus1 int
// BitRateScale and CPBSizeScale are 4-bit values.
BitRateScale int
CPBSizeScale int
// The three slices below are indexed by schedule selection index
// (SchedSelIdx in section E.1.2).
BitRateValueMinus1 []int
CPBSizeValueMinus1 []int
CBRFlag []bool
// The remaining four fields are 5-bit values.
InitialCPBRemovalDelayLenMinus1 int
CPBRemovalDelayLenMinus1 int
DPBOutputDelayLenMinus1 int
TimeOffsetLen int
}
// NewHRDParameters parses hypothetical reference decoder parameters from br
// following the syntax structure specified in section E.1.2, and returns them
// as a new HRDParameters.
//
// A non-nil error is returned, and no HRDParameters, if any syntax element
// cannot be read.
func NewHRDParameters(br *bits.BitReader) (*HRDParameters, error) {
	h := &HRDParameters{}
	var err error

	h.CPBCntMinus1, err = readUe(br)
	if err != nil {
		return nil, errors.Wrap(err, "could not parse CPBCntMinus1")
	}

	err = readFields(br, []field{
		{&h.BitRateScale, "BitRateScale", 4},
		{&h.CPBSizeScale, "CPBSizeScale", 4},
	})
	if err != nil {
		return nil, err
	}

	// One (bit rate, CPB size, CBR flag) triple per schedule selection index
	// (SchedSelIdx, E.1.2).
	for sseli := 0; sseli <= h.CPBCntMinus1; sseli++ {
		ue, err := readUe(br)
		if err != nil {
			return nil, errors.Wrap(err, "could not parse BitRateValueMinus1")
		}
		h.BitRateValueMinus1 = append(h.BitRateValueMinus1, ue)

		ue, err = readUe(br)
		if err != nil {
			return nil, errors.Wrap(err, "could not parse CPBSizeValueMinus1")
		}
		h.CPBSizeValueMinus1 = append(h.CPBSizeValueMinus1, ue)

		b, err := br.ReadBits(1)
		if err != nil {
			return nil, errors.Wrap(err, "could not read CBRFlag")
		}
		h.CBRFlag = append(h.CBRFlag, b == 1)
	}

	// The four length fields appear exactly once, after the per-CPB loop.
	err = readFields(br,
		[]field{
			{&h.InitialCPBRemovalDelayLenMinus1, "InitialCPBRemovalDelayLenMinus1", 5},
			{&h.CPBRemovalDelayLenMinus1, "CPBRemovalDelayLenMinus1", 5},
			{&h.DPBOutputDelayLenMinus1, "DpbOutputDelayLenMinus1", 5},
			{&h.TimeOffsetLen, "TimeOffsetLen", 5},
		},
	)
	if err != nil {
		return nil, err
	}

	return h, nil
}
// isInList reports whether term occurs at least once in l. A nil or empty
// list never contains term.
func isInList(l []int, term int) bool {
	found := false
	for idx := 0; idx < len(l) && !found; idx++ {
		found = l[idx] == term
	}
	return found
}
// scalingList parses one scaling list from br following the scaling_list()
// syntax of section 7.3.2.1.1.1 and stores the first sizeOfScalingList values
// in scalingList.
//
// Each entry is coded as a signed delta (read with readSe) against the
// previous entry, starting from 8. Once a delta yields a next scale of zero
// no further deltas are read and the last value is repeated for the
// remaining entries. If that happens on the very first entry the stream is
// signalling that the default list is to be used, in which case
// defaultScalingMatrix is copied into scalingList; it must therefore hold at
// least sizeOfScalingList values in the same order as scalingList.
//
// An error is returned if a delta cannot be read or if either slice is
// shorter than sizeOfScalingList.
func scalingList(br *bits.BitReader, scalingList []int, sizeOfScalingList int, defaultScalingMatrix []int) error {
	if len(scalingList) < sizeOfScalingList {
		return errors.Errorf("scaling list holds %d entries, need %d", len(scalingList), sizeOfScalingList)
	}

	lastScale := 8
	nextScale := 8
	for i := 0; i < sizeOfScalingList; i++ {
		if nextScale != 0 {
			deltaScale, err := readSe(br)
			if err != nil {
				return errors.Wrap(err, "could not parse deltaScale")
			}

			// Double modulo keeps the result in 0..255 even when a corrupt
			// stream supplies a delta below the permitted range.
			nextScale = ((lastScale+deltaScale)%256 + 256) % 256

			if i == 0 && nextScale == 0 {
				// The default list replaces the whole scaling list; no
				// further deltas follow in the stream.
				if len(defaultScalingMatrix) < sizeOfScalingList {
					return errors.Errorf("default scaling list holds %d entries, need %d", len(defaultScalingMatrix), sizeOfScalingList)
				}
				copy(scalingList[:sizeOfScalingList], defaultScalingMatrix)
				return nil
			}
		}

		if nextScale == 0 {
			scalingList[i] = lastScale
		} else {
			scalingList[i] = nextScale
		}
		lastScale = scalingList[i]
	}
	return nil
}