From 372c42d5aa8770583cc0a1cdf6d68f415f59a87b Mon Sep 17 00:00:00 2001 From: Dan Kortschak Date: Sat, 7 Jul 2018 15:27:59 +0930 Subject: [PATCH] flv,generator: clean up FLV handling --- flv/audio_tag.go | 67 ------------ flv/flv.go | 123 +++++++++++++++++---- flv/video_tag.go | 71 ------------ generator/flv_generator.go | 219 +++++++++++++++++++++---------------- revid/revid.go | 12 +- 5 files changed, 235 insertions(+), 257 deletions(-) delete mode 100644 flv/audio_tag.go delete mode 100644 flv/video_tag.go diff --git a/flv/audio_tag.go b/flv/audio_tag.go deleted file mode 100644 index 397bc9d6..00000000 --- a/flv/audio_tag.go +++ /dev/null @@ -1,67 +0,0 @@ -/* -NAME - audio_tag.go - -DESCRIPTION - See Readme.md - -AUTHORS - Saxon Nelson-Milton - -LICENSE - audio_tag.go is Copyright (C) 2017 the Australian Ocean Lab (AusOcean) - - It is free software: you can redistribute it and/or modify them - under the terms of the GNU General Public License as published by the - Free Software Foundation, either version 3 of the License, or (at your - option) any later version. - - It is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - for more details. - - You should have received a copy of the GNU General Public License - along with revid in gpl.txt. If not, see http://www.gnu.org/licenses. -*/ - -package flv - -type AudioTag struct { - TagType uint8 - DataSize uint32 - Timestamp uint32 - TimestampExtended uint32 - SoundFormat uint8 - SoundRate uint8 - SoundSize bool - SoundType bool - Data []byte - PrevTagSize uint32 -} - -func (t *AudioTag) ToByteSlice() (output []byte) { - output = make([]byte, 0, maxVideoTagSize) - output = append(output, []byte{ - byte(t.TagType), - byte(t.DataSize >> 16), - byte(t.DataSize >> 8), - byte(t.DataSize), - byte(t.Timestamp >> 16), - byte(t.Timestamp >> 8), - byte(t.Timestamp), - byte(t.TimestampExtended), - 0x00, - 0x00, - 0x00, - byte(t.SoundFormat<<4) | byte(t.SoundRate<<2) | btb(t.SoundSize)<<1 | btb(t.SoundType), - }...) - output = append(output, t.Data...) - output = append(output, []byte{ - byte(t.PrevTagSize >> 24), - byte(t.PrevTagSize >> 16), - byte(t.PrevTagSize >> 8), - byte(t.PrevTagSize), - }...) - return -} diff --git a/flv/flv.go b/flv/flv.go index dd2b7a04..32deb0df 100644 --- a/flv/flv.go +++ b/flv/flv.go @@ -7,6 +7,7 @@ DESCRIPTION AUTHORS Saxon A. Nelson-Milton + Dan Kortschak LICENSE flv.go is Copyright (C) 2017 the Australian Ocean Lab (AusOcean) @@ -22,16 +23,17 @@ LICENSE for more details. You should have received a copy of the GNU General Public License - along with revid in gpl.txt. If not, see http://www.gnu.org/licenses. + along with revid in gpl.txt. If not, see http://www.gnu.org/licenses. */ +// See https://wwwimages2.adobe.com/content/dam/acom/en/devnet/flv/video_file_format_spec_v10.pdf +// for format specification. + package flv -import "bitbucket.org/ausocean/av/tools" +import "encoding/binary" const ( - headerLength = 72 - version = 0x01 maxVideoTagSize = 10000 maxAudioTagSize = 10000 ) @@ -50,24 +52,107 @@ const ( PCMAudioFormat = 0 ) -var flvHeaderCode = []byte{0x46, 0x4C, 0x56} +const ( + sizeofFLVTagHeader = 11 + sizeofPrevTagSize = 4 +) + +const version = 0x01 + +// FLV is big-endian. +var order = binary.BigEndian + +// orderPutUint24 is a binary.BigEndian method look-alike for +// writing 24 bit words to a byte slice. +func orderPutUint24(b []byte, v uint32) { + _ = b[2] // early bounds check to guarantee safety of writes below + b[0] = byte(v >> 16) + b[1] = byte(v >> 8) + b[2] = byte(v) +} + +var flvHeaderCode = []byte{'F', 'L', 'V', version} type Header struct { - AudioFlag bool - VideoFlag bool + HasAudio bool + HasVideo bool +} + +func (h *Header) Bytes() []byte { + const headerLength = 9 + b := [headerLength]byte{ + 0: 'F', 1: 'L', 2: 'V', 3: version, + 4: btb(h.HasAudio)<<2 | btb(h.HasVideo), + 8: headerLength, // order.PutUint32(b[5:9], headerLength) + } + return b[:] +} + +type VideoTag struct { + TagType uint8 + DataSize uint32 + Timestamp uint32 + TimestampExtended uint8 + FrameType uint8 + Codec uint8 + PacketType uint8 + CompositionTime uint32 + Data []byte + PrevTagSize uint32 +} + +func (t *VideoTag) Bytes() []byte { + // FIXME(kortschak): This should probably be an encoding.BinaryMarshaler. + // This will allow handling of invalid field values. + + b := make([]byte, t.DataSize+sizeofFLVTagHeader+sizeofPrevTagSize) + + b[0] = t.TagType + orderPutUint24(b[1:4], t.DataSize) + orderPutUint24(b[4:7], t.Timestamp) + b[7] = t.TimestampExtended + b[11] = t.FrameType<<4 | t.Codec + b[12] = t.PacketType + orderPutUint24(b[13:16], t.CompositionTime) + copy(b[16:], t.Data) + order.PutUint32(b[len(b)-4:], t.PrevTagSize) + + return b +} + +type AudioTag struct { + TagType uint8 + DataSize uint32 + Timestamp uint32 + TimestampExtended uint8 + SoundFormat uint8 + SoundRate uint8 + SoundSize bool + SoundType bool + Data []byte + PrevTagSize uint32 +} + +func (t *AudioTag) Bytes() []byte { + // FIXME(kortschak): This should probably be an encoding.BinaryMarshaler. + // This will allow handling of invalid field values. + + b := make([]byte, t.DataSize+sizeofFLVTagHeader+sizeofPrevTagSize) + + b[0] = t.TagType + orderPutUint24(b[1:4], t.DataSize) + orderPutUint24(b[4:7], t.Timestamp) + b[7] = t.TimestampExtended + b[11] = t.SoundFormat<<4 | t.SoundRate<<2 | btb(t.SoundSize)<<1 | btb(t.SoundType) + copy(b[12:], t.Data) + order.PutUint32(b[len(b)-4:], t.PrevTagSize) + + return b } func btb(b bool) byte { - return tools.BoolToByte(b) -} - -func (h *Header) ToByteSlice() (output []byte) { - output = make([]byte, 0, headerLength) - output = append(output, flvHeaderCode...) - output = append(output, []byte{ - version, - 0x00 | btb(h.AudioFlag)<<2 | btb(h.VideoFlag), - 0x00, 0x00, 0x00, byte(9), - }...) - return + if b { + return 1 + } + return 0 } diff --git a/flv/video_tag.go b/flv/video_tag.go deleted file mode 100644 index cfc27fa8..00000000 --- a/flv/video_tag.go +++ /dev/null @@ -1,71 +0,0 @@ -/* -NAME - video_tag.go - -DESCRIPTION - See Readme.md - -AUTHORS - Saxon Nelson-Milton - -LICENSE - video_tag.go is Copyright (C) 2017 the Australian Ocean Lab (AusOcean) - - It is free software: you can redistribute it and/or modify them - under the terms of the GNU General Public License as published by the - Free Software Foundation, either version 3 of the License, or (at your - option) any later version. - - It is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - for more details. - - You should have received a copy of the GNU General Public License - along with revid in gpl.txt. If not, see http://www.gnu.org/licenses. -*/ - -package flv - -type VideoTag struct { - TagType uint8 - DataSize uint32 - Timestamp uint32 - TimestampExtended uint32 - FrameType byte - Codec byte - PacketType byte - CompositionTime uint32 - Data []byte - PrevTagSize uint32 -} - -func (t *VideoTag) ToByteSlice() (output []byte) { - output = make([]byte, 0, maxVideoTagSize) - output = append(output, []byte{ - byte(t.TagType), - byte(t.DataSize >> 16), - byte(t.DataSize >> 8), - byte(t.DataSize), - byte(t.Timestamp >> 16), - byte(t.Timestamp >> 8), - byte(t.Timestamp), - byte(t.TimestampExtended), - 0x00, - 0x00, - 0x00, - 0x00 | byte(t.FrameType<<4) | byte(t.Codec), - t.PacketType, - byte(t.CompositionTime >> 16), - byte(t.CompositionTime >> 8), - byte(t.CompositionTime), - }...) - output = append(output, t.Data...) - output = append(output, []byte{ - byte(t.PrevTagSize >> 24), - byte(t.PrevTagSize >> 16), - byte(t.PrevTagSize >> 8), - byte(t.PrevTagSize), - }...) - return -} diff --git a/generator/flv_generator.go b/generator/flv_generator.go index 375e2154..296e8849 100644 --- a/generator/flv_generator.go +++ b/generator/flv_generator.go @@ -37,24 +37,29 @@ const ( outputChanLength = 500 audioSize = 18 videoHeaderSize = 16 - interFrameCode = 1 - keyFrameCode = 5 - sequenceCode = 6 ) // Data representing silent audio (required for youtube) -var dummyAudioTag1Data = []byte{0x00, 0x12, 0x08, 0x56, 0xe5, 0x00} -var dummyAudioTag2Data = []byte{0x01, 0xdc, 0x00, 0x4c, 0x61, 0x76, 0x63, 0x35, - 0x38, 0x2e, 0x36, 0x2e, 0x31, 0x30, 0x32, 0x00, 0x02, 0x30, 0x40, 0x0e} +var ( + dummyAudioTag1Data = []byte{ + 0x00, 0x12, 0x08, 0x56, 0xe5, 0x00, + } + + dummyAudioTag2Data = []byte{ + 0x01, 0xdc, 0x00, 0x4c, 0x61, 0x76, 0x63, 0x35, 0x38, + 0x2e, 0x36, 0x2e, 0x31, 0x30, 0x32, 0x00, 0x02, 0x30, + 0x40, 0x0e, + } +) // flvGenerator provides properties required for the generation of flv video // from raw video data type flvGenerator struct { - fps uint + fps int inputChan chan []byte outputChan chan []byte - audioFlag bool - videoFlag bool + audio bool + video bool lastTagSize int header flv.Header startTime time.Time @@ -75,17 +80,15 @@ func (g *flvGenerator) OutputChan() <-chan []byte { } // NewFlvGenerator retuns an instance of the flvGenerator struct -func NewFlvGenerator(audio bool, video bool, fps uint) (g *flvGenerator) { - g = new(flvGenerator) - g.fps = fps - g.audioFlag = audio - g.videoFlag = video - g.lastTagSize = 0 - g.inputChan = make(chan []byte, inputChanLength) - g.outputChan = make(chan []byte, outputChanLength) - g.firstTag = true - g.isGenerating = false - return +func NewFlvGenerator(audio, video bool, fps int) *flvGenerator { + return &flvGenerator{ + fps: fps, + audio: audio, + video: video, + inputChan: make(chan []byte, inputChanLength), + outputChan: make(chan []byte, outputChanLength), + firstTag: true, + } } // Start begins the generation routine - i.e. if raw data is given to the input @@ -103,10 +106,10 @@ func (g *flvGenerator) Stop() { // This will generally be called once at the start of file writing/transmission. func (g *flvGenerator) GenHeader() { header := flv.Header{ - AudioFlag: g.audioFlag, - VideoFlag: g.videoFlag, + HasAudio: g.audio, + HasVideo: g.video, } - g.outputChan <- header.ToByteSlice() + g.outputChan <- header.Bytes() } // getNextTimestamp generates and returns the next timestamp based on current time @@ -114,35 +117,48 @@ func (g *flvGenerator) getNextTimestamp() (timestamp uint32) { if g.firstTag { g.startTime = time.Now() g.firstTag = false - timestamp = 0 - return + return 0 } - timestamp = uint32(time.Now().Sub(g.startTime).Seconds() * float64(1000)) - return + return uint32(time.Now().Sub(g.startTime).Seconds() * float64(1000)) } +// http://www.itu.int/rec/dologin_pub.asp?lang=e&id=T-REC-H.264-200305-S!!PDF-E&type=items +// Table 7-1 NAL unit type codes +const ( + nonIdrPic = 1 + idrPic = 5 + suppEnhInf = 6 + seqParamSet = 7 + paramSet = 8 +) + // isKeyFrame returns true if the passed frame data represents that of a keyframe -// TODO: clean this up and use conts for naltype codes +// FIXME(kortschak): Clarify and document the logic of this functions. func isKeyFrame(frame []byte) bool { - byteChannel := make(chan byte, len(frame)) - for i := range frame { - byteChannel <- frame[i] - } - for len(byteChannel) >= 5 { - aByte := <-byteChannel - for i := 1; aByte == 0x00 && i != 4; i++ { - aByte = <-byteChannel - if (aByte == 0x01 && i == 2) || (aByte == 0x01 && i == 3) { - aByte = <-byteChannel - nalType := aByte & 0x1F - switch nalType { - case interFrameCode: - return false - case keyFrameCode: - return true - case 6: - return true - } + sc := frameScanner{buf: frame} + for { + b, ok := sc.readByte() + if !ok { + return false + } + for i := 1; b == 0x00 && i < 4; i++ { + b, ok = sc.readByte() + if !ok { + return false + } + if b != 0x01 || (i != 3 && i != 2) { + continue + } + + b, ok = sc.readByte() + if !ok { + return false + } + switch nalTyp := b & 0x1f; nalTyp { + case idrPic, suppEnhInf: + return true + case nonIdrPic: + return false } } } @@ -151,35 +167,49 @@ func isKeyFrame(frame []byte) bool { // isSequenceHeader returns true if the passed frame data represents that of a // a sequence header. -// TODO: clean this up and use consts for the nalTypes +// FIXME(kortschak): Clarify and document the logic of this functions. func isSequenceHeader(frame []byte) bool { - byteChannel := make(chan byte, len(frame)) - for i := range frame { - byteChannel <- frame[i] - } - for len(byteChannel) >= 5 { - aByte := <-byteChannel - for i := 1; aByte == 0x00 && i != 4; i++ { - aByte = <-byteChannel - if (aByte == 0x01 && i == 2) || (aByte == 0x01 && i == 3) { - aByte = <-byteChannel - nalType := aByte & 0x1F - switch nalType { - case 1: - return false - case 5: - return false - case 6: - return true - case 7: - return true - case 8: - return true - } + sc := frameScanner{buf: frame} + for { + b, ok := sc.readByte() + if !ok { + return false + } + for i := 1; b == 0x00 && i != 4; i++ { + b, ok = sc.readByte() + if !ok { + return false + } + if b != 0x01 || (i != 2 && i != 3) { + continue + } + + b, ok = sc.readByte() + if !ok { + return false + } + switch nalTyp := b & 0x1f; nalTyp { + case suppEnhInf, seqParamSet, paramSet: + return true + case nonIdrPic, idrPic: + return false } } } - return false +} + +type frameScanner struct { + off int + buf []byte +} + +func (s *frameScanner) readByte() (b byte, ok bool) { + if s.off >= len(s.buf) { + return 0, false + } + b = s.buf[s.off] + s.off++ + return b, true } // generate takes in raw video data from the input chan and packetises it into @@ -190,36 +220,39 @@ func (g *flvGenerator) generate() { var packetType byte for g.isGenerating { select { - case videoFrame := <-g.inputChan: - if isKeyFrame(videoFrame) { - frameType = flv.KeyFrameType - } else { - frameType = flv.InterFrameType - } - if isSequenceHeader(videoFrame) { - packetType = flv.SequenceHeader - } else { - packetType = flv.AVCNALU - } + default: + time.Sleep(time.Duration(5) * time.Millisecond) + case frame := <-g.inputChan: timeStamp := g.getNextTimestamp() - // Do we have video to send off ? - if g.videoFlag { + // Do we have video to send off? + if g.video { + if isKeyFrame(frame) { + frameType = flv.KeyFrameType + } else { + frameType = flv.InterFrameType + } + if isSequenceHeader(frame) { + packetType = flv.SequenceHeader + } else { + packetType = flv.AVCNALU + } + tag := flv.VideoTag{ TagType: uint8(flv.VideoTagType), - DataSize: uint32(len(videoFrame)) + flv.DataHeaderLength, + DataSize: uint32(len(frame)) + flv.DataHeaderLength, Timestamp: timeStamp, TimestampExtended: flv.NoTimestampExtension, FrameType: frameType, Codec: flv.H264, PacketType: packetType, CompositionTime: 0, - Data: videoFrame, - PrevTagSize: uint32(videoHeaderSize + len(videoFrame)), + Data: frame, + PrevTagSize: uint32(videoHeaderSize + len(frame)), } - g.outputChan <- tag.ToByteSlice() + g.outputChan <- tag.Bytes() } // Do we even have some audio to send off ? - if g.audioFlag { + if g.audio { // Not sure why but we need two audio tags for dummy silent audio // TODO: create constants or SoundSize and SoundType parameters tag := flv.AudioTag{ @@ -234,7 +267,7 @@ func (g *flvGenerator) generate() { Data: dummyAudioTag1Data, PrevTagSize: uint32(audioSize), } - g.outputChan <- tag.ToByteSlice() + g.outputChan <- tag.Bytes() tag = flv.AudioTag{ TagType: uint8(flv.AudioTagType), @@ -248,10 +281,8 @@ func (g *flvGenerator) generate() { Data: dummyAudioTag2Data, PrevTagSize: uint32(22), } - g.outputChan <- tag.ToByteSlice() + g.outputChan <- tag.Bytes() } - default: - time.Sleep(time.Duration(5) * time.Millisecond) } } } diff --git a/revid/revid.go b/revid/revid.go index 57f55fb7..d3109578 100644 --- a/revid/revid.go +++ b/revid/revid.go @@ -207,13 +207,13 @@ func (r *Revid) reset(config Config) error { r.getFrame = r.getFrameNoPacketization return nil case Mpegts: - r.Log(Info, "Using MPEGTS packetisation!") - frameRateAsInt, _ := strconv.Atoi(r.config.FrameRate) - r.generator = generator.NewTsGenerator(uint(frameRateAsInt)) + r.Log(Info, "Using MPEGTS packetisation") + frameRate, _ := strconv.Atoi(r.config.FrameRate) + r.generator = generator.NewTsGenerator(uint(frameRate)) case Flv: - r.Log(Info, "Using FLV packetisation!") - frameRateAsInt, _ := strconv.Atoi(r.config.FrameRate) - r.generator = generator.NewFlvGenerator(true, true, uint(frameRateAsInt)) + r.Log(Info, "Using FLV packetisation") + frameRate, _ := strconv.Atoi(r.config.FrameRate) + r.generator = generator.NewFlvGenerator(true, true, frameRate) } // We have packetization of some sort, so we want to send data to Generator // to perform packetization