flv,generator: clean up FLV handling

This commit is contained in:
Dan Kortschak 2018-07-07 15:27:59 +09:30
parent d7525e0a47
commit 372c42d5aa
5 changed files with 235 additions and 257 deletions

View File

@ -1,67 +0,0 @@
/*
NAME
audio_tag.go
DESCRIPTION
See Readme.md
AUTHORS
Saxon Nelson-Milton <saxon@ausocean.org>
LICENSE
audio_tag.go is Copyright (C) 2017 the Australian Ocean Lab (AusOcean)
It is free software: you can redistribute it and/or modify them
under the terms of the GNU General Public License as published by the
Free Software Foundation, either version 3 of the License, or (at your
option) any later version.
It is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with revid in gpl.txt. If not, see http://www.gnu.org/licenses.
*/
package flv
// AudioTag holds the fields of one FLV audio tag. ToByteSlice serialises
// the fields in declaration order, followed by the trailing PrevTagSize word.
type AudioTag struct {
// TagType is written as the first byte of the tag.
TagType uint8
// DataSize is written as a 24 bit big-endian value; only the low 24 bits
// are emitted.
DataSize uint32
// Timestamp is written as a 24 bit big-endian value; only the low 24 bits
// are emitted.
Timestamp uint32
// TimestampExtended is written as a single byte; only the low 8 bits are
// emitted even though the field is 32 bits wide.
TimestampExtended uint32
// SoundFormat occupies the top four bits of the audio header byte.
SoundFormat uint8
// SoundRate is shifted into bits 2-3 of the audio header byte. It is not
// masked, so values above 3 are OR-ed into the SoundFormat bits.
SoundRate uint8
// SoundSize sets bit 1 of the audio header byte when true.
SoundSize bool
// SoundType sets bit 0 of the audio header byte when true.
SoundType bool
// Data is the payload appended directly after the audio header byte.
Data []byte
// PrevTagSize is written as a 32 bit big-endian value after Data.
PrevTagSize uint32
}
// ToByteSlice returns the FLV wire encoding of the audio tag.
//
// The layout is: the 11 byte tag header (tag type, 24 bit data size, 24 bit
// timestamp, timestamp extension byte and three zero bytes, which the FLV
// specification calls the StreamID and requires to be 0), one audio header
// byte packing SoundFormat, SoundRate, SoundSize and SoundType, the payload
// in Data, and finally PrevTagSize as a 32 bit big-endian word.
//
// The returned slice is freshly allocated with exactly the capacity needed,
// rather than a fixed 10000 byte buffer sized for video tags.
func (t *AudioTag) ToByteSlice() (output []byte) {
	const (
		tagHeaderLen   = 11 // type + size(3) + timestamp(3) + ts ext + stream id(3)
		audioHeaderLen = 1  // format/rate/size/type bit field
		prevTagSizeLen = 4  // trailing back-pointer
	)

	output = make([]byte, 0, tagHeaderLen+audioHeaderLen+len(t.Data)+prevTagSizeLen)
	output = append(output,
		t.TagType,
		byte(t.DataSize>>16),
		byte(t.DataSize>>8),
		byte(t.DataSize),
		byte(t.Timestamp>>16),
		byte(t.Timestamp>>8),
		byte(t.Timestamp),
		byte(t.TimestampExtended),
		0x00, // StreamID, always zero.
		0x00,
		0x00,
		t.SoundFormat<<4|t.SoundRate<<2|btb(t.SoundSize)<<1|btb(t.SoundType),
	)
	output = append(output, t.Data...)
	output = append(output,
		byte(t.PrevTagSize>>24),
		byte(t.PrevTagSize>>16),
		byte(t.PrevTagSize>>8),
		byte(t.PrevTagSize),
	)
	return output
}

View File

@ -7,6 +7,7 @@ DESCRIPTION
AUTHORS AUTHORS
Saxon A. Nelson-Milton <saxon@ausocean.org> Saxon A. Nelson-Milton <saxon@ausocean.org>
Dan Kortschak <dan@ausocean.org>
LICENSE LICENSE
flv.go is Copyright (C) 2017 the Australian Ocean Lab (AusOcean) flv.go is Copyright (C) 2017 the Australian Ocean Lab (AusOcean)
@ -22,16 +23,17 @@ LICENSE
for more details. for more details.
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with revid in gpl.txt. If not, see http://www.gnu.org/licenses. along with revid in gpl.txt. If not, see http://www.gnu.org/licenses.
*/ */
// See https://wwwimages2.adobe.com/content/dam/acom/en/devnet/flv/video_file_format_spec_v10.pdf
// for format specification.
package flv package flv
import "bitbucket.org/ausocean/av/tools" import "encoding/binary"
const ( const (
headerLength = 72
version = 0x01
maxVideoTagSize = 10000 maxVideoTagSize = 10000
maxAudioTagSize = 10000 maxAudioTagSize = 10000
) )
@ -50,24 +52,107 @@ const (
PCMAudioFormat = 0 PCMAudioFormat = 0
) )
var flvHeaderCode = []byte{0x46, 0x4C, 0x56} const (
sizeofFLVTagHeader = 11
sizeofPrevTagSize = 4
)
const version = 0x01
// FLV is big-endian.
var order = binary.BigEndian
// orderPutUint24 stores the low 24 bits of v into b[0:3], most significant
// byte first. It mirrors the PutUintNN methods of binary.BigEndian for the
// 24 bit fields used by FLV, and like them panics if b is too short.
func orderPutUint24(b []byte, v uint32) {
	// Touch the last byte first so that a short slice panics before any
	// byte has been written.
	_ = b[2]
	b[0], b[1], b[2] = byte(v>>16), byte(v>>8), byte(v)
}
var flvHeaderCode = []byte{'F', 'L', 'V', version}
type Header struct { type Header struct {
AudioFlag bool HasAudio bool
VideoFlag bool HasVideo bool
}
// Bytes returns the nine byte FLV file header: the signature "FLV", the
// format version, a flags byte (bit 2 set when the stream has audio, bit 0
// set when it has video) and the header length as a 32 bit big-endian data
// offset.
func (h *Header) Bytes() []byte {
	const headerLength = 9

	var flags byte
	if h.HasAudio {
		flags |= 1 << 2
	}
	if h.HasVideo {
		flags |= 1
	}

	b := make([]byte, headerLength)
	copy(b, "FLV")
	b[3] = version
	b[4] = flags
	// The data offset occupies b[5:9]; since headerLength fits in one byte
	// only the least significant byte needs to be set.
	b[8] = headerLength
	return b
}
// VideoTag holds the fields of one FLV video tag. Bytes lays them out as an
// 11 byte tag header, a 5 byte video header (frame type/codec, packet type
// and composition time), the payload and a trailing PrevTagSize word.
type VideoTag struct {
// TagType is written to byte 0 of the encoded tag.
TagType uint8
// DataSize is written as a 24 bit big-endian value and also determines the
// size of the buffer allocated by Bytes, so it must account for the 5 video
// header bytes as well as len(Data).
DataSize uint32
// Timestamp is written as a 24 bit big-endian value; only the low 24 bits
// are emitted.
Timestamp uint32
// TimestampExtended is written to byte 7 of the encoded tag.
TimestampExtended uint8
// FrameType occupies the top four bits of byte 11.
FrameType uint8
// Codec is OR-ed into byte 11 without masking, so it should fit in the low
// four bits.
Codec uint8
// PacketType is written to byte 12.
PacketType uint8
// CompositionTime is written as a 24 bit big-endian value at bytes 13-15.
CompositionTime uint32
// Data is the payload copied in from byte 16 onwards.
Data []byte
// PrevTagSize is written as a 32 bit big-endian value in the last four
// bytes of the encoded tag.
PrevTagSize uint32
}
// Bytes returns the FLV wire encoding of the video tag.
//
// The buffer length is taken from t.DataSize rather than len(t.Data), so
// DataSize is expected to equal len(t.Data) plus the five video header
// bytes. Bytes panics if DataSize is zero, because the video header no
// longer fits in the buffer.
func (t *VideoTag) Bytes() []byte {
	// FIXME(kortschak): This should probably be an encoding.BinaryMarshaler.
	// This will allow handling of invalid field values.
	const (
		frameCodecOff = sizeofFLVTagHeader // frame type and codec nibbles
		packetTypeOff = frameCodecOff + 1
		compTimeOff   = packetTypeOff + 1
		payloadOff    = compTimeOff + 3
	)

	total := t.DataSize + sizeofFLVTagHeader + sizeofPrevTagSize
	buf := make([]byte, total)

	// Tag header; bytes 8-10 are left as zero.
	buf[0] = t.TagType
	orderPutUint24(buf[1:4], t.DataSize)
	orderPutUint24(buf[4:7], t.Timestamp)
	buf[7] = t.TimestampExtended

	// Video header.
	buf[frameCodecOff] = t.FrameType<<4 | t.Codec
	buf[packetTypeOff] = t.PacketType
	orderPutUint24(buf[compTimeOff:payloadOff], t.CompositionTime)

	// Payload, then the trailing size word, which is written last so that it
	// always occupies the final four bytes.
	copy(buf[payloadOff:], t.Data)
	order.PutUint32(buf[len(buf)-sizeofPrevTagSize:], t.PrevTagSize)
	return buf
}
// AudioTag holds the fields of one FLV audio tag. Bytes lays them out as an
// 11 byte tag header, a single audio header byte, the payload and a trailing
// PrevTagSize word.
type AudioTag struct {
// TagType is written to byte 0 of the encoded tag.
TagType uint8
// DataSize is written as a 24 bit big-endian value and also determines the
// size of the buffer allocated by Bytes, so it must account for the audio
// header byte as well as len(Data).
DataSize uint32
// Timestamp is written as a 24 bit big-endian value; only the low 24 bits
// are emitted.
Timestamp uint32
// TimestampExtended is written to byte 7 of the encoded tag.
TimestampExtended uint8
// SoundFormat occupies the top four bits of byte 11.
SoundFormat uint8
// SoundRate is shifted into bits 2-3 of byte 11. It is not masked, so
// values above 3 are OR-ed into the SoundFormat bits.
SoundRate uint8
// SoundSize sets bit 1 of byte 11 when true.
SoundSize bool
// SoundType sets bit 0 of byte 11 when true.
SoundType bool
// Data is the payload copied in from byte 12 onwards.
Data []byte
// PrevTagSize is written as a 32 bit big-endian value in the last four
// bytes of the encoded tag.
PrevTagSize uint32
}
// Bytes returns the FLV wire encoding of the audio tag. The buffer length
// is taken from t.DataSize rather than len(t.Data), so DataSize is expected
// to equal len(t.Data) plus the one audio header byte.
func (t *AudioTag) Bytes() []byte {
// FIXME(kortschak): This should probably be an encoding.BinaryMarshaler.
// This will allow handling of invalid field values.
b := make([]byte, t.DataSize+sizeofFLVTagHeader+sizeofPrevTagSize)
// Tag header; bytes 8-10 are left as zero.
b[0] = t.TagType
orderPutUint24(b[1:4], t.DataSize)
orderPutUint24(b[4:7], t.Timestamp)
b[7] = t.TimestampExtended
// Audio header byte: format in bits 4-7, rate in bits 2-3, size in bit 1
// and type in bit 0.
b[11] = t.SoundFormat<<4 | t.SoundRate<<2 | btb(t.SoundSize)<<1 | btb(t.SoundType)
copy(b[12:], t.Data)
// Written after the payload so that it always occupies the final four bytes.
order.PutUint32(b[len(b)-4:], t.PrevTagSize)
return b
} }
func btb(b bool) byte { func btb(b bool) byte {
return tools.BoolToByte(b) if b {
} return 1
}
func (h *Header) ToByteSlice() (output []byte) { return 0
output = make([]byte, 0, headerLength)
output = append(output, flvHeaderCode...)
output = append(output, []byte{
version,
0x00 | btb(h.AudioFlag)<<2 | btb(h.VideoFlag),
0x00, 0x00, 0x00, byte(9),
}...)
return
} }

View File

@ -1,71 +0,0 @@
/*
NAME
video_tag.go
DESCRIPTION
See Readme.md
AUTHORS
Saxon Nelson-Milton <saxon@ausocean.org>
LICENSE
video_tag.go is Copyright (C) 2017 the Australian Ocean Lab (AusOcean)
It is free software: you can redistribute it and/or modify them
under the terms of the GNU General Public License as published by the
Free Software Foundation, either version 3 of the License, or (at your
option) any later version.
It is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with revid in gpl.txt. If not, see http://www.gnu.org/licenses.
*/
package flv
// VideoTag holds the fields of one FLV video tag. ToByteSlice serialises
// the fields in declaration order, followed by the trailing PrevTagSize word.
type VideoTag struct {
// TagType is written as the first byte of the tag.
TagType uint8
// DataSize is written as a 24 bit big-endian value; only the low 24 bits
// are emitted.
DataSize uint32
// Timestamp is written as a 24 bit big-endian value; only the low 24 bits
// are emitted.
Timestamp uint32
// TimestampExtended is written as a single byte; only the low 8 bits are
// emitted even though the field is 32 bits wide.
TimestampExtended uint32
// FrameType occupies the top four bits of the first video header byte.
FrameType byte
// Codec is OR-ed into the first video header byte without masking, so it
// should fit in the low four bits.
Codec byte
// PacketType is written as the second video header byte.
PacketType byte
// CompositionTime is written as a 24 bit big-endian value.
CompositionTime uint32
// Data is the payload appended directly after the video header.
Data []byte
// PrevTagSize is written as a 32 bit big-endian value after Data.
PrevTagSize uint32
}
// ToByteSlice returns the FLV wire encoding of the video tag.
//
// The layout is: the 11 byte tag header (tag type, 24 bit data size, 24 bit
// timestamp, timestamp extension byte and three zero bytes, which the FLV
// specification calls the StreamID and requires to be 0), a five byte video
// header (frame type and codec nibbles, packet type, 24 bit composition
// time), the payload in Data, and finally PrevTagSize as a 32 bit big-endian
// word.
//
// The returned slice is freshly allocated with exactly the capacity needed,
// so small tags no longer reserve 10000 bytes and large ones are not regrown
// while appending.
func (t *VideoTag) ToByteSlice() (output []byte) {
	const (
		tagHeaderLen   = 11 // type + size(3) + timestamp(3) + ts ext + stream id(3)
		videoHeaderLen = 5  // frame/codec + packet type + composition time(3)
		prevTagSizeLen = 4  // trailing back-pointer
	)

	output = make([]byte, 0, tagHeaderLen+videoHeaderLen+len(t.Data)+prevTagSizeLen)
	output = append(output,
		t.TagType,
		byte(t.DataSize>>16),
		byte(t.DataSize>>8),
		byte(t.DataSize),
		byte(t.Timestamp>>16),
		byte(t.Timestamp>>8),
		byte(t.Timestamp),
		byte(t.TimestampExtended),
		0x00, // StreamID, always zero.
		0x00,
		0x00,
		t.FrameType<<4|t.Codec,
		t.PacketType,
		byte(t.CompositionTime>>16),
		byte(t.CompositionTime>>8),
		byte(t.CompositionTime),
	)
	output = append(output, t.Data...)
	output = append(output,
		byte(t.PrevTagSize>>24),
		byte(t.PrevTagSize>>16),
		byte(t.PrevTagSize>>8),
		byte(t.PrevTagSize),
	)
	return output
}

View File

@ -37,24 +37,29 @@ const (
outputChanLength = 500 outputChanLength = 500
audioSize = 18 audioSize = 18
videoHeaderSize = 16 videoHeaderSize = 16
interFrameCode = 1
keyFrameCode = 5
sequenceCode = 6
) )
// Data representing silent audio (required for youtube) // Data representing silent audio (required for youtube)
var dummyAudioTag1Data = []byte{0x00, 0x12, 0x08, 0x56, 0xe5, 0x00} var (
var dummyAudioTag2Data = []byte{0x01, 0xdc, 0x00, 0x4c, 0x61, 0x76, 0x63, 0x35, dummyAudioTag1Data = []byte{
0x38, 0x2e, 0x36, 0x2e, 0x31, 0x30, 0x32, 0x00, 0x02, 0x30, 0x40, 0x0e} 0x00, 0x12, 0x08, 0x56, 0xe5, 0x00,
}
dummyAudioTag2Data = []byte{
0x01, 0xdc, 0x00, 0x4c, 0x61, 0x76, 0x63, 0x35, 0x38,
0x2e, 0x36, 0x2e, 0x31, 0x30, 0x32, 0x00, 0x02, 0x30,
0x40, 0x0e,
}
)
// flvGenerator provides properties required for the generation of flv video // flvGenerator provides properties required for the generation of flv video
// from raw video data // from raw video data
type flvGenerator struct { type flvGenerator struct {
fps uint fps int
inputChan chan []byte inputChan chan []byte
outputChan chan []byte outputChan chan []byte
audioFlag bool audio bool
videoFlag bool video bool
lastTagSize int lastTagSize int
header flv.Header header flv.Header
startTime time.Time startTime time.Time
@ -75,17 +80,15 @@ func (g *flvGenerator) OutputChan() <-chan []byte {
} }
// NewFlvGenerator returns an instance of the flvGenerator struct // NewFlvGenerator returns an instance of the flvGenerator struct
func NewFlvGenerator(audio bool, video bool, fps uint) (g *flvGenerator) { func NewFlvGenerator(audio, video bool, fps int) *flvGenerator {
g = new(flvGenerator) return &flvGenerator{
g.fps = fps fps: fps,
g.audioFlag = audio audio: audio,
g.videoFlag = video video: video,
g.lastTagSize = 0 inputChan: make(chan []byte, inputChanLength),
g.inputChan = make(chan []byte, inputChanLength) outputChan: make(chan []byte, outputChanLength),
g.outputChan = make(chan []byte, outputChanLength) firstTag: true,
g.firstTag = true }
g.isGenerating = false
return
} }
// Start begins the generation routine - i.e. if raw data is given to the input // Start begins the generation routine - i.e. if raw data is given to the input
@ -103,10 +106,10 @@ func (g *flvGenerator) Stop() {
// This will generally be called once at the start of file writing/transmission. // This will generally be called once at the start of file writing/transmission.
func (g *flvGenerator) GenHeader() { func (g *flvGenerator) GenHeader() {
header := flv.Header{ header := flv.Header{
AudioFlag: g.audioFlag, HasAudio: g.audio,
VideoFlag: g.videoFlag, HasVideo: g.video,
} }
g.outputChan <- header.ToByteSlice() g.outputChan <- header.Bytes()
} }
// getNextTimestamp generates and returns the next timestamp based on current time // getNextTimestamp generates and returns the next timestamp based on current time
@ -114,35 +117,48 @@ func (g *flvGenerator) getNextTimestamp() (timestamp uint32) {
if g.firstTag { if g.firstTag {
g.startTime = time.Now() g.startTime = time.Now()
g.firstTag = false g.firstTag = false
timestamp = 0 return 0
return
} }
timestamp = uint32(time.Now().Sub(g.startTime).Seconds() * float64(1000)) return uint32(time.Now().Sub(g.startTime).Seconds() * float64(1000))
return
} }
// http://www.itu.int/rec/dologin_pub.asp?lang=e&id=T-REC-H.264-200305-S!!PDF-E&type=items
// Table 7-1 NAL unit type codes
const (
nonIdrPic = 1
idrPic = 5
suppEnhInf = 6
seqParamSet = 7
paramSet = 8
)
// isKeyFrame returns true if the passed frame data represents that of a keyframe // isKeyFrame returns true if the passed frame data represents that of a keyframe
// TODO: clean this up and use consts for naltype codes // FIXME(kortschak): Clarify and document the logic of this function.
func isKeyFrame(frame []byte) bool { func isKeyFrame(frame []byte) bool {
byteChannel := make(chan byte, len(frame)) sc := frameScanner{buf: frame}
for i := range frame { for {
byteChannel <- frame[i] b, ok := sc.readByte()
} if !ok {
for len(byteChannel) >= 5 { return false
aByte := <-byteChannel }
for i := 1; aByte == 0x00 && i != 4; i++ { for i := 1; b == 0x00 && i < 4; i++ {
aByte = <-byteChannel b, ok = sc.readByte()
if (aByte == 0x01 && i == 2) || (aByte == 0x01 && i == 3) { if !ok {
aByte = <-byteChannel return false
nalType := aByte & 0x1F }
switch nalType { if b != 0x01 || (i != 3 && i != 2) {
case interFrameCode: continue
return false }
case keyFrameCode:
return true b, ok = sc.readByte()
case 6: if !ok {
return true return false
} }
switch nalTyp := b & 0x1f; nalTyp {
case idrPic, suppEnhInf:
return true
case nonIdrPic:
return false
} }
} }
} }
@ -151,35 +167,49 @@ func isKeyFrame(frame []byte) bool {
// isSequenceHeader returns true if the passed frame data represents that of a // isSequenceHeader returns true if the passed frame data represents that of a
// a sequence header. // a sequence header.
// TODO: clean this up and use consts for the nalTypes // FIXME(kortschak): Clarify and document the logic of this function.
func isSequenceHeader(frame []byte) bool { func isSequenceHeader(frame []byte) bool {
byteChannel := make(chan byte, len(frame)) sc := frameScanner{buf: frame}
for i := range frame { for {
byteChannel <- frame[i] b, ok := sc.readByte()
} if !ok {
for len(byteChannel) >= 5 { return false
aByte := <-byteChannel }
for i := 1; aByte == 0x00 && i != 4; i++ { for i := 1; b == 0x00 && i != 4; i++ {
aByte = <-byteChannel b, ok = sc.readByte()
if (aByte == 0x01 && i == 2) || (aByte == 0x01 && i == 3) { if !ok {
aByte = <-byteChannel return false
nalType := aByte & 0x1F }
switch nalType { if b != 0x01 || (i != 2 && i != 3) {
case 1: continue
return false }
case 5:
return false b, ok = sc.readByte()
case 6: if !ok {
return true return false
case 7: }
return true switch nalTyp := b & 0x1f; nalTyp {
case 8: case suppEnhInf, seqParamSet, paramSet:
return true return true
} case nonIdrPic, idrPic:
return false
} }
} }
} }
return false }
// frameScanner is a forward-only cursor over a byte slice.
type frameScanner struct {
// off is the index in buf of the next byte readByte will return.
off int
// buf is the data being scanned; readByte never modifies it.
buf []byte
}
// readByte returns the byte at the current offset and advances past it.
// Once the offset reaches the end of buf it returns 0 and ok == false, and
// the offset is left unchanged.
func (s *frameScanner) readByte() (b byte, ok bool) {
if s.off >= len(s.buf) {
return 0, false
}
b = s.buf[s.off]
s.off++
return b, true
} }
// generate takes in raw video data from the input chan and packetises it into // generate takes in raw video data from the input chan and packetises it into
@ -190,36 +220,39 @@ func (g *flvGenerator) generate() {
var packetType byte var packetType byte
for g.isGenerating { for g.isGenerating {
select { select {
case videoFrame := <-g.inputChan: default:
if isKeyFrame(videoFrame) { time.Sleep(time.Duration(5) * time.Millisecond)
frameType = flv.KeyFrameType case frame := <-g.inputChan:
} else {
frameType = flv.InterFrameType
}
if isSequenceHeader(videoFrame) {
packetType = flv.SequenceHeader
} else {
packetType = flv.AVCNALU
}
timeStamp := g.getNextTimestamp() timeStamp := g.getNextTimestamp()
// Do we have video to send off ? // Do we have video to send off?
if g.videoFlag { if g.video {
if isKeyFrame(frame) {
frameType = flv.KeyFrameType
} else {
frameType = flv.InterFrameType
}
if isSequenceHeader(frame) {
packetType = flv.SequenceHeader
} else {
packetType = flv.AVCNALU
}
tag := flv.VideoTag{ tag := flv.VideoTag{
TagType: uint8(flv.VideoTagType), TagType: uint8(flv.VideoTagType),
DataSize: uint32(len(videoFrame)) + flv.DataHeaderLength, DataSize: uint32(len(frame)) + flv.DataHeaderLength,
Timestamp: timeStamp, Timestamp: timeStamp,
TimestampExtended: flv.NoTimestampExtension, TimestampExtended: flv.NoTimestampExtension,
FrameType: frameType, FrameType: frameType,
Codec: flv.H264, Codec: flv.H264,
PacketType: packetType, PacketType: packetType,
CompositionTime: 0, CompositionTime: 0,
Data: videoFrame, Data: frame,
PrevTagSize: uint32(videoHeaderSize + len(videoFrame)), PrevTagSize: uint32(videoHeaderSize + len(frame)),
} }
g.outputChan <- tag.ToByteSlice() g.outputChan <- tag.Bytes()
} }
// Do we even have some audio to send off ? // Do we even have some audio to send off ?
if g.audioFlag { if g.audio {
// Not sure why but we need two audio tags for dummy silent audio // Not sure why but we need two audio tags for dummy silent audio
// TODO: create constants or SoundSize and SoundType parameters // TODO: create constants or SoundSize and SoundType parameters
tag := flv.AudioTag{ tag := flv.AudioTag{
@ -234,7 +267,7 @@ func (g *flvGenerator) generate() {
Data: dummyAudioTag1Data, Data: dummyAudioTag1Data,
PrevTagSize: uint32(audioSize), PrevTagSize: uint32(audioSize),
} }
g.outputChan <- tag.ToByteSlice() g.outputChan <- tag.Bytes()
tag = flv.AudioTag{ tag = flv.AudioTag{
TagType: uint8(flv.AudioTagType), TagType: uint8(flv.AudioTagType),
@ -248,10 +281,8 @@ func (g *flvGenerator) generate() {
Data: dummyAudioTag2Data, Data: dummyAudioTag2Data,
PrevTagSize: uint32(22), PrevTagSize: uint32(22),
} }
g.outputChan <- tag.ToByteSlice() g.outputChan <- tag.Bytes()
} }
default:
time.Sleep(time.Duration(5) * time.Millisecond)
} }
} }
} }

View File

@ -207,13 +207,13 @@ func (r *Revid) reset(config Config) error {
r.getFrame = r.getFrameNoPacketization r.getFrame = r.getFrameNoPacketization
return nil return nil
case Mpegts: case Mpegts:
r.Log(Info, "Using MPEGTS packetisation!") r.Log(Info, "Using MPEGTS packetisation")
frameRateAsInt, _ := strconv.Atoi(r.config.FrameRate) frameRate, _ := strconv.Atoi(r.config.FrameRate)
r.generator = generator.NewTsGenerator(uint(frameRateAsInt)) r.generator = generator.NewTsGenerator(uint(frameRate))
case Flv: case Flv:
r.Log(Info, "Using FLV packetisation!") r.Log(Info, "Using FLV packetisation")
frameRateAsInt, _ := strconv.Atoi(r.config.FrameRate) frameRate, _ := strconv.Atoi(r.config.FrameRate)
r.generator = generator.NewFlvGenerator(true, true, uint(frameRateAsInt)) r.generator = generator.NewFlvGenerator(true, true, frameRate)
} }
// We have packetization of some sort, so we want to send data to Generator // We have packetization of some sort, so we want to send data to Generator
// to perform packetization // to perform packetization