flv,generator: clean up FLV handling

This commit is contained in:
Dan Kortschak 2018-07-07 15:27:59 +09:30
parent d7525e0a47
commit 372c42d5aa
5 changed files with 235 additions and 257 deletions

View File

@ -1,67 +0,0 @@
/*
NAME
audio_tag.go
DESCRIPTION
See Readme.md
AUTHORS
Saxon Nelson-Milton <saxon@ausocean.org>
LICENSE
audio_tag.go is Copyright (C) 2017 the Australian Ocean Lab (AusOcean)
It is free software: you can redistribute it and/or modify them
under the terms of the GNU General Public License as published by the
Free Software Foundation, either version 3 of the License, or (at your
option) any later version.
It is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with revid in gpl.txt. If not, see http://www.gnu.org/licenses.
*/
package flv
type AudioTag struct {
TagType uint8
DataSize uint32
Timestamp uint32
TimestampExtended uint32
SoundFormat uint8
SoundRate uint8
SoundSize bool
SoundType bool
Data []byte
PrevTagSize uint32
}
func (t *AudioTag) ToByteSlice() (output []byte) {
output = make([]byte, 0, maxVideoTagSize)
output = append(output, []byte{
byte(t.TagType),
byte(t.DataSize >> 16),
byte(t.DataSize >> 8),
byte(t.DataSize),
byte(t.Timestamp >> 16),
byte(t.Timestamp >> 8),
byte(t.Timestamp),
byte(t.TimestampExtended),
0x00,
0x00,
0x00,
byte(t.SoundFormat<<4) | byte(t.SoundRate<<2) | btb(t.SoundSize)<<1 | btb(t.SoundType),
}...)
output = append(output, t.Data...)
output = append(output, []byte{
byte(t.PrevTagSize >> 24),
byte(t.PrevTagSize >> 16),
byte(t.PrevTagSize >> 8),
byte(t.PrevTagSize),
}...)
return
}

View File

@ -7,6 +7,7 @@ DESCRIPTION
AUTHORS
Saxon A. Nelson-Milton <saxon@ausocean.org>
Dan Kortschak <dan@ausocean.org>
LICENSE
flv.go is Copyright (C) 2017 the Australian Ocean Lab (AusOcean)
@ -25,13 +26,14 @@ LICENSE
along with revid in gpl.txt. If not, see http://www.gnu.org/licenses.
*/
// See https://wwwimages2.adobe.com/content/dam/acom/en/devnet/flv/video_file_format_spec_v10.pdf
// for format specification.
package flv
import "bitbucket.org/ausocean/av/tools"
import "encoding/binary"
const (
// headerLength is the initial capacity of the slice built by
// Header.ToByteSlice.
headerLength = 72
// version is the version byte written after the file signature.
version = 0x01
// maxVideoTagSize is the initial capacity of the slices built by
// VideoTag.ToByteSlice and AudioTag.ToByteSlice.
maxVideoTagSize = 10000
// maxAudioTagSize is not referenced by any code visible here.
maxAudioTagSize = 10000
)
@ -50,24 +52,107 @@ const (
PCMAudioFormat = 0
)
var flvHeaderCode = []byte{0x46, 0x4C, 0x56}
const (
// sizeofFLVTagHeader is the number of bytes the tag Bytes methods use
// for the tag header; the type-specific bytes start at this offset.
sizeofFLVTagHeader = 11
// sizeofPrevTagSize is the number of bytes added to each encoded tag
// for the trailing PrevTagSize word.
sizeofPrevTagSize = 4
)
const version = 0x01
// FLV is big-endian.
var order = binary.BigEndian
// orderPutUint24 stores the low 24 bits of v in b[0], b[1] and b[2], most
// significant byte first. It plays the role of a binary.BigEndian Put
// method for 24 bit words. b must hold at least three bytes.
func orderPutUint24(b []byte, v uint32) {
	// Fill from the last byte backwards: touching b[2] first means a short
	// slice panics before anything has been written.
	for i := 2; i >= 0; i-- {
		b[i] = byte(v)
		v >>= 8
	}
}
var flvHeaderCode = []byte{'F', 'L', 'V', version}
// Header carries the flags that are packed into the flags byte of the
// encoded file header.
type Header struct {
	// AudioFlag and VideoFlag are not read by Bytes.
	AudioFlag bool
	VideoFlag bool
	// HasAudio sets bit 2 and HasVideo sets bit 0 of the flags byte
	// written by Bytes.
	HasAudio bool
	HasVideo bool
}

// Bytes returns the nine byte encoded header: the "FLV" signature, the
// version byte, the flags byte, and the header length as a big-endian
// 32 bit word.
func (h *Header) Bytes() []byte {
	const headerLength = 9

	hdr := make([]byte, headerLength)
	copy(hdr, "FLV")
	hdr[3] = version
	hdr[4] = btb(h.HasAudio)<<2 | btb(h.HasVideo)
	// Bytes 5 to 8 hold the header length; it fits in one byte, so only
	// the least significant byte is non-zero.
	hdr[8] = headerLength
	return hdr
}
// VideoTag holds the fields of one FLV video tag together with the
// previous-tag-size word that Bytes writes after it.
type VideoTag struct {
	TagType           uint8  // byte 0
	DataSize          uint32 // low 24 bits go to bytes 1-3; also sizes the buffer
	Timestamp         uint32 // low 24 bits go to bytes 4-6
	TimestampExtended uint8  // byte 7
	FrameType         uint8  // shifted left by four into byte 11
	Codec             uint8  // OR-ed into byte 11
	PacketType        uint8  // byte 12
	CompositionTime   uint32 // low 24 bits go to bytes 13-15
	Data              []byte // copied in from byte 16 onwards
	PrevTagSize       uint32 // final four bytes, big-endian
}

// Bytes returns the encoded tag in a new slice whose length is
// DataSize + sizeofFLVTagHeader + sizeofPrevTagSize.
//
// The length is taken from DataSize, not from len(t.Data): payload bytes
// that do not fit are dropped, unused space is left zero, and the final
// four bytes always hold PrevTagSize.
//
// FIXME(kortschak): This should probably be an encoding.BinaryMarshaler.
// This will allow handling of invalid field values.
func (t *VideoTag) Bytes() []byte {
	const (
		videoHeaderOff = sizeofFLVTagHeader // frame type and codec byte
		payloadOff     = videoHeaderOff + 5 // first byte of Data
	)

	buf := make([]byte, t.DataSize+sizeofFLVTagHeader+sizeofPrevTagSize)

	// Tag header. Bytes 8-10 are never written and keep their zero value.
	buf[0] = t.TagType
	orderPutUint24(buf[1:4], t.DataSize)
	orderPutUint24(buf[4:7], t.Timestamp)
	buf[7] = t.TimestampExtended

	// Video header.
	buf[videoHeaderOff] = t.FrameType<<4 | t.Codec
	buf[videoHeaderOff+1] = t.PacketType
	orderPutUint24(buf[videoHeaderOff+2:payloadOff], t.CompositionTime)

	// The payload goes in before the trailer so that an oversized Data
	// cannot clobber PrevTagSize.
	copy(buf[payloadOff:], t.Data)
	trailer := buf[len(buf)-sizeofPrevTagSize:]
	order.PutUint32(trailer, t.PrevTagSize)
	return buf
}
// AudioTag holds the fields of one FLV audio tag together with the
// previous-tag-size word that Bytes writes after it.
type AudioTag struct {
	TagType           uint8  // byte 0
	DataSize          uint32 // low 24 bits go to bytes 1-3; also sizes the buffer
	Timestamp         uint32 // low 24 bits go to bytes 4-6
	TimestampExtended uint8  // byte 7
	SoundFormat       uint8  // shifted left by four into byte 11
	SoundRate         uint8  // shifted left by two into byte 11
	SoundSize         bool   // btb(SoundSize)<<1 in byte 11
	SoundType         bool   // btb(SoundType) in byte 11
	Data              []byte // copied in from byte 12 onwards
	PrevTagSize       uint32 // final four bytes, big-endian
}

// Bytes returns the encoded tag in a new slice whose length is
// DataSize + sizeofFLVTagHeader + sizeofPrevTagSize.
//
// The length is taken from DataSize, not from len(t.Data): payload bytes
// that do not fit are dropped, unused space is left zero, and the final
// four bytes always hold PrevTagSize.
//
// FIXME(kortschak): This should probably be an encoding.BinaryMarshaler.
// This will allow handling of invalid field values.
func (t *AudioTag) Bytes() []byte {
	const (
		audioHeaderOff = sizeofFLVTagHeader // format/rate/size/type byte
		payloadOff     = audioHeaderOff + 1 // first byte of Data
	)

	buf := make([]byte, t.DataSize+sizeofFLVTagHeader+sizeofPrevTagSize)

	// Tag header. Bytes 8-10 are never written and keep their zero value.
	buf[0] = t.TagType
	orderPutUint24(buf[1:4], t.DataSize)
	orderPutUint24(buf[4:7], t.Timestamp)
	buf[7] = t.TimestampExtended

	// Audio header byte.
	flags := t.SoundFormat<<4 | t.SoundRate<<2
	flags |= btb(t.SoundSize)<<1 | btb(t.SoundType)
	buf[audioHeaderOff] = flags

	// The payload goes in before the trailer so that an oversized Data
	// cannot clobber PrevTagSize.
	copy(buf[payloadOff:], t.Data)
	trailer := buf[len(buf)-sizeofPrevTagSize:]
	order.PutUint32(trailer, t.PrevTagSize)
	return buf
}
func btb(b bool) byte {
return tools.BoolToByte(b)
if b {
return 1
}
func (h *Header) ToByteSlice() (output []byte) {
output = make([]byte, 0, headerLength)
output = append(output, flvHeaderCode...)
output = append(output, []byte{
version,
0x00 | btb(h.AudioFlag)<<2 | btb(h.VideoFlag),
0x00, 0x00, 0x00, byte(9),
}...)
return
return 0
}

View File

@ -1,71 +0,0 @@
/*
NAME
video_tag.go
DESCRIPTION
See Readme.md
AUTHORS
Saxon Nelson-Milton <saxon@ausocean.org>
LICENSE
video_tag.go is Copyright (C) 2017 the Australian Ocean Lab (AusOcean)
It is free software: you can redistribute it and/or modify them
under the terms of the GNU General Public License as published by the
Free Software Foundation, either version 3 of the License, or (at your
option) any later version.
It is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with revid in gpl.txt. If not, see http://www.gnu.org/licenses.
*/
package flv
// VideoTag holds the fields of one FLV video tag together with the
// previous-tag-size word that ToByteSlice writes after it.
type VideoTag struct {
	TagType           uint8  // first byte of the tag header
	DataSize          uint32 // low 24 bits are written, most significant byte first
	Timestamp         uint32 // low 24 bits are written, most significant byte first
	TimestampExtended uint32 // only the low 8 bits are written
	FrameType         byte   // shifted left by four into the first video header byte
	Codec             byte   // OR-ed into the first video header byte
	PacketType        byte   // second video header byte
	CompositionTime   uint32 // low 24 bits are written, most significant byte first
	Data              []byte // payload written after the video header
	PrevTagSize       uint32 // written as four bytes, most significant first, after Data
}

// ToByteSlice returns the encoded tag: the 11 byte tag header (whose last
// three bytes are always zero), the five byte video header, Data, and
// finally PrevTagSize as a big-endian 32 bit word.
//
// DataSize and PrevTagSize are written exactly as stored in t; neither is
// derived from len(t.Data).
func (t *VideoTag) ToByteSlice() (output []byte) {
	const (
		tagHeaderLen   = 11 // type, 24 bit size, 24 bit timestamp, extension, three zero bytes
		videoHeaderLen = 5  // frame type/codec, packet type, 24 bit composition time
		prevTagSizeLen = 4
	)

	// Reserve exactly what this tag needs rather than a fixed capacity:
	// small frames no longer pin a large buffer, and frames bigger than
	// the old fixed capacity no longer force append to grow and copy.
	output = make([]byte, 0, tagHeaderLen+videoHeaderLen+len(t.Data)+prevTagSizeLen)

	output = append(output,
		t.TagType,
		byte(t.DataSize>>16),
		byte(t.DataSize>>8),
		byte(t.DataSize),
		byte(t.Timestamp>>16),
		byte(t.Timestamp>>8),
		byte(t.Timestamp),
		byte(t.TimestampExtended),
		0x00, 0x00, 0x00,
		t.FrameType<<4|t.Codec,
		t.PacketType,
		byte(t.CompositionTime>>16),
		byte(t.CompositionTime>>8),
		byte(t.CompositionTime),
	)
	output = append(output, t.Data...)
	output = append(output,
		byte(t.PrevTagSize>>24),
		byte(t.PrevTagSize>>16),
		byte(t.PrevTagSize>>8),
		byte(t.PrevTagSize),
	)
	return output
}

View File

@ -37,24 +37,29 @@ const (
outputChanLength = 500
audioSize = 18
videoHeaderSize = 16
interFrameCode = 1
keyFrameCode = 5
sequenceCode = 6
)
// Data representing silent audio (required for youtube)
var dummyAudioTag1Data = []byte{0x00, 0x12, 0x08, 0x56, 0xe5, 0x00}
var dummyAudioTag2Data = []byte{0x01, 0xdc, 0x00, 0x4c, 0x61, 0x76, 0x63, 0x35,
0x38, 0x2e, 0x36, 0x2e, 0x31, 0x30, 0x32, 0x00, 0x02, 0x30, 0x40, 0x0e}
// Payloads of the two dummy audio tags that generate sends after a frame
// when audio is enabled.
var (
// dummyAudioTag1Data is the Data of the first dummy audio tag.
dummyAudioTag1Data = []byte{
0x00, 0x12, 0x08, 0x56, 0xe5, 0x00,
}
// dummyAudioTag2Data is the Data of the second dummy audio tag.
dummyAudioTag2Data = []byte{
0x01, 0xdc, 0x00, 0x4c, 0x61, 0x76, 0x63, 0x35, 0x38,
0x2e, 0x36, 0x2e, 0x31, 0x30, 0x32, 0x00, 0x02, 0x30,
0x40, 0x0e,
}
)
// flvGenerator provides properties required for the generation of flv video
// from raw video data
type flvGenerator struct {
fps uint
fps int
inputChan chan []byte
outputChan chan []byte
audioFlag bool
videoFlag bool
audio bool
video bool
lastTagSize int
header flv.Header
startTime time.Time
@ -75,17 +80,15 @@ func (g *flvGenerator) OutputChan() <-chan []byte {
}
// NewFlvGenerator returns an instance of the flvGenerator struct
func NewFlvGenerator(audio bool, video bool, fps uint) (g *flvGenerator) {
g = new(flvGenerator)
g.fps = fps
g.audioFlag = audio
g.videoFlag = video
g.lastTagSize = 0
g.inputChan = make(chan []byte, inputChanLength)
g.outputChan = make(chan []byte, outputChanLength)
g.firstTag = true
g.isGenerating = false
return
func NewFlvGenerator(audio, video bool, fps int) *flvGenerator {
return &flvGenerator{
fps: fps,
audio: audio,
video: video,
inputChan: make(chan []byte, inputChanLength),
outputChan: make(chan []byte, outputChanLength),
firstTag: true,
}
}
// Start begins the generation routine - i.e. if raw data is given to the input
@ -103,10 +106,10 @@ func (g *flvGenerator) Stop() {
// This will generally be called once at the start of file writing/transmission.
func (g *flvGenerator) GenHeader() {
header := flv.Header{
AudioFlag: g.audioFlag,
VideoFlag: g.videoFlag,
HasAudio: g.audio,
HasVideo: g.video,
}
g.outputChan <- header.ToByteSlice()
g.outputChan <- header.Bytes()
}
// getNextTimestamp generates and returns the next timestamp based on current time
@ -114,35 +117,48 @@ func (g *flvGenerator) getNextTimestamp() (timestamp uint32) {
if g.firstTag {
g.startTime = time.Now()
g.firstTag = false
timestamp = 0
return
return 0
}
timestamp = uint32(time.Now().Sub(g.startTime).Seconds() * float64(1000))
return
return uint32(time.Now().Sub(g.startTime).Seconds() * float64(1000))
}
// http://www.itu.int/rec/dologin_pub.asp?lang=e&id=T-REC-H.264-200305-S!!PDF-E&type=items
// Table 7-1 NAL unit type codes
// NAL unit type values, as compared against the low five bits of the byte
// that follows a start code.
const (
nonIdrPic = 1 // coded slice of a non-IDR picture
idrPic = 5 // coded slice of an IDR picture
suppEnhInf = 6 // supplemental enhancement information (SEI)
seqParamSet = 7 // sequence parameter set
paramSet = 8 // picture parameter set
)
// isKeyFrame returns true if the passed frame data represents that of a keyframe
// TODO: clean this up and use consts for NAL type codes
// FIXME(kortschak): Clarify and document the logic of this function.
func isKeyFrame(frame []byte) bool {
byteChannel := make(chan byte, len(frame))
for i := range frame {
byteChannel <- frame[i]
}
for len(byteChannel) >= 5 {
aByte := <-byteChannel
for i := 1; aByte == 0x00 && i != 4; i++ {
aByte = <-byteChannel
if (aByte == 0x01 && i == 2) || (aByte == 0x01 && i == 3) {
aByte = <-byteChannel
nalType := aByte & 0x1F
switch nalType {
case interFrameCode:
sc := frameScanner{buf: frame}
for {
b, ok := sc.readByte()
if !ok {
return false
case keyFrameCode:
return true
case 6:
return true
}
for i := 1; b == 0x00 && i < 4; i++ {
b, ok = sc.readByte()
if !ok {
return false
}
if b != 0x01 || (i != 3 && i != 2) {
continue
}
b, ok = sc.readByte()
if !ok {
return false
}
switch nalTyp := b & 0x1f; nalTyp {
case idrPic, suppEnhInf:
return true
case nonIdrPic:
return false
}
}
}
@ -151,36 +167,50 @@ func isKeyFrame(frame []byte) bool {
// isSequenceHeader returns true if the passed frame data represents that of a
// a sequence header.
// TODO: clean this up and use consts for the nalTypes
// FIXME(kortschak): Clarify and document the logic of this function.
func isSequenceHeader(frame []byte) bool {
byteChannel := make(chan byte, len(frame))
for i := range frame {
byteChannel <- frame[i]
}
for len(byteChannel) >= 5 {
aByte := <-byteChannel
for i := 1; aByte == 0x00 && i != 4; i++ {
aByte = <-byteChannel
if (aByte == 0x01 && i == 2) || (aByte == 0x01 && i == 3) {
aByte = <-byteChannel
nalType := aByte & 0x1F
switch nalType {
case 1:
sc := frameScanner{buf: frame}
for {
b, ok := sc.readByte()
if !ok {
return false
case 5:
}
for i := 1; b == 0x00 && i != 4; i++ {
b, ok = sc.readByte()
if !ok {
return false
case 6:
return true
case 7:
return true
case 8:
return true
}
}
}
if b != 0x01 || (i != 2 && i != 3) {
continue
}
b, ok = sc.readByte()
if !ok {
return false
}
switch nalTyp := b & 0x1f; nalTyp {
case suppEnhInf, seqParamSet, paramSet:
return true
case nonIdrPic, idrPic:
return false
}
}
}
}
// frameScanner hands out the bytes of buf one at a time, front to back.
type frameScanner struct {
	off int    // index of the next byte readByte will return
	buf []byte // data being scanned
}

// readByte returns the byte at the current offset and advances past it.
// Once every byte of buf has been consumed it returns 0, false and leaves
// the offset unchanged.
func (s *frameScanner) readByte() (b byte, ok bool) {
	if s.off < len(s.buf) {
		b = s.buf[s.off]
		s.off++
		return b, true
	}
	return 0, false
}
// generate takes in raw video data from the input chan and packetises it into
// flv tags, which are then passed to the output channel.
@ -190,36 +220,39 @@ func (g *flvGenerator) generate() {
var packetType byte
for g.isGenerating {
select {
case videoFrame := <-g.inputChan:
if isKeyFrame(videoFrame) {
default:
time.Sleep(time.Duration(5) * time.Millisecond)
case frame := <-g.inputChan:
timeStamp := g.getNextTimestamp()
// Do we have video to send off?
if g.video {
if isKeyFrame(frame) {
frameType = flv.KeyFrameType
} else {
frameType = flv.InterFrameType
}
if isSequenceHeader(videoFrame) {
if isSequenceHeader(frame) {
packetType = flv.SequenceHeader
} else {
packetType = flv.AVCNALU
}
timeStamp := g.getNextTimestamp()
// Do we have video to send off ?
if g.videoFlag {
tag := flv.VideoTag{
TagType: uint8(flv.VideoTagType),
DataSize: uint32(len(videoFrame)) + flv.DataHeaderLength,
DataSize: uint32(len(frame)) + flv.DataHeaderLength,
Timestamp: timeStamp,
TimestampExtended: flv.NoTimestampExtension,
FrameType: frameType,
Codec: flv.H264,
PacketType: packetType,
CompositionTime: 0,
Data: videoFrame,
PrevTagSize: uint32(videoHeaderSize + len(videoFrame)),
Data: frame,
PrevTagSize: uint32(videoHeaderSize + len(frame)),
}
g.outputChan <- tag.ToByteSlice()
g.outputChan <- tag.Bytes()
}
// Do we even have some audio to send off ?
if g.audioFlag {
if g.audio {
// Not sure why but we need two audio tags for dummy silent audio
// TODO: create constants or SoundSize and SoundType parameters
tag := flv.AudioTag{
@ -234,7 +267,7 @@ func (g *flvGenerator) generate() {
Data: dummyAudioTag1Data,
PrevTagSize: uint32(audioSize),
}
g.outputChan <- tag.ToByteSlice()
g.outputChan <- tag.Bytes()
tag = flv.AudioTag{
TagType: uint8(flv.AudioTagType),
@ -248,10 +281,8 @@ func (g *flvGenerator) generate() {
Data: dummyAudioTag2Data,
PrevTagSize: uint32(22),
}
g.outputChan <- tag.ToByteSlice()
}
default:
time.Sleep(time.Duration(5) * time.Millisecond)
g.outputChan <- tag.Bytes()
}
}
}
}

View File

@ -207,13 +207,13 @@ func (r *Revid) reset(config Config) error {
r.getFrame = r.getFrameNoPacketization
return nil
case Mpegts:
r.Log(Info, "Using MPEGTS packetisation!")
frameRateAsInt, _ := strconv.Atoi(r.config.FrameRate)
r.generator = generator.NewTsGenerator(uint(frameRateAsInt))
r.Log(Info, "Using MPEGTS packetisation")
frameRate, _ := strconv.Atoi(r.config.FrameRate)
r.generator = generator.NewTsGenerator(uint(frameRate))
case Flv:
r.Log(Info, "Using FLV packetisation!")
frameRateAsInt, _ := strconv.Atoi(r.config.FrameRate)
r.generator = generator.NewFlvGenerator(true, true, uint(frameRateAsInt))
r.Log(Info, "Using FLV packetisation")
frameRate, _ := strconv.Atoi(r.config.FrameRate)
r.generator = generator.NewFlvGenerator(true, true, frameRate)
}
// We have packetization of some sort, so we want to send data to Generator
// to perform packetization