Merged in rtp-jpeg-extractor (pull request #289)

codec/mjpeg: RTP/JPEG extractor

Approved-by: Alan Noble <anoble@gmail.com>
This commit is contained in:
Saxon Milton 2020-01-07 04:43:07 +00:00
commit 36220e967f
7 changed files with 1103 additions and 4 deletions

82
codec/mjpeg/extract.go Normal file
View File

@ -0,0 +1,82 @@
/*
DESCRIPTION
extract.go provides an Extractor to get JPEG images from an RTP/JPEG stream
defined by RFC 2435 (see https://tools.ietf.org/html/rfc2435).
AUTHOR
Saxon Nelson-Milton <saxon@ausocean.org>
LICENSE
Copyright (C) 2019 the Australian Ocean Lab (AusOcean)
It is free software: you can redistribute it and/or modify them
under the terms of the GNU General Public License as published by the
Free Software Foundation, either version 3 of the License, or (at your
option) any later version.
It is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with revid in gpl.txt. If not, see http://www.gnu.org/licenses.
*/
package mjpeg
import (
"fmt"
"io"
"time"
"bitbucket.org/ausocean/av/protocol/rtp"
)
const maxRTPSize = 1500 // Max ethernet transmission unit in bytes.
// Extractor is an Extractor for extracting JPEG from an RTP stream.
type Extractor struct {
dst io.Writer // The destination we'll be writing extracted JPEGs to.
}
// NewExtractor returns a new Extractor.
func NewExtractor() *Extractor { return &Extractor{} }
// Extract will continously read RTP packets from src containing JPEG (in RTP
// payload format) and extract the JPEG images, sending them to dst. This
// function expects that each read from src will provide a single RTP packet.
func (e *Extractor) Extract(dst io.Writer, src io.Reader, delay time.Duration) error {
buf := make([]byte, maxRTPSize)
ctx := NewContext(dst)
for {
n, err := src.Read(buf)
switch err {
case nil: // Do nothing.
case io.EOF:
return nil
default:
return fmt.Errorf("source read error: %v\n", err)
}
// Get payload from RTP packet.
p, err := rtp.Payload(buf[:n])
if err != nil {
return fmt.Errorf("could not get RTP payload: %w\n", err)
}
// Also grab the marker so that we know when the JPEG is finished.
m, err := rtp.Marker(buf[:n])
if err != nil {
return fmt.Errorf("could not read RTP marker: %w", err)
}
err = ctx.ParsePayload(p, m)
switch err {
case ErrNoFrameStart: // If no frame start then we continue until we get one.
default:
return fmt.Errorf("could not parse JPEG scan: %w", err)
}
}
}

447
codec/mjpeg/jpeg.go Normal file
View File

@ -0,0 +1,447 @@
/*
DESCRIPTION
jpeg.go contains code ported from FFmpeg's C implementation of an RTP
JPEG-compressed Video Depacketizer following RFC 2435. See
https://ffmpeg.org/doxygen/2.6/rtpdec__jpeg_8c_source.html and
https://tools.ietf.org/html/rfc2435).
This code can be used to build JPEG images from an RTP/JPEG stream.
AUTHOR
Saxon Nelson-Milton <saxon@ausocean.org>
LICENSE
Copyright (c) 2012 Samuel Pitoiset.
This file is part of FFmpeg.
FFmpeg is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
FFmpeg is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with FFmpeg; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
package mjpeg
import (
"encoding/binary"
"errors"
"fmt"
"io"
)
const maxJPEG = 1000000 // 1 MB (arbitrary)
// JPEG marker codes.
const (
codeSOI = 0xd8 // Start of image.
codeDRI = 0xdd // Define restart interval.
codeDQT = 0xdb // Define quantization tables.
codeDHT = 0xc4 // Define huffman tables.
codeSOS = 0xda // Start of scan.
codeAPP0 = 0xe0 // TODO: find out what this is.
codeSOF0 = 0xc0 // Baseline
codeEOI = 0xd9 // End of image.
)
// Density units.
const (
unitNone = iota
unitPxIN // Pixels per inch.
unitPxCM // Pixels per centimeter.
)
// JFIF header fields.
const (
jfifLabel = "JFIF\000"
jfifVer = 0x0201
jfifDensityUnit = unitNone // Units for pixel density fields.
jfifXDensity = 1 // Horizontal pixel desnity.
jfifYDensity = 1 // Vertical pixel density.
jfifXThumbCnt = 0 // Horizontal pixel count of embedded thumbnail.
jfifYThumbCnt = 0 // Vertical pixel count of embedded thumbnail.
jfifHeadLen = 16 // Length of JFIF header segment excluding APP0 marker.
)
// SOF0 (start of frame) header fields.
const (
sofLen = 17 // Length of SOF0 segment excluding marker.
sofPrecision = 8 // Data precision in bits/sample.
sofNoOfComponents = 3 // Number of components (1 = grey scaled, 3 = color YcbCr or YIQ 4 = color CMYK)
)
// SOS (start of scan) header fields.
const (
sosLen = 12 // Length of SOS segment excluding marker.
sosComponentsInScan = 3 // Number of components in scan.
)
// Errors returned from ParsePayload.
var (
ErrNoQTable = errors.New("no quantization table")
ErrReservedQ = errors.New("q value is reserved")
ErrUnimplementedType = errors.New("unimplemented RTP/JPEG type")
ErrUnsupportedPrecision = errors.New("unsupported precision")
ErrNoFrameStart = errors.New("missing start of frame")
)
// n values required for huffman table generation.
var (
nDCLum = deriveN(bitsDCLum)
nDCChr = deriveN(bitsDCChr)
nACLum = deriveN(bitsACLum)
nACChr = deriveN(bitsACChr)
)
// Slices used in the creation of huffman tables.
var (
bitsDCLum = []byte{0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0}
bitsDCChr = []byte{0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0}
bitsACLum = []byte{0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d}
bitsACChr = []byte{0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77}
valDC = []byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}
valACLum = []byte{
0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
0xf9, 0xfa,
}
valACChr = []byte{
0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
0xf9, 0xfa,
}
)
var defaultQuantisers = []byte{
// Luma table.
16, 11, 12, 14, 12, 10, 16, 14,
13, 14, 18, 17, 16, 19, 24, 40,
26, 24, 22, 22, 24, 49, 35, 37,
29, 40, 58, 51, 61, 60, 57, 51,
56, 55, 64, 72, 92, 78, 64, 68,
87, 69, 55, 56, 80, 109, 81, 87,
95, 98, 103, 104, 103, 62, 77, 113,
121, 112, 100, 120, 92, 101, 103, 99,
/* chroma table */
17, 18, 18, 24, 21, 24, 47, 26,
26, 47, 99, 66, 56, 66, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
}
// Context describes a RTP/JPEG parsing context that will keep track of the current
// JPEG (held by p), and the state of the quantization tables.
type Context struct {
qTables [128][128]byte
qTablesLen [128]byte
buf []byte
blen int
dst io.Writer
}
// NewContext will return a new Context with destination d.
func NewContext(d io.Writer) *Context {
return &Context{
dst: d,
buf: make([]byte, maxJPEG),
}
}
// ParsePayload will parse an RTP/JPEG payload and append to current image.
func (c *Context) ParsePayload(p []byte, m bool) error {
idx := 1 // Ignore type-specific flag (skip to index 1).
off := get24(p[idx:]) // Fragment offset (3 bytes).
t := int(p[idx+3]) // Type (1 byte).
q := p[idx+4] // Quantization value (1 byte).
width := p[idx+5] // Picture width (1 byte).
height := p[idx+6] // Picture height (1 byte).
idx += 7
var dri uint16 // Restart interval.
if t&0x40 != 0 {
dri = binary.BigEndian.Uint16(p[idx:])
idx += 4 // Ignore restart count (2 bytes).
t &= ^0x40
}
if t > 1 {
return ErrUnimplementedType
}
// Parse quantization table if our offset is 0.
if off == 0 {
var qTable []byte
var qLen int
if q > 127 {
idx++
prec := p[idx] // The size of coefficients (1 byte).
qLen = int(binary.BigEndian.Uint16(p[idx+1:])) // The length of the quantization table (2 bytes).
idx += 3
if prec != 0 {
return ErrUnsupportedPrecision
}
q -= 128
if qLen > 0 {
qTable = p[idx : idx+qLen]
idx += qLen
if q < 127 && c.qTablesLen[q] == 0 && qLen <= 0 {
copy(c.qTables[q][:], qTable)
c.qTablesLen[q] = byte(qLen)
}
} else {
if q == 127 {
return ErrNoQTable
}
if c.qTablesLen[q] == 0 {
return fmt.Errorf("no quantization tables known for q %d yet", q)
}
qTable = c.qTables[q][:]
qLen = int(c.qTablesLen[q])
}
} else { // q <= 127
if q == 0 || q > 99 {
return ErrReservedQ
}
qTable = defaultQTable(int(q))
qLen = len(qTable)
}
c.blen = writeHeader(c.buf[c.blen:], int(t), int(width), int(height), qLen/64, dri, qTable)
}
if c.blen == 0 {
// Must have missed start of frame? So ignore and wait for start.
return ErrNoFrameStart
}
// TODO: check that timestamp is consistent
// This will need expansion to RTP package to create Timestamp parsing func.
// TODO: could also check offset with how many bytes we currently have
// to determine if there are missing frames.
// Write frame data.
rem := len(p)
c.blen += copy(c.buf[c.blen:], p[idx:rem])
idx += rem
if m {
// End of image marker.
binary.BigEndian.PutUint16(c.buf[c.blen:], 0xff00|codeEOI)
c.blen += 2
n, err := c.dst.Write(c.buf[0:c.blen])
if err != nil {
return fmt.Errorf("could not write JPEG to dst: %w", err)
}
c.blen -= n
}
return nil
}
// writeHeader writes a JPEG header to the writer slice p.
func writeHeader(p []byte, _type, width, height, nbqTab int, dri uint16, qtable []byte) int {
width <<= 3
height <<= 3
// Indicate start of image.
idx := 0
binary.BigEndian.PutUint16(p[idx:], 0xff00|codeSOI)
// Write JFIF header.
binary.BigEndian.PutUint16(p[idx+2:], 0xff00|codeAPP0)
binary.BigEndian.PutUint16(p[idx+4:], jfifHeadLen)
idx += 6
idx += copy(p[idx:], jfifLabel)
binary.BigEndian.PutUint16(p[idx:], jfifVer)
p[idx+2] = jfifDensityUnit
binary.BigEndian.PutUint16(p[idx+3:], jfifXDensity)
binary.BigEndian.PutUint16(p[idx+5:], jfifYDensity)
p[idx+7] = jfifXThumbCnt
p[idx+8] = jfifYThumbCnt
idx += 9
// If we want to define restart interval then write that.
if dri != 0 {
binary.BigEndian.PutUint16(p[idx:], 0xff00|codeDRI)
binary.BigEndian.PutUint16(p[idx+2:], 4)
binary.BigEndian.PutUint16(p[idx+4:], dri)
idx += 6
}
// Define quantization tables.
binary.BigEndian.PutUint16(p[idx:], 0xff00|codeDQT)
// Calculate table size and create slice for table.
ts := 2 + nbqTab*(1+64)
binary.BigEndian.PutUint16(p[idx+2:], uint16(ts))
idx += 4
for i := 0; i < nbqTab; i++ {
p[idx] = byte(i)
idx++
idx += copy(p[idx:], qtable[64*i:(64*i)+64])
}
// Define huffman table.
binary.BigEndian.PutUint16(p[idx:], 0xff00|codeDHT)
idx += 2
lenIdx := idx
binary.BigEndian.PutUint16(p[idx:], 0)
idx += 2
idx += writeHuffman(p[idx:], bitsDCLum, valDC, 0, nDCLum)
idx += writeHuffman(p[idx:], bitsDCChr, valDC, 1, nDCChr)
idx += writeHuffman(p[idx:], bitsACLum, valACLum, 1<<4, nACLum)
idx += writeHuffman(p[idx:], bitsACChr, valACChr, 1<<4|1, nACChr)
binary.BigEndian.PutUint16(p[lenIdx:], uint16(idx-lenIdx))
// Start of frame.
binary.BigEndian.PutUint16(p[idx:], 0xff00|codeSOF0)
idx += 2
// Derive sample type.
sample := 1
if _type != 0 {
sample = 2
}
// Derive matrix number.
var mtxNo uint8
if nbqTab == 2 {
mtxNo = 1
}
binary.BigEndian.PutUint16(p[idx:], sofLen)
p[idx+2] = byte(sofPrecision)
binary.BigEndian.PutUint16(p[idx+3:], uint16(height))
binary.BigEndian.PutUint16(p[idx+5:], uint16(width))
p[idx+7] = byte(sofNoOfComponents)
idx += 8
// TODO: find meaning of these fields.
idx += copy(p[idx:], []byte{1, uint8((2 << 4) | sample), 0, 2, 1<<4 | 1, mtxNo, 3, 1<<4 | 1, mtxNo})
// Write start of scan.
binary.BigEndian.PutUint16(p[idx:], 0xff00|codeSOS)
binary.BigEndian.PutUint16(p[idx+2:], sosLen)
p[idx+4] = sosComponentsInScan
idx += 5
// TODO: find out what remaining fields are.
idx += copy(p[idx:], []byte{1, 0, 2, 17, 3, 17, 0, 63, 0})
return idx
}
// writeHuffman write a JPEG huffman table to alice p.
func writeHuffman(p, bits, values []byte, prefix byte, n int) int {
p[0] = prefix
i := copy(p[1:], bits[1:17])
return copy(p[i+1:], values[0:n]) + i + 1
}
// defaultQTable returns a default quantization table.
func defaultQTable(q int) []byte {
f := clip(q, q, 99)
const tabLen = 128
tab := make([]byte, tabLen)
if q < 50 {
q = 5000 / f
} else {
q = 200 - f*2
}
for i := 0; i < tabLen; i++ {
v := (int(defaultQuantisers[i])*q + 50) / 100
v = clip(v, 1, 255)
tab[i] = byte(v)
}
return tab
}
// clip clips the value v to the bounds defined by min and max.
func clip(v, min, max int) int {
if v < min {
return min
}
if v > max {
return max
}
return v
}
// get24 parses an int24 from p using big endian order.
func get24(p []byte) int {
return int(p[0]<<16) | int(p[1]<<8) | int(p[2])
}
// deriveN calculates n values required for huffman table generation.
func deriveN(bits []byte) int {
var n int
for i := 1; i <= 16; i++ {
n += int(bits[i])
}
return n
}

69
codec/mjpeg/jpeg_test.go Normal file
View File

@ -0,0 +1,69 @@
/*
DESCRIPTION
jpeg_test.go provides testing for utilities found in jpeg.go.
AUTHOR
Saxon Nelson-Milton <saxon@ausocean.org>
LICENSE
Copyright (C) 2020 the Australian Ocean Lab (AusOcean)
It is free software: you can redistribute it and/or modify them
under the terms of the GNU General Public License as published by the
Free Software Foundation, either version 3 of the License, or (at your
option) any later version.
It is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with revid in gpl.txt. If not, see http://www.gnu.org/licenses.
*/
package mjpeg
import (
"bytes"
"io/ioutil"
"testing"
"bitbucket.org/ausocean/av/protocol/rtp"
)
func TestParsePayload(t *testing.T) {
const (
wantPath = "testdata/expect.mjpeg"
noOfPkts = 5629
)
got := &bytes.Buffer{}
c := NewContext(got)
for i, pkt := range testPackets {
p, err := rtp.Payload(pkt)
if err != nil {
t.Fatalf("could not get payload for packet %d: %v", i, err)
}
m, err := rtp.Marker(pkt)
if err != nil {
t.Fatalf("could not get marker for packet %d: %v", i, err)
}
err = c.ParsePayload(p, m)
if err != nil {
t.Fatalf("could not parse pyload for packet %d: %v", i, err)
}
}
want, err := ioutil.ReadFile(wantPath)
if err != nil {
t.Fatalf("could not read file for wanted MJPEG data: %v", err)
}
if !bytes.Equal(got.Bytes(), want) {
t.Error("did not get expected result")
}
}

BIN
codec/mjpeg/testdata/expect.mjpeg vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 615 KiB

500
codec/mjpeg/tests.go Normal file

File diff suppressed because one or more lines are too long

View File

@ -68,7 +68,7 @@ const (
defaultFrameRate = 25
defaultBitrate = 400
defaultVBRBitrate = 400
defaultMinFrames = 100
defaultMinFrames = 3
defaultVBRQuality = avconfig.QualityStandard
defaultCameraChan = 2
)
@ -136,9 +136,10 @@ func (g *GeoVision) Set(c avconfig.Config) error {
c.Bitrate = defaultBitrate
}
if c.MinFrames <= 0 {
refresh := float64(c.MinFrames) / float64(c.FrameRate)
if refresh < 1 || refresh > 5 {
errs = append(errs, errGVBadMinFrames)
c.MinFrames = defaultMinFrames
c.MinFrames = 4 * c.FrameRate
}
// If we're using RTMP then we should default to constant bitrate.

View File

@ -411,7 +411,7 @@ func (r *Revid) setupPipeline(mtsEnc func(dst io.WriteCloser, rate float64) (io.
case codecutil.H265:
r.lexTo = h265.NewLexer(false).Lex
case codecutil.MJPEG:
panic("not implemented")
r.lexTo = mjpeg.NewExtractor().Extract
}
case config.InputAudio: