av/codec/mjpeg/jpeg.go

468 lines
12 KiB
Go

/*
DESCRIPTION
jpeg.go contains code ported from FFmpeg's C implementation of an RTP
JPEG-compressed Video Depacketizer following RFC 2435 (see
https://ffmpeg.org/doxygen/2.6/rtpdec__jpeg_8c_source.html and
https://tools.ietf.org/html/rfc2435).
This code can be used to build JPEG images from an RTP/JPEG stream.
AUTHOR
Saxon Nelson-Milton <saxon@ausocean.org>
LICENSE
Copyright (c) 2012 Samuel Pitoiset.
This file is part of FFmpeg.
FFmpeg is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
FFmpeg is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with FFmpeg; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
package mjpeg
import (
"encoding/binary"
"errors"
"fmt"
"io"
)
// maxJPEG is the size in bytes of the buffer that NewContext allocates for
// assembling a JPEG image.
const maxJPEG = 1000000 // 1 MB (arbitrary)
// JPEG marker codes. Each is the low byte of a two-byte marker whose high
// byte is 0xff.
const (
codeSOI = 0xd8 // Start of image.
codeDRI = 0xdd // Define restart interval.
codeDQT = 0xdb // Define quantization tables.
codeDHT = 0xc4 // Define huffman tables.
codeSOS = 0xda // Start of scan.
codeAPP0 = 0xe0 // Application segment 0; carries the JFIF header.
codeSOF0 = 0xc0 // Start of frame (baseline).
codeEOI = 0xd9 // End of image.
)
// Density units, as used by the density unit field of the JFIF header.
const (
unitNone = iota // No units.
unitPxIN // Pixels per inch.
unitPxCM // Pixels per centimeter.
)
// JFIF header fields, in the order in which writeHeader emits them.
const (
jfifLabel = "JFIF\000" // Identifier string, including its terminating NUL byte.
jfifVer = 0x0201 // Value written to the 2-byte version field.
jfifDensityUnit = unitNone // Units for pixel density fields.
jfifXDensity = 1 // Horizontal pixel density.
jfifYDensity = 1 // Vertical pixel density.
jfifXThumbCnt = 0 // Horizontal pixel count of embedded thumbnail.
jfifYThumbCnt = 0 // Vertical pixel count of embedded thumbnail.
jfifHeadLen = 16 // Length of JFIF header segment excluding APP0 marker.
)
// SOF0 (start of frame) header fields.
const (
sofLen = 17 // Length of SOF0 segment excluding marker.
sofPrecision = 8 // Data precision in bits/sample.
sofNoOfComponents = 3 // Number of components (1 = greyscale, 3 = color YCbCr or YIQ, 4 = color CMYK).
)
// SOS (start of scan) header fields.
const (
sosLen = 12 // Length of SOS segment excluding marker.
sosComponentsInScan = 3 // Number of components in scan.
)
// Errors returned by ParsePayload for unusable quantization (Q) values.
var (
errNoQTable = errors.New("no quantization table") // Q is 255 but the payload carries no table.
errReservedQ = errors.New("q value is reserved") // Q is 0 or in the range 100-127.
)
// Slices used in the creation of huffman tables.
//
// Each bits* slice has 17 entries. writeHuffman ignores entry 0 and emits
// entries 1-16; the sum of those entries is the number of values taken from
// the matching val* slice.
var (
bitsDCLum = []byte{0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0}
bitsDCChr = []byte{0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0}
bitsACLum = []byte{0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d}
bitsACChr = []byte{0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77}
// valDC holds the values shared by both DC tables (luminance and chrominance).
valDC = []byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}
// valACLum holds the values of the AC luminance table.
valACLum = []byte{
0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
0xf9, 0xfa,
}
// valACChr holds the values of the AC chrominance table.
valACChr = []byte{
0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
0xf9, 0xfa,
}
)
// defaultQuantisers holds the base quantization values that defaultQTable
// scales by a quality factor: 64 luma entries followed by 64 chroma entries.
var defaultQuantisers = []byte{
// Luma table.
16, 11, 12, 14, 12, 10, 16, 14,
13, 14, 18, 17, 16, 19, 24, 40,
26, 24, 22, 22, 24, 49, 35, 37,
29, 40, 58, 51, 61, 60, 57, 51,
56, 55, 64, 72, 92, 78, 64, 68,
87, 69, 55, 56, 80, 109, 81, 87,
95, 98, 103, 104, 103, 62, 77, 113,
121, 112, 100, 120, 92, 101, 103, 99,
// Chroma table.
17, 18, 18, 24, 21, 24, 47, 26,
26, 47, 99, 66, 56, 66, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
}
// Context describes an RTP/JPEG parsing context that keeps track of the JPEG
// currently being assembled (held in buf) and of the quantization tables
// received so far.
type Context struct {
qTables [128][128]byte // Quantization tables received in-band, indexed by Q-128.
qTablesLen [128]byte // Length in bytes of each entry of qTables; 0 means not yet received.
buf []byte // Buffer in which the current JPEG is assembled.
blen int // Number of bytes of buf currently in use.
dst io.Writer // Destination to which completed JPEGs are written.
}
// NewContext returns a Context that writes completed JPEG images to d. The
// Context's image buffer is allocated up front with a size of maxJPEG bytes.
func NewContext(d io.Writer) *Context {
	c := new(Context)
	c.dst = d
	c.buf = make([]byte, maxJPEG)
	return c
}
// ParsePayload parses the RTP/JPEG payload p (RFC 2435) and appends its JPEG
// scan data to the image currently being assembled. m is the RTP marker bit,
// which flags the final payload of an image; when it is set an end-of-image
// marker is appended and the complete JPEG is written to the Context's
// destination.
//
// A payload with a fragment offset of zero starts a new image: any partially
// assembled image is discarded and fresh JPEG headers are generated from the
// payload's RTP/JPEG headers. Payloads received before the first such payload
// are ignored and nil is returned.
//
// An error is returned if p is truncated, has an unsupported type or
// quantization table precision, or uses a reserved or unknown Q value. An
// error is also returned, and the image in progress is dropped, if the image
// outgrows the internal buffer or the write to the destination fails.
func (c *Context) ParsePayload(p []byte, m bool) error {
	const (
		mainHdrLen    = 8 // Type-specific (1), offset (3), type, Q, width, height.
		restartHdrLen = 4 // Restart interval (2) and restart count (2).
		qHdrLen       = 4 // Reserved (1), precision (1) and table length (2).
		eoiLen        = 2 // End of image marker.
	)

	// The payload comes from the network, so every header must be length
	// checked before it is read.
	if len(p) < mainHdrLen {
		return fmt.Errorf("payload too short for RTP/JPEG main header: %d bytes", len(p))
	}

	// Main header. Byte 0 is the type-specific field, which is ignored.
	off := get24(p[1:]) // Fragment offset (3 bytes).
	t := int(p[4])      // Type (1 byte).
	q := p[5]           // Quantization value (1 byte).
	width := p[6]       // Picture width in 8 pixel units (1 byte).
	height := p[7]      // Picture height in 8 pixel units (1 byte).
	idx := mainHdrLen

	var dri int // Restart interval.
	if t&0x40 != 0 {
		if len(p) < idx+restartHdrLen {
			return fmt.Errorf("payload too short for RTP/JPEG restart marker header: %d bytes", len(p))
		}
		dri = int(binary.BigEndian.Uint16(p[idx:]))
		idx += restartHdrLen // Also skips the restart count (2 bytes).
		t &^= 0x40
	}

	if t > 1 {
		return fmt.Errorf("unimplemented RTP/JPEG type %d", t)
	}

	// Parse quantization table if our offset is 0.
	if off == 0 {
		var qTable []byte
		var qLen int
		if q > 127 {
			if len(p) < idx+qHdrLen {
				return fmt.Errorf("payload too short for RTP/JPEG quantization table header: %d bytes", len(p))
			}
			// The first byte of the quantization table header is reserved.
			prec := p[idx+1]                               // The size of coefficients (1 byte).
			qLen = int(binary.BigEndian.Uint16(p[idx+2:])) // The length of the quantization table (2 bytes).
			idx += qHdrLen

			if prec != 0 {
				return fmt.Errorf("unsupported quantization table precision %#x", prec)
			}

			if qLen > 0 {
				if len(p) < idx+qLen {
					return fmt.Errorf("payload too short for %d byte quantization table", qLen)
				}
				qTable = p[idx : idx+qLen]
				idx += qLen

				// Remember the first table seen for this q so that later
				// images may omit it.
				if q < 255 && c.qTablesLen[q-128] == 0 && qLen <= 128 {
					copy(c.qTables[q-128][:], qTable)
					c.qTablesLen[q-128] = byte(qLen)
				}
			} else {
				if q == 255 {
					return errNoQTable
				}
				if c.qTablesLen[q-128] == 0 {
					return fmt.Errorf("no quantization tables known for q %d yet", q)
				}
				qTable = c.qTables[q-128][:]
				qLen = int(c.qTablesLen[q-128])
			}
		} else { // q <= 127
			if q == 0 || q > 99 {
				return errReservedQ
			}
			qTable = defaultQTable(int(q))
			qLen = len(qTable)
		}

		// Start the new image at the beginning of the buffer. This discards
		// any unfinished image, e.g. one whose final payload was lost.
		c.blen = writeHeader(c.buf, t, int(width), int(height), qLen/64, dri, qTable)
	}

	if c.blen == 0 {
		// Must have missed start of frame? So ignore and wait for start.
		return nil
	}

	// TODO: check that timestamp is consistent
	// This will need expansion to RTP package to create Timestamp parsing func.

	// TODO: could also check offset with how many bytes we currently have
	// to determine if there are missing frames.

	// Write frame data.
	data := p[idx:]
	if c.blen+len(data) > len(c.buf) {
		c.blen = 0
		return fmt.Errorf("JPEG exceeds buffer size of %d bytes", len(c.buf))
	}
	c.blen += copy(c.buf[c.blen:], data)

	if !m {
		return nil
	}

	// End of image marker.
	if c.blen+eoiLen > len(c.buf) {
		c.blen = 0
		return fmt.Errorf("JPEG exceeds buffer size of %d bytes", len(c.buf))
	}
	binary.BigEndian.PutUint16(c.buf[c.blen:], 0xff00|codeEOI)
	c.blen += eoiLen

	// The image is finished whether or not the write succeeds, so always
	// reset the buffer ready for the next image.
	_, err := c.dst.Write(c.buf[:c.blen])
	c.blen = 0
	if err != nil {
		return fmt.Errorf("could not write JPEG to dst: %w", err)
	}
	return nil
}
// get24 decodes the first three bytes of p as a big-endian 24-bit unsigned
// integer. It panics if p holds fewer than three bytes.
func get24(p []byte) int {
	_ = p[2] // One bounds check up front covers all three reads below.

	// Each byte must be widened to int before shifting; shifting a byte left
	// by 8 or more bits always yields zero.
	return int(p[0])<<16 | int(p[1])<<8 | int(p[2])
}
// writeHeader writes the JPEG headers for an RTP/JPEG image to the start of p
// and returns the number of bytes written. The segments are emitted in the
// order SOI, JFIF APP0, DRI (only when dri is non-zero), DQT, DHT, SOF0, SOS.
//
// _type is the RTP/JPEG type and selects the sampling factors of component 1.
// width and height are the image dimensions in units of 8 pixels. nbqTab is
// the number of 64-byte quantization tables stored back to back in qtable.
// p must be large enough to hold the whole header, otherwise writeHeader
// panics.
func writeHeader(p []byte, _type, width, height, nbqTab, dri int, qtable []byte) int {
	// pos is the write cursor into p; the helpers below advance it.
	pos := 0
	put8 := func(b byte) {
		p[pos] = b
		pos++
	}
	put16 := func(v uint16) {
		binary.BigEndian.PutUint16(p[pos:], v)
		pos += 2
	}
	putMarker := func(code byte) {
		put16(0xff00 | uint16(code))
	}

	// Convert the dimensions from 8 pixel units to pixels.
	width *= 8
	height *= 8

	// Start of image.
	putMarker(codeSOI)

	// JFIF header.
	putMarker(codeAPP0)
	put16(jfifHeadLen)
	copy(p[pos:], jfifLabel)
	pos += len(jfifLabel)
	put16(jfifVer)
	put8(jfifDensityUnit)
	put16(jfifXDensity)
	put16(jfifYDensity)
	put8(jfifXThumbCnt)
	put8(jfifYThumbCnt)

	// Restart interval, only when one is in use.
	if dri != 0 {
		putMarker(codeDRI)
		put16(4)
		put16(uint16(dri))
	}

	// Quantization tables: a 2-byte length followed by an identifier byte
	// and 64 table bytes for each table.
	putMarker(codeDQT)
	put16(uint16(2 + nbqTab*(1+64)))
	for i := 0; i < nbqTab; i++ {
		put8(byte(i))
		tab := qtable[64*i : 64*i+64]
		copy(p[pos:], tab)
		pos += len(tab)
	}

	// Huffman tables. The segment length is only known once all four tables
	// have been written, so a placeholder is written first and patched after.
	putMarker(codeDHT)
	dhtLenPos := pos
	put16(0)
	huffTables := [...]struct {
		class, id  int
		bits, vals []byte
	}{
		{0, 0, bitsDCLum, valDC},
		{0, 1, bitsDCChr, valDC},
		{1, 0, bitsACLum, valACLum},
		{1, 1, bitsACChr, valACChr},
	}
	for _, h := range huffTables {
		pos += writeHuffman(p[pos:], h.class, h.id, h.bits, h.vals)
	}
	binary.BigEndian.PutUint16(p[dhtLenPos:], uint16(pos-dhtLenPos))

	// Start of frame.
	putMarker(codeSOF0)

	// Sampling factors of component 1: horizontal in the high nibble and
	// vertical in the low nibble.
	sampling := byte(2<<4 | 1)
	if _type != 0 {
		sampling = 2<<4 | 2
	}

	// Components 2 and 3 use the second quantization table when there is one.
	mtxNo := byte(0)
	if nbqTab == 2 {
		mtxNo = 1
	}

	put16(sofLen)
	put8(sofPrecision)
	put16(uint16(height))
	put16(uint16(width))
	put8(sofNoOfComponents)

	// Per component: identifier, sampling factors, quantization table number.
	for _, b := range [...]byte{
		1, sampling, 0,
		2, 1<<4 | 1, mtxNo,
		3, 1<<4 | 1, mtxNo,
	} {
		put8(b)
	}

	// Start of scan.
	putMarker(codeSOS)
	put16(sosLen)
	put8(sosComponentsInScan)

	// Per component: identifier, then DC (high nibble) and AC (low nibble)
	// huffman table selectors. The final three bytes are the start of
	// spectral selection, the end of spectral selection and the successive
	// approximation bits.
	for _, b := range [...]byte{
		1, 0,
		2, 17,
		3, 17,
		0, 63, 0,
	} {
		put8(b)
	}

	return pos
}
// writeHuffman writes a single JPEG huffman table to the start of p and
// returns the number of bytes written.
//
// class and id are packed into the first byte (class in the high nibble, id
// in the low nibble). It is followed by bits[1] to bits[16] and then by the
// first n bytes of values, where n is the sum of bits[1] to bits[16]. bits[0]
// is not used.
func writeHuffman(p []byte, class, id int, bits, values []byte) int {
	p[0] = uint8(class<<4 | id)

	// The 16 counts add up to the number of values in the table.
	total := 0
	for i := 16; i > 0; i-- {
		total += int(bits[i])
	}

	counts := bits[1:17]
	copy(p[1:], counts)

	symbols := values[:total]
	copy(p[1+len(counts):], symbols)

	return 1 + len(counts) + len(symbols)
}
// defaultQTable returns the default quantization tables for the quality
// factor q: 128 bytes made up of the 64-entry luma table followed by the
// 64-entry chroma table. The tables are derived by scaling defaultQuantisers
// as described in RFC 2435, with every entry limited to the range [1, 255].
//
// q is clamped to the range [1, 99] before use, so any value yields a valid
// table.
func defaultQTable(q int) []byte {
	const tabLen = 128

	// The lower bound must be 1: a factor of zero would divide by zero below.
	f := clip(q, 1, 99)

	var scale int
	if f < 50 {
		scale = 5000 / f
	} else {
		scale = 200 - f*2
	}

	tab := make([]byte, tabLen)
	for i := range tab {
		v := (int(defaultQuantisers[i])*scale + 50) / 100
		tab[i] = byte(clip(v, 1, 255))
	}
	return tab
}
// clip limits v to the range [min, max]. The lower bound is checked first, so
// min is returned whenever v is less than min.
func clip(v, min, max int) int {
	switch {
	case v < min:
		return min
	case v > max:
		return max
	default:
		return v
	}
}