Add matchfinder package.

I've been experimenting for a while with a new brotli compressor.
Instead of being a translation of the C implementation,
it's a rewrite in Go, with a modular structure thanks to interfaces.
(A few low-level functions still come from the C version, though.)

The performance is getting to the point where it seems to be worth
adding to the brotli repository.
Andy Balholm 2023-12-28 16:09:32 -08:00
parent b7a4cf9ec5
commit 349ed2fce1
10 changed files with 1031 additions and 50 deletions

bitwriter.go Normal file

@ -0,0 +1,56 @@
package brotli
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Write bits into a byte array. */
type bitWriter struct {
dst []byte
// Data waiting to be written is the low nbits of bits.
bits uint64
nbits uint
}
func (w *bitWriter) writeBits(nb uint, b uint64) {
w.bits |= b << w.nbits
w.nbits += nb
if w.nbits >= 32 {
bits := w.bits
w.bits >>= 32
w.nbits -= 32
w.dst = append(w.dst,
byte(bits),
byte(bits>>8),
byte(bits>>16),
byte(bits>>24),
)
}
}
func (w *bitWriter) writeSingleBit(bit bool) {
if bit {
w.writeBits(1, 1)
} else {
w.writeBits(1, 0)
}
}
func (w *bitWriter) jumpToByteBoundary() {
dst := w.dst
for w.nbits != 0 {
dst = append(dst, byte(w.bits))
w.bits >>= 8
if w.nbits > 8 { // Avoid underflow
w.nbits -= 8
} else {
w.nbits = 0
}
}
w.bits = 0
w.dst = dst
}
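A minimal sketch of how the writer packs values, assuming it is exercised from inside the package (bitWriter is unexported). Bits accumulate LSB-first in the 64-bit buffer, writeBits flushes 32 bits at a time, and jumpToByteBoundary drains the rest, so the first value written lands in the low bits of the first output byte:

var w bitWriter
w.writeBits(4, 0x9)  // bits = 0x9, nbits = 4
w.writeBits(8, 0xAB) // bits = 0xAB9, nbits = 12
w.jumpToByteBoundary()
// w.dst is now [0xB9, 0x0A]: the low byte first, then the remaining nibble.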


@ -7,12 +7,18 @@ import (
const maxHuffmanTreeSize = (2*numCommandSymbols + 1)
/*
The maximum size of Huffman dictionary for distances assuming that
NPOSTFIX = 0 and NDIRECT = 0.
*/
const maxSimpleDistanceAlphabetSize = 140
/*
Represents the range of values belonging to a prefix code:
[offset, offset + 2^nbits)
*/
type prefixCodeRange struct {
offset uint32
nbits uint32
@ -96,9 +102,12 @@ func nextBlockTypeCode(calculator *blockTypeCodeCalculator, type_ byte) uint {
return type_code
}
/*
|nibblesbits| represents the 2 bits to encode MNIBBLES (0-3)
REQUIRES: length > 0
REQUIRES: length <= (1 << 24)
*/
func encodeMlen(length uint, bits *uint64, numbits *uint, nibblesbits *uint64) {
var lg uint
if length == 1 {
@ -132,8 +141,11 @@ func storeCommandExtra(cmd *command, storage_ix *uint, storage []byte) {
writeBits(uint(insnumextra+getCopyExtra(copycode)), bits, storage_ix, storage)
}
/*
Data structure that stores almost everything that is needed to encode each
block switch command.
*/
type blockSplitCode struct {
type_code_calculator blockTypeCodeCalculator
type_depths [maxBlockTypeSymbols]byte
@ -154,9 +166,12 @@ func storeVarLenUint8(n uint, storage_ix *uint, storage []byte) {
}
}
/*
Stores the compressed meta-block header.
REQUIRES: length > 0
REQUIRES: length <= (1 << 24)
*/
func storeCompressedMetaBlockHeader(is_final_block bool, length uint, storage_ix *uint, storage []byte) {
var lenbits uint64
var nlenbits uint
@ -186,9 +201,12 @@ func storeCompressedMetaBlockHeader(is_final_block bool, length uint, storage_ix
}
}
/*
Stores the uncompressed meta-block header.
REQUIRES: length > 0
REQUIRES: length <= (1 << 24)
*/
func storeUncompressedMetaBlockHeader(length uint, storage_ix *uint, storage []byte) {
var lenbits uint64
var nlenbits uint
@ -312,8 +330,11 @@ func storeSimpleHuffmanTree(depths []byte, symbols []uint, num_symbols uint, max
}
}
/*
num = alphabet size
depths = symbol depths
*/
func storeHuffmanTree(depths []byte, num uint, tree []huffmanTree, storage_ix *uint, storage []byte) {
var huffman_tree [numCommandSymbols]byte
var huffman_tree_extra_bits [numCommandSymbols]byte
@ -367,8 +388,11 @@ func storeHuffmanTree(depths []byte, num uint, tree []huffmanTree, storage_ix *u
storeHuffmanTreeToBitMask(huffman_tree_size, huffman_tree[:], huffman_tree_extra_bits[:], code_length_bitdepth[:], code_length_bitdepth_symbols[:], storage_ix, storage)
}
/*
Builds a Huffman tree from histogram[0:length] into depth[0:length] and
bits[0:length] and stores the encoded tree to the bit stream.
*/
func buildAndStoreHuffmanTree(histogram []uint32, histogram_length uint, alphabet_size uint, tree []huffmanTree, depth []byte, bits []uint16, storage_ix *uint, storage []byte) {
var count uint = 0
var s4 = [4]uint{0}
@ -623,6 +647,203 @@ func buildAndStoreHuffmanTreeFast(histogram []uint32, histogram_total uint, max_
}
}
func buildAndStoreHuffmanTreeFastBW(histogram []uint32, histogram_total uint, max_bits uint, depth []byte, bits []uint16, bw *bitWriter) {
var count uint = 0
var symbols = [4]uint{0}
var length uint = 0
var total uint = histogram_total
for total != 0 {
if histogram[length] != 0 {
if count < 4 {
symbols[count] = length
}
count++
total -= uint(histogram[length])
}
length++
}
if count <= 1 {
bw.writeBits(4, 1)
bw.writeBits(max_bits, uint64(symbols[0]))
depth[symbols[0]] = 0
bits[symbols[0]] = 0
return
}
for i := 0; i < int(length); i++ {
depth[i] = 0
}
{
var max_tree_size uint = 2*length + 1
tree, _ := huffmanTreePool.Get().(*[]huffmanTree)
if tree == nil || cap(*tree) < int(max_tree_size) {
tmp := make([]huffmanTree, max_tree_size)
tree = &tmp
} else {
*tree = (*tree)[:max_tree_size]
}
var count_limit uint32
for count_limit = 1; ; count_limit *= 2 {
var node int = 0
var l uint
for l = length; l != 0; {
l--
if histogram[l] != 0 {
if histogram[l] >= count_limit {
initHuffmanTree(&(*tree)[node:][0], histogram[l], -1, int16(l))
} else {
initHuffmanTree(&(*tree)[node:][0], count_limit, -1, int16(l))
}
node++
}
}
{
var n int = node
var sentinel huffmanTree
var i int = 0     /* Points to the next leaf node. */
var j int = n + 1 /* Points to the next non-leaf node. */
var k int
sortHuffmanTreeItems(*tree, uint(n), huffmanTreeComparator(sortHuffmanTree1))
/* The nodes are:
[0, n): the sorted leaf nodes that we start with.
[n]: we add a sentinel here.
[n + 1, 2n): new parent nodes are added here, starting from
(n+1). These are naturally in ascending order.
[2n]: we add a sentinel at the end as well.
There will be (2n+1) elements at the end. */
initHuffmanTree(&sentinel, math.MaxUint32, -1, -1)
(*tree)[node] = sentinel
node++
(*tree)[node] = sentinel
node++
for k = n - 1; k > 0; k-- {
var left int
var right int
if (*tree)[i].total_count_ <= (*tree)[j].total_count_ {
left = i
i++
} else {
left = j
j++
}
if (*tree)[i].total_count_ <= (*tree)[j].total_count_ {
right = i
i++
} else {
right = j
j++
}
/* The sentinel node becomes the parent node. */
(*tree)[node-1].total_count_ = (*tree)[left].total_count_ + (*tree)[right].total_count_
(*tree)[node-1].index_left_ = int16(left)
(*tree)[node-1].index_right_or_value_ = int16(right)
/* Add back the last sentinel node. */
(*tree)[node] = sentinel
node++
}
if setDepth(2*n-1, *tree, depth, 14) {
/* We need to pack the Huffman tree in 14 bits. If this was not
successful, add fake entities to the lowest values and retry. */
break
}
}
}
huffmanTreePool.Put(tree)
}
convertBitDepthsToSymbols(depth, length, bits)
if count <= 4 {
var i uint
/* value of 1 indicates a simple Huffman code */
bw.writeBits(2, 1)
bw.writeBits(2, uint64(count)-1) /* NSYM - 1 */
/* Sort */
for i = 0; i < count; i++ {
var j uint
for j = i + 1; j < count; j++ {
if depth[symbols[j]] < depth[symbols[i]] {
var tmp uint = symbols[j]
symbols[j] = symbols[i]
symbols[i] = tmp
}
}
}
if count == 2 {
bw.writeBits(max_bits, uint64(symbols[0]))
bw.writeBits(max_bits, uint64(symbols[1]))
} else if count == 3 {
bw.writeBits(max_bits, uint64(symbols[0]))
bw.writeBits(max_bits, uint64(symbols[1]))
bw.writeBits(max_bits, uint64(symbols[2]))
} else {
bw.writeBits(max_bits, uint64(symbols[0]))
bw.writeBits(max_bits, uint64(symbols[1]))
bw.writeBits(max_bits, uint64(symbols[2]))
bw.writeBits(max_bits, uint64(symbols[3]))
/* tree-select */
bw.writeSingleBit(depth[symbols[0]] == 1)
}
} else {
var previous_value byte = 8
var i uint
/* Complex Huffman Tree */
storeStaticCodeLengthCodeBW(bw)
/* Actual RLE coding. */
for i = 0; i < length; {
var value byte = depth[i]
var reps uint = 1
var k uint
for k = i + 1; k < length && depth[k] == value; k++ {
reps++
}
i += reps
if value == 0 {
bw.writeBits(uint(kZeroRepsDepth[reps]), kZeroRepsBits[reps])
} else {
if previous_value != value {
bw.writeBits(uint(kCodeLengthDepth[value]), uint64(kCodeLengthBits[value]))
reps--
}
if reps < 3 {
for reps != 0 {
reps--
bw.writeBits(uint(kCodeLengthDepth[value]), uint64(kCodeLengthBits[value]))
}
} else {
reps -= 3
bw.writeBits(uint(kNonZeroRepsDepth[reps]), kNonZeroRepsBits[reps])
}
previous_value = value
}
}
}
}
func indexOf(v []byte, v_size uint, value byte) uint {
var i uint = 0
for ; i < v_size; i++ {
@ -674,12 +895,15 @@ func moveToFrontTransform(v_in []uint32, v_size uint, v_out []uint32) {
}
}
/*
Finds runs of zeros in v[0..in_size) and replaces them with a prefix code of
the run length plus extra bits (lower 9 bits is the prefix code and the rest
are the extra bits). Non-zero values in v[] are shifted by
*max_length_prefix. Will not create prefix codes bigger than the initial
value of *max_run_length_prefix. The prefix code of run length L is simply
Log2Floor(L) and the number of extra bits is the same as the prefix code.
*/
func runLengthCodeZeros(in_size uint, v []uint32, out_size *uint, max_run_length_prefix *uint32) {
var max_reps uint32 = 0
var i uint
@ -799,8 +1023,11 @@ func storeBlockSwitch(code *blockSplitCode, block_len uint32, block_type byte, i
writeBits(uint(len_nextra), uint64(len_extra), storage_ix, storage)
}
/*
Builds a BlockSplitCode data structure from the block split given by the
vector of block types and block lengths and stores it to the bit stream.
*/
func buildAndStoreBlockSplitCode(types []byte, lengths []uint32, num_blocks uint, num_types uint, tree []huffmanTree, code *blockSplitCode, storage_ix *uint, storage []byte) {
var type_histo [maxBlockTypeSymbols]uint32
var length_histo [numBlockLenSymbols]uint32
@ -919,14 +1146,20 @@ func cleanupBlockEncoder(self *blockEncoder) {
blockEncoderPool.Put(self)
}
/*
Creates entropy codes of block lengths and block types and stores them
to the bit stream.
*/
func buildAndStoreBlockSwitchEntropyCodes(self *blockEncoder, tree []huffmanTree, storage_ix *uint, storage []byte) {
buildAndStoreBlockSplitCode(self.block_types_, self.block_lengths_, self.num_blocks_, self.num_block_types_, tree, &self.block_split_code_, storage_ix, storage)
}
/*
Stores the next symbol with the entropy code of the current block type.
Updates the block type and block length at block boundaries.
*/
func storeSymbol(self *blockEncoder, symbol uint, storage_ix *uint, storage []byte) {
if self.block_len_ == 0 {
self.block_ix_++
@ -945,9 +1178,12 @@ func storeSymbol(self *blockEncoder, symbol uint, storage_ix *uint, storage []by
}
}
/*
Stores the next symbol with the entropy code of the current block type and
context value.
Updates the block type and block length at block boundaries.
*/
func storeSymbolWithContext(self *blockEncoder, symbol uint, context uint, context_map []uint32, storage_ix *uint, storage []byte, context_bits uint) {
if self.block_len_ == 0 {
self.block_ix_++
@ -1268,8 +1504,11 @@ func storeMetaBlockFast(input []byte, start_pos uint, length uint, mask uint, is
}
}
/*
This is for storing uncompressed blocks (simple raw storage of
bytes-as-bytes).
*/
func storeUncompressedMetaBlock(is_final_block bool, input []byte, position uint, mask uint, len uint, storage_ix *uint, storage []byte) {
var masked_pos uint = position & mask
storeUncompressedMetaBlockHeader(uint(len), storage_ix, storage)


@ -16,6 +16,8 @@ import (
"os"
"testing"
"time"
"github.com/andybalholm/brotli/matchfinder"
)
func checkCompressedData(compressedData, wantOriginalData []byte) error {
@ -595,3 +597,63 @@ func BenchmarkDecodeLevels(b *testing.B) {
})
}
}
func test(t *testing.T, filename string, m matchfinder.MatchFinder, blockSize int) {
data, err := ioutil.ReadFile(filename)
if err != nil {
t.Fatal(err)
}
b := new(bytes.Buffer)
w := &matchfinder.Writer{
Dest: b,
MatchFinder: m,
Encoder: &Encoder{},
BlockSize: blockSize,
}
w.Write(data)
w.Close()
compressed := b.Bytes()
sr := NewReader(bytes.NewReader(compressed))
decompressed, err := ioutil.ReadAll(sr)
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(decompressed, data) {
t.Fatal("decompressed output doesn't match")
}
}
func benchmark(b *testing.B, filename string, m matchfinder.MatchFinder, blockSize int) {
b.StopTimer()
b.ReportAllocs()
data, err := ioutil.ReadFile(filename)
if err != nil {
b.Fatal(err)
}
b.SetBytes(int64(len(data)))
buf := new(bytes.Buffer)
w := &matchfinder.Writer{
Dest: buf,
MatchFinder: m,
Encoder: &Encoder{},
BlockSize: blockSize,
}
w.Write(data)
w.Close()
b.ReportMetric(float64(len(data))/float64(buf.Len()), "ratio")
b.StartTimer()
for i := 0; i < b.N; i++ {
w.Reset(ioutil.Discard)
w.Write(data)
w.Close()
}
}
func TestEncodeM4(t *testing.T) {
test(t, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 18}, 1<<16)
}
func BenchmarkEncodeM4(b *testing.B) {
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20}, 1<<16)
}


@ -39,8 +39,11 @@ func isMatch1(p1 []byte, p2 []byte, length uint) bool {
return p1[4] == p2[4] && p1[5] == p2[5]
}
/*
Builds a command and distance prefix code (each 64 symbols) into "depth" and
"bits" based on "histogram" and stores it into the bit stream.
*/
func buildAndStoreCommandPrefixCode(histogram []uint32, depth []byte, bits []uint16, storage_ix *uint, storage []byte) {
var tree [129]huffmanTree
var cmd_depth = [numCommandSymbols]byte{0}
@ -216,6 +219,25 @@ func storeMetaBlockHeader(len uint, is_uncompressed bool, storage_ix *uint, stor
writeSingleBit(is_uncompressed, storage_ix, storage)
}
func storeMetaBlockHeaderBW(len uint, is_uncompressed bool, bw *bitWriter) {
var nibbles uint = 6
/* ISLAST */
bw.writeBits(1, 0)
if len <= 1<<16 {
nibbles = 4
} else if len <= 1<<20 {
nibbles = 5
}
bw.writeBits(2, uint64(nibbles)-4)
bw.writeBits(nibbles*4, uint64(len)-1)
/* ISUNCOMPRESSED */
bw.writeSingleBit(is_uncompressed)
}
func createCommands(input []byte, block_size uint, input_size uint, base_ip_ptr []byte, table []int, table_bits uint, min_match uint, literals *[]byte, commands *[]uint32) {
var ip int = 0
var shift uint = 64 - table_bits
@ -710,7 +732,9 @@ func compressFragmentTwoPassImpl(input []byte, input_size uint, is_last bool, co
}
}
/*
Compresses "input" string to the "*storage" buffer as one or more complete
meta-blocks, and updates the "*storage_ix" bit position.
If "is_last" is 1, emits an additional empty last meta-block.
@ -722,7 +746,8 @@ func compressFragmentTwoPassImpl(input []byte, input_size uint, is_last bool, co
REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
REQUIRES: "table_size" is a power of two
OUTPUT: maximal copy distance <= |input_size|
OUTPUT: maximal copy distance <= BROTLI_MAX_BACKWARD_LIMIT(18)
*/
func compressFragmentTwoPass(input []byte, input_size uint, is_last bool, command_buf []uint32, literal_buf []byte, table []int, table_size uint, storage_ix *uint, storage []byte) {
var initial_storage_ix uint = *storage_ix
var table_bits uint = uint(log2FloorNonZero(table_size))
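A hypothetical trace of storeMetaBlockHeaderBW above, showing how MNIBBLES is chosen (the smallest of 4, 5, or 6 nibbles that can hold MLEN - 1):

var bw bitWriter
storeMetaBlockHeaderBW(100000, false, &bw)
// Emits: ISLAST = 0 (1 bit); since 1<<16 < 100000 <= 1<<20, nibbles = 5,
// so MNIBBLES - 4 = 1 (2 bits); MLEN - 1 = 99999 (20 bits);
// ISUNCOMPRESSED = 0 (1 bit).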

encoder.go Normal file

@ -0,0 +1,168 @@
package brotli
import "github.com/andybalholm/brotli/matchfinder"
// An Encoder implements the matchfinder.Encoder interface, writing in Brotli format.
type Encoder struct {
wroteHeader bool
bw bitWriter
distCache []distanceCode
}
func (e *Encoder) Reset() {
e.wroteHeader = false
e.bw = bitWriter{}
}
func (e *Encoder) Encode(dst []byte, src []byte, matches []matchfinder.Match, lastBlock bool) []byte {
e.bw.dst = dst
if !e.wroteHeader {
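// Stream header: WBITS = 24 (a 16 MiB window), encoded as the four bits 1111.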
e.bw.writeBits(4, 15)
e.wroteHeader = true
}
var literalHisto [256]uint32
var commandHisto [704]uint32
var distanceHisto [64]uint32
literalCount := 0
commandCount := 0
distanceCount := 0
if len(e.distCache) < len(matches) {
e.distCache = make([]distanceCode, len(matches))
}
// first pass: build the histograms
pos := 0
// d is the ring buffer of the last 4 distances.
d := [4]int{-10, -10, -10, -10}
for i, m := range matches {
if m.Unmatched > 0 {
for _, c := range src[pos : pos+m.Unmatched] {
literalHisto[c]++
}
literalCount += m.Unmatched
}
insertCode := getInsertLengthCode(uint(m.Unmatched))
copyCode := getCopyLengthCode(uint(m.Length))
if m.Length == 0 {
// If the stream ends with unmatched bytes, we need a dummy copy length.
copyCode = 2
}
command := combineLengthCodes(insertCode, copyCode, false)
commandHisto[command]++
commandCount++
if command >= 128 && m.Length != 0 {
var distCode distanceCode
switch m.Distance {
case d[3]:
distCode.code = 0
case d[2]:
distCode.code = 1
case d[1]:
distCode.code = 2
case d[0]:
distCode.code = 3
case d[3] - 1:
distCode.code = 4
case d[3] + 1:
distCode.code = 5
case d[3] - 2:
distCode.code = 6
case d[3] + 2:
distCode.code = 7
case d[3] - 3:
distCode.code = 8
case d[3] + 3:
distCode.code = 9
// In my testing, codes 10-15 actually reduced the compression ratio.
default:
distCode = getDistanceCode(m.Distance)
}
e.distCache[i] = distCode
distanceHisto[distCode.code]++
distanceCount++
if distCode.code != 0 {
d[0], d[1], d[2], d[3] = d[1], d[2], d[3], m.Distance
}
}
pos += m.Unmatched + m.Length
}
storeMetaBlockHeaderBW(uint(len(src)), false, &e.bw)
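// 13 zero bits: one block type for each of the three categories (3 bits),
// NPOSTFIX = 0 and NDIRECT = 0 (6 bits), context mode 0 for the single
// literal block type (2 bits), and one literal tree and one distance tree
// (1 bit each).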
e.bw.writeBits(13, 0)
var literalDepths [256]byte
var literalBits [256]uint16
buildAndStoreHuffmanTreeFastBW(literalHisto[:], uint(literalCount), 8, literalDepths[:], literalBits[:], &e.bw)
var commandDepths [704]byte
var commandBits [704]uint16
buildAndStoreHuffmanTreeFastBW(commandHisto[:], uint(commandCount), 10, commandDepths[:], commandBits[:], &e.bw)
var distanceDepths [64]byte
var distanceBits [64]uint16
buildAndStoreHuffmanTreeFastBW(distanceHisto[:], uint(distanceCount), 6, distanceDepths[:], distanceBits[:], &e.bw)
pos = 0
for i, m := range matches {
insertCode := getInsertLengthCode(uint(m.Unmatched))
copyCode := getCopyLengthCode(uint(m.Length))
if m.Length == 0 {
// If the stream ends with unmatched bytes, we need a dummy copy length.
copyCode = 2
}
command := combineLengthCodes(insertCode, copyCode, false)
e.bw.writeBits(uint(commandDepths[command]), uint64(commandBits[command]))
if kInsExtra[insertCode] > 0 {
e.bw.writeBits(uint(kInsExtra[insertCode]), uint64(m.Unmatched)-uint64(kInsBase[insertCode]))
}
if kCopyExtra[copyCode] > 0 {
e.bw.writeBits(uint(kCopyExtra[copyCode]), uint64(m.Length)-uint64(kCopyBase[copyCode]))
}
if m.Unmatched > 0 {
for _, c := range src[pos : pos+m.Unmatched] {
e.bw.writeBits(uint(literalDepths[c]), uint64(literalBits[c]))
}
}
if command >= 128 && m.Length != 0 {
distCode := e.distCache[i]
e.bw.writeBits(uint(distanceDepths[distCode.code]), uint64(distanceBits[distCode.code]))
if distCode.nExtra > 0 {
e.bw.writeBits(distCode.nExtra, distCode.extraBits)
}
}
pos += m.Unmatched + m.Length
}
if lastBlock {
e.bw.writeBits(2, 3) // islast + isempty
e.bw.jumpToByteBoundary()
}
return e.bw.dst
}
type distanceCode struct {
code int
nExtra uint
extraBits uint64
}
func getDistanceCode(distance int) distanceCode {
d := distance + 3
nbits := log2FloorNonZero(uint(d)) - 1
prefix := (d >> nbits) & 1
offset := (2 + prefix) << nbits
distcode := int(2*(nbits-1)) + prefix + 16
extra := d - offset
return distanceCode{distcode, uint(nbits), uint64(extra)}
}
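A worked example of the computation above, which implements the NPOSTFIX = 0, NDIRECT = 0 distance encoding (values checked by hand; shown as a sketch):

dc := getDistanceCode(1000)
// d = 1003; nbits = log2Floor(1003) - 1 = 8; prefix = (1003 >> 8) & 1 = 1;
// offset = (2 + 1) << 8 = 768; so code = 2*(8-1) + 1 + 16 = 31,
// with nExtra = 8 extra bits equal to 1003 - 768 = 235.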


@ -782,6 +782,11 @@ func storeStaticCodeLengthCode(storage_ix *uint, storage []byte) {
writeBits(40, 0x0000FF55555554, storage_ix, storage)
}
func storeStaticCodeLengthCodeBW(bw *bitWriter) {
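// The same 40-bit pattern that storeStaticCodeLengthCode writes above, split
// into 32 + 8 bits: writeBits can safely take at most 32 bits per call, since
// its 64-bit accumulator may already hold up to 31 pending bits.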
bw.writeBits(32, 0x55555554)
bw.writeBits(8, 0xFF)
}
var kZeroRepsBits = [numCommandSymbols]uint64{
0x00000000,
0x00000000,

go.mod

@ -1,5 +1,5 @@
module github.com/andybalholm/brotli
go 1.13
retract v1.0.1 // occasional panics and data corruption

matchfinder/m4.go Normal file

@ -0,0 +1,270 @@
package matchfinder
import (
"encoding/binary"
"math/bits"
"runtime"
)
const (
ssapBits = 17
ssapMask = (1 << ssapBits) - 1
)
// M4 is an implementation of the MatchFinder
// interface that uses a simple hash table to find matches,
// combined with the advanced parsing technique from
// https://fastcompression.blogspot.com/2011/12/advanced-parsing-strategies.html,
// except that it looks for matches at every input position.
type M4 struct {
// MaxDistance is the maximum distance (in bytes) to look back for
// a match. The default is 65535.
MaxDistance int
// MinLength is the length of the shortest match to return.
// The default is 4.
MinLength int
// HashLen is the number of bytes to use to calculate the hashes.
// The maximum is 8 and the default is 6.
HashLen int
table [1 << ssapBits]uint32
history []byte
}
func (q *M4) Reset() {
q.table = [1 << ssapBits]uint32{}
q.history = q.history[:0]
}
func (q *M4) FindMatches(dst []Match, src []byte) []Match {
if q.MaxDistance == 0 {
q.MaxDistance = 65535
}
if q.MinLength == 0 {
q.MinLength = 4
}
if q.HashLen == 0 {
q.HashLen = 6
}
var nextEmit int
if len(q.history) > q.MaxDistance*2 {
// Trim down the history buffer.
delta := len(q.history) - q.MaxDistance
copy(q.history, q.history[delta:])
q.history = q.history[:q.MaxDistance]
for i, v := range q.table {
newV := int(v) - delta
if newV < 0 {
newV = 0
}
q.table[i] = uint32(newV)
}
}
// Append src to the history buffer.
nextEmit = len(q.history)
q.history = append(q.history, src...)
src = q.history
// matches stores the matches that have been found but not emitted,
// in reverse order. (matches[0] is the most recent one.)
var matches [3]absoluteMatch
for i := nextEmit; i < len(src)-7; i++ {
if matches[0] != (absoluteMatch{}) && i >= matches[0].End {
// We have found some matches, and we're far enough along that we probably
// won't find overlapping matches, so we might as well emit them.
if matches[1] != (absoluteMatch{}) {
if matches[1].End > matches[0].Start {
matches[1].End = matches[0].Start
}
if matches[1].End-matches[1].Start >= q.MinLength {
dst = append(dst, Match{
Unmatched: matches[1].Start - nextEmit,
Length: matches[1].End - matches[1].Start,
Distance: matches[1].Start - matches[1].Match,
})
nextEmit = matches[1].End
}
}
dst = append(dst, Match{
Unmatched: matches[0].Start - nextEmit,
Length: matches[0].End - matches[0].Start,
Distance: matches[0].Start - matches[0].Match,
})
nextEmit = matches[0].End
matches = [3]absoluteMatch{}
}
// Now look for a match.
h := ((binary.LittleEndian.Uint64(src[i:]) & (1<<(8*q.HashLen) - 1)) * hashMul64) >> (64 - ssapBits)
candidate := int(q.table[h&ssapMask])
q.table[h&ssapMask] = uint32(i)
if candidate == 0 || i-candidate > q.MaxDistance || i-candidate == matches[0].Start-matches[0].Match {
continue
}
if binary.LittleEndian.Uint32(src[candidate:]) != binary.LittleEndian.Uint32(src[i:]) {
continue
}
// We have a 4-byte match now.
start := i
match := candidate
end := extendMatch(src, match+4, start+4)
for start > nextEmit && match > 0 && src[start-1] == src[match-1] {
start--
match--
}
if end-start <= matches[0].End-matches[0].Start {
continue
}
matches = [3]absoluteMatch{
absoluteMatch{
Start: start,
End: end,
Match: match,
},
matches[0],
matches[1],
}
if matches[2] == (absoluteMatch{}) {
continue
}
// We have three matches, so it's time to emit one and/or eliminate one.
switch {
case matches[0].Start < matches[2].End:
// The first and third matches overlap; discard the one in between.
matches = [3]absoluteMatch{
matches[0],
matches[2],
absoluteMatch{},
}
case matches[0].Start < matches[2].End+q.MinLength:
// The first and third matches don't overlap, but there's no room for
// another match between them. Emit the first match and discard the second.
dst = append(dst, Match{
Unmatched: matches[2].Start - nextEmit,
Length: matches[2].End - matches[2].Start,
Distance: matches[2].Start - matches[2].Match,
})
nextEmit = matches[2].End
matches = [3]absoluteMatch{
matches[0],
absoluteMatch{},
absoluteMatch{},
}
default:
// Emit the first match, shortening it if necessary to avoid overlap with the second.
if matches[2].End > matches[1].Start {
matches[2].End = matches[1].Start
}
if matches[2].End-matches[2].Start >= q.MinLength {
dst = append(dst, Match{
Unmatched: matches[2].Start - nextEmit,
Length: matches[2].End - matches[2].Start,
Distance: matches[2].Start - matches[2].Match,
})
nextEmit = matches[2].End
}
matches[2] = absoluteMatch{}
}
}
// We've found all the matches now; emit the remaining ones.
if matches[1] != (absoluteMatch{}) {
if matches[1].End > matches[0].Start {
matches[1].End = matches[0].Start
}
if matches[1].End-matches[1].Start >= q.MinLength {
dst = append(dst, Match{
Unmatched: matches[1].Start - nextEmit,
Length: matches[1].End - matches[1].Start,
Distance: matches[1].Start - matches[1].Match,
})
nextEmit = matches[1].End
}
}
if matches[0] != (absoluteMatch{}) {
dst = append(dst, Match{
Unmatched: matches[0].Start - nextEmit,
Length: matches[0].End - matches[0].Start,
Distance: matches[0].Start - matches[0].Match,
})
nextEmit = matches[0].End
}
if nextEmit < len(src) {
dst = append(dst, Match{
Unmatched: len(src) - nextEmit,
})
}
return dst
}
const hashMul64 = 0x1E35A7BD1E35A7BD
// An absoluteMatch is like a Match, but it stores indexes into the byte
// stream instead of lengths.
type absoluteMatch struct {
// Start is the index of the first byte.
Start int
// End is the index of the byte after the last byte
// (so that End - Start = Length).
End int
// Match is the index of the previous data that matches
// (Start - Match = Distance).
Match int
}
// extendMatch returns the largest k such that k <= len(src) and that
// src[i:i+k-j] and src[j:k] have the same contents.
//
// It assumes that:
//
// 0 <= i && i < j && j <= len(src)
func extendMatch(src []byte, i, j int) int {
switch runtime.GOARCH {
case "amd64":
// As long as we are 8 or more bytes before the end of src, we can load and
// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
for j+8 < len(src) {
iBytes := binary.LittleEndian.Uint64(src[i:])
jBytes := binary.LittleEndian.Uint64(src[j:])
if iBytes != jBytes {
// If those 8 bytes were not equal, XOR the two 8 byte values, and return
// the index of the first byte that differs. The BSF instruction finds the
// least significant 1 bit, the amd64 architecture is little-endian, and
// the shift by 3 converts a bit index to a byte index.
return j + bits.TrailingZeros64(iBytes^jBytes)>>3
}
i, j = i+8, j+8
}
case "386":
// On a 32-bit CPU, we do it 4 bytes at a time.
for j+4 < len(src) {
iBytes := binary.LittleEndian.Uint32(src[i:])
jBytes := binary.LittleEndian.Uint32(src[j:])
if iBytes != jBytes {
return j + bits.TrailingZeros32(iBytes^jBytes)>>3
}
i, j = i+4, j+4
}
}
for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 {
}
return j
}
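A quick sketch of extendMatch's contract on a hypothetical input:

src := []byte("abcabcabc")
k := extendMatch(src, 0, 3)
// src[3:] repeats src[0:] for the rest of the slice, so k == len(src) == 9.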

matchfinder/matchfinder.go Normal file

@ -0,0 +1,103 @@
// Package matchfinder defines reusable components for data compression.
//
// Many compression libraries have two main parts:
// - Something that looks for repeated sequences of bytes
// - An encoder for the compressed data format (often an entropy coder)
//
// Although these are logically two separate steps, the implementations are
// usually closely tied together. You can't use flate's matcher with snappy's
// encoder, for example. This package defines interfaces and an intermediate
// representation to allow mixing and matching compression components.
package matchfinder
import "io"
// A Match is the basic unit of LZ77 compression.
type Match struct {
Unmatched int // the number of unmatched bytes since the previous match
Length int // the number of bytes in the matched string; it may be 0 at the end of the input
Distance int // how far back in the stream to copy from
}
// A MatchFinder performs the LZ77 stage of compression, looking for matches.
type MatchFinder interface {
// FindMatches looks for matches in src, appends them to dst, and returns dst.
FindMatches(dst []Match, src []byte) []Match
// Reset clears any internal state, preparing the MatchFinder to be used with
// a new stream.
Reset()
}
// An Encoder encodes the data in its final format.
type Encoder interface {
// Encode appends the encoded format of src to dst, using the match
// information from matches.
Encode(dst []byte, src []byte, matches []Match, lastBlock bool) []byte
// Reset clears any internal state, preparing the Encoder to be used with
// a new stream.
Reset()
}
// A Writer uses MatchFinder and Encoder to write compressed data to Dest.
type Writer struct {
Dest io.Writer
MatchFinder MatchFinder
Encoder Encoder
// BlockSize is the number of bytes to compress at a time. If it is zero,
// each Write operation will be treated as one block.
BlockSize int
err error
inBuf []byte
outBuf []byte
matches []Match
}
func (w *Writer) Write(p []byte) (n int, err error) {
if w.err != nil {
return 0, w.err
}
if w.BlockSize == 0 {
return w.writeBlock(p, false)
}
w.inBuf = append(w.inBuf, p...)
var pos int
for pos = 0; pos+w.BlockSize <= len(w.inBuf) && w.err == nil; pos += w.BlockSize {
w.writeBlock(w.inBuf[pos:pos+w.BlockSize], false)
}
if pos > 0 {
n := copy(w.inBuf, w.inBuf[pos:])
w.inBuf = w.inBuf[:n]
}
return len(p), w.err
}
func (w *Writer) writeBlock(p []byte, lastBlock bool) (n int, err error) {
w.outBuf = w.outBuf[:0]
w.matches = w.MatchFinder.FindMatches(w.matches[:0], p)
w.outBuf = w.Encoder.Encode(w.outBuf, p, w.matches, lastBlock)
_, w.err = w.Dest.Write(w.outBuf)
return len(p), w.err
}
func (w *Writer) Close() error {
w.writeBlock(w.inBuf, true)
w.inBuf = w.inBuf[:0]
return w.err
}
func (w *Writer) Reset(newDest io.Writer) {
w.MatchFinder.Reset()
w.Encoder.Reset()
w.err = nil
w.inBuf = w.inBuf[:0]
w.outBuf = w.outBuf[:0]
w.matches = w.matches[:0]
w.Dest = newDest
}
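A sketch of wiring a Writer together, mirroring the test helper earlier in this commit; here with the brotli Encoder from encoder.go and a NoMatchFinder (defined below) for flate-style Huffman-only output. dst and data are assumed to be an io.Writer and the input bytes:

w := &matchfinder.Writer{
	Dest:        dst,
	MatchFinder: matchfinder.NoMatchFinder{}, // or &matchfinder.M4{...}
	Encoder:     &brotli.Encoder{},
	BlockSize:   1 << 16,
}
w.Write(data)
w.Close()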


@ -0,0 +1,53 @@
package matchfinder
import "fmt"
// A TextEncoder is an Encoder that produces a human-readable representation of
// the LZ77 compression. Matches are replaced with <Length,Distance> symbols.
type TextEncoder struct{}
func (t TextEncoder) Reset() {}
func (t TextEncoder) Encode(dst []byte, src []byte, matches []Match, lastBlock bool) []byte {
pos := 0
for _, m := range matches {
if m.Unmatched > 0 {
dst = append(dst, src[pos:pos+m.Unmatched]...)
pos += m.Unmatched
}
if m.Length > 0 {
dst = append(dst, []byte(fmt.Sprintf("<%d,%d>", m.Length, m.Distance))...)
pos += m.Length
}
}
if pos < len(src) {
dst = append(dst, src[pos:]...)
}
return dst
}
// A NoMatchFinder implements MatchFinder, but doesn't find any matches.
// It can be used to implement the equivalent of the standard library flate package's
// HuffmanOnly setting.
type NoMatchFinder struct{}
func (n NoMatchFinder) Reset() {}
func (n NoMatchFinder) FindMatches(dst []Match, src []byte) []Match {
return append(dst, Match{
Unmatched: len(src),
})
}
// AutoReset wraps a MatchFinder that can return references to data in previous
// blocks, and calls Reset before each block. It is useful for (e.g.) using a
// snappy Encoder with a MatchFinder designed for flate. (Snappy doesn't
// support references between blocks.)
type AutoReset struct {
MatchFinder
}
func (a AutoReset) FindMatches(dst []Match, src []byte) []Match {
a.Reset()
return a.MatchFinder.FindMatches(dst, src)
}
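A sketch of using TextEncoder to eyeball what a MatchFinder emits (hypothetical input; the exact tokens depend on the matcher):

m := &matchfinder.M4{MaxDistance: 1 << 16}
src := bytes.Repeat([]byte("the quick brown fox "), 8)
out := matchfinder.TextEncoder{}.Encode(nil, src, m.FindMatches(nil, src), true)
// out reads something like "the quick brown fox <140,20>": literal bytes
// passed through, matches replaced by <Length,Distance> tokens.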