Add matchfinder package.

I've been experimenting for a while with a new brotli compressor.
Instead of being a translation of the C implementation,
it's a rewrite in Go, with a modular structure thanks to interfaces.
(A few low-level functions still come from the C version, though.)

The performance is getting to the point where it seems to be worth
adding to the brotli repository.
Andy Balholm 2023-12-28 16:09:32 -08:00
parent b7a4cf9ec5
commit 349ed2fce1
10 changed files with 1031 additions and 50 deletions

bitwriter.go Normal file

@ -0,0 +1,56 @@
package brotli
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Write bits into a byte array. */
type bitWriter struct {
dst []byte
// Data waiting to be written is the low nbits of bits.
bits uint64
nbits uint
}
func (w *bitWriter) writeBits(nb uint, b uint64) {
w.bits |= b << w.nbits
w.nbits += nb
if w.nbits >= 32 {
bits := w.bits
w.bits >>= 32
w.nbits -= 32
w.dst = append(w.dst,
byte(bits),
byte(bits>>8),
byte(bits>>16),
byte(bits>>24),
)
}
}
func (w *bitWriter) writeSingleBit(bit bool) {
if bit {
w.writeBits(1, 1)
} else {
w.writeBits(1, 0)
}
}
func (w *bitWriter) jumpToByteBoundary() {
dst := w.dst
for w.nbits != 0 {
dst = append(dst, byte(w.bits))
w.bits >>= 8
if w.nbits > 8 { // Avoid underflow
w.nbits -= 8
} else {
w.nbits = 0
}
}
w.bits = 0
w.dst = dst
}
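A minimal sketch of how the writer packs values, assuming it is exercised from inside the package (bitWriter is unexported). Bits accumulate LSB-first in the 64-bit buffer, writeBits flushes 32 bits at a time, and jumpToByteBoundary drains the rest, so the first value written lands in the low bits of the first output byte:

var w bitWriter
w.writeBits(4, 0x9)  // bits = 0x9, nbits = 4
w.writeBits(8, 0xAB) // bits = 0xAB9, nbits = 12
w.jumpToByteBoundary()
// w.dst is now [0xB9, 0x0A]: the low byte first, then the remaining nibble.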


@ -7,12 +7,18 @@ import (
const maxHuffmanTreeSize = (2*numCommandSymbols + 1)
/*
The maximum size of Huffman dictionary for distances assuming that
NPOSTFIX = 0 and NDIRECT = 0.
*/
const maxSimpleDistanceAlphabetSize = 140
/*
Represents the range of values belonging to a prefix code:
[offset, offset + 2^nbits)
*/
type prefixCodeRange struct {
offset uint32
nbits uint32
@ -96,9 +102,12 @@ func nextBlockTypeCode(calculator *blockTypeCodeCalculator, type_ byte) uint {
return type_code
}
/*
|nibblesbits| represents the 2 bits to encode MNIBBLES (0-3)
REQUIRES: length > 0
REQUIRES: length <= (1 << 24)
*/
func encodeMlen(length uint, bits *uint64, numbits *uint, nibblesbits *uint64) {
var lg uint
if length == 1 {
@ -132,8 +141,11 @@ func storeCommandExtra(cmd *command, storage_ix *uint, storage []byte) {
writeBits(uint(insnumextra+getCopyExtra(copycode)), bits, storage_ix, storage)
}
/*
Data structure that stores almost everything that is needed to encode each
block switch command.
*/
type blockSplitCode struct {
type_code_calculator blockTypeCodeCalculator
type_depths [maxBlockTypeSymbols]byte
@ -154,9 +166,12 @@ func storeVarLenUint8(n uint, storage_ix *uint, storage []byte) {
}
}
/*
Stores the compressed meta-block header.
REQUIRES: length > 0
REQUIRES: length <= (1 << 24)
*/
func storeCompressedMetaBlockHeader(is_final_block bool, length uint, storage_ix *uint, storage []byte) {
var lenbits uint64
var nlenbits uint
@ -186,9 +201,12 @@ func storeCompressedMetaBlockHeader(is_final_block bool, length uint, storage_ix
}
}
/*
Stores the uncompressed meta-block header.
REQUIRES: length > 0
REQUIRES: length <= (1 << 24)
*/
func storeUncompressedMetaBlockHeader(length uint, storage_ix *uint, storage []byte) {
var lenbits uint64
var nlenbits uint
@ -312,8 +330,11 @@ func storeSimpleHuffmanTree(depths []byte, symbols []uint, num_symbols uint, max
}
}
/*
num = alphabet size
depths = symbol depths
*/
func storeHuffmanTree(depths []byte, num uint, tree []huffmanTree, storage_ix *uint, storage []byte) {
var huffman_tree [numCommandSymbols]byte
var huffman_tree_extra_bits [numCommandSymbols]byte
@ -367,8 +388,11 @@ func storeHuffmanTree(depths []byte, num uint, tree []huffmanTree, storage_ix *u
storeHuffmanTreeToBitMask(huffman_tree_size, huffman_tree[:], huffman_tree_extra_bits[:], code_length_bitdepth[:], code_length_bitdepth_symbols[:], storage_ix, storage)
}
/*
Builds a Huffman tree from histogram[0:length] into depth[0:length] and
bits[0:length] and stores the encoded tree to the bit stream.
*/
func buildAndStoreHuffmanTree(histogram []uint32, histogram_length uint, alphabet_size uint, tree []huffmanTree, depth []byte, bits []uint16, storage_ix *uint, storage []byte) {
var count uint = 0
var s4 = [4]uint{0}
@ -623,6 +647,203 @@ func buildAndStoreHuffmanTreeFast(histogram []uint32, histogram_total uint, max_
}
}
func buildAndStoreHuffmanTreeFastBW(histogram []uint32, histogram_total uint, max_bits uint, depth []byte, bits []uint16, bw *bitWriter) {
var count uint = 0
var symbols = [4]uint{0}
var length uint = 0
var total uint = histogram_total
for total != 0 {
if histogram[length] != 0 {
if count < 4 {
symbols[count] = length
}
count++
total -= uint(histogram[length])
}
length++
}
if count <= 1 {
bw.writeBits(4, 1)
bw.writeBits(max_bits, uint64(symbols[0]))
depth[symbols[0]] = 0
bits[symbols[0]] = 0
return
}
for i := 0; i < int(length); i++ {
depth[i] = 0
}
{
var max_tree_size uint = 2*length + 1
tree, _ := huffmanTreePool.Get().(*[]huffmanTree)
if tree == nil || cap(*tree) < int(max_tree_size) {
tmp := make([]huffmanTree, max_tree_size)
tree = &tmp
} else {
*tree = (*tree)[:max_tree_size]
}
var count_limit uint32
for count_limit = 1; ; count_limit *= 2 {
var node int = 0
var l uint
for l = length; l != 0; {
l--
if histogram[l] != 0 {
if histogram[l] >= count_limit {
initHuffmanTree(&(*tree)[node:][0], histogram[l], -1, int16(l))
} else {
initHuffmanTree(&(*tree)[node:][0], count_limit, -1, int16(l))
}
node++
}
}
{
var n int = node
var sentinel huffmanTree
var i int = 0     /* Points to the next leaf node. */
var j int = n + 1 /* Points to the next non-leaf node. */
var k int
sortHuffmanTreeItems(*tree, uint(n), huffmanTreeComparator(sortHuffmanTree1))
/* The nodes are:
[0, n): the sorted leaf nodes that we start with.
[n]: we add a sentinel here.
[n + 1, 2n): new parent nodes are added here, starting from
(n+1). These are naturally in ascending order.
[2n]: we add a sentinel at the end as well.
There will be (2n+1) elements at the end. */
initHuffmanTree(&sentinel, math.MaxUint32, -1, -1)
(*tree)[node] = sentinel
node++
(*tree)[node] = sentinel
node++
for k = n - 1; k > 0; k-- {
var left int
var right int
if (*tree)[i].total_count_ <= (*tree)[j].total_count_ {
left = i
i++
} else {
left = j
j++
}
if (*tree)[i].total_count_ <= (*tree)[j].total_count_ {
right = i
i++
} else {
right = j
j++
}
/* The sentinel node becomes the parent node. */
(*tree)[node-1].total_count_ = (*tree)[left].total_count_ + (*tree)[right].total_count_
(*tree)[node-1].index_left_ = int16(left)
(*tree)[node-1].index_right_or_value_ = int16(right)
/* Add back the last sentinel node. */
(*tree)[node] = sentinel
node++
}
if setDepth(2*n-1, *tree, depth, 14) {
/* We need to pack the Huffman tree in 14 bits. If this was not
successful, add fake entities to the lowest values and retry. */
break
}
}
}
huffmanTreePool.Put(tree)
}
convertBitDepthsToSymbols(depth, length, bits)
if count <= 4 {
var i uint
/* value of 1 indicates a simple Huffman code */
bw.writeBits(2, 1)
bw.writeBits(2, uint64(count)-1) /* NSYM - 1 */
/* Sort */
for i = 0; i < count; i++ {
var j uint
for j = i + 1; j < count; j++ {
if depth[symbols[j]] < depth[symbols[i]] {
var tmp uint = symbols[j]
symbols[j] = symbols[i]
symbols[i] = tmp
}
}
}
if count == 2 {
bw.writeBits(max_bits, uint64(symbols[0]))
bw.writeBits(max_bits, uint64(symbols[1]))
} else if count == 3 {
bw.writeBits(max_bits, uint64(symbols[0]))
bw.writeBits(max_bits, uint64(symbols[1]))
bw.writeBits(max_bits, uint64(symbols[2]))
} else {
bw.writeBits(max_bits, uint64(symbols[0]))
bw.writeBits(max_bits, uint64(symbols[1]))
bw.writeBits(max_bits, uint64(symbols[2]))
bw.writeBits(max_bits, uint64(symbols[3]))
/* tree-select */
bw.writeSingleBit(depth[symbols[0]] == 1)
}
} else {
var previous_value byte = 8
var i uint
/* Complex Huffman Tree */
storeStaticCodeLengthCodeBW(bw)
/* Actual RLE coding. */
for i = 0; i < length; {
var value byte = depth[i]
var reps uint = 1
var k uint
for k = i + 1; k < length && depth[k] == value; k++ {
reps++
}
i += reps
if value == 0 {
bw.writeBits(uint(kZeroRepsDepth[reps]), kZeroRepsBits[reps])
} else {
if previous_value != value {
bw.writeBits(uint(kCodeLengthDepth[value]), uint64(kCodeLengthBits[value]))
reps--
}
if reps < 3 {
for reps != 0 {
reps--
bw.writeBits(uint(kCodeLengthDepth[value]), uint64(kCodeLengthBits[value]))
}
} else {
reps -= 3
bw.writeBits(uint(kNonZeroRepsDepth[reps]), kNonZeroRepsBits[reps])
}
previous_value = value
}
}
}
}
func indexOf(v []byte, v_size uint, value byte) uint {
var i uint = 0
for ; i < v_size; i++ {
@ -674,12 +895,15 @@ func moveToFrontTransform(v_in []uint32, v_size uint, v_out []uint32) {
}
}
/*
Finds runs of zeros in v[0..in_size) and replaces them with a prefix code of
the run length plus extra bits (lower 9 bits is the prefix code and the rest
are the extra bits). Non-zero values in v[] are shifted by
*max_length_prefix. Will not create prefix codes bigger than the initial
value of *max_run_length_prefix. The prefix code of run length L is simply
Log2Floor(L) and the number of extra bits is the same as the prefix code.
*/
func runLengthCodeZeros(in_size uint, v []uint32, out_size *uint, max_run_length_prefix *uint32) {
var max_reps uint32 = 0
var i uint
@ -799,8 +1023,11 @@ func storeBlockSwitch(code *blockSplitCode, block_len uint32, block_type byte, i
writeBits(uint(len_nextra), uint64(len_extra), storage_ix, storage)
}
/*
Builds a BlockSplitCode data structure from the block split given by the
vector of block types and block lengths and stores it to the bit stream.
*/
func buildAndStoreBlockSplitCode(types []byte, lengths []uint32, num_blocks uint, num_types uint, tree []huffmanTree, code *blockSplitCode, storage_ix *uint, storage []byte) {
var type_histo [maxBlockTypeSymbols]uint32
var length_histo [numBlockLenSymbols]uint32
@ -919,14 +1146,20 @@ func cleanupBlockEncoder(self *blockEncoder) {
blockEncoderPool.Put(self)
}
/*
Creates entropy codes of block lengths and block types and stores them
to the bit stream.
*/
func buildAndStoreBlockSwitchEntropyCodes(self *blockEncoder, tree []huffmanTree, storage_ix *uint, storage []byte) {
buildAndStoreBlockSplitCode(self.block_types_, self.block_lengths_, self.num_blocks_, self.num_block_types_, tree, &self.block_split_code_, storage_ix, storage)
}
/*
Stores the next symbol with the entropy code of the current block type.
Updates the block type and block length at block boundaries.
*/
func storeSymbol(self *blockEncoder, symbol uint, storage_ix *uint, storage []byte) {
if self.block_len_ == 0 {
self.block_ix_++
@ -945,9 +1178,12 @@ func storeSymbol(self *blockEncoder, symbol uint, storage_ix *uint, storage []by
}
}
/*
Stores the next symbol with the entropy code of the current block type and
context value.
Updates the block type and block length at block boundaries.
*/
func storeSymbolWithContext(self *blockEncoder, symbol uint, context uint, context_map []uint32, storage_ix *uint, storage []byte, context_bits uint) {
if self.block_len_ == 0 {
self.block_ix_++
@ -1268,8 +1504,11 @@ func storeMetaBlockFast(input []byte, start_pos uint, length uint, mask uint, is
}
}
/*
This is for storing uncompressed blocks (simple raw storage of
bytes-as-bytes).
*/
func storeUncompressedMetaBlock(is_final_block bool, input []byte, position uint, mask uint, len uint, storage_ix *uint, storage []byte) {
var masked_pos uint = position & mask
storeUncompressedMetaBlockHeader(uint(len), storage_ix, storage)


@ -16,6 +16,8 @@ import (
"os"
"testing"
"time"
"github.com/andybalholm/brotli/matchfinder"
)
func checkCompressedData(compressedData, wantOriginalData []byte) error {
@ -595,3 +597,63 @@ func BenchmarkDecodeLevels(b *testing.B) {
})
}
}
func test(t *testing.T, filename string, m matchfinder.MatchFinder, blockSize int) {
data, err := ioutil.ReadFile(filename)
if err != nil {
t.Fatal(err)
}
b := new(bytes.Buffer)
w := &matchfinder.Writer{
Dest: b,
MatchFinder: m,
Encoder: &Encoder{},
BlockSize: blockSize,
}
w.Write(data)
w.Close()
compressed := b.Bytes()
sr := NewReader(bytes.NewReader(compressed))
decompressed, err := ioutil.ReadAll(sr)
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(decompressed, data) {
t.Fatal("decompressed output doesn't match")
}
}
func benchmark(b *testing.B, filename string, m matchfinder.MatchFinder, blockSize int) {
b.StopTimer()
b.ReportAllocs()
data, err := ioutil.ReadFile(filename)
if err != nil {
b.Fatal(err)
}
b.SetBytes(int64(len(data)))
buf := new(bytes.Buffer)
w := &matchfinder.Writer{
Dest: buf,
MatchFinder: m,
Encoder: &Encoder{},
BlockSize: blockSize,
}
w.Write(data)
w.Close()
b.ReportMetric(float64(len(data))/float64(buf.Len()), "ratio")
b.StartTimer()
for i := 0; i < b.N; i++ {
w.Reset(ioutil.Discard)
w.Write(data)
w.Close()
}
}
func TestEncodeM4(t *testing.T) {
test(t, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 18}, 1<<16)
}
func BenchmarkEncodeM4(b *testing.B) {
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20}, 1<<16)
}


@ -39,8 +39,11 @@ func isMatch1(p1 []byte, p2 []byte, length uint) bool {
return p1[4] == p2[4] && p1[5] == p2[5]
}
/*
Builds a command and distance prefix code (each 64 symbols) into "depth" and
"bits" based on "histogram" and stores it into the bit stream.
*/
func buildAndStoreCommandPrefixCode(histogram []uint32, depth []byte, bits []uint16, storage_ix *uint, storage []byte) {
var tree [129]huffmanTree
var cmd_depth = [numCommandSymbols]byte{0}
@ -216,6 +219,25 @@ func storeMetaBlockHeader(len uint, is_uncompressed bool, storage_ix *uint, stor
writeSingleBit(is_uncompressed, storage_ix, storage)
}
func storeMetaBlockHeaderBW(len uint, is_uncompressed bool, bw *bitWriter) {
var nibbles uint = 6
/* ISLAST */
bw.writeBits(1, 0)
if len <= 1<<16 {
nibbles = 4
} else if len <= 1<<20 {
nibbles = 5
}
bw.writeBits(2, uint64(nibbles)-4)
bw.writeBits(nibbles*4, uint64(len)-1)
/* ISUNCOMPRESSED */
bw.writeSingleBit(is_uncompressed)
}
func createCommands(input []byte, block_size uint, input_size uint, base_ip_ptr []byte, table []int, table_bits uint, min_match uint, literals *[]byte, commands *[]uint32) {
var ip int = 0
var shift uint = 64 - table_bits
@ -710,7 +732,9 @@ func compressFragmentTwoPassImpl(input []byte, input_size uint, is_last bool, co
}
}
/*
Compresses "input" string to the "*storage" buffer as one or more complete
meta-blocks, and updates the "*storage_ix" bit position.
If "is_last" is 1, emits an additional empty last meta-block.
@ -722,7 +746,8 @@ func compressFragmentTwoPassImpl(input []byte, input_size uint, is_last bool, co
REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
REQUIRES: "table_size" is a power of two
OUTPUT: maximal copy distance <= |input_size|
OUTPUT: maximal copy distance <= BROTLI_MAX_BACKWARD_LIMIT(18)
*/
func compressFragmentTwoPass(input []byte, input_size uint, is_last bool, command_buf []uint32, literal_buf []byte, table []int, table_size uint, storage_ix *uint, storage []byte) {
var initial_storage_ix uint = *storage_ix
var table_bits uint = uint(log2FloorNonZero(table_size))
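A hypothetical trace of storeMetaBlockHeaderBW above, showing how MNIBBLES is chosen (the smallest of 4, 5, or 6 nibbles that can hold MLEN - 1):

var bw bitWriter
storeMetaBlockHeaderBW(100000, false, &bw)
// Emits: ISLAST = 0 (1 bit); since 1<<16 < 100000 <= 1<<20, nibbles = 5,
// so MNIBBLES - 4 = 1 (2 bits); MLEN - 1 = 99999 (20 bits);
// ISUNCOMPRESSED = 0 (1 bit).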

encoder.go Normal file

@ -0,0 +1,168 @@
package brotli
import "github.com/andybalholm/brotli/matchfinder"
// An Encoder implements the matchfinder.Encoder interface, writing in Brotli format.
type Encoder struct {
wroteHeader bool
bw bitWriter
distCache []distanceCode
}
func (e *Encoder) Reset() {
e.wroteHeader = false
e.bw = bitWriter{}
}
func (e *Encoder) Encode(dst []byte, src []byte, matches []matchfinder.Match, lastBlock bool) []byte {
e.bw.dst = dst
if !e.wroteHeader {
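// Stream header: WBITS = 24 (a 16 MiB window), encoded as the four bits 1111.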
e.bw.writeBits(4, 15)
e.wroteHeader = true
}
var literalHisto [256]uint32
var commandHisto [704]uint32
var distanceHisto [64]uint32
literalCount := 0
commandCount := 0
distanceCount := 0
if len(e.distCache) < len(matches) {
e.distCache = make([]distanceCode, len(matches))
}
// first pass: build the histograms
pos := 0
// d is the ring buffer of the last 4 distances.
d := [4]int{-10, -10, -10, -10}
for i, m := range matches {
if m.Unmatched > 0 {
for _, c := range src[pos : pos+m.Unmatched] {
literalHisto[c]++
}
literalCount += m.Unmatched
}
insertCode := getInsertLengthCode(uint(m.Unmatched))
copyCode := getCopyLengthCode(uint(m.Length))
if m.Length == 0 {
// If the stream ends with unmatched bytes, we need a dummy copy length.
copyCode = 2
}
command := combineLengthCodes(insertCode, copyCode, false)
commandHisto[command]++
commandCount++
if command >= 128 && m.Length != 0 {
var distCode distanceCode
switch m.Distance {
case d[3]:
distCode.code = 0
case d[2]:
distCode.code = 1
case d[1]:
distCode.code = 2
case d[0]:
distCode.code = 3
case d[3] - 1:
distCode.code = 4
case d[3] + 1:
distCode.code = 5
case d[3] - 2:
distCode.code = 6
case d[3] + 2:
distCode.code = 7
case d[3] - 3:
distCode.code = 8
case d[3] + 3:
distCode.code = 9
// In my testing, codes 10-15 actually reduced the compression ratio.
default:
distCode = getDistanceCode(m.Distance)
}
e.distCache[i] = distCode
distanceHisto[distCode.code]++
distanceCount++
if distCode.code != 0 {
d[0], d[1], d[2], d[3] = d[1], d[2], d[3], m.Distance
}
}
pos += m.Unmatched + m.Length
}
storeMetaBlockHeaderBW(uint(len(src)), false, &e.bw)
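// 13 zero bits: one block type for each of the three categories (3 bits),
// NPOSTFIX = 0 and NDIRECT = 0 (6 bits), context mode 0 for the single
// literal block type (2 bits), and one literal tree and one distance tree
// (1 bit each).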
e.bw.writeBits(13, 0)
var literalDepths [256]byte
var literalBits [256]uint16
buildAndStoreHuffmanTreeFastBW(literalHisto[:], uint(literalCount), 8, literalDepths[:], literalBits[:], &e.bw)
var commandDepths [704]byte
var commandBits [704]uint16
buildAndStoreHuffmanTreeFastBW(commandHisto[:], uint(commandCount), 10, commandDepths[:], commandBits[:], &e.bw)
var distanceDepths [64]byte
var distanceBits [64]uint16
buildAndStoreHuffmanTreeFastBW(distanceHisto[:], uint(distanceCount), 6, distanceDepths[:], distanceBits[:], &e.bw)
pos = 0
for i, m := range matches {
insertCode := getInsertLengthCode(uint(m.Unmatched))
copyCode := getCopyLengthCode(uint(m.Length))
if m.Length == 0 {
// If the stream ends with unmatched bytes, we need a dummy copy length.
copyCode = 2
}
command := combineLengthCodes(insertCode, copyCode, false)
e.bw.writeBits(uint(commandDepths[command]), uint64(commandBits[command]))
if kInsExtra[insertCode] > 0 {
e.bw.writeBits(uint(kInsExtra[insertCode]), uint64(m.Unmatched)-uint64(kInsBase[insertCode]))
}
if kCopyExtra[copyCode] > 0 {
e.bw.writeBits(uint(kCopyExtra[copyCode]), uint64(m.Length)-uint64(kCopyBase[copyCode]))
}
if m.Unmatched > 0 {
for _, c := range src[pos : pos+m.Unmatched] {
e.bw.writeBits(uint(literalDepths[c]), uint64(literalBits[c]))
}
}
if command >= 128 && m.Length != 0 {
distCode := e.distCache[i]
e.bw.writeBits(uint(distanceDepths[distCode.code]), uint64(distanceBits[distCode.code]))
if distCode.nExtra > 0 {
e.bw.writeBits(distCode.nExtra, distCode.extraBits)
}
}
pos += m.Unmatched + m.Length
}
if lastBlock {
e.bw.writeBits(2, 3) // islast + isempty
e.bw.jumpToByteBoundary()
}
return e.bw.dst
}
type distanceCode struct {
code int
nExtra uint
extraBits uint64
}
func getDistanceCode(distance int) distanceCode {
d := distance + 3
nbits := log2FloorNonZero(uint(d)) - 1
prefix := (d >> nbits) & 1
offset := (2 + prefix) << nbits
distcode := int(2*(nbits-1)) + prefix + 16
extra := d - offset
return distanceCode{distcode, uint(nbits), uint64(extra)}
}
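A worked example of the computation above, which implements the NPOSTFIX = 0, NDIRECT = 0 distance encoding (values checked by hand; shown as a sketch):

dc := getDistanceCode(1000)
// d = 1003; nbits = log2Floor(1003) - 1 = 8; prefix = (1003 >> 8) & 1 = 1;
// offset = (2 + 1) << 8 = 768; so code = 2*(8-1) + 1 + 16 = 31,
// with nExtra = 8 extra bits equal to 1003 - 768 = 235.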


@ -782,6 +782,11 @@ func storeStaticCodeLengthCode(storage_ix *uint, storage []byte) {
writeBits(40, 0x0000FF55555554, storage_ix, storage)
}
func storeStaticCodeLengthCodeBW(bw *bitWriter) {
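// The same 40-bit pattern that storeStaticCodeLengthCode writes above, split
// into 32 + 8 bits: writeBits can safely take at most 32 bits per call, since
// its 64-bit accumulator may already hold up to 31 pending bits.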
bw.writeBits(32, 0x55555554)
bw.writeBits(8, 0xFF)
}
var kZeroRepsBits = [numCommandSymbols]uint64{
0x00000000,
0x00000000,

go.mod

@ -1,5 +1,5 @@
module github.com/andybalholm/brotli
go 1.13
retract v1.0.1 // occasional panics and data corruption

matchfinder/m4.go Normal file

@ -0,0 +1,270 @@
package matchfinder
import (
"encoding/binary"
"math/bits"
"runtime"
)
const (
ssapBits = 17
ssapMask = (1 << ssapBits) - 1
)
// M4 is an implementation of the MatchFinder
// interface that uses a simple hash table to find matches,
// combined with the advanced parsing technique from
// https://fastcompression.blogspot.com/2011/12/advanced-parsing-strategies.html,
// except that it looks for matches at every input position.
type M4 struct {
// MaxDistance is the maximum distance (in bytes) to look back for
// a match. The default is 65535.
MaxDistance int
// MinLength is the length of the shortest match to return.
// The default is 4.
MinLength int
// HashLen is the number of bytes to use to calculate the hashes.
// The maximum is 8 and the default is 6.
HashLen int
table [1 << ssapBits]uint32
history []byte
}
func (q *M4) Reset() {
q.table = [1 << ssapBits]uint32{}
q.history = q.history[:0]
}
func (q *M4) FindMatches(dst []Match, src []byte) []Match {
if q.MaxDistance == 0 {
q.MaxDistance = 65535
}
if q.MinLength == 0 {
q.MinLength = 4
}
if q.HashLen == 0 {
q.HashLen = 6
}
var nextEmit int
if len(q.history) > q.MaxDistance*2 {
// Trim down the history buffer.
delta := len(q.history) - q.MaxDistance
copy(q.history, q.history[delta:])
q.history = q.history[:q.MaxDistance]
for i, v := range q.table {
newV := int(v) - delta
if newV < 0 {
newV = 0
}
q.table[i] = uint32(newV)
}
}
// Append src to the history buffer.
nextEmit = len(q.history)
q.history = append(q.history, src...)
src = q.history
// matches stores the matches that have been found but not emitted,
// in reverse order. (matches[0] is the most recent one.)
var matches [3]absoluteMatch
for i := nextEmit; i < len(src)-7; i++ {
if matches[0] != (absoluteMatch{}) && i >= matches[0].End {
// We have found some matches, and we're far enough along that we probably
// won't find overlapping matches, so we might as well emit them.
if matches[1] != (absoluteMatch{}) {
if matches[1].End > matches[0].Start {
matches[1].End = matches[0].Start
}
if matches[1].End-matches[1].Start >= q.MinLength {
dst = append(dst, Match{
Unmatched: matches[1].Start - nextEmit,
Length: matches[1].End - matches[1].Start,
Distance: matches[1].Start - matches[1].Match,
})
nextEmit = matches[1].End
}
}
dst = append(dst, Match{
Unmatched: matches[0].Start - nextEmit,
Length: matches[0].End - matches[0].Start,
Distance: matches[0].Start - matches[0].Match,
})
nextEmit = matches[0].End
matches = [3]absoluteMatch{}
}
// Now look for a match.
h := ((binary.LittleEndian.Uint64(src[i:]) & (1<<(8*q.HashLen) - 1)) * hashMul64) >> (64 - ssapBits)
candidate := int(q.table[h&ssapMask])
q.table[h&ssapMask] = uint32(i)
if candidate == 0 || i-candidate > q.MaxDistance || i-candidate == matches[0].Start-matches[0].Match {
continue
}
if binary.LittleEndian.Uint32(src[candidate:]) != binary.LittleEndian.Uint32(src[i:]) {
continue
}
// We have a 4-byte match now.
start := i
match := candidate
end := extendMatch(src, match+4, start+4)
for start > nextEmit && match > 0 && src[start-1] == src[match-1] {
start--
match--
}
if end-start <= matches[0].End-matches[0].Start {
continue
}
matches = [3]absoluteMatch{
absoluteMatch{
Start: start,
End: end,
Match: match,
},
matches[0],
matches[1],
}
if matches[2] == (absoluteMatch{}) {
continue
}
// We have three matches, so it's time to emit one and/or eliminate one.
switch {
case matches[0].Start < matches[2].End:
// The first and third matches overlap; discard the one in between.
matches = [3]absoluteMatch{
matches[0],
matches[2],
absoluteMatch{},
}
case matches[0].Start < matches[2].End+q.MinLength:
// The first and third matches don't overlap, but there's no room for
// another match between them. Emit the first match and discard the second.
dst = append(dst, Match{
Unmatched: matches[2].Start - nextEmit,
Length: matches[2].End - matches[2].Start,
Distance: matches[2].Start - matches[2].Match,
})
nextEmit = matches[2].End
matches = [3]absoluteMatch{
matches[0],
absoluteMatch{},
absoluteMatch{},
}
default:
// Emit the first match, shortening it if necessary to avoid overlap with the second.
if matches[2].End > matches[1].Start {
matches[2].End = matches[1].Start
}
if matches[2].End-matches[2].Start >= q.MinLength {
dst = append(dst, Match{
Unmatched: matches[2].Start - nextEmit,
Length: matches[2].End - matches[2].Start,
Distance: matches[2].Start - matches[2].Match,
})
nextEmit = matches[2].End
}
matches[2] = absoluteMatch{}
}
}
// We've found all the matches now; emit the remaining ones.
if matches[1] != (absoluteMatch{}) {
if matches[1].End > matches[0].Start {
matches[1].End = matches[0].Start
}
if matches[1].End-matches[1].Start >= q.MinLength {
dst = append(dst, Match{
Unmatched: matches[1].Start - nextEmit,
Length: matches[1].End - matches[1].Start,
Distance: matches[1].Start - matches[1].Match,
})
nextEmit = matches[1].End
}
}
if matches[0] != (absoluteMatch{}) {
dst = append(dst, Match{
Unmatched: matches[0].Start - nextEmit,
Length: matches[0].End - matches[0].Start,
Distance: matches[0].Start - matches[0].Match,
})
nextEmit = matches[0].End
}
if nextEmit < len(src) {
dst = append(dst, Match{
Unmatched: len(src) - nextEmit,
})
}
return dst
}
const hashMul64 = 0x1E35A7BD1E35A7BD
// An absoluteMatch is like a Match, but it stores indexes into the byte
// stream instead of lengths.
type absoluteMatch struct {
// Start is the index of the first byte.
Start int
// End is the index of the byte after the last byte
// (so that End - Start = Length).
End int
// Match is the index of the previous data that matches
// (Start - Match = Distance).
Match int
}
// extendMatch returns the largest k such that k <= len(src) and that
// src[i:i+k-j] and src[j:k] have the same contents.
//
// It assumes that:
//
// 0 <= i && i < j && j <= len(src)
func extendMatch(src []byte, i, j int) int {
switch runtime.GOARCH {
case "amd64":
// As long as we are 8 or more bytes before the end of src, we can load and
// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
for j+8 < len(src) {
iBytes := binary.LittleEndian.Uint64(src[i:])
jBytes := binary.LittleEndian.Uint64(src[j:])
if iBytes != jBytes {
// If those 8 bytes were not equal, XOR the two 8 byte values, and return
// the index of the first byte that differs. The BSF instruction finds the
// least significant 1 bit, the amd64 architecture is little-endian, and
// the shift by 3 converts a bit index to a byte index.
return j + bits.TrailingZeros64(iBytes^jBytes)>>3
}
i, j = i+8, j+8
}
case "386":
// On a 32-bit CPU, we do it 4 bytes at a time.
for j+4 < len(src) {
iBytes := binary.LittleEndian.Uint32(src[i:])
jBytes := binary.LittleEndian.Uint32(src[j:])
if iBytes != jBytes {
return j + bits.TrailingZeros32(iBytes^jBytes)>>3
}
i, j = i+4, j+4
}
}
for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 {
}
return j
}
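A quick sketch of extendMatch's contract on a hypothetical input:

src := []byte("abcabcabc")
k := extendMatch(src, 0, 3)
// src[3:] repeats src[0:] for the rest of the slice, so k == len(src) == 9.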

matchfinder/matchfinder.go Normal file

@ -0,0 +1,103 @@
// Package matchfinder defines reusable components for data compression.
//
// Many compression libraries have two main parts:
// - Something that looks for repeated sequences of bytes
// - An encoder for the compressed data format (often an entropy coder)
//
// Although these are logically two separate steps, the implementations are
// usually closely tied together. You can't use flate's matcher with snappy's
// encoder, for example. This package defines interfaces and an intermediate
// representation to allow mixing and matching compression components.
package matchfinder
import "io"
// A Match is the basic unit of LZ77 compression.
type Match struct {
Unmatched int // the number of unmatched bytes since the previous match
Length int // the number of bytes in the matched string; it may be 0 at the end of the input
Distance int // how far back in the stream to copy from
}
// A MatchFinder performs the LZ77 stage of compression, looking for matches.
type MatchFinder interface {
// FindMatches looks for matches in src, appends them to dst, and returns dst.
FindMatches(dst []Match, src []byte) []Match
// Reset clears any internal state, preparing the MatchFinder to be used with
// a new stream.
Reset()
}
// An Encoder encodes the data in its final format.
type Encoder interface {
// Encode appends the encoded format of src to dst, using the match
// information from matches.
Encode(dst []byte, src []byte, matches []Match, lastBlock bool) []byte
// Reset clears any internal state, preparing the Encoder to be used with
// a new stream.
Reset()
}
// A Writer uses MatchFinder and Encoder to write compressed data to Dest.
type Writer struct {
Dest io.Writer
MatchFinder MatchFinder
Encoder Encoder
// BlockSize is the number of bytes to compress at a time. If it is zero,
// each Write operation will be treated as one block.
BlockSize int
err error
inBuf []byte
outBuf []byte
matches []Match
}
func (w *Writer) Write(p []byte) (n int, err error) {
if w.err != nil {
return 0, w.err
}
if w.BlockSize == 0 {
return w.writeBlock(p, false)
}
w.inBuf = append(w.inBuf, p...)
var pos int
for pos = 0; pos+w.BlockSize <= len(w.inBuf) && w.err == nil; pos += w.BlockSize {
w.writeBlock(w.inBuf[pos:pos+w.BlockSize], false)
}
if pos > 0 {
n := copy(w.inBuf, w.inBuf[pos:])
w.inBuf = w.inBuf[:n]
}
return len(p), w.err
}
func (w *Writer) writeBlock(p []byte, lastBlock bool) (n int, err error) {
w.outBuf = w.outBuf[:0]
w.matches = w.MatchFinder.FindMatches(w.matches[:0], p)
w.outBuf = w.Encoder.Encode(w.outBuf, p, w.matches, lastBlock)
_, w.err = w.Dest.Write(w.outBuf)
return len(p), w.err
}
func (w *Writer) Close() error {
w.writeBlock(w.inBuf, true)
w.inBuf = w.inBuf[:0]
return w.err
}
func (w *Writer) Reset(newDest io.Writer) {
w.MatchFinder.Reset()
w.Encoder.Reset()
w.err = nil
w.inBuf = w.inBuf[:0]
w.outBuf = w.outBuf[:0]
w.matches = w.matches[:0]
w.Dest = newDest
}
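A sketch of wiring a Writer together, mirroring the test helper earlier in this commit; here with the brotli Encoder from encoder.go and a NoMatchFinder (defined below) for flate-style Huffman-only output. dst and data are assumed to be an io.Writer and the input bytes:

w := &matchfinder.Writer{
	Dest:        dst,
	MatchFinder: matchfinder.NoMatchFinder{}, // or &matchfinder.M4{...}
	Encoder:     &brotli.Encoder{},
	BlockSize:   1 << 16,
}
w.Write(data)
w.Close()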


@ -0,0 +1,53 @@
package matchfinder
import "fmt"
// A TextEncoder is an Encoder that produces a human-readable representation of
// the LZ77 compression. Matches are replaced with <Length,Distance> symbols.
type TextEncoder struct{}
func (t TextEncoder) Reset() {}
func (t TextEncoder) Encode(dst []byte, src []byte, matches []Match, lastBlock bool) []byte {
pos := 0
for _, m := range matches {
if m.Unmatched > 0 {
dst = append(dst, src[pos:pos+m.Unmatched]...)
pos += m.Unmatched
}
if m.Length > 0 {
dst = append(dst, []byte(fmt.Sprintf("<%d,%d>", m.Length, m.Distance))...)
pos += m.Length
}
}
if pos < len(src) {
dst = append(dst, src[pos:]...)
}
return dst
}
// A NoMatchFinder implements MatchFinder, but doesn't find any matches.
// It can be used to implement the equivalent of the standard library flate package's
// HuffmanOnly setting.
type NoMatchFinder struct{}
func (n NoMatchFinder) Reset() {}
func (n NoMatchFinder) FindMatches(dst []Match, src []byte) []Match {
return append(dst, Match{
Unmatched: len(src),
})
}
// AutoReset wraps a MatchFinder that can return references to data in previous
// blocks, and calls Reset before each block. It is useful for (e.g.) using a
// snappy Encoder with a MatchFinder designed for flate. (Snappy doesn't
// support references between blocks.)
type AutoReset struct {
MatchFinder
}
func (a AutoReset) FindMatches(dst []Match, src []byte) []Match {
a.Reset()
return a.MatchFinder.FindMatches(dst, src)
}
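A sketch of using TextEncoder to eyeball what a MatchFinder emits (hypothetical input; the exact tokens depend on the matcher):

m := &matchfinder.M4{MaxDistance: 1 << 16}
src := bytes.Repeat([]byte("the quick brown fox "), 8)
out := matchfinder.TextEncoder{}.Encode(nil, src, m.FindMatches(nil, src), true)
// out reads something like "the quick brown fox <140,20>": literal bytes
// passed through, matches replaced by <Length,Distance> tokens.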