From 349ed2fce16810af932c7e526f9cd8dc448e4f2a Mon Sep 17 00:00:00 2001 From: Andy Balholm Date: Thu, 28 Dec 2023 16:09:32 -0800 Subject: [PATCH] Add matchfinder package. I've been experimenting for a while with a new brotli compressor. Instead of being a translation of the C implementation, it's a rewrite in Go, with a modular structure thanks to interfaces. (A few low-level functions still come from the C version, though.) The performance is getting to the point where it seems to be worth adding to the brotli repository. --- bitwriter.go | 56 ++++++ brotli_bit_stream.go | 311 ++++++++++++++++++++++++++++++---- brotli_test.go | 62 +++++++ compress_fragment_two_pass.go | 51 ++++-- encoder.go | 168 ++++++++++++++++++ entropy_encode_static.go | 5 + go.mod | 2 +- matchfinder/m4.go | 270 +++++++++++++++++++++++++++++ matchfinder/matchfinder.go | 103 +++++++++++ matchfinder/textencoder.go | 53 ++++++ 10 files changed, 1031 insertions(+), 50 deletions(-) create mode 100644 bitwriter.go create mode 100644 encoder.go create mode 100644 matchfinder/m4.go create mode 100644 matchfinder/matchfinder.go create mode 100644 matchfinder/textencoder.go diff --git a/bitwriter.go b/bitwriter.go new file mode 100644 index 0000000..dfc6036 --- /dev/null +++ b/bitwriter.go @@ -0,0 +1,56 @@ +package brotli + +/* Copyright 2010 Google Inc. All Rights Reserved. + + Distributed under MIT license. + See file LICENSE for detail or copy at https://opensource.org/licenses/MIT +*/ + +/* Write bits into a byte array. */ + +type bitWriter struct { + dst []byte + + // Data waiting to be written is the low nbits of bits. + bits uint64 + nbits uint +} + +func (w *bitWriter) writeBits(nb uint, b uint64) { + w.bits |= b << w.nbits + w.nbits += nb + if w.nbits >= 32 { + bits := w.bits + w.bits >>= 32 + w.nbits -= 32 + w.dst = append(w.dst, + byte(bits), + byte(bits>>8), + byte(bits>>16), + byte(bits>>24), + ) + } +} + +func (w *bitWriter) writeSingleBit(bit bool) { + if bit { + w.writeBits(1, 1) + } else { + w.writeBits(1, 0) + } +} + +func (w *bitWriter) jumpToByteBoundary() { + dst := w.dst + for w.nbits != 0 { + dst = append(dst, byte(w.bits)) + w.bits >>= 8 + if w.nbits > 8 { // Avoid underflow + w.nbits -= 8 + } else { + w.nbits = 0 + } + } + w.bits = 0 + w.dst = dst +} diff --git a/brotli_bit_stream.go b/brotli_bit_stream.go index 7acfb18..ee65529 100644 --- a/brotli_bit_stream.go +++ b/brotli_bit_stream.go @@ -7,12 +7,18 @@ import ( const maxHuffmanTreeSize = (2*numCommandSymbols + 1) -/* The maximum size of Huffman dictionary for distances assuming that - NPOSTFIX = 0 and NDIRECT = 0. */ +/* +The maximum size of Huffman dictionary for distances assuming that + + NPOSTFIX = 0 and NDIRECT = 0. +*/ const maxSimpleDistanceAlphabetSize = 140 -/* Represents the range of values belonging to a prefix code: - [offset, offset + 2^nbits) */ +/* +Represents the range of values belonging to a prefix code: + + [offset, offset + 2^nbits) +*/ type prefixCodeRange struct { offset uint32 nbits uint32 @@ -96,9 +102,12 @@ func nextBlockTypeCode(calculator *blockTypeCodeCalculator, type_ byte) uint { return type_code } -/* |nibblesbits| represents the 2 bits to encode MNIBBLES (0-3) - REQUIRES: length > 0 - REQUIRES: length <= (1 << 24) */ +/* +|nibblesbits| represents the 2 bits to encode MNIBBLES (0-3) + + REQUIRES: length > 0 + REQUIRES: length <= (1 << 24) +*/ func encodeMlen(length uint, bits *uint64, numbits *uint, nibblesbits *uint64) { var lg uint if length == 1 { @@ -132,8 +141,11 @@ func storeCommandExtra(cmd *command, storage_ix *uint, storage []byte) { writeBits(uint(insnumextra+getCopyExtra(copycode)), bits, storage_ix, storage) } -/* Data structure that stores almost everything that is needed to encode each - block switch command. */ +/* +Data structure that stores almost everything that is needed to encode each + + block switch command. +*/ type blockSplitCode struct { type_code_calculator blockTypeCodeCalculator type_depths [maxBlockTypeSymbols]byte @@ -154,9 +166,12 @@ func storeVarLenUint8(n uint, storage_ix *uint, storage []byte) { } } -/* Stores the compressed meta-block header. - REQUIRES: length > 0 - REQUIRES: length <= (1 << 24) */ +/* +Stores the compressed meta-block header. + + REQUIRES: length > 0 + REQUIRES: length <= (1 << 24) +*/ func storeCompressedMetaBlockHeader(is_final_block bool, length uint, storage_ix *uint, storage []byte) { var lenbits uint64 var nlenbits uint @@ -186,9 +201,12 @@ func storeCompressedMetaBlockHeader(is_final_block bool, length uint, storage_ix } } -/* Stores the uncompressed meta-block header. - REQUIRES: length > 0 - REQUIRES: length <= (1 << 24) */ +/* +Stores the uncompressed meta-block header. + + REQUIRES: length > 0 + REQUIRES: length <= (1 << 24) +*/ func storeUncompressedMetaBlockHeader(length uint, storage_ix *uint, storage []byte) { var lenbits uint64 var nlenbits uint @@ -312,8 +330,11 @@ func storeSimpleHuffmanTree(depths []byte, symbols []uint, num_symbols uint, max } } -/* num = alphabet size - depths = symbol depths */ +/* +num = alphabet size + + depths = symbol depths +*/ func storeHuffmanTree(depths []byte, num uint, tree []huffmanTree, storage_ix *uint, storage []byte) { var huffman_tree [numCommandSymbols]byte var huffman_tree_extra_bits [numCommandSymbols]byte @@ -367,8 +388,11 @@ func storeHuffmanTree(depths []byte, num uint, tree []huffmanTree, storage_ix *u storeHuffmanTreeToBitMask(huffman_tree_size, huffman_tree[:], huffman_tree_extra_bits[:], code_length_bitdepth[:], code_length_bitdepth_symbols[:], storage_ix, storage) } -/* Builds a Huffman tree from histogram[0:length] into depth[0:length] and - bits[0:length] and stores the encoded tree to the bit stream. */ +/* +Builds a Huffman tree from histogram[0:length] into depth[0:length] and + + bits[0:length] and stores the encoded tree to the bit stream. +*/ func buildAndStoreHuffmanTree(histogram []uint32, histogram_length uint, alphabet_size uint, tree []huffmanTree, depth []byte, bits []uint16, storage_ix *uint, storage []byte) { var count uint = 0 var s4 = [4]uint{0} @@ -623,6 +647,203 @@ func buildAndStoreHuffmanTreeFast(histogram []uint32, histogram_total uint, max_ } } +func buildAndStoreHuffmanTreeFastBW(histogram []uint32, histogram_total uint, max_bits uint, depth []byte, bits []uint16, bw *bitWriter) { + var count uint = 0 + var symbols = [4]uint{0} + var length uint = 0 + var total uint = histogram_total + for total != 0 { + if histogram[length] != 0 { + if count < 4 { + symbols[count] = length + } + + count++ + total -= uint(histogram[length]) + } + + length++ + } + + if count <= 1 { + bw.writeBits(4, 1) + bw.writeBits(max_bits, uint64(symbols[0])) + depth[symbols[0]] = 0 + bits[symbols[0]] = 0 + return + } + + for i := 0; i < int(length); i++ { + depth[i] = 0 + } + { + var max_tree_size uint = 2*length + 1 + tree, _ := huffmanTreePool.Get().(*[]huffmanTree) + if tree == nil || cap(*tree) < int(max_tree_size) { + tmp := make([]huffmanTree, max_tree_size) + tree = &tmp + } else { + *tree = (*tree)[:max_tree_size] + } + var count_limit uint32 + for count_limit = 1; ; count_limit *= 2 { + var node int = 0 + var l uint + for l = length; l != 0; { + l-- + if histogram[l] != 0 { + if histogram[l] >= count_limit { + initHuffmanTree(&(*tree)[node:][0], histogram[l], -1, int16(l)) + } else { + initHuffmanTree(&(*tree)[node:][0], count_limit, -1, int16(l)) + } + + node++ + } + } + { + var n int = node + /* Points to the next leaf node. */ /* Points to the next non-leaf node. */ + var sentinel huffmanTree + var i int = 0 + var j int = n + 1 + var k int + + sortHuffmanTreeItems(*tree, uint(n), huffmanTreeComparator(sortHuffmanTree1)) + + /* The nodes are: + [0, n): the sorted leaf nodes that we start with. + [n]: we add a sentinel here. + [n + 1, 2n): new parent nodes are added here, starting from + (n+1). These are naturally in ascending order. + [2n]: we add a sentinel at the end as well. + There will be (2n+1) elements at the end. */ + initHuffmanTree(&sentinel, math.MaxUint32, -1, -1) + + (*tree)[node] = sentinel + node++ + (*tree)[node] = sentinel + node++ + + for k = n - 1; k > 0; k-- { + var left int + var right int + if (*tree)[i].total_count_ <= (*tree)[j].total_count_ { + left = i + i++ + } else { + left = j + j++ + } + + if (*tree)[i].total_count_ <= (*tree)[j].total_count_ { + right = i + i++ + } else { + right = j + j++ + } + + /* The sentinel node becomes the parent node. */ + (*tree)[node-1].total_count_ = (*tree)[left].total_count_ + (*tree)[right].total_count_ + + (*tree)[node-1].index_left_ = int16(left) + (*tree)[node-1].index_right_or_value_ = int16(right) + + /* Add back the last sentinel node. */ + (*tree)[node] = sentinel + node++ + } + + if setDepth(2*n-1, *tree, depth, 14) { + /* We need to pack the Huffman tree in 14 bits. If this was not + successful, add fake entities to the lowest values and retry. */ + break + } + } + } + + huffmanTreePool.Put(tree) + } + + convertBitDepthsToSymbols(depth, length, bits) + if count <= 4 { + var i uint + + /* value of 1 indicates a simple Huffman code */ + bw.writeBits(2, 1) + + bw.writeBits(2, uint64(count)-1) /* NSYM - 1 */ + + /* Sort */ + for i = 0; i < count; i++ { + var j uint + for j = i + 1; j < count; j++ { + if depth[symbols[j]] < depth[symbols[i]] { + var tmp uint = symbols[j] + symbols[j] = symbols[i] + symbols[i] = tmp + } + } + } + + if count == 2 { + bw.writeBits(max_bits, uint64(symbols[0])) + bw.writeBits(max_bits, uint64(symbols[1])) + } else if count == 3 { + bw.writeBits(max_bits, uint64(symbols[0])) + bw.writeBits(max_bits, uint64(symbols[1])) + bw.writeBits(max_bits, uint64(symbols[2])) + } else { + bw.writeBits(max_bits, uint64(symbols[0])) + bw.writeBits(max_bits, uint64(symbols[1])) + bw.writeBits(max_bits, uint64(symbols[2])) + bw.writeBits(max_bits, uint64(symbols[3])) + + /* tree-select */ + bw.writeSingleBit(depth[symbols[0]] == 1) + } + } else { + var previous_value byte = 8 + var i uint + + /* Complex Huffman Tree */ + storeStaticCodeLengthCodeBW(bw) + + /* Actual RLE coding. */ + for i = 0; i < length; { + var value byte = depth[i] + var reps uint = 1 + var k uint + for k = i + 1; k < length && depth[k] == value; k++ { + reps++ + } + + i += reps + if value == 0 { + bw.writeBits(uint(kZeroRepsDepth[reps]), kZeroRepsBits[reps]) + } else { + if previous_value != value { + bw.writeBits(uint(kCodeLengthDepth[value]), uint64(kCodeLengthBits[value])) + reps-- + } + + if reps < 3 { + for reps != 0 { + reps-- + bw.writeBits(uint(kCodeLengthDepth[value]), uint64(kCodeLengthBits[value])) + } + } else { + reps -= 3 + bw.writeBits(uint(kNonZeroRepsDepth[reps]), kNonZeroRepsBits[reps]) + } + + previous_value = value + } + } + } +} + func indexOf(v []byte, v_size uint, value byte) uint { var i uint = 0 for ; i < v_size; i++ { @@ -674,12 +895,15 @@ func moveToFrontTransform(v_in []uint32, v_size uint, v_out []uint32) { } } -/* Finds runs of zeros in v[0..in_size) and replaces them with a prefix code of - the run length plus extra bits (lower 9 bits is the prefix code and the rest - are the extra bits). Non-zero values in v[] are shifted by - *max_length_prefix. Will not create prefix codes bigger than the initial - value of *max_run_length_prefix. The prefix code of run length L is simply - Log2Floor(L) and the number of extra bits is the same as the prefix code. */ +/* +Finds runs of zeros in v[0..in_size) and replaces them with a prefix code of + + the run length plus extra bits (lower 9 bits is the prefix code and the rest + are the extra bits). Non-zero values in v[] are shifted by + *max_length_prefix. Will not create prefix codes bigger than the initial + value of *max_run_length_prefix. The prefix code of run length L is simply + Log2Floor(L) and the number of extra bits is the same as the prefix code. +*/ func runLengthCodeZeros(in_size uint, v []uint32, out_size *uint, max_run_length_prefix *uint32) { var max_reps uint32 = 0 var i uint @@ -799,8 +1023,11 @@ func storeBlockSwitch(code *blockSplitCode, block_len uint32, block_type byte, i writeBits(uint(len_nextra), uint64(len_extra), storage_ix, storage) } -/* Builds a BlockSplitCode data structure from the block split given by the - vector of block types and block lengths and stores it to the bit stream. */ +/* +Builds a BlockSplitCode data structure from the block split given by the + + vector of block types and block lengths and stores it to the bit stream. +*/ func buildAndStoreBlockSplitCode(types []byte, lengths []uint32, num_blocks uint, num_types uint, tree []huffmanTree, code *blockSplitCode, storage_ix *uint, storage []byte) { var type_histo [maxBlockTypeSymbols]uint32 var length_histo [numBlockLenSymbols]uint32 @@ -919,14 +1146,20 @@ func cleanupBlockEncoder(self *blockEncoder) { blockEncoderPool.Put(self) } -/* Creates entropy codes of block lengths and block types and stores them - to the bit stream. */ +/* +Creates entropy codes of block lengths and block types and stores them + + to the bit stream. +*/ func buildAndStoreBlockSwitchEntropyCodes(self *blockEncoder, tree []huffmanTree, storage_ix *uint, storage []byte) { buildAndStoreBlockSplitCode(self.block_types_, self.block_lengths_, self.num_blocks_, self.num_block_types_, tree, &self.block_split_code_, storage_ix, storage) } -/* Stores the next symbol with the entropy code of the current block type. - Updates the block type and block length at block boundaries. */ +/* +Stores the next symbol with the entropy code of the current block type. + + Updates the block type and block length at block boundaries. +*/ func storeSymbol(self *blockEncoder, symbol uint, storage_ix *uint, storage []byte) { if self.block_len_ == 0 { self.block_ix_++ @@ -945,9 +1178,12 @@ func storeSymbol(self *blockEncoder, symbol uint, storage_ix *uint, storage []by } } -/* Stores the next symbol with the entropy code of the current block type and - context value. - Updates the block type and block length at block boundaries. */ +/* +Stores the next symbol with the entropy code of the current block type and + + context value. + Updates the block type and block length at block boundaries. +*/ func storeSymbolWithContext(self *blockEncoder, symbol uint, context uint, context_map []uint32, storage_ix *uint, storage []byte, context_bits uint) { if self.block_len_ == 0 { self.block_ix_++ @@ -1268,8 +1504,11 @@ func storeMetaBlockFast(input []byte, start_pos uint, length uint, mask uint, is } } -/* This is for storing uncompressed blocks (simple raw storage of - bytes-as-bytes). */ +/* +This is for storing uncompressed blocks (simple raw storage of + + bytes-as-bytes). +*/ func storeUncompressedMetaBlock(is_final_block bool, input []byte, position uint, mask uint, len uint, storage_ix *uint, storage []byte) { var masked_pos uint = position & mask storeUncompressedMetaBlockHeader(uint(len), storage_ix, storage) diff --git a/brotli_test.go b/brotli_test.go index 45b989e..4dd8b54 100644 --- a/brotli_test.go +++ b/brotli_test.go @@ -16,6 +16,8 @@ import ( "os" "testing" "time" + + "github.com/andybalholm/brotli/matchfinder" ) func checkCompressedData(compressedData, wantOriginalData []byte) error { @@ -595,3 +597,63 @@ func BenchmarkDecodeLevels(b *testing.B) { }) } } + +func test(t *testing.T, filename string, m matchfinder.MatchFinder, blockSize int) { + data, err := ioutil.ReadFile(filename) + if err != nil { + t.Fatal(err) + } + b := new(bytes.Buffer) + w := &matchfinder.Writer{ + Dest: b, + MatchFinder: m, + Encoder: &Encoder{}, + BlockSize: blockSize, + } + w.Write(data) + w.Close() + compressed := b.Bytes() + sr := NewReader(bytes.NewReader(compressed)) + decompressed, err := ioutil.ReadAll(sr) + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(decompressed, data) { + t.Fatal("decompressed output doesn't match") + } +} + +func benchmark(b *testing.B, filename string, m matchfinder.MatchFinder, blockSize int) { + b.StopTimer() + b.ReportAllocs() + data, err := ioutil.ReadFile(filename) + if err != nil { + b.Fatal(err) + } + + b.SetBytes(int64(len(data))) + buf := new(bytes.Buffer) + w := &matchfinder.Writer{ + Dest: buf, + MatchFinder: m, + Encoder: &Encoder{}, + BlockSize: blockSize, + } + w.Write(data) + w.Close() + b.ReportMetric(float64(len(data))/float64(buf.Len()), "ratio") + b.StartTimer() + for i := 0; i < b.N; i++ { + w.Reset(ioutil.Discard) + w.Write(data) + w.Close() + } +} + +func TestEncodeM4(t *testing.T) { + test(t, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 18}, 1<<16) +} + +func BenchmarkEncodeM4(b *testing.B) { + benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20}, 1<<16) +} diff --git a/compress_fragment_two_pass.go b/compress_fragment_two_pass.go index 172dc7f..79f9c7f 100644 --- a/compress_fragment_two_pass.go +++ b/compress_fragment_two_pass.go @@ -39,8 +39,11 @@ func isMatch1(p1 []byte, p2 []byte, length uint) bool { return p1[4] == p2[4] && p1[5] == p2[5] } -/* Builds a command and distance prefix code (each 64 symbols) into "depth" and - "bits" based on "histogram" and stores it into the bit stream. */ +/* +Builds a command and distance prefix code (each 64 symbols) into "depth" and + + "bits" based on "histogram" and stores it into the bit stream. +*/ func buildAndStoreCommandPrefixCode(histogram []uint32, depth []byte, bits []uint16, storage_ix *uint, storage []byte) { var tree [129]huffmanTree var cmd_depth = [numCommandSymbols]byte{0} @@ -216,6 +219,25 @@ func storeMetaBlockHeader(len uint, is_uncompressed bool, storage_ix *uint, stor writeSingleBit(is_uncompressed, storage_ix, storage) } +func storeMetaBlockHeaderBW(len uint, is_uncompressed bool, bw *bitWriter) { + var nibbles uint = 6 + + /* ISLAST */ + bw.writeBits(1, 0) + + if len <= 1<<16 { + nibbles = 4 + } else if len <= 1<<20 { + nibbles = 5 + } + + bw.writeBits(2, uint64(nibbles)-4) + bw.writeBits(nibbles*4, uint64(len)-1) + + /* ISUNCOMPRESSED */ + bw.writeSingleBit(is_uncompressed) +} + func createCommands(input []byte, block_size uint, input_size uint, base_ip_ptr []byte, table []int, table_bits uint, min_match uint, literals *[]byte, commands *[]uint32) { var ip int = 0 var shift uint = 64 - table_bits @@ -710,19 +732,22 @@ func compressFragmentTwoPassImpl(input []byte, input_size uint, is_last bool, co } } -/* Compresses "input" string to the "*storage" buffer as one or more complete - meta-blocks, and updates the "*storage_ix" bit position. +/* +Compresses "input" string to the "*storage" buffer as one or more complete - If "is_last" is 1, emits an additional empty last meta-block. + meta-blocks, and updates the "*storage_ix" bit position. - REQUIRES: "input_size" is greater than zero, or "is_last" is 1. - REQUIRES: "input_size" is less or equal to maximal metablock size (1 << 24). - REQUIRES: "command_buf" and "literal_buf" point to at least - kCompressFragmentTwoPassBlockSize long arrays. - REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero. - REQUIRES: "table_size" is a power of two - OUTPUT: maximal copy distance <= |input_size| - OUTPUT: maximal copy distance <= BROTLI_MAX_BACKWARD_LIMIT(18) */ + If "is_last" is 1, emits an additional empty last meta-block. + + REQUIRES: "input_size" is greater than zero, or "is_last" is 1. + REQUIRES: "input_size" is less or equal to maximal metablock size (1 << 24). + REQUIRES: "command_buf" and "literal_buf" point to at least + kCompressFragmentTwoPassBlockSize long arrays. + REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero. + REQUIRES: "table_size" is a power of two + OUTPUT: maximal copy distance <= |input_size| + OUTPUT: maximal copy distance <= BROTLI_MAX_BACKWARD_LIMIT(18) +*/ func compressFragmentTwoPass(input []byte, input_size uint, is_last bool, command_buf []uint32, literal_buf []byte, table []int, table_size uint, storage_ix *uint, storage []byte) { var initial_storage_ix uint = *storage_ix var table_bits uint = uint(log2FloorNonZero(table_size)) diff --git a/encoder.go b/encoder.go new file mode 100644 index 0000000..650d1e4 --- /dev/null +++ b/encoder.go @@ -0,0 +1,168 @@ +package brotli + +import "github.com/andybalholm/brotli/matchfinder" + +// An Encoder implements the matchfinder.Encoder interface, writing in Brotli format. +type Encoder struct { + wroteHeader bool + bw bitWriter + distCache []distanceCode +} + +func (e *Encoder) Reset() { + e.wroteHeader = false + e.bw = bitWriter{} +} + +func (e *Encoder) Encode(dst []byte, src []byte, matches []matchfinder.Match, lastBlock bool) []byte { + e.bw.dst = dst + if !e.wroteHeader { + e.bw.writeBits(4, 15) + e.wroteHeader = true + } + + var literalHisto [256]uint32 + var commandHisto [704]uint32 + var distanceHisto [64]uint32 + literalCount := 0 + commandCount := 0 + distanceCount := 0 + + if len(e.distCache) < len(matches) { + e.distCache = make([]distanceCode, len(matches)) + } + + // first pass: build the histograms + pos := 0 + + // d is the ring buffer of the last 4 distances. + d := [4]int{-10, -10, -10, -10} + for i, m := range matches { + if m.Unmatched > 0 { + for _, c := range src[pos : pos+m.Unmatched] { + literalHisto[c]++ + } + literalCount += m.Unmatched + } + + insertCode := getInsertLengthCode(uint(m.Unmatched)) + copyCode := getCopyLengthCode(uint(m.Length)) + if m.Length == 0 { + // If the stream ends with unmatched bytes, we need a dummy copy length. + copyCode = 2 + } + command := combineLengthCodes(insertCode, copyCode, false) + commandHisto[command]++ + commandCount++ + + if command >= 128 && m.Length != 0 { + var distCode distanceCode + switch m.Distance { + case d[3]: + distCode.code = 0 + case d[2]: + distCode.code = 1 + case d[1]: + distCode.code = 2 + case d[0]: + distCode.code = 3 + case d[3] - 1: + distCode.code = 4 + case d[3] + 1: + distCode.code = 5 + case d[3] - 2: + distCode.code = 6 + case d[3] + 2: + distCode.code = 7 + case d[3] - 3: + distCode.code = 8 + case d[3] + 3: + distCode.code = 9 + + // In my testing, codes 10–15 actually reduced the compression ratio. + + default: + distCode = getDistanceCode(m.Distance) + } + e.distCache[i] = distCode + distanceHisto[distCode.code]++ + distanceCount++ + if distCode.code != 0 { + d[0], d[1], d[2], d[3] = d[1], d[2], d[3], m.Distance + } + } + + pos += m.Unmatched + m.Length + } + + storeMetaBlockHeaderBW(uint(len(src)), false, &e.bw) + e.bw.writeBits(13, 0) + + var literalDepths [256]byte + var literalBits [256]uint16 + buildAndStoreHuffmanTreeFastBW(literalHisto[:], uint(literalCount), 8, literalDepths[:], literalBits[:], &e.bw) + + var commandDepths [704]byte + var commandBits [704]uint16 + buildAndStoreHuffmanTreeFastBW(commandHisto[:], uint(commandCount), 10, commandDepths[:], commandBits[:], &e.bw) + + var distanceDepths [64]byte + var distanceBits [64]uint16 + buildAndStoreHuffmanTreeFastBW(distanceHisto[:], uint(distanceCount), 6, distanceDepths[:], distanceBits[:], &e.bw) + + pos = 0 + for i, m := range matches { + insertCode := getInsertLengthCode(uint(m.Unmatched)) + copyCode := getCopyLengthCode(uint(m.Length)) + if m.Length == 0 { + // If the stream ends with unmatched bytes, we need a dummy copy length. + copyCode = 2 + } + command := combineLengthCodes(insertCode, copyCode, false) + e.bw.writeBits(uint(commandDepths[command]), uint64(commandBits[command])) + if kInsExtra[insertCode] > 0 { + e.bw.writeBits(uint(kInsExtra[insertCode]), uint64(m.Unmatched)-uint64(kInsBase[insertCode])) + } + if kCopyExtra[copyCode] > 0 { + e.bw.writeBits(uint(kCopyExtra[copyCode]), uint64(m.Length)-uint64(kCopyBase[copyCode])) + } + + if m.Unmatched > 0 { + for _, c := range src[pos : pos+m.Unmatched] { + e.bw.writeBits(uint(literalDepths[c]), uint64(literalBits[c])) + } + } + + if command >= 128 && m.Length != 0 { + distCode := e.distCache[i] + e.bw.writeBits(uint(distanceDepths[distCode.code]), uint64(distanceBits[distCode.code])) + if distCode.nExtra > 0 { + e.bw.writeBits(distCode.nExtra, distCode.extraBits) + } + } + + pos += m.Unmatched + m.Length + } + + if lastBlock { + e.bw.writeBits(2, 3) // islast + isempty + e.bw.jumpToByteBoundary() + } + return e.bw.dst +} + +type distanceCode struct { + code int + nExtra uint + extraBits uint64 +} + +func getDistanceCode(distance int) distanceCode { + d := distance + 3 + nbits := log2FloorNonZero(uint(d)) - 1 + prefix := (d >> nbits) & 1 + offset := (2 + prefix) << nbits + distcode := int(2*(nbits-1)) + prefix + 16 + extra := d - offset + return distanceCode{distcode, uint(nbits), uint64(extra)} +} diff --git a/entropy_encode_static.go b/entropy_encode_static.go index 5ddf3fc..294aff4 100644 --- a/entropy_encode_static.go +++ b/entropy_encode_static.go @@ -782,6 +782,11 @@ func storeStaticCodeLengthCode(storage_ix *uint, storage []byte) { writeBits(40, 0x0000FF55555554, storage_ix, storage) } +func storeStaticCodeLengthCodeBW(bw *bitWriter) { + bw.writeBits(32, 0x55555554) + bw.writeBits(8, 0xFF) +} + var kZeroRepsBits = [numCommandSymbols]uint64{ 0x00000000, 0x00000000, diff --git a/go.mod b/go.mod index 1c94232..50324ea 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,5 @@ module github.com/andybalholm/brotli -go 1.12 +go 1.13 retract v1.0.1 // occasional panics and data corruption diff --git a/matchfinder/m4.go b/matchfinder/m4.go new file mode 100644 index 0000000..6bafe27 --- /dev/null +++ b/matchfinder/m4.go @@ -0,0 +1,270 @@ +package matchfinder + +import ( + "encoding/binary" + "math/bits" + "runtime" +) + +const ( + ssapBits = 17 + ssapMask = (1 << ssapBits) - 1 +) + +// M4 is an implementation of the MatchFinder +// interface that uses a simple hash table to find matches, +// but the advanced parsing technique from +// https://fastcompression.blogspot.com/2011/12/advanced-parsing-strategies.html, +// except that it looks for matches at every input position. +type M4 struct { + // MaxDistance is the maximum distance (in bytes) to look back for + // a match. The default is 65535. + MaxDistance int + + // MinLength is the length of the shortest match to return. + // The default is 4. + MinLength int + + // HashLen is the number of bytes to use to calculate the hashes. + // The maximum is 8 and the default is 6. + HashLen int + + table [1 << ssapBits]uint32 + + history []byte +} + +func (q *M4) Reset() { + q.table = [1 << ssapBits]uint32{} + q.history = q.history[:0] +} + +func (q *M4) FindMatches(dst []Match, src []byte) []Match { + if q.MaxDistance == 0 { + q.MaxDistance = 65535 + } + if q.MinLength == 0 { + q.MinLength = 4 + } + if q.HashLen == 0 { + q.HashLen = 6 + } + var nextEmit int + + if len(q.history) > q.MaxDistance*2 { + // Trim down the history buffer. + delta := len(q.history) - q.MaxDistance + copy(q.history, q.history[delta:]) + q.history = q.history[:q.MaxDistance] + + for i, v := range q.table { + newV := int(v) - delta + if newV < 0 { + newV = 0 + } + q.table[i] = uint32(newV) + } + } + + // Append src to the history buffer. + nextEmit = len(q.history) + q.history = append(q.history, src...) + src = q.history + + // matches stores the matches that have been found but not emitted, + // in reverse order. (matches[0] is the most recent one.) + var matches [3]absoluteMatch + for i := nextEmit; i < len(src)-7; i++ { + if matches[0] != (absoluteMatch{}) && i >= matches[0].End { + // We have found some matches, and we're far enough along that we probably + // won't find overlapping matches, so we might as well emit them. + if matches[1] != (absoluteMatch{}) { + if matches[1].End > matches[0].Start { + matches[1].End = matches[0].Start + } + if matches[1].End-matches[1].Start >= q.MinLength { + dst = append(dst, Match{ + Unmatched: matches[1].Start - nextEmit, + Length: matches[1].End - matches[1].Start, + Distance: matches[1].Start - matches[1].Match, + }) + nextEmit = matches[1].End + } + } + dst = append(dst, Match{ + Unmatched: matches[0].Start - nextEmit, + Length: matches[0].End - matches[0].Start, + Distance: matches[0].Start - matches[0].Match, + }) + nextEmit = matches[0].End + matches = [3]absoluteMatch{} + } + + // Now look for a match. + h := ((binary.LittleEndian.Uint64(src[i:]) & (1<<(8*q.HashLen) - 1)) * hashMul64) >> (64 - ssapBits) + candidate := int(q.table[h&ssapMask]) + q.table[h&ssapMask] = uint32(i) + + if candidate == 0 || i-candidate > q.MaxDistance || i-candidate == matches[0].Start-matches[0].Match { + continue + } + if binary.LittleEndian.Uint32(src[candidate:]) != binary.LittleEndian.Uint32(src[i:]) { + continue + } + + // We have a 4-byte match now. + + start := i + match := candidate + end := extendMatch(src, match+4, start+4) + for start > nextEmit && match > 0 && src[start-1] == src[match-1] { + start-- + match-- + } + if end-start <= matches[0].End-matches[0].Start { + continue + } + + matches = [3]absoluteMatch{ + absoluteMatch{ + Start: start, + End: end, + Match: match, + }, + matches[0], + matches[1], + } + + if matches[2] == (absoluteMatch{}) { + continue + } + + // We have three matches, so it's time to emit one and/or eliminate one. + switch { + case matches[0].Start < matches[2].End: + // The first and third matches overlap; discard the one in between. + matches = [3]absoluteMatch{ + matches[0], + matches[2], + absoluteMatch{}, + } + + case matches[0].Start < matches[2].End+q.MinLength: + // The first and third matches don't overlap, but there's no room for + // another match between them. Emit the first match and discard the second. + dst = append(dst, Match{ + Unmatched: matches[2].Start - nextEmit, + Length: matches[2].End - matches[2].Start, + Distance: matches[2].Start - matches[2].Match, + }) + nextEmit = matches[2].End + matches = [3]absoluteMatch{ + matches[0], + absoluteMatch{}, + absoluteMatch{}, + } + + default: + // Emit the first match, shortening it if necessary to avoid overlap with the second. + if matches[2].End > matches[1].Start { + matches[2].End = matches[1].Start + } + if matches[2].End-matches[2].Start >= q.MinLength { + dst = append(dst, Match{ + Unmatched: matches[2].Start - nextEmit, + Length: matches[2].End - matches[2].Start, + Distance: matches[2].Start - matches[2].Match, + }) + nextEmit = matches[2].End + } + matches[2] = absoluteMatch{} + } + } + + // We've found all the matches now; emit the remaining ones. + if matches[1] != (absoluteMatch{}) { + if matches[1].End > matches[0].Start { + matches[1].End = matches[0].Start + } + if matches[1].End-matches[1].Start >= q.MinLength { + dst = append(dst, Match{ + Unmatched: matches[1].Start - nextEmit, + Length: matches[1].End - matches[1].Start, + Distance: matches[1].Start - matches[1].Match, + }) + nextEmit = matches[1].End + } + } + if matches[0] != (absoluteMatch{}) { + dst = append(dst, Match{ + Unmatched: matches[0].Start - nextEmit, + Length: matches[0].End - matches[0].Start, + Distance: matches[0].Start - matches[0].Match, + }) + nextEmit = matches[0].End + } + + if nextEmit < len(src) { + dst = append(dst, Match{ + Unmatched: len(src) - nextEmit, + }) + } + + return dst +} + +const hashMul64 = 0x1E35A7BD1E35A7BD + +// An absoluteMatch is like a Match, but it stores indexes into the byte +// stream instead of lengths. +type absoluteMatch struct { + // Start is the index of the first byte. + Start int + + // End is the index of the byte after the last byte + // (so that End - Start = Length). + End int + + // Match is the index of the previous data that matches + // (Start - Match = Distance). + Match int +} + +// extendMatch returns the largest k such that k <= len(src) and that +// src[i:i+k-j] and src[j:k] have the same contents. +// +// It assumes that: +// +// 0 <= i && i < j && j <= len(src) +func extendMatch(src []byte, i, j int) int { + switch runtime.GOARCH { + case "amd64": + // As long as we are 8 or more bytes before the end of src, we can load and + // compare 8 bytes at a time. If those 8 bytes are equal, repeat. + for j+8 < len(src) { + iBytes := binary.LittleEndian.Uint64(src[i:]) + jBytes := binary.LittleEndian.Uint64(src[j:]) + if iBytes != jBytes { + // If those 8 bytes were not equal, XOR the two 8 byte values, and return + // the index of the first byte that differs. The BSF instruction finds the + // least significant 1 bit, the amd64 architecture is little-endian, and + // the shift by 3 converts a bit index to a byte index. + return j + bits.TrailingZeros64(iBytes^jBytes)>>3 + } + i, j = i+8, j+8 + } + case "386": + // On a 32-bit CPU, we do it 4 bytes at a time. + for j+4 < len(src) { + iBytes := binary.LittleEndian.Uint32(src[i:]) + jBytes := binary.LittleEndian.Uint32(src[j:]) + if iBytes != jBytes { + return j + bits.TrailingZeros32(iBytes^jBytes)>>3 + } + i, j = i+4, j+4 + } + } + for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 { + } + return j +} diff --git a/matchfinder/matchfinder.go b/matchfinder/matchfinder.go new file mode 100644 index 0000000..f6bcfdb --- /dev/null +++ b/matchfinder/matchfinder.go @@ -0,0 +1,103 @@ +// The matchfinder package defines reusable components for data compression. +// +// Many compression libraries have two main parts: +// - Something that looks for repeated sequences of bytes +// - An encoder for the compressed data format (often an entropy coder) +// +// Although these are logically two separate steps, the implementations are +// usually closely tied together. You can't use flate's matcher with snappy's +// encoder, for example. This package defines interfaces and an intermediate +// representation to allow mixing and matching compression components. +package matchfinder + +import "io" + +// A Match is the basic unit of LZ77 compression. +type Match struct { + Unmatched int // the number of unmatched bytes since the previous match + Length int // the number of bytes in the matched string; it may be 0 at the end of the input + Distance int // how far back in the stream to copy from +} + +// A MatchFinder performs the LZ77 stage of compression, looking for matches. +type MatchFinder interface { + // FindMatches looks for matches in src, appends them to dst, and returns dst. + FindMatches(dst []Match, src []byte) []Match + + // Reset clears any internal state, preparing the MatchFinder to be used with + // a new stream. + Reset() +} + +// An Encoder encodes the data in its final format. +type Encoder interface { + // Encode appends the encoded format of src to dst, using the match + // information from matches. + Encode(dst []byte, src []byte, matches []Match, lastBlock bool) []byte + + // Reset clears any internal state, preparing the Encoder to be used with + // a new stream. + Reset() +} + +// A Writer uses MatchFinder and Encoder to write compressed data to Dest. +type Writer struct { + Dest io.Writer + MatchFinder MatchFinder + Encoder Encoder + + // BlockSize is the number of bytes to compress at a time. If it is zero, + // each Write operation will be treated as one block. + BlockSize int + + err error + inBuf []byte + outBuf []byte + matches []Match +} + +func (w *Writer) Write(p []byte) (n int, err error) { + if w.err != nil { + return 0, w.err + } + + if w.BlockSize == 0 { + return w.writeBlock(p, false) + } + + w.inBuf = append(w.inBuf, p...) + var pos int + for pos = 0; pos+w.BlockSize <= len(w.inBuf) && w.err == nil; pos += w.BlockSize { + w.writeBlock(w.inBuf[pos:pos+w.BlockSize], false) + } + if pos > 0 { + n := copy(w.inBuf, w.inBuf[pos:]) + w.inBuf = w.inBuf[:n] + } + + return len(p), w.err +} + +func (w *Writer) writeBlock(p []byte, lastBlock bool) (n int, err error) { + w.outBuf = w.outBuf[:0] + w.matches = w.MatchFinder.FindMatches(w.matches[:0], p) + w.outBuf = w.Encoder.Encode(w.outBuf, p, w.matches, lastBlock) + _, w.err = w.Dest.Write(w.outBuf) + return len(p), w.err +} + +func (w *Writer) Close() error { + w.writeBlock(w.inBuf, true) + w.inBuf = w.inBuf[:0] + return w.err +} + +func (w *Writer) Reset(newDest io.Writer) { + w.MatchFinder.Reset() + w.Encoder.Reset() + w.err = nil + w.inBuf = w.inBuf[:0] + w.outBuf = w.outBuf[:0] + w.matches = w.matches[:0] + w.Dest = newDest +} diff --git a/matchfinder/textencoder.go b/matchfinder/textencoder.go new file mode 100644 index 0000000..75ecc59 --- /dev/null +++ b/matchfinder/textencoder.go @@ -0,0 +1,53 @@ +package matchfinder + +import "fmt" + +// A TextEncoder is an Encoder that produces a human-readable representation of +// the LZ77 compression. Matches are replaced with symbols. +type TextEncoder struct{} + +func (t TextEncoder) Reset() {} + +func (t TextEncoder) Encode(dst []byte, src []byte, matches []Match, lastBlock bool) []byte { + pos := 0 + for _, m := range matches { + if m.Unmatched > 0 { + dst = append(dst, src[pos:pos+m.Unmatched]...) + pos += m.Unmatched + } + if m.Length > 0 { + dst = append(dst, []byte(fmt.Sprintf("<%d,%d>", m.Length, m.Distance))...) + pos += m.Length + } + } + if pos < len(src) { + dst = append(dst, src[pos:]...) + } + return dst +} + +// A NoMatchFinder implements MatchFinder, but doesn't find any matches. +// It can be used to implement the equivalent of the standard library flate package's +// HuffmanOnly setting. +type NoMatchFinder struct{} + +func (n NoMatchFinder) Reset() {} + +func (n NoMatchFinder) FindMatches(dst []Match, src []byte) []Match { + return append(dst, Match{ + Unmatched: len(src), + }) +} + +// AutoReset wraps a MatchFinder that can return references to data in previous +// blocks, and calls Reset before each block. It is useful for (e.g.) using a +// snappy Encoder with a MatchFinder designed for flate. (Snappy doesn't +// support references between blocks.) +type AutoReset struct { + MatchFinder +} + +func (a AutoReset) FindMatches(dst []Match, src []byte) []Match { + a.Reset() + return a.MatchFinder.FindMatches(dst, src) +}