Add matchfinder package.
I've been experimenting for a while with a new brotli compressor. Instead of being a translation of the C implementation, it's a rewrite in Go, with a modular structure thanks to interfaces. (A few low-level functions still come from the C version, though.) The performance is getting to the point where it seems to be worth adding to the brotli repository.
This commit is contained in:
parent
b7a4cf9ec5
commit
349ed2fce1
|
@ -0,0 +1,56 @@
|
|||
package brotli
|
||||
|
||||
/* Copyright 2010 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Write bits into a byte array. */
|
||||
|
||||
// bitWriter assembles a bit stream, least-significant bit first, into a
// byte slice.
type bitWriter struct {
	dst []byte

	// Data waiting to be written is the low nbits of bits.
	bits  uint64
	nbits uint
}

// writeBits appends the low nb bits of b to the stream. nb must be small
// enough that nb plus the pending bit count stays within 64.
func (w *bitWriter) writeBits(nb uint, b uint64) {
	w.bits |= b << w.nbits
	w.nbits += nb
	if w.nbits < 32 {
		return
	}
	// Flush the low 32 pending bits to dst, four bytes at a time.
	chunk := w.bits
	w.bits >>= 32
	w.nbits -= 32
	w.dst = append(w.dst,
		byte(chunk),
		byte(chunk>>8),
		byte(chunk>>16),
		byte(chunk>>24),
	)
}

// writeSingleBit appends one bit: 1 if bit is true, 0 otherwise.
func (w *bitWriter) writeSingleBit(bit bool) {
	if bit {
		w.writeBits(1, 1)
	} else {
		w.writeBits(1, 0)
	}
}

// jumpToByteBoundary flushes any pending bits (zero-padding the final
// partial byte) so that the stream ends on a byte boundary.
func (w *bitWriter) jumpToByteBoundary() {
	for w.nbits != 0 {
		w.dst = append(w.dst, byte(w.bits))
		w.bits >>= 8
		if w.nbits > 8 { // Avoid underflow
			w.nbits -= 8
		} else {
			w.nbits = 0
		}
	}
	w.bits = 0
}
|
|
@ -7,12 +7,18 @@ import (
|
|||
|
||||
const maxHuffmanTreeSize = (2*numCommandSymbols + 1)
|
||||
|
||||
/* The maximum size of Huffman dictionary for distances assuming that
|
||||
NPOSTFIX = 0 and NDIRECT = 0. */
|
||||
/*
|
||||
The maximum size of Huffman dictionary for distances assuming that
|
||||
|
||||
NPOSTFIX = 0 and NDIRECT = 0.
|
||||
*/
|
||||
const maxSimpleDistanceAlphabetSize = 140
|
||||
|
||||
/* Represents the range of values belonging to a prefix code:
|
||||
[offset, offset + 2^nbits) */
|
||||
/*
|
||||
Represents the range of values belonging to a prefix code:
|
||||
|
||||
[offset, offset + 2^nbits)
|
||||
*/
|
||||
type prefixCodeRange struct {
|
||||
offset uint32
|
||||
nbits uint32
|
||||
|
@ -96,9 +102,12 @@ func nextBlockTypeCode(calculator *blockTypeCodeCalculator, type_ byte) uint {
|
|||
return type_code
|
||||
}
|
||||
|
||||
/* |nibblesbits| represents the 2 bits to encode MNIBBLES (0-3)
|
||||
REQUIRES: length > 0
|
||||
REQUIRES: length <= (1 << 24) */
|
||||
/*
|
||||
|nibblesbits| represents the 2 bits to encode MNIBBLES (0-3)
|
||||
|
||||
REQUIRES: length > 0
|
||||
REQUIRES: length <= (1 << 24)
|
||||
*/
|
||||
func encodeMlen(length uint, bits *uint64, numbits *uint, nibblesbits *uint64) {
|
||||
var lg uint
|
||||
if length == 1 {
|
||||
|
@ -132,8 +141,11 @@ func storeCommandExtra(cmd *command, storage_ix *uint, storage []byte) {
|
|||
writeBits(uint(insnumextra+getCopyExtra(copycode)), bits, storage_ix, storage)
|
||||
}
|
||||
|
||||
/* Data structure that stores almost everything that is needed to encode each
|
||||
block switch command. */
|
||||
/*
|
||||
Data structure that stores almost everything that is needed to encode each
|
||||
|
||||
block switch command.
|
||||
*/
|
||||
type blockSplitCode struct {
|
||||
type_code_calculator blockTypeCodeCalculator
|
||||
type_depths [maxBlockTypeSymbols]byte
|
||||
|
@ -154,9 +166,12 @@ func storeVarLenUint8(n uint, storage_ix *uint, storage []byte) {
|
|||
}
|
||||
}
|
||||
|
||||
/* Stores the compressed meta-block header.
|
||||
REQUIRES: length > 0
|
||||
REQUIRES: length <= (1 << 24) */
|
||||
/*
|
||||
Stores the compressed meta-block header.
|
||||
|
||||
REQUIRES: length > 0
|
||||
REQUIRES: length <= (1 << 24)
|
||||
*/
|
||||
func storeCompressedMetaBlockHeader(is_final_block bool, length uint, storage_ix *uint, storage []byte) {
|
||||
var lenbits uint64
|
||||
var nlenbits uint
|
||||
|
@ -186,9 +201,12 @@ func storeCompressedMetaBlockHeader(is_final_block bool, length uint, storage_ix
|
|||
}
|
||||
}
|
||||
|
||||
/* Stores the uncompressed meta-block header.
|
||||
REQUIRES: length > 0
|
||||
REQUIRES: length <= (1 << 24) */
|
||||
/*
|
||||
Stores the uncompressed meta-block header.
|
||||
|
||||
REQUIRES: length > 0
|
||||
REQUIRES: length <= (1 << 24)
|
||||
*/
|
||||
func storeUncompressedMetaBlockHeader(length uint, storage_ix *uint, storage []byte) {
|
||||
var lenbits uint64
|
||||
var nlenbits uint
|
||||
|
@ -312,8 +330,11 @@ func storeSimpleHuffmanTree(depths []byte, symbols []uint, num_symbols uint, max
|
|||
}
|
||||
}
|
||||
|
||||
/* num = alphabet size
|
||||
depths = symbol depths */
|
||||
/*
|
||||
num = alphabet size
|
||||
|
||||
depths = symbol depths
|
||||
*/
|
||||
func storeHuffmanTree(depths []byte, num uint, tree []huffmanTree, storage_ix *uint, storage []byte) {
|
||||
var huffman_tree [numCommandSymbols]byte
|
||||
var huffman_tree_extra_bits [numCommandSymbols]byte
|
||||
|
@ -367,8 +388,11 @@ func storeHuffmanTree(depths []byte, num uint, tree []huffmanTree, storage_ix *u
|
|||
storeHuffmanTreeToBitMask(huffman_tree_size, huffman_tree[:], huffman_tree_extra_bits[:], code_length_bitdepth[:], code_length_bitdepth_symbols[:], storage_ix, storage)
|
||||
}
|
||||
|
||||
/* Builds a Huffman tree from histogram[0:length] into depth[0:length] and
|
||||
bits[0:length] and stores the encoded tree to the bit stream. */
|
||||
/*
|
||||
Builds a Huffman tree from histogram[0:length] into depth[0:length] and
|
||||
|
||||
bits[0:length] and stores the encoded tree to the bit stream.
|
||||
*/
|
||||
func buildAndStoreHuffmanTree(histogram []uint32, histogram_length uint, alphabet_size uint, tree []huffmanTree, depth []byte, bits []uint16, storage_ix *uint, storage []byte) {
|
||||
var count uint = 0
|
||||
var s4 = [4]uint{0}
|
||||
|
@ -623,6 +647,203 @@ func buildAndStoreHuffmanTreeFast(histogram []uint32, histogram_total uint, max_
|
|||
}
|
||||
}
|
||||
|
||||
// buildAndStoreHuffmanTreeFastBW is the bitWriter-based counterpart of
// buildAndStoreHuffmanTreeFast: it builds a Huffman code for
// histogram[0:length] (where length is derived by scanning until
// histogram_total counts have been seen), writes the encoded code to bw,
// and fills in depth and bits so the caller can encode symbols with it.
// Codes are limited to max_bits for the simple-code symbol values and to
// depth 14 for the tree itself.
func buildAndStoreHuffmanTreeFastBW(histogram []uint32, histogram_total uint, max_bits uint, depth []byte, bits []uint16, bw *bitWriter) {
	var count uint = 0
	var symbols = [4]uint{0}
	var length uint = 0
	var total uint = histogram_total
	// Scan the histogram until all histogram_total counts are accounted
	// for, recording up to the first four used symbols (for the simple
	// Huffman code) and the alphabet length actually in use.
	for total != 0 {
		if histogram[length] != 0 {
			if count < 4 {
				symbols[count] = length
			}

			count++
			total -= uint(histogram[length])
		}

		length++
	}

	// Degenerate case: zero or one distinct symbol. Emit a simple code
	// with NSYM = 1; the symbol needs no bits to encode.
	if count <= 1 {
		bw.writeBits(4, 1)
		bw.writeBits(max_bits, uint64(symbols[0]))
		depth[symbols[0]] = 0
		bits[symbols[0]] = 0
		return
	}

	for i := 0; i < int(length); i++ {
		depth[i] = 0
	}
	{
		// Build the Huffman tree, using a pooled scratch slice.
		var max_tree_size uint = 2*length + 1
		tree, _ := huffmanTreePool.Get().(*[]huffmanTree)
		if tree == nil || cap(*tree) < int(max_tree_size) {
			tmp := make([]huffmanTree, max_tree_size)
			tree = &tmp
		} else {
			*tree = (*tree)[:max_tree_size]
		}
		// If the tree's depth exceeds 14 bits, retry with progressively
		// larger count_limit values, which flattens the count
		// distribution and therefore the tree.
		var count_limit uint32
		for count_limit = 1; ; count_limit *= 2 {
			var node int = 0
			var l uint
			// Create a leaf node for every used symbol, clamping counts
			// up to count_limit.
			for l = length; l != 0; {
				l--
				if histogram[l] != 0 {
					if histogram[l] >= count_limit {
						initHuffmanTree(&(*tree)[node:][0], histogram[l], -1, int16(l))
					} else {
						initHuffmanTree(&(*tree)[node:][0], count_limit, -1, int16(l))
					}

					node++
				}
			}
			{
				var n int = node
				var sentinel huffmanTree
				var i int = 0     /* Points to the next leaf node. */
				var j int = n + 1 /* Points to the next non-leaf node. */
				var k int

				sortHuffmanTreeItems(*tree, uint(n), huffmanTreeComparator(sortHuffmanTree1))

				/* The nodes are:
				   [0, n): the sorted leaf nodes that we start with.
				   [n]: we add a sentinel here.
				   [n + 1, 2n): new parent nodes are added here, starting from
				               (n+1). These are naturally in ascending order.
				   [2n]: we add a sentinel at the end as well.
				   There will be (2n+1) elements at the end. */
				initHuffmanTree(&sentinel, math.MaxUint32, -1, -1)

				(*tree)[node] = sentinel
				node++
				(*tree)[node] = sentinel
				node++

				// Repeatedly join the two lowest-count nodes (leaf or
				// parent) into a new parent node.
				for k = n - 1; k > 0; k-- {
					var left int
					var right int
					if (*tree)[i].total_count_ <= (*tree)[j].total_count_ {
						left = i
						i++
					} else {
						left = j
						j++
					}

					if (*tree)[i].total_count_ <= (*tree)[j].total_count_ {
						right = i
						i++
					} else {
						right = j
						j++
					}

					/* The sentinel node becomes the parent node. */
					(*tree)[node-1].total_count_ = (*tree)[left].total_count_ + (*tree)[right].total_count_

					(*tree)[node-1].index_left_ = int16(left)
					(*tree)[node-1].index_right_or_value_ = int16(right)

					/* Add back the last sentinel node. */
					(*tree)[node] = sentinel
					node++
				}

				if setDepth(2*n-1, *tree, depth, 14) {
					/* We need to pack the Huffman tree in 14 bits. If this was not
					   successful, add fake entities to the lowest values and retry. */
					break
				}
			}
		}

		huffmanTreePool.Put(tree)
	}

	convertBitDepthsToSymbols(depth, length, bits)
	if count <= 4 {
		var i uint

		/* value of 1 indicates a simple Huffman code */
		bw.writeBits(2, 1)

		bw.writeBits(2, uint64(count)-1) /* NSYM - 1 */

		/* Sort the symbols by depth (selection sort over at most 4 entries). */
		for i = 0; i < count; i++ {
			var j uint
			for j = i + 1; j < count; j++ {
				if depth[symbols[j]] < depth[symbols[i]] {
					var tmp uint = symbols[j]
					symbols[j] = symbols[i]
					symbols[i] = tmp
				}
			}
		}

		if count == 2 {
			bw.writeBits(max_bits, uint64(symbols[0]))
			bw.writeBits(max_bits, uint64(symbols[1]))
		} else if count == 3 {
			bw.writeBits(max_bits, uint64(symbols[0]))
			bw.writeBits(max_bits, uint64(symbols[1]))
			bw.writeBits(max_bits, uint64(symbols[2]))
		} else {
			bw.writeBits(max_bits, uint64(symbols[0]))
			bw.writeBits(max_bits, uint64(symbols[1]))
			bw.writeBits(max_bits, uint64(symbols[2]))
			bw.writeBits(max_bits, uint64(symbols[3]))

			/* tree-select */
			bw.writeSingleBit(depth[symbols[0]] == 1)
		}
	} else {
		var previous_value byte = 8
		var i uint

		/* Complex Huffman Tree */
		storeStaticCodeLengthCodeBW(bw)

		/* Actual RLE coding of the code lengths. */
		for i = 0; i < length; {
			var value byte = depth[i]
			var reps uint = 1
			var k uint
			// Count how many consecutive symbols share this code length.
			for k = i + 1; k < length && depth[k] == value; k++ {
				reps++
			}

			i += reps
			if value == 0 {
				bw.writeBits(uint(kZeroRepsDepth[reps]), kZeroRepsBits[reps])
			} else {
				if previous_value != value {
					bw.writeBits(uint(kCodeLengthDepth[value]), uint64(kCodeLengthBits[value]))
					reps--
				}

				if reps < 3 {
					// Too few repeats for a repeat code; emit them literally.
					for reps != 0 {
						reps--
						bw.writeBits(uint(kCodeLengthDepth[value]), uint64(kCodeLengthBits[value]))
					}
				} else {
					reps -= 3
					bw.writeBits(uint(kNonZeroRepsDepth[reps]), kNonZeroRepsBits[reps])
				}

				previous_value = value
			}
		}
	}
}
|
||||
|
||||
func indexOf(v []byte, v_size uint, value byte) uint {
|
||||
var i uint = 0
|
||||
for ; i < v_size; i++ {
|
||||
|
@ -674,12 +895,15 @@ func moveToFrontTransform(v_in []uint32, v_size uint, v_out []uint32) {
|
|||
}
|
||||
}
|
||||
|
||||
/* Finds runs of zeros in v[0..in_size) and replaces them with a prefix code of
|
||||
the run length plus extra bits (lower 9 bits is the prefix code and the rest
|
||||
are the extra bits). Non-zero values in v[] are shifted by
|
||||
*max_length_prefix. Will not create prefix codes bigger than the initial
|
||||
value of *max_run_length_prefix. The prefix code of run length L is simply
|
||||
Log2Floor(L) and the number of extra bits is the same as the prefix code. */
|
||||
/*
|
||||
Finds runs of zeros in v[0..in_size) and replaces them with a prefix code of
|
||||
|
||||
the run length plus extra bits (lower 9 bits is the prefix code and the rest
|
||||
are the extra bits). Non-zero values in v[] are shifted by
|
||||
*max_length_prefix. Will not create prefix codes bigger than the initial
|
||||
value of *max_run_length_prefix. The prefix code of run length L is simply
|
||||
Log2Floor(L) and the number of extra bits is the same as the prefix code.
|
||||
*/
|
||||
func runLengthCodeZeros(in_size uint, v []uint32, out_size *uint, max_run_length_prefix *uint32) {
|
||||
var max_reps uint32 = 0
|
||||
var i uint
|
||||
|
@ -799,8 +1023,11 @@ func storeBlockSwitch(code *blockSplitCode, block_len uint32, block_type byte, i
|
|||
writeBits(uint(len_nextra), uint64(len_extra), storage_ix, storage)
|
||||
}
|
||||
|
||||
/* Builds a BlockSplitCode data structure from the block split given by the
|
||||
vector of block types and block lengths and stores it to the bit stream. */
|
||||
/*
|
||||
Builds a BlockSplitCode data structure from the block split given by the
|
||||
|
||||
vector of block types and block lengths and stores it to the bit stream.
|
||||
*/
|
||||
func buildAndStoreBlockSplitCode(types []byte, lengths []uint32, num_blocks uint, num_types uint, tree []huffmanTree, code *blockSplitCode, storage_ix *uint, storage []byte) {
|
||||
var type_histo [maxBlockTypeSymbols]uint32
|
||||
var length_histo [numBlockLenSymbols]uint32
|
||||
|
@ -919,14 +1146,20 @@ func cleanupBlockEncoder(self *blockEncoder) {
|
|||
blockEncoderPool.Put(self)
|
||||
}
|
||||
|
||||
/* Creates entropy codes of block lengths and block types and stores them
|
||||
to the bit stream. */
|
||||
/*
|
||||
Creates entropy codes of block lengths and block types and stores them
|
||||
|
||||
to the bit stream.
|
||||
*/
|
||||
func buildAndStoreBlockSwitchEntropyCodes(self *blockEncoder, tree []huffmanTree, storage_ix *uint, storage []byte) {
|
||||
buildAndStoreBlockSplitCode(self.block_types_, self.block_lengths_, self.num_blocks_, self.num_block_types_, tree, &self.block_split_code_, storage_ix, storage)
|
||||
}
|
||||
|
||||
/* Stores the next symbol with the entropy code of the current block type.
|
||||
Updates the block type and block length at block boundaries. */
|
||||
/*
|
||||
Stores the next symbol with the entropy code of the current block type.
|
||||
|
||||
Updates the block type and block length at block boundaries.
|
||||
*/
|
||||
func storeSymbol(self *blockEncoder, symbol uint, storage_ix *uint, storage []byte) {
|
||||
if self.block_len_ == 0 {
|
||||
self.block_ix_++
|
||||
|
@ -945,9 +1178,12 @@ func storeSymbol(self *blockEncoder, symbol uint, storage_ix *uint, storage []by
|
|||
}
|
||||
}
|
||||
|
||||
/* Stores the next symbol with the entropy code of the current block type and
|
||||
context value.
|
||||
Updates the block type and block length at block boundaries. */
|
||||
/*
|
||||
Stores the next symbol with the entropy code of the current block type and
|
||||
|
||||
context value.
|
||||
Updates the block type and block length at block boundaries.
|
||||
*/
|
||||
func storeSymbolWithContext(self *blockEncoder, symbol uint, context uint, context_map []uint32, storage_ix *uint, storage []byte, context_bits uint) {
|
||||
if self.block_len_ == 0 {
|
||||
self.block_ix_++
|
||||
|
@ -1268,8 +1504,11 @@ func storeMetaBlockFast(input []byte, start_pos uint, length uint, mask uint, is
|
|||
}
|
||||
}
|
||||
|
||||
/* This is for storing uncompressed blocks (simple raw storage of
|
||||
bytes-as-bytes). */
|
||||
/*
|
||||
This is for storing uncompressed blocks (simple raw storage of
|
||||
|
||||
bytes-as-bytes).
|
||||
*/
|
||||
func storeUncompressedMetaBlock(is_final_block bool, input []byte, position uint, mask uint, len uint, storage_ix *uint, storage []byte) {
|
||||
var masked_pos uint = position & mask
|
||||
storeUncompressedMetaBlockHeader(uint(len), storage_ix, storage)
|
||||
|
|
|
@ -16,6 +16,8 @@ import (
|
|||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/andybalholm/brotli/matchfinder"
|
||||
)
|
||||
|
||||
func checkCompressedData(compressedData, wantOriginalData []byte) error {
|
||||
|
@ -595,3 +597,63 @@ func BenchmarkDecodeLevels(b *testing.B) {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
func test(t *testing.T, filename string, m matchfinder.MatchFinder, blockSize int) {
|
||||
data, err := ioutil.ReadFile(filename)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
b := new(bytes.Buffer)
|
||||
w := &matchfinder.Writer{
|
||||
Dest: b,
|
||||
MatchFinder: m,
|
||||
Encoder: &Encoder{},
|
||||
BlockSize: blockSize,
|
||||
}
|
||||
w.Write(data)
|
||||
w.Close()
|
||||
compressed := b.Bytes()
|
||||
sr := NewReader(bytes.NewReader(compressed))
|
||||
decompressed, err := ioutil.ReadAll(sr)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if !bytes.Equal(decompressed, data) {
|
||||
t.Fatal("decompressed output doesn't match")
|
||||
}
|
||||
}
|
||||
|
||||
func benchmark(b *testing.B, filename string, m matchfinder.MatchFinder, blockSize int) {
|
||||
b.StopTimer()
|
||||
b.ReportAllocs()
|
||||
data, err := ioutil.ReadFile(filename)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
b.SetBytes(int64(len(data)))
|
||||
buf := new(bytes.Buffer)
|
||||
w := &matchfinder.Writer{
|
||||
Dest: buf,
|
||||
MatchFinder: m,
|
||||
Encoder: &Encoder{},
|
||||
BlockSize: blockSize,
|
||||
}
|
||||
w.Write(data)
|
||||
w.Close()
|
||||
b.ReportMetric(float64(len(data))/float64(buf.Len()), "ratio")
|
||||
b.StartTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
w.Reset(ioutil.Discard)
|
||||
w.Write(data)
|
||||
w.Close()
|
||||
}
|
||||
}
|
||||
|
||||
// TestEncodeM4 round-trips a sample text file through the M4 match finder
// with an 18-bit match window and 64 KiB blocks.
func TestEncodeM4(t *testing.T) {
	test(t, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 18}, 1<<16)
}
|
||||
|
||||
// BenchmarkEncodeM4 measures compression speed and ratio for the M4 match
// finder with a 20-bit match window and 64 KiB blocks.
func BenchmarkEncodeM4(b *testing.B) {
	benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20}, 1<<16)
}
|
||||
|
|
|
@ -39,8 +39,11 @@ func isMatch1(p1 []byte, p2 []byte, length uint) bool {
|
|||
return p1[4] == p2[4] && p1[5] == p2[5]
|
||||
}
|
||||
|
||||
/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
|
||||
"bits" based on "histogram" and stores it into the bit stream. */
|
||||
/*
|
||||
Builds a command and distance prefix code (each 64 symbols) into "depth" and
|
||||
|
||||
"bits" based on "histogram" and stores it into the bit stream.
|
||||
*/
|
||||
func buildAndStoreCommandPrefixCode(histogram []uint32, depth []byte, bits []uint16, storage_ix *uint, storage []byte) {
|
||||
var tree [129]huffmanTree
|
||||
var cmd_depth = [numCommandSymbols]byte{0}
|
||||
|
@ -216,6 +219,25 @@ func storeMetaBlockHeader(len uint, is_uncompressed bool, storage_ix *uint, stor
|
|||
writeSingleBit(is_uncompressed, storage_ix, storage)
|
||||
}
|
||||
|
||||
func storeMetaBlockHeaderBW(len uint, is_uncompressed bool, bw *bitWriter) {
|
||||
var nibbles uint = 6
|
||||
|
||||
/* ISLAST */
|
||||
bw.writeBits(1, 0)
|
||||
|
||||
if len <= 1<<16 {
|
||||
nibbles = 4
|
||||
} else if len <= 1<<20 {
|
||||
nibbles = 5
|
||||
}
|
||||
|
||||
bw.writeBits(2, uint64(nibbles)-4)
|
||||
bw.writeBits(nibbles*4, uint64(len)-1)
|
||||
|
||||
/* ISUNCOMPRESSED */
|
||||
bw.writeSingleBit(is_uncompressed)
|
||||
}
|
||||
|
||||
func createCommands(input []byte, block_size uint, input_size uint, base_ip_ptr []byte, table []int, table_bits uint, min_match uint, literals *[]byte, commands *[]uint32) {
|
||||
var ip int = 0
|
||||
var shift uint = 64 - table_bits
|
||||
|
@ -710,19 +732,22 @@ func compressFragmentTwoPassImpl(input []byte, input_size uint, is_last bool, co
|
|||
}
|
||||
}
|
||||
|
||||
/* Compresses "input" string to the "*storage" buffer as one or more complete
|
||||
meta-blocks, and updates the "*storage_ix" bit position.
|
||||
/*
|
||||
Compresses "input" string to the "*storage" buffer as one or more complete
|
||||
|
||||
If "is_last" is 1, emits an additional empty last meta-block.
|
||||
meta-blocks, and updates the "*storage_ix" bit position.
|
||||
|
||||
REQUIRES: "input_size" is greater than zero, or "is_last" is 1.
|
||||
REQUIRES: "input_size" is less or equal to maximal metablock size (1 << 24).
|
||||
REQUIRES: "command_buf" and "literal_buf" point to at least
|
||||
kCompressFragmentTwoPassBlockSize long arrays.
|
||||
REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
|
||||
REQUIRES: "table_size" is a power of two
|
||||
OUTPUT: maximal copy distance <= |input_size|
|
||||
OUTPUT: maximal copy distance <= BROTLI_MAX_BACKWARD_LIMIT(18) */
|
||||
If "is_last" is 1, emits an additional empty last meta-block.
|
||||
|
||||
REQUIRES: "input_size" is greater than zero, or "is_last" is 1.
|
||||
REQUIRES: "input_size" is less or equal to maximal metablock size (1 << 24).
|
||||
REQUIRES: "command_buf" and "literal_buf" point to at least
|
||||
kCompressFragmentTwoPassBlockSize long arrays.
|
||||
REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
|
||||
REQUIRES: "table_size" is a power of two
|
||||
OUTPUT: maximal copy distance <= |input_size|
|
||||
OUTPUT: maximal copy distance <= BROTLI_MAX_BACKWARD_LIMIT(18)
|
||||
*/
|
||||
func compressFragmentTwoPass(input []byte, input_size uint, is_last bool, command_buf []uint32, literal_buf []byte, table []int, table_size uint, storage_ix *uint, storage []byte) {
|
||||
var initial_storage_ix uint = *storage_ix
|
||||
var table_bits uint = uint(log2FloorNonZero(table_size))
|
||||
|
|
|
@ -0,0 +1,168 @@
|
|||
package brotli
|
||||
|
||||
import "github.com/andybalholm/brotli/matchfinder"
|
||||
|
||||
// An Encoder implements the matchfinder.Encoder interface, writing in Brotli format.
type Encoder struct {
	// wroteHeader records whether the stream header has been written yet.
	wroteHeader bool
	// bw accumulates the encoded output bits.
	bw bitWriter
	// distCache is scratch space holding the distance code computed for
	// each match during Encode's first pass, reused by its second pass.
	distCache []distanceCode
}
|
||||
|
||||
// Reset prepares the Encoder for a new stream: the next Encode call will
// write the stream header again. distCache is deliberately kept as
// reusable scratch space; Encode only reads entries it wrote in the same
// call.
func (e *Encoder) Reset() {
	e.wroteHeader = false
	e.bw = bitWriter{}
}
|
||||
|
||||
// Encode appends one Brotli meta-block to dst, encoding src as described
// by matches, and returns the extended buffer. On the first call of a
// stream it also writes the stream header. If lastBlock is true, it
// additionally writes the final empty meta-block and pads to a byte
// boundary, completing the stream.
func (e *Encoder) Encode(dst []byte, src []byte, matches []matchfinder.Match, lastBlock bool) []byte {
	e.bw.dst = dst
	if !e.wroteHeader {
		// Stream header: window-size code 15.
		e.bw.writeBits(4, 15)
		e.wroteHeader = true
	}

	var literalHisto [256]uint32
	var commandHisto [704]uint32
	var distanceHisto [64]uint32
	literalCount := 0
	commandCount := 0
	distanceCount := 0

	if len(e.distCache) < len(matches) {
		e.distCache = make([]distanceCode, len(matches))
	}

	// first pass: build the histograms
	pos := 0

	// d is the ring buffer of the last 4 distances.
	d := [4]int{-10, -10, -10, -10}
	for i, m := range matches {
		if m.Unmatched > 0 {
			for _, c := range src[pos : pos+m.Unmatched] {
				literalHisto[c]++
			}
			literalCount += m.Unmatched
		}

		insertCode := getInsertLengthCode(uint(m.Unmatched))
		copyCode := getCopyLengthCode(uint(m.Length))
		if m.Length == 0 {
			// If the stream ends with unmatched bytes, we need a dummy copy length.
			copyCode = 2
		}
		command := combineLengthCodes(insertCode, copyCode, false)
		commandHisto[command]++
		commandCount++

		// Commands below 128 have an implicit distance, so only commands
		// >= 128 with a real match carry a distance code.
		if command >= 128 && m.Length != 0 {
			var distCode distanceCode
			// Codes 0-9 reference the distance ring buffer (last
			// distance, second-to-last, etc., plus small offsets).
			switch m.Distance {
			case d[3]:
				distCode.code = 0
			case d[2]:
				distCode.code = 1
			case d[1]:
				distCode.code = 2
			case d[0]:
				distCode.code = 3
			case d[3] - 1:
				distCode.code = 4
			case d[3] + 1:
				distCode.code = 5
			case d[3] - 2:
				distCode.code = 6
			case d[3] + 2:
				distCode.code = 7
			case d[3] - 3:
				distCode.code = 8
			case d[3] + 3:
				distCode.code = 9

			// In my testing, codes 10–15 actually reduced the compression ratio.

			default:
				distCode = getDistanceCode(m.Distance)
			}
			// Remember the code so the second pass doesn't have to
			// recompute it (and can't, since d has moved on by then).
			e.distCache[i] = distCode
			distanceHisto[distCode.code]++
			distanceCount++
			if distCode.code != 0 {
				d[0], d[1], d[2], d[3] = d[1], d[2], d[3], m.Distance
			}
		}

		pos += m.Unmatched + m.Length
	}

	storeMetaBlockHeaderBW(uint(len(src)), false, &e.bw)
	// 13 zero bits: single block type for each category, no context
	// modeling, and default distance parameters.
	e.bw.writeBits(13, 0)

	var literalDepths [256]byte
	var literalBits [256]uint16
	buildAndStoreHuffmanTreeFastBW(literalHisto[:], uint(literalCount), 8, literalDepths[:], literalBits[:], &e.bw)

	var commandDepths [704]byte
	var commandBits [704]uint16
	buildAndStoreHuffmanTreeFastBW(commandHisto[:], uint(commandCount), 10, commandDepths[:], commandBits[:], &e.bw)

	var distanceDepths [64]byte
	var distanceBits [64]uint16
	buildAndStoreHuffmanTreeFastBW(distanceHisto[:], uint(distanceCount), 6, distanceDepths[:], distanceBits[:], &e.bw)

	// second pass: emit the commands, literals, and distances using the
	// Huffman codes built above.
	pos = 0
	for i, m := range matches {
		insertCode := getInsertLengthCode(uint(m.Unmatched))
		copyCode := getCopyLengthCode(uint(m.Length))
		if m.Length == 0 {
			// If the stream ends with unmatched bytes, we need a dummy copy length.
			copyCode = 2
		}
		command := combineLengthCodes(insertCode, copyCode, false)
		e.bw.writeBits(uint(commandDepths[command]), uint64(commandBits[command]))
		if kInsExtra[insertCode] > 0 {
			e.bw.writeBits(uint(kInsExtra[insertCode]), uint64(m.Unmatched)-uint64(kInsBase[insertCode]))
		}
		if kCopyExtra[copyCode] > 0 {
			e.bw.writeBits(uint(kCopyExtra[copyCode]), uint64(m.Length)-uint64(kCopyBase[copyCode]))
		}

		if m.Unmatched > 0 {
			for _, c := range src[pos : pos+m.Unmatched] {
				e.bw.writeBits(uint(literalDepths[c]), uint64(literalBits[c]))
			}
		}

		if command >= 128 && m.Length != 0 {
			distCode := e.distCache[i]
			e.bw.writeBits(uint(distanceDepths[distCode.code]), uint64(distanceBits[distCode.code]))
			if distCode.nExtra > 0 {
				e.bw.writeBits(distCode.nExtra, distCode.extraBits)
			}
		}

		pos += m.Unmatched + m.Length
	}

	if lastBlock {
		e.bw.writeBits(2, 3) // islast + isempty
		e.bw.jumpToByteBoundary()
	}
	return e.bw.dst
}
|
||||
|
||||
// A distanceCode is a Brotli distance symbol together with its extra bits.
type distanceCode struct {
	// code is the distance symbol (0-15 are ring-buffer codes; see
	// Encoder.Encode).
	code int
	// nExtra is the number of extra bits that follow the symbol.
	nExtra uint
	// extraBits is the value of those extra bits.
	extraBits uint64
}
|
||||
|
||||
// getDistanceCode converts a match distance into a direct distance code
// (symbols 16 and up; 0-15 are the ring-buffer codes handled in
// Encoder.Encode) plus its extra bits, assuming NPOSTFIX = 0 and
// NDIRECT = 0.
func getDistanceCode(distance int) distanceCode {
	// Direct distances start at 1 but are biased by the ring-buffer
	// offsets, hence the +3.
	d := distance + 3
	nbits := log2FloorNonZero(uint(d)) - 1
	// prefix is the bit just below the leading one of d; together with
	// nbits it selects one of two symbols per magnitude range.
	prefix := (d >> nbits) & 1
	offset := (2 + prefix) << nbits
	distcode := int(2*(nbits-1)) + prefix + 16
	extra := d - offset
	return distanceCode{distcode, uint(nbits), uint64(extra)}
}
|
|
@ -782,6 +782,11 @@ func storeStaticCodeLengthCode(storage_ix *uint, storage []byte) {
|
|||
writeBits(40, 0x0000FF55555554, storage_ix, storage)
|
||||
}
|
||||
|
||||
// storeStaticCodeLengthCodeBW writes the same static code-length code as
// storeStaticCodeLengthCode (the 40-bit pattern 0x0000FF55555554), but to
// a bitWriter. The write is split into 32 + 8 bits because writeBits may
// already be holding up to 31 pending bits, and a single 40-bit write
// could shift data past the top of its 64-bit accumulator.
func storeStaticCodeLengthCodeBW(bw *bitWriter) {
	bw.writeBits(32, 0x55555554)
	bw.writeBits(8, 0xFF)
}
|
||||
|
||||
var kZeroRepsBits = [numCommandSymbols]uint64{
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
|
|
2
go.mod
2
go.mod
|
@ -1,5 +1,5 @@
|
|||
module github.com/andybalholm/brotli
|
||||
|
||||
go 1.12
|
||||
go 1.13
|
||||
|
||||
retract v1.0.1 // occasional panics and data corruption
|
||||
|
|
|
@ -0,0 +1,270 @@
|
|||
package matchfinder
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"math/bits"
|
||||
"runtime"
|
||||
)
|
||||
|
||||
const (
|
||||
ssapBits = 17
|
||||
ssapMask = (1 << ssapBits) - 1
|
||||
)
|
||||
|
||||
// M4 is an implementation of the MatchFinder
// interface that uses a simple hash table to find matches,
// but the advanced parsing technique from
// https://fastcompression.blogspot.com/2011/12/advanced-parsing-strategies.html,
// except that it looks for matches at every input position.
type M4 struct {
	// MaxDistance is the maximum distance (in bytes) to look back for
	// a match. The default is 65535.
	MaxDistance int

	// MinLength is the length of the shortest match to return.
	// The default is 4.
	MinLength int

	// HashLen is the number of bytes to use to calculate the hashes.
	// The maximum is 8 and the default is 6.
	HashLen int

	// table maps hashes of upcoming bytes to the most recent position
	// (an index into history) where those bytes were seen.
	table [1 << ssapBits]uint32

	// history retains previously seen data (trimmed relative to
	// MaxDistance) so matches can reach back across FindMatches calls.
	history []byte
}
|
||||
|
||||
// Reset clears all match-finding state so the M4 can start on a new
// stream. The history slice is truncated but its capacity is kept for
// reuse.
func (q *M4) Reset() {
	q.table = [1 << ssapBits]uint32{}
	q.history = q.history[:0]
}
|
||||
|
||||
func (q *M4) FindMatches(dst []Match, src []byte) []Match {
|
||||
if q.MaxDistance == 0 {
|
||||
q.MaxDistance = 65535
|
||||
}
|
||||
if q.MinLength == 0 {
|
||||
q.MinLength = 4
|
||||
}
|
||||
if q.HashLen == 0 {
|
||||
q.HashLen = 6
|
||||
}
|
||||
var nextEmit int
|
||||
|
||||
if len(q.history) > q.MaxDistance*2 {
|
||||
// Trim down the history buffer.
|
||||
delta := len(q.history) - q.MaxDistance
|
||||
copy(q.history, q.history[delta:])
|
||||
q.history = q.history[:q.MaxDistance]
|
||||
|
||||
for i, v := range q.table {
|
||||
newV := int(v) - delta
|
||||
if newV < 0 {
|
||||
newV = 0
|
||||
}
|
||||
q.table[i] = uint32(newV)
|
||||
}
|
||||
}
|
||||
|
||||
// Append src to the history buffer.
|
||||
nextEmit = len(q.history)
|
||||
q.history = append(q.history, src...)
|
||||
src = q.history
|
||||
|
||||
// matches stores the matches that have been found but not emitted,
|
||||
// in reverse order. (matches[0] is the most recent one.)
|
||||
var matches [3]absoluteMatch
|
||||
for i := nextEmit; i < len(src)-7; i++ {
|
||||
if matches[0] != (absoluteMatch{}) && i >= matches[0].End {
|
||||
// We have found some matches, and we're far enough along that we probably
|
||||
// won't find overlapping matches, so we might as well emit them.
|
||||
if matches[1] != (absoluteMatch{}) {
|
||||
if matches[1].End > matches[0].Start {
|
||||
matches[1].End = matches[0].Start
|
||||
}
|
||||
if matches[1].End-matches[1].Start >= q.MinLength {
|
||||
dst = append(dst, Match{
|
||||
Unmatched: matches[1].Start - nextEmit,
|
||||
Length: matches[1].End - matches[1].Start,
|
||||
Distance: matches[1].Start - matches[1].Match,
|
||||
})
|
||||
nextEmit = matches[1].End
|
||||
}
|
||||
}
|
||||
dst = append(dst, Match{
|
||||
Unmatched: matches[0].Start - nextEmit,
|
||||
Length: matches[0].End - matches[0].Start,
|
||||
Distance: matches[0].Start - matches[0].Match,
|
||||
})
|
||||
nextEmit = matches[0].End
|
||||
matches = [3]absoluteMatch{}
|
||||
}
|
||||
|
||||
// Now look for a match.
|
||||
h := ((binary.LittleEndian.Uint64(src[i:]) & (1<<(8*q.HashLen) - 1)) * hashMul64) >> (64 - ssapBits)
|
||||
candidate := int(q.table[h&ssapMask])
|
||||
q.table[h&ssapMask] = uint32(i)
|
||||
|
||||
if candidate == 0 || i-candidate > q.MaxDistance || i-candidate == matches[0].Start-matches[0].Match {
|
||||
continue
|
||||
}
|
||||
if binary.LittleEndian.Uint32(src[candidate:]) != binary.LittleEndian.Uint32(src[i:]) {
|
||||
continue
|
||||
}
|
||||
|
||||
// We have a 4-byte match now.
|
||||
|
||||
start := i
|
||||
match := candidate
|
||||
end := extendMatch(src, match+4, start+4)
|
||||
for start > nextEmit && match > 0 && src[start-1] == src[match-1] {
|
||||
start--
|
||||
match--
|
||||
}
|
||||
if end-start <= matches[0].End-matches[0].Start {
|
||||
continue
|
||||
}
|
||||
|
||||
matches = [3]absoluteMatch{
|
||||
absoluteMatch{
|
||||
Start: start,
|
||||
End: end,
|
||||
Match: match,
|
||||
},
|
||||
matches[0],
|
||||
matches[1],
|
||||
}
|
||||
|
||||
if matches[2] == (absoluteMatch{}) {
|
||||
continue
|
||||
}
|
||||
|
||||
// We have three matches, so it's time to emit one and/or eliminate one.
|
||||
switch {
|
||||
case matches[0].Start < matches[2].End:
|
||||
// The first and third matches overlap; discard the one in between.
|
||||
matches = [3]absoluteMatch{
|
||||
matches[0],
|
||||
matches[2],
|
||||
absoluteMatch{},
|
||||
}
|
||||
|
||||
case matches[0].Start < matches[2].End+q.MinLength:
|
||||
// The first and third matches don't overlap, but there's no room for
|
||||
// another match between them. Emit the first match and discard the second.
|
||||
dst = append(dst, Match{
|
||||
Unmatched: matches[2].Start - nextEmit,
|
||||
Length: matches[2].End - matches[2].Start,
|
||||
Distance: matches[2].Start - matches[2].Match,
|
||||
})
|
||||
nextEmit = matches[2].End
|
||||
matches = [3]absoluteMatch{
|
||||
matches[0],
|
||||
absoluteMatch{},
|
||||
absoluteMatch{},
|
||||
}
|
||||
|
||||
default:
|
||||
// Emit the first match, shortening it if necessary to avoid overlap with the second.
|
||||
if matches[2].End > matches[1].Start {
|
||||
matches[2].End = matches[1].Start
|
||||
}
|
||||
if matches[2].End-matches[2].Start >= q.MinLength {
|
||||
dst = append(dst, Match{
|
||||
Unmatched: matches[2].Start - nextEmit,
|
||||
Length: matches[2].End - matches[2].Start,
|
||||
Distance: matches[2].Start - matches[2].Match,
|
||||
})
|
||||
nextEmit = matches[2].End
|
||||
}
|
||||
matches[2] = absoluteMatch{}
|
||||
}
|
||||
}
|
||||
|
||||
// We've found all the matches now; emit the remaining ones.
|
||||
if matches[1] != (absoluteMatch{}) {
|
||||
if matches[1].End > matches[0].Start {
|
||||
matches[1].End = matches[0].Start
|
||||
}
|
||||
if matches[1].End-matches[1].Start >= q.MinLength {
|
||||
dst = append(dst, Match{
|
||||
Unmatched: matches[1].Start - nextEmit,
|
||||
Length: matches[1].End - matches[1].Start,
|
||||
Distance: matches[1].Start - matches[1].Match,
|
||||
})
|
||||
nextEmit = matches[1].End
|
||||
}
|
||||
}
|
||||
if matches[0] != (absoluteMatch{}) {
|
||||
dst = append(dst, Match{
|
||||
Unmatched: matches[0].Start - nextEmit,
|
||||
Length: matches[0].End - matches[0].Start,
|
||||
Distance: matches[0].Start - matches[0].Match,
|
||||
})
|
||||
nextEmit = matches[0].End
|
||||
}
|
||||
|
||||
if nextEmit < len(src) {
|
||||
dst = append(dst, Match{
|
||||
Unmatched: len(src) - nextEmit,
|
||||
})
|
||||
}
|
||||
|
||||
return dst
|
||||
}
|
||||
|
||||
// hashMul64 is the multiplier for the multiplicative hash that indexes the
// candidate table (the hash is computed as (bytes * hashMul64) >> shift).
const hashMul64 = 0x1E35A7BD1E35A7BD
|
||||
|
||||
// An absoluteMatch is like a Match, but it stores indexes into the byte
// stream instead of lengths.
//
// The zero value is used as a sentinel meaning "no match".
type absoluteMatch struct {
	// Start is the index of the first byte.
	Start int

	// End is the index of the byte after the last byte
	// (so that End - Start = Length).
	End int

	// Match is the index of the previous data that matches
	// (Start - Match = Distance).
	Match int
}
|
||||
|
||||
// extendMatch returns the largest k such that k <= len(src) and that
|
||||
// src[i:i+k-j] and src[j:k] have the same contents.
|
||||
//
|
||||
// It assumes that:
|
||||
//
|
||||
// 0 <= i && i < j && j <= len(src)
|
||||
func extendMatch(src []byte, i, j int) int {
|
||||
switch runtime.GOARCH {
|
||||
case "amd64":
|
||||
// As long as we are 8 or more bytes before the end of src, we can load and
|
||||
// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
|
||||
for j+8 < len(src) {
|
||||
iBytes := binary.LittleEndian.Uint64(src[i:])
|
||||
jBytes := binary.LittleEndian.Uint64(src[j:])
|
||||
if iBytes != jBytes {
|
||||
// If those 8 bytes were not equal, XOR the two 8 byte values, and return
|
||||
// the index of the first byte that differs. The BSF instruction finds the
|
||||
// least significant 1 bit, the amd64 architecture is little-endian, and
|
||||
// the shift by 3 converts a bit index to a byte index.
|
||||
return j + bits.TrailingZeros64(iBytes^jBytes)>>3
|
||||
}
|
||||
i, j = i+8, j+8
|
||||
}
|
||||
case "386":
|
||||
// On a 32-bit CPU, we do it 4 bytes at a time.
|
||||
for j+4 < len(src) {
|
||||
iBytes := binary.LittleEndian.Uint32(src[i:])
|
||||
jBytes := binary.LittleEndian.Uint32(src[j:])
|
||||
if iBytes != jBytes {
|
||||
return j + bits.TrailingZeros32(iBytes^jBytes)>>3
|
||||
}
|
||||
i, j = i+4, j+4
|
||||
}
|
||||
}
|
||||
for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 {
|
||||
}
|
||||
return j
|
||||
}
|
|
@ -0,0 +1,103 @@
|
|||
// The matchfinder package defines reusable components for data compression.
|
||||
//
|
||||
// Many compression libraries have two main parts:
|
||||
// - Something that looks for repeated sequences of bytes
|
||||
// - An encoder for the compressed data format (often an entropy coder)
|
||||
//
|
||||
// Although these are logically two separate steps, the implementations are
|
||||
// usually closely tied together. You can't use flate's matcher with snappy's
|
||||
// encoder, for example. This package defines interfaces and an intermediate
|
||||
// representation to allow mixing and matching compression components.
|
||||
package matchfinder
|
||||
|
||||
import "io"
|
||||
|
||||
// A Match is the basic unit of LZ77 compression.
//
// A block is described by a sequence of Matches: each one represents
// Unmatched literal bytes copied through verbatim, followed by Length bytes
// copied from Distance bytes earlier in the stream.
type Match struct {
	Unmatched int // the number of unmatched bytes since the previous match
	Length    int // the number of bytes in the matched string; it may be 0 at the end of the input
	Distance  int // how far back in the stream to copy from
}
|
||||
|
||||
// A MatchFinder performs the LZ77 stage of compression, looking for matches.
//
// Implementations may keep history between calls, so matches can refer to
// data from previous blocks; wrap with AutoReset to prevent that.
type MatchFinder interface {
	// FindMatches looks for matches in src, appends them to dst, and returns dst.
	FindMatches(dst []Match, src []byte) []Match

	// Reset clears any internal state, preparing the MatchFinder to be used with
	// a new stream.
	Reset()
}
|
||||
|
||||
// An Encoder encodes the data in its final format.
type Encoder interface {
	// Encode appends the encoded format of src to dst, using the match
	// information from matches. lastBlock is true for the final block of a
	// stream (see Writer.Close).
	Encode(dst []byte, src []byte, matches []Match, lastBlock bool) []byte

	// Reset clears any internal state, preparing the Encoder to be used with
	// a new stream.
	Reset()
}
|
||||
|
||||
// A Writer uses MatchFinder and Encoder to write compressed data to Dest.
type Writer struct {
	Dest        io.Writer
	MatchFinder MatchFinder
	Encoder     Encoder

	// BlockSize is the number of bytes to compress at a time. If it is zero,
	// each Write operation will be treated as one block.
	BlockSize int

	err     error   // sticky error; once set, Write returns it without writing
	inBuf   []byte  // input buffered until a full block accumulates (BlockSize > 0)
	outBuf  []byte  // scratch buffer for encoded output, reused across blocks
	matches []Match // scratch slice for found matches, reused across blocks
}
|
||||
|
||||
func (w *Writer) Write(p []byte) (n int, err error) {
|
||||
if w.err != nil {
|
||||
return 0, w.err
|
||||
}
|
||||
|
||||
if w.BlockSize == 0 {
|
||||
return w.writeBlock(p, false)
|
||||
}
|
||||
|
||||
w.inBuf = append(w.inBuf, p...)
|
||||
var pos int
|
||||
for pos = 0; pos+w.BlockSize <= len(w.inBuf) && w.err == nil; pos += w.BlockSize {
|
||||
w.writeBlock(w.inBuf[pos:pos+w.BlockSize], false)
|
||||
}
|
||||
if pos > 0 {
|
||||
n := copy(w.inBuf, w.inBuf[pos:])
|
||||
w.inBuf = w.inBuf[:n]
|
||||
}
|
||||
|
||||
return len(p), w.err
|
||||
}
|
||||
|
||||
func (w *Writer) writeBlock(p []byte, lastBlock bool) (n int, err error) {
|
||||
w.outBuf = w.outBuf[:0]
|
||||
w.matches = w.MatchFinder.FindMatches(w.matches[:0], p)
|
||||
w.outBuf = w.Encoder.Encode(w.outBuf, p, w.matches, lastBlock)
|
||||
_, w.err = w.Dest.Write(w.outBuf)
|
||||
return len(p), w.err
|
||||
}
|
||||
|
||||
func (w *Writer) Close() error {
|
||||
w.writeBlock(w.inBuf, true)
|
||||
w.inBuf = w.inBuf[:0]
|
||||
return w.err
|
||||
}
|
||||
|
||||
func (w *Writer) Reset(newDest io.Writer) {
|
||||
w.MatchFinder.Reset()
|
||||
w.Encoder.Reset()
|
||||
w.err = nil
|
||||
w.inBuf = w.inBuf[:0]
|
||||
w.outBuf = w.outBuf[:0]
|
||||
w.matches = w.matches[:0]
|
||||
w.Dest = newDest
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
package matchfinder
|
||||
|
||||
import "fmt"
|
||||
|
||||
// A TextEncoder is an Encoder that produces a human-readable representation of
|
||||
// the LZ77 compression. Matches are replaced with <Length,Distance> symbols.
|
||||
type TextEncoder struct{}
|
||||
|
||||
func (t TextEncoder) Reset() {}
|
||||
|
||||
func (t TextEncoder) Encode(dst []byte, src []byte, matches []Match, lastBlock bool) []byte {
|
||||
pos := 0
|
||||
for _, m := range matches {
|
||||
if m.Unmatched > 0 {
|
||||
dst = append(dst, src[pos:pos+m.Unmatched]...)
|
||||
pos += m.Unmatched
|
||||
}
|
||||
if m.Length > 0 {
|
||||
dst = append(dst, []byte(fmt.Sprintf("<%d,%d>", m.Length, m.Distance))...)
|
||||
pos += m.Length
|
||||
}
|
||||
}
|
||||
if pos < len(src) {
|
||||
dst = append(dst, src[pos:]...)
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
// A NoMatchFinder implements MatchFinder, but doesn't find any matches.
|
||||
// It can be used to implement the equivalent of the standard library flate package's
|
||||
// HuffmanOnly setting.
|
||||
type NoMatchFinder struct{}
|
||||
|
||||
func (n NoMatchFinder) Reset() {}
|
||||
|
||||
func (n NoMatchFinder) FindMatches(dst []Match, src []byte) []Match {
|
||||
return append(dst, Match{
|
||||
Unmatched: len(src),
|
||||
})
|
||||
}
|
||||
|
||||
// AutoReset wraps a MatchFinder that can return references to data in previous
// blocks, and calls Reset before each block. It is useful for (e.g.) using a
// snappy Encoder with a MatchFinder designed for flate. (Snappy doesn't
// support references between blocks.)
type AutoReset struct {
	MatchFinder
}

// FindMatches resets the wrapped MatchFinder before delegating to it, so
// that no returned match can refer to data before the current block.
func (a AutoReset) FindMatches(dst []Match, src []byte) []Match {
	a.Reset()
	return a.MatchFinder.FindMatches(dst, src)
}
|
Loading…
Reference in New Issue