diff --git a/brotli_test.go b/brotli_test.go index 4dd8b54..4ad417b 100644 --- a/brotli_test.go +++ b/brotli_test.go @@ -657,3 +657,11 @@ func TestEncodeM4(t *testing.T) { func BenchmarkEncodeM4(b *testing.B) { benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20}, 1<<16) } + +func TestEncodeM4Limited(t *testing.T) { + test(t, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 18, LimitedSearch: true}, 1<<16) +} + +func BenchmarkEncodeM4Limited(b *testing.B) { + benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, LimitedSearch: true}, 1<<16) +} diff --git a/matchfinder/m4.go b/matchfinder/m4.go index fb5c83d..d16f892 100644 --- a/matchfinder/m4.go +++ b/matchfinder/m4.go @@ -10,7 +10,7 @@ import ( // interface that uses a simple hash table to find matches, // but the advanced parsing technique from // https://fastcompression.blogspot.com/2011/12/advanced-parsing-strategies.html, -// except that it looks for matches at every input position. +// except that it normally looks for matches at every input position. type M4 struct { // MaxDistance is the maximum distance (in bytes) to look back for // a match. The default is 65535. @@ -28,6 +28,11 @@ type M4 struct { // The default is 17 (128K entries). TableBits int + // When LimitedSearch is true, it only looks for matches at certain + // points in the input rather than at every byte. + // (This makes compression faster, but hurts the compression ratio.) + LimitedSearch bool + table []uint32 history []byte @@ -97,6 +102,10 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match { candidate := int(q.table[h]) q.table[h] = uint32(i) + if q.LimitedSearch && i < matches[0].End && i != matches[0].End+2-q.HashLen { + continue + } + if candidate == 0 || i-candidate > q.MaxDistance || i-candidate == matches[0].Start-matches[0].Match { continue }