matchfinder.M4: add LimitedSearch option
When LimitedSearch is enabled, M4 checks for an overlapping match at only one position within the current match instead of at every byte. This gains about 50% in compression speed while losing only about 2% in compression ratio.
This commit is contained in:
parent
924a0eb0c6
commit
63f3f4372d
|
@ -657,3 +657,11 @@ func TestEncodeM4(t *testing.T) {
|
||||||
func BenchmarkEncodeM4(b *testing.B) {
|
func BenchmarkEncodeM4(b *testing.B) {
|
||||||
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20}, 1<<16)
|
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestEncodeM4Limited(t *testing.T) {
|
||||||
|
test(t, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 18, LimitedSearch: true}, 1<<16)
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkEncodeM4Limited(b *testing.B) {
|
||||||
|
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, LimitedSearch: true}, 1<<16)
|
||||||
|
}
|
||||||
|
|
|
@ -10,7 +10,7 @@ import (
|
||||||
// interface that uses a simple hash table to find matches,
|
// interface that uses a simple hash table to find matches,
|
||||||
// but the advanced parsing technique from
|
// but the advanced parsing technique from
|
||||||
// https://fastcompression.blogspot.com/2011/12/advanced-parsing-strategies.html,
|
// https://fastcompression.blogspot.com/2011/12/advanced-parsing-strategies.html,
|
||||||
// except that it looks for matches at every input position.
|
// except that it normally looks for matches at every input position.
|
||||||
type M4 struct {
|
type M4 struct {
|
||||||
// MaxDistance is the maximum distance (in bytes) to look back for
|
// MaxDistance is the maximum distance (in bytes) to look back for
|
||||||
// a match. The default is 65535.
|
// a match. The default is 65535.
|
||||||
|
@ -28,6 +28,11 @@ type M4 struct {
|
||||||
// The default is 17 (128K entries).
|
// The default is 17 (128K entries).
|
||||||
TableBits int
|
TableBits int
|
||||||
|
|
||||||
|
// When LimitedSearch is true, it only looks for matches at certain
|
||||||
|
// points in the input rather than at every byte.
|
||||||
|
// (This makes compression faster, but hurts the compression ratio.)
|
||||||
|
LimitedSearch bool
|
||||||
|
|
||||||
table []uint32
|
table []uint32
|
||||||
|
|
||||||
history []byte
|
history []byte
|
||||||
|
@ -97,6 +102,10 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {
|
||||||
candidate := int(q.table[h])
|
candidate := int(q.table[h])
|
||||||
q.table[h] = uint32(i)
|
q.table[h] = uint32(i)
|
||||||
|
|
||||||
|
if q.LimitedSearch && i < matches[0].End && i != matches[0].End+2-q.HashLen {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
if candidate == 0 || i-candidate > q.MaxDistance || i-candidate == matches[0].Start-matches[0].Match {
|
if candidate == 0 || i-candidate > q.MaxDistance || i-candidate == matches[0].Start-matches[0].Match {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue