matchfinder: replace Score function with DistanceBitCost
This commit is contained in:
parent
578645e154
commit
a8d524a96d
|
@ -1,7 +1,5 @@
|
||||||
package brotli
|
package brotli
|
||||||
|
|
||||||
import "github.com/andybalholm/brotli/matchfinder"
|
|
||||||
|
|
||||||
/* Copyright 2010 Google Inc. All Rights Reserved.
|
/* Copyright 2010 Google Inc. All Rights Reserved.
|
||||||
|
|
||||||
Distributed under MIT license.
|
Distributed under MIT license.
|
||||||
|
@ -56,7 +54,3 @@ func (w *bitWriter) jumpToByteBoundary() {
|
||||||
w.bits = 0
|
w.bits = 0
|
||||||
w.dst = dst
|
w.dst = dst
|
||||||
}
|
}
|
||||||
|
|
||||||
// matchScore rates a match by passing its length (m.End - m.Start) and its
// backward distance (m.Start - m.Match) to backwardReferenceScore, and
// converts the result to int.
func matchScore(m matchfinder.AbsoluteMatch) int {
|
|
||||||
return int(backwardReferenceScore(uint(m.End-m.Start), uint(m.Start-m.Match)))
|
|
||||||
}
|
|
||||||
|
|
|
@ -651,69 +651,69 @@ func benchmark(b *testing.B, filename string, m matchfinder.MatchFinder, blockSi
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestEncodeM4(t *testing.T) {
|
func TestEncodeM4(t *testing.T) {
|
||||||
test(t, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 18, Score: matchScore}, 1<<16)
|
test(t, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 18, DistanceBitCost: 57}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkEncodeM4(b *testing.B) {
|
func BenchmarkEncodeM4(b *testing.B) {
|
||||||
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, Score: matchScore}, 1<<16)
|
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, DistanceBitCost: 57}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestEncodeM4Chain1(t *testing.T) {
|
func TestEncodeM4Chain1(t *testing.T) {
|
||||||
test(t, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 18, ChainLength: 1, Score: matchScore}, 1<<16)
|
test(t, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 18, ChainLength: 1, DistanceBitCost: 57}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkEncodeM4Chain1(b *testing.B) {
|
func BenchmarkEncodeM4Chain1(b *testing.B) {
|
||||||
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 1, Score: matchScore}, 1<<16)
|
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 1, DistanceBitCost: 57}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkEncodeM4Chain2(b *testing.B) {
|
func BenchmarkEncodeM4Chain2(b *testing.B) {
|
||||||
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 2, Score: matchScore}, 1<<16)
|
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 2, DistanceBitCost: 57}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkEncodeM4Chain4(b *testing.B) {
|
func BenchmarkEncodeM4Chain4(b *testing.B) {
|
||||||
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 4, Score: matchScore}, 1<<16)
|
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 4, DistanceBitCost: 57}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkEncodeM4Chain8(b *testing.B) {
|
func BenchmarkEncodeM4Chain8(b *testing.B) {
|
||||||
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 8, HashLen: 5, Score: matchScore}, 1<<16)
|
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 8, HashLen: 5, DistanceBitCost: 57}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkEncodeM4Chain16(b *testing.B) {
|
func BenchmarkEncodeM4Chain16(b *testing.B) {
|
||||||
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 16, HashLen: 5, Score: matchScore}, 1<<16)
|
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 16, HashLen: 5, DistanceBitCost: 57}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkEncodeM4Chain32(b *testing.B) {
|
func BenchmarkEncodeM4Chain32(b *testing.B) {
|
||||||
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 32, HashLen: 5, Score: matchScore}, 1<<16)
|
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 32, HashLen: 5, DistanceBitCost: 57}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkEncodeM4Chain64(b *testing.B) {
|
func BenchmarkEncodeM4Chain64(b *testing.B) {
|
||||||
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 64, HashLen: 5, Score: matchScore}, 1<<16)
|
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 64, HashLen: 5, DistanceBitCost: 57}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkEncodeM4Chain128(b *testing.B) {
|
func BenchmarkEncodeM4Chain128(b *testing.B) {
|
||||||
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 128, HashLen: 5, Score: matchScore}, 1<<16)
|
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 128, HashLen: 5, DistanceBitCost: 57}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestEncodeMultiHash6(t *testing.T) {
|
func TestEncodeMultiHash6(t *testing.T) {
|
||||||
test(t, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.MultiHash{MaxDistance: 1 << 18, Score: matchScore, HashLengths: []int{6}}, 1<<16)
|
test(t, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.MultiHash{MaxDistance: 1 << 18, DistanceBitCost: 57, HashLengths: []int{6}}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestEncodeMultiHash6_8(t *testing.T) {
|
func TestEncodeMultiHash6_8(t *testing.T) {
|
||||||
test(t, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.MultiHash{MaxDistance: 1 << 18, Score: matchScore, HashLengths: []int{6, 8}}, 1<<16)
|
test(t, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.MultiHash{MaxDistance: 1 << 18, DistanceBitCost: 57, HashLengths: []int{6, 8}}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkEncodeMultiHash6(b *testing.B) {
|
func BenchmarkEncodeMultiHash6(b *testing.B) {
|
||||||
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.MultiHash{MaxDistance: 1 << 20, Score: matchScore, HashLengths: []int{6}}, 1<<16)
|
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.MultiHash{MaxDistance: 1 << 20, DistanceBitCost: 57, HashLengths: []int{6}}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkEncodeMultiHash5_8(b *testing.B) {
|
func BenchmarkEncodeMultiHash5_8(b *testing.B) {
|
||||||
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.MultiHash{MaxDistance: 1 << 20, Score: matchScore, HashLengths: []int{5, 8}}, 1<<16)
|
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.MultiHash{MaxDistance: 1 << 20, DistanceBitCost: 57, HashLengths: []int{5, 8}}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkEncodeMultiHash5_7_9(b *testing.B) {
|
func BenchmarkEncodeMultiHash5_7_9(b *testing.B) {
|
||||||
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.MultiHash{MaxDistance: 1 << 20, Score: matchScore, HashLengths: []int{5, 7, 9}}, 1<<16)
|
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.MultiHash{MaxDistance: 1 << 20, DistanceBitCost: 57, HashLengths: []int{5, 7, 9}}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkEncodeMultiHash5_6_7_9(b *testing.B) {
|
func BenchmarkEncodeMultiHash5_6_7_9(b *testing.B) {
|
||||||
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.MultiHash{MaxDistance: 1 << 20, Score: matchScore, HashLengths: []int{5, 6, 7, 9}}, 1<<16)
|
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.MultiHash{MaxDistance: 1 << 20, DistanceBitCost: 57, HashLengths: []int{5, 6, 7, 9}}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
package matchfinder
|
package matchfinder
|
||||||
|
|
||||||
// An AbsoluteMatch is like a Match, but it stores indexes into the byte
|
// An absoluteMatch is like a Match, but it stores indexes into the byte
|
||||||
// stream instead of lengths.
|
// stream instead of lengths.
|
||||||
type AbsoluteMatch struct {
|
type absoluteMatch struct {
|
||||||
// Start is the index of the first byte.
|
// Start is the index of the first byte.
|
||||||
Start int
|
Start int
|
||||||
|
|
||||||
|
@ -24,7 +24,7 @@ type matchEmitter struct {
|
||||||
NextEmit int
|
NextEmit int
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *matchEmitter) emit(m AbsoluteMatch) {
|
func (e *matchEmitter) emit(m absoluteMatch) {
|
||||||
e.Dst = append(e.Dst, Match{
|
e.Dst = append(e.Dst, Match{
|
||||||
Unmatched: m.Start - e.NextEmit,
|
Unmatched: m.Start - e.NextEmit,
|
||||||
Length: m.End - m.Start,
|
Length: m.End - m.Start,
|
||||||
|
@ -35,7 +35,7 @@ func (e *matchEmitter) emit(m AbsoluteMatch) {
|
||||||
|
|
||||||
// trim shortens m if it extends past maxEnd. Then if the length is at least
|
// trim shortens m if it extends past maxEnd. Then if the length is at least
|
||||||
// minLength, the match is emitted.
|
// minLength, the match is emitted.
|
||||||
func (e *matchEmitter) trim(m AbsoluteMatch, maxEnd int, minLength int) {
|
func (e *matchEmitter) trim(m absoluteMatch, maxEnd int, minLength int) {
|
||||||
if m.End > maxEnd {
|
if m.End > maxEnd {
|
||||||
m.End = maxEnd
|
m.End = maxEnd
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,9 +32,14 @@ type M4 struct {
|
||||||
// locations with the same hash as the current location.
|
// locations with the same hash as the current location.
|
||||||
ChainLength int
|
ChainLength int
|
||||||
|
|
||||||
// Score is the rating function used to choose the best match.
|
// DistanceBitCost is used when comparing two matches to see
|
||||||
// The default is the length of the match.
|
// which is better. The comparison is primarily based on the length
|
||||||
Score func(AbsoluteMatch) int
|
// of the matches, but it can also take the distance into account,
|
||||||
|
// in terms of the number of bits needed to represent the distance.
|
||||||
|
// One byte of length is given a score of 256, so 32 (256/8) would
|
||||||
|
// be a reasonable first guess for the value of one bit.
|
||||||
|
// (The default is 0, which bases the comparison solely on length.)
|
||||||
|
DistanceBitCost int
|
||||||
|
|
||||||
table []uint32
|
table []uint32
|
||||||
chain []uint16
|
chain []uint16
|
||||||
|
@ -50,6 +55,10 @@ func (q *M4) Reset() {
|
||||||
q.chain = q.chain[:0]
|
q.chain = q.chain[:0]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// score rates m so that it can be compared with other candidate matches;
// a higher score means a better match. Each byte of match length
// (m.End - m.Start) is worth 256, and each leading zero bit of the distance
// (m.Start - m.Match, taken as a uint32) is worth q.DistanceBitCost, so of
// two matches with the same length the one with the shorter distance scores
// at least as high. When q.DistanceBitCost is 0 the score depends on length
// alone.
//
// Note that the zero-value absoluteMatch has a distance of 0, which has 32
// leading zero bits, so it scores 32*q.DistanceBitCost rather than 0.
func (q *M4) score(m absoluteMatch) int {
|
||||||
|
return (m.End-m.Start)*256 + bits.LeadingZeros32(uint32(m.Start-m.Match))*q.DistanceBitCost
|
||||||
|
}
|
||||||
|
|
||||||
func (q *M4) FindMatches(dst []Match, src []byte) []Match {
|
func (q *M4) FindMatches(dst []Match, src []byte) []Match {
|
||||||
if q.MaxDistance == 0 {
|
if q.MaxDistance == 0 {
|
||||||
q.MaxDistance = 65535
|
q.MaxDistance = 65535
|
||||||
|
@ -66,11 +75,6 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {
|
||||||
if len(q.table) < 1<<q.TableBits {
|
if len(q.table) < 1<<q.TableBits {
|
||||||
q.table = make([]uint32, 1<<q.TableBits)
|
q.table = make([]uint32, 1<<q.TableBits)
|
||||||
}
|
}
|
||||||
if q.Score == nil {
|
|
||||||
q.Score = func(m AbsoluteMatch) int {
|
|
||||||
return m.End - m.Start
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
e := matchEmitter{Dst: dst}
|
e := matchEmitter{Dst: dst}
|
||||||
|
|
||||||
|
@ -102,16 +106,16 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {
|
||||||
|
|
||||||
// matches stores the matches that have been found but not emitted,
|
// matches stores the matches that have been found but not emitted,
|
||||||
// in reverse order. (matches[0] is the most recent one.)
|
// in reverse order. (matches[0] is the most recent one.)
|
||||||
var matches [3]AbsoluteMatch
|
var matches [3]absoluteMatch
|
||||||
for i := e.NextEmit; i < len(src)-7; i++ {
|
for i := e.NextEmit; i < len(src)-7; i++ {
|
||||||
if matches[0] != (AbsoluteMatch{}) && i >= matches[0].End {
|
if matches[0] != (absoluteMatch{}) && i >= matches[0].End {
|
||||||
// We have found some matches, and we're far enough along that we probably
|
// We have found some matches, and we're far enough along that we probably
|
||||||
// won't find overlapping matches, so we might as well emit them.
|
// won't find overlapping matches, so we might as well emit them.
|
||||||
if matches[1] != (AbsoluteMatch{}) {
|
if matches[1] != (absoluteMatch{}) {
|
||||||
e.trim(matches[1], matches[0].Start, q.MinLength)
|
e.trim(matches[1], matches[0].Start, q.MinLength)
|
||||||
}
|
}
|
||||||
e.emit(matches[0])
|
e.emit(matches[0])
|
||||||
matches = [3]AbsoluteMatch{}
|
matches = [3]absoluteMatch{}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculate and store the hash.
|
// Calculate and store the hash.
|
||||||
|
@ -133,7 +137,7 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Look for a match.
|
// Look for a match.
|
||||||
var currentMatch AbsoluteMatch
|
var currentMatch absoluteMatch
|
||||||
|
|
||||||
if i-candidate != matches[0].Start-matches[0].Match {
|
if i-candidate != matches[0].Start-matches[0].Match {
|
||||||
if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) {
|
if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) {
|
||||||
|
@ -156,24 +160,24 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {
|
||||||
if i-candidate != matches[0].Start-matches[0].Match {
|
if i-candidate != matches[0].Start-matches[0].Match {
|
||||||
if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) {
|
if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) {
|
||||||
m := extendMatch2(src, i, candidate, e.NextEmit)
|
m := extendMatch2(src, i, candidate, e.NextEmit)
|
||||||
if m.End-m.Start > q.MinLength && q.Score(m) > q.Score(currentMatch) {
|
if m.End-m.Start > q.MinLength && q.score(m) > q.score(currentMatch) {
|
||||||
currentMatch = m
|
currentMatch = m
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if q.Score(currentMatch) <= q.Score(matches[0]) {
|
if q.score(currentMatch) <= q.score(matches[0]) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
matches = [3]AbsoluteMatch{
|
matches = [3]absoluteMatch{
|
||||||
currentMatch,
|
currentMatch,
|
||||||
matches[0],
|
matches[0],
|
||||||
matches[1],
|
matches[1],
|
||||||
}
|
}
|
||||||
|
|
||||||
if matches[2] == (AbsoluteMatch{}) {
|
if matches[2] == (absoluteMatch{}) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -181,34 +185,34 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {
|
||||||
switch {
|
switch {
|
||||||
case matches[0].Start < matches[2].End:
|
case matches[0].Start < matches[2].End:
|
||||||
// The first and third matches overlap; discard the one in between.
|
// The first and third matches overlap; discard the one in between.
|
||||||
matches = [3]AbsoluteMatch{
|
matches = [3]absoluteMatch{
|
||||||
matches[0],
|
matches[0],
|
||||||
matches[2],
|
matches[2],
|
||||||
AbsoluteMatch{},
|
absoluteMatch{},
|
||||||
}
|
}
|
||||||
|
|
||||||
case matches[0].Start < matches[2].End+q.MinLength:
|
case matches[0].Start < matches[2].End+q.MinLength:
|
||||||
// The first and third matches don't overlap, but there's no room for
|
// The first and third matches don't overlap, but there's no room for
|
||||||
// another match between them. Emit the first match and discard the second.
|
// another match between them. Emit the first match and discard the second.
|
||||||
e.emit(matches[2])
|
e.emit(matches[2])
|
||||||
matches = [3]AbsoluteMatch{
|
matches = [3]absoluteMatch{
|
||||||
matches[0],
|
matches[0],
|
||||||
AbsoluteMatch{},
|
absoluteMatch{},
|
||||||
AbsoluteMatch{},
|
absoluteMatch{},
|
||||||
}
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
// Emit the first match, shortening it if necessary to avoid overlap with the second.
|
// Emit the first match, shortening it if necessary to avoid overlap with the second.
|
||||||
e.trim(matches[2], matches[1].Start, q.MinLength)
|
e.trim(matches[2], matches[1].Start, q.MinLength)
|
||||||
matches[2] = AbsoluteMatch{}
|
matches[2] = absoluteMatch{}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// We've found all the matches now; emit the remaining ones.
|
// We've found all the matches now; emit the remaining ones.
|
||||||
if matches[1] != (AbsoluteMatch{}) {
|
if matches[1] != (absoluteMatch{}) {
|
||||||
e.trim(matches[1], matches[0].Start, q.MinLength)
|
e.trim(matches[1], matches[0].Start, q.MinLength)
|
||||||
}
|
}
|
||||||
if matches[0] != (AbsoluteMatch{}) {
|
if matches[0] != (absoluteMatch{}) {
|
||||||
e.emit(matches[0])
|
e.emit(matches[0])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -265,13 +269,13 @@ func extendMatch(src []byte, i, j int) int {
|
||||||
|
|
||||||
// Given a 4-byte match at src[start] and src[candidate], extendMatch2 extends it
|
// Given a 4-byte match at src[start] and src[candidate], extendMatch2 extends it
|
||||||
// upward as far as possible, and downward no farther than to min.
|
// upward as far as possible, and downward no farther than to min.
|
||||||
func extendMatch2(src []byte, start, candidate, min int) AbsoluteMatch {
|
func extendMatch2(src []byte, start, candidate, min int) absoluteMatch {
|
||||||
end := extendMatch(src, candidate+4, start+4)
|
end := extendMatch(src, candidate+4, start+4)
|
||||||
for start > min && candidate > 0 && src[start-1] == src[candidate-1] {
|
for start > min && candidate > 0 && src[start-1] == src[candidate-1] {
|
||||||
start--
|
start--
|
||||||
candidate--
|
candidate--
|
||||||
}
|
}
|
||||||
return AbsoluteMatch{
|
return absoluteMatch{
|
||||||
Start: start,
|
Start: start,
|
||||||
End: end,
|
End: end,
|
||||||
Match: candidate,
|
Match: candidate,
|
||||||
|
|
|
@ -2,6 +2,7 @@ package matchfinder
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
|
"math/bits"
|
||||||
"sort"
|
"sort"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -26,9 +27,14 @@ type MultiHash struct {
|
||||||
// The default is 17 (128K entries).
|
// The default is 17 (128K entries).
|
||||||
TableBits int
|
TableBits int
|
||||||
|
|
||||||
// Score is the rating function used to choose the best match.
|
// DistanceBitCost is used when comparing two matches to see
|
||||||
// The default is the length of the match.
|
// which is better. The comparison is primarily based on the length
|
||||||
Score func(AbsoluteMatch) int
|
// of the matches, but it can also take the distance into account,
|
||||||
|
// in terms of the number of bits needed to represent the distance.
|
||||||
|
// One byte of length is given a score of 256, so 32 (256/8) would
|
||||||
|
// be a reasonable first guess for the value of one bit.
|
||||||
|
// (The default is 0, which bases the comparison solely on length.)
|
||||||
|
DistanceBitCost int
|
||||||
|
|
||||||
tables [][]uint32
|
tables [][]uint32
|
||||||
|
|
||||||
|
@ -44,6 +50,10 @@ func (q *MultiHash) Reset() {
|
||||||
q.history = q.history[:0]
|
q.history = q.history[:0]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// score rates m so that it can be compared with other candidate matches;
// a higher score means a better match. Each byte of match length
// (m.End - m.Start) is worth 256, and each leading zero bit of the distance
// (m.Start - m.Match, taken as a uint32) is worth q.DistanceBitCost, so of
// two matches with the same length the one with the shorter distance scores
// at least as high. When q.DistanceBitCost is 0 the score depends on length
// alone.
//
// Note that the zero-value absoluteMatch has a distance of 0, which has 32
// leading zero bits, so it scores 32*q.DistanceBitCost rather than 0.
func (q *MultiHash) score(m absoluteMatch) int {
|
||||||
|
return (m.End-m.Start)*256 + bits.LeadingZeros32(uint32(m.Start-m.Match))*q.DistanceBitCost
|
||||||
|
}
|
||||||
|
|
||||||
func (q *MultiHash) FindMatches(dst []Match, src []byte) []Match {
|
func (q *MultiHash) FindMatches(dst []Match, src []byte) []Match {
|
||||||
if q.MaxDistance == 0 {
|
if q.MaxDistance == 0 {
|
||||||
q.MaxDistance = 65535
|
q.MaxDistance = 65535
|
||||||
|
@ -60,11 +70,6 @@ func (q *MultiHash) FindMatches(dst []Match, src []byte) []Match {
|
||||||
q.tables[i] = make([]uint32, 1<<q.TableBits)
|
q.tables[i] = make([]uint32, 1<<q.TableBits)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if q.Score == nil {
|
|
||||||
q.Score = func(m AbsoluteMatch) int {
|
|
||||||
return m.End - m.Start
|
|
||||||
}
|
|
||||||
}
|
|
||||||
sort.Ints(q.HashLengths)
|
sort.Ints(q.HashLengths)
|
||||||
maxHashLen := q.HashLengths[len(q.HashLengths)-1]
|
maxHashLen := q.HashLengths[len(q.HashLengths)-1]
|
||||||
|
|
||||||
|
@ -94,19 +99,19 @@ func (q *MultiHash) FindMatches(dst []Match, src []byte) []Match {
|
||||||
|
|
||||||
// matches stores the matches that have been found but not emitted,
|
// matches stores the matches that have been found but not emitted,
|
||||||
// in reverse order. (matches[0] is the most recent one.)
|
// in reverse order. (matches[0] is the most recent one.)
|
||||||
var matches [3]AbsoluteMatch
|
var matches [3]absoluteMatch
|
||||||
|
|
||||||
candidates := make([]int, len(q.HashLengths))
|
candidates := make([]int, len(q.HashLengths))
|
||||||
|
|
||||||
for i := e.NextEmit; i < len(src)-maxHashLen; i++ {
|
for i := e.NextEmit; i < len(src)-maxHashLen; i++ {
|
||||||
if matches[0] != (AbsoluteMatch{}) && i >= matches[0].End {
|
if matches[0] != (absoluteMatch{}) && i >= matches[0].End {
|
||||||
// We have found some matches, and we're far enough along that we probably
|
// We have found some matches, and we're far enough along that we probably
|
||||||
// won't find overlapping matches, so we might as well emit them.
|
// won't find overlapping matches, so we might as well emit them.
|
||||||
if matches[1] != (AbsoluteMatch{}) {
|
if matches[1] != (absoluteMatch{}) {
|
||||||
e.trim(matches[1], matches[0].Start, q.MinLength)
|
e.trim(matches[1], matches[0].Start, q.MinLength)
|
||||||
}
|
}
|
||||||
e.emit(matches[0])
|
e.emit(matches[0])
|
||||||
matches = [3]AbsoluteMatch{}
|
matches = [3]absoluteMatch{}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculate and store the hashes.
|
// Calculate and store the hashes.
|
||||||
|
@ -124,7 +129,7 @@ func (q *MultiHash) FindMatches(dst []Match, src []byte) []Match {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Look for a match.
|
// Look for a match.
|
||||||
var currentMatch AbsoluteMatch
|
var currentMatch absoluteMatch
|
||||||
|
|
||||||
if i < matches[0].End {
|
if i < matches[0].End {
|
||||||
// If we're looking for an overlapping match, we only need to check the
|
// If we're looking for an overlapping match, we only need to check the
|
||||||
|
@ -161,23 +166,23 @@ func (q *MultiHash) FindMatches(dst []Match, src []byte) []Match {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
m := extendMatch2(src, i, candidate, e.NextEmit)
|
m := extendMatch2(src, i, candidate, e.NextEmit)
|
||||||
if m.End-m.Start > q.MinLength && q.Score(m) > q.Score(currentMatch) {
|
if m.End-m.Start > q.MinLength && q.score(m) > q.score(currentMatch) {
|
||||||
currentMatch = m
|
currentMatch = m
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if currentMatch == (AbsoluteMatch{}) || q.Score(currentMatch) <= q.Score(matches[0]) {
|
if currentMatch == (absoluteMatch{}) || q.score(currentMatch) <= q.score(matches[0]) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
matches = [3]AbsoluteMatch{
|
matches = [3]absoluteMatch{
|
||||||
currentMatch,
|
currentMatch,
|
||||||
matches[0],
|
matches[0],
|
||||||
matches[1],
|
matches[1],
|
||||||
}
|
}
|
||||||
|
|
||||||
if matches[2] == (AbsoluteMatch{}) {
|
if matches[2] == (absoluteMatch{}) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -185,34 +190,34 @@ func (q *MultiHash) FindMatches(dst []Match, src []byte) []Match {
|
||||||
switch {
|
switch {
|
||||||
case matches[0].Start < matches[2].End:
|
case matches[0].Start < matches[2].End:
|
||||||
// The first and third matches overlap; discard the one in between.
|
// The first and third matches overlap; discard the one in between.
|
||||||
matches = [3]AbsoluteMatch{
|
matches = [3]absoluteMatch{
|
||||||
matches[0],
|
matches[0],
|
||||||
matches[2],
|
matches[2],
|
||||||
AbsoluteMatch{},
|
absoluteMatch{},
|
||||||
}
|
}
|
||||||
|
|
||||||
case matches[0].Start < matches[2].End+q.MinLength:
|
case matches[0].Start < matches[2].End+q.MinLength:
|
||||||
// The first and third matches don't overlap, but there's no room for
|
// The first and third matches don't overlap, but there's no room for
|
||||||
// another match between them. Emit the first match and discard the second.
|
// another match between them. Emit the first match and discard the second.
|
||||||
e.emit(matches[2])
|
e.emit(matches[2])
|
||||||
matches = [3]AbsoluteMatch{
|
matches = [3]absoluteMatch{
|
||||||
matches[0],
|
matches[0],
|
||||||
AbsoluteMatch{},
|
absoluteMatch{},
|
||||||
AbsoluteMatch{},
|
absoluteMatch{},
|
||||||
}
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
// Emit the first match, shortening it if necessary to avoid overlap with the second.
|
// Emit the first match, shortening it if necessary to avoid overlap with the second.
|
||||||
e.trim(matches[2], matches[1].Start, q.MinLength)
|
e.trim(matches[2], matches[1].Start, q.MinLength)
|
||||||
matches[2] = AbsoluteMatch{}
|
matches[2] = absoluteMatch{}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// We've found all the matches now; emit the remaining ones.
|
// We've found all the matches now; emit the remaining ones.
|
||||||
if matches[1] != (AbsoluteMatch{}) {
|
if matches[1] != (absoluteMatch{}) {
|
||||||
e.trim(matches[1], matches[0].Start, q.MinLength)
|
e.trim(matches[1], matches[0].Start, q.MinLength)
|
||||||
}
|
}
|
||||||
if matches[0] != (AbsoluteMatch{}) {
|
if matches[0] != (absoluteMatch{}) {
|
||||||
e.emit(matches[0])
|
e.emit(matches[0])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue