matchfinder.M4: some refinements to scoring
This commit is contained in:
parent
17e5901d05
commit
97e8583d85
|
@ -674,51 +674,55 @@ func benchmark(b *testing.B, filename string, m matchfinder.MatchFinder, blockSi
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestEncodeM4(t *testing.T) {
|
func TestEncodeM4(t *testing.T) {
|
||||||
test(t, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 18, DistanceBitCost: 57}, 1<<16)
|
test(t, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 18, DistanceBitCost: 66}, 1<<16)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEncodeM4Chain256(t *testing.T) {
|
||||||
|
test(t, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 18, DistanceBitCost: 66, ChainLength: 256}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkEncodeM4(b *testing.B) {
|
func BenchmarkEncodeM4(b *testing.B) {
|
||||||
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, DistanceBitCost: 57}, 1<<16)
|
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, DistanceBitCost: 66}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestEncodeM4Chain1(t *testing.T) {
|
func TestEncodeM4Chain1(t *testing.T) {
|
||||||
test(t, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 18, ChainLength: 1, DistanceBitCost: 57}, 1<<16)
|
test(t, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 18, ChainLength: 1, DistanceBitCost: 66}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkEncodeM4Chain1(b *testing.B) {
|
func BenchmarkEncodeM4Chain1(b *testing.B) {
|
||||||
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 1, DistanceBitCost: 57}, 1<<16)
|
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 1, DistanceBitCost: 66}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkEncodeM4Chain2(b *testing.B) {
|
func BenchmarkEncodeM4Chain2(b *testing.B) {
|
||||||
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 2, DistanceBitCost: 57}, 1<<16)
|
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 2, DistanceBitCost: 66}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkEncodeM4Chain4(b *testing.B) {
|
func BenchmarkEncodeM4Chain4(b *testing.B) {
|
||||||
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 4, DistanceBitCost: 57}, 1<<16)
|
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 4, DistanceBitCost: 66}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkEncodeM4Chain8(b *testing.B) {
|
func BenchmarkEncodeM4Chain8(b *testing.B) {
|
||||||
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 8, HashLen: 5, DistanceBitCost: 57}, 1<<16)
|
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 8, HashLen: 5, DistanceBitCost: 66}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkEncodeM4Chain16(b *testing.B) {
|
func BenchmarkEncodeM4Chain16(b *testing.B) {
|
||||||
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 16, HashLen: 5, DistanceBitCost: 57}, 1<<16)
|
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 16, HashLen: 5, DistanceBitCost: 66}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkEncodeM4Chain32(b *testing.B) {
|
func BenchmarkEncodeM4Chain32(b *testing.B) {
|
||||||
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 32, HashLen: 5, DistanceBitCost: 57}, 1<<16)
|
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 32, HashLen: 5, DistanceBitCost: 66}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkEncodeM4Chain64(b *testing.B) {
|
func BenchmarkEncodeM4Chain64(b *testing.B) {
|
||||||
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 64, HashLen: 5, DistanceBitCost: 57}, 1<<16)
|
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 64, HashLen: 5, DistanceBitCost: 66}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkEncodeM4Chain128(b *testing.B) {
|
func BenchmarkEncodeM4Chain128(b *testing.B) {
|
||||||
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 128, HashLen: 5, DistanceBitCost: 57}, 1<<16)
|
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 128, HashLen: 5, DistanceBitCost: 66}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkEncodeM4Chain256(b *testing.B) {
|
func BenchmarkEncodeM4Chain256(b *testing.B) {
|
||||||
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 256, HashLen: 5, DistanceBitCost: 57}, 1<<16)
|
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 256, HashLen: 5, DistanceBitCost: 66}, 1<<16)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestEncodeM0(t *testing.T) {
|
func TestEncodeM0(t *testing.T) {
|
||||||
|
|
|
@ -32,14 +32,3 @@ func (e *matchEmitter) emit(m absoluteMatch) {
|
||||||
})
|
})
|
||||||
e.NextEmit = m.End
|
e.NextEmit = m.End
|
||||||
}
|
}
|
||||||
|
|
||||||
// trim shortens m if it extends past maxEnd. Then if the length is at least
|
|
||||||
// minLength, the match is emitted.
|
|
||||||
func (e *matchEmitter) trim(m absoluteMatch, maxEnd int, minLength int) {
|
|
||||||
if m.End > maxEnd {
|
|
||||||
m.End = maxEnd
|
|
||||||
}
|
|
||||||
if m.End-m.Start >= minLength {
|
|
||||||
e.emit(m)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
@ -56,7 +56,7 @@ func (q *M4) Reset() {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (q *M4) score(m absoluteMatch) int {
|
func (q *M4) score(m absoluteMatch) int {
|
||||||
return (m.End-m.Start)*256 + bits.LeadingZeros32(uint32(m.Start-m.Match))*q.DistanceBitCost
|
return (m.End-m.Start)*256 + (bits.LeadingZeros32(uint32(m.Start-m.Match))-32)*q.DistanceBitCost
|
||||||
}
|
}
|
||||||
|
|
||||||
func (q *M4) FindMatches(dst []Match, src []byte) []Match {
|
func (q *M4) FindMatches(dst []Match, src []byte) []Match {
|
||||||
|
@ -112,7 +112,12 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {
|
||||||
// We have found some matches, and we're far enough along that we probably
|
// We have found some matches, and we're far enough along that we probably
|
||||||
// won't find overlapping matches, so we might as well emit them.
|
// won't find overlapping matches, so we might as well emit them.
|
||||||
if matches[1] != (absoluteMatch{}) {
|
if matches[1] != (absoluteMatch{}) {
|
||||||
e.trim(matches[1], matches[0].Start, q.MinLength)
|
if matches[1].End > matches[0].Start {
|
||||||
|
matches[1].End = matches[0].Start
|
||||||
|
}
|
||||||
|
if matches[1].End-matches[1].Start >= q.MinLength && q.score(matches[1]) > 0 {
|
||||||
|
e.emit(matches[1])
|
||||||
|
}
|
||||||
}
|
}
|
||||||
e.emit(matches[0])
|
e.emit(matches[0])
|
||||||
matches = [3]absoluteMatch{}
|
matches = [3]absoluteMatch{}
|
||||||
|
@ -139,12 +144,10 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {
|
||||||
// Look for a match.
|
// Look for a match.
|
||||||
var currentMatch absoluteMatch
|
var currentMatch absoluteMatch
|
||||||
|
|
||||||
if i-candidate != matches[0].Start-matches[0].Match {
|
if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) {
|
||||||
if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) {
|
m := extendMatch2(src, i, candidate, e.NextEmit)
|
||||||
m := extendMatch2(src, i, candidate, e.NextEmit)
|
if m.End-m.Start > q.MinLength && q.score(m) > 0 {
|
||||||
if m.End-m.Start > q.MinLength {
|
currentMatch = m
|
||||||
currentMatch = m
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -157,12 +160,10 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {
|
||||||
if candidate <= 0 || i-candidate > q.MaxDistance {
|
if candidate <= 0 || i-candidate > q.MaxDistance {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
if i-candidate != matches[0].Start-matches[0].Match {
|
if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) {
|
||||||
if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) {
|
m := extendMatch2(src, i, candidate, e.NextEmit)
|
||||||
m := extendMatch2(src, i, candidate, e.NextEmit)
|
if m.End-m.Start > q.MinLength && q.score(m) > q.score(currentMatch) {
|
||||||
if m.End-m.Start > q.MinLength && q.score(m) > q.score(currentMatch) {
|
currentMatch = m
|
||||||
currentMatch = m
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -217,14 +218,24 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {
|
||||||
|
|
||||||
default:
|
default:
|
||||||
// Emit the first match, shortening it if necessary to avoid overlap with the second.
|
// Emit the first match, shortening it if necessary to avoid overlap with the second.
|
||||||
e.trim(matches[2], matches[1].Start, q.MinLength)
|
if matches[2].End > matches[1].Start {
|
||||||
|
matches[2].End = matches[1].Start
|
||||||
|
}
|
||||||
|
if matches[2].End-matches[2].Start >= q.MinLength && q.score(matches[2]) > 0 {
|
||||||
|
e.emit(matches[2])
|
||||||
|
}
|
||||||
matches[2] = absoluteMatch{}
|
matches[2] = absoluteMatch{}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// We've found all the matches now; emit the remaining ones.
|
// We've found all the matches now; emit the remaining ones.
|
||||||
if matches[1] != (absoluteMatch{}) {
|
if matches[1] != (absoluteMatch{}) {
|
||||||
e.trim(matches[1], matches[0].Start, q.MinLength)
|
if matches[1].End > matches[0].Start {
|
||||||
|
matches[1].End = matches[0].Start
|
||||||
|
}
|
||||||
|
if matches[1].End-matches[1].Start >= q.MinLength && q.score(matches[1]) > 0 {
|
||||||
|
e.emit(matches[1])
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if matches[0] != (absoluteMatch{}) {
|
if matches[0] != (absoluteMatch{}) {
|
||||||
e.emit(matches[0])
|
e.emit(matches[0])
|
||||||
|
|
Loading…
Reference in New Issue