matchfinder.M4: more refactoring

Factor out matchEmitter.trim, and make TableBits configurable.
This commit is contained in:
Andy Balholm 2023-12-28 17:21:34 -08:00
parent c506503c67
commit 924a0eb0c6
2 changed files with 31 additions and 28 deletions

View File

@ -32,3 +32,14 @@ func (e *matchEmitter) emit(m absoluteMatch) {
})
e.NextEmit = m.End
}
// trim clamps the end of m to maxEnd and then emits the result, provided that
// what is left (End - Start) is still at least minLength long. A match that
// ends up shorter than minLength is silently dropped.
//
// m is received by value, so the caller's copy is never modified.
func (e *matchEmitter) trim(m absoluteMatch, maxEnd int, minLength int) {
	if maxEnd < m.End {
		m.End = maxEnd
	}
	if m.End-m.Start < minLength {
		// Too short to be worth emitting after clamping.
		return
	}
	e.emit(m)
}

View File

@ -6,11 +6,6 @@ import (
"runtime"
)
const (
ssapBits = 17
ssapMask = (1 << ssapBits) - 1
)
// M4 is an implementation of the MatchFinder
// interface that uses a simple hash table to find matches,
// but the advanced parsing technique from
@ -29,13 +24,19 @@ type M4 struct {
// The maximum is 8 and the default is 6.
HashLen int
table [1 << ssapBits]uint32
// TableBits is the number of bits in the hash table indexes.
// The default is 17 (128K entries).
TableBits int
table []uint32
history []byte
}
// Reset zeroes every entry of the hash table and truncates the history
// buffer to length zero. The history slice is resliced rather than
// replaced, so its existing capacity is kept.
func (q *M4) Reset() {
// NOTE(review): the next line looks like the pre-change fixed-size array
// reset left over from the diff; with table declared as []uint32 only the
// zeroing loop below would apply — confirm against the actual file.
q.table = [1 << ssapBits]uint32{}
// Zero the entries in place instead of allocating a new table.
for i := range q.table {
q.table[i] = 0
}
q.history = q.history[:0]
}
@ -49,6 +50,12 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {
if q.HashLen == 0 {
q.HashLen = 6
}
if q.TableBits == 0 {
q.TableBits = 17
}
if len(q.table) < 1<<q.TableBits {
q.table = make([]uint32, 1<<q.TableBits)
}
e := matchEmitter{Dst: dst}
if len(q.history) > q.MaxDistance*2 {
@ -79,21 +86,16 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {
// We have found some matches, and we're far enough along that we probably
// won't find overlapping matches, so we might as well emit them.
if matches[1] != (absoluteMatch{}) {
if matches[1].End > matches[0].Start {
matches[1].End = matches[0].Start
}
if matches[1].End-matches[1].Start >= q.MinLength {
e.emit(matches[1])
}
e.trim(matches[1], matches[0].Start, q.MinLength)
}
e.emit(matches[0])
matches = [3]absoluteMatch{}
}
// Now look for a match.
h := ((binary.LittleEndian.Uint64(src[i:]) & (1<<(8*q.HashLen) - 1)) * hashMul64) >> (64 - ssapBits)
candidate := int(q.table[h&ssapMask])
q.table[h&ssapMask] = uint32(i)
h := ((binary.LittleEndian.Uint64(src[i:]) & (1<<(8*q.HashLen) - 1)) * hashMul64) >> (64 - q.TableBits)
candidate := int(q.table[h])
q.table[h] = uint32(i)
if candidate == 0 || i-candidate > q.MaxDistance || i-candidate == matches[0].Start-matches[0].Match {
continue
@ -151,24 +153,14 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {
default:
// Emit the first match, shortening it if necessary to avoid overlap with the second.
if matches[2].End > matches[1].Start {
matches[2].End = matches[1].Start
}
if matches[2].End-matches[2].Start >= q.MinLength {
e.emit(matches[2])
}
e.trim(matches[2], matches[1].Start, q.MinLength)
matches[2] = absoluteMatch{}
}
}
// We've found all the matches now; emit the remaining ones.
if matches[1] != (absoluteMatch{}) {
if matches[1].End > matches[0].Start {
matches[1].End = matches[0].Start
}
if matches[1].End-matches[1].Start >= q.MinLength {
e.emit(matches[1])
}
e.trim(matches[1], matches[0].Start, q.MinLength)
}
if matches[0] != (absoluteMatch{}) {
e.emit(matches[0])