matchfinder.M4: more refactoring
Factor out matchEmitter.trim, and make TableBits configurable.
This commit is contained in:
parent
c506503c67
commit
924a0eb0c6
|
@ -32,3 +32,14 @@ func (e *matchEmitter) emit(m absoluteMatch) {
|
||||||
})
|
})
|
||||||
e.NextEmit = m.End
|
e.NextEmit = m.End
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// trim shortens m if it extends past maxEnd. Then if the length is at least
|
||||||
|
// minLength, the match is emitted.
|
||||||
|
func (e *matchEmitter) trim(m absoluteMatch, maxEnd int, minLength int) {
|
||||||
|
if m.End > maxEnd {
|
||||||
|
m.End = maxEnd
|
||||||
|
}
|
||||||
|
if m.End-m.Start >= minLength {
|
||||||
|
e.emit(m)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -6,11 +6,6 @@ import (
|
||||||
"runtime"
|
"runtime"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
|
||||||
ssapBits = 17
|
|
||||||
ssapMask = (1 << ssapBits) - 1
|
|
||||||
)
|
|
||||||
|
|
||||||
// M4 is an implementation of the MatchFinder
|
// M4 is an implementation of the MatchFinder
|
||||||
// interface that uses a simple hash table to find matches,
|
// interface that uses a simple hash table to find matches,
|
||||||
// but the advanced parsing technique from
|
// but the advanced parsing technique from
|
||||||
|
@ -29,13 +24,19 @@ type M4 struct {
|
||||||
// The maximum is 8 and the default is 6.
|
// The maximum is 8 and the default is 6.
|
||||||
HashLen int
|
HashLen int
|
||||||
|
|
||||||
table [1 << ssapBits]uint32
|
// TableBits is the number of bits in the hash table indexes.
|
||||||
|
// The default is 17 (128K entries).
|
||||||
|
TableBits int
|
||||||
|
|
||||||
|
table []uint32
|
||||||
|
|
||||||
history []byte
|
history []byte
|
||||||
}
|
}
|
||||||
|
|
||||||
func (q *M4) Reset() {
|
func (q *M4) Reset() {
|
||||||
q.table = [1 << ssapBits]uint32{}
|
for i := range q.table {
|
||||||
|
q.table[i] = 0
|
||||||
|
}
|
||||||
q.history = q.history[:0]
|
q.history = q.history[:0]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -49,6 +50,12 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {
|
||||||
if q.HashLen == 0 {
|
if q.HashLen == 0 {
|
||||||
q.HashLen = 6
|
q.HashLen = 6
|
||||||
}
|
}
|
||||||
|
if q.TableBits == 0 {
|
||||||
|
q.TableBits = 17
|
||||||
|
}
|
||||||
|
if len(q.table) < 1<<q.TableBits {
|
||||||
|
q.table = make([]uint32, 1<<q.TableBits)
|
||||||
|
}
|
||||||
e := matchEmitter{Dst: dst}
|
e := matchEmitter{Dst: dst}
|
||||||
|
|
||||||
if len(q.history) > q.MaxDistance*2 {
|
if len(q.history) > q.MaxDistance*2 {
|
||||||
|
@ -79,21 +86,16 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {
|
||||||
// We have found some matches, and we're far enough along that we probably
|
// We have found some matches, and we're far enough along that we probably
|
||||||
// won't find overlapping matches, so we might as well emit them.
|
// won't find overlapping matches, so we might as well emit them.
|
||||||
if matches[1] != (absoluteMatch{}) {
|
if matches[1] != (absoluteMatch{}) {
|
||||||
if matches[1].End > matches[0].Start {
|
e.trim(matches[1], matches[0].Start, q.MinLength)
|
||||||
matches[1].End = matches[0].Start
|
|
||||||
}
|
|
||||||
if matches[1].End-matches[1].Start >= q.MinLength {
|
|
||||||
e.emit(matches[1])
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
e.emit(matches[0])
|
e.emit(matches[0])
|
||||||
matches = [3]absoluteMatch{}
|
matches = [3]absoluteMatch{}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Now look for a match.
|
// Now look for a match.
|
||||||
h := ((binary.LittleEndian.Uint64(src[i:]) & (1<<(8*q.HashLen) - 1)) * hashMul64) >> (64 - ssapBits)
|
h := ((binary.LittleEndian.Uint64(src[i:]) & (1<<(8*q.HashLen) - 1)) * hashMul64) >> (64 - q.TableBits)
|
||||||
candidate := int(q.table[h&ssapMask])
|
candidate := int(q.table[h])
|
||||||
q.table[h&ssapMask] = uint32(i)
|
q.table[h] = uint32(i)
|
||||||
|
|
||||||
if candidate == 0 || i-candidate > q.MaxDistance || i-candidate == matches[0].Start-matches[0].Match {
|
if candidate == 0 || i-candidate > q.MaxDistance || i-candidate == matches[0].Start-matches[0].Match {
|
||||||
continue
|
continue
|
||||||
|
@ -151,24 +153,14 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {
|
||||||
|
|
||||||
default:
|
default:
|
||||||
// Emit the first match, shortening it if necessary to avoid overlap with the second.
|
// Emit the first match, shortening it if necessary to avoid overlap with the second.
|
||||||
if matches[2].End > matches[1].Start {
|
e.trim(matches[2], matches[1].Start, q.MinLength)
|
||||||
matches[2].End = matches[1].Start
|
|
||||||
}
|
|
||||||
if matches[2].End-matches[2].Start >= q.MinLength {
|
|
||||||
e.emit(matches[2])
|
|
||||||
}
|
|
||||||
matches[2] = absoluteMatch{}
|
matches[2] = absoluteMatch{}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// We've found all the matches now; emit the remaining ones.
|
// We've found all the matches now; emit the remaining ones.
|
||||||
if matches[1] != (absoluteMatch{}) {
|
if matches[1] != (absoluteMatch{}) {
|
||||||
if matches[1].End > matches[0].Start {
|
e.trim(matches[1], matches[0].Start, q.MinLength)
|
||||||
matches[1].End = matches[0].Start
|
|
||||||
}
|
|
||||||
if matches[1].End-matches[1].Start >= q.MinLength {
|
|
||||||
e.emit(matches[1])
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if matches[0] != (absoluteMatch{}) {
|
if matches[0] != (absoluteMatch{}) {
|
||||||
e.emit(matches[0])
|
e.emit(matches[0])
|
||||||
|
|
Loading…
Reference in New Issue