From 265f3afc2a20805dd16a7adee7eed6fc30a2b548 Mon Sep 17 00:00:00 2001
From: Andy Balholm
Date: Tue, 9 Jan 2024 06:03:56 -0800
Subject: [PATCH] matchfinder: penalize score for overlapping matches

---
 matchfinder/m4.go        | 16 +++++++++++++++-
 matchfinder/multihash.go | 16 +++++++++++++++-
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/matchfinder/m4.go b/matchfinder/m4.go
index a1fc824..5b2acba 100644
--- a/matchfinder/m4.go
+++ b/matchfinder/m4.go
@@ -167,7 +167,21 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {
 			}
 		}
 
-		if q.score(currentMatch) <= q.score(matches[0]) {
+		if currentMatch.End-currentMatch.Start < q.MinLength {
+			continue
+		}
+
+		overlapPenalty := 0
+		if matches[0] != (absoluteMatch{}) {
+			overlapPenalty = 275
+			if currentMatch.Start <= matches[1].End {
+				// This match would completely replace the previous match,
+				// so there is no penalty for overlap.
+				overlapPenalty = 0
+			}
+		}
+
+		if q.score(currentMatch) <= q.score(matches[0])+overlapPenalty {
 			continue
 		}
 
diff --git a/matchfinder/multihash.go b/matchfinder/multihash.go
index adc0b40..369b99d 100644
--- a/matchfinder/multihash.go
+++ b/matchfinder/multihash.go
@@ -172,7 +172,21 @@ func (q *MultiHash) FindMatches(dst []Match, src []byte) []Match {
 			}
 		}
 
-		if currentMatch == (absoluteMatch{}) || q.score(currentMatch) <= q.score(matches[0]) {
+		if currentMatch.End-currentMatch.Start < q.MinLength {
+			continue
+		}
+
+		overlapPenalty := 0
+		if matches[0] != (absoluteMatch{}) {
+			overlapPenalty = 275
+			if currentMatch.Start <= matches[1].End {
+				// This match would completely replace the previous match,
+				// so there is no penalty for overlap.
+				overlapPenalty = 0
+			}
+		}
+
+		if q.score(currentMatch) <= q.score(matches[0])+overlapPenalty {
 			continue
 		}
 