mirror of https://github.com/tidwall/match.git
238 lines
5.4 KiB
Go
238 lines
5.4 KiB
Go
// Package match provides a simple pattern matcher with unicode support.
|
|
package match
|
|
|
|
import (
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// Match returns true if str matches pattern. This is a very
|
|
// simple wildcard match where '*' matches on any number characters
|
|
// and '?' matches on any one character.
|
|
//
|
|
// pattern:
|
|
// { term }
|
|
// term:
|
|
// '*' matches any sequence of non-Separator characters
|
|
// '?' matches any single non-Separator character
|
|
// c matches character c (c != '*', '?', '\\')
|
|
// '\\' c matches character c
|
|
//
|
|
func Match(str, pattern string) bool {
|
|
if pattern == "*" {
|
|
return true
|
|
}
|
|
return match(str, pattern, 0, nil, -1) == rMatch
|
|
}
|
|
|
|
// MatchLimit is the same as Match but will limit the complexity of the match
|
|
// operation. This is to avoid long running matches, specifically to avoid ReDos
|
|
// attacks from arbritary inputs.
|
|
//
|
|
// How it works:
|
|
// The underlying match routine is recursive and may call itself when it
|
|
// encounters a sandwiched wildcard pattern, such as: `user:*:name`.
|
|
// Everytime it calls itself a counter is incremented.
|
|
// The operation is stopped when counter > maxcomp*len(str).
|
|
func MatchLimit(str, pattern string, maxcomp int) (matched, stopped bool) {
|
|
if pattern == "*" {
|
|
return true, false
|
|
}
|
|
counter := 0
|
|
r := match(str, pattern, len(str), &counter, maxcomp)
|
|
if r == rStop {
|
|
return false, true
|
|
}
|
|
return r == rMatch, false
|
|
}
|
|
|
|
type result int
|
|
|
|
const (
|
|
rNoMatch result = iota
|
|
rMatch
|
|
rStop
|
|
)
|
|
|
|
func match(str, pat string, slen int, counter *int, maxcomp int) result {
|
|
// check complexity limit
|
|
if maxcomp > -1 {
|
|
if *counter > slen*maxcomp {
|
|
return rStop
|
|
}
|
|
*counter++
|
|
}
|
|
|
|
for len(pat) > 0 {
|
|
var wild bool
|
|
pc, ps := rune(pat[0]), 1
|
|
if pc > 0x7f {
|
|
pc, ps = utf8.DecodeRuneInString(pat)
|
|
}
|
|
var sc rune
|
|
var ss int
|
|
if len(str) > 0 {
|
|
sc, ss = rune(str[0]), 1
|
|
if sc > 0x7f {
|
|
sc, ss = utf8.DecodeRuneInString(str)
|
|
}
|
|
}
|
|
switch pc {
|
|
case '?':
|
|
if ss == 0 {
|
|
return rNoMatch
|
|
}
|
|
case '*':
|
|
// Ignore repeating stars.
|
|
for len(pat) > 1 && pat[1] == '*' {
|
|
pat = pat[1:]
|
|
}
|
|
|
|
// If this star is the last character then it must be a match.
|
|
if len(pat) == 1 {
|
|
return rMatch
|
|
}
|
|
|
|
// Match and trim any non-wildcard suffix characters.
|
|
var ok bool
|
|
str, pat, ok = matchTrimSuffix(str, pat)
|
|
if !ok {
|
|
return rNoMatch
|
|
}
|
|
|
|
// Check for single star again.
|
|
if len(pat) == 1 {
|
|
return rMatch
|
|
}
|
|
|
|
// Perform recursive wildcard search.
|
|
r := match(str, pat[1:], slen, counter, maxcomp)
|
|
if r != rNoMatch {
|
|
return r
|
|
}
|
|
if len(str) == 0 {
|
|
return rNoMatch
|
|
}
|
|
wild = true
|
|
default:
|
|
if ss == 0 {
|
|
return rNoMatch
|
|
}
|
|
if pc == '\\' {
|
|
pat = pat[ps:]
|
|
pc, ps = utf8.DecodeRuneInString(pat)
|
|
if ps == 0 {
|
|
return rNoMatch
|
|
}
|
|
}
|
|
if sc != pc {
|
|
return rNoMatch
|
|
}
|
|
}
|
|
str = str[ss:]
|
|
if !wild {
|
|
pat = pat[ps:]
|
|
}
|
|
}
|
|
if len(str) == 0 {
|
|
return rMatch
|
|
}
|
|
return rNoMatch
|
|
}
|
|
|
|
// matchTrimSuffix matches and trims any non-wildcard suffix characters.
|
|
// Returns the trimed string and pattern.
|
|
//
|
|
// This is called because the pattern contains extra data after the wildcard
|
|
// star. Here we compare any suffix characters in the pattern to the suffix of
|
|
// the target string. Basically a reverse match that stops when a wildcard
|
|
// character is reached. This is a little trickier than a forward match because
|
|
// we need to evaluate an escaped character in reverse.
|
|
//
|
|
// Any matched characters will be trimmed from both the target
|
|
// string and the pattern.
|
|
func matchTrimSuffix(str, pat string) (string, string, bool) {
|
|
// It's expected that the pattern has at least two bytes and the first byte
|
|
// is a wildcard star '*'
|
|
match := true
|
|
for len(str) > 0 && len(pat) > 1 {
|
|
pc, ps := utf8.DecodeLastRuneInString(pat)
|
|
var esc bool
|
|
for i := 0; ; i++ {
|
|
if pat[len(pat)-ps-i-1] != '\\' {
|
|
if i&1 == 1 {
|
|
esc = true
|
|
ps++
|
|
}
|
|
break
|
|
}
|
|
}
|
|
if pc == '*' && !esc {
|
|
match = true
|
|
break
|
|
}
|
|
sc, ss := utf8.DecodeLastRuneInString(str)
|
|
if !((pc == '?' && !esc) || pc == sc) {
|
|
match = false
|
|
break
|
|
}
|
|
str = str[:len(str)-ss]
|
|
pat = pat[:len(pat)-ps]
|
|
}
|
|
return str, pat, match
|
|
}
|
|
|
|
var maxRuneBytes = [...]byte{244, 143, 191, 191}
|
|
|
|
// Allowable parses the pattern and determines the minimum and maximum allowable
|
|
// values that the pattern can represent.
|
|
// When the max cannot be determined, 'true' will be returned
|
|
// for infinite.
|
|
func Allowable(pattern string) (min, max string) {
|
|
if pattern == "" || pattern[0] == '*' {
|
|
return "", ""
|
|
}
|
|
|
|
minb := make([]byte, 0, len(pattern))
|
|
maxb := make([]byte, 0, len(pattern))
|
|
var wild bool
|
|
for i := 0; i < len(pattern); i++ {
|
|
if pattern[i] == '*' {
|
|
wild = true
|
|
break
|
|
}
|
|
if pattern[i] == '?' {
|
|
minb = append(minb, 0)
|
|
maxb = append(maxb, maxRuneBytes[:]...)
|
|
} else {
|
|
minb = append(minb, pattern[i])
|
|
maxb = append(maxb, pattern[i])
|
|
}
|
|
}
|
|
if wild {
|
|
r, n := utf8.DecodeLastRune(maxb)
|
|
if r != utf8.RuneError {
|
|
if r < utf8.MaxRune {
|
|
r++
|
|
if r > 0x7f {
|
|
b := make([]byte, 4)
|
|
nn := utf8.EncodeRune(b, r)
|
|
maxb = append(maxb[:len(maxb)-n], b[:nn]...)
|
|
} else {
|
|
maxb = append(maxb[:len(maxb)-n], byte(r))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return string(minb), string(maxb)
|
|
}
|
|
|
|
// IsPattern returns true if the string is a pattern.
|
|
func IsPattern(str string) bool {
|
|
for i := 0; i < len(str); i++ {
|
|
if str[i] == '*' || str[i] == '?' {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|