diff --git a/glob.go b/glob.go index 45c306c..e4591c0 100644 --- a/glob.go +++ b/glob.go @@ -14,9 +14,23 @@ const ( var chars = []string{any, superAny, singleAny, escape} +type globKind int +const( + glob_raw globKind = iota + glob_multiple_separated + glob_multiple_super + glob_single + glob_composite + glob_prefix + glob_suffix + glob_prefix_suffix +) + // Glob represents compiled glob pattern. type Glob interface { Match(string) bool + search(string) (int, int, bool) + kind() globKind } // New creates Glob for given pattern and uses other given (if any) strings as separators. @@ -33,14 +47,36 @@ type Glob interface { func New(pattern string, d ...string) Glob { chunks := parse(pattern, nil, strings.Join(d, ""), false) - if len(chunks) == 1 { - return chunks[0] + switch len(chunks) { + case 1: + return chunks[0].glob + case 2: + if chunks[0].glob.kind() == glob_raw && chunks[1].glob.kind() == glob_multiple_super { + return &prefix{chunks[0].str} + } + if chunks[1].glob.kind() == glob_raw && chunks[0].glob.kind() == glob_multiple_super { + return &suffix{chunks[1].str} + } + case 3: + if chunks[0].glob.kind() == glob_raw && chunks[1].glob.kind() == glob_multiple_super && chunks[2].glob.kind() == glob_raw { + return &prefix_suffix{chunks[0].str, chunks[2].str} + } } - return &composite{chunks} + var c []Glob + for _, chunk := range chunks { + c = append(c, chunk.glob) + } + + return &composite{c} } -func parse(p string, m []Glob, d string, esc bool) []Glob { +type token struct { + glob Glob + str string +} + +func parse(p string, m []token, d string, esc bool) []token { var e bool if len(p) == 0 { @@ -49,41 +85,62 @@ func parse(p string, m []Glob, d string, esc bool) []Glob { i, c := firstIndexOfChars(p, chars) if i == -1 { - return append(m, raw{p}) + return append(m, token{raw{p}, p}) } if i > 0 { - m = append(m, raw{p[0:i]}) + m = append(m, token{raw{p[0:i]}, p[0:i]}) } if esc { - m = append(m, raw{c}) + m = append(m, token{raw{c}, c}) } else { switch c { case 
escape: e = true case superAny: - m = append(m, multiple{}) + m = append(m, token{multiple{}, c}) case any: - m = append(m, multiple{d}) + m = append(m, token{multiple{d}, c}) case singleAny: - m = append(m, single{d}) + m = append(m, token{single{d}, c}) } } return parse(p[i+len(c):], m, d, e) } +// raw represents raw string to match type raw struct { s string } + func (self raw) Match(s string) bool { return self.s == s } + +func (self raw) kind() globKind { + return glob_raw +} + +func (self raw) search(s string) (i int, l int, ok bool) { + index := strings.Index(s, self.s) + if index == -1 { + return + } + + i = index + l = len(self.s) + ok = true + + return +} + func (self raw) String() string { return fmt.Sprintf("[raw:%s]", self.s) } +// multiple represents * type multiple struct { separators string } @@ -92,10 +149,27 @@ func (self multiple) Match(s string) bool { return strings.IndexAny(s, self.separators) == -1 } +func (self multiple) search(s string) (i int, l int, ok bool) { + if self.Match(s) { + return 0, len(s), true + } + + return +} + +func (self multiple) kind() globKind { + if self.separators == "" { + return glob_multiple_super + } else { + return glob_multiple_separated + } +} + func (self multiple) String() string { return fmt.Sprintf("[multiple:%s]", self.separators) } +// single represents ? 
type single struct { separators string } @@ -104,36 +178,60 @@ func (self single) Match(s string) bool { return len(s) == 1 && strings.IndexAny(s, self.separators) == -1 } +func (self single) search(s string) (i int, l int, ok bool) { + if self.Match(s) { + return 0, 1, true + } + + return +} + +func (self single) kind() globKind { + return glob_single +} + + func (self single) String() string { return fmt.Sprintf("[single:%s]", self.separators) } + +// composite type composite struct { chunks []Glob } -func (self composite) Match(m string) bool { - var prev Glob +func (self composite) kind() globKind { + return glob_composite +} - for _, c := range self.chunks { - if str, ok := c.(raw); ok { - i := strings.Index(m, str.s) - if i == -1 { +func (self composite) search(s string) (i int, l int, ok bool) { + if self.Match(s) { + return 0, len(s), true + } + + return +} + +func m(chunks []Glob, s string) bool { + var prev Glob + for _, c := range chunks { + if c.kind() == glob_raw { + i, l, ok := c.search(s) + if !ok { return false } - l := len(str.s) - if prev != nil { - if !prev.Match(m[:i]) { + if !prev.Match(s[:i]) { return false } prev = nil } - m = m[i+l:] + s = s[i+l:] continue } @@ -141,10 +239,14 @@ func (self composite) Match(m string) bool { } if prev != nil { - return prev.Match(m) + return prev.Match(s) } - return len(m) == 0 + return len(s) == 0 +} + +func (self composite) Match(s string) bool { + return m(self.chunks, s) } func firstIndexOfChars(p string, any []string) (min int, c string) { @@ -167,4 +269,67 @@ func firstIndexOfChars(p string, any []string) (min int, c string) { } return -} \ No newline at end of file +} + +type prefix struct { + s string +} + +func (self prefix) kind() globKind { + return glob_prefix +} + +func (self prefix) search(s string) (i int, l int, ok bool) { + if self.Match(s) { + return 0, len(s), true + } + + return +} + +func (self prefix) Match(s string) bool { + return strings.HasPrefix(s, self.s) +} + +type suffix struct { 
+ s string +} + +func (self suffix) kind() globKind { + return glob_suffix +} + +func (self suffix) search(s string) (i int, l int, ok bool) { + if self.Match(s) { + return 0, len(s), true + } + + return +} + +func (self suffix) Match(s string) bool { + return strings.HasSuffix(s, self.s) +} + +type prefix_suffix struct { + p, s string +} + +func (self prefix_suffix) kind() globKind { + return glob_prefix_suffix +} + +func (self prefix_suffix) search(s string) (i int, l int, ok bool) { + if self.Match(s) { + return 0, len(s), true + } + + return +} + +func (self prefix_suffix) Match(s string) bool { + return strings.HasPrefix(s, self.p) && strings.HasSuffix(s, self.s) +} + + + diff --git a/glob_test.go b/glob_test.go index 8e07bb8..7c3a2df 100644 --- a/glob_test.go +++ b/glob_test.go @@ -61,6 +61,8 @@ func TestGlob(t *testing.T) { glob(true, `\*`, "*"), glob(true, "**", "a.b.c", "."), + glob(true, "* ?at * eyes", "my cat has very bright eyes"), + glob(false, "?at", "at"), glob(false, "?at", "fat", "f"), glob(false, "a.*", "a.b.c", "."), @@ -90,12 +92,27 @@ const Pattern = "*cat*eyes*" const ExpPattern = ".*cat.*eyes.*" const String = "my cat has very bright eyes" +const ProfPattern = "* ?at * eyes" +const ProfString = "my cat has very bright eyes" + //const Pattern = "*.google.com" //const ExpPattern = ".*google\\.com" //const String = "mail.google.com" -// const Pattern = "google.com" -// const ExpPattern = "google\\.com" -// const String = "google.com" +const PlainPattern = "google.com" +const PlainExpPattern = "google\\.com" +const PlainString = "google.com" + +const PSPattern = "https://*.google.com" +const PSExpPattern = `https:\/\/[a-z]+\.google\\.com` +const PSString = "https://account.google.com" + +func BenchmarkProf(b *testing.B) { + m := New(Pattern) + + for i := 0; i < b.N; i++ { + _ = m.Match(String) + } +} func BenchmarkGobwas(b *testing.B) { m := New(Pattern) @@ -104,22 +121,69 @@ func BenchmarkGobwas(b *testing.B) { _ = m.Match(String) } } +func 
BenchmarkGobwasPlain(b *testing.B) { + m := New(PlainPattern) + + for i := 0; i < b.N; i++ { + _ = m.Match(PlainString) + } +} +func BenchmarkGobwasPrefix(b *testing.B) { + m := New("abc*") + + for i := 0; i < b.N; i++ { + _ = m.Match("abcdef") + } +} +func BenchmarkGobwasSuffix(b *testing.B) { + m := New("*def") + + for i := 0; i < b.N; i++ { + _ = m.Match("abcdef") + } +} +func BenchmarkGobwasPrefixSuffix(b *testing.B) { + m := New("ab*ef") + + for i := 0; i < b.N; i++ { + _ = m.Match("abcdef") + } +} func BenchmarkRyanuber(b *testing.B) { for i := 0; i < b.N; i++ { _ = rGlob.Glob(Pattern, String) } } +func BenchmarkRyanuberPlain(b *testing.B) { + for i := 0; i < b.N; i++ { + _ = rGlob.Glob(PlainPattern, PlainString) + } +} +func BenchmarkRyanuberPrefixSuffix(b *testing.B) { + for i := 0; i < b.N; i++ { + _ = rGlob.Glob(PSPattern, PSString) + } +} + + func BenchmarkRegExp(b *testing.B) { r := regexp.MustCompile(ExpPattern) for i := 0; i < b.N; i++ { _ = r.Match([]byte(String)) } } +func BenchmarkRegExpPrefixSuffix(b *testing.B) { + r := regexp.MustCompile(PSExpPattern) + for i := 0; i < b.N; i++ { + _ = r.Match([]byte(PSString)) + } +} var ALPHABET_S = []string{"a", "b", "c"} const ALPHABET = "abc" +const PREFIX = "faa" const STR = "faafsdfcsdffc" func BenchmarkIndexOfAny(b *testing.B) { @@ -131,4 +195,4 @@ func BenchmarkFirstIndexOfChars(b *testing.B) { for i := 0; i < b.N; i++ { firstIndexOfChars(STR, ALPHABET_S) } -} +} \ No newline at end of file diff --git a/readme.md b/readme.md index 83fe087..5c0afc9 100644 --- a/readme.md +++ b/readme.md @@ -53,7 +53,7 @@ func main() { ## Performance -In comparison with [go-glob](https://github.com/ryanuber/go-glob), it is ~2.7x faster (on my personal Mac), +In comparison with [go-glob](https://github.com/ryanuber/go-glob), it is ~2.5x faster (on my personal Mac), because my impl compiles patterns for future usage. 
If you will not use compiled `glob.Glob` object, and do `g := glob.New(pattern); g.Match(...)` every time, then your code will be about ~3x slower. @@ -61,9 +61,16 @@ Run `go test bench=.` from source root to see the benchmarks: Test | Operations | Speed -----|------------|------ -github.com/gobwas/glob | 20000000 | 165 ns/op -github.com/ryanuber/go-glob | 10000000 | 452 ns/op +github.com/gobwas/glob | 20000000 | 150 ns/op +github.com/ryanuber/go-glob | 10000000 | 375 ns/op +Also, there are a few simple optimizations that make matching much faster for patterns like `*abc`, `abc*` or `a*c`: + +Test | Operations | Speed +-----|------------|------ +prefix | 200000000 | 8.78 ns/op +suffix | 200000000 | 9.46 ns/op +prefix-suffix | 100000000 | 16.3 ns/op [godoc-image]: https://godoc.org/github.com/gobwas/glob?status.svg [godoc-url]: https://godoc.org/github.com/gobwas/glob