Perf optimizations

This commit is contained in:
s.kamardin 2015-12-01 17:22:17 +03:00
parent c664939ee7
commit 21665ef529
3 changed files with 266 additions and 30 deletions

211
glob.go
View File

@ -14,9 +14,23 @@ const (
var chars = []string{any, superAny, singleAny, escape} var chars = []string{any, superAny, singleAny, escape}
// globKind identifies which concrete matcher a Glob value is; each matcher's
// kind method returns one of the constants declared below.
type globKind int
// Matcher kinds, numbered from zero in declaration order via iota.
const(
// glob_raw is returned by raw.kind (literal string matcher).
glob_raw globKind = iota
// glob_multiple_separated is returned by multiple.kind when separators is non-empty.
glob_multiple_separated
// glob_multiple_super is returned by multiple.kind when separators is empty.
glob_multiple_super
// glob_single is returned by single.kind.
glob_single
// glob_composite is returned by composite.kind.
glob_composite
// glob_prefix is returned by prefix.kind.
glob_prefix
// glob_suffix is returned by suffix.kind.
glob_suffix
// glob_prefix_suffix is returned by prefix_suffix.kind.
glob_prefix_suffix
)
// Glob represents compiled glob pattern. // Glob represents compiled glob pattern.
type Glob interface { type Glob interface {
Match(string) bool Match(string) bool
search(string) (int, int, bool)
kind() globKind
} }
// New creates Glob for given pattern and uses other given (if any) strings as separators. // New creates Glob for given pattern and uses other given (if any) strings as separators.
@ -33,14 +47,36 @@ type Glob interface {
func New(pattern string, d ...string) Glob { func New(pattern string, d ...string) Glob {
chunks := parse(pattern, nil, strings.Join(d, ""), false) chunks := parse(pattern, nil, strings.Join(d, ""), false)
if len(chunks) == 1 { switch len(chunks) {
return chunks[0] case 1:
return chunks[0].glob
case 2:
if chunks[0].glob.kind() == glob_raw && chunks[1].glob.kind() == glob_multiple_super {
return &prefix{chunks[0].str}
}
if chunks[1].glob.kind() == glob_raw && chunks[0].glob.kind() == glob_multiple_super {
return &suffix{chunks[1].str}
}
case 3:
if chunks[0].glob.kind() == glob_raw && chunks[1].glob.kind() == glob_multiple_super && chunks[2].glob.kind() == glob_raw {
return &prefix_suffix{chunks[0].str, chunks[2].str}
}
} }
return &composite{chunks} var c []Glob
for _, chunk := range chunks {
c = append(c, chunk.glob)
}
return &composite{c}
} }
func parse(p string, m []Glob, d string, esc bool) []Glob { type token struct {
glob Glob
str string
}
func parse(p string, m []token, d string, esc bool) []token {
var e bool var e bool
if len(p) == 0 { if len(p) == 0 {
@ -49,41 +85,62 @@ func parse(p string, m []Glob, d string, esc bool) []Glob {
i, c := firstIndexOfChars(p, chars) i, c := firstIndexOfChars(p, chars)
if i == -1 { if i == -1 {
return append(m, raw{p}) return append(m, token{raw{p}, p})
} }
if i > 0 { if i > 0 {
m = append(m, raw{p[0:i]}) m = append(m, token{raw{p[0:i]}, p[0:i]})
} }
if esc { if esc {
m = append(m, raw{c}) m = append(m, token{raw{c}, c})
} else { } else {
switch c { switch c {
case escape: case escape:
e = true e = true
case superAny: case superAny:
m = append(m, multiple{}) m = append(m, token{multiple{}, c})
case any: case any:
m = append(m, multiple{d}) m = append(m, token{multiple{d}, c})
case singleAny: case singleAny:
m = append(m, single{d}) m = append(m, token{single{d}, c})
} }
} }
return parse(p[i+len(c):], m, d, e) return parse(p[i+len(c):], m, d, e)
} }
// raw represents raw string to match
type raw struct { type raw struct {
s string s string
} }
func (self raw) Match(s string) bool { func (self raw) Match(s string) bool {
return self.s == s return self.s == s
} }
// kind reports that this matcher is a literal string matcher.
func (r raw) kind() globKind { return glob_raw }
// search locates the first occurrence of the literal r.s inside s.
// On success it returns the byte index of the occurrence, the byte length of
// the literal, and ok == true. When the literal does not occur it returns
// 0, 0, false.
func (r raw) search(s string) (i int, l int, ok bool) {
	at := strings.Index(s, r.s)
	if at == -1 {
		return 0, 0, false
	}
	return at, len(r.s), true
}
func (self raw) String() string { func (self raw) String() string {
return fmt.Sprintf("[raw:%s]", self.s) return fmt.Sprintf("[raw:%s]", self.s)
} }
// multiple represents *
type multiple struct { type multiple struct {
separators string separators string
} }
@ -92,10 +149,27 @@ func (self multiple) Match(s string) bool {
return strings.IndexAny(s, self.separators) == -1 return strings.IndexAny(s, self.separators) == -1
} }
// search reports a match spanning the whole of s (index 0, length len(s))
// when Match accepts s; otherwise it returns 0, 0, false.
func (m multiple) search(s string) (i int, l int, ok bool) {
	if !m.Match(s) {
		return 0, 0, false
	}
	return 0, len(s), true
}
// kind distinguishes the two flavours of multiple: with a non-empty
// separator set it is glob_multiple_separated, with an empty one it is
// glob_multiple_super.
func (m multiple) kind() globKind {
	if m.separators != "" {
		return glob_multiple_separated
	}
	return glob_multiple_super
}
func (self multiple) String() string { func (self multiple) String() string {
return fmt.Sprintf("[multiple:%s]", self.separators) return fmt.Sprintf("[multiple:%s]", self.separators)
} }
// single represents ?
type single struct { type single struct {
separators string separators string
} }
@ -104,36 +178,60 @@ func (self single) Match(s string) bool {
return len(s) == 1 && strings.IndexAny(s, self.separators) == -1 return len(s) == 1 && strings.IndexAny(s, self.separators) == -1
} }
// search reports a match at index 0 of length 1 when Match accepts s;
// otherwise it returns 0, 0, false.
func (sg single) search(s string) (i int, l int, ok bool) {
	if !sg.Match(s) {
		return 0, 0, false
	}
	return 0, 1, true
}
// kind reports that this matcher is the single-character matcher.
func (sg single) kind() globKind { return glob_single }
func (self single) String() string { func (self single) String() string {
return fmt.Sprintf("[single:%s]", self.separators) return fmt.Sprintf("[single:%s]", self.separators)
} }
// composite
type composite struct { type composite struct {
chunks []Glob chunks []Glob
} }
func (self composite) Match(m string) bool { func (self composite) kind() globKind {
var prev Glob return glob_composite
}
for _, c := range self.chunks { func (self composite) search(s string) (i int, l int, ok bool) {
if str, ok := c.(raw); ok { if self.Match(s) {
i := strings.Index(m, str.s) return 0, len(s), true
if i == -1 { }
return
}
func m(chunks []Glob, s string) bool {
var prev Glob
for _, c := range chunks {
if c.kind() == glob_raw {
i, l, ok := c.search(s)
if !ok {
return false return false
} }
l := len(str.s)
if prev != nil { if prev != nil {
if !prev.Match(m[:i]) { if !prev.Match(s[:i]) {
return false return false
} }
prev = nil prev = nil
} }
m = m[i+l:] s = s[i+l:]
continue continue
} }
@ -141,10 +239,14 @@ func (self composite) Match(m string) bool {
} }
if prev != nil { if prev != nil {
return prev.Match(m) return prev.Match(s)
} }
return len(m) == 0 return len(s) == 0
}
func (self composite) Match(s string) bool {
return m(self.chunks, s)
} }
func firstIndexOfChars(p string, any []string) (min int, c string) { func firstIndexOfChars(p string, any []string) (min int, c string) {
@ -167,4 +269,67 @@ func firstIndexOfChars(p string, any []string) (min int, c string) {
} }
return return
} }
// prefix accepts every string that starts with the literal s.
type prefix struct {
	s string
}

// kind reports that this matcher is a prefix matcher.
func (p prefix) kind() globKind { return glob_prefix }

// search reports a match spanning the whole of s (index 0, length len(s))
// when s starts with the literal; otherwise it returns 0, 0, false.
func (p prefix) search(s string) (i int, l int, ok bool) {
	if !p.Match(s) {
		return 0, 0, false
	}
	return 0, len(s), true
}

// Match reports whether s starts with the literal.
func (p prefix) Match(s string) bool {
	matched := strings.HasPrefix(s, p.s)
	return matched
}
// suffix accepts every string that ends with the literal s.
type suffix struct {
	s string
}

// kind reports that this matcher is a suffix matcher.
func (sf suffix) kind() globKind { return glob_suffix }

// search reports a match spanning the whole of s (index 0, length len(s))
// when s ends with the literal; otherwise it returns 0, 0, false.
func (sf suffix) search(s string) (i int, l int, ok bool) {
	if !sf.Match(s) {
		return 0, 0, false
	}
	return 0, len(s), true
}

// Match reports whether s ends with the literal.
func (sf suffix) Match(s string) bool {
	matched := strings.HasSuffix(s, sf.s)
	return matched
}
// prefix_suffix accepts strings that start with the literal p and end with
// the literal s, with anything (including nothing) in between.
type prefix_suffix struct {
	p, s string
}

// kind reports that this matcher is a combined prefix/suffix matcher.
func (ps prefix_suffix) kind() globKind {
	return glob_prefix_suffix
}

// search reports a match spanning the whole of s (index 0, length len(s))
// when Match accepts s; otherwise it returns 0, 0, false.
func (ps prefix_suffix) search(s string) (i int, l int, ok bool) {
	if !ps.Match(s) {
		return 0, 0, false
	}
	return 0, len(s), true
}

// Match reports whether s begins with ps.p and ends with ps.s without the
// two literals overlapping.
func (ps prefix_suffix) Match(s string) bool {
	// The prefix and suffix must occupy disjoint parts of s. Without this
	// length check, a pattern like "ab*bc" would wrongly accept "abc",
	// where the single "b" is counted for both literals.
	if len(s) < len(ps.p)+len(ps.s) {
		return false
	}
	return strings.HasPrefix(s, ps.p) && strings.HasSuffix(s, ps.s)
}

View File

@ -61,6 +61,8 @@ func TestGlob(t *testing.T) {
glob(true, `\*`, "*"), glob(true, `\*`, "*"),
glob(true, "**", "a.b.c", "."), glob(true, "**", "a.b.c", "."),
glob(true, "* ?at * eyes", "my cat has very bright eyes"),
glob(false, "?at", "at"), glob(false, "?at", "at"),
glob(false, "?at", "fat", "f"), glob(false, "?at", "fat", "f"),
glob(false, "a.*", "a.b.c", "."), glob(false, "a.*", "a.b.c", "."),
@ -90,12 +92,27 @@ const Pattern = "*cat*eyes*"
const ExpPattern = ".*cat.*eyes.*" const ExpPattern = ".*cat.*eyes.*"
const String = "my cat has very bright eyes" const String = "my cat has very bright eyes"
const ProfPattern = "* ?at * eyes"
const ProfString = "my cat has very bright eyes"
//const Pattern = "*.google.com" //const Pattern = "*.google.com"
//const ExpPattern = ".*google\\.com" //const ExpPattern = ".*google\\.com"
//const String = "mail.google.com" //const String = "mail.google.com"
// const Pattern = "google.com" const PlainPattern = "google.com"
// const ExpPattern = "google\\.com" const PlainExpPattern = "google\\.com"
// const String = "google.com" const PlainString = "google.com"
// Fixtures for the prefix/suffix benchmarks: a glob pattern, a regular
// expression for the same kind of URL, and an input string both accept.
const PSPattern = "https://*.google.com"

// PSExpPattern is a raw string, so a literal dot is written `\.`; the
// doubled form `\\.` would mean "a backslash followed by any character" and
// could never match PSString.
const PSExpPattern = `https:\/\/[a-z]+\.google\.com`
const PSString = "https://account.google.com"
// BenchmarkProf measures Match for the profiling fixture: ProfPattern is
// compiled once with New before the loop, then matched against ProfString
// b.N times. It previously used Pattern/String, which made it an exact
// duplicate of BenchmarkGobwas and left the Prof constants unused.
func BenchmarkProf(b *testing.B) {
	m := New(ProfPattern)

	for i := 0; i < b.N; i++ {
		_ = m.Match(ProfString)
	}
}
func BenchmarkGobwas(b *testing.B) { func BenchmarkGobwas(b *testing.B) {
m := New(Pattern) m := New(Pattern)
@ -104,22 +121,69 @@ func BenchmarkGobwas(b *testing.B) {
_ = m.Match(String) _ = m.Match(String)
} }
} }
// BenchmarkGobwasPlain measures Match on a Glob built once, before the loop,
// from PlainPattern and applied to PlainString on every iteration.
func BenchmarkGobwasPlain(b *testing.B) {
m := New(PlainPattern)
for i := 0; i < b.N; i++ {
_ = m.Match(PlainString)
}
}
// BenchmarkGobwasPrefix measures Match for the pattern "abc*" (a literal
// followed by a wildcard), built once before the loop, against "abcdef".
func BenchmarkGobwasPrefix(b *testing.B) {
m := New("abc*")
for i := 0; i < b.N; i++ {
_ = m.Match("abcdef")
}
}
// BenchmarkGobwasSuffix measures Match for the pattern "*def" (a wildcard
// followed by a literal), built once before the loop, against "abcdef".
func BenchmarkGobwasSuffix(b *testing.B) {
m := New("*def")
for i := 0; i < b.N; i++ {
_ = m.Match("abcdef")
}
}
// BenchmarkGobwasPrefixSuffix measures Match for the pattern "ab*ef" (a
// wildcard between two literals), built once before the loop, against
// "abcdef".
func BenchmarkGobwasPrefixSuffix(b *testing.B) {
m := New("ab*ef")
for i := 0; i < b.N; i++ {
_ = m.Match("abcdef")
}
}
func BenchmarkRyanuber(b *testing.B) { func BenchmarkRyanuber(b *testing.B) {
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
_ = rGlob.Glob(Pattern, String) _ = rGlob.Glob(Pattern, String)
} }
} }
// BenchmarkRyanuberPlain measures rGlob.Glob called with PlainPattern and
// PlainString; the pattern string is passed on every iteration.
func BenchmarkRyanuberPlain(b *testing.B) {
for i := 0; i < b.N; i++ {
_ = rGlob.Glob(PlainPattern, PlainString)
}
}
// BenchmarkRyanuberPrefixSuffix measures rGlob.Glob called with PSPattern and
// PSString; the pattern string is passed on every iteration.
func BenchmarkRyanuberPrefixSuffix(b *testing.B) {
for i := 0; i < b.N; i++ {
_ = rGlob.Glob(PSPattern, PSString)
}
}
func BenchmarkRegExp(b *testing.B) { func BenchmarkRegExp(b *testing.B) {
r := regexp.MustCompile(ExpPattern) r := regexp.MustCompile(ExpPattern)
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
_ = r.Match([]byte(String)) _ = r.Match([]byte(String))
} }
} }
// BenchmarkRegExpPrefixSuffix measures regexp matching for PSExpPattern,
// compiled once before the loop, against PSString. The string is converted
// to []byte inside the loop, so that conversion is part of what is timed.
func BenchmarkRegExpPrefixSuffix(b *testing.B) {
r := regexp.MustCompile(PSExpPattern)
for i := 0; i < b.N; i++ {
_ = r.Match([]byte(PSString))
}
}
var ALPHABET_S = []string{"a", "b", "c"} var ALPHABET_S = []string{"a", "b", "c"}
const ALPHABET = "abc" const ALPHABET = "abc"
const PREFIX = "faa"
const STR = "faafsdfcsdffc" const STR = "faafsdfcsdffc"
func BenchmarkIndexOfAny(b *testing.B) { func BenchmarkIndexOfAny(b *testing.B) {
@ -131,4 +195,4 @@ func BenchmarkFirstIndexOfChars(b *testing.B) {
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
firstIndexOfChars(STR, ALPHABET_S) firstIndexOfChars(STR, ALPHABET_S)
} }
} }

View File

@ -53,7 +53,7 @@ func main() {
## Performance ## Performance
In comparison with [go-glob](https://github.com/ryanuber/go-glob), it is ~2.7x faster (on my personal Mac), In comparison with [go-glob](https://github.com/ryanuber/go-glob), it is ~2.5x faster (on my personal Mac),
because my impl compiles patterns for future usage. If you will not use compiled `glob.Glob` object, because my impl compiles patterns for future usage. If you will not use compiled `glob.Glob` object,
and do `g := glob.New(pattern); g.Match(...)` every time, then your code will be about ~3x slower. and do `g := glob.New(pattern); g.Match(...)` every time, then your code will be about ~3x slower.
@ -61,9 +61,16 @@ Run `go test bench=.` from source root to see the benchmarks:
Test | Operations | Speed Test | Operations | Speed
-----|------------|------ -----|------------|------
github.com/gobwas/glob | 20000000 | 165 ns/op github.com/gobwas/glob | 20000000 | 150 ns/op
github.com/ryanuber/go-glob | 10000000 | 452 ns/op github.com/ryanuber/go-glob | 10000000 | 375 ns/op
Also, there are a few simple optimizations that make matching patterns like `*abc`, `abc*` or `a*c` much faster:
Test | Operations | Speed
-----|------------|------
prefix | 200000000 | 8.78 ns/op
suffix | 200000000 | 9.46 ns/op
prefix-suffix | 100000000 | 16.3 ns/op
[godoc-image]: https://godoc.org/github.com/gobwas/glob?status.svg [godoc-image]: https://godoc.org/github.com/gobwas/glob?status.svg
[godoc-url]: https://godoc.org/github.com/gobwas/glob [godoc-url]: https://godoc.org/github.com/gobwas/glob