mirror of https://github.com/gobwas/glob.git
Perf optimizations
This commit is contained in:
parent
c664939ee7
commit
21665ef529
211
glob.go
211
glob.go
|
@ -14,9 +14,23 @@ const (
|
||||||
|
|
||||||
var chars = []string{any, superAny, singleAny, escape}
|
var chars = []string{any, superAny, singleAny, escape}
|
||||||
|
|
||||||
|
type globKind int
|
||||||
|
const(
|
||||||
|
glob_raw globKind = iota
|
||||||
|
glob_multiple_separated
|
||||||
|
glob_multiple_super
|
||||||
|
glob_single
|
||||||
|
glob_composite
|
||||||
|
glob_prefix
|
||||||
|
glob_suffix
|
||||||
|
glob_prefix_suffix
|
||||||
|
)
|
||||||
|
|
||||||
// Glob represents compiled glob pattern.
|
// Glob represents compiled glob pattern.
|
||||||
type Glob interface {
|
type Glob interface {
|
||||||
Match(string) bool
|
Match(string) bool
|
||||||
|
search(string) (int, int, bool)
|
||||||
|
kind() globKind
|
||||||
}
|
}
|
||||||
|
|
||||||
// New creates Glob for given pattern and uses other given (if any) strings as separators.
|
// New creates Glob for given pattern and uses other given (if any) strings as separators.
|
||||||
|
@ -33,14 +47,36 @@ type Glob interface {
|
||||||
func New(pattern string, d ...string) Glob {
|
func New(pattern string, d ...string) Glob {
|
||||||
chunks := parse(pattern, nil, strings.Join(d, ""), false)
|
chunks := parse(pattern, nil, strings.Join(d, ""), false)
|
||||||
|
|
||||||
if len(chunks) == 1 {
|
switch len(chunks) {
|
||||||
return chunks[0]
|
case 1:
|
||||||
|
return chunks[0].glob
|
||||||
|
case 2:
|
||||||
|
if chunks[0].glob.kind() == glob_raw && chunks[1].glob.kind() == glob_multiple_super {
|
||||||
|
return &prefix{chunks[0].str}
|
||||||
|
}
|
||||||
|
if chunks[1].glob.kind() == glob_raw && chunks[0].glob.kind() == glob_multiple_super {
|
||||||
|
return &suffix{chunks[1].str}
|
||||||
|
}
|
||||||
|
case 3:
|
||||||
|
if chunks[0].glob.kind() == glob_raw && chunks[1].glob.kind() == glob_multiple_super && chunks[2].glob.kind() == glob_raw {
|
||||||
|
return &prefix_suffix{chunks[0].str, chunks[2].str}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return &composite{chunks}
|
var c []Glob
|
||||||
|
for _, chunk := range chunks {
|
||||||
|
c = append(c, chunk.glob)
|
||||||
|
}
|
||||||
|
|
||||||
|
return &composite{c}
|
||||||
}
|
}
|
||||||
|
|
||||||
func parse(p string, m []Glob, d string, esc bool) []Glob {
|
type token struct {
|
||||||
|
glob Glob
|
||||||
|
str string
|
||||||
|
}
|
||||||
|
|
||||||
|
func parse(p string, m []token, d string, esc bool) []token {
|
||||||
var e bool
|
var e bool
|
||||||
|
|
||||||
if len(p) == 0 {
|
if len(p) == 0 {
|
||||||
|
@ -49,41 +85,62 @@ func parse(p string, m []Glob, d string, esc bool) []Glob {
|
||||||
|
|
||||||
i, c := firstIndexOfChars(p, chars)
|
i, c := firstIndexOfChars(p, chars)
|
||||||
if i == -1 {
|
if i == -1 {
|
||||||
return append(m, raw{p})
|
return append(m, token{raw{p}, p})
|
||||||
}
|
}
|
||||||
|
|
||||||
if i > 0 {
|
if i > 0 {
|
||||||
m = append(m, raw{p[0:i]})
|
m = append(m, token{raw{p[0:i]}, p[0:i]})
|
||||||
}
|
}
|
||||||
|
|
||||||
if esc {
|
if esc {
|
||||||
m = append(m, raw{c})
|
m = append(m, token{raw{c}, c})
|
||||||
} else {
|
} else {
|
||||||
switch c {
|
switch c {
|
||||||
case escape:
|
case escape:
|
||||||
e = true
|
e = true
|
||||||
case superAny:
|
case superAny:
|
||||||
m = append(m, multiple{})
|
m = append(m, token{multiple{}, c})
|
||||||
case any:
|
case any:
|
||||||
m = append(m, multiple{d})
|
m = append(m, token{multiple{d}, c})
|
||||||
case singleAny:
|
case singleAny:
|
||||||
m = append(m, single{d})
|
m = append(m, token{single{d}, c})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return parse(p[i+len(c):], m, d, e)
|
return parse(p[i+len(c):], m, d, e)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// raw represents raw string to match
|
||||||
type raw struct {
|
type raw struct {
|
||||||
s string
|
s string
|
||||||
}
|
}
|
||||||
|
|
||||||
func (self raw) Match(s string) bool {
|
func (self raw) Match(s string) bool {
|
||||||
return self.s == s
|
return self.s == s
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (self raw) kind() globKind {
|
||||||
|
return glob_raw
|
||||||
|
}
|
||||||
|
|
||||||
|
func (self raw) search(s string) (i int, l int, ok bool) {
|
||||||
|
index := strings.Index(s, self.s)
|
||||||
|
if index == -1 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
i = index
|
||||||
|
l = len(self.s)
|
||||||
|
ok = true
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
func (self raw) String() string {
|
func (self raw) String() string {
|
||||||
return fmt.Sprintf("[raw:%s]", self.s)
|
return fmt.Sprintf("[raw:%s]", self.s)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// multiple represents *
|
||||||
type multiple struct {
|
type multiple struct {
|
||||||
separators string
|
separators string
|
||||||
}
|
}
|
||||||
|
@ -92,10 +149,27 @@ func (self multiple) Match(s string) bool {
|
||||||
return strings.IndexAny(s, self.separators) == -1
|
return strings.IndexAny(s, self.separators) == -1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (self multiple) search(s string) (i int, l int, ok bool) {
|
||||||
|
if self.Match(s) {
|
||||||
|
return 0, len(s), true
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func (self multiple) kind() globKind {
|
||||||
|
if self.separators == "" {
|
||||||
|
return glob_multiple_super
|
||||||
|
} else {
|
||||||
|
return glob_multiple_separated
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (self multiple) String() string {
|
func (self multiple) String() string {
|
||||||
return fmt.Sprintf("[multiple:%s]", self.separators)
|
return fmt.Sprintf("[multiple:%s]", self.separators)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// single represents ?
|
||||||
type single struct {
|
type single struct {
|
||||||
separators string
|
separators string
|
||||||
}
|
}
|
||||||
|
@ -104,36 +178,60 @@ func (self single) Match(s string) bool {
|
||||||
return len(s) == 1 && strings.IndexAny(s, self.separators) == -1
|
return len(s) == 1 && strings.IndexAny(s, self.separators) == -1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (self single) search(s string) (i int, l int, ok bool) {
|
||||||
|
if self.Match(s) {
|
||||||
|
return 0, 1, true
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func (self single) kind() globKind {
|
||||||
|
return glob_single
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
func (self single) String() string {
|
func (self single) String() string {
|
||||||
return fmt.Sprintf("[single:%s]", self.separators)
|
return fmt.Sprintf("[single:%s]", self.separators)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// composite
|
||||||
type composite struct {
|
type composite struct {
|
||||||
chunks []Glob
|
chunks []Glob
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
func (self composite) Match(m string) bool {
|
func (self composite) kind() globKind {
|
||||||
var prev Glob
|
return glob_composite
|
||||||
|
}
|
||||||
|
|
||||||
for _, c := range self.chunks {
|
func (self composite) search(s string) (i int, l int, ok bool) {
|
||||||
if str, ok := c.(raw); ok {
|
if self.Match(s) {
|
||||||
i := strings.Index(m, str.s)
|
return 0, len(s), true
|
||||||
if i == -1 {
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func m(chunks []Glob, s string) bool {
|
||||||
|
var prev Glob
|
||||||
|
for _, c := range chunks {
|
||||||
|
if c.kind() == glob_raw {
|
||||||
|
i, l, ok := c.search(s)
|
||||||
|
if !ok {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
l := len(str.s)
|
|
||||||
|
|
||||||
if prev != nil {
|
if prev != nil {
|
||||||
if !prev.Match(m[:i]) {
|
if !prev.Match(s[:i]) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
prev = nil
|
prev = nil
|
||||||
}
|
}
|
||||||
|
|
||||||
m = m[i+l:]
|
s = s[i+l:]
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -141,10 +239,14 @@ func (self composite) Match(m string) bool {
|
||||||
}
|
}
|
||||||
|
|
||||||
if prev != nil {
|
if prev != nil {
|
||||||
return prev.Match(m)
|
return prev.Match(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
return len(m) == 0
|
return len(s) == 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (self composite) Match(s string) bool {
|
||||||
|
return m(self.chunks, s)
|
||||||
}
|
}
|
||||||
|
|
||||||
func firstIndexOfChars(p string, any []string) (min int, c string) {
|
func firstIndexOfChars(p string, any []string) (min int, c string) {
|
||||||
|
@ -167,4 +269,67 @@ func firstIndexOfChars(p string, any []string) (min int, c string) {
|
||||||
}
|
}
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type prefix struct {
|
||||||
|
s string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (self prefix) kind() globKind {
|
||||||
|
return glob_prefix
|
||||||
|
}
|
||||||
|
|
||||||
|
func (self prefix) search(s string) (i int, l int, ok bool) {
|
||||||
|
if self.Match(s) {
|
||||||
|
return 0, len(s), true
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func (self prefix) Match(s string) bool {
|
||||||
|
return strings.HasPrefix(s, self.s)
|
||||||
|
}
|
||||||
|
|
||||||
|
type suffix struct {
|
||||||
|
s string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (self suffix) kind() globKind {
|
||||||
|
return glob_suffix
|
||||||
|
}
|
||||||
|
|
||||||
|
func (self suffix) search(s string) (i int, l int, ok bool) {
|
||||||
|
if self.Match(s) {
|
||||||
|
return 0, len(s), true
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func (self suffix) Match(s string) bool {
|
||||||
|
return strings.HasSuffix(s, self.s)
|
||||||
|
}
|
||||||
|
|
||||||
|
type prefix_suffix struct {
|
||||||
|
p, s string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (self prefix_suffix) kind() globKind {
|
||||||
|
return glob_prefix_suffix
|
||||||
|
}
|
||||||
|
|
||||||
|
func (self prefix_suffix) search(s string) (i int, l int, ok bool) {
|
||||||
|
if self.Match(s) {
|
||||||
|
return 0, len(s), true
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func (self prefix_suffix) Match(s string) bool {
|
||||||
|
return strings.HasPrefix(s, self.p) && strings.HasSuffix(s, self.s)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
72
glob_test.go
72
glob_test.go
|
@ -61,6 +61,8 @@ func TestGlob(t *testing.T) {
|
||||||
glob(true, `\*`, "*"),
|
glob(true, `\*`, "*"),
|
||||||
glob(true, "**", "a.b.c", "."),
|
glob(true, "**", "a.b.c", "."),
|
||||||
|
|
||||||
|
glob(true, "* ?at * eyes", "my cat has very bright eyes"),
|
||||||
|
|
||||||
glob(false, "?at", "at"),
|
glob(false, "?at", "at"),
|
||||||
glob(false, "?at", "fat", "f"),
|
glob(false, "?at", "fat", "f"),
|
||||||
glob(false, "a.*", "a.b.c", "."),
|
glob(false, "a.*", "a.b.c", "."),
|
||||||
|
@ -90,12 +92,27 @@ const Pattern = "*cat*eyes*"
|
||||||
const ExpPattern = ".*cat.*eyes.*"
|
const ExpPattern = ".*cat.*eyes.*"
|
||||||
const String = "my cat has very bright eyes"
|
const String = "my cat has very bright eyes"
|
||||||
|
|
||||||
|
const ProfPattern = "* ?at * eyes"
|
||||||
|
const ProfString = "my cat has very bright eyes"
|
||||||
|
|
||||||
//const Pattern = "*.google.com"
|
//const Pattern = "*.google.com"
|
||||||
//const ExpPattern = ".*google\\.com"
|
//const ExpPattern = ".*google\\.com"
|
||||||
//const String = "mail.google.com"
|
//const String = "mail.google.com"
|
||||||
// const Pattern = "google.com"
|
const PlainPattern = "google.com"
|
||||||
// const ExpPattern = "google\\.com"
|
const PlainExpPattern = "google\\.com"
|
||||||
// const String = "google.com"
|
const PlainString = "google.com"
|
||||||
|
|
||||||
|
const PSPattern = "https://*.google.com"
|
||||||
|
const PSExpPattern = `https:\/\/[a-z]+\.google\\.com`
|
||||||
|
const PSString = "https://account.google.com"
|
||||||
|
|
||||||
|
func BenchmarkProf(b *testing.B) {
|
||||||
|
m := New(Pattern)
|
||||||
|
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
_ = m.Match(String)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func BenchmarkGobwas(b *testing.B) {
|
func BenchmarkGobwas(b *testing.B) {
|
||||||
m := New(Pattern)
|
m := New(Pattern)
|
||||||
|
@ -104,22 +121,69 @@ func BenchmarkGobwas(b *testing.B) {
|
||||||
_ = m.Match(String)
|
_ = m.Match(String)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
func BenchmarkGobwasPlain(b *testing.B) {
|
||||||
|
m := New(PlainPattern)
|
||||||
|
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
_ = m.Match(PlainString)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func BenchmarkGobwasPrefix(b *testing.B) {
|
||||||
|
m := New("abc*")
|
||||||
|
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
_ = m.Match("abcdef")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func BenchmarkGobwasSuffix(b *testing.B) {
|
||||||
|
m := New("*def")
|
||||||
|
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
_ = m.Match("abcdef")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func BenchmarkGobwasPrefixSuffix(b *testing.B) {
|
||||||
|
m := New("ab*ef")
|
||||||
|
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
_ = m.Match("abcdef")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func BenchmarkRyanuber(b *testing.B) {
|
func BenchmarkRyanuber(b *testing.B) {
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
_ = rGlob.Glob(Pattern, String)
|
_ = rGlob.Glob(Pattern, String)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
func BenchmarkRyanuberPlain(b *testing.B) {
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
_ = rGlob.Glob(PlainPattern, PlainString)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func BenchmarkRyanuberPrefixSuffix(b *testing.B) {
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
_ = rGlob.Glob(PSPattern, PSString)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
func BenchmarkRegExp(b *testing.B) {
|
func BenchmarkRegExp(b *testing.B) {
|
||||||
r := regexp.MustCompile(ExpPattern)
|
r := regexp.MustCompile(ExpPattern)
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
_ = r.Match([]byte(String))
|
_ = r.Match([]byte(String))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
func BenchmarkRegExpPrefixSuffix(b *testing.B) {
|
||||||
|
r := regexp.MustCompile(PSExpPattern)
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
_ = r.Match([]byte(PSString))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
var ALPHABET_S = []string{"a", "b", "c"}
|
var ALPHABET_S = []string{"a", "b", "c"}
|
||||||
|
|
||||||
const ALPHABET = "abc"
|
const ALPHABET = "abc"
|
||||||
|
const PREFIX = "faa"
|
||||||
const STR = "faafsdfcsdffc"
|
const STR = "faafsdfcsdffc"
|
||||||
|
|
||||||
func BenchmarkIndexOfAny(b *testing.B) {
|
func BenchmarkIndexOfAny(b *testing.B) {
|
||||||
|
@ -131,4 +195,4 @@ func BenchmarkFirstIndexOfChars(b *testing.B) {
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
firstIndexOfChars(STR, ALPHABET_S)
|
firstIndexOfChars(STR, ALPHABET_S)
|
||||||
}
|
}
|
||||||
}
|
}
|
13
readme.md
13
readme.md
|
@ -53,7 +53,7 @@ func main() {
|
||||||
|
|
||||||
## Performance
|
## Performance
|
||||||
|
|
||||||
In comparison with [go-glob](https://github.com/ryanuber/go-glob), it is ~2.7x faster (on my personal Mac),
|
In comparison with [go-glob](https://github.com/ryanuber/go-glob), it is ~2.5x faster (on my personal Mac),
|
||||||
because my impl compiles patterns for future usage. If you will not use compiled `glob.Glob` object,
|
because my impl compiles patterns for future usage. If you will not use compiled `glob.Glob` object,
|
||||||
and do `g := glob.New(pattern); g.Match(...)` every time, then your code will be about ~3x slower.
|
and do `g := glob.New(pattern); g.Match(...)` every time, then your code will be about ~3x slower.
|
||||||
|
|
||||||
|
@ -61,9 +61,16 @@ Run `go test bench=.` from source root to see the benchmarks:
|
||||||
|
|
||||||
Test | Operations | Speed
|
Test | Operations | Speed
|
||||||
-----|------------|------
|
-----|------------|------
|
||||||
github.com/gobwas/glob | 20000000 | 165 ns/op
|
github.com/gobwas/glob | 20000000 | 150 ns/op
|
||||||
github.com/ryanuber/go-glob | 10000000 | 452 ns/op
|
github.com/ryanuber/go-glob | 10000000 | 375 ns/op
|
||||||
|
|
||||||
|
Also, there are few simple optimizations, that help to test much faster patterns like `*abc`, `abc*` or `a*c`:
|
||||||
|
|
||||||
|
Test | Operations | Speed
|
||||||
|
-----|------------|------
|
||||||
|
prefix | 200000000 | 8.78 ns/op
|
||||||
|
suffix | 200000000 | 9.46 ns/op
|
||||||
|
prefix-suffix | 100000000 | 16.3 ns/op
|
||||||
|
|
||||||
[godoc-image]: https://godoc.org/github.com/gobwas/glob?status.svg
|
[godoc-image]: https://godoc.org/github.com/gobwas/glob?status.svg
|
||||||
[godoc-url]: https://godoc.org/github.com/gobwas/glob
|
[godoc-url]: https://godoc.org/github.com/gobwas/glob
|
||||||
|
|
Loading…
Reference in New Issue