forked from mirror/glob
Perf optimizations
This commit is contained in:
parent
c664939ee7
commit
21665ef529
211
glob.go
211
glob.go
|
@ -14,9 +14,23 @@ const (
|
|||
|
||||
var chars = []string{any, superAny, singleAny, escape}
|
||||
|
||||
type globKind int
|
||||
const(
|
||||
glob_raw globKind = iota
|
||||
glob_multiple_separated
|
||||
glob_multiple_super
|
||||
glob_single
|
||||
glob_composite
|
||||
glob_prefix
|
||||
glob_suffix
|
||||
glob_prefix_suffix
|
||||
)
|
||||
|
||||
// Glob represents a compiled glob pattern.
|
||||
type Glob interface {
|
||||
Match(string) bool
|
||||
search(string) (int, int, bool)
|
||||
kind() globKind
|
||||
}
|
||||
|
||||
// New creates a Glob for the given pattern; any additional strings are used as separators.
|
||||
|
@ -33,14 +47,36 @@ type Glob interface {
|
|||
func New(pattern string, d ...string) Glob {
|
||||
chunks := parse(pattern, nil, strings.Join(d, ""), false)
|
||||
|
||||
if len(chunks) == 1 {
|
||||
return chunks[0]
|
||||
switch len(chunks) {
|
||||
case 1:
|
||||
return chunks[0].glob
|
||||
case 2:
|
||||
if chunks[0].glob.kind() == glob_raw && chunks[1].glob.kind() == glob_multiple_super {
|
||||
return &prefix{chunks[0].str}
|
||||
}
|
||||
if chunks[1].glob.kind() == glob_raw && chunks[0].glob.kind() == glob_multiple_super {
|
||||
return &suffix{chunks[1].str}
|
||||
}
|
||||
case 3:
|
||||
if chunks[0].glob.kind() == glob_raw && chunks[1].glob.kind() == glob_multiple_super && chunks[2].glob.kind() == glob_raw {
|
||||
return &prefix_suffix{chunks[0].str, chunks[2].str}
|
||||
}
|
||||
}
|
||||
|
||||
return &composite{chunks}
|
||||
var c []Glob
|
||||
for _, chunk := range chunks {
|
||||
c = append(c, chunk.glob)
|
||||
}
|
||||
|
||||
return &composite{c}
|
||||
}
|
||||
|
||||
func parse(p string, m []Glob, d string, esc bool) []Glob {
|
||||
type token struct {
|
||||
glob Glob
|
||||
str string
|
||||
}
|
||||
|
||||
func parse(p string, m []token, d string, esc bool) []token {
|
||||
var e bool
|
||||
|
||||
if len(p) == 0 {
|
||||
|
@ -49,41 +85,62 @@ func parse(p string, m []Glob, d string, esc bool) []Glob {
|
|||
|
||||
i, c := firstIndexOfChars(p, chars)
|
||||
if i == -1 {
|
||||
return append(m, raw{p})
|
||||
return append(m, token{raw{p}, p})
|
||||
}
|
||||
|
||||
if i > 0 {
|
||||
m = append(m, raw{p[0:i]})
|
||||
m = append(m, token{raw{p[0:i]}, p[0:i]})
|
||||
}
|
||||
|
||||
if esc {
|
||||
m = append(m, raw{c})
|
||||
m = append(m, token{raw{c}, c})
|
||||
} else {
|
||||
switch c {
|
||||
case escape:
|
||||
e = true
|
||||
case superAny:
|
||||
m = append(m, multiple{})
|
||||
m = append(m, token{multiple{}, c})
|
||||
case any:
|
||||
m = append(m, multiple{d})
|
||||
m = append(m, token{multiple{d}, c})
|
||||
case singleAny:
|
||||
m = append(m, single{d})
|
||||
m = append(m, token{single{d}, c})
|
||||
}
|
||||
}
|
||||
|
||||
return parse(p[i+len(c):], m, d, e)
|
||||
}
|
||||
|
||||
// raw represents a literal string to match.
|
||||
type raw struct {
|
||||
s string
|
||||
}
|
||||
|
||||
func (self raw) Match(s string) bool {
|
||||
return self.s == s
|
||||
}
|
||||
|
||||
func (self raw) kind() globKind {
|
||||
return glob_raw
|
||||
}
|
||||
|
||||
func (self raw) search(s string) (i int, l int, ok bool) {
|
||||
index := strings.Index(s, self.s)
|
||||
if index == -1 {
|
||||
return
|
||||
}
|
||||
|
||||
i = index
|
||||
l = len(self.s)
|
||||
ok = true
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (self raw) String() string {
|
||||
return fmt.Sprintf("[raw:%s]", self.s)
|
||||
}
|
||||
|
||||
// multiple represents *
|
||||
type multiple struct {
|
||||
separators string
|
||||
}
|
||||
|
@ -92,10 +149,27 @@ func (self multiple) Match(s string) bool {
|
|||
return strings.IndexAny(s, self.separators) == -1
|
||||
}
|
||||
|
||||
func (self multiple) search(s string) (i int, l int, ok bool) {
|
||||
if self.Match(s) {
|
||||
return 0, len(s), true
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (self multiple) kind() globKind {
|
||||
if self.separators == "" {
|
||||
return glob_multiple_super
|
||||
} else {
|
||||
return glob_multiple_separated
|
||||
}
|
||||
}
|
||||
|
||||
func (self multiple) String() string {
|
||||
return fmt.Sprintf("[multiple:%s]", self.separators)
|
||||
}
|
||||
|
||||
// single represents ?
|
||||
type single struct {
|
||||
separators string
|
||||
}
|
||||
|
@ -104,36 +178,60 @@ func (self single) Match(s string) bool {
|
|||
return len(s) == 1 && strings.IndexAny(s, self.separators) == -1
|
||||
}
|
||||
|
||||
func (self single) search(s string) (i int, l int, ok bool) {
|
||||
if self.Match(s) {
|
||||
return 0, 1, true
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (self single) kind() globKind {
|
||||
return glob_single
|
||||
}
|
||||
|
||||
|
||||
func (self single) String() string {
|
||||
return fmt.Sprintf("[single:%s]", self.separators)
|
||||
}
|
||||
|
||||
|
||||
// composite
|
||||
type composite struct {
|
||||
chunks []Glob
|
||||
}
|
||||
|
||||
|
||||
func (self composite) Match(m string) bool {
|
||||
var prev Glob
|
||||
func (self composite) kind() globKind {
|
||||
return glob_composite
|
||||
}
|
||||
|
||||
for _, c := range self.chunks {
|
||||
if str, ok := c.(raw); ok {
|
||||
i := strings.Index(m, str.s)
|
||||
if i == -1 {
|
||||
func (self composite) search(s string) (i int, l int, ok bool) {
|
||||
if self.Match(s) {
|
||||
return 0, len(s), true
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func m(chunks []Glob, s string) bool {
|
||||
var prev Glob
|
||||
for _, c := range chunks {
|
||||
if c.kind() == glob_raw {
|
||||
i, l, ok := c.search(s)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
|
||||
l := len(str.s)
|
||||
|
||||
if prev != nil {
|
||||
if !prev.Match(m[:i]) {
|
||||
if !prev.Match(s[:i]) {
|
||||
return false
|
||||
}
|
||||
|
||||
prev = nil
|
||||
}
|
||||
|
||||
m = m[i+l:]
|
||||
s = s[i+l:]
|
||||
continue
|
||||
}
|
||||
|
||||
|
@ -141,10 +239,14 @@ func (self composite) Match(m string) bool {
|
|||
}
|
||||
|
||||
if prev != nil {
|
||||
return prev.Match(m)
|
||||
return prev.Match(s)
|
||||
}
|
||||
|
||||
return len(m) == 0
|
||||
return len(s) == 0
|
||||
}
|
||||
|
||||
func (self composite) Match(s string) bool {
|
||||
return m(self.chunks, s)
|
||||
}
|
||||
|
||||
func firstIndexOfChars(p string, any []string) (min int, c string) {
|
||||
|
@ -167,4 +269,67 @@ func firstIndexOfChars(p string, any []string) (min int, c string) {
|
|||
}
|
||||
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
type prefix struct {
|
||||
s string
|
||||
}
|
||||
|
||||
func (self prefix) kind() globKind {
|
||||
return glob_prefix
|
||||
}
|
||||
|
||||
func (self prefix) search(s string) (i int, l int, ok bool) {
|
||||
if self.Match(s) {
|
||||
return 0, len(s), true
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (self prefix) Match(s string) bool {
|
||||
return strings.HasPrefix(s, self.s)
|
||||
}
|
||||
|
||||
type suffix struct {
|
||||
s string
|
||||
}
|
||||
|
||||
func (self suffix) kind() globKind {
|
||||
return glob_suffix
|
||||
}
|
||||
|
||||
func (self suffix) search(s string) (i int, l int, ok bool) {
|
||||
if self.Match(s) {
|
||||
return 0, len(s), true
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (self suffix) Match(s string) bool {
|
||||
return strings.HasSuffix(s, self.s)
|
||||
}
|
||||
|
||||
type prefix_suffix struct {
|
||||
p, s string
|
||||
}
|
||||
|
||||
func (self prefix_suffix) kind() globKind {
|
||||
return glob_prefix_suffix
|
||||
}
|
||||
|
||||
func (self prefix_suffix) search(s string) (i int, l int, ok bool) {
|
||||
if self.Match(s) {
|
||||
return 0, len(s), true
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (self prefix_suffix) Match(s string) bool {
|
||||
return strings.HasPrefix(s, self.p) && strings.HasSuffix(s, self.s)
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
72
glob_test.go
72
glob_test.go
|
@ -61,6 +61,8 @@ func TestGlob(t *testing.T) {
|
|||
glob(true, `\*`, "*"),
|
||||
glob(true, "**", "a.b.c", "."),
|
||||
|
||||
glob(true, "* ?at * eyes", "my cat has very bright eyes"),
|
||||
|
||||
glob(false, "?at", "at"),
|
||||
glob(false, "?at", "fat", "f"),
|
||||
glob(false, "a.*", "a.b.c", "."),
|
||||
|
@ -90,12 +92,27 @@ const Pattern = "*cat*eyes*"
|
|||
const ExpPattern = ".*cat.*eyes.*"
|
||||
const String = "my cat has very bright eyes"
|
||||
|
||||
const ProfPattern = "* ?at * eyes"
|
||||
const ProfString = "my cat has very bright eyes"
|
||||
|
||||
//const Pattern = "*.google.com"
|
||||
//const ExpPattern = ".*google\\.com"
|
||||
//const String = "mail.google.com"
|
||||
// const Pattern = "google.com"
|
||||
// const ExpPattern = "google\\.com"
|
||||
// const String = "google.com"
|
||||
const PlainPattern = "google.com"
|
||||
const PlainExpPattern = "google\\.com"
|
||||
const PlainString = "google.com"
|
||||
|
||||
const PSPattern = "https://*.google.com"
|
||||
// PSExpPattern is the regular-expression counterpart of PSPattern.
// Inside a raw (backtick) string a single backslash already escapes the dot;
// writing `\\.` would instead demand a literal backslash followed by any
// character, so the expression could never match PSString.
const PSExpPattern = `https:\/\/[a-z]+\.google\.com`
|
||||
const PSString = "https://account.google.com"
|
||||
|
||||
func BenchmarkProf(b *testing.B) {
|
||||
m := New(Pattern)
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = m.Match(String)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkGobwas(b *testing.B) {
|
||||
m := New(Pattern)
|
||||
|
@ -104,22 +121,69 @@ func BenchmarkGobwas(b *testing.B) {
|
|||
_ = m.Match(String)
|
||||
}
|
||||
}
|
||||
func BenchmarkGobwasPlain(b *testing.B) {
|
||||
m := New(PlainPattern)
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = m.Match(PlainString)
|
||||
}
|
||||
}
|
||||
func BenchmarkGobwasPrefix(b *testing.B) {
|
||||
m := New("abc*")
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = m.Match("abcdef")
|
||||
}
|
||||
}
|
||||
func BenchmarkGobwasSuffix(b *testing.B) {
|
||||
m := New("*def")
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = m.Match("abcdef")
|
||||
}
|
||||
}
|
||||
func BenchmarkGobwasPrefixSuffix(b *testing.B) {
|
||||
m := New("ab*ef")
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = m.Match("abcdef")
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkRyanuber(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = rGlob.Glob(Pattern, String)
|
||||
}
|
||||
}
|
||||
func BenchmarkRyanuberPlain(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = rGlob.Glob(PlainPattern, PlainString)
|
||||
}
|
||||
}
|
||||
func BenchmarkRyanuberPrefixSuffix(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = rGlob.Glob(PSPattern, PSString)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
func BenchmarkRegExp(b *testing.B) {
|
||||
r := regexp.MustCompile(ExpPattern)
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = r.Match([]byte(String))
|
||||
}
|
||||
}
|
||||
func BenchmarkRegExpPrefixSuffix(b *testing.B) {
|
||||
r := regexp.MustCompile(PSExpPattern)
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = r.Match([]byte(PSString))
|
||||
}
|
||||
}
|
||||
|
||||
var ALPHABET_S = []string{"a", "b", "c"}
|
||||
|
||||
const ALPHABET = "abc"
|
||||
const PREFIX = "faa"
|
||||
const STR = "faafsdfcsdffc"
|
||||
|
||||
func BenchmarkIndexOfAny(b *testing.B) {
|
||||
|
@ -131,4 +195,4 @@ func BenchmarkFirstIndexOfChars(b *testing.B) {
|
|||
for i := 0; i < b.N; i++ {
|
||||
firstIndexOfChars(STR, ALPHABET_S)
|
||||
}
|
||||
}
|
||||
}
|
13
readme.md
13
readme.md
|
@ -53,7 +53,7 @@ func main() {
|
|||
|
||||
## Performance
|
||||
|
||||
In comparison with [go-glob](https://github.com/ryanuber/go-glob), it is ~2.7x faster (on my personal Mac),
|
||||
In comparison with [go-glob](https://github.com/ryanuber/go-glob), it is ~2.5x faster (on my personal Mac),
|
||||
because my implementation compiles patterns for later reuse. If you do not reuse the compiled `glob.Glob` object,
|
||||
and instead call `g := glob.New(pattern); g.Match(...)` every time, your code will be about ~3x slower.
|
||||
|
||||
|
@ -61,9 +61,16 @@ Run `go test -bench=.` from source root to see the benchmarks:
|
|||
|
||||
Test | Operations | Speed
|
||||
-----|------------|------
|
||||
github.com/gobwas/glob | 20000000 | 165 ns/op
|
||||
github.com/ryanuber/go-glob | 10000000 | 452 ns/op
|
||||
github.com/gobwas/glob | 20000000 | 150 ns/op
|
||||
github.com/ryanuber/go-glob | 10000000 | 375 ns/op
|
||||
|
||||
Also, there are a few simple optimizations that make matching patterns like `*abc`, `abc*` or `a*c` much faster:
|
||||
|
||||
Test | Operations | Speed
|
||||
-----|------------|------
|
||||
prefix | 200000000 | 8.78 ns/op
|
||||
suffix | 200000000 | 9.46 ns/op
|
||||
prefix-suffix | 100000000 | 16.3 ns/op
|
||||
|
||||
[godoc-image]: https://godoc.org/github.com/gobwas/glob?status.svg
|
||||
[godoc-url]: https://godoc.org/github.com/gobwas/glob
|
||||
|
|
Loading…
Reference in New Issue