add range, refactor

2015-12-24 17:54:54 +03:00 · 2015-12-24 17:54:54 +03:00 · 5b4ed87b27
parent 3c56fe78a7
commit 5b4ed87b27
13 changed files with 544 additions and 298 deletions
--- a/glob.go
+++ b/glob.go
@ -2,35 +2,28 @@ package glob
 import (
 	"strings"
-	"fmt"
+	"errors"
 	"github.com/gobwas/glob/match"
 )
 const (
-	any       = `*`
+	any         = '*'
-	superAny  = `**`
+	single = '?'
-	singleAny = `?`
+	escape      = '\\'
-	escape    = `\`
+	range_open  = '['
 	range_close = ']'
 )
-var chars = []string{any, superAny, singleAny, escape}
+const (
-
+	inside_range_not = '!'
-type globKind int
+	inside_range_minus = '-'
 // Kinds of compiled glob, as reported by each implementation's kind method.
 const(
 	// glob_raw is reported by raw (literal text).
 	glob_raw globKind = iota
 	// glob_multiple_separated is reported by multiple when it has separators.
 	glob_multiple_separated
 	// glob_multiple_super is reported by multiple when its separators are empty.
 	glob_multiple_super
 	// glob_single is reported by single.
 	glob_single
 	// glob_composite is reported by composite (a list of chunks).
 	glob_composite
 	// glob_prefix is reported by prefix.
 	glob_prefix
 	// glob_suffix is reported by suffix.
 	glob_suffix
 	// glob_prefix_suffix is reported by prefix_suffix.
 	glob_prefix_suffix
 )
 var syntaxPhrases = string([]byte{any, single, escape, range_open, range_close})
 // Glob represents compiled glob pattern.
 type Glob interface {
 	// Match reports whether the given string satisfies the pattern.
 	Match(string) bool
 	// search returns a start index, a length and a success flag for the
 	// part of the given string the pattern covers.
 	search(string) (int, int, bool)
 	// kind reports which kind of glob the implementation is.
 	kind() globKind
 }
 // New creates Glob for given pattern and uses other given (if any) strings as separators.
@ -44,292 +37,152 @@ type Glob interface {
 //		`?`         matches any single non-separator character
 //		c           matches character c (c != `*`, `**`, `?`, `\`)
 //		`\` c       matches character c
-func New(pattern string, separators ...string) Glob {
+func New(pattern string, separators ...string) (Glob, error) {
-	chunks := parse(pattern, nil, strings.Join(separators, ""), false)
+	chunks, err := parse(pattern, strings.Join(separators, ""), state{})
 	if err != nil {
 		return nil, err
 	}
 	switch len(chunks) {
 	case 1:
-		return chunks[0].glob
+		return chunks[0].matcher, nil
 	case 2:
-		if chunks[0].glob.kind() == glob_raw && chunks[1].glob.kind() == glob_multiple_super {
+		if chunks[0].matcher.Kind() == match.KindRaw && chunks[1].matcher.Kind() == match.KindMultipleSuper {
-			return &prefix{chunks[0].str}
+			return &match.Prefix{chunks[0].str}, nil
 		}
-		if chunks[1].glob.kind() == glob_raw && chunks[0].glob.kind() == glob_multiple_super {
+		if chunks[1].matcher.Kind() == match.KindRaw && chunks[0].matcher.Kind() == match.KindMultipleSuper {
-			return &suffix{chunks[1].str}
+			return &match.Suffix{chunks[1].str}, nil
 		}
 	case 3:
-		if chunks[0].glob.kind() == glob_raw && chunks[1].glob.kind() == glob_multiple_super && chunks[2].glob.kind() == glob_raw {
+		if chunks[0].matcher.Kind() == match.KindRaw && chunks[1].matcher.Kind() == match.KindMultipleSuper && chunks[2].matcher.Kind() == match.KindRaw {
-			return &prefix_suffix{chunks[0].str, chunks[2].str}
+			return &match.PrefixSuffix{chunks[0].str, chunks[2].str}, nil
 		}
 	}
-	var c []Glob
+	var c []match.Matcher
 	for _, chunk := range chunks {
-		c = append(c, chunk.glob)
+		c = append(c, chunk.matcher)
 	}
-	return &composite{c}
+	return &match.Composite{c}, nil
 }
 type token struct {
 	glob Glob
 	str string
 }
-func parse(p string, m []token, d string, esc bool) []token {
+// parse parsed given pattern into list of tokens
-	var e bool
+func parse(str string, sep string, st state) ([]token, error) {
-
+	if len(str) == 0 {
-	if len(p) == 0 {
+		return st.tokens, nil
 		return m
 	}
-	i, c := firstIndexOfChars(p, chars)
+	// if there are no syntax symbols - pattern is simple string
 	i := strings.IndexAny(str, syntaxPhrases)
 	if i == -1 {
-		return append(m, token{raw{p}, p})
+		return append(st.tokens, token{match.Raw{str}, str}), nil
 	}
 	c := string(str[i])
 	// if syntax symbol is not at the start of pattern - add raw part before it
 	if i > 0 {
-		m = append(m, token{raw{p[0:i]}, p[0:i]})
+		st.tokens = append(st.tokens, token{match.Raw{str[0:i]}, str[0:i]})
 	}
-	if esc {
+	// if we are in escape state
-		m = append(m, token{raw{c}, c})
+	if st.escape {
 		st.tokens = append(st.tokens, token{match.Raw{c}, c})
 		st.escape = false
 	} else {
-		switch c {
+		switch str[i] {
 		case range_open:
 			closed := indexByteNonEscaped(str, range_close, escape, 0)
 			if closed == -1 {
 				return nil, errors.New("invalid format")
 			}
 			r := str[i+1:closed]
 			g, err := parseRange(r)
 			if err != nil {
 				return nil, err
 			}
 			st.tokens = append(st.tokens, token{g, r})
 			if closed == len(str) -1 {
 				return st.tokens, nil
 			}
 			return parse(str[closed+1:], sep, st)
 		case escape:
-			e = true
+			st.escape = true
 		case superAny:
 			m = append(m, token{multiple{}, c})
 		case any:
-			m = append(m, token{multiple{d}, c})
+			if len(str) > i+1 && str[i+1] == any {
-		case singleAny:
+				st.tokens = append(st.tokens, token{match.Multiple{}, c})
-			m = append(m, token{single{d}, c})
+				return parse(str[i+len(c)+1:], sep, st)
 			}
 			st.tokens = append(st.tokens, token{match.Multiple{sep}, c})
 		case single:
 			st.tokens = append(st.tokens, token{match.Single{sep}, c})
 		}
 	}
-	return parse(p[i+len(c):], m, d, e)
+	return parse(str[i+len(c):], sep, st)
 }
 // raw is a pattern chunk consisting of literal text only.
 type raw struct {
 	s string
 }
 // Match reports whether s equals the literal text exactly.
 func (r raw) Match(s string) bool {
 	return s == r.s
 }
 // kind identifies the chunk as glob_raw.
 func (r raw) kind() globKind {
 	return glob_raw
 }
 // search looks for the first occurrence of the literal text inside s. On
 // success it returns the byte offset of the occurrence, the byte length of
 // the literal text and true; otherwise it returns 0, 0, false.
 func (r raw) search(s string) (int, int, bool) {
 	pos := strings.Index(s, r.s)
 	if pos < 0 {
 		return 0, 0, false
 	}
 	return pos, len(r.s), true
 }
 // String renders the chunk in a bracketed debug form.
 func (r raw) String() string {
 	return fmt.Sprintf("[raw:%s]", r.s)
 }
 // multiple is the `*` chunk: it accepts any string that contains none of
 // the separator characters.
 type multiple struct {
 	separators string
 }
 // Match reports whether s is free of every separator character.
 func (g multiple) Match(s string) bool {
 	return !strings.ContainsAny(s, g.separators)
 }
 // search covers the whole of s when s matches; otherwise it reports failure.
 func (g multiple) search(s string) (int, int, bool) {
 	if !g.Match(s) {
 		return 0, 0, false
 	}
 	return 0, len(s), true
 }
 // kind is glob_multiple_super when there are no separators and
 // glob_multiple_separated otherwise.
 func (g multiple) kind() globKind {
 	if g.separators == "" {
 		return glob_multiple_super
 	}
 	return glob_multiple_separated
 }
 // String renders the chunk in a bracketed debug form.
 func (g multiple) String() string {
 	return fmt.Sprintf("[multiple:%s]", g.separators)
 }
 // single is the `?` chunk: it accepts exactly one character that is not a
 // separator.
 type single struct {
 	separators string
 }
 // Match reports whether s is exactly one character (one rune, which may be
 // several bytes long) and that character is not a separator.
 func (g single) Match(s string) bool {
 	// Count runes rather than bytes so a multi-byte character still counts
 	// as a single character.
 	return len([]rune(s)) == 1 && strings.IndexAny(s, g.separators) == -1
 }
 // search covers the whole of s when s matches; the reported length is the
 // byte length of s, which can exceed 1 for a multi-byte character.
 func (g single) search(s string) (i int, l int, ok bool) {
 	if g.Match(s) {
 		return 0, len(s), true
 	}
 	return
 }
 // kind identifies the chunk as glob_single.
 func (g single) kind() globKind {
 	return glob_single
 }
-func (self single) String() string {
+func parseRange(def string) (match.Matcher, error) {
-	return fmt.Sprintf("[single:%s]", self.separators)
+	var (
-}
+		not   bool
 		esc   bool
 		minus bool
 		b   []byte
 	)
-
+	for i, c := range []byte(def) {
-// composite
+		if esc {
-type composite struct {
+			b = append(b, c)
-	chunks []Glob
+			esc = false
 }
 func (self composite) kind() globKind {
 	return glob_composite
 }
 func (self composite) search(s string) (i int, l int, ok bool) {
 	if self.Match(s) {
 		return 0, len(s), true
 	}
 	return
 }
 func m(chunks []Glob, s string) bool {
 	var prev Glob
 	for _, c := range chunks {
 		if c.kind() == glob_raw {
 			i, l, ok := c.search(s)
 			if !ok {
 				return false
 			}
 			if prev != nil {
 				if !prev.Match(s[:i]) {
 					return false
 				}
 				prev = nil
 			}
 			s = s[i+l:]
 			continue
 		}
-		prev = c
+		switch c{
-	}
+		case inside_range_not:
 			if i == 0 {
 				not = true
 			}
 		case escape:
 			if i == len(def) - 1 {
 				return nil, errors.New("escape character without follower")
 			}
-	if prev != nil {
+			esc = true
-		return prev.Match(s)
+		case inside_range_minus:
-	}
+			minus = true
-
+		default:
-	return len(s) == 0
+			b = append(b, c)
 }
 func (self composite) Match(s string) bool {
 	return m(self.chunks, s)
 }
 func firstIndexOfChars(p string, any []string) (min int, c string) {
 	l := len(p)
 	min = l
 	weight := 0
 	for _, s := range any {
 		w := len(s)
 		i := strings.Index(p, s)
 		if i != -1 && i <= min && w >= weight {
 			min = i
 			weight = w
 			c = s
 		}
 	}
-	if min == l {
+	def = string(b)
-		return -1, ""
+
 	if minus  {
 		r := []rune(def)
 		if len(r) != 3 || r[1] != inside_range_minus {
 			return nil, errors.New("invalid range syntax")
 		}
 		return &match.Between{r[0], r[2], not}, nil
 	}
-	return
+	return &match.RangeList{def, not}, nil
 }
-type prefix struct {
+type token struct {
-	s string
+	matcher match.Matcher
 	str     string
 }
-func (self prefix) kind() globKind {
+type state struct {
-	return glob_prefix
+	escape bool
 	tokens []token
 }
 // search covers the whole of s when s starts with the prefix; otherwise it
 // reports failure.
 func (p prefix) search(s string) (int, int, bool) {
 	if !p.Match(s) {
 		return 0, 0, false
 	}
 	return 0, len(s), true
 }
 // Match reports whether s starts with the stored prefix.
 func (p prefix) Match(s string) bool {
 	head := p.s
 	return strings.HasPrefix(s, head)
 }
 // suffix accepts any string that ends with a fixed text.
 type suffix struct {
 	s string
 }
 // kind identifies the glob as glob_suffix.
 func (x suffix) kind() globKind {
 	return glob_suffix
 }
 // search covers the whole of s when s ends with the suffix; otherwise it
 // reports failure.
 func (x suffix) search(s string) (int, int, bool) {
 	if !x.Match(s) {
 		return 0, 0, false
 	}
 	return 0, len(s), true
 }
 // Match reports whether s ends with the stored suffix.
 func (x suffix) Match(s string) bool {
 	tail := x.s
 	return strings.HasSuffix(s, tail)
 }
 // prefix_suffix accepts any string that starts with p and ends with s, with
 // arbitrary text (possibly empty) in between.
 type prefix_suffix struct {
 	p, s string
 }
 // kind identifies the glob as glob_prefix_suffix.
 func (ps prefix_suffix) kind() globKind {
 	return glob_prefix_suffix
 }
 // search covers the whole of s when s matches; otherwise it reports failure.
 func (ps prefix_suffix) search(s string) (i int, l int, ok bool) {
 	if ps.Match(s) {
 		return 0, len(s), true
 	}
 	return
 }
 // Match reports whether s starts with p and ends with s without the two
 // parts overlapping.
 func (ps prefix_suffix) Match(s string) bool {
 	// Without the length guard the prefix and suffix could share bytes, so
 	// that e.g. prefix "ab" and suffix "ba" would both be found in "aba".
 	if len(s) < len(ps.p)+len(ps.s) {
 		return false
 	}
 	return strings.HasPrefix(s, ps.p) && strings.HasSuffix(s, ps.s)
 }
--- a/glob_test.go
+++ b/glob_test.go
@ -3,7 +3,6 @@ package glob
 import (
 	rGlob "github.com/ryanuber/go-glob"
 	"regexp"
 	"strings"
 	"testing"
 )
@ -17,29 +16,40 @@ func glob(s bool, p, m string, d ...string) test {
 	return test{p, m, s, d}
 }
-func TestFirstIndexOfChars(t *testing.T) {
+func TestIndexOfNonEscaped(t *testing.T) {
 	for _, test := range []struct {
 		s string
-		c []string
+		n, e byte
 		i int
 		r string
 	}{
 		{
-			"**",
+			"\\n_n",
-			[]string{"**", "*"},
+			'n',
-			0,
+			'\\',
-			"**",
+			3,
 		},
 		{
-			"**",
+			"ab",
-			[]string{"*", "**"},
+			'a',
 			'\\',
 			0,
-			"**",
+		},
 		{
 			"ab",
 			'b',
 			'\\',
 			1,
 		},
 		{
 			"",
 			'b',
 			'\\',
 			-1,
 		},
 	} {
-		i, r := firstIndexOfChars(test.s, test.c)
+		i := indexByteNonEscaped(test.s, test.n, test.e, 0)
-		if i != test.i || r != test.r {
+		if i != test.i {
-			t.Errorf("unexpeted index: expected %q at %v, got %q at %v", test.r, test.i, r, i)
+			t.Errorf("unexpeted index: expected %v, got %v", test.i, i)
 		}
 	}
 }
@ -79,7 +89,11 @@ func TestGlob(t *testing.T) {
 		glob(false, "*is", "this is a test"),
 		glob(false, "*no*", "this is a test"),
 	} {
-		g := New(test.pattern, test.delimiters...)
+		g, err := New(test.pattern, test.delimiters...)
 		if err != nil {
 			t.Error(err)
 			continue
 		}
 		result := g.Match(test.match)
 		if result != test.should {
@ -107,7 +121,7 @@ const PSExpPattern = `https:\/\/[a-z]+\.google\\.com`
 const PSString = "https://account.google.com"
 func BenchmarkProf(b *testing.B) {
-	m := New(Pattern)
+	m, _ := New(Pattern)
 	for i := 0; i < b.N; i++ {
 		_ = m.Match(String)
@ -115,35 +129,35 @@ func BenchmarkProf(b *testing.B) {
 }
 func BenchmarkGobwas(b *testing.B) {
-	m := New(Pattern)
+	m, _ := New(Pattern)
 	for i := 0; i < b.N; i++ {
 		_ = m.Match(String)
 	}
 }
 func BenchmarkGobwasPlain(b *testing.B) {
-	m := New(PlainPattern)
+	m, _ := New(PlainPattern)
 	for i := 0; i < b.N; i++ {
 		_ = m.Match(PlainString)
 	}
 }
 func BenchmarkGobwasPrefix(b *testing.B) {
-	m := New("abc*")
+	m, _ := New("abc*")
 	for i := 0; i < b.N; i++ {
 		_ = m.Match("abcdef")
 	}
 }
 func BenchmarkGobwasSuffix(b *testing.B) {
-	m := New("*def")
+	m, _ := New("*def")
 	for i := 0; i < b.N; i++ {
 		_ = m.Match("abcdef")
 	}
 }
 func BenchmarkGobwasPrefixSuffix(b *testing.B) {
-	m := New("ab*ef")
+	m, _ := New("ab*ef")
 	for i := 0; i < b.N; i++ {
 		_ = m.Match("abcdef")
@ -178,21 +192,4 @@ func BenchmarkRegExpPrefixSuffix(b *testing.B) {
 	for i := 0; i < b.N; i++ {
 		_ = r.Match([]byte(PSString))
 	}
 }
 var ALPHABET_S = []string{"a", "b", "c"}
 const ALPHABET = "abc"
 const PREFIX = "faa"
 const STR = "faafsdfcsdffc"
 // BenchmarkIndexOfAny times strings.IndexAny over STR with the ALPHABET
 // character set.
 func BenchmarkIndexOfAny(b *testing.B) {
 	for left := b.N; left > 0; left-- {
 		strings.IndexAny(STR, ALPHABET)
 	}
 }
 // BenchmarkFirstIndexOfChars times firstIndexOfChars over STR with the
 // ALPHABET_S candidates.
 func BenchmarkFirstIndexOfChars(b *testing.B) {
 	for left := b.N; left > 0; left-- {
 		firstIndexOfChars(STR, ALPHABET_S)
 	}
 }
--- a/match/between.go
+++ b/match/between.go
@ -0,0 +1,38 @@
 package match
 import (
 	"fmt"
 )
 // Between matches a single rune against the inclusive range [Lo, Hi].
 type Between struct {
 	Lo, Hi rune // inclusive bounds of the range
 	Not    bool // when true, the range test is inverted
 }
 // Kind reports KindRangeBetween.
 func (b Between) Kind() Kind {
 	return KindRangeBetween
 }
 // Search covers the whole of s when s matches; otherwise it reports failure.
 func (b Between) Search(s string) (i int, l int, ok bool) {
 	if b.Match(s) {
 		return 0, len(s), true
 	}
 	return
 }
 // Match reports whether s is exactly one rune and that rune's membership in
 // [Lo, Hi] agrees with Not. Strings that are not exactly one rune never
 // match, regardless of Not.
 func (b Between) Match(s string) bool {
 	r := []rune(s)
 	if len(r) != 1 {
 		return false
 	}
 	inRange := r[0] >= b.Lo && r[0] <= b.Hi
 	return inRange != b.Not
 }
 // String renders the matcher in a bracketed debug form.
 func (b Between) String() string {
 	// Lo and Hi are runes: %c prints the character, whereas %s would print
 	// a "%!s(int32=...)" formatting error.
 	return fmt.Sprintf("[range_between:%c-%c(%t)]", b.Lo, b.Hi, b.Not)
 }
--- a/match/composite.go
+++ b/match/composite.go
@ -0,0 +1,70 @@
 package match
 import (
 	"strings"
 	"fmt"
 )
 // Composite is a matcher built from an ordered list of chunk matchers that
 // are applied to consecutive parts of the input.
 type Composite struct {
 	Chunks []Matcher
 }
 // Kind reports KindComposite.
 func (self Composite) Kind() Kind {
 	return KindComposite
 }
 // Search covers the whole of s when s matches; otherwise it returns the
 // zero values (0, 0, false).
 func (self Composite) Search(s string) (i int, l int, ok bool) {
 	if self.Match(s) {
 		return 0, len(s), true
 	}
 	return
 }
 // m walks chunks left to right, consuming s as it goes. Chunks of KindRaw
 // act as anchors: each one is located with Search, the text in front of it
 // is checked against the pending non-raw chunk (if any), and s is advanced
 // past the anchor. Whatever remains at the end must satisfy the pending
 // non-raw chunk, or be empty when there is none.
 //
 // NOTE(review): Raw.Search uses the first occurrence of its text and there
 // is no backtracking, so a later occurrence that would make the whole
 // pattern match is never tried - confirm this is acceptable.
 // NOTE(review): only the most recent non-raw chunk is remembered; when two
 // non-raw chunks are adjacent the earlier one is overwritten without being
 // checked.
 // NOTE(review): when an anchor is found at i > 0 and no non-raw chunk is
 // pending, the skipped text s[:i] is not validated.
 func m(chunks []Matcher, s string) bool {
 	// prev is the non-raw chunk waiting for the next anchor (or the end).
 	var prev Matcher
 	for _, c := range chunks {
 		if c.Kind() == KindRaw {
 			i, l, ok := c.Search(s)
 			if !ok {
 				return false
 			}
 			if prev != nil {
 				// The pending chunk must accept everything before the anchor.
 				if !prev.Match(s[:i]) {
 					return false
 				}
 				prev = nil
 			}
 			// Continue matching after the anchor.
 			s = s[i+l:]
 			continue
 		}
 		prev = c
 	}
 	if prev != nil {
 		return prev.Match(s)
 	}
 	// No pending chunk: the input must be fully consumed.
 	return len(s) == 0
 }
 // Match reports whether s satisfies the chunk list, as decided by m.
 func (self Composite) Match(s string) bool {
 	return m(self.Chunks, s)
 }
 // String renders the matcher as "[composite:...]" with the comma-separated
 // fmt.Sprint form of every chunk.
 func (self Composite) String() string {
 	var l []string
 	for _, c := range self.Chunks {
 		l = append(l, fmt.Sprint(c))
 	}
 	return fmt.Sprintf("[composite:%s]", strings.Join(l, ","))
 }
--- a/match/list.go
+++ b/match/list.go
@ -0,0 +1,41 @@
 package match
 import (
 	"strings"
 	"fmt"
 )
 // RangeList matches a single rune against an explicit set of runes.
 type RangeList struct {
 	List string // the runes that make up the set
 	Not  bool   // when true, the membership test is inverted
 }
 // Kind reports KindRangeList.
 func (rl RangeList) Kind() Kind {
 	return KindRangeList
 }
 // Search covers the whole of s when s matches; otherwise it reports failure.
 func (rl RangeList) Search(s string) (int, int, bool) {
 	if !rl.Match(s) {
 		return 0, 0, false
 	}
 	return 0, len(s), true
 }
 // Match reports whether s is exactly one rune and that rune's membership in
 // List agrees with Not. Strings that are not exactly one rune never match.
 func (rl RangeList) Match(s string) bool {
 	runes := []rune(s)
 	if len(runes) != 1 {
 		return false
 	}
 	member := strings.ContainsRune(rl.List, runes[0])
 	return member != rl.Not
 }
 // String renders the matcher in a bracketed debug form.
 func (rl RangeList) String() string {
 	return fmt.Sprintf("[range_list:%s]", rl.List)
 }
--- a/match/match.go
+++ b/match/match.go
@ -0,0 +1,22 @@
 package match
 // Kind identifies the concrete sort of a Matcher.
 type Kind int
 const(
 	// KindRaw is reported by Raw (literal text).
 	KindRaw Kind = iota
 	// KindMultipleSeparated is reported by Multiple when it has separators.
 	KindMultipleSeparated
 	// KindMultipleSuper is reported by Multiple when its separators are empty.
 	KindMultipleSuper
 	// KindSingle is reported by Single.
 	KindSingle
 	// KindComposite is reported by Composite.
 	KindComposite
 	// KindPrefix is reported by Prefix.
 	KindPrefix
 	// KindSuffix is reported by Suffix.
 	KindSuffix
 	// KindPrefixSuffix is the kind associated with PrefixSuffix.
 	KindPrefixSuffix
 	// KindRangeBetween is reported by Between.
 	KindRangeBetween
 	// KindRangeList is reported by RangeList.
 	KindRangeList
 )
 // Matcher is the behaviour shared by every pattern chunk.
 type Matcher interface {
 	// Match reports whether the given string satisfies the matcher.
 	Match(string) bool
 	// Search returns a start index, a length and a success flag for the
 	// part of the given string the matcher covers.
 	Search(string) (int, int, bool)
 	// Kind reports which kind of matcher this is.
 	Kind() Kind
 }
--- a/match/multiple.go
+++ b/match/multiple.go
@ -0,0 +1,35 @@
 package match
 import (
 	"strings"
 	"fmt"
 )
 // Multiple is the `*` matcher: it accepts any string that contains none of
 // the separator characters.
 type Multiple struct {
 	Separators string
 }
 // Match reports whether s is free of every separator character.
 func (mu Multiple) Match(s string) bool {
 	return !strings.ContainsAny(s, mu.Separators)
 }
 // Search covers the whole of s when s matches; otherwise it reports failure.
 func (mu Multiple) Search(s string) (int, int, bool) {
 	if !mu.Match(s) {
 		return 0, 0, false
 	}
 	return 0, len(s), true
 }
 // Kind is KindMultipleSuper when there are no separators and
 // KindMultipleSeparated otherwise.
 func (mu Multiple) Kind() Kind {
 	if mu.Separators == "" {
 		return KindMultipleSuper
 	}
 	return KindMultipleSeparated
 }
 // String renders the matcher in a bracketed debug form.
 func (mu Multiple) String() string {
 	return fmt.Sprintf("[multiple:%s]", mu.Separators)
 }
--- a/match/prefix.go
+++ b/match/prefix.go
@ -0,0 +1,32 @@
 package match
 import (
 	"strings"
 	"fmt"
 )
 // Prefix accepts any string that starts with a fixed text.
 type Prefix struct {
 	Prefix string
 }
 // Kind reports KindPrefix.
 func (p Prefix) Kind() Kind {
 	return KindPrefix
 }
 // Search covers the whole of s when s starts with the prefix; otherwise it
 // reports failure.
 func (p Prefix) Search(s string) (int, int, bool) {
 	if !p.Match(s) {
 		return 0, 0, false
 	}
 	return 0, len(s), true
 }
 // Match reports whether s starts with the stored prefix.
 func (p Prefix) Match(s string) bool {
 	head := p.Prefix
 	return strings.HasPrefix(s, head)
 }
 // String renders the matcher in a bracketed debug form.
 func (p Prefix) String() string {
 	return fmt.Sprintf("[prefix:%s]", p.Prefix)
 }
--- a/match/prefix_suffix.go
+++ b/match/prefix_suffix.go
@ -0,0 +1,33 @@
 package match
 import (
 	"strings"
 	"fmt"
 )
 // PrefixSuffix accepts any string that starts with Prefix and ends with
 // Suffix, with arbitrary text (possibly empty) in between.
 type PrefixSuffix struct {
 	Prefix, Suffix string
 }
 // Kind reports KindPrefixSuffix. It is exported, like Search, so that
 // PrefixSuffix satisfies the Matcher interface.
 func (ps PrefixSuffix) Kind() Kind {
 	return KindPrefixSuffix
 }
 // Search covers the whole of s when s matches; otherwise it reports failure.
 func (ps PrefixSuffix) Search(s string) (i int, l int, ok bool) {
 	if ps.Match(s) {
 		return 0, len(s), true
 	}
 	return
 }
 // Match reports whether s starts with Prefix and ends with Suffix without
 // the two parts overlapping.
 func (ps PrefixSuffix) Match(s string) bool {
 	// Without the length guard the prefix and suffix could share bytes, so
 	// that e.g. Prefix "ab" and Suffix "ba" would both be found in "aba".
 	if len(s) < len(ps.Prefix)+len(ps.Suffix) {
 		return false
 	}
 	return strings.HasPrefix(s, ps.Prefix) && strings.HasSuffix(s, ps.Suffix)
 }
 // String renders the matcher in a bracketed debug form.
 func (ps PrefixSuffix) String() string {
 	return fmt.Sprintf("[prefix_suffix:%s-%s]", ps.Prefix, ps.Suffix)
 }
--- a/match/raw.go
+++ b/match/raw.go
@ -0,0 +1,36 @@
 package match
 import (
 	"strings"
 	"fmt"
 )
 // Raw is a matcher for literal text.
 type Raw struct {
 	Str string
 }
 // Match reports whether s equals the literal text exactly.
 func (r Raw) Match(s string) bool {
 	return s == r.Str
 }
 // Kind reports KindRaw.
 func (r Raw) Kind() Kind {
 	return KindRaw
 }
 // Search looks for the first occurrence of the literal text inside s. On
 // success it returns the byte offset of the occurrence, the byte length of
 // the literal text and true; otherwise it returns 0, 0, false.
 func (r Raw) Search(s string) (int, int, bool) {
 	pos := strings.Index(s, r.Str)
 	if pos < 0 {
 		return 0, 0, false
 	}
 	return pos, len(r.Str), true
 }
 // String renders the matcher in a bracketed debug form.
 func (r Raw) String() string {
 	return fmt.Sprintf("[raw:%s]", r.Str)
 }
--- a/match/single.go
+++ b/match/single.go
@ -0,0 +1,33 @@
 package match
 import (
 	"strings"
 	"fmt"
 )
 // Single is the `?` matcher: it accepts exactly one rune that is not a
 // separator.
 type Single struct {
 	Separators string
 }
 // Match reports whether s is exactly one rune and that rune is not one of
 // the separator characters.
 func (sg Single) Match(s string) bool {
 	if len([]rune(s)) != 1 {
 		return false
 	}
 	return !strings.ContainsAny(s, sg.Separators)
 }
 // Search covers the whole of s when s matches; otherwise it reports failure.
 func (sg Single) Search(s string) (int, int, bool) {
 	if !sg.Match(s) {
 		return 0, 0, false
 	}
 	return 0, len(s), true
 }
 // Kind reports KindSingle.
 func (sg Single) Kind() Kind {
 	return KindSingle
 }
 // String renders the matcher in a bracketed debug form.
 func (sg Single) String() string {
 	return fmt.Sprintf("[single:%s]", sg.Separators)
 }
--- a/match/suffix.go
+++ b/match/suffix.go
@ -0,0 +1,36 @@
 package match
 import (
 	"strings"
 	"fmt"
 )
 // Suffix accepts any string that ends with a fixed text.
 type Suffix struct {
 	Suffix string
 }
 // Kind reports KindSuffix.
 func (x Suffix) Kind() Kind {
 	return KindSuffix
 }
 // Search covers the whole of s when s ends with the suffix; otherwise it
 // reports failure.
 func (x Suffix) Search(s string) (int, int, bool) {
 	if !x.Match(s) {
 		return 0, 0, false
 	}
 	return 0, len(s), true
 }
 // Match reports whether s ends with the stored suffix.
 func (x Suffix) Match(s string) bool {
 	tail := x.Suffix
 	return strings.HasSuffix(s, tail)
 }
 // String renders the matcher in a bracketed debug form.
 func (x Suffix) String() string {
 	return fmt.Sprintf("[suffix:%s]", x.Suffix)
 }
--- a/util.go
+++ b/util.go
@ -0,0 +1,20 @@
 package glob
 import (
 	"strings"
 )
 // indexByteNonEscaped returns the index of the first occurrence of needle in
 // source that is not immediately preceded by the escape byte, plus shift.
 // It returns -1 (without adding shift) when there is no such occurrence.
 //
 // Only the single byte in front of needle is inspected, so a doubled escape
 // (an escaped escape byte) in front of needle still counts as escaping it.
 func indexByteNonEscaped(source string, needle, escape byte, shift int) int {
 	// offset is where the next scan starts; it moves past every escaped
 	// occurrence so that reported indexes stay relative to source.
 	offset := 0
 	for {
 		i := strings.IndexByte(source[offset:], needle)
 		if i == -1 {
 			return -1
 		}
 		i += offset
 		if i == 0 || source[i-1] != escape {
 			return i + shift
 		}
 		offset = i + 1
 	}
 }