add range, refactor

This commit is contained in:
s.kamardin 2015-12-24 17:54:54 +03:00
parent 3c56fe78a7
commit 5b4ed87b27
13 changed files with 544 additions and 298 deletions

373
glob.go
View File

@ -2,35 +2,28 @@ package glob
import ( import (
"strings" "strings"
"fmt" "errors"
"github.com/gobwas/glob/match"
) )
const ( const (
any = `*` any = '*'
superAny = `**` single = '?'
singleAny = `?` escape = '\\'
escape = `\` range_open = '['
range_close = ']'
) )
var chars = []string{any, superAny, singleAny, escape} const (
inside_range_not = '!'
type globKind int inside_range_minus = '-'
const(
glob_raw globKind = iota
glob_multiple_separated
glob_multiple_super
glob_single
glob_composite
glob_prefix
glob_suffix
glob_prefix_suffix
) )
var syntaxPhrases = string([]byte{any, single, escape, range_open, range_close})
// Glob represents compiled glob pattern. // Glob represents compiled glob pattern.
type Glob interface { type Glob interface {
Match(string) bool Match(string) bool
search(string) (int, int, bool)
kind() globKind
} }
// New creates Glob for given pattern and uses other given (if any) strings as separators. // New creates Glob for given pattern and uses other given (if any) strings as separators.
@ -44,292 +37,152 @@ type Glob interface {
// `?` matches any single non-separator character // `?` matches any single non-separator character
// c matches character c (c != `*`, `**`, `?`, `\`) // c matches character c (c != `*`, `**`, `?`, `\`)
// `\` c matches character c // `\` c matches character c
func New(pattern string, separators ...string) Glob { func New(pattern string, separators ...string) (Glob, error) {
chunks := parse(pattern, nil, strings.Join(separators, ""), false) chunks, err := parse(pattern, strings.Join(separators, ""), state{})
if err != nil {
return nil, err
}
switch len(chunks) { switch len(chunks) {
case 1: case 1:
return chunks[0].glob return chunks[0].matcher, nil
case 2: case 2:
if chunks[0].glob.kind() == glob_raw && chunks[1].glob.kind() == glob_multiple_super { if chunks[0].matcher.Kind() == match.KindRaw && chunks[1].matcher.Kind() == match.KindMultipleSuper {
return &prefix{chunks[0].str} return &match.Prefix{chunks[0].str}, nil
} }
if chunks[1].glob.kind() == glob_raw && chunks[0].glob.kind() == glob_multiple_super { if chunks[1].matcher.Kind() == match.KindRaw && chunks[0].matcher.Kind() == match.KindMultipleSuper {
return &suffix{chunks[1].str} return &match.Suffix{chunks[1].str}, nil
} }
case 3: case 3:
if chunks[0].glob.kind() == glob_raw && chunks[1].glob.kind() == glob_multiple_super && chunks[2].glob.kind() == glob_raw { if chunks[0].matcher.Kind() == match.KindRaw && chunks[1].matcher.Kind() == match.KindMultipleSuper && chunks[2].matcher.Kind() == match.KindRaw {
return &prefix_suffix{chunks[0].str, chunks[2].str} return &match.PrefixSuffix{chunks[0].str, chunks[2].str}, nil
} }
} }
var c []Glob var c []match.Matcher
for _, chunk := range chunks { for _, chunk := range chunks {
c = append(c, chunk.glob) c = append(c, chunk.matcher)
} }
return &composite{c} return &match.Composite{c}, nil
} }
type token struct {
glob Glob
str string
}
func parse(p string, m []token, d string, esc bool) []token { // parse parsed given pattern into list of tokens
var e bool func parse(str string, sep string, st state) ([]token, error) {
if len(str) == 0 {
if len(p) == 0 { return st.tokens, nil
return m
} }
i, c := firstIndexOfChars(p, chars) // if there are no syntax symbols - pattern is simple string
i := strings.IndexAny(str, syntaxPhrases)
if i == -1 { if i == -1 {
return append(m, token{raw{p}, p}) return append(st.tokens, token{match.Raw{str}, str}), nil
} }
c := string(str[i])
// if syntax symbol is not at the start of pattern - add raw part before it
if i > 0 { if i > 0 {
m = append(m, token{raw{p[0:i]}, p[0:i]}) st.tokens = append(st.tokens, token{match.Raw{str[0:i]}, str[0:i]})
} }
if esc { // if we are in escape state
m = append(m, token{raw{c}, c}) if st.escape {
st.tokens = append(st.tokens, token{match.Raw{c}, c})
st.escape = false
} else { } else {
switch c { switch str[i] {
case range_open:
closed := indexByteNonEscaped(str, range_close, escape, 0)
if closed == -1 {
return nil, errors.New("invalid format")
}
r := str[i+1:closed]
g, err := parseRange(r)
if err != nil {
return nil, err
}
st.tokens = append(st.tokens, token{g, r})
if closed == len(str) -1 {
return st.tokens, nil
}
return parse(str[closed+1:], sep, st)
case escape: case escape:
e = true st.escape = true
case superAny:
m = append(m, token{multiple{}, c})
case any: case any:
m = append(m, token{multiple{d}, c}) if len(str) > i+1 && str[i+1] == any {
case singleAny: st.tokens = append(st.tokens, token{match.Multiple{}, c})
m = append(m, token{single{d}, c}) return parse(str[i+len(c)+1:], sep, st)
}
st.tokens = append(st.tokens, token{match.Multiple{sep}, c})
case single:
st.tokens = append(st.tokens, token{match.Single{sep}, c})
} }
} }
return parse(p[i+len(c):], m, d, e) return parse(str[i+len(c):], sep, st)
}
// raw represents raw string to match
type raw struct {
s string
}
func (self raw) Match(s string) bool {
return self.s == s
}
func (self raw) kind() globKind {
return glob_raw
}
func (self raw) search(s string) (i int, l int, ok bool) {
index := strings.Index(s, self.s)
if index == -1 {
return
}
i = index
l = len(self.s)
ok = true
return
}
func (self raw) String() string {
return fmt.Sprintf("[raw:%s]", self.s)
}
// multiple represents *
type multiple struct {
separators string
}
func (self multiple) Match(s string) bool {
return strings.IndexAny(s, self.separators) == -1
}
func (self multiple) search(s string) (i int, l int, ok bool) {
if self.Match(s) {
return 0, len(s), true
}
return
}
func (self multiple) kind() globKind {
if self.separators == "" {
return glob_multiple_super
} else {
return glob_multiple_separated
}
}
func (self multiple) String() string {
return fmt.Sprintf("[multiple:%s]", self.separators)
}
// single represents ?
type single struct {
separators string
}
func (self single) Match(s string) bool {
return len(s) == 1 && strings.IndexAny(s, self.separators) == -1
}
func (self single) search(s string) (i int, l int, ok bool) {
if self.Match(s) {
return 0, 1, true
}
return
}
func (self single) kind() globKind {
return glob_single
} }
func (self single) String() string { func parseRange(def string) (match.Matcher, error) {
return fmt.Sprintf("[single:%s]", self.separators) var (
} not bool
esc bool
minus bool
b []byte
)
for i, c := range []byte(def) {
// composite if esc {
type composite struct { b = append(b, c)
chunks []Glob esc = false
}
func (self composite) kind() globKind {
return glob_composite
}
func (self composite) search(s string) (i int, l int, ok bool) {
if self.Match(s) {
return 0, len(s), true
}
return
}
func m(chunks []Glob, s string) bool {
var prev Glob
for _, c := range chunks {
if c.kind() == glob_raw {
i, l, ok := c.search(s)
if !ok {
return false
}
if prev != nil {
if !prev.Match(s[:i]) {
return false
}
prev = nil
}
s = s[i+l:]
continue continue
} }
prev = c switch c{
} case inside_range_not:
if i == 0 {
not = true
}
case escape:
if i == len(def) - 1 {
return nil, errors.New("escape character without follower")
}
if prev != nil { esc = true
return prev.Match(s) case inside_range_minus:
} minus = true
default:
return len(s) == 0 b = append(b, c)
}
func (self composite) Match(s string) bool {
return m(self.chunks, s)
}
func firstIndexOfChars(p string, any []string) (min int, c string) {
l := len(p)
min = l
weight := 0
for _, s := range any {
w := len(s)
i := strings.Index(p, s)
if i != -1 && i <= min && w >= weight {
min = i
weight = w
c = s
} }
} }
if min == l { def = string(b)
return -1, ""
if minus {
r := []rune(def)
if len(r) != 3 || r[1] != inside_range_minus {
return nil, errors.New("invalid range syntax")
}
return &match.Between{r[0], r[2], not}, nil
} }
return return &match.RangeList{def, not}, nil
} }
type prefix struct { type token struct {
s string matcher match.Matcher
str string
} }
func (self prefix) kind() globKind { type state struct {
return glob_prefix escape bool
tokens []token
} }
func (self prefix) search(s string) (i int, l int, ok bool) {
if self.Match(s) {
return 0, len(s), true
}
return
}
func (self prefix) Match(s string) bool {
return strings.HasPrefix(s, self.s)
}
type suffix struct {
s string
}
func (self suffix) kind() globKind {
return glob_suffix
}
func (self suffix) search(s string) (i int, l int, ok bool) {
if self.Match(s) {
return 0, len(s), true
}
return
}
func (self suffix) Match(s string) bool {
return strings.HasSuffix(s, self.s)
}
type prefix_suffix struct {
p, s string
}
func (self prefix_suffix) kind() globKind {
return glob_prefix_suffix
}
func (self prefix_suffix) search(s string) (i int, l int, ok bool) {
if self.Match(s) {
return 0, len(s), true
}
return
}
func (self prefix_suffix) Match(s string) bool {
return strings.HasPrefix(s, self.p) && strings.HasSuffix(s, self.s)
}

View File

@ -3,7 +3,6 @@ package glob
import ( import (
rGlob "github.com/ryanuber/go-glob" rGlob "github.com/ryanuber/go-glob"
"regexp" "regexp"
"strings"
"testing" "testing"
) )
@ -17,29 +16,40 @@ func glob(s bool, p, m string, d ...string) test {
return test{p, m, s, d} return test{p, m, s, d}
} }
func TestFirstIndexOfChars(t *testing.T) { func TestIndexOfNonEscaped(t *testing.T) {
for _, test := range []struct { for _, test := range []struct {
s string s string
c []string n, e byte
i int i int
r string
}{ }{
{ {
"**", "\\n_n",
[]string{"**", "*"}, 'n',
0, '\\',
"**", 3,
}, },
{ {
"**", "ab",
[]string{"*", "**"}, 'a',
'\\',
0, 0,
"**", },
{
"ab",
'b',
'\\',
1,
},
{
"",
'b',
'\\',
-1,
}, },
} { } {
i, r := firstIndexOfChars(test.s, test.c) i := indexByteNonEscaped(test.s, test.n, test.e, 0)
if i != test.i || r != test.r { if i != test.i {
t.Errorf("unexpeted index: expected %q at %v, got %q at %v", test.r, test.i, r, i) t.Errorf("unexpeted index: expected %v, got %v", test.i, i)
} }
} }
} }
@ -79,7 +89,11 @@ func TestGlob(t *testing.T) {
glob(false, "*is", "this is a test"), glob(false, "*is", "this is a test"),
glob(false, "*no*", "this is a test"), glob(false, "*no*", "this is a test"),
} { } {
g := New(test.pattern, test.delimiters...) g, err := New(test.pattern, test.delimiters...)
if err != nil {
t.Error(err)
continue
}
result := g.Match(test.match) result := g.Match(test.match)
if result != test.should { if result != test.should {
@ -107,7 +121,7 @@ const PSExpPattern = `https:\/\/[a-z]+\.google\\.com`
const PSString = "https://account.google.com" const PSString = "https://account.google.com"
func BenchmarkProf(b *testing.B) { func BenchmarkProf(b *testing.B) {
m := New(Pattern) m, _ := New(Pattern)
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
_ = m.Match(String) _ = m.Match(String)
@ -115,35 +129,35 @@ func BenchmarkProf(b *testing.B) {
} }
func BenchmarkGobwas(b *testing.B) { func BenchmarkGobwas(b *testing.B) {
m := New(Pattern) m, _ := New(Pattern)
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
_ = m.Match(String) _ = m.Match(String)
} }
} }
func BenchmarkGobwasPlain(b *testing.B) { func BenchmarkGobwasPlain(b *testing.B) {
m := New(PlainPattern) m, _ := New(PlainPattern)
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
_ = m.Match(PlainString) _ = m.Match(PlainString)
} }
} }
func BenchmarkGobwasPrefix(b *testing.B) { func BenchmarkGobwasPrefix(b *testing.B) {
m := New("abc*") m, _ := New("abc*")
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
_ = m.Match("abcdef") _ = m.Match("abcdef")
} }
} }
func BenchmarkGobwasSuffix(b *testing.B) { func BenchmarkGobwasSuffix(b *testing.B) {
m := New("*def") m, _ := New("*def")
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
_ = m.Match("abcdef") _ = m.Match("abcdef")
} }
} }
func BenchmarkGobwasPrefixSuffix(b *testing.B) { func BenchmarkGobwasPrefixSuffix(b *testing.B) {
m := New("ab*ef") m, _ := New("ab*ef")
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
_ = m.Match("abcdef") _ = m.Match("abcdef")
@ -178,21 +192,4 @@ func BenchmarkRegExpPrefixSuffix(b *testing.B) {
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
_ = r.Match([]byte(PSString)) _ = r.Match([]byte(PSString))
} }
}
var ALPHABET_S = []string{"a", "b", "c"}
const ALPHABET = "abc"
const PREFIX = "faa"
const STR = "faafsdfcsdffc"
func BenchmarkIndexOfAny(b *testing.B) {
for i := 0; i < b.N; i++ {
strings.IndexAny(STR, ALPHABET)
}
}
func BenchmarkFirstIndexOfChars(b *testing.B) {
for i := 0; i < b.N; i++ {
firstIndexOfChars(STR, ALPHABET_S)
}
} }

38
match/between.go Normal file
View File

@ -0,0 +1,38 @@
package match
import (
"fmt"
)
// Between matches a string that consists of exactly one rune lying inside
// the inclusive interval [Lo, Hi]. When Not is set the interval test is
// inverted; the "exactly one rune" requirement is not.
type Between struct {
	Lo, Hi rune
	Not    bool
}

// Kind reports KindRangeBetween.
func (b Between) Kind() Kind {
	return KindRangeBetween
}

// Search reports the span (0, len(s)) with ok == true when the whole of s
// matches, and zero values with ok == false otherwise.
func (b Between) Search(s string) (i int, l int, ok bool) {
	if !b.Match(s) {
		return 0, 0, false
	}
	return 0, len(s), true
}

// Match reports whether s is a single rune accepted by the interval test.
// Empty strings and strings of two or more runes never match.
func (b Between) Match(s string) bool {
	var (
		first rune
		seen  bool
	)
	// Ranging over the string decodes runes without allocating a []rune and
	// lets us stop as soon as a second rune shows up.
	for _, r := range s {
		if seen {
			return false
		}
		first, seen = r, true
	}
	if !seen {
		return false
	}

	inRange := b.Lo <= first && first <= b.Hi
	return inRange != b.Not
}

// String renders the matcher for debugging. The bounds are runes, so they are
// printed with %c; %s would render them as "%!s(int32=...)".
func (b Between) String() string {
	return fmt.Sprintf("[range_between:%c-%c(%t)]", b.Lo, b.Hi, b.Not)
}

70
match/composite.go Normal file
View File

@ -0,0 +1,70 @@
package match
import (
"strings"
"fmt"
)
// Composite matches a string against an ordered list of matchers.
type Composite struct {
	Chunks []Matcher
}

// Kind reports KindComposite.
func (c Composite) Kind() Kind {
	return KindComposite
}

// Search reports the span (0, len(s)) with ok == true when the whole of s
// matches, and zero values with ok == false otherwise.
func (c Composite) Search(s string) (i int, l int, ok bool) {
	if !c.Match(s) {
		return 0, 0, false
	}
	return 0, len(s), true
}

// m reports whether s is accepted by the chunk sequence.
//
// Chunks of KindRaw are located in the remaining input with Search. A chunk of
// any other kind is remembered in prev and must accept the text between the
// current position and the next raw chunk (or the rest of the input when no
// raw chunk follows).
//
// NOTE(review): only the most recent non-raw chunk is remembered, so of
// several consecutive non-raw chunks only the last one is checked, and raw
// chunks are always taken at their first occurrence (no backtracking).
func m(chunks []Matcher, s string) bool {
	var prev Matcher

	for _, c := range chunks {
		if c.Kind() != KindRaw {
			prev = c
			continue
		}

		i, l, ok := c.Search(s)
		if !ok {
			return false
		}

		if prev == nil {
			// Nothing precedes this literal that could absorb text, so it
			// has to start exactly at the current position. Without this
			// check any unmatched text in front of it was silently skipped.
			if i != 0 {
				return false
			}
		} else {
			if !prev.Match(s[:i]) {
				return false
			}
			prev = nil
		}

		s = s[i+l:]
	}

	if prev != nil {
		return prev.Match(s)
	}

	// All chunks consumed: the input must be consumed as well.
	return len(s) == 0
}

// Match reports whether s is accepted by the chunk sequence.
func (c Composite) Match(s string) bool {
	return m(c.Chunks, s)
}

// String renders the chunks, comma separated, for debugging.
func (c Composite) String() string {
	parts := make([]string, 0, len(c.Chunks))
	for _, chunk := range c.Chunks {
		parts = append(parts, fmt.Sprint(chunk))
	}
	return fmt.Sprintf("[composite:%s]", strings.Join(parts, ","))
}

41
match/list.go Normal file
View File

@ -0,0 +1,41 @@
package match
import (
"strings"
"fmt"
)
// RangeList matches a string that consists of exactly one rune which is
// contained in List. When Not is set the membership test is inverted.
type RangeList struct {
	List string
	Not  bool
}

// Kind reports KindRangeList.
func (r RangeList) Kind() Kind {
	return KindRangeList
}

// Search reports the span (0, len(s)) with ok == true when the whole of s
// matches, and zero values with ok == false otherwise.
func (r RangeList) Search(s string) (i int, l int, ok bool) {
	if !r.Match(s) {
		return 0, 0, false
	}
	return 0, len(s), true
}

// Match reports whether s is a single rune accepted by the membership test.
func (r RangeList) Match(s string) bool {
	runes := []rune(s)
	if len(runes) != 1 {
		return false
	}
	return strings.ContainsRune(r.List, runes[0]) != r.Not
}

// String renders the list for debugging.
func (r RangeList) String() string {
	return fmt.Sprintf("[range_list:%s]", r.List)
}

22
match/match.go Normal file
View File

@ -0,0 +1,22 @@
package match
// Kind identifies which sort of matcher a Matcher is; it is the value
// returned by the Kind method of each matcher type in this package.
type Kind int
const(
// KindRaw is reported by Raw.
KindRaw Kind = iota
// KindMultipleSeparated is reported by Multiple when Separators is non-empty.
KindMultipleSeparated
// KindMultipleSuper is reported by Multiple when Separators is empty.
KindMultipleSuper
// KindSingle is reported by Single.
KindSingle
// KindComposite is reported by Composite.
KindComposite
// KindPrefix is reported by Prefix.
KindPrefix
// KindSuffix is reported by Suffix.
KindSuffix
// KindPrefixSuffix is reported by PrefixSuffix.
KindPrefixSuffix
// KindRangeBetween is reported by Between.
KindRangeBetween
// KindRangeList is reported by RangeList.
KindRangeList
)
// Matcher is the method set shared by the matchers of this package.
type Matcher interface {
// Match reports whether the matcher accepts the given string as a whole.
Match(string) bool
// Search returns an index, a length and a success flag. Raw reports where
// its literal first occurs in the string; the other matchers in this package
// report (0, len(s), true) when Match(s) holds and zero values otherwise.
Search(string) (int, int, bool)
// Kind returns the Kind constant describing the implementation.
Kind() Kind
}

35
match/multiple.go Normal file
View File

@ -0,0 +1,35 @@
package match
import (
"strings"
"fmt"
)
// Multiple represents `*`: it accepts every string, the empty one included,
// that contains none of the characters listed in Separators.
type Multiple struct {
	Separators string
}

// Match reports whether s is free of every separator character.
func (mul Multiple) Match(s string) bool {
	return !strings.ContainsAny(s, mul.Separators)
}

// Search reports the span (0, len(s)) with ok == true when the whole of s
// matches, and zero values with ok == false otherwise.
func (mul Multiple) Search(s string) (i, l int, ok bool) {
	if !mul.Match(s) {
		return 0, 0, false
	}
	return 0, len(s), true
}

// Kind reports KindMultipleSuper when there are no separators and
// KindMultipleSeparated otherwise.
func (mul Multiple) Kind() Kind {
	if mul.Separators != "" {
		return KindMultipleSeparated
	}
	return KindMultipleSuper
}

// String renders the separators for debugging.
func (mul Multiple) String() string {
	return fmt.Sprintf("[multiple:%s]", mul.Separators)
}

32
match/prefix.go Normal file
View File

@ -0,0 +1,32 @@
package match
import (
"strings"
"fmt"
)
// Prefix matches every string that starts with the Prefix field.
type Prefix struct {
	Prefix string
}

// Kind reports KindPrefix.
func (p Prefix) Kind() Kind {
	return KindPrefix
}

// Search reports the span (0, len(s)) with ok == true when s matches, and
// zero values with ok == false otherwise.
func (p Prefix) Search(s string) (i int, l int, ok bool) {
	if !p.Match(s) {
		return 0, 0, false
	}
	return 0, len(s), true
}

// Match reports whether s begins with the configured prefix.
func (p Prefix) Match(s string) bool {
	hasPrefix := strings.HasPrefix(s, p.Prefix)
	return hasPrefix
}

// String renders the prefix for debugging.
func (p Prefix) String() string {
	return fmt.Sprintf("[prefix:%s]", p.Prefix)
}

33
match/prefix_suffix.go Normal file
View File

@ -0,0 +1,33 @@
package match
import (
"strings"
"fmt"
)
// PrefixSuffix matches every string that starts with Prefix and ends with
// Suffix, with the two parts not overlapping.
type PrefixSuffix struct {
	Prefix, Suffix string
}

// Kind reports KindPrefixSuffix. It is exported, like Search, so that
// PrefixSuffix satisfies Matcher the same way the other matchers do.
func (ps PrefixSuffix) Kind() Kind {
	return KindPrefixSuffix
}

// Search reports the span (0, len(s)) with ok == true when s matches, and
// zero values with ok == false otherwise.
func (ps PrefixSuffix) Search(s string) (i int, l int, ok bool) {
	if !ps.Match(s) {
		return 0, 0, false
	}
	return 0, len(s), true
}

// Match reports whether s begins with Prefix and ends with Suffix.
func (ps PrefixSuffix) Match(s string) bool {
	// The prefix and the suffix are distinct parts of the pattern, so the
	// input must be long enough to hold both; otherwise "ab" would satisfy
	// prefix "ab" and suffix "ab" at the same time.
	if len(s) < len(ps.Prefix)+len(ps.Suffix) {
		return false
	}
	return strings.HasPrefix(s, ps.Prefix) && strings.HasSuffix(s, ps.Suffix)
}

// String renders both parts for debugging.
func (ps PrefixSuffix) String() string {
	return fmt.Sprintf("[prefix_suffix:%s-%s]", ps.Prefix, ps.Suffix)
}

36
match/raw.go Normal file
View File

@ -0,0 +1,36 @@
package match
import (
"strings"
"fmt"
)
// Raw represents a literal string: it matches only input equal to Str.
type Raw struct {
	Str string
}

// Match reports whether s is exactly the literal.
func (r Raw) Match(s string) bool {
	return s == r.Str
}

// Kind reports KindRaw.
func (r Raw) Kind() Kind {
	return KindRaw
}

// Search locates the first occurrence of the literal inside s. On success it
// returns the byte index of the occurrence, the byte length of the literal and
// ok == true; otherwise it returns zero values and ok == false.
func (r Raw) Search(s string) (i int, l int, ok bool) {
	if at := strings.Index(s, r.Str); at != -1 {
		return at, len(r.Str), true
	}
	return 0, 0, false
}

// String renders the literal for debugging.
func (r Raw) String() string {
	return fmt.Sprintf("[raw:%s]", r.Str)
}

33
match/single.go Normal file
View File

@ -0,0 +1,33 @@
package match
import (
"strings"
"fmt"
)
// Single represents `?`: it accepts a string of exactly one rune, provided
// that rune is not one of the characters listed in Separators.
type Single struct {
	Separators string
}

// Match reports whether s is one non-separator rune.
func (sg Single) Match(s string) bool {
	if strings.ContainsAny(s, sg.Separators) {
		return false
	}
	return len([]rune(s)) == 1
}

// Search reports the span (0, len(s)) with ok == true when the whole of s
// matches, and zero values with ok == false otherwise.
func (sg Single) Search(s string) (i int, l int, ok bool) {
	if !sg.Match(s) {
		return 0, 0, false
	}
	return 0, len(s), true
}

// Kind reports KindSingle.
func (sg Single) Kind() Kind {
	return KindSingle
}

// String renders the separators for debugging.
func (sg Single) String() string {
	return fmt.Sprintf("[single:%s]", sg.Separators)
}

36
match/suffix.go Normal file
View File

@ -0,0 +1,36 @@
package match
import (
"strings"
"fmt"
)
// Suffix matches every string that ends with the Suffix field.
type Suffix struct {
	Suffix string
}

// Kind reports KindSuffix.
func (sf Suffix) Kind() Kind {
	return KindSuffix
}

// Search reports the span (0, len(s)) with ok == true when s matches, and
// zero values with ok == false otherwise.
func (sf Suffix) Search(s string) (i int, l int, ok bool) {
	if !sf.Match(s) {
		return 0, 0, false
	}
	return 0, len(s), true
}

// Match reports whether s ends with the configured suffix.
func (sf Suffix) Match(s string) bool {
	hasSuffix := strings.HasSuffix(s, sf.Suffix)
	return hasSuffix
}

// String renders the suffix for debugging.
func (sf Suffix) String() string {
	return fmt.Sprintf("[suffix:%s]", sf.Suffix)
}

20
util.go Normal file
View File

@ -0,0 +1,20 @@
package glob
import (
"strings"
)
// indexByteNonEscaped returns the index of the first occurrence of needle in
// source that is not escaped, with shift added to it, or -1 when there is no
// such occurrence.
//
// An occurrence is escaped when it is directly preceded by an odd number of
// consecutive escape bytes. An even run means the escape bytes escape each
// other and the needle itself stands unescaped.
func indexByteNonEscaped(source string, needle, escape byte, shift int) int {
	from := 0
	for from < len(source) {
		i := strings.IndexByte(source[from:], needle)
		if i == -1 {
			return -1
		}
		i += from // translate back to an index into source

		// Count the run of escape bytes directly in front of the candidate.
		run := 0
		for j := i - 1; j >= from && source[j] == escape; j-- {
			run++
		}
		if run%2 == 0 {
			return i + shift
		}

		// The candidate is escaped; keep looking behind it.
		from = i + 1
	}
	return -1
}