add range, refactor

This commit is contained in:
s.kamardin 2015-12-24 17:54:54 +03:00
parent 3c56fe78a7
commit 5b4ed87b27
13 changed files with 544 additions and 298 deletions

373
glob.go
View File

@ -2,35 +2,28 @@ package glob
import (
"strings"
"fmt"
"errors"
"github.com/gobwas/glob/match"
)
const (
any = `*`
superAny = `**`
singleAny = `?`
escape = `\`
any = '*'
single = '?'
escape = '\\'
range_open = '['
range_close = ']'
)
var chars = []string{any, superAny, singleAny, escape}
type globKind int
const(
glob_raw globKind = iota
glob_multiple_separated
glob_multiple_super
glob_single
glob_composite
glob_prefix
glob_suffix
glob_prefix_suffix
const (
inside_range_not = '!'
inside_range_minus = '-'
)
var syntaxPhrases = string([]byte{any, single, escape, range_open, range_close})
// Glob represents compiled glob pattern.
type Glob interface {
Match(string) bool
search(string) (int, int, bool)
kind() globKind
}
// New creates Glob for given pattern and uses other given (if any) strings as separators.
@ -44,292 +37,152 @@ type Glob interface {
// `?` matches any single non-separator character
// c matches character c (c != `*`, `**`, `?`, `\`)
// `\` c matches character c
func New(pattern string, separators ...string) Glob {
chunks := parse(pattern, nil, strings.Join(separators, ""), false)
func New(pattern string, separators ...string) (Glob, error) {
chunks, err := parse(pattern, strings.Join(separators, ""), state{})
if err != nil {
return nil, err
}
switch len(chunks) {
case 1:
return chunks[0].glob
return chunks[0].matcher, nil
case 2:
if chunks[0].glob.kind() == glob_raw && chunks[1].glob.kind() == glob_multiple_super {
return &prefix{chunks[0].str}
if chunks[0].matcher.Kind() == match.KindRaw && chunks[1].matcher.Kind() == match.KindMultipleSuper {
return &match.Prefix{chunks[0].str}, nil
}
if chunks[1].glob.kind() == glob_raw && chunks[0].glob.kind() == glob_multiple_super {
return &suffix{chunks[1].str}
if chunks[1].matcher.Kind() == match.KindRaw && chunks[0].matcher.Kind() == match.KindMultipleSuper {
return &match.Suffix{chunks[1].str}, nil
}
case 3:
if chunks[0].glob.kind() == glob_raw && chunks[1].glob.kind() == glob_multiple_super && chunks[2].glob.kind() == glob_raw {
return &prefix_suffix{chunks[0].str, chunks[2].str}
if chunks[0].matcher.Kind() == match.KindRaw && chunks[1].matcher.Kind() == match.KindMultipleSuper && chunks[2].matcher.Kind() == match.KindRaw {
return &match.PrefixSuffix{chunks[0].str, chunks[2].str}, nil
}
}
var c []Glob
var c []match.Matcher
for _, chunk := range chunks {
c = append(c, chunk.glob)
c = append(c, chunk.matcher)
}
return &composite{c}
return &match.Composite{c}, nil
}
type token struct {
glob Glob
str string
}
func parse(p string, m []token, d string, esc bool) []token {
var e bool
if len(p) == 0 {
return m
// parse parses the given pattern into a list of tokens.
func parse(str string, sep string, st state) ([]token, error) {
if len(str) == 0 {
return st.tokens, nil
}
i, c := firstIndexOfChars(p, chars)
// if there are no syntax symbols - pattern is simple string
i := strings.IndexAny(str, syntaxPhrases)
if i == -1 {
return append(m, token{raw{p}, p})
return append(st.tokens, token{match.Raw{str}, str}), nil
}
c := string(str[i])
// if syntax symbol is not at the start of pattern - add raw part before it
if i > 0 {
m = append(m, token{raw{p[0:i]}, p[0:i]})
st.tokens = append(st.tokens, token{match.Raw{str[0:i]}, str[0:i]})
}
if esc {
m = append(m, token{raw{c}, c})
// if we are in escape state
if st.escape {
st.tokens = append(st.tokens, token{match.Raw{c}, c})
st.escape = false
} else {
switch c {
switch str[i] {
case range_open:
closed := indexByteNonEscaped(str, range_close, escape, 0)
if closed == -1 {
return nil, errors.New("invalid format")
}
r := str[i+1:closed]
g, err := parseRange(r)
if err != nil {
return nil, err
}
st.tokens = append(st.tokens, token{g, r})
if closed == len(str) -1 {
return st.tokens, nil
}
return parse(str[closed+1:], sep, st)
case escape:
e = true
case superAny:
m = append(m, token{multiple{}, c})
st.escape = true
case any:
m = append(m, token{multiple{d}, c})
case singleAny:
m = append(m, token{single{d}, c})
if len(str) > i+1 && str[i+1] == any {
st.tokens = append(st.tokens, token{match.Multiple{}, c})
return parse(str[i+len(c)+1:], sep, st)
}
st.tokens = append(st.tokens, token{match.Multiple{sep}, c})
case single:
st.tokens = append(st.tokens, token{match.Single{sep}, c})
}
}
return parse(p[i+len(c):], m, d, e)
}
// raw represents raw string to match
type raw struct {
s string
}
func (self raw) Match(s string) bool {
return self.s == s
}
func (self raw) kind() globKind {
return glob_raw
}
func (self raw) search(s string) (i int, l int, ok bool) {
index := strings.Index(s, self.s)
if index == -1 {
return
}
i = index
l = len(self.s)
ok = true
return
}
func (self raw) String() string {
return fmt.Sprintf("[raw:%s]", self.s)
}
// multiple represents *
type multiple struct {
separators string
}
func (self multiple) Match(s string) bool {
return strings.IndexAny(s, self.separators) == -1
}
func (self multiple) search(s string) (i int, l int, ok bool) {
if self.Match(s) {
return 0, len(s), true
}
return
}
func (self multiple) kind() globKind {
if self.separators == "" {
return glob_multiple_super
} else {
return glob_multiple_separated
}
}
func (self multiple) String() string {
return fmt.Sprintf("[multiple:%s]", self.separators)
}
// single represents ?
type single struct {
separators string
}
func (self single) Match(s string) bool {
return len(s) == 1 && strings.IndexAny(s, self.separators) == -1
}
func (self single) search(s string) (i int, l int, ok bool) {
if self.Match(s) {
return 0, 1, true
}
return
}
func (self single) kind() globKind {
return glob_single
return parse(str[i+len(c):], sep, st)
}
func (self single) String() string {
return fmt.Sprintf("[single:%s]", self.separators)
}
func parseRange(def string) (match.Matcher, error) {
var (
not bool
esc bool
minus bool
b []byte
)
// composite
type composite struct {
chunks []Glob
}
func (self composite) kind() globKind {
return glob_composite
}
func (self composite) search(s string) (i int, l int, ok bool) {
if self.Match(s) {
return 0, len(s), true
}
return
}
func m(chunks []Glob, s string) bool {
var prev Glob
for _, c := range chunks {
if c.kind() == glob_raw {
i, l, ok := c.search(s)
if !ok {
return false
}
if prev != nil {
if !prev.Match(s[:i]) {
return false
}
prev = nil
}
s = s[i+l:]
for i, c := range []byte(def) {
if esc {
b = append(b, c)
esc = false
continue
}
prev = c
}
switch c{
case inside_range_not:
if i == 0 {
not = true
}
case escape:
if i == len(def) - 1 {
return nil, errors.New("escape character without follower")
}
if prev != nil {
return prev.Match(s)
}
return len(s) == 0
}
func (self composite) Match(s string) bool {
return m(self.chunks, s)
}
func firstIndexOfChars(p string, any []string) (min int, c string) {
l := len(p)
min = l
weight := 0
for _, s := range any {
w := len(s)
i := strings.Index(p, s)
if i != -1 && i <= min && w >= weight {
min = i
weight = w
c = s
esc = true
case inside_range_minus:
minus = true
default:
b = append(b, c)
}
}
if min == l {
return -1, ""
def = string(b)
if minus {
r := []rune(def)
if len(r) != 3 || r[1] != inside_range_minus {
return nil, errors.New("invalid range syntax")
}
return &match.Between{r[0], r[2], not}, nil
}
return
return &match.RangeList{def, not}, nil
}
type prefix struct {
s string
type token struct {
matcher match.Matcher
str string
}
func (self prefix) kind() globKind {
return glob_prefix
type state struct {
escape bool
tokens []token
}
func (self prefix) search(s string) (i int, l int, ok bool) {
if self.Match(s) {
return 0, len(s), true
}
return
}
func (self prefix) Match(s string) bool {
return strings.HasPrefix(s, self.s)
}
type suffix struct {
s string
}
func (self suffix) kind() globKind {
return glob_suffix
}
func (self suffix) search(s string) (i int, l int, ok bool) {
if self.Match(s) {
return 0, len(s), true
}
return
}
func (self suffix) Match(s string) bool {
return strings.HasSuffix(s, self.s)
}
type prefix_suffix struct {
p, s string
}
func (self prefix_suffix) kind() globKind {
return glob_prefix_suffix
}
func (self prefix_suffix) search(s string) (i int, l int, ok bool) {
if self.Match(s) {
return 0, len(s), true
}
return
}
func (self prefix_suffix) Match(s string) bool {
return strings.HasPrefix(s, self.p) && strings.HasSuffix(s, self.s)
}

View File

@ -3,7 +3,6 @@ package glob
import (
rGlob "github.com/ryanuber/go-glob"
"regexp"
"strings"
"testing"
)
@ -17,29 +16,40 @@ func glob(s bool, p, m string, d ...string) test {
return test{p, m, s, d}
}
func TestFirstIndexOfChars(t *testing.T) {
func TestIndexOfNonEscaped(t *testing.T) {
for _, test := range []struct {
s string
c []string
n, e byte
i int
r string
}{
{
"**",
[]string{"**", "*"},
0,
"**",
"\\n_n",
'n',
'\\',
3,
},
{
"**",
[]string{"*", "**"},
"ab",
'a',
'\\',
0,
"**",
},
{
"ab",
'b',
'\\',
1,
},
{
"",
'b',
'\\',
-1,
},
} {
i, r := firstIndexOfChars(test.s, test.c)
if i != test.i || r != test.r {
t.Errorf("unexpeted index: expected %q at %v, got %q at %v", test.r, test.i, r, i)
i := indexByteNonEscaped(test.s, test.n, test.e, 0)
if i != test.i {
t.Errorf("unexpeted index: expected %v, got %v", test.i, i)
}
}
}
@ -79,7 +89,11 @@ func TestGlob(t *testing.T) {
glob(false, "*is", "this is a test"),
glob(false, "*no*", "this is a test"),
} {
g := New(test.pattern, test.delimiters...)
g, err := New(test.pattern, test.delimiters...)
if err != nil {
t.Error(err)
continue
}
result := g.Match(test.match)
if result != test.should {
@ -107,7 +121,7 @@ const PSExpPattern = `https:\/\/[a-z]+\.google\\.com`
const PSString = "https://account.google.com"
func BenchmarkProf(b *testing.B) {
m := New(Pattern)
m, _ := New(Pattern)
for i := 0; i < b.N; i++ {
_ = m.Match(String)
@ -115,35 +129,35 @@ func BenchmarkProf(b *testing.B) {
}
func BenchmarkGobwas(b *testing.B) {
m := New(Pattern)
m, _ := New(Pattern)
for i := 0; i < b.N; i++ {
_ = m.Match(String)
}
}
func BenchmarkGobwasPlain(b *testing.B) {
m := New(PlainPattern)
m, _ := New(PlainPattern)
for i := 0; i < b.N; i++ {
_ = m.Match(PlainString)
}
}
func BenchmarkGobwasPrefix(b *testing.B) {
m := New("abc*")
m, _ := New("abc*")
for i := 0; i < b.N; i++ {
_ = m.Match("abcdef")
}
}
func BenchmarkGobwasSuffix(b *testing.B) {
m := New("*def")
m, _ := New("*def")
for i := 0; i < b.N; i++ {
_ = m.Match("abcdef")
}
}
func BenchmarkGobwasPrefixSuffix(b *testing.B) {
m := New("ab*ef")
m, _ := New("ab*ef")
for i := 0; i < b.N; i++ {
_ = m.Match("abcdef")
@ -178,21 +192,4 @@ func BenchmarkRegExpPrefixSuffix(b *testing.B) {
for i := 0; i < b.N; i++ {
_ = r.Match([]byte(PSString))
}
}
var ALPHABET_S = []string{"a", "b", "c"}
const ALPHABET = "abc"
const PREFIX = "faa"
const STR = "faafsdfcsdffc"
func BenchmarkIndexOfAny(b *testing.B) {
for i := 0; i < b.N; i++ {
strings.IndexAny(STR, ALPHABET)
}
}
func BenchmarkFirstIndexOfChars(b *testing.B) {
for i := 0; i < b.N; i++ {
firstIndexOfChars(STR, ALPHABET_S)
}
}

38
match/between.go Normal file
View File

@ -0,0 +1,38 @@
package match
import (
"fmt"
)
// Between matches a string that consists of exactly one rune lying in the
// inclusive range [Lo, Hi]. When Not is true the range test is inverted, so
// the single rune must lie outside the range instead.
type Between struct {
	Lo, Hi rune
	Not    bool
}

// Kind reports KindRangeBetween.
func (b Between) Kind() Kind {
	return KindRangeBetween
}

// Search returns offset 0, length len(s) and true when Match accepts s;
// otherwise it returns zero values and false.
func (b Between) Search(s string) (i int, l int, ok bool) {
	if b.Match(s) {
		return 0, len(s), true
	}
	return 0, 0, false
}

// Match reports whether s is a single rune that satisfies the range test.
// Strings with zero runes or more than one rune never match, regardless of Not.
func (b Between) Match(s string) bool {
	r := []rune(s)
	if len(r) != 1 {
		return false
	}

	inRange := r[0] >= b.Lo && r[0] <= b.Hi
	return inRange != b.Not
}

// String returns a debug representation such as "[range_between:a-z(false)]".
func (b Between) String() string {
	// Lo and Hi are runes: %c prints the character, whereas %s would print
	// "%!s(int32=...)" and is rejected by go vet.
	return fmt.Sprintf("[range_between:%c-%c(%t)]", b.Lo, b.Hi, b.Not)
}

70
match/composite.go Normal file
View File

@ -0,0 +1,70 @@
package match
import (
"strings"
"fmt"
)
// Composite matches a string against an ordered list of matchers.
type Composite struct {
	Chunks []Matcher
}

// Kind reports KindComposite.
func (c Composite) Kind() Kind {
	return KindComposite
}

// Search returns offset 0, length len(s) and true when Match accepts s;
// otherwise it returns zero values and false.
func (c Composite) Search(s string) (i int, l int, ok bool) {
	if !c.Match(s) {
		return 0, 0, false
	}
	return 0, len(s), true
}

// m walks chunks from left to right over s.
//
// A chunk whose Kind is KindRaw is located with its Search method. The most
// recent non-raw chunk seen before it (if any) must then Match the text in
// front of the found position, and scanning continues after the found span.
// Any other chunk is only remembered; a later non-raw chunk replaces an
// earlier one that has not been consumed yet.
//
// After the last chunk, a remembered chunk must Match the remaining text;
// with nothing remembered the remaining text must be empty.
func m(chunks []Matcher, s string) bool {
	var pending Matcher

	for _, chunk := range chunks {
		if chunk.Kind() != KindRaw {
			pending = chunk
			continue
		}

		offset, length, found := chunk.Search(s)
		if !found {
			return false
		}
		if pending != nil && !pending.Match(s[:offset]) {
			return false
		}

		pending = nil
		s = s[offset+length:]
	}

	if pending == nil {
		return len(s) == 0
	}
	return pending.Match(s)
}

// Match reports whether s is accepted by the chunk sequence, as decided by m.
func (c Composite) Match(s string) bool {
	return m(c.Chunks, s)
}

// String returns a debug representation listing every chunk, comma-separated.
func (c Composite) String() string {
	parts := make([]string, len(c.Chunks))
	for idx, chunk := range c.Chunks {
		parts[idx] = fmt.Sprint(chunk)
	}
	return fmt.Sprintf("[composite:%s]", strings.Join(parts, ","))
}

41
match/list.go Normal file
View File

@ -0,0 +1,41 @@
package match
import (
"strings"
"fmt"
)
// RangeList matches a string that consists of exactly one rune contained in
// List. When Not is true the membership test is inverted.
type RangeList struct {
	List string
	Not  bool
}

// Kind reports KindRangeList.
func (rl RangeList) Kind() Kind {
	return KindRangeList
}

// Search returns offset 0, length len(s) and true when Match accepts s;
// otherwise it returns zero values and false.
func (rl RangeList) Search(s string) (i int, l int, ok bool) {
	if !rl.Match(s) {
		return 0, 0, false
	}
	return 0, len(s), true
}

// Match reports whether s is a single rune that passes the membership test.
// Strings with zero runes or more than one rune never match, regardless of Not.
func (rl RangeList) Match(s string) bool {
	runes := []rune(s)
	if len(runes) != 1 {
		return false
	}

	listed := strings.ContainsRune(rl.List, runes[0])
	return listed != rl.Not
}

// String returns a debug representation; the Not flag is not included.
func (rl RangeList) String() string {
	return fmt.Sprintf("[range_list:%s]", rl.List)
}

22
match/match.go Normal file
View File

@ -0,0 +1,22 @@
package match
// Kind identifies which concrete matcher a Matcher value is.
type Kind int
// Kind values, numbered from zero in declaration order.
const(
// KindRaw is reported by Raw.
KindRaw Kind = iota
// KindMultipleSeparated is reported by Multiple when its Separators are non-empty.
KindMultipleSeparated
// KindMultipleSuper is reported by Multiple when its Separators are empty.
KindMultipleSuper
// KindSingle is reported by Single.
KindSingle
// KindComposite is reported by Composite.
KindComposite
// KindPrefix is reported by Prefix.
KindPrefix
// KindSuffix is reported by Suffix.
KindSuffix
// KindPrefixSuffix is the kind associated with PrefixSuffix.
KindPrefixSuffix
// KindRangeBetween is reported by Between.
KindRangeBetween
// KindRangeList is reported by RangeList.
KindRangeList
)
// Matcher is the behaviour shared by the matchers in this package.
type Matcher interface {
// Match reports whether the matcher accepts the given string.
Match(string) bool
// Search returns an offset, a length and a flag telling whether a match
// was found in the given string; on failure the implementations in this
// package return zero values and false.
Search(string) (int, int, bool)
// Kind returns the Kind constant describing the matcher.
Kind() Kind
}

35
match/multiple.go Normal file
View File

@ -0,0 +1,35 @@
package match
import (
"strings"
"fmt"
)
// Multiple represents `*`: it accepts any string, including the empty one,
// that contains none of the characters in Separators.
type Multiple struct {
	Separators string
}

// Match reports whether s is free of every separator character.
func (m Multiple) Match(s string) bool {
	return !strings.ContainsAny(s, m.Separators)
}

// Search returns offset 0, length len(s) and true when Match accepts s;
// otherwise it returns zero values and false.
func (m Multiple) Search(s string) (i, l int, ok bool) {
	if !m.Match(s) {
		return 0, 0, false
	}
	return 0, len(s), true
}

// Kind reports KindMultipleSuper when no separators are configured and
// KindMultipleSeparated otherwise.
func (m Multiple) Kind() Kind {
	if m.Separators != "" {
		return KindMultipleSeparated
	}
	return KindMultipleSuper
}

// String returns a debug representation that includes the separators.
func (m Multiple) String() string {
	return fmt.Sprintf("[multiple:%s]", m.Separators)
}

32
match/prefix.go Normal file
View File

@ -0,0 +1,32 @@
package match
import (
"strings"
"fmt"
)
// Prefix matches any string that starts with the Prefix field.
type Prefix struct {
	Prefix string
}

// Kind reports KindPrefix.
func (p Prefix) Kind() Kind {
	return KindPrefix
}

// Search returns offset 0, length len(s) and true when Match accepts s;
// otherwise it returns zero values and false.
func (p Prefix) Search(s string) (i int, l int, ok bool) {
	if !p.Match(s) {
		return 0, 0, false
	}
	return 0, len(s), true
}

// Match reports whether s begins with the configured prefix.
func (p Prefix) Match(s string) bool {
	want := p.Prefix
	return strings.HasPrefix(s, want)
}

// String returns a debug representation that includes the prefix.
func (p Prefix) String() string {
	return fmt.Sprintf("[prefix:%s]", p.Prefix)
}

33
match/prefix_suffix.go Normal file
View File

@ -0,0 +1,33 @@
package match
import (
"strings"
"fmt"
)
// PrefixSuffix matches any string that starts with Prefix and ends with
// Suffix, with the two parts not overlapping.
type PrefixSuffix struct {
	Prefix, Suffix string
}

// Kind reports KindPrefixSuffix.
//
// The method is exported, like Kind on the other matchers in this package,
// so that PrefixSuffix satisfies the Matcher interface.
func (ps PrefixSuffix) Kind() Kind {
	return KindPrefixSuffix
}

// Search returns offset 0, length len(s) and true when Match accepts s;
// otherwise it returns zero values and false.
//
// The method is exported, like Search on the other matchers in this package,
// so that PrefixSuffix satisfies the Matcher interface.
func (ps PrefixSuffix) Search(s string) (i int, l int, ok bool) {
	if ps.Match(s) {
		return 0, len(s), true
	}
	return 0, 0, false
}

// Match reports whether s begins with Prefix and ends with Suffix.
func (ps PrefixSuffix) Match(s string) bool {
	// The length guard keeps prefix and suffix from sharing bytes: without
	// it, Prefix "ab" and Suffix "ba" would both be found in "aba".
	if len(s) < len(ps.Prefix)+len(ps.Suffix) {
		return false
	}
	return strings.HasPrefix(s, ps.Prefix) && strings.HasSuffix(s, ps.Suffix)
}

// String returns a debug representation that includes both parts.
func (ps PrefixSuffix) String() string {
	return fmt.Sprintf("[prefix_suffix:%s-%s]", ps.Prefix, ps.Suffix)
}

36
match/raw.go Normal file
View File

@ -0,0 +1,36 @@
package match
import (
"strings"
"fmt"
)
// Raw represents a literal string to match.
type Raw struct {
	Str string
}

// Match reports whether s is exactly equal to Str.
func (r Raw) Match(s string) bool {
	return s == r.Str
}

// Kind reports KindRaw.
func (r Raw) Kind() Kind {
	return KindRaw
}

// Search looks for the first occurrence of Str in s. On success it returns
// the byte offset of that occurrence, len(Str) and true; otherwise it returns
// zero values and false.
func (r Raw) Search(s string) (i int, l int, ok bool) {
	if at := strings.Index(s, r.Str); at != -1 {
		return at, len(r.Str), true
	}
	return 0, 0, false
}

// String returns a debug representation that includes the literal.
func (r Raw) String() string {
	return fmt.Sprintf("[raw:%s]", r.Str)
}

33
match/single.go Normal file
View File

@ -0,0 +1,33 @@
package match
import (
"strings"
"fmt"
)
// Single represents `?`: it accepts a string made of exactly one rune that
// is not one of the characters in Separators.
type Single struct {
	Separators string
}

// Match reports whether s is a single non-separator rune.
func (sg Single) Match(s string) bool {
	if len([]rune(s)) != 1 {
		return false
	}
	return strings.IndexAny(s, sg.Separators) < 0
}

// Search returns offset 0, length len(s) (the rune's size in bytes) and true
// when Match accepts s; otherwise it returns zero values and false.
func (sg Single) Search(s string) (i int, l int, ok bool) {
	if !sg.Match(s) {
		return 0, 0, false
	}
	return 0, len(s), true
}

// Kind reports KindSingle.
func (sg Single) Kind() Kind {
	return KindSingle
}

// String returns a debug representation that includes the separators.
func (sg Single) String() string {
	return fmt.Sprintf("[single:%s]", sg.Separators)
}

36
match/suffix.go Normal file
View File

@ -0,0 +1,36 @@
package match
import (
"strings"
"fmt"
)
// Suffix matches any string that ends with the Suffix field.
type Suffix struct {
	Suffix string
}

// Kind reports KindSuffix.
func (sf Suffix) Kind() Kind {
	return KindSuffix
}

// Search returns offset 0, length len(s) and true when Match accepts s;
// otherwise it returns zero values and false.
func (sf Suffix) Search(s string) (i int, l int, ok bool) {
	if !sf.Match(s) {
		return 0, 0, false
	}
	return 0, len(s), true
}

// Match reports whether s ends with the configured suffix.
func (sf Suffix) Match(s string) bool {
	want := sf.Suffix
	return strings.HasSuffix(s, want)
}

// String returns a debug representation that includes the suffix.
func (sf Suffix) String() string {
	return fmt.Sprintf("[suffix:%s]", sf.Suffix)
}

20
util.go Normal file
View File

@ -0,0 +1,20 @@
package glob
import (
"strings"
)
// indexByteNonEscaped returns the index of the first occurrence of needle in
// source that is not escaped, plus shift. It returns -1 when there is no such
// occurrence, whatever the value of shift.
//
// An occurrence counts as escaped when it is directly preceded by an odd
// number of consecutive escape bytes: in `\]` the bracket is escaped, while in
// `\\]` the first escape byte escapes the second and the bracket is not.
//
// shift is an offset added to a successful result, for callers that pass a
// sub-slice and want an index relative to the enclosing string.
func indexByteNonEscaped(source string, needle, escape byte, shift int) int {
	for from := 0; from < len(source); {
		i := strings.IndexByte(source[from:], needle)
		if i == -1 {
			return -1
		}
		pos := from + i

		// Count the run of escape bytes immediately before the candidate.
		escapes := 0
		for j := pos - 1; j >= 0 && source[j] == escape; j-- {
			escapes++
		}
		if escapes%2 == 0 {
			return pos + shift
		}

		// Escaped occurrence: keep looking after it.
		from = pos + 1
	}
	return -1
}