From 66fc4deeeb8400346d6f63c3552ae13b9496b091 Mon Sep 17 00:00:00 2001 From: gobwas Date: Sat, 9 Jan 2016 02:34:41 +0300 Subject: [PATCH] Fixes --- compiler.go | 87 +++++++++++++++++++++++++++++---- compiler_test.go | 106 ++++++++++++++++++++++++++++++++++++----- glob_test.go | 35 +++++++++++--- match/any.go | 16 +++---- match/any_of.go | 7 ++- match/btree.go | 105 ++++++++++++++++++++++++++++------------ match/contains.go | 4 ++ match/every_of.go | 15 +++++- match/list.go | 15 ++++-- match/match.go | 4 +- match/max.go | 11 ++++- match/min.go | 11 ++++- match/prefix.go | 8 ++-- match/prefix_suffix.go | 4 ++ match/range.go | 22 +++++---- match/raw.go | 9 ++-- match/row.go | 66 +++++++++++++++++++++++++ match/single.go | 17 ++++--- match/suffix.go | 12 ++--- match/super.go | 16 ++++++- parser.go | 15 +++--- util.go | 5 +- 22 files changed, 471 insertions(+), 119 deletions(-) create mode 100644 match/row.go diff --git a/compiler.go b/compiler.go index 67fce2a..cafc0e7 100644 --- a/compiler.go +++ b/compiler.go @@ -62,6 +62,52 @@ func optimize(matcher match.Matcher) match.Matcher { } func glueMatchers(matchers []match.Matcher) match.Matcher { + var ( + glued []match.Matcher + winner match.Matcher + ) + maxLen := -1 + + if m := glueAsEvery(matchers); m != nil { + glued = append(glued, m) + return m + } + + if m := glueAsRow(matchers); m != nil { + glued = append(glued, m) + return m + } + + for _, g := range glued { + if l := g.Len(); l > maxLen { + maxLen = l + winner = g + } + } + + return winner +} + +func glueAsRow(matchers []match.Matcher) match.Matcher { + switch len(matchers) { + case 0: + return nil + case 1: + return matchers[0] + } + + row := match.Row{} + for _, matcher := range matchers { + err := row.Add(matcher) + if err != nil { + return nil + } + } + + return row +} + +func glueAsEvery(matchers []match.Matcher) match.Matcher { switch len(matchers) { case 0: return nil @@ -147,7 +193,28 @@ func glueMatchers(matchers []match.Matcher) match.Matcher { return every } -func convertMatchers(matchers []match.Matcher) (match.Matcher, error) { +func convertMatchers(matchers []match.Matcher, result []match.Matcher) []match.Matcher { + var ( + buf []match.Matcher + done match.Matcher + ) + for idx, m := range matchers { + buf = append(buf, m) + if g := glueMatchers(buf); g != nil { + done = g + } else { + return convertMatchers(matchers[idx:], append(result, done)) + } + } + + if done != nil { + return append(result, done) + } + + return result +} + +func compileMatchers(matchers []match.Matcher) (match.Matcher, error) { if m := glueMatchers(matchers); m != nil { return m, nil } @@ -156,14 +223,14 @@ func convertMatchers(matchers []match.Matcher) (match.Matcher, error) { val match.Primitive idx int ) - + maxLen := -1 for i, matcher := range matchers { if p, ok := matcher.(match.Primitive); ok { - idx = i - val = p - - if _, ok := matcher.(match.Raw); ok { - break + l := p.Len() + if l >= maxLen { + maxLen = l + idx = i + val = p } } } @@ -181,7 +248,7 @@ func convertMatchers(matchers []match.Matcher) (match.Matcher, error) { tree := match.BTree{Value: val} if len(left) > 0 { - l, err := convertMatchers(left) + l, err := compileMatchers(left) if err != nil { return nil, err } @@ -190,7 +257,7 @@ func convertMatchers(matchers []match.Matcher) (match.Matcher, error) { } if len(right) > 0 { - r, err := convertMatchers(right) + r, err := compileMatchers(right) if err != nil { return nil, err } @@ -217,7 +284,7 @@ func do(node node, s string) (m match.Matcher, err error) { if _, ok := node.(*nodeAnyOf); ok { m = match.AnyOf{matchers} } else { - m, err = convertMatchers(matchers) + m, err = compileMatchers(convertMatchers(matchers, nil)) if err != nil { return nil, err } diff --git a/compiler_test.go b/compiler_test.go index 13efe01..a324d9d 100644 --- a/compiler_test.go +++ b/compiler_test.go @@ -52,7 +52,7 @@ func TestGlueMatchers(t *testing.T) { }}, }, } { - act, err := convertMatchers(test.in) + act, err := compileMatchers(test.in) if err != nil { t.Errorf("#%d convert matchers error: %s", id, err) continue @@ -65,7 +65,7 @@ func TestGlueMatchers(t *testing.T) { } } -func TestConvertMatchers(t *testing.T) { +func TestCompileMatchers(t *testing.T) { for id, test := range []struct { in []match.Matcher exp match.Matcher @@ -96,8 +96,22 @@ func TestConvertMatchers(t *testing.T) { Right: match.Any{}, }, }, + { + []match.Matcher{ + match.Range{'a', 'c', true}, + match.List{"zte", false}, + match.Raw{"c"}, + match.Single{}, + }, + match.Row{Matchers: match.Matchers{ + match.Range{'a', 'c', true}, + match.List{"zte", false}, + match.Raw{"c"}, + match.Single{}, + }}, + }, } { - act, err := convertMatchers(test.in) + act, err := compileMatchers(test.in) if err != nil { t.Errorf("#%d convert matchers error: %s", id, err) continue @@ -110,6 +124,58 @@ func TestConvertMatchers(t *testing.T) { } } +func TestConvertMatchers2(t *testing.T) { + for id, test := range []struct { + in, exp []match.Matcher + }{ + { + []match.Matcher{ + match.Range{'a', 'c', true}, + match.List{"zte", false}, + match.Raw{"c"}, + match.Single{}, + match.Any{}, + }, + []match.Matcher{ + match.Row{Matchers: match.Matchers{ + match.Range{'a', 'c', true}, + match.List{"zte", false}, + match.Raw{"c"}, + match.Single{}, + }}, + match.Any{}, + }, + }, + { + []match.Matcher{ + match.Range{'a', 'c', true}, + match.List{"zte", false}, + match.Raw{"c"}, + match.Single{}, + match.Any{}, + match.Single{}, + match.Single{}, + match.Any{}, + }, + []match.Matcher{ + match.Row{Matchers: match.Matchers{ + match.Range{'a', 'c', true}, + match.List{"zte", false}, + match.Raw{"c"}, + match.Single{}, + }}, + match.Min{2}, + }, + }, + } { + act := convertMatchers(test.in, nil) + if !reflect.DeepEqual(act, test.exp) { + t.Errorf("#%d unexpected convert matchers 2 result:\nact: %s;\nexp: %s", id, act, test.exp) + continue + } + } +} + func pattern(nodes ...node) *nodePattern { return &nodePattern{ nodeImpl: nodeImpl{ @@ -183,21 +249,23 @@ func TestCompiler(t *testing.T) { ast: pattern(&nodeAny{}, &nodeText{text: "abc"}, &nodeSingle{}), sep: separators, result: match.BTree{ - Left: match.Any{separators}, - Value: match.Raw{"abc"}, - Right: match.Single{separators}, + Left: match.Any{separators}, + Value: match.Row{Matchers: match.Matchers{ + match.Raw{"abc"}, + match.Single{separators}, + }}, }, }, { ast: pattern(&nodeSuper{}, &nodeSingle{}, &nodeText{text: "abc"}, &nodeSingle{}), sep: separators, result: match.BTree{ - Left: match.BTree{ - Left: match.Super{}, - Value: match.Single{separators}, - }, - Value: match.Raw{"abc"}, - Right: match.Single{separators}, + Left: match.Super{}, + Value: match.Row{Matchers: match.Matchers{ + match.Single{separators}, + match.Raw{"abc"}, + match.Single{separators}, + }}, }, }, { @@ -243,6 +311,20 @@ func TestCompiler(t *testing.T) { }}, }}, }, + { + ast: pattern( + &nodeRange{lo: 'a', hi: 'z'}, + &nodeRange{lo: 'a', hi: 'x', not: true}, + &nodeAny{}, + ), + result: match.BTree{ + Value: match.Row{Matchers: match.Matchers{ + match.Range{Lo: 'a', Hi: 'z'}, + match.Range{Lo: 'a', Hi: 'x', Not: true}, + }}, + Right: match.Super{}, + }, + }, } { prog, err := compile(test.ast, test.sep) if err != nil { diff --git a/glob_test.go b/glob_test.go index b09035e..6125181 100644 --- a/glob_test.go +++ b/glob_test.go @@ -16,6 +16,9 @@ const ( pattern_multiple = "https://*.google.*" fixture_multiple = "https://account.google.com" + pattern_alternatives = "{https://*.google.*,*yahoo.*}" + fixture_alternatives = "http://yahoo.com" + pattern_prefix = "abc*" pattern_suffix = "*def" pattern_prefix_suffix = "ab*ef" @@ -39,7 +42,7 @@ func TestCompilePattern(t *testing.T) { exp match.Matcher }{ // { - // pattern: "[!a]*****", + // pattern: "{http://*yandex.ru,b}", // exp: match.Raw{"t"}, // }, } { @@ -104,6 +107,8 @@ func TestIndexByteNonEscaped(t *testing.T) { func TestGlob(t *testing.T) { for _, test := range []test{ + glob(true, "* ?at * eyes", "my cat has very bright eyes"), + glob(true, "abc", "abc"), glob(true, "a*c", "abc"), glob(true, "a*c", "a12345c"), @@ -119,8 +124,6 @@ func TestGlob(t *testing.T) { glob(true, `\*`, "*"), glob(true, "**", "a.b.c", "."), - glob(true, "* ?at * eyes", "my cat has very bright eyes"), - glob(false, "?at", "at"), glob(false, "?at", "fat", "f"), glob(false, "a.*", "a.b.c", "."), @@ -138,15 +141,16 @@ func TestGlob(t *testing.T) { glob(false, "*no*", "this is a test"), glob(true, "[!a]*", "this is a test3"), - // glob(true, "*abc", "abcabc"), + glob(true, "*abc", "abcabc"), glob(true, "**abc", "abcabc"), - // glob(true, "???", "abc"), - // glob(true, "?*?", "abc"), - // glob(true, "?*?", "ac"), + glob(true, "???", "abc"), + glob(true, "?*?", "abc"), + glob(true, "?*?", "ac"), glob(true, pattern_all, fixture_all), glob(true, pattern_plain, fixture_plain), glob(true, pattern_multiple, fixture_multiple), + glob(true, pattern_alternatives, fixture_alternatives), glob(true, pattern_prefix, fixture_prefix_suffix), glob(true, pattern_suffix, fixture_prefix_suffix), glob(true, pattern_prefix_suffix, fixture_prefix_suffix), @@ -172,6 +176,8 @@ func BenchmarkParse(b *testing.B) { func BenchmarkAll(b *testing.B) { m, _ := Compile(pattern_all) + // fmt.Println("tree all:") + // fmt.Println(m) for i := 0; i < b.N; i++ { _ = m.Match(fixture_all) @@ -185,6 +191,13 @@ func BenchmarkMultiple(b *testing.B) { _ = m.Match(fixture_multiple) } } +func BenchmarkAlternatives(b *testing.B) { + m, _ := Compile(pattern_alternatives) + + for i := 0; i < b.N; i++ { + _ = m.Match(fixture_alternatives) + } +} func BenchmarkPlain(b *testing.B) { m, _ := Compile(pattern_plain) @@ -213,3 +226,11 @@ func BenchmarkPrefixSuffix(b *testing.B) { _ = m.Match(fixture_prefix_suffix) } } + +//BenchmarkParse-8 500000 2235 ns/op +//BenchmarkAll-8 20000000 73.1 ns/op +//BenchmarkMultiple-8 10000000 130 ns/op +//BenchmarkPlain-8 200000000 6.70 ns/op +//BenchmarkPrefix-8 200000000 8.36 ns/op +//BenchmarkSuffix-8 200000000 8.35 ns/op +//BenchmarkPrefixSuffix-8 100000000 13.6 ns/op diff --git a/match/any.go b/match/any.go index 174f810..0f2d8e1 100644 --- a/match/any.go +++ b/match/any.go @@ -13,19 +13,15 @@ func (self Any) Match(s string) bool { return strings.IndexAny(s, self.Separators) == -1 } -func (self Any) Index(s string) (index, min, max int) { +func (self Any) Index(s string) (index int, segments []int) { index = -1 - - for i, r := range []rune(s) { + for i, r := range s { if strings.IndexRune(self.Separators, r) == -1 { if index == -1 { index = i } - max++ - continue - } - - if index != -1 { + segments = append(segments, i-index) + } else if index != -1 { break } } @@ -33,6 +29,10 @@ func (self Any) Index(s string) (index, min, max int) { return } +func (self Any) Len() int { + return -1 +} + func (self Any) Kind() Kind { return KindAny } diff --git a/match/any_of.go b/match/any_of.go index 21d66ee..303fe2f 100644 --- a/match/any_of.go +++ b/match/any_of.go @@ -8,8 +8,9 @@ type AnyOf struct { Matchers Matchers } -func (self *AnyOf) Add(m Matcher) { +func (self *AnyOf) Add(m Matcher) error { self.Matchers = append(self.Matchers, m) + return nil } func (self AnyOf) Match(s string) bool { @@ -22,6 +23,10 @@ func (self AnyOf) Match(s string) bool { return false } +func (self AnyOf) Len() int { + return -1 +} + func (self AnyOf) Kind() Kind { return KindAnyOf } diff --git a/match/btree.go b/match/btree.go index b3b7e7e..e290e6d 100644 --- a/match/btree.go +++ b/match/btree.go @@ -2,6 +2,7 @@ package match import ( "fmt" + "unicode/utf8" ) type BTree struct { @@ -13,51 +14,95 @@ func (self BTree) Kind() Kind { return KindBTree } +func (self BTree) len() (l, v, r int, ok bool) { + v = self.Value.Len() + + if self.Left != nil { + l = self.Left.Len() + } + + if self.Right != nil { + r = self.Right.Len() + } + + ok = l > -1 && v > -1 && r > -1 + + return +} + +func (self BTree) Len() int { + l, v, r, ok := self.len() + if ok { + return l + v + r + } + + return -1 +} + func (self BTree) Match(s string) bool { - runes := []rune(s) - inputLen := len(runes) + inputLen := len(s) - for offset := 0; offset < inputLen; { - index, min, max := self.Value.Index(string(runes[offset:])) + lLen, vLen, rLen, ok := self.len() + if ok && lLen+vLen+rLen > inputLen { + return false + } + var offset, limit int + if lLen >= 0 { + offset = lLen + } + if rLen >= 0 { + limit = inputLen - rLen + } else { + limit = inputLen + } + + for offset < limit { + index, segments := self.Value.Index(s[offset:limit]) if index == -1 { return false } - for length := min; length <= max; length++ { - var left, right bool + l := string(s[:offset+index]) + var left bool + if self.Left != nil { + left = self.Left.Match(l) + } else { + left = l == "" + } - l := string(runes[:offset+index]) - if self.Left != nil { - left = self.Left.Match(l) - } else { - left = l == "" - } + if left { + for i := len(segments) - 1; i >= 0; i-- { + length := segments[i] - if !left { - break - } + if rLen >= 0 && inputLen-(offset+index+length) != rLen { + continue + } - var r string - // if there is no string for the right branch - if inputLen <= offset+index+length { - r = "" - } else { - r = string(runes[offset+index+length:]) - } + var right bool - if self.Right != nil { - right = self.Right.Match(r) - } else { - right = r == "" - } + var r string + // if there is no string for the right branch + if inputLen <= offset+index+length { + r = "" + } else { + r = s[offset+index+length:] + } - if left && right { - return true + if self.Right != nil { + right = self.Right.Match(r) + } else { + right = r == "" + } + + if right { + return true + } } } - offset += index + 1 + _, step := utf8.DecodeRuneInString(s[offset+index:]) + offset += index + step } return false diff --git a/match/contains.go b/match/contains.go index f956e38..6888dc9 100644 --- a/match/contains.go +++ b/match/contains.go @@ -14,6 +14,10 @@ func (self Contains) Match(s string) bool { return strings.Contains(s, self.Needle) != self.Not } +func (self Contains) Len() int { + return -1 +} + func (self Contains) Kind() Kind { return KindContains } diff --git a/match/every_of.go b/match/every_of.go index f1d5bf4..cc84fc8 100644 --- a/match/every_of.go +++ b/match/every_of.go @@ -8,8 +8,21 @@ type Every struct { Matchers Matchers } -func (self *Every) Add(m Matcher) { +func (self *Every) Add(m Matcher) error { self.Matchers = append(self.Matchers, m) + return nil +} + +func (self Every) Len() (l int) { + for _, m := range self.Matchers { + if ml := m.Len(); l > 0 { + l += ml + } else { + return -1 + } + } + + return } func (self Every) Match(s string) bool { diff --git a/match/list.go b/match/list.go index d883887..e83edd1 100644 --- a/match/list.go +++ b/match/list.go @@ -3,6 +3,7 @@ package match import ( "fmt" "strings" + "unicode/utf8" ) type List struct { @@ -15,7 +16,7 @@ func (self List) Kind() Kind { } func (self List) Match(s string) bool { - if len([]rune(s)) != 1 { + if utf8.RuneCountInString(s) > 1 { return false } @@ -24,14 +25,18 @@ func (self List) Match(s string) bool { return inList == !self.Not } -func (self List) Index(s string) (index, min, max int) { - for i, r := range []rune(s) { +func (self List) Len() int { + return 1 +} + +func (self List) Index(s string) (int, []int) { + for i, r := range s { if self.Not == (strings.IndexRune(self.List, r) == -1) { - return i, 1, 1 + return i, []int{utf8.RuneLen(r)} } } - return -1, 0, 0 + return -1, nil } func (self List) String() string { diff --git a/match/match.go b/match/match.go index 54ffee5..0589623 100644 --- a/match/match.go +++ b/match/match.go @@ -29,10 +29,12 @@ const ( type Matcher interface { Match(string) bool + Len() int } type Primitive interface { - Index(string) (int, int, int) + Matcher + Index(string) (int, []int) } type Matchers []Matcher diff --git a/match/max.go b/match/max.go index aeccece..ef0e5b0 100644 --- a/match/max.go +++ b/match/max.go @@ -1,13 +1,20 @@ package match -import "fmt" +import ( + "fmt" + "unicode/utf8" +) type Max struct { Limit int } func (self Max) Match(s string) bool { - return len([]rune(s)) <= self.Limit + return utf8.RuneCountInString(s) <= self.Limit +} + +func (self Max) Len() int { + return -1 } func (self Max) Search(s string) (int, int, bool) { diff --git a/match/min.go b/match/min.go index 5216a6d..c12af11 100644 --- a/match/min.go +++ b/match/min.go @@ -1,13 +1,20 @@ package match -import "fmt" +import ( + "fmt" + "unicode/utf8" +) type Min struct { Limit int } func (self Min) Match(s string) bool { - return len([]rune(s)) >= self.Limit + return utf8.RuneCountInString(s) >= self.Limit +} + +func (self Min) Len() int { + return -1 } func (self Min) Search(s string) (int, int, bool) { diff --git a/match/prefix.go b/match/prefix.go index ba20bb2..ff31d13 100644 --- a/match/prefix.go +++ b/match/prefix.go @@ -1,11 +1,10 @@ package match import ( - "strings" "fmt" + "strings" ) - type Prefix struct { Prefix string } @@ -14,6 +13,10 @@ func (self Prefix) Kind() Kind { return KindPrefix } +func (self Prefix) Len() int { + return -1 +} + func (self Prefix) Search(s string) (i int, l int, ok bool) { if self.Match(s) { return 0, len(s), true @@ -26,7 +29,6 @@ func (self Prefix) Match(s string) bool { return strings.HasPrefix(s, self.Prefix) } - func (self Prefix) String() string { return fmt.Sprintf("[prefix:%s]", self.Prefix) } diff --git a/match/prefix_suffix.go b/match/prefix_suffix.go index 870c955..b6d9d8a 100644 --- a/match/prefix_suffix.go +++ b/match/prefix_suffix.go @@ -13,6 +13,10 @@ func (self PrefixSuffix) Kind() Kind { return KindPrefixSuffix } +func (self PrefixSuffix) Len() int { + return -1 +} + func (self PrefixSuffix) Search(s string) (i int, l int, ok bool) { if self.Match(s) { return 0, len(s), true diff --git a/match/range.go b/match/range.go index 9ae0775..e709376 100644 --- a/match/range.go +++ b/match/range.go @@ -2,6 +2,7 @@ package match import ( "fmt" + "unicode/utf8" ) type Range struct { @@ -13,28 +14,31 @@ func (self Range) Kind() Kind { return KindRange } -func (self Range) Match(s string) bool { - r := []rune(s) +func (self Range) Len() int { + return 1 +} - if len(r) != 1 { +func (self Range) Match(s string) bool { + r, w := utf8.DecodeRuneInString(s) + if len(s) > w { return false } - inRange := r[0] >= self.Lo && r[0] <= self.Hi + inRange := r >= self.Lo && r <= self.Hi return inRange == !self.Not } -func (self Range) Index(s string) (index, min, max int) { - for i, r := range []rune(s) { +func (self Range) Index(s string) (int, []int) { + for i, r := range s { if self.Not != (r >= self.Lo && r <= self.Hi) { - return i, 1, 1 + return i, []int{utf8.RuneLen(r)} } } - return -1, 0, 0 + return -1, nil } func (self Range) String() string { - return fmt.Sprintf("[range_between:%s-%s(%t)]", self.Lo, self.Hi, self.Not) + return fmt.Sprintf("[range:%s-%s(%t)]", string(self.Lo), string(self.Hi), self.Not) } diff --git a/match/raw.go b/match/raw.go index ca717f1..9c05c6e 100644 --- a/match/raw.go +++ b/match/raw.go @@ -14,18 +14,21 @@ func (self Raw) Match(s string) bool { return self.Str == s } +func (self Raw) Len() int { + return len(self.Str) +} + func (self Raw) Kind() Kind { return KindRaw } -func (self Raw) Index(s string) (index, min, max int) { +func (self Raw) Index(s string) (index int, segments []int) { index = strings.Index(s, self.Str) if index == -1 { return } - min = len(self.Str) - max = min + segments = []int{len(self.Str)} return } diff --git a/match/row.go b/match/row.go new file mode 100644 index 0000000..46b4c90 --- /dev/null +++ b/match/row.go @@ -0,0 +1,66 @@ +package match + +import ( + "fmt" +) + +type Row struct { + Matchers Matchers + len int +} + +func (self *Row) Add(m Matcher) error { + if l := m.Len(); l == -1 { + return fmt.Errorf("matcher should have fixed length") + } + + self.Matchers = append(self.Matchers, m) + return nil +} + +func (self Row) Match(s string) bool { + if len(s) < self.Len() { + return false + } + + var idx int + for _, m := range self.Matchers { + l := m.Len() + if !m.Match(s[idx : idx+l]) { + return false + } + + idx += l + } + + return true +} + +func (self Row) Len() (l int) { + if self.len == 0 { + for _, m := range self.Matchers { + self.len += m.Len() + } + } + + return self.len +} + +func (self Row) Index(s string) (int, []int) { + for i := range s { + sub := s[i:] + if self.Match(sub) { + return i, []int{self.Len()} + } + } + + return -1, nil +} + +func (self Row) Kind() Kind { + return KindMin +} + +func (self Row) String() string { + return fmt.Sprintf("[row:%s]", self.Matchers) +} diff --git a/match/single.go b/match/single.go index d28718b..f60d883 100644 --- a/match/single.go +++ b/match/single.go @@ -3,6 +3,7 @@ package match import ( "fmt" "strings" + "unicode/utf8" ) // single represents ? @@ -11,17 +12,21 @@ type Single struct { } func (self Single) Match(s string) bool { - return len([]rune(s)) == 1 && strings.IndexAny(s, self.Separators) == -1 + return utf8.RuneCountInString(s) == 1 && strings.IndexAny(s, self.Separators) == -1 } -func (self Single) Index(s string) (index, min, max int) { - for i, c := range []rune(s) { - if strings.IndexRune(self.Separators, c) == -1 { - return i, 1, 1 +func (self Single) Len() int { + return 1 +} + +func (self Single) Index(s string) (int, []int) { + for i, r := range s { + if strings.IndexRune(self.Separators, r) == -1 { + return i, []int{utf8.RuneLen(r)} } } - return -1, 0, 0 + return -1, nil } func (self Single) Kind() Kind { diff --git a/match/suffix.go b/match/suffix.go index 08a3bf8..b0fed42 100644 --- a/match/suffix.go +++ b/match/suffix.go @@ -1,14 +1,10 @@ package match import ( - "strings" "fmt" + "strings" ) - - - - type Suffix struct { Suffix string } @@ -17,6 +13,10 @@ func (self Suffix) Kind() Kind { return KindSuffix } +func (self Suffix) Len() int { + return -1 +} + func (self Suffix) Search(s string) (i int, l int, ok bool) { if self.Match(s) { return 0, len(s), true @@ -32,5 +32,3 @@ func (self Suffix) Match(s string) bool { func (self Suffix) String() string { return fmt.Sprintf("[suffix:%s]", self.Suffix) } - - diff --git a/match/super.go b/match/super.go index 6c3169c..8b1108c 100644 --- a/match/super.go +++ b/match/super.go @@ -2,6 +2,7 @@ package match import ( "fmt" + "unicode/utf8" ) type Super struct{} @@ -10,8 +11,19 @@ func (self Super) Match(s string) bool { return true } -func (self Super) Index(s string) (index, min, max int) { - return 0, 0, len([]rune(s)) +func (self Super) Len() int { + return -1 +} + +func (self Super) Index(s string) (int, []int) { + segments := make([]int, utf8.RuneCountInString(s)) + for i := range s { + segments = append(segments, i) + } + + segments = append(segments, len(s)) + + return 0, segments } func (self Super) Kind() Kind { diff --git a/parser.go b/parser.go index 4509b3d..43da13a 100644 --- a/parser.go +++ b/parser.go @@ -3,6 +3,7 @@ package glob import ( "errors" "fmt" + "unicode/utf8" ) type node interface { @@ -172,23 +173,23 @@ func parserRange(tree *tree, lexer *lexer) (parseFn, error) { not = true case item_range_lo: - r := []rune(item.s) - if len(r) != 1 { + r, w := utf8.DecodeRuneInString(item.s) + if len(item.s) > w { return nil, fmt.Errorf("unexpected length of lo character") } - lo = r[0] + lo = r case item_range_between: // case item_range_hi: - r := []rune(item.s) - if len(r) != 1 { - return nil, fmt.Errorf("unexpected length of hi character") + r, w := utf8.DecodeRuneInString(item.s) + if len(item.s) > w { + return nil, fmt.Errorf("unexpected length of lo character") } - hi = r[0] + hi = r if hi < lo { return nil, fmt.Errorf("hi character '%s' should be greater than lo '%s'", string(hi), string(lo)) diff --git a/util.go b/util.go index 0be4dcf..0423e91 100644 --- a/util.go +++ b/util.go @@ -1,6 +1,5 @@ package glob - import ( "strings" ) @@ -18,6 +17,6 @@ func indexByteNonEscaped(source string, needle, escape byte, shift int) int { return i + shift } - sh := i+1 + sh := i + 1 return indexByteNonEscaped(source[sh:], needle, escape, sh) -} \ No newline at end of file +}