diff --git a/compiler.go b/compiler.go index 62ed12e..67aecc5 100644 --- a/compiler.go +++ b/compiler.go @@ -48,11 +48,13 @@ func optimize(matcher match.Matcher) match.Matcher { } if leftNil && rightSuffix { - return match.Every{match.Matchers{match.Prefix{r.Str}, rs}} + return match.PrefixSuffix{Prefix: r.Str, Suffix: rs.Suffix} + // return match.EveryOf{match.Matchers{match.Prefix{r.Str}, rs}} } if rightNil && leftPrefix { - return match.Every{match.Matchers{lp, match.Suffix{r.Str}}} + return match.PrefixSuffix{Prefix: lp.Prefix, Suffix: r.Str} + // return match.EveryOf{match.Matchers{lp, match.Suffix{r.Str}}} } return m @@ -176,7 +178,7 @@ func glueAsEvery(matchers []match.Matcher) match.Matcher { return match.Min{min} } - every := match.Every{} + every := match.EveryOf{} if min > 0 { every.Add(match.Min{min}) @@ -220,23 +222,21 @@ func compileMatchers(matchers []match.Matcher) (match.Matcher, error) { } var ( - val match.Primitive + val match.Matcher idx int ) maxLen := -1 for i, matcher := range matchers { - if p, ok := matcher.(match.Primitive); ok { - l := p.Len() - if l >= maxLen { - maxLen = l - idx = i - val = p - } + l := matcher.Len() + if l >= maxLen { + maxLen = l + idx = i + val = matcher } } if val == nil { - return nil, fmt.Errorf("could not convert matchers %s: need at least one primitive", match.Matchers(matchers)) + return nil, fmt.Errorf("could not convert matchers %s: need at least one matcher", match.Matchers(matchers)) } left := matchers[:idx] diff --git a/compiler_test.go b/compiler_test.go index c806a40..5e919cc 100644 --- a/compiler_test.go +++ b/compiler_test.go @@ -25,7 +25,7 @@ func TestGlueMatchers(t *testing.T) { match.Any{separators}, match.Single{separators}, }, - match.Every{match.Matchers{ + match.EveryOf{match.Matchers{ match.Min{1}, match.Contains{separators, true}, }}, @@ -36,7 +36,7 @@ func TestGlueMatchers(t *testing.T) { match.Single{}, match.Single{}, }, - match.Every{match.Matchers{ + match.EveryOf{match.Matchers{ match.Min{3}, match.Max{3}, }}, @@ -46,7 +46,7 @@ func TestGlueMatchers(t *testing.T) { match.List{"a", true}, match.Any{"a"}, }, - match.Every{match.Matchers{ + match.EveryOf{match.Matchers{ match.Min{1}, match.Contains{"a", true}, }}, @@ -236,7 +236,7 @@ func TestCompiler(t *testing.T) { { ast: pattern(&nodeAny{}, &nodeSingle{}, &nodeSingle{}, &nodeSingle{}), sep: separators, - result: match.Every{Matchers: match.Matchers{ + result: match.EveryOf{Matchers: match.Matchers{ match.Min{3}, match.Contains{separators, true}, }}, @@ -277,8 +277,9 @@ func TestCompiler(t *testing.T) { result: match.Prefix{"abc"}, }, { - ast: pattern(&nodeText{text: "abc"}, &nodeAny{}, &nodeText{text: "def"}), - result: match.Every{match.Matchers{match.Prefix{"abc"}, match.Suffix{"def"}}}, + ast: pattern(&nodeText{text: "abc"}, &nodeAny{}, &nodeText{text: "def"}), + // result: match.EveryOf{match.Matchers{match.Prefix{"abc"}, match.Suffix{"def"}}}, + result: match.PrefixSuffix{"abc", "def"}, }, { ast: pattern(&nodeAny{}, &nodeAny{}, &nodeAny{}, &nodeText{text: "abc"}, &nodeAny{}, &nodeAny{}), diff --git a/glob_test.go b/glob_test.go index 066d1b6..473c62f 100644 --- a/glob_test.go +++ b/glob_test.go @@ -42,7 +42,7 @@ func TestCompilePattern(t *testing.T) { exp match.Matcher }{ // { - // pattern: "{abc,def}ghi", + // pattern: "{*,def}ghi", // exp: match.Raw{"t"}, // }, } { diff --git a/match/any.go b/match/any.go index 0f2d8e1..92c6274 100644 --- a/match/any.go +++ b/match/any.go @@ -3,6 +3,7 @@ package match import ( "fmt" "strings" + "unicode/utf8" ) type Any struct { @@ -13,20 +14,25 @@ func (self Any) Match(s string) bool { return strings.IndexAny(s, self.Separators) == -1 } -func (self Any) Index(s string) (index int, segments []int) { - index = -1 - for i, r := range s { - if strings.IndexRune(self.Separators, r) == -1 { - if index == -1 { - index = i - } - segments = append(segments, i-index) - } else if index != -1 { - break - } +func (self Any) Index(s string) (int, []int) { + var sub string + + found := strings.IndexAny(s, self.Separators) + switch found { + case -1: + sub = s + default: + sub = s[:found] } - return + segments := make([]int, 0, utf8.RuneCountInString(sub)+1) + for i := range sub { + segments = append(segments, i) + } + + segments = append(segments, len(sub)) + + return 0, segments } func (self Any) Len() int { diff --git a/match/any_of.go b/match/any_of.go index 1eb2205..605a771 100644 --- a/match/any_of.go +++ b/match/any_of.go @@ -23,9 +23,40 @@ func (self AnyOf) Match(s string) bool { return false } -//func (self AnyOf) Index(s string) (index int, segments []int) { -// -//} +func (self AnyOf) Index(s string) (int, []int) { + if len(self.Matchers) == 0 { + return -1, nil + } + + // segments to merge + var segments [][]int + index := -1 + + for _, m := range self.Matchers { + idx, seg := m.Index(s) + if idx == -1 { + continue + } + + if index == -1 || idx < index { + index = idx + segments = [][]int{seg} + continue + } + + if idx > index { + continue + } + + segments = append(segments, seg) + } + + if index == -1 { + return -1, nil + } + + return index, mergeSegments(segments) +} func (self AnyOf) Len() (l int) { l = -1 diff --git a/match/any_of_test.go b/match/any_of_test.go new file mode 100644 index 0000000..6e26886 --- /dev/null +++ b/match/any_of_test.go @@ -0,0 +1,53 @@ +package match + +import ( + "reflect" + "testing" +) + +func TestAnyOfIndex(t *testing.T) { + for id, test := range []struct { + matchers Matchers + fixture string + index int + segments []int + }{ + { + Matchers{ + Any{}, + Raw{"b"}, + Raw{"c"}, + }, + "abc", + 0, + []int{0, 1, 2, 3}, + }, + { + Matchers{ + Prefix{"b"}, + Suffix{"c"}, + }, + "abc", + 0, + []int{3}, + }, + { + Matchers{ + List{"[def]", false}, + List{"[abc]", false}, + }, + "abcdef", + 0, + []int{1}, + }, + } { + everyOf := AnyOf{test.matchers} + index, segments := everyOf.Index(test.fixture) + if index != test.index { + t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) + } + if !reflect.DeepEqual(segments, test.segments) { + t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, test.segments, segments) + } + } +} diff --git a/match/btree.go b/match/btree.go index e290e6d..64d3420 100644 --- a/match/btree.go +++ b/match/btree.go @@ -6,8 +6,7 @@ import ( ) type BTree struct { - Value Primitive - Left, Right Matcher + Value, Left, Right Matcher } func (self BTree) Kind() Kind { @@ -39,6 +38,11 @@ func (self BTree) Len() int { return -1 } +// todo +func (self BTree) Index(s string) (int, []int) { + return -1, nil +} + func (self BTree) Match(s string) bool { inputLen := len(s) diff --git a/match/contains.go b/match/contains.go index 6888dc9..329dc3b 100644 --- a/match/contains.go +++ b/match/contains.go @@ -3,6 +3,7 @@ package match import ( "fmt" "strings" + "unicode/utf8" ) type Contains struct { @@ -14,6 +15,43 @@ func (self Contains) Match(s string) bool { return strings.Contains(s, self.Needle) != self.Not } +func (self Contains) Index(s string) (int, []int) { + var ( + sub string + offset int + ) + + idx := strings.Index(s, self.Needle) + + if !self.Not { + if idx == -1 { + return -1, nil + } + + offset = idx + len(self.Needle) + + if len(s) <= offset { + return 0, []int{offset} + } + + sub = s[offset:] + } else { + switch idx { + case -1: + sub = s + default: + sub = s[:idx] + } + } + + segments := make([]int, 0, utf8.RuneCountInString(sub)+1) + for i, _ := range sub { + segments = append(segments, offset+i) + } + + return 0, append(segments, offset+len(sub)) +} + func (self Contains) Len() int { return -1 } diff --git a/match/contains_test.go b/match/contains_test.go new file mode 100644 index 0000000..e45d3e0 --- /dev/null +++ b/match/contains_test.go @@ -0,0 +1,54 @@ +package match + +import ( + "reflect" + "testing" +) + +func TestContainsIndex(t *testing.T) { + for id, test := range []struct { + prefix string + not bool + fixture string + index int + segments []int + }{ + { + "ab", + false, + "abc", + 0, + []int{2, 3}, + }, + { + "ab", + false, + "fffabfff", + 0, + []int{5, 6, 7, 8}, + }, + { + "ab", + true, + "abc", + 0, + []int{0}, + }, + { + "ab", + true, + "fffabfff", + 0, + []int{0, 1, 2, 3}, + }, + } { + p := Contains{test.prefix, test.not} + index, segments := p.Index(test.fixture) + if index != test.index { + t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) + } + if !reflect.DeepEqual(segments, test.segments) { + t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, test.segments, segments) + } + } +} diff --git a/match/every_of.go b/match/every_of.go index cc84fc8..36af182 100644 --- a/match/every_of.go +++ b/match/every_of.go @@ -4,16 +4,16 @@ import ( "fmt" ) -type Every struct { +type EveryOf struct { Matchers Matchers } -func (self *Every) Add(m Matcher) error { +func (self *EveryOf) Add(m Matcher) error { self.Matchers = append(self.Matchers, m) return nil } -func (self Every) Len() (l int) { +func (self EveryOf) Len() (l int) { for _, m := range self.Matchers { if ml := m.Len(); l > 0 { l += ml @@ -25,7 +25,46 @@ func (self Every) Len() (l int) { return } -func (self Every) Match(s string) bool { +func (self EveryOf) Index(s string) (int, []int) { + var index int + var offset int + var segments []int + + sub := s + for _, m := range self.Matchers { + idx, seg := m.Index(sub) + if idx == -1 { + return -1, nil + } + + var sum []int + if segments == nil { + sum = seg + } else { + delta := index - (idx + offset) + for _, ex := range segments { + for _, n := range seg { + if ex+delta == n { + sum = append(sum, n) + } + } + } + } + + if len(sum) == 0 { + return -1, nil + } + + segments = sum + index = idx + offset + sub = s[index:] + offset += idx + } + + return index, segments +} + +func (self EveryOf) Match(s string) bool { for _, m := range self.Matchers { if !m.Match(s) { return false @@ -35,10 +74,10 @@ func (self Every) Match(s string) bool { return true } -func (self Every) Kind() Kind { +func (self EveryOf) Kind() Kind { return KindEveryOf } -func (self Every) String() string { +func (self EveryOf) String() string { return fmt.Sprintf("[every_of:%s]", self.Matchers) } diff --git a/match/every_of_test.go b/match/every_of_test.go new file mode 100644 index 0000000..62c00a3 --- /dev/null +++ b/match/every_of_test.go @@ -0,0 +1,45 @@ +package match + +import ( + "reflect" + "testing" +) + +func TestEveryOfIndex(t *testing.T) { + for id, test := range []struct { + matchers Matchers + fixture string + index int + segments []int + }{ + { + Matchers{ + Any{}, + Raw{"b"}, + Raw{"c"}, + }, + "abc", + -1, + nil, + }, + { + Matchers{ + Any{}, + Prefix{"b"}, + Suffix{"c"}, + }, + "abc", + 1, + []int{2}, + }, + } { + everyOf := EveryOf{test.matchers} + index, segments := everyOf.Index(test.fixture) + if index != test.index { + t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) + } + if !reflect.DeepEqual(segments, test.segments) { + t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, test.segments, segments) + } + } +} diff --git a/match/list_test.go b/match/list_test.go new file mode 100644 index 0000000..c985ebc --- /dev/null +++ b/match/list_test.go @@ -0,0 +1,40 @@ +package match + +import ( + "reflect" + "testing" +) + +func TestListIndex(t *testing.T) { + for id, test := range []struct { + list string + not bool + fixture string + index int + segments []int + }{ + { + "ab", + false, + "abc", + 0, + []int{1}, + }, + { + "ab", + true, + "fffabfff", + 0, + []int{1}, + }, + } { + p := List{test.list, test.not} + index, segments := p.Index(test.fixture) + if index != test.index { + t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) + } + if !reflect.DeepEqual(segments, test.segments) { + t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, test.segments, segments) + } + } +} diff --git a/match/match.go b/match/match.go index 0589623..d685e5f 100644 --- a/match/match.go +++ b/match/match.go @@ -29,12 +29,8 @@ const ( type Matcher interface { Match(string) bool - Len() int -} - -type Primitive interface { - Matcher Index(string) (int, []int) + Len() int } type Matchers []Matcher @@ -47,3 +43,60 @@ func (m Matchers) String() string { return fmt.Sprintf("matchers[%s]", strings.Join(s, ",")) } + +func appendIfNotAsPrevious(target []int, val int) []int { + l := len(target) + if l != 0 && target[l-1] == val { + return target + } + + return append(target, val) +} + +// mergeSegments merges and sorts given already SORTED and UNIQUE segments. +func mergeSegments(segments [][]int) []int { + var current []int + for _, s := range segments { + if current == nil { + current = s + continue + } + + var next []int + for x, y := 0, 0; x < len(current) || y < len(s); { + if x >= len(current) { + next = append(next, s[y:]...) + break + } + + if y >= len(s) { + next = append(next, current[x:]...) + break + } + + xValue := current[x] + yValue := s[y] + + switch { + + case xValue == yValue: + x++ + y++ + next = appendIfNotAsPrevious(next, xValue) + + case xValue < yValue: + next = appendIfNotAsPrevious(next, xValue) + x++ + + case yValue < xValue: + next = appendIfNotAsPrevious(next, yValue) + y++ + + } + } + + current = next + } + + return current +} diff --git a/match/match_test.go b/match/match_test.go new file mode 100644 index 0000000..e92776a --- /dev/null +++ b/match/match_test.go @@ -0,0 +1,37 @@ +package match + +import ( + "reflect" + "testing" +) + +func TestMergeSegments(t *testing.T) { + for id, test := range []struct { + segments [][]int + exp []int + }{ + { + [][]int{ + []int{0, 6, 7}, + []int{0, 1, 3}, + []int{2, 4}, + }, + []int{0, 1, 2, 3, 4, 6, 7}, + }, + { + [][]int{ + []int{0, 1, 3, 6, 7}, + []int{0, 1, 3}, + []int{2, 4}, + []int{1}, + }, + []int{0, 1, 2, 3, 4, 6, 7}, + }, + } { + act := mergeSegments(test.segments) + if !reflect.DeepEqual(act, test.exp) { + t.Errorf("#%d merge sort segments unexpected:\nact: %v\nexp:%v", id, act, test.exp) + continue + } + } +} diff --git a/match/max.go b/match/max.go index ef0e5b0..58fffb6 100644 --- a/match/max.go +++ b/match/max.go @@ -13,6 +13,26 @@ func (self Max) Match(s string) bool { return utf8.RuneCountInString(s) <= self.Limit } +func (self Max) Index(s string) (int, []int) { + c := utf8.RuneCountInString(s) + if c < self.Limit { + return -1, nil + } + + segments := make([]int, 0, self.Limit+1) + segments = append(segments, 0) + var count int + for i, r := range s { + count++ + if count > self.Limit { + break + } + segments = append(segments, i+utf8.RuneLen(r)) + } + + return 0, segments +} + func (self Max) Len() int { return -1 } diff --git a/match/max_test.go b/match/max_test.go new file mode 100644 index 0000000..d1ab803 --- /dev/null +++ b/match/max_test.go @@ -0,0 +1,37 @@ +package match + +import ( + "reflect" + "testing" +) + +func TestMaxIndex(t *testing.T) { + for id, test := range []struct { + limit int + fixture string + index int + segments []int + }{ + { + 3, + "abc", + 0, + []int{0, 1, 2, 3}, + }, + { + 3, + "abcdef", + 0, + []int{0, 1, 2, 3}, + }, + } { + p := Max{test.limit} + index, segments := p.Index(test.fixture) + if index != test.index { + t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) + } + if !reflect.DeepEqual(segments, test.segments) { + t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, test.segments, segments) + } + } +} diff --git a/match/min.go b/match/min.go index c12af11..071ef88 100644 --- a/match/min.go +++ b/match/min.go @@ -13,6 +13,25 @@ func (self Min) Match(s string) bool { return utf8.RuneCountInString(s) >= self.Limit } +func (self Min) Index(s string) (int, []int) { + var count int + + c := utf8.RuneCountInString(s) + if c < self.Limit { + return -1, nil + } + + segments := make([]int, 0, c-self.Limit+1) + for i, r := range s { + count++ + if count >= self.Limit { + segments = append(segments, i+utf8.RuneLen(r)) + } + } + + return 0, segments +} + func (self Min) Len() int { return -1 } diff --git a/match/min_test.go b/match/min_test.go new file mode 100644 index 0000000..c8aa794 --- /dev/null +++ b/match/min_test.go @@ -0,0 +1,37 @@ +package match + +import ( + "reflect" + "testing" +) + +func TestMinIndex(t *testing.T) { + for id, test := range []struct { + limit int + fixture string + index int + segments []int + }{ + { + 1, + "abc", + 0, + []int{1, 2, 3}, + }, + { + 3, + "abcd", + 0, + []int{3, 4}, + }, + } { + p := Min{test.limit} + index, segments := p.Index(test.fixture) + if index != test.index { + t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) + } + if !reflect.DeepEqual(segments, test.segments) { + t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, test.segments, segments) + } + } +} diff --git a/match/prefix.go b/match/prefix.go index ff31d13..67b8856 100644 --- a/match/prefix.go +++ b/match/prefix.go @@ -3,6 +3,7 @@ package match import ( "fmt" "strings" + "unicode/utf8" ) type Prefix struct { @@ -13,6 +14,29 @@ func (self Prefix) Kind() Kind { return KindPrefix } +func (self Prefix) Index(s string) (int, []int) { + idx := strings.Index(s, self.Prefix) + if idx == -1 { + return -1, nil + } + + length := len(self.Prefix) + var sub string + if len(s) > idx+length { + sub = s[idx+length:] + } else { + sub = "" + } + + segments := make([]int, 0, utf8.RuneCountInString(sub)+1) + segments = append(segments, length) + for i, r := range sub { + segments = append(segments, length+i+utf8.RuneLen(r)) + } + + return idx, segments +} + func (self Prefix) Len() int { return -1 } diff --git a/match/prefix_suffix.go b/match/prefix_suffix.go index b6d9d8a..e2a4838 100644 --- a/match/prefix_suffix.go +++ b/match/prefix_suffix.go @@ -13,6 +13,36 @@ func (self PrefixSuffix) Kind() Kind { return KindPrefixSuffix } +func (self PrefixSuffix) Index(s string) (int, []int) { + prefixIdx := strings.Index(s, self.Prefix) + if prefixIdx == -1 { + return -1, nil + } + + var segments []int + for sub := s[prefixIdx:]; ; { + suffixIdx := strings.LastIndex(sub, self.Suffix) + if suffixIdx == -1 { + break + } + + segments = append(segments, suffixIdx+len(self.Suffix)) + sub = s[:suffixIdx] + } + + segLen := len(segments) + if segLen == 0 { + return -1, nil + } + + resp := make([]int, segLen) + for i, s := range segments { + resp[segLen-i-1] = s + } + + return prefixIdx, resp +} + func (self PrefixSuffix) Len() int { return -1 } diff --git a/match/prefix_suffix_test.go b/match/prefix_suffix_test.go new file mode 100644 index 0000000..ab65096 --- /dev/null +++ b/match/prefix_suffix_test.go @@ -0,0 +1,47 @@ +package match + +import ( + "reflect" + "testing" +) + +func TestPrefixSuffixIndex(t *testing.T) { + for id, test := range []struct { + prefix string + suffix string + fixture string + index int + segments []int + }{ + { + "a", + "c", + "abc", + 0, + []int{3}, + }, + { + "f", + "f", + "fffabfff", + 0, + []int{1, 2, 3, 6, 7, 8}, + }, + { + "ab", + "bc", + "abc", + 0, + []int{3}, + }, + } { + p := PrefixSuffix{test.prefix, test.suffix} + index, segments := p.Index(test.fixture) + if index != test.index { + t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) + } + if !reflect.DeepEqual(segments, test.segments) { + t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, test.segments, segments) + } + } +} diff --git a/match/prefix_test.go b/match/prefix_test.go new file mode 100644 index 0000000..8f93f97 --- /dev/null +++ b/match/prefix_test.go @@ -0,0 +1,37 @@ +package match + +import ( + "reflect" + "testing" +) + +func TestPrefixIndex(t *testing.T) { + for id, test := range []struct { + prefix string + fixture string + index int + segments []int + }{ + { + "ab", + "abc", + 0, + []int{2, 3}, + }, + { + "ab", + "fffabfff", + 3, + []int{2, 3, 4, 5}, + }, + } { + p := Prefix{test.prefix} + index, segments := p.Index(test.fixture) + if index != test.index { + t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) + } + if !reflect.DeepEqual(segments, test.segments) { + t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, test.segments, segments) + } + } +} diff --git a/match/suffix.go b/match/suffix.go index b0fed42..15266b7 100644 --- a/match/suffix.go +++ b/match/suffix.go @@ -13,6 +13,15 @@ func (self Suffix) Kind() Kind { return KindSuffix } +func (self Suffix) Index(s string) (int, []int) { + idx := strings.Index(s, self.Suffix) + if idx == -1 { + return -1, nil + } + + return 0, []int{idx + len(self.Suffix)} +} + func (self Suffix) Len() int { return -1 } diff --git a/match/suffix_test.go b/match/suffix_test.go new file mode 100644 index 0000000..5c3e434 --- /dev/null +++ b/match/suffix_test.go @@ -0,0 +1,37 @@ +package match + +import ( + "reflect" + "testing" +) + +func TestSuffixIndex(t *testing.T) { + for id, test := range []struct { + prefix string + fixture string + index int + segments []int + }{ + { + "ab", + "abc", + 0, + []int{2}, + }, + { + "ab", + "fffabfff", + 0, + []int{5}, + }, + } { + p := Suffix{test.prefix} + index, segments := p.Index(test.fixture) + if index != test.index { + t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) + } + if !reflect.DeepEqual(segments, test.segments) { + t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, test.segments, segments) + } + } +} diff --git a/match/super.go b/match/super.go index 8b1108c..f69bd8b 100644 --- a/match/super.go +++ b/match/super.go @@ -16,7 +16,7 @@ func (self Super) Len() int { } func (self Super) Index(s string) (int, []int) { - segments := make([]int, utf8.RuneCountInString(s)) + segments := make([]int, 0, utf8.RuneCountInString(s)+1) for i := range s { segments = append(segments, i) }