Indexing all matchers

This commit is contained in:
s.kamardin 2016-01-12 14:06:59 +03:00
parent b11fb9474b
commit f2255c18f5
25 changed files with 746 additions and 48 deletions

View File

@ -48,11 +48,13 @@ func optimize(matcher match.Matcher) match.Matcher {
} }
if leftNil && rightSuffix { if leftNil && rightSuffix {
return match.Every{match.Matchers{match.Prefix{r.Str}, rs}} return match.PrefixSuffix{Prefix: r.Str, Suffix: rs.Suffix}
// return match.EveryOf{match.Matchers{match.Prefix{r.Str}, rs}}
} }
if rightNil && leftPrefix { if rightNil && leftPrefix {
return match.Every{match.Matchers{lp, match.Suffix{r.Str}}} return match.PrefixSuffix{Prefix: lp.Prefix, Suffix: r.Str}
// return match.EveryOf{match.Matchers{lp, match.Suffix{r.Str}}}
} }
return m return m
@ -176,7 +178,7 @@ func glueAsEvery(matchers []match.Matcher) match.Matcher {
return match.Min{min} return match.Min{min}
} }
every := match.Every{} every := match.EveryOf{}
if min > 0 { if min > 0 {
every.Add(match.Min{min}) every.Add(match.Min{min})
@ -220,23 +222,21 @@ func compileMatchers(matchers []match.Matcher) (match.Matcher, error) {
} }
var ( var (
val match.Primitive val match.Matcher
idx int idx int
) )
maxLen := -1 maxLen := -1
for i, matcher := range matchers { for i, matcher := range matchers {
if p, ok := matcher.(match.Primitive); ok { l := matcher.Len()
l := p.Len()
if l >= maxLen { if l >= maxLen {
maxLen = l maxLen = l
idx = i idx = i
val = p val = matcher
}
} }
} }
if val == nil { if val == nil {
return nil, fmt.Errorf("could not convert matchers %s: need at least one primitive", match.Matchers(matchers)) return nil, fmt.Errorf("could not convert matchers %s: need at least one matcher", match.Matchers(matchers))
} }
left := matchers[:idx] left := matchers[:idx]

View File

@ -25,7 +25,7 @@ func TestGlueMatchers(t *testing.T) {
match.Any{separators}, match.Any{separators},
match.Single{separators}, match.Single{separators},
}, },
match.Every{match.Matchers{ match.EveryOf{match.Matchers{
match.Min{1}, match.Min{1},
match.Contains{separators, true}, match.Contains{separators, true},
}}, }},
@ -36,7 +36,7 @@ func TestGlueMatchers(t *testing.T) {
match.Single{}, match.Single{},
match.Single{}, match.Single{},
}, },
match.Every{match.Matchers{ match.EveryOf{match.Matchers{
match.Min{3}, match.Min{3},
match.Max{3}, match.Max{3},
}}, }},
@ -46,7 +46,7 @@ func TestGlueMatchers(t *testing.T) {
match.List{"a", true}, match.List{"a", true},
match.Any{"a"}, match.Any{"a"},
}, },
match.Every{match.Matchers{ match.EveryOf{match.Matchers{
match.Min{1}, match.Min{1},
match.Contains{"a", true}, match.Contains{"a", true},
}}, }},
@ -236,7 +236,7 @@ func TestCompiler(t *testing.T) {
{ {
ast: pattern(&nodeAny{}, &nodeSingle{}, &nodeSingle{}, &nodeSingle{}), ast: pattern(&nodeAny{}, &nodeSingle{}, &nodeSingle{}, &nodeSingle{}),
sep: separators, sep: separators,
result: match.Every{Matchers: match.Matchers{ result: match.EveryOf{Matchers: match.Matchers{
match.Min{3}, match.Min{3},
match.Contains{separators, true}, match.Contains{separators, true},
}}, }},
@ -278,7 +278,8 @@ func TestCompiler(t *testing.T) {
}, },
{ {
ast: pattern(&nodeText{text: "abc"}, &nodeAny{}, &nodeText{text: "def"}), ast: pattern(&nodeText{text: "abc"}, &nodeAny{}, &nodeText{text: "def"}),
result: match.Every{match.Matchers{match.Prefix{"abc"}, match.Suffix{"def"}}}, // result: match.EveryOf{match.Matchers{match.Prefix{"abc"}, match.Suffix{"def"}}},
result: match.PrefixSuffix{"abc", "def"},
}, },
{ {
ast: pattern(&nodeAny{}, &nodeAny{}, &nodeAny{}, &nodeText{text: "abc"}, &nodeAny{}, &nodeAny{}), ast: pattern(&nodeAny{}, &nodeAny{}, &nodeAny{}, &nodeText{text: "abc"}, &nodeAny{}, &nodeAny{}),

View File

@ -42,7 +42,7 @@ func TestCompilePattern(t *testing.T) {
exp match.Matcher exp match.Matcher
}{ }{
// { // {
// pattern: "{abc,def}ghi", // pattern: "{*,def}ghi",
// exp: match.Raw{"t"}, // exp: match.Raw{"t"},
// }, // },
} { } {

View File

@ -3,6 +3,7 @@ package match
import ( import (
"fmt" "fmt"
"strings" "strings"
"unicode/utf8"
) )
type Any struct { type Any struct {
@ -13,20 +14,25 @@ func (self Any) Match(s string) bool {
return strings.IndexAny(s, self.Separators) == -1 return strings.IndexAny(s, self.Separators) == -1
} }
func (self Any) Index(s string) (index int, segments []int) { func (self Any) Index(s string) (int, []int) {
index = -1 var sub string
for i, r := range s {
if strings.IndexRune(self.Separators, r) == -1 { found := strings.IndexAny(s, self.Separators)
if index == -1 { switch found {
index = i case -1:
} sub = s
segments = append(segments, i-index) default:
} else if index != -1 { sub = s[:found]
break
}
} }
return segments := make([]int, 0, utf8.RuneCountInString(sub)+1)
for i := range sub {
segments = append(segments, i)
}
segments = append(segments, len(sub))
return 0, segments
} }
func (self Any) Len() int { func (self Any) Len() int {

View File

@ -23,9 +23,40 @@ func (self AnyOf) Match(s string) bool {
return false return false
} }
//func (self AnyOf) Index(s string) (index int, segments []int) { func (self AnyOf) Index(s string) (int, []int) {
// if len(self.Matchers) == 0 {
//} return -1, nil
}
// segments to merge
var segments [][]int
index := -1
for _, m := range self.Matchers {
idx, seg := m.Index(s)
if idx == -1 {
continue
}
if index == -1 || idx < index {
index = idx
segments = [][]int{seg}
continue
}
if idx > index {
continue
}
segments = append(segments, seg)
}
if index == -1 {
return -1, nil
}
return index, mergeSegments(segments)
}
func (self AnyOf) Len() (l int) { func (self AnyOf) Len() (l int) {
l = -1 l = -1

53
match/any_of_test.go Normal file
View File

@ -0,0 +1,53 @@
package match
import (
"reflect"
"testing"
)
// TestAnyOfIndex is a table-driven check of AnyOf.Index: every case wraps its
// matchers in an AnyOf, indexes the fixture and compares the returned index
// and segments with the expected values.
func TestAnyOfIndex(t *testing.T) {
	cases := []struct {
		matchers Matchers
		fixture  string
		index    int
		segments []int
	}{
		{
			matchers: Matchers{Any{}, Raw{"b"}, Raw{"c"}},
			fixture:  "abc",
			index:    0,
			segments: []int{0, 1, 2, 3},
		},
		{
			matchers: Matchers{Prefix{"b"}, Suffix{"c"}},
			fixture:  "abc",
			index:    0,
			segments: []int{3},
		},
		{
			matchers: Matchers{List{"[def]", false}, List{"[abc]", false}},
			fixture:  "abcdef",
			index:    0,
			segments: []int{1},
		},
	}

	for id, tc := range cases {
		anyOf := AnyOf{tc.matchers}
		gotIndex, gotSegments := anyOf.Index(tc.fixture)
		if gotIndex != tc.index {
			t.Errorf("#%d unexpected index: exp: %d, act: %d", id, tc.index, gotIndex)
		}
		if !reflect.DeepEqual(gotSegments, tc.segments) {
			t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, tc.segments, gotSegments)
		}
	}
}

View File

@ -6,8 +6,7 @@ import (
) )
type BTree struct { type BTree struct {
Value Primitive Value, Left, Right Matcher
Left, Right Matcher
} }
func (self BTree) Kind() Kind { func (self BTree) Kind() Kind {
@ -39,6 +38,11 @@ func (self BTree) Len() int {
return -1 return -1
} }
// Index is a placeholder so that BTree provides an Index method like the
// other matchers in this package. It is not implemented yet: the input is
// ignored and the "no match" result (-1, nil) is always returned.
//
// TODO: implement real indexing for BTree.
func (self BTree) Index(s string) (int, []int) {
return -1, nil
}
func (self BTree) Match(s string) bool { func (self BTree) Match(s string) bool {
inputLen := len(s) inputLen := len(s)

View File

@ -3,6 +3,7 @@ package match
import ( import (
"fmt" "fmt"
"strings" "strings"
"unicode/utf8"
) )
type Contains struct { type Contains struct {
@ -14,6 +15,43 @@ func (self Contains) Match(s string) bool {
return strings.Contains(s, self.Needle) != self.Not return strings.Contains(s, self.Needle) != self.Not
} }
// Index returns the offset at which a match may start and the byte lengths
// (segments) a match may have, measured from that offset.
//
// For a positive matcher (Not == false) the first occurrence of Needle must
// exist, otherwise (-1, nil) is returned; the segments are the end of that
// occurrence plus every rune boundary after it up to len(s).
//
// For a negated matcher (Not == true) the segments are the rune boundaries of
// the part of s that precedes the first occurrence of Needle (all of s when
// Needle does not occur).
//
// Whenever a match is reported the returned index is 0.
func (self Contains) Index(s string) (int, []int) {
	pos := strings.Index(s, self.Needle)

	var (
		base int    // byte offset that every segment is shifted by
		tail string // part of s whose rune boundaries become segments
	)
	switch {
	case self.Not && pos == -1:
		tail = s
	case self.Not:
		tail = s[:pos]
	case pos == -1:
		return -1, nil
	default:
		base = pos + len(self.Needle)
		if base >= len(s) {
			// Needle ends exactly at the end of s: a single segment.
			return 0, []int{base}
		}
		tail = s[base:]
	}

	out := make([]int, 0, utf8.RuneCountInString(tail)+1)
	for i := range tail {
		out = append(out, base+i)
	}
	out = append(out, base+len(tail))

	return 0, out
}
func (self Contains) Len() int { func (self Contains) Len() int {
return -1 return -1
} }

54
match/contains_test.go Normal file
View File

@ -0,0 +1,54 @@
package match
import (
"reflect"
"testing"
)
// TestContainsIndex is a table-driven check of Contains.Index for both the
// positive and the negated form of the matcher.
func TestContainsIndex(t *testing.T) {
	cases := []struct {
		needle   string
		not      bool
		fixture  string
		index    int
		segments []int
	}{
		{needle: "ab", not: false, fixture: "abc", index: 0, segments: []int{2, 3}},
		{needle: "ab", not: false, fixture: "fffabfff", index: 0, segments: []int{5, 6, 7, 8}},
		{needle: "ab", not: true, fixture: "abc", index: 0, segments: []int{0}},
		{needle: "ab", not: true, fixture: "fffabfff", index: 0, segments: []int{0, 1, 2, 3}},
	}

	for id, tc := range cases {
		matcher := Contains{tc.needle, tc.not}
		gotIndex, gotSegments := matcher.Index(tc.fixture)
		if gotIndex != tc.index {
			t.Errorf("#%d unexpected index: exp: %d, act: %d", id, tc.index, gotIndex)
		}
		if !reflect.DeepEqual(gotSegments, tc.segments) {
			t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, tc.segments, gotSegments)
		}
	}
}

View File

@ -4,16 +4,16 @@ import (
"fmt" "fmt"
) )
type Every struct { type EveryOf struct {
Matchers Matchers Matchers Matchers
} }
func (self *Every) Add(m Matcher) error { func (self *EveryOf) Add(m Matcher) error {
self.Matchers = append(self.Matchers, m) self.Matchers = append(self.Matchers, m)
return nil return nil
} }
func (self Every) Len() (l int) { func (self EveryOf) Len() (l int) {
for _, m := range self.Matchers { for _, m := range self.Matchers {
if ml := m.Len(); l > 0 { if ml := m.Len(); l > 0 {
l += ml l += ml
@ -25,7 +25,46 @@ func (self Every) Len() (l int) {
return return
} }
func (self Every) Match(s string) bool { func (self EveryOf) Index(s string) (int, []int) {
var index int
var offset int
var segments []int
sub := s
for _, m := range self.Matchers {
idx, seg := m.Index(sub)
if idx == -1 {
return -1, nil
}
var sum []int
if segments == nil {
sum = seg
} else {
delta := index - (idx + offset)
for _, ex := range segments {
for _, n := range seg {
if ex+delta == n {
sum = append(sum, n)
}
}
}
}
if len(sum) == 0 {
return -1, nil
}
segments = sum
index = idx + offset
sub = s[index:]
offset += idx
}
return index, segments
}
func (self EveryOf) Match(s string) bool {
for _, m := range self.Matchers { for _, m := range self.Matchers {
if !m.Match(s) { if !m.Match(s) {
return false return false
@ -35,10 +74,10 @@ func (self Every) Match(s string) bool {
return true return true
} }
func (self Every) Kind() Kind { func (self EveryOf) Kind() Kind {
return KindEveryOf return KindEveryOf
} }
func (self Every) String() string { func (self EveryOf) String() string {
return fmt.Sprintf("[every_of:%s]", self.Matchers) return fmt.Sprintf("[every_of:%s]", self.Matchers)
} }

45
match/every_of_test.go Normal file
View File

@ -0,0 +1,45 @@
package match
import (
"reflect"
"testing"
)
// TestEveryOfIndex is a table-driven check of EveryOf.Index: one case where
// the matchers cannot agree (expecting -1, nil) and one where they do.
func TestEveryOfIndex(t *testing.T) {
	cases := []struct {
		matchers Matchers
		fixture  string
		index    int
		segments []int
	}{
		{
			matchers: Matchers{Any{}, Raw{"b"}, Raw{"c"}},
			fixture:  "abc",
			index:    -1,
			segments: nil,
		},
		{
			matchers: Matchers{Any{}, Prefix{"b"}, Suffix{"c"}},
			fixture:  "abc",
			index:    1,
			segments: []int{2},
		},
	}

	for id, tc := range cases {
		everyOf := EveryOf{tc.matchers}
		gotIndex, gotSegments := everyOf.Index(tc.fixture)
		if gotIndex != tc.index {
			t.Errorf("#%d unexpected index: exp: %d, act: %d", id, tc.index, gotIndex)
		}
		if !reflect.DeepEqual(gotSegments, tc.segments) {
			t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, tc.segments, gotSegments)
		}
	}
}

40
match/list_test.go Normal file
View File

@ -0,0 +1,40 @@
package match
import (
"reflect"
"testing"
)
// TestListIndex is a table-driven check of List.Index for a plain and a
// negated list.
func TestListIndex(t *testing.T) {
	cases := []struct {
		list     string
		not      bool
		fixture  string
		index    int
		segments []int
	}{
		{list: "ab", not: false, fixture: "abc", index: 0, segments: []int{1}},
		{list: "ab", not: true, fixture: "fffabfff", index: 0, segments: []int{1}},
	}

	for id, tc := range cases {
		matcher := List{tc.list, tc.not}
		gotIndex, gotSegments := matcher.Index(tc.fixture)
		if gotIndex != tc.index {
			t.Errorf("#%d unexpected index: exp: %d, act: %d", id, tc.index, gotIndex)
		}
		if !reflect.DeepEqual(gotSegments, tc.segments) {
			t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, tc.segments, gotSegments)
		}
	}
}

View File

@ -29,12 +29,8 @@ const (
type Matcher interface { type Matcher interface {
Match(string) bool Match(string) bool
Len() int
}
type Primitive interface {
Matcher
Index(string) (int, []int) Index(string) (int, []int)
Len() int
} }
type Matchers []Matcher type Matchers []Matcher
@ -47,3 +43,60 @@ func (m Matchers) String() string {
return fmt.Sprintf("matchers[%s]", strings.Join(s, ",")) return fmt.Sprintf("matchers[%s]", strings.Join(s, ","))
} }
// appendIfNotAsPrevious appends val to target unless target is non-empty and
// its last element already equals val, in which case target is returned as is.
func appendIfNotAsPrevious(target []int, val int) []int {
	if n := len(target); n == 0 || target[n-1] != val {
		return append(target, val)
	}
	return target
}
// mergeSegments folds the given slices into a single ascending slice without
// duplicates. Every input slice must already be sorted and free of duplicates.
// The first non-nil input is used as the starting accumulator without being
// copied, so with a single input the very same slice is returned.
func mergeSegments(segments [][]int) []int {
	var merged []int
	for _, seg := range segments {
		if merged == nil {
			merged = seg
			continue
		}

		var out []int
		i, j := 0, 0

		// Classic two-pointer merge while both sides still have elements.
		for i < len(merged) && j < len(seg) {
			a, b := merged[i], seg[j]
			switch {
			case a < b:
				out = appendIfNotAsPrevious(out, a)
				i++
			case b < a:
				out = appendIfNotAsPrevious(out, b)
				j++
			default:
				out = appendIfNotAsPrevious(out, a)
				i++
				j++
			}
		}

		// At most one side has a remainder; copy it over verbatim.
		if i < len(merged) {
			out = append(out, merged[i:]...)
		}
		if j < len(seg) {
			out = append(out, seg[j:]...)
		}

		merged = out
	}

	return merged
}

37
match/match_test.go Normal file
View File

@ -0,0 +1,37 @@
package match
import (
"reflect"
"testing"
)
// TestMergeSegments feeds mergeSegments several sorted slices and expects a
// single sorted slice without duplicates.
func TestMergeSegments(t *testing.T) {
	cases := []struct {
		segments [][]int
		exp      []int
	}{
		{
			segments: [][]int{
				{0, 6, 7},
				{0, 1, 3},
				{2, 4},
			},
			exp: []int{0, 1, 2, 3, 4, 6, 7},
		},
		{
			segments: [][]int{
				{0, 1, 3, 6, 7},
				{0, 1, 3},
				{2, 4},
				{1},
			},
			exp: []int{0, 1, 2, 3, 4, 6, 7},
		},
	}

	for id, tc := range cases {
		if act := mergeSegments(tc.segments); !reflect.DeepEqual(act, tc.exp) {
			t.Errorf("#%d merge sort segments unexpected:\nact: %v\nexp:%v", id, act, tc.exp)
		}
	}
}

View File

@ -13,6 +13,26 @@ func (self Max) Match(s string) bool {
return utf8.RuneCountInString(s) <= self.Limit return utf8.RuneCountInString(s) <= self.Limit
} }
// Index returns the offset at which a match may start and the byte lengths
// (segments) a match may have, measured from that offset.
//
// Max accepts any string of at most Limit runes, so a match always starts at
// offset 0 and the segments are the byte lengths of the first 0, 1, ...,
// Limit runes of s (fewer when s is shorter than Limit). A string shorter
// than Limit still matches, in agreement with Match. A negative Limit can
// never be satisfied and yields (-1, nil).
func (self Max) Index(s string) (int, []int) {
	if self.Limit < 0 {
		return -1, nil
	}

	// s holds at most len(s) runes, so never reserve more than that even when
	// Limit is very large.
	capacity := self.Limit
	if len(s) < capacity {
		capacity = len(s)
	}
	segments := make([]int, 0, capacity+1)

	// i is the byte offset at which rune number `count` starts, i.e. the byte
	// length of the first `count` runes. Using the offsets produced by range
	// keeps the segments on real boundaries even for invalid UTF-8, where
	// range advances by one byte per broken byte.
	var count int
	for i := range s {
		if count > self.Limit {
			break
		}
		segments = append(segments, i)
		count++
	}
	if count <= self.Limit {
		// The whole string fits into the limit.
		segments = append(segments, len(s))
	}

	return 0, segments
}
func (self Max) Len() int { func (self Max) Len() int {
return -1 return -1
} }

37
match/max_test.go Normal file
View File

@ -0,0 +1,37 @@
package match
import (
"reflect"
"testing"
)
// TestMaxIndex is a table-driven check of Max.Index on inputs that are
// exactly as long as, and longer than, the limit.
func TestMaxIndex(t *testing.T) {
	cases := []struct {
		limit    int
		fixture  string
		index    int
		segments []int
	}{
		{limit: 3, fixture: "abc", index: 0, segments: []int{0, 1, 2, 3}},
		{limit: 3, fixture: "abcdef", index: 0, segments: []int{0, 1, 2, 3}},
	}

	for id, tc := range cases {
		matcher := Max{tc.limit}
		gotIndex, gotSegments := matcher.Index(tc.fixture)
		if gotIndex != tc.index {
			t.Errorf("#%d unexpected index: exp: %d, act: %d", id, tc.index, gotIndex)
		}
		if !reflect.DeepEqual(gotSegments, tc.segments) {
			t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, tc.segments, gotSegments)
		}
	}
}

View File

@ -13,6 +13,25 @@ func (self Min) Match(s string) bool {
return utf8.RuneCountInString(s) >= self.Limit return utf8.RuneCountInString(s) >= self.Limit
} }
// Index returns the offset at which a match may start and the byte lengths
// (segments) a match may have, measured from that offset.
//
// Min accepts any string of at least Limit runes. When s holds fewer runes
// (-1, nil) is returned; otherwise the match starts at offset 0 and the
// segments are the byte lengths of the first Limit, Limit+1, ... runes up to
// the whole of s. A Limit of zero or less also admits the empty segment 0.
func (self Min) Index(s string) (int, []int) {
	total := utf8.RuneCountInString(s)
	if total < self.Limit {
		return -1, nil
	}

	limit := self.Limit
	if limit < 0 {
		limit = 0
	}

	segments := make([]int, 0, total-limit+1)

	// i is the byte offset at which rune number `count` starts, i.e. the byte
	// length of the first `count` runes. Taking the offsets from range (rather
	// than adding utf8.RuneLen of the decoded rune) keeps them correct for
	// invalid UTF-8, where the decoded rune is U+FFFD but only one byte is
	// consumed.
	var count int
	for i := range s {
		if count >= limit {
			segments = append(segments, i)
		}
		count++
	}
	// total >= limit was checked above, so the whole string is a segment too.
	segments = append(segments, len(s))

	return 0, segments
}
func (self Min) Len() int { func (self Min) Len() int {
return -1 return -1
} }

37
match/min_test.go Normal file
View File

@ -0,0 +1,37 @@
package match
import (
"reflect"
"testing"
)
// TestMinIndex is a table-driven check of Min.Index for two different limits.
func TestMinIndex(t *testing.T) {
	cases := []struct {
		limit    int
		fixture  string
		index    int
		segments []int
	}{
		{limit: 1, fixture: "abc", index: 0, segments: []int{1, 2, 3}},
		{limit: 3, fixture: "abcd", index: 0, segments: []int{3, 4}},
	}

	for id, tc := range cases {
		matcher := Min{tc.limit}
		gotIndex, gotSegments := matcher.Index(tc.fixture)
		if gotIndex != tc.index {
			t.Errorf("#%d unexpected index: exp: %d, act: %d", id, tc.index, gotIndex)
		}
		if !reflect.DeepEqual(gotSegments, tc.segments) {
			t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, tc.segments, gotSegments)
		}
	}
}

View File

@ -3,6 +3,7 @@ package match
import ( import (
"fmt" "fmt"
"strings" "strings"
"unicode/utf8"
) )
type Prefix struct { type Prefix struct {
@ -13,6 +14,29 @@ func (self Prefix) Kind() Kind {
return KindPrefix return KindPrefix
} }
// Index returns the offset of the first occurrence of Prefix in s and the
// byte lengths (segments) a match may have, measured from that offset.
//
// When Prefix does not occur in s, (-1, nil) is returned. Otherwise the
// segments are the length of Prefix itself followed by the length up to every
// later rune boundary, ending with the length up to the end of s.
func (self Prefix) Index(s string) (int, []int) {
	idx := strings.Index(s, self.Prefix)
	if idx == -1 {
		return -1, nil
	}

	length := len(self.Prefix)
	// idx+length never exceeds len(s) because Prefix was found at idx.
	rest := s[idx+length:]

	segments := make([]int, 0, utf8.RuneCountInString(rest)+1)
	// Each i is the start of a rune in rest, i.e. a valid place for the match
	// to end. Taking the offsets from range (rather than adding utf8.RuneLen
	// of the decoded rune) keeps them correct for invalid UTF-8, where the
	// decoded rune is U+FFFD but only one byte is consumed.
	for i := range rest {
		segments = append(segments, length+i)
	}
	segments = append(segments, length+len(rest))

	return idx, segments
}
func (self Prefix) Len() int { func (self Prefix) Len() int {
return -1 return -1
} }

View File

@ -13,6 +13,36 @@ func (self PrefixSuffix) Kind() Kind {
return KindPrefixSuffix return KindPrefixSuffix
} }
// Index returns the offset of the first occurrence of Prefix in s and the
// byte lengths (segments) a match may have, measured from that offset.
//
// A segment of length n is reported when s[idx:idx+n] both starts with Prefix
// and ends with Suffix; the two may overlap. Segments are returned in
// ascending order. When Prefix does not occur, or no such segment exists,
// (-1, nil) is returned.
//
// With an empty Suffix every rune boundary at or after the end of Prefix is a
// valid end of the match.
func (self PrefixSuffix) Index(s string) (int, []int) {
	prefixIdx := strings.Index(s, self.Prefix)
	if prefixIdx == -1 {
		return -1, nil
	}

	// All segment lengths are relative to prefixIdx, so work on the tail only.
	sub := s[prefixIdx:]
	prefixLen, suffixLen := len(self.Prefix), len(self.Suffix)

	var segments []int

	if suffixLen == 0 {
		// An empty suffix matches at every position; searching for it in a
		// loop would never make progress, so enumerate the boundaries directly.
		rest := sub[prefixLen:]
		for i := range rest {
			segments = append(segments, prefixLen+i)
		}
		return prefixIdx, append(segments, len(sub))
	}

	// Scan forward over every occurrence of Suffix, stepping by one byte so
	// that overlapping occurrences are found as well.
	for from := 0; ; {
		i := strings.Index(sub[from:], self.Suffix)
		if i == -1 {
			break
		}
		start := from + i
		// A segment that ends before Prefix does cannot start with Prefix.
		if end := start + suffixLen; end >= prefixLen {
			segments = append(segments, end)
		}
		from = start + 1
	}

	if len(segments) == 0 {
		return -1, nil
	}

	return prefixIdx, segments
}
func (self PrefixSuffix) Len() int { func (self PrefixSuffix) Len() int {
return -1 return -1
} }

View File

@ -0,0 +1,47 @@
package match
import (
"reflect"
"testing"
)
// TestPrefixSuffixIndex is a table-driven check of PrefixSuffix.Index,
// including a case with repeated suffixes and one where prefix and suffix
// overlap.
func TestPrefixSuffixIndex(t *testing.T) {
	cases := []struct {
		prefix   string
		suffix   string
		fixture  string
		index    int
		segments []int
	}{
		{prefix: "a", suffix: "c", fixture: "abc", index: 0, segments: []int{3}},
		{prefix: "f", suffix: "f", fixture: "fffabfff", index: 0, segments: []int{1, 2, 3, 6, 7, 8}},
		{prefix: "ab", suffix: "bc", fixture: "abc", index: 0, segments: []int{3}},
	}

	for id, tc := range cases {
		matcher := PrefixSuffix{tc.prefix, tc.suffix}
		gotIndex, gotSegments := matcher.Index(tc.fixture)
		if gotIndex != tc.index {
			t.Errorf("#%d unexpected index: exp: %d, act: %d", id, tc.index, gotIndex)
		}
		if !reflect.DeepEqual(gotSegments, tc.segments) {
			t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, tc.segments, gotSegments)
		}
	}
}

37
match/prefix_test.go Normal file
View File

@ -0,0 +1,37 @@
package match
import (
"reflect"
"testing"
)
// TestPrefixIndex is a table-driven check of Prefix.Index with the prefix at
// the start and in the middle of the fixture.
func TestPrefixIndex(t *testing.T) {
	cases := []struct {
		prefix   string
		fixture  string
		index    int
		segments []int
	}{
		{prefix: "ab", fixture: "abc", index: 0, segments: []int{2, 3}},
		{prefix: "ab", fixture: "fffabfff", index: 3, segments: []int{2, 3, 4, 5}},
	}

	for id, tc := range cases {
		matcher := Prefix{tc.prefix}
		gotIndex, gotSegments := matcher.Index(tc.fixture)
		if gotIndex != tc.index {
			t.Errorf("#%d unexpected index: exp: %d, act: %d", id, tc.index, gotIndex)
		}
		if !reflect.DeepEqual(gotSegments, tc.segments) {
			t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, tc.segments, gotSegments)
		}
	}
}

View File

@ -13,6 +13,15 @@ func (self Suffix) Kind() Kind {
return KindSuffix return KindSuffix
} }
// Index looks for the first occurrence of Suffix in s. When found it returns
// index 0 and a single segment: the byte offset just past that occurrence.
// When Suffix does not occur it returns (-1, nil).
func (self Suffix) Index(s string) (int, []int) {
	if at := strings.Index(s, self.Suffix); at >= 0 {
		return 0, []int{at + len(self.Suffix)}
	}
	return -1, nil
}
func (self Suffix) Len() int { func (self Suffix) Len() int {
return -1 return -1
} }

37
match/suffix_test.go Normal file
View File

@ -0,0 +1,37 @@
package match
import (
"reflect"
"testing"
)
// TestSuffixIndex is a table-driven check of Suffix.Index with the suffix at
// the start and in the middle of the fixture.
func TestSuffixIndex(t *testing.T) {
	cases := []struct {
		suffix   string
		fixture  string
		index    int
		segments []int
	}{
		{suffix: "ab", fixture: "abc", index: 0, segments: []int{2}},
		{suffix: "ab", fixture: "fffabfff", index: 0, segments: []int{5}},
	}

	for id, tc := range cases {
		matcher := Suffix{tc.suffix}
		gotIndex, gotSegments := matcher.Index(tc.fixture)
		if gotIndex != tc.index {
			t.Errorf("#%d unexpected index: exp: %d, act: %d", id, tc.index, gotIndex)
		}
		if !reflect.DeepEqual(gotSegments, tc.segments) {
			t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, tc.segments, gotSegments)
		}
	}
}

View File

@ -16,7 +16,7 @@ func (self Super) Len() int {
} }
func (self Super) Index(s string) (int, []int) { func (self Super) Index(s string) (int, []int) {
segments := make([]int, utf8.RuneCountInString(s)) segments := make([]int, 0, utf8.RuneCountInString(s)+1)
for i := range s { for i := range s {
segments = append(segments, i) segments = append(segments, i)
} }