Indexing all matchers

This commit is contained in:
s.kamardin 2016-01-12 14:06:59 +03:00
parent b11fb9474b
commit f2255c18f5
25 changed files with 746 additions and 48 deletions

View File

@ -48,11 +48,13 @@ func optimize(matcher match.Matcher) match.Matcher {
}
if leftNil && rightSuffix {
return match.Every{match.Matchers{match.Prefix{r.Str}, rs}}
return match.PrefixSuffix{Prefix: r.Str, Suffix: rs.Suffix}
// return match.EveryOf{match.Matchers{match.Prefix{r.Str}, rs}}
}
if rightNil && leftPrefix {
return match.Every{match.Matchers{lp, match.Suffix{r.Str}}}
return match.PrefixSuffix{Prefix: lp.Prefix, Suffix: r.Str}
// return match.EveryOf{match.Matchers{lp, match.Suffix{r.Str}}}
}
return m
@ -176,7 +178,7 @@ func glueAsEvery(matchers []match.Matcher) match.Matcher {
return match.Min{min}
}
every := match.Every{}
every := match.EveryOf{}
if min > 0 {
every.Add(match.Min{min})
@ -220,23 +222,21 @@ func compileMatchers(matchers []match.Matcher) (match.Matcher, error) {
}
var (
val match.Primitive
val match.Matcher
idx int
)
maxLen := -1
for i, matcher := range matchers {
if p, ok := matcher.(match.Primitive); ok {
l := p.Len()
if l >= maxLen {
maxLen = l
idx = i
val = p
}
l := matcher.Len()
if l >= maxLen {
maxLen = l
idx = i
val = matcher
}
}
if val == nil {
return nil, fmt.Errorf("could not convert matchers %s: need at least one primitive", match.Matchers(matchers))
return nil, fmt.Errorf("could not convert matchers %s: need at least one matcher", match.Matchers(matchers))
}
left := matchers[:idx]

View File

@ -25,7 +25,7 @@ func TestGlueMatchers(t *testing.T) {
match.Any{separators},
match.Single{separators},
},
match.Every{match.Matchers{
match.EveryOf{match.Matchers{
match.Min{1},
match.Contains{separators, true},
}},
@ -36,7 +36,7 @@ func TestGlueMatchers(t *testing.T) {
match.Single{},
match.Single{},
},
match.Every{match.Matchers{
match.EveryOf{match.Matchers{
match.Min{3},
match.Max{3},
}},
@ -46,7 +46,7 @@ func TestGlueMatchers(t *testing.T) {
match.List{"a", true},
match.Any{"a"},
},
match.Every{match.Matchers{
match.EveryOf{match.Matchers{
match.Min{1},
match.Contains{"a", true},
}},
@ -236,7 +236,7 @@ func TestCompiler(t *testing.T) {
{
ast: pattern(&nodeAny{}, &nodeSingle{}, &nodeSingle{}, &nodeSingle{}),
sep: separators,
result: match.Every{Matchers: match.Matchers{
result: match.EveryOf{Matchers: match.Matchers{
match.Min{3},
match.Contains{separators, true},
}},
@ -277,8 +277,9 @@ func TestCompiler(t *testing.T) {
result: match.Prefix{"abc"},
},
{
ast: pattern(&nodeText{text: "abc"}, &nodeAny{}, &nodeText{text: "def"}),
result: match.Every{match.Matchers{match.Prefix{"abc"}, match.Suffix{"def"}}},
ast: pattern(&nodeText{text: "abc"}, &nodeAny{}, &nodeText{text: "def"}),
// result: match.EveryOf{match.Matchers{match.Prefix{"abc"}, match.Suffix{"def"}}},
result: match.PrefixSuffix{"abc", "def"},
},
{
ast: pattern(&nodeAny{}, &nodeAny{}, &nodeAny{}, &nodeText{text: "abc"}, &nodeAny{}, &nodeAny{}),

View File

@ -42,7 +42,7 @@ func TestCompilePattern(t *testing.T) {
exp match.Matcher
}{
// {
// pattern: "{abc,def}ghi",
// pattern: "{*,def}ghi",
// exp: match.Raw{"t"},
// },
} {

View File

@ -3,6 +3,7 @@ package match
import (
"fmt"
"strings"
"unicode/utf8"
)
type Any struct {
@ -13,20 +14,25 @@ func (self Any) Match(s string) bool {
return strings.IndexAny(s, self.Separators) == -1
}
func (self Any) Index(s string) (index int, segments []int) {
index = -1
for i, r := range s {
if strings.IndexRune(self.Separators, r) == -1 {
if index == -1 {
index = i
}
segments = append(segments, i-index)
} else if index != -1 {
break
}
func (self Any) Index(s string) (int, []int) {
var sub string
found := strings.IndexAny(s, self.Separators)
switch found {
case -1:
sub = s
default:
sub = s[:found]
}
return
segments := make([]int, 0, utf8.RuneCountInString(sub)+1)
for i := range sub {
segments = append(segments, i)
}
segments = append(segments, len(sub))
return 0, segments
}
func (self Any) Len() int {

View File

@ -23,9 +23,40 @@ func (self AnyOf) Match(s string) bool {
return false
}
//func (self AnyOf) Index(s string) (index int, segments []int) {
//
//}
// Index asks every matcher in self.Matchers for its index in s and keeps the
// smallest reported one. The segments of all matchers that reported that
// same smallest index are combined with mergeSegments. It returns (-1, nil)
// when there are no matchers or when every matcher reports -1.
func (self AnyOf) Index(s string) (int, []int) {
	best := -1
	var candidates [][]int

	for _, matcher := range self.Matchers {
		pos, seg := matcher.Index(s)
		switch {
		case pos == -1:
			// This matcher found nothing in s.
		case best == -1 || pos < best:
			// New leftmost position: segments gathered for a later one are dropped.
			best = pos
			candidates = [][]int{seg}
		case pos == best:
			candidates = append(candidates, seg)
		}
	}

	if best == -1 {
		return -1, nil
	}
	return best, mergeSegments(candidates)
}
func (self AnyOf) Len() (l int) {
l = -1

53
match/any_of_test.go Normal file
View File

@ -0,0 +1,53 @@
package match
import (
"reflect"
"testing"
)
// TestAnyOfIndex runs AnyOf.Index over a table of matcher sets and fixtures
// and compares the returned index and segments with the expected values.
func TestAnyOfIndex(t *testing.T) {
	cases := []struct {
		matchers Matchers
		fixture  string
		index    int
		segments []int
	}{
		{
			Matchers{Any{}, Raw{"b"}, Raw{"c"}},
			"abc",
			0,
			[]int{0, 1, 2, 3},
		},
		{
			Matchers{Prefix{"b"}, Suffix{"c"}},
			"abc",
			0,
			[]int{3},
		},
		{
			Matchers{List{"[def]", false}, List{"[abc]", false}},
			"abcdef",
			0,
			[]int{1},
		},
	}

	for id, tc := range cases {
		anyOf := AnyOf{tc.matchers}
		gotIndex, gotSegments := anyOf.Index(tc.fixture)
		if gotIndex != tc.index {
			t.Errorf("#%d unexpected index: exp: %d, act: %d", id, tc.index, gotIndex)
		}
		if !reflect.DeepEqual(gotSegments, tc.segments) {
			t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, tc.segments, gotSegments)
		}
	}
}

View File

@ -6,8 +6,7 @@ import (
)
type BTree struct {
Value Primitive
Left, Right Matcher
Value, Left, Right Matcher
}
func (self BTree) Kind() Kind {
@ -39,6 +38,11 @@ func (self BTree) Len() int {
return -1
}
// Index is a placeholder (todo): it performs no search and always returns
// (-1, nil), regardless of s.
func (self BTree) Index(s string) (int, []int) {
	return -1, nil
}
func (self BTree) Match(s string) bool {
inputLen := len(s)

View File

@ -3,6 +3,7 @@ package match
import (
"fmt"
"strings"
"unicode/utf8"
)
type Contains struct {
@ -14,6 +15,43 @@ func (self Contains) Match(s string) bool {
return strings.Contains(s, self.Needle) != self.Not
}
// Index reports index 0 together with a list of byte offsets into s, or
// (-1, nil) when Not is false and Needle does not occur in s.
//
// When Not is false the offsets are every rune boundary from the end of the
// first occurrence of Needle up to and including len(s). When Not is true
// the offsets are every rune boundary from 0 up to and including the start
// of the first occurrence of Needle (or len(s) if Needle does not occur).
func (self Contains) Index(s string) (int, []int) {
	pos := strings.Index(s, self.Needle)

	var (
		base int    // byte offset in s at which rest begins
		rest string // part of s whose rune boundaries become segments
	)
	switch {
	case self.Not && pos == -1:
		rest = s
	case self.Not:
		rest = s[:pos]
	case pos == -1:
		return -1, nil
	default:
		base = pos + len(self.Needle)
		rest = s[base:]
	}

	segments := make([]int, 0, utf8.RuneCountInString(rest)+1)
	for i := range rest {
		segments = append(segments, base+i)
	}
	segments = append(segments, base+len(rest))
	return 0, segments
}
func (self Contains) Len() int {
return -1
}

54
match/contains_test.go Normal file
View File

@ -0,0 +1,54 @@
package match
import (
"reflect"
"testing"
)
// TestContainsIndex runs Contains.Index for both the plain and the negated
// form over a table of fixtures and compares the returned index and segments
// with the expected values.
func TestContainsIndex(t *testing.T) {
	cases := []struct {
		needle   string
		not      bool
		fixture  string
		index    int
		segments []int
	}{
		{"ab", false, "abc", 0, []int{2, 3}},
		{"ab", false, "fffabfff", 0, []int{5, 6, 7, 8}},
		{"ab", true, "abc", 0, []int{0}},
		{"ab", true, "fffabfff", 0, []int{0, 1, 2, 3}},
	}

	for id, tc := range cases {
		matcher := Contains{tc.needle, tc.not}
		gotIndex, gotSegments := matcher.Index(tc.fixture)
		if gotIndex != tc.index {
			t.Errorf("#%d unexpected index: exp: %d, act: %d", id, tc.index, gotIndex)
		}
		if !reflect.DeepEqual(gotSegments, tc.segments) {
			t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, tc.segments, gotSegments)
		}
	}
}

View File

@ -4,16 +4,16 @@ import (
"fmt"
)
type Every struct {
type EveryOf struct {
Matchers Matchers
}
func (self *Every) Add(m Matcher) error {
func (self *EveryOf) Add(m Matcher) error {
self.Matchers = append(self.Matchers, m)
return nil
}
func (self Every) Len() (l int) {
func (self EveryOf) Len() (l int) {
for _, m := range self.Matchers {
if ml := m.Len(); l > 0 {
l += ml
@ -25,7 +25,46 @@ func (self Every) Len() (l int) {
return
}
func (self Every) Match(s string) bool {
// Index runs the matchers in self.Matchers in order and keeps only the
// segment values on which all of them agree.
//
// The first matcher is applied to s; every following matcher is applied to
// the tail of s that starts at the index established so far. The function
// returns (-1, nil) as soon as a matcher reports -1 or no segment survives
// the intersection; otherwise it returns the index and segments left after
// the last matcher. With no matchers at all it returns (0, nil).
func (self EveryOf) Index(s string) (int, []int) {
	// index and offset are both updated to the same value (previous offset
	// plus idx) at the end of every iteration.
	var index int
	var offset int
	// segments holds the values that survived all matchers processed so far;
	// nil means no matcher has contributed yet.
	var segments []int
	sub := s
	for _, m := range self.Matchers {
		idx, seg := m.Index(sub)
		if idx == -1 {
			return -1, nil
		}
		var sum []int
		if segments == nil {
			// First contribution: take the matcher's segments as they are.
			sum = seg
		} else {
			// ex+delta == n is the same as ex+index == n+(idx+offset): a
			// previous segment and a new one are kept when they are equal
			// after adding their respective starting indexes.
			delta := index - (idx + offset)
			for _, ex := range segments {
				for _, n := range seg {
					if ex+delta == n {
						sum = append(sum, n)
					}
				}
			}
		}
		if len(sum) == 0 {
			return -1, nil
		}
		segments = sum
		// Move the reference point forward; the next matcher only sees s
		// from this index on.
		index = idx + offset
		sub = s[index:]
		offset += idx
	}
	return index, segments
}
func (self EveryOf) Match(s string) bool {
for _, m := range self.Matchers {
if !m.Match(s) {
return false
@ -35,10 +74,10 @@ func (self Every) Match(s string) bool {
return true
}
func (self Every) Kind() Kind {
func (self EveryOf) Kind() Kind {
return KindEveryOf
}
func (self Every) String() string {
func (self EveryOf) String() string {
return fmt.Sprintf("[every_of:%s]", self.Matchers)
}

45
match/every_of_test.go Normal file
View File

@ -0,0 +1,45 @@
package match
import (
"reflect"
"testing"
)
// TestEveryOfIndex runs EveryOf.Index over a table of matcher sets and
// compares the returned index and segments with the expected values,
// including a case where the matchers cannot agree.
func TestEveryOfIndex(t *testing.T) {
	cases := []struct {
		matchers Matchers
		fixture  string
		index    int
		segments []int
	}{
		{
			Matchers{Any{}, Raw{"b"}, Raw{"c"}},
			"abc",
			-1,
			nil,
		},
		{
			Matchers{Any{}, Prefix{"b"}, Suffix{"c"}},
			"abc",
			1,
			[]int{2},
		},
	}

	for id, tc := range cases {
		matcher := EveryOf{tc.matchers}
		gotIndex, gotSegments := matcher.Index(tc.fixture)
		if gotIndex != tc.index {
			t.Errorf("#%d unexpected index: exp: %d, act: %d", id, tc.index, gotIndex)
		}
		if !reflect.DeepEqual(gotSegments, tc.segments) {
			t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, tc.segments, gotSegments)
		}
	}
}

40
match/list_test.go Normal file
View File

@ -0,0 +1,40 @@
package match
import (
"reflect"
"testing"
)
// TestListIndex runs List.Index for a plain and a negated list and compares
// the returned index and segments with the expected values.
func TestListIndex(t *testing.T) {
	cases := []struct {
		list     string
		not      bool
		fixture  string
		index    int
		segments []int
	}{
		{"ab", false, "abc", 0, []int{1}},
		{"ab", true, "fffabfff", 0, []int{1}},
	}

	for id, tc := range cases {
		matcher := List{tc.list, tc.not}
		gotIndex, gotSegments := matcher.Index(tc.fixture)
		if gotIndex != tc.index {
			t.Errorf("#%d unexpected index: exp: %d, act: %d", id, tc.index, gotIndex)
		}
		if !reflect.DeepEqual(gotSegments, tc.segments) {
			t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, tc.segments, gotSegments)
		}
	}
}

View File

@ -29,12 +29,8 @@ const (
type Matcher interface {
Match(string) bool
Len() int
}
type Primitive interface {
Matcher
Index(string) (int, []int)
Len() int
}
type Matchers []Matcher
@ -47,3 +43,60 @@ func (m Matchers) String() string {
return fmt.Sprintf("matchers[%s]", strings.Join(s, ","))
}
// appendIfNotAsPrevious appends val to target unless target is non-empty and
// its last element already equals val, in which case target is returned
// unchanged.
func appendIfNotAsPrevious(target []int, val int) []int {
	if n := len(target); n > 0 && target[n-1] == val {
		return target
	}
	return append(target, val)
}
// mergeSegments folds the given segment lists into one ascending list without
// duplicates. Every input list must already be SORTED and UNIQUE. When only
// one non-nil list is supplied it is returned as is, not copied.
func mergeSegments(segments [][]int) []int {
	var merged []int
	for _, seg := range segments {
		if merged == nil {
			merged = seg
			continue
		}

		var out []int
		i, j := 0, 0
		// Classic two-way merge while both lists still have elements.
		for i < len(merged) && j < len(seg) {
			left, right := merged[i], seg[j]
			if left <= right {
				out = appendIfNotAsPrevious(out, left)
				i++
				if left == right {
					j++
				}
			} else {
				out = appendIfNotAsPrevious(out, right)
				j++
			}
		}
		// At most one of the two tails is non-empty here.
		out = append(out, merged[i:]...)
		out = append(out, seg[j:]...)

		merged = out
	}
	return merged
}

37
match/match_test.go Normal file
View File

@ -0,0 +1,37 @@
package match
import (
"reflect"
"testing"
)
// TestMergeSegments checks that mergeSegments combines several sorted lists
// into one sorted list without duplicates.
func TestMergeSegments(t *testing.T) {
	cases := []struct {
		segments [][]int
		exp      []int
	}{
		{
			[][]int{
				{0, 6, 7},
				{0, 1, 3},
				{2, 4},
			},
			[]int{0, 1, 2, 3, 4, 6, 7},
		},
		{
			[][]int{
				{0, 1, 3, 6, 7},
				{0, 1, 3},
				{2, 4},
				{1},
			},
			[]int{0, 1, 2, 3, 4, 6, 7},
		},
	}

	for id, tc := range cases {
		if act := mergeSegments(tc.segments); !reflect.DeepEqual(act, tc.exp) {
			t.Errorf("#%d merge sort segments unexpected:\nact: %v\nexp:%v", id, act, tc.exp)
		}
	}
}

View File

@ -13,6 +13,26 @@ func (self Max) Match(s string) bool {
return utf8.RuneCountInString(s) <= self.Limit
}
// Index reports index 0 together with the byte length of every leading part
// of s that holds at most Limit runes: 0, the end of the first rune, the end
// of the second rune, and so on, stopping after Limit runes or at the end of
// s, whichever comes first.
//
// A string shorter than Limit is still a valid match (Match accepts it), so
// it is not rejected here. A negative Limit can never be satisfied and
// yields (-1, nil).
func (self Max) Index(s string) (int, []int) {
	if self.Limit < 0 {
		return -1, nil
	}

	// There is one segment per counted rune plus the empty one.
	capacity := self.Limit
	if n := utf8.RuneCountInString(s); n < capacity {
		capacity = n
	}
	segments := make([]int, 0, capacity+1)
	segments = append(segments, 0)

	// The decoded width is used instead of utf8.RuneLen so that invalid bytes,
	// which decode as a one-byte RuneError, still produce correct offsets.
	for end, count := 0, 0; end < len(s) && count < self.Limit; count++ {
		_, width := utf8.DecodeRuneInString(s[end:])
		end += width
		segments = append(segments, end)
	}

	return 0, segments
}
func (self Max) Len() int {
return -1
}

37
match/max_test.go Normal file
View File

@ -0,0 +1,37 @@
package match
import (
"reflect"
"testing"
)
// TestMaxIndex runs Max.Index over a table of fixtures and compares the
// returned index and segments with the expected values.
func TestMaxIndex(t *testing.T) {
	cases := []struct {
		limit    int
		fixture  string
		index    int
		segments []int
	}{
		{3, "abc", 0, []int{0, 1, 2, 3}},
		{3, "abcdef", 0, []int{0, 1, 2, 3}},
	}

	for id, tc := range cases {
		matcher := Max{tc.limit}
		gotIndex, gotSegments := matcher.Index(tc.fixture)
		if gotIndex != tc.index {
			t.Errorf("#%d unexpected index: exp: %d, act: %d", id, tc.index, gotIndex)
		}
		if !reflect.DeepEqual(gotSegments, tc.segments) {
			t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, tc.segments, gotSegments)
		}
	}
}

View File

@ -13,6 +13,25 @@ func (self Min) Match(s string) bool {
return utf8.RuneCountInString(s) >= self.Limit
}
// Index reports index 0 together with the byte length of every leading part
// of s that holds at least Limit runes, in ascending order. It returns
// (-1, nil) when s has fewer than Limit runes.
//
// When Limit is zero or negative the empty leading part qualifies as well, so
// 0 is included as the first segment.
func (self Min) Index(s string) (int, []int) {
	total := utf8.RuneCountInString(s)
	if total < self.Limit {
		return -1, nil
	}

	// Clamp so that a negative Limit cannot inflate (or overflow) the capacity.
	limit := self.Limit
	if limit < 0 {
		limit = 0
	}
	segments := make([]int, 0, total-limit+1)
	if limit == 0 {
		segments = append(segments, 0)
	}

	// The decoded width is used instead of utf8.RuneLen so that invalid bytes,
	// which decode as a one-byte RuneError, still produce correct offsets.
	for end, count := 0, 0; end < len(s); {
		_, width := utf8.DecodeRuneInString(s[end:])
		end += width
		count++
		if count >= limit && count > 0 {
			segments = append(segments, end)
		}
	}

	return 0, segments
}
func (self Min) Len() int {
return -1
}

37
match/min_test.go Normal file
View File

@ -0,0 +1,37 @@
package match
import (
"reflect"
"testing"
)
// TestMinIndex runs Min.Index over a table of fixtures and compares the
// returned index and segments with the expected values.
func TestMinIndex(t *testing.T) {
	cases := []struct {
		limit    int
		fixture  string
		index    int
		segments []int
	}{
		{1, "abc", 0, []int{1, 2, 3}},
		{3, "abcd", 0, []int{3, 4}},
	}

	for id, tc := range cases {
		matcher := Min{tc.limit}
		gotIndex, gotSegments := matcher.Index(tc.fixture)
		if gotIndex != tc.index {
			t.Errorf("#%d unexpected index: exp: %d, act: %d", id, tc.index, gotIndex)
		}
		if !reflect.DeepEqual(gotSegments, tc.segments) {
			t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, tc.segments, gotSegments)
		}
	}
}

View File

@ -3,6 +3,7 @@ package match
import (
"fmt"
"strings"
"unicode/utf8"
)
type Prefix struct {
@ -13,6 +14,29 @@ func (self Prefix) Kind() Kind {
return KindPrefix
}
// Index locates the first occurrence of Prefix in s and returns its byte
// index together with the lengths, measured from that index, of every part
// of s that starts with the prefix and ends on a rune boundary: len(Prefix)
// first, then one value per following rune up to the end of s. It returns
// (-1, nil) when Prefix does not occur in s.
func (self Prefix) Index(s string) (int, []int) {
	idx := strings.Index(s, self.Prefix)
	if idx == -1 {
		return -1, nil
	}

	length := len(self.Prefix)
	// idx+length never exceeds len(s), so this slice is always valid and is
	// empty when the prefix ends the string.
	sub := s[idx+length:]

	segments := make([]int, 0, utf8.RuneCountInString(sub)+1)
	// Rune start offsets come straight from range, so invalid bytes (which
	// range steps over one byte at a time) cannot push a segment past the
	// real boundary the way utf8.RuneLen(RuneError) would.
	for i := range sub {
		segments = append(segments, length+i)
	}
	segments = append(segments, length+len(sub))

	return idx, segments
}
func (self Prefix) Len() int {
return -1
}

View File

@ -13,6 +13,36 @@ func (self PrefixSuffix) Kind() Kind {
return KindPrefixSuffix
}
// Index locates the first occurrence of Prefix in s and returns its byte
// index together with the lengths, measured from that index and in ascending
// order, of every part of s that ends with an occurrence of Suffix. It
// returns (-1, nil) when Prefix does not occur in s or when Suffix does not
// occur at or after the prefix position.
//
// An empty Suffix is satisfied by the whole remainder of s, which is reported
// as the single segment.
func (self PrefixSuffix) Index(s string) (int, []int) {
	prefixIdx := strings.Index(s, self.Prefix)
	if prefixIdx == -1 {
		return -1, nil
	}

	rest := s[prefixIdx:]
	suffixLen := len(self.Suffix)
	if suffixLen == 0 {
		// strings.LastIndex with an empty needle never reaches -1, so the
		// search loop below would not terminate.
		return prefixIdx, []int{len(rest)}
	}

	// Walk the suffix occurrences from right to left. The search window must
	// keep shrinking within rest: suffixIdx is relative to rest, not to s.
	var segments []int
	for sub := rest; ; {
		suffixIdx := strings.LastIndex(sub, self.Suffix)
		if suffixIdx == -1 {
			break
		}
		segments = append(segments, suffixIdx+suffixLen)
		sub = sub[:suffixIdx]
	}
	if len(segments) == 0 {
		return -1, nil
	}

	// Segments were collected in descending order; flip them in place.
	for i, j := 0, len(segments)-1; i < j; i, j = i+1, j-1 {
		segments[i], segments[j] = segments[j], segments[i]
	}

	return prefixIdx, segments
}
func (self PrefixSuffix) Len() int {
return -1
}

View File

@ -0,0 +1,47 @@
package match
import (
"reflect"
"testing"
)
// TestPrefixSuffixIndex runs PrefixSuffix.Index over a table of fixtures,
// including repeated and overlapping prefix/suffix occurrences, and compares
// the returned index and segments with the expected values.
func TestPrefixSuffixIndex(t *testing.T) {
	cases := []struct {
		prefix   string
		suffix   string
		fixture  string
		index    int
		segments []int
	}{
		{"a", "c", "abc", 0, []int{3}},
		{"f", "f", "fffabfff", 0, []int{1, 2, 3, 6, 7, 8}},
		{"ab", "bc", "abc", 0, []int{3}},
	}

	for id, tc := range cases {
		matcher := PrefixSuffix{tc.prefix, tc.suffix}
		gotIndex, gotSegments := matcher.Index(tc.fixture)
		if gotIndex != tc.index {
			t.Errorf("#%d unexpected index: exp: %d, act: %d", id, tc.index, gotIndex)
		}
		if !reflect.DeepEqual(gotSegments, tc.segments) {
			t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, tc.segments, gotSegments)
		}
	}
}

37
match/prefix_test.go Normal file
View File

@ -0,0 +1,37 @@
package match
import (
"reflect"
"testing"
)
// TestPrefixIndex runs Prefix.Index with the prefix at the start and in the
// middle of the fixture and compares the returned index and segments with
// the expected values.
func TestPrefixIndex(t *testing.T) {
	cases := []struct {
		prefix   string
		fixture  string
		index    int
		segments []int
	}{
		{"ab", "abc", 0, []int{2, 3}},
		{"ab", "fffabfff", 3, []int{2, 3, 4, 5}},
	}

	for id, tc := range cases {
		matcher := Prefix{tc.prefix}
		gotIndex, gotSegments := matcher.Index(tc.fixture)
		if gotIndex != tc.index {
			t.Errorf("#%d unexpected index: exp: %d, act: %d", id, tc.index, gotIndex)
		}
		if !reflect.DeepEqual(gotSegments, tc.segments) {
			t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, tc.segments, gotSegments)
		}
	}
}

View File

@ -13,6 +13,15 @@ func (self Suffix) Kind() Kind {
return KindSuffix
}
// Index looks for the first occurrence of Suffix in s. When found it returns
// index 0 and a single segment: the byte offset just past that occurrence.
// Otherwise it returns (-1, nil).
func (self Suffix) Index(s string) (int, []int) {
	if pos := strings.Index(s, self.Suffix); pos != -1 {
		return 0, []int{pos + len(self.Suffix)}
	}
	return -1, nil
}
func (self Suffix) Len() int {
return -1
}

37
match/suffix_test.go Normal file
View File

@ -0,0 +1,37 @@
package match
import (
"reflect"
"testing"
)
// TestSuffixIndex runs Suffix.Index over a table of fixtures and compares the
// returned index and segments with the expected values.
func TestSuffixIndex(t *testing.T) {
	cases := []struct {
		suffix   string
		fixture  string
		index    int
		segments []int
	}{
		{"ab", "abc", 0, []int{2}},
		{"ab", "fffabfff", 0, []int{5}},
	}

	for id, tc := range cases {
		matcher := Suffix{tc.suffix}
		gotIndex, gotSegments := matcher.Index(tc.fixture)
		if gotIndex != tc.index {
			t.Errorf("#%d unexpected index: exp: %d, act: %d", id, tc.index, gotIndex)
		}
		if !reflect.DeepEqual(gotSegments, tc.segments) {
			t.Errorf("#%d unexpected segments: exp: %v, act: %v", id, tc.segments, gotSegments)
		}
	}
}

View File

@ -16,7 +16,7 @@ func (self Super) Len() int {
}
func (self Super) Index(s string) (int, []int) {
segments := make([]int, utf8.RuneCountInString(s))
segments := make([]int, 0, utf8.RuneCountInString(s)+1)
for i := range s {
segments = append(segments, i)
}