Tune, new feature test

This commit is contained in:
s.kamardin 2016-01-14 21:32:02 +03:00
parent c65eb46cf3
commit d2a191e0f0
13 changed files with 142 additions and 117 deletions

5
.gitignore vendored
View File

@ -2,4 +2,7 @@ glob.iml
.idea
*.cpu
*.mem
*.test
*.test
*.dot
*.png
*.svg

View File

@ -3,7 +3,7 @@ sudo: false
language: go
go:
- 1.5.1
- 1.5.3
script:
- go test -v ./...

View File

@ -17,7 +17,7 @@ func optimize(matcher match.Matcher) match.Matcher {
m.Left = optimize(m.Left)
m.Right = optimize(m.Right)
r, ok := m.Value.(match.Raw)
r, ok := m.Value.(match.Text)
if !ok {
return m
}
@ -26,7 +26,7 @@ func optimize(matcher match.Matcher) match.Matcher {
rightNil := m.Right == nil
if leftNil && rightNil {
return match.NewRaw(r.Str)
return match.NewText(r.Str)
}
_, leftSuper := m.Left.(match.Super)
@ -325,7 +325,7 @@ func do(node node, s string) (m match.Matcher, err error) {
m = match.Single{s}
case *nodeText:
m = match.NewRaw(n.text)
m = match.NewText(n.text)
default:
return nil, fmt.Errorf("could not compile tree: unknown node type")
@ -424,7 +424,7 @@ func do2(node node, s string) ([]match.Matcher, error) {
result = append(result, match.Single{s})
case *nodeText:
result = append(result, match.NewRaw(n.text))
result = append(result, match.NewText(n.text))
default:
return nil, fmt.Errorf("could not compile tree: unknown node type")

View File

@ -74,10 +74,10 @@ func TestCompileMatchers(t *testing.T) {
[]match.Matcher{
match.Super{},
match.Single{separators},
match.Raw{"c", 1},
match.NewText("c"),
},
match.NewBTree(
match.Raw{"c", 1},
match.NewText("c"),
match.NewBTree(
match.Single{separators},
match.Super{},
@ -89,11 +89,11 @@ func TestCompileMatchers(t *testing.T) {
{
[]match.Matcher{
match.Any{},
match.Raw{"c", 1},
match.NewText("c"),
match.Any{},
},
match.NewBTree(
match.Raw{"c", 1},
match.NewText("c"),
match.Any{},
match.Any{},
),
@ -102,17 +102,17 @@ func TestCompileMatchers(t *testing.T) {
[]match.Matcher{
match.Range{'a', 'c', true},
match.List{"zte", false},
match.Raw{"c", 1},
match.NewText("c"),
match.Single{},
},
match.Row{
Matchers: match.Matchers{
match.Range{'a', 'c', true},
match.List{"zte", false},
match.Raw{"c", 1},
match.NewText("c"),
match.Single{},
},
Length: 4,
RunesLength: 4,
},
},
} {
@ -137,7 +137,7 @@ func TestConvertMatchers(t *testing.T) {
[]match.Matcher{
match.Range{'a', 'c', true},
match.List{"zte", false},
match.Raw{"c", 1},
match.NewText("c"),
match.Single{},
match.Any{},
},
@ -146,10 +146,10 @@ func TestConvertMatchers(t *testing.T) {
Matchers: match.Matchers{
match.Range{'a', 'c', true},
match.List{"zte", false},
match.Raw{"c", 1},
match.NewText("c"),
match.Single{},
},
Length: 4,
RunesLength: 4,
},
match.Any{},
},
@ -158,7 +158,7 @@ func TestConvertMatchers(t *testing.T) {
[]match.Matcher{
match.Range{'a', 'c', true},
match.List{"zte", false},
match.Raw{"c", 1},
match.NewText("c"),
match.Single{},
match.Any{},
match.Single{},
@ -170,9 +170,9 @@ func TestConvertMatchers(t *testing.T) {
Matchers: match.Matchers{
match.Range{'a', 'c', true},
match.List{"zte", false},
match.Raw{"c", 1},
match.NewText("c"),
},
Length: 3,
RunesLength: 3,
},
match.Min{3},
},
@ -208,7 +208,7 @@ func TestCompiler(t *testing.T) {
}{
{
ast: pattern(&nodeText{text: "abc"}),
result: match.Raw{"abc", 3},
result: match.NewText("abc"),
},
{
ast: pattern(&nodeAny{}),
@ -261,10 +261,10 @@ func TestCompiler(t *testing.T) {
result: match.NewBTree(
match.Row{
Matchers: match.Matchers{
match.Raw{"abc", 3},
match.NewText("abc"),
match.Single{separators},
},
Length: 4,
RunesLength: 4,
},
match.Any{separators},
nil,
@ -277,10 +277,10 @@ func TestCompiler(t *testing.T) {
match.Row{
Matchers: match.Matchers{
match.Single{separators},
match.Raw{"abc", 3},
match.NewText("abc"),
match.Single{separators},
},
Length: 5,
RunesLength: 5,
},
match.Super{},
nil,
@ -306,7 +306,7 @@ func TestCompiler(t *testing.T) {
ast: pattern(&nodeAny{}, &nodeAny{}, &nodeAny{}, &nodeText{text: "abc"}, &nodeAny{}, &nodeAny{}),
sep: separators,
result: match.NewBTree(
match.Raw{"abc", 3},
match.NewText("abc"),
match.Any{separators},
match.Any{separators},
),
@ -314,7 +314,7 @@ func TestCompiler(t *testing.T) {
{
ast: pattern(&nodeSuper{}, &nodeSingle{}, &nodeText{text: "abc"}, &nodeSuper{}, &nodeSingle{}),
result: match.NewBTree(
match.Raw{"abc", 3},
match.NewText("abc"),
match.Min{1},
match.Min{1},
),
@ -322,14 +322,14 @@ func TestCompiler(t *testing.T) {
{
ast: pattern(anyOf(&nodeText{text: "abc"})),
result: match.AnyOf{match.Matchers{
match.Raw{"abc", 3},
match.NewText("abc"),
}},
},
{
ast: pattern(anyOf(pattern(anyOf(pattern(&nodeText{text: "abc"}))))),
result: match.AnyOf{match.Matchers{
match.AnyOf{match.Matchers{
match.Raw{"abc", 3},
match.NewText("abc"),
}},
}},
},
@ -345,7 +345,7 @@ func TestCompiler(t *testing.T) {
match.Range{Lo: 'a', Hi: 'z'},
match.Range{Lo: 'a', Hi: 'x', Not: true},
},
Length: 2,
RunesLength: 2,
},
nil,
match.Super{},

View File

@ -19,7 +19,7 @@ const (
pattern_multiple = "https://*.google.*"
fixture_multiple = "https://account.google.com"
pattern_alternatives = "{https://*.google.*,*yahoo.*}"
pattern_alternatives = "{https://*.google.*,*yandex.*,*yahoo.*,*mail.ru}"
fixture_alternatives = "http://yahoo.com"
pattern_prefix = "abc*"
@ -81,14 +81,22 @@ func TestCompilePattern(t *testing.T) {
sep string
exp match.Matcher
}{
// {
// pattern: "left*??B*abcd*[!b]??*abc*right",
// exp: match.Raw{"t"},
// },
// {
// pattern: "abc*??def",
// exp: match.Raw{"t"},
// },
// {
// pattern: "left*??B*abcd*[!b]??*abc*right",
// exp: match.Raw{"t"},
// },
// {
// pattern: "abc*??def",
// exp: match.Raw{"t"},
// },
{
pattern: "{abc[abc]ghi,abc[def]ghi}",
exp: match.NewBTree(
match.AnyOf{match.Matchers{match.List{"abc", false}, match.List{"qwe", false}}},
match.NewText("abc"),
match.NewText("ghi"),
),
},
} {
glob, err := Compile(test.pattern, test.sep)
if err != nil {

View File

@ -15,8 +15,8 @@ func TestAnyOfIndex(t *testing.T) {
{
Matchers{
Any{},
Raw{"b"},
Raw{"c"},
Text{"b"},
Text{"c"},
},
"abc",
0,

View File

@ -6,9 +6,13 @@ import (
)
type BTree struct {
Value, Left, Right Matcher
VLen, LLen, RLen int
Length int
Value Matcher
Left Matcher
Right Matcher
ValueLengthRunes int
LeftLengthRunes int
RightLengthRunes int
LengthRunes int
}
func NewBTree(Value, Left, Right Matcher) (tree BTree) {
@ -17,33 +21,33 @@ func NewBTree(Value, Left, Right Matcher) (tree BTree) {
tree.Right = Right
lenOk := true
if tree.VLen = Value.Len(); tree.VLen == -1 {
if tree.ValueLengthRunes = Value.Len(); tree.ValueLengthRunes == -1 {
lenOk = false
}
if Left != nil {
if tree.LLen = Left.Len(); tree.LLen == -1 {
if tree.LeftLengthRunes = Left.Len(); tree.LeftLengthRunes == -1 {
lenOk = false
}
}
if Right != nil {
if tree.RLen = Right.Len(); tree.RLen == -1 {
if tree.RightLengthRunes = Right.Len(); tree.RightLengthRunes == -1 {
lenOk = false
}
}
if lenOk {
tree.Length = tree.LLen + tree.VLen + tree.RLen
tree.LengthRunes = tree.LeftLengthRunes + tree.ValueLengthRunes + tree.RightLengthRunes
} else {
tree.Length = -1
tree.LengthRunes = -1
}
return tree
}
func (self BTree) Len() int {
return self.Length
return self.LengthRunes
}
// todo?
@ -54,27 +58,33 @@ func (self BTree) Index(s string) (int, []int) {
func (self BTree) Match(s string) bool {
inputLen := len(s)
if self.Length != -1 && self.Length > inputLen {
// self.LengthRunes, self.RightLengthRunes and self.LeftLengthRunes hold the length in runes of each part.
// Here we compare them against byte lengths, which is cheaper than counting runes,
// and the checks still hold because every rune occupies at least one byte.
if self.LengthRunes != -1 && self.LengthRunes > inputLen {
return false
}
// try to cut unnecessary parts
// by knowledge of length of right and left part
var offset, limit int
if self.LLen >= 0 {
offset = self.LLen
if self.LeftLengthRunes >= 0 {
offset = self.LeftLengthRunes
}
if self.RLen >= 0 {
limit = inputLen - self.RLen
if self.RightLengthRunes >= 0 {
limit = inputLen - self.RightLengthRunes
} else {
limit = inputLen
}
for offset < limit {
// search for matching part in substring
index, segments := self.Value.Index(s[offset:limit])
if index == -1 {
return false
}
l := string(s[:offset+index])
l := s[:offset+index]
var left bool
if self.Left != nil {
left = self.Left.Match(l)
@ -86,12 +96,7 @@ func (self BTree) Match(s string) bool {
for i := len(segments) - 1; i >= 0; i-- {
length := segments[i]
if self.RLen >= 0 && inputLen-(offset+index+length) != self.RLen {
continue
}
var right bool
var r string
// if there is no string for the right branch
if inputLen <= offset+index+length {

View File

@ -11,17 +11,17 @@ func TestBTree(t *testing.T) {
exp bool
}{
{
BTree{Value: Raw{"abc"}, Left: Super{}, Right: Super{}},
BTree{Value: Text{"abc"}, Left: Super{}, Right: Super{}},
"abc",
true,
},
{
BTree{Value: Raw{"a"}, Left: Single{}, Right: Single{}},
BTree{Value: Text{"a"}, Left: Single{}, Right: Single{}},
"aaa",
true,
},
{
BTree{Value: Raw{"b"}, Left: Single{}},
BTree{Value: Text{"b"}, Left: Single{}},
"bbb",
false,
},
@ -31,7 +31,7 @@ func TestBTree(t *testing.T) {
Left: Super{},
Value: Single{},
},
Value: Raw{"c"},
Value: Text{"c"},
},
"abc",
true,

View File

@ -15,8 +15,8 @@ func TestEveryOfIndex(t *testing.T) {
{
Matchers{
Any{},
Raw{"b"},
Raw{"c"},
Text{"b"},
Text{"c"},
},
"abc",
-1,

View File

@ -12,6 +12,13 @@ type List struct {
}
// Match reports whether s is a single rune accepted by the list.
//
// A list stands for exactly one character, so the empty string and any
// input holding more than one rune are rejected regardless of Not.
// For a single rune the result is its membership in self.List, inverted
// when self.Not is set.
func (self List) Match(s string) bool {
	// Count runes, bailing out on the second one: a pure byte-length
	// check cannot tell one 4-byte rune from several short ones.
	runes := 0
	for range s {
		runes++
		if runes > 1 {
			return false
		}
	}
	if runes == 0 {
		return false
	}

	inList := strings.Index(self.List, s) != -1
	return inList == !self.Not
}

View File

@ -1,43 +0,0 @@
package match
import (
"fmt"
"strings"
"unicode/utf8"
)
// Raw matches a string that is exactly equal to Str.
type Raw struct {
	Str    string // literal text to match
	Length int    // length of Str in runes, as reported by Len
}

// NewRaw builds a Raw for s and caches its rune count.
func NewRaw(s string) Raw {
	return Raw{
		Str:    s,
		Length: utf8.RuneCountInString(s),
	}
}

// Match reports whether s is exactly equal to the raw string.
func (self Raw) Match(s string) bool {
	return self.Str == s
}

// Len returns the cached length of the raw string in runes.
func (self Raw) Len() int {
	return self.Length
}

// Index finds the first occurrence of the raw string in s.
//
// It returns the byte offset of the occurrence and a single segment
// holding the length of the match, or -1 and nil segments when s does
// not contain the raw string.
func (self Raw) Index(s string) (index int, segments []int) {
	index = strings.Index(s, self.Str)
	if index == -1 {
		return
	}

	// index is a byte offset, so the segment length must be in bytes as
	// well; the rune count in self.Length is shorter for non-ASCII text.
	segments = []int{len(self.Str)}

	return
}

// String returns a printable description of the matcher.
func (self Raw) String() string {
	return fmt.Sprintf("<raw:%s>", self.Str)
}

View File

@ -6,8 +6,8 @@ import (
)
type Row struct {
Matchers Matchers
Length int
Matchers Matchers
RunesLength int
}
func (self Row) matchAll(s string) bool {
@ -25,7 +25,7 @@ func (self Row) matchAll(s string) bool {
}
func (self Row) Match(s string) bool {
if utf8.RuneCountInString(s) < self.Length {
if utf8.RuneCountInString(s) < self.RunesLength {
return false
}
@ -33,23 +33,23 @@ func (self Row) Match(s string) bool {
}
func (self Row) Len() (l int) {
return self.Length
return self.RunesLength
}
func (self Row) Index(s string) (int, []int) {
l := utf8.RuneCountInString(s)
if l < self.Length {
if l < self.RunesLength {
return -1, nil
}
for i := range s {
sub := s[i:]
if self.matchAll(sub) {
return i, []int{self.Length}
return i, []int{self.RunesLength}
}
l -= 1
if l < self.Length {
if l < self.RunesLength {
return -1, nil
}
}
@ -58,5 +58,5 @@ func (self Row) Index(s string) (int, []int) {
}
func (self Row) String() string {
return fmt.Sprintf("<row_%d:[%s]>", self.Length, self.Matchers)
return fmt.Sprintf("<row_%d:[%s]>", self.RunesLength, self.Matchers)
}

45
match/text.go Normal file
View File

@ -0,0 +1,45 @@
package match
import (
"fmt"
"strings"
"unicode/utf8"
)
// Text matches a string that is exactly equal to Str.
//
// RunesLength and BytesLength cache the size of Str; NewText fills them in.
type Text struct {
	Str         string // literal text to match
	RunesLength int    // length of Str in runes, as reported by Len
	BytesLength int    // length of Str in bytes
}

// NewText builds a Text for s and caches its rune and byte lengths.
func NewText(s string) Text {
	return Text{
		Str:         s,
		RunesLength: utf8.RuneCountInString(s),
		BytesLength: len(s),
	}
}

// Match reports whether s is exactly equal to the text.
func (self Text) Match(s string) bool {
	return self.Str == s
}

// Len returns the cached length of the text in runes.
// The value is only meaningful when RunesLength was populated, as NewText does.
func (self Text) Len() int {
	return self.RunesLength
}

// Index finds the first occurrence of the text in s.
//
// It returns the byte offset of the occurrence and a single segment
// holding the byte length of the match, or -1 and nil segments when s
// does not contain the text.
func (self Text) Index(s string) (index int, segments []int) {
	index = strings.Index(s, self.Str)
	if index == -1 {
		return
	}

	// Take the byte length from Str itself rather than from the cached
	// BytesLength, so a Text built by struct literal still reports the
	// real extent of the match. For NewText values both are equal.
	segments = []int{len(self.Str)}

	return
}

// String returns a printable description of the matcher.
func (self Text) String() string {
	return fmt.Sprintf("<text:%s>", self.Str)
}