From d2a191e0f0a0a69ffddde736395fc766767aa19e Mon Sep 17 00:00:00 2001 From: "s.kamardin" Date: Thu, 14 Jan 2016 21:32:02 +0300 Subject: [PATCH] Tune, new feature test --- .gitignore | 5 ++++- .travis.yml | 2 +- compiler.go | 8 ++++---- compiler_test.go | 46 +++++++++++++++++++++--------------------- glob_test.go | 26 +++++++++++++++--------- match/any_of_test.go | 4 ++-- match/btree.go | 45 +++++++++++++++++++++++------------------ match/btree_test.go | 8 ++++---- match/every_of_test.go | 4 ++-- match/list.go | 7 +++++++ match/raw.go | 43 --------------------------------------- match/row.go | 16 +++++++-------- match/text.go | 45 +++++++++++++++++++++++++++++++++++++++++ 13 files changed, 142 insertions(+), 117 deletions(-) delete mode 100644 match/raw.go create mode 100644 match/text.go diff --git a/.gitignore b/.gitignore index e622226..b4ae623 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,7 @@ glob.iml .idea *.cpu *.mem -*.test \ No newline at end of file +*.test +*.dot +*.png +*.svg diff --git a/.travis.yml b/.travis.yml index 0830019..e8a2768 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,7 +3,7 @@ sudo: false language: go go: - - 1.5.1 + - 1.5.3 script: - go test -v ./... 
diff --git a/compiler.go b/compiler.go index 215117c..f73fe52 100644 --- a/compiler.go +++ b/compiler.go @@ -17,7 +17,7 @@ func optimize(matcher match.Matcher) match.Matcher { m.Left = optimize(m.Left) m.Right = optimize(m.Right) - r, ok := m.Value.(match.Raw) + r, ok := m.Value.(match.Text) if !ok { return m } @@ -26,7 +26,7 @@ func optimize(matcher match.Matcher) match.Matcher { rightNil := m.Right == nil if leftNil && rightNil { - return match.NewRaw(r.Str) + return match.NewText(r.Str) } _, leftSuper := m.Left.(match.Super) @@ -325,7 +325,7 @@ func do(node node, s string) (m match.Matcher, err error) { m = match.Single{s} case *nodeText: - m = match.NewRaw(n.text) + m = match.NewText(n.text) default: return nil, fmt.Errorf("could not compile tree: unknown node type") @@ -424,7 +424,7 @@ func do2(node node, s string) ([]match.Matcher, error) { result = append(result, match.Single{s}) case *nodeText: - result = append(result, match.NewRaw(n.text)) + result = append(result, match.NewText(n.text)) default: return nil, fmt.Errorf("could not compile tree: unknown node type") diff --git a/compiler_test.go b/compiler_test.go index 15a52ec..608c20f 100644 --- a/compiler_test.go +++ b/compiler_test.go @@ -74,10 +74,10 @@ func TestCompileMatchers(t *testing.T) { []match.Matcher{ match.Super{}, match.Single{separators}, - match.Raw{"c", 1}, + match.NewText("c"), }, match.NewBTree( - match.Raw{"c", 1}, + match.NewText("c"), match.NewBTree( match.Single{separators}, match.Super{}, @@ -89,11 +89,11 @@ func TestCompileMatchers(t *testing.T) { { []match.Matcher{ match.Any{}, - match.Raw{"c", 1}, + match.NewText("c"), match.Any{}, }, match.NewBTree( - match.Raw{"c", 1}, + match.NewText("c"), match.Any{}, match.Any{}, ), @@ -102,17 +102,17 @@ func TestCompileMatchers(t *testing.T) { []match.Matcher{ match.Range{'a', 'c', true}, match.List{"zte", false}, - match.Raw{"c", 1}, + match.NewText("c"), match.Single{}, }, match.Row{ Matchers: match.Matchers{ match.Range{'a', 'c', true}, 
match.List{"zte", false}, - match.Raw{"c", 1}, + match.NewText("c"), match.Single{}, }, - Length: 4, + RunesLength: 4, }, }, } { @@ -137,7 +137,7 @@ func TestConvertMatchers(t *testing.T) { []match.Matcher{ match.Range{'a', 'c', true}, match.List{"zte", false}, - match.Raw{"c", 1}, + match.NewText("c"), match.Single{}, match.Any{}, }, @@ -146,10 +146,10 @@ func TestConvertMatchers(t *testing.T) { Matchers: match.Matchers{ match.Range{'a', 'c', true}, match.List{"zte", false}, - match.Raw{"c", 1}, + match.NewText("c"), match.Single{}, }, - Length: 4, + RunesLength: 4, }, match.Any{}, }, @@ -158,7 +158,7 @@ func TestConvertMatchers(t *testing.T) { []match.Matcher{ match.Range{'a', 'c', true}, match.List{"zte", false}, - match.Raw{"c", 1}, + match.NewText("c"), match.Single{}, match.Any{}, match.Single{}, @@ -170,9 +170,9 @@ func TestConvertMatchers(t *testing.T) { Matchers: match.Matchers{ match.Range{'a', 'c', true}, match.List{"zte", false}, - match.Raw{"c", 1}, + match.NewText("c"), }, - Length: 3, + RunesLength: 3, }, match.Min{3}, }, @@ -208,7 +208,7 @@ func TestCompiler(t *testing.T) { }{ { ast: pattern(&nodeText{text: "abc"}), - result: match.Raw{"abc", 3}, + result: match.NewText("abc"), }, { ast: pattern(&nodeAny{}), @@ -261,10 +261,10 @@ func TestCompiler(t *testing.T) { result: match.NewBTree( match.Row{ Matchers: match.Matchers{ - match.Raw{"abc", 3}, + match.NewText("abc"), match.Single{separators}, }, - Length: 4, + RunesLength: 4, }, match.Any{separators}, nil, @@ -277,10 +277,10 @@ func TestCompiler(t *testing.T) { match.Row{ Matchers: match.Matchers{ match.Single{separators}, - match.Raw{"abc", 3}, + match.NewText("abc"), match.Single{separators}, }, - Length: 5, + RunesLength: 5, }, match.Super{}, nil, @@ -306,7 +306,7 @@ func TestCompiler(t *testing.T) { ast: pattern(&nodeAny{}, &nodeAny{}, &nodeAny{}, &nodeText{text: "abc"}, &nodeAny{}, &nodeAny{}), sep: separators, result: match.NewBTree( - match.Raw{"abc", 3}, + match.NewText("abc"), 
match.Any{separators}, match.Any{separators}, ), @@ -314,7 +314,7 @@ func TestCompiler(t *testing.T) { { ast: pattern(&nodeSuper{}, &nodeSingle{}, &nodeText{text: "abc"}, &nodeSuper{}, &nodeSingle{}), result: match.NewBTree( - match.Raw{"abc", 3}, + match.NewText("abc"), match.Min{1}, match.Min{1}, ), @@ -322,14 +322,14 @@ func TestCompiler(t *testing.T) { { ast: pattern(anyOf(&nodeText{text: "abc"})), result: match.AnyOf{match.Matchers{ - match.Raw{"abc", 3}, + match.NewText("abc"), }}, }, { ast: pattern(anyOf(pattern(anyOf(pattern(&nodeText{text: "abc"}))))), result: match.AnyOf{match.Matchers{ match.AnyOf{match.Matchers{ - match.Raw{"abc", 3}, + match.NewText("abc"), }}, }}, }, @@ -345,7 +345,7 @@ func TestCompiler(t *testing.T) { match.Range{Lo: 'a', Hi: 'z'}, match.Range{Lo: 'a', Hi: 'x', Not: true}, }, - Length: 2, + RunesLength: 2, }, nil, match.Super{}, diff --git a/glob_test.go b/glob_test.go index be3015d..ece37c1 100644 --- a/glob_test.go +++ b/glob_test.go @@ -19,7 +19,7 @@ const ( pattern_multiple = "https://*.google.*" fixture_multiple = "https://account.google.com" - pattern_alternatives = "{https://*.google.*,*yahoo.*}" + pattern_alternatives = "{https://*.google.*,*yandex.*,*yahoo.*,*mail.ru}" fixture_alternatives = "http://yahoo.com" pattern_prefix = "abc*" @@ -81,14 +81,22 @@ func TestCompilePattern(t *testing.T) { sep string exp match.Matcher }{ - // { - // pattern: "left*??B*abcd*[!b]??*abc*right", - // exp: match.Raw{"t"}, - // }, - // { - // pattern: "abc*??def", - // exp: match.Raw{"t"}, - // }, + // { + // pattern: "left*??B*abcd*[!b]??*abc*right", + // exp: match.Raw{"t"}, + // }, + // { + // pattern: "abc*??def", + // exp: match.Raw{"t"}, + // }, + { + pattern: "{abc[abc]ghi,abc[def]ghi}", + exp: match.NewBTree( + match.AnyOf{match.Matchers{match.List{"abc", false}, match.List{"qwe", false}}}, + match.NewText("abc"), + match.NewText("ghi"), + ), + }, } { glob, err := Compile(test.pattern, test.sep) if err != nil { diff --git 
a/match/any_of_test.go b/match/any_of_test.go index 6e26886..0ac6ea9 100644 --- a/match/any_of_test.go +++ b/match/any_of_test.go @@ -15,8 +15,8 @@ func TestAnyOfIndex(t *testing.T) { { Matchers{ Any{}, - Raw{"b"}, - Raw{"c"}, + Text{"b"}, + Text{"c"}, }, "abc", 0, diff --git a/match/btree.go b/match/btree.go index 4168539..fe314f2 100644 --- a/match/btree.go +++ b/match/btree.go @@ -6,9 +6,13 @@ import ( ) type BTree struct { - Value, Left, Right Matcher - VLen, LLen, RLen int - Length int + Value Matcher + Left Matcher + Right Matcher + ValueLengthRunes int + LeftLengthRunes int + RightLengthRunes int + LengthRunes int } func NewBTree(Value, Left, Right Matcher) (tree BTree) { @@ -17,33 +21,33 @@ func NewBTree(Value, Left, Right Matcher) (tree BTree) { tree.Right = Right lenOk := true - if tree.VLen = Value.Len(); tree.VLen == -1 { + if tree.ValueLengthRunes = Value.Len(); tree.ValueLengthRunes == -1 { lenOk = false } if Left != nil { - if tree.LLen = Left.Len(); tree.LLen == -1 { + if tree.LeftLengthRunes = Left.Len(); tree.LeftLengthRunes == -1 { lenOk = false } } if Right != nil { - if tree.RLen = Right.Len(); tree.RLen == -1 { + if tree.RightLengthRunes = Right.Len(); tree.RightLengthRunes == -1 { lenOk = false } } if lenOk { - tree.Length = tree.LLen + tree.VLen + tree.RLen + tree.LengthRunes = tree.LeftLengthRunes + tree.ValueLengthRunes + tree.RightLengthRunes } else { - tree.Length = -1 + tree.LengthRunes = -1 } return tree } func (self BTree) Len() int { - return self.Length + return self.LengthRunes } // todo? @@ -54,27 +58,33 @@ func (self BTree) Index(s string) (int, []int) { func (self BTree) Match(s string) bool { inputLen := len(s) - if self.Length != -1 && self.Length > inputLen { + // self.LengthRunes, self.RightLengthRunes and self.LeftLengthRunes hold the length in runes of each part; + // here we compare them with byte lengths, which are cheaper to obtain, + // and these checks still work because a 1-rune string is at least 1 byte long. 
+ if self.LengthRunes != -1 && self.LengthRunes > inputLen { return false } + // try to cut unnecessary parts + // by knowledge of length of right and left part var offset, limit int - if self.LLen >= 0 { - offset = self.LLen + if self.LeftLengthRunes >= 0 { + offset = self.LeftLengthRunes } - if self.RLen >= 0 { - limit = inputLen - self.RLen + if self.RightLengthRunes >= 0 { + limit = inputLen - self.RightLengthRunes } else { limit = inputLen } for offset < limit { + // search for matching part in substring index, segments := self.Value.Index(s[offset:limit]) if index == -1 { return false } - l := string(s[:offset+index]) + l := s[:offset+index] var left bool if self.Left != nil { left = self.Left.Match(l) @@ -86,12 +96,7 @@ func (self BTree) Match(s string) bool { for i := len(segments) - 1; i >= 0; i-- { length := segments[i] - if self.RLen >= 0 && inputLen-(offset+index+length) != self.RLen { - continue - } - var right bool - var r string // if there is no string for the right branch if inputLen <= offset+index+length { diff --git a/match/btree_test.go b/match/btree_test.go index 459d907..2f20dad 100644 --- a/match/btree_test.go +++ b/match/btree_test.go @@ -11,17 +11,17 @@ func TestBTree(t *testing.T) { exp bool }{ { - BTree{Value: Raw{"abc"}, Left: Super{}, Right: Super{}}, + BTree{Value: Text{"abc"}, Left: Super{}, Right: Super{}}, "abc", true, }, { - BTree{Value: Raw{"a"}, Left: Single{}, Right: Single{}}, + BTree{Value: Text{"a"}, Left: Single{}, Right: Single{}}, "aaa", true, }, { - BTree{Value: Raw{"b"}, Left: Single{}}, + BTree{Value: Text{"b"}, Left: Single{}}, "bbb", false, }, @@ -31,7 +31,7 @@ func TestBTree(t *testing.T) { Left: Super{}, Value: Single{}, }, - Value: Raw{"c"}, + Value: Text{"c"}, }, "abc", true, diff --git a/match/every_of_test.go b/match/every_of_test.go index 62c00a3..0ab495f 100644 --- a/match/every_of_test.go +++ b/match/every_of_test.go @@ -15,8 +15,8 @@ func TestEveryOfIndex(t *testing.T) { { Matchers{ Any{}, - Raw{"b"}, - 
Raw{"c"}, + Text{"b"}, + Text{"c"}, }, "abc", -1, diff --git a/match/list.go b/match/list.go index 5739301..3d9f0b6 100644 --- a/match/list.go +++ b/match/list.go @@ -12,6 +12,13 @@ type List struct { } func (self List) Match(s string) bool { + // s longer than 4 bytes always holds more than one symbol (a UTF-8 rune is at most 4 bytes) + // _, w := utf8.DecodeRuneInString(s) + // if len(s) > w { + if len(s) > 4 { + return false + } + inList := strings.Index(self.List, s) != -1 return inList == !self.Not } diff --git a/match/raw.go b/match/raw.go deleted file mode 100644 index e51ca68..0000000 --- a/match/raw.go +++ /dev/null @@ -1,43 +0,0 @@ -package match - -import ( - "fmt" - "strings" - "unicode/utf8" -) - -// raw represents raw string to match -type Raw struct { - Str string - Length int -} - -func NewRaw(s string) Raw { - return Raw{ - Str: s, - Length: utf8.RuneCountInString(s), - } -} - -func (self Raw) Match(s string) bool { - return self.Str == s -} - -func (self Raw) Len() int { - return self.Length -} - -func (self Raw) Index(s string) (index int, segments []int) { - index = strings.Index(s, self.Str) - if index == -1 { - return - } - - segments = []int{self.Length} - - return -} - -func (self Raw) String() string { - return fmt.Sprintf("", self.Str) -} diff --git a/match/row.go b/match/row.go index 9e4f895..434c4ca 100644 --- a/match/row.go +++ b/match/row.go @@ -6,8 +6,8 @@ import ( ) type Row struct { - Matchers Matchers - Length int + Matchers Matchers + RunesLength int } func (self Row) matchAll(s string) bool { @@ -25,7 +25,7 @@ func (self Row) matchAll(s string) bool { } func (self Row) Match(s string) bool { - if utf8.RuneCountInString(s) < self.Length { + if utf8.RuneCountInString(s) < self.RunesLength { return false } @@ -33,23 +33,23 @@ func (self Row) Match(s string) bool { } func (self Row) Len() (l int) { - return self.Length + return self.RunesLength } func (self Row) Index(s string) (int, []int) { l := utf8.RuneCountInString(s) - if l < self.Length { + if l < self.RunesLength { return -1, nil } for i 
:= range s { sub := s[i:] if self.matchAll(sub) { - return i, []int{self.Length} + return i, []int{self.RunesLength} } l -= 1 - if l < self.Length { + if l < self.RunesLength { return -1, nil } } @@ -58,5 +58,5 @@ func (self Row) Index(s string) (int, []int) { } func (self Row) String() string { - return fmt.Sprintf("", self.Length, self.Matchers) + return fmt.Sprintf("", self.RunesLength, self.Matchers) } diff --git a/match/text.go b/match/text.go new file mode 100644 index 0000000..0b22c78 --- /dev/null +++ b/match/text.go @@ -0,0 +1,45 @@ +package match + +import ( + "fmt" + "strings" + "unicode/utf8" +) + +// Text represents a raw string to match +type Text struct { + Str string + RunesLength int + BytesLength int +} + +func NewText(s string) Text { + return Text{ + Str: s, + RunesLength: utf8.RuneCountInString(s), + BytesLength: len(s), + } +} + +func (self Text) Match(s string) bool { + return self.Str == s +} + +func (self Text) Len() int { + return self.RunesLength +} + +func (self Text) Index(s string) (index int, segments []int) { + index = strings.Index(s, self.Str) + if index == -1 { + return + } + + segments = []int{self.BytesLength} + + return +} + +func (self Text) String() string { + return fmt.Sprintf("", self.Str) +}