diff --git a/compiler.go b/compiler.go index 381e9a7..215117c 100644 --- a/compiler.go +++ b/compiler.go @@ -26,7 +26,7 @@ func optimize(matcher match.Matcher) match.Matcher { rightNil := m.Right == nil if leftNil && rightNil { - return match.Raw{r.Str} + return match.NewRaw(r.Str) } _, leftSuper := m.Left.(match.Super) @@ -93,15 +93,20 @@ func glueAsRow(matchers []match.Matcher) match.Matcher { return nil } - row := match.Row{} + var ( + c []match.Matcher + l int + ) for _, matcher := range matchers { - err := row.Add(matcher) - if err != nil { + if ml := matcher.Len(); ml == -1 { return nil + } else { + c = append(c, matcher) + l += ml } } - return row + return match.Row{c, l} } func glueAsEvery(matchers []match.Matcher) match.Matcher { @@ -187,7 +192,7 @@ func glueAsEvery(matchers []match.Matcher) match.Matcher { return every } -func convertMatchers(matchers []match.Matcher) []match.Matcher { +func minimizeMatchers(matchers []match.Matcher) []match.Matcher { var done match.Matcher var left, right, count int @@ -201,7 +206,6 @@ func convertMatchers(matchers []match.Matcher) []match.Matcher { } else { cl, gl := done.Len(), glued.Len() swap = cl > -1 && gl > -1 && gl > cl - swap = swap || count < r-l } @@ -228,7 +232,7 @@ func convertMatchers(matchers []match.Matcher) []match.Matcher { return next } - return convertMatchers(next) + return minimizeMatchers(next) } func compileMatchers(matchers []match.Matcher) (match.Matcher, error) { @@ -258,36 +262,29 @@ func compileMatchers(matchers []match.Matcher) (match.Matcher, error) { } } - // _, ok := val.(match.BTree) - // fmt.Println("a tree", ok) - left := matchers[:idx] var right []match.Matcher if len(matchers) > idx+1 { right = matchers[idx+1:] } - tree := match.BTree{Value: val} - + var l, r match.Matcher + var err error if len(left) > 0 { - l, err := compileMatchers(left) + l, err = compileMatchers(left) if err != nil { return nil, err } - - tree.Left = l } if len(right) > 0 { - r, err := compileMatchers(right) + r, err = compileMatchers(right) if err != nil { return nil, err } - - tree.Right = r } - return tree, nil + return match.NewBTree(val, l, r), nil } func do(node node, s string) (m match.Matcher, err error) { @@ -306,7 +303,7 @@ func do(node node, s string) (m match.Matcher, err error) { if _, ok := node.(*nodeAnyOf); ok { m = match.AnyOf{matchers} } else { - m, err = compileMatchers(convertMatchers(matchers)) + m, err = compileMatchers(minimizeMatchers(matchers)) if err != nil { return nil, err } @@ -328,7 +325,7 @@ func do(node node, s string) (m match.Matcher, err error) { m = match.Single{s} case *nodeText: - m = match.Raw{n.text} + m = match.NewRaw(n.text) default: return nil, fmt.Errorf("could not compile tree: unknown node type") @@ -370,7 +367,7 @@ func do2(node node, s string) ([]match.Matcher, error) { } for _, matchers := range ways { - c, err := compileMatchers(convertMatchers(matchers)) + c, err := compileMatchers(minimizeMatchers(matchers)) if err != nil { return nil, err } @@ -404,7 +401,7 @@ func do2(node node, s string) ([]match.Matcher, error) { } for _, matchers := range ways { - c, err := compileMatchers(convertMatchers(matchers)) + c, err := compileMatchers(minimizeMatchers(matchers)) if err != nil { return nil, err } @@ -427,7 +424,7 @@ func do2(node node, s string) ([]match.Matcher, error) { result = append(result, match.Single{s}) case *nodeText: - result = append(result, match.Raw{n.text}) + result = append(result, match.NewRaw(n.text)) default: return nil, fmt.Errorf("could not compile tree: unknown node type") diff --git a/compiler_test.go b/compiler_test.go index 3b56b43..15a52ec 100644 --- a/compiler_test.go +++ b/compiler_test.go @@ -74,42 +74,47 @@ func TestCompileMatchers(t *testing.T) { []match.Matcher{ match.Super{}, match.Single{separators}, - match.Raw{"c"}, + match.Raw{"c", 1}, }, - match.BTree{ - Left: match.BTree{ - Left: match.Super{}, - Value: match.Single{separators}, + match.NewBTree( + match.Raw{"c", 1}, + match.NewBTree( + match.Single{separators}, + match.Super{}, + nil, + ), + nil, + ), + }, + { + []match.Matcher{ + match.Any{}, + match.Raw{"c", 1}, + match.Any{}, + }, + match.NewBTree( + match.Raw{"c", 1}, + match.Any{}, + match.Any{}, + ), + }, + { + []match.Matcher{ + match.Range{'a', 'c', true}, + match.List{"zte", false}, + match.Raw{"c", 1}, + match.Single{}, + }, + match.Row{ + Matchers: match.Matchers{ + match.Range{'a', 'c', true}, + match.List{"zte", false}, + match.Raw{"c", 1}, + match.Single{}, }, - Value: match.Raw{"c"}, + Length: 4, }, }, - { - []match.Matcher{ - match.Any{}, - match.Raw{"c"}, - match.Any{}, - }, - match.BTree{ - Left: match.Any{}, - Value: match.Raw{"c"}, - Right: match.Any{}, - }, - }, - { - []match.Matcher{ - match.Range{'a', 'c', true}, - match.List{"zte", false}, - match.Raw{"c"}, - match.Single{}, - }, - match.Row{Matchers: match.Matchers{ - match.Range{'a', 'c', true}, - match.List{"zte", false}, - match.Raw{"c"}, - match.Single{}, - }}, - }, } { act, err := compileMatchers(test.in) if err != nil { @@ -132,17 +137,20 @@ func TestConvertMatchers(t *testing.T) { []match.Matcher{ match.Range{'a', 'c', true}, match.List{"zte", false}, - match.Raw{"c"}, + match.Raw{"c", 1}, match.Single{}, match.Any{}, }, []match.Matcher{ - match.Row{Matchers: match.Matchers{ - match.Range{'a', 'c', true}, - match.List{"zte", false}, - match.Raw{"c"}, - match.Single{}, - }}, + match.Row{ + Matchers: match.Matchers{ + match.Range{'a', 'c', true}, + match.List{"zte", false}, + match.Raw{"c", 1}, + match.Single{}, + }, + Length: 4, + }, match.Any{}, }, }, @@ -150,7 +158,7 @@ func TestConvertMatchers(t *testing.T) { []match.Matcher{ match.Range{'a', 'c', true}, match.List{"zte", false}, - match.Raw{"c"}, + match.Raw{"c", 1}, match.Single{}, match.Any{}, match.Single{}, @@ -158,16 +166,19 @@ func TestConvertMatchers(t *testing.T) { match.Any{}, }, []match.Matcher{ - match.Row{Matchers: match.Matchers{ - match.Range{'a', 'c', true}, - match.List{"zte", false}, - match.Raw{"c"}, - }}, + match.Row{ + Matchers: match.Matchers{ + match.Range{'a', 'c', true}, + match.List{"zte", false}, + match.Raw{"c", 1}, + }, + Length: 3, + }, match.Min{3}, }, }, } { - act := convertMatchers(test.in) + act := minimizeMatchers(test.in) if !reflect.DeepEqual(act, test.exp) { t.Errorf("#%d unexpected convert matchers 2 result:\nact: %s;\nexp: %s", id, act, test.exp) continue @@ -197,7 +208,7 @@ func TestCompiler(t *testing.T) { }{ { ast: pattern(&nodeText{text: "abc"}), - result: match.Raw{"abc"}, + result: match.Raw{"abc", 3}, }, { ast: pattern(&nodeAny{}), @@ -247,25 +258,33 @@ func TestCompiler(t *testing.T) { { ast: pattern(&nodeAny{}, &nodeText{text: "abc"}, &nodeSingle{}), sep: separators, - result: match.BTree{ - Left: match.Any{separators}, - Value: match.Row{Matchers: match.Matchers{ - match.Raw{"abc"}, - match.Single{separators}, - }}, - }, + result: match.NewBTree( + match.Row{ + Matchers: match.Matchers{ + match.Raw{"abc", 3}, + match.Single{separators}, + }, + Length: 4, + }, + match.Any{separators}, + nil, + ), }, { ast: pattern(&nodeSuper{}, &nodeSingle{}, &nodeText{text: "abc"}, &nodeSingle{}), sep: separators, - result: match.BTree{ - Left: match.Super{}, - Value: match.Row{Matchers: match.Matchers{ - match.Single{separators}, - match.Raw{"abc"}, - match.Single{separators}, - }}, - }, + result: match.NewBTree( + match.Row{ + Matchers: match.Matchers{ + match.Single{separators}, + match.Raw{"abc", 3}, + match.Single{separators}, + }, + Length: 5, + }, + match.Super{}, + nil, + ), }, { ast: pattern(&nodeAny{}, &nodeText{text: "abc"}), @@ -284,29 +303,33 @@ func TestCompiler(t *testing.T) { result: match.Contains{"abc", false}, }, { - ast: pattern(&nodeAny{}, &nodeAny{}, &nodeAny{}, &nodeText{text: "abc"}, &nodeAny{}, &nodeAny{}), - sep: separators, - result: match.BTree{Left: match.Any{separators}, Value: match.Raw{"abc"}, Right: match.Any{separators}}, + ast: pattern(&nodeAny{}, &nodeAny{}, &nodeAny{}, &nodeText{text: "abc"}, &nodeAny{}, &nodeAny{}), + sep: separators, + result: match.NewBTree( + match.Raw{"abc", 3}, + match.Any{separators}, + match.Any{separators}, + ), }, { ast: pattern(&nodeSuper{}, &nodeSingle{}, &nodeText{text: "abc"}, &nodeSuper{}, &nodeSingle{}), - result: match.BTree{ - Left: match.Min{1}, - Value: match.Raw{"abc"}, - Right: match.Min{1}, - }, + result: match.NewBTree( + match.Raw{"abc", 3}, + match.Min{1}, + match.Min{1}, + ), }, { ast: pattern(anyOf(&nodeText{text: "abc"})), result: match.AnyOf{match.Matchers{ - match.Raw{"abc"}, + match.Raw{"abc", 3}, }}, }, { ast: pattern(anyOf(pattern(anyOf(pattern(&nodeText{text: "abc"}))))), result: match.AnyOf{match.Matchers{ match.AnyOf{match.Matchers{ - match.Raw{"abc"}, + match.Raw{"abc", 3}, }}, }}, }, @@ -316,13 +339,17 @@ func TestCompiler(t *testing.T) { &nodeRange{lo: 'a', hi: 'x', not: true}, &nodeAny{}, ), - result: match.BTree{ - Value: match.Row{Matchers: match.Matchers{ - match.Range{Lo: 'a', Hi: 'z'}, - match.Range{Lo: 'a', Hi: 'x', Not: true}, - }}, - Right: match.Super{}, - }, + result: match.NewBTree( + match.Row{ + Matchers: match.Matchers{ + match.Range{Lo: 'a', Hi: 'z'}, + match.Range{Lo: 'a', Hi: 'x', Not: true}, + }, + Length: 2, + }, + nil, + match.Super{}, + ), }, // { // ast: pattern( @@ -330,9 +357,9 @@ func TestCompiler(t *testing.T) { // anyOf(&nodeText{text: "c"}, &nodeText{text: "d"}), // ), // result: match.AnyOf{Matchers: match.Matchers{ - // match.Row{Matchers: match.Matchers{match.Raw{"a"}, match.Raw{"c"}}}, + // match.Row{Matchers: match.Matchers{match.Raw{"a"}, match.Raw{"c", 1}}}, // match.Row{Matchers: match.Matchers{match.Raw{"a"}, match.Raw{"d"}}}, - // match.Row{Matchers: match.Matchers{match.Raw{"b"}, match.Raw{"c"}}}, + // match.Row{Matchers: match.Matchers{match.Raw{"b"}, match.Raw{"c", 1}}}, // match.Row{Matchers: match.Matchers{match.Raw{"b"}, match.Raw{"d"}}}, // }}, // }, diff --git a/match/any.go b/match/any.go index f554b6a..ba13645 100644 --- a/match/any.go +++ b/match/any.go @@ -36,7 +36,7 @@ func (self Any) Index(s string) (int, []int) { } func (self Any) Len() int { - return -1 + return lenNo } func (self Any) Kind() Kind { diff --git a/match/btree.go b/match/btree.go index 9c3534d..d9ce037 100644 --- a/match/btree.go +++ b/match/btree.go @@ -7,38 +7,50 @@ import ( type BTree struct { Value, Left, Right Matcher + VLen, LLen, RLen int + Length int +} + +func NewBTree(Value, Left, Right Matcher) (tree BTree) { + tree.Value = Value + tree.Left = Left + tree.Right = Right + + lenOk := true + if tree.VLen = Value.Len(); tree.VLen == -1 { + lenOk = false + } + + if Left != nil { + if tree.LLen = Left.Len(); tree.LLen == -1 { + lenOk = false + } + } + + if Right != nil { + if tree.RLen = Right.Len(); tree.RLen == -1 { + lenOk = false + } + } + + if lenOk { + tree.Length = tree.LLen + tree.VLen + tree.RLen + } else { + tree.Length = -1 + } + + return tree } func (self BTree) Kind() Kind { return KindBTree } -func (self BTree) len() (l, v, r int, ok bool) { - v = self.Value.Len() - - if self.Left != nil { - l = self.Left.Len() - } - - if self.Right != nil { - r = self.Right.Len() - } - - ok = l > -1 && v > -1 && r > -1 - - return -} - func (self BTree) Len() int { - l, v, r, ok := self.len() - if ok { - return l + v + r - } - - return -1 + return self.Length } -// todo +// todo? func (self BTree) Index(s string) (int, []int) { return -1, nil } @@ -46,17 +58,16 @@ func (self BTree) Index(s string) (int, []int) { func (self BTree) Match(s string) bool { inputLen := len(s) - lLen, vLen, rLen, ok := self.len() - if ok && lLen+vLen+rLen > inputLen { + if self.Length != -1 && self.Length > inputLen { return false } var offset, limit int - if lLen >= 0 { - offset = lLen + if self.LLen >= 0 { + offset = self.LLen } - if rLen >= 0 { - limit = inputLen - rLen + if self.RLen >= 0 { + limit = inputLen - self.RLen } else { limit = inputLen } @@ -79,7 +90,7 @@ func (self BTree) Match(s string) bool { for i := len(segments) - 1; i >= 0; i-- { length := segments[i] - if rLen >= 0 && inputLen-(offset+index+length) != rLen { + if self.RLen >= 0 && inputLen-(offset+index+length) != self.RLen { continue } diff --git a/match/contains.go b/match/contains.go index d7b2154..a9ff543 100644 --- a/match/contains.go +++ b/match/contains.go @@ -53,7 +53,7 @@ func (self Contains) Index(s string) (int, []int) { } func (self Contains) Len() int { - return -1 + return lenNo } func (self Contains) Kind() Kind { diff --git a/match/list.go b/match/list.go index 2ff1e1f..06068a5 100644 --- a/match/list.go +++ b/match/list.go @@ -16,17 +16,12 @@ func (self List) Kind() Kind { } func (self List) Match(s string) bool { - if utf8.RuneCountInString(s) > 1 { - return false - } - inList := strings.Index(self.List, s) != -1 - return inList == !self.Not } func (self List) Len() int { - return 1 + return lenOne } func (self List) Index(s string) (int, []int) { diff --git a/match/match.go b/match/match.go index d16f9a9..39989b8 100644 --- a/match/match.go +++ b/match/match.go @@ -27,6 +27,9 @@ const ( KindContains ) +const lenOne = 1 +const lenNo = -1 + type Matcher interface { Match(string) bool Index(string) (int, []int) diff --git a/match/max.go b/match/max.go index 67e5378..cc7244a 100644 --- a/match/max.go +++ b/match/max.go @@ -10,12 +10,19 @@ type Max struct { } func (self Max) Match(s string) bool { - return utf8.RuneCountInString(s) <= self.Limit + var l int + for range s { + l += 1 + if l > self.Limit { + return false + } + } + + return true } func (self Max) Index(s string) (int, []int) { - c := utf8.RuneCountInString(s) - if c < self.Limit { + if !self.Match(s) { return -1, nil } @@ -34,7 +41,7 @@ func (self Max) Index(s string) (int, []int) { } func (self Max) Len() int { - return -1 + return lenNo } func (self Max) Search(s string) (int, int, bool) { diff --git a/match/min.go b/match/min.go index 5bba994..1df3072 100644 --- a/match/min.go +++ b/match/min.go @@ -10,7 +10,15 @@ type Min struct { } func (self Min) Match(s string) bool { - return utf8.RuneCountInString(s) >= self.Limit + var l int + for range s { + l += 1 + if l >= self.Limit { + return true + } + } + + return false } func (self Min) Index(s string) (int, []int) { @@ -33,7 +41,7 @@ func (self Min) Index(s string) (int, []int) { } func (self Min) Len() int { - return -1 + return lenNo } func (self Min) Search(s string) (int, int, bool) { diff --git a/match/prefix.go b/match/prefix.go index 49b22e6..097e316 100644 --- a/match/prefix.go +++ b/match/prefix.go @@ -38,7 +38,7 @@ func (self Prefix) Index(s string) (int, []int) { } func (self Prefix) Len() int { - return -1 + return lenNo } func (self Prefix) Search(s string) (i int, l int, ok bool) { diff --git a/match/prefix_suffix.go b/match/prefix_suffix.go index 9efffea..6d3af9e 100644 --- a/match/prefix_suffix.go +++ b/match/prefix_suffix.go @@ -44,7 +44,7 @@ func (self PrefixSuffix) Index(s string) (int, []int) { } func (self PrefixSuffix) Len() int { - return -1 + return lenNo } func (self PrefixSuffix) Search(s string) (i int, l int, ok bool) { diff --git a/match/range.go b/match/range.go index e771285..42453c6 100644 --- a/match/range.go +++ b/match/range.go @@ -15,7 +15,7 @@ func (self Range) Kind() Kind { } func (self Range) Len() int { - return 1 + return lenOne } func (self Range) Match(s string) bool { diff --git a/match/raw.go b/match/raw.go index 5cac636..12c86fa 100644 --- a/match/raw.go +++ b/match/raw.go @@ -3,11 +3,20 @@ package match import ( "fmt" "strings" + "unicode/utf8" ) // raw represents raw string to match type Raw struct { - Str string + Str string + Length int +} + +func NewRaw(s string) Raw { + return Raw{ + Str: s, + Length: utf8.RuneCountInString(s), + } } func (self Raw) Match(s string) bool { @@ -15,7 +24,7 @@ func (self Raw) Match(s string) bool { } func (self Raw) Len() int { - return len(self.Str) + return self.Length } func (self Raw) Kind() Kind { @@ -28,7 +37,7 @@ func (self Raw) Index(s string) (index int, segments []int) { return } - segments = []int{len(self.Str)} + segments = []int{self.Length} return } diff --git a/match/row.go b/match/row.go index 2c337df..9982ed4 100644 --- a/match/row.go +++ b/match/row.go @@ -2,27 +2,15 @@ package match import ( "fmt" + "unicode/utf8" ) type Row struct { Matchers Matchers - len int + Length int } -func (self *Row) Add(m Matcher) error { - if l := m.Len(); l == -1 { - return fmt.Errorf("matcher should have fixed length") - } - - self.Matchers = append(self.Matchers, m) - return nil -} - -func (self Row) Match(s string) bool { - if len(s) < self.Len() { - return false - } - +func (self Row) matchAll(s string) bool { var idx int for _, m := range self.Matchers { l := m.Len() @@ -36,21 +24,33 @@ func (self Row) Match(s string) bool { return true } -func (self Row) Len() (l int) { - if self.len == 0 { - for _, m := range self.Matchers { - self.len += m.Len() - } +func (self Row) Match(s string) bool { + if utf8.RuneCountInString(s) < self.Length { + return false } - return self.len + return self.matchAll(s) +} + +func (self Row) Len() (l int) { + return self.Length } func (self Row) Index(s string) (int, []int) { + l := utf8.RuneCountInString(s) + if l < self.Length { + return -1, nil + } + for i := range s { sub := s[i:] - if self.Match(sub) { - return i, []int{self.Len()} + if self.matchAll(sub) { + return i, []int{self.Length} + } + + l -= 1 + if l < self.Length { + return -1, nil } } @@ -62,5 +62,5 @@ func (self Row) Kind() Kind { } func (self Row) String() string { - return fmt.Sprintf("", self.Matchers) + return fmt.Sprintf("", self.Length, self.Matchers) } diff --git a/match/single.go b/match/single.go index 833f4c7..b35e80b 100644 --- a/match/single.go +++ b/match/single.go @@ -12,11 +12,11 @@ type Single struct { } func (self Single) Match(s string) bool { - return utf8.RuneCountInString(s) == 1 && strings.IndexAny(s, self.Separators) == -1 + return strings.IndexAny(s, self.Separators) == -1 } func (self Single) Len() int { - return 1 + return lenOne } func (self Single) Index(s string) (int, []int) { diff --git a/match/suffix.go b/match/suffix.go index c9e0425..670724a 100644 --- a/match/suffix.go +++ b/match/suffix.go @@ -23,7 +23,7 @@ func (self Suffix) Index(s string) (int, []int) { } func (self Suffix) Len() int { - return -1 + return lenNo } func (self Suffix) Search(s string) (i int, l int, ok bool) { diff --git a/match/super.go b/match/super.go index ad50d43..a7e0979 100644 --- a/match/super.go +++ b/match/super.go @@ -12,7 +12,7 @@ func (self Super) Match(s string) bool { } func (self Super) Len() int { - return -1 + return lenNo } func (self Super) Index(s string) (int, []int) {