Optimizations

This commit is contained in:
s.kamardin 2016-01-14 18:29:13 +03:00
parent af4b301eec
commit 3995275190
17 changed files with 241 additions and 184 deletions

View File

@ -26,7 +26,7 @@ func optimize(matcher match.Matcher) match.Matcher {
rightNil := m.Right == nil
if leftNil && rightNil {
return match.Raw{r.Str}
return match.NewRaw(r.Str)
}
_, leftSuper := m.Left.(match.Super)
@ -93,15 +93,20 @@ func glueAsRow(matchers []match.Matcher) match.Matcher {
return nil
}
row := match.Row{}
var (
c []match.Matcher
l int
)
for _, matcher := range matchers {
err := row.Add(matcher)
if err != nil {
if ml := matcher.Len(); ml == -1 {
return nil
} else {
c = append(c, matcher)
l += ml
}
}
return row
return match.Row{c, l}
}
func glueAsEvery(matchers []match.Matcher) match.Matcher {
@ -187,7 +192,7 @@ func glueAsEvery(matchers []match.Matcher) match.Matcher {
return every
}
func convertMatchers(matchers []match.Matcher) []match.Matcher {
func minimizeMatchers(matchers []match.Matcher) []match.Matcher {
var done match.Matcher
var left, right, count int
@ -201,7 +206,6 @@ func convertMatchers(matchers []match.Matcher) []match.Matcher {
} else {
cl, gl := done.Len(), glued.Len()
swap = cl > -1 && gl > -1 && gl > cl
swap = swap || count < r-l
}
@ -228,7 +232,7 @@ func convertMatchers(matchers []match.Matcher) []match.Matcher {
return next
}
return convertMatchers(next)
return minimizeMatchers(next)
}
func compileMatchers(matchers []match.Matcher) (match.Matcher, error) {
@ -258,36 +262,29 @@ func compileMatchers(matchers []match.Matcher) (match.Matcher, error) {
}
}
// _, ok := val.(match.BTree)
// fmt.Println("a tree", ok)
left := matchers[:idx]
var right []match.Matcher
if len(matchers) > idx+1 {
right = matchers[idx+1:]
}
tree := match.BTree{Value: val}
var l, r match.Matcher
var err error
if len(left) > 0 {
l, err := compileMatchers(left)
l, err = compileMatchers(left)
if err != nil {
return nil, err
}
tree.Left = l
}
if len(right) > 0 {
r, err := compileMatchers(right)
r, err = compileMatchers(right)
if err != nil {
return nil, err
}
tree.Right = r
}
return tree, nil
return match.NewBTree(val, l, r), nil
}
func do(node node, s string) (m match.Matcher, err error) {
@ -306,7 +303,7 @@ func do(node node, s string) (m match.Matcher, err error) {
if _, ok := node.(*nodeAnyOf); ok {
m = match.AnyOf{matchers}
} else {
m, err = compileMatchers(convertMatchers(matchers))
m, err = compileMatchers(minimizeMatchers(matchers))
if err != nil {
return nil, err
}
@ -328,7 +325,7 @@ func do(node node, s string) (m match.Matcher, err error) {
m = match.Single{s}
case *nodeText:
m = match.Raw{n.text}
m = match.NewRaw(n.text)
default:
return nil, fmt.Errorf("could not compile tree: unknown node type")
@ -370,7 +367,7 @@ func do2(node node, s string) ([]match.Matcher, error) {
}
for _, matchers := range ways {
c, err := compileMatchers(convertMatchers(matchers))
c, err := compileMatchers(minimizeMatchers(matchers))
if err != nil {
return nil, err
}
@ -404,7 +401,7 @@ func do2(node node, s string) ([]match.Matcher, error) {
}
for _, matchers := range ways {
c, err := compileMatchers(convertMatchers(matchers))
c, err := compileMatchers(minimizeMatchers(matchers))
if err != nil {
return nil, err
}
@ -427,7 +424,7 @@ func do2(node node, s string) ([]match.Matcher, error) {
result = append(result, match.Single{s})
case *nodeText:
result = append(result, match.Raw{n.text})
result = append(result, match.NewRaw(n.text))
default:
return nil, fmt.Errorf("could not compile tree: unknown node type")

View File

@ -74,42 +74,47 @@ func TestCompileMatchers(t *testing.T) {
[]match.Matcher{
match.Super{},
match.Single{separators},
match.Raw{"c"},
match.Raw{"c", 1},
},
match.BTree{
Left: match.BTree{
Left: match.Super{},
Value: match.Single{separators},
match.NewBTree(
match.Raw{"c", 1},
match.NewBTree(
match.Single{separators},
match.Super{},
nil,
),
nil,
),
},
{
[]match.Matcher{
match.Any{},
match.Raw{"c", 1},
match.Any{},
},
match.NewBTree(
match.Raw{"c", 1},
match.Any{},
match.Any{},
),
},
{
[]match.Matcher{
match.Range{'a', 'c', true},
match.List{"zte", false},
match.Raw{"c", 1},
match.Single{},
},
match.Row{
Matchers: match.Matchers{
match.Range{'a', 'c', true},
match.List{"zte", false},
match.Raw{"c", 1},
match.Single{},
},
Value: match.Raw{"c"},
Length: 4,
},
},
{
[]match.Matcher{
match.Any{},
match.Raw{"c"},
match.Any{},
},
match.BTree{
Left: match.Any{},
Value: match.Raw{"c"},
Right: match.Any{},
},
},
{
[]match.Matcher{
match.Range{'a', 'c', true},
match.List{"zte", false},
match.Raw{"c"},
match.Single{},
},
match.Row{Matchers: match.Matchers{
match.Range{'a', 'c', true},
match.List{"zte", false},
match.Raw{"c"},
match.Single{},
}},
},
} {
act, err := compileMatchers(test.in)
if err != nil {
@ -132,17 +137,20 @@ func TestConvertMatchers(t *testing.T) {
[]match.Matcher{
match.Range{'a', 'c', true},
match.List{"zte", false},
match.Raw{"c"},
match.Raw{"c", 1},
match.Single{},
match.Any{},
},
[]match.Matcher{
match.Row{Matchers: match.Matchers{
match.Range{'a', 'c', true},
match.List{"zte", false},
match.Raw{"c"},
match.Single{},
}},
match.Row{
Matchers: match.Matchers{
match.Range{'a', 'c', true},
match.List{"zte", false},
match.Raw{"c", 1},
match.Single{},
},
Length: 4,
},
match.Any{},
},
},
@ -150,7 +158,7 @@ func TestConvertMatchers(t *testing.T) {
[]match.Matcher{
match.Range{'a', 'c', true},
match.List{"zte", false},
match.Raw{"c"},
match.Raw{"c", 1},
match.Single{},
match.Any{},
match.Single{},
@ -158,16 +166,19 @@ func TestConvertMatchers(t *testing.T) {
match.Any{},
},
[]match.Matcher{
match.Row{Matchers: match.Matchers{
match.Range{'a', 'c', true},
match.List{"zte", false},
match.Raw{"c"},
}},
match.Row{
Matchers: match.Matchers{
match.Range{'a', 'c', true},
match.List{"zte", false},
match.Raw{"c", 1},
},
Length: 3,
},
match.Min{3},
},
},
} {
act := convertMatchers(test.in)
act := minimizeMatchers(test.in)
if !reflect.DeepEqual(act, test.exp) {
t.Errorf("#%d unexpected convert matchers 2 result:\nact: %s;\nexp: %s", id, act, test.exp)
continue
@ -197,7 +208,7 @@ func TestCompiler(t *testing.T) {
}{
{
ast: pattern(&nodeText{text: "abc"}),
result: match.Raw{"abc"},
result: match.Raw{"abc", 3},
},
{
ast: pattern(&nodeAny{}),
@ -247,25 +258,33 @@ func TestCompiler(t *testing.T) {
{
ast: pattern(&nodeAny{}, &nodeText{text: "abc"}, &nodeSingle{}),
sep: separators,
result: match.BTree{
Left: match.Any{separators},
Value: match.Row{Matchers: match.Matchers{
match.Raw{"abc"},
match.Single{separators},
}},
},
result: match.NewBTree(
match.Row{
Matchers: match.Matchers{
match.Raw{"abc", 3},
match.Single{separators},
},
Length: 4,
},
match.Any{separators},
nil,
),
},
{
ast: pattern(&nodeSuper{}, &nodeSingle{}, &nodeText{text: "abc"}, &nodeSingle{}),
sep: separators,
result: match.BTree{
Left: match.Super{},
Value: match.Row{Matchers: match.Matchers{
match.Single{separators},
match.Raw{"abc"},
match.Single{separators},
}},
},
result: match.NewBTree(
match.Row{
Matchers: match.Matchers{
match.Single{separators},
match.Raw{"abc", 3},
match.Single{separators},
},
Length: 5,
},
match.Super{},
nil,
),
},
{
ast: pattern(&nodeAny{}, &nodeText{text: "abc"}),
@ -284,29 +303,33 @@ func TestCompiler(t *testing.T) {
result: match.Contains{"abc", false},
},
{
ast: pattern(&nodeAny{}, &nodeAny{}, &nodeAny{}, &nodeText{text: "abc"}, &nodeAny{}, &nodeAny{}),
sep: separators,
result: match.BTree{Left: match.Any{separators}, Value: match.Raw{"abc"}, Right: match.Any{separators}},
ast: pattern(&nodeAny{}, &nodeAny{}, &nodeAny{}, &nodeText{text: "abc"}, &nodeAny{}, &nodeAny{}),
sep: separators,
result: match.NewBTree(
match.Raw{"abc", 3},
match.Any{separators},
match.Any{separators},
),
},
{
ast: pattern(&nodeSuper{}, &nodeSingle{}, &nodeText{text: "abc"}, &nodeSuper{}, &nodeSingle{}),
result: match.BTree{
Left: match.Min{1},
Value: match.Raw{"abc"},
Right: match.Min{1},
},
result: match.NewBTree(
match.Raw{"abc", 3},
match.Min{1},
match.Min{1},
),
},
{
ast: pattern(anyOf(&nodeText{text: "abc"})),
result: match.AnyOf{match.Matchers{
match.Raw{"abc"},
match.Raw{"abc", 3},
}},
},
{
ast: pattern(anyOf(pattern(anyOf(pattern(&nodeText{text: "abc"}))))),
result: match.AnyOf{match.Matchers{
match.AnyOf{match.Matchers{
match.Raw{"abc"},
match.Raw{"abc", 3},
}},
}},
},
@ -316,13 +339,17 @@ func TestCompiler(t *testing.T) {
&nodeRange{lo: 'a', hi: 'x', not: true},
&nodeAny{},
),
result: match.BTree{
Value: match.Row{Matchers: match.Matchers{
match.Range{Lo: 'a', Hi: 'z'},
match.Range{Lo: 'a', Hi: 'x', Not: true},
}},
Right: match.Super{},
},
result: match.NewBTree(
match.Row{
Matchers: match.Matchers{
match.Range{Lo: 'a', Hi: 'z'},
match.Range{Lo: 'a', Hi: 'x', Not: true},
},
Length: 2,
},
nil,
match.Super{},
),
},
// {
// ast: pattern(
@ -330,9 +357,9 @@ func TestCompiler(t *testing.T) {
// anyOf(&nodeText{text: "c"}, &nodeText{text: "d"}),
// ),
// result: match.AnyOf{Matchers: match.Matchers{
// match.Row{Matchers: match.Matchers{match.Raw{"a"}, match.Raw{"c"}}},
// match.Row{Matchers: match.Matchers{match.Raw{"a"}, match.Raw{"c", 1}}},
// match.Row{Matchers: match.Matchers{match.Raw{"a"}, match.Raw{"d"}}},
// match.Row{Matchers: match.Matchers{match.Raw{"b"}, match.Raw{"c"}}},
// match.Row{Matchers: match.Matchers{match.Raw{"b"}, match.Raw{"c", 1}}},
// match.Row{Matchers: match.Matchers{match.Raw{"b"}, match.Raw{"d"}}},
// }},
// },

View File

@ -36,7 +36,7 @@ func (self Any) Index(s string) (int, []int) {
}
// Len returns lenNo, the Len value used by matchers without a fixed length.
func (self Any) Len() int {
	return lenNo
}
func (self Any) Kind() Kind {

View File

@ -7,38 +7,50 @@ import (
// BTree is a matcher built from a Value matcher and optional Left and Right
// matchers, together with the lengths NewBTree caches for them.
type BTree struct {
	// Value is the central matcher; Left and Right may be nil.
	Value Matcher
	Left  Matcher
	Right Matcher

	// VLen, LLen and RLen hold the Len() of Value, Left and Right as
	// computed by NewBTree (zero for a nil side).
	VLen int
	LLen int
	RLen int

	// Length is LLen+VLen+RLen when all three are known and -1 otherwise.
	Length int
}
// NewBTree builds a BTree around Value with optional Left and Right matchers
// (either may be nil) and caches their lengths.
//
// VLen, LLen and RLen receive the Len() of the respective matcher; a nil side
// keeps a cached length of zero. Length is the sum of the three when every one
// of them is known, and -1 as soon as any Len() reports -1.
// Value must not be nil: its Len method is called unconditionally.
func NewBTree(Value, Left, Right Matcher) (tree BTree) {
	tree = BTree{Value: Value, Left: Left, Right: Right}

	tree.VLen = Value.Len()
	fixed := tree.VLen != -1

	if Left != nil {
		tree.LLen = Left.Len()
		fixed = fixed && tree.LLen != -1
	}
	if Right != nil {
		tree.RLen = Right.Len()
		fixed = fixed && tree.RLen != -1
	}

	// Default to "unknown" and only sum when every part has a fixed length.
	tree.Length = -1
	if fixed {
		tree.Length = tree.LLen + tree.VLen + tree.RLen
	}
	return tree
}
// Kind identifies this matcher as KindBTree.
func (self BTree) Kind() Kind {
return KindBTree
}
// len queries the Len() of the left, value and right matchers on every call.
// A nil Left or Right contributes zero. ok is true only when none of the three
// lengths is negative.
func (self BTree) len() (l, v, r int, ok bool) {
	v = self.Value.Len()
	if left := self.Left; left != nil {
		l = left.Len()
	}
	if right := self.Right; right != nil {
		r = right.Len()
	}
	return l, v, r, l >= 0 && v >= 0 && r >= 0
}
// Len returns the cached Length field: the combined length of the tree, or -1
// when it is not fixed. The field is filled in by NewBTree; a BTree created as
// a bare struct literal reports whatever Length it was given (zero by default).
func (self BTree) Len() int {
	return self.Length
}
// Index is a placeholder: it ignores s and always returns -1 and a nil
// segment list.
// TODO(review): decide whether BTree needs a real Index implementation.
func (self BTree) Index(s string) (int, []int) {
return -1, nil
}
@ -46,17 +58,16 @@ func (self BTree) Index(s string) (int, []int) {
func (self BTree) Match(s string) bool {
inputLen := len(s)
lLen, vLen, rLen, ok := self.len()
if ok && lLen+vLen+rLen > inputLen {
if self.Length != -1 && self.Length > inputLen {
return false
}
var offset, limit int
if lLen >= 0 {
offset = lLen
if self.LLen >= 0 {
offset = self.LLen
}
if rLen >= 0 {
limit = inputLen - rLen
if self.RLen >= 0 {
limit = inputLen - self.RLen
} else {
limit = inputLen
}
@ -79,7 +90,7 @@ func (self BTree) Match(s string) bool {
for i := len(segments) - 1; i >= 0; i-- {
length := segments[i]
if rLen >= 0 && inputLen-(offset+index+length) != rLen {
if self.RLen >= 0 && inputLen-(offset+index+length) != self.RLen {
continue
}

View File

@ -53,7 +53,7 @@ func (self Contains) Index(s string) (int, []int) {
}
// Len returns lenNo, the Len value used by matchers without a fixed length.
func (self Contains) Len() int {
	return lenNo
}
func (self Contains) Kind() Kind {

View File

@ -16,17 +16,12 @@ func (self List) Kind() Kind {
}
// Match reports whether s is exactly one rune and that rune's membership in
// List agrees with Not: a member matches when Not is false, a non-member
// matches when Not is true.
//
// The empty string and multi-rune inputs never match. Only the first rune is
// decoded, so rejecting long inputs does not require scanning them.
func (self List) Match(s string) bool {
	// size == 0 means s is empty; size != len(s) means more than one rune.
	_, size := utf8.DecodeRuneInString(s)
	if size == 0 || size != len(s) {
		return false
	}

	inList := strings.Index(self.List, s) != -1
	return inList == !self.Not
}
// Len returns lenOne: a List has a fixed length of one.
func (self List) Len() int {
	return lenOne
}
func (self List) Index(s string) (int, []int) {

View File

@ -27,6 +27,9 @@ const (
KindContains
)
// Len values shared by the matchers in this package.
const (
	// lenOne is returned by matchers with a fixed length of one.
	lenOne = 1
	// lenNo is returned by matchers without a fixed length.
	lenNo = -1
)
type Matcher interface {
Match(string) bool
Index(string) (int, []int)

View File

@ -10,12 +10,19 @@ type Max struct {
}
// Match reports whether s contains at most Limit runes.
//
// Runes are counted lazily and counting stops at the first rune beyond the
// limit, so long inputs are rejected without a full scan. A negative Limit can
// never be satisfied, not even by the empty string.
func (self Max) Match(s string) bool {
	if self.Limit < 0 {
		return false
	}

	var count int
	for range s {
		count++
		if count > self.Limit {
			return false
		}
	}
	return true
}
func (self Max) Index(s string) (int, []int) {
c := utf8.RuneCountInString(s)
if c < self.Limit {
if !self.Match(s) {
return -1, nil
}
@ -34,7 +41,7 @@ func (self Max) Index(s string) (int, []int) {
}
// Len returns lenNo, the Len value used by matchers without a fixed length.
func (self Max) Len() int {
	return lenNo
}
func (self Max) Search(s string) (int, int, bool) {

View File

@ -10,7 +10,15 @@ type Min struct {
}
// Match reports whether s contains at least Limit runes.
//
// Runes are counted lazily and counting stops as soon as the limit is reached.
// A Limit of zero or less is satisfied by every input, including the empty
// string; without the explicit guard the counting loop would never run for ""
// and wrongly report a mismatch.
func (self Min) Match(s string) bool {
	if self.Limit <= 0 {
		return true
	}

	var count int
	for range s {
		count++
		if count >= self.Limit {
			return true
		}
	}
	return false
}
func (self Min) Index(s string) (int, []int) {
@ -33,7 +41,7 @@ func (self Min) Index(s string) (int, []int) {
}
// Len returns lenNo, the Len value used by matchers without a fixed length.
func (self Min) Len() int {
	return lenNo
}
func (self Min) Search(s string) (int, int, bool) {

View File

@ -38,7 +38,7 @@ func (self Prefix) Index(s string) (int, []int) {
}
// Len returns lenNo, the Len value used by matchers without a fixed length.
func (self Prefix) Len() int {
	return lenNo
}
func (self Prefix) Search(s string) (i int, l int, ok bool) {

View File

@ -44,7 +44,7 @@ func (self PrefixSuffix) Index(s string) (int, []int) {
}
// Len returns lenNo, the Len value used by matchers without a fixed length.
func (self PrefixSuffix) Len() int {
	return lenNo
}
func (self PrefixSuffix) Search(s string) (i int, l int, ok bool) {

View File

@ -15,7 +15,7 @@ func (self Range) Kind() Kind {
}
// Len returns lenOne: a Range has a fixed length of one.
func (self Range) Len() int {
	return lenOne
}
func (self Range) Match(s string) bool {

View File

@ -3,11 +3,20 @@ package match
import (
"fmt"
"strings"
"unicode/utf8"
)
// Raw represents a raw string to match.
type Raw struct {
	// Str is the raw string.
	Str string
	// Length is the value reported by Len; NewRaw sets it to the number of
	// runes in Str.
	Length int
}
// NewRaw returns a Raw for s with Length precomputed as the number of runes
// in s.
// NOTE(review): Length counts runes, not bytes; confirm that code slicing the
// input by this value is rune-aware.
func NewRaw(s string) Raw {
	var raw Raw
	raw.Str = s
	raw.Length = utf8.RuneCountInString(s)
	return raw
}
func (self Raw) Match(s string) bool {
@ -15,7 +24,7 @@ func (self Raw) Match(s string) bool {
}
// Len returns the cached Length field instead of measuring Str on every call.
// NewRaw fills the field with the rune count of Str; a Raw created as a struct
// literal reports whatever Length it was given.
func (self Raw) Len() int {
	return self.Length
}
func (self Raw) Kind() Kind {
@ -28,7 +37,7 @@ func (self Raw) Index(s string) (index int, segments []int) {
return
}
segments = []int{len(self.Str)}
segments = []int{self.Length}
return
}

View File

@ -2,27 +2,15 @@ package match
import (
"fmt"
"unicode/utf8"
)
// Row is a sequence of matchers together with their precomputed total length.
type Row struct {
	// Matchers are the component matchers of the row.
	Matchers Matchers
	// Length is the value reported by Len.
	Length int
}
// Add appends m to the row's Matchers. A matcher whose Len() is -1 is rejected
// with an error and the row is left untouched. Add does not modify any length
// field of the row.
func (self *Row) Add(m Matcher) error {
	if m.Len() == -1 {
		return fmt.Errorf("matcher should have fixed length")
	}

	self.Matchers = append(self.Matchers, m)
	return nil
}
func (self Row) Match(s string) bool {
if len(s) < self.Len() {
return false
}
func (self Row) matchAll(s string) bool {
var idx int
for _, m := range self.Matchers {
l := m.Len()
@ -36,21 +24,33 @@ func (self Row) Match(s string) bool {
return true
}
func (self Row) Len() (l int) {
if self.len == 0 {
for _, m := range self.Matchers {
self.len += m.Len()
}
func (self Row) Match(s string) bool {
if utf8.RuneCountInString(s) < self.Length {
return false
}
return self.len
return self.matchAll(s)
}
// Len returns the row's precomputed Length field.
func (self Row) Len() int {
	return self.Length
}
func (self Row) Index(s string) (int, []int) {
l := utf8.RuneCountInString(s)
if l < self.Length {
return -1, nil
}
for i := range s {
sub := s[i:]
if self.Match(sub) {
return i, []int{self.Len()}
if self.matchAll(sub) {
return i, []int{self.Length}
}
l -= 1
if l < self.Length {
return -1, nil
}
}
@ -62,5 +62,5 @@ func (self Row) Kind() Kind {
}
// String renders the row as "<row_N:[matchers]>", where N is Length and
// matchers is the %s formatting of Matchers.
func (self Row) String() string {
	return fmt.Sprintf("<row_%d:[%s]>", self.Length, self.Matchers)
}

View File

@ -12,11 +12,11 @@ type Single struct {
}
// Match reports whether s is exactly one rune and that rune is not one of
// Separators.
//
// The single-rune requirement is checked by decoding only the first rune, so
// multi-rune and empty inputs are rejected without scanning the whole string.
func (self Single) Match(s string) bool {
	// size == 0 means s is empty; size != len(s) means more than one rune.
	_, size := utf8.DecodeRuneInString(s)
	if size == 0 || size != len(s) {
		return false
	}
	return strings.IndexAny(s, self.Separators) == -1
}
// Len returns lenOne: a Single has a fixed length of one.
func (self Single) Len() int {
	return lenOne
}
func (self Single) Index(s string) (int, []int) {

View File

@ -23,7 +23,7 @@ func (self Suffix) Index(s string) (int, []int) {
}
// Len returns lenNo, the Len value used by matchers without a fixed length.
func (self Suffix) Len() int {
	return lenNo
}
func (self Suffix) Search(s string) (i int, l int, ok bool) {

View File

@ -12,7 +12,7 @@ func (self Super) Match(s string) bool {
}
// Len returns lenNo, the Len value used by matchers without a fixed length.
func (self Super) Len() int {
	return lenNo
}
func (self Super) Index(s string) (int, []int) {