optimizations with cached segments

This commit is contained in:
gobwas 2016-02-23 14:46:20 +03:00
parent 71fc92e3a7
commit 92be27c14d
15 changed files with 115 additions and 18 deletions

View File

@ -18,7 +18,7 @@ func (self Any) Index(s string) (int, []int) {
switch found { switch found {
case -1: case -1:
case 0: case 0:
return 0, []int{0} return 0, segments0
default: default:
s = s[:found] s = s[:found]
} }
@ -27,7 +27,6 @@ func (self Any) Index(s string) (int, []int) {
for i := range s { for i := range s {
segments = append(segments, i) segments = append(segments, i)
} }
segments = append(segments, len(s)) segments = append(segments, len(s))
return 0, segments return 0, segments

View File

@ -33,8 +33,8 @@ func TestAnyOfIndex(t *testing.T) {
}, },
{ {
Matchers{ Matchers{
List{[]rune("[def]"), false}, NewList([]rune("[def]"), false),
List{[]rune("[abc]"), false}, NewList([]rune("[abc]"), false),
}, },
"abcdef", "abcdef",
0, 0,

View File

@ -31,8 +31,9 @@ func (self EveryOf) Index(s string) (int, []int) {
// make `in` with cap as len(s), // make `in` with cap as len(s),
// cause it is the maximum size of output segments values // cause it is the maximum size of output segments values
next := make([]int, 0, len(s)) //todo opti!!!
current := make([]int, 0, len(s)) next := acquireSegments(len(s))
current := acquireSegments(len(s))
sub := s sub := s
for i, m := range self.Matchers { for i, m := range self.Matchers {

View File

@ -11,6 +11,13 @@ type List struct {
Not bool Not bool
} }
// NewList returns a List matcher holding the given runes and negation flag.
func NewList(list []rune, not bool) List {
	var l List
	l.List = list
	l.Not = not
	return l
}
func (self List) Match(s string) bool { func (self List) Match(s string) bool {
r, w := utf8.DecodeRuneInString(s) r, w := utf8.DecodeRuneInString(s)
if len(s) > w { if len(s) > w {
@ -28,7 +35,7 @@ func (self List) Len() int {
func (self List) Index(s string) (int, []int) { func (self List) Index(s string) (int, []int) {
for i, r := range s { for i, r := range s {
if self.Not == (runes.IndexRune(self.List, r) == -1) { if self.Not == (runes.IndexRune(self.List, r) == -1) {
return i, []int{utf8.RuneLen(r)} return i, segmentsByRuneLength[utf8.RuneLen(r)]
} }
} }

View File

@ -28,7 +28,7 @@ func TestListIndex(t *testing.T) {
[]int{1}, []int{1},
}, },
} { } {
p := List{test.list, test.not} p := NewList(test.list, test.not)
index, segments := p.Index(test.fixture) index, segments := p.Index(test.fixture)
if index != test.index { if index != test.index {
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index) t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)
@ -40,7 +40,7 @@ func TestListIndex(t *testing.T) {
} }
func BenchmarkIndexList(b *testing.B) { func BenchmarkIndexList(b *testing.B) {
m := List{[]rune("def"), false} m := NewList([]rune("def"), false)
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
m.Index(bench_pattern) m.Index(bench_pattern)
@ -48,7 +48,7 @@ func BenchmarkIndexList(b *testing.B) {
} }
func BenchmarkIndexListParallel(b *testing.B) { func BenchmarkIndexListParallel(b *testing.B) {
m := List{[]rune("def"), false} m := NewList([]rune("def"), false)
b.RunParallel(func(pb *testing.PB) { b.RunParallel(func(pb *testing.PB) {
for pb.Next() { for pb.Next() {

View File

@ -1,5 +1,7 @@
package match package match
// TODO: introduce a common lookup table of rune lengths.
import ( import (
"fmt" "fmt"
"strings" "strings"

View File

@ -3,6 +3,7 @@ package match
import ( import (
"reflect" "reflect"
"testing" "testing"
"unicode/utf8"
) )
var bench_separators = []rune{'.'} var bench_separators = []rune{'.'}
@ -62,3 +63,28 @@ func BenchmarkReverse(b *testing.B) {
reverseSegments([]int{1, 2, 3, 4}) reverseSegments([]int{1, 2, 3, 4})
} }
} }
// getTable builds a lookup slice with one entry per code point from 0 through
// utf8.MaxRune; each entry holds whatever utf8.RuneLen reports for that code
// point.
func getTable() []int {
	lengths := make([]int, utf8.MaxRune+1)
	for code := range lengths {
		lengths[code] = utf8.RuneLen(rune(code))
	}
	return lengths
}
// table is the rune-length lookup slice produced by getTable; it is built once
// at package initialisation and read by BenchmarkRuneLenFromTable.
var table = getTable()
// runeToLen is the rune whose length both rune-length benchmarks look up.
const runeToLen = 'q'
// BenchmarkRuneLenFromTable times reading the entry for runeToLen from the
// precomputed table, b.N times.
// NOTE(review): the value read is discarded, so the compiler may be able to
// elide the work — confirm the reported numbers are meaningful.
func BenchmarkRuneLenFromTable(b *testing.B) {
	for n := b.N; n > 0; n-- {
		_ = table[runeToLen]
	}
}
// BenchmarkRuneLenFromUTF8 times calling utf8.RuneLen on runeToLen, b.N times.
// NOTE(review): the argument is a constant and the result is discarded, so the
// compiler may be able to fold the call away — confirm the reported numbers
// are meaningful.
func BenchmarkRuneLenFromUTF8(b *testing.B) {
	for n := b.N; n > 0; n-- {
		_ = utf8.RuneLen(runeToLen)
	}
}

View File

@ -11,7 +11,7 @@ func (self Nothing) Match(s string) bool {
} }
func (self Nothing) Index(s string) (int, []int) { func (self Nothing) Index(s string) (int, []int) {
return 0, []int{0} return 0, segments0
} }
func (self Nothing) Len() int { func (self Nothing) Len() int {

View File

@ -10,9 +10,6 @@ type Range struct {
Not bool Not bool
} }
// todo make factory
// todo make range table inside factory
func (self Range) Len() int { func (self Range) Len() int {
return lenOne return lenOne
} }
@ -31,7 +28,7 @@ func (self Range) Match(s string) bool {
func (self Range) Index(s string) (int, []int) { func (self Range) Index(s string) (int, []int) {
for i, r := range s { for i, r := range s {
if self.Not != (r >= self.Lo && r <= self.Hi) { if self.Not != (r >= self.Lo && r <= self.Hi) {
return i, []int{utf8.RuneLen(r)} return i, segmentsByRuneLength[utf8.RuneLen(r)]
} }
} }

View File

@ -7,6 +7,15 @@ import (
type Row struct { type Row struct {
Matchers Matchers Matchers Matchers
RunesLength int RunesLength int
Segments []int
}
func NewRow(m Matchers, len int) Row {
return Row{
Matchers: m,
RunesLength: len,
Segments: []int{len},
}
} }
func (self Row) matchAll(s string) bool { func (self Row) matchAll(s string) bool {
@ -66,7 +75,7 @@ func (self Row) Index(s string) (int, []int) {
} }
if self.matchAll(s[i:]) { if self.matchAll(s[i:]) {
return i, []int{self.RunesLength} return i, self.Segments
} }
} }

View File

@ -25,6 +25,23 @@ const (
cacheToAndHigherIndex = 1023 cacheToAndHigherIndex = 1023
) )
// asciiTable has one entry per index 0..127; init sets every entry to 1.
var asciiTable [128]byte
// segmentsByRuneLength holds, at index n, the preallocated one-element slice
// {n} for n in 0..4 (populated in init). The Index methods in this package
// look it up with utf8.RuneLen(r) rather than allocating a new slice.
// NOTE(review): the same slices are handed to every caller, so presumably
// callers must treat returned segments as read-only — confirm.
var segmentsByRuneLength [5][]int
// Preallocated single-element segment slices, one per length 0 through 4.
var (
segments0 = []int{0}
segments1 = []int{1}
segments2 = []int{2}
segments3 = []int{3}
segments4 = []int{4}
)
// asciiLo and asciiHi are the numeric values 0 and 127; by their names they
// presumably bound the ASCII range covered by asciiTable — confirm at use sites.
const (
asciiLo = 0
asciiHi = 127
)
func init() { func init() {
for i := cacheToAndHigher; i >= cacheFrom; i >>= 1 { for i := cacheToAndHigher; i >= cacheFrom; i >>= 1 {
func(i int) { func(i int) {
@ -33,6 +50,16 @@ func init() {
}} }}
}(i) }(i)
} }
segmentsByRuneLength[0] = segments0
segmentsByRuneLength[1] = segments1
segmentsByRuneLength[2] = segments2
segmentsByRuneLength[3] = segments3
segmentsByRuneLength[4] = segments4
for i := 0; i <= 127; i++ {
asciiTable[i] = 1
}
} }
func getTableIndex(c int) int { func getTableIndex(c int) int {

View File

@ -27,7 +27,7 @@ func (self Single) Len() int {
func (self Single) Index(s string) (int, []int) { func (self Single) Index(s string) (int, []int) {
for i, r := range s { for i, r := range s {
if runes.IndexRune(self.Separators, r) == -1 { if runes.IndexRune(self.Separators, r) == -1 {
return i, []int{utf8.RuneLen(r)} return i, segmentsByRuneLength[utf8.RuneLen(r)]
} }
} }

View File

@ -15,6 +15,7 @@ func (self Super) Len() int {
} }
func (self Super) Index(s string) (int, []int) { func (self Super) Index(s string) (int, []int) {
//todo acquire here
segments := make([]int, 0, len(s)+1) segments := make([]int, 0, len(s)+1)
for i := range s { for i := range s {
segments = append(segments, i) segments = append(segments, i)

View File

@ -11,6 +11,7 @@ type Text struct {
Str string Str string
RunesLength int RunesLength int
BytesLength int BytesLength int
Segments []int
} }
func NewText(s string) Text { func NewText(s string) Text {
@ -18,6 +19,7 @@ func NewText(s string) Text {
Str: s, Str: s,
RunesLength: utf8.RuneCountInString(s), RunesLength: utf8.RuneCountInString(s),
BytesLength: len(s), BytesLength: len(s),
Segments: []int{len(s)},
} }
} }
@ -35,7 +37,7 @@ func (self Text) Index(s string) (int, []int) {
return -1, nil return -1, nil
} }
return index, []int{self.BytesLength} return index, self.Segments
} }
func (self Text) String() string { func (self Text) String() string {

View File

@ -84,6 +84,32 @@ func Contains(s, needle []rune) bool {
return Index(s, needle) >= 0 return Index(s, needle) >= 0
} }
// Max returns the largest rune in s.
//
// An empty (or nil) slice yields 0. The search is seeded with the first
// element rather than the zero value, so a slice containing only negative
// values returns its true maximum instead of 0.
func Max(s []rune) (max rune) {
	if len(s) == 0 {
		return 0
	}
	max = s[0]
	for _, r := range s[1:] {
		if r > max {
			max = r
		}
	}
	return max
}
// Min returns the smallest rune in s.
//
// An empty (or nil) slice yields -1. The search is seeded with the first
// element instead of using -1 as an "unset" marker, so a -1 stored in the
// slice is treated as a real value and is not overwritten by later elements.
func Min(s []rune) rune {
	if len(s) == 0 {
		return -1
	}
	lowest := s[0]
	for _, r := range s[1:] {
		if r < lowest {
			lowest = r
		}
	}
	return lowest
}
func IndexRune(s []rune, r rune) int { func IndexRune(s []rune, r rune) int {
for i, c := range s { for i, c := range s {
if c == r { if c == r {