forked from mirror/glob
optimizations with cached segments
This commit is contained in:
parent
71fc92e3a7
commit
92be27c14d
|
@ -18,7 +18,7 @@ func (self Any) Index(s string) (int, []int) {
|
|||
switch found {
|
||||
case -1:
|
||||
case 0:
|
||||
return 0, []int{0}
|
||||
return 0, segments0
|
||||
default:
|
||||
s = s[:found]
|
||||
}
|
||||
|
@ -27,7 +27,6 @@ func (self Any) Index(s string) (int, []int) {
|
|||
for i := range s {
|
||||
segments = append(segments, i)
|
||||
}
|
||||
|
||||
segments = append(segments, len(s))
|
||||
|
||||
return 0, segments
|
||||
|
|
|
@ -33,8 +33,8 @@ func TestAnyOfIndex(t *testing.T) {
|
|||
},
|
||||
{
|
||||
Matchers{
|
||||
List{[]rune("[def]"), false},
|
||||
List{[]rune("[abc]"), false},
|
||||
NewList([]rune("[def]"), false),
|
||||
NewList([]rune("[abc]"), false),
|
||||
},
|
||||
"abcdef",
|
||||
0,
|
||||
|
|
|
@ -31,8 +31,9 @@ func (self EveryOf) Index(s string) (int, []int) {
|
|||
|
||||
// make `in` with cap as len(s),
|
||||
// cause it is the maximum size of output segments values
|
||||
next := make([]int, 0, len(s))
|
||||
current := make([]int, 0, len(s))
|
||||
//todo opti!!!
|
||||
next := acquireSegments(len(s))
|
||||
current := acquireSegments(len(s))
|
||||
|
||||
sub := s
|
||||
for i, m := range self.Matchers {
|
||||
|
|
|
@ -11,6 +11,13 @@ type List struct {
|
|||
Not bool
|
||||
}
|
||||
|
||||
func NewList(list []rune, not bool) List {
|
||||
return List{
|
||||
List: list,
|
||||
Not: not,
|
||||
}
|
||||
}
|
||||
|
||||
func (self List) Match(s string) bool {
|
||||
r, w := utf8.DecodeRuneInString(s)
|
||||
if len(s) > w {
|
||||
|
@ -28,7 +35,7 @@ func (self List) Len() int {
|
|||
func (self List) Index(s string) (int, []int) {
|
||||
for i, r := range s {
|
||||
if self.Not == (runes.IndexRune(self.List, r) == -1) {
|
||||
return i, []int{utf8.RuneLen(r)}
|
||||
return i, segmentsByRuneLength[utf8.RuneLen(r)]
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -28,7 +28,7 @@ func TestListIndex(t *testing.T) {
|
|||
[]int{1},
|
||||
},
|
||||
} {
|
||||
p := List{test.list, test.not}
|
||||
p := NewList(test.list, test.not)
|
||||
index, segments := p.Index(test.fixture)
|
||||
if index != test.index {
|
||||
t.Errorf("#%d unexpected index: exp: %d, act: %d", id, test.index, index)
|
||||
|
@ -40,7 +40,7 @@ func TestListIndex(t *testing.T) {
|
|||
}
|
||||
|
||||
func BenchmarkIndexList(b *testing.B) {
|
||||
m := List{[]rune("def"), false}
|
||||
m := NewList([]rune("def"), false)
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
m.Index(bench_pattern)
|
||||
|
@ -48,7 +48,7 @@ func BenchmarkIndexList(b *testing.B) {
|
|||
}
|
||||
|
||||
func BenchmarkIndexListParallel(b *testing.B) {
|
||||
m := List{[]rune("def"), false}
|
||||
m := NewList([]rune("def"), false)
|
||||
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
package match
|
||||
|
||||
// todo common table of rune's length
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
|
|
@ -3,6 +3,7 @@ package match
|
|||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
var bench_separators = []rune{'.'}
|
||||
|
@ -62,3 +63,28 @@ func BenchmarkReverse(b *testing.B) {
|
|||
reverseSegments([]int{1, 2, 3, 4})
|
||||
}
|
||||
}
|
||||
|
||||
// getTable returns a slice indexed by code point (0 through utf8.MaxRune)
// whose entries are the result of utf8.RuneLen for that code point.
// Code points that utf8.RuneLen rejects therefore hold -1.
func getTable() []int {
	lengths := make([]int, utf8.MaxRune+1)
	for cp := range lengths {
		lengths[cp] = utf8.RuneLen(rune(cp))
	}
	return lengths
}
|
||||
|
||||
// table is the rune-length lookup produced by getTable, built once during
// package initialisation and read by BenchmarkRuneLenFromTable.
var table = getTable()
|
||||
|
||||
// runeToLen is the fixed rune whose length the RuneLen benchmarks look up.
const runeToLen = 'q'
|
||||
|
||||
func BenchmarkRuneLenFromTable(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = table[runeToLen]
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkRuneLenFromUTF8(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = utf8.RuneLen(runeToLen)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -11,7 +11,7 @@ func (self Nothing) Match(s string) bool {
|
|||
}
|
||||
|
||||
func (self Nothing) Index(s string) (int, []int) {
|
||||
return 0, []int{0}
|
||||
return 0, segments0
|
||||
}
|
||||
|
||||
func (self Nothing) Len() int {
|
||||
|
|
|
@ -10,9 +10,6 @@ type Range struct {
|
|||
Not bool
|
||||
}
|
||||
|
||||
// todo make factory
|
||||
// todo make range table inside factory
|
||||
|
||||
func (self Range) Len() int {
|
||||
return lenOne
|
||||
}
|
||||
|
@ -31,7 +28,7 @@ func (self Range) Match(s string) bool {
|
|||
func (self Range) Index(s string) (int, []int) {
|
||||
for i, r := range s {
|
||||
if self.Not != (r >= self.Lo && r <= self.Hi) {
|
||||
return i, []int{utf8.RuneLen(r)}
|
||||
return i, segmentsByRuneLength[utf8.RuneLen(r)]
|
||||
}
|
||||
}
|
||||
|
||||
|
|
11
match/row.go
11
match/row.go
|
@ -7,6 +7,15 @@ import (
|
|||
// Row groups a set of Matchers together with a precomputed length.
// NOTE(review): presumably the matchers are applied one after another to a
// fixed-length run of runes — confirm against matchAll.
type Row struct {
|
||||
// Matchers is the collection checked by matchAll.
Matchers Matchers
|
||||
// RunesLength is the length passed to NewRow.
RunesLength int
|
||||
// Segments is the one-element slice {RunesLength} built by NewRow; Index
// returns it on a match.
Segments []int
|
||||
}
|
||||
|
||||
func NewRow(m Matchers, len int) Row {
|
||||
return Row{
|
||||
Matchers: m,
|
||||
RunesLength: len,
|
||||
Segments: []int{len},
|
||||
}
|
||||
}
|
||||
|
||||
func (self Row) matchAll(s string) bool {
|
||||
|
@ -66,7 +75,7 @@ func (self Row) Index(s string) (int, []int) {
|
|||
}
|
||||
|
||||
if self.matchAll(s[i:]) {
|
||||
return i, []int{self.RunesLength}
|
||||
return i, self.Segments
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -25,6 +25,23 @@ const (
|
|||
cacheToAndHigherIndex = 1023
|
||||
)
|
||||
|
||||
// asciiTable has one entry per ASCII code point (indices 0–127); init sets
// every entry to 1.
// NOTE(review): presumably the encoded byte length of each ASCII rune —
// confirm the intended use, no reader of this table is visible here.
var asciiTable [128]byte
|
||||
|
||||
// segmentsByRuneLength maps a length 0–4 to the shared one-element slice
// holding that length (segments0 … segments4); it is filled in init.
// Index methods index it with utf8.RuneLen(r), so a RuneLen of -1 would be
// out of range.
var segmentsByRuneLength [5][]int
|
||||
|
||||
// segments0 through segments4 are package-level one-element slices holding
// the values 0–4. Index methods return them directly instead of building a
// fresh []int literal on every call.
// NOTE(review): every caller receives the same backing array, so the
// returned slice presumably must be treated as read-only — confirm against
// call sites.
var (
|
||||
segments0 = []int{0}
|
||||
segments1 = []int{1}
|
||||
segments2 = []int{2}
|
||||
segments3 = []int{3}
|
||||
segments4 = []int{4}
|
||||
)
|
||||
|
||||
// asciiLo and asciiHi are the inclusive code-point bounds of the ASCII
// range (0 through 127).
// NOTE(review): no use of these constants is visible here; init loops to the
// literal 127 when filling asciiTable — consider using asciiHi there.
const (
|
||||
asciiLo = 0
|
||||
asciiHi = 127
|
||||
)
|
||||
|
||||
func init() {
|
||||
for i := cacheToAndHigher; i >= cacheFrom; i >>= 1 {
|
||||
func(i int) {
|
||||
|
@ -33,6 +50,16 @@ func init() {
|
|||
}}
|
||||
}(i)
|
||||
}
|
||||
|
||||
segmentsByRuneLength[0] = segments0
|
||||
segmentsByRuneLength[1] = segments1
|
||||
segmentsByRuneLength[2] = segments2
|
||||
segmentsByRuneLength[3] = segments3
|
||||
segmentsByRuneLength[4] = segments4
|
||||
|
||||
for i := 0; i <= 127; i++ {
|
||||
asciiTable[i] = 1
|
||||
}
|
||||
}
|
||||
|
||||
func getTableIndex(c int) int {
|
||||
|
|
|
@ -27,7 +27,7 @@ func (self Single) Len() int {
|
|||
func (self Single) Index(s string) (int, []int) {
|
||||
for i, r := range s {
|
||||
if runes.IndexRune(self.Separators, r) == -1 {
|
||||
return i, []int{utf8.RuneLen(r)}
|
||||
return i, segmentsByRuneLength[utf8.RuneLen(r)]
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -15,6 +15,7 @@ func (self Super) Len() int {
|
|||
}
|
||||
|
||||
func (self Super) Index(s string) (int, []int) {
|
||||
//todo acquire here
|
||||
segments := make([]int, 0, len(s)+1)
|
||||
for i := range s {
|
||||
segments = append(segments, i)
|
||||
|
|
|
@ -11,6 +11,7 @@ type Text struct {
|
|||
Str string
|
||||
RunesLength int
|
||||
BytesLength int
|
||||
Segments []int
|
||||
}
|
||||
|
||||
func NewText(s string) Text {
|
||||
|
@ -18,6 +19,7 @@ func NewText(s string) Text {
|
|||
Str: s,
|
||||
RunesLength: utf8.RuneCountInString(s),
|
||||
BytesLength: len(s),
|
||||
Segments: []int{len(s)},
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -35,7 +37,7 @@ func (self Text) Index(s string) (int, []int) {
|
|||
return -1, nil
|
||||
}
|
||||
|
||||
return index, []int{self.BytesLength}
|
||||
return index, self.Segments
|
||||
}
|
||||
|
||||
func (self Text) String() string {
|
||||
|
|
|
@ -84,6 +84,32 @@ func Contains(s, needle []rune) bool {
|
|||
return Index(s, needle) >= 0
|
||||
}
|
||||
|
||||
// Max returns the largest rune in s. The running maximum starts at the zero
// value, so an empty slice (or one holding only negative values) yields 0.
func Max(s []rune) (max rune) {
	for i := 0; i < len(s); i++ {
		if candidate := s[i]; candidate > max {
			max = candidate
		}
	}
	return max
}
|
||||
|
||||
// Min returns the smallest rune in s, or -1 when s is empty.
//
// The minimum is seeded from the first element rather than from a -1
// sentinel, so a slice that actually contains -1 is handled correctly
// instead of having its running minimum overwritten by the next element.
func Min(s []rune) rune {
	if len(s) == 0 {
		return -1
	}

	smallest := s[0]
	for _, r := range s[1:] {
		if r < smallest {
			smallest = r
		}
	}

	return smallest
}
|
||||
|
||||
func IndexRune(s []rune, r rune) int {
|
||||
for i, c := range s {
|
||||
if c == r {
|
||||
|
|
Loading…
Reference in New Issue