tile38/internal/server/scanner.go

586 lines
13 KiB
Go
Raw Normal View History

package server
2016-03-05 02:08:16 +03:00
import (
"bytes"
2022-04-22 01:00:17 +03:00
"encoding/base64"
2016-03-05 02:08:16 +03:00
"errors"
2017-08-10 23:31:36 +03:00
"math"
2016-03-05 02:08:16 +03:00
"strconv"
2016-04-02 17:20:30 +03:00
"sync"
2016-03-05 02:08:16 +03:00
"github.com/mmcloughlin/geohash"
"github.com/tidwall/geojson"
2022-04-22 01:00:17 +03:00
"github.com/tidwall/mvt"
2016-03-29 00:16:21 +03:00
"github.com/tidwall/resp"
"github.com/tidwall/tile38/internal/clip"
"github.com/tidwall/tile38/internal/collection"
"github.com/tidwall/tile38/internal/glob"
2016-03-05 02:08:16 +03:00
)
// limitItems is the item limit applied by newScanWriter when the caller
// passes a limit of zero and the output type is not outputCount.
const limitItems = 100

// outputT enumerates the shapes in which a scan can report its results.
type outputT int

// Output kinds understood by the scan writer. outputUnknown is the zero
// value and is rejected by newScanWriter.
const (
	outputUnknown outputT = iota
	outputIDs
	outputObjects
	outputCount
	outputPoints
	outputHashes
	outputBounds
)
type scanWriter struct {
2016-04-02 17:20:30 +03:00
mu sync.Mutex
s *Server
2016-03-05 02:08:16 +03:00
wr *bytes.Buffer
2022-08-31 02:50:19 +03:00
key string
msg *Message
2016-03-05 02:08:16 +03:00
col *collection.Collection
fmap map[string]int
farr []string
fvals []float64
output outputT
wheres []whereT
2017-08-23 23:13:12 +03:00
whereins []whereinT
whereevals []whereevalT
2018-11-01 08:00:09 +03:00
numberIters uint64
2016-03-05 02:08:16 +03:00
numberItems uint64
nofields bool
cursor uint64
2016-03-05 02:08:16 +03:00
limit uint64
hitLimit bool
once bool
count uint64
precision uint64
globs []string
2016-03-05 02:08:16 +03:00
globEverything bool
fullFields bool
2016-03-29 00:16:21 +03:00
values []resp.Value
2022-04-22 01:00:17 +03:00
mvtObjs []geojson.Object
2016-07-13 06:11:02 +03:00
matchValues bool
2022-04-22 01:00:17 +03:00
mvt bool
2017-10-05 18:20:40 +03:00
respOut resp.Value
2022-08-31 02:50:19 +03:00
orgWheres []whereT
orgWhereins []whereinT
2016-03-05 02:08:16 +03:00
}
// ScanWriterParams ...
2017-01-10 19:49:48 +03:00
type ScanWriterParams struct {
2021-07-11 20:09:51 +03:00
id string
o geojson.Object
fields []float64
distance float64
distOutput bool // query or fence requested distance output
noLock bool
noTest bool
2021-07-11 20:09:51 +03:00
ignoreGlobMatch bool
clip geojson.Object
skipTesting bool
2017-01-10 19:49:48 +03:00
}
func (s *Server) newScanWriter(
wr *bytes.Buffer, msg *Message, key string, output outputT,
precision uint64, globs []string, matchValues bool,
2021-12-09 19:24:26 +03:00
cursor, limit uint64, wheres []whereT, whereins []whereinT,
2022-04-22 01:00:17 +03:00
whereevals []whereevalT, nofields, mvt bool,
2016-03-05 02:08:16 +03:00
) (
*scanWriter, error,
) {
switch output {
default:
return nil, errors.New("invalid output type")
case outputIDs, outputObjects, outputCount, outputBounds, outputPoints, outputHashes:
}
2017-08-10 23:31:36 +03:00
if limit == 0 {
if output == outputCount {
limit = math.MaxUint64
} else {
limit = limitItems
}
}
2016-03-05 02:08:16 +03:00
sw := &scanWriter{
s: s,
2016-07-13 06:11:02 +03:00
wr: wr,
2022-08-31 02:50:19 +03:00
key: key,
2016-07-13 06:11:02 +03:00
msg: msg,
2022-04-22 01:00:17 +03:00
mvt: mvt,
globs: globs,
2016-07-13 07:51:01 +03:00
limit: limit,
2021-12-09 19:24:26 +03:00
cursor: cursor,
2016-07-13 07:51:01 +03:00
output: output,
2016-07-13 06:11:02 +03:00
nofields: nofields,
2016-07-13 07:51:01 +03:00
precision: precision,
2021-12-09 19:24:26 +03:00
whereevals: whereevals,
2016-07-13 06:11:02 +03:00
matchValues: matchValues,
2016-03-05 02:08:16 +03:00
}
if len(globs) == 0 || (len(globs) == 1 && globs[0] == "*") {
2016-03-05 02:08:16 +03:00
sw.globEverything = true
}
2022-08-31 02:50:19 +03:00
sw.orgWheres = wheres
sw.orgWhereins = whereins
sw.loadWheres()
return sw, nil
}
func (sw *scanWriter) loadWheres() {
sw.fmap = nil
sw.farr = nil
sw.wheres = nil
sw.whereins = nil
sw.fvals = nil
sw.col, _ = sw.s.cols.Get(sw.key)
2016-03-05 02:08:16 +03:00
if sw.col != nil {
sw.fmap = sw.col.FieldMap()
sw.farr = sw.col.FieldArr()
// This fills index value in wheres/whereins
// so we don't have to map string field names for each tested object
var ok bool
2022-08-31 02:50:19 +03:00
if len(sw.orgWheres) > 0 {
sw.wheres = make([]whereT, len(sw.orgWheres))
for i, where := range sw.orgWheres {
2020-04-09 19:59:24 +03:00
if where.index, ok = sw.fmap[where.field]; !ok {
where.index = math.MaxInt32
2020-04-09 19:30:38 +03:00
}
sw.wheres[i] = where
}
}
2022-08-31 02:50:19 +03:00
if len(sw.orgWhereins) > 0 {
sw.whereins = make([]whereinT, len(sw.orgWhereins))
for i, wherein := range sw.orgWhereins {
2020-04-09 19:59:24 +03:00
if wherein.index, ok = sw.fmap[wherein.field]; !ok {
wherein.index = math.MaxInt32
2020-04-09 19:30:38 +03:00
}
sw.whereins[i] = wherein
}
}
2022-08-31 02:50:19 +03:00
if len(sw.farr) > 0 {
sw.fvals = make([]float64, len(sw.farr))
}
2016-03-05 02:08:16 +03:00
}
2022-08-31 02:50:19 +03:00
2016-03-05 02:08:16 +03:00
}
// hasFieldsOutput reports whether field values are part of the output: the
// output kind must be objects, points, hashes or bounds, and nofields must not
// be set.
func (sw *scanWriter) hasFieldsOutput() bool {
	if sw.nofields {
		return false
	}
	switch sw.output {
	case outputObjects, outputPoints, outputHashes, outputBounds:
		return true
	}
	return false
}
func (sw *scanWriter) writeHead() {
2016-04-02 17:20:30 +03:00
sw.mu.Lock()
defer sw.mu.Unlock()
2022-04-22 01:00:17 +03:00
if sw.mvt {
sw.wr.WriteString(`,"mvt":"`)
} else {
switch sw.msg.OutputType {
case JSON:
if len(sw.farr) > 0 && sw.hasFieldsOutput() {
sw.wr.WriteString(`,"fields":[`)
for i, field := range sw.farr {
if i > 0 {
sw.wr.WriteByte(',')
}
sw.wr.WriteString(jsonString(field))
2016-03-29 00:16:21 +03:00
}
2022-04-22 01:00:17 +03:00
sw.wr.WriteByte(']')
2016-03-05 02:08:16 +03:00
}
2022-04-22 01:00:17 +03:00
switch sw.output {
case outputIDs:
sw.wr.WriteString(`,"ids":[`)
case outputObjects:
sw.wr.WriteString(`,"objects":[`)
case outputPoints:
sw.wr.WriteString(`,"points":[`)
case outputBounds:
sw.wr.WriteString(`,"bounds":[`)
case outputHashes:
sw.wr.WriteString(`,"hashes":[`)
case outputCount:
2016-03-05 02:08:16 +03:00
2022-04-22 01:00:17 +03:00
}
case RESP:
2016-03-29 00:16:21 +03:00
}
2016-03-05 02:08:16 +03:00
}
}
2022-04-22 01:00:17 +03:00
// compileMVT renders the collected objects into a vector tile with a single
// layer named "default" and an extent of 4096.
//
// One polygon feature is added per collected object, but the object's
// geometry is not encoded yet: each feature only receives a ClosePath command.
func (sw *scanWriter) compileMVT() []byte {
	var tile mvt.Tile
	layer := tile.AddLayer("default")
	layer.SetExtent(4096)
	for range sw.mvtObjs {
		feature := layer.AddFeature(mvt.Polygon)
		// Placeholder path, kept from the original as a usage sketch:
		//   feature.MoveTo(128, 96)
		//   feature.LineTo(148, 128)
		//   feature.LineTo(108, 128)
		//   feature.LineTo(128, 96)
		feature.ClosePath()
	}
	return tile.Render()
}
func (sw *scanWriter) writeFoot() {
2016-04-02 17:20:30 +03:00
sw.mu.Lock()
defer sw.mu.Unlock()
2018-11-01 08:00:09 +03:00
cursor := sw.numberIters
2016-03-05 02:08:16 +03:00
if !sw.hitLimit {
cursor = 0
}
2022-04-22 01:00:17 +03:00
var mvtTile []byte
if sw.mvt {
mvtTile = sw.compileMVT()
}
2016-03-29 00:16:21 +03:00
switch sw.msg.OutputType {
case JSON:
2022-04-22 01:00:17 +03:00
if sw.mvt {
sw.wr.WriteString(base64.RawStdEncoding.EncodeToString(mvtTile))
sw.wr.WriteByte('"')
} else {
switch sw.output {
default:
sw.wr.WriteByte(']')
case outputCount:
}
2016-03-29 00:16:21 +03:00
}
sw.wr.WriteString(`,"count":` + strconv.FormatUint(sw.count, 10))
sw.wr.WriteString(`,"cursor":` + strconv.FormatUint(cursor, 10))
case RESP:
2016-05-24 00:21:18 +03:00
if sw.output == outputCount {
2017-10-05 18:20:40 +03:00
sw.respOut = resp.IntegerValue(int(sw.count))
2016-05-24 00:21:18 +03:00
} else {
2022-04-22 01:00:17 +03:00
values := []resp.Value{resp.IntegerValue(int(cursor))}
if sw.mvt {
values = append(values, resp.BytesValue(mvtTile))
} else {
values = append(values, resp.ArrayValue(sw.values))
2016-05-24 00:21:18 +03:00
}
2017-10-05 18:20:40 +03:00
sw.respOut = resp.ArrayValue(values)
2016-03-29 00:16:21 +03:00
}
2016-03-05 02:08:16 +03:00
}
}
// extractZCoordinate returns the Z value of o when o is a *geojson.Point,
// unwrapping any number of enclosing *geojson.Feature values first. Every
// other kind of object yields 0.
func extractZCoordinate(o geojson.Object) float64 {
	for {
		if feature, isFeature := o.(*geojson.Feature); isFeature {
			o = feature.Base()
			continue
		}
		if point, isPoint := o.(*geojson.Point); isPoint {
			return point.Z()
		}
		return 0
	}
}
func (sw *scanWriter) fieldMatch(fields []float64, o geojson.Object) (fvals []float64, match bool) {
2016-03-05 02:08:16 +03:00
var z float64
var gotz bool
fvals = sw.fvals
2016-03-05 02:08:16 +03:00
if !sw.hasFieldsOutput() || sw.fullFields {
for _, where := range sw.wheres {
if where.field == "z" {
if !gotz {
z = extractZCoordinate(o)
2016-03-05 02:08:16 +03:00
}
if !where.match(z) {
return
2016-03-05 02:08:16 +03:00
}
continue
}
var value float64
2020-04-09 19:59:24 +03:00
if where.index < len(fields) {
value = fields[where.index]
2016-03-05 02:08:16 +03:00
}
if !where.match(value) {
return
2016-03-05 02:08:16 +03:00
}
}
2017-08-23 23:13:12 +03:00
for _, wherein := range sw.whereins {
var value float64
2020-04-09 19:59:24 +03:00
if wherein.index < len(fields) {
value = fields[wherein.index]
2017-08-23 23:13:12 +03:00
}
if !wherein.match(value) {
return
}
}
for _, whereval := range sw.whereevals {
fieldsWithNames := make(map[string]float64)
for field, idx := range sw.fmap {
if idx < len(fields) {
fieldsWithNames[field] = fields[idx]
} else {
fieldsWithNames[field] = 0
}
}
if !whereval.match(fieldsWithNames) {
return
2017-08-23 23:13:12 +03:00
}
}
2016-03-05 02:08:16 +03:00
} else {
copy(sw.fvals, fields)
// fields might be shorter for this item, need to pad sw.fvals with zeros
for i := len(fields); i < len(sw.fvals); i++ {
sw.fvals[i] = 0
2016-03-05 02:08:16 +03:00
}
for _, where := range sw.wheres {
if where.field == "z" {
if !gotz {
z = extractZCoordinate(o)
2016-03-05 02:08:16 +03:00
}
if !where.match(z) {
return
2016-03-05 02:08:16 +03:00
}
continue
}
var value float64
if where.index < len(sw.fvals) {
value = sw.fvals[where.index]
2016-03-05 02:08:16 +03:00
}
if !where.match(value) {
return
2016-03-05 02:08:16 +03:00
}
}
2017-08-23 23:13:12 +03:00
for _, wherein := range sw.whereins {
var value float64
if wherein.index < len(sw.fvals) {
value = sw.fvals[wherein.index]
2017-08-23 23:13:12 +03:00
}
if !wherein.match(value) {
return
}
}
for _, whereval := range sw.whereevals {
fieldsWithNames := make(map[string]float64)
for field, idx := range sw.fmap {
if idx < len(fields) {
fieldsWithNames[field] = fields[idx]
} else {
fieldsWithNames[field] = 0
}
}
if !whereval.match(fieldsWithNames) {
return
2017-08-23 23:13:12 +03:00
}
}
2016-03-05 02:08:16 +03:00
}
match = true
return
2016-03-05 02:08:16 +03:00
}
// globMatch reports whether the object passes the writer's glob patterns.
//
// The patterns are matched against the object's string form when matchValues
// is set and against id otherwise; one matching pattern is enough. The error
// returned by glob.Match is discarded and only its boolean result is
// consulted. keepGoing is true on every path.
func (sw *scanWriter) globMatch(id string, o geojson.Object) (ok, keepGoing bool) {
	if sw.globEverything {
		return true, true
	}
	subject := id
	if sw.matchValues {
		subject = o.String()
	}
	for i := range sw.globs {
		if matched, _ := glob.Match(sw.globs[i], subject); matched {
			return true, true
		}
	}
	return false, true
}
2018-11-01 08:00:09 +03:00
// Increment cursor
2018-11-02 16:09:56 +03:00
func (sw *scanWriter) Offset() uint64 {
return sw.cursor
}
func (sw *scanWriter) Step(n uint64) {
2018-11-01 08:00:09 +03:00
sw.numberIters += n
}
// ok is whether the object passes the test and should be written
// keepGoing is whether there could be more objects to test
func (sw *scanWriter) testObject(id string, o geojson.Object, fields []float64) (
ok, keepGoing bool, fieldVals []float64) {
match, kg := sw.globMatch(id, o)
if !match {
return false, kg, fieldVals
}
nf, ok := sw.fieldMatch(fields, o)
2018-11-02 16:09:56 +03:00
return ok, true, nf
}
2022-09-13 02:02:23 +03:00
// id string, o geojson.Object, fields []float64, noLock bool
2017-01-10 19:49:48 +03:00
func (sw *scanWriter) writeObject(opts ScanWriterParams) bool {
if !opts.noLock {
2016-04-03 00:13:20 +03:00
sw.mu.Lock()
defer sw.mu.Unlock()
}
keepGoing := true
if !opts.noTest {
var ok bool
ok, keepGoing, _ = sw.testObject(opts.id, opts.o, opts.fields)
if !ok {
return keepGoing
}
2016-03-05 02:08:16 +03:00
}
sw.count++
if sw.output == outputCount {
2017-07-26 06:23:21 +03:00
return sw.count < sw.limit
2016-03-05 02:08:16 +03:00
}
if opts.clip != nil {
opts.o = clip.Clip(opts.o, opts.clip, &sw.s.geomIndexOpts)
2018-05-08 02:18:18 +03:00
}
2022-04-22 01:00:17 +03:00
if sw.mvt {
sw.mvtObjs = append(sw.mvtObjs, opts.o)
} else {
switch sw.msg.OutputType {
case JSON:
var wr bytes.Buffer
var jsfields string
if sw.once {
wr.WriteByte(',')
} else {
sw.once = true
}
if sw.hasFieldsOutput() {
if sw.fullFields {
if len(sw.fmap) > 0 {
jsfields = `,"fields":{`
var i int
for field, idx := range sw.fmap {
if len(opts.fields) > idx {
if opts.fields[idx] != 0 {
if i > 0 {
jsfields += `,`
}
jsfields += jsonString(field) + ":" + strconv.FormatFloat(opts.fields[idx], 'f', -1, 64)
i++
2016-03-29 00:16:21 +03:00
}
2016-03-05 02:08:16 +03:00
}
}
2022-04-22 01:00:17 +03:00
jsfields += `}`
2016-03-05 02:08:16 +03:00
}
2016-03-29 00:16:21 +03:00
2022-04-22 01:00:17 +03:00
} else if len(sw.farr) > 0 {
jsfields = `,"fields":[`
for i, name := range sw.farr {
if i > 0 {
jsfields += `,`
}
j := sw.fmap[name]
if j < len(opts.fields) {
jsfields += strconv.FormatFloat(opts.fields[j], 'f', -1, 64)
} else {
jsfields += "0"
}
2016-03-29 00:16:21 +03:00
}
2022-04-22 01:00:17 +03:00
jsfields += `]`
2016-03-29 00:16:21 +03:00
}
2016-03-05 02:08:16 +03:00
}
2022-04-22 01:00:17 +03:00
if sw.output == outputIDs {
wr.WriteString(jsonString(opts.id))
} else {
wr.WriteString(`{"id":` + jsonString(opts.id))
switch sw.output {
case outputObjects:
wr.WriteString(`,"object":` + string(opts.o.AppendJSON(nil)))
case outputPoints:
wr.WriteString(`,"point":` + string(appendJSONSimplePoint(nil, opts.o)))
case outputHashes:
center := opts.o.Center()
p := geohash.EncodeWithPrecision(center.Y, center.X, uint(sw.precision))
wr.WriteString(`,"hash":"` + p + `"`)
case outputBounds:
wr.WriteString(`,"bounds":` + string(appendJSONSimpleBounds(nil, opts.o)))
}
2017-01-10 19:49:48 +03:00
2022-04-22 01:00:17 +03:00
wr.WriteString(jsfields)
2017-01-10 19:49:48 +03:00
2022-04-22 01:00:17 +03:00
if opts.distOutput || opts.distance > 0 {
wr.WriteString(`,"distance":` + strconv.FormatFloat(opts.distance, 'f', -1, 64))
}
2017-01-10 19:49:48 +03:00
2022-04-22 01:00:17 +03:00
wr.WriteString(`}`)
}
sw.wr.Write(wr.Bytes())
case RESP:
vals := make([]resp.Value, 1, 3)
vals[0] = resp.StringValue(opts.id)
if sw.output == outputIDs {
sw.values = append(sw.values, vals[0])
} else {
switch sw.output {
case outputObjects:
vals = append(vals, resp.StringValue(opts.o.String()))
case outputPoints:
point := opts.o.Center()
z := extractZCoordinate(opts.o)
if z != 0 {
vals = append(vals, resp.ArrayValue([]resp.Value{
resp.FloatValue(point.Y),
resp.FloatValue(point.X),
resp.FloatValue(z),
}))
} else {
vals = append(vals, resp.ArrayValue([]resp.Value{
resp.FloatValue(point.Y),
resp.FloatValue(point.X),
}))
}
case outputHashes:
center := opts.o.Center()
p := geohash.EncodeWithPrecision(center.Y, center.X, uint(sw.precision))
vals = append(vals, resp.StringValue(p))
case outputBounds:
bbox := opts.o.Rect()
2016-03-29 00:16:21 +03:00
vals = append(vals, resp.ArrayValue([]resp.Value{
2022-04-22 01:00:17 +03:00
resp.ArrayValue([]resp.Value{
resp.FloatValue(bbox.Min.Y),
resp.FloatValue(bbox.Min.X),
}),
resp.ArrayValue([]resp.Value{
resp.FloatValue(bbox.Max.Y),
resp.FloatValue(bbox.Max.X),
}),
2016-03-29 00:16:21 +03:00
}))
}
2022-04-22 01:00:17 +03:00
if sw.hasFieldsOutput() {
fvs := orderFields(sw.fmap, sw.farr, opts.fields)
if len(fvs) > 0 {
fvals := make([]resp.Value, 0, len(fvs)*2)
for i, fv := range fvs {
fvals = append(fvals, resp.StringValue(fv.field), resp.StringValue(strconv.FormatFloat(fv.value, 'f', -1, 64)))
i++
}
vals = append(vals, resp.ArrayValue(fvals))
}
2016-03-29 00:16:21 +03:00
}
2022-04-22 01:00:17 +03:00
if opts.distOutput || opts.distance > 0 {
vals = append(vals, resp.FloatValue(opts.distance))
}
2017-01-10 19:49:48 +03:00
2022-04-22 01:00:17 +03:00
sw.values = append(sw.values, resp.ArrayValue(vals))
}
2016-03-05 02:08:16 +03:00
}
}
sw.numberItems++
if sw.numberItems == sw.limit {
sw.hitLimit = true
return false
}
return keepGoing
}