added JSON indexes

This commit is contained in:
Josh Baker 2016-08-02 17:24:56 -07:00
parent c93aa91259
commit 93806b18dc
3 changed files with 301 additions and 117 deletions

View File

@ -32,12 +32,13 @@ Features
- In-memory database for [fast reads and writes](#performance) - In-memory database for [fast reads and writes](#performance)
- Embeddable with a [simple API](https://godoc.org/github.com/tidwall/buntdb) - Embeddable with a [simple API](https://godoc.org/github.com/tidwall/buntdb)
- [Spatial indexing](#spatial-indexes) for up to 20 dimensions; Useful for Geospatial data - [Spatial indexing](#spatial-indexes) for up to 20 dimensions; Useful for Geospatial data
- Index fields inside [JSON](#json-indexes) documents
- Create [custom indexes](#custom-indexes) for any data type - Create [custom indexes](#custom-indexes) for any data type
- [Built-in types](#built-in-types) that are easy to get up & running; String, Uint, Int, Float - [Built-in types](#built-in-types) that are easy to get up & running; String, Uint, Int, Float
- Flexible [iteration](#iterating) of data; ascending, descending, and ranges - Flexible [iteration](#iterating) of data; ascending, descending, and ranges
- [Durable append-only file](#append-only-file) format for persistence. - [Durable append-only file](#append-only-file) format for persistence.
- Option to evict old items with an [expiration](#data-expiration) TTL - Option to evict old items with an [expiration](#data-expiration) TTL
- Tight codebase, under 1K loc using the `cloc` command - Tight codebase, ~1K loc using the `cloc` command
- ACID semantics with locking [transactions](#transactions) that support rollbacks - ACID semantics with locking [transactions](#transactions) that support rollbacks
Getting Started Getting Started
@ -260,7 +261,7 @@ user:1:name 49
user:4:name 63 user:4:name 63
``` ```
### Spatial Indexes ## Spatial Indexes
BuntDB has support for spatial indexes by storing rectangles in an [R-tree](https://en.wikipedia.org/wiki/R-tree). An R-tree is organized in a similar manner as a [B-tree](https://en.wikipedia.org/wiki/B-tree), and both are balanced trees. But, an R-tree is special because it can operate on data that is in multiple dimensions. This is super handy for Geospatial applications. BuntDB has support for spatial indexes by storing rectangles in an [R-tree](https://en.wikipedia.org/wiki/R-tree). An R-tree is organized in a similar manner as a [B-tree](https://en.wikipedia.org/wiki/B-tree), and both are balanced trees. But, an R-tree is special because it can operate on data that is in multiple dimensions. This is super handy for Geospatial applications.
To create a spatial index use the `CreateSpatialIndex` function: To create a spatial index use the `CreateSpatialIndex` function:
@ -296,7 +297,7 @@ db.View(func(tx *buntdb.Tx) error {
This will get all three positions. This will get all three positions.
#### Spatial bracket syntax ### Spatial bracket syntax
The bracket syntax `[-117 30],[-112 36]` is unique to BuntDB, and it's how the built-in rectangles are processed. But, you are not limited to this syntax. Whatever Rect function you choose to use during `CreateSpatialIndex` will be used to process the parameter, in this case it's `IndexRect`. The bracket syntax `[-117 30],[-112 36]` is unique to BuntDB, and it's how the built-in rectangles are processed. But, you are not limited to this syntax. Whatever Rect function you choose to use during `CreateSpatialIndex` will be used to process the parameter, in this case it's `IndexRect`.
@ -346,9 +347,62 @@ Which will return:
``` ```
## JSON Indexes
Indexes can be created on individual fields inside JSON documents.
For example:
```go
package main
import (
"fmt"
"github.com/tidwall/buntdb"
)
func main() {
db, _ := buntdb.Open(":memory:")
db.CreateIndex("last_name", "*", buntdb.IndexJSON("name.last"))
db.CreateIndex("age", "*", buntdb.IndexJSON("age"))
db.Update(func(tx *buntdb.Tx) error {
tx.Set("1", `{"name":{"first":"Tom","last":"Johnson"},"age":38}`, nil)
tx.Set("2", `{"name":{"first":"Janet","last":"Prichard"},"age":47}`, nil)
tx.Set("3", `{"name":{"first":"Carol","last":"Anderson"},"age":52}`, nil)
tx.Set("4", `{"name":{"first":"Alan","last":"Cooper"},"age":28}`, nil)
return nil
})
db.View(func(tx *buntdb.Tx) error {
fmt.Println("Order by last name")
tx.Ascend("last_name", func(key, value string) bool {
fmt.Printf("%s: %s\n", key, value)
return true
})
fmt.Println("Order by age")
tx.Ascend("age", func(key, value string) bool {
fmt.Printf("%s: %s\n", key, value)
return true
})
return nil
})
}
```
Results:
```
Order by last name
3: {"name":{"first":"Carol","last":"Anderson"},"age":52}
4: {"name":{"first":"Alan","last":"Cooper"},"age":28}
1: {"name":{"first":"Tom","last":"Johnson"},"age":38}
2: {"name":{"first":"Janet","last":"Prichard"},"age":47}
Order by age
4: {"name":{"first":"Alan","last":"Cooper"},"age":28}
1: {"name":{"first":"Tom","last":"Johnson"},"age":38}
2: {"name":{"first":"Janet","last":"Prichard"},"age":47}
3: {"name":{"first":"Carol","last":"Anderson"},"age":52}
```
### Data Expiration ### Data Expiration

View File

@ -7,6 +7,7 @@ package buntdb
import ( import (
"bufio" "bufio"
"bytes" "bytes"
"encoding/json"
"errors" "errors"
"io" "io"
"os" "os"
@ -530,6 +531,7 @@ func (db *DB) Shrink() error {
return err return err
} }
db.mu.Unlock() db.mu.Unlock()
time.Sleep(time.Second / 4) // wait just a bit before starting
f, err := os.Create(tmpname) f, err := os.Create(tmpname)
if err != nil { if err != nil {
return err return err
@ -1530,3 +1532,64 @@ func IndexFloat(a, b string) bool {
ib, _ := strconv.ParseFloat(b, 64) ib, _ := strconv.ParseFloat(b, 64)
return ia < ib return ia < ib
} }
// IndexJSON provides for the ability to create an index on any JSON field.
// When the field is a string, the comparison will be case-insensitive.
// It returns a helper function used by CreateIndex.
func IndexJSON(path string) func(a, b string) bool {
return jsonIndex(path, false)
}
// IndexJSONCaseSensitive provides for the ability to create an index on
// any JSON field.
// When the field is a string, the comparison will be case-sensitive.
// It returns a helper function used by CreateIndex.
func IndexJSONCaseSensitive(path string) func(a, b string) bool {
return jsonIndex(path, true)
}
func jsonIndex(path string, cs bool) func(a, b string) bool {
return func(a, b string) bool {
var am, bm map[string]interface{}
_ = json.Unmarshal([]byte(a), &am)
_ = json.Unmarshal([]byte(b), &bm)
parts := strings.Split(path, ".")
for i, part := range parts {
if am == nil {
if bm == nil {
return false
}
return true
} else if bm == nil {
return false
}
if i < len(parts)-1 {
am, _ = am[part].(map[string]interface{})
bm, _ = bm[part].(map[string]interface{})
continue
}
a, b := am[part], bm[part]
switch av := a.(type) {
case bool:
if bv, ok := b.(bool); ok {
return !av && bv
}
case float64:
if bv, ok := b.(float64); ok {
return av < bv
}
case string:
if bv, ok := b.(string); ok {
if cs {
return av < bv
}
return strings.ToLower(av) < strings.ToLower(bv)
}
case nil:
return b != nil
}
return false
}
return false
}
}

View File

@ -10,20 +10,39 @@ import (
"os" "os"
"strconv" "strconv"
"strings" "strings"
"sync"
"testing" "testing"
"time" "time"
) )
func TestBackgroudOperations(t *testing.T) { func testOpen(t testing.TB) *DB {
if err := os.RemoveAll("data.db"); err != nil { if err := os.RemoveAll("data.db"); err != nil {
t.Fatal(err) t.Fatal(err)
} }
return testReOpen(t, nil)
}
func testReOpen(t testing.TB, db *DB) *DB {
if db != nil {
if err := db.Close(); err != nil {
t.Fatal(err)
}
}
db, err := Open("data.db") db, err := Open("data.db")
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
defer func() { _ = os.RemoveAll("data.db") }() return db
defer func() { _ = db.Close() }() }
func testClose(db *DB) {
_ = db.Close()
_ = os.RemoveAll("data.db")
}
func TestBackgroudOperations(t *testing.T) {
db := testOpen(t)
defer testClose(db)
for i := 0; i < 1000; i++ { for i := 0; i < 1000; i++ {
if err := db.Update(func(tx *Tx) error { if err := db.Update(func(tx *Tx) error {
for j := 0; j < 200; j++ { for j := 0; j < 200; j++ {
@ -40,7 +59,7 @@ func TestBackgroudOperations(t *testing.T) {
} }
} }
n := 0 n := 0
err = db.View(func(tx *Tx) error { err := db.View(func(tx *Tx) error {
var err error var err error
n, err = tx.Len() n, err = tx.Len()
return err return err
@ -52,15 +71,8 @@ func TestBackgroudOperations(t *testing.T) {
t.Fatalf("expecting '%v', got '%v'", 201, n) t.Fatalf("expecting '%v', got '%v'", 201, n)
} }
time.Sleep(time.Millisecond * 1500) time.Sleep(time.Millisecond * 1500)
if err := db.Close(); err != nil { db = testReOpen(t, db)
t.Fatal(err) defer testClose(db)
}
db, err = Open("data.db")
if err != nil {
t.Fatal(err)
}
defer func() { _ = os.RemoveAll("data.db") }()
defer func() { _ = db.Close() }()
n = 0 n = 0
err = db.View(func(tx *Tx) error { err = db.View(func(tx *Tx) error {
var err error var err error
@ -75,15 +87,8 @@ func TestBackgroudOperations(t *testing.T) {
} }
} }
func TestVariousTx(t *testing.T) { func TestVariousTx(t *testing.T) {
if err := os.RemoveAll("data.db"); err != nil { db := testOpen(t)
t.Fatal(err) defer testClose(db)
}
db, err := Open("data.db")
if err != nil {
t.Fatal(err)
}
defer func() { _ = os.RemoveAll("data.db") }()
defer func() { _ = db.Close() }()
if err := db.Update(func(tx *Tx) error { if err := db.Update(func(tx *Tx) error {
_, _, err := tx.Set("hello", "planet", nil) _, _, err := tx.Set("hello", "planet", nil)
return err return err
@ -98,7 +103,7 @@ func TestVariousTx(t *testing.T) {
t.Fatalf("did not correctly receive the user-defined transaction error.") t.Fatalf("did not correctly receive the user-defined transaction error.")
} }
var val string var val string
err = db.View(func(tx *Tx) error { err := db.View(func(tx *Tx) error {
var err error var err error
val, err = tx.Get("hello") val, err = tx.Get("hello")
return err return err
@ -419,17 +424,10 @@ func TestNoExpiringItem(t *testing.T) {
} }
} }
func TestAutoShrink(t *testing.T) { func TestAutoShrink(t *testing.T) {
if err := os.RemoveAll("data.db"); err != nil { db := testOpen(t)
t.Fatal(err) defer testClose(db)
}
db, err := Open("data.db")
if err != nil {
t.Fatal(err)
}
defer func() { _ = os.RemoveAll("data.db") }()
defer func() { _ = db.Close() }()
for i := 0; i < 1000; i++ { for i := 0; i < 1000; i++ {
err = db.Update(func(tx *Tx) error { err := db.Update(func(tx *Tx) error {
for i := 0; i < 20; i++ { for i := 0; i < 20; i++ {
if _, _, err := tx.Set(fmt.Sprintf("HELLO:%d", i), "WORLD", nil); err != nil { if _, _, err := tx.Set(fmt.Sprintf("HELLO:%d", i), "WORLD", nil); err != nil {
return err return err
@ -441,16 +439,11 @@ func TestAutoShrink(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
} }
if err := db.Close(); err != nil { db = testReOpen(t, db)
t.Fatal(err) defer testClose(db)
}
db, err = Open("data.db")
if err != nil {
t.Fatal(err)
}
db.config.AutoShrinkMinSize = 64 * 1024 // 64K db.config.AutoShrinkMinSize = 64 * 1024 // 64K
for i := 0; i < 2000; i++ { for i := 0; i < 2000; i++ {
err = db.Update(func(tx *Tx) error { err := db.Update(func(tx *Tx) error {
for i := 0; i < 20; i++ { for i := 0; i < 20; i++ {
if _, _, err := tx.Set(fmt.Sprintf("HELLO:%d", i), "WORLD", nil); err != nil { if _, _, err := tx.Set(fmt.Sprintf("HELLO:%d", i), "WORLD", nil); err != nil {
return err return err
@ -463,14 +456,9 @@ func TestAutoShrink(t *testing.T) {
} }
} }
time.Sleep(time.Second * 3) time.Sleep(time.Second * 3)
if err := db.Close(); err != nil { db = testReOpen(t, db)
t.Fatal(err) defer testClose(db)
} err := db.View(func(tx *Tx) error {
db, err = Open("data.db")
if err != nil {
t.Fatal(err)
}
err = db.View(func(tx *Tx) error {
n, err := tx.Len() n, err := tx.Len()
if err != nil { if err != nil {
return err return err
@ -501,12 +489,8 @@ func TestDatabaseFormat(t *testing.T) {
if err := ioutil.WriteFile("data.db", []byte(resp), 0666); err != nil { if err := ioutil.WriteFile("data.db", []byte(resp), 0666); err != nil {
t.Fatal(err) t.Fatal(err)
} }
db, err := Open("data.db") db := testOpen(t)
if err != nil { defer testClose(db)
t.Fatal(err)
}
defer func() { _ = os.RemoveAll("data.db") }()
defer func() { _ = db.Close() }()
}() }()
testBadFormat := func(resp string) { testBadFormat := func(resp string) {
if err := os.RemoveAll("data.db"); err != nil { if err := os.RemoveAll("data.db"); err != nil {
@ -544,15 +528,8 @@ func TestDatabaseFormat(t *testing.T) {
} }
func TestInsertsAndDeleted(t *testing.T) { func TestInsertsAndDeleted(t *testing.T) {
if err := os.RemoveAll("data.db"); err != nil { db := testOpen(t)
t.Fatal(err) defer testClose(db)
}
db, err := Open("data.db")
if err != nil {
t.Fatal(err)
}
defer func() { _ = os.RemoveAll("data.db") }()
defer func() { _ = db.Close() }()
if err := db.CreateIndex("any", "*", IndexString); err != nil { if err := db.CreateIndex("any", "*", IndexString); err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -700,15 +677,8 @@ func TestOpeningClosedDatabase(t *testing.T) {
// test shrinking a database. // test shrinking a database.
func TestShrink(t *testing.T) { func TestShrink(t *testing.T) {
if err := os.RemoveAll("data.db"); err != nil { db := testOpen(t)
t.Fatal(err) defer testClose(db)
}
db, err := Open("data.db")
if err != nil {
t.Fatal(err)
}
defer func() { _ = os.RemoveAll("data.db") }()
defer func() { _ = db.Close() }()
if err := db.Shrink(); err != nil { if err := db.Shrink(); err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -820,17 +790,10 @@ func TestShrink(t *testing.T) {
} }
func TestVariousIndexOperations(t *testing.T) { func TestVariousIndexOperations(t *testing.T) {
if err := os.RemoveAll("data.db"); err != nil { db := testOpen(t)
t.Fatal(err) defer testClose(db)
}
db, err := Open("data.db")
if err != nil {
t.Fatal(err)
}
defer func() { _ = os.RemoveAll("data.db") }()
defer func() { _ = db.Close() }()
// test creating an index with no index name. // test creating an index with no index name.
err = db.CreateIndex("", "", nil) err := db.CreateIndex("", "", nil)
if err == nil { if err == nil {
t.Fatal("should not be able to create an index with no name") t.Fatal("should not be able to create an index with no name")
} }
@ -939,15 +902,8 @@ func TestPatternMatching(t *testing.T) {
func TestBasic(t *testing.T) { func TestBasic(t *testing.T) {
rand.Seed(time.Now().UnixNano()) rand.Seed(time.Now().UnixNano())
if err := os.RemoveAll("data.db"); err != nil { db := testOpen(t)
t.Fatal(err) defer testClose(db)
}
db, err := Open("data.db")
if err != nil {
t.Fatal(err)
}
defer func() { _ = os.RemoveAll("data.db") }()
defer func() { _ = db.Close() }()
// create a simple index // create a simple index
if err := db.CreateIndex("users", "fun:user:*", IndexString); err != nil { if err := db.CreateIndex("users", "fun:user:*", IndexString); err != nil {
@ -1019,8 +975,8 @@ func TestBasic(t *testing.T) {
} }
// verify the data has been created // verify the data has been created
buf := &bytes.Buffer{} buf := &bytes.Buffer{}
err = db.View(func(tx *Tx) error { err := db.View(func(tx *Tx) error {
err = tx.Ascend("users", func(key, val string) bool { err := tx.Ascend("users", func(key, val string) bool {
fmt.Fprintf(buf, "%s %s\n", key, val) fmt.Fprintf(buf, "%s %s\n", key, val)
return true return true
}) })
@ -1151,16 +1107,9 @@ func TestRectStrings(t *testing.T) {
} }
func TestTTL(t *testing.T) { func TestTTL(t *testing.T) {
if err := os.RemoveAll("data.db"); err != nil { db := testOpen(t)
t.Fatal(err) defer testClose(db)
} err := db.Update(func(tx *Tx) error {
db, err := Open("data.db")
if err != nil {
t.Fatal(err)
}
defer func() { _ = os.RemoveAll("data.db") }()
defer func() { _ = db.Close() }()
err = db.Update(func(tx *Tx) error {
if _, _, err := tx.Set("key1", "val1", &SetOptions{Expires: true, TTL: time.Second}); err != nil { if _, _, err := tx.Set("key1", "val1", &SetOptions{Expires: true, TTL: time.Second}); err != nil {
return err return err
} }
@ -1195,17 +1144,10 @@ func TestTTL(t *testing.T) {
} }
func TestConfig(t *testing.T) { func TestConfig(t *testing.T) {
if err := os.RemoveAll("data.db"); err != nil { db := testOpen(t)
t.Fatal(err) defer testClose(db)
}
db, err := Open("data.db")
if err != nil {
t.Fatal(err)
}
defer func() { _ = os.RemoveAll("data.db") }()
defer func() { _ = db.Close() }()
err = db.SetConfig(Config{SyncPolicy: SyncPolicy(-1)}) err := db.SetConfig(Config{SyncPolicy: SyncPolicy(-1)})
if err == nil { if err == nil {
t.Fatal("expecting a config syncpolicy error") t.Fatal("expecting a config syncpolicy error")
} }
@ -1477,3 +1419,128 @@ func Benchmark_Spatial_2D(t *testing.B) {
} }
*/ */
func TestCoverCloseAlreadyClosed(t *testing.T) {
db := testOpen(t)
defer testClose(db)
_ = db.file.Close()
if err := db.Close(); err == nil {
t.Fatal("expecting an error")
}
}
func TestCoverConfigClosed(t *testing.T) {
db := testOpen(t)
defer testClose(db)
_ = db.Close()
var config Config
if err := db.ReadConfig(&config); err != ErrDatabaseClosed {
t.Fatal("expecting database closed error")
}
if err := db.SetConfig(config); err != ErrDatabaseClosed {
t.Fatal("expecting database closed error")
}
}
func TestCoverShrinkShrink(t *testing.T) {
db := testOpen(t)
defer testClose(db)
if err := db.Update(func(tx *Tx) error {
for i := 0; i < 10000; i++ {
_, _, err := tx.Set(fmt.Sprintf("%d", i), fmt.Sprintf("%d", i), nil)
if err != nil {
return err
}
}
return nil
}); err != nil {
t.Fatal(err)
}
if err := db.Update(func(tx *Tx) error {
for i := 250; i < 250+100; i++ {
_, err := tx.Delete(fmt.Sprintf("%d", i))
if err != nil {
return err
}
}
return nil
}); err != nil {
t.Fatal(err)
}
var err1, err2 error
var wg sync.WaitGroup
wg.Add(2)
go func() {
defer wg.Done()
err1 = db.Shrink()
}()
go func() {
defer wg.Done()
err2 = db.Shrink()
}()
wg.Wait()
//println(123)
//fmt.Printf("%v\n%v\n", err1, err2)
if err1 != ErrShrinkInProcess && err2 != ErrShrinkInProcess {
t.Fatal("expecting a shrink in process error")
}
db = testReOpen(t, db)
defer testClose(db)
if err := db.View(func(tx *Tx) error {
n, err := tx.Len()
if err != nil {
return err
}
if n != 9900 {
t.Fatal("expecting 9900 items")
}
return nil
}); err != nil {
t.Fatal(err)
}
}
func TestJSONIndex(t *testing.T) {
db := testOpen(t)
defer testClose(db)
_ = db.CreateIndex("last_name", "*", IndexJSON("name.last"))
_ = db.CreateIndex("last_name_cs", "*", IndexJSONCaseSensitive("name.last"))
_ = db.CreateIndex("age", "*", IndexJSON("age"))
_ = db.CreateIndex("student", "*", IndexJSON("student"))
_ = db.Update(func(tx *Tx) error {
_, _, _ = tx.Set("1", `{"name":{"first":"Tom","last":"Johnson"},"age":38,"student":false}`, nil)
_, _, _ = tx.Set("2", `{"name":{"first":"Janet","last":"Prichard"},"age":47,"student":true}`, nil)
_, _, _ = tx.Set("3", `{"name":{"first":"Carol","last":"Anderson"},"age":52,"student":true}`, nil)
_, _, _ = tx.Set("4", `{"name":{"first":"Alan","last":"Cooper"},"age":28,"student":false}`, nil)
_, _, _ = tx.Set("5", `{"name":{"first":"bill","last":"frank"},"age":21,"student":true}`, nil)
_, _, _ = tx.Set("6", `{"name":{"first":"sally","last":"randall"},"age":68,"student":false}`, nil)
return nil
})
var keys []string
_ = db.View(func(tx *Tx) error {
_ = tx.Ascend("last_name_cs", func(key, value string) bool {
//fmt.Printf("%s: %s\n", key, value)
keys = append(keys, key)
return true
})
_ = tx.Ascend("last_name", func(key, value string) bool {
//fmt.Printf("%s: %s\n", key, value)
keys = append(keys, key)
return true
})
_ = tx.Ascend("age", func(key, value string) bool {
//fmt.Printf("%s: %s\n", key, value)
keys = append(keys, key)
return true
})
_ = tx.Ascend("student", func(key, value string) bool {
//fmt.Printf("%s: %s\n", key, value)
keys = append(keys, key)
return true
})
return nil
})
expect := "3,4,1,2,5,6,3,4,5,1,2,6,5,4,1,2,3,6,1,4,6,2,3,5"
if strings.Join(keys, ",") != expect {
t.Fatalf("expected %v, got %v", expect, strings.Join(keys, ","))
}
}