added JSON indexes

This commit is contained in:
Josh Baker 2016-08-02 17:24:56 -07:00
parent c93aa91259
commit 93806b18dc
3 changed files with 301 additions and 117 deletions

View File

@ -32,12 +32,13 @@ Features
- In-memory database for [fast reads and writes](#performance)
- Embeddable with a [simple API](https://godoc.org/github.com/tidwall/buntdb)
- [Spatial indexing](#spatial-indexes) for up to 20 dimensions; Useful for Geospatial data
- Index fields inside [JSON](#json-indexes) documents
- Create [custom indexes](#custom-indexes) for any data type
- [Built-in types](#built-in-types) that are easy to get up & running; String, Uint, Int, Float
- Flexible [iteration](#iterating) of data; ascending, descending, and ranges
- [Durable append-only file](#append-only-file) format for persistence.
- Option to evict old items with an [expiration](#data-expiration) TTL
- Tight codebase, under 1K loc using the `cloc` command
- Tight codebase, ~1K loc using the `cloc` command
- ACID semantics with locking [transactions](#transactions) that support rollbacks
Getting Started
@ -260,7 +261,7 @@ user:1:name 49
user:4:name 63
```
### Spatial Indexes
## Spatial Indexes
BuntDB has support for spatial indexes by storing rectangles in an [R-tree](https://en.wikipedia.org/wiki/R-tree). An R-tree is organized in a similar manner as a [B-tree](https://en.wikipedia.org/wiki/B-tree), and both are balanced trees. But, an R-tree is special because it can operate on data that is in multiple dimensions. This is super handy for Geospatial applications.
To create a spatial index use the `CreateSpatialIndex` function:
@ -296,7 +297,7 @@ db.View(func(tx *buntdb.Tx) error {
This will get all three positions.
#### Spatial bracket syntax
### Spatial bracket syntax
The bracket syntax `[-117 30],[-112 36]` is unique to BuntDB, and it's how the built-in rectangles are processed. But, you are not limited to this syntax. Whatever Rect function you choose to use during `CreateSpatialIndex` will be used to process the parameter, in this case it's `IndexRect`.
@ -346,9 +347,62 @@ Which will return:
```
## JSON Indexes
Indexes can be created on individual fields inside JSON documents.
For example:
```go
package main
import (
"fmt"
"github.com/tidwall/buntdb"
)
func main() {
db, _ := buntdb.Open(":memory:")
db.CreateIndex("last_name", "*", buntdb.IndexJSON("name.last"))
db.CreateIndex("age", "*", buntdb.IndexJSON("age"))
db.Update(func(tx *buntdb.Tx) error {
tx.Set("1", `{"name":{"first":"Tom","last":"Johnson"},"age":38}`, nil)
tx.Set("2", `{"name":{"first":"Janet","last":"Prichard"},"age":47}`, nil)
tx.Set("3", `{"name":{"first":"Carol","last":"Anderson"},"age":52}`, nil)
tx.Set("4", `{"name":{"first":"Alan","last":"Cooper"},"age":28}`, nil)
return nil
})
db.View(func(tx *buntdb.Tx) error {
fmt.Println("Order by last name")
tx.Ascend("last_name", func(key, value string) bool {
fmt.Printf("%s: %s\n", key, value)
return true
})
fmt.Println("Order by age")
tx.Ascend("age", func(key, value string) bool {
fmt.Printf("%s: %s\n", key, value)
return true
})
return nil
})
}
```
Results:
```
Order by last name
3: {"name":{"first":"Carol","last":"Anderson"},"age":52}
4: {"name":{"first":"Alan","last":"Cooper"},"age":28}
1: {"name":{"first":"Tom","last":"Johnson"},"age":38}
2: {"name":{"first":"Janet","last":"Prichard"},"age":47}
Order by age
4: {"name":{"first":"Alan","last":"Cooper"},"age":28}
1: {"name":{"first":"Tom","last":"Johnson"},"age":38}
2: {"name":{"first":"Janet","last":"Prichard"},"age":47}
3: {"name":{"first":"Carol","last":"Anderson"},"age":52}
```
### Data Expiration

View File

@ -7,6 +7,7 @@ package buntdb
import (
"bufio"
"bytes"
"encoding/json"
"errors"
"io"
"os"
@ -530,6 +531,7 @@ func (db *DB) Shrink() error {
return err
}
db.mu.Unlock()
time.Sleep(time.Second / 4) // wait just a bit before starting
f, err := os.Create(tmpname)
if err != nil {
return err
@ -1530,3 +1532,64 @@ func IndexFloat(a, b string) bool {
ib, _ := strconv.ParseFloat(b, 64)
return ia < ib
}
// IndexJSON provides for the ability to create an index on any JSON field.
// When the field is a string, the comparison will be case-insensitive.
// It returns a helper function used by CreateIndex.
func IndexJSON(path string) func(a, b string) bool {
return jsonIndex(path, false)
}
// IndexJSONCaseSensitive provides for the ability to create an index on
// any JSON field.
// When the field is a string, the comparison will be case-sensitive.
// It returns a helper function used by CreateIndex.
func IndexJSONCaseSensitive(path string) func(a, b string) bool {
return jsonIndex(path, true)
}
func jsonIndex(path string, cs bool) func(a, b string) bool {
return func(a, b string) bool {
var am, bm map[string]interface{}
_ = json.Unmarshal([]byte(a), &am)
_ = json.Unmarshal([]byte(b), &bm)
parts := strings.Split(path, ".")
for i, part := range parts {
if am == nil {
if bm == nil {
return false
}
return true
} else if bm == nil {
return false
}
if i < len(parts)-1 {
am, _ = am[part].(map[string]interface{})
bm, _ = bm[part].(map[string]interface{})
continue
}
a, b := am[part], bm[part]
switch av := a.(type) {
case bool:
if bv, ok := b.(bool); ok {
return !av && bv
}
case float64:
if bv, ok := b.(float64); ok {
return av < bv
}
case string:
if bv, ok := b.(string); ok {
if cs {
return av < bv
}
return strings.ToLower(av) < strings.ToLower(bv)
}
case nil:
return b != nil
}
return false
}
return false
}
}

View File

@ -10,20 +10,39 @@ import (
"os"
"strconv"
"strings"
"sync"
"testing"
"time"
)
func TestBackgroudOperations(t *testing.T) {
func testOpen(t testing.TB) *DB {
if err := os.RemoveAll("data.db"); err != nil {
t.Fatal(err)
}
return testReOpen(t, nil)
}
func testReOpen(t testing.TB, db *DB) *DB {
if db != nil {
if err := db.Close(); err != nil {
t.Fatal(err)
}
}
db, err := Open("data.db")
if err != nil {
t.Fatal(err)
}
defer func() { _ = os.RemoveAll("data.db") }()
defer func() { _ = db.Close() }()
return db
}
func testClose(db *DB) {
_ = db.Close()
_ = os.RemoveAll("data.db")
}
func TestBackgroudOperations(t *testing.T) {
db := testOpen(t)
defer testClose(db)
for i := 0; i < 1000; i++ {
if err := db.Update(func(tx *Tx) error {
for j := 0; j < 200; j++ {
@ -40,7 +59,7 @@ func TestBackgroudOperations(t *testing.T) {
}
}
n := 0
err = db.View(func(tx *Tx) error {
err := db.View(func(tx *Tx) error {
var err error
n, err = tx.Len()
return err
@ -52,15 +71,8 @@ func TestBackgroudOperations(t *testing.T) {
t.Fatalf("expecting '%v', got '%v'", 201, n)
}
time.Sleep(time.Millisecond * 1500)
if err := db.Close(); err != nil {
t.Fatal(err)
}
db, err = Open("data.db")
if err != nil {
t.Fatal(err)
}
defer func() { _ = os.RemoveAll("data.db") }()
defer func() { _ = db.Close() }()
db = testReOpen(t, db)
defer testClose(db)
n = 0
err = db.View(func(tx *Tx) error {
var err error
@ -75,15 +87,8 @@ func TestBackgroudOperations(t *testing.T) {
}
}
func TestVariousTx(t *testing.T) {
if err := os.RemoveAll("data.db"); err != nil {
t.Fatal(err)
}
db, err := Open("data.db")
if err != nil {
t.Fatal(err)
}
defer func() { _ = os.RemoveAll("data.db") }()
defer func() { _ = db.Close() }()
db := testOpen(t)
defer testClose(db)
if err := db.Update(func(tx *Tx) error {
_, _, err := tx.Set("hello", "planet", nil)
return err
@ -98,7 +103,7 @@ func TestVariousTx(t *testing.T) {
t.Fatalf("did not correctly receive the user-defined transaction error.")
}
var val string
err = db.View(func(tx *Tx) error {
err := db.View(func(tx *Tx) error {
var err error
val, err = tx.Get("hello")
return err
@ -419,17 +424,10 @@ func TestNoExpiringItem(t *testing.T) {
}
}
func TestAutoShrink(t *testing.T) {
if err := os.RemoveAll("data.db"); err != nil {
t.Fatal(err)
}
db, err := Open("data.db")
if err != nil {
t.Fatal(err)
}
defer func() { _ = os.RemoveAll("data.db") }()
defer func() { _ = db.Close() }()
db := testOpen(t)
defer testClose(db)
for i := 0; i < 1000; i++ {
err = db.Update(func(tx *Tx) error {
err := db.Update(func(tx *Tx) error {
for i := 0; i < 20; i++ {
if _, _, err := tx.Set(fmt.Sprintf("HELLO:%d", i), "WORLD", nil); err != nil {
return err
@ -441,16 +439,11 @@ func TestAutoShrink(t *testing.T) {
t.Fatal(err)
}
}
if err := db.Close(); err != nil {
t.Fatal(err)
}
db, err = Open("data.db")
if err != nil {
t.Fatal(err)
}
db = testReOpen(t, db)
defer testClose(db)
db.config.AutoShrinkMinSize = 64 * 1024 // 64K
for i := 0; i < 2000; i++ {
err = db.Update(func(tx *Tx) error {
err := db.Update(func(tx *Tx) error {
for i := 0; i < 20; i++ {
if _, _, err := tx.Set(fmt.Sprintf("HELLO:%d", i), "WORLD", nil); err != nil {
return err
@ -463,14 +456,9 @@ func TestAutoShrink(t *testing.T) {
}
}
time.Sleep(time.Second * 3)
if err := db.Close(); err != nil {
t.Fatal(err)
}
db, err = Open("data.db")
if err != nil {
t.Fatal(err)
}
err = db.View(func(tx *Tx) error {
db = testReOpen(t, db)
defer testClose(db)
err := db.View(func(tx *Tx) error {
n, err := tx.Len()
if err != nil {
return err
@ -501,12 +489,8 @@ func TestDatabaseFormat(t *testing.T) {
if err := ioutil.WriteFile("data.db", []byte(resp), 0666); err != nil {
t.Fatal(err)
}
db, err := Open("data.db")
if err != nil {
t.Fatal(err)
}
defer func() { _ = os.RemoveAll("data.db") }()
defer func() { _ = db.Close() }()
db := testOpen(t)
defer testClose(db)
}()
testBadFormat := func(resp string) {
if err := os.RemoveAll("data.db"); err != nil {
@ -544,15 +528,8 @@ func TestDatabaseFormat(t *testing.T) {
}
func TestInsertsAndDeleted(t *testing.T) {
if err := os.RemoveAll("data.db"); err != nil {
t.Fatal(err)
}
db, err := Open("data.db")
if err != nil {
t.Fatal(err)
}
defer func() { _ = os.RemoveAll("data.db") }()
defer func() { _ = db.Close() }()
db := testOpen(t)
defer testClose(db)
if err := db.CreateIndex("any", "*", IndexString); err != nil {
t.Fatal(err)
}
@ -700,15 +677,8 @@ func TestOpeningClosedDatabase(t *testing.T) {
// test shrinking a database.
func TestShrink(t *testing.T) {
if err := os.RemoveAll("data.db"); err != nil {
t.Fatal(err)
}
db, err := Open("data.db")
if err != nil {
t.Fatal(err)
}
defer func() { _ = os.RemoveAll("data.db") }()
defer func() { _ = db.Close() }()
db := testOpen(t)
defer testClose(db)
if err := db.Shrink(); err != nil {
t.Fatal(err)
}
@ -820,17 +790,10 @@ func TestShrink(t *testing.T) {
}
func TestVariousIndexOperations(t *testing.T) {
if err := os.RemoveAll("data.db"); err != nil {
t.Fatal(err)
}
db, err := Open("data.db")
if err != nil {
t.Fatal(err)
}
defer func() { _ = os.RemoveAll("data.db") }()
defer func() { _ = db.Close() }()
db := testOpen(t)
defer testClose(db)
// test creating an index with no index name.
err = db.CreateIndex("", "", nil)
err := db.CreateIndex("", "", nil)
if err == nil {
t.Fatal("should not be able to create an index with no name")
}
@ -939,15 +902,8 @@ func TestPatternMatching(t *testing.T) {
func TestBasic(t *testing.T) {
rand.Seed(time.Now().UnixNano())
if err := os.RemoveAll("data.db"); err != nil {
t.Fatal(err)
}
db, err := Open("data.db")
if err != nil {
t.Fatal(err)
}
defer func() { _ = os.RemoveAll("data.db") }()
defer func() { _ = db.Close() }()
db := testOpen(t)
defer testClose(db)
// create a simple index
if err := db.CreateIndex("users", "fun:user:*", IndexString); err != nil {
@ -1019,8 +975,8 @@ func TestBasic(t *testing.T) {
}
// verify the data has been created
buf := &bytes.Buffer{}
err = db.View(func(tx *Tx) error {
err = tx.Ascend("users", func(key, val string) bool {
err := db.View(func(tx *Tx) error {
err := tx.Ascend("users", func(key, val string) bool {
fmt.Fprintf(buf, "%s %s\n", key, val)
return true
})
@ -1151,16 +1107,9 @@ func TestRectStrings(t *testing.T) {
}
func TestTTL(t *testing.T) {
if err := os.RemoveAll("data.db"); err != nil {
t.Fatal(err)
}
db, err := Open("data.db")
if err != nil {
t.Fatal(err)
}
defer func() { _ = os.RemoveAll("data.db") }()
defer func() { _ = db.Close() }()
err = db.Update(func(tx *Tx) error {
db := testOpen(t)
defer testClose(db)
err := db.Update(func(tx *Tx) error {
if _, _, err := tx.Set("key1", "val1", &SetOptions{Expires: true, TTL: time.Second}); err != nil {
return err
}
@ -1195,17 +1144,10 @@ func TestTTL(t *testing.T) {
}
func TestConfig(t *testing.T) {
if err := os.RemoveAll("data.db"); err != nil {
t.Fatal(err)
}
db, err := Open("data.db")
if err != nil {
t.Fatal(err)
}
defer func() { _ = os.RemoveAll("data.db") }()
defer func() { _ = db.Close() }()
db := testOpen(t)
defer testClose(db)
err = db.SetConfig(Config{SyncPolicy: SyncPolicy(-1)})
err := db.SetConfig(Config{SyncPolicy: SyncPolicy(-1)})
if err == nil {
t.Fatal("expecting a config syncpolicy error")
}
@ -1477,3 +1419,128 @@ func Benchmark_Spatial_2D(t *testing.B) {
}
*/
func TestCoverCloseAlreadyClosed(t *testing.T) {
db := testOpen(t)
defer testClose(db)
_ = db.file.Close()
if err := db.Close(); err == nil {
t.Fatal("expecting an error")
}
}
func TestCoverConfigClosed(t *testing.T) {
db := testOpen(t)
defer testClose(db)
_ = db.Close()
var config Config
if err := db.ReadConfig(&config); err != ErrDatabaseClosed {
t.Fatal("expecting database closed error")
}
if err := db.SetConfig(config); err != ErrDatabaseClosed {
t.Fatal("expecting database closed error")
}
}
func TestCoverShrinkShrink(t *testing.T) {
db := testOpen(t)
defer testClose(db)
if err := db.Update(func(tx *Tx) error {
for i := 0; i < 10000; i++ {
_, _, err := tx.Set(fmt.Sprintf("%d", i), fmt.Sprintf("%d", i), nil)
if err != nil {
return err
}
}
return nil
}); err != nil {
t.Fatal(err)
}
if err := db.Update(func(tx *Tx) error {
for i := 250; i < 250+100; i++ {
_, err := tx.Delete(fmt.Sprintf("%d", i))
if err != nil {
return err
}
}
return nil
}); err != nil {
t.Fatal(err)
}
var err1, err2 error
var wg sync.WaitGroup
wg.Add(2)
go func() {
defer wg.Done()
err1 = db.Shrink()
}()
go func() {
defer wg.Done()
err2 = db.Shrink()
}()
wg.Wait()
//println(123)
//fmt.Printf("%v\n%v\n", err1, err2)
if err1 != ErrShrinkInProcess && err2 != ErrShrinkInProcess {
t.Fatal("expecting a shrink in process error")
}
db = testReOpen(t, db)
defer testClose(db)
if err := db.View(func(tx *Tx) error {
n, err := tx.Len()
if err != nil {
return err
}
if n != 9900 {
t.Fatal("expecting 9900 items")
}
return nil
}); err != nil {
t.Fatal(err)
}
}
func TestJSONIndex(t *testing.T) {
db := testOpen(t)
defer testClose(db)
_ = db.CreateIndex("last_name", "*", IndexJSON("name.last"))
_ = db.CreateIndex("last_name_cs", "*", IndexJSONCaseSensitive("name.last"))
_ = db.CreateIndex("age", "*", IndexJSON("age"))
_ = db.CreateIndex("student", "*", IndexJSON("student"))
_ = db.Update(func(tx *Tx) error {
_, _, _ = tx.Set("1", `{"name":{"first":"Tom","last":"Johnson"},"age":38,"student":false}`, nil)
_, _, _ = tx.Set("2", `{"name":{"first":"Janet","last":"Prichard"},"age":47,"student":true}`, nil)
_, _, _ = tx.Set("3", `{"name":{"first":"Carol","last":"Anderson"},"age":52,"student":true}`, nil)
_, _, _ = tx.Set("4", `{"name":{"first":"Alan","last":"Cooper"},"age":28,"student":false}`, nil)
_, _, _ = tx.Set("5", `{"name":{"first":"bill","last":"frank"},"age":21,"student":true}`, nil)
_, _, _ = tx.Set("6", `{"name":{"first":"sally","last":"randall"},"age":68,"student":false}`, nil)
return nil
})
var keys []string
_ = db.View(func(tx *Tx) error {
_ = tx.Ascend("last_name_cs", func(key, value string) bool {
//fmt.Printf("%s: %s\n", key, value)
keys = append(keys, key)
return true
})
_ = tx.Ascend("last_name", func(key, value string) bool {
//fmt.Printf("%s: %s\n", key, value)
keys = append(keys, key)
return true
})
_ = tx.Ascend("age", func(key, value string) bool {
//fmt.Printf("%s: %s\n", key, value)
keys = append(keys, key)
return true
})
_ = tx.Ascend("student", func(key, value string) bool {
//fmt.Printf("%s: %s\n", key, value)
keys = append(keys, key)
return true
})
return nil
})
expect := "3,4,1,2,5,6,3,4,5,1,2,6,5,4,1,2,3,6,1,4,6,2,3,5"
if strings.Join(keys, ",") != expect {
t.Fatalf("expected %v, got %v", expect, strings.Join(keys, ","))
}
}