From 93806b18dcc254843dec835a89a1900f9dd271fa Mon Sep 17 00:00:00 2001 From: Josh Baker Date: Tue, 2 Aug 2016 17:24:56 -0700 Subject: [PATCH] added JSON indexes --- README.md | 60 +++++++++- buntdb.go | 63 +++++++++++ buntdb_test.go | 295 ++++++++++++++++++++++++++++++------------------- 3 files changed, 301 insertions(+), 117 deletions(-) diff --git a/README.md b/README.md index e10cf81..ebf660a 100644 --- a/README.md +++ b/README.md @@ -32,12 +32,13 @@ Features - In-memory database for [fast reads and writes](#performance) - Embeddable with a [simple API](https://godoc.org/github.com/tidwall/buntdb) - [Spatial indexing](#spatial-indexes) for up to 20 dimensions; Useful for Geospatial data +- Index fields inside [JSON](#json-indexes) documents - Create [custom indexes](#custom-indexes) for any data type - [Built-in types](#built-in-types) that are easy to get up & running; String, Uint, Int, Float - Flexible [iteration](#iterating) of data; ascending, descending, and ranges - [Durable append-only file](#append-only-file) format for persistence. - Option to evict old items with an [expiration](#data-expiration) TTL -- Tight codebase, under 1K loc using the `cloc` command +- Tight codebase, ~1K loc using the `cloc` command - ACID semantics with locking [transactions](#transactions) that support rollbacks Getting Started @@ -260,7 +261,7 @@ user:1:name 49 user:4:name 63 ``` -### Spatial Indexes +## Spatial Indexes BuntDB has support for spatial indexes by storing rectangles in an [R-tree](https://en.wikipedia.org/wiki/R-tree). An R-tree is organized in a similar manner as a [B-tree](https://en.wikipedia.org/wiki/B-tree), and both are balanced trees. But, an R-tree is special because it can operate on data that is in multiple dimensions. This is super handy for Geospatial applications. To create a spatial index use the `CreateSpatialIndex` function: @@ -296,7 +297,7 @@ db.View(func(tx *buntdb.Tx) error { This will get all three positions. -#### Spatial bracket syntax +### Spatial bracket syntax The bracket syntax `[-117 30],[-112 36]` is unique to BuntDB, and it's how the built-in rectangles are processed. But, you are not limited to this syntax. Whatever Rect function you choose to use during `CreateSpatialIndex` will be used to process the parameter, in this case it's `IndexRect`. @@ -346,9 +347,62 @@ Which will return: ``` +## JSON Indexes +Indexes can be created on individual fields inside JSON documents. +For example: +```go +package main +import ( + "fmt" + + "github.com/tidwall/buntdb" +) + +func main() { + db, _ := buntdb.Open(":memory:") + db.CreateIndex("last_name", "*", buntdb.IndexJSON("name.last")) + db.CreateIndex("age", "*", buntdb.IndexJSON("age")) + db.Update(func(tx *buntdb.Tx) error { + tx.Set("1", `{"name":{"first":"Tom","last":"Johnson"},"age":38}`, nil) + tx.Set("2", `{"name":{"first":"Janet","last":"Prichard"},"age":47}`, nil) + tx.Set("3", `{"name":{"first":"Carol","last":"Anderson"},"age":52}`, nil) + tx.Set("4", `{"name":{"first":"Alan","last":"Cooper"},"age":28}`, nil) + return nil + }) + db.View(func(tx *buntdb.Tx) error { + fmt.Println("Order by last name") + tx.Ascend("last_name", func(key, value string) bool { + fmt.Printf("%s: %s\n", key, value) + return true + }) + fmt.Println("Order by age") + tx.Ascend("age", func(key, value string) bool { + fmt.Printf("%s: %s\n", key, value) + return true + }) + return nil + }) +} +``` + +Results: + +``` +Order by last name +3: {"name":{"first":"Carol","last":"Anderson"},"age":52} +4: {"name":{"first":"Alan","last":"Cooper"},"age":28} +1: {"name":{"first":"Tom","last":"Johnson"},"age":38} +2: {"name":{"first":"Janet","last":"Prichard"},"age":47} + +Order by age +4: {"name":{"first":"Alan","last":"Cooper"},"age":28} +1: {"name":{"first":"Tom","last":"Johnson"},"age":38} +2: {"name":{"first":"Janet","last":"Prichard"},"age":47} +3: {"name":{"first":"Carol","last":"Anderson"},"age":52} +``` ### Data Expiration diff --git a/buntdb.go b/buntdb.go index d9eeb82..a53bf94 100644 --- a/buntdb.go +++ b/buntdb.go @@ -7,6 +7,7 @@ package buntdb import ( "bufio" "bytes" + "encoding/json" "errors" "io" "os" @@ -530,6 +531,7 @@ func (db *DB) Shrink() error { return err } db.mu.Unlock() + time.Sleep(time.Second / 4) // wait just a bit before starting f, err := os.Create(tmpname) if err != nil { return err @@ -1530,3 +1532,64 @@ func IndexFloat(a, b string) bool { ib, _ := strconv.ParseFloat(b, 64) return ia < ib } + +// IndexJSON provides for the ability to create an index on any JSON field. +// When the field is a string, the comparison will be case-insensitive. +// It returns a helper function used by CreateIndex. +func IndexJSON(path string) func(a, b string) bool { + return jsonIndex(path, false) +} + +// IndexJSONCaseSensitive provides for the ability to create an index on +// any JSON field. +// When the field is a string, the comparison will be case-sensitive. +// It returns a helper function used by CreateIndex. +func IndexJSONCaseSensitive(path string) func(a, b string) bool { + return jsonIndex(path, true) +} + +func jsonIndex(path string, cs bool) func(a, b string) bool { + return func(a, b string) bool { + var am, bm map[string]interface{} + _ = json.Unmarshal([]byte(a), &am) + _ = json.Unmarshal([]byte(b), &bm) + parts := strings.Split(path, ".") + for i, part := range parts { + if am == nil { + if bm == nil { + return false + } + return true + } else if bm == nil { + return false + } + if i < len(parts)-1 { + am, _ = am[part].(map[string]interface{}) + bm, _ = bm[part].(map[string]interface{}) + continue + } + a, b := am[part], bm[part] + switch av := a.(type) { + case bool: + if bv, ok := b.(bool); ok { + return !av && bv + } + case float64: + if bv, ok := b.(float64); ok { + return av < bv + } + case string: + if bv, ok := b.(string); ok { + if cs { + return av < bv + } + return strings.ToLower(av) < strings.ToLower(bv) + } + case nil: + return b != nil + } + return false + } + return false + } +} diff --git a/buntdb_test.go b/buntdb_test.go index 7a782b5..6a8532f 100644 --- a/buntdb_test.go +++ b/buntdb_test.go @@ -10,20 +10,39 @@ import ( "os" "strconv" "strings" + "sync" "testing" "time" ) -func TestBackgroudOperations(t *testing.T) { +func testOpen(t testing.TB) *DB { if err := os.RemoveAll("data.db"); err != nil { t.Fatal(err) } + return testReOpen(t, nil) +} + +func testReOpen(t testing.TB, db *DB) *DB { + if db != nil { + if err := db.Close(); err != nil { + t.Fatal(err) + } + } db, err := Open("data.db") if err != nil { t.Fatal(err) } - defer func() { _ = os.RemoveAll("data.db") }() - defer func() { _ = db.Close() }() + return db +} + +func testClose(db *DB) { + _ = db.Close() + _ = os.RemoveAll("data.db") +} + +func TestBackgroudOperations(t *testing.T) { + db := testOpen(t) + defer testClose(db) for i := 0; i < 1000; i++ { if err := db.Update(func(tx *Tx) error { for j := 0; j < 200; j++ { @@ -40,7 +59,7 @@ func TestBackgroudOperations(t *testing.T) { } } n := 0 - err = db.View(func(tx *Tx) error { + err := db.View(func(tx *Tx) error { var err error n, err = tx.Len() return err @@ -52,15 +71,8 @@ func TestBackgroudOperations(t *testing.T) { t.Fatalf("expecting '%v', got '%v'", 201, n) } time.Sleep(time.Millisecond * 1500) - if err := db.Close(); err != nil { - t.Fatal(err) - } - db, err = Open("data.db") - if err != nil { - t.Fatal(err) - } - defer func() { _ = os.RemoveAll("data.db") }() - defer func() { _ = db.Close() }() + db = testReOpen(t, db) + defer testClose(db) n = 0 err = db.View(func(tx *Tx) error { var err error @@ -75,15 +87,8 @@ func TestBackgroudOperations(t *testing.T) { } } func TestVariousTx(t *testing.T) { - if err := os.RemoveAll("data.db"); err != nil { - t.Fatal(err) - } - db, err := Open("data.db") - if err != nil { - t.Fatal(err) - } - defer func() { _ = os.RemoveAll("data.db") }() - defer func() { _ = db.Close() }() + db := testOpen(t) + defer testClose(db) if err := db.Update(func(tx *Tx) error { _, _, err := tx.Set("hello", "planet", nil) return err @@ -98,7 +103,7 @@ func TestVariousTx(t *testing.T) { t.Fatalf("did not correctly receive the user-defined transaction error.") } var val string - err = db.View(func(tx *Tx) error { + err := db.View(func(tx *Tx) error { var err error val, err = tx.Get("hello") return err @@ -419,17 +424,10 @@ func TestNoExpiringItem(t *testing.T) { } } func TestAutoShrink(t *testing.T) { - if err := os.RemoveAll("data.db"); err != nil { - t.Fatal(err) - } - db, err := Open("data.db") - if err != nil { - t.Fatal(err) - } - defer func() { _ = os.RemoveAll("data.db") }() - defer func() { _ = db.Close() }() + db := testOpen(t) + defer testClose(db) for i := 0; i < 1000; i++ { - err = db.Update(func(tx *Tx) error { + err := db.Update(func(tx *Tx) error { for i := 0; i < 20; i++ { if _, _, err := tx.Set(fmt.Sprintf("HELLO:%d", i), "WORLD", nil); err != nil { return err @@ -441,16 +439,11 @@ func TestAutoShrink(t *testing.T) { t.Fatal(err) } } - if err := db.Close(); err != nil { - t.Fatal(err) - } - db, err = Open("data.db") - if err != nil { - t.Fatal(err) - } + db = testReOpen(t, db) + defer testClose(db) db.config.AutoShrinkMinSize = 64 * 1024 // 64K for i := 0; i < 2000; i++ { - err = db.Update(func(tx *Tx) error { + err := db.Update(func(tx *Tx) error { for i := 0; i < 20; i++ { if _, _, err := tx.Set(fmt.Sprintf("HELLO:%d", i), "WORLD", nil); err != nil { return err @@ -463,14 +456,9 @@ func TestAutoShrink(t *testing.T) { } } time.Sleep(time.Second * 3) - if err := db.Close(); err != nil { - t.Fatal(err) - } - db, err = Open("data.db") - if err != nil { - t.Fatal(err) - } - err = db.View(func(tx *Tx) error { + db = testReOpen(t, db) + defer testClose(db) + err := db.View(func(tx *Tx) error { n, err := tx.Len() if err != nil { return err @@ -501,12 +489,8 @@ func TestDatabaseFormat(t *testing.T) { if err := ioutil.WriteFile("data.db", []byte(resp), 0666); err != nil { t.Fatal(err) } - db, err := Open("data.db") - if err != nil { - t.Fatal(err) - } - defer func() { _ = os.RemoveAll("data.db") }() - defer func() { _ = db.Close() }() + db := testOpen(t) + defer testClose(db) }() testBadFormat := func(resp string) { if err := os.RemoveAll("data.db"); err != nil { @@ -544,15 +528,8 @@ func TestDatabaseFormat(t *testing.T) { } func TestInsertsAndDeleted(t *testing.T) { - if err := os.RemoveAll("data.db"); err != nil { - t.Fatal(err) - } - db, err := Open("data.db") - if err != nil { - t.Fatal(err) - } - defer func() { _ = os.RemoveAll("data.db") }() - defer func() { _ = db.Close() }() + db := testOpen(t) + defer testClose(db) if err := db.CreateIndex("any", "*", IndexString); err != nil { t.Fatal(err) } @@ -700,15 +677,8 @@ func TestOpeningClosedDatabase(t *testing.T) { // test shrinking a database. func TestShrink(t *testing.T) { - if err := os.RemoveAll("data.db"); err != nil { - t.Fatal(err) - } - db, err := Open("data.db") - if err != nil { - t.Fatal(err) - } - defer func() { _ = os.RemoveAll("data.db") }() - defer func() { _ = db.Close() }() + db := testOpen(t) + defer testClose(db) if err := db.Shrink(); err != nil { t.Fatal(err) } @@ -820,17 +790,10 @@ func TestShrink(t *testing.T) { } func TestVariousIndexOperations(t *testing.T) { - if err := os.RemoveAll("data.db"); err != nil { - t.Fatal(err) - } - db, err := Open("data.db") - if err != nil { - t.Fatal(err) - } - defer func() { _ = os.RemoveAll("data.db") }() - defer func() { _ = db.Close() }() + db := testOpen(t) + defer testClose(db) // test creating an index with no index name. - err = db.CreateIndex("", "", nil) + err := db.CreateIndex("", "", nil) if err == nil { t.Fatal("should not be able to create an index with no name") } @@ -939,15 +902,8 @@ func TestPatternMatching(t *testing.T) { func TestBasic(t *testing.T) { rand.Seed(time.Now().UnixNano()) - if err := os.RemoveAll("data.db"); err != nil { - t.Fatal(err) - } - db, err := Open("data.db") - if err != nil { - t.Fatal(err) - } - defer func() { _ = os.RemoveAll("data.db") }() - defer func() { _ = db.Close() }() + db := testOpen(t) + defer testClose(db) // create a simple index if err := db.CreateIndex("users", "fun:user:*", IndexString); err != nil { @@ -1019,8 +975,8 @@ func TestBasic(t *testing.T) { } // verify the data has been created buf := &bytes.Buffer{} - err = db.View(func(tx *Tx) error { - err = tx.Ascend("users", func(key, val string) bool { + err := db.View(func(tx *Tx) error { + err := tx.Ascend("users", func(key, val string) bool { fmt.Fprintf(buf, "%s %s\n", key, val) return true }) @@ -1151,16 +1107,9 @@ func TestRectStrings(t *testing.T) { } func TestTTL(t *testing.T) { - if err := os.RemoveAll("data.db"); err != nil { - t.Fatal(err) - } - db, err := Open("data.db") - if err != nil { - t.Fatal(err) - } - defer func() { _ = os.RemoveAll("data.db") }() - defer func() { _ = db.Close() }() - err = db.Update(func(tx *Tx) error { + db := testOpen(t) + defer testClose(db) + err := db.Update(func(tx *Tx) error { if _, _, err := tx.Set("key1", "val1", &SetOptions{Expires: true, TTL: time.Second}); err != nil { return err } @@ -1195,17 +1144,10 @@ func TestTTL(t *testing.T) { } func TestConfig(t *testing.T) { - if err := os.RemoveAll("data.db"); err != nil { - t.Fatal(err) - } - db, err := Open("data.db") - if err != nil { - t.Fatal(err) - } - defer func() { _ = os.RemoveAll("data.db") }() - defer func() { _ = db.Close() }() + db := testOpen(t) + defer testClose(db) - err = db.SetConfig(Config{SyncPolicy: SyncPolicy(-1)}) + err := db.SetConfig(Config{SyncPolicy: SyncPolicy(-1)}) if err == nil { t.Fatal("expecting a config syncpolicy error") } @@ -1477,3 +1419,128 @@ func Benchmark_Spatial_2D(t *testing.B) { } */ +func TestCoverCloseAlreadyClosed(t *testing.T) { + db := testOpen(t) + defer testClose(db) + _ = db.file.Close() + if err := db.Close(); err == nil { + t.Fatal("expecting an error") + } +} + +func TestCoverConfigClosed(t *testing.T) { + db := testOpen(t) + defer testClose(db) + _ = db.Close() + var config Config + if err := db.ReadConfig(&config); err != ErrDatabaseClosed { + t.Fatal("expecting database closed error") + } + if err := db.SetConfig(config); err != ErrDatabaseClosed { + t.Fatal("expecting database closed error") + } +} +func TestCoverShrinkShrink(t *testing.T) { + db := testOpen(t) + defer testClose(db) + if err := db.Update(func(tx *Tx) error { + for i := 0; i < 10000; i++ { + _, _, err := tx.Set(fmt.Sprintf("%d", i), fmt.Sprintf("%d", i), nil) + if err != nil { + return err + } + } + return nil + }); err != nil { + t.Fatal(err) + } + if err := db.Update(func(tx *Tx) error { + for i := 250; i < 250+100; i++ { + _, err := tx.Delete(fmt.Sprintf("%d", i)) + if err != nil { + return err + } + } + return nil + }); err != nil { + t.Fatal(err) + } + var err1, err2 error + var wg sync.WaitGroup + wg.Add(2) + go func() { + defer wg.Done() + err1 = db.Shrink() + }() + go func() { + defer wg.Done() + err2 = db.Shrink() + }() + wg.Wait() + //println(123) + //fmt.Printf("%v\n%v\n", err1, err2) + if err1 != ErrShrinkInProcess && err2 != ErrShrinkInProcess { + t.Fatal("expecting a shrink in process error") + } + db = testReOpen(t, db) + defer testClose(db) + if err := db.View(func(tx *Tx) error { + n, err := tx.Len() + if err != nil { + return err + } + if n != 9900 { + t.Fatal("expecting 9900 items") + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +func TestJSONIndex(t *testing.T) { + db := testOpen(t) + defer testClose(db) + + _ = db.CreateIndex("last_name", "*", IndexJSON("name.last")) + _ = db.CreateIndex("last_name_cs", "*", IndexJSONCaseSensitive("name.last")) + _ = db.CreateIndex("age", "*", IndexJSON("age")) + _ = db.CreateIndex("student", "*", IndexJSON("student")) + _ = db.Update(func(tx *Tx) error { + _, _, _ = tx.Set("1", `{"name":{"first":"Tom","last":"Johnson"},"age":38,"student":false}`, nil) + _, _, _ = tx.Set("2", `{"name":{"first":"Janet","last":"Prichard"},"age":47,"student":true}`, nil) + _, _, _ = tx.Set("3", `{"name":{"first":"Carol","last":"Anderson"},"age":52,"student":true}`, nil) + _, _, _ = tx.Set("4", `{"name":{"first":"Alan","last":"Cooper"},"age":28,"student":false}`, nil) + _, _, _ = tx.Set("5", `{"name":{"first":"bill","last":"frank"},"age":21,"student":true}`, nil) + _, _, _ = tx.Set("6", `{"name":{"first":"sally","last":"randall"},"age":68,"student":false}`, nil) + return nil + }) + var keys []string + _ = db.View(func(tx *Tx) error { + _ = tx.Ascend("last_name_cs", func(key, value string) bool { + //fmt.Printf("%s: %s\n", key, value) + keys = append(keys, key) + return true + }) + _ = tx.Ascend("last_name", func(key, value string) bool { + //fmt.Printf("%s: %s\n", key, value) + keys = append(keys, key) + return true + }) + _ = tx.Ascend("age", func(key, value string) bool { + //fmt.Printf("%s: %s\n", key, value) + keys = append(keys, key) + return true + }) + _ = tx.Ascend("student", func(key, value string) bool { + //fmt.Printf("%s: %s\n", key, value) + keys = append(keys, key) + return true + }) + return nil + }) + expect := "3,4,1,2,5,6,3,4,5,1,2,6,5,4,1,2,3,6,1,4,6,2,3,5" + if strings.Join(keys, ",") != expect { + t.Fatalf("expected %v, got %v", expect, strings.Join(keys, ",")) + } +}