diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 47f78a8..21cbd2d 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -14,7 +14,7 @@ jobs: - name: Set up Go 1.x uses: actions/setup-go@v5 with: - go-version: ^1.22.6 + go-version: ^1.24.2 - name: Check out code into the Go module directory uses: actions/checkout@v4 diff --git a/README.md b/README.md index df83c47..6acf7e4 100644 --- a/README.md +++ b/README.md @@ -1,97 +1,138 @@ # benchtop - +Benchtop is a framework for storing large JSON documents as JSON blobs directly to disk with indexing provided by the key value database PebbleDb. ## Command line Build: + ``` make ``` -### Load data -``` -benchtop load test.data embeddings test.ndjson -``` - - `test.data` : name of archive - - `embeddings` : name of table - - `test.ndjson` : file to be loaded +#### Table Entries -### List tables -``` -benchtop tables test.data -``` +Benchtop KV Store Key Structure -### Get keys -``` -benchtop keys test.data embeddings -``` +This document outlines the binary key structure used by the benchtop package for storing and indexing data in a key-value (KV) store like PebbleDB. The structure is designed for efficient lookups, scans, and indexing of tabular or graph-like data by leveraging key prefixes and a consistent binary layout. +Core Concepts -### Get records -``` -benchtop get test.data embeddings ... -``` +1. Key Prefixes +All keys begin with a single-byte prefix to denote the type of data they represent. This allows different types of data to coexist in the same keyspace and enables efficient prefix scans (e.g., "find all position keys"). -## Format + T (TablePrefix): Keys related to table metadata. -Data is stored in a large binary files and index using [Pebble Key Value storage](https://github.com/cockroachdb/pebble). + P (PosPrefix): Keys that map a row ID to its physical location. + F (FieldPrefix): Keys that form a secondary index on specific field values. 
-### Key/Value format -Written using [Pebble](https://github.com/cockroachdb/) + R (RFieldPrefix): Keys that form a reverse index for efficient index deletion. +2. Field Separator -#### Table Entries +A special byte separator, FieldSep (ASCII 0x1F - Unit Separator), is used as a delimiter within compound keys (like the field indexes). This character is chosen because it is a non-printable control character that is not expected to appear in standard string data, ensuring reliable splitting of key components. +Key Types + +1. Table Keys + + Purpose: To store metadata or identifiers for data tables. + + Structure: T | TableId + + T: The literal character 'T' (TablePrefix). + + TableId: The unique byte slice identifier for the table. + + Functions: + + NewTableKey(id []byte): Creates a new table key. + + ParseTableKey(key []byte): Extracts the TableId from a table key. + +2. Position (Row Location) Keys + + Purpose: These keys are the primary index, mapping a unique row/vertex ID to its physical location (offset and size) in a data file. + + Structure: P | TableId | RowId + + P: The literal character 'P' (PosPrefix). + + TableId: A 2-byte uint16 (little-endian) identifying the table the row belongs to. + + RowId: The unique byte slice identifier for the row/vertex. + + Associated Value: The value stored for this key is an encoded RowLoc struct (see below). + + Functions: + + NewPosKey(table uint16, name []byte): Creates a new position key. + + ParsePosKey(key []byte): Extracts the TableId and RowId from a key. + + NewPosKeyPrefix(table uint16): Creates a key prefix for scanning all rows within a specific table. + +3. Field Index Keys + + Purpose: To create a secondary index on specific field values. This allows for fast lookups of all rows that have a certain value for a given field (e.g., find all users where city == 'New York'). + + Structure: F 0x1F Field 0x1F Label 0x1F Value 0x1F RowId + + F: The literal character 'F' (FieldPrefix). + + 0x1F: The FieldSep byte. 
+ + Field: The name of the indexed field (e.g., "city"). + + Label: The label or type of the row (e.g., "user"). + + Value: The JSON-encoded value of the field (e.g., "New York"). + + RowId: The unique ID of the row that contains this field value. + + Functions: + + FieldKey(field, label string, value any, rowID []byte): Creates a full field index key. + + FieldKeyParse(key []byte): Parses a field key back into its components. + + FieldLabelKey(field, label string): Creates a key prefix for scanning all indexed values for a specific field and label. + +4. Reverse Field Index Keys + + Purpose: To enable the efficient deletion of a row's entries from the field indexes. When a row is deleted, this reverse index is used to quickly find all the Field Index Keys that point to it, without having to scan the entire index. + + Structure: R 0x1F Label 0x1F Field 0x1F RowId + + R: The literal character 'R' (RFieldPrefix). + + 0x1F: The FieldSep byte. + + Label: The label of the row. -**Key** -|bytes|0|5:... | -|-|-|---------| -|type|t|<[]byte> | -|Desc|prefix|user ID| + Field: The name of the indexed field. -The user ID is provided by the user, but should be checked to ensure it is unique. + RowId: The unique ID of the row. -**Value** -|bytes|0:4|4:...| -|-|-|-------| -|type|[]byte| -|Desc|Json formatted Column definitions| + Functions: -First is the Table system ID, which is used as a prefix during key lookup. Then rest -of the bytes describe a list of columns and their data types. + RFieldKey(label, field, rowID string): Creates a new reverse field key. -#### Table ID -**Key** -|bytes|0|5:... | -|-|-|---------| -|type|T|uint32| -|Desc|prefix|system table ID| +Value Structures +RowLoc -The generated ID for a table. + Purpose: Represents the physical location of a data record, acting as a "pointer" to the full data object stored elsewhere. It is the value component for a Position Key. 
-**Value** -|bytes|0:4|4:...| -|-|-|-------| -|type|[]byte| -|Desc|User ID of table| + Structure: A fixed 10-byte binary layout. + Section (Bytes 0-1): A uint16 identifying the file or section where the data is stored. -#### ID Entries -These map the user specified ID to a data block specified with offset and size. + Offset (Bytes 2-5): A uint32 representing the starting byte offset within the section. -**Key** -|bytes|0|1:5|1:... | -|-|-|-|--------| -|type|k|uint32|<[]byte> | -|Desc|prefix|system table ID|user row ID| + Size (Bytes 6-9): A uint32 representing the length of the data in bytes. -**Value** -|bytes|0:8|8:16| -|-|-|---------| -|type|uint64|uint64| -|Desc|offset|size| + Functions: + EncodeRowLoc(loc *RowLoc): Encodes a RowLoc struct into a 10-byte slice. -### Data file format -Sequentially written [JSON](https://www.json.org/json-en.html/) entries. + DecodeRowLoc(v []byte): Decodes a 10-byte slice back into a RowLoc struct. diff --git a/cmdline/benchtop/cmds/get/main.go b/cmdline/benchtop/cmds/get/main.go index e6401bf..83d7f1b 100644 --- a/cmdline/benchtop/cmds/get/main.go +++ b/cmdline/benchtop/cmds/get/main.go @@ -34,18 +34,16 @@ var Cmd = &cobra.Command{ TS, _ := driver.(*jsontable.JSONDriver) for _, key := range keys { - val, closer, err := TS.Pb.Db.Get([]byte(key)) + val, closer, err := TS.Pkv.Get([]byte(key)) if err != nil { if err != pebble.ErrNotFound { log.Errorf("Err on dr.Pb.Get for key %s in CacheLoader: %v", key, err) } log.Errorln("ERR: ", err) } - fmt.Println("VAL: ", val) - offset, size := benchtop.ParsePosValue(val) closer.Close() - data, err := table.GetRow(benchtop.RowLoc{Offset: offset, Size: size}) + data, err := table.GetRow(benchtop.DecodeRowLoc(val)) if err == nil { out, err := json.Marshal(data) if err != nil { diff --git a/cmdline/benchtop/cmds/keys/main.go b/cmdline/benchtop/cmds/keys/main.go index 1cf7d05..9c03902 100644 --- a/cmdline/benchtop/cmds/keys/main.go +++ b/cmdline/benchtop/cmds/keys/main.go @@ -4,6 +4,8 @@ import ( 
"fmt" "github.com/bmeg/benchtop/jsontable" + jTable "github.com/bmeg/benchtop/jsontable/table" + "github.com/spf13/cobra" ) @@ -27,12 +29,14 @@ var Cmd = &cobra.Command{ return err } - keys, err := table.Keys() + jT, _ := table.(*jTable.JSONTable) + + keys, err := driver.ListTableKeys(jT.TableId) if err != nil { return err } for k := range keys { - fmt.Printf("%s\n", k) + fmt.Printf("%s\n", k.Key) } return nil }, diff --git a/cmdline/benchtop/cmds/load/main.go b/cmdline/benchtop/cmds/load/main.go deleted file mode 100644 index e95541f..0000000 --- a/cmdline/benchtop/cmds/load/main.go +++ /dev/null @@ -1,71 +0,0 @@ -package load - -import ( - "encoding/json" - "log" - - "github.com/bmeg/benchtop" - "github.com/bmeg/benchtop/jsontable" - "github.com/bmeg/benchtop/util" - "github.com/schollz/progressbar/v3" - "github.com/spf13/cobra" -) - -var keyField = "key" - -var Cmd = &cobra.Command{ - Use: "load ", - Short: "Load data", - Long: ``, - Args: cobra.ExactArgs(3), - RunE: func(cmd *cobra.Command, args []string) error { - - dbPath := args[0] - tableName := args[1] - filePath := args[2] - - driver, err := jsontable.NewJSONDriver(dbPath) - if err != nil { - return err - } - - table, err := driver.New(tableName, []benchtop.ColumnDef{}) - if err != nil { - return err - } - - lineCount, _ := util.LineCounter(filePath) - - lines, err := util.StreamLines(filePath, 10) - if err != nil { - return err - } - - records := make(chan benchtop.Row, 10) - go func() { - defer close(records) - bar := progressbar.Default(int64(lineCount)) - - for l := range lines { - data := map[string]any{} - json.Unmarshal([]byte(l), &data) - - if key, ok := data[keyField]; ok { - keyStr := key.(string) - records <- benchtop.Row{Id: []byte(keyStr), Data: data} - } else { - log.Printf("Key %s not found", keyField) - } - bar.Add(1) - } - }() - table.Load(records) - driver.Close() - return nil - }, -} - -func init() { - flags := Cmd.Flags() - flags.StringVarP(&keyField, "key", "k", keyField, "Field to 
use for key") -} diff --git a/cmdline/benchtop/cmds/root.go b/cmdline/benchtop/cmds/root.go index a6e05dc..cff305d 100644 --- a/cmdline/benchtop/cmds/root.go +++ b/cmdline/benchtop/cmds/root.go @@ -5,7 +5,6 @@ import ( "github.com/bmeg/benchtop/cmdline/benchtop/cmds/get" "github.com/bmeg/benchtop/cmdline/benchtop/cmds/keys" - "github.com/bmeg/benchtop/cmdline/benchtop/cmds/load" "github.com/bmeg/benchtop/cmdline/benchtop/cmds/tables" "github.com/spf13/cobra" @@ -20,7 +19,6 @@ var RootCmd = &cobra.Command{ func init() { RootCmd.AddCommand(keys.Cmd) - RootCmd.AddCommand(load.Cmd) RootCmd.AddCommand(tables.Cmd) RootCmd.AddCommand(get.Cmd) diff --git a/distqueue/distances.go b/distqueue/distances.go deleted file mode 100644 index 81c1506..0000000 --- a/distqueue/distances.go +++ /dev/null @@ -1,12 +0,0 @@ -package distqueue - -import "math" - -func Euclidean(a []float32, b []float32) float32 { - s := float32(0.0) - for i := range a { - x := a[i] - b[i] - s += (x * x) - } - return float32(math.Sqrt(float64(s))) -} diff --git a/distqueue/distqueue.go b/distqueue/distqueue.go deleted file mode 100644 index 71abb4b..0000000 --- a/distqueue/distqueue.go +++ /dev/null @@ -1,88 +0,0 @@ -package distqueue - -import ( - "slices" - - "golang.org/x/exp/constraints" -) - -type Element[D constraints.Ordered, V any] struct { - Dist D - Value V -} - -type DistQueueMinCapped[D constraints.Ordered, V any] []Element[D, V] -type DistQueueMax[D constraints.Ordered, V any] []Element[D, V] -type DistQueueMin[D constraints.Ordered, V any] []Element[D, V] - -// - -func minSort[D constraints.Ordered, V any](a, b Element[D, V]) int { - if a.Dist < b.Dist { - return -1 - } else if a.Dist > b.Dist { - return 1 - } - return 0 -} - -func maxSort[D constraints.Ordered, V any](a, b Element[D, V]) int { - if a.Dist < b.Dist { - return 1 - } else if a.Dist > b.Dist { - return -1 - } - return 0 -} - -// - -func NewMinCapped[D constraints.Ordered, V any](c int) DistQueueMinCapped[D, V] { - out := 
make(DistQueueMinCapped[D, V], 0, c) - return out -} - -func NewMax[D constraints.Ordered, V any]() DistQueueMax[D, V] { - out := make(DistQueueMax[D, V], 0, 100) - return out -} - -func NewMin[D constraints.Ordered, V any]() DistQueueMin[D, V] { - out := make(DistQueueMin[D, V], 0, 100) - return out -} - -func (d *DistQueueMax[D, V]) Insert(dist D, v V) { - *d = append(*d, Element[D, V]{dist, v}) - slices.SortFunc(*d, maxSort) -} - -func (d *DistQueueMin[D, V]) Insert(dist D, v V) { - *d = append(*d, Element[D, V]{dist, v}) - slices.SortFunc(*d, minSort) -} - -func (d *DistQueueMinCapped[D, V]) Insert(dist D, v V) { - if len(*d) == cap(*d) { - if dist > (*d)[len(*d)-1].Dist { - return - } - (*d) = (*d)[:len((*d))-1] - } - *d = append(*d, Element[D, V]{dist, v}) - slices.SortFunc(*d, minSort) -} - -func (d *DistQueueMinCapped[D, V]) Max() D { - return (*d)[len(*d)-1].Dist -} - -func (d *DistQueueMinCapped[D, V]) Filled() bool { - return len(*d) == cap(*d) -} - -func (d *DistQueueMin[D, V]) Pop() (D, V) { - out := (*d)[0] - (*d) = (*d)[1:] - return out.Dist, out.Value -} diff --git a/examples/vecload.go b/examples/vecload.go deleted file mode 100644 index 36f6450..0000000 --- a/examples/vecload.go +++ /dev/null @@ -1,63 +0,0 @@ -package main - -import ( - "encoding/json" - "flag" - "fmt" - "strings" - - "github.com/bmeg/benchtop" - "github.com/bmeg/benchtop/jsontable" - "github.com/bmeg/benchtop/util" - - "github.com/schollz/progressbar/v3" -) - -func main() { - flag.Parse() - - file := flag.Arg(0) - dbPath := flag.Arg(1) - - db, err := jsontable.NewJSONDriver(dbPath) - if err != nil { - fmt.Printf("Error: %s", err) - return - } - - table, err := db.New("peptides", []benchtop.ColumnDef{}) - if err != nil { - fmt.Printf("Error: %s", err) - return - } - - lineCount, _ := util.LineCounter(file) - - lines, err := util.StreamLines(file, 10) - if err != nil { - fmt.Printf("Error: %s", err) - return - } - records := make(chan benchtop.Row, 10) - - go func() { - defer 
close(records) - bar := progressbar.Default(int64(lineCount)) - - for l := range lines { - row := strings.Split(l, "\t") - - //data := map[string]any{} - //json.Unmarshal([]byte(row[1]), &data) - data := []any{} - json.Unmarshal([]byte(row[1]), &data) - entry := map[string]any{ - "embedding": data, - } - records <- benchtop.Row{Id: []byte(row[0]), Data: entry} - bar.Add(1) - } - }() - table.Load(records) - db.Close() -} diff --git a/filters/filters.go b/filters/filters.go index 89c0234..09d4aa3 100644 --- a/filters/filters.go +++ b/filters/filters.go @@ -1,7 +1,9 @@ package filters import ( + "errors" "reflect" + "strconv" "github.com/bmeg/grip/gripql" "github.com/bmeg/grip/log" @@ -16,7 +18,7 @@ type FieldFilter struct { func ApplyFilterCondition(val any, cond *FieldFilter) bool { condVal := cond.Value - if (val == nil || cond.Value == nil) && + if (val == nil || condVal == nil) && cond.Operator != gripql.Condition_EQ && cond.Operator != gripql.Condition_NEQ && cond.Operator != gripql.Condition_WITHIN && @@ -27,197 +29,177 @@ func ApplyFilterCondition(val any, cond *FieldFilter) bool { switch cond.Operator { case gripql.Condition_EQ: - return reflect.DeepEqual(val, condVal) + switch v := val.(type) { + case string: + condS, ok := condVal.(string) + return ok && v == condS + case int: + condI, ok := condVal.(int) + return ok && v == condI + case float64: + condF, ok := condVal.(float64) + return ok && v == condF + case bool: + condB, ok := condVal.(bool) + return ok && v == condB + case nil: + return condVal == nil + default: + return reflect.DeepEqual(val, condVal) + } case gripql.Condition_NEQ: - return !reflect.DeepEqual(val, condVal) - - case gripql.Condition_GT: - valN, err := cast.ToFloat64E(val) - if err != nil { - return false - } - condN, err := cast.ToFloat64E(condVal) - if err != nil { - return false + switch v := val.(type) { + case string: + condS, ok := condVal.(string) + return ok && v != condS + case int: + condI, ok := condVal.(int) + return ok && 
v != condI + case float64: + condF, ok := condVal.(float64) + return ok && v != condF + case bool: + condB, ok := condVal.(bool) + return ok && v != condB + case nil: + return condVal != nil + default: + return !reflect.DeepEqual(val, condVal) } - return valN > condN - case gripql.Condition_GTE: - valN, err := cast.ToFloat64E(val) + case gripql.Condition_GT, gripql.Condition_GTE, gripql.Condition_LT, gripql.Condition_LTE: + valN, err := getFloat64(val) // Use optimized getter if err != nil { return false } - condN, err := cast.ToFloat64E(condVal) + condN, err := getFloat64(condVal) // Use optimized getter if err != nil { return false } - return valN >= condN - case gripql.Condition_LT: - //log.Debugf("match: %#v %#v %s", condVal, val, cond.Key) - valN, err := cast.ToFloat64E(val) - //log.Debugf("CAST: ", valN, "ERROR: ", err) - if err != nil { - return false + if cond.Operator == gripql.Condition_GT { + return valN > condN } - condN, err := cast.ToFloat64E(condVal) - if err != nil { - return false + if cond.Operator == gripql.Condition_GTE { + return valN >= condN } - return valN < condN - - case gripql.Condition_LTE: - valN, err := cast.ToFloat64E(val) - if err != nil { - return false + if cond.Operator == gripql.Condition_LT { + return valN < condN } - condN, err := cast.ToFloat64E(condVal) - if err != nil { - return false + if cond.Operator == gripql.Condition_LTE { + return valN <= condN } - return valN <= condN + return false // Should not be reached - case gripql.Condition_INSIDE: + case gripql.Condition_INSIDE, gripql.Condition_OUTSIDE, gripql.Condition_BETWEEN: + // Still requires slice check, but we can use the optimized getFloat64 inside vals, err := cast.ToSliceE(condVal) - if err != nil { - log.Debugf("UserError: could not cast INSIDE condition value: %v", err) - return false - } - if len(vals) != 2 { - log.Debugf("UserError: expected slice of length 2 not %v for INSIDE condition value", len(vals)) - return false - } - lower, err := 
cast.ToFloat64E(vals[0]) - if err != nil { - log.Debugf("UserError: could not cast lower INSIDE condition value: %v", err) - return false - } - upper, err := cast.ToFloat64E(vals[1]) - if err != nil { - log.Debugf("UserError: could not cast upper INSIDE condition value: %v", err) - return false - } - valF, err := cast.ToFloat64E(val) - if err != nil { - log.Debugf("UserError: could not cast INSIDE value: %v", err) + if err != nil || len(vals) != 2 { return false } - return valF > lower && valF < upper - case gripql.Condition_OUTSIDE: - vals, err := cast.ToSliceE(condVal) - if err != nil { - log.Debugf("UserError: could not cast OUTSIDE condition value: %v", err) - return false - } - if len(vals) != 2 { - log.Debugf("UserError: expected slice of length 2 not %v for OUTSIDE condition value", len(vals)) - return false - } - lower, err := cast.ToFloat64E(vals[0]) + lower, err := getFloat64(vals[0]) if err != nil { - log.Debugf("UserError: could not cast lower OUTSIDE condition value: %v", err) return false } - upper, err := cast.ToFloat64E(vals[1]) + upper, err := getFloat64(vals[1]) if err != nil { - log.Debugf("UserError: could not cast upper OUTSIDE condition value: %v", err) return false } - valF, err := cast.ToFloat64E(val) + valF, err := getFloat64(val) if err != nil { - log.Debugf("UserError: could not cast OUTSIDE value: %v", err) return false } - return valF < lower || valF > upper - case gripql.Condition_BETWEEN: - vals, err := cast.ToSliceE(condVal) - if err != nil { - log.Debugf("UserError: could not cast BETWEEN condition value: %v", err) - return false - } - if len(vals) != 2 { - log.Debugf("UserError: expected slice of length 2 not %v for BETWEEN condition value", len(vals)) - return false - } - lower, err := cast.ToFloat64E(vals[0]) - if err != nil { - log.Debugf("UserError: could not cast lower BETWEEN condition value: %v", err) - return false + if cond.Operator == gripql.Condition_INSIDE { + return valF > lower && valF < upper } - upper, err := 
cast.ToFloat64E(vals[1]) - if err != nil { - log.Debugf("UserError: could not cast upper BETWEEN condition value: %v", err) - return false + if cond.Operator == gripql.Condition_OUTSIDE { + return valF < lower || valF > upper } - valF, err := cast.ToFloat64E(val) - if err != nil { - log.Debugf("UserError: could not cast BETWEEN value: %v", err) - return false + if cond.Operator == gripql.Condition_BETWEEN { + return valF >= lower && valF < upper } - return valF >= lower && valF < upper + return false case gripql.Condition_WITHIN: - found := false - switch condVal := condVal.(type) { - case []any: - for _, v := range condVal { - if reflect.DeepEqual(val, v) { - found = true - } - } - - case nil: - found = false - - default: + // val is the single document value. condVal is the slice of allowed values. + // Check if val is EQ to any element in condVal slice. + condSlice, ok := condVal.([]any) + if !ok { log.Debugf("UserError: expected slice not %T for WITHIN condition value", condVal) + return false } - - return found + for _, v := range condSlice { + if ApplyFilterCondition(val, &FieldFilter{Operator: gripql.Condition_EQ, Value: v}) { + return true // Found a match + } + } + return false case gripql.Condition_WITHOUT: - found := false - switch condVal := condVal.(type) { - case []any: - for _, v := range condVal { - if reflect.DeepEqual(val, v) { - found = true - } + condSlice, ok := condVal.([]any) + if !ok { + log.Debugf("UserError: expected slice not %T for WITHIN condition value", condVal) + return true + } + for _, v := range condSlice { + if ApplyFilterCondition(val, &FieldFilter{Operator: gripql.Condition_EQ, Value: v}) { + return false } - - case nil: - found = false - - default: - log.Debugf("UserError: expected slice not %T for WITHOUT condition value", condVal) - } - - return !found + return true case gripql.Condition_CONTAINS: - found := false - switch val := val.(type) { - case []any: - for _, v := range val { - if reflect.DeepEqual(v, condVal) { - 
found = true - } + // val is the slice from the document. condVal is the single target element. + // Check if any element in val slice is EQ to condVal. + valSlice, ok := val.([]any) + if !ok { + log.Debugf("UserError: expected slice not %T for CONTAINS condition value", val) + return false + } + for _, v := range valSlice { + // Use the optimized EQ check recursively instead of reflect.DeepEqual(v, condVal) + // Note: Arguments are v (slice element) and condVal (target). + if ApplyFilterCondition(v, &FieldFilter{Operator: gripql.Condition_EQ, Value: condVal}) { + return true // Found a match } - - case nil: - found = false - - default: - log.Debugf("UserError: unknown condition value type %T for CONTAINS condition", val) } - - return found + return false default: return false } } + +// getFloat64 is a highly optimized helper to convert 'any' value to float64, +// prioritizing direct type assertions (fastest) before falling back to strconv or cast (slower). +// This eliminates the repeated, slow calls to cast.ToFloat64E(val) for numeric comparisons. 
+func getFloat64(val any) (float64, error) { + if val == nil { + return 0, errors.New("cannot convert nil to float64") + } + switch v := val.(type) { + case int: + return float64(v), nil + case int8: + return float64(v), nil + case int16: + return float64(v), nil + case int32: + return float64(v), nil + case int64: + return float64(v), nil + case float32: + return float64(v), nil + case float64: + return v, nil + case string: + // Use strconv for fast string-to-float conversion (more direct than cast) + return strconv.ParseFloat(v, 64) + default: + // Fallback to cast for complex/unknown numeric types if necessary (e.g., json.Number) + return cast.ToFloat64E(val) + } +} diff --git a/go.mod b/go.mod index a861d1f..b4851b5 100644 --- a/go.mod +++ b/go.mod @@ -1,24 +1,16 @@ module github.com/bmeg/benchtop -go 1.24 - -toolchain go1.24.2 +go 1.24.2 require ( - github.com/bmeg/grip v0.0.0-20250206222527-96023b5f8b4f - github.com/schollz/progressbar/v3 v3.16.0 + github.com/bmeg/grip v0.0.0-20251106174949-7f0784126fbb github.com/spf13/cobra v1.8.1 - go.mongodb.org/mongo-driver v1.17.0 golang.org/x/exp v0.0.0-20240707233637-46b078467d37 ) require ( - github.com/DataDog/zstd v1.5.6-0.20230824185856-869dae002e5e // indirect - github.com/akuity/grpc-gateway-client v0.0.0-20231116134900-80c401329778 // indirect - github.com/alevinval/sse v1.0.2 // indirect github.com/beorn7/perks v1.0.1 // indirect - github.com/bytedance/sonic v1.13.3 // indirect - github.com/bytedance/sonic/loader v0.2.4 // indirect + github.com/bytedance/sonic/loader v0.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/cloudwego/base64x v0.1.5 // indirect github.com/cockroachdb/errors v1.11.3 // indirect @@ -27,32 +19,28 @@ require ( github.com/cockroachdb/redact v1.1.5 // indirect github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06 // indirect github.com/getsentry/sentry-go v0.28.1 // indirect - github.com/go-resty/resty/v2 v2.13.1 // indirect 
github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/snappy v0.0.5-0.20231225225746-43d5d4cd4e0e // indirect github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 // indirect - github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 // indirect github.com/hashicorp/errwrap v1.1.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect - github.com/klauspost/compress v1.17.9 // indirect + github.com/klauspost/compress v1.18.0 // indirect github.com/klauspost/cpuid/v2 v2.2.8 // indirect github.com/kr/pretty v0.3.1 // indirect github.com/kr/text v0.2.0 // indirect github.com/logrusorgru/aurora v2.0.3+incompatible // indirect - github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/prometheus/client_golang v1.19.1 // indirect github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/common v0.55.0 // indirect github.com/prometheus/procfs v0.15.1 // indirect - github.com/rivo/uniseg v0.4.7 // indirect github.com/rogpeppe/go-internal v1.12.0 // indirect github.com/sirupsen/logrus v1.9.3 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect golang.org/x/arch v0.0.0-20210923205945-b76863e36670 // indirect - golang.org/x/crypto v0.36.0 // indirect golang.org/x/net v0.37.0 // indirect golang.org/x/sys v0.33.0 // indirect golang.org/x/term v0.30.0 // indirect @@ -60,15 +48,16 @@ require ( google.golang.org/genproto/googleapis/api v0.0.0-20250811230008-5f3141c8851a // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20250811230008-5f3141c8851a // indirect google.golang.org/grpc v1.71.0 // indirect - google.golang.org/protobuf v1.36.7 // indirect ) require ( - github.com/bmeg/hnsw-index v0.0.0-20241122200324-94f3a5eb1f59 + github.com/DataDog/zstd v1.5.7 
github.com/bmeg/jsonpath v0.0.0-20210207014051-cca5355553ad - github.com/cockroachdb/pebble v1.1.2 + github.com/bytedance/sonic v1.14.0 + github.com/cockroachdb/pebble v1.1.5 github.com/edsrzf/mmap-go v1.2.0 github.com/hashicorp/go-multierror v1.1.1 github.com/maypok86/otter/v2 v2.1.0 github.com/spf13/cast v1.9.2 + google.golang.org/protobuf v1.36.7 ) diff --git a/go.sum b/go.sum index 13d64b9..60ad324 100644 --- a/go.sum +++ b/go.sum @@ -1,30 +1,22 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/DataDog/zstd v1.5.6-0.20230824185856-869dae002e5e h1:ZIWapoIRN1VqT8GR8jAwb1Ie9GyehWjVcGh32Y2MznE= -github.com/DataDog/zstd v1.5.6-0.20230824185856-869dae002e5e/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw= -github.com/akuity/grpc-gateway-client v0.0.0-20231116134900-80c401329778 h1:qj3+B4PU5AR2mBffDVXvP2d3hLCNDot28KKPWvQnOxs= -github.com/akuity/grpc-gateway-client v0.0.0-20231116134900-80c401329778/go.mod h1:0MZqOxL+zq+hGedAjYhkm1tOKuZyjUmE/xA8nqXa9q0= -github.com/alevinval/sse v1.0.2 h1:ooc08hn9B5X/u7vOMpnYDkXxIKA0y5DOw9qBVVK3YKY= -github.com/alevinval/sse v1.0.2/go.mod h1:X4J1/nTNs4yKbvjXFWJB+NdF9gaYkoAC4sw9Z9h7ASk= +github.com/DataDog/zstd v1.5.7 h1:ybO8RBeh29qrxIhCA9E8gKY6xfONU9T6G6aP9DTKfLE= +github.com/DataDog/zstd v1.5.7/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= -github.com/bmeg/grip v0.0.0-20250206222527-96023b5f8b4f h1:8F6Va7kEwlDDSzvlhnE+v3iiAF9FUXvDYFcPW/ccdE8= -github.com/bmeg/grip v0.0.0-20250206222527-96023b5f8b4f/go.mod h1:afNS+svbAkFH3XUPjDIaKahT0F0GxAYsZim2bH+b0KU= -github.com/bmeg/hnsw-index v0.0.0-20241122200324-94f3a5eb1f59 
h1:9tvIRzhj+xUtoCP6pKpsJMd1oQ4XHRSDNR8Yvoz3VKg= -github.com/bmeg/hnsw-index v0.0.0-20241122200324-94f3a5eb1f59/go.mod h1:eej8I0akm79rkkVAD59fc4N4RqByfxF2trZv5yIjgYw= +github.com/bmeg/grip v0.0.0-20251106174949-7f0784126fbb h1:GYQ0Tfj36h8m+6dZolHDQJyVnjjqT3pgBZlFGHT+HOE= +github.com/bmeg/grip v0.0.0-20251106174949-7f0784126fbb/go.mod h1:BxpaUuXbymKkEPvSDslziCzU17akkBo1ubu9nAFsI1A= github.com/bmeg/jsonpath v0.0.0-20210207014051-cca5355553ad h1:ICgBexeLB7iv/IQz4rsP+MimOXFZUwWSPojEypuOaQ8= github.com/bmeg/jsonpath v0.0.0-20210207014051-cca5355553ad/go.mod h1:ft96Irkp72C7ZrUWRenG7LrF0NKMxXdRvsypo5Njhm4= -github.com/bytedance/sonic v1.13.3 h1:MS8gmaH16Gtirygw7jV91pDCN33NyMrPbN7qiYhEsF0= -github.com/bytedance/sonic v1.13.3/go.mod h1:o68xyaF9u2gvVBuGHPlUVCy+ZfmNNO5ETf1+KgkJhz4= +github.com/bytedance/sonic v1.14.0 h1:/OfKt8HFw0kh2rj8N0F6C/qPGRESq0BbaNZgcNXXzQQ= +github.com/bytedance/sonic v1.14.0/go.mod h1:WoEbx8WTcFJfzCe0hbmyTGrfjt8PzNEBdxlNUO24NhA= github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU= -github.com/bytedance/sonic/loader v0.2.4 h1:ZWCw4stuXUsn1/+zQDqeE7JKP+QO47tz7QCNan80NzY= -github.com/bytedance/sonic/loader v0.2.4/go.mod h1:N8A3vUdtUebEY2/VQC0MyhYeKUFosQU6FxH2JmUe6VI= +github.com/bytedance/sonic/loader v0.3.0 h1:dskwH8edlzNMctoruo8FPTJDF3vLtDT0sXZwvZJyqeA= +github.com/bytedance/sonic/loader v0.3.0/go.mod h1:N8A3vUdtUebEY2/VQC0MyhYeKUFosQU6FxH2JmUe6VI= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/chengxilo/virtualterm v1.0.4 h1:Z6IpERbRVlfB8WkOmtbHiDbBANU7cimRIof7mk9/PwM= -github.com/chengxilo/virtualterm v1.0.4/go.mod h1:DyxxBZz/x1iqJjFxTFcr6/x+jSpqN0iwWCOK1q10rlY= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= 
github.com/cloudwego/base64x v0.1.5 h1:XPciSp1xaq2VCSt6lF0phncD4koWyULpl5bUxbfCyP4= github.com/cloudwego/base64x v0.1.5/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w= @@ -38,8 +30,8 @@ github.com/cockroachdb/fifo v0.0.0-20240616162244-4768e80dfb9a h1:f52TdbU4D5nozM github.com/cockroachdb/fifo v0.0.0-20240616162244-4768e80dfb9a/go.mod h1:9/y3cnZ5GKakj/H4y9r9GTjCvAFta7KLgSHPJJYc52M= github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b h1:r6VH0faHjZeQy818SGhaone5OnYfxFR/+AzdY3sf5aE= github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b/go.mod h1:Vz9DsVWQQhf3vs21MhPMZpMGSht7O/2vFW2xusFUVOs= -github.com/cockroachdb/pebble v1.1.2 h1:CUh2IPtR4swHlEj48Rhfzw6l/d0qA31fItcIszQVIsA= -github.com/cockroachdb/pebble v1.1.2/go.mod h1:4exszw1r40423ZsmkG/09AFEG83I0uDgfujJdbL6kYU= +github.com/cockroachdb/pebble v1.1.5 h1:5AAWCBWbat0uE0blr8qzufZP5tBjkRyy/jWe1QWLnvw= +github.com/cockroachdb/pebble v1.1.5/go.mod h1:17wO9el1YEigxkP/YtV8NtCivQDgoCyBg5c4VR/eOWo= github.com/cockroachdb/redact v1.1.5 h1:u1PMllDkdFfPWaNGMyLD1+so+aq3uUItthCFqzwPJ30= github.com/cockroachdb/redact v1.1.5/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg= github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06 h1:zuQyyAKVxetITBuuhv3BI9cMrmStnpT18zmgmTxunpo= @@ -63,8 +55,10 @@ github.com/go-errors/errors v1.4.2 h1:J6MZopCL4uSllY1OfXM374weqZFFItUbrImctkmUxI github.com/go-errors/errors v1.4.2/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og= github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= -github.com/go-resty/resty/v2 v2.13.1 h1:x+LHXBI2nMB1vqndymf26quycC4aggYJ7DECYbiz03g= -github.com/go-resty/resty/v2 v2.13.1/go.mod h1:GznXlLxkq6Nh4sU59rPmUw3VtgpO3aS96ORAI6Q7d+0= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= 
+github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= @@ -73,15 +67,19 @@ github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfb github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/golang/snappy v0.0.5-0.20231225225746-43d5d4cd4e0e h1:4bw4WeyTYPp0smaXiJZCNnLrvVBqirQVreixayXezGc= github.com/golang/snappy v0.0.5-0.20231225225746-43d5d4cd4e0e/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= -github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= -github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 h1:UH//fgunKIs4JdUbpDl1VZCDaL56wXCB/5+wF6uHfaI= github.com/grpc-ecosystem/go-grpc-middleware v1.4.0/go.mod h1:g5qyo/la0ALbONm6Vbp88Yd8NsDy6rZz+RcrMPxvld8= 
-github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 h1:5ZPtiqj0JL5oKWmcsq4VMaAW5ukBEgSGXEN89zeH1Jo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3/go.mod h1:ndYquD05frm2vACXE1nsccT4oJzjhw2arTS2cpUD1PI= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= @@ -91,8 +89,8 @@ github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2 github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= -github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.2.8 h1:+StwCXwm9PdpiEkPyzBXIy+M9KUb4ODm0Zarf1kS5BM= github.com/klauspost/cpuid/v2 v2.2.8/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= @@ -107,12 +105,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/logrusorgru/aurora v2.0.3+incompatible h1:tOpm7WcpBTn4fjmVfgpQq0EfczGlG91VSDkswnjF5A8= github.com/logrusorgru/aurora 
v2.0.3+incompatible/go.mod h1:7rIyQOR62GCctdiQpZ/zOJlFyk6y+94wXzv6RNZgaR4= -github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= -github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/maypok86/otter/v2 v2.1.0 h1:H+FO9NtLuSWYUlIUQ/kT6VNEpWSIF4w4GZJRDhxYb7k= github.com/maypok86/otter/v2 v2.1.0/go.mod h1:jX2xEKz9PrNVbDqnk8JUuOt5kURK8h7jd1kDYI5QsZk= -github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= -github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= @@ -133,14 +127,10 @@ github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= -github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= -github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/schollz/progressbar/v3 v3.16.0 
h1:+MbBim/cE9DqDb8UXRfLJ6RZdyDkXG1BDy/sWc5s0Mc= -github.com/schollz/progressbar/v3 v3.16.0/go.mod h1:lLiKjKJ9/yzc9Q8jk+sVLfxWxgXKsktvUf6TO+4Y2nw= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= @@ -167,9 +157,18 @@ github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= -go.mongodb.org/mongo-driver v1.17.0 h1:Hp4q2MCjvY19ViwimTs00wHi7G4yzxh4/2+nTx8r40k= -go.mongodb.org/mongo-driver v1.17.0/go.mod h1:wwWm/+BuOddhcq3n68LKRmgk2wXzmF6s0SFOa0GINL4= +go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= +go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= +go.opentelemetry.io/otel v1.34.0 h1:zRLXxLCgL1WyKsPVrgbSdMN4c0FMkDAskSTQP+0hdUY= +go.opentelemetry.io/otel v1.34.0/go.mod h1:OWFPOQ+h4G8xpyjgqo4SxJYdDQ/qmRH+wivy7zzx9oI= +go.opentelemetry.io/otel/metric v1.34.0 h1:+eTR3U0MyfWjRDhmFMxe2SsW64QrZ84AOhvqS7Y+PoQ= +go.opentelemetry.io/otel/metric v1.34.0/go.mod h1:CEDrp0fy2D0MvkXE+dPV7cMi8tWZwX3dmaIhwPOaqHE= +go.opentelemetry.io/otel/sdk v1.34.0 h1:95zS4k/2GOy069d321O8jWgYsW3MzVV+KuSPKp7Wr1A= +go.opentelemetry.io/otel/sdk v1.34.0/go.mod h1:0e/pNiaMAqaykJGKbi+tSjWfNNHMTxoC9qANsCzbyxU= +go.opentelemetry.io/otel/sdk/metric v1.34.0 h1:5CeK9ujjbFVL5c1PhLuStg1wxA7vQv7ce1EK0Gyvahk= +go.opentelemetry.io/otel/sdk/metric v1.34.0/go.mod h1:jQ/r8Ze28zRKoNRdkjCZxfs6YvBTG1+YIqyFVFYec5w= +go.opentelemetry.io/otel/trace v1.34.0 
h1:+ouXS2V8Rd4hp4580a8q23bg0azF2nI8cqLYnC8mh/k= +go.opentelemetry.io/otel/trace v1.34.0/go.mod h1:Svm7lSjQD7kG7KJ/MUHPVXSDGz2OX4h0M2jHBhmSfRE= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= @@ -179,13 +178,6 @@ golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUu golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= -golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= -golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U= -golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= -golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34= -golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20240707233637-46b078467d37 h1:uLDX+AfeFCct3a2C7uIWBKMJIR3CJMhcgfrUAqjRK6w= golang.org/x/exp v0.0.0-20240707233637-46b078467d37/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= @@ -195,8 +187,6 @@ golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHl golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/mod v0.2.0/go.mod 
h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -205,12 +195,6 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= -golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= -golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= -golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= -golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/net v0.37.0 h1:1zLorHbz+LYj7MQlSf1+2tPIIgibq2eL5xkrGk6f+2c= golang.org/x/net v0.37.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= @@ -219,50 +203,24 @@ golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync 
v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= -golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw= +golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= -golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= -golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= -golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= -golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= -golang.org/x/term v0.27.0 h1:WP60Sv1nlK1T6SupCHbXzSaN0b9wUmsPoRS9b61A23Q= -golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y= golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= -golang.org/x/text 
v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY= golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4= -golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= @@ -272,8 +230,6 @@ golang.org/x/tools v0.0.0-20191108193012-7d206e10da11/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -283,7 +239,6 @@ google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7 google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215/go.mod 
h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 h1:KpwkzHKEF7B9Zxg18WzOa7djJ+Ha5DzthMyZYQfEn2A= google.golang.org/genproto/googleapis/api v0.0.0-20250811230008-5f3141c8851a h1:DMCgtIAIQGZqJXMVzJF4MV8BlWoJh2ZuFiRdAleyr58= google.golang.org/genproto/googleapis/api v0.0.0-20250811230008-5f3141c8851a/go.mod h1:y2yVLIE/CSMCPXaHnSKXxu1spLPnglFLegmgdY23uuE= google.golang.org/genproto/googleapis/rpc v0.0.0-20250811230008-5f3141c8851a h1:tPE/Kp+x9dMSwUm/uM0JKK0IfdiJkwAbSMSeZBXXJXc= @@ -295,8 +250,6 @@ google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8 google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk= google.golang.org/grpc v1.71.0 h1:kF77BGdPTQ4/JZWMlb9VpJ5pa25aqvVqogsxNHHdeBg= google.golang.org/grpc v1.71.0/go.mod h1:H0GRtasmQOh9LkFoCPDu3ZrwUtD1YGE+b2vYBYd/8Ec= -google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM= -google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= google.golang.org/protobuf v1.36.7 h1:IgrO7UwFQGJdRNXH/sQux4R1Dj1WAKcLElzeeRaXV2A= google.golang.org/protobuf v1.36.7/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/interface.go b/interface.go index 0329895..ae9d48b 100644 --- a/interface.go +++ b/interface.go @@ -26,16 +26,6 @@ type ColumnDef struct { ) */ -type TableDriver interface { - New(name string, columns []ColumnDef) (TableStore, error) - Get(name string) (TableStore, error) - GetAllColNames() chan string - GetLabels(edges bool, removePrefix bool) chan string - List() []string - Delete(name string) error - Close() -} - type Row struct { Id []byte TableName string @@ -43,42 +33,44 @@ type Row struct { } type Index struct { - Key []byte - Position uint64 - Size uint64 -} - -type BulkResponse struct { - Key []byte - Data map[string]any 
- Err string + Key []byte + Loc RowLoc } type RowLoc struct { - Offset uint64 - Size uint64 - Label uint16 + TableId uint16 + Section uint16 // Sectioning allows for smaller Offset, Size + Offset uint32 // Max offset, size is 4GB + Size uint32 } type RowFilter interface { - Matches(row any) bool + Matches(row []byte, tableStr string) bool GetFilter() any IsNoOp() bool RequiredFields() []string } +type TableDriver interface { + New(name string, columns []ColumnDef) (TableStore, error) + Get(name string) (TableStore, error) + ListTableKeys(tableId uint16) (chan Index, error) + GetAllColNames() chan string + GetLabels(edges bool, removePrefix bool) chan string + List() []string + Delete(name string) error + Close() +} + type TableStore interface { GetColumnDefs() []ColumnDef AddRow(elem Row) (*RowLoc, error) - GetRow(loc RowLoc) (map[string]any, error) - DeleteRow(loc RowLoc, id []byte) error + GetRow(loc *RowLoc) (map[string]any, error) + DeleteRow(loc *RowLoc, id []byte) error - Fetch(inputs chan Index, workers int) <-chan BulkResponse - Remove(inputs chan Index, workers int) <-chan BulkResponse - Scan(key bool, filter RowFilter) chan any - Load(chan Row) error - Keys() (chan Index, error) + ScanDoc(filter RowFilter) chan map[string]any + ScanId(filter RowFilter) chan string - Compact() error - Close() + //Compact() error + Close() error } diff --git a/jsontable/bLoad.go b/jsontable/bLoad.go new file mode 100644 index 0000000..d31b752 --- /dev/null +++ b/jsontable/bLoad.go @@ -0,0 +1,110 @@ +package jsontable + +import ( + "fmt" + "sync" + + "github.com/bmeg/benchtop" + jTable "github.com/bmeg/benchtop/jsontable/table" + "github.com/bmeg/benchtop/pebblebulk" + "github.com/bytedance/sonic" + "github.com/hashicorp/go-multierror" +) + +func (dr *JSONDriver) BulkLoad(inputs chan *benchtop.Row, tx *pebblebulk.PebbleBulk) error { + if dr.Pkv == nil || dr.Pkv.Db == nil { + return fmt.Errorf("pebble database instance is nil") + } + var wg sync.WaitGroup + tableChannels := 
make(map[string]chan *benchtop.Row) + + metadataChan := make(chan *jTable.KitchenSink, 100) + + snapshot := dr.Pkv.Db.NewSnapshot() + defer snapshot.Close() + + for row := range inputs { + if _, exists := tableChannels[row.TableName]; !exists { + dr.Lock.RLock() + table, exists := dr.Tables[row.TableName] + dr.Lock.RUnlock() + if !exists { + var localErr *multierror.Error + newTable, err := dr.New(row.TableName, nil) + if err != nil { + localErr = multierror.Append(localErr, fmt.Errorf("failed to create table %s: %v", row.TableName, err)) + metadataChan <- &jTable.KitchenSink{ + FieldIndexKeyElements: nil, + Metadata: nil, + Err: localErr.ErrorOrNil(), + } + continue + } + table = newTable.(*jTable.JSONTable) + dr.Lock.Lock() + dr.Tables[row.TableName] = table + dr.Lock.Unlock() + } + inputChan := table.StartTableGoroutine(&wg, metadataChan, snapshot, BATCH_SIZE) + tableChannels[row.TableName] = inputChan + } + tableChannels[row.TableName] <- row + } + for _, ch := range tableChannels { + close(ch) + } + + var errs *multierror.Error + done := make(chan struct{}) + go func() { + defer close(done) + writeFunc := func(tx *pebblebulk.PebbleBulk) error { + for meta := range metadataChan { + if meta.Err != nil { + errs = multierror.Append(errs, meta.Err) + continue + } + if meta.Metadata == nil { + continue + } + for _, keyElements := range meta.FieldIndexKeyElements { + forwardKey := benchtop.FieldKey(keyElements.Field, keyElements.TableName, keyElements.Val, []byte(keyElements.RowId)) + if err := tx.Set(forwardKey, []byte{}, nil); err != nil { + errs = multierror.Append(errs, err) + } + BVal, err := sonic.ConfigFastest.Marshal(keyElements.Val) + if err != nil { + errs = multierror.Append(errs, err) + continue + } + if err := tx.Set(benchtop.RFieldKey(keyElements.TableName, keyElements.Field, keyElements.RowId), BVal, nil); err != nil { + errs = multierror.Append(errs, err) + } + } + + // Write row location entries. 
+ for id, m := range meta.Metadata { + dr.LocCache.Set(id, m) + dr.AddTableEntryInfo(tx, []byte(id), m) + } + } + return nil + } + + if tx == nil { + errs = multierror.Append(errs, fmt.Errorf("pebble bulk instance passed into BulkLoad function is nil")) + } else { + dr.PebbleLock.Lock() + if err := writeFunc(tx); err != nil { + errs = multierror.Append(errs, err) + } + dr.PebbleLock.Unlock() + } + }() + + wg.Wait() + close(metadataChan) + <-done + + return errs.ErrorOrNil() +} diff --git a/jsontable/cache.go b/jsontable/cache.go deleted file mode 100644 index 7ead620..0000000 --- a/jsontable/cache.go +++ /dev/null @@ -1,57 +0,0 @@ -package jsontable - -import ( - "bytes" - "context" - "time" - - "github.com/bmeg/benchtop" - "github.com/bmeg/benchtop/pebblebulk" - "github.com/bmeg/grip/log" - "github.com/maypok86/otter/v2" -) - -func (dr *JSONDriver) PreloadCache() error { - var keys []string - prefix := []byte{benchtop.PosPrefix} - L_Start := time.Now() - - err := dr.Pb.View(func(it *pebblebulk.PebbleIterator) error { - for it.Seek(prefix); it.Valid() && bytes.HasPrefix(it.Key(), prefix); it.Next() { - _, id := benchtop.ParsePosKey(it.Key()) - keys = append(keys, string(id)) - } - return nil - }) - if err != nil { - return err - } - - bulkLoader := otter.BulkLoaderFunc[string, benchtop.RowLoc](func(ctx context.Context, keys []string) (map[string]benchtop.RowLoc, error) { - result := make(map[string]benchtop.RowLoc, len(keys)) - err := dr.Pb.View(func(it *pebblebulk.PebbleIterator) error { - for it.Seek(prefix); it.Valid() && bytes.HasPrefix(it.Key(), prefix); it.Next() { - tableId, id := benchtop.ParsePosKey(it.Key()) - val, err := it.Value() - if err != nil { - log.Errorf("Err on it.Value() in bulkLoader: %v", err) - continue - } - offset, size := benchtop.ParsePosValue(val) - result[string(id)] = benchtop.RowLoc{Offset: offset, Size: size, Label: tableId} - - } - return nil - }) - if err != nil { - return nil, err - } - return result, nil - }) - - _, err = 
dr.PageCache.BulkGet(context.Background(), keys, bulkLoader) - if err == nil { - log.Debugf("Successfully loaded %d keys in RowLoc cache in %s", len(keys), (time.Now().Sub(L_Start).String())) - } - return err -} diff --git a/jsontable/cache/cache.go b/jsontable/cache/cache.go new file mode 100644 index 0000000..74fe879 --- /dev/null +++ b/jsontable/cache/cache.go @@ -0,0 +1,100 @@ +package cache + +import ( + "bytes" + "context" + "time" + + "github.com/bmeg/benchtop" + "github.com/bmeg/benchtop/pebblebulk" + "github.com/bmeg/grip/log" + "github.com/maypok86/otter/v2" +) + +type JSONCache struct { + pageCache *otter.Cache[string, *benchtop.RowLoc] + pageLoader otter.LoaderFunc[string, *benchtop.RowLoc] + bulkPageLoader otter.BulkLoaderFunc[string, *benchtop.RowLoc] + kv pebblebulk.KVStore +} + +// Get retrieves an item from the cache. If the item is not present, +// it is automatically loaded from the underlying KV store. +func (ca *JSONCache) Get(ctx context.Context, key string) (*benchtop.RowLoc, error) { + return ca.pageCache.Get(ctx, key, ca.pageLoader) +} + +// Set adds or updates an item in the cache. +func (ca *JSONCache) Set(key string, value *benchtop.RowLoc) (*benchtop.RowLoc, bool) { + return ca.pageCache.Set(key, value) +} + +// Delete removes an item from the cache. 
+func (ca *JSONCache) Invalidate(key string) (*benchtop.RowLoc, bool) { + return ca.pageCache.Invalidate(key) +} + +func NewJSONCache(kv pebblebulk.KVStore) *JSONCache { + cache := &JSONCache{ + kv: kv, + pageCache: otter.Must(&otter.Options[string, *benchtop.RowLoc]{ + MaximumSize: 10_000_000, + }), + } + cache.pageLoader = otter.LoaderFunc[string, *benchtop.RowLoc](func(ctx context.Context, key string) (*benchtop.RowLoc, error) { + log.Debugln("Cache miss, loading from kv: ", key) + val, closer, err := kv.Get([]byte(key)) + if err != nil { + if err.Error() != "pebble: not found" { // Handle Pebble-specific error generically + log.Errorf("Err on kv.Get for key %s in CacheLoader: %v", key, err) + } + return &benchtop.RowLoc{}, err + } + closer.Close() + return benchtop.DecodeRowLoc(val), nil + }) + + cache.bulkPageLoader = otter.BulkLoaderFunc[string, *benchtop.RowLoc](func(ctx context.Context, keys []string) (map[string]*benchtop.RowLoc, error) { + prefix := []byte{benchtop.PosPrefix} + result := make(map[string]*benchtop.RowLoc, len(keys)) + err := kv.View(func(it *pebblebulk.PebbleIterator) error { + for it.Seek(prefix); it.Valid() && bytes.HasPrefix(it.Key(), prefix); it.Next() { + _, id := benchtop.ParsePosKey(it.Key()) + val, err := it.Value() + if err != nil { + log.Errorf("Err on it.Value() in bulkLoader: %v", err) + continue + } + loc := benchtop.DecodeRowLoc(val) + result[string(id)] = loc + } + return nil + }) + if err != nil { + return nil, err + } + return result, nil + }) + return cache +} + +func (ca *JSONCache) PreloadCache() error { + var keys []string + prefix := []byte{benchtop.PosPrefix} + L_Start := time.Now() + err := ca.kv.View(func(it *pebblebulk.PebbleIterator) error { + for it.Seek(prefix); it.Valid() && bytes.HasPrefix(it.Key(), prefix); it.Next() { + _, id := benchtop.ParsePosKey(it.Key()) + keys = append(keys, string(id)) + } + return nil + }) + if err != nil { + return err + } + _, err = ca.pageCache.BulkGet(context.Background(), keys, 
ca.bulkPageLoader) + if err == nil { + log.Debugf("Successfully loaded %d keys in RowLoc cache in %s", len(keys), time.Since(L_Start).String()) + } + return err +} diff --git a/jsontable/driver.go b/jsontable/driver.go index 7e09837..dad9dae 100644 --- a/jsontable/driver.go +++ b/jsontable/driver.go @@ -2,163 +2,129 @@ package jsontable import ( "bytes" - "context" - "encoding/binary" "fmt" - "io" "os" "path/filepath" "sync" "time" "github.com/bmeg/benchtop" + "github.com/bmeg/benchtop/jsontable/cache" + "github.com/bmeg/benchtop/jsontable/section" + jTable "github.com/bmeg/benchtop/jsontable/table" "github.com/bmeg/benchtop/pebblebulk" "github.com/bmeg/benchtop/util" "github.com/bmeg/grip/log" "github.com/bytedance/sonic" - "github.com/cockroachdb/pebble" - multierror "github.com/hashicorp/go-multierror" - "github.com/maypok86/otter/v2" ) -const BATCH_SIZE = 1000 -const ROW_HSIZE = 12 -const ROW_OFFSET_HSIZE = 8 +const ( + BATCH_SIZE int = 1000 +) type JSONDriver struct { base string Lock sync.RWMutex PebbleLock sync.RWMutex - db *pebble.DB - Pb *pebblebulk.PebbleKV - - PageCache *otter.Cache[string, benchtop.RowLoc] - PageLoader otter.LoaderFunc[string, benchtop.RowLoc] + Pkv *pebblebulk.PebbleKV + LocCache *cache.JSONCache - Tables map[string]*JSONTable + Tables map[string]*jTable.JSONTable LabelLookup map[uint16]string - // Fields is defined like label, field - Fields map[string]map[string]struct{} } func NewJSONDriver(path string) (benchtop.TableDriver, error) { - db, err := pebble.Open(path, &pebble.Options{}) + Pkv, err := pebblebulk.NewPebbleKV(path) if err != nil { return nil, err } tableDir := filepath.Join(path, "TABLES") - if util.FileExists(tableDir) { - os.Mkdir(tableDir, 0700) + exist, err := util.DirExists(tableDir) + if err != nil { + return nil, err + } + if !exist { + if err := os.Mkdir(tableDir, 0700); err != nil { + Pkv.Db.Close() + return nil, fmt.Errorf("failed to create TABLES directory: %v", err) + } } driver := &JSONDriver{ base: path, - 
db: db, - Tables: map[string]*JSONTable{}, - Pb: &pebblebulk.PebbleKV{ - Db: db, + Tables: map[string]*jTable.JSONTable{}, + Pkv: &pebblebulk.PebbleKV{ + Db: Pkv.Db, InsertCount: 0, CompactLimit: uint32(1000), }, - PageCache: otter.Must(&otter.Options[string, benchtop.RowLoc]{ - MaximumSize: 10_000_000, - }), - Fields: map[string]map[string]struct{}{}, + LocCache: cache.NewJSONCache(Pkv), Lock: sync.RWMutex{}, PebbleLock: sync.RWMutex{}, LabelLookup: map[uint16]string{}, } - driver.PageLoader = otter.LoaderFunc[string, benchtop.RowLoc](func(ctx context.Context, key string) (benchtop.RowLoc, error) { - log.Debugln("Cache miss, loading from pebble: ", key) - val, closer, err := driver.Pb.Db.Get([]byte(key)) - if err != nil { - if err != pebble.ErrNotFound { - log.Errorf("Err on dr.Pb.Get for key %s in CacheLoader: %v", key, err) - } - return benchtop.RowLoc{}, err - } - offset, size := benchtop.ParsePosValue(val) - closer.Close() - return benchtop.RowLoc{Offset: offset, Size: size}, nil - }) return driver, nil } +// Update LoadJSONDriver to use DirExists func LoadJSONDriver(path string) (benchtop.TableDriver, error) { - db, err := pebble.Open(path, &pebble.Options{}) + pKv, err := pebblebulk.NewPebbleKV(path) if err != nil { return nil, fmt.Errorf("failed to open database: %v", err) } tableDir := filepath.Join(path, "TABLES") - if !util.FileExists(tableDir) { + exist, err := util.DirExists(tableDir) + if err != nil { + pKv.Close() + return nil, err + } + if !exist { + pKv.Close() return nil, fmt.Errorf("TABLES directory not found at %s", tableDir) } driver := &JSONDriver{ base: path, - db: db, - Tables: map[string]*JSONTable{}, - Pb: &pebblebulk.PebbleKV{ - Db: db, + Tables: map[string]*jTable.JSONTable{}, + Pkv: &pebblebulk.PebbleKV{ + Db: pKv.Db, InsertCount: 0, CompactLimit: uint32(1000), }, - Fields: map[string]map[string]struct{}{}, - Lock: sync.RWMutex{}, - PebbleLock: sync.RWMutex{}, - PageCache: otter.Must(&otter.Options[string, benchtop.RowLoc]{ - 
MaximumSize: 10000000, - }), + LocCache: cache.NewJSONCache(pKv), + Lock: sync.RWMutex{}, + PebbleLock: sync.RWMutex{}, LabelLookup: map[uint16]string{}, } - err = driver.LoadFields() - if err != nil { - return nil, err - } - for _, tableName := range driver.List() { table, err := driver.Get(tableName) if err != nil { driver.Close() return nil, fmt.Errorf("failed to load table %s: %v", tableName, err) } - jsonTable, ok := table.(*JSONTable) + jsonTable, ok := table.(*jTable.JSONTable) if !ok { driver.Close() - log.Errorf("invalid table type for %s", tableName) return nil, fmt.Errorf("invalid table type for %s", tableName) } - // Pb is already set in Get, but ensure consistency if needed - jsonTable.Pb = &pebblebulk.PebbleKV{ - Db: db, - InsertCount: 0, - CompactLimit: uint32(1000), - } + driver.Lock.Lock() driver.LabelLookup[jsonTable.TableId] = tableName[2:] driver.Tables[tableName] = jsonTable driver.Lock.Unlock() } - driver.PageLoader = otter.LoaderFunc[string, benchtop.RowLoc](func(ctx context.Context, key string) (benchtop.RowLoc, error) { - log.Debugln("Cache miss, loading from pebble: ", key) - val, closer, err := driver.Pb.Db.Get([]byte(key)) - if err != nil { - if err != pebble.ErrNotFound { - log.Errorf("Err on dr.Pb.Get for key %s in CacheLoader: %v", key, err) - } - return benchtop.RowLoc{}, err - } - offset, size := benchtop.ParsePosValue(val) - defer closer.Close() - return benchtop.RowLoc{Offset: offset, Size: size}, nil - }) + err = driver.LoadFields() + if err != nil { + pKv.Close() + return nil, err + } driver.Lock.RLock() - err = driver.PreloadCache() + err = driver.LocCache.PreloadCache() driver.Lock.RUnlock() if err != nil { return nil, err @@ -177,41 +143,29 @@ func (dr *JSONDriver) New(name string, columns []benchtop.ColumnDef) (benchtop.T dr.Lock.Lock() defer dr.Lock.Unlock() - - if p, ok := dr.Tables[name]; ok { - return p, nil - } - newId := dr.getMaxTablePrefix() formattedName := util.PadToSixDigits(int(newId)) tPath := 
filepath.Join(dr.base, "TABLES", formattedName) - f, err := os.Create(tPath) - if err != nil { - return nil, fmt.Errorf("failed to create table %s: %v", tPath, err) - } - out := &JSONTable{ - columns: columns, - handleLock: sync.RWMutex{}, - columnMap: map[string]int{}, - Path: tPath, - Name: name, - FileName: formattedName, - handle: f, - db: dr.db, - Pb: &pebblebulk.PebbleKV{ - Db: dr.db, - InsertCount: 0, - CompactLimit: uint32(1000), - }, - TableId: newId, + out := &jTable.JSONTable{ + Columns: columns, + ColumnMap: map[string]int{}, + Path: tPath, + Name: name, + FileName: tPath, // Base name for partition/section files + TableId: newId, + Fields: map[string]struct{}{}, + ActiveSections: map[uint8]*section.Section{}, + FlushCounter: map[uint8]int{}, + SectionLock: sync.Mutex{}, + MaxConcurrentSections: 10, + PartitionMap: map[uint8][]uint16{}, + Sections: map[uint16]*section.Section{}, } for n, d := range columns { - out.columnMap[d.Key] = n + out.ColumnMap[d.Key] = n } - dr.LabelLookup[newId] = name[2:] - // Create TableInfo for serialization tinfo := &benchtop.TableInfo{ Columns: columns, @@ -223,44 +177,59 @@ func (dr *JSONDriver) New(name string, columns []benchtop.ColumnDef) (benchtop.T outData, err := sonic.ConfigFastest.Marshal(tinfo) if err != nil { - f.Close() return nil, fmt.Errorf("failed to marshal table info: %v", err) } if err := dr.addTable(tinfo.Name, outData); err != nil { - f.Close() log.Errorf("Error adding table: %s", err) return nil, err } - buffer := make([]byte, 12) - binary.LittleEndian.PutUint64(buffer[:8], uint64(0)+uint64(len(outData))+12) - binary.LittleEndian.PutUint32(buffer[8:12], uint32(len(outData))) - - if _, err := out.handle.Write(buffer); err != nil { - f.Close() - return nil, fmt.Errorf("failed to write table header: %v", err) - } - if _, err := out.handle.Write(outData); err != nil { - f.Close() - return nil, fmt.Errorf("failed to write table data: %v", err) - } - if err := out.Init(10); err != nil { - f.Close() - 
log.Errorln("TABLE POOL ERR: %v", err) + log.Errorf("TABLE INIT ERR: %v", err) return nil, fmt.Errorf("failed to init table %s: %v", name, err) } dr.Tables[name] = out - log.Debugf("Created table %s with FilePool: %v", name, out.FilePool) + dr.LabelLookup[newId] = name[2:] + + log.Debugf("Created table %s", name) + return out, nil +} + +func (dr *JSONDriver) SetIndices(inputs chan benchtop.Index) { + dr.Pkv.BulkWrite(func(tx *pebblebulk.PebbleBulk) error { + for index := range inputs { + dr.AddTableEntryInfo( + tx, + index.Key, + &index.Loc, + ) + } + return nil + }) +} + +func (dr *JSONDriver) ListTableKeys(tableId uint16) (chan benchtop.Index, error) { + out := make(chan benchtop.Index, 10) + go func() { + defer close(out) + prefix := benchtop.NewPosKeyPrefix(tableId) + dr.Pkv.View(func(it *pebblebulk.PebbleIterator) error { + for it.Seek(prefix); it.Valid() && bytes.HasPrefix(it.Key(), prefix); it.Next() { + _, value := benchtop.ParsePosKey(it.Key()) + out <- benchtop.Index{Key: value} + } + return nil + }) + }() return out, nil } func (dr *JSONDriver) List() []string { out := []string{} prefix := []byte{benchtop.TablePrefix} - dr.Pb.View(func(it *pebblebulk.PebbleIterator) error { + dr.Pkv.View(func(it *pebblebulk.PebbleIterator) error { for it.Seek(prefix); it.Valid() && bytes.HasPrefix(it.Key(), prefix); it.Next() { value := benchtop.ParseTableKey(it.Key()) out = append(out, string(value)) @@ -276,33 +245,19 @@ func (dr *JSONDriver) Close() { log.Infoln("Closing JSONDriver...") for tableName, table := range dr.Tables { - table.handleLock.Lock() - if table.handle != nil { - if syncErr := table.handle.Sync(); syncErr != nil { - log.Errorf("Error syncing table %s handle: %v", tableName, syncErr) - } - if closeErr := table.handle.Close(); closeErr != nil { - log.Errorf("Error closing table %s handle: %v", tableName, closeErr) - } else { - log.Debugf("Closed table %s", tableName) - } - table.handle = nil - } - table.handleLock.Unlock() - table.Pb = nil + 
table.Close() // Closes all section handles and file pools + log.Debugf("Closed table %s", tableName) } - dr.Tables = make(map[string]*JSONTable) - if dr.db != nil { - if closeErr := dr.db.Close(); closeErr != nil { + dr.Tables = make(map[string]*jTable.JSONTable) + if dr.Pkv.Db != nil { + if closeErr := dr.Pkv.Db.Close(); closeErr != nil { log.Errorf("Error closing Pebble database: %v", closeErr) } - dr.db = nil + dr.Pkv.Db = nil time.Sleep(50 * time.Millisecond) } - dr.Pb = nil - dr.Fields = make(map[string]map[string]struct{}) + dr.Pkv = nil log.Infof("Successfully closed JSONDriver for path %s", dr.base) - return } func (dr *JSONDriver) Get(name string) (benchtop.TableStore, error) { @@ -321,53 +276,45 @@ func (dr *JSONDriver) Get(name string) (benchtop.TableStore, error) { } nkey := benchtop.NewTableKey([]byte(name)) - value, closer, err := dr.db.Get(nkey) + value, closer, err := dr.Pkv.Db.Get(nkey) if err != nil { log.Errorln("JSONDriver Get: ", err) return nil, err } defer closer.Close() tinfo := benchtop.TableInfo{} - sonic.ConfigFastest.Unmarshal(value, &tinfo) + if err := sonic.ConfigFastest.Unmarshal(value, &tinfo); err != nil { + return nil, fmt.Errorf("failed to unmarshal table info: %v", err) + } log.Debugf("Opening Table: %#v\n", tinfo) tPath := filepath.Join(dr.base, "TABLES", string(tinfo.FileName)) - f, err := os.OpenFile(tPath, os.O_RDWR|os.O_CREATE, 0644) - if err != nil { - return nil, fmt.Errorf("failed to open table %s: %v", tPath, err) - } - - out := &JSONTable{ - columns: tinfo.Columns, - db: dr.db, - columnMap: map[string]int{}, - TableId: tinfo.TableId, - handle: f, - handleLock: sync.RWMutex{}, - Path: tPath, - FileName: tinfo.FileName, - Name: name, - Pb: &pebblebulk.PebbleKV{ - Db: dr.db, - InsertCount: 0, - CompactLimit: uint32(1000), - }, - } - for n, d := range out.columns { - out.columnMap[d.Key] = n + out := &jTable.JSONTable{ + Columns: tinfo.Columns, + ColumnMap: map[string]int{}, + TableId: tinfo.TableId, + Path: tPath, + 
FileName: tPath, + Name: name, + Fields: map[string]struct{}{}, + ActiveSections: map[uint8]*section.Section{}, + FlushCounter: map[uint8]int{}, + MaxConcurrentSections: 10, + Sections: map[uint16]*section.Section{}, + PartitionMap: map[uint8][]uint16{}, + SectionLock: sync.Mutex{}, + } + for n, d := range out.Columns { + out.ColumnMap[d.Key] = n } - if out.FilePool == nil { - if err := out.Init(10); err != nil { - f.Close() - return nil, fmt.Errorf("failed to init table %s: %v", name, err) - } + if err := out.Init(10); err != nil { + return nil, fmt.Errorf("failed to init table %s: %v", name, err) } dr.Tables[name] = out return out, nil } -// Currently not used func (dr *JSONDriver) Delete(name string) error { dr.Lock.Lock() defer dr.Lock.Unlock() @@ -377,260 +324,15 @@ func (dr *JSONDriver) Delete(name string) error { return fmt.Errorf("table %s does not exist", name) } - table.handleLock.Lock() - defer table.handleLock.Unlock() + table.Close() // Close all section files - if table.handle != nil { - if err := table.handle.Close(); err != nil { - log.Errorf("Error closing table %s handle: %v", name, err) + // Delete all section files for the table + for _, sec := range table.Sections { + if err := os.Remove(sec.Path); err != nil { + log.Errorf("Failed to delete section file %s: %v", sec.Path, err) } - table.handle = nil - } - - tPath := filepath.Join(dr.base, "TABLES", string(table.FileName)) - if err := os.Remove(tPath); err != nil { - return fmt.Errorf("failed to delete table file %s: %v", tPath, err) } delete(dr.Tables, name) dr.dropTable(name) return nil } - -// BulkLoad -// tx: set null to initialize pebble bulk write context -// BulkLoad -// tx: set null to initialize pebble bulk write context -func (dr *JSONDriver) BulkLoad(inputs chan *benchtop.Row, tx *pebblebulk.PebbleBulk) error { - - if dr.Pb == nil || dr.Pb.Db == nil { - return fmt.Errorf("pebble database instance is nil") - } - var wg sync.WaitGroup - tableChannels := make(map[string]chan 
*benchtop.Row) - - // New struct to hold the individual elements of a field key - type fieldKeyElements struct { - field string - tableName string - val any - rowId string - } - - metadataChan := make(chan struct { - table *JSONTable - fieldIndexKeyElements []fieldKeyElements // Changed to the new struct - metadata map[string]benchtop.RowLoc - err error - }, 100) - - startTableGoroutine := func(tableName string) { - snapshot := dr.Pb.Db.NewSnapshot() - - ch := make(chan *benchtop.Row, 100) - tableChannels[tableName] = ch - wg.Add(1) - go func() { - defer func() { - snapshot.Close() - wg.Done() - }() - var fieldIndexKeyElements []fieldKeyElements // Changed variable name - metadata := make(map[string]benchtop.RowLoc) - var localErr *multierror.Error - - dr.Lock.RLock() - table, exists := dr.Tables[tableName] - dr.Lock.RUnlock() - if !exists { - newTable, err := dr.New(tableName, nil) - if err != nil { - localErr = multierror.Append(localErr, fmt.Errorf("failed to create table %s: %v", tableName, err)) - metadataChan <- struct { - table *JSONTable - fieldIndexKeyElements []fieldKeyElements - metadata map[string]benchtop.RowLoc - err error - }{nil, nil, nil, localErr.ErrorOrNil()} - return - } - table = newTable.(*JSONTable) - dr.Lock.Lock() - dr.Tables[tableName] = table - dr.Lock.Unlock() - } - for { - batch := make([]*benchtop.Row, 0, BATCH_SIZE) - for range BATCH_SIZE { - row, ok := <-ch - if !ok { - break - } - batch = append(batch, row) - } - if len(batch) == 0 { - break - } - - bDatas := make([][]byte, 0, BATCH_SIZE) - ids := make([]string, 0, BATCH_SIZE) - for _, row := range batch { - _, fieldsExist := dr.Fields[tableName] - if fieldsExist { - for field := range dr.Fields[tableName] { - if val := PathLookup(row.Data, field); val != nil { - // Append the individual key elements to the new slice - fieldIndexKeyElements = append(fieldIndexKeyElements, fieldKeyElements{ - field: field, - tableName: tableName, - val: val, - rowId: string(row.Id), - }) - } - } - } 
- - bData, err := sonic.ConfigFastest.Marshal( - table.packData(row.Data, string(row.Id)), - ) - if err != nil { - localErr = multierror.Append(localErr, fmt.Errorf("marshal data error for table %s: %v", tableName, err)) - continue - } - - info, err := table.getTableEntryInfo(snapshot, row.Id) - if err != nil { - localErr = multierror.Append(localErr, fmt.Errorf("error getting entry info for %s: %v", row.Id, err)) - continue - } - - if info == nil { - bDatas = append(bDatas, bData) - ids = append(ids, string(row.Id)) - } - } - if len(bDatas) == 0 { - continue - } - - table.handleLock.Lock() - startOffset, err := table.handle.Seek(0, io.SeekEnd) - if err != nil { - localErr = multierror.Append(localErr, fmt.Errorf("seek error for table %s: %v", tableName, err)) - table.handleLock.Unlock() - continue - } - - offsets := make([]uint64, len(bDatas)+1) - offsets[0] = uint64(startOffset) - totalLen := 0 - for i, bData := range bDatas { - offsets[i+1] = offsets[i] + ROW_HSIZE + uint64(len(bData)) - totalLen += ROW_HSIZE + len(bData) - } - - batchData := make([]byte, totalLen) - pos := 0 - for i, bData := range bDatas { - binary.LittleEndian.PutUint64(batchData[pos:pos+ROW_OFFSET_HSIZE], offsets[i+1]) - binary.LittleEndian.PutUint32(batchData[pos+ROW_OFFSET_HSIZE:pos+ROW_HSIZE], uint32(len(bData))) - pos += ROW_HSIZE + len(bData) - copy(batchData[pos-len(bData):pos], bData) - } - - _, err = table.handle.Write(batchData) - if err != nil { - localErr = multierror.Append(localErr, fmt.Errorf("write error for table %s: %v", tableName, err)) - table.handleLock.Unlock() - continue - } - table.handleLock.Unlock() - - // Record metadata for each record in the batch - for i, id := range ids { - metadata[id] = benchtop.RowLoc{Offset: offsets[i], Size: uint64(len(bDatas[i])), Label: table.TableId} - } - } - - metadataChan <- struct { - table *JSONTable - fieldIndexKeyElements []fieldKeyElements - metadata map[string]benchtop.RowLoc - err error - }{table, fieldIndexKeyElements, 
metadata, localErr.ErrorOrNil()} - }() - } - - for row := range inputs { - tableName := row.TableName - if _, exists := tableChannels[tableName]; !exists { - startTableGoroutine(tableName) - } - tableChannels[tableName] <- row - } - - for _, ch := range tableChannels { - close(ch) - } - - var errs *multierror.Error - done := make(chan struct{}) - go func() { - defer close(done) - - writeFunc := func(tx *pebblebulk.PebbleBulk) error { - for meta := range metadataChan { - if meta.err != nil { - errs = multierror.Append(errs, meta.err) - continue - } - if meta.table == nil { - continue - } - - for _, keyElements := range meta.fieldIndexKeyElements { - forwardKey := benchtop.FieldKey(keyElements.field, keyElements.tableName, keyElements.val, []byte(keyElements.rowId)) - err := tx.Set(forwardKey, []byte{}, nil) - if err != nil { - errs = multierror.Append(errs, err) - } - - BVal, err := sonic.ConfigFastest.Marshal(keyElements.val) - if err != nil { - errs = multierror.Append(errs, err) - } - err = tx.Set(benchtop.RFieldKey( - keyElements.tableName, keyElements.field, keyElements.rowId, - ), - BVal, nil) - if err != nil { - errs = multierror.Append(errs, err) - } - } - - for id, m := range meta.metadata { - dr.PageCache.Set(id, m) - meta.table.AddTableEntryInfo(tx, []byte(id), m) - } - } - return nil - } - - var err error - if tx == nil { - errs = multierror.Append(errs, fmt.Errorf("pebble bulk instance passed into BulkLoad function is nil")) - } else { - dr.PebbleLock.Lock() - err = writeFunc(tx) - dr.PebbleLock.Unlock() - } - if err != nil { - errs = multierror.Append(errs, err) - } - }() - - wg.Wait() - close(metadataChan) - <-done - - return errs.ErrorOrNil() -} diff --git a/jsontable/fields.go b/jsontable/fields.go index d9376b8..f71c7ab 100644 --- a/jsontable/fields.go +++ b/jsontable/fields.go @@ -9,19 +9,26 @@ import ( "github.com/bytedance/sonic" "github.com/bmeg/benchtop/filters" + "github.com/bmeg/benchtop/jsontable/tpath" + 
"github.com/bmeg/benchtop/pebblebulk" "github.com/bmeg/grip/gripql" ) func (dr *JSONDriver) AddField(label, field string) error { - dr.Lock.Lock() - defer dr.Lock.Unlock() - foundTable, ok := dr.Tables[label] if !ok { + _, err := dr.New(label, nil) + if err != nil { + return err + } + + dr.Lock.Lock() + defer dr.Lock.Unlock() + log.Debugf("Creating index '%s' for table '%s' that has not been written yet", field, label) // If the table doesn't yet exist, write the index Key stub. - err := dr.db.Set( + err = dr.Pkv.Set( benchtop.FieldKey(field, label, nil, nil), []byte{}, nil, @@ -30,7 +37,7 @@ func (dr *JSONDriver) AddField(label, field string) error { log.Errorf("Err attempting to add field %v", err) return err } - err = dr.db.Set( + err = dr.Pkv.Set( bytes.Join([][]byte{ benchtop.RFieldPrefix, []byte(label), @@ -45,12 +52,15 @@ func (dr *JSONDriver) AddField(label, field string) error { } } else { + dr.Lock.Lock() + defer dr.Lock.Unlock() + log.Debugf("Found table %s writing indices for field %s", label, field) - err := dr.Pb.BulkWrite(func(tx *pebblebulk.PebbleBulk) error { + err := dr.Pkv.BulkWrite(func(tx *pebblebulk.PebbleBulk) error { var filter benchtop.RowFilter = nil - for r := range foundTable.Scan(true, filter) { - fieldValue := PathLookup(r.(map[string]any), field) - rowId, ok := r.(map[string]any)["_id"].(string) + for r := range foundTable.ScanDoc(filter) { + fieldValue := tpath.PathLookup(r, field) + rowId, ok := r["_id"].(string) if !ok { return fmt.Errorf("_id field not found or is not string in map %s", r) } @@ -85,16 +95,14 @@ func (dr *JSONDriver) AddField(label, field string) error { } } - innerMap, existsLabel := dr.Fields[label] - if !existsLabel { - innerMap = make(map[string]struct{}) - dr.Fields[label] = innerMap + if dr.Tables[label].Fields == nil { + dr.Tables[label].Fields = map[string]struct{}{} } - if _, existsField := innerMap[field]; existsField { + if _, existsField := dr.Tables[label].Fields[field]; existsField { return 
fmt.Errorf("index label '%s' field '%s' already exists", label, field) } - innerMap[field] = struct{}{} - log.Debugln("List Fields: ", dr.Fields) + dr.Tables[label].Fields[field] = struct{}{} + log.Debugln("List Fields: ", dr.Tables[label].Fields) return nil } @@ -103,13 +111,9 @@ func (dr *JSONDriver) RemoveField(label string, field string) error { dr.Lock.Lock() defer dr.Lock.Unlock() - if fieldsForLabel, ok := dr.Fields[label]; ok { - delete(fieldsForLabel, field) - if len(fieldsForLabel) == 0 { - delete(dr.Fields, label) - } + if table, ok := dr.Tables[label]; ok { + delete(table.Fields, field) } - FieldPrefix := benchtop.FieldLabelKey(field, label) RFieldKeyPrefix := bytes.Join([][]byte{ benchtop.RFieldPrefix, @@ -118,7 +122,7 @@ func (dr *JSONDriver) RemoveField(label string, field string) error { }, benchtop.FieldSep) // Perform deletion in a bulk write transaction - err := dr.Pb.BulkWrite(func(tx *pebblebulk.PebbleBulk) error { + err := dr.Pkv.BulkWrite(func(tx *pebblebulk.PebbleBulk) error { if err := tx.DeletePrefix(FieldPrefix); err != nil { return fmt.Errorf("delete field prefix failed: %w", err) } @@ -138,21 +142,26 @@ func (dr *JSONDriver) LoadFields() error { * Not sure wether to use a cache here as well or keep it how it is. 
*/ fPrefix := benchtop.FieldPrefix - dr.Lock.Lock() - defer dr.Lock.Unlock() count := 0 - err := dr.Pb.View(func(it *pebblebulk.PebbleIterator) error { + err := dr.Pkv.View(func(it *pebblebulk.PebbleIterator) error { for it.Seek(fPrefix); it.Valid() && bytes.HasPrefix(it.Key(), fPrefix); it.Next() { field, label, _, _ := benchtop.FieldKeyParse(it.Key()) - if _, exists := dr.Fields[label]; !exists { - dr.Fields[label] = make(map[string]struct{}) + if _, exists := dr.Tables[label]; !exists { + _, err := dr.New(label, nil) + if err != nil { + return err + } + + } + if dr.Tables[label].Fields == nil { + dr.Tables[label].Fields = make(map[string]struct{}) } - if _, exists := dr.Fields[label][field]; !exists { - dr.Fields[label][field] = struct{}{} + if _, exists := dr.Tables[label].Fields[field]; !exists { + dr.Tables[label].Fields[field] = struct{}{} count++ } } - log.Debugf("Loaded %d indices", len(dr.Fields)) + log.Debugf("Loaded %d indices", count) return nil }) if err != nil { @@ -175,12 +184,15 @@ func (dr *JSONDriver) ListFields() []FieldInfo { defer dr.Lock.RUnlock() var out []FieldInfo - for label, fieldsMap := range dr.Fields { - for fieldName := range fieldsMap { - if label[:2] == "v_" { - out = append(out, FieldInfo{Label: label[2:], Field: fieldName}) - } else { - out = append(out, FieldInfo{Label: label, Field: fieldName}) + for _, table := range dr.Tables { + if table.Fields != nil { + for fieldName, _ := range table.Fields { + if table.Name[:2] == "v_" { + out = append(out, FieldInfo{Label: table.Name[2:], Field: fieldName}) + } else { + out = append(out, FieldInfo{Label: table.Name, Field: fieldName}) + } + } } } @@ -195,17 +207,19 @@ func (dr *JSONDriver) DeleteRowField(label, field, rowID string) error { // Check if the table exists _, ok := dr.Tables[label] if !ok { - log.Errorf("Table '%s' does not exist", label) - return fmt.Errorf("table '%s' does not exist", label) + _, err := dr.New(label, nil) + if err != nil { + return err + } + } - // Check 
if the field exists - innerMap, existsLabel := dr.Fields[label] - if !existsLabel || innerMap == nil { + if len(dr.Tables[label].Fields) <= 0 { log.Errorf("No fields defined for table '%s'", label) return fmt.Errorf("no fields defined for table '%s'", label) } - if _, existsField := innerMap[field]; !existsField { + + if _, existsField := dr.Tables[label].Fields[field]; !existsField { log.Errorf("Field '%s' does not exist in table '%s'", field, label) return fmt.Errorf("field '%s' does not exist in table '%s'", field, label) } @@ -213,7 +227,7 @@ func (dr *JSONDriver) DeleteRowField(label, field, rowID string) error { // Get the field value from the reverse index rowIndexKey := benchtop.RFieldKey(label, field, rowID) var fieldValueBytes []byte - err := dr.Pb.View(func(it *pebblebulk.PebbleIterator) error { + err := dr.Pkv.View(func(it *pebblebulk.PebbleIterator) error { var err error if it.Seek(rowIndexKey); it.Valid() && bytes.Equal(it.Key(), rowIndexKey) { fieldValueBytes, err = it.Value() @@ -239,10 +253,9 @@ func (dr *JSONDriver) DeleteRowField(label, field, rowID string) error { log.Errorf("Error deserializing field value for row '%s' in table '%s' for field '%s': %v", rowID, label, field, err) return err } - fmt.Println("FIELD VALUE ANY: ", fieldValue) // Delete both the forward and reverse index entries - err = dr.Pb.BulkWrite(func(tx *pebblebulk.PebbleBulk) error { + err = dr.Pkv.BulkWrite(func(tx *pebblebulk.PebbleBulk) error { if err := tx.Delete(benchtop.FieldKey(field, label, fieldValue, []byte(rowID)), nil); err != nil { return err } @@ -271,7 +284,7 @@ func (dr *JSONDriver) RowIdsByHas(fltField string, fltValue any, fltOp gripql.Co out := make(chan string, 100) go func() { defer close(out) - err := dr.Pb.View(func(it *pebblebulk.PebbleIterator) error { + err := dr.Pkv.View(func(it *pebblebulk.PebbleIterator) error { for it.Seek(prefix); it.Valid() && bytes.HasPrefix(it.Key(), prefix); it.Next() { _, _, value, rowID := benchtop.FieldKeyParse(it.Key()) 
if filters.ApplyFilterCondition( @@ -301,7 +314,7 @@ func (dr *JSONDriver) RowIdsByLabelFieldValue(fltLabel string, fltField string, out := make(chan string, 100) go func() { defer close(out) - err := dr.Pb.View(func(it *pebblebulk.PebbleIterator) error { + err := dr.Pkv.View(func(it *pebblebulk.PebbleIterator) error { for it.Seek(prefix); it.Valid() && bytes.HasPrefix(it.Key(), prefix); it.Next() { _, _, value, rowID := benchtop.FieldKeyParse(it.Key()) if filters.ApplyFilterCondition( @@ -318,7 +331,6 @@ func (dr *JSONDriver) RowIdsByLabelFieldValue(fltLabel string, fltField string, if err != nil { log.Errorf("Error in View for field %s: %s", fltField, err) } - return }() return out } @@ -338,8 +350,8 @@ func (dr *JSONDriver) GetIDsForLabel(label string) chan string { } var filter benchtop.RowFilter = nil - for id := range table.Scan(false, filter) { - out <- id.(string) + for id := range table.ScanId(filter) { + out <- id } }() return out diff --git a/jsontable/driverhelpers.go b/jsontable/helpers.go similarity index 54% rename from jsontable/driverhelpers.go rename to jsontable/helpers.go index c487c5d..286c9b8 100644 --- a/jsontable/driverhelpers.go +++ b/jsontable/helpers.go @@ -7,6 +7,7 @@ import ( "github.com/bmeg/benchtop/pebblebulk" "github.com/bmeg/grip/log" "github.com/bytedance/sonic" + "github.com/cockroachdb/pebble" ) // Specify a table type prefix to differentiate between edge tables and vertex tables @@ -15,7 +16,7 @@ func (dr *JSONDriver) getMaxTablePrefix() uint16 { prefix := []byte{benchtop.TablePrefix} maxID := uint16(0) - dr.Pb.View(func(it *pebblebulk.PebbleIterator) error { + dr.Pkv.View(func(it *pebblebulk.PebbleIterator) error { for it.Seek(prefix); it.Valid() && bytes.HasPrefix(it.Key(), prefix); it.Next() { // fishing for edge cases if maxID == ^uint16(0) { @@ -31,17 +32,17 @@ func (dr *JSONDriver) getMaxTablePrefix() uint16 { func (dr *JSONDriver) addTable(Name string, TinfoMarshal []byte) error { nkey := 
benchtop.NewTableKey([]byte(Name)) - return dr.db.Set(nkey, TinfoMarshal, nil) + return dr.Pkv.Set(nkey, TinfoMarshal, nil) } func (dr *JSONDriver) dropTable(name string) error { nkey := benchtop.NewTableKey([]byte(name)) - return dr.db.Delete(nkey, nil) + return dr.Pkv.Delete(nkey, nil) } func (dr *JSONDriver) getTableInfo(name string) (benchtop.TableInfo, error) { - value, closer, err := dr.db.Get([]byte(name)) + value, closer, err := dr.Pkv.Get([]byte(name)) if err != nil { return benchtop.TableInfo{}, err } @@ -50,3 +51,32 @@ func (dr *JSONDriver) getTableInfo(name string) (benchtop.TableInfo, error) { closer.Close() return tinfo, nil } + +func (dr *JSONDriver) AddTableEntryInfo(tx *pebblebulk.PebbleBulk, rowId []byte, rowLoc *benchtop.RowLoc) error { + value := benchtop.EncodeRowLoc(rowLoc) + posKey := benchtop.NewPosKey(rowLoc.TableId, rowId) + if tx != nil { + err := tx.Set(posKey, value, nil) + if err != nil { + return err + } + } else { + err := dr.Pkv.Set(posKey, value, nil) + if err != nil { + return err + } + } + return nil +} + +func (dr *JSONDriver) GetLocFromTableKey(id []byte) (loc *benchtop.RowLoc, err error) { + val, closer, err := dr.Pkv.Get(benchtop.NewPosKey(loc.TableId, id)) + if err != nil { + if err != pebble.ErrNotFound { + log.Errorln("GetLocFromTableKey Err: ", err) + } + return nil, err + } + defer closer.Close() + return benchtop.DecodeRowLoc(val), nil +} diff --git a/jsontable/index.go b/jsontable/labels.go similarity index 91% rename from jsontable/index.go rename to jsontable/labels.go index e6bb614..bc68b54 100644 --- a/jsontable/index.go +++ b/jsontable/labels.go @@ -18,7 +18,7 @@ func (dr *JSONDriver) GetAllColNames() chan string { go func() { defer close(out) prefix := []byte{benchtop.TablePrefix} - dr.Pb.View(func(it *pebblebulk.PebbleIterator) error { + dr.Pkv.View(func(it *pebblebulk.PebbleIterator) error { for it.Seek(prefix); it.Valid() && bytes.HasPrefix(it.Key(), prefix); it.Next() { info, err := 
dr.getTableInfo(string(it.Key())) if err != nil { @@ -42,7 +42,7 @@ func (dr *JSONDriver) GetLabels(edges bool, removePrefix bool) chan string { go func() { defer close(out) prefix := []byte{benchtop.TablePrefix} - dr.Pb.View(func(it *pebblebulk.PebbleIterator) error { + dr.Pkv.View(func(it *pebblebulk.PebbleIterator) error { for it.Seek(prefix); it.Valid() && bytes.HasPrefix(it.Key(), prefix); it.Next() { strKey := string(benchtop.ParseTableKey(it.Key())) if (edges && strKey[:2] == "e_") || (!edges && strKey[:2] == "v_") { diff --git a/jsontable/section/section.go b/jsontable/section/section.go new file mode 100644 index 0000000..f8ee286 --- /dev/null +++ b/jsontable/section/section.go @@ -0,0 +1,159 @@ +package section + +import ( + "encoding/binary" + "fmt" + "os" + "sync" + + "github.com/DataDog/zstd" + "github.com/bmeg/benchtop" + "github.com/edsrzf/mmap-go" +) + +const ( + INITIAL_SECTION_SIZE = 1 << 20 // 1 MB + GROWTH_INCREMENT_SIZE = 1 << 24 // 16 MB + MAX_SECTION_SIZE = 65 * 1024 * 1024 // 65MB +) + +// Section represents a physical file within a partition +type Section struct { + ID uint16 // Global unique section ID (for RowLoc) + PartitionID uint8 // Partition this section belongs to + Path string // File path (e.g., table.data.partition0.section1) + File *os.File + FilePool chan *os.File // Pool for read/write access + Lock sync.RWMutex // Per-section lock + TotalRows uint32 // Total rows (live + deleted) + DeletedRows uint32 // Deleted rows (for compaction trigger) + LiveBytes uint32 // Live data size (bytes) + Active bool // True unless compacted/merged + + MMap mmap.MMap // cached read-only mmap + MMapMode int // mmap.RDWR or mmap.RDONLY + CompressScratch []byte +} + +func (s *Section) WriteJsonEntryToSection(payload []byte) (*benchtop.RowLoc, error) { + s.Lock.Lock() + defer s.Lock.Unlock() + + cPayload, err := zstd.Compress(s.CompressScratch[:0], payload) + if err != nil { + return nil, fmt.Errorf("compress failed: %w", err) + } + + compressedLen 
:= uint32(len(cPayload)) + writeEnd := s.LiveBytes + benchtop.ROW_HSIZE + compressedLen + + // Check if write is outside the CURRENT mapped region + if writeEnd > uint32(len(s.MMap)) { + currentSize := int64(len(s.MMap)) + requiredSize := int64(writeEnd) + + newSize := requiredSize + if newSize%GROWTH_INCREMENT_SIZE != 0 { + newSize = (requiredSize/GROWTH_INCREMENT_SIZE + 1) * GROWTH_INCREMENT_SIZE + } + if newSize > MAX_SECTION_SIZE { + newSize = MAX_SECTION_SIZE + } + if newSize > currentSize { + if err := s.GrowAndRemap(newSize); err != nil { + return nil, fmt.Errorf("failed to grow section to %d: %w", newSize, err) + } + } else { + // This should not happen if logic is correct, but safe to check + return nil, fmt.Errorf("mmap too small even after considering max size: %d < %d", len(s.MMap), writeEnd) + } + } + + oldLiveBytes := s.LiveBytes + nextOffset := s.LiveBytes + benchtop.ROW_HSIZE + compressedLen + + headerTarget := s.MMap[oldLiveBytes : oldLiveBytes+benchtop.ROW_HSIZE] + binary.LittleEndian.PutUint32(headerTarget[:4], nextOffset) // next row offset + binary.LittleEndian.PutUint32(headerTarget[4:], compressedLen) // compressed size + copy(s.MMap[oldLiveBytes+benchtop.ROW_HSIZE:], cPayload) + s.LiveBytes = nextOffset + // Save the buffer for next time. If the buffer allocated to be larger, use the larger one. 
+ s.CompressScratch = cPayload + return &benchtop.RowLoc{ + Section: s.ID, + Offset: oldLiveBytes, + Size: compressedLen, + }, nil +} + +func (s *Section) CloseSection() error { + if !s.Active { + return nil + } + s.Lock.Lock() + if err := s.MMap.Flush(); err != nil { + return err + } + if s.File != nil { + if err := s.File.Sync(); err != nil { + return err + } + } + s.Lock.Unlock() + s.Active = false + return nil +} + +// RemapReadOnly converts RDWR → RDONLY mmap +func (s *Section) RemapReadOnly() error { + if s.MMap != nil { + s.MMap.Unmap() + } + + roFile, err := os.Open(s.Path) + if err != nil { + return err + } + defer roFile.Close() + + m, err := mmap.Map(roFile, mmap.RDONLY, 0) + if err != nil { + return err + } + + s.MMap = m + s.MMapMode = mmap.RDONLY + return nil +} + +func (s *Section) GrowAndRemap(newSize int64) error { + // 1. Unmap the old region + if s.MMap != nil { + // Crucial: ensure any pending data is flushed before unmap + if err := s.MMap.Flush(); err != nil { + return fmt.Errorf("flush before unmap failed: %w", err) + } + if err := s.File.Sync(); err != nil { + return fmt.Errorf("sync failed: %w", err) + } + if err := s.MMap.Unmap(); err != nil { + return fmt.Errorf("unmap failed: %w", err) + } + s.MMap = nil // Clear the old map object + } + + // 2. Truncate the file to the new, larger size + // Note: Use s.File (the main handle) for Truncate + if err := s.File.Truncate(newSize); err != nil { + return fmt.Errorf("truncate to size %d failed: %w", newSize, err) + } + + // 3. 
Map the new, larger region + newMMap, err := mmap.Map(s.File, s.MMapMode, 0) + if err != nil { + return fmt.Errorf("mmap failed after resize: %w", err) + } + + s.MMap = newMMap + return nil +} diff --git a/jsontable/table.go b/jsontable/table.go deleted file mode 100644 index ddc8ed0..0000000 --- a/jsontable/table.go +++ /dev/null @@ -1,586 +0,0 @@ -package jsontable - -import ( - "bufio" - "bytes" - "encoding/binary" - "fmt" - "io" - "os" - "path/filepath" - "regexp" - "strconv" - "strings" - "sync" - - "github.com/bmeg/benchtop" - "github.com/bmeg/benchtop/pebblebulk" - "github.com/bmeg/grip/log" - "github.com/edsrzf/mmap-go" - multierror "github.com/hashicorp/go-multierror" - - "github.com/bytedance/sonic" - "github.com/cockroachdb/pebble" -) - -type JSONTable struct { - Pb *pebblebulk.PebbleKV - db *pebble.DB - columns []benchtop.ColumnDef - columnMap map[string]int - - FilePool chan *os.File - handle *os.File - handleLock sync.RWMutex - TableId uint16 - - Path string - Name string - FileName string -} - -func (b *JSONTable) Init(poolSize int) error { - b.FilePool = make(chan *os.File, poolSize) - for i := range poolSize { - file, err := os.OpenFile(b.Path, os.O_RDWR, 0666) - if err != nil { - for range i { - if file, ok := <-b.FilePool; ok { - file.Close() - } - } - return fmt.Errorf("failed to init file pool for %s: %v", b.Path, err) - } - b.FilePool <- file - } - return nil -} - -func (b *JSONTable) GetColumnDefs() []benchtop.ColumnDef { - return b.columns -} - -func (b *JSONTable) Close() { - if b.FilePool != nil { - for len(b.FilePool) > 0 { - if file, ok := <-b.FilePool; ok { - file.Close() - } - } - close(b.FilePool) - } - //because the table could be opened by other threads, don't actually close -} - -/* -//////////////////////////////////////////////////////////////// -Unary single effect operations -*/ -func (b *JSONTable) AddRow(elem benchtop.Row) (*benchtop.RowLoc, error) { - - bData, err := sonic.ConfigFastest.Marshal( - b.packData(elem.Data, 
string(elem.Id)), - ) - if err != nil { - return nil, err - } - - //append to end of block file - b.handleLock.Lock() - defer b.handleLock.Unlock() - offset, err := b.handle.Seek(0, io.SeekEnd) - if err != nil { - return nil, err - } - - //log.Debugln("WRITE ENTRY: ", offset, len(bData)) - writesize, err := b.writeJsonEntry(offset, bData) - if err != nil { - log.Errorf("write handler err in Load: bulkSet: %s", err) - return nil, err - } - - return &benchtop.RowLoc{ - Offset: uint64(offset), - Size: uint64(writesize), - Label: b.TableId, - }, nil -} - -func (b *JSONTable) GetRow(loc benchtop.RowLoc) (map[string]any, error) { - - file := <-b.FilePool - defer func() { - b.FilePool <- file - }() - - // Offset skip the first 8 bytes since they are for getting the offset for a scan operation - _, err := file.Seek(int64(loc.Offset+12), io.SeekStart) - if err != nil { - return nil, err - } - - decoder := sonic.ConfigFastest.NewDecoder(io.LimitReader(file, int64(loc.Size))) - var m RowData - err = decoder.Decode(&m) - if err != nil { - if err == io.EOF { - return nil, fmt.Errorf("JSON data for row at offset %d, size %d was incomplete: %w", loc.Offset, loc.Size, err) - } - return nil, fmt.Errorf("failed to decode JSON row at offset %d, size %d: %w", loc.Offset, loc.Size, err) - } - out, err := b.unpackData(true, false, &m) - if err != nil { - return nil, err - } - return out.(map[string]any), nil -} - -func (b *JSONTable) MarkDeleteTable(loc benchtop.RowLoc) error { - // Since we're not explicitly 'adding' to a part of the file, should be able - // to get away with no lock here since the space is just 'marked' as empty - file := <-b.FilePool - defer func() { - b.FilePool <- file - }() - if _, err := file.WriteAt([]byte{0x00, 0x00, 0x00, 0x00}, int64(loc.Offset+ROW_OFFSET_HSIZE)); err != nil { - return fmt.Errorf("writeAt failed: %w", err) - } - return nil -} - -func (b *JSONTable) DeleteRow(loc benchtop.RowLoc, id []byte) error { - b.handleLock.Lock() - defer 
b.handleLock.Unlock() - - if _, err := b.handle.WriteAt([]byte{0x00, 0x00, 0x00, 0x00}, int64(loc.Offset+ROW_OFFSET_HSIZE)); err != nil { - return fmt.Errorf("writeAt failed: %w", err) - } - err := b.db.Delete(benchtop.NewPosKey(b.TableId, id), nil) - if err != nil { - return err - } - return nil -} - -/* -//////////////////////////////////////////////////////////////// -Start of bulk, chan based functions -*/ -func (b *JSONTable) Keys() (chan benchtop.Index, error) { - out := make(chan benchtop.Index, 10) - go func() { - defer close(out) - prefix := benchtop.NewPosKeyPrefix(b.TableId) - b.Pb.View(func(it *pebblebulk.PebbleIterator) error { - for it.Seek(prefix); it.Valid() && bytes.HasPrefix(it.Key(), prefix); it.Next() { - _, value := benchtop.ParsePosKey(it.Key()) - out <- benchtop.Index{Key: value} - } - return nil - }) - }() - return out, nil -} - -func (b *JSONTable) Scan(loadData bool, filter benchtop.RowFilter) chan any { - outChan := make(chan any, 100) - go func() { - defer close(outChan) - handle := <-b.FilePool - _, err := handle.Seek(0, io.SeekStart) - if err != nil { - log.Errorln("Error in jsontable scan func", err) - return - } - - m, err := mmap.Map(handle, mmap.RDONLY, 0) - if err != nil { - log.Errorln("Error mapping file:", err) - return - } - - defer func() { - b.FilePool <- handle - defer m.Unmap() - }() - - // Process the memory-mapped data - offset := 0 - for offset+ROW_HSIZE <= len(m) { - header := m[offset : offset+ROW_HSIZE] - nextOffset := binary.LittleEndian.Uint64(header[:ROW_OFFSET_HSIZE]) - bSize := int32(binary.LittleEndian.Uint32(header[ROW_OFFSET_HSIZE:ROW_HSIZE])) - - if bSize == 0 || int64(bSize) == int64(nextOffset)-ROW_HSIZE { - offset = int(nextOffset) - continue - } - - jsonStart := offset + ROW_HSIZE - jsonEnd := jsonStart + int(bSize) - if jsonEnd > len(m) { - log.Debugf("Incomplete record at end of file at offset %d", offset) - break - } - - rowData := m[jsonStart:jsonEnd] - err = b.processJSONRowData(rowData, loadData, 
filter, outChan) - if err != nil { - log.Debugf("Skipping malformed row at offset %d: %v", offset, err) - } - offset = int(nextOffset) - - } - }() - return outChan -} - -// processBSONRowData handles the parsing of row bytes, -// applying filters, and sending the result to the output channel. -// It returns an error if the row is malformed or cannot be processed. -func (b *JSONTable) processJSONRowData( - rowData []byte, - loadData bool, - filter benchtop.RowFilter, - outChan chan any, -) error { - var val any - var err error - - if loadData || filter != nil && !filter.IsNoOp() { - var m RowData - sonic.ConfigFastest.Unmarshal(rowData, &m) - val, err = b.unpackData(true, true, &m) - if err != nil { - return err - } - } else { - val = rowData - } - - if filter == nil || filter.IsNoOp() || (!filter.IsNoOp() && filter.Matches(val)) { - if loadData { - outChan <- val - return nil - } - - node, err := sonic.Get(rowData, "1") - if err != nil { - log.Errorf("Error accessing JSON path for row data %s: %v\n", string(rowData), err) - return err - } - ID, err := node.Interface() - if err != nil { - log.Errorf("Error unmarshaling node: %v\n", err) - return err - } - outChan <- ID - } - return nil -} - -// Compact, Fetch, Load, And Remove methods are not currently being used in grip. 
-// Compact should be introduced into grip in a future PR since the heavy load and delete design approach that we are taking -func (b *JSONTable) Compact() error { - const flushThreshold = 1000 - flushCounter := 0 - b.handleLock.Lock() - defer b.handleLock.Unlock() - - tempFileName, err := filepath.Abs(b.handle.Name() + ".compact") - if err != nil { - return fmt.Errorf("failed to get absolute path for temp file: %w", err) - } - - tempHandle, err := os.Create(tempFileName) - if err != nil { - return fmt.Errorf("failed to create temp file: %w", err) - } - defer tempHandle.Close() - - oldHandle := b.handle - m, err := mmap.Map(oldHandle, mmap.RDONLY, 0) - if err != nil { - return fmt.Errorf("failed to map file: %w", err) - } - defer m.Unmap() - - writer := bufio.NewWriterSize(tempHandle, 16*1024*1024) - var newOffset uint64 = 0 - inputChan := make(chan benchtop.Index, 100) - - var wg sync.WaitGroup - wg.Add(1) - go func() { - defer wg.Done() - b.setDataIndices(inputChan) - }() - - offset := 0 - for offset+ROW_HSIZE <= len(m) { - header := m[offset : offset+ROW_HSIZE] - nextOffset := binary.LittleEndian.Uint64(header[:ROW_OFFSET_HSIZE]) - bSize := int32(binary.LittleEndian.Uint32(header[ROW_OFFSET_HSIZE:ROW_HSIZE])) - - if bSize == 0 || int64(nextOffset) == int64(12) { - if int64(nextOffset) > int64(offset) { - offset = int(nextOffset) - } - continue - } - - jsonStart := offset + 12 - jsonEnd := jsonStart + int(bSize) - if jsonEnd > len(m) { - return fmt.Errorf("incomplete JSON data at offset %d, size %d", offset, bSize) - } - - rowData := m[jsonStart:jsonEnd] - var mRow RowData - err = sonic.ConfigFastest.Unmarshal(rowData, &mRow) - if err != nil { - if err == io.EOF { - return fmt.Errorf("JSON data for row at offset %d, size %d was incomplete: %w", offset, bSize, err) - } - return fmt.Errorf("failed to decode JSON row at offset %d, size %d: %w", offset, bSize, err) - } - - node, err := sonic.Get(rowData, "1") - if err != nil { - return fmt.Errorf("failed to access ID 
field for row at offset %d: %w", offset, err) - } - key, err := node.String() - if err != nil { - return fmt.Errorf("failed to unmarshal ID field for row at offset %d: %w", offset, err) - } - inputChan <- benchtop.Index{Key: []byte(key), Position: newOffset, Size: uint64(bSize)} - - newOffsetBytes := make([]byte, 8) - binary.LittleEndian.PutUint64(newOffsetBytes, newOffset+uint64(bSize)+12) - - _, err = writer.Write(newOffsetBytes) - if err != nil { - return fmt.Errorf("failed writing new offset at %d: %w", newOffset, err) - } - _, err = writer.Write(rowData) - if err != nil { - return fmt.Errorf("failed writing JSON row at offset %d: %w", newOffset, err) - } - - flushCounter++ - if flushCounter%flushThreshold == 0 { - if err := writer.Flush(); err != nil { - return fmt.Errorf("failed flushing writer: %w", err) - } - } - - newOffset += uint64(bSize) + 8 - } - close(inputChan) - wg.Wait() - - if err := writer.Flush(); err != nil { - return fmt.Errorf("failed final flush of writer: %w", err) - } - if err := tempHandle.Sync(); err != nil { - return fmt.Errorf("failed syncing temp file: %w", err) - } - if err := tempHandle.Close(); err != nil { - return fmt.Errorf("failed closing temp file: %w", err) - } - if err := oldHandle.Close(); err != nil { - return fmt.Errorf("failed closing old handle: %w", err) - } - - fileName, err := filepath.Abs(b.handle.Name()) - if err != nil { - return fmt.Errorf("failed to get absolute path for file: %w", err) - } - if err := os.Rename(tempFileName, fileName); err != nil { - return fmt.Errorf("failed renaming compacted file: %w", err) - } - - newHandle, err := os.OpenFile(fileName, os.O_RDWR, 0644) - if err != nil { - return fmt.Errorf("failed reopening compacted file: %w", err) - } - b.handle = newHandle - - oldPool := b.FilePool - b.FilePool = make(chan *os.File, cap(oldPool)) - for range oldPool { - file, err := os.Open(b.Path) - if err != nil { - return fmt.Errorf("failed to refresh file pool: %w", err) - } - b.FilePool <- file - } 
- close(oldPool) - for file := range oldPool { - file.Close() - } - - return nil -} - -func (b *JSONTable) Fetch(inputs chan benchtop.Index, workers int) <-chan benchtop.BulkResponse { - results := make(chan benchtop.BulkResponse, workers) - var wg sync.WaitGroup - go func() { - for entry := range inputs { - wg.Add(1) - go func(index benchtop.Index) { - defer wg.Done() - val, closer, err := b.db.Get(benchtop.NewPosKey(b.TableId, index.Key)) - if err != nil { - results <- benchtop.BulkResponse{Key: index.Key, Data: nil, Err: func() string { - if err != nil { - return err.Error() - } - return "" - }()} - return - } - defer closer.Close() - - data, err := b.readFromFile(binary.LittleEndian.Uint64(val)) - if err != nil { - data = nil - } - - results <- benchtop.BulkResponse{Key: index.Key, Data: data, Err: func() string { - if err != nil { - return err.Error() - } - return "" - }()} - - }(entry) - } - wg.Wait() - close(results) - }() - return results -} - -func (b *JSONTable) Load(inputs chan benchtop.Row) error { - var errs *multierror.Error - b.handleLock.Lock() - defer b.handleLock.Unlock() - offset, err := b.handle.Seek(0, io.SeekEnd) - if err != nil { - return err - } - - err = b.Pb.BulkWrite(func(tx *pebblebulk.PebbleBulk) error { - for entry := range inputs { - - bData, err := sonic.Marshal( - b.packData(entry.Data, string(entry.Id)), - ) - if err != nil { - errs = multierror.Append(errs, err) - log.Errorf("json Marshall err in Load: bulkSet: %s", err) - } - - // make Next offset equal to existing offset + length of data - writeSize, err := b.writeJsonEntry(offset, bData) - if err != nil { - errs = multierror.Append(errs, err) - log.Errorf("write handler err in Load: bulkSet: %s", err) - } - b.AddTableEntryInfo(tx, entry.Id, benchtop.RowLoc{Offset: uint64(offset), Size: uint64(writeSize)}) - offset += int64(writeSize) + 8 - } - return nil - }) - if err != nil { - log.Errorf("Err: %s", err) - errs = multierror.Append(errs, err) - } - return errs.ErrorOrNil() - -} 
- -func (b *JSONTable) Remove(inputs chan benchtop.Index, workers int) <-chan benchtop.BulkResponse { - results := make(chan benchtop.BulkResponse, workers) - batchDeletes := make(chan benchtop.Index, workers) - - go func() { - for index := range batchDeletes { - err := b.db.Delete(benchtop.NewPosKey(b.TableId, index.Key), nil) - if err != nil { - results <- benchtop.BulkResponse{Key: index.Key, Data: nil, Err: func() string { - if err != nil { - return err.Error() - } - return "" - }()} - } - } - - close(results) - }() - - var wg sync.WaitGroup - go func() { - defer close(batchDeletes) - for index := range inputs { - wg.Add(1) - go func(index benchtop.Index) { - defer wg.Done() - - val, closer, err := b.db.Get(benchtop.NewPosKey(b.TableId, index.Key)) - if err != nil { - results <- benchtop.BulkResponse{Key: index.Key, Data: nil, Err: func() string { - if err != nil { - return err.Error() - } - return "" - }()} - return - } - defer closer.Close() - - offset := binary.LittleEndian.Uint64(val) - if err := b.markDelete(offset); err != nil { - results <- benchtop.BulkResponse{Key: index.Key, Data: nil, Err: func() string { - if err != nil { - return err.Error() - } - return "" - }()} - return - } - - batchDeletes <- index - results <- benchtop.BulkResponse{Key: index.Key, Data: nil, Err: ""} - }(index) - } - wg.Wait() - }() - - return results -} - -func ConvertJSONPathToArray(path string) ([]any, error) { - path = strings.TrimLeft(path, "./") - result := []any{"0"} - - re := regexp.MustCompile(`[^.\[\]]+|\[\d+\]`) - matches := re.FindAllString(path, -1) - for _, token := range matches { - if strings.HasPrefix(token, "[") && strings.HasSuffix(token, "]") { - numStr := token[1 : len(token)-1] - index, err := strconv.Atoi(numStr) - if err != nil { - return nil, fmt.Errorf("invalid array index: %s", token) - } - result = append(result, index) - } else { - result = append(result, token) - } - } - return result, nil -} diff --git a/jsontable/table/bLoad.go 
b/jsontable/table/bLoad.go new file mode 100644 index 0000000..f96ede8 --- /dev/null +++ b/jsontable/table/bLoad.go @@ -0,0 +1,190 @@ +package table + +import ( + "fmt" + "sync" + + "github.com/bmeg/benchtop" + "github.com/bmeg/benchtop/jsontable/section" + "github.com/bmeg/benchtop/jsontable/tpath" + "github.com/bmeg/grip/log" + "github.com/bytedance/sonic" + "github.com/cockroachdb/pebble" + multierror "github.com/hashicorp/go-multierror" +) + +type FieldKeyElements struct { + Field string + TableName string + Val any + RowId string +} + +type KitchenSink struct { + FieldIndexKeyElements []FieldKeyElements + Metadata map[string]*benchtop.RowLoc + Err error +} + +func (b *JSONTable) StartTableGoroutine( + wg *sync.WaitGroup, + metadataChan chan *KitchenSink, + snapshot *pebble.Snapshot, + batchSize int, +) chan *benchtop.Row { + ch := make(chan *benchtop.Row, batchSize) + wg.Add(1) + go func() { + defer func() { + // --- FINAL FLUSH ON EXIT --- + b.SectionLock.Lock() + for _, sec := range b.ActiveSections { + if sec.LiveBytes > 0 { + if err := sec.MMap.Flush(); err != nil { + log.Errorf("Final flush failed for section %d: %v", sec.ID, err) + } + err := sec.File.Sync() + if err != nil { + log.Errorf("File Sync failed in bulk load: %v", err) + } + } + } + b.SectionLock.Unlock() + wg.Done() + }() + + const FLUSH_EVERY = 1000 + var allFieldIndexKeyElements = make([]FieldKeyElements, 0, batchSize*len(b.Fields)) + allMetadata := make(map[string]*benchtop.RowLoc, batchSize) + var localErr *multierror.Error + + var flushCounter uint32 = 0 + for { + batch := make([]*benchtop.Row, 0, batchSize) + for range batchSize { + row, ok := <-ch + if !ok { + break + } + batch = append(batch, row) + } + if len(batch) == 0 { + break + } + + newRows := make([]*benchtop.Row, 0, len(batch)) + for _, row := range batch { + info, err := b.GetTableEntryInfo(snapshot, row.Id) + if err != nil { + localErr = multierror.Append(localErr, fmt.Errorf("error getting entry info for %s: %v", row.Id, 
err)) + continue + } + if info == nil { + newRows = append(newRows, row) + for field := range b.Fields { + if val := tpath.PathLookup(row.Data, field); val != nil { + allFieldIndexKeyElements = append(allFieldIndexKeyElements, FieldKeyElements{ + Field: field, + TableName: b.Name, + Val: val, + RowId: string(row.Id), + }) + } + } + } + } + + if len(newRows) == 0 { + continue + } + + rowsByPartition := make(map[uint8][]*benchtop.Row) + for _, row := range newRows { + partitionId := b.PartitionFunc(row.Id) + rowsByPartition[partitionId] = append(rowsByPartition[partitionId], row) + } + + for partitionId, rowsInPartition := range rowsByPartition { + if len(rowsInPartition) == 0 { + continue + } + + bDatas := make([][]byte, 0, len(rowsInPartition)) + rowIds := make([]string, 0, len(rowsInPartition)) + var totalUncompressedSize uint32 + + for _, row := range rowsInPartition { + bData, err := sonic.ConfigFastest.Marshal(b.PackData(row.Data, string(row.Id))) + if err != nil { + localErr = multierror.Append(localErr, fmt.Errorf("marshal error for row %s: %v", row.Id, err)) + continue + } + bDatas = append(bDatas, bData) + rowIds = append(rowIds, string(row.Id)) + totalUncompressedSize += uint32(len(bData)) + 8 + } + if len(bDatas) == 0 { + continue + } + + sec := b.ActiveSections[partitionId] // This is the section active for writing + if sec == nil { + // This should not happen if Init is correct, but add recovery/guard + var err error + sec, err = b.CreateNewSection(partitionId) + if err != nil { + localErr = multierror.Append(localErr, fmt.Errorf("failed to get or create active section for partition %d: %v", partitionId, err)) + continue + } + } + + // --- ROTATE SECTION IF FULL --- + if sec.LiveBytes+totalUncompressedSize > section.MAX_SECTION_SIZE { + // Flush old section before rotating + if sec.LiveBytes > 0 { + err := sec.CloseSection() + if err != nil { + localErr = multierror.Append(localErr, err) + } + } + + newSec, err := b.CreateNewSection(partitionId) + if 
err != nil { + localErr = multierror.Append(localErr, fmt.Errorf("failed to create new section for partition %d: %v", partitionId, err)) + continue + } + sec = newSec + } + + for i, bData := range bDatas { + rowLoc, err := sec.WriteJsonEntryToSection(bData) + if err != nil { + localErr = multierror.Append(localErr, fmt.Errorf("write error for row %s in section %d: %v", rowIds[i], sec.ID, err)) + continue + } + rowLoc.TableId = b.TableId + allMetadata[rowIds[i]] = rowLoc + + // --- PERIODIC FLUSH --- + flushCounter++ + /*if flushCounter >= FLUSH_EVERY { + sec.Lock.Lock() + if err := sec.MMap.Flush(); err != nil { + log.Errorf("Periodic flush failed for section %d: %v", sec.ID, err) + } + sec.Lock.Unlock() + flushCounter = 0 + }*/ + } + sec.TotalRows += uint32(len(bDatas)) + } + } + + metadataChan <- &KitchenSink{ + FieldIndexKeyElements: allFieldIndexKeyElements, + Metadata: allMetadata, + Err: localErr.ErrorOrNil(), + } + }() + return ch +} diff --git a/jsontable/table/helpers.go b/jsontable/table/helpers.go new file mode 100644 index 0000000..bf98021 --- /dev/null +++ b/jsontable/table/helpers.go @@ -0,0 +1,32 @@ +package table + +import ( + "github.com/bmeg/benchtop" + "github.com/cockroachdb/pebble" +) + +type RowData struct { + Data map[string]any `json:"0"` + Key string `json:"1"` +} + +func (b *JSONTable) PackData(entry map[string]any, key string) *RowData { + return &RowData{ + Data: entry, + Key: key, + } +} + +func (b *JSONTable) GetTableEntryInfo(snap *pebble.Snapshot, id []byte) (*benchtop.RowLoc, error) { + // Really only want to see if anything was returned or not. 
Since this doesn't interact + // with the pebble indices, keep it in JSONTable + _, closer, err := snap.Get(benchtop.NewPosKey(b.TableId, id)) + if err == pebble.ErrNotFound { + return nil, nil + } + if err != nil { + return nil, err + } + defer closer.Close() + return &benchtop.RowLoc{}, nil +} diff --git a/jsontable/table/init.go b/jsontable/table/init.go new file mode 100644 index 0000000..ba7158d --- /dev/null +++ b/jsontable/table/init.go @@ -0,0 +1,160 @@ +package table + +import ( + "encoding/binary" + "fmt" + "os" + "path/filepath" + "strconv" + "strings" + "sync" + + "github.com/bmeg/benchtop" + "github.com/bmeg/benchtop/jsontable/section" + "github.com/edsrzf/mmap-go" +) + +func (b *JSONTable) Init(poolSize int) error { + b.NumPartitions = 4 + if b.NumPartitions > 256 { + if uint32(b.NumPartitions)*uint32(SECTION_ID_MULT) > 65536 { + return fmt.Errorf("too many partitions (%d) for section ID multiplier (%d)", b.NumPartitions, SECTION_ID_MULT) + } + } + + b.PartitionFunc = defaultPartitionFunc(b.NumPartitions) + b.Sections = map[uint16]*section.Section{} + + dir := filepath.Dir(b.FileName) + base := filepath.Base(b.FileName) + files, err := os.ReadDir(dir) + if err != nil { + return fmt.Errorf("failed to read directory: %w", err) + } + + type secInfo struct { + pId uint8 + localSecId int + fileName string + } + var secList []secInfo + for _, f := range files { + if strings.HasPrefix(f.Name(), base+PART_FILE_SUFFIX) { + parts := strings.Split(strings.TrimPrefix(f.Name(), base+PART_FILE_SUFFIX), SECTION_FILE_SUFFIX) + + if len(parts) != 2 { + continue + } + pId, err := strconv.Atoi(parts[0]) + if err != nil { + continue + } + + localSecId, err := strconv.Atoi(parts[1]) + if err != nil { + continue + } + secList = append(secList, secInfo{ + pId: uint8(pId), + localSecId: localSecId, + fileName: f.Name(), + }) + } + } + + for _, s := range secList { + secId := uint16(s.pId)*SECTION_ID_MULT + uint16(s.localSecId) + secPath := filepath.Join(dir, s.fileName) + + 
// Open main file handle (for writes) + oFile, err := os.OpenFile(secPath, os.O_RDWR, 0666) + if err != nil { + return fmt.Errorf("failed to open section file %s: %w", secPath, err) + } + m, err := mmap.Map(oFile, mmap.RDWR, 0) + if err != nil { + return fmt.Errorf("failed to mmap section %s: %w", secPath, err) + } + + // Init file pool (for writes) + filePool := make(chan *os.File, poolSize) + for range poolSize { + file, err := os.OpenFile(secPath, os.O_RDWR, 0666) + if err != nil { + // Clean up + m.Unmap() + oFile.Close() + for len(filePool) > 0 { + if f, ok := <-filePool; ok { + f.Close() + } + } + return fmt.Errorf("failed to init file pool for %s: %w", secPath, err) + } + filePool <- file + } + + var liveBytes uint32 = 0 + var totalRows uint32 = 0 + var deletedRows uint32 = 0 + var offset uint32 = 0 + // Loop for Initializing live bytes, deletedRows, totalRows + for offset+benchtop.ROW_HSIZE <= uint32(len(m)) { + header := m[offset : offset+benchtop.ROW_HSIZE] + nextOffset := binary.LittleEndian.Uint32(header[:benchtop.ROW_OFFSET_HSIZE]) + bSize := binary.LittleEndian.Uint32(header[benchtop.ROW_OFFSET_HSIZE:benchtop.ROW_HSIZE]) + if nextOffset == 0 || nextOffset <= offset { + break + } + if bSize == 0 { + deletedRows++ + } + totalRows++ + offset = nextOffset + + } + liveBytes = offset + sec := §ion.Section{ + ID: secId, + PartitionID: s.pId, + Path: secPath, + File: oFile, + FilePool: filePool, + MMap: m, + LiveBytes: liveBytes, + Active: true, + MMapMode: mmap.RDWR, + TotalRows: totalRows, + DeletedRows: deletedRows, + Lock: sync.RWMutex{}, + CompressScratch: make([]byte, 0), + } + + b.Sections[secId] = sec + b.PartitionMap[s.pId] = append(b.PartitionMap[s.pId], secId) + + } + + for pId, secIds := range b.PartitionMap { + if len(secIds) > 0 { + latestSecId := secIds[len(secIds)-1] + latestSec := b.Sections[latestSecId] + + // Mark the latest section as active for writing + b.ActiveSections[pId] = latestSec + b.FlushCounter[pId] = 0 // Reset the counter for 
the newly active section + } + } + + // --- ENSURE ONE SECTION PER PARTITION --- + for pId := uint8(0); pId < uint8(b.NumPartitions); pId++ { + if len(b.PartitionMap[pId]) == 0 { + _, err := b.CreateNewSection(pId) + if err != nil { + return err + } + } + } + + return nil +} diff --git a/jsontable/table/table.go b/jsontable/table/table.go new file mode 100644 index 0000000..4f4a9c6 --- /dev/null +++ b/jsontable/table/table.go @@ -0,0 +1,753 @@ +package table + +import ( + "bytes" + "encoding/binary" + "fmt" + "hash/fnv" + "io" + "os" + "runtime" + "strconv" + "strings" + "sync" + + "github.com/DataDog/zstd" + "github.com/bmeg/benchtop" + "github.com/bmeg/benchtop/jsontable/section" + "github.com/bmeg/grip/log" + "github.com/edsrzf/mmap-go" + + "github.com/bytedance/sonic" +) + +const ( + PART_FILE_SUFFIX string = ".partition" + SECTION_FILE_SUFFIX string = ".section" + SECTION_ID_MULT uint16 = 256 + MAX_COMPACT_RATIO = 0.2 // 20% deleted rows triggers compaction + FLUSH_THRESHOLD = 1000 +) + +type JSONTable struct { + // Artifact arguments + Columns []benchtop.ColumnDef + ColumnMap map[string]int + + TableId uint16 + Path string // Base path (for legacy single file) + Name string + FileName string // Base name for section files + + Fields map[string]struct{} // Indexing moved to table level + + Sections map[uint16]*section.Section // sectionId -> Section + PartitionMap map[uint8][]uint16 // partitionId -> []sectionId + SectionLock sync.Mutex // For creating new sections + NumPartitions uint32 // Number of partitions + PartitionFunc func(id []byte) uint8 // Assigns row to partition + MaxConcurrentSections uint8 // Limit for parallel operations + + ActiveSections map[uint8]*section.Section // one per partition + FlushCounter map[uint8]int // per-partition flush counter +} + +// DefaultPartitionFunc assigns rows to partitions using FNV hash +func defaultPartitionFunc(numPartitions uint32) func(id []byte) uint8 { + return func(id []byte) uint8 { + h := fnv.New32a() + 
h.Write(id) + return uint8(h.Sum32() % numPartitions) + } +} + +func (b *JSONTable) Close() error { + for _, sec := range b.Sections { + if sec.MMap != nil { + err := sec.MMap.Unmap() + if err != nil { + fmt.Printf("ERROR ON UNMAP: %s", err) + return err + } + } + if sec.File != nil { + err := sec.File.Sync() + if err != nil { + fmt.Printf("ERROR ON FILE HANDLE SYNC: %s", err) + return err + } + err = sec.File.Close() + if err != nil { + fmt.Printf("ERROR ON FILE HANDLE CLOSE: %s", err) + return err + } + } + if sec.FilePool != nil { + close(sec.FilePool) + for f := range sec.FilePool { + err := f.Close() + if err != nil { + fmt.Printf("ERROR ON FILE POOL FILE HANDLE CLOSE: %s", err) + return err + } + } + } + } + b.Fields = map[string]struct{}{} + return nil +} + +// AddRow adds a single row to the JSONTable, writing it as zstd-compressed data. +func (b *JSONTable) AddRow(elem benchtop.Row) (*benchtop.RowLoc, error) { + partitionId := b.PartitionFunc(elem.Id) + if partitionId >= uint8(b.NumPartitions) { + return nil, fmt.Errorf("invalid partition") + } + + // Get or create active section + sec := b.ActiveSections[partitionId] + if sec == nil { + var err error + sec, err = b.CreateNewSection(partitionId) + if err != nil { + return nil, err + } + } + + bData, err := sonic.ConfigFastest.Marshal(b.PackData(elem.Data, string(elem.Id))) + if err != nil { + return nil, err + } + totalSize := uint32(len(bData)) + benchtop.ROW_HSIZE + + // Check size and rotate if needed + if sec.LiveBytes+totalSize > section.MAX_SECTION_SIZE { + // Close current + err := sec.CloseSection() + if err != nil { + sec.Lock.Unlock() + return nil, err + } + // Create new active + newSec, err := b.CreateNewSection(partitionId) + if err != nil { + return nil, err + } + sec = newSec + b.ActiveSections[partitionId] = sec + } + + loc, err := sec.WriteJsonEntryToSection(bData) + if err != nil { + return nil, err + } + + sec.TotalRows++ + loc.TableId = b.TableId + return loc, nil +} + +func (b 
*JSONTable) CreateNewSection(partitionId uint8) (*section.Section, error) { + b.SectionLock.Lock() + defer b.SectionLock.Unlock() + + localSecId := len(b.PartitionMap[partitionId]) + secId := uint16(partitionId)*SECTION_ID_MULT + uint16(localSecId) + if _, exists := b.Sections[secId]; exists { + return nil, fmt.Errorf("section ID conflict: %d", secId) + } + + path := fmt.Sprintf("%s%s%d.section%d", b.FileName, PART_FILE_SUFFIX, partitionId, localSecId) + handle, err := os.Create(path) + if err != nil { + return nil, err + } + handle.Truncate(section.INITIAL_SECTION_SIZE) // pre-allocate + + m, err := mmap.Map(handle, mmap.RDWR, 0) + if err != nil { + return nil, fmt.Errorf("mmap failed on new section: %w", err) + } + + filePool := make(chan *os.File, 10) + for range cap(filePool) { + f, err := os.OpenFile(path, os.O_RDWR, 0666) + if err != nil { + m.Unmap() + handle.Close() + return nil, err + } + filePool <- f + } + + sec := §ion.Section{ + ID: secId, + PartitionID: partitionId, + Path: path, + File: handle, + FilePool: filePool, + MMap: m, + MMapMode: mmap.RDWR, + Active: true, + LiveBytes: 0, + CompressScratch: make([]byte, 0), + } + + b.Sections[secId] = sec + b.PartitionMap[partitionId] = append(b.PartitionMap[partitionId], secId) + b.ActiveSections[partitionId] = sec + b.FlushCounter[partitionId] = 0 + return sec, nil +} + +func (b *JSONTable) GetRow(loc *benchtop.RowLoc) (map[string]any, error) { + sec, exists := b.Sections[loc.Section] + if !exists { + return nil, fmt.Errorf("section %d not found", loc.Section) + } + + if len(sec.MMap) == 0 { + return nil, fmt.Errorf("section %d is empty or not mapped", loc.Section) + } + + start := loc.Offset + benchtop.ROW_HSIZE + end := start + loc.Size + if end > uint32(len(sec.MMap)) { + return nil, fmt.Errorf("row out of bounds: %d > %d", end, len(sec.MMap)) + } + + compressed := sec.MMap[start:end] + decompressed, err := zstd.Decompress(nil, compressed) + if err != nil { + return nil, fmt.Errorf("decompress failed: 
%w", err) + } + + var m RowData + if err := sonic.ConfigFastest.Unmarshal(decompressed, &m); err != nil { + return nil, fmt.Errorf("unmarshal failed: %w", err) + } + + return m.Data, nil +} + +func (b *JSONTable) MarkDeleteTable(loc *benchtop.RowLoc) error { + sec, exists := b.Sections[loc.Section] + if !exists { + return fmt.Errorf("section %d not found", loc.Section) + } + + file := <-sec.FilePool + defer func() { sec.FilePool <- file }() + + _, err := file.WriteAt(bytes.Repeat([]byte{0x00}, 4), int64(loc.Offset+benchtop.ROW_OFFSET_HSIZE)) + if err != nil { + return fmt.Errorf("writeAt failed: %w", err) + } + sec.Lock.Lock() + sec.DeletedRows++ + sec.LiveBytes -= loc.Size + sec.Lock.Unlock() + return nil +} + +func (b *JSONTable) DeleteRow(loc *benchtop.RowLoc, id []byte) error { + sec, exists := b.Sections[loc.Section] + if !exists { + return fmt.Errorf("section %d not found", loc.Section) + } + + sec.Lock.Lock() + defer sec.Lock.Unlock() + + _, err := sec.File.Seek(int64(loc.Offset+benchtop.ROW_OFFSET_HSIZE), io.SeekStart) + if err != nil { + return err + } + _, err = sec.File.Write(bytes.Repeat([]byte{0x00}, 4)) + if err != nil { + return fmt.Errorf("writeAt failed: %w", err) + } + sec.DeletedRows++ + sec.LiveBytes -= loc.Size + return nil +} + +func (b *JSONTable) ScanDoc(filter benchtop.RowFilter) chan map[string]any { + outChan := make(chan map[string]any, 100*len(b.Sections)) + var wg sync.WaitGroup + sem := make(chan struct{}, b.MaxConcurrentSections) + for pId := uint8(0); pId < uint8(b.NumPartitions); pId++ { + for _, secId := range b.PartitionMap[pId] { + sec, exists := b.Sections[secId] + if !exists || len(sec.MMap) == 0 { + continue + } + wg.Add(1) + go func(sec *section.Section) { + sem <- struct{}{} + defer func() { <-sem; wg.Done() }() + m := sec.MMap + var offset uint32 = 0 + for offset+benchtop.ROW_HSIZE <= uint32(len(m)) { + header := m[offset : offset+benchtop.ROW_HSIZE] + nextOffset := 
binary.LittleEndian.Uint32(header[:benchtop.ROW_OFFSET_HSIZE]) + bSize := binary.LittleEndian.Uint32(header[benchtop.ROW_OFFSET_HSIZE:benchtop.ROW_HSIZE]) + if bSize == 0 { + if nextOffset == 0 || nextOffset <= offset { + break + } + offset = nextOffset + continue + } + jsonStart := offset + benchtop.ROW_HSIZE + jsonEnd := jsonStart + bSize + if jsonEnd > uint32(len(m)) { + break + } + rowData := m[jsonStart:jsonEnd] + if err := b.processJSONRowDataDoc(rowData, filter, outChan); err != nil { + log.Debugf("skip row in section %d: %v", sec.ID, err) + } + if nextOffset == 0 || nextOffset <= offset { + break + } + offset = nextOffset + } + }(sec) + } + } + go func() { wg.Wait(); close(outChan) }() + return outChan +} + +// processJSONRowDataDoc handles parsing of row bytes for ScanDoc, applying filters, and sending RowData to the output channel. +func (b *JSONTable) processJSONRowDataDoc(rowData []byte, filter benchtop.RowFilter, outChan chan map[string]any) error { + newData, err := zstd.Decompress(nil, rowData) + if err != nil { + return err + } + if filter != nil && !filter.IsNoOp() { + if !filter.Matches(newData, b.Name) { + return nil + } + } + var m RowData + err = sonic.ConfigFastest.Unmarshal(newData, &m) + if err != nil { + return err + } + if m.Data != nil { + m.Data["_id"] = m.Key + } + outChan <- m.Data + return nil +} + +// ScanId scans the JSONTable and returns IDs (as string) that match the filter. 
+func (b *JSONTable) ScanId(filter benchtop.RowFilter) chan string { + outChan := make(chan string, 100*len(b.Sections)) + var wg sync.WaitGroup + sem := make(chan struct{}, b.MaxConcurrentSections) + for pId := uint8(0); pId < uint8(b.NumPartitions); pId++ { + for _, secId := range b.PartitionMap[pId] { + sec, exists := b.Sections[secId] + if !exists || len(sec.MMap) == 0 { + continue + } + wg.Add(1) + go func(sec *section.Section) { + sem <- struct{}{} + defer func() { <-sem; wg.Done() }() + m := sec.MMap + var offset uint32 = 0 + for offset+benchtop.ROW_HSIZE <= uint32(len(m)) { + header := m[offset : offset+benchtop.ROW_HSIZE] + nextOffset := binary.LittleEndian.Uint32(header[:benchtop.ROW_OFFSET_HSIZE]) + bSize := binary.LittleEndian.Uint32(header[benchtop.ROW_OFFSET_HSIZE:benchtop.ROW_HSIZE]) + if bSize == 0 { + if nextOffset == 0 || nextOffset <= offset { + break + } + offset = nextOffset + continue + } + jsonStart := offset + benchtop.ROW_HSIZE + jsonEnd := jsonStart + bSize + if jsonEnd > uint32(len(m)) { + break + } + rowData := m[jsonStart:jsonEnd] + if err := b.processJSONRowDataId(rowData, filter, outChan); err != nil { + log.Debugf("skip row in section %d: %v", sec.ID, err) + } + if nextOffset == 0 || nextOffset <= offset { + break + } + offset = nextOffset + } + }(sec) + } + } + go func() { wg.Wait(); close(outChan) }() + return outChan +} + +// processJSONRowDataId handles parsing of row bytes for ScanId, applying filters, and sending IDs to the output channel. 
+func (b *JSONTable) processJSONRowDataId(rowData []byte, filter benchtop.RowFilter, outChan chan string) error { + newData, err := zstd.Decompress(nil, rowData) + if err != nil { + return err + } + + if filter != nil && !filter.IsNoOp() { + if !filter.Matches(newData, b.Name) { + return nil + } + } + + node, err := sonic.Get(newData, "1") + if err != nil { + log.Errorf("Error accessing JSON path for row data %s: %v\n", string(newData), err) + return err + } + + ID, err := node.String() + if err != nil { + log.Errorf("Error unmarshaling node: %v\n", err) + return err + } + + outChan <- ID + return nil +} + +/* +func (b *JSONTable) CompactSection(secId uint16) error { + sec, exists := b.Sections[secId] + if !exists { + return fmt.Errorf("section %d not found", secId) + } + sec.Lock.Lock() + defer sec.Lock.Unlock() + + flushCounter := 0 + tempFileName := sec.Path + ".compact" + tempHandle, err := os.Create(tempFileName) + if err != nil { + return fmt.Errorf("failed to create temp file: %w", err) + } + defer tempHandle.Close() + + m, err := mmap.Map(sec.File, mmap.RDONLY, 0) + if err != nil { + return fmt.Errorf("failed to map file: %w", err) + } + defer m.Unmap() + + writer := bufio.NewWriterSize(tempHandle, 16*1024*1024) + var newOffset uint32 = 0 + inputChan := make(chan benchtop.Index, 100) + + // todo: figure out how to set indices from the driver instead of the table + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + b.setDataIndices(inputChan) + }() + + var offset uint32 = 0 + for offset+benchtop.ROW_HSIZE <= uint32(len(m)) { + header := m[offset : offset+benchtop.ROW_HSIZE] + nextOffset := binary.LittleEndian.Uint32(header[:benchtop.ROW_OFFSET_HSIZE]) + bSize := binary.LittleEndian.Uint32(header[benchtop.ROW_OFFSET_HSIZE:benchtop.ROW_HSIZE]) + + if bSize == 0 || int64(nextOffset) == int64(benchtop.ROW_HSIZE) { + if int64(nextOffset) > int64(offset) { + offset = nextOffset + } + continue + } + + jsonStart := offset + benchtop.ROW_HSIZE + 
jsonEnd := jsonStart + bSize + if jsonEnd > uint32(len(m)) { + return fmt.Errorf("incomplete JSON data at section %d, offset %d, size %d", sec.ID, offset, bSize) + } + + rowData := m[jsonStart:jsonEnd] + + rowData, err := zstd.Decompress(nil, rowData) + if err != nil { + log.Debugf("Failed to decompress row at section %d, offset %d: %v", sec.ID, offset, err) + if nextOffset == 0 || nextOffset <= offset { + break + } + offset = nextOffset + continue + } + + var mRow RowData + err = sonic.ConfigFastest.Unmarshal(rowData, &mRow) + if err != nil { + if err == io.EOF { + return fmt.Errorf("JSON data for row at section %d, offset %d, size %d was incomplete: %w", sec.ID, offset, bSize, err) + } + return fmt.Errorf("failed to decode JSON row at section %d, offset %d, size %d: %w", sec.ID, offset, bSize, err) + } + + node, err := sonic.Get(rowData, "1") + if err != nil { + return fmt.Errorf("failed to access ID field at section %d, offset %d: %w", sec.ID, offset, err) + } + key, err := node.String() + if err != nil { + return fmt.Errorf("failed to unmarshal ID field at section %d, offset %d: %w", sec.ID, offset, err) + } + inputChan <- benchtop.Index{Key: []byte(key), Loc: benchtop.RowLoc{Offset: newOffset, Size: bSize}} + + newOffsetBytes := make([]byte, benchtop.ROW_OFFSET_HSIZE) + binary.LittleEndian.PutUint32(newOffsetBytes, newOffset+bSize+benchtop.ROW_HSIZE) + _, err = writer.Write(newOffsetBytes) + if err != nil { + return fmt.Errorf("failed writing new offset at %d: %w", newOffset, err) + } + _, err = writer.Write(rowData) + if err != nil { + return fmt.Errorf("failed writing JSON row at offset %d: %w", newOffset, err) + } + + flushCounter++ + if flushCounter%FLUSH_THRESHOLD == 0 { + if err := writer.Flush(); err != nil { + return fmt.Errorf("failed flushing writer: %w", err) + } + } + newOffset += bSize + benchtop.ROW_HSIZE + } + close(inputChan) + //wg.Wait() + + if err := writer.Flush(); err != nil { + return fmt.Errorf("failed final flush: %w", err) + } + if err 
// ConvertJSONPathToArray splits a dotted path with optional bracketed array
// indices into a list of path elements.
//
// Leading '.' and '/' characters are stripped first. The result always starts
// with the string "0"; every dot-separated key follows as a string and every
// "[n]" index as an int. Empty segments, such as the one between two
// consecutive dots, are dropped.
//
// An error is returned when a '[' has no closing ']', when the brackets are
// empty, or when the bracket contents do not parse as an integer.
func ConvertJSONPathToArray(path string) ([]any, error) {
	path = strings.TrimLeft(path, "./")
	if path == "" {
		return []any{"0"}, nil
	}

	parts := make([]any, 1, len(path)/2+1)
	parts[0] = "0"

	// tokenStart is where the key currently being scanned begins.
	tokenStart := 0
	// flushKey appends the pending key path[tokenStart:end] when it is non-empty.
	flushKey := func(end int) {
		if end > tokenStart {
			parts = append(parts, path[tokenStart:end])
		}
	}

	for pos := 0; pos < len(path); pos++ {
		switch path[pos] {
		case '.':
			flushKey(pos)
			tokenStart = pos + 1

		case '[':
			flushKey(pos)

			// rel is the distance from the first index character to ']':
			// -1 means no closing bracket, 0 means empty brackets.
			rel := strings.IndexByte(path[pos+1:], ']')
			if rel <= 0 {
				return nil, fmt.Errorf("invalid path format: missing array closing bracket or empty index at position %d", pos)
			}
			closeAt := pos + 1 + rel

			digits := path[pos+1 : closeAt]
			idx, err := strconv.Atoi(digits)
			if err != nil {
				return nil, fmt.Errorf("invalid array index: %s", digits)
			}
			parts = append(parts, idx)

			// Resume right after ']' (the loop increment steps past it).
			pos = closeAt
			tokenStart = closeAt + 1
		}
	}

	// Trailing key, if the path did not end on a separator or an index.
	flushKey(len(path))
	return parts, nil
}
wg.Done() }() + for j, loc := range chunk { + idx := start + j + if loc.Section != sectionId { + errors[idx] = fmt.Errorf("Expected sectionId %d but got %d instead", sectionId, loc.Section) + continue + } + startOffset := loc.Offset + benchtop.ROW_HSIZE + endOffset := startOffset + loc.Size + if endOffset > uint32(len(sec.MMap)) { + errors[idx] = fmt.Errorf("row out of bounds: %d > %d", endOffset, len(sec.MMap)) + continue + } + compressed := sec.MMap[startOffset:endOffset] + decompressed, err := zstd.Decompress(nil, compressed) + if err != nil { + errors[idx] = fmt.Errorf("decompress failed: %w", err) + continue + } + var m RowData + if err := sonic.ConfigFastest.Unmarshal(decompressed, &m); err != nil { + errors[idx] = fmt.Errorf("unmarshal failed: %w", err) + continue + } + results[idx] = m.Data + } + }(i, chunk) + } + wg.Wait() + return results, errors +} + +/*func (b *JSONTable) GetRows(locs []*benchtop.RowLoc, sectionId uint16) ([]map[string]any, []error) { + results := make([]map[string]any, len(locs)) + errors := make([]error, len(locs)) + sec, exists := b.Sections[sectionId] + if !exists || len(sec.MMap) == 0 { + return nil, []error{fmt.Errorf("sectionId not found in sections: %d", sectionId)} + } + + sec.Lock.RLock() + defer sec.Lock.RUnlock() + var m RowData + var start, end uint32 = 0, 0 + for i, loc := range locs { + if loc.Section != sectionId { + errors[i] = fmt.Errorf("Expected sectionId %d but got %d instead", sectionId, loc.Section) + continue + } + start = loc.Offset + benchtop.ROW_HSIZE + end = start + loc.Size + if end > uint32(len(sec.MMap)) { + errors[i] = fmt.Errorf("row out of bounds: %d > %d", end, len(sec.MMap)) + continue + } + decompressed, err := zstd.Decompress(nil, sec.MMap[start:end]) + if err != nil { + errors[i] = fmt.Errorf("decompress failed: %w", err) + continue + } + if err := sonic.ConfigFastest.Unmarshal(decompressed, &m); err != nil { + errors[i] = fmt.Errorf("unmarshal failed: %w", err) + continue + } + results[i] = m.Data 
+ } + return results, errors +}*/ + +func (b *JSONTable) GetColumnDefs() []benchtop.ColumnDef { + return b.Columns +} diff --git a/jsontable/tablehelpers.go b/jsontable/tablehelpers.go deleted file mode 100644 index 198d3f3..0000000 --- a/jsontable/tablehelpers.go +++ /dev/null @@ -1,189 +0,0 @@ -package jsontable - -import ( - "encoding/binary" - "fmt" - "io" - "os" - - "github.com/bmeg/benchtop" - "github.com/bmeg/benchtop/jsontable/tpath" - "github.com/bmeg/benchtop/pebblebulk" - "github.com/bmeg/grip/log" - "github.com/bmeg/jsonpath" - "github.com/bytedance/sonic" - "github.com/cockroachdb/pebble" -) - -type RowData struct { - Data map[string]any `json:"0"` - Key string `json:"1"` -} - -func (b *JSONTable) packData(entry map[string]any, key string) *RowData { - return &RowData{ - Data: entry, - Key: key, - } -} - -func (b *JSONTable) AddTableEntryInfo(tx *pebblebulk.PebbleBulk, rowId []byte, rowLoc benchtop.RowLoc) error { - value := benchtop.NewPosValue(rowLoc.Offset, rowLoc.Size) - posKey := benchtop.NewPosKey(b.TableId, rowId) - if tx != nil { - err := tx.Set(posKey, value, nil) - if err != nil { - return err - } - } else { - err := b.Pb.Db.Set(posKey, value, nil) - if err != nil { - return err - } - } - return nil -} - -func PathLookup(v map[string]any, path string) any { - /* Expects that special fields like '_id' and '_label' - are added to the map before reaching this function - */ - field := tpath.NormalizePath(path) - jpath := tpath.ToLocalPath(field) - res, err := jsonpath.JsonPathLookup(v, jpath) - if err != nil { - return nil - } - return res -} - -func (b *JSONTable) getTableEntryInfo(snap *pebble.Snapshot, id []byte) (*benchtop.RowLoc, error) { - // Really only want to see if anything was returned or not - _, closer, err := snap.Get(benchtop.NewPosKey(b.TableId, id)) - if err == pebble.ErrNotFound { - return nil, nil - } - if err != nil { - return nil, err - } - defer closer.Close() - return &benchtop.RowLoc{}, nil -} - -func (b *JSONTable) 
unpackData(loadData bool, retId bool, doc *RowData) (any, error) { - if doc == nil { - return nil, fmt.Errorf("Doc is nil nothing to unpack") - } - if !loadData { - return doc.Key, nil - } - if retId && doc.Data != nil { - doc.Data["_id"] = doc.Key - } - return doc.Data, nil - -} - -func (b *JSONTable) GetBlockPos(id []byte) (offset uint64, size uint64, err error) { - log.Debugln("TABLE ID: ", b.TableId, "ID: ", string(id)) - val, closer, err := b.db.Get(benchtop.NewPosKey(b.TableId, id)) - if err != nil { - if err != pebble.ErrNotFound { - log.Errorln("getBlockPos Err: ", err) - } - return 0, 0, err - } - - offset, size = benchtop.ParsePosValue(val) - defer closer.Close() - return offset, size, nil -} - -func (b *JSONTable) setDataIndices(inputs chan benchtop.Index) { - for index := range inputs { - b.AddTableEntryInfo( - nil, - index.Key, - benchtop.RowLoc{ - Offset: index.Position, - Size: index.Size, - }, - ) - } -} - -func (b *JSONTable) markDelete(offset uint64) error { - file, err := os.OpenFile(b.Path, os.O_RDWR, 0644) - if err != nil { - return err - } - defer file.Close() - - _, err = file.Seek(int64(offset+8), io.SeekStart) - if err != nil { - return err - } - _, err = file.Write([]byte{0x00, 0x00, 0x00, 0x00}) - if err != nil { - return err - } - err = file.Sync() - if err != nil { - return err - } - - return nil -} - -func (b *JSONTable) readFromFile(offset uint64) (map[string]any, error) { - file, err := os.Open(b.Path) - if err != nil { - return nil, err - } - defer file.Close() - - _, err = file.Seek(int64(offset+8), io.SeekStart) - if err != nil { - return nil, err - } - - // Read JSON block size - sizeBytes := []byte{0x00, 0x00, 0x00, 0x00} - _, err = file.Read(sizeBytes) - if err != nil { - return nil, err - } - - file.Seek(-4, io.SeekCurrent) - - rowData := make([]byte, int32(binary.LittleEndian.Uint32(sizeBytes))) - _, err = file.Read(rowData) - if err != nil { - return nil, err - } - var m *RowData = nil - 
sonic.ConfigFastest.Unmarshal(rowData, m) - out, err := b.unpackData(true, false, m) - if err != nil { - return nil, err - } - return out.(map[string]any), nil -} - -func (b *JSONTable) writeJsonEntry(offset int64, bData []byte) (int, error) { - // make next offset equal to existing offset + length of data - buffer := make([]byte, 12) - binary.LittleEndian.PutUint64(buffer[:8], uint64(offset)+uint64(len(bData))+12) - binary.LittleEndian.PutUint32(buffer[8:], uint32(len(bData))) - - _, err := b.handle.Write(buffer) - if err != nil { - return 0, fmt.Errorf("write offset error: %v", err) - } - - n, err := b.handle.Write(bData) - if err != nil { - return 0, fmt.Errorf("write JSON error: %v", err) - } - return n, nil -} diff --git a/jsontable/tpath/tpath.go b/jsontable/tpath/tpath.go index 20ed8cc..6147ea2 100644 --- a/jsontable/tpath/tpath.go +++ b/jsontable/tpath/tpath.go @@ -2,11 +2,26 @@ package tpath import ( "strings" + + "github.com/bmeg/jsonpath" ) // Current represents the 'current' traveler namespace const CURRENT = "_current" +func PathLookup(v map[string]any, path string) any { + /* Expects that special fields like '_id' and '_label' + are added to the map before reaching this function + */ + field := NormalizePath(path) + jpath := ToLocalPath(field) + res, err := jsonpath.JsonPathLookup(v, jpath) + if err != nil { + return nil + } + return res +} + // GetNamespace returns the namespace of the provided path // // Example: diff --git a/keys.go b/keys.go index cdb2961..aa116e8 100644 --- a/keys.go +++ b/keys.go @@ -8,6 +8,11 @@ import ( "github.com/bmeg/grip/log" ) +const ( + ROW_HSIZE uint32 = 8 // Header size: 8-byte next offset + 4-byte size + ROW_OFFSET_HSIZE uint32 = 4 // Offset part of header +) + // Vertex TableId // key: T | TableId | VtablePrefix' // The starting point for vertex table ids in th pebble index @@ -31,6 +36,7 @@ var RFieldPrefix = []byte{'R'} // The '0x1F' invisible character unit seperator not supposed to appear in ASCII text var 
FieldSep = []byte{0x1F} +// builds a RFieldKey in the format "R 0x1F label 0x1F field 0x1F rowId" func RFieldKey(label, field, rowID string) []byte { return bytes.Join([][]byte{ RFieldPrefix, @@ -115,13 +121,30 @@ func NewPosKeyPrefix(table uint16) []byte { return out[:] } -func NewPosValue(offset uint64, size uint64) []byte { - var out [64]byte - binary.LittleEndian.PutUint64(out[:], offset) - binary.LittleEndian.PutUint64(out[8:], size) +/* +Builds a 12 byte row loc encoding + + Each encoding in order contains: + + 2 bytes for TableId + 2 bytes for SectionId + 4 bytes for Offset + 4 bytes for Size +*/ +func EncodeRowLoc(loc *RowLoc) []byte { + var out [12]byte + binary.LittleEndian.PutUint16(out[0:], loc.TableId) + binary.LittleEndian.PutUint16(out[2:], loc.Section) + binary.LittleEndian.PutUint32(out[4:], loc.Offset) + binary.LittleEndian.PutUint32(out[8:], loc.Size) return out[:] } -func ParsePosValue(v []byte) (offset uint64, size uint64) { - return binary.LittleEndian.Uint64(v), binary.LittleEndian.Uint64(v[8:]) +func DecodeRowLoc(v []byte) *RowLoc { + return &RowLoc{ + TableId: binary.LittleEndian.Uint16(v[0:]), + Section: binary.LittleEndian.Uint16(v[2:]), + Offset: binary.LittleEndian.Uint32(v[4:]), + Size: binary.LittleEndian.Uint32(v[8:]), + } } diff --git a/pebblebulk/bulk.go b/pebblebulk/bulk.go new file mode 100644 index 0000000..fd4a8b6 --- /dev/null +++ b/pebblebulk/bulk.go @@ -0,0 +1,100 @@ +package pebblebulk + +import ( + "bytes" + "io" + "sync" + + "github.com/bmeg/benchtop/util" + "github.com/cockroachdb/pebble" +) + +const ( + maxWriterBuffer = 3 << 30 +) + +type PebbleBulk struct { + Db *pebble.DB + Batch *pebble.Batch + Highest, Lowest []byte + CurSize int + mu sync.Mutex + totalInserts uint32 +} + +func (pb *PebbleBulk) Set(id []byte, val []byte, opts *pebble.WriteOptions) error { + pb.mu.Lock() + defer pb.mu.Unlock() + if pb.Batch == nil { + pb.Batch = pb.Db.NewBatch() + } + + pb.CurSize += len(id) + len(val) + pb.totalInserts++ + if 
pb.Highest == nil || bytes.Compare(id, pb.Highest) > 0 { + pb.Highest = util.CopyBytes(id) + } + if pb.Lowest == nil || bytes.Compare(id, pb.Lowest) < 0 { + pb.Lowest = util.CopyBytes(id) + } + err := pb.Batch.Set(id, val, nil) + if pb.CurSize > maxWriterBuffer { + pb.Batch.Commit(nil) + pb.Batch.Reset() + pb.CurSize = 0 + } + return err +} + +func (pb *PebbleBulk) Get(key []byte) ([]byte, io.Closer, error) { + return pb.Db.Get(key) +} + +func (pb *PebbleBulk) Delete(key []byte, opts *pebble.WriteOptions) error { + pb.mu.Lock() + err := pb.Db.Delete(key, nil) + pb.mu.Unlock() + return err +} + +func (pb *PebbleBulk) BulkRead(fn func(tx *PebbleBulk) error) error { + return fn(pb) +} + +func (pb *PebbleBulk) Close() error { + return pb.Db.Close() +} + +func (pb *PebbleBulk) DeletePrefix(prefix []byte) error { + nextPrefix := append(prefix, 0xFF) + return pb.Db.DeleteRange(prefix, nextPrefix, nil) +} + +func (pb *PebbleBulk) DeleteRange(start, end []byte, opts *pebble.WriteOptions) error { + pb.mu.Lock() + defer pb.mu.Unlock() + if pb.Batch == nil { + pb.Batch = pb.Db.NewBatch() + } + + if pb.Lowest == nil || bytes.Compare(start, pb.Lowest) < 0 { + pb.Lowest = util.CopyBytes(start) + } + if pb.Highest == nil || bytes.Compare(end, pb.Highest) > 0 { + pb.Highest = util.CopyBytes(end) + } + + err := pb.Batch.DeleteRange(start, end, opts) + if err != nil { + return err + } + + if pb.CurSize > maxWriterBuffer { + if err := pb.Batch.Commit(nil); err != nil { + return err + } + pb.Batch.Reset() + pb.CurSize = 0 + } + return nil +} diff --git a/pebblebulk/iterator.go b/pebblebulk/iterator.go new file mode 100644 index 0000000..d6270c1 --- /dev/null +++ b/pebblebulk/iterator.go @@ -0,0 +1,68 @@ +package pebblebulk + +import ( + "io" + + "github.com/cockroachdb/pebble" +) + +type PebbleIterator struct { + db *pebble.DB + iter *pebble.Iterator + forward bool + key []byte + value []byte +} + +func (pit *PebbleIterator) Key() []byte { + return pit.key +} + +func (pit 
*PebbleIterator) Valid() bool { + return pit.iter.Valid() +} + +func (pit *PebbleIterator) Value() ([]byte, error) { + return pit.value, nil +} + +func (pit *PebbleIterator) Get(id []byte) ([]byte, error) { + v, c, err := pit.db.Get(id) + if err != nil { + return nil, err + } + out := copyBytes(v) + c.Close() + return out, nil +} + +func (pit *PebbleIterator) Seek(id []byte) error { + pit.forward = true + if !pit.iter.SeekGE(id) { + return io.EOF + } + pit.key = copyBytes(pit.iter.Key()) + pit.value = copyBytes(pit.iter.Value()) + return nil +} + +func (pit *PebbleIterator) Next() error { + if pit.forward { + if !pit.iter.Next() { + return io.EOF + } + } else { + if !pit.iter.Prev() { + return io.EOF + } + } + pit.key = copyBytes(pit.iter.Key()) + pit.value = copyBytes(pit.iter.Value()) + return nil +} + +func copyBytes(in []byte) []byte { + out := make([]byte, len(in)) + copy(out, in) + return out +} diff --git a/pebblebulk/kv.go b/pebblebulk/kv.go new file mode 100644 index 0000000..3193dfc --- /dev/null +++ b/pebblebulk/kv.go @@ -0,0 +1,82 @@ +package pebblebulk + +import ( + "io" + "sync" + + "github.com/bmeg/grip/log" + "github.com/cockroachdb/pebble" +) + +type KVStore interface { + Get(key []byte) ([]byte, io.Closer, error) + View(func(it *PebbleIterator) error) error + Set(key, value []byte, opts *pebble.WriteOptions) error + Delete(key []byte, opts *pebble.WriteOptions) error + BulkWrite(func(tx *PebbleBulk) error) error + Close() error +} + +type PebbleKV struct { + Db *pebble.DB + InsertCount uint32 + CompactLimit uint32 + mu sync.Mutex +} + +func NewPebbleKV(path string) (*PebbleKV, error) { + db, err := pebble.Open(path, &pebble.Options{}) + if err != nil { + return nil, err + } + return &PebbleKV{ + Db: db, + InsertCount: 0, + CompactLimit: uint32(1000), + mu: sync.Mutex{}, + }, nil +} + +func (pdb *PebbleKV) Set(id []byte, val []byte, opts *pebble.WriteOptions) error { + return pdb.Db.Set(id, val, opts) +} + +func (pdb *PebbleKV) BulkWrite(u func(tx 
*PebbleBulk) error) error { + batch := pdb.Db.NewBatch() + ptx := &PebbleBulk{pdb.Db, batch, nil, nil, 0, sync.Mutex{}, 0} + err := u(ptx) + batch.Commit(nil) + batch.Close() + + pdb.InsertCount += ptx.totalInserts + if pdb.InsertCount > pdb.CompactLimit { + log.Debugf("Running pebble compact %d > %d", pdb.InsertCount, pdb.CompactLimit) + pdb.Db.Compact([]byte{0x00}, []byte{0xFF}, true) + pdb.InsertCount = 0 + } + return err +} + +func (pb *PebbleKV) View(u func(tx *PebbleIterator) error) error { + it, err := pb.Db.NewIter(&pebble.IterOptions{}) + if err != nil { + return err + } + pit := &PebbleIterator{pb.Db, it, true, nil, nil} + err = u(pit) + it.Close() + return err +} + +func (pb *PebbleKV) Close() error { + return pb.Db.Close() +} + +func (pb *PebbleKV) Delete(key []byte, opts *pebble.WriteOptions) error { + return pb.Db.Delete(key, opts) +} + +func (pb *PebbleKV) Get(key []byte) ([]byte, io.Closer, error) { + val, closer, err := pb.Db.Get(key) + return val, closer, err +} diff --git a/pebblebulk/pebble-driver.go b/pebblebulk/pebble-driver.go deleted file mode 100644 index b1621e6..0000000 --- a/pebblebulk/pebble-driver.go +++ /dev/null @@ -1,197 +0,0 @@ -package pebblebulk - -import ( - "bytes" - "io" - "sync" - - "github.com/bmeg/benchtop/util" - "github.com/bmeg/grip/log" - "github.com/cockroachdb/pebble" -) - -const ( - maxWriterBuffer = 3 << 30 -) - -type PebbleKV struct { - Db *pebble.DB - InsertCount uint32 - CompactLimit uint32 - mu sync.Mutex -} - -type PebbleBulk struct { - Db *pebble.DB - Batch *pebble.Batch - Highest, Lowest []byte - CurSize int - mu sync.Mutex - totalInserts uint32 -} - -func (pb *PebbleBulk) Set(id []byte, val []byte, opts *pebble.WriteOptions) error { - pb.mu.Lock() - defer pb.mu.Unlock() - if pb.Batch == nil { - pb.Batch = pb.Db.NewBatch() - } - - pb.CurSize += len(id) + len(val) - pb.totalInserts++ - if pb.Highest == nil || bytes.Compare(id, pb.Highest) > 0 { - pb.Highest = util.CopyBytes(id) - } - if pb.Lowest == nil || 
bytes.Compare(id, pb.Lowest) < 0 { - pb.Lowest = util.CopyBytes(id) - } - err := pb.Batch.Set(id, val, nil) - if pb.CurSize > maxWriterBuffer { - pb.Batch.Commit(nil) - pb.Batch.Reset() - pb.CurSize = 0 - } - return err -} - -func (pb *PebbleBulk) Get(key []byte) ([]byte, io.Closer, error) { - return pb.Db.Get(key) -} - -func (pb *PebbleBulk) Delete(key []byte, opts *pebble.WriteOptions) error { - return pb.Db.Delete(key, nil) -} - -func (pdb *PebbleKV) Set(id []byte, val []byte) error { - return pdb.Db.Set(id, val, nil) -} - -func (pdb *PebbleKV) BulkWrite(u func(tx *PebbleBulk) error) error { - batch := pdb.Db.NewBatch() - ptx := &PebbleBulk{pdb.Db, batch, nil, nil, 0, sync.Mutex{}, 0} - err := u(ptx) - batch.Commit(nil) - batch.Close() - - pdb.InsertCount += ptx.totalInserts - if pdb.InsertCount > pdb.CompactLimit { - log.Debugf("Running pebble compact %d > %d", pdb.InsertCount, pdb.CompactLimit) - pdb.Db.Compact([]byte{0x00}, []byte{0xFF}, true) - pdb.InsertCount = 0 - } - return err -} - -func (pb *PebbleKV) View(u func(tx *PebbleIterator) error) error { - it, err := pb.Db.NewIter(&pebble.IterOptions{}) - if err != nil { - return err - } - pit := &PebbleIterator{pb.Db, it, true, nil, nil} - err = u(pit) - it.Close() - return err -} - -func (pb *PebbleBulk) BulkRead(fn func(tx *PebbleBulk) error) error { - return fn(pb) -} - -func (pb *PebbleBulk) Close() error { - return pb.Db.Close() -} - -func (pb *PebbleBulk) DeletePrefix(prefix []byte) error { - nextPrefix := append(prefix, 0xFF) - return pb.Db.DeleteRange(prefix, nextPrefix, nil) -} - -func (pb *PebbleBulk) DeleteRange(start, end []byte, opts *pebble.WriteOptions) error { - pb.mu.Lock() - defer pb.mu.Unlock() - if pb.Batch == nil { - pb.Batch = pb.Db.NewBatch() - } - - if pb.Lowest == nil || bytes.Compare(start, pb.Lowest) < 0 { - pb.Lowest = util.CopyBytes(start) - } - if pb.Highest == nil || bytes.Compare(end, pb.Highest) > 0 { - pb.Highest = util.CopyBytes(end) - } - - err := 
pb.Batch.DeleteRange(start, end, opts) - if err != nil { - return err - } - - if pb.CurSize > maxWriterBuffer { - if err := pb.Batch.Commit(nil); err != nil { - return err - } - pb.Batch.Reset() - pb.CurSize = 0 - } - return nil -} - -type PebbleIterator struct { - db *pebble.DB - iter *pebble.Iterator - forward bool - key []byte - value []byte -} - -func (pit *PebbleIterator) Key() []byte { - return pit.key -} - -func (pit *PebbleIterator) Valid() bool { - return pit.iter.Valid() -} - -func (pit *PebbleIterator) Value() ([]byte, error) { - return pit.value, nil -} - -func (pit *PebbleIterator) Get(id []byte) ([]byte, error) { - v, c, err := pit.db.Get(id) - if err != nil { - return nil, err - } - out := copyBytes(v) - c.Close() - return out, nil -} - -func (pit *PebbleIterator) Seek(id []byte) error { - pit.forward = true - if !pit.iter.SeekGE(id) { - return io.EOF - } - pit.key = copyBytes(pit.iter.Key()) - pit.value = copyBytes(pit.iter.Value()) - return nil -} - -func (pit *PebbleIterator) Next() error { - if pit.forward { - if !pit.iter.Next() { - return io.EOF - } - } else { - if !pit.iter.Prev() { - return io.EOF - } - } - pit.key = copyBytes(pit.iter.Key()) - pit.value = copyBytes(pit.iter.Value()) - return nil -} - -func copyBytes(in []byte) []byte { - out := make([]byte, len(in)) - copy(out, in) - return out -} diff --git a/test/benchmark/compact_test.go b/test/benchmark/compact_test.go index e1c085c..fcc5194 100644 --- a/test/benchmark/compact_test.go +++ b/test/benchmark/compact_test.go @@ -1,22 +1,12 @@ package test -import ( - "fmt" - "os" - "testing" - - "github.com/bmeg/benchtop" - "github.com/bmeg/benchtop/jsontable" - "github.com/bmeg/benchtop/test/fixtures" - "github.com/bmeg/benchtop/util" -) - const ( numKeys = 1000 valueSize = 5024 NumDeleteKeys = 200 ) +/* Compact not implemented currently func BenchmarkCompactJson(b *testing.B) { var compactjsoname = "test.json" + util.RandomString(5) defer os.RemoveAll(compactjsoname) @@ -35,25 +25,39 @@ 
func BenchmarkCompactJson(b *testing.B) { b.Fatal(err) } - inputChan := make(chan benchtop.Row, 100) + inputChan := make(chan *benchtop.Row, 100) go func() { count := 0 for j := 0; j < numKeys; j++ { key := []byte(fmt.Sprintf("key_%d", j)) value := fixtures.GenerateRandomBytes(valueSize) - inputChan <- benchtop.Row{Id: key, Data: map[string]any{"data": value}} + inputChan <- &benchtop.Row{Id: key, Data: map[string]any{"data": value}} count++ } b.Logf("Inserted %d entries into inputChan", count) close(inputChan) }() + //func (dr *JSONDriver) BulkLoad(inputs chan *benchtop.Row, tx *pebblebulk.PebbleBulk) error { + + jsonDriver, ok := compactjsonDriver.(*jsontable.JSONDriver) + if !ok { + b.Fatalf("invalid table type for %s", compactjsoname) + } + + jT, ok := compactjsonTable.(*jTable.JSONTable) + if !ok { + b.Fatalf("invalid table type for %s", compactjsoname) + } + b.Log("start load") - if err := compactjsonTable.Load(inputChan); err != nil { + if err := jsonDriver.BulkLoad(inputChan, nil); err != nil { b.Fatal(err) } b.Log("Load completed successfully") + jT, _ = compactjsonTable.(*jTable.JSONTable) + keys, err := compactjsonTable.Keys() if err != nil { b.Fatal(err) @@ -64,17 +68,15 @@ func BenchmarkCompactJson(b *testing.B) { b.Fatal(err) } - bT, _ := compactjsonTable.(*jsontable.JSONTable) - count := 0 deleted := 0 for key := range keys { if _, exists := randomIndexSet[count]; exists { - offset, size, err := bT.GetBlockPos(key.Key) + loc, err := jT.GetBlockPos(key.Key) if err != nil { b.Error(err) } - if err := compactjsonTable.DeleteRow(benchtop.RowLoc{Offset: offset, Size: size, Label: bT.TableId}, key.Key); err != nil { + if err := compactjsonTable.DeleteRow(loc, key.Key); err != nil { b.Fatal(err) } deleted++ @@ -105,3 +107,4 @@ func BenchmarkCompactJson(b *testing.B) { b.Logf("Keys after compaction: %d", keyCount) } +*/ diff --git a/test/benchmark/fetch_test.go b/test/benchmark/fetch_test.go deleted file mode 100644 index 3cd11c0..0000000 --- 
a/test/benchmark/fetch_test.go +++ /dev/null @@ -1,68 +0,0 @@ -package test - -import ( - "fmt" - "os" - "testing" - - "github.com/bmeg/benchtop" - "github.com/bmeg/benchtop/jsontable" - "github.com/bmeg/benchtop/test/fixtures" - "github.com/bmeg/benchtop/util" -) - -const ( - fetchNumKeys = 100000 - fetchValueSize = 5024 -) - -func BenchmarkFetch(b *testing.B) { - var fetchname = "test.json" + util.RandomString(5) - defer os.RemoveAll(fetchname) - - b.Log("BenchmarkScaleWriteJson start") - - compactjsonDriver, err := jsontable.NewJSONDriver(fetchname) - if err != nil { - b.Fatal(err) - } - - columns := []benchtop.ColumnDef{{Key: "data"}} - - compactjsonTable, err := compactjsonDriver.New(fetchname, columns) - if err != nil { - b.Fatal(err) - } - - inputChan := make(chan benchtop.Row, 100) - go func() { - count := 0 - for j := 0; j < fetchNumKeys; j++ { - key := []byte(fmt.Sprintf("key_%d", j)) - value := fixtures.GenerateRandomBytes(fetchValueSize) - inputChan <- benchtop.Row{Id: key, Data: map[string]any{"data": value}} - count++ - } - b.Logf("Inserted %d entries into inputChan", count) - close(inputChan) - }() - - b.Log("start load") - if err := compactjsonTable.Load(inputChan); err != nil { - b.Fatal(err) - } - b.Log("Load completed successfully") - - keys, err := compactjsonTable.Keys() - if err != nil { - b.Fatal(err) - } - - outStruct := compactjsonTable.Fetch(keys, 5) - keyCount := 0 - for _ = range outStruct { - //b.Log("KEY: ", keys) - keyCount++ - } - b.Log("KEY COUNT: ", keyCount) -} diff --git a/test/benchmark/remove_test.go b/test/benchmark/remove_test.go deleted file mode 100644 index 7e59442..0000000 --- a/test/benchmark/remove_test.go +++ /dev/null @@ -1,106 +0,0 @@ -package test - -import ( - "fmt" - "os" - "testing" - - "github.com/bmeg/benchtop" - "github.com/bmeg/benchtop/jsontable" - "github.com/bmeg/benchtop/test/fixtures" - "github.com/bmeg/benchtop/util" - "github.com/bmeg/grip/log" - "github.com/cockroachdb/pebble" -) - -const ( - 
removeNumKeys = 1000 - removeValueSize = 5024 -) - -func BenchmarkRemove(b *testing.B) { - var removename = "test.json" + util.RandomString(5) - defer os.RemoveAll(removename) // Clean up - b.Log("BenchmarkScaleWriteJson start") - - compactjsonDriver, err := jsontable.NewJSONDriver(removename) - if err != nil { - b.Fatal(err) - } - - columns := []benchtop.ColumnDef{{Key: "data"}} - - compactjsonTable, err := compactjsonDriver.New(removename, columns) - if err != nil { - b.Fatal(err) - } - - inputChan := make(chan benchtop.Row, 100) - go func() { - count := 0 - for j := 0; j < removeNumKeys; j++ { - key := []byte(fmt.Sprintf("key_%d", j)) - value := fixtures.GenerateRandomBytes(removeValueSize) - inputChan <- benchtop.Row{Id: key, Data: map[string]any{"data": value}} - count++ - } - b.Logf("Inserted %d entries into inputChan", count) - close(inputChan) - }() - - b.Log("start load") - if err := compactjsonTable.Load(inputChan); err != nil { - b.Fatal(err) - } - b.Log("Load completed successfully") - - bT, _ := compactjsonTable.(*jsontable.JSONTable) - pKey := benchtop.NewPosKey(bT.TableId, []byte("key_5")) - val, closer, err := bT.Pb.Db.Get(pKey) - if err != nil { - if err != pebble.ErrNotFound { - log.Errorf("Err on dr.Pb.Get for key %s in CacheLoader: %v", pKey, err) - } - log.Errorln("ERR: ", err) - } - closer.Close() - offset, size := benchtop.ParsePosValue(val) - - data, err := compactjsonTable.GetRow(benchtop.RowLoc{Offset: offset, Size: size, Label: 0}) - b.Log("DATA BEFORE: ", data) - - if len(data) == 0 { - b.Fatal("Expected data to be in key_5 but none was found") - } - - keys, err := compactjsonTable.Keys() - if err != nil { - b.Fatal(err) - } - - outStruct := compactjsonTable.Remove(keys, 5) - keyCount := 0 - for _ = range outStruct { - keyCount++ - } - - keys, err = compactjsonTable.Keys() - if err != nil { - b.Fatal(err) - } - - data, err = compactjsonTable.GetRow(benchtop.RowLoc{Offset: offset, Size: size, Label: 0}) - b.Log("DATA AFTER: ", data) - if 
len(data) != 0 { - b.Fatalf("Expected data to be empty for key_5 but %#v was found\n", data) - } - - for key := range keys { - b.Error("Unexpected Key: ", key) - } - - scaChan := compactjsonTable.Scan(true, nil) - for elem := range scaChan { - fmt.Println("ELEM: ", elem) - } -} diff --git a/test/benchmark/scale_test.go b/test/benchmark/scale_test.go index df456d6..781ad8b 100644 --- a/test/benchmark/scale_test.go +++ b/test/benchmark/scale_test.go @@ -7,6 +7,8 @@ import ( "github.com/bmeg/benchtop" "github.com/bmeg/benchtop/jsontable" + jTable "github.com/bmeg/benchtop/jsontable/table" + "github.com/bmeg/benchtop/pebblebulk" "github.com/bmeg/benchtop/test/fixtures" "github.com/bmeg/benchtop/util" "github.com/bmeg/grip/log" @@ -14,7 +16,7 @@ import ( ) var Jsonname = "test.json" + util.RandomString(5) -var jsonTable *jsontable.JSONTable +var jsonTable *jTable.JSONTable var jsonDriver *jsontable.JSONDriver const ( @@ -47,7 +49,7 @@ func BenchmarkScaleWriteJson(b *testing.B) { } var ok bool - jsonTable, ok = table.(*jsontable.JSONTable) + jsonTable, ok = table.(*jTable.JSONTable) if !ok { b.Fatal("Failed to assert type *benchtop.JSONDriver") } @@ -55,22 +57,24 @@ func BenchmarkScaleWriteJson(b *testing.B) { b.ResetTimer() - for b.Loop() { - inputChan := make(chan benchtop.Row, 100) - go func() { - for j := range scalenumKeys { - key := []byte(fmt.Sprintf("key_%d", j)) - value := fixtures.GenerateRandomBytes(scalevalueSize) - inputChan <- benchtop.Row{Id: key, Data: map[string]any{"data": value}} + jsonDriver.Pkv.BulkWrite(func(tx *pebblebulk.PebbleBulk) error { + for b.Loop() { + inputChan := make(chan *benchtop.Row, 100) + go func() { + for j := range scalenumKeys { + key := []byte(fmt.Sprintf("key_%d", j)) + value := fixtures.GenerateRandomBytes(scalevalueSize) + inputChan <- &benchtop.Row{Id: key, Data: map[string]any{"data": value}} + } + close(inputChan) + }() + err = jsonDriver.BulkLoad(inputChan, tx) + if err != nil { + b.Fatal(err) } - close(inputChan) - }() - 
- err = jsonTable.Load(inputChan) - if err != nil { - b.Fatal(err) } - } + return nil + }) } func BenchmarkRandomReadJson(b *testing.B) { @@ -97,24 +101,23 @@ func BenchmarkRandomReadJson(b *testing.B) { selectedValues := make([]map[string]any, 0, len(randomIndexSet)) count := 0 b.ResetTimer() - - OTKEYS, _ := ot.Keys() - bT, _ := ot.(*jsontable.JSONTable) + jT, _ := ot.(*jTable.JSONTable) + OTKEYS, _ := jsonDriver.ListTableKeys(jT.TableId) for key := range OTKEYS { if _, exists := randomIndexSet[count]; exists { - pKey := benchtop.NewPosKey(bT.TableId, key.Key) - val, closer, err := bT.Pb.Db.Get(pKey) + pKey := benchtop.NewPosKey(jT.TableId, key.Key) + val, closer, err := jsonDriver.Pkv.Db.Get(pKey) if err != nil { if err != pebble.ErrNotFound { log.Errorf("Err on dr.Pb.Get for key %s in CacheLoader: %v", key.Key, err) } log.Errorln("ERR: ", err) } - offset, size := benchtop.ParsePosValue(val) + loc := benchtop.DecodeRowLoc(val) closer.Close() - rOw, err := bT.GetRow(benchtop.RowLoc{Offset: offset, Size: size, Label: 0}) + rOw, err := jT.GetRow(loc) if err != nil { b.Fatal(err) } @@ -122,8 +125,6 @@ func BenchmarkRandomReadJson(b *testing.B) { } count++ } - b.Log("READS:", len(selectedValues), "COUNT: ", count) - } func BenchmarkRandomKeysJson(b *testing.B) { @@ -153,7 +154,8 @@ func BenchmarkRandomKeysJson(b *testing.B) { count := 0 b.ResetTimer() - OTKEYS, _ := ot.Keys() + jT, _ := ot.(*jTable.JSONTable) + OTKEYS, _ := jsonDriver.ListTableKeys(jT.TableId) for key := range OTKEYS { if _, exists := randomIndexSet[count]; exists { selectedValues = append(selectedValues, key.Key) diff --git a/test/integration/basic_test.go b/test/integration/basic_test.go index 324c211..196293b 100644 --- a/test/integration/basic_test.go +++ b/test/integration/basic_test.go @@ -7,6 +7,8 @@ import ( "github.com/bmeg/benchtop" "github.com/bmeg/benchtop/jsontable" + jTable "github.com/bmeg/benchtop/jsontable/table" + "github.com/bmeg/benchtop/util" "github.com/bmeg/grip/log" 
"github.com/cockroachdb/pebble" @@ -77,31 +79,33 @@ func TestInsert(t *testing.T) { t.Error(err) } - bT, _ := ts.(*jsontable.JSONTable) + jT, _ := ts.(*jTable.JSONTable) + jDR, _ := dr.(*jsontable.JSONDriver) + for k, r := range data { - loc, err := bT.AddRow(benchtop.Row{Id: []byte(k), TableName: "table_1", Data: r}) + loc, err := jT.AddRow(benchtop.Row{Id: []byte(k), TableName: "table_1", Data: r}) if err != nil { t.Error(err) } - err = bT.AddTableEntryInfo(nil, []byte(k), *loc) + err = jDR.AddTableEntryInfo(nil, []byte(k), loc) if err != nil { t.Error(err) } } for k := range data { - pKey := benchtop.NewPosKey(bT.TableId, []byte(k)) - val, closer, err := bT.Pb.Db.Get(pKey) + pKey := benchtop.NewPosKey(jT.TableId, []byte(k)) + val, closer, err := jDR.Pkv.Db.Get(pKey) if err != nil { if err != pebble.ErrNotFound { log.Errorf("Err on dr.Pb.Get for key %s in CacheLoader: %v", k, err) } log.Errorln("ERR: ", err) } - offset, size := benchtop.ParsePosValue(val) + loc := benchtop.DecodeRowLoc(val) closer.Close() - post, err := ts.GetRow(benchtop.RowLoc{Offset: offset, Size: size, Label: 0}) + post, err := ts.GetRow(loc) if err != nil { t.Error(err) } @@ -114,7 +118,7 @@ func TestInsert(t *testing.T) { } } } - keyList, err := ts.Keys() + keyList, err := dr.ListTableKeys(jT.TableId) if err != nil { t.Error(err) } diff --git a/test/integration/compact_test.go b/test/integration/compact_test.go index 258a4e8..8be7ffc 100644 --- a/test/integration/compact_test.go +++ b/test/integration/compact_test.go @@ -1,11 +1,14 @@ package test import ( + "context" "os" "testing" "github.com/bmeg/benchtop" "github.com/bmeg/benchtop/jsontable" + jTable "github.com/bmeg/benchtop/jsontable/table" + "github.com/bmeg/benchtop/test/fixtures" "github.com/bmeg/benchtop/util" ) @@ -27,21 +30,27 @@ func TestCompact(t *testing.T) { t.Fatal(err) } - bT, _ := ts.(*jsontable.JSONTable) + jDR, _ := dr.(*jsontable.JSONDriver) + jT, _ := ts.(*jTable.JSONTable) + for k, r := range fixtures.ScanData { - 
loc, err := bT.AddRow(benchtop.Row{Id: []byte(k), TableName: "table_1", Data: r}) + loc, err := jT.AddRow(benchtop.Row{Id: []byte(k), TableName: "table_1", Data: r}) if err != nil { t.Fatal(err) } - err = bT.AddTableEntryInfo(nil, []byte(k), *loc) - + err = jDR.AddTableEntryInfo(nil, []byte(k), loc) + _, ok := jDR.LocCache.Set(k, loc) + if !ok { + t.Fatalf("Cache set failed for : %#v", loc) + } } - offset, size, err := bT.GetBlockPos([]byte("key4")) + loc, err := jDR.LocCache.Get(context.Background(), "key4") if err != nil { t.Error(err) } - err = ts.DeleteRow(benchtop.RowLoc{Offset: offset, Size: size, Label: bT.TableId}, []byte("key4")) + + err = ts.DeleteRow(loc, []byte("key4")) if err != nil { t.Fatal(err) } @@ -89,7 +98,7 @@ func TestCompact(t *testing.T) { } pKey := benchtop.NewPosKey(uint16(0), []byte("key8")) - val, closer, err := bT.Pb.Db.Get(pKey) + val, closer, err := jT.Pb.Db.Get(pKey) if err != nil { if err != pebble.ErrNotFound { log.Errorf("Err on dr.Pb.Get for key %s in CacheLoader: %v", pKey, err) @@ -99,7 +108,7 @@ func TestCompact(t *testing.T) { offset, size := benchtop.ParsePosValue(val) closer.Close() - gotRow, err := bT.GetRow(benchtop.RowLoc{Offset: offset, Size: size, Label: 0}) + gotRow, err := jT.GetRow(benchtop.RowLoc{Offset: offset, Size: size, Label: 0}) if err != nil { t.Error(err) } @@ -110,7 +119,7 @@ func TestCompact(t *testing.T) { } pKey = benchtop.NewPosKey(uint16(0), []byte("key8")) - val, closer, err = bT.Pb.Db.Get(pKey) + val, closer, err = jT.Pb.Db.Get(pKey) if err != nil { if err != pebble.ErrNotFound { log.Errorf("Err on dr.Pb.Get for key %s in CacheLoader: %v", pKey, err) @@ -121,7 +130,7 @@ func TestCompact(t *testing.T) { closer.Close() // Get another key to double check that it works - gotRow, err = bT.GetRow(benchtop.RowLoc{Offset: offset, Size: size, Label: 0}) + gotRow, err = jT.GetRow(benchtop.RowLoc{Offset: offset, Size: size, Label: 0}) if err != nil { t.Error(err) } diff --git a/test/integration/delete_test.go 
b/test/integration/delete_test.go index 96bfb16..d17872e 100644 --- a/test/integration/delete_test.go +++ b/test/integration/delete_test.go @@ -1,12 +1,14 @@ package test import ( + "context" "fmt" "os" "testing" "github.com/bmeg/benchtop" "github.com/bmeg/benchtop/jsontable" + jTable "github.com/bmeg/benchtop/jsontable/table" "github.com/bmeg/benchtop/util" ) @@ -23,39 +25,44 @@ func TestDelete(t *testing.T) { {Key: "data"}, {Key: "id"}, }) - if err != nil { t.Error(err) } totalCount := 100 - bT, _ := ts.(*jsontable.JSONTable) - for i := 0; i < totalCount; i++ { + jT, _ := ts.(*jTable.JSONTable) + jDr, _ := dr.(*jsontable.JSONDriver) + + for i := range totalCount { key := fmt.Sprintf("key_%d", i) - loc, err := bT.AddRow(benchtop.Row{Id: []byte(key), Data: map[string]any{ + loc, err := jT.AddRow(benchtop.Row{Id: []byte(key), Data: map[string]any{ "id": key, "data": i, }}) if err != nil { t.Error(err) } - err = bT.AddTableEntryInfo(nil, []byte(key), *loc) + err = jDr.AddTableEntryInfo(nil, []byte(key), loc) if err != nil { t.Error(err) } + _, ok := jDr.LocCache.Set(key, loc) + if !ok { + t.Fatalf("Failed to set loc: %#v", loc) + } } count := 0 - r, err := bT.Keys() + r, err := jDr.ListTableKeys(jT.TableId) if err != nil { t.Error(err) } for i := range r { - offset, size, err := bT.GetBlockPos(i.Key) + loc, err := jDr.LocCache.Get(context.Background(), string(i.Key)) if err != nil { t.Error(err) } - _, err = bT.GetRow(benchtop.RowLoc{Offset: offset, Size: size, Label: uint16(0)}) + _, err = jT.GetRow(loc) if err != nil { t.Errorf("Get %s error: %s", string(i.Key), err) } @@ -66,28 +73,35 @@ func TestDelete(t *testing.T) { } var deleteCount = 0 - keys, err := bT.Keys() + fmt.Println("TABLE ID: ", jT.TableId) + keys, err := jDr.ListTableKeys(jT.TableId) if err != nil { t.Error(err) } i := 0 for k := range keys { if i%3 == 0 { - offset, size, err := bT.GetBlockPos(k.Key) + loc, err := jDr.LocCache.Get(context.Background(), string(k.Key)) if err != nil { t.Error(err) } - 
err = bT.DeleteRow(benchtop.RowLoc{Offset: offset, Size: size, Label: bT.TableId}, k.Key) + + err = jDr.Pkv.Delete(benchtop.NewPosKey(jT.TableId, k.Key), nil) + if err != nil { + t.Fatal(err) + } + err = jT.DeleteRow(loc, k.Key) if err != nil { t.Errorf("delete %s error: %s", string(k.Key), err) } + jDr.LocCache.Invalidate(string(k.Key)) deleteCount++ } i++ } count = 0 - r, err = bT.Keys() + r, err = jDr.ListTableKeys(jT.TableId) if err != nil { t.Error(err) } diff --git a/test/integration/keys_test.go b/test/integration/keys_test.go index 3300ca3..a010430 100644 --- a/test/integration/keys_test.go +++ b/test/integration/keys_test.go @@ -32,15 +32,23 @@ func TestPosKeyParse(t *testing.T) { } func TestPosValueParse(t *testing.T) { - pos := uint64(12345) - size := uint64(2028) - - k := benchtop.NewPosValue(pos, size) - pPos, pSize := benchtop.ParsePosValue(k) - if pos != pPos { - t.Errorf("%d != %d", pos, pPos) + pos := uint32(12345) + size := uint32(2028) + tableId := uint16(0) + section := uint16(0) + + k := benchtop.EncodeRowLoc(&benchtop.RowLoc{TableId: tableId, Section: section, Offset: pos, Size: size}) + loc := benchtop.DecodeRowLoc(k) + if pos != loc.Offset { + t.Errorf("%d != %d", pos, loc.Offset) + } + if size != loc.Size { + t.Errorf("%d != %d", size, loc.Size) + } + if section != loc.Section { + t.Errorf("%d != %d", size, loc.Size) } - if size != pSize { - t.Errorf("%d != %d", size, pSize) + if tableId != loc.TableId { + t.Errorf("%d != %d", size, loc.Size) } } diff --git a/test/integration/scan_test.go b/test/integration/scan_test.go index de7b845..88fc6bb 100644 --- a/test/integration/scan_test.go +++ b/test/integration/scan_test.go @@ -1,69 +1,145 @@ package test import ( - "fmt" + "context" "os" - "reflect" "testing" "github.com/bmeg/benchtop" "github.com/bmeg/benchtop/filters" "github.com/bmeg/benchtop/jsontable" + "github.com/bmeg/benchtop/jsontable/table" + jTable "github.com/bmeg/benchtop/jsontable/table" + "github.com/bytedance/sonic" + 
"github.com/bytedance/sonic/ast" + "google.golang.org/protobuf/types/known/structpb" + "github.com/bmeg/benchtop/test/fixtures" "github.com/bmeg/grip/gripql" + "github.com/bmeg/grip/log" "github.com/bmeg/benchtop/util" ) type FieldFilters []filters.FieldFilter -func (ff FieldFilters) Matches(row any) bool { - rowData, ok := row.(map[string]any) - if !ok { - return false - } - for _, filter := range ff { - fieldValue, ok := rowData[filter.Field] - if !ok { - return false - } - switch filter.Operator { - case gripql.Condition_EQ: - if fmt.Sprintf("%v", fieldValue) != fmt.Sprintf("%v", filter.Value) { +func localMatchesHasExpression(row []byte, stmt *gripql.HasExpression, tableName string) bool { + switch stmt.Expression.(type) { + case *gripql.HasExpression_Condition: + cond := stmt.GetCondition() + var lookupVal any + switch cond.Key { + case "_label": + lookupVal = tableName[2:] + case "_id": + node, err := sonic.Get(row, []any{"1"}...) + if err != nil { + if err != ast.ErrNotExist { + log.Errorf("Sonic Fetch err for path 1 on doc %#v: %v", string(row), err) + } return false } - case gripql.Condition_GT: - val1, ok1 := fieldValue.(float64) - val2, ok2 := filter.Value.(float64) - if !ok1 || !ok2 { - // Handle type mismatch, maybe return false or an error + lookupVal, err = node.Interface() + if err != nil { + log.Errorf("Error unmarshaling node: %v", err) return false } - if val1 <= val2 { - return false // Does not match the "greater than" condition + default: // Handles all other keys (e.g., standard properties) + pathArr, err := table.ConvertJSONPathToArray(cond.Key) + if err != nil { + log.Errorf("Error converting JSON path: %v", err) + return false } - - case gripql.Condition_CONTAINS: - found := false - switch val := filter.Value.(type) { - case []any: - for _, v := range val { - if reflect.DeepEqual(v, fieldValue) { - found = true - } + node, err := sonic.Get(row, pathArr...) 
+ if err != nil { + if err != ast.ErrNotExist { + log.Errorf("Sonic Fetch err for path: %s on doc %#v: %v", pathArr, string(row), err) + return false } - case nil: - found = false - default: + lookupVal = nil + } else { + lookupVal, err = node.Interface() + if err != nil { + log.Errorf("Error unmarshaling node: %v", err) + return false + } + } + } + + // ApplyFilterCondition must be accessible via your bFilters import + // + return filters.ApplyFilterCondition( + lookupVal, + &filters.FieldFilter{ + Operator: cond.Condition, + Field: cond.Key, + Value: cond.Value.AsInterface(), + }, + ) + + case *gripql.HasExpression_And: + for _, e := range stmt.GetAnd().Expressions { + if !localMatchesHasExpression(row, e, tableName) { + return false } - return found + } + return true - default: + case *gripql.HasExpression_Or: + for _, e := range stmt.GetOr().Expressions { + if localMatchesHasExpression(row, e, tableName) { + return true + } + } + return false + + case *gripql.HasExpression_Not: + return !localMatchesHasExpression(row, stmt.GetNot(), tableName) + + default: + log.Errorf("unknown where expression type: %T", stmt.Expression) + return false + } +} + +func (ff FieldFilters) Matches(row []byte, tableStr string) bool { + if len(ff) == 0 { + return true + } + + expressions := make([]*gripql.HasExpression, 0, len(ff)) + + for _, filter := range ff { + // NOTE: Since your original test code used filter.Value directly, + // we'll convert it to *structpb.Value. 
You need to import this: + // "google.golang.org/protobuf/types/known/structpb" + valuePB, err := structpb.NewValue(filter.Value) + if err != nil { return false } + condition := &gripql.HasExpression_Condition{ + Condition: &gripql.HasCondition{ + Key: filter.Field, + Condition: filter.Operator, + Value: valuePB, + }, + } + + expressions = append(expressions, &gripql.HasExpression{ + Expression: condition, + }) } - return true + + // Combine all conditions into a single AND expression + hasExpr := &gripql.HasExpression{ + Expression: &gripql.HasExpression_And{ + And: &gripql.HasExpressionList{ + Expressions: expressions, + }, + }, + } + return localMatchesHasExpression(row, hasExpr, tableStr) } func (ff FieldFilters) IsNoOp() bool { @@ -99,30 +175,35 @@ func TestScan(t *testing.T) { t.Error(err) } - bT, _ := ts.(*jsontable.JSONTable) + jT, _ := ts.(*jTable.JSONTable) + jDr, _ := dr.(*jsontable.JSONDriver) + for k, r := range fixtures.ScanData { - loc, err := bT.AddRow(benchtop.Row{Id: []byte(k), Data: r}) + loc, err := jT.AddRow(benchtop.Row{Id: []byte(k), Data: r}) if err != nil { t.Error(err) } - if loc.Offset == 0 || loc.Size == 0 { - t.Error(fmt.Errorf("expecting Offset and Size to be populated but got %d and %d instead", loc.Offset, loc.Size)) - } - err = bT.AddTableEntryInfo(nil, []byte(k), *loc) + /*if loc.Offset == 0 || loc.Size == 0 { + t.Error(fmt.Errorf("expecting Offset and Size to be populated but got %d and %d instead", loc.Offset, loc.Size)) + }*/ + + err = jDr.AddTableEntryInfo(nil, []byte(k), loc) if err != nil { t.Error(err) } + + jDr.LocCache.Set(k, loc) } filters1 := FieldFilters{filters.FieldFilter{Field: "name", Operator: gripql.Condition_EQ, Value: "alice"}} lenscanChan1 := 0 - for elem := range bT.Scan(true, filters1) { + for elem := range jT.ScanDoc(filters1) { lenscanChan1++ t.Log("scanChan: ", elem) - if elem.(map[string]any)["name"] != "alice" { + if elem["name"] != "alice" { t.Errorf("expecting chan of len 1 with value name:alice got 
%v", elem) } - if _, ok := elem.(map[string]any)["_key"]; ok { + if _, ok := elem["_key"]; ok { t.Errorf("specified no key to be returned but returned key anyway") } } @@ -131,20 +212,16 @@ func TestScan(t *testing.T) { } // Second test case: "field1" == 0.2 - filters2 := FieldFilters{filters.FieldFilter{Field: "field1", Operator: gripql.Condition_EQ, Value: 0.2}} - scanChan2 := bT.Scan(true, filters2) - - for elem := range scanChan2 { + for elem := range jT.ScanDoc( + FieldFilters{filters.FieldFilter{ + Field: "field1", Operator: gripql.Condition_EQ, Value: 0.2}, + }, + ) { t.Log("scanChantwo: ", elem) - data, ok := elem.(map[string]any) - if !ok { - t.Errorf("expected map[string]any, but got %T", elem) - continue - } - if data["field1"] != 0.2 { + if elem["field1"].(float64) != 0.2 { t.Errorf("expecting chan of len 1 with value field:0.2 got %v", elem) } - if key, ok := data["_key"]; ok { + if key, ok := elem["_id"]; ok { if key == "" { t.Errorf("specified key to be returned but got an empty string") } @@ -153,18 +230,13 @@ func TestScan(t *testing.T) { // Third test case: "field1" > 0.2 filters3 := FieldFilters{filters.FieldFilter{Field: "field1", Operator: gripql.Condition_GT, Value: 0.2}} - scanChan3 := bT.Scan(true, filters3) + scanChan3 := jT.ScanDoc(filters3) scanChanLen3 := 0 for elem := range scanChan3 { t.Log("scanChanthree: ", elem) scanChanLen3++ - data, ok := elem.(map[string]any) - if !ok { - t.Errorf("expected map[string]any, but got %T", elem) - continue - } - if key, ok := data["_key"]; ok { + if key, ok := elem["_key"]; ok { if key == "" { t.Errorf("specified key to be returned but got an empty string") } @@ -174,27 +246,27 @@ func TestScan(t *testing.T) { t.Errorf("Expecting 6 items returned but got %d", scanChanLen3) } - offset, size, err := bT.GetBlockPos([]byte("key4")) + loc, err := jDr.LocCache.Get(context.Background(), "key4") if err != nil { t.Error(err) } - err = bT.DeleteRow(benchtop.RowLoc{Offset: offset, Size: size, Label: 
bT.TableId}, []byte("key4")) + err = jT.DeleteRow(loc, []byte("key4")) if err != nil { t.Error(err) } - // Fourth test case: "name" starts with "a" - // NOTE: You need to fix the case in your original code from "startswith" to "STARTSWITH" - filters4 := FieldFilters{filters.FieldFilter{Field: "name", Operator: gripql.Condition_CONTAINS, Value: []any{"mnbv"}}} - scanChan4 := bT.Scan(false, filters4) - scanChanLen4 := 0 - for elem := range scanChan4 { + for elem := range jT.ScanId( + FieldFilters{ + filters.FieldFilter{ + Field: "name", + Operator: gripql.Condition_EQ, + Value: "mnbv", + }, + }, + ) { t.Log("scanChanfour: ", elem) scanChanLen4++ - if key, ok := elem.(string); !ok { - t.Errorf("specified returned key is not string %s", key) - } } if scanChanLen4 != 1 { t.Errorf("Expecting only one elem after delete key4, but got %d", scanChanLen4) diff --git a/test/pybenchtop/test_vector_load.py b/test/pybenchtop/test_vector_load.py deleted file mode 100644 index 165070d..0000000 --- a/test/pybenchtop/test_vector_load.py +++ /dev/null @@ -1,14 +0,0 @@ - - -import numpy as np -import pybenchtop - -dbpath = "test.table" - -dr = pybenchtop.Driver(dbpath) -table = dr.new("VECTORS", {"embedding" : pybenchtop.VECTOR}) - -for i in range(100): - table.add(str(i), np.random.rand(256)) - -dr.close() \ No newline at end of file diff --git a/test/speed_test/marshal_test.go b/test/speed_test/marshal_test.go deleted file mode 100644 index d024e6e..0000000 --- a/test/speed_test/marshal_test.go +++ /dev/null @@ -1,7 +0,0 @@ -package test - -import "testing" - -func TestMarshal(t *testing.T) { - -} diff --git a/test/vector/vector_open_close_test.go b/test/vector/vector_open_close_test.go deleted file mode 100644 index 2069091..0000000 --- a/test/vector/vector_open_close_test.go +++ /dev/null @@ -1,138 +0,0 @@ -package test - -import ( - "math/rand" -) - -func GenerateRandomFloat32Vectors(numVectors, dim int) map[uint64][]float32 { - vmap := make(map[uint64][]float32, numVectors) - for 
i := 0; i < numVectors; i++ { - vector := make([]float32, dim) - for j := 0; j < dim; j++ { - vector[j] = rand.Float32() * 100 - } - vmap[uint64(i)] = vector - } - return vmap -} - -/* Not sure where this HnswIndex.ContainsDoc( is even coming from. Not going to attempt to maintain something that I don't remember -func TestBenchtopHNSW(t *testing.T) { - - numVectors := 100 - dim := 150 - - rootPath := filepath.Join(fmt.Sprintf("benchtop_hnsw_0")) - defer os.RemoveAll(rootPath) - - if err := os.MkdirAll(rootPath, 0755); err != nil { - t.Fatalf("failed to create directory: %v", err) - } - - driver, err := bsontable.NewBSONDriver(rootPath) - if err != nil { - t.Fatalf("failed to create BSON driver: %v", err) - } - defer driver.Close() - - columns := []benchtop.ColumnDef{ - {Key: "vector", Type: benchtop.VectorArray}, - } - table, err := driver.New("vectors", columns) - if err != nil { - t.Fatalf("failed to create table: %v", err) - } - - // Insert vectors - rows := make(chan benchtop.Row, 100) - vecs := GenerateRandomFloat32Vectors(numVectors, dim) - go func() { - defer close(rows) - for id, vec := range vecs { - key := make([]byte, 8) - binary.LittleEndian.PutUint64(key, id) - rows <- benchtop.Row{ - Id: key, - TableName: "vectors", - Data: map[string]any{"vector": vec}, - } - } - }() - if err := table.Load(rows); err != nil { - t.Fatalf("failed to load vectors: %v", err) - } - - val := table.(*bsontable.BSONTable).HnswIndex.ContainsDoc(uint64(rand.Int63n(int64(numVectors)))) - t.Log("VAL 1: ", val) - - driver.Close() - or, err := bsontable.LoadBSONDriver(rootPath, "benchtop_hnsw_0") - ot, err := or.Get("vectors") - if err != nil { - t.Error(err) - } - - key := make([]byte, 8) - binary.LittleEndian.PutUint64(key, uint64(rand.Int63n(int64(numVectors)))) - - row, err := ot.GetRow(key) - t.Log("ROW: ", row) - if err != nil { - t.Error(err) - } - - val = ot.(*bsontable.BSONTable).HnswIndex.ContainsDoc(uint64(rand.Int63n(int64(numVectors)))) - t.Log("VAL 2: ", val) - - 
results, err := ot.VectorSearch("vector", vecs[uint64(rand.Int63n(int64(numVectors)))], 10) - if err != nil { - t.Fatalf("vector search failed: %v", err) - } - - t.Log("RESULTS: ", results) - - or.Close() -} - -func TestPersistence(t *testing.T) { - rootPath := "test_hnsw" - os.RemoveAll(rootPath) - driver, err := bsontable.NewBSONDriver(rootPath) - if err != nil { - t.Fatalf("failed to create driver: %v", err) - } - table, err := driver.New("vectors", []benchtop.ColumnDef{{Key: "vector", Type: benchtop.VectorArray}}) - if err != nil { - t.Fatalf("failed to create table: %v", err) - } - id := uint64(1) - key := make([]byte, 8) - binary.LittleEndian.PutUint64(key, id) - vec := []float32{1.0, 2.0, 3.0} - table.AddRow(benchtop.Row{Id: key, TableName: "vectors", Data: map[string]any{"vector": vec}}) - //fmt.Printf("TABLE 1B: %#v\n", table.(*bsontable.BSONTable).HnswIndex) - //fmt.Printf("TABLE 1C: %#v\n", table.(*bsontable.BSONTable).Store) - - val := table.(*bsontable.BSONTable).HnswIndex.ContainsDoc(uint64(1)) - t.Log("VAL: ", val) - - driver.Close() - - // Reopen - driver, err = bsontable.LoadBSONDriver(rootPath) - if err != nil { - t.Fatalf("failed to load driver: %v", err) - } - - table, err = driver.Get("vectors") - - //fmt.Printf("TABLE 2B: %#v\n", table.(*bsontable.BSONTable).HnswIndex) - //fmt.Printf("TABLE 2C: %#v\n", table.(*bsontable.BSONTable).Store) - - bsonTable := table.(*bsontable.BSONTable) - twoval := bsonTable.HnswIndex.ContainsDoc(uint64(1)) - t.Log("TWOVAL: ", twoval) - driver.Close() - -} -*/ diff --git a/test/vector/vector_search_test.go b/test/vector/vector_search_test.go deleted file mode 100644 index d8b2896..0000000 --- a/test/vector/vector_search_test.go +++ /dev/null @@ -1,91 +0,0 @@ -package test - -import ( - "fmt" - "math/rand" - "os" - "testing" - "time" - - "github.com/bmeg/benchtop" - "github.com/bmeg/benchtop/distqueue" - "github.com/bmeg/benchtop/jsontable" -) - -// RandomString generates a random string of length n. 
-func RandomString(n int) string { - rand.NewSource(int64(time.Now().UnixNano())) - var letter = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789") - b := make([]rune, n) - for i := range b { - b[i] = letter[rand.Intn(len(letter))] - } - return string(b) -} - -const ( - DIM = 128 - COUNT = 1000 -) - -func TestInsert(t *testing.T) { - - dbname := "test_index." + RandomString(5) - - driver, err := jsontable.NewJSONDriver(dbname) - - if err != nil { - t.Error(err) - } - - table, err := driver.New("VECTORS", []benchtop.ColumnDef{{Key: "embedding"}}) - if err != nil { - t.Error(err) - } - - vmap := map[string][]float32{} - for i := 0; i < 100; i++ { - c := make([]float32, DIM) - for j := 0; j < DIM; j++ { - c[j] = rand.Float32() - } - vmap[fmt.Sprintf("%d", i)] = c - } - - for k, v := range vmap { - _, err := table.AddRow(benchtop.Row{Id: []byte(k), TableName: "VECTORS", Data: map[string]any{"embedding": v}}) - if err != nil { - t.Error(err) - } - } - - //TODO Add search here - - qName := "10" - qVec := vmap[qName] - testDists := distqueue.NewMin[float32, string]() - - for k, v := range vmap { - d := distqueue.Euclidean(v, qVec) - testDists.Insert(d, k) - } - - //TODO: Make this work - /* - out, err := table.Search("VECTORS", vmap[qName], 10) - if err != nil { - t.Error(err) - } - - for _, i := range out { - fmt.Printf("search: out: %s\n", i) - } - - for i := 0; i < 10; i++ { - fmt.Printf("scan out: %s\n", testDists[i].Value) - } - */ - - driver.Close() - os.RemoveAll(dbname) -} diff --git a/util/util.go b/util/util.go index 2ea032c..3c970d3 100644 --- a/util/util.go +++ b/util/util.go @@ -28,6 +28,17 @@ func FileExists(path string) bool { return err != os.ErrNotExist } +func DirExists(path string) (bool, error) { + info, err := os.Stat(path) + if err != nil { + if os.IsNotExist(err) { + return false, nil + } + return false, err + } + return info.IsDir(), nil +} + func CopyBytes(in []byte) []byte { out := make([]byte, len(in)) copy(out, in)