diff --git a/e2e_bench_test.go b/e2e_bench_test.go new file mode 100644 index 0000000..75b1927 --- /dev/null +++ b/e2e_bench_test.go @@ -0,0 +1,330 @@ +package rankdb + +// End-to-end benchmarks for RankDB. +// These simulate realistic usage patterns: list creation, bulk ingestion, +// score updates, rank lookups, concurrent access, and segment splits/merges. + +import ( + "context" + "fmt" + "math/rand" + "sync" + "testing" + "time" + + "github.com/Vivino/rankdb/blobstore/memstore" +) + +// e2eRandElements generates random elements with a given seed. +func e2eRandElements(n int, seed int64) Elements { + if n == 0 { + return Elements{} + } + rng := rand.New(rand.NewSource(seed)) + res := make(Elements, n) + for i := range res { + res[i] = Element{ + Score: uint64(rng.Uint32()), + TieBreaker: rng.Uint32(), + Updated: uint32(time.Now().Unix()), + ID: ElementID(rng.Uint64()), + Payload: []byte(`{"value":"bench","type":"user-list"}`), + } + } + return res +} + +// BenchmarkE2E_BulkIngestion benchmarks populating a list with elements. +func BenchmarkE2E_BulkIngestion(b *testing.B) { + sizes := []int{1000, 10000} + for _, size := range sizes { + b.Run(fmt.Sprintf("Elements_%d", size), func(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + ctx := context.Background() + store := memstore.NewMemStore() + elems := e2eRandElements(size, int64(i)) + _, err := NewList(ctx, "bench-list", "bench-set", store, + WithListOption.MergeSplitSize(500, 2000), + WithListOption.Populate(elems), + ) + if err != nil { + b.Fatal(err) + } + } + }) + } +} + +// BenchmarkE2E_UpdateElements benchmarks updating elements (the primary write path). +func BenchmarkE2E_UpdateElements(b *testing.B) { + sizes := []int{1000, 10000} + for _, size := range sizes { + b.Run(fmt.Sprintf("Elements_%d", size), func(b *testing.B) { + ctx := context.Background() + store := memstore.NewMemStore() + elems := e2eRandElements(size, 0xBEEF) + lst, err := NewList(ctx, "bench-list", "bench-set", store, + WithListOption.MergeSplitSize(500, 2000), + WithListOption.Populate(elems), + ) + if err != nil { + b.Fatal(err) + } + // Generate update batches (reuse same IDs with new scores) + updateBatchSize := size / 10 + if updateBatchSize < 10 { + updateBatchSize = 10 + } + batches := make([]Elements, b.N) + rng := rand.New(rand.NewSource(0xCAFE)) + for i := range batches { + batch := make(Elements, updateBatchSize) + for j := range batch { + srcIdx := rng.Intn(size) + batch[j] = Element{ + ID: elems[srcIdx].ID, + Score: uint64(rng.Uint32()), + TieBreaker: rng.Uint32(), + Updated: uint32(time.Now().Unix()), + Payload: elems[srcIdx].Payload, + } + } + batches[i] = batch + } + + b.ResetTimer() + b.ReportAllocs() + for i := 0; i < b.N; i++ { + _, err := lst.UpdateElements(ctx, store, batches[i], 0, false) + if err != nil { + b.Fatal(err) + } + } + }) + } +} + +// BenchmarkE2E_GetElements benchmarks looking up elements by ID (read path). +func BenchmarkE2E_GetElements(b *testing.B) { + sizes := []int{1000, 10000} + for _, size := range sizes { + b.Run(fmt.Sprintf("Elements_%d", size), func(b *testing.B) { + ctx := context.Background() + store := memstore.NewMemStore() + elems := e2eRandElements(size, 0xDEAD) + lst, err := NewList(ctx, "bench-list", "bench-set", store, + WithListOption.MergeSplitSize(500, 2000), + WithListOption.Populate(elems), + ) + if err != nil { + b.Fatal(err) + } + // Prepare lookup batches + lookupSize := 50 + rng := rand.New(rand.NewSource(0xFACE)) + lookups := make([][]ElementID, b.N) + for i := range lookups { + ids := make([]ElementID, lookupSize) + for j := range ids { + ids[j] = elems[rng.Intn(size)].ID + } + lookups[i] = ids + } + + b.ResetTimer() + b.ReportAllocs() + for i := 0; i < b.N; i++ { + _, err := lst.GetElements(ctx, store, lookups[i], 0) + if err != nil { + b.Fatal(err) + } + } + }) + } +} + +// BenchmarkE2E_GetRankTop benchmarks fetching top-ranked elements. +func BenchmarkE2E_GetRankTop(b *testing.B) { + sizes := []int{1000, 10000} + for _, size := range sizes { + b.Run(fmt.Sprintf("Elements_%d", size), func(b *testing.B) { + ctx := context.Background() + store := memstore.NewMemStore() + elems := e2eRandElements(size, 0xFEED) + lst, err := NewList(ctx, "bench-list", "bench-set", store, + WithListOption.MergeSplitSize(500, 2000), + WithListOption.Populate(elems), + ) + if err != nil { + b.Fatal(err) + } + + b.ResetTimer() + b.ReportAllocs() + for i := 0; i < b.N; i++ { + _, err := lst.GetRankTop(ctx, store, 0, 100) + if err != nil { + b.Fatal(err) + } + } + }) + } +} + +// BenchmarkE2E_ConcurrentReadWrite benchmarks concurrent readers and writers. +func BenchmarkE2E_ConcurrentReadWrite(b *testing.B) { + sizes := []int{1000, 10000} + for _, size := range sizes { + b.Run(fmt.Sprintf("Elements_%d", size), func(b *testing.B) { + ctx := context.Background() + store := memstore.NewMemStore() + elems := e2eRandElements(size, 0x5678) + lst, err := NewList(ctx, "bench-list", "bench-set", store, + WithListOption.MergeSplitSize(500, 2000), + WithListOption.Populate(elems), + ) + if err != nil { + b.Fatal(err) + } + + b.ResetTimer() + b.ReportAllocs() + for i := 0; i < b.N; i++ { + var wg sync.WaitGroup + // 4 concurrent readers + for r := 0; r < 4; r++ { + wg.Add(1) + go func(r int) { + defer wg.Done() + rng := rand.New(rand.NewSource(int64(i*10 + r))) + for q := 0; q < 10; q++ { + ids := []ElementID{elems[rng.Intn(size)].ID} + _, _ = lst.GetElements(ctx, store, ids, 0) + } + }(r) + } + // 2 concurrent writers + for w := 0; w < 2; w++ { + wg.Add(1) + go func(w int) { + defer wg.Done() + rng := rand.New(rand.NewSource(int64(i*10 + w + 100))) + batch := make(Elements, 10) + for j := range batch { + srcIdx := rng.Intn(size) + batch[j] = Element{ + ID: elems[srcIdx].ID, + Score: uint64(rng.Uint32()), + TieBreaker: rng.Uint32(), + Updated: uint32(time.Now().Unix()), + Payload: elems[srcIdx].Payload, + } + } + _, _ = lst.UpdateElements(ctx, store, batch, 0, false) + }(w) + } + wg.Wait() + } + }) + } +} + +// BenchmarkE2E_MultiList benchmarks operations across multiple lists. +func BenchmarkE2E_MultiList(b *testing.B) { + ctx := context.Background() + store := memstore.NewMemStore() + numLists := 5 + listSize := 2000 + lists := make([]*List, numLists) + allElems := make([]Elements, numLists) + for li := 0; li < numLists; li++ { + elems := e2eRandElements(listSize, int64(li*1000)) + allElems[li] = elems + id := ListID(fmt.Sprintf("bench-list-%d", li)) + l, err := NewList(ctx, id, "bench-set", store, + WithListOption.MergeSplitSize(500, 2000), + WithListOption.Populate(elems), + ) + if err != nil { + b.Fatal(err) + } + lists[li] = l + } + + b.ResetTimer() + b.ReportAllocs() + for i := 0; i < b.N; i++ { + var wg sync.WaitGroup + for li := 0; li < numLists; li++ { + wg.Add(1) + go func(li int) { + defer wg.Done() + rng := rand.New(rand.NewSource(int64(i*numLists + li))) + // Update a batch + batch := make(Elements, 20) + for j := range batch { + srcIdx := rng.Intn(listSize) + batch[j] = Element{ + ID: allElems[li][srcIdx].ID, + Score: uint64(rng.Uint32()), + TieBreaker: rng.Uint32(), + Updated: uint32(time.Now().Unix()), + Payload: allElems[li][srcIdx].Payload, + } + } + _, _ = lists[li].UpdateElements(ctx, store, batch, 0, false) + // Lookup + ids := []ElementID{allElems[li][rng.Intn(listSize)].ID} + _, _ = lists[li].GetElements(ctx, store, ids, 0) + // Top + _, _ = lists[li].GetRankTop(ctx, store, 0, 10) + }(li) + } + wg.Wait() + } +} + +// BenchmarkE2E_DeleteElements benchmarks element deletion. +func BenchmarkE2E_DeleteElements(b *testing.B) { + ctx := context.Background() + store := memstore.NewMemStore() + size := 5000 + elems := e2eRandElements(size, 0xDE1) + lst, err := NewList(ctx, "bench-list", "bench-set", store, + WithListOption.MergeSplitSize(500, 2000), + WithListOption.Populate(elems), + ) + if err != nil { + b.Fatal(err) + } + // Delete and re-add pattern: delete 50 elements, then re-insert them + rng := rand.New(rand.NewSource(0xDE2)) + delCount := 50 + batches := make([][]ElementID, b.N) + reinsertBatches := make([]Elements, b.N) + for i := range batches { + delIDs := make([]ElementID, delCount) + reinsert := make(Elements, delCount) + for j := range delIDs { + idx := rng.Intn(size) + delIDs[j] = elems[idx].ID + reinsert[j] = Element{ + ID: elems[idx].ID, + Score: elems[idx].Score, + TieBreaker: elems[idx].TieBreaker, + Updated: uint32(time.Now().Unix()), + Payload: elems[idx].Payload, + } + } + batches[i] = delIDs + reinsertBatches[i] = reinsert + } + + b.ResetTimer() + b.ReportAllocs() + for i := 0; i < b.N; i++ { + _ = lst.DeleteElements(ctx, store, batches[i]) + _, _ = lst.UpdateElements(ctx, store, reinsertBatches[i], 0, false) + } +} diff --git a/elements.go b/elements.go index 6dbf4d2..0f0278c 100644 --- a/elements.go +++ b/elements.go @@ -192,10 +192,12 @@ func (l Elements) FindIdx(id ElementID) (int, error) { // FindScoreIdx returns index of first element that matches score. func (l Elements) FindScoreIdx(score uint64) (int, error) { - for i := range l { - if l[i].Score == score { - return i, nil - } + // Find the first element with Score <= score **if any** + i := sort.Search(len(l), func(i int) bool { + return l[i].Score <= score + }) + if i < len(l) && l[i].Score == score { + return i, nil } return 0, ErrNotFound } @@ -204,22 +206,15 @@ func (l Elements) FindScoreIdx(score uint64) (int, error) { // Returns index of inserted item. func (l *Elements) Insert(e Element) int { lst := *l - for i, le := range lst { - if le.Above(e) { - continue - } - // Element should be placed at i - lst = append(lst, Element{}) - copy(lst[i+1:], lst[i:]) - lst[i] = e - - *l = lst - return i - } - // Element should be last. - lst = append(lst, e) + i := sort.Search(len(lst), func(i int) bool { + return !lst[i].aboveP(&e) + }) + // Element should be placed at i + lst = append(lst, Element{}) + copy(lst[i+1:], lst[i:]) + lst[i] = e *l = lst - return len(lst) - 1 + return i } // Merge other elements into this list. @@ -335,11 +330,15 @@ func (l *Elements) Add(e Element) (*Rank, error) { if e.Updated == 0 { e.Updated = uint32(time.Now().Unix()) } - _, err := l.FindIdx(e.ID) - if err == nil { - return l.Update(e) + lst := *l + for i, elem := range lst { + if elem.ID == e.ID { + // element found - delete the old, insert the new one + lst = append(lst[:i], lst[i+1:]...) + *l = lst + return l.idxRank(l.Insert(e)), nil + } } - return l.idxRank(l.Insert(e)), nil } @@ -362,11 +361,15 @@ func (l *Elements) Update(e Element) (*Rank, error) { if e.Updated == 0 { e.Updated = uint32(time.Now().Unix()) } - err := l.Delete(e.ID) - if err != nil { - return nil, err + lst := *l + for i, elem := range lst { + if elem.ID == e.ID { + lst = append(lst[:i], lst[i+1:]...) + *l = lst + return l.idxRank(l.Insert(e)), nil + } } - return l.idxRank(l.Insert(e)), nil + return nil, ErrNotFound } // Delete element from list. @@ -436,15 +439,16 @@ func (e Elements) FirstElementsWithScore(scores []uint64) Elements { return nil } res := make(Elements, 0, len(scores)) - top := scores[0] - for i := range e { - if e[i].Score <= top { + offset := 0 + for _, score := range scores { + sub := e[offset:] + i := sort.Search(len(sub), func(i int) bool { + return sub[i].Score <= score + }) + i += offset + if i < len(e) { res = append(res, e[i]) - if len(scores) <= 1 { - break - } - scores = scores[1:] - top = scores[0] + offset = i + 1 } } return res diff --git a/segment.go b/segment.go index 6cbacb3..3bd1a1b 100644 --- a/segment.go +++ b/segment.go @@ -12,6 +12,7 @@ import ( "errors" "fmt" "math" + "sort" "sync" "time" @@ -106,27 +107,21 @@ func (s *Segment) cacheID() string { // FilterIdx returns the indexes of the start and end of the slice of elements // that fall within the range of the segment. func (s *Segment) FilterIdx(e Elements) (start, end int) { - start = -1 - startE := &Element{Score: s.Max, TieBreaker: s.MaxTie} - // Find first element not above startE - for i, elem := range e { - if !elem.aboveP(startE) { - start = i - break - } + if len(e) == 0 { + return 0, 0 } - if start == -1 { + startE := &Element{Score: s.Max, TieBreaker: s.MaxTie} + start = sort.Search(len(e), func(i int) bool { + return !e[i].aboveP(startE) + }) + if start >= len(e) { return 0, 0 } + endE := &Element{Score: s.Min, TieBreaker: s.MinTie, Updated: math.MaxUint32} - // Find first element that is below endE - end = len(e) - for i, elem := range e[start:] { - if !elem.aboveP(endE) { - end = i + start - break - } - } + end = start + sort.Search(len(e)-start, func(i int) bool { + return !e[start+i].aboveP(endE) + }) return start, end } @@ -138,26 +133,16 @@ func (s *Segment) FilterScoresIdx(scores []uint64) (start, end int) { return 0, 0 } - start = -1 - // Find first element not above startE - for i, elem := range scores { - if elem <= s.Max { - start = i - break - } - } - if start == -1 { + start = sort.Search(len(scores), func(i int) bool { + return scores[i] <= s.Max + }) + if start >= len(scores) { return 0, 0 } - // Find first element that is below endE - end = len(scores) - for i, elem := range scores[start:] { - if elem <= s.Min { - end = i + start - break - } - } + end = start + sort.Search(len(scores)-start, func(i int) bool { + return scores[start+i] <= s.Min + }) return start, end }