Skip to content

Commit 8190f97

Browse files
mudler and Copilot authored
feat: drop memory index (#43)
* feat: drop memory index Signed-off-by: Ettore Di Giacinto <mudler@localai.io> * Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> * fix tests Signed-off-by: Ettore Di Giacinto <mudler@localai.io> --------- Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
1 parent 933f686 commit 8190f97

File tree

11 files changed

+749
-171
lines changed

11 files changed

+749
-171
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,4 +184,4 @@ clean-test-services: stop-test-services
184184
@docker compose rm -f localai postgres || true
185185

186186
run-e2e:
187-
@E2E=true LOCALAI_ENDPOINT=http://localhost:8081 LOCALRECALL_ENDPOINT=http://localhost:8080 go test -v ./test/e2e/...
187+
@E2E=true LOCALAI_ENDPOINT=http://localhost:8081 LOCALRECALL_ENDPOINT=http://localhost:8080 go test -v -timeout 30m ./test/e2e/...

pkg/client/client.go

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -66,13 +66,20 @@ func (c *Client) ListCollections() ([]string, error) {
6666
return nil, errors.New("failed to list collections")
6767
}
6868

69-
var collections []string
70-
err = json.NewDecoder(resp.Body).Decode(&collections)
69+
var apiResp struct {
70+
Success bool `json:"success"`
71+
Message string `json:"message"`
72+
Data struct {
73+
Collections []string `json:"collections"`
74+
Count int `json:"count"`
75+
} `json:"data"`
76+
}
77+
err = json.NewDecoder(resp.Body).Decode(&apiResp)
7178
if err != nil {
7279
return nil, err
7380
}
7481

75-
return collections, nil
82+
return apiResp.Data.Collections, nil
7683
}
7784

7885
// ListEntries lists all entries in a collection
@@ -132,15 +139,20 @@ func (c *Client) GetEntryContent(collection, entry string) ([]EntryChunk, error)
132139

133140
var result struct {
134141
Data struct {
135-
Chunks []EntryChunk `json:"chunks"`
142+
Content string `json:"content"`
143+
ChunkCount int `json:"chunk_count"`
136144
} `json:"data"`
137145
}
138146
err = json.NewDecoder(resp.Body).Decode(&result)
139147
if err != nil {
140148
return nil, err
141149
}
142150

143-
return result.Data.Chunks, nil
151+
if result.Data.Content == "" {
152+
return nil, nil
153+
}
154+
155+
return []EntryChunk{{Content: result.Data.Content}}, nil
144156
}
145157

146158
// GetEntryRawFile returns the original uploaded binary file as a ReadCloser.
@@ -196,13 +208,19 @@ func (c *Client) DeleteEntry(collection, entry string) ([]string, error) {
196208
return nil, errors.New("failed to delete collection: " + bodyResult.String())
197209
}
198210

199-
var results []string
200-
err = json.NewDecoder(resp.Body).Decode(&results)
211+
var apiResp struct {
212+
Success bool `json:"success"`
213+
Message string `json:"message"`
214+
Data struct {
215+
RemainingEntries []string `json:"remaining_entries"`
216+
} `json:"data"`
217+
}
218+
err = json.NewDecoder(resp.Body).Decode(&apiResp)
201219
if err != nil {
202220
return nil, err
203221
}
204222

205-
return results, nil
223+
return apiResp.Data.RemainingEntries, nil
206224
}
207225

208226
// Search searches a collection
@@ -229,13 +247,19 @@ func (c *Client) Search(collection, query string, maxResults int) ([]types.Resul
229247
return nil, errors.New("failed to search collection")
230248
}
231249

232-
var results []types.Result
233-
err = json.NewDecoder(resp.Body).Decode(&results)
250+
var apiResp struct {
251+
Success bool `json:"success"`
252+
Message string `json:"message"`
253+
Data struct {
254+
Results []types.Result `json:"results"`
255+
} `json:"data"`
256+
}
257+
err = json.NewDecoder(resp.Body).Decode(&apiResp)
234258
if err != nil {
235259
return nil, err
236260
}
237261

238-
return results, nil
262+
return apiResp.Data.Results, nil
239263
}
240264

241265
func (c *Client) Reset(collection string) error {

rag/engine.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,5 @@ type Engine interface {
1414
Count() int
1515
Delete(where map[string]string, whereDocuments map[string]string, ids ...string) error
1616
GetByID(id string) (types.Result, error)
17+
GetBySource(source string) ([]types.Result, error)
1718
}

rag/engine/chromem.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,32 @@ func (c *ChromemDB) GetByID(id string) (types.Result, error) {
167167
return types.Result{ID: res.ID, Metadata: res.Metadata, Content: res.Content}, nil
168168
}
169169

170+
func (c *ChromemDB) GetBySource(source string) ([]types.Result, error) {
171+
ctx := context.Background()
172+
count := c.collection.Count()
173+
if count == 0 {
174+
return nil, nil
175+
}
176+
177+
// Use Query with a where filter to find documents by source metadata.
178+
// We use a dummy query and request all documents, relying on the where
179+
// filter to narrow results.
180+
res, err := c.collection.Query(ctx, ".", count, map[string]string{"source": source}, nil)
181+
if err != nil {
182+
return nil, fmt.Errorf("error querying by source: %v", err)
183+
}
184+
185+
var results []types.Result
186+
for _, r := range res {
187+
results = append(results, types.Result{
188+
ID: r.ID,
189+
Metadata: r.Metadata,
190+
Content: r.Content,
191+
})
192+
}
193+
return results, nil
194+
}
195+
170196
func (c *ChromemDB) Search(s string, similarEntries int) ([]types.Result, error) {
171197
res, err := c.collection.Query(context.Background(), s, similarEntries, nil, nil)
172198
if err != nil {

rag/engine/localai.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,10 @@ func (db *LocalAIRAGDB) GetByID(id string) (types.Result, error) {
8888
return types.Result{}, fmt.Errorf("not implemented")
8989
}
9090

91+
func (db *LocalAIRAGDB) GetBySource(source string) ([]types.Result, error) {
92+
return nil, fmt.Errorf("not implemented")
93+
}
94+
9195
func (db *LocalAIRAGDB) Search(s string, similarEntries int) ([]types.Result, error) {
9296
resp, err := db.openaiClient.CreateEmbeddings(context.TODO(),
9397
openai.EmbeddingRequestStrings{

rag/engine/mock.go

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
package engine
2+
3+
import (
4+
"fmt"
5+
"strings"
6+
"sync"
7+
8+
"github.com/mudler/localrecall/rag/types"
9+
)
10+
11+
// MockEngine is a simple in-memory engine for testing. It requires no
12+
// external dependencies (no LocalAI, no embeddings).
13+
type MockEngine struct {
14+
mu sync.Mutex
15+
docs map[string]types.Result
16+
index int
17+
}
18+
19+
func NewMockEngine() *MockEngine {
20+
return &MockEngine{
21+
docs: make(map[string]types.Result),
22+
index: 1,
23+
}
24+
}
25+
26+
func (m *MockEngine) Store(s string, metadata map[string]string) (Result, error) {
27+
results, err := m.StoreDocuments([]string{s}, metadata)
28+
if err != nil {
29+
return Result{}, err
30+
}
31+
return results[0], nil
32+
}
33+
34+
func (m *MockEngine) StoreDocuments(s []string, metadata map[string]string) ([]Result, error) {
35+
m.mu.Lock()
36+
defer m.mu.Unlock()
37+
38+
if len(s) == 0 {
39+
return nil, fmt.Errorf("empty input")
40+
}
41+
42+
results := make([]Result, len(s))
43+
for i, content := range s {
44+
id := fmt.Sprintf("%d", m.index)
45+
// Copy metadata so each doc has its own map
46+
meta := make(map[string]string, len(metadata))
47+
for k, v := range metadata {
48+
meta[k] = v
49+
}
50+
m.docs[id] = types.Result{
51+
ID: id,
52+
Content: content,
53+
Metadata: meta,
54+
}
55+
results[i] = Result{ID: id}
56+
m.index++
57+
}
58+
return results, nil
59+
}
60+
61+
func (m *MockEngine) Search(s string, similarEntries int) ([]types.Result, error) {
62+
m.mu.Lock()
63+
defer m.mu.Unlock()
64+
65+
var results []types.Result
66+
for _, doc := range m.docs {
67+
if strings.Contains(strings.ToLower(doc.Content), strings.ToLower(s)) {
68+
results = append(results, doc)
69+
}
70+
}
71+
// If no substring match, return all (useful for generic searches)
72+
if len(results) == 0 {
73+
for _, doc := range m.docs {
74+
results = append(results, doc)
75+
}
76+
}
77+
if len(results) > similarEntries {
78+
results = results[:similarEntries]
79+
}
80+
return results, nil
81+
}
82+
83+
func (m *MockEngine) Delete(where map[string]string, whereDocuments map[string]string, ids ...string) error {
84+
m.mu.Lock()
85+
defer m.mu.Unlock()
86+
87+
// Delete by IDs
88+
if len(ids) > 0 {
89+
for _, id := range ids {
90+
delete(m.docs, id)
91+
}
92+
return nil
93+
}
94+
95+
// Delete by metadata where filter
96+
if len(where) > 0 {
97+
for id, doc := range m.docs {
98+
match := true
99+
for k, v := range where {
100+
if doc.Metadata[k] != v {
101+
match = false
102+
break
103+
}
104+
}
105+
if match {
106+
delete(m.docs, id)
107+
}
108+
}
109+
}
110+
111+
return nil
112+
}
113+
114+
func (m *MockEngine) GetByID(id string) (types.Result, error) {
115+
m.mu.Lock()
116+
defer m.mu.Unlock()
117+
118+
doc, ok := m.docs[id]
119+
if !ok {
120+
return types.Result{}, fmt.Errorf("document not found: %s", id)
121+
}
122+
return doc, nil
123+
}
124+
125+
func (m *MockEngine) GetBySource(source string) ([]types.Result, error) {
126+
m.mu.Lock()
127+
defer m.mu.Unlock()
128+
129+
var results []types.Result
130+
for _, doc := range m.docs {
131+
if doc.Metadata["source"] == source {
132+
results = append(results, doc)
133+
}
134+
}
135+
return results, nil
136+
}
137+
138+
func (m *MockEngine) Count() int {
139+
m.mu.Lock()
140+
defer m.mu.Unlock()
141+
142+
return len(m.docs)
143+
}
144+
145+
func (m *MockEngine) Reset() error {
146+
m.mu.Lock()
147+
defer m.mu.Unlock()
148+
149+
m.docs = make(map[string]types.Result)
150+
m.index = 1
151+
return nil
152+
}
153+
154+
func (m *MockEngine) GetEmbeddingDimensions() (int, error) {
155+
return 384, nil
156+
}

rag/engine/postgres.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -586,6 +586,45 @@ func (p *PostgresDB) GetByID(id string) (types.Result, error) {
586586
return result, nil
587587
}
588588

589+
func (p *PostgresDB) GetBySource(source string) ([]types.Result, error) {
590+
ctx := context.Background()
591+
592+
rows, err := p.pool.Query(ctx, fmt.Sprintf(`
593+
SELECT id::text, COALESCE(title, '') as title, content, metadata
594+
FROM %s WHERE metadata->>'source' = $1
595+
`, p.tableName), source)
596+
if err != nil {
597+
return nil, fmt.Errorf("failed to query by source: %w", err)
598+
}
599+
defer rows.Close()
600+
601+
var results []types.Result
602+
for rows.Next() {
603+
var r types.Result
604+
var title string
605+
var metadataJSON []byte
606+
607+
if err := rows.Scan(&r.ID, &title, &r.Content, &metadataJSON); err != nil {
608+
return nil, fmt.Errorf("failed to scan row: %w", err)
609+
}
610+
611+
r.Metadata = make(map[string]string)
612+
if len(metadataJSON) > 0 {
613+
json.Unmarshal(metadataJSON, &r.Metadata)
614+
}
615+
if title != "" {
616+
r.Metadata["title"] = title
617+
}
618+
results = append(results, r)
619+
}
620+
621+
if err := rows.Err(); err != nil {
622+
return nil, fmt.Errorf("row iteration error: %w", err)
623+
}
624+
625+
return results, nil
626+
}
627+
589628
func (p *PostgresDB) Search(s string, similarEntries int) ([]types.Result, error) {
590629
ctx := context.Background()
591630

0 commit comments

Comments
 (0)