/**
 * Bug #1914: Push one raw `observations` table row into Chroma.
 *
 * The import route calls this with database-shaped rows (nullable text
 * columns, snake_case names) rather than parsed observation objects.
 * Only the columns listed below are forwarded; any extra keys on `row`
 * are ignored. A missing `merged_into_project` is stored as `null`.
 *
 * Resolves once `addDocuments` has resolved; any error it raises propagates
 * to the caller.
 */
async syncObservationRow(row: {
  id: number;
  memory_session_id: string;
  project: string;
  merged_into_project?: string | null;
  text: string | null;
  type: string;
  title: string | null;
  subtitle: string | null;
  facts: string | null;
  narrative: string | null;
  concepts: string | null;
  files_read: string | null;
  files_modified: string | null;
  prompt_number: number;
  discovery_tokens: number;
  created_at: string;
  created_at_epoch: number;
}): Promise<void> {
  const {
    id,
    memory_session_id,
    project,
    merged_into_project,
    text,
    type,
    title,
    subtitle,
    facts,
    narrative,
    concepts,
    files_read,
    files_modified,
    prompt_number,
    discovery_tokens,
    created_at,
    created_at_epoch
  } = row;

  // Rebuild the record field by field so nothing beyond the known columns
  // is handed to the document formatter.
  const observation: StoredObservation = {
    id,
    memory_session_id,
    project,
    merged_into_project: merged_into_project ?? null,
    text,
    type,
    title,
    subtitle,
    facts,
    narrative,
    concepts,
    files_read,
    files_modified,
    prompt_number,
    discovery_tokens,
    created_at,
    created_at_epoch
  };

  await this.addDocuments(this.formatObservationDocs(observation));
}
/**
 * Bug #1913: Text-search fallback used when ChromaDB is disabled or unavailable.
 *
 * Runs FTS5 `MATCH` queries directly against the `observations_fts`,
 * `session_summaries_fts` and `user_prompts_fts` virtual tables, joins every
 * hit back to its base table and orders by the FTS5 `rank` column.
 *
 * @param query   Free-text query. Quote characters are stripped, then each
 *                whitespace-separated term is matched as a quoted FTS5 string;
 *                terms are OR-ed together.
 * @param options Which result types to search, the row limit applied to each
 *                type, and an optional project to restrict results to.
 * @returns Per-type result lists, or `null` when the fallback cannot run at
 *          all (no database handle, no `observations_fts` table, or a query
 *          threw). A query that contains no searchable terms yields empty
 *          lists rather than `null`, so callers do not mistake it for a
 *          missing search backend.
 */
private searchFTS5Fallback(
  query: string,
  options: {
    searchObservations: boolean;
    searchSessions: boolean;
    searchPrompts: boolean;
    limit: number;
    project?: string;
  }
): { observations: ObservationSearchResult[]; sessions: SessionSummarySearchResult[]; prompts: UserPromptSearchResult[] } | null {
  try {
    // Check if FTS5 tables exist (SessionStore.db is public)
    const db = this.sessionStore.db;
    if (!db) return null;

    // Single probe shared by all three result types.
    const tableExists = (tableName: string): boolean =>
      (db.prepare(
        "SELECT name FROM sqlite_master WHERE type='table' AND name = ?"
      ).all(tableName) as { name: string }[]).length > 0;

    // observations_fts acts as the marker for "FTS5 is available here".
    if (!tableExists('observations_fts')) {
      logger.debug('SEARCH', 'FTS5 tables not available for fallback', {});
      return null;
    }

    // Sanitize query for FTS5 MATCH: drop quote characters, then wrap each
    // term in double quotes so FTS5 treats it as a literal string.
    const sanitizedQuery = query
      .replace(/['"]/g, '')
      .split(/\s+/)
      .filter(Boolean)
      .map(term => `"${term}"`)
      .join(' OR ');

    let observations: ObservationSearchResult[] = [];
    let sessions: SessionSummarySearchResult[] = [];
    let prompts: UserPromptSearchResult[] = [];

    // Nothing searchable (e.g. the query was only quotes/whitespace): that is
    // "no matches", not "no backend", so report empty results instead of null.
    if (!sanitizedQuery) {
      return { observations, sessions, prompts };
    }

    // Bind order mirrors placeholder order in every statement below:
    // MATCH term, optional project, LIMIT.
    const bindParams = (): (string | number)[] =>
      options.project
        ? [sanitizedQuery, options.project, options.limit]
        : [sanitizedQuery, options.limit];

    if (options.searchObservations) {
      const projectClause = options.project ? 'AND o.project = ?' : '';

      observations = db.prepare(`
        SELECT o.*, rank
        FROM observations_fts fts
        JOIN observations o ON o.id = fts.rowid
        WHERE observations_fts MATCH ?
        ${projectClause}
        ORDER BY rank
        LIMIT ?
      `).all(...bindParams()) as ObservationSearchResult[];
    }

    if (options.searchSessions && tableExists('session_summaries_fts')) {
      const projectClause = options.project ? 'AND s.project = ?' : '';

      sessions = db.prepare(`
        SELECT s.*, rank
        FROM session_summaries_fts fts
        JOIN session_summaries s ON s.id = fts.rowid
        WHERE session_summaries_fts MATCH ?
        ${projectClause}
        ORDER BY rank
        LIMIT ?
      `).all(...bindParams()) as SessionSummarySearchResult[];
    }

    if (options.searchPrompts && tableExists('user_prompts_fts')) {
      // Prompts are filtered by project through their owning sdk_sessions row.
      const projectClause = options.project ? 'AND ss.project = ?' : '';

      prompts = db.prepare(`
        SELECT up.*, rank
        FROM user_prompts_fts fts
        JOIN user_prompts up ON up.id = fts.rowid
        JOIN sdk_sessions ss ON up.content_session_id = ss.content_session_id
        WHERE user_prompts_fts MATCH ?
        ${projectClause}
        ORDER BY rank
        LIMIT ?
      `).all(...bindParams()) as UserPromptSearchResult[];
    }

    logger.debug('SEARCH', 'FTS5 fallback results', {
      observations: observations.length,
      sessions: sessions.length,
      prompts: prompts.length
    });

    return { observations, sessions, prompts };
  } catch (error) {
    // Best-effort fallback: any SQLite failure is reported as "unavailable".
    logger.warn('SEARCH', 'FTS5 fallback failed', {}, error as Error);
    return null;
  }
}
/**
 * Bug #1915: Deduplicate search results by content hash and apply a
 * per-project/session diversity cap to prevent result monopolization.
 *
 * Input order is preserved; when duplicates exist the first occurrence wins.
 *
 * @param results Results to filter. Each item may carry `content_hash`,
 *                `project` and `memory_session_id`; items without a
 *                `content_hash` are never treated as duplicates.
 * @param options `maxPerProjectSession` (default 5) is the number of results
 *                kept per project+session pair. `skipDiversityCap` (default
 *                false) disables that cap and leaves only hash deduplication.
 * @returns A new array containing the surviving results.
 */
private deduplicateResults<T extends object>(
  results: T[],
  options: { maxPerProjectSession?: number; skipDiversityCap?: boolean } = {}
): T[] {
  // Structural view of the optional fields read below. Not every result type
  // is guaranteed to carry all of them, so each is treated as possibly absent.
  type DedupFields = {
    content_hash?: string | null;
    project?: string | null;
    memory_session_id?: string | null;
  };

  const { maxPerProjectSession = 5, skipDiversityCap = false } = options;

  // Step 1: Deduplicate by content_hash, keeping the first occurrence.
  const seenHashes = new Set<string>();
  const deduped = results.filter(result => {
    const { content_hash: hash } = result as DedupFields;
    if (!hash) return true; // no hash -> cannot be judged a duplicate
    if (seenHashes.has(hash)) return false;
    seenHashes.add(hash);
    return true;
  });

  // Step 2: Apply the per-project+session diversity cap only when requested.
  // Filter-only searches pass skipDiversityCap so that every matching result
  // is returned without per-project/session truncation.
  if (skipDiversityCap) {
    if (deduped.length !== results.length) {
      logger.debug('SEARCH', 'Deduplication applied (hash only, diversity cap skipped)', {
        original: results.length,
        afterHashDedup: deduped.length
      });
    }
    return deduped;
  }

  const bucketCounts = new Map<string, number>();
  const diversified = deduped.filter(result => {
    const { project, memory_session_id: sessionId } = result as DedupFields;

    // A result with neither a project nor a session cannot be attributed to
    // any bucket. Lumping all of them into one shared "unknown" bucket would
    // cap the entire result type at maxPerProjectSession, so exempt them.
    if (!project && !sessionId) return true;

    const key = `${project || 'unknown'}::${sessionId || 'unknown'}`;
    const count = bucketCounts.get(key) ?? 0;
    if (count >= maxPerProjectSession) {
      return false; // too many results from the same project+session
    }
    bucketCounts.set(key, count + 1);
    return true;
  });

  if (deduped.length !== results.length || diversified.length !== deduped.length) {
    logger.debug('SEARCH', 'Deduplication applied', {
      original: results.length,
      afterHashDedup: deduped.length,
      afterDiversityCap: diversified.length
    });
  }

  return diversified;
}
error message + // Bug #1913: ChromaDB not initialized - fall back to FTS5 MATCH query + // instead of returning empty results else if (query) { - chromaFailed = true; - logger.debug('SEARCH', 'ChromaDB not initialized - semantic search unavailable', {}); - logger.debug('SEARCH', 'Install UVX/Python to enable vector search', { url: 'https://docs.astral.sh/uv/getting-started/installation/' }); - observations = []; - sessions = []; - prompts = []; + logger.debug('SEARCH', 'ChromaDB not initialized - falling back to FTS5 text search', {}); + + const fts5Results = this.searchFTS5Fallback(query, { + searchObservations, + searchSessions, + searchPrompts, + limit: options.limit || 20, + project: options.project + }); + + if (fts5Results) { + observations = fts5Results.observations; + sessions = fts5Results.sessions; + prompts = fts5Results.prompts; + } else { + // FTS5 also unavailable + chromaFailed = true; + logger.debug('SEARCH', 'FTS5 also unavailable - no text search backend', {}); + observations = []; + sessions = []; + prompts = []; + } } + // Bug #1915: Deduplicate results by content hash and apply diversity cap. + // Only apply diversity cap for text queries - filter-only searches should + // return all matching results without per-project/session truncation. 
+ const skipDiversityCap = !query; + observations = this.deduplicateResults(observations, { skipDiversityCap }); + sessions = this.deduplicateResults(sessions, { skipDiversityCap }); + prompts = this.deduplicateResults(prompts, { skipDiversityCap }); + const totalResults = observations.length + sessions.length + prompts.length; // JSON format: return raw data for programmatic access (e.g., export scripts) @@ -905,8 +1125,19 @@ export class SearchManager { if (this.chromaSync) { logger.debug('SEARCH', 'Using hybrid semantic search (Chroma + SQLite)', {}); + // Bug #1912: Build Chroma where filter including project scope + let whereFilter: Record = { doc_type: 'observation' }; + if (options.project) { + whereFilter = { + $and: [ + { doc_type: 'observation' }, + { $or: [{ project: options.project }, { merged_into_project: options.project }] } + ] + }; + } + // Step 1: Chroma semantic search (top 100) - const chromaResults = await this.queryChroma(query, 100); + const chromaResults = await this.queryChroma(query, 100, whereFilter); logger.debug('SEARCH', 'Chroma returned semantic matches', { matchCount: chromaResults.ids.length }); if (chromaResults.ids.length > 0) { @@ -919,10 +1150,10 @@ export class SearchManager { logger.debug('SEARCH', 'Results within 90-day window', { count: recentIds.length }); - // Step 3: Hydrate from SQLite in temporal order + // Step 3: Hydrate from SQLite in temporal order (with project filter) if (recentIds.length > 0) { const limit = options.limit || 20; - results = this.sessionStore.getObservationsByIds(recentIds, { orderBy: 'date_desc', limit }); + results = this.sessionStore.getObservationsByIds(recentIds, { orderBy: 'date_desc', limit, project: options.project }); logger.debug('SEARCH', 'Hydrated observations from SQLite', { count: results.length }); } } @@ -962,8 +1193,19 @@ export class SearchManager { if (this.chromaSync) { logger.debug('SEARCH', 'Using hybrid semantic search for sessions', {}); + // Bug #1912: Build Chroma where 
filter including project scope + let whereFilter: Record = { doc_type: 'session_summary' }; + if (options.project) { + whereFilter = { + $and: [ + { doc_type: 'session_summary' }, + { $or: [{ project: options.project }, { merged_into_project: options.project }] } + ] + }; + } + // Step 1: Chroma semantic search (top 100) - const chromaResults = await this.queryChroma(query, 100, { doc_type: 'session_summary' }); + const chromaResults = await this.queryChroma(query, 100, whereFilter); logger.debug('SEARCH', 'Chroma returned semantic matches for sessions', { matchCount: chromaResults.ids.length }); if (chromaResults.ids.length > 0) { @@ -976,10 +1218,10 @@ export class SearchManager { logger.debug('SEARCH', 'Results within 90-day window', { count: recentIds.length }); - // Step 3: Hydrate from SQLite in temporal order + // Step 3: Hydrate from SQLite in temporal order (with project filter) if (recentIds.length > 0) { const limit = options.limit || 20; - results = this.sessionStore.getSessionSummariesByIds(recentIds, { orderBy: 'date_desc', limit }); + results = this.sessionStore.getSessionSummariesByIds(recentIds, { orderBy: 'date_desc', limit, project: options.project }); logger.debug('SEARCH', 'Hydrated sessions from SQLite', { count: results.length }); } } @@ -1019,8 +1261,19 @@ export class SearchManager { if (this.chromaSync) { logger.debug('SEARCH', 'Using hybrid semantic search for user prompts', {}); + // Bug #1912: Build Chroma where filter including project scope + let whereFilter: Record = { doc_type: 'user_prompt' }; + if (options.project) { + whereFilter = { + $and: [ + { doc_type: 'user_prompt' }, + { $or: [{ project: options.project }, { merged_into_project: options.project }] } + ] + }; + } + // Step 1: Chroma semantic search (top 100) - const chromaResults = await this.queryChroma(query, 100, { doc_type: 'user_prompt' }); + const chromaResults = await this.queryChroma(query, 100, whereFilter); logger.debug('SEARCH', 'Chroma returned semantic matches 
for prompts', { matchCount: chromaResults.ids.length }); if (chromaResults.ids.length > 0) { @@ -1033,10 +1286,10 @@ export class SearchManager { logger.debug('SEARCH', 'Results within 90-day window', { count: recentIds.length }); - // Step 3: Hydrate from SQLite in temporal order + // Step 3: Hydrate from SQLite in temporal order (with project filter) if (recentIds.length > 0) { const limit = options.limit || 20; - results = this.sessionStore.getUserPromptsByIds(recentIds, { orderBy: 'date_desc', limit }); + results = this.sessionStore.getUserPromptsByIds(recentIds, { orderBy: 'date_desc', limit, project: options.project }); logger.debug('SEARCH', 'Hydrated user prompts from SQLite', { count: results.length }); } } diff --git a/src/services/worker/http/routes/DataRoutes.ts b/src/services/worker/http/routes/DataRoutes.ts index b476a3d927..cba755ac7e 100644 --- a/src/services/worker/http/routes/DataRoutes.ts +++ b/src/services/worker/http/routes/DataRoutes.ts @@ -382,11 +382,13 @@ export class DataRoutes extends BaseRouteHandler { } // Import observations (depends on sessions) + const importedObservationRows: Array = []; if (Array.isArray(observations)) { for (const obs of observations) { const result = store.importObservation(obs); if (result.imported) { stats.observationsImported++; + importedObservationRows.push({ ...obs, id: result.id }); } else { stats.observationsSkipped++; } @@ -398,6 +400,25 @@ export class DataRoutes extends BaseRouteHandler { if (stats.observationsImported > 0) { store.rebuildObservationsFTSIndex(); } + + // Bug #1914: Also sync imported observations to ChromaDB so they + // appear in MCP search() results (vector/semantic search). 
+ const chromaSync = this.dbManager.getChromaSync(); + if (chromaSync && importedObservationRows.length > 0) { + // Fire-and-forget: don't block the import response on Chroma sync + (async () => { + for (const obs of importedObservationRows) { + try { + await chromaSync.syncObservationRow(obs); + } catch (syncError) { + logger.warn('DATA', 'Failed to sync imported observation to ChromaDB', { id: obs.id }, syncError as Error); + } + } + logger.info('DATA', 'Synced imported observations to ChromaDB', { count: importedObservationRows.length }); + })().catch(err => { + logger.warn('DATA', 'ChromaDB import sync batch failed', {}, err as Error); + }); + } } // Import prompts (depends on sessions)