diff --git a/client/src/components/SidePanel/Agents/Search/ApiKeyDialog.tsx b/client/src/components/SidePanel/Agents/Search/ApiKeyDialog.tsx index a7ec8fdc17d7..7556cad5267b 100644 --- a/client/src/components/SidePanel/Agents/Search/ApiKeyDialog.tsx +++ b/client/src/components/SidePanel/Agents/Search/ApiKeyDialog.tsx @@ -152,6 +152,20 @@ export default function ApiKeyDialog({ }, }, }, + { + key: ScraperProviders.CRAWL4AI, + label: localize('com_ui_web_search_provider_crawl4ai'), + inputs: { + crawl4aiApiUrl: { + placeholder: localize('com_ui_web_search_crawl4ai_instance_url'), + type: 'text' as const, + }, + crawl4aiApiKey: { + placeholder: localize('com_ui_web_search_crawl4ai_api_key'), + type: 'password' as const, + }, + }, + }, ]; const [dropdownOpen, setDropdownOpen] = useState({ diff --git a/client/src/hooks/Plugins/useAuthSearchTool.ts b/client/src/hooks/Plugins/useAuthSearchTool.ts index bd5f41fe7894..6f8178571e44 100644 --- a/client/src/hooks/Plugins/useAuthSearchTool.ts +++ b/client/src/hooks/Plugins/useAuthSearchTool.ts @@ -16,6 +16,8 @@ export type SearchApiKeyFormData = { firecrawlApiUrl: string; jinaApiKey: string; jinaApiUrl: string; + crawl4aiApiUrl: string; + crawl4aiApiKey: string; cohereApiKey: string; }; @@ -56,6 +58,8 @@ const useAuthSearchTool = (options?: { isEntityTool: boolean }) => { firecrawlApiUrl: data.firecrawlApiUrl, jinaApiKey: data.jinaApiKey, jinaApiUrl: data.jinaApiUrl, + crawl4aiApiUrl: data.crawl4aiApiUrl, + crawl4aiApiKey: data.crawl4aiApiKey, cohereApiKey: data.cohereApiKey, }).reduce( (acc, [key, value]) => { diff --git a/client/src/locales/en/translation.json b/client/src/locales/en/translation.json index 7651b5a51de6..cfc534470939 100644 --- a/client/src/locales/en/translation.json +++ b/client/src/locales/en/translation.json @@ -1429,6 +1429,7 @@ "com_ui_web_search_provider": "Search Provider", "com_ui_web_search_provider_searxng": "SearXNG", "com_ui_web_search_provider_serper": "Serper API", + 
"com_ui_web_search_provider_crawl4ai": "Crawl4AI API", "com_ui_web_search_provider_serper_key": "Get your Serper API key", "com_ui_web_search_reading": "Reading results", "com_ui_web_search_reranker": "Reranker", @@ -1444,6 +1445,8 @@ "com_ui_web_search_scraper_serper_key": "Get your Serper API key", "com_ui_web_search_searxng_api_key": "Enter SearXNG API Key (optional)", "com_ui_web_search_searxng_instance_url": "SearXNG Instance URL", + "com_ui_web_search_crawl4ai_api_key": "Enter Crawl4AI API Key (optional)", + "com_ui_web_search_crawl4ai_instance_url": "Crawl4AI Instance URL", "com_ui_web_searching": "Searching the web", "com_ui_web_searching_again": "Searching the web again", "com_ui_weekend_morning": "Happy weekend", diff --git a/packages/api/src/app/AppService.spec.ts b/packages/api/src/app/AppService.spec.ts index 9c771b4bd6b6..3bf8e7e2125f 100644 --- a/packages/api/src/app/AppService.spec.ts +++ b/packages/api/src/app/AppService.spec.ts @@ -128,6 +128,8 @@ describe('AppService', () => { firecrawlApiKey: '${FIRECRAWL_API_KEY}', firecrawlApiUrl: '${FIRECRAWL_API_URL}', searxngInstanceUrl: '${SEARXNG_INSTANCE_URL}', + crawl4aiApiUrl: '${CRAWL4AI_API_URL}', + crawl4aiApiKey: '${CRAWL4AI_API_KEY}', }), memory: undefined, endpoints: expect.objectContaining({ diff --git a/packages/api/src/web/web.spec.ts b/packages/api/src/web/web.spec.ts index c7bb3f4962c8..c8949d3e932b 100644 --- a/packages/api/src/web/web.spec.ts +++ b/packages/api/src/web/web.spec.ts @@ -83,6 +83,8 @@ describe('web.ts', () => { firecrawlApiUrl: '${FIRECRAWL_API_URL}', jinaApiKey: '${JINA_API_KEY}', jinaApiUrl: '${JINA_API_URL}', + crawl4aiApiUrl: '${CRAWL4AI_API_URL}', + crawl4aiApiKey: '${CRAWL4AI_API_KEY}', cohereApiKey: '${COHERE_API_KEY}', safeSearch: SafeSearchTypes.MODERATE, }; @@ -304,6 +306,8 @@ describe('web.ts', () => { jinaApiKey: '${JINA_API_KEY}', jinaApiUrl: '${JINA_API_URL}', cohereApiKey: '${COHERE_API_KEY}', + crawl4aiApiUrl: '${CRAWL4AI_API_URL}', + crawl4aiApiKey: 
'${CRAWL4AI_API_KEY}', safeSearch: SafeSearchTypes.MODERATE, }; @@ -355,6 +359,8 @@ describe('web.ts', () => { firecrawlApiUrl: '${FIRECRAWL_API_URL}', jinaApiKey: '${JINA_API_KEY}', jinaApiUrl: '${JINA_API_URL}', + crawl4aiApiUrl: '${CRAWL4AI_API_URL}', + crawl4aiApiKey: '${CRAWL4AI_API_KEY}', cohereApiKey: '${COHERE_API_KEY}', safeSearch: SafeSearchTypes.MODERATE, // Specify which services to use @@ -449,6 +455,8 @@ describe('web.ts', () => { jinaApiKey: '${CUSTOM_JINA_KEY}', jinaApiUrl: '${CUSTOM_JINA_URL}', cohereApiKey: '${CUSTOM_COHERE_KEY}', + crawl4aiApiUrl: '${CUSTOM_CRAWL4AI_URL}', + crawl4aiApiKey: '${CUSTOM_CRAWL4AI_KEY}', safeSearch: SafeSearchTypes.MODERATE, // Specify which services to use searchProvider: 'serper' as SearchProviders, @@ -519,6 +527,8 @@ describe('web.ts', () => { firecrawlApiUrl: '${FIRECRAWL_API_URL}', jinaApiKey: '${JINA_API_KEY}', jinaApiUrl: '${JINA_API_URL}', + crawl4aiApiUrl: '${CRAWL4AI_API_URL}', + crawl4aiApiKey: '${CRAWL4AI_API_KEY}', cohereApiKey: '${COHERE_API_KEY}', safeSearch: SafeSearchTypes.MODERATE, }; @@ -581,6 +591,8 @@ describe('web.ts', () => { firecrawlApiUrl: '${FIRECRAWL_API_URL}', jinaApiKey: '${JINA_API_KEY}', jinaApiUrl: '${JINA_API_URL}', + crawl4aiApiUrl: '${CRAWL4AI_API_URL}', + crawl4aiApiKey: '${CRAWL4AI_API_KEY}', cohereApiKey: '${COHERE_API_KEY}', safeSearch: SafeSearchTypes.MODERATE, }; @@ -691,6 +703,8 @@ describe('web.ts', () => { firecrawlApiUrl: '${FIRECRAWL_API_URL}', jinaApiKey: '${JINA_API_KEY}', jinaApiUrl: '${JINA_API_URL}', + crawl4aiApiUrl: '${CRAWL4AI_API_URL}', + crawl4aiApiKey: '${CRAWL4AI_API_KEY}', cohereApiKey: '${COHERE_API_KEY}', safeSearch: SafeSearchTypes.MODERATE, searchProvider: 'serper' as SearchProviders, @@ -732,6 +746,8 @@ describe('web.ts', () => { firecrawlApiUrl: '${FIRECRAWL_API_URL}', jinaApiKey: '${JINA_API_KEY}', jinaApiUrl: '${JINA_API_URL}', + crawl4aiApiUrl: '${CRAWL4AI_API_URL}', + crawl4aiApiKey: '${CRAWL4AI_API_KEY}', cohereApiKey: '${COHERE_API_KEY}', 
safeSearch: SafeSearchTypes.MODERATE, scraperProvider: 'firecrawl' as ScraperProviders, @@ -773,6 +789,8 @@ describe('web.ts', () => { firecrawlApiUrl: '${FIRECRAWL_API_URL}', jinaApiKey: '${JINA_API_KEY}', jinaApiUrl: '${JINA_API_URL}', + crawl4aiApiUrl: '${CRAWL4AI_API_URL}', + crawl4aiApiKey: '${CRAWL4AI_API_KEY}', cohereApiKey: '${COHERE_API_KEY}', safeSearch: SafeSearchTypes.MODERATE, rerankerType: 'jina' as RerankerTypes, @@ -820,6 +838,8 @@ describe('web.ts', () => { firecrawlApiUrl: '${FIRECRAWL_API_URL}', jinaApiKey: '${JINA_API_KEY}', jinaApiUrl: '${JINA_API_URL}', + crawl4aiApiUrl: '${CRAWL4AI_API_URL}', + crawl4aiApiKey: '${CRAWL4AI_API_KEY}', cohereApiKey: '${COHERE_API_KEY}', safeSearch: SafeSearchTypes.MODERATE, searchProvider: 'invalid-provider' as SearchProviders, @@ -855,6 +875,8 @@ describe('web.ts', () => { firecrawlApiUrl: '${FIRECRAWL_API_URL}', jinaApiKey: '${JINA_API_KEY}', jinaApiUrl: '${JINA_API_URL}', + crawl4aiApiUrl: '${CRAWL4AI_API_URL}', + crawl4aiApiKey: '${CRAWL4AI_API_KEY}', cohereApiKey: '${COHERE_API_KEY}', safeSearch: SafeSearchTypes.MODERATE, rerankerType: 'jina' as RerankerTypes, @@ -906,6 +928,8 @@ describe('web.ts', () => { firecrawlApiUrl: '${FIRECRAWL_API_URL}', jinaApiKey: '${JINA_API_KEY}', jinaApiUrl: '${JINA_API_URL}', + crawl4aiApiUrl: '${CRAWL4AI_API_URL}', + crawl4aiApiKey: '${CRAWL4AI_API_KEY}', cohereApiKey: '${COHERE_API_KEY}', safeSearch: SafeSearchTypes.MODERATE, }; @@ -947,6 +971,8 @@ describe('web.ts', () => { firecrawlApiUrl: '${FIRECRAWL_API_URL}', jinaApiKey: '${JINA_API_KEY}', jinaApiUrl: '${JINA_API_URL}', + crawl4aiApiUrl: '${CRAWL4AI_API_URL}', + crawl4aiApiKey: '${CRAWL4AI_API_KEY}', cohereApiKey: '${COHERE_API_KEY}', safeSearch: SafeSearchTypes.MODERATE, firecrawlOptions: { @@ -1007,6 +1033,8 @@ describe('web.ts', () => { firecrawlApiUrl: '${FIRECRAWL_API_URL}', jinaApiKey: '${JINA_API_KEY}', jinaApiUrl: '${JINA_API_URL}', + crawl4aiApiUrl: '${CRAWL4AI_API_URL}', + crawl4aiApiKey: 
'${CRAWL4AI_API_KEY}', safeSearch: SafeSearchTypes.MODERATE, scraperTimeout: 15000, // This should take priority firecrawlOptions: { @@ -1049,6 +1077,8 @@ describe('web.ts', () => { firecrawlApiUrl: '${FIRECRAWL_API_URL}', jinaApiKey: '${JINA_API_KEY}', jinaApiUrl: '${JINA_API_URL}', + crawl4aiApiUrl: '${CRAWL4AI_API_URL}', + crawl4aiApiKey: '${CRAWL4AI_API_KEY}', safeSearch: SafeSearchTypes.MODERATE, firecrawlOptions: { includeTags: ['p'], @@ -1088,6 +1118,8 @@ describe('web.ts', () => { firecrawlApiUrl: '${FIRECRAWL_API_URL}', jinaApiKey: '${JINA_API_KEY}', jinaApiUrl: '${JINA_API_URL}', + crawl4aiApiUrl: '${CRAWL4AI_API_URL}', + crawl4aiApiKey: '${CRAWL4AI_API_KEY}', safeSearch: SafeSearchTypes.MODERATE, firecrawlOptions: { timeout: 12000, // Only timeout provided @@ -1125,6 +1157,8 @@ describe('web.ts', () => { firecrawlApiUrl: '${FIRECRAWL_API_URL}', jinaApiKey: '${JINA_API_KEY}', jinaApiUrl: '${JINA_API_URL}', + crawl4aiApiUrl: '${CRAWL4AI_API_URL}', + crawl4aiApiKey: '${CRAWL4AI_API_KEY}', safeSearch: SafeSearchTypes.MODERATE, firecrawlOptions: { formats: ['html', 'markdown'], // Only formats provided @@ -1162,6 +1196,8 @@ describe('web.ts', () => { firecrawlApiUrl: '${FIRECRAWL_API_URL}', jinaApiKey: '${JINA_API_KEY}', jinaApiUrl: '${JINA_API_URL}', + crawl4aiApiUrl: '${CRAWL4AI_API_URL}', + crawl4aiApiKey: '${CRAWL4AI_API_KEY}', safeSearch: SafeSearchTypes.MODERATE, firecrawlOptions: { timeout: 8000, diff --git a/packages/api/src/web/web.ts b/packages/api/src/web/web.ts index ad172e187fad..ecf0a7048fb1 100644 --- a/packages/api/src/web/web.ts +++ b/packages/api/src/web/web.ts @@ -120,7 +120,8 @@ export async function loadWebSearchAuth({ } } - if (requiredKeys.length === 0) continue; + // If there are no required keys and no optional keys, skip this service + if (requiredKeys.length === 0 && optionalKeys.length === 0) continue; const requiredAuthFields = extractWebSearchEnvVars({ keys: requiredKeys, diff --git a/packages/data-provider/src/config.ts 
b/packages/data-provider/src/config.ts index c27bf42c9c90..750599898c8c 100644 --- a/packages/data-provider/src/config.ts +++ b/packages/data-provider/src/config.ts @@ -813,6 +813,7 @@ export enum SearchProviders { export enum ScraperProviders { FIRECRAWL = 'firecrawl', SERPER = 'serper', + CRAWL4AI = 'crawl4ai', } export enum RerankerTypes { @@ -833,6 +834,8 @@ export const webSearchSchema = z.object({ firecrawlApiKey: z.string().optional().default('${FIRECRAWL_API_KEY}'), firecrawlApiUrl: z.string().optional().default('${FIRECRAWL_API_URL}'), firecrawlVersion: z.string().optional().default('${FIRECRAWL_VERSION}'), + crawl4aiApiKey: z.string().optional().default('${CRAWL4AI_API_KEY}'), + crawl4aiApiUrl: z.string().optional().default('${CRAWL4AI_API_URL}'), jinaApiKey: z.string().optional().default('${JINA_API_KEY}'), jinaApiUrl: z.string().optional().default('${JINA_API_URL}'), cohereApiKey: z.string().optional().default('${COHERE_API_KEY}'), @@ -874,6 +877,14 @@ export const webSearchSchema = z.object({ .optional(), }) .optional(), + crawl4aiOptions: z + .object({ + extractionStrategy: z.string().optional(), + chunkingStrategy: z.string().optional(), + timeout: z.number().optional(), + fitStrategy: z.string().optional() + }) + .optional(), }); export type TWebSearchConfig = DeepPartial>; diff --git a/packages/data-schemas/src/app/web.spec.ts b/packages/data-schemas/src/app/web.spec.ts index 787d7809a36f..c0c7315d7fae 100644 --- a/packages/data-schemas/src/app/web.spec.ts +++ b/packages/data-schemas/src/app/web.spec.ts @@ -54,6 +54,8 @@ describe('loadWebSearchConfig', () => { firecrawlVersion: '${FIRECRAWL_VERSION}', jinaApiKey: '${JINA_API_KEY}', jinaApiUrl: '${JINA_API_URL}', + crawl4aiApiUrl: '${CRAWL4AI_API_URL}', + crawl4aiApiKey: '${CRAWL4AI_API_KEY}', cohereApiKey: '${COHERE_API_KEY}', safeSearch: SafeSearchTypes.MODERATE, }); @@ -154,6 +156,7 @@ describe('loadWebSearchConfig', () => { expect(result?.searxngInstanceUrl).toBe('${SEARXNG_INSTANCE_URL}'); 
expect(result?.firecrawlApiUrl).toBe('${FIRECRAWL_API_URL}'); expect(result?.jinaApiUrl).toBe('${JINA_API_URL}'); + expect(result?.crawl4aiApiUrl).toBe('${CRAWL4AI_API_URL}'); }); it('should preserve custom URLs', () => { @@ -161,6 +164,7 @@ describe('loadWebSearchConfig', () => { searxngInstanceUrl: 'https://custom-searxng.com', firecrawlApiUrl: 'https://custom-firecrawl.com', jinaApiUrl: 'https://custom-jina.com', + crawl4aiApiUrl: 'https://custom-crawl4ai.com' }; const result = loadWebSearchConfig(config); @@ -168,6 +172,7 @@ describe('loadWebSearchConfig', () => { expect(result?.searxngInstanceUrl).toBe('https://custom-searxng.com'); expect(result?.firecrawlApiUrl).toBe('https://custom-firecrawl.com'); expect(result?.jinaApiUrl).toBe('https://custom-jina.com'); + expect(result?.crawl4aiApiUrl).toBe('https://custom-crawl4ai.com'); }); }); }); diff --git a/packages/data-schemas/src/app/web.ts b/packages/data-schemas/src/app/web.ts index a61e1f161165..34891dcf5fd7 100644 --- a/packages/data-schemas/src/app/web.ts +++ b/packages/data-schemas/src/app/web.ts @@ -23,6 +23,12 @@ export const webSearchAuth = { serper: { serperApiKey: 1 as const, }, + crawl4ai: { + /** Optional (0) - works without API key for public deployments */ + crawl4aiApiKey: 0 as const, + /** Required (1) */ + crawl4aiApiUrl: 1 as const, + }, }, rerankers: { jina: { @@ -69,6 +75,8 @@ export function loadWebSearchConfig( const firecrawlApiKey = config?.firecrawlApiKey ?? '${FIRECRAWL_API_KEY}'; const firecrawlApiUrl = config?.firecrawlApiUrl ?? '${FIRECRAWL_API_URL}'; const firecrawlVersion = config?.firecrawlVersion ?? '${FIRECRAWL_VERSION}'; + const crawl4aiApiKey = config?.crawl4aiApiKey ?? '${CRAWL4AI_API_KEY}'; + const crawl4aiApiUrl = config?.crawl4aiApiUrl ?? '${CRAWL4AI_API_URL}'; const jinaApiKey = config?.jinaApiKey ?? '${JINA_API_KEY}'; const jinaApiUrl = config?.jinaApiUrl ?? '${JINA_API_URL}'; const cohereApiKey = config?.cohereApiKey ??
'${COHERE_API_KEY}'; @@ -85,6 +93,8 @@ export function loadWebSearchConfig( firecrawlApiKey, firecrawlApiUrl, firecrawlVersion, + crawl4aiApiKey, + crawl4aiApiUrl, searxngInstanceUrl, }; } diff --git a/packages/data-schemas/src/types/web.ts b/packages/data-schemas/src/types/web.ts index a9cc1f0cc6f3..5e149ac77f01 100644 --- a/packages/data-schemas/src/types/web.ts +++ b/packages/data-schemas/src/types/web.ts @@ -7,6 +7,8 @@ export type TWebSearchKeys = | 'firecrawlApiKey' | 'firecrawlApiUrl' | 'firecrawlVersion' + | 'crawl4aiApiKey' + | 'crawl4aiApiUrl' | 'jinaApiKey' | 'jinaApiUrl' | 'cohereApiKey';