Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions src/citations/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Public surface of the citations module: re-exports the three citation
// helper functions and their associated types from './perplexity'.
export {
transformCitations,
injectCitationMarkers,
processContentParts,
type CitationSource,
type CitationData,
type ProcessedCitations,
type RawPerplexitySearchResult,
} from './perplexity';
153 changes: 153 additions & 0 deletions src/citations/perplexity.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
import type { MessageContentComplex } from '../types';

// Citation marker character: code point U+E202, which lies in the Unicode
// Private Use Area. Written as an escape so the invisible glyph never appears
// literally in the source.
const CITATION_MARKER = '\uE202';

/**
 * A single citation source in LibreChat format.
 *
 * `transformCitations` builds one of these per Perplexity citation URL or
 * search result.
 */
export interface CitationSource {
/** URL of the source; `transformCitations` uses an empty string when a search result has neither `url` nor `link`. */
link: string;
/** Display title; `transformCitations` substitutes `Source N` (N = position) when none is supplied. */
title: string;
/** Text excerpt for the source; empty string when none is supplied. */
snippet: string;
/** Date string; `transformCitations` falls back to the current date as `YYYY-MM-DD` (taken from `toISOString()`). */
date: string;
/** 1-based position of the source within the array it was built from. */
position: number;
}

/**
 * Citation data format for web_search attachments (LibreChat frontend compatible).
 *
 * Only `organic` carries data. The remaining fields are typed `never[]`, so
 * they can only ever be empty arrays; `transformCitations` always sets them
 * to `[]`.
 */
export interface CitationData {
/** Citation sources, in the same order as the input they were built from. */
organic: CitationSource[];
/** Always empty (typed `never[]`). */
topStories: never[];
/** Always empty (typed `never[]`). */
images: never[];
/** Always empty (typed `never[]`). */
videos: never[];
/** Always empty (typed `never[]`). */
references: never[];
}

/**
 * Raw search result from Perplexity API.
 *
 * Every field is optional; `transformCitations` supplies a fallback for each
 * one that is missing or empty.
 */
export interface RawPerplexitySearchResult {
/** Source URL; `transformCitations` reads this first. */
url?: string;
/** Alternative URL field, used by `transformCitations` only when `url` is missing or empty. */
link?: string;
/** Title of the result. */
title?: string;
/** Text excerpt of the result. */
snippet?: string;
/** Date of the result; passed through unchanged when present. */
date?: string;
}

/**
 * Processed citation data ready for LibreChat consumption.
 *
 * Both fields are nullable: the container object itself is always present,
 * and `null` in a field signals that nothing is available for it.
 */
export interface ProcessedCitations {
/** Transformed to LibreChat's web_search attachment format; `null` when there are no sources */
searchResults: CitationData | null;
/** Raw citation URLs (for reference); `null` when none were provided */
citations: string[] | null;
}

/**
 * Return the first candidate that is a non-empty string, or `fallback` when
 * none of them is.
 */
function firstNonEmptyString(
  fallback: string,
  ...candidates: unknown[]
): string {
  for (const candidate of candidates) {
    if (typeof candidate === 'string' && candidate !== '') {
      return candidate;
    }
  }
  return fallback;
}

/**
 * Transform raw Perplexity citations/search_results to LibreChat's SearchResultData format.
 *
 * `searchResults` is preferred when it contains at least one entry (it has
 * richer data); otherwise `citations` is used. Each entry of the chosen array
 * becomes one `organic` item whose `position` is its 1-based index.
 *
 * Entries are treated as untrusted input:
 * - a string entry is used as the link;
 * - an object entry contributes `url` (or `link`), `title`, `snippet` and
 *   `date`, but only where those are non-empty strings;
 * - anything else (e.g. `null`, a number) yields a placeholder item instead
 *   of throwing, so positions stay aligned with the input indices.
 *
 * A missing title defaults to `Source N`; a missing date defaults to the
 * current UTC date formatted as `YYYY-MM-DD`.
 *
 * @param citations - Raw citation URLs from Perplexity
 * @param searchResults - Detailed search results from Perplexity (preferred if available)
 * @returns SearchResultData object compatible with LibreChat's frontend, or `null` when there are no sources
 */
export function transformCitations(
  citations: string[] | null,
  searchResults: unknown[] | null
): CitationData | null {
  // Prefer search_results if available (has richer data), fallback to citations
  const sources: unknown[] | null =
    searchResults && searchResults.length > 0 ? searchResults : citations;

  if (!sources || sources.length === 0) {
    return null;
  }

  // Computed once so every defaulted item shares the same date.
  // toISOString() always starts with `YYYY-MM-DD`, so the first 10 chars suffice.
  const today = new Date().toISOString().slice(0, 10);

  const organic = sources.map((source, index) => {
    const position = index + 1;
    const defaultTitle = `Source ${position}`;

    // Plain URL (from the citations array): only the link is known
    if (typeof source === 'string') {
      return {
        link: source,
        title: defaultTitle,
        snippet: '',
        date: today,
        position,
      };
    }

    // Object entry (from search_results). Anything that is not a non-null
    // object is read as an empty result rather than dereferenced.
    const raw: RawPerplexitySearchResult =
      typeof source === 'object' && source !== null
        ? (source as RawPerplexitySearchResult)
        : {};

    return {
      link: firstNonEmptyString('', raw.url, raw.link),
      title: firstNonEmptyString(defaultTitle, raw.title),
      snippet: firstNonEmptyString('', raw.snippet),
      date: firstNonEmptyString(today, raw.date),
      position,
    };
  });

  return {
    organic,
    topStories: [],
    images: [],
    videos: [],
    references: [],
  };
}

/**
 * Rewrite bracketed numeric citations such as `[1]` or `[12]` into the
 * marker form ` {U+E202}turn{turnNumber}search{N-1}`.
 *
 * U+E202 is a Private Use Area code point; LibreChat's frontend markdown
 * parser recognizes it and renders the marker as a hoverable citation link.
 *
 * Bracketed numbers below 1 (e.g. `[0]`) are left untouched, and empty
 * content is returned unchanged.
 *
 * @param content - Text content with [N] style citation markers
 * @param turnNumber - Current conversation turn number (0-indexed)
 * @returns Content with Unicode citation markers injected
 */
export function injectCitationMarkers(
  content: string,
  turnNumber: number
): string {
  if (content) {
    const bracketedNumber = /\[(\d+)\]/g;

    return content.replace(bracketedNumber, (original, digits) => {
      // Citations are written 1-based; the marker carries a 0-based index
      const zeroBased = parseInt(digits, 10) - 1;

      // The leading space matters: remark-gfm does not classify U+E202 as
      // punctuation/whitespace, so `**bold**{U+E202}` placed directly after
      // the closing `**` would stop being recognized as valid bold.
      return zeroBased < 0
        ? original
        : ` ${CITATION_MARKER}turn${turnNumber}search${zeroBased}`;
    });
  }

  return content;
}

/**
 * Apply `injectCitationMarkers` to every text part of a message.
 *
 * A part is rewritten only when it is truthy, has `type === 'text'`, and its
 * `text` property is a string; every other part is passed through as the same
 * object. Rewritten parts are shallow copies, so the input is not mutated.
 *
 * @param contentParts - Array of MessageContentComplex parts
 * @param turnNumber - Current conversation turn number (0-indexed)
 * @returns New array with citation markers injected into text parts
 */
export function processContentParts(
  contentParts: MessageContentComplex[],
  turnNumber: number
): MessageContentComplex[] {
  return contentParts.map((entry) => {
    // Not a text part: hand it back untouched
    if (!entry || entry.type !== 'text') {
      return entry;
    }

    const { text } = entry as { text?: string };
    if (typeof text !== 'string') {
      return entry;
    }

    return { ...entry, text: injectCitationMarkers(text, turnNumber) };
  });
}
20 changes: 20 additions & 0 deletions src/graphs/Graph.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ import {
addCacheControl,
extractToolDiscoveries,
} from '@/messages';
import { transformCitations, type ProcessedCitations } from '@/citations';
import {
resetIfNotEmpty,
isOpenAILike,
Expand Down Expand Up @@ -149,6 +150,11 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
agentContexts: Map<string, AgentContext> = new Map();
/** Default agent ID to use */
defaultAgentId: string;
/** Perplexity citation URLs (extracted from streaming response); starts as `null` */
perplexityCitations: string[] | null = null;
/**
 * Perplexity search results (extracted from streaming response); starts as `null`.
 * NOTE(review): elements are typed `any`, while `transformCitations` accepts
 * `unknown[]` — this could probably be tightened to `unknown[]`; confirm
 * against the code that assigns and reads this field.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
perplexitySearchResults: any[] | null = null;

constructor({
// parent-level graph inputs
Expand Down Expand Up @@ -330,6 +336,20 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
return convertMessagesToContent(this.messages.slice(this.startIndex));
}

/**
 * Get processed Perplexity citations in LibreChat format.
 *
 * Always returns an object; it is the object's fields that may be `null`:
 * - `searchResults` is the result of `transformCitations`, which is `null`
 *   when neither search results nor citations contain any entry;
 * - `citations` is `perplexityCitations` exactly as stored (possibly `null`).
 */
getProcessedCitations(): ProcessedCitations {
return {
searchResults: transformCitations(
this.perplexityCitations,
this.perplexitySearchResults
),
citations: this.perplexityCitations,
};
}

/**
* Get all run steps, optionally filtered by agent ID
*/
Expand Down
3 changes: 3 additions & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ export * from './messages';
/* Graphs */
export * from './graphs';

/* Citations */
export * from './citations';

/* Tools */
export * from './tools/Calculator';
export * from './tools/CodeExecutor';
Expand Down
176 changes: 176 additions & 0 deletions src/stream.perplexity.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
/**
* Tests for Perplexity citation extraction from streaming chunks
*
* The actual extraction happens in stream.ts handleOnChunkEvent,
* but this tests the extraction logic pattern in isolation.
*/

describe('Perplexity Citation Extraction', () => {
/**
 * Local stand-in for the citation extraction performed in stream.ts
 * (handleOnChunkEvent), kept here so the pattern can be tested in isolation.
 *
 * Reads `citations` and `search_results` from the chunk's
 * `additional_kwargs`. Each value is returned as-is when it is an array
 * (empty arrays included) and reported as `null` otherwise.
 */
function extractPerplexityCitations(chunk: {
  additional_kwargs?: Record<string, unknown>;
}): { citations: string[] | null; searchResults: unknown[] | null } {
  const kwargs = chunk.additional_kwargs;
  const rawCitations = kwargs?.citations;
  const rawSearchResults = kwargs?.search_results;

  // Array.isArray already rejects null/undefined, so no separate null check
  return {
    citations: Array.isArray(rawCitations) ? (rawCitations as string[]) : null,
    searchResults: Array.isArray(rawSearchResults)
      ? (rawSearchResults as unknown[])
      : null,
  };
}

// Cases for the local extractPerplexityCitations helper: each field on its
// own, both together, missing fields, a missing additional_kwargs object,
// non-array values, and empty arrays.
describe('extractPerplexityCitations', () => {
it('should extract citations array from chunk', () => {
const chunk = {
additional_kwargs: {
citations: [
'https://example.com/article1',
'https://example.com/article2',
],
},
};

const result = extractPerplexityCitations(chunk);

expect(result.citations).toEqual([
'https://example.com/article1',
'https://example.com/article2',
]);
expect(result.searchResults).toBeNull();
});

it('should extract search_results from chunk', () => {
const chunk = {
additional_kwargs: {
search_results: [
{
url: 'https://example.com',
title: 'Example',
snippet: 'A snippet',
},
{
url: 'https://test.com',
title: 'Test',
snippet: 'Another snippet',
},
],
},
};

const result = extractPerplexityCitations(chunk);

expect(result.citations).toBeNull();
expect(result.searchResults).toHaveLength(2);
// toMatchObject: only url and title are compared, snippet is not checked here
expect(result.searchResults![0]).toMatchObject({
url: 'https://example.com',
title: 'Example',
});
});

it('should extract both citations and search_results', () => {
const chunk = {
additional_kwargs: {
citations: ['https://cite1.com', 'https://cite2.com'],
search_results: [{ url: 'https://search1.com', title: 'Search 1' }],
},
};

const result = extractPerplexityCitations(chunk);

// The two fields are extracted independently, so their lengths may differ
expect(result.citations).toHaveLength(2);
expect(result.searchResults).toHaveLength(1);
});

it('should return nulls for chunk without citations', () => {
const chunk = {
additional_kwargs: {
some_other_field: 'value',
},
};

const result = extractPerplexityCitations(chunk);

expect(result.citations).toBeNull();
expect(result.searchResults).toBeNull();
});

it('should return nulls for chunk without additional_kwargs', () => {
const chunk = {};

const result = extractPerplexityCitations(chunk);

expect(result.citations).toBeNull();
expect(result.searchResults).toBeNull();
});

it('should ignore non-array citations', () => {
const chunk = {
additional_kwargs: {
citations: 'not an array',
search_results: { not: 'an array' },
},
};

const result = extractPerplexityCitations(chunk);

expect(result.citations).toBeNull();
expect(result.searchResults).toBeNull();
});

it('should handle empty arrays', () => {
const chunk = {
additional_kwargs: {
citations: [],
search_results: [],
},
};

const result = extractPerplexityCitations(chunk);

// Empty arrays are still arrays, so they come back as [] rather than null
expect(result.citations).toEqual([]);
expect(result.searchResults).toEqual([]);
});
});

// Checks that extracted values can be stored on an object shaped like the
// graph. No real Graph is constructed: a plain object with the same two
// field names stands in for it.
describe('Graph integration', () => {
it('should store citations on Graph-like object', () => {
// Simulates Graph object behavior
const graph = {
perplexityCitations: null as string[] | null,
perplexitySearchResults: null as unknown[] | null,
};

const chunk = {
additional_kwargs: {
citations: ['https://example.com'],
search_results: [{ url: 'https://example.com', title: 'Example' }],
},
};

// Simulate the extraction and assignment from stream.ts
const extracted = extractPerplexityCitations(chunk);
graph.perplexityCitations = extracted.citations;
graph.perplexitySearchResults = extracted.searchResults;

expect(graph.perplexityCitations).toEqual(['https://example.com']);
expect(graph.perplexitySearchResults).toEqual([
{ url: 'https://example.com', title: 'Example' },
]);
});
});
});
Loading