Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 94 additions & 5 deletions graphiti_core/prompts/extract_edges.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,24 +46,49 @@ class Edge(BaseModel):
description='The date and time when the relationship described by the edge fact stopped being true or ended. Use ISO 8601 format (YYYY-MM-DDTHH:MM:SS.SSSSSSZ)',
)
episode_indices: list[int] = Field(
default_factory=lambda: [1],
description='List of episode numbers (1-indexed) that this fact was derived from. '
'When processing a single episode, this should be [1].',
default_factory=lambda: [0],
description='List of episode numbers (0-indexed) that this fact was derived from. '
'When processing a single episode, this should be [0].',
)


class ExtractedEdges(BaseModel):
edges: list[Edge]


class EdgeTimestamps(BaseModel):
    """Temporal bounds extracted from a fact.

    Both bounds are optional ISO 8601 strings; ``None`` means the bound could
    not be determined from the fact text.
    """

    # When the fact became true; None if no time is stated or resolvable.
    valid_at: str | None = Field(
        None,
        description='When the fact became true. ISO 8601 with Z suffix (e.g., 2025-04-30T00:00:00Z)',
    )
    # When the fact stopped being true; None if still true or unknown.
    invalid_at: str | None = Field(
        None,
        description='When the fact stopped being true. ISO 8601 with Z suffix (e.g., 2025-04-30T00:00:00Z)',
    )


class BatchEdgeTimestamps(BaseModel):
    """Temporal bounds for a batch of facts.

    The LLM response must contain exactly one entry per input fact,
    positionally aligned with the facts passed in the prompt.
    """

    # One EdgeTimestamps per input fact, in input order.
    timestamps: list[EdgeTimestamps] = Field(
        ..., description='Timestamps for each fact, in the same order as the input facts'
    )


class Prompt(Protocol):
    """Structural interface for this module's prompt versions."""

    edge: PromptVersion
    extract_attributes: PromptVersion
    extract_timestamps: PromptVersion
    extract_timestamps_batch: PromptVersion


class Versions(TypedDict):
    """Mapping of prompt names to their message-building functions."""

    edge: PromptFunction
    extract_attributes: PromptFunction
    extract_timestamps: PromptFunction
    extract_timestamps_batch: PromptFunction


def edge(context: dict[str, Any]) -> list[Message]:
Expand Down Expand Up @@ -127,8 +152,8 @@ def edge(context: dict[str, Any]) -> list[Message]:
- GOOD: "Nate plays games on a Gamecube" → Nate -> PLAYS_GAMES_ON -> Gamecube (when "Gamecube" is in ENTITIES)
- GOOD: "Alice congratulated Bob" (relationship between two entities), "Alice lives in Paris" (relationship between entity and place)
4. Do not emit semantically redundant facts, even across episodes within the CURRENT_MESSAGE. However, if a later episode adds specific details to a previously stated fact (e.g., adding a brand name, a count, a color, a location, or any concrete attribute), extract the more detailed version as a NEW fact — it is NOT a duplicate. Only treat facts as duplicates when they convey the same specificity.
- NOT a duplicate: "user plays video games" (episode 1) vs. "user plays games on a Gamecube" (episode 2) → extract the second, more detailed fact.
- IS a duplicate: "user plays games on a Gamecube" (episode 1) vs. "user plays Gamecube games" (episode 2) → extract once, list both episodes in `episode_indices`.
- NOT a duplicate: "user plays video games" (Episode 0) vs. "user plays games on a Gamecube" (Episode 1) → extract the second, more detailed fact.
- IS a duplicate: "user plays games on a Gamecube" (Episode 0) vs. "user plays Gamecube games" (Episode 1) → extract once, list both episodes in `episode_indices`.
5. The `fact` MUST preserve all specific details from the source text: proper nouns, brand names, product names, model numbers, quantities, counts, colors, materials, physical descriptions, specific items, named locations, and named activities. Paraphrase the sentence structure but NEVER generalize:
- NEVER generalize "Gamecube" to "gaming console", "Ford Mustang" to "car", "wool coat" to "coat", "red and purple lighting" to "lighting", "cracked windshield" to "car damage", or "three screenplays" to "several screenplays".
- Do not verbatim quote the original text, but every concrete noun, number, and descriptor in the source should survive into the `fact`.
Expand Down Expand Up @@ -188,7 +213,71 @@ def extract_attributes(context: dict[str, Any]) -> list[Message]:
]


def extract_timestamps(context: dict[str, Any]) -> list[Message]:
    """Build the prompt that extracts temporal bounds for a single fact.

    Expects ``context['fact']`` and ``context['reference_time']``.
    Returns a system + user message pair for the LLM call.
    """
    instructions = f"""Given a FACT and its REFERENCE TIME, determine when the fact became true
(valid_at) and when it stopped being true (invalid_at).

Rules:
- Resolve relative expressions ("last week", "2 years ago", "yesterday") using REFERENCE TIME.
- If the fact is ongoing (present tense), set valid_at to REFERENCE TIME.
- If a change or end is expressed, set invalid_at to the relevant time.
- Leave both null if no time is stated or resolvable.
- If only a date is mentioned (no time), assume 00:00:00.
- Use ISO 8601 with Z suffix (e.g., 2025-04-30T00:00:00Z).
- Do NOT hallucinate or infer dates from unrelated events.

<FACT>
{context['fact']}
</FACT>

<REFERENCE TIME>
{context['reference_time']}
</REFERENCE TIME>
"""
    system_message = Message(
        role='system',
        content='You extract temporal bounds from facts. NEVER hallucinate dates.',
    )
    return [system_message, Message(role='user', content=instructions)]


def extract_timestamps_batch(context: dict[str, Any]) -> list[Message]:
    """Build the prompt that extracts temporal bounds for a batch of facts.

    Expects ``context['facts']``, serialized into the prompt via
    ``to_prompt_json``. Returns a system + user message pair.
    """
    instructions = f"""Given a list of FACTS with their REFERENCE TIMES, determine when each fact
became true (valid_at) and when it stopped being true (invalid_at).

Rules:
- Resolve relative expressions ("last week", "2 years ago", "yesterday") using each fact's REFERENCE TIME.
- If the fact is ongoing (present tense), set valid_at to its REFERENCE TIME.
- If a change or end is expressed, set invalid_at to the relevant time.
- Leave both null if no time is stated or resolvable.
- If only a date is mentioned (no time), assume 00:00:00.
- Use ISO 8601 with Z suffix (e.g., 2025-04-30T00:00:00Z).
- Do NOT hallucinate or infer dates from unrelated events.

Return one timestamps entry per fact, in the same order.

<FACTS>
{to_prompt_json(context['facts'])}
</FACTS>
"""
    system_message = Message(
        role='system',
        content='You extract temporal bounds from facts. NEVER hallucinate dates.',
    )
    return [system_message, Message(role='user', content=instructions)]


# Registry of this module's prompt functions, keyed by prompt name.
versions: Versions = {
    'edge': edge,
    'extract_attributes': extract_attributes,
    'extract_timestamps': extract_timestamps,
    'extract_timestamps_batch': extract_timestamps_batch,
}
6 changes: 3 additions & 3 deletions graphiti_core/prompts/extract_nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ class ExtractedEntity(BaseModel):
'Must be one of the provided entity_type_id integers.',
)
episode_indices: list[int] = Field(
default_factory=lambda: [1],
description='List of episode numbers (1-indexed) this entity was extracted from. '
'When processing a single episode, this should be [1].',
default_factory=lambda: [0],
description='List of episode numbers (0-indexed) this entity was extracted from. '
'When processing a single episode, this should be [0].',
)


Expand Down
154 changes: 154 additions & 0 deletions graphiti_core/prompts/extract_nodes_and_edges.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
"""
Copyright 2024, Zep Software, Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from typing import Any, Protocol, TypedDict

from pydantic import BaseModel, Field

from .models import Message, PromptFunction, PromptVersion
from .prompt_helpers import to_prompt_json


class CombinedEntity(BaseModel):
    """Entity extracted by the combined node+edge extraction prompt."""

    # Entity name as it should appear in the graph.
    name: str = Field(..., description='Name of the extracted entity')
    # Integer ID chosen from the entity types supplied in the prompt context.
    entity_type_id: int = Field(
        description='ID of the classified entity type. '
        'Must be one of the provided entity_type_id integers.',
    )


class CombinedFact(BaseModel):
    """Relationship fact extracted by the combined node+edge extraction prompt."""

    # Both endpoint names must match entries in the extracted entities list.
    source_entity_name: str = Field(
        ..., description='The name of the source entity from the extracted entities list'
    )
    target_entity_name: str = Field(
        ..., description='The name of the target entity from the extracted entities list'
    )
    relation_type: str = Field(
        ...,
        description='The type of relationship between the entities, in SCREAMING_SNAKE_CASE '
        '(e.g., WORKS_AT, LIVES_IN, IS_FRIENDS_WITH)',
    )
    fact: str = Field(
        ...,
        description='A self-contained natural language description of the relationship, '
        'paraphrased from the source text with all specific details preserved',
    )
    # 0-indexed episode numbers; defaults to [0] for single-episode extraction.
    episode_indices: list[int] = Field(
        default_factory=lambda: [0],
        description='List of episode numbers (0-indexed) that this fact was derived from. '
        'When processing a single episode, this should be [0].',
    )


class CombinedExtraction(BaseModel):
    """Combined node and edge extraction response."""

    # Entities found in the episode(s); fact endpoints reference these by name.
    extracted_entities: list[CombinedEntity] = Field(..., description='List of extracted entities')
    edges: list[CombinedFact] = Field(..., description='List of extracted relationship facts')


class Prompt(Protocol):
    """Structural interface for this module's prompt versions."""

    extract_message: PromptVersion


class Versions(TypedDict):
    """Mapping of prompt names to their message-building functions."""

    extract_message: PromptFunction


def extract_message(context: dict[str, Any]) -> list[Message]:
    """Build the combined node-and-edge extraction prompt for message episodes.

    Expects ``context`` keys: ``entity_types``, ``previous_episodes``,
    ``episode_content``, and ``custom_extraction_instructions``.
    Returns a system + user message pair; the LLM response is expected to
    parse as ``CombinedExtraction``.
    """
    # System role: frames single-pass entity + relationship extraction and
    # stresses that only the extracted graph survives to retrieval time.
    sys_prompt = (
        'You are an expert knowledge graph extraction specialist for an AI agent memory system. '
        'You extract both entity nodes and relationship facts from conversations in a single pass. '
        'The extracted graph will be searched later by an AI agent to answer questions, personalize '
        'responses, and maintain long-term memory. The original conversation will NOT be available '
        'at retrieval time — only the entities and facts you extract will survive.'
    )

    # User prompt: entity rules, fact rules, then the episode payload.
    # Fix: list(...) replaces the redundant identity comprehension
    # [ep for ep in context['previous_episodes']] (ruff C416); output is identical.
    user_prompt = f"""
ENTITY RULES:
1. Extract speakers and named entities explicitly mentioned in CURRENT MESSAGES.
2. Entity names must be at most 5 words. Use the most specific form mentioned.
3. When someone discusses their possession, project, pet, or creation, extract it
as a SEPARATE possessive entity — not just the person, not just the bare noun:
GOOD: "James's notebook", "Calvin's guitar", "Audrey's dogs", "Sam's cooking class"
BAD: "notebook", "guitar", "dogs" (too generic) or just "James" (collapses detail)
4. Extract hobbies and activities as entities when someone engages in them:
"video games", "watercolor painting", "VR gaming", "road cycling", "cooking"
5. Extract named/described objects ("Gamecube", "Ford Mustang", "wool coat") and
places ("Riverside Park", "the gym", "the beach") — not bare generics ("car", "coat").
6. Do NOT extract: pronouns, vague abstractions (balance, growth, motivation),
filler nouns (day, life, stuff, time), dates as entities, full sentences as names.
7. Each entity appears exactly ONCE. Classify using the ENTITY TYPES provided.
8. Only extract entities from CURRENT MESSAGES — PREVIOUS MESSAGES are context only.

FACT RULES:
1. source_entity_name and target_entity_name must match your extracted entity names.
2. When a fact involves two entities that are BOTH in your extracted entities list,
you MUST use both as source and target — never collapse into a self-referencing fact:
"Nate plays games on a Gamecube" → Nate -> PLAYS_GAMES_ON -> Gamecube
"Sarah lives in San Francisco" → Sarah -> LIVES_IN -> San Francisco
"James has a dog named Maximilian" → James -> HAS_PET -> Maximilian
Only use a self-referencing fact when no second entity in your list fits.
Self-referencing facts are still common and valuable — do NOT skip them:
- Routines/health: "Deborah goes jogging every morning", "Evan has a knee injury"
- Preferences/plans: "Nate's favorite game is Xenoblade Chronicles",
"Jon said he would not quit on his dreams"
- Emotions/states: "Sam feels he lacks motivation"
3. Facts must be SELF-CONTAINED — understandable without the original episode.
Use entity names, not pronouns. Preserve specific details where possible.
4. Extract facts from EVERY episode — not just the first. Process each episode's
CURRENT_MESSAGE independently. Set `episode_indices` to the 0-based episode
number(s) each fact comes from (matching [Episode N] headers).
If the SAME fact appears across multiple episodes, extract it ONCE and list ALL
episode indices — do NOT emit duplicate facts with different episode numbers.
5. You MAY use PREVIOUS MESSAGES to resolve what the current message refers to.
If the current message reacts to or confirms prior context, extract the full
contextualized fact (e.g., "all the hard work paid off" → extract what paid off).
6. Extract liberally — when in doubt, extract the fact. Preferences, opinions,
reactions, advice, plans, states, and experiences are all valuable. Only skip
content-free utterances like "Hi!", "Bye!", "Thanks!".
7. Do not emit redundant facts across episodes. But if a later episode adds new
details (brand, count, location), extract the more detailed version as a new fact.

<ENTITY TYPES>
{context['entity_types']}
</ENTITY TYPES>

<PREVIOUS MESSAGES>
{to_prompt_json(list(context['previous_episodes']))}
</PREVIOUS MESSAGES>

<CURRENT MESSAGES>
{context['episode_content']}
</CURRENT MESSAGES>

{context['custom_extraction_instructions']}
"""

    return [
        Message(role='system', content=sys_prompt),
        Message(role='user', content=user_prompt),
    ]


# Registry of this module's prompt functions, keyed by prompt name.
versions: Versions = {
    'extract_message': extract_message,
}
6 changes: 6 additions & 0 deletions graphiti_core/prompts/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@
from .extract_nodes import Prompt as ExtractNodesPrompt
from .extract_nodes import Versions as ExtractNodesVersions
from .extract_nodes import versions as extract_nodes_versions
from .extract_nodes_and_edges import Prompt as ExtractNodesAndEdgesPrompt
from .extract_nodes_and_edges import Versions as ExtractNodesAndEdgesVersions
from .extract_nodes_and_edges import versions as extract_nodes_and_edges_versions
from .models import Message, PromptFunction
from .prompt_helpers import DO_NOT_ESCAPE_UNICODE
from .summarize_nodes import Prompt as SummarizeNodesPrompt
Expand All @@ -45,6 +48,7 @@ class PromptLibrary(Protocol):
extract_nodes: ExtractNodesPrompt
dedupe_nodes: DedupeNodesPrompt
extract_edges: ExtractEdgesPrompt
extract_nodes_and_edges: ExtractNodesAndEdgesPrompt
dedupe_edges: DedupeEdgesPrompt
summarize_nodes: SummarizeNodesPrompt
summarize_sagas: SummarizeSagasPrompt
Expand All @@ -55,6 +59,7 @@ class PromptLibraryImpl(TypedDict):
extract_nodes: ExtractNodesVersions
dedupe_nodes: DedupeNodesVersions
extract_edges: ExtractEdgesVersions
extract_nodes_and_edges: ExtractNodesAndEdgesVersions
dedupe_edges: DedupeEdgesVersions
summarize_nodes: SummarizeNodesVersions
summarize_sagas: SummarizeSagasVersions
Expand Down Expand Up @@ -88,6 +93,7 @@ def __init__(self, library: PromptLibraryImpl):
'extract_nodes': extract_nodes_versions,
'dedupe_nodes': dedupe_nodes_versions,
'extract_edges': extract_edges_versions,
'extract_nodes_and_edges': extract_nodes_and_edges_versions,
'dedupe_edges': dedupe_edges_versions,
'summarize_nodes': summarize_nodes_versions,
'summarize_sagas': summarize_sagas_versions,
Expand Down
Loading
Loading