yaalalabs · pulinduvidmal · May 10, 2026 · May 10, 2026 · May 10, 2026 · May 10, 2026
diff --git a/ak-py/pyproject.toml b/ak-py/pyproject.toml
@@ -117,6 +117,16 @@ gmail = [
     "google-auth-httplib2>=0.2.0",
     "google-api-python-client>=2.0.0",
 ]
+livekit = [
+    "livekit-agents>=1.0.0",
+    "livekit-plugins-openai>=1.0.0",
+    "livekit-plugins-deepgram>=1.0.0",
+    "livekit-plugins-silero>=1.0.0",
+    "livekit-plugins-elevenlabs>=1.0.0",
+    "livekit-plugins-google>=1.0.0",
+    "livekit-api>=0.2.0",
+    "Pillow>=10.0.0",
+]
 test = [
     "pytest>=8.4.1",
     "pytest-asyncio>=1.2.0",

diff --git a/ak-py/src/agentkernel/core/config.py b/ak-py/src/agentkernel/core/config.py
@@ -132,6 +132,21 @@ class _GmailConfig(BaseModel):
     label_filter: str = Field(default="INBOX", description="Gmail label to monitor (e.g., INBOX, UNREAD)")
 
 
+class _LiveKitConfig(BaseModel):
+    agent: str = Field(default="", description="Default agent to use for LiveKit Voice interactions")
+    url: str = Field(default="", description="LiveKit WebSocket URL (e.g., wss://my-project.livekit.cloud)")
+    api_key: str = Field(default="", description="LiveKit API Key")
+    api_secret: str = Field(default="", description="LiveKit API Secret")
+    stt_provider: str = Field(default="deepgram", pattern="^(deepgram|openai)$", description="Speech-to-Text provider (deepgram or openai)")
+    tts_provider: str = Field(
+        default="openai", pattern="^(openai|elevenlabs|google)$", description="Text-to-Speech provider (openai, elevenlabs, or google)"
+    )
+    vision_enabled: bool = Field(
+        default=False,
+        description="Enable webcam vision capture. When enabled, the latest video frame is captured on each voice turn and processed through the multimodal pipeline.",
+    )
+
+
 class _MultimodalStorageRedisConfig(BaseModel):
     url: str = Field(default="redis://localhost:6379", description="Redis connection URL")
     ttl: int = Field(default=604800, description="Attachment TTL in seconds")
@@ -281,6 +296,7 @@ class AKConfig(YamlBaseSettingsModified):
     instagram: _InstagramConfig = Field(description="Instagram Business API related configurations", default_factory=_InstagramConfig)
     telegram: _TelegramConfig = Field(description="Telegram Bot related configurations", default_factory=_TelegramConfig)
     gmail: _GmailConfig = Field(description="Gmail related configurations", default_factory=_GmailConfig)
+    livekit: _LiveKitConfig = Field(description="LiveKit Voice related configurations", default_factory=_LiveKitConfig)
     multimodal: _MultimodalConfig = Field(description="Multimodal attachment memory configurations", default_factory=_MultimodalConfig)
 
     trace: _TraceConfig = Field(description="Tracing related configurations", default_factory=_TraceConfig)

diff --git a/ak-py/src/agentkernel/integration/livekit/README.md b/ak-py/src/agentkernel/integration/livekit/README.md
@@ -0,0 +1,82 @@
+# LiveKit Voice Integration
+
+Agent Kernel supports real-time, ultra-low-latency voice integrations via [LiveKit](https://livekit.io/).
+
+By treating LiveKit as an **Integration**, you can build an agent once (using CrewAI, LangGraph, OpenAI, etc.), equip it with tools and memory via Agent Kernel, and then use LiveKit to allow users to **talk to your agent over a real-time voice call**.
+
+LiveKit handles the WebRTC voice connection, Speech-to-Text (STT), and Text-to-Speech (TTS), while Agent Kernel handles the intelligence, routing, and tools.
+
+## Architecture
+
+When you use the LiveKit integration:
+1. The user speaks into their microphone via a LiveKit frontend.
+2. LiveKit's **Speech-to-Text (STT)** plugin transcribes the voice into text.
+3. The transcribed text is intercepted by our custom `LiveKitLLM` bridge.
+4. The bridge forwards the text to **Agent Kernel** (`AgentService().run(text)`).
+5. Agent Kernel's selected agent processes the text and generates a response.
+6. The response is sent back to LiveKit's **Text-to-Speech (TTS)** plugin.
+7. The TTS plugin synthesizes the voice and streams it back to the user.
+
+## Setup
+
+First, ensure you have installed the LiveKit optional dependencies:
+
+```bash
+pip install "agentkernel[livekit]"
+```
+
+You will also need:
+1. A free account on [LiveKit Cloud](https://cloud.livekit.io/).
+2. Your LiveKit server URL and API credentials (`AK_LIVEKIT__URL`, `AK_LIVEKIT__API_KEY`, `AK_LIVEKIT__API_SECRET`).
+3. API keys for your preferred STT/TTS providers (e.g., `OPENAI_API_KEY`, `DEEPGRAM_API_KEY`).
+
+## Configuration
+
+In your `config.yaml`, configure which Agent Kernel agent should respond to LiveKit voice interactions, as well as your preferred STT and TTS providers:
+
+```yaml
+livekit:
+  agent: "my-voice-agent"
+  stt_provider: "deepgram"   # Options: deepgram, openai
+  tts_provider: "openai"     # Options: openai, elevenlabs, google
+  url: "wss://your-project-id.livekit.cloud" # Optional, can use AK_LIVEKIT__URL env var
+  api_key: "your_api_key"                    # Optional, can use AK_LIVEKIT__API_KEY env var
+  api_secret: "your_api_secret"              # Optional, can use AK_LIVEKIT__API_SECRET env var
+```
+
+Any of these settings can also be supplied via environment variables, using the `AK_LIVEKIT__` prefix:
+```bash
+export AK_LIVEKIT__AGENT="my-voice-agent"
+export AK_LIVEKIT__STT_PROVIDER="openai"
+```
+
+## Example Usage
+
+Create a Python script (e.g., `server.py`) that initializes your Agent Kernel agent and starts the REST API. The `AgentLiveKitRequestHandler` will automatically launch the LiveKit background worker alongside your FastAPI server.
+
+```python
+import os
+import logging
+from agentkernel.api import RESTAPI
+from agentkernel.openai import OpenAIModule
+from agentkernel.livekit import AgentLiveKitRequestHandler
+from agents import Agent as OpenAIAgent
+
+logging.basicConfig(level=logging.INFO)
+
+# 1. Define your Agent Kernel Agent
+voice_agent = OpenAIAgent(
+    name="my-voice-agent",
+    handoff_description="Agent for voice interactions",
+    instructions="You are a concise voice assistant. Do not use markdown or emojis.",
+)
+
+# 2. Register the agent with Agent Kernel
+OpenAIModule([voice_agent])
+
+# 3. Start the server with the LiveKit Handler
+if __name__ == "__main__":
+    RESTAPI.run([AgentLiveKitRequestHandler()])
+```
+
+> **Note:** The `AgentLiveKitRequestHandler` exposes a `/livekit/token` API endpoint on your FastAPI server. Your frontend (e.g., a React application or the LiveKit Agent Console) can call this endpoint to generate secure access tokens for users joining the voice room.
diff --git a/ak-py/src/agentkernel/integration/livekit/__init__.py b/ak-py/src/agentkernel/integration/livekit/__init__.py
@@ -0,0 +1,14 @@
+"""
+Agent Kernel Integration with LiveKit
+
+This package contains the Agent Kernel integration implementations for LiveKit Voice Agents.
+"""
+
+import importlib.metadata
+
+try:
+    __version__ = importlib.metadata.version("agentkernel")
+except importlib.metadata.PackageNotFoundError:
+    __version__ = "0.1.0"
+
+from .livekit_handler import AgentLiveKitRequestHandler, LiveKitLLM