Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions agents/src/voice/agent_activity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -115,12 +115,14 @@ import {
type _AudioOut,
type _TextOut,
applyInstructionsModality,
injectRunningToolCalls,
performAudioForwarding,
performLLMInference,
performTTSInference,
performTextForwarding,
performToolExecutions,
removeInstructions,
stripRunningToolCalls,
updateInstructions,
} from './generation.js';
import type { PlaybackFinishedEvent, TimedString } from './io.js';
Expand Down Expand Up @@ -2560,6 +2562,8 @@ export class AgentActivity implements RecognitionHooks {
// apply the correct variant of the instructions for the turn's input modality
applyInstructionsModality(chatCtx, { modality: speechHandle.inputDetails.modality });

injectRunningToolCalls(chatCtx, this.runningToolCalls());

const tasks: Array<Task<void>> = [];
const [llmTask, llmGenData] = performLLMInference(
// preserve `this` context in llmNode
Expand Down Expand Up @@ -3062,6 +3066,7 @@ export class AgentActivity implements RecognitionHooks {
...functionToolsExecutedEvent.functionCallOutputs,
] as ChatItem[];
if (shouldGenerateToolReply) {
stripRunningToolCalls(chatCtx);
chatCtx.insert(toolMessages);

// Increment step count on the existing handle.
Expand Down Expand Up @@ -3854,6 +3859,26 @@ export class AgentActivity implements RecognitionHooks {
this.wakeupMainTask();
}

/**
 * Gathers the function calls of all background speeches that do not yet have
 * a matching `function_call_output` item within the same speech.
 *
 * @returns the unfinished function calls, in the order they are encountered
 */
private runningToolCalls(): FunctionCall[] {
  const pending: FunctionCall[] = [];

  for (const speech of this._backgroundSpeeches) {
    // call ids that already produced an output in this speech
    const finishedIds = new Set(
      speech.chatItems.flatMap((entry) =>
        entry.type === 'function_call_output' ? [entry.callId] : [],
      ),
    );

    for (const entry of speech.chatItems) {
      if (entry.type !== 'function_call') continue;
      if (finishedIds.has(entry.callId)) continue;
      pending.push(entry);
    }
  }

  return pending;
}

private async _pauseSchedulingTask(blockedTasks: Task<any>[]): Promise<void> {
if (this._schedulingPaused) return;

Expand Down
62 changes: 62 additions & 0 deletions agents/src/voice/generation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,68 @@ export class _LLMGenerationData {
}
}

// Default output text attached to the placeholder FunctionCallOutput that
// injectRunningToolCalls inserts for a call that has not finished yet.
const RUNNING_TOOL_PLACEHOLDER = 'The tool call is still in progress.';
// Key set to `true` in the `extra` of an injected placeholder FunctionCall so
// that stripRunningToolCalls can find and remove the injected items later.
const RUNNING_PLACEHOLDER_KEY = '__lk_running_placeholder__';

/**
 * Inserts a placeholder call/output pair into `chatCtx` for every running
 * function call that the context does not already reference.
 *
 * For each such call, a copy of the function call (with
 * `extra[RUNNING_PLACEHOLDER_KEY]` set to `true`) and a non-error
 * `FunctionCallOutput` whose output is `placeholder` are inserted. Both
 * reuse the original call's `callId`, `name` and `createdAt`.
 *
 * @param chatCtx - chat context to insert into; modified in place
 * @param runningCalls - function calls that are still in progress
 * @param placeholder - output text for the placeholder result; defaults to
 *   `RUNNING_TOOL_PLACEHOLDER`
 * @internal
 */
export function injectRunningToolCalls(
chatCtx: ChatContext,
runningCalls: Iterable<FunctionCall>,
placeholder: string = RUNNING_TOOL_PLACEHOLDER,
): void {
// call ids already present in the context, as a call or as an output
const existing = new Set(
chatCtx.items.flatMap((item) =>
item.type === 'function_call' || item.type === 'function_call_output' ? [item.callId] : [],
),
);

for (const fncCall of runningCalls) {
if (existing.has(fncCall.callId)) continue;
// remember the id so a repeated entry in runningCalls is inserted only once
existing.add(fncCall.callId);

chatCtx.insert([
FunctionCall.create({
id: fncCall.id,
callId: fncCall.callId,
name: fncCall.name,
args: fncCall.args,
createdAt: fncCall.createdAt,
extra: { ...fncCall.extra, [RUNNING_PLACEHOLDER_KEY]: true },

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🚩 Placeholder marker key leaks into LLM provider serialization

The __lk_running_placeholder__ key added to FunctionCall.extra (agents/src/voice/generation.ts:110) will be included when the chat context is serialized for the LLM provider, because FunctionCall.toJSON() at agents/src/llm/chat_context.ts:508-509 emits the full extra object whenever it is non-empty. Most providers ignore unrecognized fields, so this is unlikely to cause failures, but it does mean an internal framework marker is exposed in the wire payload. Consider stripping it during serialization or using a Symbol key instead if any provider is strict about extra fields.

Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

groupId: fncCall.groupId,
thoughtSignature: fncCall.thoughtSignature,
}),
FunctionCallOutput.create({
callId: fncCall.callId,
name: fncCall.name,
output: placeholder,
isError: false,
createdAt: fncCall.createdAt,
}),
]);
}
}

/**
 * Removes placeholder tool-call items from `chatCtx`.
 *
 * A function call counts as a placeholder when its `extra` carries
 * `RUNNING_PLACEHOLDER_KEY` set to `true`. Every `function_call` and
 * `function_call_output` item sharing a placeholder's `callId` is dropped.
 * `chatCtx.items` is left unassigned when no placeholder is found.
 *
 * @param chatCtx - chat context to clean up; its `items` may be replaced
 * @internal
 */
export function stripRunningToolCalls(chatCtx: ChatContext): void {
  const placeholderCallIds = new Set<string>();
  for (const entry of chatCtx.items) {
    if (entry.type === 'function_call' && entry.extra[RUNNING_PLACEHOLDER_KEY] === true) {
      placeholderCallIds.add(entry.callId);
    }
  }

  // nothing was injected, so leave the item list untouched
  if (placeholderCallIds.size === 0) return;

  chatCtx.items = chatCtx.items.filter((entry) => {
    if (entry.type !== 'function_call' && entry.type !== 'function_call_output') {
      return true;
    }
    return !placeholderCallIds.has(entry.callId);
  });
}

/**
* TTS generation data containing audio stream and optional timed transcripts.
* @internal
Expand Down
Loading