diff --git a/extensions/copilot/src/extension/chatSessions/copilotcli/node/copilotcliSession.ts b/extensions/copilot/src/extension/chatSessions/copilotcli/node/copilotcliSession.ts
index 741c65e9582b2..d727814ec9a21 100644
--- a/extensions/copilot/src/extension/chatSessions/copilotcli/node/copilotcliSession.ts
+++ b/extensions/copilot/src/extension/chatSessions/copilotcli/node/copilotcliSession.ts
@@ -13,6 +13,7 @@ import { GenAiMetrics } from '../../../../platform/otel/common/genAiMetrics';
 import { CopilotChatAttr, GenAiAttr, GenAiOperationName, IOTelService, ISpanHandle, SpanKind, SpanStatusCode, truncateForOTel } from '../../../../platform/otel/common/index';
 import { CapturingToken } from '../../../../platform/requestLogger/common/capturingToken';
 import { IRequestLogger, LoggedRequestKind } from '../../../../platform/requestLogger/common/requestLogger';
+import { PromptTokenCategory, PromptTokenLabel } from '../../../../platform/tokenizer/node/promptTokenDetails';
 import { IWorkspaceService } from '../../../../platform/workspace/common/workspaceService';
 import { raceCancellation } from '../../../../util/vs/base/common/async';
 import { CancellationToken } from '../../../../util/vs/base/common/cancellation';
@@ -403,6 +404,27 @@ export class CopilotCLISession extends DisposableStore implements ICopilotCLISes
 
 		const chunkMessageIds = new Set();
 		const assistantMessageChunks: string[] = [];
+		// Latest snapshot received from `session.usage_info`; used to build the per-category breakdown.
+		let lastUsageInfo: UsageInfoData | undefined;
+		// Forwards token counts to the attached stream; a no-op once cancelled or when no stream is attached.
+		const reportUsage = (promptTokens: number, completionTokens: number) => {
+			if (token.isCancellationRequested || !this._stream) {
+				return;
+			}
+			this._stream.usage({
+				promptTokens,
+				completionTokens,
+				promptTokenDetails: buildPromptTokenDetails(lastUsageInfo),
+			});
+		};
+		// Deliberately a function rather than an eagerly started promise: it is invoked only after the
+		// request has completed, so the metrics describe this request and a rejection is always caught.
+		const updateUsageInfo = async () => {
+			const metrics = await this._sdkSession.usage.getMetrics();
+			// `||` (not `??`): a missing or zero currentTokens falls back to the last call's input tokens.
+			const promptTokens = lastUsageInfo?.currentTokens || metrics.lastCallInputTokens;
+			reportUsage(promptTokens, metrics.lastCallOutputTokens);
+		};
 		try {
 			const shouldHandleExitPlanModeRequests = this.configurationService.getConfig(ConfigKey.Advanced.CLIPlanExitModeEnabled);
 			disposables.add(toDisposable(this._sdkSession.on('*', (event) => {
@@ -558,12 +580,20 @@ export class CopilotCLISession extends DisposableStore implements ICopilotCLISes
 			})));
 			disposables.add(toDisposable(this._sdkSession.on('assistant.usage', (event) => {
 				if (this._stream && typeof event.data.outputTokens === 'number' && typeof event.data.inputTokens === 'number') {
-					this._stream.usage({
-						completionTokens: event.data.outputTokens,
-						promptTokens: event.data.inputTokens,
-					});
+					reportUsage(event.data.inputTokens, event.data.outputTokens);
 				}
 			})));
+			disposables.add(toDisposable(this._sdkSession.on('session.usage_info', (event) => {
+				// Keep the snapshot so later usage reports can attach promptTokenDetails; 0 is passed for completion tokens.
+				lastUsageInfo = {
+					currentTokens: event.data.currentTokens,
+					systemTokens: event.data.systemTokens,
+					conversationTokens: event.data.conversationTokens,
+					toolDefinitionsTokens: event.data.toolDefinitionsTokens,
+					tokenLimit: event.data.tokenLimit,
+				};
+				reportUsage(lastUsageInfo.currentTokens, 0);
+			})));
 			disposables.add(toDisposable(this._sdkSession.on('assistant.message_delta', (event) => {
 				// Support for streaming delta messages.
 				if (typeof event.data.deltaContent === 'string' && event.data.deltaContent.length) {
@@ -723,7 +753,6 @@ export class CopilotCLISession extends DisposableStore implements ICopilotCLISes
 				await this.sendRequestInternal(input, attachments, false, logStartTime);
 			}
 			this.logService.trace(`[CopilotCLISession] Invoking session (completed) ${this.sessionId}`);
-
 			const resolvedToolIdEditMap: Record = {};
 			await Promise.all(Array.from(toolIdEditMap.entries()).map(async ([toolId, editFilePromise]) => {
 				const editId = await editFilePromise.catch(() => undefined);
@@ -741,6 +770,10 @@ export class CopilotCLISession extends DisposableStore implements ICopilotCLISes
 					this.logService.error(`[CopilotCLISession] Failed to update chat session metadata store for request ${request.id}`, error);
 				});
 			}
+			// Final usage report: query the SDK metrics now that the request has finished.
+			await updateUsageInfo().catch(error => {
+				this.logService.error(`[CopilotCLISession] Failed to update usage info after request ${request.id}`, error);
+			});
 			this._status = ChatSessionStatus.Completed;
 			this._statusChange.fire(this._status);
 
@@ -1270,3 +1303,47 @@ function isHttpUrl(value: string): boolean {
 	}
 }
 
+/** Fields copied from a `session.usage_info` event and retained for usage reporting. */
+interface UsageInfoData {
+	readonly currentTokens: number;
+	readonly systemTokens?: number;
+	readonly conversationTokens?: number;
+	readonly toolDefinitionsTokens?: number;
+	readonly tokenLimit?: number;
+}
+
+/**
+ * Converts a usage snapshot into per-category shares of `currentTokens`.
+ * Each share is rounded independently, so the percentages may not add up to exactly 100.
+ * @returns `undefined` when there is no snapshot, `currentTokens` is not positive, or no category has a positive count.
+ */
+function buildPromptTokenDetails(usageInfo: UsageInfoData | undefined): { category: string; label: string; percentageOfPrompt: number }[] | undefined {
+	if (!usageInfo || usageInfo.currentTokens <= 0) {
+		return undefined;
+	}
+	const details: { category: string; label: string; percentageOfPrompt: number }[] = [];
+	const total = usageInfo.currentTokens;
+	if (usageInfo.systemTokens && usageInfo.systemTokens > 0) {
+		details.push({
+			category: PromptTokenCategory.System,
+			label: PromptTokenLabel.SystemInstructions,
+			percentageOfPrompt: Math.round((usageInfo.systemTokens / total) * 100),
+		});
+	}
+	if (usageInfo.toolDefinitionsTokens && usageInfo.toolDefinitionsTokens > 0) {
+		details.push({
+			category: PromptTokenCategory.System,
+			label: PromptTokenLabel.Tools,
+			percentageOfPrompt: Math.round((usageInfo.toolDefinitionsTokens / total) * 100),
+		});
+	}
+	if (usageInfo.conversationTokens && usageInfo.conversationTokens > 0) {
+		details.push({
+			category: PromptTokenCategory.UserContext,
+			label: PromptTokenLabel.Messages,
+			percentageOfPrompt: Math.round((usageInfo.conversationTokens / total) * 100),
+		});
+	}
+	return details.length > 0 ? details : undefined;
+}
+
diff --git a/extensions/copilot/src/extension/chatSessions/copilotcli/node/test/copilotcliSession.spec.ts b/extensions/copilot/src/extension/chatSessions/copilotcli/node/test/copilotcliSession.spec.ts
index a2d384c7dc16f..aea13fb28cce1 100644
--- a/extensions/copilot/src/extension/chatSessions/copilotcli/node/test/copilotcliSession.spec.ts
+++ b/extensions/copilot/src/extension/chatSessions/copilotcli/node/test/copilotcliSession.spec.ts
@@ -133,6 +133,20 @@ class MockSdkSession {
 	async setSelectedModel(model: string, _reasoningEffort?: string) { this._selectedModel = model; }
 	async getEvents() { return []; }
 	getPlanPath(): string | null { return null; }
+
+	usage = {
+		getMetrics: async () => ({
+			lastCallInputTokens: 100,
+			lastCallOutputTokens: 50,
+			totalPremiumRequestCost: 0,
+			totalUserRequests: 1,
+			totalApiDurationMs: 1000,
+			sessionStartTime: Date.now(),
+			codeChanges: { linesAdded: 0, linesRemoved: 0, filesModifiedCount: 0 },
+			modelMetrics: {},
+			currentModel: this._selectedModel,
+		}),
+	};
 }
 
 function createWorkspaceService(root: string): IWorkspaceService {
@@ -158,6 +172,13 @@ function workspaceInfoFor(workingDirectory: Uri | undefined): IWorkspaceInfo {
 	};
 }
 
+/** Response stream that records every `usage()` payload so tests can assert on the reported token counts. */
+class UsageCapturingStream extends MockChatResponseStream {
+	public readonly usages: import('vscode').ChatResultUsage[] = [];
+	override usage(u: import('vscode').ChatResultUsage): void {
+		this.usages.push(u);
+	}
+}
 describe('CopilotCLISession', () => {
 	const disposables = new DisposableStore();
 
@@ -1227,4 +1248,188 @@ describe('CopilotCLISession', () => {
 			expect(result.value).toEqual({ approved: false });
 		});
 	});
+
+	describe('usage reporting', () => {
+		it('reports usage from assistant.usage event with per-call tokens', async () => {
+			sdkSession.send = async (options: any) => {
+				sdkSession.emit('user.message', { content: options.prompt });
+				sdkSession.emit('assistant.usage', { inputTokens: 200, outputTokens: 80 });
+				sdkSession.emit('assistant.turn_end', {});
+			};
+
+			const session = await createSession();
+			const stream = new UsageCapturingStream();
+			session.attachStream(stream);
+
+			await session.handleRequest({ id: 'req-1', toolInvocationToken: undefined as never }, { prompt: 'Hello' }, [], undefined, authInfo, CancellationToken.None);
+
+			const usageFromEvent = stream.usages.find(u => u.promptTokens === 200 && u.completionTokens === 80);
+			expect(usageFromEvent).toBeDefined();
+		});
+
+		it('reports usage from session.usage_info event immediately', async () => {
+			sdkSession.send = async (options: any) => {
+				sdkSession.emit('user.message', { content: options.prompt });
+				sdkSession.emit('session.usage_info', {
+					currentTokens: 500,
+					tokenLimit: 8000,
+					messagesLength: 5,
+					systemTokens: 100,
+					conversationTokens: 350,
+					toolDefinitionsTokens: 50,
+				});
+				sdkSession.emit('assistant.turn_end', {});
+			};
+
+			const session = await createSession();
+			const stream = new UsageCapturingStream();
+			session.attachStream(stream);
+
+			await session.handleRequest({ id: 'req-1', toolInvocationToken: undefined as never }, { prompt: 'Hello' }, [], undefined, authInfo, CancellationToken.None);
+
+			const usageFromInfo = stream.usages.find(u => u.promptTokens === 500);
+			expect(usageFromInfo).toBeDefined();
+			expect(usageFromInfo!.completionTokens).toBe(0);
+		});
+
+		it('includes promptTokenDetails breakdown in usage from session.usage_info', async () => {
+			sdkSession.send = async (options: any) => {
+				sdkSession.emit('user.message', { content: options.prompt });
+				sdkSession.emit('session.usage_info', {
+					currentTokens: 500,
+					tokenLimit: 8000,
+					messagesLength: 5,
+					systemTokens: 100,
+					conversationTokens: 350,
+					toolDefinitionsTokens: 50,
+				});
+				sdkSession.emit('assistant.turn_end', {});
+			};
+
+			const session = await createSession();
+			const stream = new UsageCapturingStream();
+			session.attachStream(stream);
+
+			await session.handleRequest({ id: 'req-1', toolInvocationToken: undefined as never }, { prompt: 'Hello' }, [], undefined, authInfo, CancellationToken.None);
+
+			const usageFromInfo = stream.usages.find(u => u.promptTokens === 500);
+			expect(usageFromInfo?.promptTokenDetails).toBeDefined();
+			expect(usageFromInfo!.promptTokenDetails).toEqual([
+				{ category: 'System', label: 'System Instructions', percentageOfPrompt: 20 },
+				{ category: 'System', label: 'Tool Definitions', percentageOfPrompt: 10 },
+				{ category: 'User Context', label: 'Messages', percentageOfPrompt: 70 },
+			]);
+		});
+
+		it('populates promptTokenDetails in assistant.usage event when usage_info was previously received', async () => {
+			sdkSession.send = async (options: any) => {
+				sdkSession.emit('user.message', { content: options.prompt });
+				sdkSession.emit('session.usage_info', {
+					currentTokens: 400,
+					tokenLimit: 8000,
+					messagesLength: 4,
+					systemTokens: 80,
+					conversationTokens: 280,
+					toolDefinitionsTokens: 40,
+				});
+				sdkSession.emit('assistant.usage', { inputTokens: 400, outputTokens: 60 });
+				sdkSession.emit('assistant.turn_end', {});
+			};
+
+			const session = await createSession();
+			const stream = new UsageCapturingStream();
+			session.attachStream(stream);
+
+			await session.handleRequest({ id: 'req-1', toolInvocationToken: undefined as never }, { prompt: 'Hello' }, [], undefined, authInfo, CancellationToken.None);
+
+			const assistantUsage = stream.usages.find(u => u.promptTokens === 400 && u.completionTokens === 60);
+			expect(assistantUsage).toBeDefined();
+			expect(assistantUsage!.promptTokenDetails).toBeDefined();
+			expect(assistantUsage!.promptTokenDetails!.length).toBeGreaterThan(0);
+		});
+
+		it('reports final usage from getMetrics() after session completes', async () => {
+			sdkSession.usage.getMetrics = async () => ({
+				lastCallInputTokens: 350,
+				lastCallOutputTokens: 90,
+				totalPremiumRequestCost: 0,
+				totalUserRequests: 1,
+				totalApiDurationMs: 500,
+				sessionStartTime: Date.now(),
+				codeChanges: { linesAdded: 0, linesRemoved: 0, filesModifiedCount: 0 },
+				modelMetrics: {},
+				currentModel: 'modelA',
+			});
+
+			const session = await createSession();
+			const stream = new UsageCapturingStream();
+			session.attachStream(stream);
+
+			await session.handleRequest({ id: 'req-1', toolInvocationToken: undefined as never }, { prompt: 'Hello' }, [], undefined, authInfo, CancellationToken.None);
+
+			const finalUsage = stream.usages.at(-1);
+			expect(finalUsage).toBeDefined();
+			expect(finalUsage!.completionTokens).toBe(90);
+		});
+
+		it('uses currentTokens from session.usage_info as promptTokens in final usage report (non-zero after compaction)', async () => {
+			sdkSession.send = async (options: any) => {
+				sdkSession.emit('user.message', { content: options.prompt });
+				// Simulate post-compaction: usage_info fires with reduced token count, no assistant.usage follows
+				sdkSession.emit('session.usage_info', {
+					currentTokens: 120,
+					tokenLimit: 8000,
+					messagesLength: 2,
+					systemTokens: 80,
+					conversationTokens: 40,
+					toolDefinitionsTokens: 0,
+				});
+				sdkSession.emit('assistant.turn_end', {});
+			};
+			sdkSession.usage.getMetrics = async () => ({
+				lastCallInputTokens: 0, // stale / no new call made
+				lastCallOutputTokens: 0,
+				totalPremiumRequestCost: 0,
+				totalUserRequests: 1,
+				totalApiDurationMs: 0,
+				sessionStartTime: Date.now(),
+				codeChanges: { linesAdded: 0, linesRemoved: 0, filesModifiedCount: 0 },
+				modelMetrics: {},
+				currentModel: 'modelA',
+			});
+
+			const session = await createSession();
+			const stream = new UsageCapturingStream();
+			session.attachStream(stream);
+
+			await session.handleRequest({ id: 'req-1', toolInvocationToken: undefined as never }, { prompt: 'Hello' }, [], undefined, authInfo, CancellationToken.None);
+
+			// Final usage should use currentTokens (120) not the stale lastCallInputTokens (0)
+			const finalUsage = stream.usages.at(-1);
+			expect(finalUsage!.promptTokens).toBe(120);
+		});
+
+		it('queries getMetrics() only after the request has been sent', async () => {
+			const calls: string[] = [];
+			sdkSession.send = async (options: any) => {
+				calls.push('send');
+				sdkSession.emit('user.message', { content: options.prompt });
+				sdkSession.emit('assistant.turn_end', {});
+			};
+			const originalGetMetrics = sdkSession.usage.getMetrics;
+			sdkSession.usage.getMetrics = async () => {
+				calls.push('getMetrics');
+				return originalGetMetrics();
+			};
+
+			const session = await createSession();
+			const stream = new UsageCapturingStream();
+			session.attachStream(stream);
+
+			await session.handleRequest({ id: 'req-1', toolInvocationToken: undefined as never }, { prompt: 'Hello' }, [], undefined, authInfo, CancellationToken.None);
+
+			expect(calls).toContain('send');
+			expect(calls.indexOf('getMetrics')).toBeGreaterThan(calls.indexOf('send'));
+		});
+	});
 });