diff --git a/.changeset/fix-gemini-thought-signature-part-level.md b/.changeset/fix-gemini-thought-signature-part-level.md new file mode 100644 index 000000000..3d3659b44 --- /dev/null +++ b/.changeset/fix-gemini-thought-signature-part-level.md @@ -0,0 +1,12 @@ +--- +'@tanstack/ai-gemini': patch +--- + +fix(ai-gemini): read/write thoughtSignature at Part level + +Gemini emits `thoughtSignature` as a Part-level sibling of `functionCall` (per the `@google/genai` `Part` type definition), not nested inside `functionCall`. The `FunctionCall` type has never had a `thoughtSignature` property. The adapter was reading from `functionCall.thoughtSignature` (which doesn't exist in the SDK types) and writing it back nested inside `functionCall`, causing Gemini 3.x to reject subsequent tool-call turns with `400 INVALID_ARGUMENT: "Function call is missing a thought_signature"`. + +This fix: + +- **Read side:** reads `part.thoughtSignature` directly, using the SDK's typed `Part` interface +- **Write side:** emits `thoughtSignature` as a Part-level sibling of `functionCall`, using the SDK's typed `Part` interface diff --git a/packages/typescript/ai-gemini/src/adapters/text.ts b/packages/typescript/ai-gemini/src/adapters/text.ts index 0ed39cebf..43fe25de1 100644 --- a/packages/typescript/ai-gemini/src/adapters/text.ts +++ b/packages/typescript/ai-gemini/src/adapters/text.ts @@ -351,6 +351,11 @@ export class GeminiTextAdapter< `${functionCall.name}_${Date.now()}_${nextToolIndex}` const functionArgs = functionCall.args || {} + // Gemini emits thoughtSignature as a Part-level sibling of + // functionCall (per @google/genai Part type), not nested inside + // functionCall itself. 
+ const partThoughtSignature = part.thoughtSignature || undefined + let toolCallData = toolCallMap.get(toolCallId) if (!toolCallData) { toolCallData = { @@ -361,11 +366,13 @@ export class GeminiTextAdapter< : JSON.stringify(functionArgs), index: nextToolIndex++, started: false, - thoughtSignature: - (functionCall as any).thoughtSignature || undefined, + thoughtSignature: partThoughtSignature, } toolCallMap.set(toolCallId, toolCallData) } else { + if (!toolCallData.thoughtSignature && partThoughtSignature) { + toolCallData.thoughtSignature = partThoughtSignature + } try { const existingArgs = JSON.parse(toolCallData.args) const newArgs = @@ -675,14 +682,21 @@ export class GeminiTextAdapter< const thoughtSignature = toolCall.providerMetadata ?.thoughtSignature as string | undefined - parts.push({ + // Gemini requires thoughtSignature at the Part level (sibling of + // functionCall), not nested inside functionCall. Nesting it causes + // the API to reject the next turn with + // "Function call is missing a thought_signature". 
+ const part: Part = { functionCall: { id: toolCall.id, name: toolCall.function.name, args: parsedArgs, - ...(thoughtSignature && { thoughtSignature }), - } as any, - }) + }, + } + if (thoughtSignature) { + part.thoughtSignature = thoughtSignature + } + parts.push(part) } } diff --git a/packages/typescript/ai-gemini/tests/gemini-adapter.test.ts b/packages/typescript/ai-gemini/tests/gemini-adapter.test.ts index 9ad04e530..3da5d4c20 100644 --- a/packages/typescript/ai-gemini/tests/gemini-adapter.test.ts +++ b/packages/typescript/ai-gemini/tests/gemini-adapter.test.ts @@ -502,10 +502,11 @@ describe('GeminiAdapter through AI', () => { expect(textParts[0].text).toBe("what's a good electric guitar?") }) - it('preserves thoughtSignature in functionCall parts when sending history back to Gemini', async () => { + it('reads Part-level thoughtSignature from Gemini 3.x streaming response', async () => { const thoughtSig = 'base64-encoded-thought-signature-xyz' - // First stream: model returns a function call with a thoughtSignature (thinking model) + // Gemini 3.x emits thoughtSignature at the Part level, as a sibling of + // functionCall (per @google/genai Part type), not nested inside functionCall. 
+ const firstStream = [ { candidates: [ @@ -513,11 +514,11 @@ describe('GeminiAdapter through AI', () => { content: { parts: [ { + thoughtSignature: thoughtSig, functionCall: { id: 'fc_001', name: 'sum_tool', args: { numbers: [1, 2, 5] }, - thoughtSignature: thoughtSig, }, }, ], @@ -533,7 +534,6 @@ describe('GeminiAdapter through AI', () => { }, ] - // Second stream: model returns the final answer const secondStream = [ { candidates: [ @@ -587,8 +587,92 @@ describe('GeminiAdapter through AI', () => { const functionCallPart = modelTurn.parts.find((p: any) => p.functionCall) expect(functionCallPart).toBeDefined() expect(functionCallPart.functionCall.name).toBe('sum_tool') - // The thoughtSignature must be preserved in the model turn's functionCall - expect(functionCallPart.functionCall.thoughtSignature).toBe(thoughtSig) + // thoughtSignature must be at the Part level, NOT nested in functionCall + expect(functionCallPart.thoughtSignature).toBe(thoughtSig) + expect(functionCallPart.functionCall.thoughtSignature).toBeUndefined() + }) + + it('ignores thoughtSignature nested inside functionCall (not part of @google/genai FunctionCall type)', async () => { + // The @google/genai SDK has never typed thoughtSignature on FunctionCall; + // it only exists on Part. A nested value should be ignored. + const firstStream = [ + { + candidates: [ + { + content: { + parts: [ + { + functionCall: { + id: 'fc_nested', + name: 'sum_tool', + args: { numbers: [3, 4] }, + thoughtSignature: 'should-be-ignored', + }, + }, + ], + }, + finishReason: 'STOP', + }, + ], + usageMetadata: { + promptTokenCount: 10, + candidatesTokenCount: 5, + totalTokenCount: 15, + }, + }, + ] + + const secondStream = [ + { + candidates: [ + { + content: { parts: [{ text: 'The sum is 7.' 
}] }, + finishReason: 'STOP', + }, + ], + usageMetadata: { + promptTokenCount: 20, + candidatesTokenCount: 5, + totalTokenCount: 25, + }, + }, + ] + + mocks.generateContentStreamSpy + .mockResolvedValueOnce(createStream(firstStream)) + .mockResolvedValueOnce(createStream(secondStream)) + + const adapter = createTextAdapter() + + const sumTool: Tool = { + name: 'sum_tool', + description: 'Sums an array of numbers.', + execute: async (input: any) => ({ + result: input.numbers.reduce((a: number, b: number) => a + b, 0), + }), + } + + for await (const _ of chat({ + adapter, + tools: [sumTool], + messages: [{ role: 'user', content: 'What is 3 + 4?' }], + })) { + /* consume stream */ + } + + expect(mocks.generateContentStreamSpy).toHaveBeenCalledTimes(2) + + const [secondPayload] = mocks.generateContentStreamSpy.mock.calls[1] + const modelTurn = secondPayload.contents.find( + (c: any) => c.role === 'model', + ) + expect(modelTurn).toBeDefined() + + const functionCallPart = modelTurn.parts.find((p: any) => p.functionCall) + expect(functionCallPart).toBeDefined() + // No thoughtSignature should be emitted since none was at Part level + expect(functionCallPart.thoughtSignature).toBeUndefined() + expect(functionCallPart.functionCall.thoughtSignature).toBeUndefined() }) it('uses function name (not toolCallId) in functionResponse and preserves the call id', async () => {