Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please don't edit this file

Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Release Notes

## [unreleased]

Integrations:

* Capture Gemini `cache_read`, `thoughts` and `tool_use_prompt` tokens in `instrument_google_genai`; compute `operation.cost` via `genai-prices` when available.

## [v4.33.0] (2026-05-13)

CLI:
Expand Down
15 changes: 15 additions & 0 deletions docs/integrations/llms/google-genai.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,18 @@ This creates a span which shows the conversation in the Logfire UI:
to `true`, the spans will simply contain `<elided>` where the prompts and completions would be.

[`logfire.instrument_google_genai()`][logfire.Logfire.instrument_google_genai] uses the `GoogleGenAiSdkInstrumentor().instrument()` method of the [`opentelemetry-instrumentation-google-genai`](https://pypi.org/project/opentelemetry-instrumentation-google-genai/) package.

## Token usage details

When a span captures a Gemini call via `logfire.instrument_google_genai()`, the
following attributes may appear depending on the response:

- `gen_ai.usage.input_tokens` — total prompt tokens (already includes cached tokens; see below)
- `gen_ai.usage.output_tokens` — completion tokens
- `gen_ai.usage.cache_read.input_tokens` — tokens served from context cache (cache hit)
- `gen_ai.usage.details.thoughts_tokens` — reasoning tokens (Gemini 2.5 / 3.x)
- `gen_ai.usage.details.tool_use_prompt_tokens` — tokens present in tool-use prompts (Gemini's `tool_use_prompt_token_count`)
- `operation.cost` — calculated price in USD (requires [`genai-prices`](https://pypi.org/project/genai-prices/))

Note that, unlike Anthropic, the Gemini API's `prompt_token_count` already includes
the cached tokens; Logfire does not sum them again.
62 changes: 62 additions & 0 deletions logfire/_internal/integrations/google_genai.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,68 @@ def wrapped_to_dict(obj: object) -> object:
pass


try:
from opentelemetry.instrumentation.google_genai import generate_content as _gc_module

_Helper = _gc_module._GenerateContentInstrumentationHelper # pyright: ignore[reportPrivateUsage]
_original_maybe_update = _Helper._maybe_update_token_counts # pyright: ignore[reportPrivateUsage]
_original_create_final = _Helper.create_final_attributes

def _wrapped_maybe_update_token_counts(self: Any, response: Any) -> None:
_original_maybe_update(self, response)
try:
metadata = getattr(response, 'usage_metadata', None)
if metadata is None:
return
# "keep last non-zero" — streaming sends partial chunks; cached/thoughts/tool_use
# counts typically only appear in the final chunk.
if cached := getattr(metadata, 'cached_content_token_count', None):
self._lf_cache_read = cached
if thoughts := getattr(metadata, 'thoughts_token_count', None):
self._lf_thoughts = thoughts
if tool_use := getattr(metadata, 'tool_use_prompt_token_count', None):
self._lf_tool_use_prompt = tool_use
self._lf_response = response

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it looks like _lf_response is the only thing that needs to be stored, and the rest can be retrieved in _wrapped_create_final_attributes

except Exception: # pragma: no cover
pass

def _wrapped_create_final_attributes(self: Any) -> dict[str, Any]:
    """Extend the upstream final span attributes with extra Gemini usage data.

    Calls the original `create_final_attributes` and then, on a best-effort
    basis, adds:

    - the cache-read, thoughts and tool-use-prompt token counts, read from the
      `_lf_cache_read`, `_lf_thoughts` and `_lf_tool_use_prompt` attributes of
      `self` when they are present and non-zero;
    - `operation.cost`, computed with `genai_prices` from the response stored
      in `_lf_response`, when that package is importable and recognises the
      model.

    Returns:
        The attribute dict produced by the original method, possibly enriched.
        Any error raised while enriching is swallowed so the upstream
        attributes are always returned.
    """
    attrs = _original_create_final(self)
    try:
        # Missing, None and zero counts are all falsy, so the attribute is
        # simply left out of the span in those cases.
        if cached := getattr(self, '_lf_cache_read', None):
            attrs['gen_ai.usage.cache_read.input_tokens'] = cached
        if thoughts := getattr(self, '_lf_thoughts', None):
            attrs['gen_ai.usage.details.thoughts_tokens'] = thoughts
        if tool_use := getattr(self, '_lf_tool_use_prompt', None):
            attrs['gen_ai.usage.details.tool_use_prompt_tokens'] = tool_use
        response = getattr(self, '_lf_response', None)
        if response is not None:
            try:
                # Imported lazily: genai_prices is optional, and a failed
                # import lands in the `except` below like any pricing error.
                from genai_prices import calc_price, extract_usage

                # genai_prices expects the camelCase JSON keys ('usageMetadata', 'modelVersion');
                # google-genai pydantic models use snake_case fields with camelCase aliases.
                usage_data = extract_usage(response.model_dump(by_alias=True), provider_id='google')
                if usage_data.model is not None:
                    attrs['operation.cost'] = float(
                        calc_price(
                            usage_data.usage,
                            model_ref=usage_data.model.id,
                            provider_id='google',
                        ).total_price
                    )
            except Exception:
                # Cost is optional: leave `operation.cost` unset rather than
                # let a pricing failure affect the token attributes above.
                pass
    except Exception:  # pragma: no cover
        pass
    return attrs

_Helper._maybe_update_token_counts = _wrapped_maybe_update_token_counts # pyright: ignore[reportPrivateUsage]
_Helper.create_final_attributes = _wrapped_create_final_attributes
except Exception: # pragma: no cover
pass


Part: TypeAlias = 'dict[str, Any] | str'


Expand Down
155 changes: 155 additions & 0 deletions tests/otel_integrations/test_google_genai.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import warnings
from typing import Any
from unittest import mock
from unittest.mock import patch

Expand Down Expand Up @@ -108,6 +109,7 @@ def get_current_weather(location: str) -> str:
'gen_ai.usage.input_tokens': 58,
'gen_ai.usage.output_tokens': 9,
'gen_ai.response.finish_reasons': ('stop',),
'operation.cost': 9.4e-06,
'logfire.metrics': IsPartialDict(),
'events': [
{'content': 'help', 'role': 'system'},
Expand Down Expand Up @@ -200,6 +202,7 @@ def get_current_weather(location: str) -> str:
'gen_ai.usage.input_tokens': 39,
'gen_ai.usage.output_tokens': 7,
'gen_ai.response.finish_reasons': ('stop',),
'operation.cost': 6.7e-06,
'logfire.metrics': IsPartialDict(),
'events': [
{'content': '<elided>', 'role': 'user'},
Expand Down Expand Up @@ -258,6 +261,8 @@ class ResponseData(pydantic.BaseModel):
'gen_ai.usage.input_tokens': 2,
'gen_ai.usage.output_tokens': 13,
'gen_ai.response.finish_reasons': ('stop',),
'gen_ai.usage.details.thoughts_tokens': 58,
'operation.cost': 0.0001781,
'logfire.metrics': IsPartialDict(),
'events': [
{'content': 'Hi', 'role': 'user'},
Expand All @@ -278,6 +283,156 @@ class ResponseData(pydantic.BaseModel):
)


def _stub_generate_content(response: Any) -> Any:
def _generate(self: Any, **kwargs: Any) -> Any:
return response

return _generate


def _build_fake_genai_response(

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is extra mocking needed? can we stick to vcr for the new tests?

*,
model_version: str = 'gemini-2.5-flash',
prompt_token_count: int = 1000,
candidates_token_count: int = 200,
cached_content_token_count: int | None = None,
thoughts_token_count: int | None = None,
tool_use_prompt_token_count: int | None = None,
):
from google.genai.types import (
Candidate,
Content,
FinishReason,
GenerateContentResponse,
GenerateContentResponseUsageMetadata,
Part,
)

return GenerateContentResponse(
model_version=model_version,
usage_metadata=GenerateContentResponseUsageMetadata(
prompt_token_count=prompt_token_count,
candidates_token_count=candidates_token_count,
cached_content_token_count=cached_content_token_count,
thoughts_token_count=thoughts_token_count,
tool_use_prompt_token_count=tool_use_prompt_token_count,
total_token_count=(prompt_token_count or 0) + (candidates_token_count or 0),
),
candidates=[
Candidate(
content=Content(parts=[Part.from_text(text='hi back')], role='model'),
finish_reason=FinishReason.STOP,
)
],
)


@pytest.fixture
def reset_google_genai_instrumentation():
    """Force re-instrumentation so monkeypatched `Models.generate_content` is captured.

    The upstream `_MethodsSnapshot` captures `Models.generate_content` at instrument
    time. The instrumentor is a process-wide singleton with an
    `is_instrumented_by_opentelemetry` flag that gates re-instrumentation. We clear
    the flag (the proper `uninstrument()` call asserts on a snapshot that the
    upstream `__init__` resets to None on every `GoogleGenAiSdkInstrumentor()` call,
    which makes it unreliable in a test suite) so the next `instrument()` call
    re-creates the snapshot and picks up the mock.

    The flag is cleared again after the test so that later tests re-instrument too.
    """
    from opentelemetry.instrumentation.google_genai import GoogleGenAiSdkInstrumentor

    instrumentor = GoogleGenAiSdkInstrumentor()
    instrumentor._is_instrumented_by_opentelemetry = False  # pyright: ignore[reportPrivateUsage]
    yield
    instrumentor._is_instrumented_by_opentelemetry = False  # pyright: ignore[reportPrivateUsage]


def test_instrument_google_genai_cache_and_thinking_tokens(
    exporter: TestExporter, monkeypatch: pytest.MonkeyPatch, reset_google_genai_instrumentation: None
) -> None:
    """Cached, thoughts and tool-use-prompt counts and a cost all land on the span."""
    from google.genai import Client
    from google.genai.models import Models

    fake_response = _build_fake_genai_response(
        prompt_token_count=1000,
        candidates_token_count=200,
        cached_content_token_count=750,
        thoughts_token_count=80,
        tool_use_prompt_token_count=30,
    )
    # Patch before instrumenting so the instrumentor wraps the stub.
    monkeypatch.setattr(Models, 'generate_content', _stub_generate_content(fake_response))

    logfire.instrument_google_genai()

    client = Client(api_key='fake')
    client.models.generate_content(model='gemini-2.5-flash', contents='hi')  # type: ignore

    [span] = exporter.exported_spans_as_dict(parse_json_attributes=True)
    attrs = span['attributes']
    assert attrs['gen_ai.usage.input_tokens'] == 1000
    assert attrs['gen_ai.usage.output_tokens'] == 200
    assert attrs['gen_ai.usage.cache_read.input_tokens'] == 750
    assert attrs['gen_ai.usage.details.thoughts_tokens'] == 80
    assert attrs['gen_ai.usage.details.tool_use_prompt_tokens'] == 30
    # operation.cost depends on the current Gemini 2.5 Flash pricing table in
    # genai_prices; just confirm it was computed and is a sensible positive value.
    assert isinstance(attrs['operation.cost'], float)
    assert attrs['operation.cost'] > 0


def test_instrument_google_genai_no_cache_metadata(
    exporter: TestExporter, monkeypatch: pytest.MonkeyPatch, reset_google_genai_instrumentation: None
) -> None:
    """When the response carries no cached/thoughts/tool-use counts, no extra attributes are set."""
    from google.genai import Client
    from google.genai.models import Models

    fake_response = _build_fake_genai_response(
        prompt_token_count=58,
        candidates_token_count=9,
    )
    # Patch before instrumenting so the instrumentor wraps the stub.
    monkeypatch.setattr(Models, 'generate_content', _stub_generate_content(fake_response))

    logfire.instrument_google_genai()

    client = Client(api_key='fake')
    client.models.generate_content(model='gemini-2.5-flash', contents='hi')  # type: ignore

    [span] = exporter.exported_spans_as_dict(parse_json_attributes=True)
    attrs = span['attributes']
    assert 'gen_ai.usage.cache_read.input_tokens' not in attrs
    assert 'gen_ai.usage.details.thoughts_tokens' not in attrs
    assert 'gen_ai.usage.details.tool_use_prompt_tokens' not in attrs
    assert attrs['gen_ai.usage.input_tokens'] == 58
    assert attrs['gen_ai.usage.output_tokens'] == 9


def test_instrument_google_genai_cost_silent_failure(
    exporter: TestExporter, monkeypatch: pytest.MonkeyPatch, reset_google_genai_instrumentation: None
) -> None:
    """An unpriceable model yields no `operation.cost` but still reports the token details."""
    from google.genai import Client
    from google.genai.models import Models

    fake_response = _build_fake_genai_response(
        model_version='gemini-unknown-999',
        prompt_token_count=1000,
        candidates_token_count=200,
        cached_content_token_count=750,
        thoughts_token_count=80,
    )
    # Patch before instrumenting so the instrumentor wraps the stub.
    monkeypatch.setattr(Models, 'generate_content', _stub_generate_content(fake_response))

    logfire.instrument_google_genai()

    client = Client(api_key='fake')
    client.models.generate_content(model='gemini-unknown-999', contents='hi')  # type: ignore

    [span] = exporter.exported_spans_as_dict(parse_json_attributes=True)
    attrs = span['attributes']
    assert 'operation.cost' not in attrs
    assert attrs['gen_ai.usage.cache_read.input_tokens'] == 750
    assert attrs['gen_ai.usage.details.thoughts_tokens'] == 80


def test_span_event_logger_with_none_parts(exporter: TestExporter) -> None:
"""Test that SpanEventLogger handles parts=None gracefully.

Expand Down
Loading