Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions strix/llm/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,12 +159,36 @@ async def generate(
messages = self._prepare_messages(conversation_history)
max_retries = int(Config.get("strix_llm_max_retries") or "5")

transient_thinking_retries = 0
max_transient_thinking_retries = 6

for attempt in range(max_retries + 1):
try:
async for response in self._stream(messages):
yield response
return # noqa: TRY300
except Exception as e: # noqa: BLE001
# Bedrock occasionally returns a 400 claiming the assistant's thinking
# blocks have been modified — even when the payload is structurally
# valid and the same payload succeeds on replay. Observed on
# claude-sonnet-4-6 with adaptive thinking. Retry with exponential
# backoff (5s, 10s, 20s, 40s, 80s, 160s — ~5 min total budget), using
# an inner loop so the transient retry counter does not share slots
# with the outer max_retries budget. Once the inner budget is
# exhausted (or a non-transient error is raised), fall through to the
# generic retry path below.
while (
self._is_transient_thinking_error(e)
and transient_thinking_retries < max_transient_thinking_retries
):
transient_thinking_retries += 1
await asyncio.sleep(min(240, 5 * (2 ** (transient_thinking_retries - 1))))
try:
async for response in self._stream(messages):
yield response
return # noqa: TRY300
except Exception as e2: # noqa: BLE001
e = e2
if attempt >= max_retries or not self._should_retry(e):
self._raise_error(e)
wait = min(90, 2 * (2**attempt))
Expand Down Expand Up @@ -323,6 +347,22 @@ def _extract_cost(self, response: Any) -> float:
except Exception: # noqa: BLE001
return 0.0

@staticmethod
def _is_transient_thinking_error(e: Exception) -> bool:
    """Report whether ``e`` is the spurious "thinking blocks cannot be modified" 400.

    Bedrock has been observed (claude-sonnet-4-6, adaptive thinking) to reject
    a well-formed request with this message even though replaying the very
    same payload succeeds, so a match is classified as transient rather than
    as a structural problem with the request.

    Args:
        e: The exception raised by the request.

    Returns:
        True only when the HTTP status found on ``e.status_code`` (or, when
        that is missing or falsy, on ``e.response.status_code``) is 400 and
        the lower-cased ``str(e)`` contains both "thinking" and
        "cannot be modified".
    """
    # Prefer the status on the exception itself; otherwise look at the
    # attached response object, if there is one.
    status = getattr(e, "status_code", None)
    if not status:
        response = getattr(e, "response", None)
        status = getattr(response, "status_code", None)

    if status != 400:
        return False

    # Match case-insensitively: both fragments must appear in the message.
    text = str(e).lower()
    return all(fragment in text for fragment in ("thinking", "cannot be modified"))

def _should_retry(self, e: Exception) -> bool:
code = getattr(e, "status_code", None) or getattr(
getattr(e, "response", None), "status_code", None
Expand Down