From 999ecc97c4e8a3eb5de285bbeeb6cdcc1c615a85 Mon Sep 17 00:00:00 2001 From: Sean Turner Date: Sun, 19 Apr 2026 09:50:35 +0100 Subject: [PATCH 1/5] fix: recover from BadRequestError caused by oversized conversation payloads When tool results (e.g. large config files, CIS benchmark YAML) accumulate in conversation history, the serialized payload can exceed the provider's request size limit, causing a persistent HTTP 400. Previously this was not retried, failing the scan immediately. Now handles BadRequestError in two stages: 1. Bare retry after 2s (transient 400s from provider hiccups) 2. If STRIX_TRUNCATE_ON_OVERSIZE=true, truncates the largest tool_result XML blocks to 1000 chars with a "requires manual review" notice, then retries. This is opt-in to avoid lossy recovery for users who don't want it. Observed in practice on repos with 50KB+ CIS defaults YAML and 94KB tfvars.json files hitting Bedrock payload limits at ~6M tokens. Co-Authored-By: Claude Opus 4.6 --- strix/llm/llm.py | 63 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/strix/llm/llm.py b/strix/llm/llm.py index 4f624956a..57790e40b 100644 --- a/strix/llm/llm.py +++ b/strix/llm/llm.py @@ -159,12 +159,28 @@ async def generate( messages = self._prepare_messages(conversation_history) max_retries = int(Config.get("strix_llm_max_retries") or "5") + bad_request_retried = False + bad_request_truncated = False + for attempt in range(max_retries + 1): try: async for response in self._stream(messages): yield response return # noqa: TRY300 except Exception as e: # noqa: BLE001 + if self._is_bad_request(e): + if not bad_request_retried: + bad_request_retried = True + await asyncio.sleep(2) + continue + truncate_enabled = Config.get("strix_truncate_on_oversize") or "" + if ( + not bad_request_truncated + and truncate_enabled.lower() in ("1", "true", "yes") + and self._truncate_large_tool_results(messages) + ): + bad_request_truncated = True + continue if attempt >= max_retries or not self._should_retry(e): self._raise_error(e) wait = min(90, 2 * (2**attempt)) @@ -314,6 +330,53 @@ def _extract_cost(self, response: Any) -> float: except Exception: # noqa: BLE001 return 0.0 + @staticmethod + def _truncate_large_tool_results( + messages: list[dict[str, Any]], max_chars: int = 2000 + ) -> bool: + """Aggressively truncate large tool results in messages to recover from BadRequestError. + + Scans messages in reverse for tool_result XML blocks that exceed max_chars and + replaces their content with a truncated version plus a skip notice. Returns True + if any truncation was performed (caller should retry the request). + """ + import re + + truncated_any = False + pattern = re.compile( + r"(\s*[^<]*\s*)(.*?)(\s*)", + re.DOTALL, + ) + + for msg in reversed(messages): + content = msg.get("content") + if not isinstance(content, str) or "" not in content: + continue + + def _truncate_match(m: re.Match) -> str: + prefix, body, suffix = m.group(1), m.group(2), m.group(3) + if len(body) <= max_chars: + return m.group(0) + nonlocal truncated_any + truncated_any = True + kept = body[:1000] + return ( + f"{prefix}{kept}\n\n... [content truncated from {len(body)} to {len(kept)} chars " + f"due to request size limit — file requires manual review] ...{suffix}" + ) + + msg["content"] = pattern.sub(_truncate_match, content) + if truncated_any: + break + + return truncated_any + + def _is_bad_request(self, e: Exception) -> bool: + code = getattr(e, "status_code", None) or getattr( + getattr(e, "response", None), "status_code", None + ) + return code == 400 + def _should_retry(self, e: Exception) -> bool: code = getattr(e, "status_code", None) or getattr( getattr(e, "response", None), "status_code", None From 4a213b17a2cb1f0f30412c9bde19ca00a0278753 Mon Sep 17 00:00:00 2001 From: Sean Turner Date: Mon, 20 Apr 2026 16:54:04 +0100 Subject: [PATCH 2/5] =?UTF-8?q?fix:=20address=20review=20feedback=20?= =?UTF-8?q?=E2=80=94=20last-attempt=20guard,=20module-level=20import,=20cl?= =?UTF-8?q?osure=20scope?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- strix/llm/llm.py | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/strix/llm/llm.py b/strix/llm/llm.py index 57790e40b..6e951e2a0 100644 --- a/strix/llm/llm.py +++ b/strix/llm/llm.py @@ -1,4 +1,5 @@ import asyncio +import re from collections.abc import AsyncIterator from dataclasses import dataclass from typing import Any @@ -171,6 +172,8 @@ async def generate( if self._is_bad_request(e): if not bad_request_retried: bad_request_retried = True + if attempt >= max_retries: + self._raise_error(e) await asyncio.sleep(2) continue truncate_enabled = Config.get("strix_truncate_on_oversize") or "" @@ -180,6 +183,8 @@ async def generate( and self._truncate_large_tool_results(messages) ): bad_request_truncated = True + if attempt >= max_retries: + self._raise_error(e) continue if attempt >= max_retries or not self._should_retry(e): self._raise_error(e) @@ -339,32 +344,33 @@ def _truncate_large_tool_results( Scans messages in reverse for tool_result XML blocks that exceed max_chars and replaces their content with a truncated version plus a skip notice. Returns True if any truncation was performed (caller should retry the request). - """ - import re + Note: All oversized tool_result blocks within a single message are truncated + in one pass — this is intentional to maximise payload size reduction per retry. + """ truncated_any = False pattern = re.compile( r"(\s*[^<]*\s*)(.*?)(\s*)", re.DOTALL, ) + def _truncate_match(m: re.Match) -> str: + nonlocal truncated_any + prefix, body, suffix = m.group(1), m.group(2), m.group(3) + if len(body) <= max_chars: + return m.group(0) + truncated_any = True + kept = body[:1000] + return ( + f"{prefix}{kept}\n\n... [content truncated from {len(body)} to {len(kept)} chars " + f"due to request size limit — file requires manual review] ...{suffix}" + ) + for msg in reversed(messages): content = msg.get("content") if not isinstance(content, str) or "" not in content: continue - def _truncate_match(m: re.Match) -> str: - prefix, body, suffix = m.group(1), m.group(2), m.group(3) - if len(body) <= max_chars: - return m.group(0) - nonlocal truncated_any - truncated_any = True - kept = body[:1000] - return ( - f"{prefix}{kept}\n\n... [content truncated from {len(body)} to {len(kept)} chars " - f"due to request size limit — file requires manual review] ...{suffix}" - ) - msg["content"] = pattern.sub(_truncate_match, content) if truncated_any: break From e28350e2c43f89e4e0fd3356ea2b31113d6d9b6f Mon Sep 17 00:00:00 2001 From: Sean Turner Date: Fri, 24 Apr 2026 09:23:49 +0100 Subject: [PATCH 3/5] fix: allow repeated truncation passes for multi-message oversized payloads Remove one-shot bad_request_truncated guard so truncation retries on each 400 until nothing remains to truncate. Also scan all messages per pass instead of stopping at the first hit. Addresses review feedback from Greptile on #460. Co-Authored-By: Claude Opus 4.6 --- strix/llm/llm.py | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/strix/llm/llm.py b/strix/llm/llm.py index 6e951e2a0..a1a6012eb 100644 --- a/strix/llm/llm.py +++ b/strix/llm/llm.py @@ -161,7 +161,6 @@ async def generate( max_retries = int(Config.get("strix_llm_max_retries") or "5") bad_request_retried = False - bad_request_truncated = False for attempt in range(max_retries + 1): try: @@ -178,11 +177,9 @@ async def generate( continue truncate_enabled = Config.get("strix_truncate_on_oversize") or "" if ( - not bad_request_truncated - and truncate_enabled.lower() in ("1", "true", "yes") + truncate_enabled.lower() in ("1", "true", "yes") and self._truncate_large_tool_results(messages) ): - bad_request_truncated = True if attempt >= max_retries: self._raise_error(e) continue @@ -339,14 +336,10 @@ def _extract_cost(self, response: Any) -> float: def _truncate_large_tool_results( messages: list[dict[str, Any]], max_chars: int = 2000 ) -> bool: - """Aggressively truncate large tool results in messages to recover from BadRequestError. + """Truncate large tool_result XML blocks to recover from BadRequestError. - Scans messages in reverse for tool_result XML blocks that exceed max_chars and - replaces their content with a truncated version plus a skip notice. Returns True - if any truncation was performed (caller should retry the request). - - Note: All oversized tool_result blocks within a single message are truncated - in one pass — this is intentional to maximise payload size reduction per retry. + Scans all messages for tool_result blocks exceeding max_chars and truncates them. + Called repeatedly on each 400 until it returns False (nothing left to truncate). """ truncated_any = False pattern = re.compile( @@ -370,10 +363,7 @@ def _truncate_match(m: re.Match) -> str: content = msg.get("content") if not isinstance(content, str) or "" not in content: continue - msg["content"] = pattern.sub(_truncate_match, content) - if truncated_any: - break return truncated_any From 6e25294bc81421d8c56bc4a185e77da7b065e5cb Mon Sep 17 00:00:00 2001 From: Sean Turner Date: Mon, 27 Apr 2026 18:31:31 +0100 Subject: [PATCH 4/5] =?UTF-8?q?fix:=20address=20review=20feedback=20?= =?UTF-8?q?=E2=80=94=20split=20threshold/target,=20sleep=20before=20trunca?= =?UTF-8?q?tion=20retry?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename max_chars → threshold_chars and add separate truncate_to_chars parameter so the threshold and truncation target can differ by design, replacing the hardcoded 1000. Callers can now shrink blocks aggressively without re-processing blocks that are already acceptable. - Add 2s sleep before the truncation-path `continue` to match the bare-retry pacing. If Bedrock is throttling after the original 400, immediately hitting it again risks a second rejection before the truncated payload is evaluated. --- strix/llm/llm.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/strix/llm/llm.py b/strix/llm/llm.py index a1a6012eb..6a8ffb222 100644 --- a/strix/llm/llm.py +++ b/strix/llm/llm.py @@ -182,6 +182,10 @@ async def generate( ): if attempt >= max_retries: self._raise_error(e) + # Pace the provider — matches the 2s sleep on the bare-retry + # path so a throttled provider isn't hit back-to-back after the + # original 400. + await asyncio.sleep(2) continue if attempt >= max_retries or not self._should_retry(e): self._raise_error(e) @@ -334,12 +338,21 @@ def _extract_cost(self, response: Any) -> float: @staticmethod def _truncate_large_tool_results( - messages: list[dict[str, Any]], max_chars: int = 2000 + messages: list[dict[str, Any]], + threshold_chars: int = 2000, + truncate_to_chars: int = 1000, ) -> bool: """Truncate large tool_result XML blocks to recover from BadRequestError. - Scans all messages for tool_result blocks exceeding max_chars and truncates them. - Called repeatedly on each 400 until it returns False (nothing left to truncate). + Scans all messages for tool_result blocks whose body exceeds threshold_chars + and shrinks them to truncate_to_chars. Called repeatedly on each 400 until it + returns False (nothing left to truncate). + + threshold_chars and truncate_to_chars are independent: the threshold decides + which blocks qualify for truncation, and truncate_to_chars is the size of the + retained prefix. They are not the same value to allow aggressive shrinking of + blocks that are well over the threshold without re-processing blocks that are + already acceptable. """ truncated_any = False pattern = re.compile( @@ -350,10 +363,10 @@ def _truncate_large_tool_results( def _truncate_match(m: re.Match) -> str: nonlocal truncated_any prefix, body, suffix = m.group(1), m.group(2), m.group(3) - if len(body) <= max_chars: + if len(body) <= threshold_chars: return m.group(0) truncated_any = True - kept = body[:1000] + kept = body[:truncate_to_chars] return ( f"{prefix}{kept}\n\n... [content truncated from {len(body)} to {len(kept)} chars " f"due to request size limit — file requires manual review] ...{suffix}" From 39c84d368593883dc1cbb4891de7467b4aba6c49 Mon Sep 17 00:00:00 2001 From: bearsyankees Date: Mon, 27 Apr 2026 17:31:14 -0400 Subject: [PATCH 5/5] Fix truncate list formatting and tool result pattern caching --- strix/llm/llm.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/strix/llm/llm.py b/strix/llm/llm.py index 6a8ffb222..5cabf227e 100644 --- a/strix/llm/llm.py +++ b/strix/llm/llm.py @@ -27,6 +27,12 @@ litellm.modify_params = True +_TOOL_RESULT_PATTERN = re.compile( + r"(\s*[^<]*\s*)(.*?)(\s*)", + re.DOTALL, +) + + class LLMRequestFailedError(Exception): def __init__(self, message: str, details: str | None = None): super().__init__(message) @@ -355,10 +361,6 @@ def _truncate_large_tool_results( already acceptable. """ truncated_any = False - pattern = re.compile( - r"(\s*[^<]*\s*)(.*?)(\s*)", - re.DOTALL, - ) def _truncate_match(m: re.Match) -> str: nonlocal truncated_any @@ -374,9 +376,17 @@ def _truncate_match(m: re.Match) -> str: for msg in reversed(messages): content = msg.get("content") - if not isinstance(content, str) or "" not in content: - continue - msg["content"] = pattern.sub(_truncate_match, content) + + if isinstance(content, list): + for block in content: + if ( + block.get("type") == "text" + and isinstance(block.get("text"), str) + and "" in block["text"] + ): + block["text"] = _TOOL_RESULT_PATTERN.sub(_truncate_match, block["text"]) + elif isinstance(content, str) and "" in content: + msg["content"] = _TOOL_RESULT_PATTERN.sub(_truncate_match, content) return truncated_any