From 999ecc97c4e8a3eb5de285bbeeb6cdcc1c615a85 Mon Sep 17 00:00:00 2001
From: Sean Turner <seanturner83@gmail.com>
Date: Sun, 19 Apr 2026 09:50:35 +0100
Subject: [PATCH 1/5] fix: recover from BadRequestError caused by oversized
 conversation payloads

When tool results (e.g. large config files, CIS benchmark YAML) accumulate
in conversation history, the serialized payload can exceed the provider's
request size limit, causing a persistent HTTP 400. Previously this was not
retried, failing the scan immediately.

Now handles BadRequestError in two stages:
1. Bare retry after 2s (transient 400s from provider hiccups)
2. If STRIX_TRUNCATE_ON_OVERSIZE is enabled (1/true/yes), truncates every
   tool_result XML block longer than 2000 chars in the most recent message
   that contains one, keeping the first 1000 chars plus a "requires manual
   review" notice, then retries. This is opt-in to avoid lossy recovery for
   users who don't want it.

Observed in practice on repos with 50KB+ CIS defaults YAML and 94KB
tfvars.json files hitting Bedrock payload limits at ~6M tokens.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 strix/llm/llm.py | 63 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)
diff --git a/strix/llm/llm.py b/strix/llm/llm.py
index 4f624956a..57790e40b 100644
--- a/strix/llm/llm.py
+++ b/strix/llm/llm.py
@@ -159,12 +159,28 @@ async def generate(
         messages = self._prepare_messages(conversation_history)
         max_retries = int(Config.get("strix_llm_max_retries") or "5")
 
+        bad_request_retried = False
+        bad_request_truncated = False
+
         for attempt in range(max_retries + 1):
             try:
                 async for response in self._stream(messages):
                     yield response
                 return  # noqa: TRY300
             except Exception as e:  # noqa: BLE001
+                if self._is_bad_request(e):
+                    if not bad_request_retried:
+                        bad_request_retried = True
+                        await asyncio.sleep(2)
+                        continue
+                    truncate_enabled = Config.get("strix_truncate_on_oversize") or ""
+                    if (
+                        not bad_request_truncated
+                        and truncate_enabled.lower() in ("1", "true", "yes")
+                        and self._truncate_large_tool_results(messages)
+                    ):
+                        bad_request_truncated = True
+                        continue
                 if attempt >= max_retries or not self._should_retry(e):
                     self._raise_error(e)
                 wait = min(90, 2 * (2**attempt))
@@ -314,6 +330,53 @@ def _extract_cost(self, response: Any) -> float:
         except Exception:  # noqa: BLE001
             return 0.0
 
+    @staticmethod
+    def _truncate_large_tool_results(
+        messages: list[dict[str, Any]], max_chars: int = 2000
+    ) -> bool:
+        """Aggressively truncate large tool results in messages to recover from BadRequestError.
+
+        Scans messages in reverse for tool_result XML blocks that exceed max_chars and
+        replaces their content with a truncated version plus a skip notice. Returns True
+        if any truncation was performed (caller should retry the request).
+        """
+        import re
+
+        truncated_any = False
+        pattern = re.compile(
+            r"(<tool_result>\s*<tool_name>[^<]*</tool_name>\s*<result>)(.*?)(</result>\s*</tool_result>)",
+            re.DOTALL,
+        )
+
+        for msg in reversed(messages):
+            content = msg.get("content")
+            if not isinstance(content, str) or "<tool_result>" not in content:
+                continue
+
+            def _truncate_match(m: re.Match) -> str:
+                prefix, body, suffix = m.group(1), m.group(2), m.group(3)
+                if len(body) <= max_chars:
+                    return m.group(0)
+                nonlocal truncated_any
+                truncated_any = True
+                kept = body[:1000]
+                return (
+                    f"{prefix}{kept}\n\n... [content truncated from {len(body)} to {len(kept)} chars "
+                    f"due to request size limit — file requires manual review] ...{suffix}"
+                )
+
+            msg["content"] = pattern.sub(_truncate_match, content)
+            if truncated_any:
+                break
+
+        return truncated_any
+
+    def _is_bad_request(self, e: Exception) -> bool:
+        code = getattr(e, "status_code", None) or getattr(
+            getattr(e, "response", None), "status_code", None
+        )
+        return code == 400
+
     def _should_retry(self, e: Exception) -> bool:
         code = getattr(e, "status_code", None) or getattr(
             getattr(e, "response", None), "status_code", None

From 4a213b17a2cb1f0f30412c9bde19ca00a0278753 Mon Sep 17 00:00:00 2001
From: Sean Turner <seanturner83@gmail.com>
Date: Mon, 20 Apr 2026 16:54:04 +0100
Subject: [PATCH 2/5] =?UTF-8?q?fix:=20address=20review=20feedback=20?=
 =?UTF-8?q?=E2=80=94=20last-attempt=20guard,=20module-level=20import,=20cl?=
 =?UTF-8?q?osure=20scope?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 strix/llm/llm.py | 34 ++++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/strix/llm/llm.py b/strix/llm/llm.py
index 57790e40b..6e951e2a0 100644
--- a/strix/llm/llm.py
+++ b/strix/llm/llm.py
@@ -1,4 +1,5 @@
 import asyncio
+import re
 from collections.abc import AsyncIterator
 from dataclasses import dataclass
 from typing import Any
@@ -171,6 +172,8 @@ async def generate(
                 if self._is_bad_request(e):
                     if not bad_request_retried:
                         bad_request_retried = True
+                        if attempt >= max_retries:
+                            self._raise_error(e)
                         await asyncio.sleep(2)
                         continue
                     truncate_enabled = Config.get("strix_truncate_on_oversize") or ""
@@ -180,6 +183,8 @@ async def generate(
                         and self._truncate_large_tool_results(messages)
                     ):
                         bad_request_truncated = True
+                        if attempt >= max_retries:
+                            self._raise_error(e)
                         continue
                 if attempt >= max_retries or not self._should_retry(e):
                     self._raise_error(e)
@@ -339,32 +344,33 @@ def _truncate_large_tool_results(
         Scans messages in reverse for tool_result XML blocks that exceed max_chars and
         replaces their content with a truncated version plus a skip notice. Returns True
         if any truncation was performed (caller should retry the request).
-        """
-        import re
 
+        Note: All oversized tool_result blocks within a single message are truncated
+        in one pass — this is intentional to maximise payload size reduction per retry.
+        """
         truncated_any = False
         pattern = re.compile(
             r"(<tool_result>\s*<tool_name>[^<]*</tool_name>\s*<result>)(.*?)(</result>\s*</tool_result>)",
             re.DOTALL,
         )
 
+        def _truncate_match(m: re.Match) -> str:
+            nonlocal truncated_any
+            prefix, body, suffix = m.group(1), m.group(2), m.group(3)
+            if len(body) <= max_chars:
+                return m.group(0)
+            truncated_any = True
+            kept = body[:1000]
+            return (
+                f"{prefix}{kept}\n\n... [content truncated from {len(body)} to {len(kept)} chars "
+                f"due to request size limit — file requires manual review] ...{suffix}"
+            )
+
         for msg in reversed(messages):
             content = msg.get("content")
             if not isinstance(content, str) or "<tool_result>" not in content:
                 continue
 
-            def _truncate_match(m: re.Match) -> str:
-                prefix, body, suffix = m.group(1), m.group(2), m.group(3)
-                if len(body) <= max_chars:
-                    return m.group(0)
-                nonlocal truncated_any
-                truncated_any = True
-                kept = body[:1000]
-                return (
-                    f"{prefix}{kept}\n\n... [content truncated from {len(body)} to {len(kept)} chars "
-                    f"due to request size limit — file requires manual review] ...{suffix}"
-                )
-
             msg["content"] = pattern.sub(_truncate_match, content)
             if truncated_any:
                 break

From e28350e2c43f89e4e0fd3356ea2b31113d6d9b6f Mon Sep 17 00:00:00 2001
From: Sean Turner <seanturner83@gmail.com>
Date: Fri, 24 Apr 2026 09:23:49 +0100
Subject: [PATCH 3/5] fix: allow repeated truncation passes for multi-message
 oversized payloads

Remove one-shot bad_request_truncated guard so truncation retries on each
400 until nothing remains to truncate or the retry budget (max_retries) is
exhausted. Also scan all messages per pass instead of stopping at the first
message that needed truncation.

Addresses review feedback from Greptile on #460.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 strix/llm/llm.py | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/strix/llm/llm.py b/strix/llm/llm.py
index 6e951e2a0..a1a6012eb 100644
--- a/strix/llm/llm.py
+++ b/strix/llm/llm.py
@@ -161,7 +161,6 @@ async def generate(
         max_retries = int(Config.get("strix_llm_max_retries") or "5")
 
         bad_request_retried = False
-        bad_request_truncated = False
 
         for attempt in range(max_retries + 1):
             try:
@@ -178,11 +177,9 @@ async def generate(
                         continue
                     truncate_enabled = Config.get("strix_truncate_on_oversize") or ""
                     if (
-                        not bad_request_truncated
-                        and truncate_enabled.lower() in ("1", "true", "yes")
+                        truncate_enabled.lower() in ("1", "true", "yes")
                         and self._truncate_large_tool_results(messages)
                     ):
-                        bad_request_truncated = True
                         if attempt >= max_retries:
                             self._raise_error(e)
                         continue
@@ -339,14 +336,10 @@ def _extract_cost(self, response: Any) -> float:
     def _truncate_large_tool_results(
         messages: list[dict[str, Any]], max_chars: int = 2000
     ) -> bool:
-        """Aggressively truncate large tool results in messages to recover from BadRequestError.
+        """Truncate large tool_result XML blocks to recover from BadRequestError.
 
-        Scans messages in reverse for tool_result XML blocks that exceed max_chars and
-        replaces their content with a truncated version plus a skip notice. Returns True
-        if any truncation was performed (caller should retry the request).
-
-        Note: All oversized tool_result blocks within a single message are truncated
-        in one pass — this is intentional to maximise payload size reduction per retry.
+        Scans all messages for tool_result blocks exceeding max_chars and truncates them.
+        Called repeatedly on each 400 until it returns False (nothing left to truncate).
         """
         truncated_any = False
         pattern = re.compile(
@@ -370,10 +363,7 @@ def _truncate_match(m: re.Match) -> str:
             content = msg.get("content")
             if not isinstance(content, str) or "<tool_result>" not in content:
                 continue
-
             msg["content"] = pattern.sub(_truncate_match, content)
-            if truncated_any:
-                break
 
         return truncated_any
 

From 6e25294bc81421d8c56bc4a185e77da7b065e5cb Mon Sep 17 00:00:00 2001
From: Sean Turner <seanturner83@gmail.com>
Date: Mon, 27 Apr 2026 18:31:31 +0100
Subject: [PATCH 4/5] =?UTF-8?q?fix:=20address=20review=20feedback=20?=
 =?UTF-8?q?=E2=80=94=20split=20threshold/target,=20sleep=20before=20trunca?=
 =?UTF-8?q?tion=20retry?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Rename max_chars → threshold_chars and add separate truncate_to_chars
  parameter so the threshold and truncation target can differ by design,
  replacing the hardcoded 1000. Callers can now shrink blocks aggressively
  without re-processing blocks that are already acceptable.
- Add 2s sleep before the truncation-path `continue` to match the bare-retry
  pacing. If Bedrock is throttling after the original 400, immediately hitting
  it again risks a second rejection before the truncated payload is evaluated.
---
 strix/llm/llm.py | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/strix/llm/llm.py b/strix/llm/llm.py
index a1a6012eb..6a8ffb222 100644
--- a/strix/llm/llm.py
+++ b/strix/llm/llm.py
@@ -182,6 +182,10 @@ async def generate(
                     ):
                         if attempt >= max_retries:
                             self._raise_error(e)
+                        # Pace the provider — matches the 2s sleep on the bare-retry
+                        # path so a throttled provider isn't hit back-to-back after the
+                        # original 400.
+                        await asyncio.sleep(2)
                         continue
                 if attempt >= max_retries or not self._should_retry(e):
                     self._raise_error(e)
@@ -334,12 +338,21 @@ def _extract_cost(self, response: Any) -> float:
 
     @staticmethod
     def _truncate_large_tool_results(
-        messages: list[dict[str, Any]], max_chars: int = 2000
+        messages: list[dict[str, Any]],
+        threshold_chars: int = 2000,
+        truncate_to_chars: int = 1000,
     ) -> bool:
         """Truncate large tool_result XML blocks to recover from BadRequestError.
 
-        Scans all messages for tool_result blocks exceeding max_chars and truncates them.
-        Called repeatedly on each 400 until it returns False (nothing left to truncate).
+        Scans all messages for tool_result blocks whose body exceeds threshold_chars
+        and shrinks them to truncate_to_chars. Called repeatedly on each 400 until it
+        returns False (nothing left to truncate).
+
+        threshold_chars and truncate_to_chars are independent: the threshold decides
+        which blocks qualify for truncation, and truncate_to_chars is the size of the
+        retained prefix. They are not the same value to allow aggressive shrinking of
+        blocks that are well over the threshold without re-processing blocks that are
+        already acceptable.
         """
         truncated_any = False
         pattern = re.compile(
@@ -350,10 +363,10 @@ def _truncate_large_tool_results(
         def _truncate_match(m: re.Match) -> str:
             nonlocal truncated_any
             prefix, body, suffix = m.group(1), m.group(2), m.group(3)
-            if len(body) <= max_chars:
+            if len(body) <= threshold_chars:
                 return m.group(0)
             truncated_any = True
-            kept = body[:1000]
+            kept = body[:truncate_to_chars]
             return (
                 f"{prefix}{kept}\n\n... [content truncated from {len(body)} to {len(kept)} chars "
                 f"due to request size limit — file requires manual review] ...{suffix}"

From 39c84d368593883dc1cbb4891de7467b4aba6c49 Mon Sep 17 00:00:00 2001
From: bearsyankees <bearsyankees@gmail.com>
Date: Mon, 27 Apr 2026 17:31:14 -0400
Subject: [PATCH 5/5] Handle list-form message content when truncating tool
 results; compile the tool_result regex once at module level

---
 strix/llm/llm.py | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/strix/llm/llm.py b/strix/llm/llm.py
index 6a8ffb222..5cabf227e 100644
--- a/strix/llm/llm.py
+++ b/strix/llm/llm.py
@@ -27,6 +27,12 @@
 litellm.modify_params = True
 
 
+_TOOL_RESULT_PATTERN = re.compile(
+    r"(<tool_result>\s*<tool_name>[^<]*</tool_name>\s*<result>)(.*?)(</result>\s*</tool_result>)",
+    re.DOTALL,
+)
+
+
 class LLMRequestFailedError(Exception):
     def __init__(self, message: str, details: str | None = None):
         super().__init__(message)
@@ -355,10 +361,6 @@ def _truncate_large_tool_results(
         already acceptable.
         """
         truncated_any = False
-        pattern = re.compile(
-            r"(<tool_result>\s*<tool_name>[^<]*</tool_name>\s*<result>)(.*?)(</result>\s*</tool_result>)",
-            re.DOTALL,
-        )
 
         def _truncate_match(m: re.Match) -> str:
             nonlocal truncated_any
@@ -374,9 +376,17 @@ def _truncate_match(m: re.Match) -> str:
 
         for msg in reversed(messages):
             content = msg.get("content")
-            if not isinstance(content, str) or "<tool_result>" not in content:
-                continue
-            msg["content"] = pattern.sub(_truncate_match, content)
+
+            if isinstance(content, list):
+                for block in content:
+                    if (
+                        block.get("type") == "text"
+                        and isinstance(block.get("text"), str)
+                        and "<tool_result>" in block["text"]
+                    ):
+                        block["text"] = _TOOL_RESULT_PATTERN.sub(_truncate_match, block["text"])
+            elif isinstance(content, str) and "<tool_result>" in content:
+                msg["content"] = _TOOL_RESULT_PATTERN.sub(_truncate_match, content)
 
         return truncated_any