PrimeIntellect-ai · samsja · Jun 11, 2026 · rasdani · Jun 11, 2026
diff --git a/tests/test_quest_taskset.py b/tests/test_quest_taskset.py
@@ -0,0 +1,46 @@
+import pytest
+import verifiers as vf
+from pydantic import BaseModel, ValidationError
+
+from verifiers.envs.experimental.composable.tasksets.search.quest.taskset import (
+    QuestOpenAIClient,
+)
+
+
+class _BinaryResult(BaseModel):  # schema the test passes as `response_format`
+    reasoning: str  # matches the "reasoning" key in the fake JSON payload
+    result: bool  # matches the "result" key in the fake JSON payload
+
+
+class _FakeStructuredCompletions:
+    async def parse(self, **kwargs):
+        # Feed the requested schema JSON with an invalid escape; expected to raise.
+        schema = kwargs["response_format"]
+        malformed = r'{"reasoning": "bad \q escape", "result": true}'
+        return schema.model_validate_json(malformed)
+
+
+class _FakeChat:  # stand-in for the `chat` namespace of the fake client
+    completions = _FakeStructuredCompletions()  # class attribute, one shared instance
+
+
+class _FakeBeta:  # stand-in for the `beta` namespace of the fake client
+    chat = _FakeChat()  # class attribute, one shared instance
+
+
+class _FakeOpenAIClient:  # passed to QuestOpenAIClient as `client` in the test
+    beta = _FakeBeta()  # gives the beta.chat.completions.parse attribute path
+
+
+@pytest.mark.asyncio
+async def test_quest_structured_parse_error_becomes_invalid_model_response():
+    # A ValidationError raised inside parse() must surface as a verifiers error.
+    judge = QuestOpenAIClient(client=_FakeOpenAIClient(), model="judge-model")
+    prompt = [{"role": "user", "content": "judge this"}]
+
+    with pytest.raises(vf.InvalidModelResponseError) as caught:
+        await judge.async_response(messages=prompt, response_format=_BinaryResult)
+
+    raised = caught.value
+    assert "QUEST judge returned invalid structured response" in str(raised)
+    assert isinstance(raised.__cause__, ValidationError)
diff --git a/verifiers/envs/experimental/composable/tasksets/search/quest/taskset.py b/verifiers/envs/experimental/composable/tasksets/search/quest/taskset.py
@@ -34,7 +34,7 @@
     RateLimitError,
     UnprocessableEntityError,
 )
-from pydantic import BaseModel
+from pydantic import BaseModel, ValidationError
 from verifiers.envs.experimental.composable import SandboxSpec, SandboxTaskSet
 from verifiers.types import ClientConfig
 from verifiers.utils.client_utils import setup_openai_client
@@ -188,6 +188,10 @@ async def async_response(self, *, count_token: bool = False, **kwargs: Any) -> A
                 )
             except _QUEST_JUDGE_ERROR_TYPES as exc:
                 _raise_quest_judge_error(exc, model=model)
+            except ValidationError as exc:
+                raise vf.InvalidModelResponseError(
+                    f"QUEST judge returned invalid structured response for {model}: {exc}"
+                ) from exc
             choice = _single_choice(response, context="structured")
             parsed = choice.message.parsed
             if parsed is None: