Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions tests/test_quest_taskset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import pytest
import verifiers as vf
from pydantic import BaseModel, ValidationError

from verifiers.envs.experimental.composable.tasksets.search.quest.taskset import (
QuestOpenAIClient,
)


class _BinaryResult(BaseModel):
    """Minimal structured-output schema used as the ``response_format`` in this test module."""

    # Free-text explanation accompanying the verdict.
    reasoning: str
    # The boolean verdict itself.
    result: bool


class _FakeStructuredCompletions:
    """Fake of the structured ``completions`` object exposing an async ``parse``."""

    async def parse(self, **kwargs):
        """Validate a fixed, malformed JSON payload against the requested model.

        Only the ``response_format`` keyword is read; every other keyword
        argument is ignored. The result (or exception) of calling
        ``response_format.model_validate_json`` is propagated unchanged.
        """
        # "\q" is not a legal JSON escape sequence, so a real JSON-validating
        # model is expected to reject this payload.
        malformed_payload = r'{"reasoning": "bad \q escape", "result": true}'
        model_cls = kwargs["response_format"]
        return model_cls.model_validate_json(malformed_payload)


class _FakeChat:
    """Fake ``chat`` namespace whose only member is ``completions``."""

    # Class-level attribute: one instance is created when the class body runs
    # and is shared by every access through the class or its instances.
    completions = _FakeStructuredCompletions()


class _FakeBeta:
    """Fake ``beta`` namespace whose only member is ``chat``."""

    # Class-level attribute: a single shared instance built at definition time.
    chat = _FakeChat()


class _FakeOpenAIClient:
    """Fake client object exposing the ``beta.chat.completions.parse`` attribute chain."""

    # Class-level attribute: a single shared instance built at definition time.
    beta = _FakeBeta()


@pytest.mark.asyncio
async def test_quest_structured_parse_error_becomes_invalid_model_response():
    """A pydantic ``ValidationError`` from structured parsing must surface as
    ``vf.InvalidModelResponseError`` with the original error chained as its cause.
    """
    judge = QuestOpenAIClient(client=_FakeOpenAIClient(), model="judge-model")
    request_kwargs = {
        "messages": [{"role": "user", "content": "judge this"}],
        "response_format": _BinaryResult,
    }

    with pytest.raises(vf.InvalidModelResponseError) as exc_info:
        await judge.async_response(**request_kwargs)

    raised = exc_info.value
    # The message identifies the failure as an invalid structured response...
    assert "QUEST judge returned invalid structured response" in str(raised)
    # ...and the underlying validation failure is preserved via ``raise ... from``.
    assert isinstance(raised.__cause__, ValidationError)
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
RateLimitError,
UnprocessableEntityError,
)
from pydantic import BaseModel
from pydantic import BaseModel, ValidationError
from verifiers.envs.experimental.composable import SandboxSpec, SandboxTaskSet
from verifiers.types import ClientConfig
from verifiers.utils.client_utils import setup_openai_client
Expand Down Expand Up @@ -188,6 +188,10 @@ async def async_response(self, *, count_token: bool = False, **kwargs: Any) -> A
)
except _QUEST_JUDGE_ERROR_TYPES as exc:
_raise_quest_judge_error(exc, model=model)
except ValidationError as exc:
raise vf.InvalidModelResponseError(
f"QUEST judge returned invalid structured response for {model}: {exc}"
) from exc
Comment on lines +191 to +194

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we put this into `_raise_quest_judge_error`?

choice = _single_choice(response, context="structured")
parsed = choice.message.parsed
if parsed is None:
Expand Down
Loading