Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions pyrit/score/scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
)

from pyrit.exceptions import (
BadRequestException,
InvalidJsonException,
PyritException,
pyrit_json_retry,
Expand Down Expand Up @@ -729,6 +730,7 @@ async def _score_value_with_llm_async(
score_value still needs to be normalized and validated.

Raises:
BadRequestException: If the scorer's LLM response is blocked by content filtering.
ValueError: If required keys are missing from the response or if the response format is invalid.
InvalidJsonException: If the response is not valid JSON.
Exception: For other unexpected errors during scoring.
Expand Down Expand Up @@ -781,6 +783,16 @@ async def _score_value_with_llm_async(

response_json: str = ""
try:
# Check if the scorer's own LLM response was blocked by content filtering
if all(piece.is_blocked() for piece in response[0].message_pieces):
raise BadRequestException(
message=(
f"The scorer's LLM request was blocked by content filtering while scoring "
f"prompt ID: {scored_prompt_id}. Consider using a scorer endpoint with "
f"content filtering disabled for red-teaming workflows."
)
)

# Get the text piece which contains the JSON response containing the score_value and rationale from the LLM
text_piece = next(
piece for piece in response[0].message_pieces if piece.converted_value_data_type == "text"
Expand Down
33 changes: 33 additions & 0 deletions tests/unit/score/test_scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1548,6 +1548,39 @@ async def test_score_value_with_llm_skips_reasoning_piece(good_json):
assert result.score_rationale == "Valid response"


async def test_score_value_with_llm_raises_when_scorer_response_blocked():
    """A scorer LLM reply made up solely of a blocked piece must raise BadRequestException."""
    from pyrit.exceptions import BadRequestException

    mock_target = MagicMock(PromptTarget)
    mock_target.get_identifier.return_value = get_mock_target_identifier("MockChatTarget")

    # The scorer target's only reply is a single error-typed piece carrying
    # response_error="blocked", i.e. there is no text piece holding score JSON.
    filtered_reply = Message(
        message_pieces=[
            MessagePiece(
                role="assistant",
                original_value="",
                original_value_data_type="error",
                converted_value="",
                converted_value_data_type="error",
                conversation_id="test-convo",
                response_error="blocked",
            )
        ]
    )
    mock_target.send_prompt_async = AsyncMock(return_value=[filtered_reply])

    scorer_under_test = MockScorer()

    # Arguments for the scoring call, collected up front so the assertion
    # block below contains nothing but the call expected to raise.
    scoring_kwargs = {
        "prompt_target": mock_target,
        "system_prompt": "system_prompt",
        "message_value": "message_value",
        "message_data_type": "text",
        "scored_prompt_id": "test-prompt-id",
        "category": "category",
        "objective": "task",
    }

    with pytest.raises(BadRequestException, match="blocked by content filtering"):
        await scorer_under_test._score_value_with_llm_async(**scoring_kwargs)


# ── Helpers for score_blocked_content tests ──────────────────────────────────


Expand Down
Loading