From 9aa0abdf2e281408a2b9645885aef2647f9b153f Mon Sep 17 00:00:00 2001 From: Sam Xie Date: Sun, 1 Feb 2026 18:38:20 -0800 Subject: [PATCH 1/2] Add OpenSubtitles.com REST API provider --- .../subtitles/providers/opensubtitles.py | 500 -------------- .../subtitles/providers/opensubtitles_com.py | 461 +++++++++++++ .../post_processing/subtitles/subtitle.py | 88 +-- src/program/settings/models.py | 31 +- src/tests/test_opensubtitles_com.py | 614 ++++++++++++++++++ 5 files changed, 1126 insertions(+), 568 deletions(-) delete mode 100644 src/program/services/post_processing/subtitles/providers/opensubtitles.py create mode 100644 src/program/services/post_processing/subtitles/providers/opensubtitles_com.py create mode 100644 src/tests/test_opensubtitles_com.py diff --git a/src/program/services/post_processing/subtitles/providers/opensubtitles.py b/src/program/services/post_processing/subtitles/providers/opensubtitles.py deleted file mode 100644 index b8d2f2d7d..000000000 --- a/src/program/services/post_processing/subtitles/providers/opensubtitles.py +++ /dev/null @@ -1,500 +0,0 @@ -""" -OpenSubtitles provider for Riven. -""" - -import base64 -import time -import zlib - -from collections.abc import Iterable -from http import HTTPStatus -from xmlrpc.client import ServerProxy -from typing import Any, Generic, Self, TypeVar, cast -from babelfish import Language, Error as BabelfishError -from loguru import logger -from pydantic import BaseModel, Field, field_validator, model_validator - -from .base import SubtitleItem, SubtitleProvider - -T = TypeVar("T", bound=BaseModel | Iterable[BaseModel] | dict[Any, Any] | None) - - -class StatusMixin(BaseModel): - status: int - - @field_validator("status", mode="before") - def transform_status(cls, status_string: str) -> int: - """Transform OpenSubtitles status string (e.g. 
'200 OK') to integer code.""" - - return int(status_string[:3]) - - @model_validator(mode="after") - def validate_response(self) -> Self: - """Raise exception for HTTP errors based on status code.""" - - status_code = HTTPStatus(self.status) - - if status_code == HTTPStatus.UNAUTHORIZED: - raise Exception("Unauthorized - Invalid credentials") - elif status_code == HTTPStatus.NOT_ACCEPTABLE: - raise Exception("No session - Please login again") - elif status_code == HTTPStatus.PROXY_AUTHENTICATION_REQUIRED: - raise Exception("Download limit reached") - elif status_code == HTTPStatus.SERVICE_UNAVAILABLE: - raise Exception("Service unavailable") - elif not status_code.is_success: - raise Exception(f"OpenSubtitles error: {status_code}") - - return self - - -class OpenSubtitlesAPIResponse(StatusMixin, Generic[T]): - data: T - - -class OpenSubtitlesLoginResponse(StatusMixin): - token: str - - -class OpenSubtitlesSubtitleItem(BaseModel): - id_subtitle_file: str = Field(alias="IDSubtitleFile") - sub_language_id: str = Field(alias="SubLanguageID") - sub_file_name: str | None = Field(alias="SubFileName") - sub_downloads_cnt: str | None = Field(alias="SubDownloadsCnt") - sub_rating: str | None = Field(alias="SubRating") - matched_by: str | None = Field(alias="MatchedBy") - movie_hash: str | None = Field(alias="MovieHash") - movie_name: str | None = Field(alias="MovieName") - - -class OpenSubtitlesDownloadSubtitleItem(BaseModel): - data: str - - -def normalize_language_to_alpha3(language: str) -> str: - """ - Convert language code to ISO 639-3 (3-letter code) for OpenSubtitles API. 
- - Uses babelfish library to handle all language code conversions, supporting: - - ISO 639-1 (2-letter codes like 'en', 'es') - - ISO 639-2 (3-letter codes like 'eng', 'spa') - - ISO 639-2/B (bibliographic codes like 'fre', 'ger') - - ISO 639-3 (terminological codes like 'fra', 'deu') - - Args: - language: Language code in various formats - - Returns: - ISO 639-3 language code (e.g., 'eng', 'spa', 'fra') - """ - - try: - language_str = str(language).strip().lower() - - if not language_str: - logger.warning("Empty language code provided, defaulting to 'eng'") - return "eng" - - # Try different parsing strategies - lang_obj = None - - # Strategy 1: Try as ISO 639-3 (3-letter terminological code) - if len(language_str) == 3: - try: - lang_obj = Language(language_str) - except (BabelfishError, ValueError): - # Strategy 2: Try as ISO 639-2/B (bibliographic code) - try: - lang_obj = Language.fromcode(language_str, "alpha3b") - except (BabelfishError, ValueError, KeyError): - pass - - # Strategy 3: Try as ISO 639-1 (2-letter code) - if lang_obj is None and len(language_str) == 2: - try: - lang_obj = Language.fromcode(language_str, "alpha2") - except (BabelfishError, ValueError, KeyError): - pass - - # Strategy 4: Try parsing as locale string (e.g., 'en-US', 'pt_BR') - if lang_obj is None and ("-" in language_str or "_" in language_str): - try: - # Extract just the language part before the separator - lang_part = language_str.split("-")[0].split("_")[0] - - if len(lang_part) == 2: - lang_obj = Language.fromcode(lang_part, "alpha2") - elif len(lang_part) == 3: - lang_obj = Language(lang_part) - except (BabelfishError, ValueError, KeyError): - pass - - if lang_obj: - return cast(str, lang_obj.alpha3) - - # Fallback to English - logger.warning(f"Could not parse language '{language}', defaulting to 'eng'") - - return "eng" - - except Exception as e: - logger.error( - f"Error normalizing language '{language}': {e}, defaulting to 'eng'" - ) - return "eng" - - -class 
OpenSubtitlesProvider(SubtitleProvider): - """ - OpenSubtitles XML-RPC provider implementation. - - Uses anonymous authentication and searches only by moviehash. - This ensures reliable subtitle matching without requiring user credentials. - """ - - def __init__(self): - self.server_url = "https://api.opensubtitles.org/xml-rpc" - self.user_agent = "VLSub 0.11.1" - self.token = None - self.login_time = None - self.server = ServerProxy(self.server_url, allow_none=True) - - @property - def name(self) -> str: - return "opensubtitles" - - def initialize(self): - """Initialize the provider session with anonymous authentication.""" - - logger.debug(f"Logging in anonymously with user agent: {self.user_agent}") - - # Anonymous login: empty username and password - response = OpenSubtitlesLoginResponse.model_validate( - self.server.LogIn( - "", - "", - "eng", - self.user_agent, - ) - ) - - self.token = response.token - self.login_time = time.time() - - logger.debug("Authenticated to OpenSubtitles (anonymous)") - - def _ensure_authenticated(self) -> bool: - """Ensure we have a valid session token.""" - - current_time = time.time() - - # Check if we need to login (no token or token older than 10 minutes) - if ( - not self.token - or not self.login_time - or (current_time - self.login_time) > 600 - ): - if self.login_time: - logger.debug("Token expired (>10 minutes), re-authenticating...") - - self.initialize() - - return True - - def search_subtitles( - self, - imdb_id: str, - video_hash: str | None = None, - file_size: int | None = None, - filename: str | None = None, - search_tags: str | None = None, - season: int | None = None, - episode: int | None = None, - language: str = "en", - ) -> list[SubtitleItem]: - """ - Search subtitles using multi-strategy approach. 
- - According to OpenSubtitles API documentation: - - Priority: moviehash+moviebytesize > tag > imdbid > query - - When moviehash and moviebytesize are provided, other parameters are ignored - - When tag is provided with imdbid, it filters results by release group/format - - Multiple search criteria can be sent in a single request - - Args: - imdb_id: IMDB ID - video_hash: OpenSubtitles hash of the video file - file_size: Size of the video file in bytes - filename: Original filename (not used - tags are preferred) - search_tags: Comma-separated tags (release group, format) for OpenSubtitles - season: Season number (for TV shows) - episode: Episode number (for TV shows) - language: Language code (ISO 639-1, ISO 639-2, or ISO 639-3) - - Returns: - list of subtitle results, prioritized by match type - """ - - try: - if not self._ensure_authenticated(): - return [] - - # Normalize language to ISO 639-3 format for OpenSubtitles API - opensubtitles_lang = normalize_language_to_alpha3(language) - - # Build search criteria array (multiple strategies in one request) - search_criteria = list[dict[str, str]]() - - # Strategy 1: moviehash + moviebytesize (perfect match - exact file) - if video_hash and file_size: - search_criteria.append( - { - "sublanguageid": opensubtitles_lang, - "moviehash": video_hash, - "moviebytesize": str(file_size), - } - ) - - logger.trace( - f"OpenSubtitles search strategy 1: moviehash={video_hash[:8]}...{video_hash[-8:]}, size={file_size:,} bytes" - ) - - # Strategy 2: imdbid + filename + tag (release-specific match) - if imdb_id and search_tags: - imdb_id = imdb_id.lstrip("tt") # Remove leading 'tt' from IMDB ID - criteria = { - "sublanguageid": opensubtitles_lang, - "imdbid": imdb_id, # Remove leading 'tt' from IMDB ID - "tags": search_tags, - } - - if season is not None: - criteria["season"] = str(season) - - if episode is not None: - criteria["episode"] = str(episode) - - search_criteria.append(criteria) - - logger.trace( - f"OpenSubtitles 
search strategy 2: imdbid={imdb_id}, tags={search_tags}, season={season}, episode={episode}" - ) - - # strategy 3: filename - if filename: - criteria3 = { - "sublanguageid": opensubtitles_lang, - "query": filename, - } - - if season is not None: - criteria3["season"] = str(season) - - if episode is not None: - criteria3["episode"] = str(episode) - - search_criteria.append(criteria3) - - logger.trace( - f"OpenSubtitles search strategy 3: filename={filename}, season={season}, episode={episode}" - ) - - if not search_criteria: - logger.trace("Skipping OpenSubtitles search: no valid search criteria") - return [] - - response = OpenSubtitlesAPIResponse[ - list[OpenSubtitlesSubtitleItem] - ].model_validate(self.server.SearchSubtitles(self.token, search_criteria)) - - if not response.data: - logger.debug("No subtitles found from OpenSubtitles") - return [] - - # Process results and prioritize by match type - # MatchedBy can be: moviehash, tag, imdbid, fulltext - results = list[SubtitleItem]() - norm_hash = str(video_hash).lower() if video_hash else None - - for item in response.data: - try: - # Get match type from API response - matched_by = (item.matched_by or "unknown").lower() - item_hash = str(item.movie_hash or "0").lower() - - # Validate hash matches - ensure MovieHash field matches our hash - if matched_by == "moviehash": - if not norm_hash or item_hash == "0" or item_hash != norm_hash: - # Invalid hash match, skip it - continue - - # Determine match type for scoring - # Priority: moviehash > tag > imdbid > fulltext - is_hash_match = matched_by == "moviehash" - is_tag_match = matched_by == "tag" - is_imdb_match = matched_by == "imdbid" - is_fulltext_match = matched_by == "fulltext" - - results.append( - SubtitleItem( - id=item.id_subtitle_file, - language=item.sub_language_id, - filename=item.sub_file_name or "subtitle.srt", - download_count=int(item.sub_downloads_cnt or 0), - rating=float(item.sub_rating or 0), - matched_by=matched_by, - 
movie_hash=item.movie_hash, - movie_name=item.movie_name or "", - provider=self.name, - score=self._calculate_score( - item, - is_hash_match, - is_tag_match, - is_imdb_match, - is_fulltext_match, - ), - ) - ) - except Exception as e: - logger.warning(f"Error processing subtitle result: {e}") - continue - - # Sort by score (hash > tag > imdb > fulltext) - results.sort(key=lambda x: x.score, reverse=True) - - # Log match type distribution - hash_count = sum(1 for r in results if r.matched_by == "moviehash") - tag_count = sum(1 for r in results if r.matched_by == "tag") - imdb_count = sum(1 for r in results if r.matched_by == "imdbid") - fulltext_count = sum(1 for r in results if r.matched_by == "fulltext") - - logger.debug( - f"Found {len(results)} subtitles from OpenSubtitles (hash:{hash_count}, tag:{tag_count}, imdb:{imdb_count}, fulltext:{fulltext_count})" - ) - - return results - except Exception as e: - error_msg = str(e).lower() - - if "syntax error" in error_msg or "expat" in error_msg: - logger.warning( - "OpenSubtitles server issue (HTML response) - trying other providers" - ) - else: - logger.error(f"OpenSubtitles search error: {e}") - - return [] - - def download_subtitle(self, subtitle_info: SubtitleItem) -> str | None: - """Download subtitle content from OpenSubtitles.""" - - try: - if not self._ensure_authenticated(): - return None - - subtitle_id = subtitle_info.id - - if not subtitle_id: - return None - - logger.debug(f"Downloading subtitle: {subtitle_info.filename}") - - response = OpenSubtitlesAPIResponse[ - list[OpenSubtitlesDownloadSubtitleItem] | None - ].model_validate(self.server.DownloadSubtitles(self.token, [subtitle_id])) - - if not response.data: - return None - - # Decode subtitle content (base64 + zlib compression) - subtitle_data = response.data[0].data - decoded_data = base64.b64decode(subtitle_data) - decompressed_data = zlib.decompress(decoded_data, 47) - - content = self._decode_subtitle_content(decompressed_data) - - if content and 
"opensubtitles vip" in content.lower(): - logger.debug("Received VIP-only content") - - logger.debug(f"Downloaded subtitle successfully") - - return content - - except Exception as e: - logger.error(f"OpenSubtitles download error: {e}") - return None - - def _calculate_score( - self, - subtitle_item: OpenSubtitlesSubtitleItem, - is_hash_match: bool, - is_tag_match: bool = False, - is_imdb_match: bool = False, - is_fulltext_match: bool = False, - ) -> int: - """ - Score results with priority: hash > tag > imdb > fulltext. - - According to OpenSubtitles API, MatchedBy can be: - - moviehash: Perfect file match (highest priority) - - tag: Release-specific match (high priority) - - imdbid: Movie-level match (medium priority) - - fulltext: Query-based match (lowest priority) - - Args: - subtitle_item: Subtitle result from OpenSubtitles - is_hash_match: True if matched by moviehash - is_tag_match: True if matched by tag - is_imdb_match: True if matched by imdbid - is_fulltext_match: True if matched by fulltext - - Returns: - Score (higher is better) - """ - - score = 0 - - # Priority 1: Hash matches (perfect file match) - if is_hash_match: - score += 10000 - # Priority 2: Tag matches (release-specific match) - elif is_tag_match: - score += 5000 - # Priority 3: IMDB matches (movie-level match) - elif is_imdb_match: - score += 2500 - # Priority 4: Fulltext matches (query-based, least accurate) - elif is_fulltext_match: - score += 1000 - - # Tie-breakers: popularity and rating - - # Downloads (max ~100 points) - score += int(subtitle_item.sub_downloads_cnt or 0) // 100 - - # Rating (max 100 points) - score += int(float(subtitle_item.sub_rating or 0) * 10) - - return score - - def _decode_subtitle_content(self, content_bytes: bytes) -> str | None: - """Decode subtitle content with multiple encoding fallbacks.""" - if not content_bytes: - return None - - encodings = ["utf-8", "utf-8-sig", "iso-8859-1", "windows-1252", "cp1252"] - - for encoding in encodings: - try: - decoded = 
content_bytes.decode(encoding) - if len(decoded.strip()) > 0: - return decoded - except (UnicodeDecodeError, UnicodeError): - continue - - # Last resort with error replacement - try: - return content_bytes.decode("utf-8", errors="replace") - except Exception: - logger.error("Failed to decode subtitle content") - return None diff --git a/src/program/services/post_processing/subtitles/providers/opensubtitles_com.py b/src/program/services/post_processing/subtitles/providers/opensubtitles_com.py new file mode 100644 index 000000000..0e077f5e1 --- /dev/null +++ b/src/program/services/post_processing/subtitles/providers/opensubtitles_com.py @@ -0,0 +1,461 @@ +""" +OpenSubtitles.com REST API provider for Riven. + +This provider uses the modern OpenSubtitles.com REST API (v1) which provides: +- JWT-based authentication with automatic token refresh +- Better rate limits for authenticated users (10 free vs 200+ VIP downloads/day) +- Superior search by IMDB ID, TMDB ID, and file hash +""" + +import time +from http import HTTPStatus +from typing import Any +from urllib.parse import urlparse + +from loguru import logger +from pydantic import BaseModel, Field, ValidationError, field_validator + +from program.settings.models import OpenSubtitlesComConfig +from program.utils.request import SmartSession + +from .base import SubtitleItem, SubtitleProvider + +# Whitelist of allowed domains for subtitle download URLs (SSRF prevention) +ALLOWED_DOWNLOAD_DOMAINS = { + "dl.opensubtitles.com", + "www.opensubtitles.com", + "vip.opensubtitles.com", +} + + +class OpenSubtitlesLoginResponse(BaseModel): + """Validated login response from OpenSubtitles API.""" + + token: str + + @field_validator("token", mode="before") + @classmethod + def validate_token_not_empty(cls, v: str) -> str: + if not v or not v.strip(): + raise ValueError("Token cannot be empty") + return v.strip() + + +class OpenSubtitlesSearchResult(BaseModel): + """Single subtitle search result from OpenSubtitles.com API.""" + + 
id: str = Field(alias="id") + attributes: dict[str, Any] + + model_config = {"populate_by_name": True} + + @property + def subtitle_id(self) -> str: + """Get the file ID for downloading.""" + files = self.attributes.get("files", []) + if files: + return str(files[0].get("file_id", self.id)) + return self.id + + @property + def language(self) -> str: + """Get the subtitle language code.""" + return self.attributes.get("language", "") + + @property + def filename(self) -> str: + """Get the subtitle filename.""" + files = self.attributes.get("files", []) + if files: + return files[0].get("file_name", "") + return "" + + @property + def download_count(self) -> int: + """Get the download count.""" + return int(self.attributes.get("download_count", 0)) + + @property + def rating(self) -> float: + """Get the subtitle rating.""" + return float(self.attributes.get("ratings", 0.0)) + + @property + def moviehash_match(self) -> bool: + """Check if this result matched by movie hash.""" + return bool(self.attributes.get("moviehash_match", False)) + + +class OpenSubtitlesComProvider(SubtitleProvider): + """ + OpenSubtitles.com REST API provider with automatic authentication. 
+ + Uses the modern REST API at api.opensubtitles.com/api/v1 which provides: + - JWT-based authentication with 10-minute token refresh + - Multi-strategy search: hash > IMDB ID > filename + - Rate limit handling with Retry-After header support + """ + + API_BASE = "https://api.opensubtitles.com/api/v1" + TOKEN_EXPIRY_SECONDS = 540 # 9 minutes (with 1-minute safety buffer) + + def __init__(self, config: OpenSubtitlesComConfig) -> None: + """Initialize provider with configuration.""" + self.config = config + self.token: str | None = None + self.token_time: float = 0.0 + + # SmartSession provides: rate limiting, circuit breaker, retries + # Rate: 2 req/sec for authenticated users (API allows higher) + self.session = SmartSession( + rate_limits={"api.opensubtitles.com": {"rate": 2, "capacity": 10}} + ) + + logger.debug("OpenSubtitles.com provider initialized") + + def close(self) -> None: + """Clean up HTTP session resources.""" + if self.session: + try: + self.session.close() + logger.debug("OpenSubtitles.com session closed") + except Exception as e: + logger.warning(f"Error closing OpenSubtitles.com session: {e}") + + @property + def name(self) -> str: + """Provider identifier.""" + return "opensubtitles_com" + + def _headers(self, authenticated: bool = True) -> dict[str, str]: + """Build headers for API requests.""" + headers = { + "Api-Key": self.config.api_key, + "User-Agent": self.config.user_agent, + "Content-Type": "application/json", + } + if authenticated and self.token: + headers["Authorization"] = f"Bearer {self.token}" + return headers + + def _ensure_authenticated(self) -> bool: + """ + Ensure valid authentication token exists. + + Note: No explicit Lock needed - Python's GIL provides sufficient + thread safety for this pattern. Worst case: two threads both + call _login() simultaneously, both succeed (harmless). 
+ """ + if self.token and (time.time() - self.token_time) < self.TOKEN_EXPIRY_SECONDS: + logger.trace("OpenSubtitles.com token still valid") + return True + + logger.debug("OpenSubtitles.com token expired or missing, authenticating...") + return self._login() + + def _login(self) -> bool: + """Authenticate with OpenSubtitles API.""" + try: + logger.debug("Attempting OpenSubtitles.com login") + + response = self.session.post( + f"{self.API_BASE}/login", + json={ + "username": self.config.username, + "password": self.config.password, + }, + headers=self._headers(authenticated=False), + ) + + # Handle specific HTTP errors + if response.status_code == HTTPStatus.UNAUTHORIZED: + logger.warning("OpenSubtitles.com: Invalid credentials") + return False + elif response.status_code == HTTPStatus.TOO_MANY_REQUESTS: + logger.warning("OpenSubtitles.com: Rate limited on login") + return False + elif response.status_code >= HTTPStatus.INTERNAL_SERVER_ERROR: + logger.warning( + f"OpenSubtitles.com server error: {response.status_code}" + ) + return False + elif not response.ok: + logger.warning( + f"OpenSubtitles.com login failed: {response.status_code}" + ) + return False + + # Validate response schema + try: + data = OpenSubtitlesLoginResponse.model_validate(response.json()) + except ValidationError as e: + logger.error(f"OpenSubtitles.com: Invalid response schema: {e}") + return False + + self.token = data.token + self.token_time = time.time() + + logger.debug("OpenSubtitles.com authenticated successfully") + return True + + except Exception as e: + logger.warning(f"OpenSubtitles.com authentication error: {e}") + return False + + def search_subtitles( + self, + imdb_id: str, + video_hash: str | None = None, + file_size: int | None = None, + filename: str | None = None, + search_tags: str | None = None, + season: int | None = None, + episode: int | None = None, + language: str = "en", + ) -> list[SubtitleItem]: + """ + Search for subtitles using multi-strategy approach. 
+ + Strategy priority: hash (best) > IMDB ID > filename (fallback) + Returns empty list on failure (never raises). + """ + if not self._ensure_authenticated(): + logger.error("OpenSubtitles.com authentication failed") + return [] + + # Build search strategies in priority order + for strategy in self._build_search_strategies( + video_hash, file_size, imdb_id, filename, season, episode, language + ): + logger.trace(f"Trying search strategy: {strategy['name']} with params={strategy['params']}") + results = self._search(strategy["params"]) + if results: + return self._score_results(results, strategy["name"]) + + logger.debug(f"No subtitles found for language={language}") + return [] + + def _build_search_strategies( + self, + video_hash: str | None, + file_size: int | None, + imdb_id: str | None, + filename: str | None, + season: int | None, + episode: int | None, + lang_code: str, + ) -> list[dict[str, Any]]: + """Build list of search parameter dicts in priority order.""" + strategies: list[dict[str, Any]] = [] + + # Strategy 1: Hash (most accurate) + if video_hash: + strategies.append( + { + "name": "hash", + "params": { + "moviehash": video_hash, + "languages": lang_code, + }, + } + ) + + # Strategy 2: IMDB ID with season/episode + if imdb_id: + # Strip 'tt' prefix if present (use slice, not lstrip which removes all 't' chars) + imdb_num = imdb_id[2:] if imdb_id.startswith("tt") else imdb_id + params: dict[str, Any] = { + "imdb_id": imdb_num, + "languages": lang_code, + } + if season is not None: + params["season_number"] = season + if episode is not None: + params["episode_number"] = episode + strategies.append({"name": "imdb", "params": params}) + + # Strategy 3: Filename fallback + if filename: + params = {"query": filename, "languages": lang_code} + if season is not None: + params["season_number"] = season + if episode is not None: + params["episode_number"] = episode + strategies.append({"name": "filename", "params": params}) + + # Add user-configured query 
params to all strategies + if self.config.query_params: + for strategy in strategies: + strategy["params"].update(self.config.query_params) + + return strategies + + def _search(self, params: dict[str, Any]) -> list[dict[str, Any]]: + """Execute single search request with error handling.""" + try: + response = self.session.get( + f"{self.API_BASE}/subtitles", + params=params, + headers=self._headers(), + ) + + # Handle 401 with single retry + if response.status_code == HTTPStatus.UNAUTHORIZED: + logger.debug("Token expired during search, re-authenticating") + self.token = None + if self._ensure_authenticated(): + response = self.session.get( + f"{self.API_BASE}/subtitles", + params=params, + headers=self._headers(), + ) + + # Handle rate limiting + if response.status_code == HTTPStatus.TOO_MANY_REQUESTS: + retry_after = response.headers.get("Retry-After", "60") + logger.debug(f"Rate limited, retry after {retry_after}s") + return [] + + # Handle server errors + if response.status_code >= HTTPStatus.INTERNAL_SERVER_ERROR: + logger.warning( + f"OpenSubtitles.com server error: {response.status_code}" + ) + return [] + + if response.status_code == HTTPStatus.OK: + return response.json().get("data", []) + + return [] + + except Exception as e: + logger.error(f"Search request failed: {e}") + return [] + + def _score_results( + self, results: list[dict[str, Any]], match_type: str + ) -> list[SubtitleItem]: + """ + Convert and score search results. + + Score weights prioritize match accuracy: + - hash (10000): File hash match is most reliable + - imdb (5000): IMDB ID match is good but less precise + - filename (1000): Text search is least reliable + Additional points from popularity (downloads/100) and rating (rating*10). 
+ """ + scored: list[SubtitleItem] = [] + + # Score weights by match type (higher = more reliable match) + match_scores = {"hash": 10000, "imdb": 5000, "filename": 1000} + base_score = match_scores.get(match_type, 0) + + for item in results: + try: + result = OpenSubtitlesSearchResult.model_validate(item) + + # Calculate score: match_type + popularity + rating + score = ( + base_score + (result.download_count // 100) + int(result.rating * 10) + ) + + # Bonus for hash match + if result.moviehash_match: + score += 5000 + + scored.append( + SubtitleItem( + id=result.subtitle_id, + language=result.language, + filename=result.filename, + download_count=result.download_count, + rating=result.rating, + matched_by=match_type, + movie_hash=None, + movie_name=None, + provider=self.name, + score=score, + ) + ) + except ValidationError as e: + logger.trace(f"Skipping invalid result: {e}") + continue + + # Sort by score descending + scored.sort(key=lambda x: x.score, reverse=True) + return scored + + def download_subtitle(self, subtitle_info: SubtitleItem) -> str | None: + """ + Download subtitle content via REST API. + + Returns subtitle content as string, or None on failure. + Never raises exceptions to caller. + """ + if not self._ensure_authenticated(): + return None + + try: + logger.debug(f"Downloading subtitle: {subtitle_info.filename}") + + # Step 1: Get download link + response = self.session.post( + f"{self.API_BASE}/download", + json={"file_id": int(subtitle_info.id)}, + headers=self._headers(), + ) + + if response.status_code == HTTPStatus.TOO_MANY_REQUESTS: + remaining = response.json().get("remaining", 0) + logger.warning(f"Download limit reached. 
Remaining: {remaining}") + return None + + if response.status_code != HTTPStatus.OK: + logger.error(f"Download request failed: {response.status_code}") + return None + + download_data = response.json() + download_link = download_data.get("link") + + if not download_link: + logger.error("No download link in response") + return None + + # Validate download URL to prevent SSRF attacks + parsed_url = urlparse(download_link) + if not parsed_url.hostname: + logger.error("Download link has no hostname") + return None + + hostname_lower = parsed_url.hostname.lower() + if hostname_lower not in ALLOWED_DOWNLOAD_DOMAINS: + logger.error( + f"Download link from unauthorized domain: {parsed_url.hostname}" + ) + return None + + if parsed_url.scheme != "https": + logger.error(f"Download link uses non-HTTPS scheme: {parsed_url.scheme}") + return None + + # Step 2: Fetch actual subtitle file + file_response = self.session.get( + download_link, + headers=self._headers(authenticated=False), + ) + + if file_response.status_code == HTTPStatus.OK: + content = self._decode_content(file_response.content) + logger.debug(f"Downloaded subtitle: {len(content)} bytes") + return content + + logger.error(f"File download failed: {file_response.status_code}") + return None + + except Exception as e: + logger.error(f"Download failed: {e}") + return None + + def _decode_content(self, content: bytes) -> str: + """Decode subtitle content as UTF-8.""" + return content.decode("utf-8") diff --git a/src/program/services/post_processing/subtitles/subtitle.py b/src/program/services/post_processing/subtitles/subtitle.py index f05fd67aa..6a5bd6887 100644 --- a/src/program/services/post_processing/subtitles/subtitle.py +++ b/src/program/services/post_processing/subtitles/subtitle.py @@ -18,7 +18,6 @@ SubtitleProvider, ) from program.core.analysis_service import AnalysisService -from .providers.opensubtitles import OpenSubtitlesProvider from .utils import calculate_opensubtitles_hash @@ -36,18 +35,17 @@ def 
__init__(self): return # Initialize providers - self.providers = list[SubtitleProvider]() + self.providers: list[SubtitleProvider] = [] self._initialize_providers() if not self.providers: logger.warning("No subtitle providers initialized") return - # Parse language codes - self.languages = self._parse_languages(self.settings.languages) + self.languages = self.settings.languages if not self.languages: - logger.warning("No valid languages configured for subtitles") + logger.warning("No languages configured for subtitles") return self.initialized = True @@ -59,61 +57,29 @@ def __init__(self): def get_key(cls) -> str: return "subtitle" - def _initialize_providers(self): + def _initialize_providers(self) -> None: """Initialize configured subtitle providers.""" provider_configs = self.settings.providers - # Initialize OpenSubtitles provider - if provider_configs.opensubtitles.enabled: - try: - provider = OpenSubtitlesProvider() - self.providers.append(provider) - logger.debug("OpenSubtitles provider initialized") - except Exception as e: - logger.error(f"Failed to initialize OpenSubtitles provider: {e}") - - # Add more providers here in the future - # if provider_configs.get("opensubtitlescom", {}).get("enabled"): - # ... - - @classmethod - def _parse_languages(cls, language_codes: list[str]) -> list[str]: - """ - Parse and validate language codes. 
- - Args: - language_codes: list of language codes (ISO 639-1, ISO 639-2, or ISO 639-3) - - Returns: - list of valid ISO 639-3 language codes - """ - - from .providers.opensubtitles import normalize_language_to_alpha3 - - valid_languages = list[str]() + # Initialize OpenSubtitles.com REST API provider + if provider_configs.opensubtitles_com.enabled: + config = provider_configs.opensubtitles_com + if not config.api_key: + logger.warning( + "OpenSubtitles.com enabled but no API key configured" + ) + else: + try: + from .providers.opensubtitles_com import OpenSubtitlesComProvider - for lang_code in language_codes: - try: - normalized = normalize_language_to_alpha3(lang_code) - - if ( - normalized - and normalized != "eng" - or lang_code.lower() in ["en", "eng"] - ): - valid_languages.append(normalized) - elif normalized == "eng" and lang_code.lower() not in ["en", "eng"]: - # Only add 'eng' if it was explicitly requested - logger.warning( - f"Language code '{lang_code}' normalized to 'eng' (fallback)" + provider = OpenSubtitlesComProvider(config) + self.providers.append(provider) + logger.debug("OpenSubtitles.com provider initialized") + except Exception as e: + logger.error( + f"Failed to initialize OpenSubtitles.com provider: {e}" ) - else: - valid_languages.append(normalized) - except Exception as e: - logger.error(f"Failed to parse language code '{lang_code}': {e}") - - return list(set(valid_languages)) # Remove duplicates @property def enabled(self) -> bool: @@ -176,8 +142,8 @@ def run(self, item: MediaItem) -> bool: # NOT full filenames - see https://trac.opensubtitles.org/opensubtitles/wiki/XMLRPC#Supportedtags search_tags = self._build_search_tags(item) - # Get IMDB ID - imdb_id = item.imdb_id + # Get IMDB ID (use get_top_imdb_id for episodes to get parent Show's imdb_id) + imdb_id = item.get_top_imdb_id() # Get season/episode info for TV shows season = None @@ -240,7 +206,7 @@ def _get_embedded_subtitle_languages(cls, item: MediaItem) -> set[str]: Set of ISO 
639-3 language codes (e.g., {'eng', 'spa', 'fre'}) """ - embedded_languages = set[str]() + embedded_languages: set[str] = set() try: media_entry = item.media_entry @@ -280,7 +246,7 @@ def _build_search_tags(self, item: MediaItem) -> str | None: Comma-separated tags string (e.g., "BluRay,ETRG") or None """ - tags = list[str]() + tags: list[str] = [] try: if not (media_entry := item.media_entry) or not media_entry.media_metadata: @@ -451,7 +417,7 @@ def _fetch_subtitle_for_language( return # Search for subtitles across all providers - all_results = list[SubtitleItem]() + all_results: list[SubtitleItem] = [] for provider in self.providers: try: @@ -614,7 +580,7 @@ def should_submit(self, item: MediaItem) -> bool: embedded_languages = self._get_embedded_subtitle_languages(item) # Get already downloaded subtitle languages from database - downloaded_languages = set[str]() + downloaded_languages: set[str] = set() try: with db_session() as session: @@ -631,7 +597,7 @@ def should_submit(self, item: MediaItem) -> bool: available_languages = embedded_languages | downloaded_languages # Check if any wanted language is missing - languages = self._parse_languages(language_codes=self.settings.languages) + languages = self.settings.languages missing_languages = set(languages) - available_languages diff --git a/src/program/settings/models.py b/src/program/settings/models.py index e2f81c2a4..d2a8e57f0 100644 --- a/src/program/settings/models.py +++ b/src/program/settings/models.py @@ -856,22 +856,39 @@ class NotificationsModel(Observable): ) -class SubtitleProviderConfig(Observable): - enabled: bool = Field(default=False, description="Enable this subtitle provider") +class OpenSubtitlesComConfig(Observable): + """OpenSubtitles.com REST API configuration.""" + + enabled: bool = Field(default=False, description="Enable OpenSubtitles.com provider") + api_key: str = Field(default="", description="API key from opensubtitles.com account") + username: str = Field( + default="", 
description="Username for authenticated downloads" + ) + password: str = Field( + default="", description="Password for authenticated downloads" + ) + query_params: dict[str, str] = Field( + default_factory=dict, + description="Additional query parameters for subtitle search (e.g., foreign_parts_only, hearing_impaired)", + ) + user_agent: str = Field( + default="Riven/1.0", + description="User-Agent header for API requests", + ) class SubtitleProvidersDict(Observable): - opensubtitles: SubtitleProviderConfig = Field( - default_factory=lambda: SubtitleProviderConfig(), - description="OpenSubtitles provider configuration", + opensubtitles_com: OpenSubtitlesComConfig = Field( + default_factory=lambda: OpenSubtitlesComConfig(), + description="OpenSubtitles.com REST API provider configuration", ) class SubtitleConfig(Observable): enabled: bool = Field(default=False, description="Enable subtitle downloading") languages: list[str] = Field( - default_factory=lambda: ["eng"], - description="Subtitle languages to download (ISO 639-2 codes)", + default_factory=lambda: ["en"], + description="Subtitle languages to download (e.g., en, zh-cn, fr)", ) providers: SubtitleProvidersDict = Field( default_factory=lambda: SubtitleProvidersDict(), diff --git a/src/tests/test_opensubtitles_com.py b/src/tests/test_opensubtitles_com.py new file mode 100644 index 000000000..19c5ec38f --- /dev/null +++ b/src/tests/test_opensubtitles_com.py @@ -0,0 +1,614 @@ +""" +Unit tests for OpenSubtitles.com REST API provider. 
+ +Tests cover: +- Authentication flow (success, 401, 429, 5xx) +- Search strategies (hash, IMDB, filename) +- Download with rate limiting +- Token refresh behavior +""" + +import pytest +import httpx + +from program.settings.models import OpenSubtitlesComConfig +from program.services.post_processing.subtitles.providers.opensubtitles_com import ( + OpenSubtitlesComProvider, + OpenSubtitlesLoginResponse, + OpenSubtitlesSearchResult, +) +from program.services.post_processing.subtitles.providers.base import SubtitleItem + + +@pytest.fixture +def mock_config(): + """Create a test configuration.""" + return OpenSubtitlesComConfig( + enabled=True, + api_key="test_api_key_1234567890123456789012345", + username="testuser", + password="testpass", + ) + + +@pytest.fixture +def requests_mock(monkeypatch): + """Mock httpx client for testing HTTP requests.""" + import program.utils.request as request_mod + + routes: dict[tuple[str, str], dict] = {} + + def _add(method: str, url: str, cfg): + key = (method.upper(), url) + if isinstance(cfg, list): + routes[key] = {"queue": list(cfg), "sticky": None} + else: + routes[key] = {"queue": [], "sticky": cfg} + + def handler(request: httpx.Request) -> httpx.Response: + key = (request.method.upper(), str(request.url).split("?")[0]) + entry = routes.get(key) + if not entry: + # Try partial match for URLs with query params + for (method, url), cfg in routes.items(): + if method == request.method.upper() and str(request.url).startswith(url): + entry = cfg + break + if not entry: + return httpx.Response( + 404, + json={"detail": f"Not mocked: {request.method} {request.url}"}, + headers={"Content-Type": "application/json"}, + ) + if entry["queue"]: + cfg = entry["queue"].pop(0) + else: + cfg = entry["sticky"] + if cfg is None: + return httpx.Response( + 404, + json={"detail": "Not mocked"}, + headers={"Content-Type": "application/json"}, + ) + status_code = cfg.get("status_code", 200) + headers = dict(cfg.get("headers", {})) + if "json" in 
cfg: + headers.setdefault("Content-Type", "application/json") + return httpx.Response(status_code, headers=headers, json=cfg["json"]) + content = cfg.get("content", b"") + return httpx.Response(status_code, headers=headers, content=content) + + transport = httpx.MockTransport(handler) + RealClient = httpx.Client + + class _FakeClient: + def __init__(self, *args, **kwargs): + self._client = RealClient(transport=transport) + self.timeout = httpx.Timeout(connect=5.0, read=30.0, write=10.0, pool=5.0) + + def request(self, *args, **kwargs): + return self._client.request(*args, **kwargs) + + def build_request(self, *args, **kwargs): + return self._client.build_request(*args, **kwargs) + + def send(self, *args, **kwargs): + kwargs.pop("timeout", None) + return self._client.send(*args, **kwargs) + + def close(self): + self._client.close() + + monkeypatch.setattr(request_mod.httpx, "Client", _FakeClient, raising=True) + + class _Mock: + def register(self, method: str, url: str, cfg): + _add(method, url, cfg) + + def get(self, url: str, cfg=None, **kwargs): + if cfg is None and kwargs: + cfg = kwargs + _add("GET", url, cfg) + + def post(self, url: str, cfg=None, **kwargs): + if cfg is None and kwargs: + cfg = kwargs + _add("POST", url, cfg) + + return _Mock() + + +class TestOpenSubtitlesLoginResponse: + """Test Pydantic response model validation.""" + + def test_valid_token(self): + """Valid token should parse correctly.""" + response = OpenSubtitlesLoginResponse.model_validate({"token": "valid_jwt_token"}) + assert response.token == "valid_jwt_token" + + def test_empty_token_raises(self): + """Empty token should raise validation error.""" + with pytest.raises(Exception): + OpenSubtitlesLoginResponse.model_validate({"token": ""}) + + def test_whitespace_token_raises(self): + """Whitespace-only token should raise validation error.""" + with pytest.raises(Exception): + OpenSubtitlesLoginResponse.model_validate({"token": " "}) + + def test_token_is_stripped(self): + """Token 
with whitespace should be stripped.""" + response = OpenSubtitlesLoginResponse.model_validate({"token": " token_value "}) + assert response.token == "token_value" + + +class TestOpenSubtitlesSearchResult: + """Test search result model.""" + + def test_basic_result(self): + """Basic result should parse correctly.""" + data = { + "id": "12345", + "attributes": { + "language": "en", + "download_count": 1000, + "ratings": 8.5, + "moviehash_match": True, + "files": [{"file_id": "67890", "file_name": "Movie.2024.srt"}], + }, + } + result = OpenSubtitlesSearchResult.model_validate(data) + assert result.subtitle_id == "67890" + assert result.language == "en" + assert result.filename == "Movie.2024.srt" + assert result.download_count == 1000 + assert result.rating == 8.5 + assert result.moviehash_match is True + + def test_missing_files(self): + """Result without files should fallback to id.""" + data = { + "id": "12345", + "attributes": {"language": "en"}, + } + result = OpenSubtitlesSearchResult.model_validate(data) + assert result.subtitle_id == "12345" + assert result.filename == "" + + +class TestOpenSubtitlesComProvider: + """Test the provider implementation.""" + + def test_provider_name(self, mock_config, requests_mock): + """Provider should return correct name.""" + requests_mock.post( + "https://api.opensubtitles.com/api/v1/login", + json={"token": "test_token"}, + ) + provider = OpenSubtitlesComProvider(mock_config) + assert provider.name == "opensubtitles_com" + + def test_login_success(self, mock_config, requests_mock): + """Successful login should store token.""" + requests_mock.post( + "https://api.opensubtitles.com/api/v1/login", + json={"token": "jwt_token_12345"}, + ) + provider = OpenSubtitlesComProvider(mock_config) + assert provider._login() is True + assert provider.token == "jwt_token_12345" + + def test_login_unauthorized(self, mock_config, requests_mock): + """401 response should return False.""" + requests_mock.post( + 
"https://api.opensubtitles.com/api/v1/login", + status_code=401, + json={"error": "Invalid credentials"}, + ) + provider = OpenSubtitlesComProvider(mock_config) + assert provider._login() is False + assert provider.token is None + + def test_login_rate_limited(self, mock_config, requests_mock): + """429 response should return False.""" + requests_mock.post( + "https://api.opensubtitles.com/api/v1/login", + status_code=429, + json={"error": "Too many requests"}, + ) + provider = OpenSubtitlesComProvider(mock_config) + assert provider._login() is False + + def test_login_server_error(self, mock_config, requests_mock): + """5xx response should return False.""" + requests_mock.post( + "https://api.opensubtitles.com/api/v1/login", + status_code=500, + json={"error": "Internal server error"}, + ) + provider = OpenSubtitlesComProvider(mock_config) + assert provider._login() is False + + def test_search_by_hash(self, mock_config, requests_mock): + """Search by hash should return results.""" + requests_mock.post( + "https://api.opensubtitles.com/api/v1/login", + json={"token": "test_token"}, + ) + requests_mock.get( + "https://api.opensubtitles.com/api/v1/subtitles", + json={ + "data": [ + { + "id": "123", + "attributes": { + "language": "en", + "download_count": 500, + "ratings": 7.5, + "moviehash_match": True, + "files": [{"file_id": "456", "file_name": "Test.srt"}], + }, + } + ] + }, + ) + + provider = OpenSubtitlesComProvider(mock_config) + results = provider.search_subtitles( + imdb_id="tt1234567", + video_hash="abc123def456", + file_size=1000000, + language="en", + ) + + assert len(results) == 1 + assert results[0].id == "456" + assert results[0].matched_by == "hash" + assert results[0].provider == "opensubtitles_com" + + def test_search_by_imdb(self, mock_config, requests_mock): + """Search by IMDB ID should work when hash fails.""" + requests_mock.post( + "https://api.opensubtitles.com/api/v1/login", + json={"token": "test_token"}, + ) + # First search (hash) 
returns empty + # Second search (imdb) returns results + requests_mock.get( + "https://api.opensubtitles.com/api/v1/subtitles", + [ + {"json": {"data": []}}, # hash search - empty + { + "json": { + "data": [ + { + "id": "789", + "attributes": { + "language": "en", + "download_count": 100, + "ratings": 6.0, + "files": [{"file_id": "999", "file_name": "IMDB.srt"}], + }, + } + ] + } + }, + ], + ) + + provider = OpenSubtitlesComProvider(mock_config) + results = provider.search_subtitles( + imdb_id="tt1234567", + video_hash="abc123", + file_size=1000000, + language="en", + ) + + assert len(results) == 1 + assert results[0].matched_by == "imdb" + + def test_search_strips_tt_prefix(self, mock_config, requests_mock): + """IMDB ID 'tt' prefix should be stripped.""" + requests_mock.post( + "https://api.opensubtitles.com/api/v1/login", + json={"token": "test_token"}, + ) + requests_mock.get( + "https://api.opensubtitles.com/api/v1/subtitles", + json={"data": []}, + ) + + provider = OpenSubtitlesComProvider(mock_config) + # Just verify it doesn't crash - the actual param check would need deeper mocking + results = provider.search_subtitles(imdb_id="tt1234567", language="en") + assert results == [] + + def test_imdb_id_stripping_preserves_leading_zeros(self, mock_config, requests_mock): + """IMDB ID stripping should use slice, not lstrip (which removes all 't' chars).""" + requests_mock.post( + "https://api.opensubtitles.com/api/v1/login", + json={"token": "test_token"}, + ) + + provider = OpenSubtitlesComProvider(mock_config) + + # Test that slice-based stripping works correctly + # lstrip("tt") would incorrectly turn "ttt0123456" into "0123456" (3 t's removed) + # slice [2:] correctly returns "t0123456" + strategies = provider._build_search_strategies( + video_hash=None, + file_size=None, + imdb_id="ttt0123456", # Edge case: triple t + filename=None, + season=None, + episode=None, + lang_code="en", + ) + + # Should have one IMDB strategy + imdb_strategy = next((s for s in 
strategies if s["name"] == "imdb"), None) + assert imdb_strategy is not None + # With slice [2:], "ttt0123456" becomes "t0123456" (correct) + # With lstrip("tt"), it would become "0123456" (wrong - removes all leading t's) + assert imdb_strategy["params"]["imdb_id"] == "t0123456" + + def test_query_params_added_to_strategies(self, requests_mock): + """User-configured query_params should be added to all search strategies.""" + requests_mock.post( + "https://api.opensubtitles.com/api/v1/login", + json={"token": "test_token"}, + ) + + config = OpenSubtitlesComConfig( + enabled=True, + api_key="test_api_key_1234567890123456789012345", + username="testuser", + password="testpass", + query_params={"foreign_parts_only": "include", "hearing_impaired": "exclude"}, + ) + provider = OpenSubtitlesComProvider(config) + + strategies = provider._build_search_strategies( + video_hash="abc123", + file_size=None, + imdb_id="tt1234567", + filename="Movie.2024.mkv", + season=None, + episode=None, + lang_code="en", + ) + + # All strategies should have the query_params + for strategy in strategies: + assert strategy["params"]["foreign_parts_only"] == "include" + assert strategy["params"]["hearing_impaired"] == "exclude" + + def test_custom_user_agent(self, requests_mock): + """User-configured user_agent should be used in API requests.""" + requests_mock.post( + "https://api.opensubtitles.com/api/v1/login", + json={"token": "test_token"}, + ) + + config = OpenSubtitlesComConfig( + enabled=True, + api_key="test_api_key_1234567890123456789012345", + username="testuser", + password="testpass", + user_agent="MyApp/2.0", + ) + provider = OpenSubtitlesComProvider(config) + + headers = provider._headers() + assert headers["User-Agent"] == "MyApp/2.0" + + def test_search_rate_limited(self, mock_config, requests_mock): + """429 during search should return empty list after retries.""" + requests_mock.post( + "https://api.opensubtitles.com/api/v1/login", + json={"token": "test_token"}, + ) + 
requests_mock.get( + "https://api.opensubtitles.com/api/v1/subtitles", + status_code=429, + headers={"Retry-After": "1"}, # Short delay for testing + json={"error": "Rate limited"}, + ) + + provider = OpenSubtitlesComProvider(mock_config) + results = provider.search_subtitles(imdb_id="tt1234567", language="en") + assert results == [] + + def test_download_success(self, mock_config, requests_mock): + """Successful download should return subtitle content.""" + requests_mock.post( + "https://api.opensubtitles.com/api/v1/login", + json={"token": "test_token"}, + ) + requests_mock.post( + "https://api.opensubtitles.com/api/v1/download", + json={"link": "https://dl.opensubtitles.com/file.srt"}, + ) + requests_mock.get( + "https://dl.opensubtitles.com/file.srt", + content=b"1\n00:00:01,000 --> 00:00:02,000\nHello World\n", + ) + + provider = OpenSubtitlesComProvider(mock_config) + subtitle_info = SubtitleItem( + id="12345", + language="en", + filename="test.srt", + download_count=100, + rating=8.0, + matched_by="hash", + movie_hash=None, + movie_name=None, + provider="opensubtitles_com", + score=100, + ) + + content = provider.download_subtitle(subtitle_info) + assert content is not None + assert "Hello World" in content + + def test_download_rate_limited(self, mock_config, requests_mock): + """429 during download should return None.""" + requests_mock.post( + "https://api.opensubtitles.com/api/v1/login", + json={"token": "test_token"}, + ) + requests_mock.post( + "https://api.opensubtitles.com/api/v1/download", + status_code=429, + json={"remaining": 0}, + ) + + provider = OpenSubtitlesComProvider(mock_config) + subtitle_info = SubtitleItem( + id="12345", + language="en", + filename="test.srt", + download_count=100, + rating=8.0, + matched_by="hash", + movie_hash=None, + movie_name=None, + provider="opensubtitles_com", + score=100, + ) + + content = provider.download_subtitle(subtitle_info) + assert content is None + + def test_download_rejects_unauthorized_domain(self, 
mock_config, requests_mock): + """Downloads from unauthorized domains should be rejected (SSRF prevention).""" + requests_mock.post( + "https://api.opensubtitles.com/api/v1/login", + json={"token": "test_token"}, + ) + requests_mock.post( + "https://api.opensubtitles.com/api/v1/download", + json={"link": "http://malicious-server.com/steal-data"}, + ) + + provider = OpenSubtitlesComProvider(mock_config) + subtitle_info = SubtitleItem( + id="12345", + language="en", + filename="test.srt", + download_count=100, + rating=8.0, + matched_by="hash", + movie_hash=None, + movie_name=None, + provider="opensubtitles_com", + score=100, + ) + + content = provider.download_subtitle(subtitle_info) + assert content is None + + def test_download_rejects_http_scheme(self, mock_config, requests_mock): + """Downloads using HTTP (non-HTTPS) should be rejected.""" + requests_mock.post( + "https://api.opensubtitles.com/api/v1/login", + json={"token": "test_token"}, + ) + requests_mock.post( + "https://api.opensubtitles.com/api/v1/download", + json={"link": "http://dl.opensubtitles.com/file.srt"}, + ) + + provider = OpenSubtitlesComProvider(mock_config) + subtitle_info = SubtitleItem( + id="12345", + language="en", + filename="test.srt", + download_count=100, + rating=8.0, + matched_by="hash", + movie_hash=None, + movie_name=None, + provider="opensubtitles_com", + score=100, + ) + + content = provider.download_subtitle(subtitle_info) + assert content is None + + def test_download_rejects_internal_address(self, mock_config, requests_mock): + """Downloads from internal addresses should be rejected (SSRF prevention).""" + requests_mock.post( + "https://api.opensubtitles.com/api/v1/login", + json={"token": "test_token"}, + ) + requests_mock.post( + "https://api.opensubtitles.com/api/v1/download", + json={"link": "https://localhost:8080/admin"}, + ) + + provider = OpenSubtitlesComProvider(mock_config) + subtitle_info = SubtitleItem( + id="12345", + language="en", + filename="test.srt", + 
download_count=100, + rating=8.0, + matched_by="hash", + movie_hash=None, + movie_name=None, + provider="opensubtitles_com", + score=100, + ) + + content = provider.download_subtitle(subtitle_info) + assert content is None + + def test_decode_utf8(self, mock_config, requests_mock): + """UTF-8 content should decode correctly.""" + requests_mock.post( + "https://api.opensubtitles.com/api/v1/login", + json={"token": "test_token"}, + ) + provider = OpenSubtitlesComProvider(mock_config) + + content = provider._decode_content("Hello World".encode("utf-8")) + assert content == "Hello World" + + def test_score_calculation(self, mock_config, requests_mock): + """Results should be scored and sorted correctly.""" + requests_mock.post( + "https://api.opensubtitles.com/api/v1/login", + json={"token": "test_token"}, + ) + provider = OpenSubtitlesComProvider(mock_config) + + results = [ + { + "id": "1", + "attributes": { + "language": "en", + "download_count": 100, + "ratings": 5.0, + "files": [{"file_id": "1", "file_name": "low.srt"}], + }, + }, + { + "id": "2", + "attributes": { + "language": "en", + "download_count": 10000, + "ratings": 9.0, + "moviehash_match": True, + "files": [{"file_id": "2", "file_name": "high.srt"}], + }, + }, + ] + + scored = provider._score_results(results, "hash") + + # Second result should be first (higher score due to hash match + downloads) + assert len(scored) == 2 + assert scored[0].id == "2" + assert scored[0].score > scored[1].score From 75767138db14b33dea12d3895f52c3b29f7275bb Mon Sep 17 00:00:00 2001 From: Sam Xie Date: Wed, 4 Feb 2026 16:36:27 -0800 Subject: [PATCH 2/2] Update documents --- README.md | 56 +++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 9d4fee0db..ac9830bc1 100644 --- a/README.md +++ b/README.md @@ -21,12 +21,13 @@ Services currently supported: -| Type | Supported | -| ----------------- | 
--------------------------------------------------------------------------------- | -| Debrid services | Real Debrid, All Debrid | -| Content services | Plex Watchlist, Overseerr, Mdblist, Listrr, Trakt | -| Scraping services | Comet, Jackett, Torrentio, Orionoid, Mediafusion, Prowlarr, Zilean, Rarbg | -| Media servers | Plex, Jellyfin, Emby | +| Type | Supported | +| ------------------ | --------------------------------------------------------------------------------- | +| Debrid services | Real Debrid, All Debrid | +| Content services | Plex Watchlist, Overseerr, Mdblist, Listrr, Trakt | +| Scraping services | Comet, Jackett, Torrentio, Orionoid, Mediafusion, Prowlarr, Zilean, Rarbg | +| Subtitle services | OpenSubtitles.com | +| Media servers | Plex, Jellyfin, Emby | and more to come! @@ -44,6 +45,7 @@ We are constantly adding features and improvements as we go along and squashing - [Installation](#installation) - [Plex](#plex) - [RivenVFS and Caching](#rivenvfs-and-caching) +- [Subtitles](#subtitles) - [Contributing](#contributing) - [License](#license) @@ -329,6 +331,48 @@ The range format (e.g., `e01-05` or `E1-5`) is automatically applied based on yo - File extensions are added automatically - All names are sanitized for filesystem compatibility +## Subtitles + +Riven can automatically download subtitles for your media using the OpenSubtitles.com REST API. + +### OpenSubtitles.com Setup + +1. Create a free account at [opensubtitles.com](https://www.opensubtitles.com) +2. Get your API key from your account settings (API consumers section) + - This requirement will be removed once we obtain a public API key. +3.
Configure in `settings.json`: + +```json +{ + "subtitle": { + "enabled": true, + "languages": ["en", "zh-cn", "fr"], + "providers": { + "opensubtitles_com": { + "enabled": true, + "api_key": "your-api-key", + "username": "your-username", + "password": "your-password" + } + } + } +} +``` + +**Configuration Options**: + +| Setting | Description | +|---------|-------------| +| `subtitle.enabled` | Enable subtitle downloading | +| `subtitle.languages` | Languages to download (ISO 639-1 codes with locale variants, e.g., `en`, `zh-cn`, `pt-br`) | +| `subtitle.providers.opensubtitles_com.enabled` | Enable OpenSubtitles.com provider | +| `subtitle.providers.opensubtitles_com.api_key` | Your OpenSubtitles.com API key | +| `subtitle.providers.opensubtitles_com.username` | Your OpenSubtitles.com username | +| `subtitle.providers.opensubtitles_com.password` | Your OpenSubtitles.com password | +| `subtitle.providers.opensubtitles_com.query_params` | Additional search parameters (e.g., `{"foreign_parts_only": "include"}`) | +| `subtitle.providers.opensubtitles_com.user_agent` | Custom User-Agent header (default: `Riven/1.0`) | + +The provider uses JWT authentication with automatic token refresh and multi-strategy search (file hash → IMDB ID → filename fallback) for best subtitle matching. ## Contributing