From 0273d5832c246a68225c697e3316a067f0fed50b Mon Sep 17 00:00:00 2001 From: nendo Date: Tue, 24 Mar 2026 05:56:15 +0900 Subject: [PATCH 1/7] Support Range requests for multi-file ROM downloads Cache ZIPs to disk on Range requests so nginx can serve 206 natively. Non-Range requests still stream via mod_zip as before. Refs: rommapp/romm#3160 --- backend/config/__init__.py | 1 + backend/endpoints/roms/__init__.py | 81 ++++++++++- backend/endpoints/tasks.py | 8 ++ backend/startup.py | 2 + backend/tasks/scheduled/cleanup_zip_cache.py | 28 ++++ backend/utils/m3u.py | 22 +++ backend/utils/zip_cache.py | 144 +++++++++++++++++++ docker/nginx/templates/default.conf.template | 6 + 8 files changed, 289 insertions(+), 3 deletions(-) create mode 100644 backend/tasks/scheduled/cleanup_zip_cache.py create mode 100644 backend/utils/m3u.py create mode 100644 backend/utils/zip_cache.py diff --git a/backend/config/__init__.py b/backend/config/__init__.py index c1dd10a882..ba62d3a9bd 100644 --- a/backend/config/__init__.py +++ b/backend/config/__init__.py @@ -36,6 +36,7 @@ def _get_env(var: str, fallback: str | None = None) -> str | None: LIBRARY_BASE_PATH: Final[str] = f"{ROMM_BASE_PATH}/library" RESOURCES_BASE_PATH: Final[str] = f"{ROMM_BASE_PATH}/resources" ASSETS_BASE_PATH: Final[str] = f"{ROMM_BASE_PATH}/assets" +ZIP_CACHE_PATH: Final[str] = f"{ROMM_BASE_PATH}/cache/zips" FRONTEND_RESOURCES_PATH: Final[str] = "/assets/romm/resources" # SEVEN ZIP diff --git a/backend/endpoints/roms/__init__.py b/backend/endpoints/roms/__init__.py index 8178eb9207..a6cdfb640b 100644 --- a/backend/endpoints/roms/__init__.py +++ b/backend/endpoints/roms/__init__.py @@ -8,6 +8,7 @@ from urllib.parse import quote from zipfile import ZIP_DEFLATED, ZIP_STORED, ZipFile, ZipInfo +import anyio import pydash from anyio import Path, open_file from fastapi import ( @@ -65,9 +66,18 @@ from utils.database import safe_int, safe_str_to_bool from utils.filesystem import sanitize_filename from utils.hashing import crc32_to_hex +from utils.m3u import generate_m3u_content from utils.nginx import FileRedirectResponse, ZipContentLine, ZipResponse from utils.router import APIRouter from utils.validation import ValidationError +from utils.zip_cache import ( + ZipFileEntry, + build_cached_zip, + get_cache_key, + get_cached_zip, + get_zip_redirect_path, + has_space_for_cache, +) from .files import router as files_router from .manual import router as manual_router @@ -890,6 +900,28 @@ async def head_rom_content( download_path=Path(f"/library/{files[0].full_path}"), ) + hidden_folder = safe_str_to_bool(request.query_params.get("hidden_folder", "")) + entries = [ + ZipFileEntry( + download_name=f.file_name_for_download(hidden_folder), + full_path=f.full_path, + file_size_bytes=f.file_size_bytes, + updated_at_epoch=f.updated_at.timestamp(), + ) + for f in files + ] + cache_key = get_cache_key(rom.id, entries, hidden_folder) + zip_path = get_cached_zip(rom.id, cache_key) + if zip_path: + return Response( + headers={ + "Content-Type": "application/zip", + "Content-Length": str(zip_path.stat().st_size), + "Accept-Ranges": "bytes", + "Content-Disposition": f"attachment; filename*=UTF-8''{quote(file_name)}.zip; filename=\"{quote(file_name)}.zip\"", + }, + ) + return Response( media_type="application/zip", headers={ @@ -1028,6 +1060,51 @@ async def build_zip_in_memory() -> bytes: download_path=Path(f"/library/{files[0].full_path}"), ) + # Multi-file path: serve cached ZIP for Range requests (resumable), + # fall through to mod_zip streaming for non-Range 
requests. + range_header = request.headers.get("range") + if range_header: + entries = [ + ZipFileEntry( + download_name=f.file_name_for_download(hidden_folder), + full_path=f.full_path, + file_size_bytes=f.file_size_bytes, + updated_at_epoch=f.updated_at.timestamp(), + ) + for f in files + ] + cache_key = get_cache_key(rom.id, entries, hidden_folder) + zip_path = get_cached_zip(rom.id, cache_key) + if zip_path: + return FileRedirectResponse( + download_path=get_zip_redirect_path(rom.id, cache_key), + filename=f"{file_name}.zip", + ) + if has_space_for_cache(entries): + m3u_content = ( + None + if rom.has_m3u_file() + else generate_m3u_content(files, hidden_folder) + ) + m3u_filename = None if rom.has_m3u_file() else f"{file_name}.m3u" + await anyio.to_thread.run_sync( + lambda: build_cached_zip( + rom_id=rom.id, + entries=entries, + m3u_content=m3u_content, + m3u_filename=m3u_filename, + cache_key=cache_key, + ) + ) + return FileRedirectResponse( + download_path=get_zip_redirect_path(rom.id, cache_key), + filename=f"{file_name}.zip", + ) + log.warning( + f"Insufficient disk space to cache ZIP for ROM {rom.id}, " + "falling back to streaming" + ) + content_lines = [ ZipContentLine( crc32=None, # The CRC hash stored for compressed files is for the uncompressed content @@ -1039,9 +1116,7 @@ async def build_zip_in_memory() -> bytes: ] if not rom.has_m3u_file(): - m3u_encoded_content = "\n".join( - [f.file_name_for_download(hidden_folder) for f in m3u_files] - ).encode() + m3u_encoded_content = generate_m3u_content(files, hidden_folder) m3u_base64_content = b64encode(m3u_encoded_content).decode() m3u_line = ZipContentLine( crc32=crc32_to_hex(binascii.crc32(m3u_encoded_content)), diff --git a/backend/endpoints/tasks.py b/backend/endpoints/tasks.py index 5b2c507895..99aa83c411 100644 --- a/backend/endpoints/tasks.py +++ b/backend/endpoints/tasks.py @@ -34,6 +34,7 @@ ) from tasks.manual.cleanup_missing_roms import cleanup_missing_roms_task from tasks.manual.cleanup_orphaned_resources import cleanup_orphaned_resources_task +from tasks.scheduled.cleanup_zip_cache import cleanup_zip_cache_task from tasks.scheduled.convert_images_to_webp import convert_images_to_webp_task from tasks.scheduled.scan_library import scan_library_task from tasks.scheduled.update_launchbox_metadata import update_launchbox_metadata_task @@ -89,6 +90,13 @@ class ManualTask(ScheduledTask): "task": convert_images_to_webp_task, } ), + ScheduledTask( + { + "name": "cleanup_zip_cache", + "type": TaskType.CLEANUP, + "task": cleanup_zip_cache_task, + } + ), ] manual_tasks: list[ManualTask] = [ diff --git a/backend/startup.py b/backend/startup.py index e366e195ef..d874277d25 100644 --- a/backend/startup.py +++ b/backend/startup.py @@ -26,6 +26,7 @@ from logger.logger import log from models.firmware import FIRMWARE_FIXTURES_DIR, KNOWN_BIOS_KEY from tasks.scheduled.cleanup_netplay import cleanup_netplay_task +from tasks.scheduled.cleanup_zip_cache import cleanup_zip_cache_task from tasks.scheduled.convert_images_to_webp import convert_images_to_webp_task from tasks.scheduled.scan_library import scan_library_task from tasks.scheduled.sync_retroachievements_progress import ( @@ -49,6 +50,7 @@ async def main() -> None: # Initialize scheduled tasks cleanup_netplay_task.init() + cleanup_zip_cache_task.init() if ENABLE_SCHEDULED_RESCAN: log.info("Starting scheduled rescan") diff --git a/backend/tasks/scheduled/cleanup_zip_cache.py b/backend/tasks/scheduled/cleanup_zip_cache.py new file mode 100644 index 0000000000..a197b86bc7 --- /dev/null 
+++ b/backend/tasks/scheduled/cleanup_zip_cache.py @@ -0,0 +1,28 @@ +from logger.logger import log +from tasks.tasks import PeriodicTask, TaskType +from utils.zip_cache import cleanup_stale_zips + + +class CleanupZipCacheTask(PeriodicTask): + def __init__(self): + super().__init__( + title="Scheduled ZIP cache cleanup", + description="Removes cached ZIP files older than 48 hours", + task_type=TaskType.CLEANUP, + enabled=True, + manual_run=False, + cron_string="0 4 * * *", # Daily at 4 AM + func="tasks.scheduled.cleanup_zip_cache.cleanup_zip_cache_task.run", + ) + + async def run(self) -> None: + if not self.enabled: + self.unschedule() + return + + deleted = cleanup_stale_zips(max_age_hours=48) + if deleted: + log.info(f"Cleaned up {deleted} stale cached ZIP files") + + +cleanup_zip_cache_task = CleanupZipCacheTask() diff --git a/backend/utils/m3u.py b/backend/utils/m3u.py new file mode 100644 index 0000000000..b8f5275b71 --- /dev/null +++ b/backend/utils/m3u.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from models.rom import RomFile + + +def generate_m3u_content( + files: list[RomFile], + hidden_folder: bool, +) -> bytes: + """Generate M3U playlist content for multi-file ROMs. + + If .cue files are present, only those are listed (avoids invalid entries + like raw .bin tracks). Otherwise all files are listed. + """ + cue_files = [f for f in files if f.file_extension.lower() == "cue"] + m3u_files = cue_files if cue_files else files + return "\n".join( + f.file_name_for_download(hidden_folder) for f in m3u_files + ).encode() diff --git a/backend/utils/zip_cache.py b/backend/utils/zip_cache.py new file mode 100644 index 0000000000..ead730ea8f --- /dev/null +++ b/backend/utils/zip_cache.py @@ -0,0 +1,144 @@ +from __future__ import annotations + +import dataclasses +import hashlib +import os +import tempfile +from datetime import datetime +from pathlib import Path +from stat import S_IFREG +from zipfile import ZIP_STORED, ZipFile, ZipInfo + +from config import LIBRARY_BASE_PATH, ZIP_CACHE_PATH +from logger.logger import log + + +@dataclasses.dataclass(frozen=True) +class ZipFileEntry: + """Thread-safe snapshot of a RomFile's download-relevant data.""" + + download_name: str + full_path: str + file_size_bytes: int + updated_at_epoch: float + + +def get_cache_key( + rom_id: int, + entries: list[ZipFileEntry], + hidden_folder: bool, +) -> str: + """Deterministic cache key derived from ROM state.""" + parts = [ + str(rom_id), + str(hidden_folder), + str(max(e.updated_at_epoch for e in entries)), + ] + for e in sorted(entries, key=lambda x: x.download_name): + parts.append(f"{e.download_name}:{e.file_size_bytes}") + digest = hashlib.sha256("|".join(parts).encode()).hexdigest()[:16] + return digest + + +def _cache_dir(rom_id: int) -> Path: + return Path(ZIP_CACHE_PATH) / str(rom_id) + + +def _cache_file(rom_id: int, cache_key: str) -> Path: + return _cache_dir(rom_id) / f"{cache_key}.zip" + + +def get_cached_zip(rom_id: int, cache_key: str) -> Path | None: + """Return the cached ZIP path if it exists on disk, else None.""" + path = _cache_file(rom_id, cache_key) + return path if path.exists() else None + + +def has_space_for_cache(entries: list[ZipFileEntry]) -> bool: + """Check if there is enough disk space to build the cached ZIP. + + Requires 2x the estimated ZIP size as a safety buffer to avoid filling + the disk during the build. 
+ """ + estimated_size = sum(e.file_size_bytes for e in entries) + cache_root = Path(ZIP_CACHE_PATH) + cache_root.mkdir(parents=True, exist_ok=True) + stat = os.statvfs(cache_root) + available = stat.f_bavail * stat.f_frsize + return available > estimated_size * 2 + + +def build_cached_zip( + rom_id: int, + entries: list[ZipFileEntry], + m3u_content: bytes | None, + m3u_filename: str | None, + cache_key: str, +) -> Path: + """Build a ZIP_STORED archive on disk and return its path. + + Writes to a temp file in the same directory, then atomically renames to + the final path to prevent serving partial files. + """ + target = _cache_file(rom_id, cache_key) + if target.exists(): + return target + + target.parent.mkdir(parents=True, exist_ok=True) + now = datetime.now().timetuple()[:6] + + fd, tmp_path = tempfile.mkstemp(dir=target.parent, suffix=".tmp") + try: + os.close(fd) + with ZipFile(tmp_path, "w") as zf: + for entry in entries: + src = Path(LIBRARY_BASE_PATH) / entry.full_path + info = ZipInfo(filename=entry.download_name, date_time=now) + info.external_attr = S_IFREG | 0o600 + info.compress_type = ZIP_STORED + with open(src, "rb") as f: + zf.writestr(info, f.read()) + + if m3u_content is not None and m3u_filename is not None: + m3u_info = ZipInfo(filename=m3u_filename, date_time=now) + m3u_info.external_attr = S_IFREG | 0o600 + m3u_info.compress_type = ZIP_STORED + zf.writestr(m3u_info, m3u_content) + + os.rename(tmp_path, target) + except BaseException: + if os.path.exists(tmp_path): + os.unlink(tmp_path) + raise + + log.info(f"Built cached ZIP for ROM {rom_id}: {target.name}") + return target + + +def get_zip_redirect_path(rom_id: int, cache_key: str) -> Path: + """Return the nginx-internal URL path for the cached ZIP.""" + return Path(f"/cache/zips/{rom_id}/{cache_key}.zip") + + +def cleanup_stale_zips(max_age_hours: int = 48) -> int: + """Remove cached ZIPs older than max_age_hours. 
Returns count deleted.""" + cache_root = Path(ZIP_CACHE_PATH) + if not cache_root.exists(): + return 0 + + import time + + cutoff = time.time() - (max_age_hours * 3600) + deleted = 0 + + for rom_dir in cache_root.iterdir(): + if not rom_dir.is_dir(): + continue + for zip_file in rom_dir.glob("*.zip"): + if zip_file.stat().st_mtime < cutoff: + zip_file.unlink() + deleted += 1 + if rom_dir.exists() and not any(rom_dir.iterdir()): + rom_dir.rmdir() + + return deleted diff --git a/docker/nginx/templates/default.conf.template b/docker/nginx/templates/default.conf.template index 80b58e785d..477e8d84f4 100644 --- a/docker/nginx/templates/default.conf.template +++ b/docker/nginx/templates/default.conf.template @@ -71,6 +71,12 @@ server { alias "${ROMM_BASE_PATH}/library/"; } + # Internally redirect cached zip file requests (Range-resumable downloads) + location /cache/ { + internal; + alias "${ROMM_BASE_PATH}/cache/"; + } + # Internal decoding endpoint, used to decode base64 encoded data location /decode { internal; From 476f3ef6d6212a8d1f6fccfd7512d4d30b271936 Mon Sep 17 00:00:00 2001 From: nendo Date: Tue, 24 Mar 2026 06:13:45 +0900 Subject: [PATCH 2/7] Add tests, fix streaming write and code standards - Use ZipFile.write() instead of reading entire files into memory - Extract magic numbers to module constants - Use hl() for log message highlighting - Tests for zip_cache, m3u, and cleanup task using mocker fixture --- backend/tests/tasks/test_cleanup_zip_cache.py | 30 ++ backend/tests/utils/test_m3u.py | 59 ++++ backend/tests/utils/test_zip_cache.py | 291 ++++++++++++++++++ backend/utils/zip_cache.py | 45 ++- 4 files changed, 398 insertions(+), 27 deletions(-) create mode 100644 backend/tests/tasks/test_cleanup_zip_cache.py create mode 100644 backend/tests/utils/test_m3u.py create mode 100644 backend/tests/utils/test_zip_cache.py diff --git a/backend/tests/tasks/test_cleanup_zip_cache.py b/backend/tests/tasks/test_cleanup_zip_cache.py new file mode 100644 index 0000000000..6ea0edd5ad --- /dev/null +++ b/backend/tests/tasks/test_cleanup_zip_cache.py @@ -0,0 +1,30 @@ +from unittest.mock import AsyncMock + +from tasks.scheduled.cleanup_zip_cache import CleanupZipCacheTask + + +class TestCleanupZipCacheTask: + def test_configuration(self): + task = CleanupZipCacheTask() + assert task.enabled is True + assert task.cron_string == "0 4 * * *" + assert "cleanup_zip_cache" in task.func + + async def test_run_calls_cleanup(self, mocker): + task = CleanupZipCacheTask() + mock_cleanup = mocker.patch( + "tasks.scheduled.cleanup_zip_cache.cleanup_stale_zips", + return_value=3, + ) + await task.run() + mock_cleanup.assert_called_once_with(max_age_hours=48) + + async def test_run_disabled_unschedules(self, mocker): + task = CleanupZipCacheTask() + task.enabled = False + task.unschedule = AsyncMock() + mock_cleanup = mocker.patch( + "tasks.scheduled.cleanup_zip_cache.cleanup_stale_zips", + ) + await task.run() + mock_cleanup.assert_not_called() diff --git a/backend/tests/utils/test_m3u.py b/backend/tests/utils/test_m3u.py new file mode 100644 index 0000000000..9abe4454e7 --- /dev/null +++ b/backend/tests/utils/test_m3u.py @@ -0,0 +1,59 @@ +from unittest.mock import MagicMock + +from utils.m3u import generate_m3u_content + + +def _make_file(name: str, extension: str, download_name: str | None = None): + f = MagicMock() + f.file_extension = extension + f.file_name_for_download.return_value = download_name or name + return f + + +class TestGenerateM3uContent: + def test_single_file(self): + files = 
[_make_file("game.bin", "bin")] + result = generate_m3u_content(files, hidden_folder=False) + assert result == b"game.bin" + files[0].file_name_for_download.assert_called_once_with(False) + + def test_multiple_files(self): + files = [ + _make_file("disc1.chd", "chd"), + _make_file("disc2.chd", "chd"), + _make_file("disc3.chd", "chd"), + ] + result = generate_m3u_content(files, hidden_folder=False) + assert result == b"disc1.chd\ndisc2.chd\ndisc3.chd" + + def test_cue_files_preferred_over_bin(self): + files = [ + _make_file("track01.bin", "bin"), + _make_file("track02.bin", "bin"), + _make_file("game.cue", "cue"), + ] + result = generate_m3u_content(files, hidden_folder=False) + assert result == b"game.cue" + + def test_cue_case_insensitive(self): + files = [ + _make_file("track.bin", "bin"), + _make_file("game.CUE", "CUE", download_name="game.CUE"), + ] + result = generate_m3u_content(files, hidden_folder=False) + assert result == b"game.CUE" + + def test_hidden_folder_passed_through(self): + files = [_make_file("game.chd", "chd", download_name=".hidden/game.chd")] + result = generate_m3u_content(files, hidden_folder=True) + assert result == b".hidden/game.chd" + files[0].file_name_for_download.assert_called_once_with(True) + + def test_no_cue_files_lists_all(self): + files = [ + _make_file("disc1.chd", "chd"), + _make_file("disc2.chd", "chd"), + ] + result = generate_m3u_content(files, hidden_folder=False) + lines = result.decode().split("\n") + assert len(lines) == 2 diff --git a/backend/tests/utils/test_zip_cache.py b/backend/tests/utils/test_zip_cache.py new file mode 100644 index 0000000000..015b6323e9 --- /dev/null +++ b/backend/tests/utils/test_zip_cache.py @@ -0,0 +1,291 @@ +import os +import time +from zipfile import ZipFile + +import pytest + +from utils.zip_cache import ( + ZipFileEntry, + build_cached_zip, + cleanup_stale_zips, + get_cache_key, + get_cached_zip, + get_zip_redirect_path, + has_space_for_cache, +) + + +def _entry( + name: str = "game.bin", size: int = 1024, epoch: float = 1000.0 +) -> ZipFileEntry: + return ZipFileEntry( + download_name=name, + full_path=f"roms/nes/{name}", + file_size_bytes=size, + updated_at_epoch=epoch, + ) + + +class TestGetCacheKey: + def test_deterministic(self): + entries = [_entry("a.bin", 100), _entry("b.bin", 200)] + key1 = get_cache_key(1, entries, False) + key2 = get_cache_key(1, entries, False) + assert key1 == key2 + + def test_different_rom_ids(self): + entries = [_entry()] + assert get_cache_key(1, entries, False) != get_cache_key(2, entries, False) + + def test_different_hidden_folder(self): + entries = [_entry()] + assert get_cache_key(1, entries, False) != get_cache_key(1, entries, True) + + def test_different_files(self): + e1 = [_entry("a.bin")] + e2 = [_entry("b.bin")] + assert get_cache_key(1, e1, False) != get_cache_key(1, e2, False) + + def test_different_updated_at(self): + e1 = [_entry(epoch=1000.0)] + e2 = [_entry(epoch=2000.0)] + assert get_cache_key(1, e1, False) != get_cache_key(1, e2, False) + + def test_order_independent(self): + entries_a = [_entry("a.bin", 100), _entry("b.bin", 200)] + entries_b = [_entry("b.bin", 200), _entry("a.bin", 100)] + assert get_cache_key(1, entries_a, False) == get_cache_key(1, entries_b, False) + + def test_returns_hex_string(self): + key = get_cache_key(1, [_entry()], False) + assert len(key) == 16 + int(key, 16) # should not raise + + def test_file_change_invalidates_cache(self): + entries_v1 = [ + _entry("disc1.chd", 500, epoch=1000.0), + _entry("disc2.chd", 600, epoch=1000.0), + ] 
+ entries_v2 = [ + _entry("disc1.chd", 500, epoch=1000.0), + _entry("disc2.chd", 600, epoch=2000.0), + ] + assert get_cache_key(1, entries_v1, False) != get_cache_key( + 1, entries_v2, False + ) + + def test_file_size_change_invalidates_cache(self): + e1 = [_entry("game.bin", size=1000, epoch=1000.0)] + e2 = [_entry("game.bin", size=2000, epoch=1000.0)] + assert get_cache_key(1, e1, False) != get_cache_key(1, e2, False) + + +class TestGetCachedZip: + def test_returns_none_when_missing(self, tmp_path, mocker): + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) + assert get_cached_zip(1, "abc123") is None + + def test_returns_path_when_exists(self, tmp_path, mocker): + rom_dir = tmp_path / "1" + rom_dir.mkdir() + (rom_dir / "abc123.zip").write_bytes(b"fake") + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) + result = get_cached_zip(1, "abc123") + assert result is not None + assert result.name == "abc123.zip" + + def test_old_key_not_returned_for_new_key(self, tmp_path, mocker): + rom_dir = tmp_path / "1" + rom_dir.mkdir() + (rom_dir / "oldkey.zip").write_bytes(b"old content") + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) + assert get_cached_zip(1, "oldkey") is not None + assert get_cached_zip(1, "newkey") is None + + +class TestBuildCachedZip: + @pytest.fixture + def source_files(self, tmp_path): + lib = tmp_path / "library" + rom_dir = lib / "roms" / "nes" + rom_dir.mkdir(parents=True) + (rom_dir / "disc1.chd").write_bytes(b"disc1data") + (rom_dir / "disc2.chd").write_bytes(b"disc2data") + return lib + + def test_builds_valid_zip(self, tmp_path, source_files, mocker): + cache_dir = tmp_path / "cache" + entries = [ + ZipFileEntry("disc1.chd", "roms/nes/disc1.chd", 9, 1000.0), + ZipFileEntry("disc2.chd", "roms/nes/disc2.chd", 9, 1000.0), + ] + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(cache_dir)) + mocker.patch("utils.zip_cache.LIBRARY_BASE_PATH", str(source_files)) + result = build_cached_zip( + rom_id=42, + entries=entries, + m3u_content=None, + m3u_filename=None, + cache_key="testkey", + ) + + assert result.exists() + with ZipFile(result) as zf: + names = zf.namelist() + assert "disc1.chd" in names + assert "disc2.chd" in names + assert zf.read("disc1.chd") == b"disc1data" + + def test_includes_m3u(self, tmp_path, source_files, mocker): + cache_dir = tmp_path / "cache" + entries = [ZipFileEntry("disc1.chd", "roms/nes/disc1.chd", 9, 1000.0)] + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(cache_dir)) + mocker.patch("utils.zip_cache.LIBRARY_BASE_PATH", str(source_files)) + result = build_cached_zip( + rom_id=42, + entries=entries, + m3u_content=b"disc1.chd\ndisc2.chd", + m3u_filename="game.m3u", + cache_key="testkey2", + ) + + with ZipFile(result) as zf: + assert "game.m3u" in zf.namelist() + assert zf.read("game.m3u") == b"disc1.chd\ndisc2.chd" + + def test_skips_build_if_exists(self, tmp_path, mocker): + cache_dir = tmp_path / "cache" / "42" + cache_dir.mkdir(parents=True) + existing = cache_dir / "existing.zip" + existing.write_bytes(b"already here") + + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path / "cache")) + result = build_cached_zip( + rom_id=42, + entries=[ZipFileEntry("disc1.chd", "roms/nes/disc1.chd", 9, 1000.0)], + m3u_content=None, + m3u_filename=None, + cache_key="existing", + ) + assert result.read_bytes() == b"already here" + + def test_cleans_up_temp_on_error(self, tmp_path, source_files, mocker): + cache_dir = tmp_path / "cache" + entries = [ZipFileEntry("missing.bin", "roms/nes/missing.bin", 100, 
1000.0)] + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(cache_dir)) + mocker.patch("utils.zip_cache.LIBRARY_BASE_PATH", str(source_files)) + + with pytest.raises(FileNotFoundError): + build_cached_zip( + rom_id=42, + entries=entries, + m3u_content=None, + m3u_filename=None, + cache_key="failkey", + ) + + rom_cache = cache_dir / "42" + if rom_cache.exists(): + assert not list(rom_cache.glob("*.tmp")) + + def test_new_key_builds_alongside_old(self, tmp_path, source_files, mocker): + cache_dir = tmp_path / "cache" + rom_cache = cache_dir / "42" + rom_cache.mkdir(parents=True) + old_zip = rom_cache / "oldkey.zip" + old_zip.write_bytes(b"old zip content") + + entries = [ZipFileEntry("disc1.chd", "roms/nes/disc1.chd", 9, 2000.0)] + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(cache_dir)) + mocker.patch("utils.zip_cache.LIBRARY_BASE_PATH", str(source_files)) + build_cached_zip( + rom_id=42, + entries=entries, + m3u_content=None, + m3u_filename=None, + cache_key="newkey", + ) + + assert old_zip.exists(), "Old cache must not be deleted during build" + assert (rom_cache / "newkey.zip").exists() + + +class TestGetZipRedirectPath: + def test_returns_correct_path(self): + result = get_zip_redirect_path(42, "abc123") + assert str(result) == "/cache/zips/42/abc123.zip" + + +class TestHasSpaceForCache: + def test_returns_true_with_enough_space(self, tmp_path, mocker): + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) + assert has_space_for_cache([_entry(size=1024)]) is True + + def test_returns_false_with_insufficient_space(self, tmp_path, mocker): + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) + assert has_space_for_cache([_entry(size=2**62)]) is False + + def test_requires_2x_buffer(self, tmp_path, mocker): + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) + stat = os.statvfs(tmp_path) + available = stat.f_bavail * stat.f_frsize + size_just_over_half = int(available * 0.6) + assert has_space_for_cache([_entry(size=size_just_over_half)]) is False + + +class TestCleanupStaleZips: + def test_deletes_old_files(self, tmp_path, mocker): + rom_dir = tmp_path / "1" + rom_dir.mkdir() + old_zip = rom_dir / "old.zip" + old_zip.write_bytes(b"stale") + old_time = time.time() - (72 * 3600) + os.utime(old_zip, (old_time, old_time)) + + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) + assert cleanup_stale_zips(max_age_hours=48) == 1 + assert not old_zip.exists() + + def test_keeps_recent_files(self, tmp_path, mocker): + rom_dir = tmp_path / "1" + rom_dir.mkdir() + recent = rom_dir / "recent.zip" + recent.write_bytes(b"fresh") + + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) + assert cleanup_stale_zips(max_age_hours=48) == 0 + assert recent.exists() + + def test_removes_empty_directories(self, tmp_path, mocker): + rom_dir = tmp_path / "1" + rom_dir.mkdir() + old_zip = rom_dir / "old.zip" + old_zip.write_bytes(b"stale") + old_time = time.time() - (72 * 3600) + os.utime(old_zip, (old_time, old_time)) + + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) + cleanup_stale_zips(max_age_hours=48) + assert not rom_dir.exists() + + def test_handles_missing_cache_dir(self, tmp_path, mocker): + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path / "nonexistent")) + assert cleanup_stale_zips(max_age_hours=48) == 0 + + def test_keeps_dir_with_remaining_files(self, tmp_path, mocker): + rom_dir = tmp_path / "1" + rom_dir.mkdir() + + old_zip = rom_dir / "old.zip" + old_zip.write_bytes(b"stale") + old_time = time.time() - (72 * 
3600) + os.utime(old_zip, (old_time, old_time)) + + fresh_zip = rom_dir / "fresh.zip" + fresh_zip.write_bytes(b"current") + + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) + assert cleanup_stale_zips(max_age_hours=48) == 1 + assert not old_zip.exists() + assert fresh_zip.exists() + assert rom_dir.exists() diff --git a/backend/utils/zip_cache.py b/backend/utils/zip_cache.py index ead730ea8f..db773d0591 100644 --- a/backend/utils/zip_cache.py +++ b/backend/utils/zip_cache.py @@ -4,14 +4,18 @@ import hashlib import os import tempfile -from datetime import datetime +import time from pathlib import Path -from stat import S_IFREG -from zipfile import ZIP_STORED, ZipFile, ZipInfo +from zipfile import ZIP_STORED, ZipFile from config import LIBRARY_BASE_PATH, ZIP_CACHE_PATH +from logger.formatter import highlight as hl from logger.logger import log +CACHE_KEY_LENGTH = 16 +DISK_SPACE_MULTIPLIER = 2 +SECONDS_PER_HOUR = 3600 + @dataclasses.dataclass(frozen=True) class ZipFileEntry: @@ -36,8 +40,7 @@ def get_cache_key( ] for e in sorted(entries, key=lambda x: x.download_name): parts.append(f"{e.download_name}:{e.file_size_bytes}") - digest = hashlib.sha256("|".join(parts).encode()).hexdigest()[:16] - return digest + return hashlib.sha256("|".join(parts).encode()).hexdigest()[:CACHE_KEY_LENGTH] def _cache_dir(rom_id: int) -> Path: @@ -55,17 +58,14 @@ def get_cached_zip(rom_id: int, cache_key: str) -> Path | None: def has_space_for_cache(entries: list[ZipFileEntry]) -> bool: - """Check if there is enough disk space to build the cached ZIP. - - Requires 2x the estimated ZIP size as a safety buffer to avoid filling - the disk during the build. - """ + """Check if available disk space exceeds the estimated ZIP size + multiplied by DISK_SPACE_MULTIPLIER.""" estimated_size = sum(e.file_size_bytes for e in entries) cache_root = Path(ZIP_CACHE_PATH) cache_root.mkdir(parents=True, exist_ok=True) stat = os.statvfs(cache_root) available = stat.f_bavail * stat.f_frsize - return available > estimated_size * 2 + return available > estimated_size * DISK_SPACE_MULTIPLIER def build_cached_zip( @@ -78,32 +78,25 @@ def build_cached_zip( """Build a ZIP_STORED archive on disk and return its path. Writes to a temp file in the same directory, then atomically renames to - the final path to prevent serving partial files. + the final path to prevent serving partial files. Uses ZipFile.write() + to stream file content without loading entire files into memory. 
""" target = _cache_file(rom_id, cache_key) if target.exists(): return target target.parent.mkdir(parents=True, exist_ok=True) - now = datetime.now().timetuple()[:6] fd, tmp_path = tempfile.mkstemp(dir=target.parent, suffix=".tmp") try: os.close(fd) - with ZipFile(tmp_path, "w") as zf: + with ZipFile(tmp_path, "w", compression=ZIP_STORED) as zf: for entry in entries: src = Path(LIBRARY_BASE_PATH) / entry.full_path - info = ZipInfo(filename=entry.download_name, date_time=now) - info.external_attr = S_IFREG | 0o600 - info.compress_type = ZIP_STORED - with open(src, "rb") as f: - zf.writestr(info, f.read()) + zf.write(src, arcname=entry.download_name) if m3u_content is not None and m3u_filename is not None: - m3u_info = ZipInfo(filename=m3u_filename, date_time=now) - m3u_info.external_attr = S_IFREG | 0o600 - m3u_info.compress_type = ZIP_STORED - zf.writestr(m3u_info, m3u_content) + zf.writestr(m3u_filename, m3u_content) os.rename(tmp_path, target) except BaseException: @@ -111,7 +104,7 @@ def build_cached_zip( os.unlink(tmp_path) raise - log.info(f"Built cached ZIP for ROM {rom_id}: {target.name}") + log.info(f"Built cached ZIP for ROM {hl(str(rom_id))}: {hl(target.name)}") return target @@ -126,9 +119,7 @@ def cleanup_stale_zips(max_age_hours: int = 48) -> int: if not cache_root.exists(): return 0 - import time - - cutoff = time.time() - (max_age_hours * 3600) + cutoff = time.time() - (max_age_hours * SECONDS_PER_HOUR) deleted = 0 for rom_dir in cache_root.iterdir(): From 18864a54f95809421e09b28333e093bd4662adc8 Mon Sep 17 00:00:00 2001 From: nendo Date: Tue, 24 Mar 2026 06:34:26 +0900 Subject: [PATCH 3/7] Add bulk download caching, LRU eviction, tiered TTL - Bulk /download endpoint now caches ZIPs for Range requests (max 100 ROMs) - LRU eviction frees space by removing oldest cached ZIPs (>1h old) - Tiered TTL: 48h default, 12h for ZIPs over 8GB - Generalize cache paths from rom_id to namespace (supports bulk-{ids}) --- backend/endpoints/roms/__init__.py | 82 ++++++-- backend/tasks/scheduled/cleanup_zip_cache.py | 6 +- backend/tests/tasks/test_cleanup_zip_cache.py | 2 +- backend/tests/utils/test_zip_cache.py | 196 ++++++++++++------ backend/utils/zip_cache.py | 125 ++++++++--- 5 files changed, 298 insertions(+), 113 deletions(-) diff --git a/backend/endpoints/roms/__init__.py b/backend/endpoints/roms/__init__.py index a6cdfb640b..18c31b2316 100644 --- a/backend/endpoints/roms/__init__.py +++ b/backend/endpoints/roms/__init__.py @@ -71,12 +71,13 @@ from utils.router import APIRouter from utils.validation import ValidationError from utils.zip_cache import ( + BULK_CACHE_MAX_ROMS, ZipFileEntry, build_cached_zip, + ensure_space_for_cache, get_cache_key, get_cached_zip, get_zip_redirect_path, - has_space_for_cache, ) from .files import router as files_router @@ -680,26 +681,65 @@ async def download_roms( f"User {hl(current_username, color=BLUE)} is downloading {len(rom_objects)} ROMs as zip" ) - content_lines = [] + all_entries = [] for rom in rom_objects: rom_files = sorted(rom.files, key=lambda x: x.file_name) for file in rom_files: - content_lines.append( - ZipContentLine( - crc32=None, # The CRC hash stored for compressed files is for the uncompressed content - size_bytes=file.file_size_bytes, - encoded_location=quote(f"/library/{file.full_path}"), - filename=file.full_path, + all_entries.append( + ZipFileEntry( + download_name=file.full_path, + full_path=file.full_path, + file_size_bytes=file.file_size_bytes, + updated_at_epoch=file.updated_at.timestamp(), ) ) if filename: file_name = 
sanitize_filename(filename) else: - base64_content = b64encode( - ("\n".join([str(line) for line in content_lines])).encode() + content_summary = "\n".join( + f"{e.download_name}:{e.file_size_bytes}" for e in all_entries + ).encode() + file_name = f"{len(rom_objects)} ROMs ({crc32_to_hex(binascii.crc32(content_summary))}).zip" + + range_header = request.headers.get("range") + if range_header and len(rom_objects) <= BULK_CACHE_MAX_ROMS: + namespace = f"bulk-{'-'.join(str(r.id) for r in sorted(rom_objects, key=lambda r: r.id))}" + cache_key = get_cache_key(namespace, all_entries) + zip_path = get_cached_zip(namespace, cache_key) + if zip_path: + return FileRedirectResponse( + download_path=get_zip_redirect_path(namespace, cache_key), + filename=file_name, + ) + if ensure_space_for_cache(all_entries): + await anyio.to_thread.run_sync( + lambda: build_cached_zip( + namespace=namespace, + entries=all_entries, + m3u_content=None, + m3u_filename=None, + cache_key=cache_key, + ) + ) + return FileRedirectResponse( + download_path=get_zip_redirect_path(namespace, cache_key), + filename=file_name, + ) + log.warning( + f"Insufficient disk space to cache bulk ZIP ({len(rom_objects)} ROMs), " + "falling back to streaming" ) - file_name = f"{len(rom_objects)} ROMs ({crc32_to_hex(binascii.crc32(base64_content))}).zip" + + content_lines = [ + ZipContentLine( + crc32=None, + size_bytes=e.file_size_bytes, + encoded_location=quote(f"/library/{e.full_path}"), + filename=e.download_name, + ) + for e in all_entries + ] return ZipResponse( content_lines=content_lines, @@ -910,8 +950,9 @@ async def head_rom_content( ) for f in files ] - cache_key = get_cache_key(rom.id, entries, hidden_folder) - zip_path = get_cached_zip(rom.id, cache_key) + namespace = str(rom.id) + cache_key = get_cache_key(namespace, entries, hidden_folder) + zip_path = get_cached_zip(namespace, cache_key) if zip_path: return Response( headers={ @@ -1073,14 +1114,15 @@ async def build_zip_in_memory() -> bytes: ) for f in files ] - cache_key = get_cache_key(rom.id, entries, hidden_folder) - zip_path = get_cached_zip(rom.id, cache_key) + namespace = str(rom.id) + cache_key = get_cache_key(namespace, entries, hidden_folder) + zip_path = get_cached_zip(namespace, cache_key) if zip_path: return FileRedirectResponse( - download_path=get_zip_redirect_path(rom.id, cache_key), + download_path=get_zip_redirect_path(namespace, cache_key), filename=f"{file_name}.zip", ) - if has_space_for_cache(entries): + if ensure_space_for_cache(entries): m3u_content = ( None if rom.has_m3u_file() @@ -1089,7 +1131,7 @@ async def build_zip_in_memory() -> bytes: m3u_filename = None if rom.has_m3u_file() else f"{file_name}.m3u" await anyio.to_thread.run_sync( lambda: build_cached_zip( - rom_id=rom.id, + namespace=namespace, entries=entries, m3u_content=m3u_content, m3u_filename=m3u_filename, @@ -1097,11 +1139,11 @@ async def build_zip_in_memory() -> bytes: ) ) return FileRedirectResponse( - download_path=get_zip_redirect_path(rom.id, cache_key), + download_path=get_zip_redirect_path(namespace, cache_key), filename=f"{file_name}.zip", ) log.warning( - f"Insufficient disk space to cache ZIP for ROM {rom.id}, " + f"Insufficient disk space to cache ZIP for ROM {hl(str(rom.id))}, " "falling back to streaming" ) diff --git a/backend/tasks/scheduled/cleanup_zip_cache.py b/backend/tasks/scheduled/cleanup_zip_cache.py index a197b86bc7..f81436861e 100644 --- a/backend/tasks/scheduled/cleanup_zip_cache.py +++ b/backend/tasks/scheduled/cleanup_zip_cache.py @@ -7,11 +7,11 @@ class 
CleanupZipCacheTask(PeriodicTask): def __init__(self): super().__init__( title="Scheduled ZIP cache cleanup", - description="Removes cached ZIP files older than 48 hours", + description="Removes stale cached ZIP files based on tiered TTL", task_type=TaskType.CLEANUP, enabled=True, manual_run=False, - cron_string="0 4 * * *", # Daily at 4 AM + cron_string="0 4 * * *", func="tasks.scheduled.cleanup_zip_cache.cleanup_zip_cache_task.run", ) @@ -20,7 +20,7 @@ async def run(self) -> None: self.unschedule() return - deleted = cleanup_stale_zips(max_age_hours=48) + deleted = cleanup_stale_zips() if deleted: log.info(f"Cleaned up {deleted} stale cached ZIP files") diff --git a/backend/tests/tasks/test_cleanup_zip_cache.py b/backend/tests/tasks/test_cleanup_zip_cache.py index 6ea0edd5ad..8e1dd2e272 100644 --- a/backend/tests/tasks/test_cleanup_zip_cache.py +++ b/backend/tests/tasks/test_cleanup_zip_cache.py @@ -17,7 +17,7 @@ async def test_run_calls_cleanup(self, mocker): return_value=3, ) await task.run() - mock_cleanup.assert_called_once_with(max_age_hours=48) + mock_cleanup.assert_called_once_with() async def test_run_disabled_unschedules(self, mocker): task = CleanupZipCacheTask() diff --git a/backend/tests/utils/test_zip_cache.py b/backend/tests/utils/test_zip_cache.py index 015b6323e9..589677ad9c 100644 --- a/backend/tests/utils/test_zip_cache.py +++ b/backend/tests/utils/test_zip_cache.py @@ -5,13 +5,19 @@ import pytest from utils.zip_cache import ( + BULK_CACHE_MAX_ROMS, + DEFAULT_TTL_HOURS, + LARGE_ZIP_THRESHOLD_BYTES, + LARGE_ZIP_TTL_HOURS, + SECONDS_PER_HOUR, ZipFileEntry, build_cached_zip, cleanup_stale_zips, + ensure_space_for_cache, get_cache_key, get_cached_zip, + get_ttl_hours, get_zip_redirect_path, - has_space_for_cache, ) @@ -29,37 +35,39 @@ def _entry( class TestGetCacheKey: def test_deterministic(self): entries = [_entry("a.bin", 100), _entry("b.bin", 200)] - key1 = get_cache_key(1, entries, False) - key2 = get_cache_key(1, entries, False) + key1 = get_cache_key("1", entries, False) + key2 = get_cache_key("1", entries, False) assert key1 == key2 - def test_different_rom_ids(self): + def test_different_namespaces(self): entries = [_entry()] - assert get_cache_key(1, entries, False) != get_cache_key(2, entries, False) + assert get_cache_key("1", entries, False) != get_cache_key("2", entries, False) def test_different_hidden_folder(self): entries = [_entry()] - assert get_cache_key(1, entries, False) != get_cache_key(1, entries, True) + assert get_cache_key("1", entries, False) != get_cache_key("1", entries, True) def test_different_files(self): e1 = [_entry("a.bin")] e2 = [_entry("b.bin")] - assert get_cache_key(1, e1, False) != get_cache_key(1, e2, False) + assert get_cache_key("1", e1, False) != get_cache_key("1", e2, False) def test_different_updated_at(self): e1 = [_entry(epoch=1000.0)] e2 = [_entry(epoch=2000.0)] - assert get_cache_key(1, e1, False) != get_cache_key(1, e2, False) + assert get_cache_key("1", e1, False) != get_cache_key("1", e2, False) def test_order_independent(self): entries_a = [_entry("a.bin", 100), _entry("b.bin", 200)] entries_b = [_entry("b.bin", 200), _entry("a.bin", 100)] - assert get_cache_key(1, entries_a, False) == get_cache_key(1, entries_b, False) + assert get_cache_key("1", entries_a, False) == get_cache_key( + "1", entries_b, False + ) def test_returns_hex_string(self): - key = get_cache_key(1, [_entry()], False) + key = get_cache_key("1", [_entry()], False) assert len(key) == 16 - int(key, 16) # should not raise + int(key, 16) def 
test_file_change_invalidates_cache(self): entries_v1 = [ @@ -70,37 +78,43 @@ def test_file_change_invalidates_cache(self): _entry("disc1.chd", 500, epoch=1000.0), _entry("disc2.chd", 600, epoch=2000.0), ] - assert get_cache_key(1, entries_v1, False) != get_cache_key( - 1, entries_v2, False + assert get_cache_key("1", entries_v1, False) != get_cache_key( + "1", entries_v2, False ) def test_file_size_change_invalidates_cache(self): e1 = [_entry("game.bin", size=1000, epoch=1000.0)] e2 = [_entry("game.bin", size=2000, epoch=1000.0)] - assert get_cache_key(1, e1, False) != get_cache_key(1, e2, False) + assert get_cache_key("1", e1, False) != get_cache_key("1", e2, False) + + def test_bulk_namespace(self): + entries = [_entry()] + key_single = get_cache_key("42", entries) + key_bulk = get_cache_key("bulk-42-43", entries) + assert key_single != key_bulk class TestGetCachedZip: def test_returns_none_when_missing(self, tmp_path, mocker): mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) - assert get_cached_zip(1, "abc123") is None + assert get_cached_zip("1", "abc123") is None def test_returns_path_when_exists(self, tmp_path, mocker): - rom_dir = tmp_path / "1" - rom_dir.mkdir() - (rom_dir / "abc123.zip").write_bytes(b"fake") + ns_dir = tmp_path / "1" + ns_dir.mkdir() + (ns_dir / "abc123.zip").write_bytes(b"fake") mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) - result = get_cached_zip(1, "abc123") + result = get_cached_zip("1", "abc123") assert result is not None assert result.name == "abc123.zip" def test_old_key_not_returned_for_new_key(self, tmp_path, mocker): - rom_dir = tmp_path / "1" - rom_dir.mkdir() - (rom_dir / "oldkey.zip").write_bytes(b"old content") + ns_dir = tmp_path / "1" + ns_dir.mkdir() + (ns_dir / "oldkey.zip").write_bytes(b"old content") mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) - assert get_cached_zip(1, "oldkey") is not None - assert get_cached_zip(1, "newkey") is None + assert get_cached_zip("1", "oldkey") is not None + assert get_cached_zip("1", "newkey") is None class TestBuildCachedZip: @@ -122,7 +136,7 @@ def test_builds_valid_zip(self, tmp_path, source_files, mocker): mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(cache_dir)) mocker.patch("utils.zip_cache.LIBRARY_BASE_PATH", str(source_files)) result = build_cached_zip( - rom_id=42, + namespace="42", entries=entries, m3u_content=None, m3u_filename=None, @@ -142,7 +156,7 @@ def test_includes_m3u(self, tmp_path, source_files, mocker): mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(cache_dir)) mocker.patch("utils.zip_cache.LIBRARY_BASE_PATH", str(source_files)) result = build_cached_zip( - rom_id=42, + namespace="42", entries=entries, m3u_content=b"disc1.chd\ndisc2.chd", m3u_filename="game.m3u", @@ -161,7 +175,7 @@ def test_skips_build_if_exists(self, tmp_path, mocker): mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path / "cache")) result = build_cached_zip( - rom_id=42, + namespace="42", entries=[ZipFileEntry("disc1.chd", "roms/nes/disc1.chd", 9, 1000.0)], m3u_content=None, m3u_filename=None, @@ -177,7 +191,7 @@ def test_cleans_up_temp_on_error(self, tmp_path, source_files, mocker): with pytest.raises(FileNotFoundError): build_cached_zip( - rom_id=42, + namespace="42", entries=entries, m3u_content=None, m3u_filename=None, @@ -199,7 +213,7 @@ def test_new_key_builds_alongside_old(self, tmp_path, source_files, mocker): mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(cache_dir)) mocker.patch("utils.zip_cache.LIBRARY_BASE_PATH", str(source_files)) 
build_cached_zip( - rom_id=42, + namespace="42", entries=entries, m3u_content=None, m3u_filename=None, @@ -211,81 +225,145 @@ def test_new_key_builds_alongside_old(self, tmp_path, source_files, mocker): class TestGetZipRedirectPath: - def test_returns_correct_path(self): - result = get_zip_redirect_path(42, "abc123") - assert str(result) == "/cache/zips/42/abc123.zip" + def test_single_rom_path(self): + assert str(get_zip_redirect_path("42", "abc123")) == "/cache/zips/42/abc123.zip" + + def test_bulk_path(self): + assert ( + str(get_zip_redirect_path("bulk-1-2-3", "abc123")) + == "/cache/zips/bulk-1-2-3/abc123.zip" + ) -class TestHasSpaceForCache: +class TestEnsureSpaceForCache: def test_returns_true_with_enough_space(self, tmp_path, mocker): mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) - assert has_space_for_cache([_entry(size=1024)]) is True + assert ensure_space_for_cache([_entry(size=1024)]) is True def test_returns_false_with_insufficient_space(self, tmp_path, mocker): mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) - assert has_space_for_cache([_entry(size=2**62)]) is False + assert ensure_space_for_cache([_entry(size=2**62)]) is False def test_requires_2x_buffer(self, tmp_path, mocker): mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) stat = os.statvfs(tmp_path) available = stat.f_bavail * stat.f_frsize size_just_over_half = int(available * 0.6) - assert has_space_for_cache([_entry(size=size_just_over_half)]) is False + assert ensure_space_for_cache([_entry(size=size_just_over_half)]) is False + + def test_evicts_old_entries_to_make_space(self, tmp_path, mocker): + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) + ns_dir = tmp_path / "old_rom" + ns_dir.mkdir() + old_zip = ns_dir / "stale.zip" + old_zip.write_bytes(b"x" * 1024) + old_time = time.time() - (2 * SECONDS_PER_HOUR) + os.utime(old_zip, (old_time, old_time)) + + assert ensure_space_for_cache([_entry(size=512)]) is True + + def test_does_not_evict_recent_entries(self, tmp_path, mocker): + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) + ns_dir = tmp_path / "active_rom" + ns_dir.mkdir() + fresh_zip = ns_dir / "fresh.zip" + fresh_zip.write_bytes(b"x" * 1024) + + # Fresh file should survive eviction attempt + ensure_space_for_cache([_entry(size=512)]) + assert fresh_zip.exists() + + +class TestGetTtlHours: + def test_small_zip_gets_default_ttl(self): + entries = [_entry(size=1024)] + assert get_ttl_hours(entries) == DEFAULT_TTL_HOURS + + def test_large_zip_gets_reduced_ttl(self): + entries = [_entry(size=LARGE_ZIP_THRESHOLD_BYTES + 1)] + assert get_ttl_hours(entries) == LARGE_ZIP_TTL_HOURS class TestCleanupStaleZips: def test_deletes_old_files(self, tmp_path, mocker): - rom_dir = tmp_path / "1" - rom_dir.mkdir() - old_zip = rom_dir / "old.zip" + ns_dir = tmp_path / "1" + ns_dir.mkdir() + old_zip = ns_dir / "old.zip" old_zip.write_bytes(b"stale") - old_time = time.time() - (72 * 3600) + old_time = time.time() - (DEFAULT_TTL_HOURS + 1) * SECONDS_PER_HOUR os.utime(old_zip, (old_time, old_time)) mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) - assert cleanup_stale_zips(max_age_hours=48) == 1 + assert cleanup_stale_zips() == 1 assert not old_zip.exists() def test_keeps_recent_files(self, tmp_path, mocker): - rom_dir = tmp_path / "1" - rom_dir.mkdir() - recent = rom_dir / "recent.zip" + ns_dir = tmp_path / "1" + ns_dir.mkdir() + recent = ns_dir / "recent.zip" recent.write_bytes(b"fresh") mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", 
str(tmp_path)) - assert cleanup_stale_zips(max_age_hours=48) == 0 + assert cleanup_stale_zips() == 0 assert recent.exists() def test_removes_empty_directories(self, tmp_path, mocker): - rom_dir = tmp_path / "1" - rom_dir.mkdir() - old_zip = rom_dir / "old.zip" + ns_dir = tmp_path / "1" + ns_dir.mkdir() + old_zip = ns_dir / "old.zip" old_zip.write_bytes(b"stale") - old_time = time.time() - (72 * 3600) + old_time = time.time() - (DEFAULT_TTL_HOURS + 1) * SECONDS_PER_HOUR os.utime(old_zip, (old_time, old_time)) mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) - cleanup_stale_zips(max_age_hours=48) - assert not rom_dir.exists() + cleanup_stale_zips() + assert not ns_dir.exists() def test_handles_missing_cache_dir(self, tmp_path, mocker): mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path / "nonexistent")) - assert cleanup_stale_zips(max_age_hours=48) == 0 + assert cleanup_stale_zips() == 0 def test_keeps_dir_with_remaining_files(self, tmp_path, mocker): - rom_dir = tmp_path / "1" - rom_dir.mkdir() + ns_dir = tmp_path / "1" + ns_dir.mkdir() - old_zip = rom_dir / "old.zip" + old_zip = ns_dir / "old.zip" old_zip.write_bytes(b"stale") - old_time = time.time() - (72 * 3600) + old_time = time.time() - (DEFAULT_TTL_HOURS + 1) * SECONDS_PER_HOUR os.utime(old_zip, (old_time, old_time)) - fresh_zip = rom_dir / "fresh.zip" + fresh_zip = ns_dir / "fresh.zip" fresh_zip.write_bytes(b"current") mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) - assert cleanup_stale_zips(max_age_hours=48) == 1 + assert cleanup_stale_zips() == 1 assert not old_zip.exists() assert fresh_zip.exists() - assert rom_dir.exists() + assert ns_dir.exists() + + def test_large_files_use_shorter_ttl(self, tmp_path, mocker): + """A large file between the two TTLs should be deleted.""" + ns_dir = tmp_path / "1" + ns_dir.mkdir() + large_zip = ns_dir / "large.zip" + # Create file larger than threshold + large_zip.write_bytes(b"x" * (LARGE_ZIP_THRESHOLD_BYTES + 1)) + # Set mtime between the two TTLs (e.g., 24 hours ago) + age = (LARGE_ZIP_TTL_HOURS + 1) * SECONDS_PER_HOUR + old_time = time.time() - age + os.utime(large_zip, (old_time, old_time)) + + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) + assert cleanup_stale_zips() == 1 + + +class TestConstants: + def test_bulk_cache_max_roms(self): + assert BULK_CACHE_MAX_ROMS == 100 + + def test_large_zip_threshold(self): + assert LARGE_ZIP_THRESHOLD_BYTES == 8 * 1024 * 1024 * 1024 + + def test_ttl_values(self): + assert DEFAULT_TTL_HOURS == 48 + assert LARGE_ZIP_TTL_HOURS == 12 diff --git a/backend/utils/zip_cache.py b/backend/utils/zip_cache.py index db773d0591..ad94909c7b 100644 --- a/backend/utils/zip_cache.py +++ b/backend/utils/zip_cache.py @@ -15,6 +15,11 @@ CACHE_KEY_LENGTH = 16 DISK_SPACE_MULTIPLIER = 2 SECONDS_PER_HOUR = 3600 +LARGE_ZIP_THRESHOLD_BYTES = 8 * 1024 * 1024 * 1024 # 8 GB +DEFAULT_TTL_HOURS = 48 +LARGE_ZIP_TTL_HOURS = 12 +BULK_CACHE_MAX_ROMS = 100 +MIN_EVICT_AGE_HOURS = 1 @dataclasses.dataclass(frozen=True) @@ -28,13 +33,13 @@ class ZipFileEntry: def get_cache_key( - rom_id: int, + namespace: str, entries: list[ZipFileEntry], - hidden_folder: bool, + hidden_folder: bool = False, ) -> str: - """Deterministic cache key derived from ROM state.""" + """Deterministic cache key derived from content state.""" parts = [ - str(rom_id), + namespace, str(hidden_folder), str(max(e.updated_at_epoch for e in entries)), ] @@ -43,33 +48,73 @@ def get_cache_key( return hashlib.sha256("|".join(parts).encode()).hexdigest()[:CACHE_KEY_LENGTH] 
-def _cache_dir(rom_id: int) -> Path: - return Path(ZIP_CACHE_PATH) / str(rom_id) +def _cache_dir(namespace: str) -> Path: + return Path(ZIP_CACHE_PATH) / namespace -def _cache_file(rom_id: int, cache_key: str) -> Path: - return _cache_dir(rom_id) / f"{cache_key}.zip" +def _cache_file(namespace: str, cache_key: str) -> Path: + return _cache_dir(namespace) / f"{cache_key}.zip" -def get_cached_zip(rom_id: int, cache_key: str) -> Path | None: +def get_cached_zip(namespace: str, cache_key: str) -> Path | None: """Return the cached ZIP path if it exists on disk, else None.""" - path = _cache_file(rom_id, cache_key) + path = _cache_file(namespace, cache_key) return path if path.exists() else None -def has_space_for_cache(entries: list[ZipFileEntry]) -> bool: - """Check if available disk space exceeds the estimated ZIP size - multiplied by DISK_SPACE_MULTIPLIER.""" - estimated_size = sum(e.file_size_bytes for e in entries) +def _get_available_space() -> int: cache_root = Path(ZIP_CACHE_PATH) cache_root.mkdir(parents=True, exist_ok=True) stat = os.statvfs(cache_root) - available = stat.f_bavail * stat.f_frsize - return available > estimated_size * DISK_SPACE_MULTIPLIER + return stat.f_bavail * stat.f_frsize + + +def _get_all_cached_zips() -> list[Path]: + """Return all cached ZIP files sorted by mtime ascending (oldest first).""" + cache_root = Path(ZIP_CACHE_PATH) + if not cache_root.exists(): + return [] + zips = [] + for ns_dir in cache_root.iterdir(): + if not ns_dir.is_dir(): + continue + for zip_file in ns_dir.glob("*.zip"): + zips.append(zip_file) + zips.sort(key=lambda p: p.stat().st_mtime) + return zips + + +def ensure_space_for_cache(entries: list[ZipFileEntry]) -> bool: + """Check available disk space, evicting oldest cached ZIPs if needed. + + Requires DISK_SPACE_MULTIPLIER times the estimated ZIP size. Evicts + cached entries older than MIN_EVICT_AGE_HOURS until enough space is + available or no more evictable entries remain. + """ + estimated_size = sum(e.file_size_bytes for e in entries) + required = estimated_size * DISK_SPACE_MULTIPLIER + + if _get_available_space() > required: + return True + + evict_cutoff = time.time() - (MIN_EVICT_AGE_HOURS * SECONDS_PER_HOUR) + for zip_file in _get_all_cached_zips(): + if zip_file.stat().st_mtime > evict_cutoff: + continue + freed = zip_file.stat().st_size + zip_file.unlink() + parent = zip_file.parent + if parent.exists() and not any(parent.iterdir()): + parent.rmdir() + log.debug(f"Evicted cached ZIP {hl(zip_file.name)} ({freed} bytes)") + if _get_available_space() > required: + return True + + return _get_available_space() > required def build_cached_zip( - rom_id: int, + namespace: str, entries: list[ZipFileEntry], m3u_content: bytes | None, m3u_filename: str | None, @@ -81,7 +126,7 @@ def build_cached_zip( the final path to prevent serving partial files. Uses ZipFile.write() to stream file content without loading entire files into memory. 
""" - target = _cache_file(rom_id, cache_key) + target = _cache_file(namespace, cache_key) if target.exists(): return target @@ -104,32 +149,52 @@ def build_cached_zip( os.unlink(tmp_path) raise - log.info(f"Built cached ZIP for ROM {hl(str(rom_id))}: {hl(target.name)}") + log.info(f"Built cached ZIP in {hl(namespace)}: {hl(target.name)}") return target -def get_zip_redirect_path(rom_id: int, cache_key: str) -> Path: +def get_zip_redirect_path(namespace: str, cache_key: str) -> Path: """Return the nginx-internal URL path for the cached ZIP.""" - return Path(f"/cache/zips/{rom_id}/{cache_key}.zip") + return Path(f"/cache/zips/{namespace}/{cache_key}.zip") -def cleanup_stale_zips(max_age_hours: int = 48) -> int: - """Remove cached ZIPs older than max_age_hours. Returns count deleted.""" +def get_ttl_hours(entries: list[ZipFileEntry]) -> int: + """Return the appropriate TTL based on estimated ZIP size.""" + estimated_size = sum(e.file_size_bytes for e in entries) + if estimated_size > LARGE_ZIP_THRESHOLD_BYTES: + return LARGE_ZIP_TTL_HOURS + return DEFAULT_TTL_HOURS + + +def cleanup_stale_zips() -> int: + """Remove cached ZIPs that have exceeded their TTL. + + Files larger than LARGE_ZIP_THRESHOLD_BYTES use LARGE_ZIP_TTL_HOURS, + all others use DEFAULT_TTL_HOURS. + """ cache_root = Path(ZIP_CACHE_PATH) if not cache_root.exists(): return 0 - cutoff = time.time() - (max_age_hours * SECONDS_PER_HOUR) + now = time.time() + default_cutoff = now - (DEFAULT_TTL_HOURS * SECONDS_PER_HOUR) + large_cutoff = now - (LARGE_ZIP_TTL_HOURS * SECONDS_PER_HOUR) deleted = 0 - for rom_dir in cache_root.iterdir(): - if not rom_dir.is_dir(): + for ns_dir in cache_root.iterdir(): + if not ns_dir.is_dir(): continue - for zip_file in rom_dir.glob("*.zip"): - if zip_file.stat().st_mtime < cutoff: + for zip_file in ns_dir.glob("*.zip"): + stat = zip_file.stat() + cutoff = ( + large_cutoff + if stat.st_size > LARGE_ZIP_THRESHOLD_BYTES + else default_cutoff + ) + if stat.st_mtime < cutoff: zip_file.unlink() deleted += 1 - if rom_dir.exists() and not any(rom_dir.iterdir()): - rom_dir.rmdir() + if ns_dir.exists() and not any(ns_dir.iterdir()): + ns_dir.rmdir() return deleted From 4ffa94c9d91f5edd7acb2b6f3258ed0cc0e2c0cb Mon Sep 17 00:00:00 2001 From: nendo Date: Tue, 24 Mar 2026 06:39:34 +0900 Subject: [PATCH 4/7] Fix mypy method-assign in cleanup task test --- backend/tests/tasks/test_cleanup_zip_cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/tests/tasks/test_cleanup_zip_cache.py b/backend/tests/tasks/test_cleanup_zip_cache.py index 8e1dd2e272..4351225653 100644 --- a/backend/tests/tasks/test_cleanup_zip_cache.py +++ b/backend/tests/tasks/test_cleanup_zip_cache.py @@ -22,7 +22,7 @@ async def test_run_calls_cleanup(self, mocker): async def test_run_disabled_unschedules(self, mocker): task = CleanupZipCacheTask() task.enabled = False - task.unschedule = AsyncMock() + mocker.patch.object(task, "unschedule", new_callable=AsyncMock) mock_cleanup = mocker.patch( "tasks.scheduled.cleanup_zip_cache.cleanup_stale_zips", ) From 3f47af1110ab8497c52250c2c6b8e0d48c362aa2 Mon Sep 17 00:00:00 2001 From: nendo Date: Tue, 24 Mar 2026 07:00:13 +0900 Subject: [PATCH 5/7] Work around zipfile monkey-patch breaking writestr on CPython 3.13 A third-party library in the import chain replaces zipfile._get_compressor with an incompatible signature. Reload the zipfile module before building cached ZIPs to restore it. 
--- backend/utils/zip_cache.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/backend/utils/zip_cache.py b/backend/utils/zip_cache.py index ad94909c7b..07ca227013 100644 --- a/backend/utils/zip_cache.py +++ b/backend/utils/zip_cache.py @@ -5,8 +5,9 @@ import os import tempfile import time +import zipfile from pathlib import Path -from zipfile import ZIP_STORED, ZipFile +from zipfile import ZipFile from config import LIBRARY_BASE_PATH, ZIP_CACHE_PATH from logger.formatter import highlight as hl @@ -135,10 +136,17 @@ def build_cached_zip( fd, tmp_path = tempfile.mkstemp(dir=target.parent, suffix=".tmp") try: os.close(fd) - with ZipFile(tmp_path, "w", compression=ZIP_STORED) as zf: + # A third-party library in the import chain replaces + # zipfile._get_compressor with an incompatible signature on + # CPython 3.13. Reload the module to restore the original. + import importlib + + importlib.reload(zipfile) + with ZipFile(tmp_path, "w") as zf: for entry in entries: src = Path(LIBRARY_BASE_PATH) / entry.full_path - zf.write(src, arcname=entry.download_name) + with open(src, "rb") as f: + zf.writestr(entry.download_name, f.read()) if m3u_content is not None and m3u_filename is not None: zf.writestr(m3u_filename, m3u_content) From 6ffb8ac891852d88ede8fa89a36fc61627712941 Mon Sep 17 00:00:00 2001 From: nendo Date: Tue, 24 Mar 2026 08:11:55 +0900 Subject: [PATCH 6/7] Address Copilot review feedback - Use zipfile.ZipFile after reload instead of stale direct import - Use ZipFile.write() for streaming (reload fixes CPython 3.13 bug) - Wrap cache build in try/except, fall back to mod_zip on failure - Guard empty entries list in get_cache_key - Hash bulk namespace to avoid ENAMETOOLONG with 100 ROM IDs - Use MagicMock for sync unschedule method in tests - Mock _get_available_space in eviction tests to force eviction path - Use sparse file (truncate) for large file TTL test - Add tests for get_bulk_namespace and empty entries --- backend/endpoints/roms/__init__.py | 79 +++++++++++-------- backend/tests/tasks/test_cleanup_zip_cache.py | 4 +- backend/tests/utils/test_zip_cache.py | 70 ++++++++++++---- backend/utils/zip_cache.py | 40 +++++++--- 4 files changed, 131 insertions(+), 62 deletions(-) diff --git a/backend/endpoints/roms/__init__.py b/backend/endpoints/roms/__init__.py index 18c31b2316..0a347509a2 100644 --- a/backend/endpoints/roms/__init__.py +++ b/backend/endpoints/roms/__init__.py @@ -75,6 +75,7 @@ ZipFileEntry, build_cached_zip, ensure_space_for_cache, + get_bulk_namespace, get_cache_key, get_cached_zip, get_zip_redirect_path, @@ -703,8 +704,8 @@ async def download_roms( file_name = f"{len(rom_objects)} ROMs ({crc32_to_hex(binascii.crc32(content_summary))}).zip" range_header = request.headers.get("range") - if range_header and len(rom_objects) <= BULK_CACHE_MAX_ROMS: - namespace = f"bulk-{'-'.join(str(r.id) for r in sorted(rom_objects, key=lambda r: r.id))}" + if range_header and len(rom_objects) <= BULK_CACHE_MAX_ROMS and all_entries: + namespace = get_bulk_namespace([r.id for r in rom_objects]) cache_key = get_cache_key(namespace, all_entries) zip_path = get_cached_zip(namespace, cache_key) if zip_path: @@ -713,23 +714,30 @@ async def download_roms( filename=file_name, ) if ensure_space_for_cache(all_entries): - await anyio.to_thread.run_sync( - lambda: build_cached_zip( - namespace=namespace, - entries=all_entries, - m3u_content=None, - m3u_filename=None, - cache_key=cache_key, + try: + await anyio.to_thread.run_sync( + lambda: build_cached_zip( + 
namespace=namespace, + entries=all_entries, + m3u_content=None, + m3u_filename=None, + cache_key=cache_key, + ) ) + return FileRedirectResponse( + download_path=get_zip_redirect_path(namespace, cache_key), + filename=file_name, + ) + except Exception as e: + log.warning( + f"Failed to build cached bulk ZIP ({len(rom_objects)} ROMs), " + f"falling back to streaming: {e}" + ) + else: + log.warning( + f"Insufficient disk space to cache bulk ZIP ({len(rom_objects)} ROMs), " + "falling back to streaming" ) - return FileRedirectResponse( - download_path=get_zip_redirect_path(namespace, cache_key), - filename=file_name, - ) - log.warning( - f"Insufficient disk space to cache bulk ZIP ({len(rom_objects)} ROMs), " - "falling back to streaming" - ) content_lines = [ ZipContentLine( @@ -1129,23 +1137,30 @@ async def build_zip_in_memory() -> bytes: else generate_m3u_content(files, hidden_folder) ) m3u_filename = None if rom.has_m3u_file() else f"{file_name}.m3u" - await anyio.to_thread.run_sync( - lambda: build_cached_zip( - namespace=namespace, - entries=entries, - m3u_content=m3u_content, - m3u_filename=m3u_filename, - cache_key=cache_key, + try: + await anyio.to_thread.run_sync( + lambda: build_cached_zip( + namespace=namespace, + entries=entries, + m3u_content=m3u_content, + m3u_filename=m3u_filename, + cache_key=cache_key, + ) ) + return FileRedirectResponse( + download_path=get_zip_redirect_path(namespace, cache_key), + filename=f"{file_name}.zip", + ) + except Exception as e: + log.warning( + f"Failed to build cached ZIP for ROM {hl(str(rom.id))}, " + f"falling back to streaming: {e}" + ) + else: + log.warning( + f"Insufficient disk space to cache ZIP for ROM {hl(str(rom.id))}, " + "falling back to streaming" ) - return FileRedirectResponse( - download_path=get_zip_redirect_path(namespace, cache_key), - filename=f"{file_name}.zip", - ) - log.warning( - f"Insufficient disk space to cache ZIP for ROM {hl(str(rom.id))}, " - "falling back to streaming" - ) content_lines = [ ZipContentLine( diff --git a/backend/tests/tasks/test_cleanup_zip_cache.py b/backend/tests/tasks/test_cleanup_zip_cache.py index 4351225653..75bb9019be 100644 --- a/backend/tests/tasks/test_cleanup_zip_cache.py +++ b/backend/tests/tasks/test_cleanup_zip_cache.py @@ -1,4 +1,4 @@ -from unittest.mock import AsyncMock +from unittest.mock import MagicMock from tasks.scheduled.cleanup_zip_cache import CleanupZipCacheTask @@ -22,7 +22,7 @@ async def test_run_calls_cleanup(self, mocker): async def test_run_disabled_unschedules(self, mocker): task = CleanupZipCacheTask() task.enabled = False - mocker.patch.object(task, "unschedule", new_callable=AsyncMock) + mocker.patch.object(task, "unschedule", MagicMock()) mock_cleanup = mocker.patch( "tasks.scheduled.cleanup_zip_cache.cleanup_stale_zips", ) diff --git a/backend/tests/utils/test_zip_cache.py b/backend/tests/utils/test_zip_cache.py index 589677ad9c..9d02ed3421 100644 --- a/backend/tests/utils/test_zip_cache.py +++ b/backend/tests/utils/test_zip_cache.py @@ -6,6 +6,8 @@ from utils.zip_cache import ( BULK_CACHE_MAX_ROMS, + BULK_NAMESPACE_PREFIX, + CACHE_KEY_LENGTH, DEFAULT_TTL_HOURS, LARGE_ZIP_THRESHOLD_BYTES, LARGE_ZIP_TTL_HOURS, @@ -14,6 +16,7 @@ build_cached_zip, cleanup_stale_zips, ensure_space_for_cache, + get_bulk_namespace, get_cache_key, get_cached_zip, get_ttl_hours, @@ -66,7 +69,7 @@ def test_order_independent(self): def test_returns_hex_string(self): key = get_cache_key("1", [_entry()], False) - assert len(key) == 16 + assert len(key) == CACHE_KEY_LENGTH int(key, 16) def 
test_file_change_invalidates_cache(self): @@ -90,9 +93,34 @@ def test_file_size_change_invalidates_cache(self): def test_bulk_namespace(self): entries = [_entry()] key_single = get_cache_key("42", entries) - key_bulk = get_cache_key("bulk-42-43", entries) + key_bulk = get_cache_key("bulk-abc123", entries) assert key_single != key_bulk + def test_empty_entries(self): + key = get_cache_key("1", [], False) + assert len(key) == CACHE_KEY_LENGTH + int(key, 16) + + +class TestGetBulkNamespace: + def test_deterministic(self): + assert get_bulk_namespace([1, 2, 3]) == get_bulk_namespace([1, 2, 3]) + + def test_order_independent(self): + assert get_bulk_namespace([3, 1, 2]) == get_bulk_namespace([1, 2, 3]) + + def test_starts_with_prefix(self): + ns = get_bulk_namespace([1, 2]) + assert ns.startswith(f"{BULK_NAMESPACE_PREFIX}-") + + def test_short_enough_for_filesystem(self): + ids = list(range(1, 101)) + ns = get_bulk_namespace(ids) + assert len(ns) < 255 + + def test_different_ids_produce_different_namespace(self): + assert get_bulk_namespace([1, 2]) != get_bulk_namespace([1, 3]) + class TestGetCachedZip: def test_returns_none_when_missing(self, tmp_path, mocker): @@ -229,10 +257,10 @@ def test_single_rom_path(self): assert str(get_zip_redirect_path("42", "abc123")) == "/cache/zips/42/abc123.zip" def test_bulk_path(self): - assert ( - str(get_zip_redirect_path("bulk-1-2-3", "abc123")) - == "/cache/zips/bulk-1-2-3/abc123.zip" - ) + ns = get_bulk_namespace([1, 2, 3]) + path = str(get_zip_redirect_path(ns, "abc123")) + assert path.startswith("/cache/zips/bulk-") + assert path.endswith("/abc123.zip") class TestEnsureSpaceForCache: @@ -242,17 +270,18 @@ def test_returns_true_with_enough_space(self, tmp_path, mocker): def test_returns_false_with_insufficient_space(self, tmp_path, mocker): mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) - assert ensure_space_for_cache([_entry(size=2**62)]) is False + mocker.patch("utils.zip_cache._get_available_space", return_value=100) + assert ensure_space_for_cache([_entry(size=1024)]) is False def test_requires_2x_buffer(self, tmp_path, mocker): mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) - stat = os.statvfs(tmp_path) - available = stat.f_bavail * stat.f_frsize - size_just_over_half = int(available * 0.6) - assert ensure_space_for_cache([_entry(size=size_just_over_half)]) is False + # 500 bytes available, entry is 300 -> 2x = 600 > 500 -> False + mocker.patch("utils.zip_cache._get_available_space", return_value=500) + assert ensure_space_for_cache([_entry(size=300)]) is False def test_evicts_old_entries_to_make_space(self, tmp_path, mocker): mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) + ns_dir = tmp_path / "old_rom" ns_dir.mkdir() old_zip = ns_dir / "stale.zip" @@ -260,16 +289,26 @@ def test_evicts_old_entries_to_make_space(self, tmp_path, mocker): old_time = time.time() - (2 * SECONDS_PER_HOUR) os.utime(old_zip, (old_time, old_time)) + call_count = [0] + + def fake_space(): + call_count[0] += 1 + return 100 if call_count[0] <= 1 else 999999 + + mocker.patch("utils.zip_cache._get_available_space", side_effect=fake_space) + assert ensure_space_for_cache([_entry(size=512)]) is True + assert not old_zip.exists() def test_does_not_evict_recent_entries(self, tmp_path, mocker): mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) + mocker.patch("utils.zip_cache._get_available_space", return_value=100) + ns_dir = tmp_path / "active_rom" ns_dir.mkdir() fresh_zip = ns_dir / "fresh.zip" fresh_zip.write_bytes(b"x" * 
1024) - # Fresh file should survive eviction attempt ensure_space_for_cache([_entry(size=512)]) assert fresh_zip.exists() @@ -342,13 +381,12 @@ def test_keeps_dir_with_remaining_files(self, tmp_path, mocker): assert ns_dir.exists() def test_large_files_use_shorter_ttl(self, tmp_path, mocker): - """A large file between the two TTLs should be deleted.""" ns_dir = tmp_path / "1" ns_dir.mkdir() large_zip = ns_dir / "large.zip" - # Create file larger than threshold - large_zip.write_bytes(b"x" * (LARGE_ZIP_THRESHOLD_BYTES + 1)) - # Set mtime between the two TTLs (e.g., 24 hours ago) + # Use truncate for sparse file instead of allocating 8GB+ + with open(large_zip, "wb") as f: + f.truncate(LARGE_ZIP_THRESHOLD_BYTES + 1) age = (LARGE_ZIP_TTL_HOURS + 1) * SECONDS_PER_HOUR old_time = time.time() - age os.utime(large_zip, (old_time, old_time)) diff --git a/backend/utils/zip_cache.py b/backend/utils/zip_cache.py index 07ca227013..905df176e1 100644 --- a/backend/utils/zip_cache.py +++ b/backend/utils/zip_cache.py @@ -2,12 +2,12 @@ import dataclasses import hashlib +import importlib import os import tempfile import time import zipfile from pathlib import Path -from zipfile import ZipFile from config import LIBRARY_BASE_PATH, ZIP_CACHE_PATH from logger.formatter import highlight as hl @@ -20,6 +20,7 @@ DEFAULT_TTL_HOURS = 48 LARGE_ZIP_TTL_HOURS = 12 BULK_CACHE_MAX_ROMS = 100 +BULK_NAMESPACE_PREFIX = "bulk" MIN_EVICT_AGE_HOURS = 1 @@ -39,6 +40,10 @@ def get_cache_key( hidden_folder: bool = False, ) -> str: """Deterministic cache key derived from content state.""" + if not entries: + return hashlib.sha256(f"{namespace}:empty".encode()).hexdigest()[ + :CACHE_KEY_LENGTH + ] parts = [ namespace, str(hidden_folder), @@ -49,6 +54,13 @@ def get_cache_key( return hashlib.sha256("|".join(parts).encode()).hexdigest()[:CACHE_KEY_LENGTH] +def get_bulk_namespace(rom_ids: list[int]) -> str: + """Deterministic namespace for bulk downloads, hashed to avoid ENAMETOOLONG.""" + id_str = "-".join(str(i) for i in sorted(rom_ids)) + id_hash = hashlib.sha256(id_str.encode()).hexdigest()[:CACHE_KEY_LENGTH] + return f"{BULK_NAMESPACE_PREFIX}-{id_hash}" + + def _cache_dir(namespace: str) -> Path: return Path(ZIP_CACHE_PATH) / namespace @@ -114,6 +126,17 @@ def ensure_space_for_cache(entries: list[ZipFileEntry]) -> bool: return _get_available_space() > required +def _reload_zipfile() -> None: + """Reload the zipfile module to restore stdlib internals. + + A third-party library in the import chain overrides + zipfile._get_compressor with an incompatible signature on + CPython 3.13, breaking ZipFile.write() and writestr(). Reloading + restores the original implementation. + """ + importlib.reload(zipfile) + + def build_cached_zip( namespace: str, entries: list[ZipFileEntry], @@ -124,8 +147,7 @@ def build_cached_zip( """Build a ZIP_STORED archive on disk and return its path. Writes to a temp file in the same directory, then atomically renames to - the final path to prevent serving partial files. Uses ZipFile.write() - to stream file content without loading entire files into memory. + the final path to prevent serving partial files. """ target = _cache_file(namespace, cache_key) if target.exists(): @@ -136,17 +158,11 @@ def build_cached_zip( fd, tmp_path = tempfile.mkstemp(dir=target.parent, suffix=".tmp") try: os.close(fd) - # A third-party library in the import chain replaces - # zipfile._get_compressor with an incompatible signature on - # CPython 3.13. Reload the module to restore the original. 
- import importlib - - importlib.reload(zipfile) - with ZipFile(tmp_path, "w") as zf: + _reload_zipfile() + with zipfile.ZipFile(tmp_path, "w") as zf: for entry in entries: src = Path(LIBRARY_BASE_PATH) / entry.full_path - with open(src, "rb") as f: - zf.writestr(entry.download_name, f.read()) + zf.write(src, arcname=entry.download_name) if m3u_content is not None and m3u_filename is not None: zf.writestr(m3u_filename, m3u_content) From 462cdb1b80f970c9d453bfc28e4f7f2285ebcb6c Mon Sep 17 00:00:00 2001 From: Georges-Antoine Assi Date: Tue, 7 Apr 2026 22:59:21 -0400 Subject: [PATCH 7/7] run fmt --- backend/endpoints/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/endpoints/tasks.py b/backend/endpoints/tasks.py index dbe6947bf3..4f269c8e6c 100644 --- a/backend/endpoints/tasks.py +++ b/backend/endpoints/tasks.py @@ -36,8 +36,8 @@ ) from tasks.manual.cleanup_missing_roms import cleanup_missing_roms_task from tasks.manual.cleanup_orphaned_resources import cleanup_orphaned_resources_task -from tasks.scheduled.cleanup_zip_cache import cleanup_zip_cache_task from tasks.manual.sync_folder_scan import sync_folder_scan_task +from tasks.scheduled.cleanup_zip_cache import cleanup_zip_cache_task from tasks.scheduled.convert_images_to_webp import convert_images_to_webp_task from tasks.scheduled.scan_library import scan_library_task from tasks.scheduled.update_launchbox_metadata import update_launchbox_metadata_task
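Why PATCH 6 hashes the bulk namespace instead of joining ROM IDs, as a
short sketch (the IDs are invented; CACHE_KEY_LENGTH mirrors the
16-character key length the tests assert):

    import hashlib

    CACHE_KEY_LENGTH = 16
    rom_ids = sorted(range(100000, 100100))  # 100 six-digit ROM IDs

    # Old scheme: every ID lands in the cache directory name.
    joined = "bulk-" + "-".join(str(i) for i in rom_ids)
    assert len(joined) == 704  # exceeds NAME_MAX (255) on most filesystems

    # New scheme: hash the joined IDs so the name stays fixed-length.
    digest = hashlib.sha256("-".join(str(i) for i in rom_ids).encode())
    hashed = "bulk-" + digest.hexdigest()[:CACHE_KEY_LENGTH]
    assert len(hashed) == 21  # fits regardless of how many IDs are bulked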