diff --git a/backend/config/__init__.py b/backend/config/__init__.py index dcb07ac188..39d8d1ee72 100644 --- a/backend/config/__init__.py +++ b/backend/config/__init__.py @@ -36,6 +36,7 @@ def _get_env(var: str, fallback: str | None = None) -> str | None: LIBRARY_BASE_PATH: Final[str] = f"{ROMM_BASE_PATH}/library" RESOURCES_BASE_PATH: Final[str] = f"{ROMM_BASE_PATH}/resources" ASSETS_BASE_PATH: Final[str] = f"{ROMM_BASE_PATH}/assets" +ZIP_CACHE_PATH: Final[str] = f"{ROMM_BASE_PATH}/cache/zips" FRONTEND_RESOURCES_PATH: Final[str] = "/assets/romm/resources" # SEVEN ZIP diff --git a/backend/endpoints/roms/__init__.py b/backend/endpoints/roms/__init__.py index 914b1aead4..61801b454f 100644 --- a/backend/endpoints/roms/__init__.py +++ b/backend/endpoints/roms/__init__.py @@ -8,6 +8,7 @@ from urllib.parse import quote from zipfile import ZIP_DEFLATED, ZIP_STORED, ZipFile, ZipInfo +import anyio import pydash from anyio import Path, open_file from fastapi import ( @@ -65,9 +66,20 @@ from utils.database import safe_int, safe_str_to_bool from utils.filesystem import sanitize_filename from utils.hashing import crc32_to_hex +from utils.m3u import generate_m3u_content from utils.nginx import FileRedirectResponse, ZipContentLine, ZipResponse from utils.router import APIRouter from utils.validation import ValidationError +from utils.zip_cache import ( + BULK_CACHE_MAX_ROMS, + ZipFileEntry, + build_cached_zip, + ensure_space_for_cache, + get_bulk_namespace, + get_cache_key, + get_cached_zip, + get_zip_redirect_path, +) from .files import router as files_router from .manual import router as manual_router @@ -657,26 +669,72 @@ async def download_roms( f"User {hl(current_username, color=BLUE)} is downloading {len(rom_objects)} ROMs as zip" ) - content_lines = [] + all_entries = [] for rom in rom_objects: rom_files = sorted(rom.files, key=lambda x: x.file_name) for file in rom_files: - content_lines.append( - ZipContentLine( - crc32=None, # The CRC hash stored for compressed files is for the uncompressed content - size_bytes=file.file_size_bytes, - encoded_location=quote(f"/library/{file.full_path}"), - filename=file.full_path, + all_entries.append( + ZipFileEntry( + download_name=file.full_path, + full_path=file.full_path, + file_size_bytes=file.file_size_bytes, + updated_at_epoch=file.updated_at.timestamp(), ) ) if filename: file_name = sanitize_filename(filename) else: - base64_content = b64encode( - ("\n".join([str(line) for line in content_lines])).encode() + content_summary = "\n".join( + f"{e.download_name}:{e.file_size_bytes}" for e in all_entries + ).encode() + file_name = f"{len(rom_objects)} ROMs ({crc32_to_hex(binascii.crc32(content_summary))}).zip" + + range_header = request.headers.get("range") + if range_header and len(rom_objects) <= BULK_CACHE_MAX_ROMS and all_entries: + namespace = get_bulk_namespace([r.id for r in rom_objects]) + cache_key = get_cache_key(namespace, all_entries) + zip_path = get_cached_zip(namespace, cache_key) + if zip_path: + return FileRedirectResponse( + download_path=get_zip_redirect_path(namespace, cache_key), + filename=file_name, + ) + if ensure_space_for_cache(all_entries): + try: + await anyio.to_thread.run_sync( + lambda: build_cached_zip( + namespace=namespace, + entries=all_entries, + m3u_content=None, + m3u_filename=None, + cache_key=cache_key, + ) + ) + return FileRedirectResponse( + download_path=get_zip_redirect_path(namespace, cache_key), + filename=file_name, + ) + except Exception as e: + log.warning( + f"Failed to build cached bulk ZIP ({len(rom_objects)} ROMs), " + f"falling back to streaming: {e}" + ) + else: + log.warning( + f"Insufficient disk space to cache bulk ZIP ({len(rom_objects)} ROMs), " + "falling back to streaming" + ) + + content_lines = [ + ZipContentLine( + crc32=None, + size_bytes=e.file_size_bytes, + encoded_location=quote(f"/library/{e.full_path}"), + filename=e.download_name, ) - file_name = f"{len(rom_objects)} ROMs ({crc32_to_hex(binascii.crc32(base64_content))}).zip" + for e in all_entries + ] return ZipResponse( content_lines=content_lines, @@ -877,6 +935,29 @@ async def head_rom_content( download_path=Path(f"/library/{files[0].full_path}"), ) + hidden_folder = safe_str_to_bool(request.query_params.get("hidden_folder", "")) + entries = [ + ZipFileEntry( + download_name=f.file_name_for_download(hidden_folder), + full_path=f.full_path, + file_size_bytes=f.file_size_bytes, + updated_at_epoch=f.updated_at.timestamp(), + ) + for f in files + ] + namespace = str(rom.id) + cache_key = get_cache_key(namespace, entries, hidden_folder) + zip_path = get_cached_zip(namespace, cache_key) + if zip_path: + return Response( + headers={ + "Content-Type": "application/zip", + "Content-Length": str(zip_path.stat().st_size), + "Accept-Ranges": "bytes", + "Content-Disposition": f"attachment; filename*=UTF-8''{quote(file_name)}.zip; filename=\"{quote(file_name)}.zip\"", + }, + ) + return Response( media_type="application/zip", headers={ @@ -1015,6 +1096,59 @@ async def build_zip_in_memory() -> bytes: download_path=Path(f"/library/{files[0].full_path}"), ) + # Multi-file path: serve cached ZIP for Range requests (resumable), + # fall through to mod_zip streaming for non-Range requests. + range_header = request.headers.get("range") + if range_header: + entries = [ + ZipFileEntry( + download_name=f.file_name_for_download(hidden_folder), + full_path=f.full_path, + file_size_bytes=f.file_size_bytes, + updated_at_epoch=f.updated_at.timestamp(), + ) + for f in files + ] + namespace = str(rom.id) + cache_key = get_cache_key(namespace, entries, hidden_folder) + zip_path = get_cached_zip(namespace, cache_key) + if zip_path: + return FileRedirectResponse( + download_path=get_zip_redirect_path(namespace, cache_key), + filename=f"{file_name}.zip", + ) + if ensure_space_for_cache(entries): + m3u_content = ( + None + if rom.has_m3u_file() + else generate_m3u_content(files, hidden_folder) + ) + m3u_filename = None if rom.has_m3u_file() else f"{file_name}.m3u" + try: + await anyio.to_thread.run_sync( + lambda: build_cached_zip( + namespace=namespace, + entries=entries, + m3u_content=m3u_content, + m3u_filename=m3u_filename, + cache_key=cache_key, + ) + ) + return FileRedirectResponse( + download_path=get_zip_redirect_path(namespace, cache_key), + filename=f"{file_name}.zip", + ) + except Exception as e: + log.warning( + f"Failed to build cached ZIP for ROM {hl(str(rom.id))}, " + f"falling back to streaming: {e}" + ) + else: + log.warning( + f"Insufficient disk space to cache ZIP for ROM {hl(str(rom.id))}, " + "falling back to streaming" + ) + content_lines = [ ZipContentLine( crc32=None, # The CRC hash stored for compressed files is for the uncompressed content @@ -1026,9 +1160,7 @@ async def build_zip_in_memory() -> bytes: ] if not rom.has_m3u_file(): - m3u_encoded_content = "\n".join( - [f.file_name_for_download(hidden_folder) for f in m3u_files] - ).encode() + m3u_encoded_content = generate_m3u_content(files, hidden_folder) m3u_base64_content = b64encode(m3u_encoded_content).decode() m3u_line = ZipContentLine( crc32=crc32_to_hex(binascii.crc32(m3u_encoded_content)), diff --git a/backend/endpoints/tasks.py b/backend/endpoints/tasks.py index e1c1303f4e..4f269c8e6c 100644 --- a/backend/endpoints/tasks.py +++ b/backend/endpoints/tasks.py @@ -37,6 +37,7 @@ from tasks.manual.cleanup_missing_roms import cleanup_missing_roms_task from tasks.manual.cleanup_orphaned_resources import cleanup_orphaned_resources_task from tasks.manual.sync_folder_scan import sync_folder_scan_task +from tasks.scheduled.cleanup_zip_cache import cleanup_zip_cache_task from tasks.scheduled.convert_images_to_webp import convert_images_to_webp_task from tasks.scheduled.scan_library import scan_library_task from tasks.scheduled.update_launchbox_metadata import update_launchbox_metadata_task @@ -92,6 +93,13 @@ class ManualTask(ScheduledTask): "task": convert_images_to_webp_task, } ), + ScheduledTask( + { + "name": "cleanup_zip_cache", + "type": TaskType.CLEANUP, + "task": cleanup_zip_cache_task, + } + ), ] manual_tasks: list[ManualTask] = [ diff --git a/backend/startup.py b/backend/startup.py index 9181960adf..f8d4ab745d 100644 --- a/backend/startup.py +++ b/backend/startup.py @@ -27,6 +27,7 @@ from logger.logger import log from models.firmware import FIRMWARE_FIXTURES_DIR, KNOWN_BIOS_KEY from tasks.scheduled.cleanup_netplay import cleanup_netplay_task +from tasks.scheduled.cleanup_zip_cache import cleanup_zip_cache_task from tasks.scheduled.convert_images_to_webp import convert_images_to_webp_task from tasks.scheduled.scan_library import scan_library_task from tasks.scheduled.sync_retroachievements_progress import ( @@ -51,6 +52,7 @@ async def main() -> None: # Initialize scheduled tasks cleanup_netplay_task.init() + cleanup_zip_cache_task.init() if ENABLE_SCHEDULED_RESCAN: log.info("Starting scheduled rescan") diff --git a/backend/tasks/scheduled/cleanup_zip_cache.py b/backend/tasks/scheduled/cleanup_zip_cache.py new file mode 100644 index 0000000000..f81436861e --- /dev/null +++ b/backend/tasks/scheduled/cleanup_zip_cache.py @@ -0,0 +1,28 @@ +from logger.logger import log +from tasks.tasks import PeriodicTask, TaskType +from utils.zip_cache import cleanup_stale_zips + + +class CleanupZipCacheTask(PeriodicTask): + def __init__(self): + super().__init__( + title="Scheduled ZIP cache cleanup", + description="Removes stale cached ZIP files based on tiered TTL", + task_type=TaskType.CLEANUP, + enabled=True, + manual_run=False, + cron_string="0 4 * * *", + func="tasks.scheduled.cleanup_zip_cache.cleanup_zip_cache_task.run", + ) + + async def run(self) -> None: + if not self.enabled: + self.unschedule() + return + + deleted = cleanup_stale_zips() + if deleted: + log.info(f"Cleaned up {deleted} stale cached ZIP files") + + +cleanup_zip_cache_task = CleanupZipCacheTask() diff --git a/backend/tests/tasks/test_cleanup_zip_cache.py b/backend/tests/tasks/test_cleanup_zip_cache.py new file mode 100644 index 0000000000..75bb9019be --- /dev/null +++ b/backend/tests/tasks/test_cleanup_zip_cache.py @@ -0,0 +1,30 @@ +from unittest.mock import MagicMock + +from tasks.scheduled.cleanup_zip_cache import CleanupZipCacheTask + + +class TestCleanupZipCacheTask: + def test_configuration(self): + task = CleanupZipCacheTask() + assert task.enabled is True + assert task.cron_string == "0 4 * * *" + assert "cleanup_zip_cache" in task.func + + async def test_run_calls_cleanup(self, mocker): + task = CleanupZipCacheTask() + mock_cleanup = mocker.patch( + "tasks.scheduled.cleanup_zip_cache.cleanup_stale_zips", + return_value=3, + ) + await task.run() + mock_cleanup.assert_called_once_with() + + async def test_run_disabled_unschedules(self, mocker): + task = CleanupZipCacheTask() + task.enabled = False + mocker.patch.object(task, "unschedule", MagicMock()) + mock_cleanup = mocker.patch( + "tasks.scheduled.cleanup_zip_cache.cleanup_stale_zips", + ) + await task.run() + mock_cleanup.assert_not_called() diff --git a/backend/tests/utils/test_m3u.py b/backend/tests/utils/test_m3u.py new file mode 100644 index 0000000000..9abe4454e7 --- /dev/null +++ b/backend/tests/utils/test_m3u.py @@ -0,0 +1,59 @@ +from unittest.mock import MagicMock + +from utils.m3u import generate_m3u_content + + +def _make_file(name: str, extension: str, download_name: str | None = None): + f = MagicMock() + f.file_extension = extension + f.file_name_for_download.return_value = download_name or name + return f + + +class TestGenerateM3uContent: + def test_single_file(self): + files = [_make_file("game.bin", "bin")] + result = generate_m3u_content(files, hidden_folder=False) + assert result == b"game.bin" + files[0].file_name_for_download.assert_called_once_with(False) + + def test_multiple_files(self): + files = [ + _make_file("disc1.chd", "chd"), + _make_file("disc2.chd", "chd"), + _make_file("disc3.chd", "chd"), + ] + result = generate_m3u_content(files, hidden_folder=False) + assert result == b"disc1.chd\ndisc2.chd\ndisc3.chd" + + def test_cue_files_preferred_over_bin(self): + files = [ + _make_file("track01.bin", "bin"), + _make_file("track02.bin", "bin"), + _make_file("game.cue", "cue"), + ] + result = generate_m3u_content(files, hidden_folder=False) + assert result == b"game.cue" + + def test_cue_case_insensitive(self): + files = [ + _make_file("track.bin", "bin"), + _make_file("game.CUE", "CUE", download_name="game.CUE"), + ] + result = generate_m3u_content(files, hidden_folder=False) + assert result == b"game.CUE" + + def test_hidden_folder_passed_through(self): + files = [_make_file("game.chd", "chd", download_name=".hidden/game.chd")] + result = generate_m3u_content(files, hidden_folder=True) + assert result == b".hidden/game.chd" + files[0].file_name_for_download.assert_called_once_with(True) + + def test_no_cue_files_lists_all(self): + files = [ + _make_file("disc1.chd", "chd"), + _make_file("disc2.chd", "chd"), + ] + result = generate_m3u_content(files, hidden_folder=False) + lines = result.decode().split("\n") + assert len(lines) == 2 diff --git a/backend/tests/utils/test_zip_cache.py b/backend/tests/utils/test_zip_cache.py new file mode 100644 index 0000000000..9d02ed3421 --- /dev/null +++ b/backend/tests/utils/test_zip_cache.py @@ -0,0 +1,407 @@ +import os +import time +from zipfile import ZipFile + +import pytest + +from utils.zip_cache import ( + BULK_CACHE_MAX_ROMS, + BULK_NAMESPACE_PREFIX, + CACHE_KEY_LENGTH, + DEFAULT_TTL_HOURS, + LARGE_ZIP_THRESHOLD_BYTES, + LARGE_ZIP_TTL_HOURS, + SECONDS_PER_HOUR, + ZipFileEntry, + build_cached_zip, + cleanup_stale_zips, + ensure_space_for_cache, + get_bulk_namespace, + get_cache_key, + get_cached_zip, + get_ttl_hours, + get_zip_redirect_path, +) + + +def _entry( + name: str = "game.bin", size: int = 1024, epoch: float = 1000.0 +) -> ZipFileEntry: + return ZipFileEntry( + download_name=name, + full_path=f"roms/nes/{name}", + file_size_bytes=size, + updated_at_epoch=epoch, + ) + + +class TestGetCacheKey: + def test_deterministic(self): + entries = [_entry("a.bin", 100), _entry("b.bin", 200)] + key1 = get_cache_key("1", entries, False) + key2 = get_cache_key("1", entries, False) + assert key1 == key2 + + def test_different_namespaces(self): + entries = [_entry()] + assert get_cache_key("1", entries, False) != get_cache_key("2", entries, False) + + def test_different_hidden_folder(self): + entries = [_entry()] + assert get_cache_key("1", entries, False) != get_cache_key("1", entries, True) + + def test_different_files(self): + e1 = [_entry("a.bin")] + e2 = [_entry("b.bin")] + assert get_cache_key("1", e1, False) != get_cache_key("1", e2, False) + + def test_different_updated_at(self): + e1 = [_entry(epoch=1000.0)] + e2 = [_entry(epoch=2000.0)] + assert get_cache_key("1", e1, False) != get_cache_key("1", e2, False) + + def test_order_independent(self): + entries_a = [_entry("a.bin", 100), _entry("b.bin", 200)] + entries_b = [_entry("b.bin", 200), _entry("a.bin", 100)] + assert get_cache_key("1", entries_a, False) == get_cache_key( + "1", entries_b, False + ) + + def test_returns_hex_string(self): + key = get_cache_key("1", [_entry()], False) + assert len(key) == CACHE_KEY_LENGTH + int(key, 16) + + def test_file_change_invalidates_cache(self): + entries_v1 = [ + _entry("disc1.chd", 500, epoch=1000.0), + _entry("disc2.chd", 600, epoch=1000.0), + ] + entries_v2 = [ + _entry("disc1.chd", 500, epoch=1000.0), + _entry("disc2.chd", 600, epoch=2000.0), + ] + assert get_cache_key("1", entries_v1, False) != get_cache_key( + "1", entries_v2, False + ) + + def test_file_size_change_invalidates_cache(self): + e1 = [_entry("game.bin", size=1000, epoch=1000.0)] + e2 = [_entry("game.bin", size=2000, epoch=1000.0)] + assert get_cache_key("1", e1, False) != get_cache_key("1", e2, False) + + def test_bulk_namespace(self): + entries = [_entry()] + key_single = get_cache_key("42", entries) + key_bulk = get_cache_key("bulk-abc123", entries) + assert key_single != key_bulk + + def test_empty_entries(self): + key = get_cache_key("1", [], False) + assert len(key) == CACHE_KEY_LENGTH + int(key, 16) + + +class TestGetBulkNamespace: + def test_deterministic(self): + assert get_bulk_namespace([1, 2, 3]) == get_bulk_namespace([1, 2, 3]) + + def test_order_independent(self): + assert get_bulk_namespace([3, 1, 2]) == get_bulk_namespace([1, 2, 3]) + + def test_starts_with_prefix(self): + ns = get_bulk_namespace([1, 2]) + assert ns.startswith(f"{BULK_NAMESPACE_PREFIX}-") + + def test_short_enough_for_filesystem(self): + ids = list(range(1, 101)) + ns = get_bulk_namespace(ids) + assert len(ns) < 255 + + def test_different_ids_produce_different_namespace(self): + assert get_bulk_namespace([1, 2]) != get_bulk_namespace([1, 3]) + + +class TestGetCachedZip: + def test_returns_none_when_missing(self, tmp_path, mocker): + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) + assert get_cached_zip("1", "abc123") is None + + def test_returns_path_when_exists(self, tmp_path, mocker): + ns_dir = tmp_path / "1" + ns_dir.mkdir() + (ns_dir / "abc123.zip").write_bytes(b"fake") + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) + result = get_cached_zip("1", "abc123") + assert result is not None + assert result.name == "abc123.zip" + + def test_old_key_not_returned_for_new_key(self, tmp_path, mocker): + ns_dir = tmp_path / "1" + ns_dir.mkdir() + (ns_dir / "oldkey.zip").write_bytes(b"old content") + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) + assert get_cached_zip("1", "oldkey") is not None + assert get_cached_zip("1", "newkey") is None + + +class TestBuildCachedZip: + @pytest.fixture + def source_files(self, tmp_path): + lib = tmp_path / "library" + rom_dir = lib / "roms" / "nes" + rom_dir.mkdir(parents=True) + (rom_dir / "disc1.chd").write_bytes(b"disc1data") + (rom_dir / "disc2.chd").write_bytes(b"disc2data") + return lib + + def test_builds_valid_zip(self, tmp_path, source_files, mocker): + cache_dir = tmp_path / "cache" + entries = [ + ZipFileEntry("disc1.chd", "roms/nes/disc1.chd", 9, 1000.0), + ZipFileEntry("disc2.chd", "roms/nes/disc2.chd", 9, 1000.0), + ] + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(cache_dir)) + mocker.patch("utils.zip_cache.LIBRARY_BASE_PATH", str(source_files)) + result = build_cached_zip( + namespace="42", + entries=entries, + m3u_content=None, + m3u_filename=None, + cache_key="testkey", + ) + + assert result.exists() + with ZipFile(result) as zf: + names = zf.namelist() + assert "disc1.chd" in names + assert "disc2.chd" in names + assert zf.read("disc1.chd") == b"disc1data" + + def test_includes_m3u(self, tmp_path, source_files, mocker): + cache_dir = tmp_path / "cache" + entries = [ZipFileEntry("disc1.chd", "roms/nes/disc1.chd", 9, 1000.0)] + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(cache_dir)) + mocker.patch("utils.zip_cache.LIBRARY_BASE_PATH", str(source_files)) + result = build_cached_zip( + namespace="42", + entries=entries, + m3u_content=b"disc1.chd\ndisc2.chd", + m3u_filename="game.m3u", + cache_key="testkey2", + ) + + with ZipFile(result) as zf: + assert "game.m3u" in zf.namelist() + assert zf.read("game.m3u") == b"disc1.chd\ndisc2.chd" + + def test_skips_build_if_exists(self, tmp_path, mocker): + cache_dir = tmp_path / "cache" / "42" + cache_dir.mkdir(parents=True) + existing = cache_dir / "existing.zip" + existing.write_bytes(b"already here") + + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path / "cache")) + result = build_cached_zip( + namespace="42", + entries=[ZipFileEntry("disc1.chd", "roms/nes/disc1.chd", 9, 1000.0)], + m3u_content=None, + m3u_filename=None, + cache_key="existing", + ) + assert result.read_bytes() == b"already here" + + def test_cleans_up_temp_on_error(self, tmp_path, source_files, mocker): + cache_dir = tmp_path / "cache" + entries = [ZipFileEntry("missing.bin", "roms/nes/missing.bin", 100, 1000.0)] + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(cache_dir)) + mocker.patch("utils.zip_cache.LIBRARY_BASE_PATH", str(source_files)) + + with pytest.raises(FileNotFoundError): + build_cached_zip( + namespace="42", + entries=entries, + m3u_content=None, + m3u_filename=None, + cache_key="failkey", + ) + + rom_cache = cache_dir / "42" + if rom_cache.exists(): + assert not list(rom_cache.glob("*.tmp")) + + def test_new_key_builds_alongside_old(self, tmp_path, source_files, mocker): + cache_dir = tmp_path / "cache" + rom_cache = cache_dir / "42" + rom_cache.mkdir(parents=True) + old_zip = rom_cache / "oldkey.zip" + old_zip.write_bytes(b"old zip content") + + entries = [ZipFileEntry("disc1.chd", "roms/nes/disc1.chd", 9, 2000.0)] + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(cache_dir)) + mocker.patch("utils.zip_cache.LIBRARY_BASE_PATH", str(source_files)) + build_cached_zip( + namespace="42", + entries=entries, + m3u_content=None, + m3u_filename=None, + cache_key="newkey", + ) + + assert old_zip.exists(), "Old cache must not be deleted during build" + assert (rom_cache / "newkey.zip").exists() + + +class TestGetZipRedirectPath: + def test_single_rom_path(self): + assert str(get_zip_redirect_path("42", "abc123")) == "/cache/zips/42/abc123.zip" + + def test_bulk_path(self): + ns = get_bulk_namespace([1, 2, 3]) + path = str(get_zip_redirect_path(ns, "abc123")) + assert path.startswith("/cache/zips/bulk-") + assert path.endswith("/abc123.zip") + + +class TestEnsureSpaceForCache: + def test_returns_true_with_enough_space(self, tmp_path, mocker): + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) + assert ensure_space_for_cache([_entry(size=1024)]) is True + + def test_returns_false_with_insufficient_space(self, tmp_path, mocker): + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) + mocker.patch("utils.zip_cache._get_available_space", return_value=100) + assert ensure_space_for_cache([_entry(size=1024)]) is False + + def test_requires_2x_buffer(self, tmp_path, mocker): + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) + # 500 bytes available, entry is 300 -> 2x = 600 > 500 -> False + mocker.patch("utils.zip_cache._get_available_space", return_value=500) + assert ensure_space_for_cache([_entry(size=300)]) is False + + def test_evicts_old_entries_to_make_space(self, tmp_path, mocker): + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) + + ns_dir = tmp_path / "old_rom" + ns_dir.mkdir() + old_zip = ns_dir / "stale.zip" + old_zip.write_bytes(b"x" * 1024) + old_time = time.time() - (2 * SECONDS_PER_HOUR) + os.utime(old_zip, (old_time, old_time)) + + call_count = [0] + + def fake_space(): + call_count[0] += 1 + return 100 if call_count[0] <= 1 else 999999 + + mocker.patch("utils.zip_cache._get_available_space", side_effect=fake_space) + + assert ensure_space_for_cache([_entry(size=512)]) is True + assert not old_zip.exists() + + def test_does_not_evict_recent_entries(self, tmp_path, mocker): + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) + mocker.patch("utils.zip_cache._get_available_space", return_value=100) + + ns_dir = tmp_path / "active_rom" + ns_dir.mkdir() + fresh_zip = ns_dir / "fresh.zip" + fresh_zip.write_bytes(b"x" * 1024) + + ensure_space_for_cache([_entry(size=512)]) + assert fresh_zip.exists() + + +class TestGetTtlHours: + def test_small_zip_gets_default_ttl(self): + entries = [_entry(size=1024)] + assert get_ttl_hours(entries) == DEFAULT_TTL_HOURS + + def test_large_zip_gets_reduced_ttl(self): + entries = [_entry(size=LARGE_ZIP_THRESHOLD_BYTES + 1)] + assert get_ttl_hours(entries) == LARGE_ZIP_TTL_HOURS + + +class TestCleanupStaleZips: + def test_deletes_old_files(self, tmp_path, mocker): + ns_dir = tmp_path / "1" + ns_dir.mkdir() + old_zip = ns_dir / "old.zip" + old_zip.write_bytes(b"stale") + old_time = time.time() - (DEFAULT_TTL_HOURS + 1) * SECONDS_PER_HOUR + os.utime(old_zip, (old_time, old_time)) + + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) + assert cleanup_stale_zips() == 1 + assert not old_zip.exists() + + def test_keeps_recent_files(self, tmp_path, mocker): + ns_dir = tmp_path / "1" + ns_dir.mkdir() + recent = ns_dir / "recent.zip" + recent.write_bytes(b"fresh") + + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) + assert cleanup_stale_zips() == 0 + assert recent.exists() + + def test_removes_empty_directories(self, tmp_path, mocker): + ns_dir = tmp_path / "1" + ns_dir.mkdir() + old_zip = ns_dir / "old.zip" + old_zip.write_bytes(b"stale") + old_time = time.time() - (DEFAULT_TTL_HOURS + 1) * SECONDS_PER_HOUR + os.utime(old_zip, (old_time, old_time)) + + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) + cleanup_stale_zips() + assert not ns_dir.exists() + + def test_handles_missing_cache_dir(self, tmp_path, mocker): + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path / "nonexistent")) + assert cleanup_stale_zips() == 0 + + def test_keeps_dir_with_remaining_files(self, tmp_path, mocker): + ns_dir = tmp_path / "1" + ns_dir.mkdir() + + old_zip = ns_dir / "old.zip" + old_zip.write_bytes(b"stale") + old_time = time.time() - (DEFAULT_TTL_HOURS + 1) * SECONDS_PER_HOUR + os.utime(old_zip, (old_time, old_time)) + + fresh_zip = ns_dir / "fresh.zip" + fresh_zip.write_bytes(b"current") + + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) + assert cleanup_stale_zips() == 1 + assert not old_zip.exists() + assert fresh_zip.exists() + assert ns_dir.exists() + + def test_large_files_use_shorter_ttl(self, tmp_path, mocker): + ns_dir = tmp_path / "1" + ns_dir.mkdir() + large_zip = ns_dir / "large.zip" + # Use truncate for sparse file instead of allocating 8GB+ + with open(large_zip, "wb") as f: + f.truncate(LARGE_ZIP_THRESHOLD_BYTES + 1) + age = (LARGE_ZIP_TTL_HOURS + 1) * SECONDS_PER_HOUR + old_time = time.time() - age + os.utime(large_zip, (old_time, old_time)) + + mocker.patch("utils.zip_cache.ZIP_CACHE_PATH", str(tmp_path)) + assert cleanup_stale_zips() == 1 + + +class TestConstants: + def test_bulk_cache_max_roms(self): + assert BULK_CACHE_MAX_ROMS == 100 + + def test_large_zip_threshold(self): + assert LARGE_ZIP_THRESHOLD_BYTES == 8 * 1024 * 1024 * 1024 + + def test_ttl_values(self): + assert DEFAULT_TTL_HOURS == 48 + assert LARGE_ZIP_TTL_HOURS == 12 diff --git a/backend/utils/m3u.py b/backend/utils/m3u.py new file mode 100644 index 0000000000..b8f5275b71 --- /dev/null +++ b/backend/utils/m3u.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from models.rom import RomFile + + +def generate_m3u_content( + files: list[RomFile], + hidden_folder: bool, +) -> bytes: + """Generate M3U playlist content for multi-file ROMs. + + If .cue files are present, only those are listed (avoids invalid entries + like raw .bin tracks). Otherwise all files are listed. + """ + cue_files = [f for f in files if f.file_extension.lower() == "cue"] + m3u_files = cue_files if cue_files else files + return "\n".join( + f.file_name_for_download(hidden_folder) for f in m3u_files + ).encode() diff --git a/backend/utils/zip_cache.py b/backend/utils/zip_cache.py new file mode 100644 index 0000000000..905df176e1 --- /dev/null +++ b/backend/utils/zip_cache.py @@ -0,0 +1,224 @@ +from __future__ import annotations + +import dataclasses +import hashlib +import importlib +import os +import tempfile +import time +import zipfile +from pathlib import Path + +from config import LIBRARY_BASE_PATH, ZIP_CACHE_PATH +from logger.formatter import highlight as hl +from logger.logger import log + +CACHE_KEY_LENGTH = 16 +DISK_SPACE_MULTIPLIER = 2 +SECONDS_PER_HOUR = 3600 +LARGE_ZIP_THRESHOLD_BYTES = 8 * 1024 * 1024 * 1024 # 8 GB +DEFAULT_TTL_HOURS = 48 +LARGE_ZIP_TTL_HOURS = 12 +BULK_CACHE_MAX_ROMS = 100 +BULK_NAMESPACE_PREFIX = "bulk" +MIN_EVICT_AGE_HOURS = 1 + + +@dataclasses.dataclass(frozen=True) +class ZipFileEntry: + """Thread-safe snapshot of a RomFile's download-relevant data.""" + + download_name: str + full_path: str + file_size_bytes: int + updated_at_epoch: float + + +def get_cache_key( + namespace: str, + entries: list[ZipFileEntry], + hidden_folder: bool = False, +) -> str: + """Deterministic cache key derived from content state.""" + if not entries: + return hashlib.sha256(f"{namespace}:empty".encode()).hexdigest()[ + :CACHE_KEY_LENGTH + ] + parts = [ + namespace, + str(hidden_folder), + str(max(e.updated_at_epoch for e in entries)), + ] + for e in sorted(entries, key=lambda x: x.download_name): + parts.append(f"{e.download_name}:{e.file_size_bytes}") + return hashlib.sha256("|".join(parts).encode()).hexdigest()[:CACHE_KEY_LENGTH] + + +def get_bulk_namespace(rom_ids: list[int]) -> str: + """Deterministic namespace for bulk downloads, hashed to avoid ENAMETOOLONG.""" + id_str = "-".join(str(i) for i in sorted(rom_ids)) + id_hash = hashlib.sha256(id_str.encode()).hexdigest()[:CACHE_KEY_LENGTH] + return f"{BULK_NAMESPACE_PREFIX}-{id_hash}" + + +def _cache_dir(namespace: str) -> Path: + return Path(ZIP_CACHE_PATH) / namespace + + +def _cache_file(namespace: str, cache_key: str) -> Path: + return _cache_dir(namespace) / f"{cache_key}.zip" + + +def get_cached_zip(namespace: str, cache_key: str) -> Path | None: + """Return the cached ZIP path if it exists on disk, else None.""" + path = _cache_file(namespace, cache_key) + return path if path.exists() else None + + +def _get_available_space() -> int: + cache_root = Path(ZIP_CACHE_PATH) + cache_root.mkdir(parents=True, exist_ok=True) + stat = os.statvfs(cache_root) + return stat.f_bavail * stat.f_frsize + + +def _get_all_cached_zips() -> list[Path]: + """Return all cached ZIP files sorted by mtime ascending (oldest first).""" + cache_root = Path(ZIP_CACHE_PATH) + if not cache_root.exists(): + return [] + zips = [] + for ns_dir in cache_root.iterdir(): + if not ns_dir.is_dir(): + continue + for zip_file in ns_dir.glob("*.zip"): + zips.append(zip_file) + zips.sort(key=lambda p: p.stat().st_mtime) + return zips + + +def ensure_space_for_cache(entries: list[ZipFileEntry]) -> bool: + """Check available disk space, evicting oldest cached ZIPs if needed. + + Requires DISK_SPACE_MULTIPLIER times the estimated ZIP size. Evicts + cached entries older than MIN_EVICT_AGE_HOURS until enough space is + available or no more evictable entries remain. + """ + estimated_size = sum(e.file_size_bytes for e in entries) + required = estimated_size * DISK_SPACE_MULTIPLIER + + if _get_available_space() > required: + return True + + evict_cutoff = time.time() - (MIN_EVICT_AGE_HOURS * SECONDS_PER_HOUR) + for zip_file in _get_all_cached_zips(): + if zip_file.stat().st_mtime > evict_cutoff: + continue + freed = zip_file.stat().st_size + zip_file.unlink() + parent = zip_file.parent + if parent.exists() and not any(parent.iterdir()): + parent.rmdir() + log.debug(f"Evicted cached ZIP {hl(zip_file.name)} ({freed} bytes)") + if _get_available_space() > required: + return True + + return _get_available_space() > required + + +def _reload_zipfile() -> None: + """Reload the zipfile module to restore stdlib internals. + + A third-party library in the import chain overrides + zipfile._get_compressor with an incompatible signature on + CPython 3.13, breaking ZipFile.write() and writestr(). Reloading + restores the original implementation. + """ + importlib.reload(zipfile) + + +def build_cached_zip( + namespace: str, + entries: list[ZipFileEntry], + m3u_content: bytes | None, + m3u_filename: str | None, + cache_key: str, +) -> Path: + """Build a ZIP_STORED archive on disk and return its path. + + Writes to a temp file in the same directory, then atomically renames to + the final path to prevent serving partial files. + """ + target = _cache_file(namespace, cache_key) + if target.exists(): + return target + + target.parent.mkdir(parents=True, exist_ok=True) + + fd, tmp_path = tempfile.mkstemp(dir=target.parent, suffix=".tmp") + try: + os.close(fd) + _reload_zipfile() + with zipfile.ZipFile(tmp_path, "w") as zf: + for entry in entries: + src = Path(LIBRARY_BASE_PATH) / entry.full_path + zf.write(src, arcname=entry.download_name) + + if m3u_content is not None and m3u_filename is not None: + zf.writestr(m3u_filename, m3u_content) + + os.rename(tmp_path, target) + except BaseException: + if os.path.exists(tmp_path): + os.unlink(tmp_path) + raise + + log.info(f"Built cached ZIP in {hl(namespace)}: {hl(target.name)}") + return target + + +def get_zip_redirect_path(namespace: str, cache_key: str) -> Path: + """Return the nginx-internal URL path for the cached ZIP.""" + return Path(f"/cache/zips/{namespace}/{cache_key}.zip") + + +def get_ttl_hours(entries: list[ZipFileEntry]) -> int: + """Return the appropriate TTL based on estimated ZIP size.""" + estimated_size = sum(e.file_size_bytes for e in entries) + if estimated_size > LARGE_ZIP_THRESHOLD_BYTES: + return LARGE_ZIP_TTL_HOURS + return DEFAULT_TTL_HOURS + + +def cleanup_stale_zips() -> int: + """Remove cached ZIPs that have exceeded their TTL. + + Files larger than LARGE_ZIP_THRESHOLD_BYTES use LARGE_ZIP_TTL_HOURS, + all others use DEFAULT_TTL_HOURS. + """ + cache_root = Path(ZIP_CACHE_PATH) + if not cache_root.exists(): + return 0 + + now = time.time() + default_cutoff = now - (DEFAULT_TTL_HOURS * SECONDS_PER_HOUR) + large_cutoff = now - (LARGE_ZIP_TTL_HOURS * SECONDS_PER_HOUR) + deleted = 0 + + for ns_dir in cache_root.iterdir(): + if not ns_dir.is_dir(): + continue + for zip_file in ns_dir.glob("*.zip"): + stat = zip_file.stat() + cutoff = ( + large_cutoff + if stat.st_size > LARGE_ZIP_THRESHOLD_BYTES + else default_cutoff + ) + if stat.st_mtime < cutoff: + zip_file.unlink() + deleted += 1 + if ns_dir.exists() and not any(ns_dir.iterdir()): + ns_dir.rmdir() + + return deleted diff --git a/docker/nginx/templates/default.conf.template b/docker/nginx/templates/default.conf.template index 80b58e785d..477e8d84f4 100644 --- a/docker/nginx/templates/default.conf.template +++ b/docker/nginx/templates/default.conf.template @@ -71,6 +71,12 @@ server { alias "${ROMM_BASE_PATH}/library/"; } + # Internally redirect cached zip file requests (Range-resumable downloads) + location /cache/ { + internal; + alias "${ROMM_BASE_PATH}/cache/"; + } + # Internal decoding endpoint, used to decode base64 encoded data location /decode { internal;