diff --git a/chris_backend/core/models.py b/chris_backend/core/models.py index e73fad7d..f6816211 100644 --- a/chris_backend/core/models.py +++ b/chris_backend/core/models.py @@ -3,6 +3,7 @@ import uuid import io import os +import pathlib from django.db import models from django.db.models.functions import Length @@ -36,6 +37,83 @@ def validate_permission(permission): return permission +def user_can_access_obj(obj, user): + """ + Return whether the given user is allowed to read-access the given storage + object (a ``ChrisFolder``, ``ChrisFile`` or ``ChrisLinkFile``). + + This is the canonical owner/superuser/public/permission read-access rule. + The object is accessible if the user owns it, is the superuser 'chris', the + object is public, or the user has been granted any permission to it + (possibly through one of their groups). + """ + return (obj.owner == user or user.username == 'chris' or obj.public + or obj.has_user_permission(user)) + + +class PathAccessError(Exception): + """ + Raised by ``validate_path_access`` when a user is not allowed to access a + storage path. The exception message is a human-readable reason suitable for + surfacing to API clients. + """ + pass + + +def validate_path_access(user, path): + """ + Check whether the given user is allowed to access the given storage path. + + Enforces the structural path rules, the PUBLIC/SHARED link-target restriction + and the owner/public/permission access rule. This is the single source of + truth for path-access authorization. + + Returns the normalized path on success. + Raises ``PathAccessError`` with a human-readable reason on failure. + """ + path = path.strip().strip('/') + path_parts = pathlib.Path(path).parts + + if len(path_parts) < 2: + # trying to access a top-level folder or an unknown folder + raise PathAccessError( + f"This field may not reference a top-level folder path '{path}'.") + + if path_parts[0] not in ('home', 'SERVICES', 'PIPELINES'): + raise PathAccessError( + f"This field may not reference an invalid path '{path}'.") + + if len(path_parts) == 2 and path_parts[0] == 'home': + raise PathAccessError( + f"This field may not reference a home folder path '{path}'.") + + if len(path_parts) == 3 and path_parts[0] == 'home' and path_parts[2] == 'feeds': + raise PathAccessError( + f"This field may not reference a home's feeds folder path '{path}'.") + + try: + obj = ChrisFolder.objects.get(path=path) + except ChrisFolder.DoesNotExist: # path is not a folder + try: + obj = ChrisFile.objects.get(fname=path) + except ChrisFile.DoesNotExist: # path is not a file + try: + obj = ChrisLinkFile.objects.get(fname=path) + + if obj.path in ('PUBLIC', 'SHARED'): + raise PathAccessError( + f"This field may not reference an invalid path '{path}'.") + + except ChrisLinkFile.DoesNotExist: # path is not a link file + raise PathAccessError( + f"This field may not reference an invalid path '{path}'.") + + if not user_can_access_obj(obj, user): + raise PathAccessError( + f"User does not have permission to access path '{path}'.") + return path + + class AsyncDeletableModel(models.Model): class DeletionStatus(models.TextChoices): INACTIVE = 'inactive' diff --git a/chris_backend/core/tests/test_models.py b/chris_backend/core/tests/test_models.py index 6f9192aa..7b4c336d 100755 --- a/chris_backend/core/tests/test_models.py +++ b/chris_backend/core/tests/test_models.py @@ -1,12 +1,17 @@ +import io import logging import os from unittest import mock -from django.test import TestCase +from django.test import TestCase, tag +from django.conf import settings from django.contrib.auth.models import User -from core.models import ChrisFolder, ChrisFile +from core.models import (ChrisFolder, ChrisFile, ChrisLinkFile, PathAccessError, + validate_path_access, user_can_access_obj) +from core.storage import connect_storage +from userfiles.models import UserFile class ModelTests(TestCase): @@ -84,3 +89,175 @@ def test_move(self): self.assertEqual(self.file.fname.name, new_path) self.assertEqual(self.file.parent_folder.path, f'home/{self.username}/tests') storage_manager_mock.delete_obj.assert_called_with(self.upload_path) + + +class UserCanAccessObjTests(ModelTests): + """ + Tests for the canonical ``user_can_access_obj`` read-access rule. + """ + + def test_owner_chris_public_and_permission_rule(self): + """ + Test the owner/superuser/public/permission read-access rule. + """ + user = User.objects.get(username=self.username) + chris = User.objects.get(username='chris') + other = User.objects.create_user(username='other3', password='o-pass') + + folder, _ = ChrisFolder.objects.get_or_create( + path='home/other3/uploads', owner=other) + + # owner -> allowed + self.assertTrue(user_can_access_obj(folder, other)) + # superuser 'chris' -> allowed + self.assertTrue(user_can_access_obj(folder, chris)) + # unrelated user, not public, no permission -> denied + self.assertFalse(user_can_access_obj(folder, user)) + + # public -> allowed + folder.public = True + folder.save() + self.assertTrue(user_can_access_obj(folder, user)) + + # explicitly shared with the user -> allowed + folder.public = False + folder.save() + folder.grant_user_permission(user, 'r') + self.assertTrue(user_can_access_obj(folder, user)) + + +class ValidatePathAccessTests(ModelTests): + """ + Tests for the centralized ``validate_path_access`` path-access + authorization function. + """ + + def test_rejects_structural_paths_with_exact_messages(self): + """ + Test whether validate_path_access rejects structurally invalid paths and + that the exact, distinct error messages are preserved. + """ + user = User.objects.get(username=self.username) + + cases = [ + ('home', + "This field may not reference a top-level folder path 'home'."), + ('NOTAROOT/x', + "This field may not reference an invalid path 'NOTAROOT/x'."), + (f'home/{self.username}', + f"This field may not reference a home folder path " + f"'home/{self.username}'."), + (f'home/{self.username}/feeds', + f"This field may not reference a home's feeds folder path " + f"'home/{self.username}/feeds'."), + ] + for path, expected_msg in cases: + with self.assertRaises(PathAccessError) as cm: + validate_path_access(user, path) + self.assertEqual(str(cm.exception), expected_msg) + + def test_rejects_nonexistent_path_with_exact_message(self): + """ + Test whether validate_path_access rejects a structurally valid path that + does not resolve to any folder/file/link file. + """ + user = User.objects.get(username=self.username) + with self.assertRaises(PathAccessError) as cm: + validate_path_access(user, 'home/someone/uploads') + self.assertEqual( + str(cm.exception), + "This field may not reference an invalid path 'home/someone/uploads'.") + + def test_folder_owner_public_and_permission_rule(self): + """ + Test the owner/public/permission access rule and that a normalized path + is returned on success. + """ + user = User.objects.get(username=self.username) + own_folder, _ = ChrisFolder.objects.get_or_create( + path=f'home/{self.username}/uploads', owner=user) + + # owner -> allowed; trailing slash is normalized away in the return value + self.assertEqual(validate_path_access(user, own_folder.path + '/'), + own_folder.path) + + other = User.objects.create_user(username='other', password='other-pass') + other_folder, _ = ChrisFolder.objects.get_or_create( + path='home/other/uploads', owner=other) + + # not owner / not public / no permission -> denied (permission message) + with self.assertRaises(PathAccessError) as cm: + validate_path_access(user, other_folder.path) + self.assertEqual( + str(cm.exception), + "User does not have permission to access path 'home/other/uploads'.") + + # public -> allowed + other_folder.public = True + other_folder.save() + self.assertEqual(validate_path_access(user, other_folder.path), + other_folder.path) + + # explicitly shared with the user -> allowed + other_folder.public = False + other_folder.save() + other_folder.grant_user_permission(user, 'r') + self.assertEqual(validate_path_access(user, other_folder.path), + other_folder.path) + + def test_chris_superuser_bypass(self): + """ + Test whether the 'chris' superuser is allowed to access any path. + """ + chris = User.objects.get(username='chris') + other = User.objects.create_user(username='other2', password='o-pass') + other_folder, _ = ChrisFolder.objects.get_or_create( + path='home/other2/uploads', owner=other) + self.assertEqual(validate_path_access(chris, other_folder.path), + other_folder.path) + + +@tag('integration') +class ValidatePathAccessStorageTests(ModelTests): + """ + Storage-backed tests for ``validate_path_access`` covering the ChrisFile and + ChrisLinkFile resolution branches and the PUBLIC/SHARED link-target rule. + """ + + def setUp(self): + super(ValidatePathAccessStorageTests, self).setUp() + self.storage_manager = connect_storage(settings) + + def test_resolves_file_and_link_targets(self): + user = User.objects.get(username=self.username) + folder, _ = ChrisFolder.objects.get_or_create( + path=f'home/{self.username}/uploads', owner=user) + + # ChrisFile branch -> owned by user -> allowed + fpath = f'home/{self.username}/uploads/f.txt' + with io.StringIO('x') as f: + self.storage_manager.upload_obj(fpath, f.read(), + content_type='text/plain') + uf = UserFile(owner=user, parent_folder=folder) + uf.fname.name = fpath + uf.save() + self.assertEqual(validate_path_access(user, fpath), fpath) + + # ChrisLinkFile branch, target not PUBLIC/SHARED -> allowed + lf = ChrisLinkFile(path=f'home/{self.username}/uploads', owner=user, + parent_folder=folder) + lf.save(name='mylink') + self.assertEqual(validate_path_access(user, lf.fname.name), + lf.fname.name) + + # ChrisLinkFile whose target is PUBLIC -> rejected even though owned + lf2 = ChrisLinkFile(path='PUBLIC', owner=user, parent_folder=folder) + lf2.save(name='publink') + with self.assertRaises(PathAccessError) as cm: + validate_path_access(user, lf2.fname.name) + self.assertEqual( + str(cm.exception), + f"This field may not reference an invalid path '{lf2.fname.name}'.") + + # delete files from storage + self.storage_manager.delete_path(f'home/{self.username}/uploads') diff --git a/chris_backend/filebrowser/services.py b/chris_backend/filebrowser/services.py index ad55dbfb..cdf25283 100644 --- a/chris_backend/filebrowser/services.py +++ b/chris_backend/filebrowser/services.py @@ -2,7 +2,7 @@ from django.db import models -from core.models import ChrisFolder +from core.models import ChrisFolder, user_can_access_obj def get_folder_queryset(pk_dict, user=None): @@ -18,8 +18,7 @@ def get_folder_queryset(pk_dict, user=None): if not folder.public: return ChrisFolder.objects.none() else: - if not (folder.owner == user or user.username == 'chris' or folder.public - or folder.has_user_permission(user)): + if not user_can_access_obj(folder, user): return ChrisFolder.objects.none() return qs diff --git a/chris_backend/plugininstances/models.py b/chris_backend/plugininstances/models.py index 856702ab..eb869705 100644 --- a/chris_backend/plugininstances/models.py +++ b/chris_backend/plugininstances/models.py @@ -15,7 +15,8 @@ from plugins.fields import CPUField, MemoryField from plugins.fields import MemoryInt, CPUInt from workflows.models import Workflow -from plugininstances.enums import ACTIVE_STATUSES, INACTIVE_STATUSES, STATUS_CHOICES, REMOTE_CLEANUP_STATUS_CHOICES +from plugininstances.enums import (ACTIVE_STATUSES, INACTIVE_STATUSES, STATUS_CHOICES, + REMOTE_CLEANUP_STATUS_CHOICES) logger = logging.getLogger(__name__) diff --git a/chris_backend/plugininstances/serializers.py b/chris_backend/plugininstances/serializers.py index f26c06ed..2e01f949 100644 --- a/chris_backend/plugininstances/serializers.py +++ b/chris_backend/plugininstances/serializers.py @@ -1,13 +1,11 @@ -import pathlib - from django.core.exceptions import ObjectDoesNotExist from rest_framework import serializers from rest_framework.reverse import reverse from drf_spectacular.utils import OpenApiTypes, extend_schema_field from collectionjson.fields import ItemLinkField -from core.models import ChrisFolder, ChrisFile, ChrisLinkFile +from core.models import PathAccessError, validate_path_access from plugins.enums import TYPES from plugins.models import Plugin @@ -344,51 +342,16 @@ class Meta: def validate_paths(user, string): """ - Custom function to check whether a user is allowed to access the provided paths. + Custom function to check whether a user is allowed to access the provided + paths (a string of one or more paths separated by commas). """ path_list = [s.strip().strip('/') for s in string.split(',')] for path in path_list: - path_parts = pathlib.Path(path).parts - - if len(path_parts) < 2: - # trying to access a top-level folder or an unknown folder - raise serializers.ValidationError( - [f"This field may not reference a top-level folder path '{path}'."]) - - if path_parts[0] not in ('home', 'SERVICES', 'PIPELINES'): - raise serializers.ValidationError( - [f"This field may not reference an invalid path '{path}'."]) - - if len(path_parts) == 2 and path_parts[0] == 'home': - raise serializers.ValidationError( - [f"This field may not reference a home folder path '{path}'."]) - - if len(path_parts) == 3 and path_parts[0] == 'home' and path_parts[2] == 'feeds': - raise serializers.ValidationError( - [f"This field may not reference a home's feeds folder path '{path}'."]) - try: - obj = ChrisFolder.objects.get(path=path) - except ChrisFolder.DoesNotExist: # path is not a folder - try: - obj = ChrisFile.objects.get(fname=path) - except ChrisFile.DoesNotExist: # path is not a file - try: - obj = ChrisLinkFile.objects.get(fname=path) - - if obj.path in ('PUBLIC', 'SHARED'): - raise serializers.ValidationError( - [f"This field may not reference an invalid path '{path}'."]) - - except ChrisLinkFile.DoesNotExist: # path is not a link file - raise serializers.ValidationError( - [f"This field may not reference an invalid path '{path}'."]) - - if not (obj.owner == user or user.username == 'chris' or obj.public - or obj.has_user_permission(user)): - raise serializers.ValidationError( - [f"User does not have permission to access path '{path}'."]) + validate_path_access(user, path) + except PathAccessError as e: + raise serializers.ValidationError([str(e)]) return ','.join(path_list) diff --git a/chris_backend/plugininstances/services/pluginjobs.py b/chris_backend/plugininstances/services/pluginjobs.py index 40dea5c3..b6796953 100644 --- a/chris_backend/plugininstances/services/pluginjobs.py +++ b/chris_backend/plugininstances/services/pluginjobs.py @@ -23,7 +23,8 @@ from rest_framework.authtoken.models import Token from core.utils import json_zip2str -from core.models import ChrisFolder, ChrisFile, ChrisLinkFile +from core.models import (ChrisFolder, ChrisFile, ChrisLinkFile, PathAccessError, + validate_path_access) from plugininstances.models import PluginInstance, PluginInstanceLock from userfiles.models import UserFile from .abstractjobs import PluginInstanceJob @@ -417,6 +418,25 @@ def manage_empty_inputdir(self): raise return str_inputdir + def _check_path_access(self, linked_path): + """ + Raise a ``ValueError`` (and set the CODE17 error code) if the plugin + instance owner is not allowed to access the given path. Authorization is + delegated to ``core.models.validate_path_access`` so the exact + same rule enforced at job submission time is re-enforced at runtime when + ChRIS link files are followed or created (link files can point anywhere, + so they must be re-checked). + """ + try: + validate_path_access(self.c_plugin_inst.owner, linked_path) + except PathAccessError: + job_id = self.str_job_id + logger.error( + f'[CODE17,{job_id}]: Found unauthorized input path {linked_path} ' + f'that the plugin instance owner is not allowed to access') + self.c_plugin_inst.error_code = 'CODE17' + raise ValueError(f'Invalid input path: {linked_path}') + def find_all_storage_object_paths(self, storage_path, obj_paths, visited_paths): """ Find all object storage paths from the passed storage path (prefix) by @@ -457,6 +477,10 @@ def find_all_storage_object_paths(self, storage_path, obj_paths, visited_paths): self.c_plugin_inst.error_code = 'CODE17' raise ValueError(f'Invalid input path: {linked_path}') + # link files can point anywhere, so re-enforce the access rule + # to prevent following links into unauthorized data + self._check_path_access(linked_path) + self.find_all_storage_object_paths(linked_path, obj_paths, visited_paths) # recursive call obj_paths.add(obj_path) @@ -607,6 +631,10 @@ def _create_chris_link_file(self, linked_path, parent_folder): self.c_plugin_inst.error_code = 'CODE17' raise ValueError(f'Invalid input path: {linked_path}') + # re-enforce the access rule: only paths the plugin instance owner is + # authorized to access may be turned into output link files + self._check_path_access(path) + str_source_trace_dir = path.replace('/', '_') try: ChrisFolder.objects.get(path=path) @@ -987,6 +1015,28 @@ def _register_output_files(self): obj_path = changed_file_paths[obj_path] if obj_path: + if obj_path.endswith('.chrislink'): + # Plugins are not authorized to produce ChRIS link files. + # Any link file coming from the remote compute (zip/json + # output) is refused and deleted from storage so subsequent + # jobs cannot follow it. Legit link files created by CUBE for + # 'unextpath' parameters or 'ts' plugins are registered + # directly as ChrisLinkFile objects and never reach this set. + logger.warning(f'Refusing to register link file ' + f'-->{obj_path}<-- produced by job {job_id}; ' + f'deleting it from storage') + try: + self.storage_manager.delete_obj(obj_path) + except Exception as e: + # a non-deletable attacker-controlled link left in an + # output dir is a security-relevant state: fail the job + logger.error(f'[CODE07,{job_id}]: Error while deleting ' + f'unauthorized link file {obj_path} from ' + f'storage, detail: {str(e)}') + self.c_plugin_inst.error_code = 'CODE07' + raise + continue + logger.info(f'Registering file -->{obj_path}<-- for job {job_id}') folder_path = os.path.dirname(obj_path) diff --git a/chris_backend/plugininstances/tests/test_models.py b/chris_backend/plugininstances/tests/test_models.py index 26cfa9f2..a15018f3 100644 --- a/chris_backend/plugininstances/tests/test_models.py +++ b/chris_backend/plugininstances/tests/test_models.py @@ -3,7 +3,7 @@ import os from unittest import mock -from django.test import TestCase, tag +from django.test import TestCase from django.contrib.auth.models import User from django.conf import settings diff --git a/chris_backend/plugininstances/tests/test_pluginjobs.py b/chris_backend/plugininstances/tests/test_pluginjobs.py index 3e33d745..1236a066 100644 --- a/chris_backend/plugininstances/tests/test_pluginjobs.py +++ b/chris_backend/plugininstances/tests/test_pluginjobs.py @@ -12,7 +12,7 @@ from pfconclient import client as pfcon -from core.models import ChrisInstance +from core.models import ChrisInstance, ChrisFolder, ChrisLinkFile from core.storage import connect_storage from plugins.models import PluginMeta, Plugin from plugins.models import PluginParameter @@ -293,3 +293,223 @@ def test_integration_plugin_job_can_register_output_files(self): # delete files from storage self.storage_manager.delete_path(outputdir) + + def _create_started_plugin_inst(self): + user = User.objects.get(username=self.username) + plugin = Plugin.objects.get(meta__name=self.plugin_fs_name) + (pl_inst, tf) = PluginInstance.objects.get_or_create( + plugin=plugin, owner=user, status='started', + compute_resource=plugin.compute_resources.all()[0]) + return pl_inst + + def test_create_chris_link_file_refuses_unauthorized_path(self): + """ + Test whether _create_chris_link_file refuses to create a link file to a + path the plugin instance owner is not allowed to access. + """ + pl_inst = self._create_started_plugin_inst() + job = pluginjobs.PluginInstanceAppJob(pl_inst) + + other = User.objects.create_user(username='other', password='other-pass') + other_folder, _ = ChrisFolder.objects.get_or_create( + path='home/other/uploads', owner=other) + + parent_folder, _ = ChrisFolder.objects.get_or_create( + path=pl_inst.get_output_path(), owner=pl_inst.owner) + + # refusing the unauthorized path logs at ERROR (CODE17) before raising; + # capture/suppress with assertLogs and verify the expected message fired + with self.assertLogs('plugininstances.services.pluginjobs', + level='ERROR') as cm: + with self.assertRaises(ValueError): + job._create_chris_link_file(other_folder.path, parent_folder) + self.assertEqual(pl_inst.error_code, 'CODE17') + self.assertTrue(any('CODE17' in msg and 'home/other/uploads' in msg + for msg in cm.output)) + + def test_find_all_storage_object_paths_refuses_unauthorized_link(self): + """ + Test whether find_all_storage_object_paths refuses to follow a ChRIS link + file pointing to a path the plugin instance owner is not allowed to access. + """ + pl_inst = self._create_started_plugin_inst() + job = pluginjobs.PluginInstanceAppJob(pl_inst) + + other = User.objects.create_user(username='other', password='other-pass') + ChrisFolder.objects.get_or_create(path='home/other/uploads', owner=other) + + storage_path = 'home/%s/uploads' % self.username + link_obj = storage_path + '/secret.chrislink' + + job.storage_manager = mock.Mock() + job.storage_manager.ls = mock.Mock(return_value=[link_obj]) + job.storage_manager.download_obj = mock.Mock( + return_value=b'home/other/uploads') + + obj_paths = set() + visited_paths = set() + # refusing the unauthorized link logs at ERROR (CODE17) before raising; + # capture/suppress with assertLogs and verify the expected message fired + with self.assertLogs('plugininstances.services.pluginjobs', + level='ERROR') as cm: + with self.assertRaises(ValueError): + job.find_all_storage_object_paths(storage_path, obj_paths, + visited_paths) + self.assertEqual(pl_inst.error_code, 'CODE17') + self.assertTrue(any('CODE17' in msg and 'home/other/uploads' in msg + for msg in cm.output)) + + def test_find_all_storage_object_paths_follows_authorized_link(self): + """ + Test whether find_all_storage_object_paths still follows a ChRIS link file + pointing to a path the plugin instance owner is allowed to access. + """ + user = User.objects.get(username=self.username) + pl_inst = self._create_started_plugin_inst() + job = pluginjobs.PluginInstanceAppJob(pl_inst) + + linked_path = 'home/%s/uploads' % self.username + ChrisFolder.objects.get_or_create(path=linked_path, owner=user) + + storage_path = 'home/%s/uploads_input' % self.username + link_obj = storage_path + '/data.chrislink' + linked_file = linked_path + '/file.txt' + + def fake_ls(path): + if path == storage_path: + return [link_obj] + if path == linked_path: + return [linked_file] + return [] + + job.storage_manager = mock.Mock() + job.storage_manager.ls = mock.Mock(side_effect=fake_ls) + job.storage_manager.download_obj = mock.Mock( + return_value=linked_path.encode()) + + obj_paths = set() + visited_paths = set() + job.find_all_storage_object_paths(storage_path, obj_paths, visited_paths) + + self.assertIn(link_obj, obj_paths) + self.assertIn(linked_file, obj_paths) + self.assertEqual(pl_inst.error_code, '') + + @tag('integration') + def test_integration_register_output_files_refuses_remote_link_file(self): + """ + Test whether _register_output_files refuses to register a .chrislink file + coming from the remote compute and deletes it from storage, while still + registering regular output files. + """ + user = User.objects.get(username=self.username) + plugin = Plugin.objects.get(meta__name=self.plugin_fs_name) + pl_inst = PluginInstance.objects.create( + plugin=plugin, owner=user, status='finishedSuccessfully', + compute_resource=plugin.compute_resources.all()[0]) + + outputdir = pl_inst.get_output_path() + + with io.StringIO('Test file') as f: + self.storage_manager.upload_obj(outputdir + '/data.txt', f.read(), + content_type='text/plain') + with io.StringIO('home/other/uploads') as f: + self.storage_manager.upload_obj(outputdir + '/evil.chrislink', + f.read(), content_type='text/plain') + + job = pluginjobs.PluginInstanceAppJob(pl_inst) + job.plugin_inst_output_files = {outputdir + '/data.txt', + outputdir + '/evil.chrislink'} + job._register_output_files() + + fnames = [f.fname.name for f in pl_inst.output_folder.chris_files.all()] + self.assertIn(outputdir + '/data.txt', fnames) + self.assertNotIn(outputdir + '/evil.chrislink', fnames) + + self.assertFalse( + self.storage_manager.obj_exists(outputdir + '/evil.chrislink')) + self.assertTrue( + self.storage_manager.obj_exists(outputdir + '/data.txt')) + + self.assertIn(outputdir + '/data.txt', job.plugin_inst_output_files) + self.assertNotIn(outputdir + '/evil.chrislink', + job.plugin_inst_output_files) + + # delete files from storage + self.storage_manager.delete_path(outputdir) + + def test_register_output_files_fails_when_link_delete_fails(self): + """ + Test whether _register_output_files fails the job (sets CODE07 and + propagates the exception) when a refused remote link file cannot be + deleted from storage. + """ + pl_inst = self._create_started_plugin_inst() + job = pluginjobs.PluginInstanceAppJob(pl_inst) + outputdir = pl_inst.get_output_path() + + job.storage_manager = mock.Mock() + job.storage_manager.sanitize_obj_names = mock.Mock(return_value={}) + job.storage_manager.delete_obj = mock.Mock( + side_effect=Exception('boom')) + job.plugin_inst_output_files = {outputdir + '/evil.chrislink'} + + # the failed deletion logs at ERROR (CODE07) before re-raising; capture/ + # suppress with assertLogs and verify the expected message fired + with self.assertLogs('plugininstances.services.pluginjobs', + level='ERROR') as cm: + with self.assertRaises(Exception): + job._register_output_files() + self.assertEqual(pl_inst.error_code, 'CODE07') + self.assertTrue(any('CODE07' in msg and 'boom' in msg + for msg in cm.output)) + + @tag('integration') + def test_integration_register_output_files_keeps_cube_generated_link_file(self): + """ + Test whether _register_output_files preserves legit ChRIS link files + created by CUBE (via _create_chris_link_file) for 'unextpath'/'ts' flows, + i.e. they are not in plugin_inst_output_files and are not scrubbed. + """ + user = User.objects.get(username=self.username) + plugin = Plugin.objects.get(meta__name=self.plugin_fs_name) + pl_inst = PluginInstance.objects.create( + plugin=plugin, owner=user, status='finishedSuccessfully', + compute_resource=plugin.compute_resources.all()[0]) + outputdir = pl_inst.get_output_path() + + # an authorized folder owned by the instance owner, used as link target + target_folder, _ = ChrisFolder.objects.get_or_create( + path=f'home/{self.username}/uploads', owner=user) + with io.StringIO('Test file') as f: + self.storage_manager.upload_obj( + f'home/{self.username}/uploads/in.txt', f.read(), + content_type='text/plain') + + # a regular remote output file + with io.StringIO('Test file') as f: + self.storage_manager.upload_obj(outputdir + '/data.txt', f.read(), + content_type='text/plain') + + job = pluginjobs.PluginInstanceAppJob(pl_inst) + parent_folder, _ = ChrisFolder.objects.get_or_create( + path=outputdir, owner=user) + job._create_chris_link_file(target_folder.path, parent_folder) + + link_path = (outputdir + '/' + + target_folder.path.replace('/', '_') + '.chrislink') + + # CUBE-generated link is a ChrisLinkFile and NOT in the remote-output set + self.assertTrue(ChrisLinkFile.objects.filter(fname=link_path).exists()) + self.assertNotIn(link_path, job.plugin_inst_output_files) + + job.plugin_inst_output_files = {outputdir + '/data.txt'} + job._register_output_files() + + # the link file survived the scrub (storage + DB) + self.assertTrue(self.storage_manager.obj_exists(link_path)) + self.assertTrue(ChrisLinkFile.objects.filter(fname=link_path).exists()) + + # delete files from storage + self.storage_manager.delete_path(outputdir) + self.storage_manager.delete_obj(f'home/{self.username}/uploads/in.txt')