From cc2f83dac08eb0add166a5635f8c5f2af4bd8c4e Mon Sep 17 00:00:00 2001
From: mvdbeek
Date: Fri, 24 May 2024 18:26:18 +0200
Subject: [PATCH] Raise appropriate exception if accessing deleted input file

---
Reviewer notes (this section is ignored by git-am):
- The FileNotFoundError handler in JobFilesAPIController.index now always
  re-raises when no purged job input explains the missing file; previously
  one branch fell through and the endpoint implicitly returned None.
- The GET response in the new test is no longer named head_response.
- Line breaks and context whitespace were reconstructed from a collapsed copy
  of this patch; the blob ids on the index lines predate the edits above.

 lib/galaxy/webapps/galaxy/api/job_files.py | 21 +++++++++++++++++----
 test/integration/test_job_files.py         | 19 ++++++++++++++++++-
 2 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/lib/galaxy/webapps/galaxy/api/job_files.py b/lib/galaxy/webapps/galaxy/api/job_files.py
index df2edf2415cb..fa30c3433e48 100644
--- a/lib/galaxy/webapps/galaxy/api/job_files.py
+++ b/lib/galaxy/webapps/galaxy/api/job_files.py
@@ -11,6 +11,7 @@
     exceptions,
     util,
 )
+from galaxy.managers.context import ProvidesAppContext
 from galaxy.model import Job
 from galaxy.web import (
     expose_api_anonymous_and_sessionless,
@@ -34,7 +35,7 @@ class JobFilesAPIController(BaseGalaxyAPIController):
     """
 
     @expose_api_raw_anonymous_and_sessionless
-    def index(self, trans, job_id, **kwargs):
+    def index(self, trans: ProvidesAppContext, job_id, **kwargs):
         """
         GET /api/jobs/{job_id}/files
 
@@ -56,9 +57,21 @@ def index(self, trans, job_id, **kwargs):
         :rtype: binary
         :returns: contents of file
         """
-        self.__authorize_job_access(trans, job_id, **kwargs)
-        path = kwargs.get("path", None)
-        return open(path, "rb")
+        job = self.__authorize_job_access(trans, job_id, **kwargs)
+        path = kwargs["path"]
+        try:
+            return open(path, "rb")
+        except FileNotFoundError:
+            # We know that the job is not terminal, but users (or admin scripts) can purge input datasets.
+            # Here we discriminate that case from truly unexpected bugs.
+            # Not failing the job here, this is or should be handled by pulsar.
+            match = re.match(r"(galaxy_)?dataset_(.*)\.dat", os.path.basename(path))
+            if match:
+                # This looks like a galaxy dataset, check if any job input has been deleted.
+                if any(jtid.dataset.dataset.purged for jtid in job.input_datasets):
+                    raise exceptions.ItemDeletionException("Input dataset(s) for job have been purged.")
+            # Anything else is unexpected: re-raise instead of falling through and returning None.
+            raise
 
     @expose_api_anonymous_and_sessionless
     def create(self, trans, job_id, payload, **kwargs):
diff --git a/test/integration/test_job_files.py b/test/integration/test_job_files.py
index 36babdcebf6e..fce3bf4bd6bd 100644
--- a/test/integration/test_job_files.py
+++ b/test/integration/test_job_files.py
@@ -43,6 +43,7 @@
 
 class TestJobFilesIntegration(integration_util.IntegrationTestCase):
     initialized = False
+    dataset_populator: DatasetPopulator
 
     @classmethod
     def handle_galaxy_config_kwds(cls, config):
@@ -60,7 +61,7 @@ def setUp(self):
             sa_session = self.sa_session
             stmt = select(model.HistoryDatasetAssociation)
             assert len(sa_session.scalars(stmt).all()) == 0
-            self.dataset_populator.new_dataset(history_id, content=TEST_INPUT_TEXT, wait=True)
+            self.input_hda_dict = self.dataset_populator.new_dataset(history_id, content=TEST_INPUT_TEXT, wait=True)
             assert len(sa_session.scalars(stmt).all()) == 1
             self.input_hda = sa_session.scalars(stmt).all()[0]
             TestJobFilesIntegration.initialized = True
@@ -86,6 +87,22 @@ def test_read_by_state(self):
         response = requests.get(get_url, params=data)
         _assert_insufficient_permissions(response)
 
+    def test_read_fails_if_input_file_purged(self):
+        job, _, _ = self.create_static_job_with_state("running")
+        job_id, job_key = self._api_job_keys(job)
+        input_file_path = self.input_hda.get_file_name()
+        data = {"path": input_file_path, "job_key": job_key}
+        get_url = self._api_url(f"jobs/{job_id}/files", use_key=True)
+        head_response = requests.head(get_url, params=data)
+        api_asserts.assert_status_code_is_ok(head_response)
+        delete_response = self.dataset_populator.delete_dataset(
+            self.input_hda_dict["history_id"], content_id=self.input_hda_dict["id"], purge=True, wait_for_purge=True
+        )
+        assert delete_response.status_code == 200
+        get_response = requests.get(get_url, params=data)
+        assert get_response.status_code == 400
+        assert get_response.json()["err_msg"] == "Input dataset(s) for job have been purged."
+
     def test_write_by_state(self):
         job, output_hda, working_directory = self.create_static_job_with_state("running")
         job_id, job_key = self._api_job_keys(job)