Merge pull request #18223 from mvdbeek/discriminate_inputs_deleted_job_files

[24.0] Raise appropriate exception if accessing deleted input file
jmchilton authored May 27, 2024
2 parents 82a54a5 + cc2f83d commit 98d8ae9
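
The behavioral change is simple to describe from a client's perspective: when a job files request points at a Galaxy dataset whose underlying file was purged, the API now answers with an explicit 400 error instead of an unhandled `FileNotFoundError`. A minimal sketch of what a caller sees (hypothetical host, IDs, and key; the endpoint shape and `err_msg` value are taken from the diff and test below):

```python
import requests

# Hypothetical values; job_key is the per-job secret used by remote job runners.
base_url = "https://galaxy.example.org"
job_id, job_key = "abc123", "secret-job-key"
path = "/data/000/dataset_1.dat"

response = requests.get(
    f"{base_url}/api/jobs/{job_id}/files",
    params={"path": path, "job_key": job_key},
)
if response.status_code == 400:
    # After this commit, a purged input produces this explicit message.
    print(response.json()["err_msg"])  # "Input dataset(s) for job have been purged."
```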
Showing 2 changed files with 35 additions and 5 deletions.
21 changes: 17 additions & 4 deletions lib/galaxy/webapps/galaxy/api/job_files.py
@@ -11,6 +11,7 @@
     exceptions,
     util,
 )
+from galaxy.managers.context import ProvidesAppContext
 from galaxy.model import Job
 from galaxy.web import (
     expose_api_anonymous_and_sessionless,
@@ -34,7 +35,7 @@ class JobFilesAPIController(BaseGalaxyAPIController):
     """

     @expose_api_raw_anonymous_and_sessionless
-    def index(self, trans, job_id, **kwargs):
+    def index(self, trans: ProvidesAppContext, job_id, **kwargs):
         """
         GET /api/jobs/{job_id}/files
@@ -56,9 +57,21 @@ def index(self, trans, job_id, **kwargs):
         :rtype: binary
         :returns: contents of file
         """
-        self.__authorize_job_access(trans, job_id, **kwargs)
-        path = kwargs.get("path", None)
-        return open(path, "rb")
+        job = self.__authorize_job_access(trans, job_id, **kwargs)
+        path = kwargs["path"]
+        try:
+            return open(path, "rb")
+        except FileNotFoundError:
+            # We know that the job is not terminal, but users (or admin scripts) can purge input datasets.
+            # Here we discriminate that case from truly unexpected bugs.
+            # Not failing the job here, this is or should be handled by pulsar.
+            match = re.match(r"(galaxy_)?dataset_(.*)\.dat", os.path.basename(path))
+            if match:
+                # This looks like a galaxy dataset, check if any job input has been deleted.
+                if any(jtid.dataset.dataset.purged for jtid in job.input_datasets):
+                    raise exceptions.ItemDeletionException("Input dataset(s) for job have been purged.")
+            else:
+                raise

     @expose_api_anonymous_and_sessionless
     def create(self, trans, job_id, payload, **kwargs):
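The filename check above deserves a note: only paths that look like Galaxy dataset files qualify for the purged-input diagnosis, so a missing file with any other name still re-raises and surfaces as a genuine bug. A standalone sketch of the same pattern (the helper name is ours, not Galaxy's):

```python
import os
import re


def looks_like_galaxy_dataset(path: str) -> bool:
    # Same regex as the handler above: "dataset_<id>.dat" with an
    # optional "galaxy_" prefix, applied to the basename only.
    return re.match(r"(galaxy_)?dataset_(.*)\.dat", os.path.basename(path)) is not None


assert looks_like_galaxy_dataset("/data/000/dataset_42.dat")
assert looks_like_galaxy_dataset("/staging/galaxy_dataset_0c2d.dat")
assert not looks_like_galaxy_dataset("/staging/tool_script.sh")
```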
19 changes: 18 additions & 1 deletion test/integration/test_job_files.py
@@ -43,6 +43,7 @@

 class TestJobFilesIntegration(integration_util.IntegrationTestCase):
     initialized = False
+    dataset_populator: DatasetPopulator

     @classmethod
     def handle_galaxy_config_kwds(cls, config):
@@ -60,7 +61,7 @@ def setUp(self):
         sa_session = self.sa_session
         stmt = select(model.HistoryDatasetAssociation)
         assert len(sa_session.scalars(stmt).all()) == 0
-        self.dataset_populator.new_dataset(history_id, content=TEST_INPUT_TEXT, wait=True)
+        self.input_hda_dict = self.dataset_populator.new_dataset(history_id, content=TEST_INPUT_TEXT, wait=True)
         assert len(sa_session.scalars(stmt).all()) == 1
         self.input_hda = sa_session.scalars(stmt).all()[0]
         TestJobFilesIntegration.initialized = True
@@ -86,6 +87,22 @@ def test_read_by_state(self):
         response = requests.get(get_url, params=data)
         _assert_insufficient_permissions(response)

+    def test_read_fails_if_input_file_purged(self):
+        job, _, _ = self.create_static_job_with_state("running")
+        job_id, job_key = self._api_job_keys(job)
+        input_file_path = self.input_hda.get_file_name()
+        data = {"path": input_file_path, "job_key": job_key}
+        get_url = self._api_url(f"jobs/{job_id}/files", use_key=True)
+        head_response = requests.head(get_url, params=data)
+        api_asserts.assert_status_code_is_ok(head_response)
+        delete_response = self.dataset_populator.delete_dataset(
+            self.input_hda_dict["history_id"], content_id=self.input_hda_dict["id"], purge=True, wait_for_purge=True
+        )
+        assert delete_response.status_code == 200
+        get_response = requests.get(get_url, params=data)
+        assert get_response.status_code == 400
+        assert get_response.json()["err_msg"] == "Input dataset(s) for job have been purged."
+
     def test_write_by_state(self):
         job, output_hda, working_directory = self.create_static_job_with_state("running")
         job_id, job_key = self._api_job_keys(job)
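The test drives the purge through `DatasetPopulator.delete_dataset`, a Galaxy test helper. Under the hood this corresponds to the history contents API; a rough equivalent with plain `requests` (the URL shape and payload are our assumption from Galaxy's documented API, not part of this diff):

```python
import requests

# Hypothetical values for illustration.
base_url = "https://galaxy.example.org"
api_key = "my-api-key"
history_id, dataset_id = "f2db41e1fa331b3e", "bbd44e69cb8906b5"

response = requests.delete(
    f"{base_url}/api/histories/{history_id}/contents/{dataset_id}",
    params={"key": api_key},
    json={"purge": True},  # purge removes the file on disk, not just the deleted flag
)
response.raise_for_status()
```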
