Merge pull request galaxyproject#18691 from mvdbeek/enable_change_datatype_pja_on_pick_value

[24.1] Fix change datatype PJA on expression tool data outputs
mvdbeek authored Aug 15, 2024
2 parents 1f0984e + a4ac441 commit ba3ff2c
Showing 4 changed files with 50 additions and 8 deletions.
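
For context: a change-datatype post-job action (PJA) attached to the data output of an expression tool such as pick_value previously had no lasting effect, because exec_after_process copied datatype and metadata wholesale from the picked input dataset. The dictionary below is an illustrative sketch of what such a PJA looks like in a native (.ga) workflow export; it is not part of this commit, and the output name ("data_param") and target datatype ("bam") are assumptions taken from the test at the bottom of the diff.

# Illustrative sketch of a ChangeDatatypeAction PJA in a native workflow export.
# The output name and the requested datatype are assumptions, not from this commit.
post_job_actions = {
    "ChangeDatatypeActiondata_param": {
        "action_type": "ChangeDatatypeAction",
        "output_name": "data_param",              # the expression tool's data output
        "action_arguments": {"newtype": "bam"},   # datatype the output should end up with
    }
}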
13 changes: 9 additions & 4 deletions lib/galaxy/celery/tasks.py
@@ -185,14 +185,19 @@ def set_metadata(
     dataset_id: int,
     model_class: str = "HistoryDatasetAssociation",
     overwrite: bool = True,
+    ensure_can_set_metadata: bool = True,
     task_user_id: Optional[int] = None,
 ):
+    """
+    ensure_can_set_metadata can be bypassed for new outputs.
+    """
     manager = _get_dataset_manager(hda_manager, ldda_manager, model_class)
     dataset_instance = manager.by_id(dataset_id)
-    can_set_metadata = manager.ensure_can_set_metadata(dataset_instance, raiseException=False)
-    if not can_set_metadata:
-        log.info(f"Setting metadata is not allowed for {model_class} {dataset_instance.id}")
-        return
+    if ensure_can_set_metadata:
+        can_set_metadata = manager.ensure_can_set_metadata(dataset_instance, raiseException=False)
+        if not can_set_metadata:
+            log.info(f"Setting metadata is not allowed for {model_class} {dataset_instance.id}")
+            return
     try:
         if overwrite:
             hda_manager.overwrite_metadata(dataset_instance)
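
The new ensure_can_set_metadata flag lets the framework queue metadata generation for a dataset it has just created itself, where the usual "may metadata be set on this dataset?" check would get in the way. A minimal sketch of how a caller builds such a deferred call; the helper function is illustrative and not part of the commit, it simply mirrors the call made in ExpressionTool.exec_after_process further down.

from galaxy.celery.tasks import set_metadata


def queue_metadata_for_new_output(output):
    # .si() only builds an immutable Celery signature; nothing is sent to the
    # queue yet, so the caller can decide later whether to dispatch it.
    return set_metadata.si(
        dataset_id=output.id,
        task_user_id=output.history.user_id,
        ensure_can_set_metadata=False,  # brand-new output, skip the permission check
    )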
7 changes: 6 additions & 1 deletion lib/galaxy/jobs/__init__.py
@@ -2020,8 +2020,9 @@ def fail(message=job.info, exception=None):
         # Certain tools require tasks to be completed after job execution
         # ( this used to be performed in the "exec_after_process" hook, but hooks are deprecated ).
         param_dict = self.get_param_dict(job)
+        task_wrapper = None
         try:
-            self.tool.exec_after_process(
+            task_wrapper = self.tool.exec_after_process(
                 self.app, inp_data, out_data, param_dict, job=job, final_job_state=final_job_state
             )
         except Exception as e:
@@ -2063,6 +2064,10 @@ def fail(message=job.info, exception=None):
         self.sa_session.commit()
         if job.state == job.states.ERROR:
             self._report_error()
+        elif task_wrapper:
+            # The only task set here is setting metadata (if necessary) on the expression tool output.
+            # The dataset state is SETTING_METADATA, which delays dependent jobs until the task completes.
+            task_wrapper.delay()
         cleanup_job = self.cleanup_job
         delete_files = cleanup_job == "always" or (job.state == job.states.OK and cleanup_job == "onsuccess")
         self.cleanup(delete_files=delete_files)
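
The two hunks above implement a build-now, dispatch-later pattern: exec_after_process returns a Celery signature instead of doing the work inline, and JobWrapper.finish() sends it with .delay() only when the job did not end in error. The snippet below is a self-contained toy version of that pattern; the regenerate_metadata task, its body, and the in-memory broker are made up for illustration, and only the .si()/.delay() mechanics mirror the Galaxy code.

from celery import Celery

celery_app = Celery("sketch", broker="memory://")  # assumed broker, illustration only


@celery_app.task
def regenerate_metadata(dataset_id):
    print(f"regenerating metadata for dataset {dataset_id}")


def exec_after_process():
    # Build the signature, but do not send it to the queue yet.
    return regenerate_metadata.si(dataset_id=42)


task_wrapper = exec_after_process()
job_failed = False  # stand-in for job.state == job.states.ERROR
if not job_failed and task_wrapper:
    task_wrapper.delay()  # dispatched only once the job has finished cleanly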
22 changes: 21 additions & 1 deletion lib/galaxy/tools/__init__.py
@@ -2876,7 +2876,27 @@ def exec_after_process(self, app, inp_data, out_data, param_dict, job=None, **kw
                         break
                 if copy_object is None:
                     raise exceptions.MessageException("Failed to find dataset output.")
-                out_data[key].copy_from(copy_object, include_metadata=True)
+                output = out_data[key]
+                # If a change_datatype PJA is associated with the expression tool output, the new output already has
+                # the desired datatype, so we use it. If the extension is "data" there's no change_datatype PJA and
+                # we want to use the existing extension.
+                new_ext = output.extension if output.extension != "data" else copy_object.extension
+                require_metadata_regeneration = copy_object.extension != new_ext
+                output.copy_from(copy_object, include_metadata=not require_metadata_regeneration)
+                output.extension = new_ext
+                if require_metadata_regeneration:
+                    if app.config.enable_celery_tasks:
+                        from galaxy.celery.tasks import set_metadata
+
+                        output._state = model.Dataset.states.SETTING_METADATA
+                        return set_metadata.si(
+                            dataset_id=output.id, task_user_id=output.history.user_id, ensure_can_set_metadata=False
+                        )
+                    else:
+                        # TODO: move exec_after_process into metadata script so this doesn't run on the headnode ?
+                        output.init_meta()
+                        output.set_meta()
+                        output.set_metadata_success_state()

     def parse_environment_variables(self, tool_source):
         """Setup environment variable for inputs file."""
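
The datatype decision in the hunk above is small but easy to misread, so here it is restated as a standalone function with the two cases from the test below worked through. pick_extension is illustrative and not part of the commit.

def pick_extension(output_ext: str, copied_ext: str) -> tuple[str, bool]:
    # "data" is Galaxy's catch-all datatype: it means no change-datatype PJA
    # touched the output, so the copied dataset's extension wins.
    new_ext = output_ext if output_ext != "data" else copied_ext
    # Metadata only needs regenerating when the datatype actually changes.
    return new_ext, copied_ext != new_ext


assert pick_extension("bam", "unsorted.bam") == ("bam", True)             # PJA requested bam
assert pick_extension("data", "unsorted.bam") == ("unsorted.bam", False)  # no PJA, keep as-is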
16 changes: 14 additions & 2 deletions lib/galaxy_test/api/test_workflows.py
@@ -2086,14 +2086,25 @@ def test_run_workflow_pick_value_bam_pja(self):
           pick_from:
           - value:
               __class__: RuntimeValue
+  consume_index:
+    tool_id: metadata_bam
+    in:
+      input_bam: pick_value/data_param
+    tool_state:
+      ref_names:
+        - chr10_random
+        - chr11
+        - chrM
+        - chrX
+        - chr16
 outputs:
   pick_out:
     outputSource: pick_value/data_param
 """,
             test_data="""
 some_file:
-  value: 1.bam
-  file_type: bam
+  value: 3.bam
+  file_type: unsorted.bam
   type: File
 """,
             history_id=history_id,
@@ -2106,6 +2117,7 @@
         )
         assert dataset_details["metadata_reference_names"]
         assert dataset_details["metadata_bam_index"]
+        assert dataset_details["file_ext"] == "bam"

     def test_run_workflow_simple_conditional_step(self):
         with self.dataset_populator.test_history() as history_id:
