From f7b7409ea86c38c0bd19adda0455f382bc195acb Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Sun, 24 Dec 2023 10:30:41 +0100 Subject: [PATCH 1/5] Add dbkey to DatasetLikeObject --- .../datatypes/display_applications/parameters.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/lib/galaxy/datatypes/display_applications/parameters.py b/lib/galaxy/datatypes/display_applications/parameters.py index c5cb95caa7f4..413b49453770 100644 --- a/lib/galaxy/datatypes/display_applications/parameters.py +++ b/lib/galaxy/datatypes/display_applications/parameters.py @@ -63,6 +63,7 @@ class DatasetLikeObject: file_name: str state: DatasetState extension: str + dbkey: Optional[str] class DisplayApplicationDataParameter(DisplayApplicationParameter): @@ -114,18 +115,24 @@ def _get_dataset_like_object(self, other_values) -> Optional[DatasetLikeObject]: if self.metadata: rval = getattr(data.metadata, self.metadata, None) assert rval, f'Unknown metadata name "{self.metadata}" provided for dataset type "{data.ext}".' - return DatasetLikeObject(file_name=rval.get_file_name(), state=data.state, extension="data") + return DatasetLikeObject( + file_name=rval.get_file_name(), state=data.state, extension="data", dbkey=data.dbkey + ) elif ( self.formats and self.extensions and (self.force_conversion or not isinstance(data.datatype, self.formats)) ): for ext in self.extensions: rval = data.get_converted_files_by_type(ext) if rval: - return DatasetLikeObject(file_name=rval.get_file_name(), state=rval.state, extension=rval.extension) + return DatasetLikeObject( + file_name=rval.get_file_name(), state=rval.state, extension=rval.extension, dbkey=data.dbkey + ) direct_match, target_ext, _ = data.find_conversion_destination(self.formats) assert direct_match or target_ext is not None, f"No conversion path found for data param: {self.name}" return None - return DatasetLikeObject(file_name=data.get_file_name(), state=data.state, extension=data.extension) + return DatasetLikeObject( + file_name=data.get_file_name(), state=data.state, extension=data.extension, dbkey=data.dbkey + ) def get_value(self, other_values, dataset_hash, user_hash, trans): data = self._get_dataset_like_object(other_values) From 21bfb0c4903872c3fd3d4b30b156c92671511896 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Sun, 24 Dec 2023 15:16:49 +0100 Subject: [PATCH 2/5] Test and fix display application link handling --- .../display_applications/parameters.py | 64 +++++++++++++------ lib/galaxy/datatypes/registry.py | 31 +++++++-- lib/galaxy/tool_util/verify/interactor.py | 11 +++- .../webapps/galaxy/controllers/dataset.py | 29 ++++----- .../api/test_display_applications.py | 48 ++++++++++++++ 5 files changed, 140 insertions(+), 43 deletions(-) diff --git a/lib/galaxy/datatypes/display_applications/parameters.py b/lib/galaxy/datatypes/display_applications/parameters.py index 413b49453770..46e9de33db43 100644 --- a/lib/galaxy/datatypes/display_applications/parameters.py +++ b/lib/galaxy/datatypes/display_applications/parameters.py @@ -1,14 +1,23 @@ # Contains parameters that are used in Display Applications import mimetypes from dataclasses import dataclass -from typing import Optional +from typing import ( + Callable, + Optional, + TYPE_CHECKING, + Union, +) from urllib.parse import quote_plus +from galaxy.datatypes.data import Data +from galaxy.model import DatasetInstance from galaxy.model.base import transaction from galaxy.schema.schema import DatasetState from galaxy.util import string_as_bool from galaxy.util.template import fill_template +if TYPE_CHECKING: + from galaxy.datatypes.registry import Registry DEFAULT_DATASET_NAME = "dataset" @@ -60,10 +69,12 @@ def build_url(self, other_values): @dataclass class DatasetLikeObject: - file_name: str + get_file_name: Callable state: DatasetState extension: str + name: str dbkey: Optional[str] + datatype: Data class DisplayApplicationDataParameter(DisplayApplicationParameter): @@ -91,21 +102,22 @@ def __init__(self, elem, link): self.force_url_param = string_as_bool(elem.get("force_url_param", "False")) self.force_conversion = string_as_bool(elem.get("force_conversion", "False")) + @property + def datatypes_registry(self) -> "Registry": + return self.link.display_application.app.datatypes_registry + @property def formats(self): if self.extensions: return tuple( map( type, - map( - self.link.display_application.app.datatypes_registry.get_datatype_by_extension, self.extensions - ), + map(self.datatypes_registry.get_datatype_by_extension, self.extensions), ) ) return None - def _get_dataset_like_object(self, other_values) -> Optional[DatasetLikeObject]: - # this returned object has file_name, state, and states attributes equivalent to a DatasetAssociation + def _get_dataset_like_object(self, other_values) -> Optional[Union[DatasetLikeObject, DatasetInstance]]: data = other_values.get(self.dataset, None) assert data, "Base dataset could not be found in values provided to DisplayApplicationDataParameter" if isinstance(data, DisplayDataValueWrapper): @@ -114,9 +126,24 @@ def _get_dataset_like_object(self, other_values) -> Optional[DatasetLikeObject]: return None if self.metadata: rval = getattr(data.metadata, self.metadata, None) + if not rval: + # May have to look at converted datasets + for converted_dataset_association in data.implicitly_converted_datasets: + converted_dataset = converted_dataset_association.dataset + if converted_dataset.state != DatasetState.OK: + return None + rval = getattr(converted_dataset.metadata, self.metadata, None) + if rval: + data = converted_dataset + break assert rval, f'Unknown metadata name "{self.metadata}" provided for dataset type "{data.ext}".' return DatasetLikeObject( - file_name=rval.get_file_name(), state=data.state, extension="data", dbkey=data.dbkey + get_file_name=rval.get_file_name, + state=data.state, + extension="data", + dbkey=data.dbkey, + name=data.name, + datatype=data.datatype, ) elif ( self.formats and self.extensions and (self.force_conversion or not isinstance(data.datatype, self.formats)) @@ -124,15 +151,13 @@ def _get_dataset_like_object(self, other_values) -> Optional[DatasetLikeObject]: for ext in self.extensions: rval = data.get_converted_files_by_type(ext) if rval: - return DatasetLikeObject( - file_name=rval.get_file_name(), state=rval.state, extension=rval.extension, dbkey=data.dbkey - ) - direct_match, target_ext, _ = data.find_conversion_destination(self.formats) + return rval + direct_match, target_ext, _ = self.datatypes_registry.find_conversion_destination_for_dataset_by_extensions( + data.extension, self.extensions + ) assert direct_match or target_ext is not None, f"No conversion path found for data param: {self.name}" return None - return DatasetLikeObject( - file_name=data.get_file_name(), state=data.state, extension=data.extension, dbkey=data.dbkey - ) + return data def get_value(self, other_values, dataset_hash, user_hash, trans): data = self._get_dataset_like_object(other_values) @@ -144,12 +169,15 @@ def prepare(self, other_values, dataset_hash, user_hash, trans): data = self._get_dataset_like_object(other_values) if not data and self.formats: data = other_values.get(self.dataset, None) - trans.sa_session.refresh(data) # start conversion # FIXME: Much of this is copied (more than once...); should be some abstract method elsewhere called from here # find target ext - direct_match, target_ext, converted_dataset = data.find_conversion_destination( - self.formats, converter_safe=True + ( + direct_match, + target_ext, + converted_dataset, + ) = self.datatypes_registry.find_conversion_destination_for_dataset_by_extensions( + data.extension, self.extensions ) if not direct_match: if target_ext and not converted_dataset: diff --git a/lib/galaxy/datatypes/registry.py b/lib/galaxy/datatypes/registry.py index d454810834b0..8620f49eb240 100644 --- a/lib/galaxy/datatypes/registry.py +++ b/lib/galaxy/datatypes/registry.py @@ -11,9 +11,11 @@ from typing import ( cast, Dict, + Iterable, List, Optional, Tuple, + TYPE_CHECKING, Union, ) @@ -39,6 +41,9 @@ ) from .display_applications.application import DisplayApplication +if TYPE_CHECKING: + from galaxy.datatypes.data import Data + class ConfigurationError(Exception): pass @@ -595,7 +600,7 @@ def get_mimetype_by_extension(self, ext, default="application/octet-stream"): self.log.warning(f"unknown mimetype in data factory {str(ext)}") return mimetype - def get_datatype_by_extension(self, ext): + def get_datatype_by_extension(self, ext) -> Optional["Data"]: """Returns a datatype object based on an extension""" return self.datatypes_by_extension.get(ext, None) @@ -843,7 +848,10 @@ def get_converter_by_target_type(self, source_ext, target_ext): return None def find_conversion_destination_for_dataset_by_extensions( - self, dataset_or_ext: Union[str, DatasetProtocol], accepted_formats: List[str], converter_safe: bool = True + self, + dataset_or_ext: Union[str, DatasetProtocol], + accepted_formats: Iterable[Union[str, "Data"]], + converter_safe: bool = True, ) -> Tuple[bool, Optional[str], Optional[DatasetProtocol]]: """ returns (direct_match, converted_ext, converted_dataset) @@ -857,8 +865,21 @@ def find_conversion_destination_for_dataset_by_extensions( ext = dataset_or_ext dataset = None - datatype_by_extension = self.get_datatype_by_extension(ext) - if datatype_by_extension and datatype_by_extension.matches_any(accepted_formats): + accepted_datatypes: List["Data"] = [] + for accepted_format in accepted_formats: + if isinstance(accepted_format, str): + accepted_datatype = self.get_datatype_by_extension(accepted_format) + if accepted_datatype is None: + self.log.warning( + f"Datatype class not found for extension '{accepted_format}', which is used as target for conversion from datatype '{ext}'" + ) + else: + accepted_datatypes.append(accepted_datatype) + else: + accepted_datatypes.append(accepted_format) + + datatype = self.get_datatype_by_extension(ext) + if datatype and datatype.matches_any(accepted_datatypes): return True, None, None for convert_ext in self.get_converters_by_datatype(ext): @@ -867,7 +888,7 @@ def find_conversion_destination_for_dataset_by_extensions( self.log.warning( f"Datatype class not found for extension '{convert_ext}', which is used as target for conversion from datatype '{ext}'" ) - elif convert_ext_datatype.matches_any(accepted_formats): + elif convert_ext_datatype.matches_any(accepted_datatypes): converted_dataset = dataset and dataset.get_converted_files_by_type(convert_ext) if converted_dataset: ret_data = converted_dataset diff --git a/lib/galaxy/tool_util/verify/interactor.py b/lib/galaxy/tool_util/verify/interactor.py index 18ca57f76c11..94cdb25a8efd 100644 --- a/lib/galaxy/tool_util/verify/interactor.py +++ b/lib/galaxy/tool_util/verify/interactor.py @@ -959,14 +959,21 @@ def _put(self, path, data=None, key=None, headers=None, admin=False, anon=False, kwd["timeout"] = kwd.pop("timeout", util.DEFAULT_SOCKET_TIMEOUT) return requests.put(url, **kwd) - def _get(self, path, data=None, key=None, headers=None, admin=False, anon=False): + def _get(self, path, data=None, key=None, headers=None, admin=False, anon=False, allow_redirects=True): headers = self.api_key_header(key=key, admin=admin, anon=anon, headers=headers) url = self.get_api_url(path) kwargs: Dict[str, Any] = {} if self.cookies: kwargs["cookies"] = self.cookies # no data for GET - return requests.get(url, params=data, headers=headers, timeout=util.DEFAULT_SOCKET_TIMEOUT, **kwargs) + return requests.get( + url, + params=data, + headers=headers, + timeout=util.DEFAULT_SOCKET_TIMEOUT, + allow_redirects=allow_redirects, + **kwargs, + ) def _head(self, path, data=None, key=None, headers=None, admin=False, anon=False): headers = self.api_key_header(key=key, admin=admin, anon=anon, headers=headers) diff --git a/lib/galaxy/webapps/galaxy/controllers/dataset.py b/lib/galaxy/webapps/galaxy/controllers/dataset.py index 6396db3d62b6..d74c3f17f8ee 100644 --- a/lib/galaxy/webapps/galaxy/controllers/dataset.py +++ b/lib/galaxy/webapps/galaxy/controllers/dataset.py @@ -674,7 +674,7 @@ def display_application( ), f"Extra file content requested ({action_param_extra}), but allow_extra_files_access is False." file_name = os.path.join(value.extra_files_path, action_param_extra) else: - file_name = value.file_name + file_name = value.get_file_name() content_length = os.path.getsize(file_name) rval = open(file_name, "rb") except OSError as e: @@ -697,24 +697,17 @@ def display_application( msg.append((f"Invalid action provided: {app_action}", "error")) else: if app_action is None: - if trans.history != data.history: - msg.append( - ( - "You must import this dataset into your current history before you can view it at the desired display application.", - "error", - ) + refresh = True + trans.response.status = 202 + msg.append( + ( + "Launching this display application requires additional datasets to be generated, you can view the status of these jobs below. ", + "info", ) - else: - refresh = True - msg.append( - ( - "Launching this display application required additional datasets to be generated, you can view the status of these jobs below. ", - "info", - ) - ) - if not display_link.preparing_display(): - display_link.prepare_display() - preparable_steps = display_link.get_prepare_steps() + ) + if not display_link.preparing_display(): + display_link.prepare_display() + preparable_steps = display_link.get_prepare_steps() else: raise Exception(f"Attempted a view action ({app_action}) on a non-ready display application") return trans.fill_template_mako( diff --git a/lib/galaxy_test/api/test_display_applications.py b/lib/galaxy_test/api/test_display_applications.py index 3fd5484fc833..5509acc71faa 100644 --- a/lib/galaxy_test/api/test_display_applications.py +++ b/lib/galaxy_test/api/test_display_applications.py @@ -1,12 +1,21 @@ import random from typing import List +from urllib.parse import parse_qsl from galaxy.util import UNKNOWN from galaxy_test.base.decorators import requires_admin +from galaxy_test.base.populators import ( + DatasetPopulator, + wait_on, +) from ._framework import ApiTestCase class TestDisplayApplicationsApi(ApiTestCase): + def setUp(self): + super().setUp() + self.dataset_populator = DatasetPopulator(self.galaxy_interactor) + def test_index(self): response = self._get("display_applications") self._assert_status_code_is(response, 200) @@ -51,6 +60,45 @@ def test_reload_as_non_admin_returns_403(self): response = self._post("display_applications/reload") self._assert_status_code_is(response, 403) + def test_get_display_application_link_bam(self, history_id): + # a controller route, but used in external apps. IMO qualifies (and can be used) as API + instance_url = self.galaxy_interactor.api_url.split("/api")[0] + for display_app_url in self._setup_igv_datasets(history_id=history_id, instance_url=instance_url): + # wait for eventual conversion to finish + wait_on( + lambda display_app_url=display_app_url: True + if self._get(display_app_url, allow_redirects=False).status_code == 302 + else None, + "display application to become ready", + ) + response = self._get(display_app_url, allow_redirects=False) + components = parse_qsl(response.next.url) + params = dict(components[1:]) + redirect_url = components[0][1] + assert redirect_url.startswith(instance_url) + data_response = self._get(redirect_url, data=params) + data_response.raise_for_status() + assert data_response.content + + def _setup_igv_datasets(self, history_id, instance_url: str): + dataset_app_combinations = { + "1.bam": "igv_bam/local_default", + "test.vcf": "igv_vcf/local_default", + "test.vcf.gz": "igv_vcf/local_default", + "5.gff": "igv_gff/local_default", + "1.bigwig": "igv_bigwig/local_default", + "1.fasta": "igv_fasta/local_default", + } + display_urls = [] + for file_name, display_app_link in dataset_app_combinations.items(): + test_file = self.test_data_resolver.get_filename(file_name) + test_dataset = self.dataset_populator.new_dataset( + history_id, content=open(test_file, "rb"), file_type="auto", wait=True + ) + display_app_url = f"{instance_url}/display_application/{test_dataset['dataset_id']}/{display_app_link}" + display_urls.append(display_app_url) + return display_urls + def _get_half_random_items(self, collection: List[str]) -> List[str]: half_num_items = int(len(collection) / 2) rval = random.sample(collection, half_num_items) From 0b951643f43222318d0c914f741a0792d07ca822 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Mon, 25 Dec 2023 10:35:43 +0100 Subject: [PATCH 3/5] Don't fail CONVERTER_fasta_to_fai with stderr contents --- lib/galaxy/datatypes/converters/fasta_to_fai.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/datatypes/converters/fasta_to_fai.xml b/lib/galaxy/datatypes/converters/fasta_to_fai.xml index 9d812f828259..50eca7da7858 100644 --- a/lib/galaxy/datatypes/converters/fasta_to_fai.xml +++ b/lib/galaxy/datatypes/converters/fasta_to_fai.xml @@ -1,4 +1,4 @@ - + samtools From 8bab912a7f60c8c32602987f92dd27c17738aeb9 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Mon, 25 Dec 2023 10:36:53 +0100 Subject: [PATCH 4/5] Move display application link test to integration test --- .../api/test_display_applications.py | 48 ----------------- test/integration/test_display_applications.py | 53 +++++++++++++++++++ 2 files changed, 53 insertions(+), 48 deletions(-) create mode 100644 test/integration/test_display_applications.py diff --git a/lib/galaxy_test/api/test_display_applications.py b/lib/galaxy_test/api/test_display_applications.py index 5509acc71faa..3fd5484fc833 100644 --- a/lib/galaxy_test/api/test_display_applications.py +++ b/lib/galaxy_test/api/test_display_applications.py @@ -1,21 +1,12 @@ import random from typing import List -from urllib.parse import parse_qsl from galaxy.util import UNKNOWN from galaxy_test.base.decorators import requires_admin -from galaxy_test.base.populators import ( - DatasetPopulator, - wait_on, -) from ._framework import ApiTestCase class TestDisplayApplicationsApi(ApiTestCase): - def setUp(self): - super().setUp() - self.dataset_populator = DatasetPopulator(self.galaxy_interactor) - def test_index(self): response = self._get("display_applications") self._assert_status_code_is(response, 200) @@ -60,45 +51,6 @@ def test_reload_as_non_admin_returns_403(self): response = self._post("display_applications/reload") self._assert_status_code_is(response, 403) - def test_get_display_application_link_bam(self, history_id): - # a controller route, but used in external apps. IMO qualifies (and can be used) as API - instance_url = self.galaxy_interactor.api_url.split("/api")[0] - for display_app_url in self._setup_igv_datasets(history_id=history_id, instance_url=instance_url): - # wait for eventual conversion to finish - wait_on( - lambda display_app_url=display_app_url: True - if self._get(display_app_url, allow_redirects=False).status_code == 302 - else None, - "display application to become ready", - ) - response = self._get(display_app_url, allow_redirects=False) - components = parse_qsl(response.next.url) - params = dict(components[1:]) - redirect_url = components[0][1] - assert redirect_url.startswith(instance_url) - data_response = self._get(redirect_url, data=params) - data_response.raise_for_status() - assert data_response.content - - def _setup_igv_datasets(self, history_id, instance_url: str): - dataset_app_combinations = { - "1.bam": "igv_bam/local_default", - "test.vcf": "igv_vcf/local_default", - "test.vcf.gz": "igv_vcf/local_default", - "5.gff": "igv_gff/local_default", - "1.bigwig": "igv_bigwig/local_default", - "1.fasta": "igv_fasta/local_default", - } - display_urls = [] - for file_name, display_app_link in dataset_app_combinations.items(): - test_file = self.test_data_resolver.get_filename(file_name) - test_dataset = self.dataset_populator.new_dataset( - history_id, content=open(test_file, "rb"), file_type="auto", wait=True - ) - display_app_url = f"{instance_url}/display_application/{test_dataset['dataset_id']}/{display_app_link}" - display_urls.append(display_app_url) - return display_urls - def _get_half_random_items(self, collection: List[str]) -> List[str]: half_num_items = int(len(collection) / 2) rval = random.sample(collection, half_num_items) diff --git a/test/integration/test_display_applications.py b/test/integration/test_display_applications.py new file mode 100644 index 000000000000..41e82fd63b17 --- /dev/null +++ b/test/integration/test_display_applications.py @@ -0,0 +1,53 @@ +from urllib.parse import parse_qsl + +from galaxy.tool_util.verify.wait import wait_on +from galaxy_test.base.populators import DatasetPopulator +from .test_containerized_jobs import ContainerizedIntegrationTestCase + + +class TestDisplayApplicationTestCase(ContainerizedIntegrationTestCase): + container_type = "docker" + + def setUp(self): + super().setUp() + self.dataset_populator = DatasetPopulator(self.galaxy_interactor) + + def test_get_display_application_links(self, history_id): + # a controller route, but used in external apps. IMO qualifies (and can be used) as API + instance_url = self.galaxy_interactor.api_url.split("/api")[0] + for display_app_url in self._setup_igv_datasets(history_id=history_id, instance_url=instance_url): + # wait for eventual conversion to finish + wait_on( + lambda display_app_url=display_app_url: True + if self._get(display_app_url, allow_redirects=False).status_code == 302 + else None, + "display application to become ready", + timeout=60, + ) + response = self._get(display_app_url, allow_redirects=False) + components = parse_qsl(response.next.url) + params = dict(components[1:]) + redirect_url = components[0][1] + assert redirect_url.startswith(instance_url) + data_response = self._get(redirect_url, data=params) + data_response.raise_for_status() + assert data_response.content + + def _setup_igv_datasets(self, history_id, instance_url: str): + dataset_app_combinations = { + "1.bam": "igv_bam/local_default", + "test.vcf": "igv_vcf/local_default", + "test.vcf.gz": "igv_vcf/local_default", + "5.gff": "igv_gff/local_default", + "1.bigwig": "igv_bigwig/local_default", + "1.fasta": "igv_fasta/local_default", + } + display_urls = [] + for file_name, display_app_link in dataset_app_combinations.items(): + test_file = self.test_data_resolver.get_filename(file_name) + test_dataset = self.dataset_populator.new_dataset( + history_id, content=open(test_file, "rb"), file_type="auto", wait=True + ) + display_app_url = f"{instance_url}/display_application/{test_dataset['dataset_id']}/{display_app_link}" + display_urls.append(display_app_url) + return display_urls From ccdd733121e9d72613a371584e334fbca52f938c Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Mon, 25 Dec 2023 10:48:22 +0100 Subject: [PATCH 5/5] Commit and legacy backpopulate pattern from unit test --- test/integration/test_display_applications.py | 1 + test/unit/data/test_galaxy_mapping.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/test/integration/test_display_applications.py b/test/integration/test_display_applications.py index 41e82fd63b17..da79751480da 100644 --- a/test/integration/test_display_applications.py +++ b/test/integration/test_display_applications.py @@ -6,6 +6,7 @@ class TestDisplayApplicationTestCase(ContainerizedIntegrationTestCase): + dataset_populator: DatasetPopulator container_type = "docker" def setUp(self): diff --git a/test/unit/data/test_galaxy_mapping.py b/test/unit/data/test_galaxy_mapping.py index 8e1ba80f6d06..b2ba36a4f6e0 100644 --- a/test/unit/data/test_galaxy_mapping.py +++ b/test/unit/data/test_galaxy_mapping.py @@ -60,7 +60,7 @@ def persist(cls, *args, **kwargs): for arg in args: session.add(arg) if flush: - session.flush() + session.commit() if kwargs.get("expunge", not flush): cls.expunge() return arg # Return last or only arg. @@ -699,7 +699,7 @@ def test_current_session(self): self.persist(user, galaxy_session) assert user.current_galaxy_session == galaxy_session new_galaxy_session = model.GalaxySession() - new_galaxy_session.user = user + user.galaxy_sessions.append(new_galaxy_session) self.persist(user, new_galaxy_session) assert user.current_galaxy_session == new_galaxy_session