From dc4b59d6f5cdf6b39c4ec5165f047e2b5109a0e3 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Thu, 12 Sep 2024 09:09:01 -0400 Subject: [PATCH 01/14] Fixes and tests for data fetch models. --- client/src/api/schema/schema.ts | 74 +++++++++++++ lib/galaxy/schema/fetch_data.py | 11 ++ lib/galaxy/tools/data_fetch.py | 4 + lib/galaxy_test/api/test_tools_upload.py | 61 +++++++++++ test/unit/app/tools/test_data_fetch.py | 132 +++++++++++++++++++++++ 5 files changed, 282 insertions(+) diff --git a/client/src/api/schema/schema.ts b/client/src/api/schema/schema.ts index f1bba9373c56..af1b78aa5656 100644 --- a/client/src/api/schema/schema.ts +++ b/client/src/api/schema/schema.ts @@ -6330,6 +6330,12 @@ export interface components { CompositeDataElement: { /** Md5 */ MD5?: string | null; + /** Sha-1 */ + "SHA-1"?: string | null; + /** Sha-256 */ + "SHA-256"?: string | null; + /** Sha-512 */ + "SHA-512"?: string | null; /** * Auto Decompress * @description Decompress compressed data before sniffing? @@ -6360,6 +6366,8 @@ export interface components { */ ext: string; extra_files?: components["schemas"]["ExtraFiles"] | null; + /** Hashes */ + hashes?: components["schemas"]["FetchDatasetHash"][] | null; /** Info */ info?: string | null; /** Name */ @@ -8668,10 +8676,26 @@ export interface components { } & { [key: string]: unknown; }; + /** FetchDatasetHash */ + FetchDatasetHash: { + /** + * Hash Function + * @enum {string} + */ + hash_function: "MD5" | "SHA-1" | "SHA-256" | "SHA-512"; + /** Hash Value */ + hash_value: string; + }; /** FileDataElement */ FileDataElement: { /** Md5 */ MD5?: string | null; + /** Sha-1 */ + "SHA-1"?: string | null; + /** Sha-256 */ + "SHA-256"?: string | null; + /** Sha-512 */ + "SHA-512"?: string | null; /** * Auto Decompress * @description Decompress compressed data before sniffing? @@ -8701,6 +8725,8 @@ export interface components { */ ext: string; extra_files?: components["schemas"]["ExtraFiles"] | null; + /** Hashes */ + hashes?: components["schemas"]["FetchDatasetHash"][] | null; /** Info */ info?: string | null; /** Name */ @@ -8966,6 +8992,12 @@ export interface components { FtpImportElement: { /** Md5 */ MD5?: string | null; + /** Sha-1 */ + "SHA-1"?: string | null; + /** Sha-256 */ + "SHA-256"?: string | null; + /** Sha-512 */ + "SHA-512"?: string | null; /** * Auto Decompress * @description Decompress compressed data before sniffing? @@ -8997,6 +9029,8 @@ export interface components { extra_files?: components["schemas"]["ExtraFiles"] | null; /** Ftp Path */ ftp_path: string; + /** Hashes */ + hashes?: components["schemas"]["FetchDatasetHash"][] | null; /** Info */ info?: string | null; /** Name */ @@ -13374,6 +13408,12 @@ export interface components { NestedElement: { /** Md5 */ MD5?: string | null; + /** Sha-1 */ + "SHA-1"?: string | null; + /** Sha-256 */ + "SHA-256"?: string | null; + /** Sha-512 */ + "SHA-512"?: string | null; /** * Auto Decompress * @description Decompress compressed data before sniffing? 
@@ -13416,6 +13456,8 @@ export interface components { */ ext: string; extra_files?: components["schemas"]["ExtraFiles"] | null; + /** Hashes */ + hashes?: components["schemas"]["FetchDatasetHash"][] | null; /** Info */ info?: string | null; /** Name */ @@ -14018,6 +14060,12 @@ export interface components { PastedDataElement: { /** Md5 */ MD5?: string | null; + /** Sha-1 */ + "SHA-1"?: string | null; + /** Sha-256 */ + "SHA-256"?: string | null; + /** Sha-512 */ + "SHA-512"?: string | null; /** * Auto Decompress * @description Decompress compressed data before sniffing? @@ -14047,6 +14095,8 @@ export interface components { */ ext: string; extra_files?: components["schemas"]["ExtraFiles"] | null; + /** Hashes */ + hashes?: components["schemas"]["FetchDatasetHash"][] | null; /** Info */ info?: string | null; /** Name */ @@ -14078,6 +14128,12 @@ export interface components { PathDataElement: { /** Md5 */ MD5?: string | null; + /** Sha-1 */ + "SHA-1"?: string | null; + /** Sha-256 */ + "SHA-256"?: string | null; + /** Sha-512 */ + "SHA-512"?: string | null; /** * Auto Decompress * @description Decompress compressed data before sniffing? @@ -14107,6 +14163,8 @@ export interface components { */ ext: string; extra_files?: components["schemas"]["ExtraFiles"] | null; + /** Hashes */ + hashes?: components["schemas"]["FetchDatasetHash"][] | null; /** Info */ info?: string | null; /** Link Data Only */ @@ -14799,6 +14857,12 @@ export interface components { ServerDirElement: { /** Md5 */ MD5?: string | null; + /** Sha-1 */ + "SHA-1"?: string | null; + /** Sha-256 */ + "SHA-256"?: string | null; + /** Sha-512 */ + "SHA-512"?: string | null; /** * Auto Decompress * @description Decompress compressed data before sniffing? @@ -14828,6 +14892,8 @@ export interface components { */ ext: string; extra_files?: components["schemas"]["ExtraFiles"] | null; + /** Hashes */ + hashes?: components["schemas"]["FetchDatasetHash"][] | null; /** Info */ info?: string | null; /** Link Data Only */ @@ -16443,6 +16509,12 @@ export interface components { UrlDataElement: { /** Md5 */ MD5?: string | null; + /** Sha-1 */ + "SHA-1"?: string | null; + /** Sha-256 */ + "SHA-256"?: string | null; + /** Sha-512 */ + "SHA-512"?: string | null; /** * Auto Decompress * @description Decompress compressed data before sniffing? 
@@ -16472,6 +16544,8 @@ export interface components { */ ext: string; extra_files?: components["schemas"]["ExtraFiles"] | null; + /** Hashes */ + hashes?: components["schemas"]["FetchDatasetHash"][] | null; /** Info */ info?: string | null; /** Name */ diff --git a/lib/galaxy/schema/fetch_data.py b/lib/galaxy/schema/fetch_data.py index 2603b1471ea5..b415ffebf64e 100644 --- a/lib/galaxy/schema/fetch_data.py +++ b/lib/galaxy/schema/fetch_data.py @@ -101,6 +101,13 @@ class ExtraFiles(FetchBaseModel): ) +class FetchDatasetHash(Model): + hash_function: Literal["MD5", "SHA-1", "SHA-256", "SHA-512"] + hash_value: str + + model_config = ConfigDict(extra="forbid") + + class BaseDataElement(FetchBaseModel): name: Optional[CoercedStringType] = None dbkey: str = Field("?") @@ -116,6 +123,10 @@ class BaseDataElement(FetchBaseModel): items_from: Optional[ElementsFromType] = Field(None, alias="elements_from") collection_type: Optional[str] = None MD5: Optional[str] = None + SHA1: Optional[str] = Field(None, alias="SHA-1") + SHA256: Optional[str] = Field(None, alias="SHA-256") + SHA512: Optional[str] = Field(None, alias="SHA-512") + hashes: Optional[List[FetchDatasetHash]] = None description: Optional[str] = None model_config = ConfigDict(extra="forbid") diff --git a/lib/galaxy/tools/data_fetch.py b/lib/galaxy/tools/data_fetch.py index a6786c725dc9..540baacc2d91 100644 --- a/lib/galaxy/tools/data_fetch.py +++ b/lib/galaxy/tools/data_fetch.py @@ -250,6 +250,10 @@ def _resolve_item_with_primary(item): if url: sources.append(source_dict) hashes = item.get("hashes", []) + for hash_function in HASH_NAMES: + hash_value = item.get(hash_function) + if hash_value: + hashes.append({"hash_function": hash_function, "hash_value": hash_value}) for hash_dict in hashes: hash_function = hash_dict.get("hash_function") hash_value = hash_dict.get("hash_value") diff --git a/lib/galaxy_test/api/test_tools_upload.py b/lib/galaxy_test/api/test_tools_upload.py index 64d7e97365b9..ed32bc92caf0 100644 --- a/lib/galaxy_test/api/test_tools_upload.py +++ b/lib/galaxy_test/api/test_tools_upload.py @@ -1,6 +1,7 @@ import json import os import urllib.parse +from base64 import b64encode import pytest from tusclient import client @@ -25,6 +26,9 @@ ) from ._framework import ApiTestCase +B64_FOR_1_2_3 = b64encode(b"1 2 3").decode("utf-8") +URI_FOR_1_2_3 = f"base64://{B64_FOR_1_2_3}" + class TestToolsUpload(ApiTestCase): dataset_populator: DatasetPopulator @@ -927,6 +931,63 @@ def test_upload_and_validate_valid(self): terminal_validated_state = self.dataset_populator.validate_dataset_and_wait(history_id, dataset_id) assert terminal_validated_state == "ok", terminal_validated_state + def test_upload_and_validate_hash_valid(self): + with self.dataset_populator.test_history() as history_id: + destination = {"type": "hdas"} + targets = [ + { + "destination": destination, + "items": [ + { + "src": "url", + "url": URI_FOR_1_2_3, + "hashes": [ + {"hash_function": "SHA-1", "hash_value": "65e9d53484d28eef5447bc06fe2d754d1090975a"} + ], + }, + ], + } + ] + payload = { + "history_id": history_id, + "targets": targets, + "validate_hashes": True, + } + fetch_response = self.dataset_populator.fetch(payload) + self._assert_status_code_is(fetch_response, 200) + # history ok implies the dataset upload work + self.dataset_populator.wait_for_history(history_id, assert_ok=True) + + def test_upload_and_validate_hash_invalid(self): + with self.dataset_populator.test_history() as history_id: + destination = {"type": "hdas"} + targets = [ + { + "destination": 
destination, + "items": [ + { + "src": "url", + "url": URI_FOR_1_2_3, + "hashes": [{"hash_function": "SHA-1", "hash_value": "invalidhash"}], + }, + ], + } + ] + payload = { + "history_id": history_id, + "targets": targets, + "validate_hashes": True, + } + fetch_response = self.dataset_populator.fetch(payload, assert_ok=True, wait=False) + self._assert_status_code_is(fetch_response, 200) + outputs = fetch_response.json()["outputs"] + new_dataset = outputs[0] + self.dataset_populator.wait_for_history(history_id, assert_ok=False) + dataset_details = self.dataset_populator.get_history_dataset_details( + history_id, dataset=new_dataset, assert_ok=False + ) + assert dataset_details["state"] == "error" + def _velvet_upload(self, history_id, extra_inputs): payload = self.dataset_populator.upload_payload( history_id, diff --git a/test/unit/app/tools/test_data_fetch.py b/test/unit/app/tools/test_data_fetch.py index 58e13e5d1549..ee7cdd61536f 100644 --- a/test/unit/app/tools/test_data_fetch.py +++ b/test/unit/app/tools/test_data_fetch.py @@ -1,6 +1,7 @@ import json import os import tempfile +from base64 import b64encode from contextlib import contextmanager from shutil import rmtree from tempfile import mkdtemp @@ -8,6 +9,9 @@ from galaxy.tools.data_fetch import main from galaxy.util.unittest_utils import skip_if_github_down +B64_FOR_1_2_3 = b64encode(b"1 2 3").decode("utf-8") +URI_FOR_1_2_3 = f"base64://{B64_FOR_1_2_3}" + def test_simple_path_get(): with _execute_context() as execute_context: @@ -55,6 +59,134 @@ def test_simple_uri_get(): assert hda_result["ext"] == "bed" +def test_correct_md5(): + with _execute_context() as execute_context: + request = { + "targets": [ + { + "destination": { + "type": "hdas", + }, + "elements": [ + { + "src": "url", + "url": URI_FOR_1_2_3, + "hashes": [ + { + "hash_function": "MD5", + "hash_value": "5ba48b6e5a7c4d4930fda256f411e55b", + } + ], + } + ], + } + ], + "validate_hashes": True, + } + execute_context.execute_request(request) + output = _unnamed_output(execute_context) + hda_result = output["elements"][0] + assert hda_result["state"] == "ok" + assert hda_result["ext"] == "txt" + + +def test_incorrect_md5(): + with _execute_context() as execute_context: + request = { + "targets": [ + { + "destination": { + "type": "hdas", + }, + "elements": [ + { + "src": "url", + "url": URI_FOR_1_2_3, + "hashes": [ + { + "hash_function": "MD5", + "hash_value": "thisisbad", + } + ], + } + ], + } + ], + "validate_hashes": True, + } + execute_context.execute_request(request) + output = _unnamed_output(execute_context) + hda_result = output["elements"][0] + assert ( + hda_result["error_message"] + == "Failed to validate upload with [MD5] - expected [thisisbad] got [5ba48b6e5a7c4d4930fda256f411e55b]" + ) + + +def test_correct_sha1(): + with _execute_context() as execute_context: + request = { + "targets": [ + { + "destination": { + "type": "hdas", + }, + "elements": [ + { + "src": "url", + "url": URI_FOR_1_2_3, + "hashes": [ + { + "hash_function": "SHA-1", + "hash_value": "65e9d53484d28eef5447bc06fe2d754d1090975a", + } + ], + } + ], + } + ], + "validate_hashes": True, + } + execute_context.execute_request(request) + output = _unnamed_output(execute_context) + hda_result = output["elements"][0] + assert hda_result["state"] == "ok" + assert hda_result["ext"] == "txt" + + +def test_incorrect_sha1(): + with _execute_context() as execute_context: + request = { + "targets": [ + { + "destination": { + "type": "hdas", + }, + "elements": [ + { + "src": "url", + "url": URI_FOR_1_2_3, + 
"hashes": [ + { + "hash_function": "SHA-1", + "hash_value": "thisisbad", + } + ], + } + ], + } + ], + "validate_hashes": True, + } + execute_context.execute_request(request) + output = _unnamed_output(execute_context) + hda_result = output["elements"][0] + assert ( + hda_result["error_message"] + == "Failed to validate upload with [SHA-1] - expected [thisisbad] got [65e9d53484d28eef5447bc06fe2d754d1090975a]" + ) + + @skip_if_github_down def test_deferred_uri_get(): with _execute_context() as execute_context: From b367c8a5a3e0994d0a4a32589bc76d591e031b85 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Tue, 17 Sep 2024 11:44:07 -0400 Subject: [PATCH 02/14] More tests for various data column feature combos. --- lib/galaxy_test/api/test_tools.py | 64 +++++++++++++++++++ .../gx_data_column_accept_default.xml | 21 ++++++ .../parameters/gx_data_column_multiple.xml | 3 - ...gx_data_column_multiple_accept_default.xml | 21 ++++++ ...l => gx_data_column_multiple_optional.xml} | 0 ..._column_multiple_optional_with_default.xml | 21 ++++++ .../gx_data_column_multiple_with_default.xml | 21 ++++++ ...gx_data_column_optional_accept_default.xml | 21 ++++++ .../gx_data_column_with_default.xml | 22 +++++++ .../gx_data_column_with_default_legacy.xml | 22 +++++++ 10 files changed, 213 insertions(+), 3 deletions(-) create mode 100644 test/functional/tools/parameters/gx_data_column_accept_default.xml create mode 100644 test/functional/tools/parameters/gx_data_column_multiple_accept_default.xml rename test/functional/tools/parameters/{data_column_multiple_optional.xml => gx_data_column_multiple_optional.xml} (100%) create mode 100644 test/functional/tools/parameters/gx_data_column_multiple_optional_with_default.xml create mode 100644 test/functional/tools/parameters/gx_data_column_multiple_with_default.xml create mode 100644 test/functional/tools/parameters/gx_data_column_optional_accept_default.xml create mode 100644 test/functional/tools/parameters/gx_data_column_with_default.xml create mode 100644 test/functional/tools/parameters/gx_data_column_with_default_legacy.xml diff --git a/lib/galaxy_test/api/test_tools.py b/lib/galaxy_test/api/test_tools.py index 8a29c39aaa2b..921d706b79cf 100644 --- a/lib/galaxy_test/api/test_tools.py +++ b/lib/galaxy_test/api/test_tools.py @@ -1002,6 +1002,70 @@ def test_optional_repeats_with_mins_filled_id(self): assert "false" in output1_content assert "length: 2" in output1_content + def test_data_column_defaults(self): + for input_format in ["legacy", "21.01"]: + tabular_contents = "1\t2\t3\t\n4\t5\t6\n" + with self.dataset_populator.test_history(require_new=True) as history_id: + hda = dataset_to_param( + self.dataset_populator.new_dataset(history_id, content=tabular_contents, file_type="tabular") + ) + details = self.dataset_populator.get_history_dataset_details(history_id, dataset=hda, assert_ok=True) + inputs = {"ref_parameter": hda} + response = self._run( + "gx_data_column", history_id, inputs, assert_ok=False, input_format=input_format + ).json() + output = response["outputs"] + details = self.dataset_populator.get_history_dataset_details( + history_id, dataset=output[0], assert_ok=False + ) + assert details["state"] == "ok" + + bed1_contents = open(self.get_filename("1.bed")).read() + hda = dataset_to_param( + self.dataset_populator.new_dataset(history_id, content=bed1_contents, file_type="bed") + ) + details = self.dataset_populator.get_history_dataset_details(history_id, dataset=hda, assert_ok=True) + inputs = {"ref_parameter": hda} + response = self._run( + 
"gx_data_column", history_id, inputs, assert_ok=False, input_format=input_format + ).json() + output = response["outputs"] + details = self.dataset_populator.get_history_dataset_details( + history_id, dataset=output[0], assert_ok=False + ) + assert details["state"] == "ok" + + with self.dataset_populator.test_history(require_new=False) as history_id: + response = self._run("gx_data_column_multiple", history_id, inputs, assert_ok=False).json() + assert "err_msg" in response, str(response) + assert "parameter 'parameter': an invalid option" in response["err_msg"] + + with self.dataset_populator.test_history(require_new=True) as history_id: + response = self._run("gx_data_column_optional", history_id, inputs, assert_ok=True) + output = response["outputs"] + content = self.dataset_populator.get_history_dataset_content(history_id, dataset=output[0]) + assert "parameter: None" in content + + response = self._run("gx_data_column_with_default", history_id, inputs, assert_ok=True) + output = response["outputs"] + content = self.dataset_populator.get_history_dataset_content(history_id, dataset=output[0]) + assert "parameter: 2" in content + + response = self._run("gx_data_column_with_default_legacy", history_id, inputs, assert_ok=True) + output = response["outputs"] + content = self.dataset_populator.get_history_dataset_content(history_id, dataset=output[0]) + assert "parameter: 3" in content + + response = self._run("gx_data_column_accept_default", history_id, inputs, assert_ok=True) + output = response["outputs"] + content = self.dataset_populator.get_history_dataset_content(history_id, dataset=output[0]) + assert "parameter: 1" in content + + response = self._run("gx_data_column_multiple_with_default", history_id, inputs, assert_ok=True) + output = response["outputs"] + content = self.dataset_populator.get_history_dataset_content(history_id, dataset=output[0]) + assert "parameter: 1,2" in content + @skip_without_tool("library_data") def test_library_data_param(self): with self.dataset_populator.test_history(require_new=False) as history_id: diff --git a/test/functional/tools/parameters/gx_data_column_accept_default.xml b/test/functional/tools/parameters/gx_data_column_accept_default.xml new file mode 100644 index 000000000000..5b189f4ebda0 --- /dev/null +++ b/test/functional/tools/parameters/gx_data_column_accept_default.xml @@ -0,0 +1,21 @@ + + + macros.xml + + > '$output' + ]]> + + + + + + + + + + + + + + diff --git a/test/functional/tools/parameters/gx_data_column_multiple.xml b/test/functional/tools/parameters/gx_data_column_multiple.xml index ed37a530d19b..bd6b30a2a545 100644 --- a/test/functional/tools/parameters/gx_data_column_multiple.xml +++ b/test/functional/tools/parameters/gx_data_column_multiple.xml @@ -28,9 +28,6 @@ echo 'parameter: $parameter' >> '$output' - - - diff --git a/test/functional/tools/parameters/gx_data_column_multiple_accept_default.xml b/test/functional/tools/parameters/gx_data_column_multiple_accept_default.xml new file mode 100644 index 000000000000..401d416668d2 --- /dev/null +++ b/test/functional/tools/parameters/gx_data_column_multiple_accept_default.xml @@ -0,0 +1,21 @@ + + + macros.xml + + > '$output' + ]]> + + + + + + + + + + + + + + diff --git a/test/functional/tools/parameters/data_column_multiple_optional.xml b/test/functional/tools/parameters/gx_data_column_multiple_optional.xml similarity index 100% rename from test/functional/tools/parameters/data_column_multiple_optional.xml rename to test/functional/tools/parameters/gx_data_column_multiple_optional.xml 
diff --git a/test/functional/tools/parameters/gx_data_column_multiple_optional_with_default.xml b/test/functional/tools/parameters/gx_data_column_multiple_optional_with_default.xml new file mode 100644 index 000000000000..0700e38a23a9 --- /dev/null +++ b/test/functional/tools/parameters/gx_data_column_multiple_optional_with_default.xml @@ -0,0 +1,21 @@ + + + macros.xml + + > '$output' + ]]> + + + + + + + + + + + + + + diff --git a/test/functional/tools/parameters/gx_data_column_multiple_with_default.xml b/test/functional/tools/parameters/gx_data_column_multiple_with_default.xml new file mode 100644 index 000000000000..fca81373d544 --- /dev/null +++ b/test/functional/tools/parameters/gx_data_column_multiple_with_default.xml @@ -0,0 +1,21 @@ + + + macros.xml + + > '$output' + ]]> + + + + + + + + + + + + + + diff --git a/test/functional/tools/parameters/gx_data_column_optional_accept_default.xml b/test/functional/tools/parameters/gx_data_column_optional_accept_default.xml new file mode 100644 index 000000000000..be893d34337f --- /dev/null +++ b/test/functional/tools/parameters/gx_data_column_optional_accept_default.xml @@ -0,0 +1,21 @@ + + + macros.xml + + > '$output' + ]]> + + + + + + + + + + + + + + diff --git a/test/functional/tools/parameters/gx_data_column_with_default.xml b/test/functional/tools/parameters/gx_data_column_with_default.xml new file mode 100644 index 000000000000..126c4dfc8ec1 --- /dev/null +++ b/test/functional/tools/parameters/gx_data_column_with_default.xml @@ -0,0 +1,22 @@ + + + macros.xml + + > '$output' + ]]> + + + + + + + + + + + + + + + diff --git a/test/functional/tools/parameters/gx_data_column_with_default_legacy.xml b/test/functional/tools/parameters/gx_data_column_with_default_legacy.xml new file mode 100644 index 000000000000..b77b546add50 --- /dev/null +++ b/test/functional/tools/parameters/gx_data_column_with_default_legacy.xml @@ -0,0 +1,22 @@ + + + macros.xml + + > '$output' + ]]> + + + + + + + + + + + + + + + From c0921f6ba139ea8ae43f72c809c69113215f5ab3 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Wed, 4 Sep 2024 09:57:05 -0400 Subject: [PATCH 03/14] Is it a bug that column params need to be strings? --- lib/galaxy/tools/parameters/basic.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/galaxy/tools/parameters/basic.py b/lib/galaxy/tools/parameters/basic.py index 1a099bfe7061..0d296c7292fc 100644 --- a/lib/galaxy/tools/parameters/basic.py +++ b/lib/galaxy/tools/parameters/basic.py @@ -1447,10 +1447,9 @@ def from_json(self, value, trans=None, other_values=None): @staticmethod def _strip_c(column): - if isinstance(column, str): - column = column.strip() - if column.startswith("c") and len(column) > 1 and all(c.isdigit() for c in column[1:]): - column = column.lower()[1:] + column = str(column).strip() + if column.startswith("c") and len(column) > 1 and all(c.isdigit() for c in column[1:]): + column = column.lower()[1:] return column def get_column_list(self, trans, other_values): From 16c5a4f636de5f20d4b1b63b9908ebeb4119a453 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Fri, 6 Sep 2024 13:45:28 -0400 Subject: [PATCH 04/14] Database migration for tool request API. 
--- ...3d5d144_implement_structured_tool_state.py | 178 ++++++++++++++++++ 1 file changed, 178 insertions(+) create mode 100644 lib/galaxy/model/migrations/alembic/versions_gxy/7ffd33d5d144_implement_structured_tool_state.py diff --git a/lib/galaxy/model/migrations/alembic/versions_gxy/7ffd33d5d144_implement_structured_tool_state.py b/lib/galaxy/model/migrations/alembic/versions_gxy/7ffd33d5d144_implement_structured_tool_state.py new file mode 100644 index 000000000000..027338dac279 --- /dev/null +++ b/lib/galaxy/model/migrations/alembic/versions_gxy/7ffd33d5d144_implement_structured_tool_state.py @@ -0,0 +1,178 @@ +"""implement structured tool state + +Revision ID: 7ffd33d5d144 +Revises: 25b092f7938b +Create Date: 2022-11-09 15:53:11.451185 + +""" + +from sqlalchemy import ( + Column, + DateTime, + Integer, + String, +) + +from galaxy.model.custom_types import ( + JSONType, + UUIDType, +) +from galaxy.model.database_object_names import build_index_name +from galaxy.model.migrations.util import ( + add_column, + create_foreign_key, + create_index, + create_table, + drop_column, + drop_index, + drop_table, + transaction, +) + +# revision identifiers, used by Alembic. +revision = "7ffd33d5d144" +down_revision = "25b092f7938b" +branch_labels = None +depends_on = None + +job_table_name = "job" +tool_source_table_name = "tool_source" +tool_request_table_name = "tool_request" +request_column_name = "tool_request_id" +job_request_index_name = build_index_name(job_table_name, request_column_name) +workflow_request_to_input_dataset_table_name = "workflow_request_to_input_dataset" +workflow_request_to_input_collection_table_name = "workflow_request_to_input_collection_dataset" +workflow_request_to_input_parameter_table_name = "workflow_request_input_step_parameter" +workflow_input_request_column_name = "request" +workflow_landing_request_table_name = "workflow_landing_request" +tool_landing_request_table_name = "tool_landing_request" + + +def upgrade(): + with transaction(): + create_table( + tool_landing_request_table_name, + Column("id", Integer, primary_key=True), + Column("user_id", Integer, index=True, nullable=True), + Column("create_time", DateTime), + Column("update_time", DateTime, nullable=True), + Column("request_state", JSONType), + Column("client_secret", String(255), nullable=True), + Column("uuid", UUIDType, nullable=False, index=True), + ) + + create_table( + workflow_landing_request_table_name, + Column("id", Integer, primary_key=True), + Column("user_id", Integer, index=True, nullable=True), + Column("stored_workflow_id", Integer, index=True), + Column("workflow_id", Integer, index=True), + Column("create_time", DateTime), + Column("update_time", DateTime, nullable=True), + Column("request_state", JSONType), + Column("client_secret", String(255), nullable=True), + Column("uuid", UUIDType, nullable=False, index=True), + ) + + create_foreign_key( + "foreign_key_user_id", + tool_landing_request_table_name, + "galaxy_user", + ["user_id"], + ["id"], + ) + create_foreign_key( + "foreign_key_user_id", + workflow_landing_request_table_name, + "galaxy_user", + ["user_id"], + ["id"], + ) + create_foreign_key( + "foreign_key_stored_workflow_id", + workflow_landing_request_table_name, + "stored_workflow", + ["stored_workflow_id"], + ["id"], + ) + create_foreign_key( + "foreign_key_workflow_id", + workflow_landing_request_table_name, + "workflow", + ["workflow_id"], + ["id"], + ) + + create_table( + tool_source_table_name, + Column("id", Integer, primary_key=True), + Column("hash", String(255), 
index=True), + Column("source", JSONType), + ) + create_table( + tool_request_table_name, + Column("id", Integer, primary_key=True), + Column("request", JSONType), + Column("state", String(32)), + Column("state_message", JSONType), + Column("tool_source_id", Integer, index=True), + Column("history_id", Integer, index=True), + ) + + create_foreign_key( + "foreign_key_tool_source_id", + tool_request_table_name, + tool_source_table_name, + ["tool_source_id"], + ["id"], + ) + + create_foreign_key( + "foreign_key_history_id", + tool_request_table_name, + "history", + ["history_id"], + ["id"], + ) + + add_column( + job_table_name, + Column(request_column_name, Integer, default=None), + ) + + create_foreign_key( + "foreign_key_tool_request_id", + job_table_name, + tool_request_table_name, + ["tool_request_id"], + ["id"], + ) + + create_index(job_request_index_name, job_table_name, [request_column_name]) + + add_column( + workflow_request_to_input_dataset_table_name, + Column(workflow_input_request_column_name, JSONType, default=None), + ) + add_column( + workflow_request_to_input_collection_table_name, + Column(workflow_input_request_column_name, JSONType, default=None), + ) + add_column( + workflow_request_to_input_parameter_table_name, + Column(workflow_input_request_column_name, JSONType, default=None), + ) + + +def downgrade(): + with transaction(): + drop_column(workflow_request_to_input_dataset_table_name, workflow_input_request_column_name) + drop_column(workflow_request_to_input_collection_table_name, workflow_input_request_column_name) + drop_column(workflow_request_to_input_parameter_table_name, workflow_input_request_column_name) + drop_index(job_request_index_name, job_table_name) + drop_column(job_table_name, request_column_name) + drop_table(tool_request_table_name) + drop_table(tool_source_table_name) + + drop_table(tool_landing_request_table_name) + drop_table(workflow_landing_request_table_name) From ad73e9e838fb23dd48b51d2a24214d25524c705f Mon Sep 17 00:00:00 2001 From: John Chilton Date: Sun, 8 Sep 2024 19:50:37 -0400 Subject: [PATCH 05/14] Tool Request models. 
--- .../dev/tool_state_state_classes.plantuml.svg | 78 ++-- .../dev/tool_state_state_classes.plantuml.txt | 23 +- doc/source/dev/tool_state_task.plantuml.svg | 62 ++++ doc/source/dev/tool_state_task.plantuml.txt | 25 ++ lib/galaxy/managers/hdas.py | 7 +- lib/galaxy/schema/tasks.py | 4 +- lib/galaxy/tool_util/parameters/__init__.py | 20 + lib/galaxy/tool_util/parameters/case.py | 45 ++- lib/galaxy/tool_util/parameters/convert.py | 303 +++++++++++++++- lib/galaxy/tool_util/parameters/factory.py | 35 +- lib/galaxy/tool_util/parameters/models.py | 316 ++++++++++++---- lib/galaxy/tool_util/parameters/state.py | 9 + lib/galaxy/tool_util/parameters/visitor.py | 90 ++++- lib/galaxy/tool_util/parser/interface.py | 24 +- lib/galaxy/tool_util/parser/xml.py | 4 + lib/galaxy/tool_util/parser/yaml.py | 3 + .../tools/parameters/gx_boolean.xml | 7 + .../tools/parameters/gx_boolean_checked.xml | 36 ++ .../tools/parameters/gx_boolean_optional.xml | 1 - .../gx_boolean_optional_checked.xml | 58 +++ .../tools/parameters/gx_select_optional.xml | 7 + test/functional/tools/select_optional.xml | 1 + test/unit/tool_util/framework_tool_checks.yml | 54 +++ .../tool_util/parameter_specification.yml | 342 ++++++++++++++++-- test/unit/tool_util/test_parameter_convert.py | 221 +++++++++++ .../tool_util/test_parameter_specification.py | 31 ++ .../tool_util/test_parameter_test_cases.py | 109 +++++- test/unit/tool_util/util.py | 2 +- 28 files changed, 1721 insertions(+), 196 deletions(-) create mode 100644 doc/source/dev/tool_state_task.plantuml.svg create mode 100644 doc/source/dev/tool_state_task.plantuml.txt create mode 100644 test/functional/tools/parameters/gx_boolean_checked.xml create mode 100644 test/functional/tools/parameters/gx_boolean_optional_checked.xml create mode 100644 test/unit/tool_util/test_parameter_convert.py diff --git a/doc/source/dev/tool_state_state_classes.plantuml.svg b/doc/source/dev/tool_state_state_classes.plantuml.svg index de86dbcd3787..28c7da1c9092 100644 --- a/doc/source/dev/tool_state_state_classes.plantuml.svg +++ b/doc/source/dev/tool_state_state_classes.plantuml.svg @@ -1,21 +1,17 @@ -galaxy.tool_util.parameters.stateToolStatestate_representation: strinput_state: Dict[str, Any]validate(input_models: ToolParameterBundle)_to_base_model(input_models: ToolParameterBundle): Optional[Type[BaseModel]]RequestToolStatestate_representation = "request"_to_base_model(input_models: ToolParameterBundle): Type[BaseModel]Object references of the form{src: "hda", id: <encoded_id>}.Allow mapping/reduce constructs.RequestInternalToolStatestate_representation = "request_internal"_to_base_model(input_models: ToolParameterBundle): Type[BaseModel]Object references of the form{src: "hda", id: <decoded_id>}.Allow mapping/reduce constructs.JobInternalToolStatestate_representation = "job_internal"_to_base_model(input_models: ToolParameterBundle): Type[BaseModel]Object references of the form{src: "hda", id: <decoded_id>}.Mapping constructs expanded out.(Defaults are inserted?)TestCaseToolStatestate_representation = "test_case"_to_base_model(input_models: ToolParameterBundle): Type[BaseModel]Object references of the form file name and URIs.Mapping constructs not allowed. WorkflowStepToolStatestate_representation = "workflow_step"_to_base_model(input_models: ToolParameterBundle): Type[BaseModel]Nearly everything optional except conditional discriminators. 
WorkflowStepLinkedToolStatestate_representation = "workflow_step_linked"_to_base_model(input_models: ToolParameterBundle): Type[BaseModel]Expect pre-process ``in`` dictionaries and bring in representationof links and defaults and validate them in model. decodeexpandpreprocess_links_and_defaultsgalaxy.tool_util.parameters.stateToolStatestate_representation: strinput_state: Dict[str, Any]validate(parameters: ToolParameterBundle)_to_base_model(parameters: ToolParameterBundle): Optional[Type[BaseModel]]RequestToolStatestate_representation = "request"_to_base_model(parameters: ToolParameterBundle): Type[BaseModel]Object references of the form{src: "hda", id: <encoded_id>}.Allow mapping/reduce constructs.RequestInternalToolStatestate_representation = "request_internal"_to_base_model(parameters: ToolParameterBundle): Type[BaseModel]Object references of the form{src: "hda", id: <decoded_id>}.Allow mapping/reduce constructs. Allows URI src dicts.RequestInternalDereferencedToolStatestate_representation = "request_internal"_to_base_model(parameters: ToolParameterBundle): Type[BaseModel]Object references of the form{src: "hda", id: <decoded_id>}.Allow mapping/reduce constructs. No URI src dicts - all converted to HDAs.JobInternalToolStatestate_representation = "job_internal"_to_base_model(parameters: ToolParameterBundle): Type[BaseModel]Object references of the form{src: "hda", id: <decoded_id>}.Mapping constructs expanded out.(Defaults are inserted?)decodedereferenceexpand \ No newline at end of file diff --git a/doc/source/dev/tool_state_task.plantuml.txt b/doc/source/dev/tool_state_task.plantuml.txt new file mode 100644 index 000000000000..64286f2f5ed5 --- /dev/null +++ b/doc/source/dev/tool_state_task.plantuml.txt @@ -0,0 +1,25 @@ +@startuml +'!include plantuml_options.txt +queue TaskQueue as queue +participant "queue_jobs Task" as task +participant "JobSubmitter.queue_jobs" as queue_jobs +participant "JobSubmitter.dereference" as dereference +participant "materialize Task" as materialize_task +participant "Tool.handle_input_async" as handle_input +participant "expand_meta_parameters_async" as expand +participant "ToolAction.execute" as tool_action + +queue -> task : +task -> queue_jobs : QueueJobs pydantic model +queue_jobs -> dereference : RequestInternalToolState +dereference -> queue_jobs : RequestInternalDereferencedToolState +queue_jobs -> materialize_task : HDA (with state deferred) +materialize_task -> queue_jobs : return when state is okay +queue_jobs -> handle_input : RequestInternalDereferencedToolState +handle_input -> expand : RequestInternalDereferencedToolState +expand -> handle_input : JobInternalToolState[] +loop over expanded job tool states + handle_input -> tool_action : + tool_action -> handle_input : A Galaxy Job +end +@enduml diff --git a/lib/galaxy/managers/hdas.py b/lib/galaxy/managers/hdas.py index 86f9b8c98c02..3f38381ce4f0 100644 --- a/lib/galaxy/managers/hdas.py +++ b/lib/galaxy/managers/hdas.py @@ -173,7 +173,7 @@ def create( session.commit() return hda - def materialize(self, request: MaterializeDatasetInstanceTaskRequest) -> None: + def materialize(self, request: MaterializeDatasetInstanceTaskRequest, in_place: bool = False) -> None: request_user: RequestUser = request.user materializer = materializer_factory( True, # attached... 
@@ -187,8 +187,9 @@ def materialize(self, request: MaterializeDatasetInstanceTaskRequest) -> None: else: dataset_instance = self.ldda_manager.get_accessible(request.content, user) history = self.app.history_manager.by_id(request.history_id) - new_hda = materializer.ensure_materialized(dataset_instance, target_history=history) - history.add_dataset(new_hda, set_hid=True) + new_hda = materializer.ensure_materialized(dataset_instance, target_history=history, in_place=in_place) + if not in_place: + history.add_dataset(new_hda, set_hid=True) session = self.session() with transaction(session): session.commit() diff --git a/lib/galaxy/schema/tasks.py b/lib/galaxy/schema/tasks.py index 022d82666aed..313ec9f1dad0 100644 --- a/lib/galaxy/schema/tasks.py +++ b/lib/galaxy/schema/tasks.py @@ -104,8 +104,8 @@ class MaterializeDatasetInstanceTaskRequest(Model): title="Content", description=( "Depending on the `source` it can be:\n" - "- The encoded id of the source library dataset\n" - "- The encoded id of the HDA\n" + "- The decoded id of the source library dataset\n" + "- The decoded id of the HDA\n" ), ) diff --git a/lib/galaxy/tool_util/parameters/__init__.py b/lib/galaxy/tool_util/parameters/__init__.py index ac6ca0dbbf3c..4a54ba0a8373 100644 --- a/lib/galaxy/tool_util/parameters/__init__.py +++ b/lib/galaxy/tool_util/parameters/__init__.py @@ -1,7 +1,10 @@ from .case import test_case_state from .convert import ( decode, + dereference, encode, + encode_test, + fill_static_defaults, ) from .factory import ( from_input_source, @@ -26,7 +29,12 @@ CwlStringParameterModel, CwlUnionParameterModel, DataCollectionParameterModel, + DataCollectionRequest, DataParameterModel, + DataRequest, + DataRequestHda, + DataRequestInternalHda, + DataRequestUri, FloatParameterModel, HiddenParameterModel, IntegerParameterModel, @@ -42,6 +50,7 @@ validate_against_model, validate_internal_job, validate_internal_request, + validate_internal_request_dereferenced, validate_request, validate_test_case, validate_workflow_step, @@ -49,6 +58,7 @@ ) from .state import ( JobInternalToolState, + RequestInternalDereferencedToolState, RequestInternalToolState, RequestToolState, TestCaseToolState, @@ -75,6 +85,11 @@ "JobInternalToolState", "ToolParameterBundle", "ToolParameterBundleModel", + "DataRequest", + "DataRequestInternalHda", + "DataRequestHda", + "DataRequestUri", + "DataCollectionRequest", "ToolParameterModel", "IntegerParameterModel", "BooleanParameterModel", @@ -101,6 +116,7 @@ "validate_against_model", "validate_internal_job", "validate_internal_request", + "validate_internal_request_dereferenced", "validate_request", "validate_test_case", "validate_workflow_step", @@ -113,6 +129,7 @@ "test_case_state", "RequestToolState", "RequestInternalToolState", + "RequestInternalDereferencedToolState", "flat_state_path", "keys_starting_with", "visit_input_values", @@ -120,6 +137,9 @@ "VISITOR_NO_REPLACEMENT", "decode", "encode", + "encode_test", + "fill_static_defaults", + "dereference", "WorkflowStepToolState", "WorkflowStepLinkedToolState", ) diff --git a/lib/galaxy/tool_util/parameters/case.py b/lib/galaxy/tool_util/parameters/case.py index cd8aa4f71e04..d1ff72b67d24 100644 --- a/lib/galaxy/tool_util/parameters/case.py +++ b/lib/galaxy/tool_util/parameters/case.py @@ -43,6 +43,7 @@ ) INTEGER_STR_PATTERN = compile(r"^(\d+)$") +INTEGERS_STR_PATTERN = compile(r"^(\d+)(\s*,\s*(\d+))*$") COLUMN_NAME_STR_PATTERN = compile(r"^c(\d+): .*$") # In an effort to squeeze all the ambiguity out of test cases - at some point Marius and John # 
agree tools should be using value_json for typed inputs to parameters but John has come around on @@ -111,22 +112,31 @@ def legacy_from_string(parameter: ToolParameterT, value: Optional[Any], warnings "Likely using deprected truevalue/falsevalue in tool parameter - switch to 'true' or 'false'" ) elif isinstance(parameter, (DataColumnParameterModel,)): - integer_match = INTEGER_STR_PATTERN.match(value_str) - if integer_match: - if WARN_ON_UNTYPED_XML_STRINGS: + if parameter.multiple: + integers_match = INTEGERS_STR_PATTERN.match(value_str) + if integers_match: + if WARN_ON_UNTYPED_XML_STRINGS: + warnings.append( + f"Implicitly converted {parameter.name} to a column index integer from a string value, please use 'value_json' to define this test input parameter value instead." + ) + result_value = [int(v.strip()) for v in value_str.split(",")] + else: + integer_match = INTEGER_STR_PATTERN.match(value_str) + if integer_match: + if WARN_ON_UNTYPED_XML_STRINGS: + warnings.append( + f"Implicitly converted {parameter.name} to a column index integer from a string value, please use 'value_json' to define this test input parameter value instead." + ) + result_value = int(value_str) + elif Version(profile) < Version("24.2"): + # allow this for older tools but new tools will just require the integer index warnings.append( - f"Implicitly converted {parameter.name} to a column index integer from a string value, please use 'value_json' to define this test input parameter value instead." + f"Using column names as test case values is deprecated, please adjust {parameter.name} to just use an integer column index." 
- ) - column_name_value_match = COLUMN_NAME_STR_PATTERN.match(value_str) - if column_name_value_match: - column_part = column_name_value_match.group(1) - result_value = int(column_part) + column_name_value_match = COLUMN_NAME_STR_PATTERN.match(value_str) + if column_name_value_match: + column_part = column_name_value_match.group(1) + result_value = int(column_part) return result_value @@ -268,8 +278,8 @@ def _merge_into_state( for test_input_value in test_input_values: instance_test_input = test_input.copy() instance_test_input["value"] = test_input_value - input_value = xml_data_input_to_json(test_input) - input_value_list.append(input_value) + input_value_json = xml_data_input_to_json(instance_test_input) + input_value_list.append(input_value_json) input_value = input_value_list else: input_value = xml_data_input_to_json(test_input) @@ -286,11 +296,14 @@ def _select_which_when( conditional: ConditionalParameterModel, state: dict, inputs: ToolSourceTestInputs, prefix: str ) -> ConditionalWhen: test_parameter = conditional.test_parameter + is_boolean = test_parameter.parameter_type == "gx_boolean" test_parameter_name = test_parameter.name test_parameter_flat_path = flat_state_path(test_parameter_name, prefix) test_input = _input_for(test_parameter_flat_path, inputs) explicit_test_value = test_input["value"] if test_input else None + if is_boolean and isinstance(explicit_test_value, str): + explicit_test_value = asbool(explicit_test_value) test_value = validate_explicit_conditional_test_value(test_parameter_name, explicit_test_value) for when in conditional.whens: if test_value is None and when.is_default_when: diff --git a/lib/galaxy/tool_util/parameters/convert.py b/lib/galaxy/tool_util/parameters/convert.py index 14caed47e92c..9484cc6c7e71 100644 --- a/lib/galaxy/tool_util/parameters/convert.py +++ b/lib/galaxy/tool_util/parameters/convert.py @@ -1,24 +1,58 @@ """Utilities for converting between request states. 
""" +import logging from typing import ( Any, Callable, + cast, + Dict, + List, + Optional, + Union, ) +from galaxy.tool_util.parser.interface import ( + JsonTestCollectionDefDict, + JsonTestDatasetDefDict, +) from .models import ( + BooleanParameterModel, + ConditionalParameterModel, + ConditionalWhen, + DataCollectionRequest, + DataColumnParameterModel, + DataParameterModel, + DataRequestHda, + DataRequestInternalHda, + DataRequestUri, + DiscriminatorType, + DrillDownParameterModel, + FloatParameterModel, + GenomeBuildParameterModel, + HiddenParameterModel, + IntegerParameterModel, + RepeatParameterModel, + SectionParameterModel, + SelectParameterModel, ToolParameterBundle, ToolParameterT, ) from .state import ( + JobInternalToolState, + RequestInternalDereferencedToolState, RequestInternalToolState, RequestToolState, + TestCaseToolState, ) from .visitor import ( + validate_explicit_conditional_test_value, visit_input_values, VISITOR_NO_REPLACEMENT, ) +log = logging.getLogger(__name__) + def decode( external_state: RequestToolState, input_models: ToolParameterBundle, decode_id: Callable[[str], int] @@ -27,13 +61,30 @@ def decode( external_state.validate(input_models) + def decode_src_dict(src_dict: dict): + if "id" in src_dict: + decoded_dict = src_dict.copy() + decoded_dict["id"] = decode_id(src_dict["id"]) + return decoded_dict + else: + return src_dict + def decode_callback(parameter: ToolParameterT, value: Any): if parameter.parameter_type == "gx_data": + if value is None: + return VISITOR_NO_REPLACEMENT + data_parameter = cast(DataParameterModel, parameter) + if data_parameter.multiple: + assert isinstance(value, list), str(value) + return list(map(decode_src_dict, value)) + else: + assert isinstance(value, dict), str(value) + return decode_src_dict(value) + elif parameter.parameter_type == "gx_data_collection": + if value is None: + return VISITOR_NO_REPLACEMENT assert isinstance(value, dict), str(value) - assert "id" in value - decoded_dict = value.copy() - decoded_dict["id"] = decode_id(value["id"]) - return decoded_dict + return decode_src_dict(value) else: return VISITOR_NO_REPLACEMENT @@ -53,13 +104,26 @@ def encode( ) -> RequestToolState: """Prepare an external representation of tool state (request) for storing in the database (request_internal).""" + def encode_src_dict(src_dict: dict): + if "id" in src_dict: + encoded_dict = src_dict.copy() + encoded_dict["id"] = encode_id(src_dict["id"]) + return encoded_dict + else: + return src_dict + def encode_callback(parameter: ToolParameterT, value: Any): if parameter.parameter_type == "gx_data": + data_parameter = cast(DataParameterModel, parameter) + if data_parameter.multiple: + assert isinstance(value, list), str(value) + return list(map(encode_src_dict, value)) + else: + assert isinstance(value, dict), str(value) + return encode_src_dict(value) + elif parameter.parameter_type == "gx_data_collection": assert isinstance(value, dict), str(value) - assert "id" in value - encoded_dict = value.copy() - encoded_dict["id"] = encode_id(value["id"]) - return encoded_dict + return encode_src_dict(value) else: return VISITOR_NO_REPLACEMENT @@ -71,3 +135,226 @@ def encode_callback(parameter: ToolParameterT, value: Any): request_state = RequestToolState(request_state_dict) request_state.validate(input_models) return request_state + + +DereferenceCallable = Callable[[DataRequestUri], DataRequestInternalHda] + + +def dereference( + internal_state: RequestInternalToolState, input_models: ToolParameterBundle, dereference: DereferenceCallable +) 
-> RequestInternalDereferencedToolState: + + def derefrence_dict(src_dict: dict): + src = src_dict.get("src") + if src == "url": + data_request_uri: DataRequestUri = DataRequestUri.model_validate(src_dict) + data_request_hda: DataRequestInternalHda = dereference(data_request_uri) + return data_request_hda.model_dump() + else: + return src_dict + + def dereference_callback(parameter: ToolParameterT, value: Any): + if parameter.parameter_type == "gx_data": + if value is None: + return VISITOR_NO_REPLACEMENT + data_parameter = cast(DataParameterModel, parameter) + if data_parameter.multiple: + assert isinstance(value, list), str(value) + return list(map(derefrence_dict, value)) + else: + assert isinstance(value, dict), str(value) + return derefrence_dict(value) + else: + return VISITOR_NO_REPLACEMENT + + request_state_dict = visit_input_values( + input_models, + internal_state, + dereference_callback, + ) + request_state = RequestInternalDereferencedToolState(request_state_dict) + request_state.validate(input_models) + return request_state + + +# interfaces for adapting test data dictionaries to tool request dictionaries +# e.g. {class: File, path: foo.bed} => {src: hda, id: ab1235cdfea3} +AdaptDatasets = Callable[[JsonTestDatasetDefDict], DataRequestHda] +AdaptCollections = Callable[[JsonTestCollectionDefDict], DataCollectionRequest] + + +def encode_test( + test_case_state: TestCaseToolState, + input_models: ToolParameterBundle, + adapt_datasets: AdaptDatasets, + adapt_collections: AdaptCollections, +): + + def encode_callback(parameter: ToolParameterT, value: Any): + if parameter.parameter_type == "gx_data": + data_parameter = cast(DataParameterModel, parameter) + if value is not None: + if data_parameter.multiple: + assert isinstance(value, list), str(value) + test_datasets = cast(List[JsonTestDatasetDefDict], value) + return [d.model_dump() for d in map(adapt_datasets, test_datasets)] + else: + assert isinstance(value, dict), str(value) + test_dataset = cast(JsonTestDatasetDefDict, value) + return adapt_datasets(test_dataset).model_dump() + elif parameter.parameter_type == "gx_data_collection": + # data_collection_parameter = cast(DataCollectionParameterModel, parameter) + if value is not None: + assert isinstance(value, dict), str(value) + test_collection = cast(JsonTestCollectionDefDict, value) + return adapt_collections(test_collection).model_dump() + elif parameter.parameter_type == "gx_select": + select_parameter = cast(SelectParameterModel, parameter) + if select_parameter.multiple and value is not None: + return [v.strip() for v in value.split(",")] + else: + return VISITOR_NO_REPLACEMENT + elif parameter.parameter_type == "gx_drill_down": + drilldown = cast(DrillDownParameterModel, parameter) + if drilldown.multiple and value is not None: + return [v.strip() for v in value.split(",")] + else: + return VISITOR_NO_REPLACEMENT + elif parameter.parameter_type == "gx_data_column": + data_column = cast(DataColumnParameterModel, parameter) + is_multiple = data_column.multiple + if is_multiple and value is not None and isinstance(value, (str,)): + return [int(v.strip()) for v in value.split(",")] + else: + return VISITOR_NO_REPLACEMENT + + return VISITOR_NO_REPLACEMENT + + request_state_dict = visit_input_values( + input_models, + test_case_state, + encode_callback, + ) + request_state = RequestToolState(request_state_dict) + request_state.validate(input_models) + return request_state + + +def fill_static_defaults( + tool_state: Dict[str, Any], input_models: ToolParameterBundle, profile: 
float, partial: bool = True +) -> Dict[str, Any]: + """If additional defaults might stem from Galaxy runtime, partial should be true. + + Setting partial to True, prevents runtime validation. + """ + _fill_defaults(tool_state, input_models) + + if not partial: + internal_state = JobInternalToolState(tool_state) + internal_state.validate(input_models) + return tool_state + + +def _fill_defaults(tool_state: Dict[str, Any], input_models: ToolParameterBundle) -> None: + for parameter in input_models.parameters: + _fill_default_for(tool_state, parameter) + + +def _fill_default_for(tool_state: Dict[str, Any], parameter: ToolParameterT) -> None: + parameter_name = parameter.name + parameter_type = parameter.parameter_type + if parameter_type == "gx_boolean": + boolean = cast(BooleanParameterModel, parameter) + if parameter_name not in tool_state: + # even optional parameters default to false if not in the body of the request :_( + # see test_tools.py -> expression_null_handling_boolean or test cases for gx_boolean_optional.xml + tool_state[parameter_name] = boolean.value or False + + if parameter_type in ["gx_integer", "gx_float", "gx_hidden"]: + has_value_parameter = cast( + Union[ + IntegerParameterModel, + FloatParameterModel, + HiddenParameterModel, + ], + parameter, + ) + if parameter_name not in tool_state: + tool_state[parameter_name] = has_value_parameter.value + elif parameter_type == "gx_genomebuild": + genomebuild = cast(GenomeBuildParameterModel, parameter) + if parameter_name not in tool_state and genomebuild.optional: + tool_state[parameter_name] = None + elif parameter_type == "gx_select": + select = cast(SelectParameterModel, parameter) + # don't fill in dynamic parameters - wait for runtime to specify the default + if select.dynamic_options: + return + + if parameter_name not in tool_state: + if not select.multiple: + tool_state[parameter_name] = select.default_value + else: + tool_state[parameter_name] = None + elif parameter_type == "gx_drill_down": + if parameter_name not in tool_state: + drilldown = cast(DrillDownParameterModel, parameter) + if drilldown.multiple: + options = drilldown.default_options + if options is not None: + tool_state[parameter_name] = options + else: + option = drilldown.default_option + if option is not None: + tool_state[parameter_name] = option + elif parameter_type in ["gx_conditional"]: + if parameter_name not in tool_state: + tool_state[parameter_name] = {} + + raw_conditional_state = tool_state[parameter_name] + assert isinstance(raw_conditional_state, dict) + conditional_state = cast(Dict[str, Any], raw_conditional_state) + + conditional = cast(ConditionalParameterModel, parameter) + test_parameter = conditional.test_parameter + test_parameter_name = test_parameter.name + + explicit_test_value: Optional[DiscriminatorType] = ( + conditional_state[test_parameter_name] if test_parameter_name in conditional_state else None + ) + test_value = validate_explicit_conditional_test_value(test_parameter_name, explicit_test_value) + when = _select_which_when(conditional, test_value, conditional_state) + test_parameter = conditional.test_parameter + _fill_default_for(conditional_state, test_parameter) + _fill_defaults(conditional_state, when) + elif parameter_type in ["gx_repeat"]: + if parameter_name not in tool_state: + tool_state[parameter_name] = [] + repeat_instances = cast(List[Dict[str, Any]], tool_state[parameter_name]) + repeat = cast(RepeatParameterModel, parameter) + if repeat.min: + while len(repeat_instances) < repeat.min: + 
repeat_instances.append({}) + + for instance_state in tool_state[parameter_name]: + _fill_defaults(instance_state, repeat) + elif parameter_type in ["gx_section"]: + if parameter_name not in tool_state: + tool_state[parameter_name] = {} + section_state = cast(Dict[str, Any], tool_state[parameter_name]) + section = cast(SectionParameterModel, parameter) + _fill_defaults(section_state, section) + + +def _select_which_when( + conditional: ConditionalParameterModel, test_value: Optional[DiscriminatorType], conditional_state: Dict[str, Any] +) -> ConditionalWhen: + for when in conditional.whens: + if test_value is None and when.is_default_when: + return when + elif test_value == when.discriminator: + return when + else: + raise Exception( + f"Invalid conditional test value ({test_value}) for parameter ({conditional.test_parameter.name})" + ) diff --git a/lib/galaxy/tool_util/parameters/factory.py b/lib/galaxy/tool_util/parameters/factory.py index a636b13a17e6..e200b67aeb0e 100644 --- a/lib/galaxy/tool_util/parameters/factory.py +++ b/lib/galaxy/tool_util/parameters/factory.py @@ -14,6 +14,7 @@ PagesSource, ToolSource, ) +from galaxy.tool_util.parser.util import parse_profile_version from galaxy.util import string_as_bool from .models import ( BaseUrlParameterModel, @@ -64,7 +65,7 @@ def get_color_value(input_source: InputSource) -> str: return input_source.get("value", "#000000") -def _from_input_source_galaxy(input_source: InputSource) -> ToolParameterT: +def _from_input_source_galaxy(input_source: InputSource, profile: float) -> ToolParameterT: input_type = input_source.parse_input_type() if input_type == "param": param_type = input_source.get("type") @@ -84,11 +85,11 @@ def _from_input_source_galaxy(input_source: InputSource) -> ToolParameterT: return IntegerParameterModel(name=input_source.parse_name(), optional=optional, value=int_value) elif param_type == "boolean": nullable = input_source.parse_optional() - checked = input_source.get_bool("checked", None if nullable else False) + value = input_source.get_bool_or_none("checked", None if nullable else False) return BooleanParameterModel( name=input_source.parse_name(), optional=nullable, - value=checked, + value=value, ) elif param_type == "text": optional = input_source.parse_optional() @@ -213,7 +214,8 @@ def _from_input_source_galaxy(input_source: InputSource) -> ToolParameterT: elif input_type == "conditional": test_param_input_source = input_source.parse_test_input_source() test_parameter = cast( - Union[BooleanParameterModel, SelectParameterModel], _from_input_source_galaxy(test_param_input_source) + Union[BooleanParameterModel, SelectParameterModel], + _from_input_source_galaxy(test_param_input_source, profile), ) whens = [] default_test_value = cond_test_parameter_default_value(test_parameter) @@ -224,12 +226,14 @@ def _from_input_source_galaxy(input_source: InputSource) -> ToolParameterT: else: typed_value = value - tool_parameter_models = input_models_for_page(case_inputs_sources) + tool_parameter_models = input_models_for_page(case_inputs_sources, profile) is_default_when = False if typed_value == default_test_value: is_default_when = True whens.append( - ConditionalWhen(discriminator=value, parameters=tool_parameter_models, is_default_when=is_default_when) + ConditionalWhen( + discriminator=typed_value, parameters=tool_parameter_models, is_default_when=is_default_when + ) ) return ConditionalParameterModel( name=input_source.parse_name(), @@ -241,7 +245,7 @@ def _from_input_source_galaxy(input_source: InputSource) -> 
ToolParameterT: # title = input_source.get("title") # help = input_source.get("help", None) instance_sources = input_source.parse_nested_inputs_source() - instance_tool_parameter_models = input_models_for_page(instance_sources) + instance_tool_parameter_models = input_models_for_page(instance_sources, profile) min_raw = input_source.get("min", None) max_raw = input_source.get("max", None) min = int(min_raw) if min_raw is not None else None @@ -255,7 +259,7 @@ def _from_input_source_galaxy(input_source: InputSource) -> ToolParameterT: elif input_type == "section": name = input_source.get("name") instance_sources = input_source.parse_nested_inputs_source() - instance_tool_parameter_models = input_models_for_page(instance_sources) + instance_tool_parameter_models = input_models_for_page(instance_sources, profile) return SectionParameterModel( name=name, parameters=instance_tool_parameter_models, @@ -328,34 +332,35 @@ def tool_parameter_bundle_from_json(json: Dict[str, Any]) -> ToolParameterBundle def input_models_for_tool_source(tool_source: ToolSource) -> ToolParameterBundleModel: pages = tool_source.parse_input_pages() - return ToolParameterBundleModel(parameters=input_models_for_pages(pages)) + profile = parse_profile_version(tool_source) + return ToolParameterBundleModel(parameters=input_models_for_pages(pages, profile)) -def input_models_for_pages(pages: PagesSource) -> List[ToolParameterT]: +def input_models_for_pages(pages: PagesSource, profile: float) -> List[ToolParameterT]: input_models = [] if pages.inputs_defined: for page_source in pages.page_sources: - input_models.extend(input_models_for_page(page_source)) + input_models.extend(input_models_for_page(page_source, profile)) return input_models -def input_models_for_page(page_source: PageSource) -> List[ToolParameterT]: +def input_models_for_page(page_source: PageSource, profile: float) -> List[ToolParameterT]: input_models = [] for input_source in page_source.parse_input_sources(): input_type = input_source.parse_input_type() if input_type == "display": # not a real input... just skip this. Should this be handled in the parser layer better? continue - tool_parameter_model = from_input_source(input_source) + tool_parameter_model = from_input_source(input_source, profile) input_models.append(tool_parameter_model) return input_models -def from_input_source(input_source: InputSource) -> ToolParameterT: +def from_input_source(input_source: InputSource, profile: float) -> ToolParameterT: tool_parameter: ToolParameterT if isinstance(input_source, CwlInputSource): tool_parameter = _from_input_source_cwl(input_source) else: - tool_parameter = _from_input_source_galaxy(input_source) + tool_parameter = _from_input_source_galaxy(input_source, profile) return tool_parameter diff --git a/lib/galaxy/tool_util/parameters/models.py b/lib/galaxy/tool_util/parameters/models.py index 30d6b5a5a483..11957c37099a 100644 --- a/lib/galaxy/tool_util/parameters/models.py +++ b/lib/galaxy/tool_util/parameters/models.py @@ -61,7 +61,13 @@ # + request_internal: This is a pydantic model to validate what Galaxy expects to find in the database, # in particular dataset and collection references should be decoded integers. 
StateRepresentationT = Literal[ - "request", "request_internal", "job_internal", "test_case_xml", "workflow_step", "workflow_step_linked" + "request", + "request_internal", + "request_internal_dereferenced", + "job_internal", + "test_case_xml", + "workflow_step", + "workflow_step_linked", ] @@ -182,7 +188,10 @@ def pydantic_template(self, state_representation: StateRepresentationT) -> Dynam py_type = self.py_type if state_representation == "workflow_step_linked": py_type = allow_connected_value(py_type) - return dynamic_model_information_from_py_type(self, py_type) + requires_value = self.request_requires_value + if state_representation == "job_internal": + requires_value = True + return dynamic_model_information_from_py_type(self, py_type, requires_value=requires_value) @property def request_requires_value(self) -> bool: @@ -204,7 +213,10 @@ def pydantic_template(self, state_representation: StateRepresentationT) -> Dynam py_type = self.py_type if state_representation == "workflow_step_linked": py_type = allow_connected_value(py_type) - return dynamic_model_information_from_py_type(self, py_type) + requires_value = self.request_requires_value + if state_representation == "job_internal": + requires_value = True + return dynamic_model_information_from_py_type(self, py_type, requires_value=requires_value) @property def request_requires_value(self) -> bool: @@ -239,49 +251,109 @@ def request_requires_value(self) -> bool: TestCaseDataSrcT = Literal["File"] -class DataRequest(StrictModel): - src: DataSrcT +class DataRequestHda(StrictModel): + src: Literal["hda"] = "hda" id: StrictStr -class BatchDataInstance(StrictModel): - src: MultiDataSrcT +class DataRequestLdda(StrictModel): + src: Literal["ldda"] = "ldda" id: StrictStr -class MultiDataInstance(StrictModel): +class DataRequestHdca(StrictModel): + src: Literal["hdca"] = "hdca" + id: StrictStr + + +class DatasetHash(StrictModel): + hash_function: Literal["MD5", "SHA-1", "SHA-256", "SHA-512"] + hash_value: StrictStr + + +class DataRequestUri(StrictModel): + # calling it url instead of uri to match data fetch schema... + src: Literal["url"] = "url" + url: StrictStr + name: Optional[StrictStr] = None + ext: StrictStr + dbkey: StrictStr = "?" 
+ deferred: StrictBool = False + created_from_basename: Optional[StrictStr] = None + info: Optional[StrictStr] = None + hashes: Optional[List[DatasetHash]] = None + space_to_tab: bool = False + to_posix_lines: bool = False + # to implement: + # tags + + +DataRequest: Type = cast( + Type, Annotated[union_type([DataRequestHda, DataRequestLdda, DataRequestUri]), Field(discriminator="src")] +) + + +class BatchDataInstance(StrictModel): src: MultiDataSrcT id: StrictStr -MultiDataRequest: Type = union_type([MultiDataInstance, List[MultiDataInstance]]) +MultiDataInstance: Type = cast( + Type, + Annotated[ + union_type([DataRequestHda, DataRequestLdda, DataRequestHdca, DataRequestUri]), Field(discriminator="src") + ], +) +MultiDataRequest: Type = cast(Type, union_type([MultiDataInstance, list_type(MultiDataInstance)])) -class DataRequestInternal(StrictModel): - src: DataSrcT +class DataRequestInternalHda(StrictModel): + src: Literal["hda"] = "hda" id: StrictInt -class BatchDataInstanceInternal(StrictModel): - src: MultiDataSrcT +class DataRequestInternalLdda(StrictModel): + src: Literal["ldda"] = "ldda" id: StrictInt -class MultiDataInstanceInternal(StrictModel): - src: MultiDataSrcT +class DataRequestInternalHdca(StrictModel): + src: Literal["hdca"] = "hdca" id: StrictInt -class DataTestCaseValue(StrictModel): - src: TestCaseDataSrcT - path: str +DataRequestInternal: Type = cast( + Type, Annotated[Union[DataRequestInternalHda, DataRequestInternalLdda, DataRequestUri], Field(discriminator="src")] +) +DataRequestInternalDereferenced: Type = cast( + Type, Annotated[Union[DataRequestInternalHda, DataRequestInternalLdda], Field(discriminator="src")] +) +DataJobInternal = DataRequestInternalDereferenced + +class BatchDataInstanceInternal(StrictModel): + src: MultiDataSrcT + id: StrictInt -class MultipleDataTestCaseValue(RootModel): - root: List[DataTestCaseValue] +MultiDataInstanceInternal: Type = cast( + Type, + Annotated[ + Union[DataRequestInternalHda, DataRequestInternalLdda, DataRequestInternalHdca, DataRequestUri], + Field(discriminator="src"), + ], +) +MultiDataInstanceInternalDereferenced: Type = cast( + Type, + Annotated[ + Union[DataRequestInternalHda, DataRequestInternalLdda, DataRequestInternalHdca], Field(discriminator="src") + ], +) -MultiDataRequestInternal: Type = union_type([MultiDataInstanceInternal, List[MultiDataInstanceInternal]]) +MultiDataRequestInternal: Type = union_type([MultiDataInstanceInternal, list_type(MultiDataInstanceInternal)]) +MultiDataRequestInternalDereferenced: Type = union_type( + [MultiDataInstanceInternalDereferenced, list_type(MultiDataInstanceInternalDereferenced)] +) class DataParameterModel(BaseGalaxyToolParameterModelDefinition): @@ -309,6 +381,15 @@ def py_type_internal(self) -> Type: base_model = DataRequestInternal return optional_if_needed(base_model, self.optional) + @property + def py_type_internal_dereferenced(self) -> Type: + base_model: Type + if self.multiple: + base_model = MultiDataRequestInternalDereferenced + else: + base_model = DataRequestInternalDereferenced + return optional_if_needed(base_model, self.optional) + @property def py_type_test_case(self) -> Type: base_model: Type @@ -325,8 +406,13 @@ def pydantic_template(self, state_representation: StateRepresentationT) -> Dynam return allow_batching( dynamic_model_information_from_py_type(self, self.py_type_internal), BatchDataInstanceInternal ) + elif state_representation == "request_internal_dereferenced": + return allow_batching( + dynamic_model_information_from_py_type(self, 
self.py_type_internal_dereferenced), + BatchDataInstanceInternal, + ) elif state_representation == "job_internal": - return dynamic_model_information_from_py_type(self, self.py_type_internal) + return dynamic_model_information_from_py_type(self, self.py_type_internal_dereferenced, requires_value=True) elif state_representation == "test_case_xml": return dynamic_model_information_from_py_type(self, self.py_type_test_case) elif state_representation == "workflow_step": @@ -366,8 +452,10 @@ def py_type_internal(self) -> Type: def pydantic_template(self, state_representation: StateRepresentationT) -> DynamicModelInformation: if state_representation == "request": return allow_batching(dynamic_model_information_from_py_type(self, self.py_type)) - elif state_representation == "request_internal": + elif state_representation in ["request_internal", "request_internal_dereferenced"]: return allow_batching(dynamic_model_information_from_py_type(self, self.py_type_internal)) + elif state_representation == "job_internal": + return dynamic_model_information_from_py_type(self, self.py_type_internal, requires_value=True) elif state_representation == "workflow_step": return dynamic_model_information_from_py_type(self, type(None), requires_value=False) elif state_representation == "workflow_step_linked": @@ -401,6 +489,8 @@ def pydantic_template(self, state_representation: StateRepresentationT) -> Dynam # allow it to be linked in so force allow optional... py_type = optional(py_type) requires_value = False + elif state_representation == "job_internal": + requires_value = True return dynamic_model_information_from_py_type(self, py_type, requires_value=requires_value) @property @@ -487,7 +577,10 @@ def pydantic_template(self, state_representation: StateRepresentationT) -> Dynam py_type = self.py_type if state_representation == "workflow_step_linked": py_type = allow_connected_value(py_type) - return dynamic_model_information_from_py_type(self, py_type) + requires_value = self.request_requires_value + if state_representation == "job_internal": + requires_value = True + return dynamic_model_information_from_py_type(self, py_type, requires_value=requires_value) @property def request_requires_value(self) -> bool: @@ -568,7 +661,7 @@ def py_type_if_required(self, allow_connections: bool = False) -> Type: @property def py_type(self) -> Type: - return optional_if_needed(self.py_type_if_required(), self.optional) + return optional_if_needed(self.py_type_if_required(), self.optional or self.multiple) @property def py_type_workflow_step(self) -> Type: @@ -580,7 +673,9 @@ def pydantic_template(self, state_representation: StateRepresentationT) -> Dynam return dynamic_model_information_from_py_type(self, self.py_type_workflow_step, requires_value=False) elif state_representation == "workflow_step_linked": py_type = self.py_type_if_required(allow_connections=True) - return dynamic_model_information_from_py_type(self, optional_if_needed(py_type, self.optional)) + return dynamic_model_information_from_py_type( + self, optional_if_needed(py_type, self.optional or self.multiple) + ) elif state_representation == "test_case_xml": # in a YAML test case representation this can be string, in XML we are still expecting a comma separated string py_type = self.py_type_if_required(allow_connections=False) @@ -592,7 +687,10 @@ def pydantic_template(self, state_representation: StateRepresentationT) -> Dynam self, optional_if_needed(py_type, self.optional), validators=validators ) else: - return dynamic_model_information_from_py_type(self, 
self.py_type) + requires_value = self.request_requires_value + if state_representation == "job_internal": + requires_value = True + return dynamic_model_information_from_py_type(self, self.py_type, requires_value=requires_value) @property def has_selected_static_option(self): @@ -614,8 +712,13 @@ def default_value(self) -> Optional[str]: def request_requires_value(self) -> bool: # API will allow an empty value and just grab the first static option # see API Tests -> test_tools.py -> test_select_first_by_default - # so only require a value in the multiple case if optional is False - return self.multiple and not self.optional + # If it is multiple - it will also always just allow null regardless of + # optional - see test_select_multiple_null_handling + return False + + @property + def dynamic_options(self) -> bool: + return self.options is None class GenomeBuildParameterModel(BaseGalaxyToolParameterModelDefinition): @@ -630,7 +733,10 @@ def py_type(self) -> Type: return optional_if_needed(py_type, self.optional) def pydantic_template(self, state_representation: StateRepresentationT) -> DynamicModelInformation: - return dynamic_model_information_from_py_type(self, self.py_type) + requires_value = self.request_requires_value + if state_representation == "job_internal": + requires_value = True + return dynamic_model_information_from_py_type(self, self.py_type, requires_value=requires_value) @property def request_requires_value(self) -> bool: @@ -641,11 +747,13 @@ def request_requires_value(self) -> bool: DrillDownHierarchyT = Literal["recurse", "exact"] -def drill_down_possible_values(options: List[DrillDownOptionsDict], multiple: bool) -> List[str]: +def drill_down_possible_values( + options: List[DrillDownOptionsDict], multiple: bool, hierarchy: DrillDownHierarchyT +) -> List[str]: possible_values = [] def add_value(option: str, is_leaf: bool): - if not multiple and not is_leaf: + if not multiple and not is_leaf and hierarchy == "recurse": return possible_values.append(option) @@ -673,7 +781,8 @@ class DrillDownParameterModel(BaseGalaxyToolParameterModelDefinition): def py_type(self) -> Type: if self.options is not None: literal_options: List[Type] = [ - cast_as_type(Literal[o]) for o in drill_down_possible_values(self.options, self.multiple) + cast_as_type(Literal[o]) + for o in drill_down_possible_values(self.options, self.multiple, self.hierarchy) ] py_type = union_type(literal_options) else: @@ -690,10 +799,12 @@ def py_type_test_case_xml(self) -> Type: return optional_if_needed(base_model, not self.request_requires_value) def pydantic_template(self, state_representation: StateRepresentationT) -> DynamicModelInformation: - if state_representation == "test_case_xml": - return dynamic_model_information_from_py_type(self, self.py_type_test_case_xml) - else: - return dynamic_model_information_from_py_type(self, self.py_type) + py_type = self.py_type_test_case_xml if state_representation == "test_case_xml" else self.py_type + requires_value = self.request_requires_value + if state_representation == "job_internal": + requires_value = True + + return dynamic_model_information_from_py_type(self, py_type, requires_value=requires_value) @property def request_requires_value(self) -> bool: @@ -706,6 +817,24 @@ def request_requires_value(self) -> bool: # do we need to default to assuming they're not required? 
return False + @property + def default_option(self) -> Optional[str]: + options = self.options + if options: + selected_options = selected_drill_down_options(options) + if len(selected_options) > 0: + return selected_options[0] + return None + + @property + def default_options(self) -> Optional[List[str]]: + options = self.options + if options: + selected_options = selected_drill_down_options(options) + return selected_options + + return None + def any_drill_down_options_selected(options: List[DrillDownOptionsDict]) -> bool: for option in options: @@ -719,6 +848,19 @@ def any_drill_down_options_selected(options: List[DrillDownOptionsDict]) -> bool return False +def selected_drill_down_options(options: List[DrillDownOptionsDict]) -> List[str]: + selected_options: List[str] = [] + for option in options: + selected = option.get("selected") + value = option.get("value") + if selected and value: + selected_options.append(value) + child_options = option.get("options", []) + selected_options.extend(selected_drill_down_options(child_options)) + + return selected_options + + class DataColumnParameterModel(BaseGalaxyToolParameterModelDefinition): parameter_type: Literal["gx_data_column"] = "gx_data_column" multiple: bool @@ -749,7 +891,10 @@ def pydantic_template(self, state_representation: StateRepresentationT) -> Dynam validators = {} return dynamic_model_information_from_py_type(self, self.py_type, validators=validators) else: - return dynamic_model_information_from_py_type(self, self.py_type) + requires_value = self.request_requires_value + if state_representation == "job_internal": + requires_value = True + return dynamic_model_information_from_py_type(self, self.py_type, requires_value=requires_value) @property def request_requires_value(self) -> bool: @@ -768,7 +913,10 @@ def py_type(self) -> Type: return optional_if_needed(py_type, self.optional) def pydantic_template(self, state_representation: StateRepresentationT) -> DynamicModelInformation: - return dynamic_model_information_from_py_type(self, self.py_type) + requires_value = self.request_requires_value + if state_representation == "job_internal": + requires_value = True + return dynamic_model_information_from_py_type(self, self.py_type, requires_value=requires_value) @property def request_requires_value(self) -> bool: @@ -819,10 +967,14 @@ class ConditionalParameterModel(BaseGalaxyToolParameterModelDefinition): whens: List[ConditionalWhen] def pydantic_template(self, state_representation: StateRepresentationT) -> DynamicModelInformation: + is_boolean = isinstance(self.test_parameter, BooleanParameterModel) test_param_name = self.test_parameter.name test_info = self.test_parameter.pydantic_template(state_representation) extra_validators = test_info.validators - test_parameter_requires_value = self.test_parameter.request_requires_value + if state_representation == "job_internal": + test_parameter_requires_value = True + else: + test_parameter_requires_value = self.test_parameter.request_requires_value when_types: List[Type[BaseModel]] = [] default_type = None for when in self.whens: @@ -832,7 +984,7 @@ def pydantic_template(self, state_representation: StateRepresentationT) -> Dynam initialize_test = ... 
else: initialize_test = None - + tag = str(discriminator) if not is_boolean else str(discriminator).lower() extra_kwd = {test_param_name: (Union[str, bool], initialize_test)} when_types.append( cast( @@ -845,22 +997,29 @@ def pydantic_template(self, state_representation: StateRepresentationT) -> Dynam extra_kwd=extra_kwd, extra_validators=extra_validators, ), - Tag(str(discriminator)), + Tag(tag), ], ) ) - if when.is_default_when: - extra_kwd = {} - default_type = create_field_model( - parameters, - f"When_{test_param_name}___absent", - state_representation, - extra_kwd=extra_kwd, - extra_validators={}, - ) - when_types.append(cast(Type[BaseModel], Annotated[default_type, Tag("__absent__")])) - - def model_x_discriminator(v: Any) -> str: + # job_internal requires parameters are filled in - so don't allow the absent branch + # here that most other state representations allow + if state_representation != "job_internal": + if when.is_default_when: + extra_kwd = {} + default_type = create_field_model( + parameters, + f"When_{test_param_name}___absent", + state_representation, + extra_kwd=extra_kwd, + extra_validators={}, + ) + when_types.append(cast(Type[BaseModel], Annotated[default_type, Tag("__absent__")])) + + def model_x_discriminator(v: Any) -> Optional[str]: + # returning None causes a validation error, this is what we would want if + # if the conditional state is not a dictionary. + if not isinstance(v, dict): + return None if test_param_name not in v: return "__absent__" else: @@ -872,17 +1031,29 @@ def model_x_discriminator(v: Any) -> str: else: return str(test_param_val) - cond_type = union_type(when_types) + py_type: Type - class ConditionalType(RootModel): - root: cond_type = Field(..., discriminator=Discriminator(model_x_discriminator)) # type: ignore[valid-type] + if len(when_types) > 1: + cond_type = union_type(when_types) - if default_type is not None: - initialize_cond = None - else: - initialize_cond = ... + class ConditionalType(RootModel): + root: cond_type = Field(..., discriminator=Discriminator(model_x_discriminator)) # type: ignore[valid-type] - py_type = ConditionalType + if default_type is not None: + initialize_cond = None + else: + initialize_cond = ... + + py_type = ConditionalType + + else: + py_type = when_types[0] + # a better check here would be if any of the parameters below this have a required value, + # in the case of job_internal though this is correct + if state_representation == "job_internal": + initialize_cond = ... + else: + initialize_cond = None return DynamicModelInformation( self.name, @@ -906,9 +1077,12 @@ def pydantic_template(self, state_representation: StateRepresentationT) -> Dynam instance_class: Type[BaseModel] = create_field_model( self.parameters, f"Repeat_{self.name}", state_representation ) + requires_value = self.request_requires_value + if state_representation == "job_internal": + requires_value = True initialize_repeat: Any - if self.request_requires_value: + if requires_value: initialize_repeat = ... else: initialize_repeat = None @@ -942,7 +1116,10 @@ def pydantic_template(self, state_representation: StateRepresentationT) -> Dynam instance_class: Type[BaseModel] = create_field_model( self.parameters, f"Section_{self.name}", state_representation ) - if self.request_requires_value: + requires_value = self.request_requires_value + if state_representation == "job_internal": + requires_value = True + if requires_value: initialize_section = ... 
else: initialize_section = None @@ -1199,6 +1376,12 @@ def create_request_internal_model(tool: ToolParameterBundle, name: str = "Dynami return create_field_model(tool.parameters, name, "request_internal") +def create_request_internal_dereferenced_model( + tool: ToolParameterBundle, name: str = "DynamicModelForTool" +) -> Type[BaseModel]: + return create_field_model(tool.parameters, name, "request_internal_dereferenced") + + def create_job_internal_model(tool: ToolParameterBundle, name: str = "DynamicModelForTool") -> Type[BaseModel]: return create_field_model(tool.parameters, name, "job_internal") @@ -1259,6 +1442,11 @@ def validate_internal_request(tool: ToolParameterBundle, request: Dict[str, Any] validate_against_model(pydantic_model, request) +def validate_internal_request_dereferenced(tool: ToolParameterBundle, request: Dict[str, Any]) -> None: + pydantic_model = create_request_internal_dereferenced_model(tool) + validate_against_model(pydantic_model, request) + + def validate_internal_job(tool: ToolParameterBundle, request: Dict[str, Any]) -> None: pydantic_model = create_job_internal_model(tool) validate_against_model(pydantic_model, request) diff --git a/lib/galaxy/tool_util/parameters/state.py b/lib/galaxy/tool_util/parameters/state.py index 94eb48b5de52..af15cf23ac7b 100644 --- a/lib/galaxy/tool_util/parameters/state.py +++ b/lib/galaxy/tool_util/parameters/state.py @@ -15,6 +15,7 @@ from .models import ( create_job_internal_model, + create_request_internal_dereferenced_model, create_request_internal_model, create_request_model, create_test_case_model, @@ -83,6 +84,14 @@ def _parameter_model_for(cls, parameters: ToolParameterBundle) -> Type[BaseModel return create_request_internal_model(parameters) +class RequestInternalDereferencedToolState(ToolState): + state_representation: Literal["request_internal_dereferenced"] = "request_internal_dereferenced" + + @classmethod + def _parameter_model_for(cls, parameters: ToolParameterBundle) -> Type[BaseModel]: + return create_request_internal_dereferenced_model(parameters) + + class JobInternalToolState(ToolState): state_representation: Literal["job_internal"] = "job_internal" diff --git a/lib/galaxy/tool_util/parameters/visitor.py b/lib/galaxy/tool_util/parameters/visitor.py index 5b8e059f2895..35d4bc176a3e 100644 --- a/lib/galaxy/tool_util/parameters/visitor.py +++ b/lib/galaxy/tool_util/parameters/visitor.py @@ -12,14 +12,27 @@ from typing_extensions import Protocol from .models import ( + ConditionalParameterModel, + ConditionalWhen, + RepeatParameterModel, + SectionParameterModel, simple_input_models, ToolParameterBundle, ToolParameterT, ) from .state import ToolState -VISITOR_NO_REPLACEMENT = object() -VISITOR_UNDEFINED = object() + +class VisitorNoReplacement: + pass + + +class VisitorUndefined: + pass + + +VISITOR_NO_REPLACEMENT = VisitorNoReplacement() +VISITOR_UNDEFINED = VisitorUndefined() class Callback(Protocol): @@ -47,20 +60,79 @@ def _visit_input_values( callback: Callback, no_replacement_value=VISITOR_NO_REPLACEMENT, ) -> Dict[str, Any]: - new_input_values = {} + + def _callback(name: str, old_values: Dict[str, Any], new_values: Dict[str, Any]): + input_value = old_values.get(name, VISITOR_UNDEFINED) + if input_value is VISITOR_UNDEFINED: + return + replacement = callback(model, input_value) + if replacement != no_replacement_value: + new_values[name] = replacement + else: + new_values[name] = input_value + + new_input_values: Dict[str, Any] = {} for model in input_models: name = model.name + parameter_type = 
model.parameter_type input_value = input_values.get(name, VISITOR_UNDEFINED) - replacement = callback(model, input_value) - if replacement != no_replacement_value: - new_input_values[name] = replacement - elif replacement is VISITOR_UNDEFINED: - pass + if input_value is VISITOR_UNDEFINED: + continue + + if parameter_type == "gx_repeat": + repeat_parameter = cast(RepeatParameterModel, model) + repeat_parameters = repeat_parameter.parameters + repeat_values = cast(list, input_value) + new_repeat_values = [] + for repeat_instance_values in repeat_values: + new_repeat_values.append( + _visit_input_values( + repeat_parameters, repeat_instance_values, callback, no_replacement_value=no_replacement_value + ) + ) + new_input_values[name] = new_repeat_values + elif parameter_type == "gx_section": + section_parameter = cast(SectionParameterModel, model) + section_parameters = section_parameter.parameters + section_values = cast(dict, input_value) + new_section_values = _visit_input_values( + section_parameters, section_values, callback, no_replacement_value=no_replacement_value + ) + new_input_values[name] = new_section_values + elif parameter_type == "gx_conditional": + conditional_parameter = cast(ConditionalParameterModel, model) + test_parameter = conditional_parameter.test_parameter + test_parameter_name = test_parameter.name + + conditional_values = cast(dict, input_value) + when: ConditionalWhen = _select_which_when(conditional_parameter, conditional_values) + new_conditional_values = _visit_input_values( + when.parameters, conditional_values, callback, no_replacement_value=no_replacement_value + ) + if test_parameter_name in conditional_values: + _callback(test_parameter_name, conditional_values, new_conditional_values) + new_input_values[name] = new_conditional_values else: - new_input_values[name] = input_value + _callback(name, input_values, new_input_values) return new_input_values +def _select_which_when(conditional: ConditionalParameterModel, state: dict) -> ConditionalWhen: + test_parameter = conditional.test_parameter + test_parameter_name = test_parameter.name + explicit_test_value = state.get(test_parameter_name) + test_value = validate_explicit_conditional_test_value(test_parameter_name, explicit_test_value) + for when in conditional.whens: + print(when.discriminator) + print(type(when.discriminator)) + if test_value is None and when.is_default_when: + return when + elif test_value == when.discriminator: + return when + else: + raise Exception(f"Invalid conditional test value ({explicit_test_value}) for parameter ({test_parameter_name})") + + def flat_state_path(has_name: Union[str, ToolParameterT], prefix: Optional[str] = None) -> str: """Given a parameter name or model and an optional prefix, give 'flat' name for parameter in tree.""" if hasattr(has_name, "name"): diff --git a/lib/galaxy/tool_util/parser/interface.py b/lib/galaxy/tool_util/parser/interface.py index 19c557cd53de..4680b0a0d17c 100644 --- a/lib/galaxy/tool_util/parser/interface.py +++ b/lib/galaxy/tool_util/parser/interface.py @@ -478,6 +478,12 @@ def get_bool(self, key, default): keys to be supported depend on the parameter type. """ + @abstractmethod + def get_bool_or_none(self, key, default): + """Return simple named properties as boolean or none for this input source. + keys to be supported depend on the parameter type. 
+ """ + def parse_label(self): return self.get("label") @@ -655,12 +661,14 @@ class XmlTestCollectionDefDict(TypedDict): ) -def xml_data_input_to_json(xml_input: ToolSourceTestInput) -> "JsonTestDatasetDefDict": +def xml_data_input_to_json(xml_input: ToolSourceTestInput) -> Optional["JsonTestDatasetDefDict"]: attributes = xml_input["attributes"] + value = xml_input["value"] + if value is None and attributes.get("location") is None: + return None as_dict: JsonTestDatasetDefDict = { "class": "File", } - value = xml_input["value"] if value: as_dict["path"] = value _copy_if_exists(attributes, as_dict, "location") @@ -705,10 +713,16 @@ def to_element(xml_element_dict: "TestCollectionDefElementInternal") -> "JsonTes identifier=identifier, **element_object._test_format_to_dict() ) else: - as_dict = JsonTestCollectionDefDatasetElementDict( - identifier=identifier, - **xml_data_input_to_json(cast(ToolSourceTestInput, element_object)), + input_as_dict: Optional[JsonTestDatasetDefDict] = xml_data_input_to_json( + cast(ToolSourceTestInput, element_object) ) + if input_as_dict is not None: + as_dict = JsonTestCollectionDefDatasetElementDict( + identifier=identifier, + **input_as_dict, + ) + else: + raise Exception("Invalid empty test element...") return as_dict test_format_dict = BaseJsonTestCollectionDefCollectionElementDict( diff --git a/lib/galaxy/tool_util/parser/xml.py b/lib/galaxy/tool_util/parser/xml.py index a89754553f84..6d11a8569922 100644 --- a/lib/galaxy/tool_util/parser/xml.py +++ b/lib/galaxy/tool_util/parser/xml.py @@ -30,6 +30,7 @@ Element, ElementTree, string_as_bool, + string_as_bool_or_none, XML, xml_text, xml_to_string, @@ -1328,6 +1329,9 @@ def get(self, key, value=None): def get_bool(self, key, default): return string_as_bool(self.get(key, default)) + def get_bool_or_none(self, key, default): + return string_as_bool_or_none(self.get(key, default)) + def parse_label(self): return xml_text(self.input_elem, "label") diff --git a/lib/galaxy/tool_util/parser/yaml.py b/lib/galaxy/tool_util/parser/yaml.py index c2cde0752f3a..c2e6352384c0 100644 --- a/lib/galaxy/tool_util/parser/yaml.py +++ b/lib/galaxy/tool_util/parser/yaml.py @@ -338,6 +338,9 @@ def get(self, key, default=None): def get_bool(self, key, default): return self.input_dict.get(key, default) + def get_bool_or_none(self, key, default): + return self.input_dict.get(key, default) + def parse_input_type(self): input_type = self.input_dict["type"] if input_type == "repeat": diff --git a/test/functional/tools/parameters/gx_boolean.xml b/test/functional/tools/parameters/gx_boolean.xml index e42c9c9b6af6..d89ec53fe3ff 100644 --- a/test/functional/tools/parameters/gx_boolean.xml +++ b/test/functional/tools/parameters/gx_boolean.xml @@ -9,6 +9,13 @@ echo '$parameter' >> '$output' + + + + + + + diff --git a/test/functional/tools/parameters/gx_boolean_checked.xml b/test/functional/tools/parameters/gx_boolean_checked.xml new file mode 100644 index 000000000000..de33a26e6f56 --- /dev/null +++ b/test/functional/tools/parameters/gx_boolean_checked.xml @@ -0,0 +1,36 @@ + + > '$output' + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/test/functional/tools/parameters/gx_boolean_optional.xml b/test/functional/tools/parameters/gx_boolean_optional.xml index dc667b614a1a..57a6d6f3f1be 100644 --- a/test/functional/tools/parameters/gx_boolean_optional.xml +++ b/test/functional/tools/parameters/gx_boolean_optional.xml @@ -34,7 +34,6 @@ cat '$inputs' >> $inputs_json; - diff --git 
a/test/functional/tools/parameters/gx_boolean_optional_checked.xml b/test/functional/tools/parameters/gx_boolean_optional_checked.xml new file mode 100644 index 000000000000..f1c27f98ce46 --- /dev/null +++ b/test/functional/tools/parameters/gx_boolean_optional_checked.xml @@ -0,0 +1,58 @@ + + > '$output'; +cat '$inputs' >> $inputs_json; + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/test/functional/tools/parameters/gx_select_optional.xml b/test/functional/tools/parameters/gx_select_optional.xml index 5f6b63813dd3..03fc045dba2a 100644 --- a/test/functional/tools/parameters/gx_select_optional.xml +++ b/test/functional/tools/parameters/gx_select_optional.xml @@ -23,5 +23,12 @@ echo '$parameter' >> '$output' + + + + + + + diff --git a/test/functional/tools/select_optional.xml b/test/functional/tools/select_optional.xml index ab809859794d..fa783c7f2ff3 100644 --- a/test/functional/tools/select_optional.xml +++ b/test/functional/tools/select_optional.xml @@ -1,4 +1,5 @@ + > '$output1' && echo select_mult_opt $select_mult_opt >> '$output1' && diff --git a/test/unit/tool_util/framework_tool_checks.yml b/test/unit/tool_util/framework_tool_checks.yml index c9a6cbf661df..883fd3d40ffc 100644 --- a/test/unit/tool_util/framework_tool_checks.yml +++ b/test/unit/tool_util/framework_tool_checks.yml @@ -6,3 +6,57 @@ empty_select: - {} request_invalid: - select_optional: anyoption + +select_from_dataset_in_conditional: + request_valid: + - single: {src: hda, id: abcde133543d} + request_invalid: + - single: 7 + request_internal_valid: + - single: {src: hda, id: 7} + request_internal_invalid: + - single: 7 + job_internal_valid: + - single: {src: hda, id: 7} + job_internal_valid: + - single: {src: hda, id: 7} + cond: + cond: single + select_single: chr10 + inner_cond: + inner_cond: single + select_single: chr10 + job_internal_invalid: + - single: {src: hda, id: 7} + cond: + cond: single + select_single: chr10 + inner_cond: + inner_cond: single + select_single: chr10 + badoption: true + - single: {src: hda, id: 7} + cond: + cond: single + select_single: chr10 + +column_param: + request_valid: + - input1: {src: hda, id: abcde133543d} + col: 1 + col_names: 1 + request_invalid: + - input1: {src: hda, id: abcde133543d} + col: moocow + col_names: moocow + request_internal_valid: + - input1: {src: hda, id: 7} + col: 1 + col_names: 1 + request_internal_invalid: + - input1: {src: hda, id: abcde133543d} + col: 1 + col_names: 1 + + + diff --git a/test/unit/tool_util/parameter_specification.yml b/test/unit/tool_util/parameter_specification.yml index 8497843a427c..761a1c3fb680 100644 --- a/test/unit/tool_util/parameter_specification.yml +++ b/test/unit/tool_util/parameter_specification.yml @@ -27,6 +27,11 @@ gx_int: - parameter: "None" - parameter: { 5 } - parameter: {__class__: 'ConnectedValue'} + job_internal_valid: + - parameter: 5 + job_internal_invalid: + - parameter: null + - {} test_case_xml_valid: - parameter: 5 - {} @@ -61,6 +66,11 @@ gx_boolean: - parameter: "mytrue" - parameter: null - parameter: {__class__: 'ConnectedValue'} + job_internal_valid: + - parameter: true + - parameter: false + job_internal_invalid: + - {} workflow_step_valid: - parameter: True - {} @@ -87,6 +97,11 @@ gx_int_optional: - parameter: "null" - parameter: [5] - parameter: {__class__: 'ConnectedValue'} + job_internal_valid: + - parameter: 5 + - parameter: null + job_internal_invalid: + - {} workflow_step_valid: - parameter: 5 - parameter: null @@ -113,22 +128,42 
@@ gx_int_required: &gx_int_required - parameter: "None" - parameter: { 5 } - parameter: {__class__: 'ConnectedValue'} + job_internal_valid: + - parameter: 5 + job_internal_invalid: + - {} gx_int_required_via_empty_string: <<: *gx_int_required gx_text: - request_valid: + request_valid: &gx_text_request_valid - parameter: moocow - parameter: 'some spaces' - parameter: '' - {} - request_invalid: + request_invalid: &gx_text_request_invalid - parameter: 5 - parameter: null - parameter: {} - parameter: { "moo": "cow" } - parameter: {__class__: 'ConnectedValue'} + request_internal_valid: + *gx_text_request_valid + request_internal_invalid: + *gx_text_request_invalid + request_internal_dereferenced_valid: + *gx_text_request_valid + request_internal_dereferenced_invalid: + *gx_text_request_invalid + job_internal_valid: + - parameter: moocow + - parameter: 'some spaces' + - parameter: '' + job_internal_invalid: + - {} + - parameter: null + - parameter: { "moo": "cow" } workflow_step_valid: - parameter: moocow - parameter: 'some spaces' @@ -199,11 +234,20 @@ gx_select: - parameter: null - parameter: {} - parameter: 5 - request_internal_valid: + request_internal_valid: &gx_select_request_valid - parameter: "--ex1" - parameter: "ex2" - request_internal_invalid: + request_internal_invalid: &gx_select_request_invalid - parameter: {} + request_internal_dereferenced_valid: + *gx_select_request_valid + request_internal_dereferenced_invalid: + *gx_select_request_invalid + job_internal_valid: + - parameter: '--ex1' + - parameter: 'ex2' + job_internal_invalid: + - {} test_case_xml_valid: - parameter: 'ex2' - parameter: '--ex1' @@ -239,6 +283,15 @@ gx_select_optional: - parameter: ["ex2"] - parameter: {} - parameter: 5 + job_internal_valid: + - parameter: '--ex1' + - parameter: 'ex2' + - parameter: null + job_internal_invalid: + - parameter: 'invalid' + - {} + test_case_xml_valid: + - {} workflow_step_valid: - parameter: "--ex1" - parameter: "ex2" @@ -266,19 +319,26 @@ gx_select_multiple: request_valid: - parameter: ["--ex1"] - parameter: ["ex2"] + # ugh... but these aren't optional... + - parameter: null + - {} request_invalid: - parameter: ["Ex1"] - # DIVERGES_FROM_CURRENT_API - - parameter: null - parameter: {} - parameter: 5 - - {} workflow_step_valid: - parameter: ["--ex1"] - parameter: ["ex2"] - {} # could come in linked... # ... hmmm? this should maybe be invalid right? - parameter: null + job_internal_valid: + - parameter: ["--ex1"] + # itd be coool if this was forced to empty list - probably breaks backward compat though... + - parameter: null + job_internal_invalid: + - {} + - parameter: "--ex1" workflow_step_invalid: - parameter: ["Ex1"] - parameter: {} @@ -289,16 +349,15 @@ gx_select_multiple: - parameter: ["--ex1"] - parameter: ["ex2"] - parameter: [{__class__: 'ConnectedValue'}] + - {} + - parameter: null workflow_step_linked_invalid: - parameter: ["Ex1"] - parameter: {} - parameter: 5 - - {} # might be wrong? I guess we would expect the semantic of this to do like a map-over # but as far as I am aware that is not implemented https://github.com/galaxyproject/galaxy/issues/18541 - parameter: {__class__: 'ConnectedValue'} - # they are non-optinoal right? 
- - parameter: null gx_select_multiple_optional: request_valid: @@ -320,6 +379,10 @@ gx_genomebuild: request_invalid: - parameter: null - parameter: 9 + job_internal_valid: + - parameter: hg19 + job_internal_invalid: + - {} gx_genomebuild_optional: request_valid: @@ -331,6 +394,10 @@ gx_genomebuild_optional: request_invalid: - parameter: 8 - parameter: true + job_internal_valid: + - parameter: null + job_internal_invalid: + - {} gx_genomebuild_multiple: request_valid: @@ -348,6 +415,13 @@ gx_directory_uri: - parameter: "justsomestring" - parameter: true - parameter: null + job_internal_valid: + - parameter: "gxfiles://foobar/" + - parameter: "gxfiles://foobar" + job_internal_invalid: + - parameter: "justsomestring" + - parameter: true + - parameter: null gx_hidden: request_valid: @@ -361,6 +435,12 @@ gx_hidden: - parameter: 5 - parameter: {} - parameter: { "moo": "cow" } + job_internal_valid: + - parameter: moocow + - parameter: 'some spaces' + - parameter: '' + job_internal_invalid: + - {} workflow_step_valid: - parameter: moocow - {} @@ -501,6 +581,7 @@ gx_color: gx_data: request_valid: - parameter: {src: hda, id: abcdabcd} + - parameter: {src: url, url: "https://raw.githubusercontent.com/galaxyproject/planemo/7be1bf5b3971a43eaa73f483125bfb8cabf1c440/tests/data/hello.txt", "ext": "txt"} - parameter: {__class__: "Batch", values: [{src: hdca, id: abcdabcd}]} request_invalid: - parameter: {__class__: "Batch", values: [{src: hdca, id: 5}]} @@ -518,6 +599,7 @@ gx_data: - parameter: {__class__: "Batch", values: [{src: hdca, id: 5}]} - parameter: {src: hda, id: 5} - parameter: {src: hda, id: 0} + - parameter: {src: url, url: "https://raw.githubusercontent.com/galaxyproject/planemo/7be1bf5b3971a43eaa73f483125bfb8cabf1c440/tests/data/hello.txt", ext: "txt"} request_internal_invalid: - parameter: {__class__: "Batch", values: [{src: hdca, id: abcdabcd}]} - parameter: {src: hda, id: abcdabcd} @@ -526,6 +608,14 @@ gx_data: - parameter: true - parameter: 5 - parameter: "5" + request_internal_dereferenced_valid: + - parameter: {__class__: "Batch", values: [{src: hdca, id: 5}]} + - parameter: {src: hda, id: 5} + - parameter: {src: hda, id: 0} + request_internal_dereferenced_invalid: + # the difference between request internal and request internal dereferenced is that these have been converted + # to datasets. + - parameter: {src: url, url: "https://raw.githubusercontent.com/galaxyproject/planemo/7be1bf5b3971a43eaa73f483125bfb8cabf1c440/tests/data/hello.txt", ext: "txt"} job_internal_valid: - parameter: {src: hda, id: 7} job_internal_invalid: @@ -533,6 +623,9 @@ gx_data: # expanded out. - parameter: {__class__: "Batch", values: [{src: hdca, id: 5}]} - parameter: {src: hda, id: abcdabcd} + # url parameters should be dereferrenced into datasets by this point... 
+ - parameter: {src: url, url: "https://raw.githubusercontent.com/galaxyproject/planemo/7be1bf5b3971a43eaa73f483125bfb8cabf1c440/tests/data/hello.txt", "ext": "txt"} + - {} test_case_xml_valid: - parameter: {class: File, path: foo.bed} - parameter: {class: File, location: "https://raw.githubusercontent.com/galaxyproject/planemo/7be1bf5b3971a43eaa73f483125bfb8cabf1c440/tests/data/hello.txt"} @@ -584,6 +677,11 @@ gx_data_optional: - parameter: true - parameter: 5 - parameter: "5" + job_internal_valid: + - parameter: null + - parameter: {src: hda, id: 5} + job_internal_invalid: + - {} workflow_step_valid: - {} workflow_step_invalid: @@ -627,6 +725,8 @@ gx_data_multiple: - parameter: [{src: hda, id: 5}] - parameter: [{src: hdca, id: 5}] - parameter: [{src: hdca, id: 5}, {src: hda, id: 5}] + - parameter: [{src: url, url: "https://raw.githubusercontent.com/galaxyproject/planemo/7be1bf5b3971a43eaa73f483125bfb8cabf1c440/tests/data/hello.txt", ext: "txt"}] + - parameter: {__class__: "Batch", values: [{src: hdca, id: 5}]} request_internal_invalid: - parameter: {src: hda, id: abcdabcd} - parameter: [{src: hdca, id: abcdabcd}, {src: hda, id: abcdabcd}] @@ -636,6 +736,19 @@ gx_data_multiple: - parameter: true - parameter: 5 - parameter: "5" + request_internal_dereferenced_valid: + - parameter: {__class__: "Batch", values: [{src: hdca, id: 5}]} + - parameter: [{src: hda, id: 5}] + - parameter: [{src: hda, id: 0}] + request_internal_dereferenced_invalid: + # the difference between request internal and request internal dereferenced is that these have been converted + # to datasets. + - parameter: [{src: url, url: "https://raw.githubusercontent.com/galaxyproject/planemo/7be1bf5b3971a43eaa73f483125bfb8cabf1c440/tests/data/hello.txt", ext: "txt"}] + job_internal_valid: + - parameter: {src: hda, id: 5} + - parameter: [{src: hda, id: 5}, {src: hda, id: 6}] + job_internal_invalid: + - {} gx_data_multiple_optional: request_valid: @@ -646,6 +759,8 @@ gx_data_multiple_optional: - parameter: [{src: hdca, id: abcdabcd}, {src: hda, id: abcdabcd}] - parameter: null - {} + - parameter: {src: url, url: "https://raw.githubusercontent.com/galaxyproject/planemo/7be1bf5b3971a43eaa73f483125bfb8cabf1c440/tests/data/hello.txt", ext: "txt"} + - parameter: [{src: url, url: "https://raw.githubusercontent.com/galaxyproject/planemo/7be1bf5b3971a43eaa73f483125bfb8cabf1c440/tests/data/hello.txt", ext: "txt"}] request_invalid: - parameter: {src: hda, id: 5} - parameter: {} @@ -660,12 +775,25 @@ gx_data_multiple_optional: - parameter: [{src: hdca, id: 5}, {src: hda, id: 5}] - parameter: null - {} + - parameter: [{src: url, url: "https://raw.githubusercontent.com/galaxyproject/planemo/7be1bf5b3971a43eaa73f483125bfb8cabf1c440/tests/data/hello.txt", ext: "txt"}] request_internal_invalid: - parameter: {src: hda, id: abcdabcd} - parameter: {} - parameter: true - parameter: 5 - parameter: "5" + request_internal_dereferenced_valid: + - parameter: {src: hda, id: 5} + request_internal_dereferenced_invalid: + - parameter: {src: hda, id: abcdabcd} + - parameter: [{src: url, url: "https://raw.githubusercontent.com/galaxyproject/planemo/7be1bf5b3971a43eaa73f483125bfb8cabf1c440/tests/data/hello.txt", ext: "txt"}] + - parameter: {src: url, url: "https://raw.githubusercontent.com/galaxyproject/planemo/7be1bf5b3971a43eaa73f483125bfb8cabf1c440/tests/data/hello.txt", ext: "txt"} + job_internal_valid: + - parameter: {src: hda, id: 5} + - parameter: [{src: hda, id: 5}, {src: hda, id: 6}] + - parameter: null + job_internal_invalid: + - {} 
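Illustration, not part of the patch: the gx_data* cases above separate request_internal from request_internal_dereferenced, so a {src: url, ...} value is accepted in the former but must be replaced by a materialized dataset reference before the latter validates. A sketch of that flow using the helpers exercised by the new test_parameter_convert.py; the dereference callback's exact signature is an assumption inferred from how _fake_dereference is used there.

from galaxy.tool_util.parameters import (
    DataRequestInternalHda,
    DataRequestUri,
    dereference,
    input_models_for_tool_source,
    RequestInternalToolState,
)
from galaxy.tool_util.parser import get_tool_source


def materialize_uri(uri: DataRequestUri) -> DataRequestInternalHda:
    # Stand-in for the Galaxy-side callback that creates a (possibly deferred) dataset
    # for the URI and returns its decoded id; signature assumed, not defined in this hunk.
    return DataRequestInternalHda(id=42)


tool_source = get_tool_source("test/functional/tools/parameters/gx_data.xml")  # example path
bundle = input_models_for_tool_source(tool_source)
internal = RequestInternalToolState({"parameter": {"src": "url", "url": "gxfiles://mystorage/1.bed", "ext": "bed"}})
internal.validate(bundle)  # valid as request_internal
dereferenced = dereference(internal, bundle, materialize_uri)
dereferenced.validate(bundle)  # now valid as request_internal_dereferenced
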
gx_data_collection: request_valid: @@ -692,6 +820,10 @@ gx_data_collection: - parameter: true - parameter: 5 - parameter: "5" + request_internal_dereferenced_valid: + - parameter: {src: hdca, id: 5} + request_internal_dereferenced_invalid: + - parameter: {src: hdca, id: abcdabcd} workflow_step_valid: - {} workflow_step_invalid: @@ -745,6 +877,12 @@ gx_data_collection: elements: - {identifier: "forward", path: "1_f.bed", class: File} - {identifier: "reverse", path: "1_r.bed", class: FileX} + job_internal_valid: + - parameter: {src: hdca, id: 5} + job_internal_invalid: + - parameter: {src: hdca, id: abcdabcd} + - parameter: null + - {} gx_data_collection_optional: request_valid: @@ -771,6 +909,12 @@ gx_data_collection_optional: - parameter: 5 - parameter: "5" - parameter: {} + job_internal_valid: + - parameter: {src: hdca, id: 5} + - parameter: null + job_internal_invalid: + - parameter: {src: hdca, id: abcdabcd} + - {} gx_conditional_boolean: request_valid: @@ -813,6 +957,22 @@ gx_conditional_boolean: # in that case having an integer_parameter is not acceptable. - conditional_parameter: integer_parameter: 5 + job_internal_valid: + - conditional_parameter: + test_parameter: true + integer_parameter: 1 + - conditional_parameter: + test_parameter: false + boolean_parameter: false + job_internal_invalid: + # missing defaults are fine in request, but job_internal records parameters used, + # must include defaults. + - {} + - conditional_parameter: {} + - conditional_parameter: + test_parameter: true + - conditional_parameter: + boolean_parameter: true gx_conditional_boolean_checked: request_valid: @@ -829,6 +989,10 @@ gx_conditional_boolean_checked: - conditional_parameter: boolean_parameter: false + job_internal_invalid: + - conditional_parameter: {} + - {} + gx_conditional_conditional_boolean: request_valid: - outer_conditional_parameter: @@ -846,6 +1010,12 @@ gx_conditional_conditional_boolean: boolean_parameter: true # Test parameter has default and so does it "case" - so this should be fine - {} + - outer_conditional_parameter: {} + - outer_conditional_parameter: + outer_test_parameter: true + inner_conditional_parameter: {} + - outer_conditional_parameter: + outer_test_parameter: true request_invalid: - outer_conditional_parameter: outer_test_parameter: true @@ -860,6 +1030,15 @@ gx_conditional_conditional_boolean: inner_conditional_parameter: inner_test_parameter: true integer_parameter: true + job_internal_invalid: + - {} + - outer_conditional_parameter: {} + - outer_conditional_parameter: {} + - outer_conditional_parameter: + outer_test_parameter: true + inner_conditional_parameter: {} + - outer_conditional_parameter: + outer_test_parameter: true gx_conditional_select: request_valid: @@ -874,15 +1053,9 @@ gx_conditional_select: boolean_parameter: true # Test parameter has default and so does it "case" - so this should be fine - {} - # # The boolean_parameter is optional so just setting a test_parameter is fine - # - conditional_parameter: - # test_parameter: b - # - conditional_parameter: - # test_parameter: a - # # if test parameter is missing, it should be 'a' in this case - # - conditional_parameter: - # integer_parameter: 4 - # - conditional_parameter: {} + # # The select is optional so just setting a test_parameter is fine + - conditional_parameter: + integer_parameter: 1 request_invalid: - conditional_parameter: test_parameter: b @@ -899,6 +1072,19 @@ gx_conditional_select: # in that case having an integer_parameter is not acceptable. 
- conditional_parameter: boolean_parameter: true + job_internal_valid: + - conditional_parameter: + test_parameter: a + integer_parameter: 1 + - conditional_parameter: + test_parameter: b + boolean_parameter: true + job_internal_invalid: + - {} + - conditional_parameter: + integer_parameter: 1 + - conditional_parameter: null + - conditional_parameter: {} gx_repeat_boolean: request_valid: @@ -920,6 +1106,17 @@ gx_repeat_boolean: - { boolean_parameter: false } - { boolean_parameter: 4 } - parameter: 5 + job_internal_valid: + - parameter: + - { boolean_parameter: true } + - parameter: [] + - parameter: + - { boolean_parameter: true } + - { boolean_parameter: false } + job_internal_invalid: + - parameter: [{}] + - parameter: [{}, {}] + gx_repeat_boolean_min: request_valid: @@ -945,6 +1142,13 @@ gx_repeat_boolean_min: - { boolean_parameter: false } - { boolean_parameter: 4 } - parameter: 5 + job_internal_valid: + - parameter: + - { boolean_parameter: true } + - { boolean_parameter: false } + job_internal_invalid: + - parameter: [{}, {}] + - parameter: [] gx_repeat_data: request_valid: @@ -956,6 +1160,8 @@ gx_repeat_data: - { data_parameter: {src: hda, id: abcdabcd} } # an empty repeat is fine - {} + - parameter: + - { data_parameter: {src: url, url: "https://raw.githubusercontent.com/galaxyproject/planemo/7be1bf5b3971a43eaa73f483125bfb8cabf1c440/tests/data/hello.txt", ext: "txt"} } request_invalid: - parameter: [{}, {}] - parameter: [{}] @@ -963,9 +1169,19 @@ gx_repeat_data: request_internal_valid: - parameter: - { data_parameter: { src: hda, id: 5 } } + - parameter: + - { data_parameter: {src: url, url: "https://raw.githubusercontent.com/galaxyproject/planemo/7be1bf5b3971a43eaa73f483125bfb8cabf1c440/tests/data/hello.txt", ext: "txt"} } request_internal_invalid: - parameter: - { data_parameter: { src: hda, id: abcdabcd } } + job_internal_valid: + - parameter: + - { data_parameter: { src: hda, id: 5 } } + job_internal_invalid: + - parameter: null + - parameter: {} + - parameter: + - { data_parameter: {src: url, url: "https://raw.githubusercontent.com/galaxyproject/planemo/7be1bf5b3971a43eaa73f483125bfb8cabf1c440/tests/data/hello.txt", ext: "txt"} } gx_repeat_data_min: request_valid: @@ -990,6 +1206,17 @@ gx_repeat_data_min: - { data_parameter: { src: hda, id: abcdabcd } } - parameter: - { data_parameter: { src: hda, id: 5 } } + job_internal_valid: + - parameter: + - { data_parameter: { src: hda, id: 5 } } + - { data_parameter: { src: hda, id: 6 } } + job_internal_invalid: + - parameter: + - { data_parameter: { src: hda, id: 5 } } + - parameter: + - {} + - parameter: {} + gx_section_boolean: request_valid: @@ -998,6 +1225,12 @@ gx_section_boolean: - {} request_invalid: - parameter: { boolean_parameter: 4 } + job_internal_valid: + - parameter: { boolean_parameter: true } + job_internal_invalid: + - {} + - parameter: { boolean_parameter: 4 } + gx_section_data: request_valid: @@ -1005,39 +1238,68 @@ gx_section_data: request_invalid: - parameter: { data_parameter: 4 } - parameter: { data_parameter: { src: hda, id: 5 } } - # data parameter is non-optional, so this should be invalid (unlike boolean parameter above) - # - {} + - {} request_internal_valid: - parameter: { data_parameter: { src: hda, id: 5 } } request_internal_invalid: - parameter: { data_parameter: { src: hda, id: abcdabcd } } + - {} + job_internal_valid: + - parameter: { data_parameter: { src: hda, id: 5 } } + job_internal_invalid: + - {} gx_drill_down_exact: request_valid: - parameter: aa - parameter: bbb - parameter: ba - 
request_invalid: - # not multiple so cannot choose a non-leaf + # non-leaf nodes seem to be selectable in exact mode - parameter: a + request_invalid: - parameter: c - parameter: {} # no implicit default currently - see test_drill_down_first_by_default in API test test_tools.py. - {} - parameter: null + job_internal_valid: + - parameter: aa + job_internal_invalid: + - parameter: c + - {} gx_drill_down_exact_with_selection: request_valid: - parameter: aa - parameter: bbb - parameter: ba - # - {} - request_invalid: - # not multiple so cannot choose a non-leaf + # non-leaf nodes seem to be selectable in exact mode - parameter: a + request_invalid: - parameter: c + # see note above no implicit selection - parameter: {} - parameter: null + job_internal_valid: + - parameter: aa + job_internal_invalid: + - parameter: c + - parameter: null + - {} + +gx_drill_down_recurse: + request_valid: + - parameter: bba + request_invalid: + - parameter: a + - parameter: c + +gx_drill_down_recurse_multiple: + request_valid: + - parameter: [bba] + - parameter: [a] + request_invalid: + - parameter: c gx_data_column: request_valid: @@ -1049,6 +1311,11 @@ gx_data_column: - { ref_parameter: {src: hda, id: 123}, parameter: 0 } request_internal_invalid: - { ref_parameter: {src: hda, id: 123}, parameter: "0" } + job_internal_valid: + - { ref_parameter: {src: hda, id: 123}, parameter: 0 } + job_internal_invalid: + - { ref_parameter: {src: hda, id: 123} } + - { } test_case_xml_valid: - { ref_parameter: {class: "File", path: "1.bed"}, parameter: 3 } test_case_xml_invalid: @@ -1064,6 +1331,13 @@ gx_data_column_optional: request_invalid: - { ref_parameter: {src: hda, id: abcdabcd}, parameter: "0" } - { ref_parameter: {src: hda, id: abcdabcd}, parameter: [ 0 ] } + job_internal_valid: + - { ref_parameter: {src: hda, id: 1}, parameter: 0 } + - { ref_parameter: {src: hda, id: 1}, parameter: null } + job_internal_invalid: + - { ref_parameter: {src: hda, id: 1} } + - { } + - { ref_parameter: {src: hda, id: 1}, parameter: "0" } request_internal_valid: - { ref_parameter: {src: hda, id: 123}, parameter: 0 } request_internal_invalid: @@ -1077,6 +1351,11 @@ gx_data_column_multiple: - { ref_parameter: {src: hda, id: abcdabcd}, parameter: ["0"] } request_internal_valid: - { ref_parameter: {src: hda, id: 123}, parameter: [0] } + job_internal_valid: + - { ref_parameter: {src: hda, id: 123}, parameter: [0] } + job_internal_invalid: + - { ref_parameter: {src: hda, id: 123}, parameter: ["0"] } + - { ref_parameter: {src: hda, id: 123}, parameter: null } request_internal_invalid: - { ref_parameter: {src: hda, id: 123}, parameter: "0" } - { ref_parameter: {src: hda, id: 123}, parameter: 0 } @@ -1109,6 +1388,11 @@ gx_group_tag_optional: - { ref_parameter: {src: hdca, id: 123}, parameter: null } request_internal_invalid: - { ref_parameter: {src: hdca, id: 123}, parameter: 8 } + job_internal_valid: + - { ref_parameter: {src: hdca, id: 123}, parameter: null } + - { ref_parameter: { src: hdca, id: 123}, parameter: "matched" } + job_internal_invalid: + - { ref_parameter: {src: hdca, id: 123} } gx_group_tag_multiple: @@ -1127,6 +1411,12 @@ gx_group_tag_multiple: request_internal_invalid: - { ref_parameter: {src: hdca, id: 123}, parameter: 8 } - { ref_parameter: {src: hdca, id: 123}, parameter: null } + job_internal_valid: + - { ref_parameter: { src: hdca, id: 123}, parameter: ['matched'] } + job_internal_invalid: + - { ref_parameter: { src: hdca, id: 123} } + - { ref_parameter: { src: hdca, id: 123}, parameter: null } + cwl_int: diff --git 
a/test/unit/tool_util/test_parameter_convert.py b/test/unit/tool_util/test_parameter_convert.py new file mode 100644 index 000000000000..f86750026766 --- /dev/null +++ b/test/unit/tool_util/test_parameter_convert.py @@ -0,0 +1,221 @@ +from typing import ( + Any, + Dict, + Optional, +) + +from galaxy.tool_util.parameters import ( + DataRequestInternalHda, + DataRequestUri, + decode, + dereference, + encode, + fill_static_defaults, + input_models_for_tool_source, + RequestInternalDereferencedToolState, + RequestInternalToolState, + RequestToolState, +) +from galaxy.tool_util.parser.util import parse_profile_version +from .test_parameter_test_cases import tool_source_for + +EXAMPLE_ID_1_ENCODED = "123456789abcde" +EXAMPLE_ID_1 = 13 +EXAMPLE_ID_2_ENCODED = "123456789abcd2" +EXAMPLE_ID_2 = 14 + +ID_MAP: Dict[int, str] = { + EXAMPLE_ID_1: EXAMPLE_ID_1_ENCODED, + EXAMPLE_ID_2: EXAMPLE_ID_2_ENCODED, +} + + +def test_encode_data(): + tool_source = tool_source_for("parameters/gx_data") + bundle = input_models_for_tool_source(tool_source) + request_state = RequestToolState({"parameter": {"src": "hda", "id": EXAMPLE_ID_1_ENCODED}}) + request_state.validate(bundle) + decoded_state = decode(request_state, bundle, _fake_decode) + assert decoded_state.input_state["parameter"]["src"] == "hda" + assert decoded_state.input_state["parameter"]["id"] == EXAMPLE_ID_1 + + +def test_encode_collection(): + tool_source = tool_source_for("parameters/gx_data_collection") + bundle = input_models_for_tool_source(tool_source) + request_state = RequestToolState({"parameter": {"src": "hdca", "id": EXAMPLE_ID_1_ENCODED}}) + request_state.validate(bundle) + decoded_state = decode(request_state, bundle, _fake_decode) + assert decoded_state.input_state["parameter"]["src"] == "hdca" + assert decoded_state.input_state["parameter"]["id"] == EXAMPLE_ID_1 + + +def test_encode_repeat(): + tool_source = tool_source_for("parameters/gx_repeat_data") + bundle = input_models_for_tool_source(tool_source) + request_state = RequestToolState({"parameter": [{"data_parameter": {"src": "hda", "id": EXAMPLE_ID_1_ENCODED}}]}) + request_state.validate(bundle) + decoded_state = decode(request_state, bundle, _fake_decode) + assert decoded_state.input_state["parameter"][0]["data_parameter"]["src"] == "hda" + assert decoded_state.input_state["parameter"][0]["data_parameter"]["id"] == EXAMPLE_ID_1 + + +def test_encode_section(): + tool_source = tool_source_for("parameters/gx_section_data") + bundle = input_models_for_tool_source(tool_source) + request_state = RequestToolState({"parameter": {"data_parameter": {"src": "hda", "id": EXAMPLE_ID_1_ENCODED}}}) + request_state.validate(bundle) + decoded_state = decode(request_state, bundle, _fake_decode) + assert decoded_state.input_state["parameter"]["data_parameter"]["src"] == "hda" + assert decoded_state.input_state["parameter"]["data_parameter"]["id"] == EXAMPLE_ID_1 + + +def test_encode_conditional(): + tool_source = tool_source_for("identifier_in_conditional") + bundle = input_models_for_tool_source(tool_source) + request_state = RequestToolState( + {"outer_cond": {"multi_input": False, "input1": {"src": "hda", "id": EXAMPLE_ID_1_ENCODED}}} + ) + request_state.validate(bundle) + decoded_state = decode(request_state, bundle, _fake_decode) + assert decoded_state.input_state["outer_cond"]["input1"]["src"] == "hda" + assert decoded_state.input_state["outer_cond"]["input1"]["id"] == EXAMPLE_ID_1 + + +def test_multi_data(): + tool_source = tool_source_for("parameters/gx_data_multiple") + bundle = 
input_models_for_tool_source(tool_source) + request_state = RequestToolState( + {"parameter": [{"src": "hda", "id": EXAMPLE_ID_1_ENCODED}, {"src": "hda", "id": EXAMPLE_ID_2_ENCODED}]} + ) + request_state.validate(bundle) + decoded_state = decode(request_state, bundle, _fake_decode) + assert decoded_state.input_state["parameter"][0]["src"] == "hda" + assert decoded_state.input_state["parameter"][0]["id"] == EXAMPLE_ID_1 + assert decoded_state.input_state["parameter"][1]["src"] == "hda" + assert decoded_state.input_state["parameter"][1]["id"] == EXAMPLE_ID_2 + + encoded_state = encode(decoded_state, bundle, _fake_encode) + assert encoded_state.input_state["parameter"][0]["src"] == "hda" + assert encoded_state.input_state["parameter"][0]["id"] == EXAMPLE_ID_1_ENCODED + assert encoded_state.input_state["parameter"][1]["src"] == "hda" + assert encoded_state.input_state["parameter"][1]["id"] == EXAMPLE_ID_2_ENCODED + + +def test_dereference(): + tool_source = tool_source_for("parameters/gx_data") + bundle = input_models_for_tool_source(tool_source) + raw_request_state = {"parameter": {"src": "url", "url": "gxfiles://mystorage/1.bed", "ext": "bed"}} + request_state = RequestInternalToolState(raw_request_state) + request_state.validate(bundle) + + exception: Optional[Exception] = None + try: + # quickly verify this request needs to be dereferenced + bad_state = RequestInternalDereferencedToolState(raw_request_state) + bad_state.validate(bundle) + except Exception as e: + exception = e + assert exception is not None + + dereferenced_state = dereference(request_state, bundle, _fake_dereference) + assert isinstance(dereferenced_state, RequestInternalDereferencedToolState) + dereferenced_state.validate(bundle) + + +def test_fill_defaults(): + with_defaults = fill_state_for({}, "parameters/gx_int") + assert with_defaults["parameter"] == 1 + with_defaults = fill_state_for({}, "parameters/gx_float") + assert with_defaults["parameter"] == 1.0 + with_defaults = fill_state_for({}, "parameters/gx_boolean") + assert with_defaults["parameter"] is False + with_defaults = fill_state_for({}, "parameters/gx_boolean_optional") + # This is False unfortunately - see comments in gx_boolean_optional XML. 
+ assert with_defaults["parameter"] is False + with_defaults = fill_state_for({}, "parameters/gx_boolean_checked") + assert with_defaults["parameter"] is True + with_defaults = fill_state_for({}, "parameters/gx_boolean_optional_checked") + assert with_defaults["parameter"] is True + + with_defaults = fill_state_for({}, "parameters/gx_conditional_boolean") + assert with_defaults["conditional_parameter"]["test_parameter"] is False + assert with_defaults["conditional_parameter"]["boolean_parameter"] is False + + with_defaults = fill_state_for({"conditional_parameter": {}}, "parameters/gx_conditional_boolean") + assert with_defaults["conditional_parameter"]["test_parameter"] is False + assert with_defaults["conditional_parameter"]["boolean_parameter"] is False + + with_defaults = fill_state_for({}, "parameters/gx_repeat_boolean") + assert len(with_defaults["parameter"]) == 0 + with_defaults = fill_state_for({"parameter": [{}]}, "parameters/gx_repeat_boolean") + assert len(with_defaults["parameter"]) == 1 + instance_state = with_defaults["parameter"][0] + assert instance_state["boolean_parameter"] is False + + with_defaults = fill_state_for({}, "parameters/gx_repeat_boolean_min") + assert len(with_defaults["parameter"]) == 2 + assert with_defaults["parameter"][0]["boolean_parameter"] is False + assert with_defaults["parameter"][1]["boolean_parameter"] is False + with_defaults = fill_state_for({"parameter": [{}, {}]}, "parameters/gx_repeat_boolean_min") + assert len(with_defaults["parameter"]) == 2 + assert with_defaults["parameter"][0]["boolean_parameter"] is False + assert with_defaults["parameter"][1]["boolean_parameter"] is False + with_defaults = fill_state_for({"parameter": [{}]}, "parameters/gx_repeat_boolean_min") + assert with_defaults["parameter"][0]["boolean_parameter"] is False + assert with_defaults["parameter"][1]["boolean_parameter"] is False + + with_defaults = fill_state_for({}, "parameters/gx_section_boolean") + assert with_defaults["parameter"]["boolean_parameter"] is False + + with_defaults = fill_state_for({}, "parameters/gx_drill_down_exact_with_selection") + assert with_defaults["parameter"] == "aba" + + with_defaults = fill_state_for({}, "parameters/gx_hidden") + assert with_defaults["parameter"] == "moo" + + with_defaults = fill_state_for({}, "parameters/gx_genomebuild_optional") + assert with_defaults["parameter"] is None + + with_defaults = fill_state_for({}, "parameters/gx_select") + assert with_defaults["parameter"] == "--ex1" + + with_defaults = fill_state_for({}, "parameters/gx_select_optional") + assert with_defaults["parameter"] is None + + # Not ideal but matching current behavior + with_defaults = fill_state_for({}, "parameters/gx_select_multiple") + assert with_defaults["parameter"] is None + + with_defaults = fill_state_for({}, "parameters/gx_select_multiple_optional") + assert with_defaults["parameter"] is None + + # Do not fill in dynamic defaults... these require a Galaxy runtime. 
+ with_defaults = fill_state_for({}, "remove_value", partial=True) + assert "choose_value" not in with_defaults + + with_defaults = fill_state_for( + {"single": {"src": "hda", "id": 4}}, "select_from_dataset_in_conditional", partial=True + ) + assert with_defaults["cond"]["cond"] == "single" + assert with_defaults["cond"]["inner_cond"]["inner_cond"] == "single" + + +def _fake_dereference(input: DataRequestUri) -> DataRequestInternalHda: + return DataRequestInternalHda(id=EXAMPLE_ID_1) + + +def _fake_decode(input: str) -> int: + return next(key for key, value in ID_MAP.items() if value == input) + + +def _fake_encode(input: int) -> str: + return ID_MAP[input] + + +def fill_state_for(tool_state: Dict[str, Any], tool_path: str, partial: bool = False) -> Dict[str, Any]: + tool_source = tool_source_for(tool_path) + bundle = input_models_for_tool_source(tool_source) + profile = parse_profile_version(tool_source) + internal_state = fill_static_defaults(tool_state, bundle, profile, partial=partial) + return internal_state diff --git a/test/unit/tool_util/test_parameter_specification.py b/test/unit/tool_util/test_parameter_specification.py index 6a54d78cfabf..dd54ac7276be 100644 --- a/test/unit/tool_util/test_parameter_specification.py +++ b/test/unit/tool_util/test_parameter_specification.py @@ -1,3 +1,4 @@ +import sys from functools import partial from typing import ( Any, @@ -7,6 +8,7 @@ Optional, ) +import pytest import yaml from galaxy.exceptions import RequestParameterInvalidException @@ -18,6 +20,7 @@ ToolParameterBundleModel, validate_internal_job, validate_internal_request, + validate_internal_request_dereferenced, validate_request, validate_test_case, validate_workflow_step, @@ -33,6 +36,10 @@ RawStateDict = Dict[str, Any] +if sys.version_info < (3, 8): # noqa: UP036 + pytest.skip(reason="Pydantic tool parameter models require python3.8 or higher", allow_module_level=True) + + def specification_object(): try: yaml_str = resource_string(__package__, "parameter_specification.yml") @@ -91,6 +98,8 @@ def _test_file(file: str, specification=None, parameter_bundle: Optional[ToolPar "request_invalid": _assert_requests_invalid, "request_internal_valid": _assert_internal_requests_validate, "request_internal_invalid": _assert_internal_requests_invalid, + "request_internal_dereferenced_valid": _assert_internal_requests_dereferenced_validate, + "request_internal_dereferenced_invalid": _assert_internal_requests_dereferenced_invalid, "job_internal_valid": _assert_internal_jobs_validate, "job_internal_invalid": _assert_internal_jobs_invalid, "test_case_xml_valid": _assert_test_cases_validate, @@ -153,6 +162,26 @@ def _assert_internal_request_invalid(parameters: ToolParameterBundleModel, reque ), f"Parameters {parameters} didn't result in validation error on internal request {request} as expected." +def _assert_internal_request_dereferenced_validates( + parameters: ToolParameterBundleModel, request: RawStateDict +) -> None: + try: + validate_internal_request_dereferenced(parameters, request) + except RequestParameterInvalidException as e: + raise AssertionError(f"Parameters {parameters} failed to validate dereferenced internal request {request}. 
{e}") + + +def _assert_internal_request_dereferenced_invalid(parameters: ToolParameterBundleModel, request: RawStateDict) -> None: + exc = None + try: + validate_internal_request_dereferenced(parameters, request) + except RequestParameterInvalidException as e: + exc = e + assert ( + exc is not None + ), f"Parameters {parameters} didn't result in validation error on dereferenced internal request {request} as expected." + + def _assert_internal_job_validates(parameters: ToolParameterBundleModel, request: RawStateDict) -> None: try: validate_internal_job(parameters, request) @@ -235,6 +264,8 @@ def _assert_workflow_step_linked_invalid( _assert_requests_invalid = partial(_for_each, _assert_request_invalid) _assert_internal_requests_validate = partial(_for_each, _assert_internal_request_validates) _assert_internal_requests_invalid = partial(_for_each, _assert_internal_request_invalid) +_assert_internal_requests_dereferenced_validate = partial(_for_each, _assert_internal_request_dereferenced_validates) +_assert_internal_requests_dereferenced_invalid = partial(_for_each, _assert_internal_request_dereferenced_invalid) _assert_internal_jobs_validate = partial(_for_each, _assert_internal_job_validates) _assert_internal_jobs_invalid = partial(_for_each, _assert_internal_job_invalid) _assert_test_cases_validate = partial(_for_each, _assert_test_case_validates) diff --git a/test/unit/tool_util/test_parameter_test_cases.py b/test/unit/tool_util/test_parameter_test_cases.py index 909c0bfd2da5..3ff5c3a0fdbf 100644 --- a/test/unit/tool_util/test_parameter_test_cases.py +++ b/test/unit/tool_util/test_parameter_test_cases.py @@ -1,11 +1,22 @@ import os import re import sys -from typing import List +from typing import ( + Any, + List, + Optional, + Tuple, +) import pytest from galaxy.tool_util.models import parse_tool +from galaxy.tool_util.parameters import ( + DataCollectionRequest, + DataRequestHda, + encode_test, + input_models_for_tool_source, +) from galaxy.tool_util.parameters.case import ( test_case_state as case_state, TestCaseStateAndWarnings, @@ -14,6 +25,8 @@ ) from galaxy.tool_util.parser.factory import get_tool_source from galaxy.tool_util.parser.interface import ( + JsonTestCollectionDefDict, + JsonTestDatasetDefDict, ToolSource, ToolSourceTest, ) @@ -59,6 +72,8 @@ ] ) +MOCK_ID = "thisisafakeid" + if sys.version_info < (3, 8): # noqa: UP036 pytest.skip(reason="Pydantic tool parameter models require python3.8 or higher", allow_module_level=True) @@ -76,7 +91,7 @@ def test_parameter_test_cases_validate(): def test_legacy_features_fail_validation_with_24_2(tmp_path): for filename in TOOLS_THAT_USE_UNQUALIFIED_PARAMETER_ACCESS + TOOLS_THAT_USE_TRUE_FALSE_VALUE_BOOLEAN_SPECIFICATION: - _assert_tool_test_parsing_only_fails_with_newer_profile(tmp_path, filename) + _assert_tool_test_parsing_only_fails_with_newer_profile(tmp_path, filename, index=None) # column parameters need to be indexes _assert_tool_test_parsing_only_fails_with_newer_profile(tmp_path, "column_param.xml", index=2) @@ -85,7 +100,7 @@ def test_legacy_features_fail_validation_with_24_2(tmp_path): _assert_tool_test_parsing_only_fails_with_newer_profile(tmp_path, "multi_select.xml", index=1) -def _assert_tool_test_parsing_only_fails_with_newer_profile(tmp_path, filename: str, index: int = 0): +def _assert_tool_test_parsing_only_fails_with_newer_profile(tmp_path, filename: str, index: Optional[int] = 0): test_tool_directory = functional_test_tool_directory() original_path = os.path.join(test_tool_directory, filename) new_path = tmp_path / 
filename @@ -96,11 +111,19 @@ def _assert_tool_test_parsing_only_fails_with_newer_profile(tmp_path, filename: with open(new_path, "w") as wf: wf.write(new_profile_contents) test_cases = list(parse_tool_test_descriptions(get_tool_source(original_path))) - assert test_cases[index].to_dict()["error"] is False + if index is not None: + assert test_cases[index].to_dict()["error"] is False + else: + # just make sure there is at least one failure... + assert not any(c.to_dict()["error"] is True for c in test_cases) + test_cases = list(parse_tool_test_descriptions(get_tool_source(new_path))) - assert ( - test_cases[index].to_dict()["error"] is True - ), f"expected {filename} to have validation failure preventing loading of tools" + if index is not None: + assert ( + test_cases[index].to_dict()["error"] is True + ), f"expected {filename} to have validation failure preventing loading of tools" + else: + assert any(c.to_dict()["error"] is True for c in test_cases) def test_validate_framework_test_tools(): @@ -125,6 +148,7 @@ def test_test_case_state_conversion(): tool_source = tool_source_for("collection_nested_test") test_cases: List[ToolSourceTest] = tool_source.parse_tests_to_dict()["tests"] state = case_state_for(tool_source, test_cases[0]) + expectations: List[Tuple[List[Any], Optional[Any]]] expectations = [ (["f1", "collection_type"], "list:paired"), (["f1", "class"], "Collection"), @@ -187,6 +211,77 @@ def test_test_case_state_conversion(): ] dict_verify_each(state.tool_state.input_state, expectations) + index = 2 + tool_source = tool_source_for("filter_param_value_ref_attribute") + test_cases = tool_source.parse_tests_to_dict()["tests"] + state = case_state_for(tool_source, test_cases[index]) + expectations = [ + (["data_mult", 0, "path"], "1.bed"), + (["data_mult", 0, "dbkey"], "hg19"), + (["data_mult", 1, "path"], "2.bed"), + (["data_mult", 0, "dbkey"], "hg19"), + ] + dict_verify_each(state.tool_state.input_state, expectations) + + index = 1 + tool_source = tool_source_for("expression_pick_larger_file") + test_cases = tool_source.parse_tests_to_dict()["tests"] + state = case_state_for(tool_source, test_cases[index]) + expectations = [ + (["input1", "path"], "simple_line_alternative.txt"), + (["input2"], None), + ] + dict_verify_each(state.tool_state.input_state, expectations) + + index = 2 + state = case_state_for(tool_source, test_cases[index]) + expectations = [ + (["input1"], None), + (["input2", "path"], "simple_line.txt"), + ] + dict_verify_each(state.tool_state.input_state, expectations) + + index = 0 + tool_source = tool_source_for("composite_shapefile") + test_cases = tool_source.parse_tests_to_dict()["tests"] + state = case_state_for(tool_source, test_cases[index]) + expectations = [ + (["input", "filetype"], "shp"), + (["input", "composite_data", 0], "shapefile/shapefile.shp"), + ] + dict_verify_each(state.tool_state.input_state, expectations) + + +def test_convert_to_requests(): + tools = [ + "parameters/gx_drill_down_recurse_multiple", + "parameters/gx_conditional_select", + "expression_pick_larger_file", + "identifier_in_conditional", + "column_param_list", + "composite_shapefile", + ] + for tool_path in tools: + tool_source = tool_source_for(tool_path) + parameters = input_models_for_tool_source(tool_source) + parsed_tool = parse_tool(tool_source) + profile = tool_source.parse_profile() + test_cases: List[ToolSourceTest] = tool_source.parse_tests_to_dict()["tests"] + + def mock_adapt_datasets(input: JsonTestDatasetDefDict) -> DataRequestHda: + return DataRequestHda(src="hda", 
id=MOCK_ID) + + def mock_adapt_collections(input: JsonTestCollectionDefDict) -> DataCollectionRequest: + return DataCollectionRequest(src="hdca", id=MOCK_ID) + + for test_case in test_cases: + if test_case.get("expect_failure"): + continue + test_case_state_and_warnings = case_state(test_case, parsed_tool.inputs, profile) + test_case_state = test_case_state_and_warnings.tool_state + + encode_test(test_case_state, parameters, mock_adapt_datasets, mock_adapt_collections) + def _validate_path(tool_path: str): tool_source = get_tool_source(tool_path) diff --git a/test/unit/tool_util/util.py b/test/unit/tool_util/util.py index b03aff0fc15f..27f0ee677fbd 100644 --- a/test/unit/tool_util/util.py +++ b/test/unit/tool_util/util.py @@ -19,7 +19,7 @@ def dict_verify_each(target_dict: dict, expectations: List[Any]): dict_verify(target_dict, path, expectation) -def dict_verify(target_dict: dict, expectation_path: List[str], expectation: Any): +def dict_verify(target_dict: dict, expectation_path: List[Any], expectation: Any): rest = target_dict for path_part in expectation_path: rest = rest[path_part] From 3cf8db82247eb7bd220975b6ee3b34d832907d05 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Mon, 23 Sep 2024 09:14:25 -0400 Subject: [PATCH 06/14] Tool Request API - DB Layer. --- lib/galaxy/model/__init__.py | 28 ++++++++++++++++++++++++++++ lib/galaxy/schema/schema.py | 16 ++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 36aee6b45441..848de080d074 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -176,6 +176,7 @@ DatasetValidatedState, InvocationsStateCounts, JobState, + ToolRequestState, ) from galaxy.schema.workflow.comments import WorkflowCommentModel from galaxy.security import get_permitted_actions @@ -1328,6 +1329,30 @@ def __init__(self, user, token=None): self.expiration_time = now() + timedelta(hours=24) +class ToolSource(Base, Dictifiable, RepresentById): + __tablename__ = "tool_source" + + id: Mapped[int] = mapped_column(primary_key=True) + hash: Mapped[Optional[str]] = mapped_column(Unicode(255)) + source: Mapped[dict] = mapped_column(JSONType) + + +class ToolRequest(Base, Dictifiable, RepresentById): + __tablename__ = "tool_request" + + states: TypeAlias = ToolRequestState + + id: Mapped[int] = mapped_column(primary_key=True) + tool_source_id: Mapped[int] = mapped_column(ForeignKey("tool_source.id"), index=True) + history_id: Mapped[Optional[int]] = mapped_column(ForeignKey("history.id"), index=True) + request: Mapped[dict] = mapped_column(JSONType) + state: Mapped[Optional[str]] = mapped_column(TrimmedString(32), index=True) + state_message: Mapped[Optional[str]] = mapped_column(JSONType, index=True) + + tool_source: Mapped["ToolSource"] = relationship() + history: Mapped[Optional["History"]] = relationship(back_populates="tool_requests") + + class DynamicTool(Base, Dictifiable, RepresentById): __tablename__ = "dynamic_tool" @@ -1454,7 +1479,9 @@ class Job(Base, JobLike, UsesCreateAndUpdateTime, Dictifiable, Serializable): handler: Mapped[Optional[str]] = mapped_column(TrimmedString(255), index=True) preferred_object_store_id: Mapped[Optional[str]] = mapped_column(String(255)) object_store_id_overrides: Mapped[Optional[STR_TO_STR_DICT]] = mapped_column(JSONType) + tool_request_id: Mapped[Optional[int]] = mapped_column(ForeignKey("tool_request.id"), index=True) + tool_request: Mapped[Optional["ToolRequest"]] = relationship() user: Mapped[Optional["User"]] = relationship() 
galaxy_session: Mapped[Optional["GalaxySession"]] = relationship() history: Mapped[Optional["History"]] = relationship(back_populates="jobs") @@ -3177,6 +3204,7 @@ class History(Base, HasTags, Dictifiable, UsesAnnotations, HasName, Serializable ) user: Mapped[Optional["User"]] = relationship(back_populates="histories") jobs: Mapped[List["Job"]] = relationship(back_populates="history", cascade_backrefs=False) + tool_requests: Mapped[List["ToolRequest"]] = relationship(back_populates="history") update_time = column_property( select(func.max(HistoryAudit.update_time)).where(HistoryAudit.history_id == id).scalar_subquery(), diff --git a/lib/galaxy/schema/schema.py b/lib/galaxy/schema/schema.py index 9e461bd5655d..2e585087c318 100644 --- a/lib/galaxy/schema/schema.py +++ b/lib/galaxy/schema/schema.py @@ -3737,6 +3737,22 @@ class AsyncTaskResultSummary(Model): ) +ToolRequestIdField = Field(title="ID", description="Encoded ID of the role") + + +class ToolRequestState(str, Enum): + NEW = "new" + SUBMITTED = "submitted" + FAILED = "failed" + + +class ToolRequestModel(Model): + id: EncodedDatabaseIdField = ToolRequestIdField + request: Dict[str, Any] + state: ToolRequestState + state_message: Optional[str] + + class AsyncFile(Model): storage_request_id: UUID task: AsyncTaskResultSummary From 80aaff8d2325662f22b2a9c877f8c33c0cbb4206 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Fri, 20 Sep 2024 10:31:22 -0400 Subject: [PATCH 07/14] Plumbing for dereferencing URLs into datasets. --- lib/galaxy/managers/hdas.py | 15 ++++++++ lib/galaxy/model/deferred.py | 15 +++++--- lib/galaxy/model/dereference.py | 46 ++++++++++++++++++++++++ test/unit/data/test_dereference.py | 57 ++++++++++++++++++++++++++++++ 4 files changed, 129 insertions(+), 4 deletions(-) create mode 100644 lib/galaxy/model/dereference.py create mode 100644 test/unit/data/test_dereference.py diff --git a/lib/galaxy/managers/hdas.py b/lib/galaxy/managers/hdas.py index 3f38381ce4f0..2b6d20388398 100644 --- a/lib/galaxy/managers/hdas.py +++ b/lib/galaxy/managers/hdas.py @@ -44,6 +44,7 @@ taggable, users, ) +from galaxy.managers.context import ProvidesHistoryContext from galaxy.model import ( Job, JobStateHistory, @@ -51,6 +52,7 @@ ) from galaxy.model.base import transaction from galaxy.model.deferred import materializer_factory +from galaxy.model.dereference import dereference_to_model from galaxy.schema.schema import DatasetSourceType from galaxy.schema.storage_cleaner import ( CleanableItemsSummary, @@ -68,6 +70,7 @@ MinimalManagerApp, StructuredApp, ) +from galaxy.tool_util.parameters import DataRequestUri from galaxy.util.compression_utils import get_fileobj log = logging.getLogger(__name__) @@ -343,6 +346,18 @@ def _set_permissions(self, trans, hda, role_ids_dict): raise exceptions.RequestParameterInvalidException(error) +def dereference_input( + trans: ProvidesHistoryContext, data_request: DataRequestUri, history: Optional[model.History] = None +) -> model.HistoryDatasetAssociation: + target_history = history or trans.history + hda = dereference_to_model(trans.sa_session, trans.user, target_history, data_request) + permissions = trans.app.security_agent.history_get_default_permissions(target_history) + trans.app.security_agent.set_all_dataset_permissions(hda.dataset, permissions, new=True, flush=False) + with transaction(trans.sa_session): + trans.sa_session.commit() + return hda + + class HDAStorageCleanerManager(base.StorageCleanerManager): def __init__(self, hda_manager: HDAManager, dataset_manager: datasets.DatasetManager): 
self.hda_manager = hda_manager diff --git a/lib/galaxy/model/deferred.py b/lib/galaxy/model/deferred.py index a241d1418c1c..042b6879cd2a 100644 --- a/lib/galaxy/model/deferred.py +++ b/lib/galaxy/model/deferred.py @@ -95,6 +95,7 @@ def ensure_materialized( self, dataset_instance: Union[HistoryDatasetAssociation, LibraryDatasetDatasetAssociation], target_history: Optional[History] = None, + in_place: bool = False, ) -> HistoryDatasetAssociation: """Create a new detached dataset instance from the supplied instance. @@ -148,10 +149,16 @@ def ensure_materialized( history = dataset_instance.history except DetachedInstanceError: history = None - materialized_dataset_instance = HistoryDatasetAssociation( - create_dataset=False, # is the default but lets make this really clear... - history=history, - ) + + materialized_dataset_instance: HistoryDatasetAssociation + if not in_place: + materialized_dataset_instance = HistoryDatasetAssociation( + create_dataset=False, # is the default but lets make this really clear... + history=history, + ) + else: + assert isinstance(dataset_instance, HistoryDatasetAssociation) + materialized_dataset_instance = cast(HistoryDatasetAssociation, dataset_instance) if attached: sa_session = self._sa_session if sa_session is None: diff --git a/lib/galaxy/model/dereference.py b/lib/galaxy/model/dereference.py new file mode 100644 index 000000000000..ef84cea67b3d --- /dev/null +++ b/lib/galaxy/model/dereference.py @@ -0,0 +1,46 @@ +import os.path +from typing import List + +from galaxy.model import ( + DatasetSource, + DatasetSourceHash, + HistoryDatasetAssociation, + TransformAction, +) +from galaxy.tool_util.parameters import DataRequestUri + + +def dereference_to_model(sa_session, user, history, data_request_uri: DataRequestUri) -> HistoryDatasetAssociation: + name = data_request_uri.name or os.path.basename(data_request_uri.url) + dbkey = data_request_uri.dbkey or "?" 
+ hda = HistoryDatasetAssociation( + name=name, + extension=data_request_uri.ext, + dbkey=dbkey, # TODO + history=history, + create_dataset=True, + sa_session=sa_session, + ) + hda.state = hda.states.DEFERRED + dataset_source = DatasetSource() + dataset_source.source_uri = data_request_uri.url + hashes = [] + for dataset_hash in data_request_uri.hashes or []: + hash_object = DatasetSourceHash() + hash_object.hash_function = dataset_hash.hash_function + hash_object.hash_value = dataset_hash.hash_value + hashes.append(hash_object) + dataset_source.hashes = hashes + hda.dataset.sources = [dataset_source] + transform: List[TransformAction] = [] + if data_request_uri.space_to_tab: + transform.append({"action": "space_to_tab"}) + elif data_request_uri.to_posix_lines: + transform.append({"action": "to_posix_lines"}) + if len(transform) > 0: + dataset_source.transform = transform + + sa_session.add(hda) + sa_session.add(dataset_source) + history.add_dataset(hda, genome_build=dbkey, quota=False) + return hda diff --git a/test/unit/data/test_dereference.py b/test/unit/data/test_dereference.py new file mode 100644 index 000000000000..8448b731a081 --- /dev/null +++ b/test/unit/data/test_dereference.py @@ -0,0 +1,57 @@ +from base64 import b64encode + +from galaxy.model.dereference import dereference_to_model +from galaxy.tool_util.parameters import DataRequestUri +from .model.test_model_store import setup_fixture_context_with_history + +B64_FOR_1_2_3 = b64encode(b"1 2 3").decode("utf-8") +TEST_URI = "gxfiles://test/1.bed" +TEST_BASE64_URI = f"base64://{B64_FOR_1_2_3}" + + +def test_dereference(): + app, sa_session, user, history = setup_fixture_context_with_history() + uri_request = DataRequestUri(url=TEST_URI, ext="bed") + hda = dereference_to_model(sa_session, user, history, uri_request) + assert hda.name == "1.bed" + assert hda.dataset.sources[0].source_uri == TEST_URI + assert hda.ext == "bed" + + +def test_dereference_dbkey(): + app, sa_session, user, history = setup_fixture_context_with_history() + uri_request = DataRequestUri(url=TEST_URI, ext="bed", dbkey="hg19") + hda = dereference_to_model(sa_session, user, history, uri_request) + assert hda.name == "1.bed" + assert hda.dataset.sources[0].source_uri == TEST_URI + assert hda.dbkey == "hg19" + + +def test_dereference_md5(): + app, sa_session, user, history = setup_fixture_context_with_history() + md5 = "f2b33fb7b3d0eb95090a16060e6a24f9" + uri_request = DataRequestUri.model_validate( + { + "url": TEST_BASE64_URI, + "name": "foobar.txt", + "ext": "txt", + "hashes": [{"hash_function": "MD5", "hash_value": md5}], + } + ) + hda = dereference_to_model(sa_session, user, history, uri_request) + assert hda.name == "foobar.txt" + assert hda.dataset.sources[0].source_uri == TEST_BASE64_URI + assert hda.dataset.sources[0].hashes[0] + assert hda.dataset.sources[0].hashes[0].hash_function == "MD5" + assert hda.dataset.sources[0].hashes[0].hash_value == md5 + + +def test_dereference_to_posix(): + app, sa_session, user, history = setup_fixture_context_with_history() + uri_request = DataRequestUri.model_validate( + {"url": TEST_BASE64_URI, "name": "foobar.txt", "ext": "txt", "space_to_tab": True} + ) + hda = dereference_to_model(sa_session, user, history, uri_request) + assert hda.name == "foobar.txt" + assert hda.dataset.sources[0].source_uri == TEST_BASE64_URI + assert hda.dataset.sources[0].transform[0]["action"] == "space_to_tab" From 2fe88a05ea07e9c0dce85f0dbf894bf2c87f6d7e Mon Sep 17 00:00:00 2001 From: John Chilton Date: Wed, 25 Sep 2024 10:15:19 -0400 
Subject: [PATCH 08/14] Allow workflows to consume deferred {src: url} dicts. --- client/src/api/schema/schema.ts | 9 ++++- lib/galaxy/config/schemas/config_schema.yml | 15 ++++++++ lib/galaxy/jobs/handler.py | 8 ++++- lib/galaxy/model/__init__.py | 28 ++++++++++++++- lib/galaxy/schema/invocation.py | 1 + lib/galaxy/workflow/run.py | 7 +++- lib/galaxy/workflow/run_request.py | 39 +++++++++++++++++++++---- lib/galaxy/workflow/scheduling_manager.py | 28 +++++++++++++-- lib/galaxy_test/api/test_workflows.py | 20 +++++++++-- lib/galaxy_test/base/populators.py | 27 ++++++++++++-- 10 files changed, 165 insertions(+), 17 deletions(-) diff --git a/client/src/api/schema/schema.ts b/client/src/api/schema/schema.ts index af1b78aa5656..e14991a844ac 100644 --- a/client/src/api/schema/schema.ts +++ b/client/src/api/schema/schema.ts @@ -11856,7 +11856,14 @@ export interface components { * InvocationState * @enum {string} */ - InvocationState: "new" | "ready" | "scheduled" | "cancelled" | "cancelling" | "failed"; + InvocationState: + | "new" + | "requires_materialization" + | "ready" + | "scheduled" + | "cancelled" + | "cancelling" + | "failed"; /** * InvocationStep * @description Information about workflow invocation step diff --git a/lib/galaxy/config/schemas/config_schema.yml b/lib/galaxy/config/schemas/config_schema.yml index 5098d923d8ca..afcb6ec9c3ed 100644 --- a/lib/galaxy/config/schemas/config_schema.yml +++ b/lib/galaxy/config/schemas/config_schema.yml @@ -3738,6 +3738,21 @@ mapping: Optional configuration file similar to `job_config_file` to specify which Galaxy processes should schedule workflows. + workflow_scheduling_separate_materialization_iteration: + type: bool + default: false + required: false + desc: | + Workflows launched with URI/URL inputs that are not marked as 'deferred' + are "materialized" (or undeferred) by the workflow scheduler. This might be + a lengthy process. Setting this to 'True' will place the invocation back in + the queue after materialization before scheduling the workflow so it is less + likely to starve other workflow scheduling. Ideally, Galaxy would allow more + fine-grained control of handlers but until then, this provides a way to tip the + balance between "doing more work" and "being more fair". The default here is + pretty arbitrary - it has been set to False to optimize Galaxy for automated, + single-user applications where "fairness" is mostly irrelevant.
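For deployers, a minimal sketch of opting in to the separate materialization iteration in galaxy.yml (assuming the standard top-level `galaxy:` section; the option name is the one added in the schema above, everything else is illustrative):

    # galaxy.yml (sketch) -- enable a separate scheduling pass for materialization
    galaxy:
      # Undeferred URL workflow inputs are materialized in their own iteration,
      # so a slow download is less likely to starve other invocations.
      workflow_scheduling_separate_materialization_iteration: true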
+ cache_user_job_count: type: bool default: false diff --git a/lib/galaxy/jobs/handler.py b/lib/galaxy/jobs/handler.py index 0213a797aab4..e66065aab2e4 100644 --- a/lib/galaxy/jobs/handler.py +++ b/lib/galaxy/jobs/handler.py @@ -130,7 +130,13 @@ def setup_query(self): if self.grab_model is model.Job: grab_condition = self.grab_model.state == self.grab_model.states.NEW elif self.grab_model is model.WorkflowInvocation: - grab_condition = self.grab_model.state.in_((self.grab_model.states.NEW, self.grab_model.states.CANCELLING)) + grab_condition = self.grab_model.state.in_( + ( + self.grab_model.states.NEW, + self.grab_model.states.REQUIRES_MATERIALIZATION, + self.grab_model.states.CANCELLING, + ) + ) else: raise NotImplementedError(f"Grabbing {self.grab_model.__name__} not implemented") subq = ( diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 848de080d074..3df173304156 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -18,6 +18,7 @@ import string from collections import defaultdict from collections.abc import Callable +from dataclasses import dataclass from datetime import ( datetime, timedelta, @@ -8546,6 +8547,12 @@ class StoredWorkflowMenuEntry(Base, RepresentById): ) +@dataclass +class InputWithRequest: + input: Any + request: Dict[str, Any] + + class WorkflowInvocation(Base, UsesCreateAndUpdateTime, Dictifiable, Serializable): __tablename__ = "workflow_invocation" @@ -8777,6 +8784,7 @@ def poll_active_workflow_ids(engine, scheduler=None, handler=None): and_conditions = [ or_( WorkflowInvocation.state == WorkflowInvocation.states.NEW, + WorkflowInvocation.state == WorkflowInvocation.states.REQUIRES_MATERIALIZATION, WorkflowInvocation.state == WorkflowInvocation.states.READY, WorkflowInvocation.state == WorkflowInvocation.states.CANCELLING, ), @@ -8868,6 +8876,14 @@ def input_associations(self): inputs.append(input_dataset_collection_assoc) return inputs + def inputs_requiring_materialization(self): + hdas_to_materialize = [] + for input_dataset_assoc in self.input_datasets: + request = input_dataset_assoc.request + if request and not request.get("deferred", False): + hdas_to_materialize.append(input_dataset_assoc.dataset) + return hdas_to_materialize + def _serialize(self, id_encoder, serialization_options): invocation_attrs = dict_for(self) invocation_attrs["state"] = self.state @@ -9030,20 +9046,28 @@ def attach_step(request_to_content): else: request_to_content.workflow_step = step + request: Optional[Dict[str, Any]] = None + if isinstance(content, InputWithRequest): + request = content.request + content = content.input + history_content_type = getattr(content, "history_content_type", None) if history_content_type == "dataset": request_to_content = WorkflowRequestToInputDatasetAssociation() request_to_content.dataset = content + request_to_content.request = request attach_step(request_to_content) self.input_datasets.append(request_to_content) elif history_content_type == "dataset_collection": request_to_content = WorkflowRequestToInputDatasetCollectionAssociation() request_to_content.dataset_collection = content + request_to_content.request = request attach_step(request_to_content) self.input_dataset_collections.append(request_to_content) else: request_to_content = WorkflowRequestInputStepParameter() request_to_content.parameter_value = content + request_to_content.request = request attach_step(request_to_content) self.input_step_parameters.append(request_to_content) @@ -9467,6 +9491,7 @@ class 
WorkflowRequestToInputDatasetAssociation(Base, Dictifiable, Serializable): workflow_invocation_id: Mapped[Optional[int]] = mapped_column(ForeignKey("workflow_invocation.id"), index=True) workflow_step_id: Mapped[Optional[int]] = mapped_column(ForeignKey("workflow_step.id")) dataset_id: Mapped[Optional[int]] = mapped_column(ForeignKey("history_dataset_association.id"), index=True) + request: Mapped[Optional[Dict]] = mapped_column(JSONType) workflow_step: Mapped[Optional["WorkflowStep"]] = relationship() dataset: Mapped[Optional["HistoryDatasetAssociation"]] = relationship() @@ -9502,6 +9527,7 @@ class WorkflowRequestToInputDatasetCollectionAssociation(Base, Dictifiable, Seri workflow_invocation: Mapped[Optional["WorkflowInvocation"]] = relationship( back_populates="input_dataset_collections" ) + request: Mapped[Optional[Dict]] = mapped_column(JSONType) history_content_type = "dataset_collection" dict_collection_visible_keys = ["id", "workflow_invocation_id", "workflow_step_id", "dataset_collection_id", "name"] @@ -9525,6 +9551,7 @@ class WorkflowRequestInputStepParameter(Base, Dictifiable, Serializable): workflow_invocation_id: Mapped[Optional[int]] = mapped_column(ForeignKey("workflow_invocation.id"), index=True) workflow_step_id: Mapped[Optional[int]] = mapped_column(ForeignKey("workflow_step.id")) parameter_value: Mapped[Optional[bytes]] = mapped_column(MutableJSONType) + request: Mapped[Optional[Dict]] = mapped_column(JSONType) workflow_step: Mapped[Optional["WorkflowStep"]] = relationship() workflow_invocation: Mapped[Optional["WorkflowInvocation"]] = relationship(back_populates="input_step_parameters") @@ -9548,7 +9575,6 @@ class WorkflowInvocationOutputDatasetAssociation(Base, Dictifiable, Serializable workflow_step_id: Mapped[Optional[int]] = mapped_column(ForeignKey("workflow_step.id"), index=True) dataset_id: Mapped[Optional[int]] = mapped_column(ForeignKey("history_dataset_association.id"), index=True) workflow_output_id: Mapped[Optional[int]] = mapped_column(ForeignKey("workflow_output.id"), index=True) - workflow_invocation: Mapped[Optional["WorkflowInvocation"]] = relationship(back_populates="output_datasets") workflow_step: Mapped[Optional["WorkflowStep"]] = relationship() dataset: Mapped[Optional["HistoryDatasetAssociation"]] = relationship() diff --git a/lib/galaxy/schema/invocation.py b/lib/galaxy/schema/invocation.py index 4d5ce80548e8..1040aac4d1b4 100644 --- a/lib/galaxy/schema/invocation.py +++ b/lib/galaxy/schema/invocation.py @@ -281,6 +281,7 @@ class InvocationMessageResponseModel(RootModel): class InvocationState(str, Enum): NEW = "new" # Brand new workflow invocation... maybe this should be same as READY + REQUIRES_MATERIALIZATION = "requires_materialization" # an otherwise NEW or READY workflow that requires inputs to be materialized (undeferred) READY = "ready" # Workflow ready for another iteration of scheduling. SCHEDULED = "scheduled" # Workflow has been scheduled. 
CANCELLED = "cancelled" diff --git a/lib/galaxy/workflow/run.py b/lib/galaxy/workflow/run.py index 5ebb586fd56f..5619a4547b8f 100644 --- a/lib/galaxy/workflow/run.py +++ b/lib/galaxy/workflow/run.py @@ -144,7 +144,12 @@ def queue_invoke( ) workflow_invocation = workflow_run_config_to_request(trans, workflow_run_config, workflow) workflow_invocation.workflow = workflow - return trans.app.workflow_scheduling_manager.queue(workflow_invocation, request_params, flush=flush) + initial_state = model.WorkflowInvocation.states.NEW + if workflow_run_config.requires_materialization: + initial_state = model.WorkflowInvocation.states.REQUIRES_MATERIALIZATION + return trans.app.workflow_scheduling_manager.queue( + workflow_invocation, request_params, flush=flush, initial_state=initial_state + ) class WorkflowInvoker: diff --git a/lib/galaxy/workflow/run_request.py b/lib/galaxy/workflow/run_request.py index 516c85d07eb1..24b81fff849d 100644 --- a/lib/galaxy/workflow/run_request.py +++ b/lib/galaxy/workflow/run_request.py @@ -10,11 +10,13 @@ ) from galaxy import exceptions +from galaxy.managers.hdas import dereference_input from galaxy.model import ( EffectiveOutput, History, HistoryDatasetAssociation, HistoryDatasetCollectionAssociation, + InputWithRequest, LibraryDataset, LibraryDatasetDatasetAssociation, WorkflowInvocation, @@ -25,6 +27,7 @@ ensure_object_added_to_session, transaction, ) +from galaxy.tool_util.parameters import DataRequestUri from galaxy.tools.parameters.meta import expand_workflow_inputs from galaxy.workflow.resources import get_resource_mapper_function @@ -57,6 +60,9 @@ class WorkflowRunConfig: :param inputs: Map from step ids to dict's containing HDA for these steps. :type inputs: dict + :param requires_materialization: True if an input requires materialization before + the workflow is scheduled. + :param inputs_by: How inputs maps to inputs (datasets/collections) to workflows steps - by unencoded database id ('step_id'), index in workflow 'step_index' (independent of database), or by input name for @@ -78,6 +84,7 @@ def __init__( copy_inputs_to_history: bool = False, use_cached_job: bool = False, resource_params: Optional[Dict[int, Any]] = None, + requires_materialization: bool = False, preferred_object_store_id: Optional[str] = None, preferred_outputs_object_store_id: Optional[str] = None, preferred_intermediate_object_store_id: Optional[str] = None, @@ -91,6 +98,7 @@ def __init__( self.resource_params = resource_params or {} self.allow_tool_state_corrections = allow_tool_state_corrections self.use_cached_job = use_cached_job + self.requires_materialization = requires_materialization self.preferred_object_store_id = preferred_object_store_id self.preferred_outputs_object_store_id = preferred_outputs_object_store_id self.preferred_intermediate_object_store_id = preferred_intermediate_object_store_id @@ -310,7 +318,7 @@ def build_workflow_run_configs( legacy = payload.get("legacy", False) already_normalized = payload.get("parameters_normalized", False) raw_parameters = payload.get("parameters", {}) - + requires_materialization: bool = False run_configs = [] unexpanded_param_map = _normalize_step_parameters( workflow.steps, raw_parameters, legacy=legacy, already_normalized=already_normalized @@ -368,16 +376,22 @@ def build_workflow_run_configs( raise exceptions.RequestParameterInvalidException( f"Not input source type defined for input '{input_dict}'." 
) - if "id" not in input_dict: - raise exceptions.RequestParameterInvalidException(f"Not input id defined for input '{input_dict}'.") + input_source = input_dict["src"] + if "id" not in input_dict and input_source != "url": + raise exceptions.RequestParameterInvalidException(f"No input id defined for input '{input_dict}'.") + elif input_source == "url" and not input_dict.get("url"): + raise exceptions.RequestParameterInvalidException( + f"Supplied 'url' is empty or absent for input '{input_dict}'." + ) if "content" in input_dict: raise exceptions.RequestParameterInvalidException( f"Input cannot specify explicit 'content' attribute {input_dict}'." ) - input_source = input_dict["src"] - input_id = input_dict["id"] + input_id = input_dict.get("id") try: + added_to_history = False if input_source == "ldda": + assert input_id ldda = trans.sa_session.get(LibraryDatasetDatasetAssociation, trans.security.decode_id(input_id)) assert ldda assert trans.user_is_admin or trans.app.security_agent.can_access_dataset( @@ -385,6 +399,7 @@ def build_workflow_run_configs( ) content = ldda.to_history_dataset_association(history, add_to_history=add_to_history) elif input_source == "ld": + assert input_id library_dataset = trans.sa_session.get(LibraryDataset, trans.security.decode_id(input_id)) assert library_dataset ldda = library_dataset.library_dataset_dataset_association @@ -394,6 +409,7 @@ def build_workflow_run_configs( ) content = ldda.to_history_dataset_association(history, add_to_history=add_to_history) elif input_source == "hda": + assert input_id # Get dataset handle, add to dict and history if necessary content = trans.sa_session.get(HistoryDatasetAssociation, trans.security.decode_id(input_id)) assert trans.user_is_admin or trans.app.security_agent.can_access_dataset( @@ -401,11 +417,21 @@ def build_workflow_run_configs( ) elif input_source == "hdca": content = app.dataset_collection_manager.get_dataset_collection_instance(trans, "history", input_id) + elif input_source == "url": + data_request = DataRequestUri.model_validate(input_dict) + hda: HistoryDatasetAssociation = dereference_input(trans, data_request, history) + added_to_history = True + content = InputWithRequest( + input=hda, + request=data_request.model_dump(mode="json"), + ) + if not data_request.deferred: + requires_materialization = True else: raise exceptions.RequestParameterInvalidException( f"Unknown workflow input source '{input_source}' specified." 
) - if add_to_history and content.history != history: + if not added_to_history and add_to_history and content.history != history: if isinstance(content, HistoryDatasetCollectionAssociation): content = content.copy(element_destination=history, flush=False) else: @@ -474,6 +500,7 @@ def build_workflow_run_configs( allow_tool_state_corrections=allow_tool_state_corrections, use_cached_job=use_cached_job, resource_params=resource_params, + requires_materialization=requires_materialization, preferred_object_store_id=preferred_object_store_id, preferred_outputs_object_store_id=preferred_outputs_object_store_id, preferred_intermediate_object_store_id=preferred_intermediate_object_store_id, diff --git a/lib/galaxy/workflow/scheduling_manager.py b/lib/galaxy/workflow/scheduling_manager.py index 8d31130bad21..d01c85d5d6bc 100644 --- a/lib/galaxy/workflow/scheduling_manager.py +++ b/lib/galaxy/workflow/scheduling_manager.py @@ -1,11 +1,17 @@ import os from functools import partial +from typing import Optional import galaxy.workflow.schedulers from galaxy import model from galaxy.exceptions import HandlerAssignmentError from galaxy.jobs.handler import InvocationGrabber from galaxy.model.base import transaction +from galaxy.schema.invocation import InvocationState +from galaxy.schema.tasks import ( + MaterializeDatasetInstanceTaskRequest, + RequestUser, +) from galaxy.util import plugin_config from galaxy.util.custom_logging import get_logger from galaxy.util.monitors import Monitors @@ -154,8 +160,9 @@ def shutdown(self): if exception: raise exception - def queue(self, workflow_invocation, request_params, flush=True): - workflow_invocation.set_state(model.WorkflowInvocation.states.NEW) + def queue(self, workflow_invocation, request_params, flush=True, initial_state: Optional[InvocationState] = None): + initial_state = initial_state or model.WorkflowInvocation.states.NEW + workflow_invocation.set_state(initial_state) workflow_invocation.scheduler = request_params.get("scheduler", None) or self.default_scheduler_id sa_session = self.app.model.context sa_session.add(workflow_invocation) @@ -329,6 +336,23 @@ def __schedule(self, workflow_scheduler_id, workflow_scheduler): def __attempt_schedule(self, invocation_id, workflow_scheduler): with self.app.model.context() as session: workflow_invocation = session.get(model.WorkflowInvocation, invocation_id) + if workflow_invocation.state == workflow_invocation.states.REQUIRES_MATERIALIZATION: + hdas_to_materialize = workflow_invocation.inputs_requiring_materialization() + for hda in hdas_to_materialize: + user = RequestUser(user_id=workflow_invocation.history.user_id) + task_request = MaterializeDatasetInstanceTaskRequest( + user=user, + history_id=workflow_invocation.history.id, + source="hda", + content=hda.id, + ) + self.app.hda_manager.materialize(task_request, in_place=True) + # place back into ready and let it proceed normally on next iteration? 
+ workflow_invocation.set_state(model.WorkflowInvocation.states.READY) + session.add(workflow_invocation) + session.commit() + if self.app.config.workflow_scheduling_separate_materialization_iteration: + return None try: if workflow_invocation.state == workflow_invocation.states.CANCELLING: workflow_invocation.cancel_invocation_steps() diff --git a/lib/galaxy_test/api/test_workflows.py b/lib/galaxy_test/api/test_workflows.py index b6a5ec037bea..5ffb1ca347e8 100644 --- a/lib/galaxy_test/api/test_workflows.py +++ b/lib/galaxy_test/api/test_workflows.py @@ -1533,11 +1533,27 @@ def test_run_workflow_by_name(self): def test_run_workflow(self): self.__run_cat_workflow(inputs_by="step_id") - def __run_cat_workflow(self, inputs_by): + @skip_without_tool("cat1") + def test_run_workflow_by_deferred_url(self): + with self.dataset_populator.test_history() as history_id: + self.__run_cat_workflow(inputs_by="deferred_url", history_id=history_id) + input_dataset_details = self.dataset_populator.get_history_dataset_details(history_id, hid=1) + assert input_dataset_details["state"] == "deferred" + + @skip_without_tool("cat1") + def test_run_workflow_by_url(self): + with self.dataset_populator.test_history() as history_id: + self.__run_cat_workflow(inputs_by="url", history_id=history_id) + input_dataset_details = self.dataset_populator.get_history_dataset_details(history_id, hid=1) + assert input_dataset_details["state"] == "ok" + + def __run_cat_workflow(self, inputs_by, history_id: Optional[str] = None): workflow = self.workflow_populator.load_workflow(name="test_for_run") workflow["steps"]["0"]["uuid"] = str(uuid4()) workflow["steps"]["1"]["uuid"] = str(uuid4()) - workflow_request, _, workflow_id = self._setup_workflow_run(workflow, inputs_by=inputs_by) + workflow_request, _, workflow_id = self._setup_workflow_run( + workflow, inputs_by=inputs_by, history_id=history_id + ) invocation_id = self.workflow_populator.invoke_workflow_and_wait(workflow_id, request=workflow_request).json()[ "id" ] diff --git a/lib/galaxy_test/base/populators.py b/lib/galaxy_test/base/populators.py index 5bbd28dbbc77..81afbbb28062 100644 --- a/lib/galaxy_test/base/populators.py +++ b/lib/galaxy_test/base/populators.py @@ -2155,23 +2155,33 @@ def setup_workflow_run( workflow_id = self.create_workflow(workflow) if not history_id: history_id = self.dataset_populator.new_history() - hda1 = self.dataset_populator.new_dataset(history_id, content="1 2 3", wait=True) - hda2 = self.dataset_populator.new_dataset(history_id, content="4 5 6", wait=True) + hda1: Optional[Dict[str, Any]] = None + hda2: Optional[Dict[str, Any]] = None + label_map: Optional[Dict[str, Any]] = None + if inputs_by != "url": + hda1 = self.dataset_populator.new_dataset(history_id, content="1 2 3", wait=True) + hda2 = self.dataset_populator.new_dataset(history_id, content="4 5 6", wait=True) + label_map = {"WorkflowInput1": ds_entry(hda1), "WorkflowInput2": ds_entry(hda2)} workflow_request = dict( history=f"hist_id={history_id}", ) - label_map = {"WorkflowInput1": ds_entry(hda1), "WorkflowInput2": ds_entry(hda2)} if inputs_by == "step_id": + assert label_map ds_map = self.build_ds_map(workflow_id, label_map) workflow_request["ds_map"] = ds_map elif inputs_by == "step_index": + assert hda1 + assert hda2 index_map = {"0": ds_entry(hda1), "1": ds_entry(hda2)} workflow_request["inputs"] = json.dumps(index_map) workflow_request["inputs_by"] = "step_index" elif inputs_by == "name": + assert label_map workflow_request["inputs"] = json.dumps(label_map) 
workflow_request["inputs_by"] = "name" elif inputs_by in ["step_uuid", "uuid_implicitly"]: + assert hda1 + assert hda2 assert workflow, f"Must specify workflow for this inputs_by {inputs_by} parameter value" uuid_map = { workflow["steps"]["0"]["uuid"]: ds_entry(hda1), @@ -2180,6 +2190,16 @@ def setup_workflow_run( workflow_request["inputs"] = json.dumps(uuid_map) if inputs_by == "step_uuid": workflow_request["inputs_by"] = "step_uuid" + elif inputs_by in ["url", "deferred_url"]: + input_b64_1 = base64.b64encode(b"1 2 3").decode("utf-8") + input_b64_2 = base64.b64encode(b"4 5 6").decode("utf-8") + deferred = inputs_by == "deferred_url" + inputs = { + "WorkflowInput1": {"src": "url", "url": f"base64://{input_b64_1}", "ext": "txt", "deferred": deferred}, + "WorkflowInput2": {"src": "url", "url": f"base64://{input_b64_2}", "ext": "txt", "deferred": deferred}, + } + workflow_request["inputs"] = json.dumps(inputs) + workflow_request["inputs_by"] = "name" return workflow_request, history_id, workflow_id @@ -3246,6 +3266,7 @@ def get_state(): "queued", "new", "ready", + "requires_materialization", "stop", "stopped", "setting_metadata", From 660f78fe3704d492d9279cd8beb0866162e857ed Mon Sep 17 00:00:00 2001 From: John Chilton Date: Fri, 6 Sep 2024 16:52:15 -0400 Subject: [PATCH 09/14] Implement workflow landings. Includes a lot of plumbing for tools but not the APIs - they are not ready to go yet. --- client/src/api/landings.ts | 4 + .../Form/Elements/FormData/FormDataUri.vue | 66 +++++++ client/src/components/Form/FormElement.vue | 15 ++ client/src/components/Landing/ToolLanding.vue | 5 + .../components/Landing/WorkflowLanding.vue | 61 ++++++ .../components/Workflow/Run/WorkflowRun.vue | 3 + .../Workflow/Run/WorkflowRunFormSimple.vue | 11 +- client/src/entry/analysis/router.js | 12 ++ lib/galaxy/exceptions/__init__.py | 5 + lib/galaxy/exceptions/error_codes.json | 5 + lib/galaxy/managers/landing.py | 172 +++++++++++++++++ lib/galaxy/model/__init__.py | 43 ++++- lib/galaxy/model/deferred.py | 7 +- lib/galaxy/schema/schema.py | 43 +++++ lib/galaxy/tool_util/client/landing.py | 119 ++++++++++++ .../client/landing_library.catalog.yml | 21 +++ lib/galaxy/webapps/galaxy/api/__init__.py | 13 +- lib/galaxy/webapps/galaxy/api/workflows.py | 39 ++++ lib/galaxy/webapps/galaxy/buildapp.py | 2 + lib/galaxy/workflow/scheduling_manager.py | 45 +++-- lib/galaxy_test/api/test_landing.py | 52 ++++++ lib/galaxy_test/api/test_workflows.py | 121 +++++++++++- lib/galaxy_test/base/populators.py | 55 +++++- lib/galaxy_test/selenium/framework.py | 6 +- test/unit/app/managers/test_landing.py | 176 ++++++++++++++++++ 25 files changed, 1068 insertions(+), 33 deletions(-) create mode 100644 client/src/api/landings.ts create mode 100644 client/src/components/Form/Elements/FormData/FormDataUri.vue create mode 100644 client/src/components/Landing/ToolLanding.vue create mode 100644 client/src/components/Landing/WorkflowLanding.vue create mode 100644 lib/galaxy/managers/landing.py create mode 100644 lib/galaxy/tool_util/client/landing.py create mode 100644 lib/galaxy/tool_util/client/landing_library.catalog.yml create mode 100644 lib/galaxy_test/api/test_landing.py create mode 100644 test/unit/app/managers/test_landing.py diff --git a/client/src/api/landings.ts b/client/src/api/landings.ts new file mode 100644 index 000000000000..03bbfc01d840 --- /dev/null +++ b/client/src/api/landings.ts @@ -0,0 +1,4 @@ +import { type components } from "@/api/schema"; + +export type ClaimLandingPayload = 
components["schemas"]["ClaimLandingPayload"]; +export type WorkflowLandingRequest = components["schemas"]["WorkflowLandingRequest"]; diff --git a/client/src/components/Form/Elements/FormData/FormDataUri.vue b/client/src/components/Form/Elements/FormData/FormDataUri.vue new file mode 100644 index 000000000000..0c888fbc4fce --- /dev/null +++ b/client/src/components/Form/Elements/FormData/FormDataUri.vue @@ -0,0 +1,66 @@ + + + + diff --git a/client/src/components/Form/FormElement.vue b/client/src/components/Form/FormElement.vue index 72af07dd7f8d..ed81622799e1 100644 --- a/client/src/components/Form/FormElement.vue +++ b/client/src/components/Form/FormElement.vue @@ -14,6 +14,7 @@ import type { FormParameterAttributes, FormParameterTypes, FormParameterValue } import FormBoolean from "./Elements/FormBoolean.vue"; import FormColor from "./Elements/FormColor.vue"; import FormData from "./Elements/FormData/FormData.vue"; +import FormDataUri from "./Elements/FormData/FormDataUri.vue"; import FormDataDialog from "./Elements/FormDataDialog.vue"; import FormDirectory from "./Elements/FormDirectory.vue"; import FormDrilldown from "./Elements/FormDrilldown/FormDrilldown.vue"; @@ -130,6 +131,14 @@ const elementId = computed(() => `form-element-${props.id}`); const hasAlert = computed(() => alerts.value.length > 0); const showPreview = computed(() => (collapsed.value && attrs.value["collapsible_preview"]) || props.disabled); const showField = computed(() => !collapsed.value && !props.disabled); +const isUriDataField = computed(() => { + const dataField = props.type == "data"; + if (dataField && props.value && "src" in props.value) { + const src = props.value.src; + return src == "url"; + } + return false; +}); const previewText = computed(() => attrs.value["text_value"]); const helpText = computed(() => { @@ -285,6 +294,12 @@ function onAlert(value: string | undefined) { :options="attrs.options" :optional="attrs.optional" :multiple="attrs.multiple" /> + + + diff --git a/client/src/components/Landing/WorkflowLanding.vue b/client/src/components/Landing/WorkflowLanding.vue new file mode 100644 index 000000000000..b5f7e2126809 --- /dev/null +++ b/client/src/components/Landing/WorkflowLanding.vue @@ -0,0 +1,61 @@ + + + diff --git a/client/src/components/Workflow/Run/WorkflowRun.vue b/client/src/components/Workflow/Run/WorkflowRun.vue index 4190257e3cdd..5e8d9dea8bb6 100644 --- a/client/src/components/Workflow/Run/WorkflowRun.vue +++ b/client/src/components/Workflow/Run/WorkflowRun.vue @@ -30,6 +30,7 @@ interface Props { preferSimpleForm?: boolean; simpleFormTargetHistory?: string; simpleFormUseJobCache?: boolean; + requestState?: Record; } const props = withDefaults(defineProps(), { @@ -37,6 +38,7 @@ const props = withDefaults(defineProps(), { preferSimpleForm: false, simpleFormTargetHistory: "current", simpleFormUseJobCache: false, + requestState: undefined, }); const loading = ref(true); @@ -200,6 +202,7 @@ defineExpose({ :target-history="simpleFormTargetHistory" :use-job-cache="simpleFormUseJobCache" :can-mutate-current-history="canRunOnHistory" + :request-state="requestState" @submissionSuccess="handleInvocations" @submissionError="handleSubmissionError" @showAdvanced="showAdvanced" /> diff --git a/client/src/components/Workflow/Run/WorkflowRunFormSimple.vue b/client/src/components/Workflow/Run/WorkflowRunFormSimple.vue index e7e45723ff16..feb1bb1b2ac2 100644 --- a/client/src/components/Workflow/Run/WorkflowRunFormSimple.vue +++ b/client/src/components/Workflow/Run/WorkflowRunFormSimple.vue @@ -99,6 
+99,10 @@ export default { type: Boolean, required: true, }, + requestState: { + type: Object, + required: false, + }, }, setup() { const { config, isConfigLoaded } = useConfig(true); @@ -135,6 +139,7 @@ export default { if (isWorkflowInput(step.step_type)) { const stepName = new String(step.step_index); const stepLabel = step.step_label || new String(step.step_index + 1); + const stepType = step.step_type; const help = step.annotation; const longFormInput = step.inputs[0]; const stepAsInput = Object.assign({}, longFormInput, { @@ -142,10 +147,14 @@ export default { help: help, label: stepLabel, }); + if (this.requestState && this.requestState[stepLabel]) { + const value = this.requestState[stepLabel]; + stepAsInput.value = value; + } // disable collection mapping... stepAsInput.flavor = "module"; inputs.push(stepAsInput); - this.inputTypes[stepName] = step.step_type; + this.inputTypes[stepName] = stepType; } }); return inputs; diff --git a/client/src/entry/analysis/router.js b/client/src/entry/analysis/router.js index 0128829043b9..18024224e9d3 100644 --- a/client/src/entry/analysis/router.js +++ b/client/src/entry/analysis/router.js @@ -17,6 +17,8 @@ import HistoryImport from "components/HistoryImport"; import InteractiveTools from "components/InteractiveTools/InteractiveTools"; import JobDetails from "components/JobInformation/JobDetails"; import CarbonEmissionsCalculations from "components/JobMetrics/CarbonEmissions/CarbonEmissionsCalculations"; +import ToolLanding from "components/Landing/ToolLanding"; +import WorkflowLanding from "components/Landing/WorkflowLanding"; import PageDisplay from "components/PageDisplay/PageDisplay"; import PageEditor from "components/PageEditor/PageEditor"; import ToolSuccess from "components/Tool/ToolSuccess"; @@ -494,6 +496,16 @@ export function getRouter(Galaxy) { path: "tools/json", component: ToolsJson, }, + { + path: "tool_landings/:uuid", + component: ToolLanding, + props: true, + }, + { + path: "workflow_landings/:uuid", + component: WorkflowLanding, + props: true, + }, { path: "user", component: UserPreferences, diff --git a/lib/galaxy/exceptions/__init__.py b/lib/galaxy/exceptions/__init__.py index f8e8c956ac98..749b80a57433 100644 --- a/lib/galaxy/exceptions/__init__.py +++ b/lib/galaxy/exceptions/__init__.py @@ -224,6 +224,11 @@ class UserActivationRequiredException(MessageException): err_code = error_codes_by_name["USER_ACTIVATION_REQUIRED"] +class ItemAlreadyClaimedException(MessageException): + status_code = 403 + err_code = error_codes_by_name["ITEM_IS_CLAIMED"] + + class ObjectNotFound(MessageException): """Accessed object was not found""" diff --git a/lib/galaxy/exceptions/error_codes.json b/lib/galaxy/exceptions/error_codes.json index 1c57fbc7bbad..cfaca436a65c 100644 --- a/lib/galaxy/exceptions/error_codes.json +++ b/lib/galaxy/exceptions/error_codes.json @@ -144,6 +144,11 @@ "code": 403007, "message": "Action requires account activation." }, + { + "name": "ITEM_IS_CLAIMED", + "code": 403008, + "message": "This item has already been claimed and cannot be re-claimed." 
+ }, { "name": "USER_REQUIRED", "code": 403008, diff --git a/lib/galaxy/managers/landing.py b/lib/galaxy/managers/landing.py new file mode 100644 index 000000000000..2138c0c81ca2 --- /dev/null +++ b/lib/galaxy/managers/landing.py @@ -0,0 +1,172 @@ +from typing import ( + Optional, + Union, +) +from uuid import uuid4 + +from pydantic import UUID4 +from sqlalchemy import select + +from galaxy.exceptions import ( + InconsistentDatabase, + InsufficientPermissionsException, + ItemAlreadyClaimedException, + ObjectNotFound, + RequestParameterMissingException, +) +from galaxy.model import ( + ToolLandingRequest as ToolLandingRequestModel, + WorkflowLandingRequest as WorkflowLandingRequestModel, +) +from galaxy.model.base import transaction +from galaxy.model.scoped_session import galaxy_scoped_session +from galaxy.schema.schema import ( + ClaimLandingPayload, + CreateToolLandingRequestPayload, + CreateWorkflowLandingRequestPayload, + LandingRequestState, + ToolLandingRequest, + WorkflowLandingRequest, +) +from galaxy.security.idencoding import IdEncodingHelper +from galaxy.util import safe_str_cmp +from .context import ProvidesUserContext + +LandingRequestModel = Union[ToolLandingRequestModel, WorkflowLandingRequestModel] + + +class LandingRequestManager: + + def __init__(self, sa_session: galaxy_scoped_session, security: IdEncodingHelper): + self.sa_session = sa_session + self.security = security + + def create_tool_landing_request(self, payload: CreateToolLandingRequestPayload) -> ToolLandingRequest: + model = ToolLandingRequestModel() + model.tool_id = payload.tool_id + model.tool_version = payload.tool_version + model.request_state = payload.request_state + model.uuid = uuid4() + model.client_secret = payload.client_secret + self._save(model) + return self._tool_response(model) + + def create_workflow_landing_request(self, payload: CreateWorkflowLandingRequestPayload) -> WorkflowLandingRequest: + model = WorkflowLandingRequestModel() + if payload.workflow_target_type == "stored_workflow": + model.stored_workflow_id = self.security.decode_id(payload.workflow_id) + else: + model.workflow_id = self.security.decode_id(payload.workflow_id) + model.request_state = payload.request_state + model.uuid = uuid4() + model.client_secret = payload.client_secret + self._save(model) + return self._workflow_response(model) + + def claim_tool_landing_request( + self, trans: ProvidesUserContext, uuid: UUID4, claim: Optional[ClaimLandingPayload] + ) -> ToolLandingRequest: + request = self._get_tool_landing_request(uuid) + self._check_can_claim(trans, request, claim) + request.user_id = trans.user.id + self._save(request) + return self._tool_response(request) + + def claim_workflow_landing_request( + self, trans: ProvidesUserContext, uuid: UUID4, claim: Optional[ClaimLandingPayload] + ) -> WorkflowLandingRequest: + request = self._get_workflow_landing_request(uuid) + self._check_can_claim(trans, request, claim) + request.user_id = trans.user.id + self._save(request) + return self._workflow_response(request) + + def get_tool_landing_request(self, trans: ProvidesUserContext, uuid: UUID4) -> ToolLandingRequest: + request = self._get_claimed_tool_landing_request(trans, uuid) + return self._tool_response(request) + + def get_workflow_landing_request(self, trans: ProvidesUserContext, uuid: UUID4) -> WorkflowLandingRequest: + request = self._get_claimed_workflow_landing_request(trans, uuid) + return self._workflow_response(request) + + def _check_can_claim( + self, trans: ProvidesUserContext, request: 
LandingRequestModel, claim: Optional[ClaimLandingPayload] + ): + if request.client_secret is not None: + if claim is None or not claim.client_secret: + raise RequestParameterMissingException() + if not safe_str_cmp(request.client_secret, claim.client_secret): + raise InsufficientPermissionsException() + if request.user_id is not None: + raise ItemAlreadyClaimedException() + + def _get_tool_landing_request(self, uuid: UUID4) -> ToolLandingRequestModel: + request = self.sa_session.scalars( + select(ToolLandingRequestModel).where(ToolLandingRequestModel.uuid == str(uuid)) + ).one_or_none() + if request is None: + raise ObjectNotFound() + return request + + def _get_workflow_landing_request(self, uuid: UUID4) -> WorkflowLandingRequestModel: + request = self.sa_session.scalars( + select(WorkflowLandingRequestModel).where(WorkflowLandingRequestModel.uuid == str(uuid)) + ).one_or_none() + if request is None: + raise ObjectNotFound() + return request + + def _get_claimed_tool_landing_request(self, trans: ProvidesUserContext, uuid: UUID4) -> ToolLandingRequestModel: + request = self._get_tool_landing_request(uuid) + self._check_ownership(trans, request) + return request + + def _get_claimed_workflow_landing_request( + self, trans: ProvidesUserContext, uuid: UUID4 + ) -> WorkflowLandingRequestModel: + request = self._get_workflow_landing_request(uuid) + self._check_ownership(trans, request) + return request + + def _tool_response(self, model: ToolLandingRequestModel) -> ToolLandingRequest: + response_model = ToolLandingRequest( + tool_id=model.tool_id, + tool_version=model.tool_version, + request_state=model.request_state, + uuid=model.uuid, + state=self._state(model), + ) + return response_model + + def _workflow_response(self, model: WorkflowLandingRequestModel) -> WorkflowLandingRequest: + workflow_id: Optional[int] + if model.stored_workflow_id is not None: + workflow_id = model.stored_workflow_id + target_type = "stored_workflow" + else: + workflow_id = model.workflow_id + target_type = "workflow" + if workflow_id is None: + raise InconsistentDatabase() + assert workflow_id + response_model = WorkflowLandingRequest( + workflow_id=self.security.encode_id(workflow_id), + workflow_target_type=target_type, + request_state=model.request_state, + uuid=model.uuid, + state=self._state(model), + ) + return response_model + + def _check_ownership(self, trans: ProvidesUserContext, model: LandingRequestModel): + if model.user_id != trans.user.id: + raise InsufficientPermissionsException() + + def _state(self, model: LandingRequestModel) -> LandingRequestState: + return LandingRequestState.UNCLAIMED if model.user_id is None else LandingRequestState.CLAIMED + + def _save(self, model: LandingRequestModel): + sa_session = self.sa_session + sa_session.add(model) + with transaction(sa_session): + sa_session.commit() diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 3df173304156..13a183212562 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -8880,8 +8880,10 @@ def inputs_requiring_materialization(self): hdas_to_materialize = [] for input_dataset_assoc in self.input_datasets: request = input_dataset_assoc.request - if request and not request.get("deferred", False): - hdas_to_materialize.append(input_dataset_assoc.dataset) + if request: + deferred = request.get("deferred", False) + if not deferred: + hdas_to_materialize.append(input_dataset_assoc.dataset) return hdas_to_materialize def _serialize(self, id_encoder, serialization_options): @@ -11216,6 
+11218,43 @@ def file_source_configuration( raise ValueError("No template sent to file_source_configuration") +# TODO: add link from tool_request to this +class ToolLandingRequest(Base): + __tablename__ = "tool_landing_request" + + id: Mapped[int] = mapped_column(primary_key=True) + user_id: Mapped[Optional[int]] = mapped_column(ForeignKey("galaxy_user.id"), index=True) + create_time: Mapped[datetime] = mapped_column(default=now, nullable=True) + update_time: Mapped[Optional[datetime]] = mapped_column(index=True, default=now, onupdate=now, nullable=True) + uuid: Mapped[Union[UUID, str]] = mapped_column(UUIDType(), index=True) + tool_id: Mapped[str] = mapped_column(String(255)) + tool_version: Mapped[Optional[str]] = mapped_column(String(255), default=None) + request_state: Mapped[Optional[Dict]] = mapped_column(JSONType) + client_secret: Mapped[Optional[str]] = mapped_column(String(255), default=None) + + user: Mapped[Optional["User"]] = relationship() + + +# TODO: add link from workflow_invocation to this +class WorkflowLandingRequest(Base): + __tablename__ = "workflow_landing_request" + + id: Mapped[int] = mapped_column(primary_key=True) + user_id: Mapped[Optional[int]] = mapped_column(ForeignKey("galaxy_user.id"), index=True) + workflow_id: Mapped[Optional[int]] = mapped_column(ForeignKey("stored_workflow.id"), nullable=True) + stored_workflow_id: Mapped[Optional[int]] = mapped_column(ForeignKey("workflow.id"), nullable=True) + + create_time: Mapped[datetime] = mapped_column(default=now, nullable=True) + update_time: Mapped[Optional[datetime]] = mapped_column(index=True, default=now, onupdate=now, nullable=True) + uuid: Mapped[Union[UUID, str]] = mapped_column(UUIDType(), index=True) + request_state: Mapped[Optional[Dict]] = mapped_column(JSONType) + client_secret: Mapped[Optional[str]] = mapped_column(String(255), default=None) + + user: Mapped[Optional["User"]] = relationship() + stored_workflow: Mapped[Optional["StoredWorkflow"]] = relationship() + workflow: Mapped[Optional["Workflow"]] = relationship() + + class UserAction(Base, RepresentById): __tablename__ = "user_action" diff --git a/lib/galaxy/model/deferred.py b/lib/galaxy/model/deferred.py index 042b6879cd2a..32ee9f954dda 100644 --- a/lib/galaxy/model/deferred.py +++ b/lib/galaxy/model/deferred.py @@ -165,9 +165,10 @@ def ensure_materialized( sa_session = object_session(dataset_instance) assert sa_session sa_session.add(materialized_dataset_instance) - materialized_dataset_instance.copy_from( - dataset_instance, new_dataset=materialized_dataset, include_tags=attached, include_metadata=True - ) + if not in_place: + materialized_dataset_instance.copy_from( + dataset_instance, new_dataset=materialized_dataset, include_tags=attached, include_metadata=True + ) require_metadata_regeneration = ( materialized_dataset_instance.has_metadata_files or materialized_dataset_instance.metadata_deferred ) diff --git a/lib/galaxy/schema/schema.py b/lib/galaxy/schema/schema.py index 2e585087c318..9ed9c4ba3444 100644 --- a/lib/galaxy/schema/schema.py +++ b/lib/galaxy/schema/schema.py @@ -3832,6 +3832,49 @@ class PageSummaryList(RootModel): ) +class LandingRequestState(str, Enum): + UNCLAIMED = "unclaimed" + CLAIMED = "claimed" + + +ToolLandingRequestIdField = Field(title="ID", description="Encoded ID of the tool landing request") +WorkflowLandingRequestIdField = Field(title="ID", description="Encoded ID of the workflow landing request") + + +class CreateToolLandingRequestPayload(Model): + tool_id: str + tool_version: Optional[str] = None + 
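The _check_can_claim helper above enforces two rules when a landing request is claimed: if a client secret was stored at creation time, the claim must present a matching one (compared with galaxy.util.safe_str_cmp), and a request can be claimed only once. A minimal standalone sketch of that logic, using hmac.compare_digest in place of safe_str_cmp and builtin exceptions standing in for Galaxy's:

    import hmac
    from typing import Optional


    def check_can_claim(stored_secret: Optional[str], offered_secret: Optional[str], claimed_by: Optional[int]) -> None:
        # Sketch of LandingRequestManager._check_can_claim; the exception types are
        # stand-ins for RequestParameterMissingException, InsufficientPermissionsException
        # and ItemAlreadyClaimedException.
        if stored_secret is not None:
            if not offered_secret:
                raise ValueError("client_secret is required to claim this landing request")
            if not hmac.compare_digest(stored_secret, offered_secret):  # constant-time compare
                raise PermissionError("client_secret does not match")
        if claimed_by is not None:
            raise RuntimeError("landing request has already been claimed")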
request_state: Optional[Dict[str, Any]] = None + client_secret: Optional[str] = None + + +class CreateWorkflowLandingRequestPayload(Model): + workflow_id: str + workflow_target_type: Literal["stored_workflow", "workflow"] + request_state: Optional[Dict[str, Any]] = None + client_secret: Optional[str] = None + + +class ClaimLandingPayload(Model): + client_secret: Optional[str] = None + + +class ToolLandingRequest(Model): + uuid: UuidField + tool_id: str + tool_version: Optional[str] = None + request_state: Optional[Dict[str, Any]] = None + state: LandingRequestState + + +class WorkflowLandingRequest(Model): + uuid: UuidField + workflow_id: str + workflow_target_type: Literal["stored_workflow", "workflow"] + request_state: Dict[str, Any] + state: LandingRequestState + + class MessageExceptionModel(BaseModel): err_msg: str err_code: int diff --git a/lib/galaxy/tool_util/client/landing.py b/lib/galaxy/tool_util/client/landing.py new file mode 100644 index 000000000000..425559a87ffa --- /dev/null +++ b/lib/galaxy/tool_util/client/landing.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python + +# . .venv/bin/activate; PYTHONPATH=lib python lib/galaxy/tool_util/client/landing.py -g http://localhost:8081 simple_workflow + +import argparse +import random +import string +import sys +from dataclasses import dataclass +from typing import Optional + +import requests +import yaml + +from galaxy.util.resources import resource_string + +DESCRIPTION = """ +A small utility for demoing creation of tool and workflow landing endpoints. + +This allows an external developer to create tool and workflow forms with +pre-populated parameters and handing them off with URLs to their clients/users. +""" +RANDOM_SECRET_LENGTH = 10 + + +def load_default_catalog(): + catalog_yaml = resource_string("galaxy.tool_util.client", "landing_catalog.sample.yml") + return yaml.safe_load(catalog_yaml) + + +@dataclass +class Request: + template_id: str + catalog: str + client_secret: Optional[str] + galaxy_url: str + + +@dataclass +class Response: + landing_url: str + + +def generate_claim_url(request: Request) -> Response: + template_id = request.template_id + catalog_path = request.catalog + galaxy_url = request.galaxy_url + client_secret = request.client_secret + if client_secret == "__GEN__": + client_secret = "".join( + random.choice(string.ascii_lowercase + string.digits) for _ in range(RANDOM_SECRET_LENGTH) + ) + if catalog_path: + with open(catalog_path) as f: + catalog = yaml.safe_load(f) + else: + catalog = load_default_catalog() + template = catalog[template_id] + template_type = "tool" if "tool_id" in template else "workflow" + if client_secret: + template["client_secret"] = client_secret + + landing_request_url = f"{galaxy_url}/api/{template_type}_landings" + raw_response = requests.post( + landing_request_url, + json=template, + ) + raw_response.raise_for_status() + response = raw_response.json() + url = f"{galaxy_url}/{template_type}_landings/{response['uuid']}" + if client_secret: + url = url + f"?secret={client_secret}" + return Response(url) + + +def arg_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(description=DESCRIPTION) + parser.add_argument("template_id") + parser.add_argument( + "-g", + "--galaxy-url", + dest="galaxy_url", + default="https://usegalaxy.org/", + help="Galxy target for the landing request", + ) + parser.add_argument( + "-c", + "--catalog", + dest="catalog", + default=None, + help="YAML catalog to load landing request templates from.", + ) + parser.add_argument( + "-s", + 
"--secret", + dest="secret", + default=None, + help="An optional client secret to verify the request against, set to __GEN__ to generate one at random for this request.", + ) + return parser + + +def main(argv=None) -> None: + if argv is None: + argv = sys.argv[1:] + + args = arg_parser().parse_args(argv) + request = Request( + args.template_id, + args.catalog, + args.secret, + args.galaxy_url, + ) + response = generate_claim_url(request) + print(f"Your customized form is located at {response.landing_url}") + + +if __name__ == "__main__": + main() diff --git a/lib/galaxy/tool_util/client/landing_library.catalog.yml b/lib/galaxy/tool_util/client/landing_library.catalog.yml new file mode 100644 index 000000000000..71a1c6e03f62 --- /dev/null +++ b/lib/galaxy/tool_util/client/landing_library.catalog.yml @@ -0,0 +1,21 @@ +# Catalog of artifacts that the landing script can generate landing URLs for against +# the target Galaxy. +simple_workflow: + # Encoded ID for a workflow to target - if workflow_target_type is stored_workflow this + # will always be the latest version of that workflow and stored workflow ID should be provided. + # If instead workflow_target_type is workflow - this is a particular version of a workflow and + # the workflow ID should be provided. + workflow_id: f2db41e1fa331b3e + workflow_target_type: stored_workflow + # request state provides prepopulated parameters for this workflow or stored workflow when the + # user navigates to the landing URL. + request_state: + myinput: + src: url + url: https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/1.bed + ext: txt +int_workflow: + workflow_id: f597429621d6eb2b + workflow_target_type: stored_workflow + request_state: + int_input: 8 diff --git a/lib/galaxy/webapps/galaxy/api/__init__.py b/lib/galaxy/webapps/galaxy/api/__init__.py index 3b96292f955d..260f90cebced 100644 --- a/lib/galaxy/webapps/galaxy/api/__init__.py +++ b/lib/galaxy/webapps/galaxy/api/__init__.py @@ -26,6 +26,7 @@ APIRouter, Form, Header, + Path, Query, Request, Response, @@ -41,7 +42,10 @@ HTTPAuthorizationCredentials, HTTPBearer, ) -from pydantic import ValidationError +from pydantic import ( + UUID4, + ValidationError, +) from pydantic.main import BaseModel from routes import ( Mapper, @@ -618,3 +622,10 @@ def search_query_param(model_name: str, tags: list, free_text_fields: list) -> O title="Search query.", description=description, ) + + +LandingUuidPathParam: UUID4 = Path( + ..., + title="Landing UUID", + description="The UUID used to identify a persisted landing request.", +) diff --git a/lib/galaxy/webapps/galaxy/api/workflows.py b/lib/galaxy/webapps/galaxy/api/workflows.py index 05a475d430e6..002e705c8d98 100644 --- a/lib/galaxy/webapps/galaxy/api/workflows.py +++ b/lib/galaxy/webapps/galaxy/api/workflows.py @@ -42,6 +42,7 @@ ProvidesHistoryContext, ProvidesUserContext, ) +from galaxy.managers.landing import LandingRequestManager from galaxy.managers.workflows import ( MissingToolsException, RefactorRequest, @@ -68,12 +69,15 @@ from galaxy.schema.schema import ( AsyncFile, AsyncTaskResultSummary, + ClaimLandingPayload, + CreateWorkflowLandingRequestPayload, InvocationSortByEnum, InvocationsStateCounts, SetSlugPayload, ShareWithPayload, ShareWithStatus, SharingStatus, + WorkflowLandingRequest, WorkflowSortByEnum, ) from galaxy.schema.workflows import ( @@ -102,6 +106,7 @@ depends, DependsOnTrans, IndexQueryTag, + LandingUuidPathParam, Router, search_query_param, ) @@ -909,6 +914,7 @@ def __get_stored_workflow(self, trans, workflow_id, 
**kwd): @router.cbv class FastAPIWorkflows: service: WorkflowsService = depends(WorkflowsService) + landing_manager: LandingRequestManager = depends(LandingRequestManager) @router.get( "/api/workflows", @@ -1159,6 +1165,39 @@ def show_workflow( ) -> StoredWorkflowDetailed: return self.service.show_workflow(trans, workflow_id, instance, legacy, version) + @router.post("/api/workflow_landings", public=True) + def create_landing( + self, + trans: ProvidesUserContext = DependsOnTrans, + workflow_landing_request: CreateWorkflowLandingRequestPayload = Body(...), + ) -> WorkflowLandingRequest: + try: + return self.landing_manager.create_workflow_landing_request(workflow_landing_request) + except Exception: + log.exception("Problem...") + raise + + @router.post("/api/workflow_landings/{uuid}/claim") + def claim_landing( + self, + trans: ProvidesUserContext = DependsOnTrans, + uuid: UUID4 = LandingUuidPathParam, + payload: Optional[ClaimLandingPayload] = Body(...), + ) -> WorkflowLandingRequest: + try: + return self.landing_manager.claim_workflow_landing_request(trans, uuid, payload) + except Exception: + log.exception("claiim problem...") + raise + + @router.get("/api/workflow_landings/{uuid}") + def get_landing( + self, + trans: ProvidesUserContext = DependsOnTrans, + uuid: UUID4 = LandingUuidPathParam, + ) -> WorkflowLandingRequest: + return self.landing_manager.get_workflow_landing_request(trans, uuid) + StepDetailQueryParam = Annotated[ bool, diff --git a/lib/galaxy/webapps/galaxy/buildapp.py b/lib/galaxy/webapps/galaxy/buildapp.py index 951926dcc35b..37780b8f0f4e 100644 --- a/lib/galaxy/webapps/galaxy/buildapp.py +++ b/lib/galaxy/webapps/galaxy/buildapp.py @@ -227,6 +227,8 @@ def app_pair(global_conf, load_app_kwds=None, wsgi_preflight=True, **kwargs): webapp.add_client_route("/login/start") webapp.add_client_route("/tools/list") webapp.add_client_route("/tools/json") + webapp.add_client_route("/tool_landings/{uuid}") + webapp.add_client_route("/workflow_landings/{uuid}") webapp.add_client_route("/tours") webapp.add_client_route("/tours/{tour_id}") webapp.add_client_route("/user") diff --git a/lib/galaxy/workflow/scheduling_manager.py b/lib/galaxy/workflow/scheduling_manager.py index d01c85d5d6bc..01f7a17ff0e6 100644 --- a/lib/galaxy/workflow/scheduling_manager.py +++ b/lib/galaxy/workflow/scheduling_manager.py @@ -333,26 +333,39 @@ def __schedule(self, workflow_scheduler_id, workflow_scheduler): if not self.monitor_running: return + def __attempt_materialize(self, workflow_invocation, session) -> bool: + try: + hdas_to_materialize = workflow_invocation.inputs_requiring_materialization() + for hda in hdas_to_materialize: + user = RequestUser(user_id=workflow_invocation.history.user_id) + task_request = MaterializeDatasetInstanceTaskRequest( + user=user, + history_id=workflow_invocation.history.id, + source="hda", + content=hda.id, + validate_hashes=True, + ) + self.app.hda_manager.materialize(task_request, in_place=True) + # place back into ready and let it proceed normally on next iteration? 
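The workflow_landings endpoints registered above can be driven with plain HTTP, much like the generate_claim_url helper earlier in this patch. A rough sketch of the create-then-claim round trip, assuming a local Galaxy at http://localhost:8081, a placeholder encoded workflow id, an illustrative input URL, and a placeholder API key for the claiming user:

    import requests

    galaxy_url = "http://localhost:8081"  # assumed local instance
    payload = {
        "workflow_id": "f2db41e1fa331b3e",  # placeholder encoded StoredWorkflow id
        "workflow_target_type": "stored_workflow",
        "request_state": {"myinput": {"src": "url", "url": "https://example.org/1.bed", "ext": "txt"}},
        "client_secret": "mycoolsecret",
    }
    # creation is public / anonymous
    created = requests.post(f"{galaxy_url}/api/workflow_landings", json=payload).json()
    uuid = created["uuid"]
    assert created["state"] == "unclaimed"

    # claiming requires the matching secret and an authenticated user
    claimed = requests.post(
        f"{galaxy_url}/api/workflow_landings/{uuid}/claim",
        json={"client_secret": "mycoolsecret"},
        headers={"x-api-key": "YOUR_API_KEY"},  # placeholder key
    ).json()
    assert claimed["state"] == "claimed"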
+ workflow_invocation.set_state(model.WorkflowInvocation.states.READY) + session.add(workflow_invocation) + session.commit() + return True + except Exception as e: + log.info(f"Failed to materialize dataset for workflow {workflow_invocation.id} - {e}") + workflow_invocation.fail() + session.add(workflow_invocation) + session.commit() + return False + def __attempt_schedule(self, invocation_id, workflow_scheduler): with self.app.model.context() as session: workflow_invocation = session.get(model.WorkflowInvocation, invocation_id) if workflow_invocation.state == workflow_invocation.states.REQUIRES_MATERIALIZATION: - hdas_to_materialize = workflow_invocation.inputs_requiring_materialization() - for hda in hdas_to_materialize: - user = RequestUser(user_id=workflow_invocation.history.user_id) - task_request = MaterializeDatasetInstanceTaskRequest( - user=user, - history_id=workflow_invocation.history.id, - source="hda", - content=hda.id, - ) - self.app.hda_manager.materialize(task_request, in_place=True) - # place back into ready and let it proceed normally on next iteration? - workflow_invocation.set_state(model.WorkflowInvocation.states.READY) - session.add(workflow_invocation) - session.commit() - if self.app.config.workflow_scheduling_separate_materialization_iteration: - return None + if not self.__attempt_materialize(workflow_invocation, session): + return + if self.app.config.workflow_scheduling_separate_materialization_iteration: + return None try: if workflow_invocation.state == workflow_invocation.states.CANCELLING: workflow_invocation.cancel_invocation_steps() diff --git a/lib/galaxy_test/api/test_landing.py b/lib/galaxy_test/api/test_landing.py new file mode 100644 index 000000000000..84057f92097e --- /dev/null +++ b/lib/galaxy_test/api/test_landing.py @@ -0,0 +1,52 @@ +from base64 import b64encode +from typing import ( + Any, + Dict, +) + +from galaxy.schema.schema import CreateWorkflowLandingRequestPayload +from galaxy_test.base.populators import ( + DatasetPopulator, + skip_without_tool, + WorkflowPopulator, +) +from ._framework import ApiTestCase + + +class TestLandingApi(ApiTestCase): + dataset_populator: DatasetPopulator + workflow_populator: WorkflowPopulator + + def setUp(self): + super().setUp() + self.dataset_populator = DatasetPopulator(self.galaxy_interactor) + self.workflow_populator = WorkflowPopulator(self.galaxy_interactor) + + @skip_without_tool("cat1") + def test_workflow_landing(self): + workflow_id = self.workflow_populator.simple_workflow("test_landing") + workflow_target_type = "stored_workflow" + request_state = _workflow_request_state() + request = CreateWorkflowLandingRequestPayload( + workflow_id=workflow_id, + workflow_target_type=workflow_target_type, + request_state=request_state, + ) + response = self.dataset_populator.create_workflow_landing(request) + assert response.workflow_id == workflow_id + assert response.workflow_target_type == workflow_target_type + + response = self.dataset_populator.claim_workflow_landing(response.uuid) + assert response.workflow_id == workflow_id + assert response.workflow_target_type == workflow_target_type + + +def _workflow_request_state() -> Dict[str, Any]: + deferred = False + input_b64_1 = b64encode(b"1 2 3").decode("utf-8") + input_b64_2 = b64encode(b"4 5 6").decode("utf-8") + inputs = { + "WorkflowInput1": {"src": "url", "url": f"base64://{input_b64_1}", "ext": "txt", "deferred": deferred}, + "WorkflowInput2": {"src": "url", "url": f"base64://{input_b64_2}", "ext": "txt", "deferred": deferred}, + } + return inputs 
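The API test above builds workflow inputs from base64:// URIs so it needs no external server. A small sketch of constructing such an entry from in-memory bytes, mirroring _workflow_request_state (the label and content are examples only):

    from base64 import b64encode
    from typing import Any, Dict


    def base64_url_input(content: bytes, ext: str = "txt", deferred: bool = False) -> Dict[str, Any]:
        # Build a {"src": "url", ...} input whose URL embeds the content itself.
        payload = b64encode(content).decode("utf-8")
        return {"src": "url", "url": f"base64://{payload}", "ext": ext, "deferred": deferred}


    inputs = {"WorkflowInput1": base64_url_input(b"1 2 3")}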
diff --git a/lib/galaxy_test/api/test_workflows.py b/lib/galaxy_test/api/test_workflows.py index 5ffb1ca347e8..953aac0ba9e9 100644 --- a/lib/galaxy_test/api/test_workflows.py +++ b/lib/galaxy_test/api/test_workflows.py @@ -1537,16 +1537,133 @@ def test_run_workflow(self): def test_run_workflow_by_deferred_url(self): with self.dataset_populator.test_history() as history_id: self.__run_cat_workflow(inputs_by="deferred_url", history_id=history_id) - input_dataset_details = self.dataset_populator.get_history_dataset_details(history_id, hid=1) + # it did an upload of the inputs anyway - so this is a 3 is a bit of a hack... + # TODO fix this. + input_dataset_details = self.dataset_populator.get_history_dataset_details(history_id, hid=3) assert input_dataset_details["state"] == "deferred" @skip_without_tool("cat1") def test_run_workflow_by_url(self): with self.dataset_populator.test_history() as history_id: self.__run_cat_workflow(inputs_by="url", history_id=history_id) - input_dataset_details = self.dataset_populator.get_history_dataset_details(history_id, hid=1) + input_dataset_details = self.dataset_populator.get_history_dataset_details( + history_id, hid=3, assert_ok=False + ) assert input_dataset_details["state"] == "ok" + @skip_without_tool("cat1") + def test_run_workflow_with_valid_url_hashes(self): + with self.dataset_populator.test_history() as history_id: + workflow = self.workflow_populator.load_workflow(name="test_for_run_invalid_url_hashes") + workflow_id = self.workflow_populator.create_workflow(workflow) + input_b64_1 = base64.b64encode(b"1 2 3").decode("utf-8") + input_b64_2 = base64.b64encode(b"4 5 6").decode("utf-8") + deferred = False + hashes_1 = [{"hash_function": "MD5", "hash_value": "5ba48b6e5a7c4d4930fda256f411e55b"}] + hashes_2 = [{"hash_function": "MD5", "hash_value": "ad0f811416f7ed2deb9122007d649fb0"}] + inputs = { + "WorkflowInput1": { + "src": "url", + "url": f"base64://{input_b64_1}", + "ext": "txt", + "deferred": deferred, + "hashes": hashes_1, + }, + "WorkflowInput2": { + "src": "url", + "url": f"base64://{input_b64_2}", + "ext": "txt", + "deferred": deferred, + "hashes": hashes_2, + }, + } + workflow_request = dict( + history=f"hist_id={history_id}", + ) + workflow_request["inputs"] = json.dumps(inputs) + workflow_request["inputs_by"] = "name" + invocation_id = self.workflow_populator.invoke_workflow_and_wait( + workflow_id, request=workflow_request + ).json()["id"] + invocation = self._invocation_details(workflow_id, invocation_id) + assert invocation["state"] == "scheduled", invocation + invocation_jobs = self.workflow_populator.get_invocation_jobs(invocation_id) + for job in invocation_jobs: + assert job["state"] == "ok" + + @skip_without_tool("cat1") + def test_run_workflow_with_invalid_url_hashes(self): + with self.dataset_populator.test_history() as history_id: + workflow = self.workflow_populator.load_workflow(name="test_for_run_invalid_url_hashes") + workflow_id = self.workflow_populator.create_workflow(workflow) + input_b64_1 = base64.b64encode(b"1 2 3").decode("utf-8") + input_b64_2 = base64.b64encode(b"4 5 6").decode("utf-8") + deferred = False + hashes = [{"hash_function": "MD5", "hash_value": "abadmd5sumhash"}] + inputs = { + "WorkflowInput1": { + "src": "url", + "url": f"base64://{input_b64_1}", + "ext": "txt", + "deferred": deferred, + "hashes": hashes, + }, + "WorkflowInput2": { + "src": "url", + "url": f"base64://{input_b64_2}", + "ext": "txt", + "deferred": deferred, + "hashes": hashes, + }, + } + workflow_request = dict( + 
history=f"hist_id={history_id}", + ) + workflow_request["inputs"] = json.dumps(inputs) + workflow_request["inputs_by"] = "name" + invocation_id = self.workflow_populator.invoke_workflow_and_wait( + workflow_id, request=workflow_request, assert_ok=False + ).json()["id"] + invocation = self._invocation_details(workflow_id, invocation_id) + assert invocation["state"] == "scheduled", invocation + invocation_jobs = self.workflow_populator.get_invocation_jobs(invocation_id) + for job in invocation_jobs: + assert job["state"] == "paused" + + @skip_without_tool("cat1") + def test_run_workflow_with_invalid_url(self): + with self.dataset_populator.test_history() as history_id: + workflow = self.workflow_populator.load_workflow(name="test_for_run_invalid_url") + workflow_id = self.workflow_populator.create_workflow(workflow) + deferred = False + inputs = { + "WorkflowInput1": { + "src": "url", + "url": "gxfiles://thisurl/doesnt/work", + "ext": "txt", + "deferred": deferred, + }, + "WorkflowInput2": { + "src": "url", + "url": "gxfiles://thisurl/doesnt/work", + "ext": "txt", + "deferred": deferred, + }, + } + workflow_request = dict( + history=f"hist_id={history_id}", + ) + workflow_request["inputs"] = json.dumps(inputs) + workflow_request["inputs_by"] = "name" + invocation_id = self.workflow_populator.invoke_workflow_and_wait( + workflow_id, request=workflow_request, assert_ok=False + ).json()["id"] + invocation = self._invocation_details(workflow_id, invocation_id) + assert invocation["state"] == "scheduled", invocation + invocation_jobs = self.workflow_populator.get_invocation_jobs(invocation_id) + for job in invocation_jobs: + assert job["state"] == "paused" + def __run_cat_workflow(self, inputs_by, history_id: Optional[str] = None): workflow = self.workflow_populator.load_workflow(name="test_for_run") workflow["steps"]["0"]["uuid"] = str(uuid4()) diff --git a/lib/galaxy_test/base/populators.py b/lib/galaxy_test/base/populators.py index 81afbbb28062..5636171cb46a 100644 --- a/lib/galaxy_test/base/populators.py +++ b/lib/galaxy_test/base/populators.py @@ -77,10 +77,17 @@ ImporterGalaxyInterface, ) from gxformat2.yaml import ordered_load +from pydantic import UUID4 from requests import Response from rocrate.rocrate import ROCrate from typing_extensions import Literal +from galaxy.schema.schema import ( + CreateToolLandingRequestPayload, + CreateWorkflowLandingRequestPayload, + ToolLandingRequest, + WorkflowLandingRequest, +) from galaxy.tool_util.client.staging import InteractorStaging from galaxy.tool_util.cwl.util import ( download_output, @@ -369,7 +376,9 @@ class BasePopulator(metaclass=ABCMeta): galaxy_interactor: ApiTestInteractor @abstractmethod - def _post(self, route, data=None, files=None, headers=None, admin=False, json: bool = False) -> Response: + def _post( + self, route, data=None, files=None, headers=None, admin=False, json: bool = False, anon: bool = False + ) -> Response: """POST data to target Galaxy instance on specified route.""" @abstractmethod @@ -758,6 +767,34 @@ def _wait_for_purge(): wait_on(_wait_for_purge, "dataset to become purged", timeout=2) return self._get(dataset_url) + def create_tool_landing(self, payload: CreateToolLandingRequestPayload) -> ToolLandingRequest: + create_url = "tool_landings" + json = payload.model_dump(mode="json") + create_response = self._post(create_url, json, json=True, anon=True) + api_asserts.assert_status_code_is(create_response, 200) + create_response.raise_for_status() + return ToolLandingRequest.model_validate(create_response.json()) + + 
def create_workflow_landing(self, payload: CreateWorkflowLandingRequestPayload) -> WorkflowLandingRequest: + create_url = "workflow_landings" + json = payload.model_dump(mode="json") + create_response = self._post(create_url, json, json=True, anon=True) + api_asserts.assert_status_code_is(create_response, 200) + create_response.raise_for_status() + return WorkflowLandingRequest.model_validate(create_response.json()) + + def claim_tool_landing(self, uuid: UUID4) -> ToolLandingRequest: + url = f"tool_landings/{uuid}/claim" + claim_response = self._post(url, {"client_secret": "foobar"}, json=True) + api_asserts.assert_status_code_is(claim_response, 200) + return ToolLandingRequest.model_validate(claim_response.json()) + + def claim_workflow_landing(self, uuid: UUID4) -> WorkflowLandingRequest: + url = f"workflow_landings/{uuid}/claim" + claim_response = self._post(url, {"client_secret": "foobar"}, json=True) + api_asserts.assert_status_code_is(claim_response, 200) + return WorkflowLandingRequest.model_validate(claim_response.json()) + def create_tool_from_path(self, tool_path: str) -> Dict[str, Any]: tool_directory = os.path.dirname(os.path.abspath(tool_path)) payload = dict( @@ -1664,8 +1701,10 @@ class GalaxyInteractorHttpMixin: def _api_key(self): return self.galaxy_interactor.api_key - def _post(self, route, data=None, files=None, headers=None, admin=False, json: bool = False) -> Response: - return self.galaxy_interactor.post(route, data, files=files, admin=admin, headers=headers, json=json) + def _post( + self, route, data=None, files=None, headers=None, admin=False, json: bool = False, anon: bool = False + ) -> Response: + return self.galaxy_interactor.post(route, data, files=files, admin=admin, headers=headers, json=json, anon=anon) def _put(self, route, data=None, headers=None, admin=False, json: bool = False): return self.galaxy_interactor.put(route, data, headers=headers, admin=admin, json=json) @@ -1956,6 +1995,7 @@ def invoke_workflow_and_wait( history_id: Optional[str] = None, inputs: Optional[dict] = None, request: Optional[dict] = None, + assert_ok: bool = True, ) -> Response: invoke_return = self.invoke_workflow(workflow_id, history_id=history_id, inputs=inputs, request=request) invoke_return.raise_for_status() @@ -1968,7 +2008,7 @@ def invoke_workflow_and_wait( if history_id.startswith("hist_id="): history_id = history_id[len("hist_id=") :] assert history_id - self.wait_for_workflow(workflow_id, invocation_id, history_id, assert_ok=True) + self.wait_for_workflow(workflow_id, invocation_id, history_id, assert_ok=assert_ok) return invoke_return def workflow_report_json(self, workflow_id: str, invocation_id: str) -> dict: @@ -3320,11 +3360,14 @@ def _api_url(self): def _get(self, route, data=None, headers=None, admin=False) -> Response: return self._gi.make_get_request(self._url(route), params=data) - def _post(self, route, data=None, files=None, headers=None, admin=False, json: bool = False) -> Response: + def _post( + self, route, data=None, files=None, headers=None, admin=False, json: bool = False, anon: bool = False + ) -> Response: if headers is None: headers = {} headers = headers.copy() - headers["x-api-key"] = self._gi.key + if not anon: + headers["x-api-key"] = self._gi.key return requests.post(self._url(route), data=data, headers=headers, timeout=DEFAULT_SOCKET_TIMEOUT) def _put(self, route, data=None, headers=None, admin=False, json: bool = False): diff --git a/lib/galaxy_test/selenium/framework.py b/lib/galaxy_test/selenium/framework.py index 
310c88c88053..9c300dc7b0de 100644 --- a/lib/galaxy_test/selenium/framework.py +++ b/lib/galaxy_test/selenium/framework.py @@ -744,12 +744,14 @@ def _get(self, route, data=None, headers=None, admin=False) -> Response: response = requests.get(full_url, params=data, cookies=cookies, headers=headers, timeout=DEFAULT_SOCKET_TIMEOUT) return response - def _post(self, route, data=None, files=None, headers=None, admin=False, json: bool = False) -> Response: + def _post( + self, route, data=None, files=None, headers=None, admin=False, json: bool = False, anon: bool = False + ) -> Response: full_url = self.selenium_context.build_url(f"api/{route}", for_selenium=False) cookies = None if admin: full_url = f"{full_url}?key={self._mixin_admin_api_key}" - else: + elif not anon: cookies = self.selenium_context.selenium_to_requests_cookies() request_kwd = prepare_request_params(data=data, files=files, as_json=json, headers=headers, cookies=cookies) response = requests.post(full_url, timeout=DEFAULT_SOCKET_TIMEOUT, **request_kwd) diff --git a/test/unit/app/managers/test_landing.py b/test/unit/app/managers/test_landing.py new file mode 100644 index 000000000000..211e1f8010e5 --- /dev/null +++ b/test/unit/app/managers/test_landing.py @@ -0,0 +1,176 @@ +from uuid import uuid4 + +from galaxy.exceptions import ( + InsufficientPermissionsException, + ItemAlreadyClaimedException, + ObjectNotFound, +) +from galaxy.managers.landing import LandingRequestManager +from galaxy.model import ( + StoredWorkflow, + Workflow, +) +from galaxy.model.base import transaction +from galaxy.schema.schema import ( + ClaimLandingPayload, + CreateToolLandingRequestPayload, + CreateWorkflowLandingRequestPayload, + LandingRequestState, + ToolLandingRequest, + WorkflowLandingRequest, +) +from .base import BaseTestCase + +TEST_TOOL_ID = "cat1" +TEST_TOOL_VERSION = "1.0.0" +TEST_STATE = { + "input1": { + "src": "url", + "url": "https://raw.githubusercontent.com/galaxyproject/planemo/7be1bf5b3971a43eaa73f483125bfb8cabf1c440/tests/data/hello.txt", + "ext": "txt", + }, +} +CLIENT_SECRET = "mycoolsecret" + + +class TestLanding(BaseTestCase): + + def setUp(self): + super().setUp() + self.landing_manager = LandingRequestManager(self.trans.sa_session, self.app.security) + + def test_tool_landing_requests_typical_flow(self): + landing_request: ToolLandingRequest = self.landing_manager.create_tool_landing_request(self._tool_request) + assert landing_request.state == LandingRequestState.UNCLAIMED + assert landing_request.uuid is not None + uuid = landing_request.uuid + claim_payload = ClaimLandingPayload(client_secret=CLIENT_SECRET) + landing_request = self.landing_manager.claim_tool_landing_request(self.trans, uuid, claim_payload) + assert landing_request.state == LandingRequestState.CLAIMED + assert landing_request.uuid == uuid + landing_request = self.landing_manager.get_tool_landing_request(self.trans, uuid) + assert landing_request.state == LandingRequestState.CLAIMED + assert landing_request.uuid == uuid + + def test_tool_landing_requests_requires_matching_client_secret(self): + landing_request: ToolLandingRequest = self.landing_manager.create_tool_landing_request(self._tool_request) + uuid = landing_request.uuid + claim_payload = ClaimLandingPayload(client_secret="wrongsecret") + exception = None + try: + self.landing_manager.claim_tool_landing_request(self.trans, uuid, claim_payload) + except InsufficientPermissionsException as e: + exception = e + assert exception is not None + + def test_tool_landing_requests_get_requires_claim(self): + 
landing_request: ToolLandingRequest = self.landing_manager.create_tool_landing_request(self._tool_request) + uuid = landing_request.uuid + exception = None + try: + self.landing_manager.get_tool_landing_request(self.trans, uuid) + except InsufficientPermissionsException as e: + exception = e + assert exception is not None + + def test_cannot_reclaim_tool_landing(self): + landing_request: ToolLandingRequest = self.landing_manager.create_tool_landing_request(self._tool_request) + assert landing_request.state == LandingRequestState.UNCLAIMED + uuid = landing_request.uuid + claim_payload = ClaimLandingPayload(client_secret=CLIENT_SECRET) + landing_request = self.landing_manager.claim_tool_landing_request(self.trans, uuid, claim_payload) + assert landing_request.state == LandingRequestState.CLAIMED + exception = None + try: + self.landing_manager.claim_tool_landing_request(self.trans, uuid, claim_payload) + except ItemAlreadyClaimedException as e: + exception = e + assert exception + + def test_get_tool_unknown_claim(self): + exception = None + try: + self.landing_manager.get_tool_landing_request(self.trans, uuid4()) + except ObjectNotFound as e: + exception = e + assert exception + + def test_stored_workflow_landing_requests_typical_flow(self): + landing_request: WorkflowLandingRequest = self.landing_manager.create_workflow_landing_request( + self._stored_workflow_request + ) + assert landing_request.state == LandingRequestState.UNCLAIMED + assert landing_request.uuid is not None + assert landing_request.workflow_target_type == "stored_workflow" + uuid = landing_request.uuid + claim_payload = ClaimLandingPayload(client_secret=CLIENT_SECRET) + landing_request = self.landing_manager.claim_workflow_landing_request(self.trans, uuid, claim_payload) + assert landing_request.state == LandingRequestState.CLAIMED + assert landing_request.uuid == uuid + assert landing_request.workflow_target_type == "stored_workflow" + landing_request = self.landing_manager.get_workflow_landing_request(self.trans, uuid) + assert landing_request.state == LandingRequestState.CLAIMED + assert landing_request.uuid == uuid + assert landing_request.workflow_target_type == "stored_workflow" + + def test_workflow_landing_requests_typical_flow(self): + landing_request: WorkflowLandingRequest = self.landing_manager.create_workflow_landing_request( + self._workflow_request + ) + assert landing_request.state == LandingRequestState.UNCLAIMED + assert landing_request.uuid is not None + assert landing_request.workflow_target_type == "workflow" + uuid = landing_request.uuid + claim_payload = ClaimLandingPayload(client_secret=CLIENT_SECRET) + landing_request = self.landing_manager.claim_workflow_landing_request(self.trans, uuid, claim_payload) + assert landing_request.state == LandingRequestState.CLAIMED + assert landing_request.uuid == uuid + assert landing_request.workflow_target_type == "workflow" + landing_request = self.landing_manager.get_workflow_landing_request(self.trans, uuid) + assert landing_request.state == LandingRequestState.CLAIMED + assert landing_request.uuid == uuid + assert landing_request.workflow_target_type == "workflow" + + @property + def _tool_request(self) -> CreateToolLandingRequestPayload: + return CreateToolLandingRequestPayload( + tool_id=TEST_TOOL_ID, + tool_version=TEST_TOOL_VERSION, + request_state=TEST_STATE.copy(), + client_secret=CLIENT_SECRET, + ) + + @property + def _stored_workflow_request(self) -> CreateWorkflowLandingRequestPayload: + sa_session = self.app.model.context + stored_workflow = 
StoredWorkflow() + stored_workflow.user = self.trans.user + sa_session.add(stored_workflow) + with transaction(sa_session): + sa_session.commit() + + return CreateWorkflowLandingRequestPayload( + workflow_id=self.app.security.encode_id(stored_workflow.id), + workflow_target_type="stored_workflow", + request_state=TEST_STATE.copy(), + client_secret=CLIENT_SECRET, + ) + + @property + def _workflow_request(self) -> CreateWorkflowLandingRequestPayload: + sa_session = self.app.model.context + stored_workflow = StoredWorkflow() + stored_workflow.user = self.trans.user + workflow = Workflow() + workflow.stored_workflow = stored_workflow + sa_session.add(stored_workflow) + sa_session.add(workflow) + with transaction(sa_session): + sa_session.commit() + + return CreateWorkflowLandingRequestPayload( + workflow_id=self.app.security.encode_id(workflow.id), + workflow_target_type="workflow", + request_state=TEST_STATE.copy(), + client_secret=CLIENT_SECRET, + ) From 8eb493a5234c4962d59a62ccac72aa99c7d96835 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Fri, 20 Sep 2024 10:54:18 -0400 Subject: [PATCH 10/14] Schema updates for landing. --- client/src/api/schema/schema.ts | 231 ++++++++++++++++++++++++++++++++ 1 file changed, 231 insertions(+) diff --git a/client/src/api/schema/schema.ts b/client/src/api/schema/schema.ts index e14991a844ac..c4ac46076c00 100644 --- a/client/src/api/schema/schema.ts +++ b/client/src/api/schema/schema.ts @@ -4880,6 +4880,57 @@ export interface paths { patch?: never; trace?: never; }; + "/api/workflow_landings": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + get?: never; + put?: never; + /** Create Landing */ + post: operations["create_landing_api_workflow_landings_post"]; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/workflow_landings/{uuid}": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + /** Get Landing */ + get: operations["get_landing_api_workflow_landings__uuid__get"]; + put?: never; + post?: never; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; + "/api/workflow_landings/{uuid}/claim": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + get?: never; + put?: never; + /** Claim Landing */ + post: operations["claim_landing_api_workflow_landings__uuid__claim_post"]; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; "/api/workflows": { parameters: { query?: never; @@ -6270,6 +6321,11 @@ export interface components { */ type: string; }; + /** ClaimLandingPayload */ + ClaimLandingPayload: { + /** Client Secret */ + client_secret?: string | null; + }; /** CleanableItemsSummary */ CleanableItemsSummary: { /** @@ -6915,6 +6971,20 @@ export interface components { */ url: string; }; + /** CreateWorkflowLandingRequestPayload */ + CreateWorkflowLandingRequestPayload: { + /** Client Secret */ + client_secret?: string | null; + /** Request State */ + request_state?: Record | null; + /** Workflow Id */ + workflow_id: string; + /** + * Workflow Target Type + * @enum {string} + */ + workflow_target_type: "stored_workflow" | "workflow"; + }; /** CreatedEntryResponse */ CreatedEntryResponse: { /** @@ -12797,6 +12867,11 @@ export interface components { */ value: string; }; + /** + * LandingRequestState + * @enum {string} + */ + LandingRequestState: "unclaimed" | "claimed"; /** LegacyLibraryPermissionsPayload */ 
LegacyLibraryPermissionsPayload: { /** @@ -17448,6 +17523,25 @@ export interface components { [key: string]: number; }; }; + /** WorkflowLandingRequest */ + WorkflowLandingRequest: { + /** Request State */ + request_state: Record; + state: components["schemas"]["LandingRequestState"]; + /** + * UUID + * Format: uuid4 + * @description Universal unique identifier for this dataset. + */ + uuid: string; + /** Workflow Id */ + workflow_id: string; + /** + * Workflow Target Type + * @enum {string} + */ + workflow_target_type: "stored_workflow" | "workflow"; + }; /** WriteInvocationStoreToPayload */ WriteInvocationStoreToPayload: { /** @@ -33173,6 +33267,143 @@ export interface operations { }; }; }; + create_landing_api_workflow_landings_post: { + parameters: { + query?: never; + header?: { + /** @description The user ID that will be used to effectively make this API call. Only admins and designated users can make API calls on behalf of other users. */ + "run-as"?: string | null; + }; + path?: never; + cookie?: never; + }; + requestBody: { + content: { + "application/json": components["schemas"]["CreateWorkflowLandingRequestPayload"]; + }; + }; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["WorkflowLandingRequest"]; + }; + }; + /** @description Request Error */ + "4XX": { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["MessageExceptionModel"]; + }; + }; + /** @description Server Error */ + "5XX": { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["MessageExceptionModel"]; + }; + }; + }; + }; + get_landing_api_workflow_landings__uuid__get: { + parameters: { + query?: never; + header?: { + /** @description The user ID that will be used to effectively make this API call. Only admins and designated users can make API calls on behalf of other users. */ + "run-as"?: string | null; + }; + path: { + /** @description The UUID used to identify a persisted landing request. */ + uuid: string; + }; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["WorkflowLandingRequest"]; + }; + }; + /** @description Request Error */ + "4XX": { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["MessageExceptionModel"]; + }; + }; + /** @description Server Error */ + "5XX": { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["MessageExceptionModel"]; + }; + }; + }; + }; + claim_landing_api_workflow_landings__uuid__claim_post: { + parameters: { + query?: never; + header?: { + /** @description The user ID that will be used to effectively make this API call. Only admins and designated users can make API calls on behalf of other users. */ + "run-as"?: string | null; + }; + path: { + /** @description The UUID used to identify a persisted landing request. 
*/ + uuid: string; + }; + cookie?: never; + }; + requestBody: { + content: { + "application/json": components["schemas"]["ClaimLandingPayload"] | null; + }; + }; + responses: { + /** @description Successful Response */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["WorkflowLandingRequest"]; + }; + }; + /** @description Request Error */ + "4XX": { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["MessageExceptionModel"]; + }; + }; + /** @description Server Error */ + "5XX": { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["MessageExceptionModel"]; + }; + }; + }; + }; index_api_workflows_get: { parameters: { query?: { From 93f2af6a444ed8db11898f5a3286abb1ecefb9c4 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Tue, 24 Sep 2024 13:52:51 -0400 Subject: [PATCH 11/14] Allow dataset and source checksum validation during materialization. --- lib/galaxy/managers/hdas.py | 4 +- lib/galaxy/model/__init__.py | 24 ++++++- lib/galaxy/model/deferred.py | 70 +++++++++++++++---- .../model/unittest_utils/store_fixtures.py | 2 +- lib/galaxy/schema/schema.py | 10 ++- lib/galaxy/schema/tasks.py | 5 ++ lib/galaxy/tools/data_fetch.py | 8 +-- lib/galaxy/util/hash_util.py | 8 +++ .../webapps/galaxy/api/history_contents.py | 6 ++ .../galaxy/services/history_contents.py | 1 + lib/galaxy_test/base/populators.py | 6 +- ...test_materialize_dataset_instance_tasks.py | 20 ++++++ .../unit/data/test_dataset_materialization.py | 63 +++++++++++++++++ 13 files changed, 202 insertions(+), 25 deletions(-) diff --git a/lib/galaxy/managers/hdas.py b/lib/galaxy/managers/hdas.py index 2b6d20388398..9f848c412618 100644 --- a/lib/galaxy/managers/hdas.py +++ b/lib/galaxy/managers/hdas.py @@ -190,7 +190,9 @@ def materialize(self, request: MaterializeDatasetInstanceTaskRequest, in_place: else: dataset_instance = self.ldda_manager.get_accessible(request.content, user) history = self.app.history_manager.by_id(request.history_id) - new_hda = materializer.ensure_materialized(dataset_instance, target_history=history, in_place=in_place) + new_hda = materializer.ensure_materialized( + dataset_instance, target_history=history, validate_hashes=request.validate_hashes, in_place=in_place + ) if not in_place: history.add_dataset(new_hda, set_hid=True) session = self.session() diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 13a183212562..9b59c616e7ac 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -214,6 +214,7 @@ WorkflowMappingField, ) from galaxy.util.hash_util import ( + HashFunctionNameEnum, md5_hash_str, new_insecure_hash, ) @@ -4493,7 +4494,17 @@ def copy(self) -> "DatasetSource": return new_source -class DatasetSourceHash(Base, Serializable): +class HasHashFunctionName: + hash_function: Mapped[Optional[str]] + + @property + def hash_func_name(self) -> HashFunctionNameEnum: + as_str = self.hash_function + assert as_str + return HashFunctionNameEnum(self.hash_function) + + +class DatasetSourceHash(Base, Serializable, HasHashFunctionName): __tablename__ = "dataset_source_hash" id: Mapped[int] = mapped_column(primary_key=True) @@ -4518,7 +4529,7 @@ def copy(self) -> "DatasetSourceHash": return new_hash -class DatasetHash(Base, Dictifiable, Serializable): +class DatasetHash(Base, Dictifiable, Serializable, HasHashFunctionName): __tablename__ = "dataset_hash" id: Mapped[int] = mapped_column(primary_key=True) @@ 
-4547,6 +4558,15 @@ def copy(self) -> "DatasetHash": new_hash.extra_files_path = self.extra_files_path return new_hash + @property + def hash_func_name(self) -> HashFunctionNameEnum: + as_str = self.hash_function + assert as_str + return HashFunctionNameEnum(self.hash_function) + + +DescribesHash = Union[DatasetSourceHash, DatasetHash] + def datatype_for_extension(extension, datatypes_registry=None) -> "Data": if extension is not None: diff --git a/lib/galaxy/model/deferred.py b/lib/galaxy/model/deferred.py index 32ee9f954dda..f9b5f1414d3c 100644 --- a/lib/galaxy/model/deferred.py +++ b/lib/galaxy/model/deferred.py @@ -4,6 +4,7 @@ import shutil from typing import ( cast, + List, NamedTuple, Optional, Union, @@ -25,7 +26,9 @@ Dataset, DatasetCollection, DatasetCollectionElement, + DatasetHash, DatasetSource, + DescribesHash, History, HistoryDatasetAssociation, HistoryDatasetCollectionAssociation, @@ -36,6 +39,7 @@ ObjectStore, ObjectStorePopulator, ) +from galaxy.util.hash_util import verify_hash log = logging.getLogger(__name__) @@ -95,6 +99,7 @@ def ensure_materialized( self, dataset_instance: Union[HistoryDatasetAssociation, LibraryDatasetDatasetAssociation], target_history: Optional[History] = None, + validate_hashes: bool = False, in_place: bool = False, ) -> HistoryDatasetAssociation: """Create a new detached dataset instance from the supplied instance. @@ -107,15 +112,21 @@ def ensure_materialized( if dataset.state != Dataset.states.DEFERRED and isinstance(dataset_instance, HistoryDatasetAssociation): return dataset_instance - materialized_dataset = Dataset() - materialized_dataset.state = Dataset.states.OK - materialized_dataset.deleted = False - materialized_dataset.purged = False - materialized_dataset.sources = [s.copy() for s in dataset.sources] - materialized_dataset.hashes = [h.copy() for h in dataset.hashes] - + materialized_dataset_hashes = [h.copy() for h in dataset.hashes] + if in_place: + materialized_dataset = dataset_instance.dataset + materialized_dataset.state = Dataset.states.OK + else: + materialized_dataset = Dataset() + materialized_dataset.state = Dataset.states.OK + materialized_dataset.deleted = False + materialized_dataset.purged = False + materialized_dataset.sources = [s.copy() for s in dataset.sources] + materialized_dataset.hashes = materialized_dataset_hashes target_source = self._find_closest_dataset_source(dataset) transient_paths = None + + exception_materializing: Optional[Exception] = None if attached: object_store_populator = self._object_store_populator assert object_store_populator @@ -131,17 +142,28 @@ def ensure_materialized( with transaction(sa_session): sa_session.commit() object_store_populator.set_dataset_object_store_id(materialized_dataset) - path = self._stream_source(target_source, datatype=dataset_instance.datatype) - object_store.update_from_file(materialized_dataset, file_name=path) + try: + path = self._stream_source( + target_source, dataset_instance.datatype, validate_hashes, materialized_dataset_hashes + ) + object_store.update_from_file(materialized_dataset, file_name=path) + materialized_dataset.set_size() + except Exception as e: + exception_materializing = e else: transient_path_mapper = self._transient_path_mapper assert transient_path_mapper transient_paths = transient_path_mapper.transient_paths_for(dataset) # TODO: optimize this by streaming right to this path... # TODO: take into acount transform and ensure we are and are not modifying the file as appropriate. 
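ensure_materialized above threads a validate_hashes flag down to _stream_source, which (as shown below) re-checks both the source hashes and the dataset hashes against the downloaded file via the verify_hash helper added to galaxy.util.hash_util later in this patch. A minimal standalone sketch of that kind of check, assuming the hash function name is one hashlib recognises once normalised (e.g. "SHA-256" becomes "sha256"); the real helper relies on Galaxy's memory-bound hexdigest utility instead:

    import hashlib


    def verify_file_hash(path: str, hash_func_name: str, expected: str, what: str = "path") -> None:
        # Stream the file through hashlib and fail loudly on a mismatch (sketch of verify_hash).
        digest = hashlib.new(hash_func_name.replace("-", "").lower())
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(1024 * 1024), b""):
                digest.update(chunk)
        calculated = digest.hexdigest()
        if calculated != expected:
            raise Exception(
                f"Failed to validate {what} with [{hash_func_name}] - expected [{expected}] got [{calculated}]"
            )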
- path = self._stream_source(target_source, datatype=dataset_instance.datatype) - shutil.move(path, transient_paths.external_filename) - materialized_dataset.external_filename = transient_paths.external_filename + try: + path = self._stream_source( + target_source, dataset_instance.datatype, validate_hashes, materialized_dataset_hashes + ) + shutil.move(path, transient_paths.external_filename) + materialized_dataset.external_filename = transient_paths.external_filename + except Exception as e: + exception_materializing = e history = target_history if history is None and isinstance(dataset_instance, HistoryDatasetAssociation): @@ -159,6 +181,11 @@ def ensure_materialized( else: assert isinstance(dataset_instance, HistoryDatasetAssociation) materialized_dataset_instance = cast(HistoryDatasetAssociation, dataset_instance) + if exception_materializing is not None: + materialized_dataset.state = Dataset.states.ERROR + materialized_dataset_instance.info = ( + f"Failed to materialize deferred dataset with exception: {exception_materializing}" + ) if attached: sa_session = self._sa_session if sa_session is None: @@ -184,11 +211,17 @@ def ensure_materialized( materialized_dataset_instance.metadata_deferred = False return materialized_dataset_instance - def _stream_source(self, target_source: DatasetSource, datatype) -> str: + def _stream_source( + self, target_source: DatasetSource, datatype, validate_hashes: bool, dataset_hashes: List[DatasetHash] + ) -> str: source_uri = target_source.source_uri if source_uri is None: raise Exception("Cannot stream from dataset source without specified source_uri") path = stream_url_to_file(source_uri, file_sources=self._file_sources) + if validate_hashes and target_source.hashes: + for source_hash in target_source.hashes: + _validate_hash(path, source_hash, "downloaded file") + transform = target_source.transform or [] to_posix_lines = False spaces_to_tabs = False @@ -210,6 +243,11 @@ def _stream_source(self, target_source: DatasetSource, datatype) -> str: path = convert_result.converted_path if datatype_groom: datatype.groom_dataset_content(path) + + if validate_hashes and dataset_hashes: + for dataset_hash in dataset_hashes: + _validate_hash(path, dataset_hash, "dataset contents") + return path def _find_closest_dataset_source(self, dataset: Dataset) -> DatasetSource: @@ -306,3 +344,9 @@ def materializer_factory( file_sources=file_sources, sa_session=sa_session, ) + + +def _validate_hash(path: str, describes_hash: DescribesHash, what: str) -> None: + hash_value = describes_hash.hash_value + if hash_value is not None: + verify_hash(path, hash_func_name=describes_hash.hash_func_name, hash_value=hash_value) diff --git a/lib/galaxy/model/unittest_utils/store_fixtures.py b/lib/galaxy/model/unittest_utils/store_fixtures.py index d779e79d698e..c05f0660fcf1 100644 --- a/lib/galaxy/model/unittest_utils/store_fixtures.py +++ b/lib/galaxy/model/unittest_utils/store_fixtures.py @@ -11,7 +11,7 @@ TEST_SOURCE_URI = "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/2.bed" TEST_SOURCE_URI_BAM = "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/1.bam" TEST_HASH_FUNCTION = "MD5" -TEST_HASH_VALUE = "moocowpretendthisisahas" +TEST_HASH_VALUE = "f568c29421792b1b1df4474dafae01f1" TEST_HISTORY_NAME = "My history in a model store" TEST_EXTENSION = "bed" TEST_LIBRARY_NAME = "My cool library" diff --git a/lib/galaxy/schema/schema.py b/lib/galaxy/schema/schema.py index 9ed9c4ba3444..a2cb9ef72272 100644 --- a/lib/galaxy/schema/schema.py +++ 
b/lib/galaxy/schema/schema.py @@ -3681,7 +3681,15 @@ class PageSummaryBase(Model): ) -class MaterializeDatasetInstanceAPIRequest(Model): +class MaterializeDatasetOptions(Model): + validate_hashes: bool = Field( + False, + title="Validate hashes", + description="Set to true to enable dataset validation during materialization.", + ) + + +class MaterializeDatasetInstanceAPIRequest(MaterializeDatasetOptions): source: DatasetSourceType = Field( title="Source", description="The source of the content. Can be other history element to be copied or library elements.", diff --git a/lib/galaxy/schema/tasks.py b/lib/galaxy/schema/tasks.py index 313ec9f1dad0..d83640f52ffa 100644 --- a/lib/galaxy/schema/tasks.py +++ b/lib/galaxy/schema/tasks.py @@ -108,6 +108,11 @@ class MaterializeDatasetInstanceTaskRequest(Model): "- The decoded id of the HDA\n" ), ) + validate_hashes: bool = Field( + False, + title="Validate hashes", + description="Set to true to enable dataset validation during materialization.", + ) class ComputeDatasetHashTaskRequest(Model): diff --git a/lib/galaxy/tools/data_fetch.py b/lib/galaxy/tools/data_fetch.py index 540baacc2d91..fe1419aa1434 100644 --- a/lib/galaxy/tools/data_fetch.py +++ b/lib/galaxy/tools/data_fetch.py @@ -34,7 +34,7 @@ from galaxy.util.compression_utils import CompressedFile from galaxy.util.hash_util import ( HASH_NAMES, - memory_bound_hexdigest, + verify_hash, ) DESCRIPTION = """Data Import Script""" @@ -515,11 +515,7 @@ def _has_src_to_path(upload_config, item, is_dataset=False) -> Tuple[str, str]: def _handle_hash_validation(upload_config, hash_function, hash_value, path): if upload_config.validate_hashes: - calculated_hash_value = memory_bound_hexdigest(hash_func_name=hash_function, path=path) - if calculated_hash_value != hash_value: - raise Exception( - f"Failed to validate upload with [{hash_function}] - expected [{hash_value}] got [{calculated_hash_value}]" - ) + verify_hash(path, hash_func_name=hash_function, hash_value=hash_value, what="upload") def _arg_parser(): diff --git a/lib/galaxy/util/hash_util.py b/lib/galaxy/util/hash_util.py index 100adc23bcc4..7c685d2fd9f1 100644 --- a/lib/galaxy/util/hash_util.py +++ b/lib/galaxy/util/hash_util.py @@ -153,6 +153,14 @@ def parse_checksum_hash(checksum: str) -> Tuple[HashFunctionNameEnum, str]: return HashFunctionNameEnum(hash_name), hash_value +def verify_hash(path: str, hash_func_name: HashFunctionNameEnum, hash_value: str, what: str = "path"): + calculated_hash_value = memory_bound_hexdigest(hash_func_name=hash_func_name, path=path) + if calculated_hash_value != hash_value: + raise Exception( + f"Failed to validate {what} with [{hash_func_name}] - expected [{hash_value}] got [{calculated_hash_value}]" + ) + + __all__ = ( "md5", "hashlib", diff --git a/lib/galaxy/webapps/galaxy/api/history_contents.py b/lib/galaxy/webapps/galaxy/api/history_contents.py index 2386807211ba..f5653a6f7dae 100644 --- a/lib/galaxy/webapps/galaxy/api/history_contents.py +++ b/lib/galaxy/webapps/galaxy/api/history_contents.py @@ -54,6 +54,7 @@ HistoryContentType, MaterializeDatasetInstanceAPIRequest, MaterializeDatasetInstanceRequest, + MaterializeDatasetOptions, StoreExportPayload, UpdateHistoryContentsBatchPayload, UpdateHistoryContentsPayload, @@ -1072,12 +1073,17 @@ def materialize_dataset( history_id: HistoryIDPathParam, id: HistoryItemIDPathParam, trans: ProvidesHistoryContext = DependsOnTrans, + materialize_api_payload: Optional[MaterializeDatasetOptions] = Body(None), ) -> AsyncTaskResultSummary: + validate_hashes: bool = ( + 
materialize_api_payload.validate_hashes if materialize_api_payload is not None else False + ) # values are already validated, use model_construct materialize_request = MaterializeDatasetInstanceRequest.model_construct( history_id=history_id, source=DatasetSourceType.hda, content=id, + validate_hashes=validate_hashes, ) rval = self.service.materialize(trans, materialize_request) return rval diff --git a/lib/galaxy/webapps/galaxy/services/history_contents.py b/lib/galaxy/webapps/galaxy/services/history_contents.py index b1edebd3e897..42913b7f2b42 100644 --- a/lib/galaxy/webapps/galaxy/services/history_contents.py +++ b/lib/galaxy/webapps/galaxy/services/history_contents.py @@ -573,6 +573,7 @@ def materialize( history_id=request.history_id, source=request.source, content=request.content, + validate_hashes=request.validate_hashes, user=trans.async_request_user, ) results = materialize_task.delay(request=task_request) diff --git a/lib/galaxy_test/base/populators.py b/lib/galaxy_test/base/populators.py index 5636171cb46a..c33b9b6f7b04 100644 --- a/lib/galaxy_test/base/populators.py +++ b/lib/galaxy_test/base/populators.py @@ -981,7 +981,9 @@ def tools_post(self, payload: dict, url="tools") -> Response: tool_response = self._post(url, data=payload) return tool_response - def materialize_dataset_instance(self, history_id: str, id: str, source: str = "hda"): + def materialize_dataset_instance( + self, history_id: str, id: str, source: str = "hda", validate_hashes: bool = False + ): payload: Dict[str, Any] if source == "ldda": url = f"histories/{history_id}/materialize" @@ -992,6 +994,8 @@ def materialize_dataset_instance(self, history_id: str, id: str, source: str = " else: url = f"histories/{history_id}/contents/datasets/{id}/materialize" payload = {} + if validate_hashes: + payload["validate_hashes"] = True create_response = self._post(url, payload, json=True) api_asserts.assert_status_code_is_ok(create_response) create_response_json = create_response.json() diff --git a/test/integration/test_materialize_dataset_instance_tasks.py b/test/integration/test_materialize_dataset_instance_tasks.py index 7bd606cd390a..5d3d209e7221 100644 --- a/test/integration/test_materialize_dataset_instance_tasks.py +++ b/test/integration/test_materialize_dataset_instance_tasks.py @@ -78,6 +78,26 @@ def test_materialize_gxfiles_uri(self, history_id: str): assert new_hda_details["state"] == "ok" assert not new_hda_details["deleted"] + @pytest.mark.require_new_history + def test_materialize_hash_failure(self, history_id: str): + store_dict = deferred_hda_model_store_dict(source_uri="gxfiles://testdatafiles/2.bed") + store_dict["datasets"][0]["file_metadata"]["hashes"][0]["hash_value"] = "invalidhash" + as_list = self.dataset_populator.create_contents_from_store(history_id, store_dict=store_dict) + assert len(as_list) == 1 + deferred_hda = as_list[0] + assert deferred_hda["model_class"] == "HistoryDatasetAssociation" + assert deferred_hda["state"] == "deferred" + assert not deferred_hda["deleted"] + + self.dataset_populator.materialize_dataset_instance(history_id, deferred_hda["id"], validate_hashes=True) + self.dataset_populator.wait_on_history_length(history_id, 2) + new_hda_details = self.dataset_populator.get_history_dataset_details( + history_id, hid=2, assert_ok=False, wait=False + ) + assert new_hda_details["model_class"] == "HistoryDatasetAssociation" + assert new_hda_details["state"] == "error" + assert not new_hda_details["deleted"] + @pytest.mark.require_new_history def 
test_materialize_history_dataset_bam(self, history_id: str): as_list = self.dataset_populator.create_contents_from_store( diff --git a/test/unit/data/test_dataset_materialization.py b/test/unit/data/test_dataset_materialization.py index 9015b107539f..e002b439726d 100644 --- a/test/unit/data/test_dataset_materialization.py +++ b/test/unit/data/test_dataset_materialization.py @@ -62,6 +62,69 @@ def test_deferred_hdas_basic_attached(): _assert_2_bed_metadata(materialized_hda) +def test_hash_validate(): + fixture_context = setup_fixture_context_with_history() + store_dict = deferred_hda_model_store_dict() + perform_import_from_store_dict(fixture_context, store_dict) + deferred_hda = fixture_context.history.datasets[0] + assert deferred_hda + _assert_2_bed_metadata(deferred_hda) + assert deferred_hda.dataset.state == "deferred" + materializer = materializer_factory(True, object_store=fixture_context.app.object_store) + materialized_hda = materializer.ensure_materialized(deferred_hda, validate_hashes=True) + materialized_dataset = materialized_hda.dataset + assert materialized_dataset.state == "ok" + + +def test_hash_invalid(): + fixture_context = setup_fixture_context_with_history() + store_dict = deferred_hda_model_store_dict() + store_dict["datasets"][0]["file_metadata"]["hashes"][0]["hash_value"] = "invalidhash" + perform_import_from_store_dict(fixture_context, store_dict) + deferred_hda = fixture_context.history.datasets[0] + assert deferred_hda + _assert_2_bed_metadata(deferred_hda) + assert deferred_hda.dataset.state == "deferred" + materializer = materializer_factory(True, object_store=fixture_context.app.object_store) + materialized_hda = materializer.ensure_materialized(deferred_hda, validate_hashes=True) + materialized_dataset = materialized_hda.dataset + assert materialized_dataset.state == "error" + + +def test_hash_validate_source_of_download(): + fixture_context = setup_fixture_context_with_history() + store_dict = deferred_hda_model_store_dict() + store_dict["datasets"][0]["file_metadata"]["sources"][0]["hashes"] = [ + {"model_class": "DatasetSourceHash", "hash_function": "MD5", "hash_value": "f568c29421792b1b1df4474dafae01f1"} + ] + perform_import_from_store_dict(fixture_context, store_dict) + deferred_hda = fixture_context.history.datasets[0] + assert deferred_hda + _assert_2_bed_metadata(deferred_hda) + assert deferred_hda.dataset.state == "deferred" + materializer = materializer_factory(True, object_store=fixture_context.app.object_store) + materialized_hda = materializer.ensure_materialized(deferred_hda, validate_hashes=True) + materialized_dataset = materialized_hda.dataset + assert materialized_dataset.state == "ok", materialized_hda.info + + +def test_hash_invalid_source_of_download(): + fixture_context = setup_fixture_context_with_history() + store_dict = deferred_hda_model_store_dict() + store_dict["datasets"][0]["file_metadata"]["sources"][0]["hashes"] = [ + {"model_class": "DatasetSourceHash", "hash_function": "MD5", "hash_value": "invalidhash"} + ] + perform_import_from_store_dict(fixture_context, store_dict) + deferred_hda = fixture_context.history.datasets[0] + assert deferred_hda + _assert_2_bed_metadata(deferred_hda) + assert deferred_hda.dataset.state == "deferred" + materializer = materializer_factory(True, object_store=fixture_context.app.object_store) + materialized_hda = materializer.ensure_materialized(deferred_hda, validate_hashes=True) + materialized_dataset = materialized_hda.dataset + assert materialized_dataset.state == "error", materialized_hda.info + 
+ def test_deferred_hdas_basic_attached_store_by_uuid(): # skip a flush here so this is a different path... fixture_context = setup_fixture_context_with_history(store_by="uuid") From edb1cf8d6eb863cf02244b536e0f0631f7f54a17 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Tue, 24 Sep 2024 15:32:17 -0400 Subject: [PATCH 12/14] Update schema for validating hashes arguments. --- client/src/api/schema/schema.ts | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/client/src/api/schema/schema.ts b/client/src/api/schema/schema.ts index c4ac46076c00..19954176a24f 100644 --- a/client/src/api/schema/schema.ts +++ b/client/src/api/schema/schema.ts @@ -13415,6 +13415,21 @@ export interface components { * @description The source of the content. Can be other history element to be copied or library elements. */ source: components["schemas"]["DatasetSourceType"]; + /** + * Validate hashes + * @description Set to true to enable dataset validation during materialization. + * @default false + */ + validate_hashes: boolean; + }; + /** MaterializeDatasetOptions */ + MaterializeDatasetOptions: { + /** + * Validate hashes + * @description Set to true to enable dataset validation during materialization. + * @default false + */ + validate_hashes: boolean; }; /** MessageExceptionModel */ MessageExceptionModel: { @@ -23481,7 +23496,11 @@ export interface operations { }; cookie?: never; }; - requestBody?: never; + requestBody?: { + content: { + "application/json": components["schemas"]["MaterializeDatasetOptions"] | null; + }; + }; responses: { /** @description Successful Response */ 200: { From 3bf8a809abbab7d658e67b171f6a339a20d35fc5 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Mon, 30 Sep 2024 12:47:12 -0400 Subject: [PATCH 13/14] PR comments. --- lib/galaxy/workflow/scheduling_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/galaxy/workflow/scheduling_manager.py b/lib/galaxy/workflow/scheduling_manager.py index 01f7a17ff0e6..be052d7a0803 100644 --- a/lib/galaxy/workflow/scheduling_manager.py +++ b/lib/galaxy/workflow/scheduling_manager.py @@ -352,7 +352,7 @@ def __attempt_materialize(self, workflow_invocation, session) -> bool: session.commit() return True except Exception as e: - log.info(f"Failed to materialize dataset for workflow {workflow_invocation.id} - {e}") + log.exception(f"Failed to materialize dataset for workflow {workflow_invocation.id} - {e}") workflow_invocation.fail() session.add(workflow_invocation) session.commit() @@ -363,7 +363,7 @@ def __attempt_schedule(self, invocation_id, workflow_scheduler): workflow_invocation = session.get(model.WorkflowInvocation, invocation_id) if workflow_invocation.state == workflow_invocation.states.REQUIRES_MATERIALIZATION: if not self.__attempt_materialize(workflow_invocation, session): - return + return None if self.app.config.workflow_scheduling_separate_materialization_iteration: return None try: From f80207d2954050d94d1f02d799dcef1779f4c560 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Mon, 30 Sep 2024 13:24:35 -0400 Subject: [PATCH 14/14] Revise workflow failure around materializing URLs. 
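This revision makes __attempt_materialize fail the invocation outright when an
input dataset cannot be materialized: the invocation is marked failed and an
InvocationFailureDatasetFailed message (reason "dataset_failed") pointing at the
offending step and HDA is recorded, instead of leaving downstream jobs paused.
Hash validation is requested for each materialized input via validate_hashes=True.

As a rough illustration only (not part of this patch - the server URL, API key,
and ids below are placeholders), the same validated materialization can be
triggered for a single deferred HDA through the history contents API:

    # Sketch assuming a reachable Galaxy server and a deferred HDA.
    # Placeholders: GALAXY_URL, API_KEY, HISTORY_ID, DATASET_ID.
    import requests

    GALAXY_URL = "https://galaxy.example.org"
    HEADERS = {"x-api-key": "API_KEY"}

    # Ask Galaxy to materialize the deferred dataset and verify recorded hashes.
    r = requests.post(
        f"{GALAXY_URL}/api/histories/HISTORY_ID/contents/datasets/DATASET_ID/materialize",
        json={"validate_hashes": True},
        headers=HEADERS,
    )
    r.raise_for_status()  # response is an async task summary

    # Once the task completes, the dataset state should be "ok", or "error"
    # if a recorded hash did not match the downloaded content.
    state = requests.get(
        f"{GALAXY_URL}/api/datasets/DATASET_ID", headers=HEADERS
    ).json()["state"]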
--- lib/galaxy/managers/hdas.py | 5 +++- lib/galaxy/model/__init__.py | 17 ++++++++++--- lib/galaxy/workflow/scheduling_manager.py | 29 +++++++++++++++++++---- lib/galaxy_test/api/test_workflows.py | 20 ++++++++-------- 4 files changed, 53 insertions(+), 18 deletions(-) diff --git a/lib/galaxy/managers/hdas.py b/lib/galaxy/managers/hdas.py index 9f848c412618..faef0b0c1ac3 100644 --- a/lib/galaxy/managers/hdas.py +++ b/lib/galaxy/managers/hdas.py @@ -176,7 +176,7 @@ def create( session.commit() return hda - def materialize(self, request: MaterializeDatasetInstanceTaskRequest, in_place: bool = False) -> None: + def materialize(self, request: MaterializeDatasetInstanceTaskRequest, in_place: bool = False) -> bool: request_user: RequestUser = request.user materializer = materializer_factory( True, # attached... @@ -195,9 +195,12 @@ def materialize(self, request: MaterializeDatasetInstanceTaskRequest, in_place: ) if not in_place: history.add_dataset(new_hda, set_hid=True) + else: + new_hda.set_total_size() session = self.session() with transaction(session): session.commit() + return new_hda.is_ok def copy( self, item: Any, history=None, hide_copy: bool = False, flush: bool = True, **kwargs: Any diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 9b59c616e7ac..9967c08fe667 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -8573,6 +8573,12 @@ class InputWithRequest: request: Dict[str, Any] +@dataclass +class InputToMaterialize: + hda: "HistoryDatasetAssociation" + input_dataset: "WorkflowRequestToInputDatasetAssociation" + + class WorkflowInvocation(Base, UsesCreateAndUpdateTime, Dictifiable, Serializable): __tablename__ = "workflow_invocation" @@ -8896,14 +8902,19 @@ def input_associations(self): inputs.append(input_dataset_collection_assoc) return inputs - def inputs_requiring_materialization(self): - hdas_to_materialize = [] + def inputs_requiring_materialization(self) -> List[InputToMaterialize]: + hdas_to_materialize: List[InputToMaterialize] = [] for input_dataset_assoc in self.input_datasets: request = input_dataset_assoc.request if request: deferred = request.get("deferred", False) if not deferred: - hdas_to_materialize.append(input_dataset_assoc.dataset) + hdas_to_materialize.append( + InputToMaterialize( + input_dataset_assoc.dataset, + input_dataset_assoc, + ) + ) return hdas_to_materialize def _serialize(self, id_encoder, serialization_options): diff --git a/lib/galaxy/workflow/scheduling_manager.py b/lib/galaxy/workflow/scheduling_manager.py index be052d7a0803..918ca431a0dd 100644 --- a/lib/galaxy/workflow/scheduling_manager.py +++ b/lib/galaxy/workflow/scheduling_manager.py @@ -7,7 +7,12 @@ from galaxy.exceptions import HandlerAssignmentError from galaxy.jobs.handler import InvocationGrabber from galaxy.model.base import transaction -from galaxy.schema.invocation import InvocationState +from galaxy.schema.invocation import ( + FailureReason, + InvocationFailureDatasetFailed, + InvocationState, + InvocationUnexpectedFailure, +) from galaxy.schema.tasks import ( MaterializeDatasetInstanceTaskRequest, RequestUser, @@ -335,8 +340,9 @@ def __schedule(self, workflow_scheduler_id, workflow_scheduler): def __attempt_materialize(self, workflow_invocation, session) -> bool: try: - hdas_to_materialize = workflow_invocation.inputs_requiring_materialization() - for hda in hdas_to_materialize: + inputs_to_materialize = workflow_invocation.inputs_requiring_materialization() + for input_to_materialize in inputs_to_materialize: + hda = 
input_to_materialize.hda user = RequestUser(user_id=workflow_invocation.history.user_id) task_request = MaterializeDatasetInstanceTaskRequest( user=user, @@ -345,7 +351,20 @@ def __attempt_materialize(self, workflow_invocation, session) -> bool: content=hda.id, validate_hashes=True, ) - self.app.hda_manager.materialize(task_request, in_place=True) + materialized_okay = self.app.hda_manager.materialize(task_request, in_place=True) + if not materialized_okay: + workflow_invocation.fail() + workflow_invocation.add_message( + InvocationFailureDatasetFailed( + workflow_step_id=input_to_materialize.input_dataset.workflow_step.id, + reason=FailureReason.dataset_failed, + hda_id=hda.id, + ) + ) + session.add(workflow_invocation) + session.commit() + return False + # place back into ready and let it proceed normally on next iteration? workflow_invocation.set_state(model.WorkflowInvocation.states.READY) session.add(workflow_invocation) @@ -354,6 +373,8 @@ def __attempt_materialize(self, workflow_invocation, session) -> bool: except Exception as e: log.exception(f"Failed to materialize dataset for workflow {workflow_invocation.id} - {e}") workflow_invocation.fail() + failure = InvocationUnexpectedFailure(reason=FailureReason.unexpected_failure, details=str(e)) + workflow_invocation.add_message(failure) session.add(workflow_invocation) session.commit() return False diff --git a/lib/galaxy_test/api/test_workflows.py b/lib/galaxy_test/api/test_workflows.py index 953aac0ba9e9..8408837df347 100644 --- a/lib/galaxy_test/api/test_workflows.py +++ b/lib/galaxy_test/api/test_workflows.py @@ -1624,11 +1624,11 @@ def test_run_workflow_with_invalid_url_hashes(self): invocation_id = self.workflow_populator.invoke_workflow_and_wait( workflow_id, request=workflow_request, assert_ok=False ).json()["id"] - invocation = self._invocation_details(workflow_id, invocation_id) - assert invocation["state"] == "scheduled", invocation - invocation_jobs = self.workflow_populator.get_invocation_jobs(invocation_id) - for job in invocation_jobs: - assert job["state"] == "paused" + invocation_details = self._invocation_details(workflow_id, invocation_id) + assert invocation_details["state"] == "failed" + assert len(invocation_details["messages"]) == 1 + message = invocation_details["messages"][0] + assert message["reason"] == "dataset_failed" @skip_without_tool("cat1") def test_run_workflow_with_invalid_url(self): @@ -1658,11 +1658,11 @@ def test_run_workflow_with_invalid_url(self): invocation_id = self.workflow_populator.invoke_workflow_and_wait( workflow_id, request=workflow_request, assert_ok=False ).json()["id"] - invocation = self._invocation_details(workflow_id, invocation_id) - assert invocation["state"] == "scheduled", invocation - invocation_jobs = self.workflow_populator.get_invocation_jobs(invocation_id) - for job in invocation_jobs: - assert job["state"] == "paused" + invocation_details = self._invocation_details(workflow_id, invocation_id) + assert invocation_details["state"] == "failed" + assert len(invocation_details["messages"]) == 1 + message = invocation_details["messages"][0] + assert message["reason"] == "dataset_failed" def __run_cat_workflow(self, inputs_by, history_id: Optional[str] = None): workflow = self.workflow_populator.load_workflow(name="test_for_run")