From cca074b9e0e979b81890f94260dd0779ce98d3f1 Mon Sep 17 00:00:00 2001
From: John Chilton
Date: Thu, 12 Sep 2024 09:09:01 -0400
Subject: [PATCH] Fixes and tests for data fetch models.

---
 lib/galaxy/schema/fetch_data.py          |   9 +-
 lib/galaxy_test/api/test_tools_upload.py |  61 +++++++++++
 test/unit/app/tools/test_data_fetch.py   | 132 +++++++++++++++++++++
 3 files changed, 201 insertions(+), 1 deletion(-)

diff --git a/lib/galaxy/schema/fetch_data.py b/lib/galaxy/schema/fetch_data.py
index 2603b1471ea5..bd0408d51c37 100644
--- a/lib/galaxy/schema/fetch_data.py
+++ b/lib/galaxy/schema/fetch_data.py
@@ -101,6 +101,13 @@ class ExtraFiles(FetchBaseModel):
     )
 
 
+class DatasetHash(Model):
+    hash_function: Literal["MD5", "SHA-1", "SHA-256", "SHA-512"]
+    hash_value: str
+
+    model_config = ConfigDict(extra="forbid")
+
+
 class BaseDataElement(FetchBaseModel):
     name: Optional[CoercedStringType] = None
     dbkey: str = Field("?")
@@ -115,7 +122,7 @@ class BaseDataElement(FetchBaseModel):
     auto_decompress: bool = AutoDecompressField
     items_from: Optional[ElementsFromType] = Field(None, alias="elements_from")
     collection_type: Optional[str] = None
-    MD5: Optional[str] = None
+    hashes: Optional[List[DatasetHash]] = None
     description: Optional[str] = None
 
     model_config = ConfigDict(extra="forbid")
diff --git a/lib/galaxy_test/api/test_tools_upload.py b/lib/galaxy_test/api/test_tools_upload.py
index 64d7e97365b9..ed32bc92caf0 100644
--- a/lib/galaxy_test/api/test_tools_upload.py
+++ b/lib/galaxy_test/api/test_tools_upload.py
@@ -1,6 +1,7 @@
 import json
 import os
 import urllib.parse
+from base64 import b64encode
 
 import pytest
 from tusclient import client
@@ -25,6 +26,9 @@
 )
 from ._framework import ApiTestCase
 
+B64_FOR_1_2_3 = b64encode(b"1 2 3").decode("utf-8")
+URI_FOR_1_2_3 = f"base64://{B64_FOR_1_2_3}"
+
 
 class TestToolsUpload(ApiTestCase):
     dataset_populator: DatasetPopulator
@@ -927,6 +931,63 @@ def test_upload_and_validate_valid(self):
         terminal_validated_state = self.dataset_populator.validate_dataset_and_wait(history_id, dataset_id)
         assert terminal_validated_state == "ok", terminal_validated_state
 
+    def test_upload_and_validate_hash_valid(self):
+        with self.dataset_populator.test_history() as history_id:
+            destination = {"type": "hdas"}
+            targets = [
+                {
+                    "destination": destination,
+                    "items": [
+                        {
+                            "src": "url",
+                            "url": URI_FOR_1_2_3,
+                            "hashes": [
+                                {"hash_function": "SHA-1", "hash_value": "65e9d53484d28eef5447bc06fe2d754d1090975a"}
+                            ],
+                        },
+                    ],
+                }
+            ]
+            payload = {
+                "history_id": history_id,
+                "targets": targets,
+                "validate_hashes": True,
+            }
+            fetch_response = self.dataset_populator.fetch(payload)
+            self._assert_status_code_is(fetch_response, 200)
+            # history ok implies the dataset upload worked
+            self.dataset_populator.wait_for_history(history_id, assert_ok=True)
+
+    def test_upload_and_validate_hash_invalid(self):
+        with self.dataset_populator.test_history() as history_id:
+            destination = {"type": "hdas"}
+            targets = [
+                {
+                    "destination": destination,
+                    "items": [
+                        {
+                            "src": "url",
+                            "url": URI_FOR_1_2_3,
+                            "hashes": [{"hash_function": "SHA-1", "hash_value": "invalidhash"}],
+                        },
+                    ],
+                }
+            ]
+            payload = {
+                "history_id": history_id,
+                "targets": targets,
+                "validate_hashes": True,
+            }
+            fetch_response = self.dataset_populator.fetch(payload, assert_ok=True, wait=False)
+            self._assert_status_code_is(fetch_response, 200)
+            outputs = fetch_response.json()["outputs"]
+            new_dataset = outputs[0]
+            self.dataset_populator.wait_for_history(history_id, assert_ok=False)
+            dataset_details = self.dataset_populator.get_history_dataset_details(
+                history_id, dataset=new_dataset, assert_ok=False
+            )
+            assert dataset_details["state"] == "error"
+
     def _velvet_upload(self, history_id, extra_inputs):
         payload = self.dataset_populator.upload_payload(
             history_id,
diff --git a/test/unit/app/tools/test_data_fetch.py b/test/unit/app/tools/test_data_fetch.py
index 58e13e5d1549..ee7cdd61536f 100644
--- a/test/unit/app/tools/test_data_fetch.py
+++ b/test/unit/app/tools/test_data_fetch.py
@@ -1,6 +1,7 @@
 import json
 import os
 import tempfile
+from base64 import b64encode
 from contextlib import contextmanager
 from shutil import rmtree
 from tempfile import mkdtemp
@@ -8,6 +9,9 @@
 from galaxy.tools.data_fetch import main
 from galaxy.util.unittest_utils import skip_if_github_down
 
+B64_FOR_1_2_3 = b64encode(b"1 2 3").decode("utf-8")
+URI_FOR_1_2_3 = f"base64://{B64_FOR_1_2_3}"
+
 
 def test_simple_path_get():
     with _execute_context() as execute_context:
@@ -55,6 +59,134 @@ def test_simple_uri_get():
         assert hda_result["ext"] == "bed"
 
 
+def test_correct_md5():
+    with _execute_context() as execute_context:
+        request = {
+            "targets": [
+                {
+                    "destination": {
+                        "type": "hdas",
+                    },
+                    "elements": [
+                        {
+                            "src": "url",
+                            "url": URI_FOR_1_2_3,
+                            "hashes": [
+                                {
+                                    "hash_function": "MD5",
+                                    "hash_value": "5ba48b6e5a7c4d4930fda256f411e55b",
+                                }
+                            ],
+                        }
+                    ],
+                }
+            ],
+            "validate_hashes": True,
+        }
+        execute_context.execute_request(request)
+        output = _unnamed_output(execute_context)
+        hda_result = output["elements"][0]
+        assert hda_result["state"] == "ok"
+        assert hda_result["ext"] == "txt"
+
+
+def test_incorrect_md5():
+    with _execute_context() as execute_context:
+        request = {
+            "targets": [
+                {
+                    "destination": {
+                        "type": "hdas",
+                    },
+                    "elements": [
+                        {
+                            "src": "url",
+                            "url": URI_FOR_1_2_3,
+                            "hashes": [
+                                {
+                                    "hash_function": "MD5",
+                                    "hash_value": "thisisbad",
+                                }
+                            ],
+                        }
+                    ],
+                }
+            ],
+            "validate_hashes": True,
+        }
+        execute_context.execute_request(request)
+        output = _unnamed_output(execute_context)
+        hda_result = output["elements"][0]
+        assert (
+            hda_result["error_message"]
+            == "Failed to validate upload with [MD5] - expected [thisisbad] got [5ba48b6e5a7c4d4930fda256f411e55b]"
+        )
+
+
+def test_correct_sha1():
+    with _execute_context() as execute_context:
+        request = {
+            "targets": [
+                {
+                    "destination": {
+                        "type": "hdas",
+                    },
+                    "elements": [
+                        {
+                            "src": "url",
+                            "url": URI_FOR_1_2_3,
+                            "hashes": [
+                                {
+                                    "hash_function": "SHA-1",
+                                    "hash_value": "65e9d53484d28eef5447bc06fe2d754d1090975a",
+                                }
+                            ],
+                        }
+                    ],
+                }
+            ],
+            "validate_hashes": True,
+        }
+        execute_context.execute_request(request)
+        output = _unnamed_output(execute_context)
+        hda_result = output["elements"][0]
+        assert hda_result["state"] == "ok"
+        assert hda_result["ext"] == "txt"
+
+
+def test_incorrect_sha1():
+    with _execute_context() as execute_context:
+        request = {
+            "targets": [
+                {
+                    "destination": {
+                        "type": "hdas",
+                    },
+                    "elements": [
+                        {
+                            "src": "url",
+                            "url": URI_FOR_1_2_3,
+                            "hashes": [
+                                {
+                                    "hash_function": "SHA-1",
+                                    "hash_value": "thisisbad",
+                                }
+                            ],
+                        }
+                    ],
+                }
+            ],
+            "validate_hashes": True,
+        }
+        execute_context.execute_request(request)
+        output = _unnamed_output(execute_context)
+        hda_result = output["elements"][0]
+        assert (
+            hda_result["error_message"]
+            == "Failed to validate upload with [SHA-1] - expected [thisisbad] got [65e9d53484d28eef5447bc06fe2d754d1090975a]"
+        )
+
+
 @skip_if_github_down
 def test_deferred_uri_get():
     with _execute_context() as execute_context:
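Every base64 URI and hash constant exercised above derives from the same 5-byte payload b"1 2 3". A minimal sketch, using only Python's standard library, that reproduces those constants (the printed values are the ones the patch hard-codes):

    import hashlib
    from base64 import b64encode

    content = b"1 2 3"

    # Rebuild the base64:// URI the same way B64_FOR_1_2_3 / URI_FOR_1_2_3 do.
    print(f"base64://{b64encode(content).decode('utf-8')}")  # base64://MSAyIDM=

    # MD5 digest expected by test_correct_md5 and reported back by test_incorrect_md5.
    print(hashlib.md5(content).hexdigest())  # 5ba48b6e5a7c4d4930fda256f411e55b

    # SHA-1 digest expected by the SHA-1 tests in both test modules.
    print(hashlib.sha1(content).hexdigest())  # 65e9d53484d28eef5447bc06fe2d754d1090975a

When a declared hash does not match the computed digest, the upload lands in the "error" state with the "Failed to validate upload" message that the invalid-hash tests assert on.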