From b97ede779c1ab72a1c714016dd58b705f001eb7b Mon Sep 17 00:00:00 2001
From: Nicola Soranzo
Date: Tue, 5 Nov 2024 15:28:02 +0000
Subject: [PATCH 1/2] Add type annotations

---
 lib/galaxy/tools/data_fetch.py | 39 +++++++++++++++++++---------------
 1 file changed, 22 insertions(+), 17 deletions(-)

diff --git a/lib/galaxy/tools/data_fetch.py b/lib/galaxy/tools/data_fetch.py
index fe1419aa1434..01d43461f509 100644
--- a/lib/galaxy/tools/data_fetch.py
+++ b/lib/galaxy/tools/data_fetch.py
@@ -34,6 +34,7 @@
 from galaxy.util.compression_utils import CompressedFile
 from galaxy.util.hash_util import (
     HASH_NAMES,
+    HashFunctionNameEnum,
     verify_hash,
 )
 
@@ -85,7 +86,7 @@ def _request_to_galaxy_json(upload_config: "UploadConfig", request):
     return {"__unnamed_outputs": fetched_targets}
 
 
-def _fetch_target(upload_config: "UploadConfig", target):
+def _fetch_target(upload_config: "UploadConfig", target: Dict[str, Any]):
     destination = target.get("destination", None)
     assert destination, "No destination defined."
 
@@ -254,14 +255,15 @@ def _resolve_item_with_primary(item):
             hash_value = item.get(hash_function)
             if hash_value:
                 hashes.append({"hash_function": hash_function, "hash_value": hash_value})
-        for hash_dict in hashes:
-            hash_function = hash_dict.get("hash_function")
-            hash_value = hash_dict.get("hash_value")
-            try:
-                _handle_hash_validation(upload_config, hash_function, hash_value, path)
-            except Exception as e:
-                error_message = str(e)
-                item["error_message"] = error_message
+        if path:
+            for hash_dict in hashes:
+                hash_function = hash_dict.get("hash_function")
+                hash_value = hash_dict.get("hash_value")
+                try:
+                    _handle_hash_validation(upload_config, hash_function, hash_value, path)
+                except Exception as e:
+                    error_message = str(e)
+                    item["error_message"] = error_message
 
         dbkey = item.get("dbkey", "?")
         link_data_only = upload_config.link_data_only
@@ -422,7 +424,7 @@ def _bagit_to_items(directory):
     return items
 
 
-def _decompress_target(upload_config: "UploadConfig", target):
+def _decompress_target(upload_config: "UploadConfig", target: Dict[str, Any]):
     elements_from_name, elements_from_path = _has_src_to_path(upload_config, target, is_dataset=False)
     # by default Galaxy will check for a directory with a single file and interpret that
     # as the new root for expansion, this is a good user experience for uploading single
@@ -481,12 +483,13 @@ def _has_src_to_name(item) -> Optional[str]:
     return name
 
 
-def _has_src_to_path(upload_config, item, is_dataset=False) -> Tuple[str, str]:
+def _has_src_to_path(upload_config: "UploadConfig", item: Dict[str, Any], is_dataset: bool = False) -> Tuple[str, str]:
     assert "src" in item, item
     src = item.get("src")
     name = item.get("name")
     if src == "url":
         url = item.get("url")
+        assert url, "url cannot be empty"
         try:
             path = stream_url_to_file(url, file_sources=upload_config.file_sources, dir=upload_config.working_directory)
         except Exception as e:
@@ -513,7 +516,9 @@ def _has_src_to_path(upload_config, item, is_dataset=False) -> Tuple[str, str]:
     return name, path
 
 
-def _handle_hash_validation(upload_config, hash_function, hash_value, path):
+def _handle_hash_validation(
+    upload_config: "UploadConfig", hash_function: HashFunctionNameEnum, hash_value: str, path: str
+):
     if upload_config.validate_hashes:
         verify_hash(path, hash_func_name=hash_function, hash_value=hash_value, what="upload")
 
@@ -548,11 +553,11 @@ def get_file_sources(working_directory, file_sources_as_dict=None):
 class UploadConfig:
     def __init__(
         self,
-        request,
-        registry,
-        working_directory,
-        allow_failed_collections,
-        file_sources_dict=None,
+        request: Dict[str, Any],
+        registry: Registry,
+        working_directory: str,
+        allow_failed_collections: bool,
+        file_sources_dict: Optional[Dict] = None,
     ):
        self.registry = registry
        self.working_directory = working_directory
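For context (not part of the patch series): the validation that the first patch now guards behind "if path:" is delegated to verify_hash(path, hash_func_name=hash_function, hash_value=hash_value, what="upload") from galaxy.util.hash_util. The sketch below mirrors that check using only the standard library so it runs outside a Galaxy checkout; the helper name check_upload_hash and the chunked read are illustrative, not Galaxy's implementation, and the error wording follows the message asserted by the second patch's test.

import hashlib


def check_upload_hash(path: str, hash_function: str, hash_value: str) -> None:
    # Recompute the digest of the uploaded file and compare it with the
    # client-supplied value; raise on mismatch, as upload hash validation does.
    digest = hashlib.new(hash_function.lower())
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(65536), b""):
            digest.update(chunk)
    actual = digest.hexdigest()
    if actual != hash_value:
        # Same message shape as the one asserted in the new test below.
        raise Exception(f"Failed to validate upload with [{hash_function}] - expected [{hash_value}] got [{actual}]")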
From 1f223d0dc501a5345f33a9cf8015f8e04fe36404 Mon Sep 17 00:00:00 2001
From: Nicola Soranzo
Date: Tue, 5 Nov 2024 15:33:40 +0000
Subject: [PATCH 2/2] Test hash validation also for upload by path

---
 test/unit/app/tools/test_data_fetch.py | 36 +++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/test/unit/app/tools/test_data_fetch.py b/test/unit/app/tools/test_data_fetch.py
index ee7cdd61536f..8c1a64318de6 100644
--- a/test/unit/app/tools/test_data_fetch.py
+++ b/test/unit/app/tools/test_data_fetch.py
@@ -5,6 +5,9 @@
 from contextlib import contextmanager
 from shutil import rmtree
 from tempfile import mkdtemp
+from typing import Optional
+
+import pytest
 
 from galaxy.tools.data_fetch import main
 from galaxy.util.unittest_utils import skip_if_github_down
@@ -13,7 +16,17 @@
 URI_FOR_1_2_3 = f"base64://{B64_FOR_1_2_3}"
 
 
-def test_simple_path_get():
+@pytest.mark.parametrize(
+    "hash_value, error_message",
+    [
+        ("471ddd37fc297fba09b893b88739ece9", None),
+        (
+            "thisisbad",
+            "Failed to validate upload with [MD5] - expected [thisisbad] got [471ddd37fc297fba09b893b88739ece9]",
+        ),
+    ],
+)
+def test_simple_path_get(hash_value: str, error_message: Optional[str]):
     with _execute_context() as execute_context:
         job_directory = execute_context.job_directory
         example_path = os.path.join(job_directory, "example_file")
@@ -25,13 +38,30 @@ def test_simple_path_get():
                     "destination": {
                         "type": "hdas",
                     },
-                    "elements": [{"src": "path", "path": example_path}],
+                    "elements": [
+                        {
+                            "src": "path",
+                            "path": example_path,
+                            "hashes": [
+                                {
+                                    "hash_function": "MD5",
+                                    "hash_value": hash_value,
+                                }
+                            ],
+                        }
+                    ],
                 }
-            ]
+            ],
+            "validate_hashes": True,
         }
         execute_context.execute_request(request)
         output = _unnamed_output(execute_context)
         assert output
+        hda_result = output["elements"][0]
+        if error_message is not None:
+            assert hda_result["error_message"] == error_message
+        else:
+            assert "error_message" not in hda_result
 
 
 @skip_if_github_down
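A complementary sketch, assuming the request layout shown in the test above: producing the MD5 hex digest of a local file to fill in the hash_value of a "hashes" entry for a path-based fetch element. The helper md5_hex and the file name example_file are illustrative, not part of Galaxy's API.

import hashlib


def md5_hex(path: str) -> str:
    # Hex digest in the form expected by the "hash_value" field.
    with open(path, "rb") as f:
        return hashlib.md5(f.read()).hexdigest()


element = {
    "src": "path",
    "path": "example_file",  # illustrative local path
    "hashes": [{"hash_function": "MD5", "hash_value": md5_hex("example_file")}],
}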