From 3e31b44af3d64b2a97f3ca6f7674bc82ce147399 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Tue, 29 Oct 2024 18:33:53 +0100 Subject: [PATCH 01/14] Allow deferred datasets to behave as URI --- lib/galaxy/model/__init__.py | 7 +++++++ lib/galaxy/tool_util/xsd/galaxy.xsd | 19 +++++++++++++++++++ lib/galaxy/tools/evaluation.py | 16 +++++++++++++++- lib/galaxy/tools/parameters/basic.py | 7 +++++++ lib/galaxy/tools/wrappers.py | 5 +++++ 5 files changed, 53 insertions(+), 1 deletion(-) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index bcf956d2aae4..cc3252aadd76 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -4685,6 +4685,13 @@ def ext(self): def has_deferred_data(self): return self.dataset.state == Dataset.states.DEFERRED + @property + def deferred_source_uri(self): + if self.has_deferred_data: + # Assuming the first source is the deferred source + return self.sources[0].source_uri + return None + @property def state(self): # self._state holds state that should only affect this particular dataset association, not the dataset state itself diff --git a/lib/galaxy/tool_util/xsd/galaxy.xsd b/lib/galaxy/tool_util/xsd/galaxy.xsd index c8090c919568..c4dfe86e0da0 100644 --- a/lib/galaxy/tool_util/xsd/galaxy.xsd +++ b/lib/galaxy/tool_util/xsd/galaxy.xsd @@ -4314,6 +4314,25 @@ template if the parameter is ``false`` or not checked by the user. Used only when the ``type`` attribute value is ``boolean``. + + + +``` + +You can specify multiple prefixes separated by `|`. The URI will be passed to the tool +as is if it starts with any of the specified prefixes. + +This attribute is only valid for `data` parameters. 
+ ]]> + + diff --git a/lib/galaxy/tools/evaluation.py b/lib/galaxy/tools/evaluation.py index 28a4b27c4b0d..67d486627622 100644 --- a/lib/galaxy/tools/evaluation.py +++ b/lib/galaxy/tools/evaluation.py @@ -314,7 +314,8 @@ def _deferred_objects( deferred_objects: Dict[str, DeferrableObjectsT] = {} for key, value in input_datasets.items(): if value is not None and value.state == model.Dataset.states.DEFERRED: - deferred_objects[key] = value + if self._should_materialize_deferred_input(key, value): + deferred_objects[key] = value def find_deferred_collections(input, value, context, prefixed_name=None, **kwargs): if ( @@ -327,6 +328,19 @@ def find_deferred_collections(input, value, context, prefixed_name=None, **kwarg return deferred_objects + def _should_materialize_deferred_input(self, input_name: str, input_value: DeferrableObjectsT) -> bool: + """ + We can skip materializing some deferred datasets if the input can work with URIs that are prefixed + with a known prefix set in `allow_uri_if_prefixed`. 
+ """ + deferred_input = self.tool.inputs[input_name] + if isinstance(deferred_input, DataToolParameter) and isinstance(input_value, model.DatasetInstance): + source_uri = input_value.sources[0].source_uri or "" + for prefix in deferred_input.allow_uri_if_prefixed: + if source_uri.startswith(prefix): + return False + return True + def __walk_inputs(self, inputs, input_values, func): def do_walk(inputs, input_values): """ diff --git a/lib/galaxy/tools/parameters/basic.py b/lib/galaxy/tools/parameters/basic.py index 7ed9c657691a..bc41cdf0bd75 100644 --- a/lib/galaxy/tools/parameters/basic.py +++ b/lib/galaxy/tools/parameters/basic.py @@ -2068,6 +2068,7 @@ def __init__(self, tool, input_source, trans=None): self.tag = tag self.is_dynamic = True self._parse_options(input_source) + self._parse_allow_uri_if_prefixed(input_source) # Load conversions required for the dataset input self.conversions = [] self.default_object = input_source.parse_default() @@ -2089,6 +2090,12 @@ def __init__(self, tool, input_source, trans=None): ) self.conversions.append((name, conv_extension, [conv_type])) + def _parse_allow_uri_if_prefixed(self, input_source): + # In case of deferred datasets, if the source URI is prefixed with one of the values in this list, + # the dataset will behave as an URI and will not be materialized into a file path. 
+ allow_uri_if_prefixed = input_source.get("allow_uri_if_prefixed", None) + self.allow_uri_if_prefixed = allow_uri_if_prefixed.split("|") if allow_uri_if_prefixed else [] + def from_json(self, value, trans, other_values=None): session = trans.sa_session diff --git a/lib/galaxy/tools/wrappers.py b/lib/galaxy/tools/wrappers.py index 6dd47e7f9bb4..6ad72645d7bf 100644 --- a/lib/galaxy/tools/wrappers.py +++ b/lib/galaxy/tools/wrappers.py @@ -471,6 +471,11 @@ def is_of_type(self, *exts: str) -> bool: return self.dataset.datatype.matches_any(datatypes) def __str__(self) -> str: + return self._path_or_uri() + + def _path_or_uri(self) -> str: + if self.unsanitized.has_deferred_data: + return self.unsanitized.deferred_source_uri or "" if self.false_path is not None: return self.false_path else: From 154a39047836dcaec449b00c4f552253cc0e0b76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20L=C3=B3pez?= <46503462+davelopez@users.noreply.github.com> Date: Tue, 29 Oct 2024 19:01:16 +0100 Subject: [PATCH 02/14] Fix typo Co-authored-by: Nicola Soranzo --- lib/galaxy/tool_util/xsd/galaxy.xsd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/tool_util/xsd/galaxy.xsd b/lib/galaxy/tool_util/xsd/galaxy.xsd index c4dfe86e0da0..8b9a185a4c17 100644 --- a/lib/galaxy/tool_util/xsd/galaxy.xsd +++ b/lib/galaxy/tool_util/xsd/galaxy.xsd @@ -4317,7 +4317,7 @@ when the ``type`` attribute value is ``boolean``. 
Date: Wed, 30 Oct 2024 10:10:17 +0100 Subject: [PATCH 03/14] Rename allow_uri_if_prefixed to allow_uri_if_protocol Co-authored-by: Nicola Soranzo --- lib/galaxy/tool_util/xsd/galaxy.xsd | 6 +++--- lib/galaxy/tools/evaluation.py | 4 ++-- lib/galaxy/tools/parameters/basic.py | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/galaxy/tool_util/xsd/galaxy.xsd b/lib/galaxy/tool_util/xsd/galaxy.xsd index 8b9a185a4c17..3cfb3823b5da 100644 --- a/lib/galaxy/tool_util/xsd/galaxy.xsd +++ b/lib/galaxy/tool_util/xsd/galaxy.xsd @@ -4314,16 +4314,16 @@ template if the parameter is ``false`` or not checked by the user. Used only when the ``type`` attribute value is ``boolean``. - + + ``` You can specify multiple prefixes separated by `|`. The URI will be passed to the tool diff --git a/lib/galaxy/tools/evaluation.py b/lib/galaxy/tools/evaluation.py index 67d486627622..144a2d1cfaa9 100644 --- a/lib/galaxy/tools/evaluation.py +++ b/lib/galaxy/tools/evaluation.py @@ -331,12 +331,12 @@ def find_deferred_collections(input, value, context, prefixed_name=None, **kwarg def _should_materialize_deferred_input(self, input_name: str, input_value: DeferrableObjectsT) -> bool: """ We can skip materializing some deferred datasets if the input can work with URIs that are prefixed - with a known prefix set in `allow_uri_if_prefixed`. + with a known prefix set in `allow_uri_if_protocol`. 
""" deferred_input = self.tool.inputs[input_name] if isinstance(deferred_input, DataToolParameter) and isinstance(input_value, model.DatasetInstance): source_uri = input_value.sources[0].source_uri or "" - for prefix in deferred_input.allow_uri_if_prefixed: + for prefix in deferred_input.allow_uri_if_protocol: if source_uri.startswith(prefix): return False return True diff --git a/lib/galaxy/tools/parameters/basic.py b/lib/galaxy/tools/parameters/basic.py index bc41cdf0bd75..ce9c6d086837 100644 --- a/lib/galaxy/tools/parameters/basic.py +++ b/lib/galaxy/tools/parameters/basic.py @@ -2068,7 +2068,7 @@ def __init__(self, tool, input_source, trans=None): self.tag = tag self.is_dynamic = True self._parse_options(input_source) - self._parse_allow_uri_if_prefixed(input_source) + self._parse_allow_uri_if_protocol(input_source) # Load conversions required for the dataset input self.conversions = [] self.default_object = input_source.parse_default() @@ -2090,11 +2090,11 @@ def __init__(self, tool, input_source, trans=None): ) self.conversions.append((name, conv_extension, [conv_type])) - def _parse_allow_uri_if_prefixed(self, input_source): + def _parse_allow_uri_if_protocol(self, input_source): # In case of deferred datasets, if the source URI is prefixed with one of the values in this list, # the dataset will behave as an URI and will not be materialized into a file path. 
- allow_uri_if_prefixed = input_source.get("allow_uri_if_prefixed", None) - self.allow_uri_if_prefixed = allow_uri_if_prefixed.split("|") if allow_uri_if_prefixed else [] + allow_uri_if_protocol = input_source.get("allow_uri_if_protocol", None) + self.allow_uri_if_protocol = allow_uri_if_protocol.split("|") if allow_uri_if_protocol else [] def from_json(self, value, trans, other_values=None): session = trans.sa_session From f1daa44d473b430311d2aeb670cf9a132b5d8d6c Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Wed, 30 Oct 2024 10:16:50 +0100 Subject: [PATCH 04/14] Use commas to list protocols in allow_uri_if_protocol Co-authored-by: Bjoern Gruening --- lib/galaxy/tool_util/xsd/galaxy.xsd | 4 ++-- lib/galaxy/tools/parameters/basic.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/galaxy/tool_util/xsd/galaxy.xsd b/lib/galaxy/tool_util/xsd/galaxy.xsd index 3cfb3823b5da..9de63fe28d73 100644 --- a/lib/galaxy/tool_util/xsd/galaxy.xsd +++ b/lib/galaxy/tool_util/xsd/galaxy.xsd @@ -4323,10 +4323,10 @@ can be used to avoid materialization and pass the URI as is to the tool. This is useful when the tool can handle the URI directly. Example: ```xml - + ``` -You can specify multiple prefixes separated by `|`. The URI will be passed to the tool +You can specify multiple prefixes separated by comma. The URI will be passed to the tool as is if it starts with any of the specified prefixes. This attribute is only valid for `data` parameters. diff --git a/lib/galaxy/tools/parameters/basic.py b/lib/galaxy/tools/parameters/basic.py index ce9c6d086837..d3bb7f119c58 100644 --- a/lib/galaxy/tools/parameters/basic.py +++ b/lib/galaxy/tools/parameters/basic.py @@ -2094,7 +2094,7 @@ def _parse_allow_uri_if_protocol(self, input_source): # In case of deferred datasets, if the source URI is prefixed with one of the values in this list, # the dataset will behave as an URI and will not be materialized into a file path. 
allow_uri_if_protocol = input_source.get("allow_uri_if_protocol", None) - self.allow_uri_if_protocol = allow_uri_if_protocol.split("|") if allow_uri_if_protocol else [] + self.allow_uri_if_protocol = allow_uri_if_protocol.split(",") if allow_uri_if_protocol else [] def from_json(self, value, trans, other_values=None): session = trans.sa_session From b3a70863fcb3850511757d2026d6d9fcb400410e Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Wed, 30 Oct 2024 11:41:01 +0100 Subject: [PATCH 05/14] Add test for parsing allow_uri_if_protocol --- test/unit/app/tools/test_parameter_parsing.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/unit/app/tools/test_parameter_parsing.py b/test/unit/app/tools/test_parameter_parsing.py index 842e7df91844..e33e183fc962 100644 --- a/test/unit/app/tools/test_parameter_parsing.py +++ b/test/unit/app/tools/test_parameter_parsing.py @@ -452,3 +452,12 @@ def test_tool_collection(self): ) assert param.type == "data_collection" assert param.collection_types == ["list", "list:paired"] + + def test_data_allow_uri_if_protocol(self): + param = self._parameter_for( + xml=""" + + + """ + ) + assert param.allow_uri_if_protocol == ["https", "s3"] From 305dd2b181562b9a94dc0a514ce88f0cef53db5d Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Wed, 30 Oct 2024 11:45:53 +0100 Subject: [PATCH 06/14] Add `has_deferred_data` flag to MockDataset and FakeDatasetAssociation --- lib/galaxy/model/__init__.py | 1 + test/unit/app/tools/test_wrappers.py | 1 + 2 files changed, 2 insertions(+) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index cc3252aadd76..7656a98fbed1 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -2737,6 +2737,7 @@ class FakeDatasetAssociation: def __init__(self, dataset: Optional["Dataset"] = None) -> None: self.dataset = dataset self.metadata: Dict = {} + self.has_deferred_data = 
False def get_file_name(self, sync_cache: bool = True) -> str: assert self.dataset diff --git a/test/unit/app/tools/test_wrappers.py b/test/unit/app/tools/test_wrappers.py index 746fef91f480..7fc20e8fa061 100644 --- a/test/unit/app/tools/test_wrappers.py +++ b/test/unit/app/tools/test_wrappers.py @@ -310,6 +310,7 @@ def __init__(self): self.extra_files_path = MOCK_DATASET_EXTRA_FILES_PATH self.ext = MOCK_DATASET_EXT self.tags = [] + self.has_deferred_data = False def get_file_name(self, sync_cache=True): return MOCK_DATASET_PATH From fd14e606e84c7e9cc0a6eecd91840369d6212dd0 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Wed, 30 Oct 2024 12:11:16 +0100 Subject: [PATCH 07/14] Fix handling for missing deferred input in ToolEvaluator --- lib/galaxy/tools/evaluation.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/galaxy/tools/evaluation.py b/lib/galaxy/tools/evaluation.py index 144a2d1cfaa9..5b278b91d9e6 100644 --- a/lib/galaxy/tools/evaluation.py +++ b/lib/galaxy/tools/evaluation.py @@ -333,7 +333,11 @@ def _should_materialize_deferred_input(self, input_name: str, input_value: Defer We can skip materializing some deferred datasets if the input can work with URIs that are prefixed with a known prefix set in `allow_uri_if_protocol`. """ - deferred_input = self.tool.inputs[input_name] + deferred_input = self.tool.inputs.get(input_name) + if not deferred_input: + # TODO: Can this ever happen? It seems like it happens in the test suite. 
+ # For example, in test_metadata_validator_on_deferred_input + return True if isinstance(deferred_input, DataToolParameter) and isinstance(input_value, model.DatasetInstance): source_uri = input_value.sources[0].source_uri or "" for prefix in deferred_input.allow_uri_if_protocol: From 678caa415f3167c8c6d6a2c3909058ba2a246479 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Wed, 30 Oct 2024 16:27:55 +0100 Subject: [PATCH 08/14] Add test for allow_uri_if_protocol with deferred input --- lib/galaxy_test/api/test_tools.py | 18 ++++++++++++++++++ .../parameters/gx_allow_uri_if_protocol.xml | 13 +++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 test/functional/tools/parameters/gx_allow_uri_if_protocol.xml diff --git a/lib/galaxy_test/api/test_tools.py b/lib/galaxy_test/api/test_tools.py index 002b9d873389..41d610f7b29e 100644 --- a/lib/galaxy_test/api/test_tools.py +++ b/lib/galaxy_test/api/test_tools.py @@ -2573,6 +2573,24 @@ def test_metadata_validator_can_fail_on_deferred_input(self, history_id): job_details = self.dataset_populator.get_job_details(job_id=job_id).json() assert job_details["state"] == "failed" + @skip_without_tool("gx_allow_uri_if_protocol") + def test_allow_uri_if_protocol_on_deferred_input(self, history_id): + source_uri = "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/simple_line.txt" + deferred_hda = self.dataset_populator.create_deferred_hda(history_id, source_uri, ext="txt") + + inputs = {"input1": dataset_to_param(deferred_hda)} + # The tool just returns the URI (or file path if it was materialized) as the output content + run_response = self.dataset_populator.run_tool( + tool_id="gx_allow_uri_if_protocol", inputs=inputs, history_id=history_id + ) + output = run_response["outputs"][0] + output_details = self.dataset_populator.get_history_dataset_details( + history_id, dataset=output, wait=True, assert_ok=True + ) + assert output_details["state"] == "ok" + 
output_content = self.dataset_populator.get_history_dataset_content(history_id, dataset=output) + assert output_content.strip() == source_uri.strip() + @skip_without_tool("cat1") def test_run_deferred_mapping(self, history_id: str): elements = [ diff --git a/test/functional/tools/parameters/gx_allow_uri_if_protocol.xml b/test/functional/tools/parameters/gx_allow_uri_if_protocol.xml new file mode 100644 index 000000000000..ab554b470f70 --- /dev/null +++ b/test/functional/tools/parameters/gx_allow_uri_if_protocol.xml @@ -0,0 +1,13 @@ + + '$output' + ]]> + + + + + + + + + From 18fb0a785e03e0463c92b056500d5525b4f229a2 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Thu, 31 Oct 2024 11:49:49 +0100 Subject: [PATCH 09/14] Enhance allow_uri_if_protocol to support wildcard '*' for deferred datasets --- lib/galaxy/tool_util/xsd/galaxy.xsd | 4 ++-- lib/galaxy/tools/evaluation.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/galaxy/tool_util/xsd/galaxy.xsd b/lib/galaxy/tool_util/xsd/galaxy.xsd index 9de63fe28d73..376ff0e8d1e5 100644 --- a/lib/galaxy/tool_util/xsd/galaxy.xsd +++ b/lib/galaxy/tool_util/xsd/galaxy.xsd @@ -4326,8 +4326,8 @@ This is useful when the tool can handle the URI directly. Example: ``` -You can specify multiple prefixes separated by comma. The URI will be passed to the tool -as is if it starts with any of the specified prefixes. +You can specify multiple prefixes separated by comma or use the wildcard '*' to always +treat deferred datasets as URIs. The source URI will be passed to the tool as is. This attribute is only valid for `data` parameters. 
]]> diff --git a/lib/galaxy/tools/evaluation.py b/lib/galaxy/tools/evaluation.py index 5b278b91d9e6..73dbeceaff39 100644 --- a/lib/galaxy/tools/evaluation.py +++ b/lib/galaxy/tools/evaluation.py @@ -341,7 +341,7 @@ def _should_materialize_deferred_input(self, input_name: str, input_value: Defer if isinstance(deferred_input, DataToolParameter) and isinstance(input_value, model.DatasetInstance): source_uri = input_value.sources[0].source_uri or "" for prefix in deferred_input.allow_uri_if_protocol: - if source_uri.startswith(prefix): + if prefix == "*" or source_uri.startswith(prefix): return False return True From 72735ebf961b56cf5dea4a4dab7b77d0462d020c Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Thu, 31 Oct 2024 11:57:47 +0100 Subject: [PATCH 10/14] Remove redundant check for missing deferred input in ToolEvaluator There might be cases with additional deferred inputs outside of the input definitions see test_metadata_validator_on_deferred_input. --- lib/galaxy/tools/evaluation.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib/galaxy/tools/evaluation.py b/lib/galaxy/tools/evaluation.py index 73dbeceaff39..8e9328c71408 100644 --- a/lib/galaxy/tools/evaluation.py +++ b/lib/galaxy/tools/evaluation.py @@ -334,10 +334,6 @@ def _should_materialize_deferred_input(self, input_name: str, input_value: Defer with a known prefix set in `allow_uri_if_protocol`. """ deferred_input = self.tool.inputs.get(input_name) - if not deferred_input: - # TODO: Can this ever happen? It seems like it happens in the test suite. 
- # For example, in test_metadata_validator_on_deferred_input - return True if isinstance(deferred_input, DataToolParameter) and isinstance(input_value, model.DatasetInstance): source_uri = input_value.sources[0].source_uri or "" for prefix in deferred_input.allow_uri_if_protocol: From 557e7bed8dcd9de8ed7aa2088a33eeeb34a4e2c4 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Sun, 3 Nov 2024 10:37:33 +0100 Subject: [PATCH 11/14] Add is_deferred property to DatasetFilenameWrapper For improved handling of deferred data --- lib/galaxy/tools/wrappers.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/galaxy/tools/wrappers.py b/lib/galaxy/tools/wrappers.py index 6ad72645d7bf..eb6ef0beeee0 100644 --- a/lib/galaxy/tools/wrappers.py +++ b/lib/galaxy/tools/wrappers.py @@ -456,6 +456,10 @@ def serialize(self, invalid_chars: Sequence[str] = ("/",)) -> Dict[str, Any]: def is_collection(self) -> bool: return False + @property + def is_deferred(self) -> bool: + return self.unsanitized.has_deferred_data + def is_of_type(self, *exts: str) -> bool: datatypes = [] if not self.datatypes_registry: @@ -474,7 +478,7 @@ def __str__(self) -> str: return self._path_or_uri() def _path_or_uri(self) -> str: - if self.unsanitized.has_deferred_data: + if self.is_deferred: return self.unsanitized.deferred_source_uri or "" if self.false_path is not None: return self.false_path From 605389f1f2dd35d82f369c6a78d799947d30ad19 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Sun, 3 Nov 2024 11:06:25 +0100 Subject: [PATCH 12/14] Expand documentation on allow_uri_if_protocol --- lib/galaxy/tool_util/xsd/galaxy.xsd | 22 +++++++++++++++++++ .../parameters/gx_allow_uri_if_protocol.xml | 13 ++++++++--- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/lib/galaxy/tool_util/xsd/galaxy.xsd b/lib/galaxy/tool_util/xsd/galaxy.xsd index 376ff0e8d1e5..69e35696617b 100644 --- 
a/lib/galaxy/tool_util/xsd/galaxy.xsd +++ b/lib/galaxy/tool_util/xsd/galaxy.xsd @@ -4330,6 +4330,28 @@ You can specify multiple prefixes separated by comma or use the wildcard '*' to treat deferred datasets as URIs. The source URI will be passed to the tool as is. This attribute is only valid for `data` parameters. + +### Handling the input in the tool + +Since the input can be a regular file or a URI, the tool should be able to handle both cases. +The tool should check if the input ``is_deferred`` and if so, treat it as a URI, otherwise +it should treat it as a regular file. Please note that only deferred datasets with the specified +protocol will be passed as URIs, the rest will be materialized as files. + +Here is an example command section that handles the above sample input: + +```python + + ## We should handle the case where the input must be treated as a URI with a specific protocol. + #if $input.is_deferred: + ## Here, the input is a deferred dataset whose source URI has the protocol 'https', 'http' or 's3'. + echo '$input' > '$output' ## The output will be the source URI of the input. + #else: + ## Here, the input is a regular dataset or a materialized dataset in case of a + ## deferred dataset whose source URI has a protocol different than 'https', 'http' or 's3'. + cp '$input' '$output' ## The output will be a copy of the input content. + +``` ]]> diff --git a/test/functional/tools/parameters/gx_allow_uri_if_protocol.xml b/test/functional/tools/parameters/gx_allow_uri_if_protocol.xml index ab554b470f70..7821b49718fb 100644 --- a/test/functional/tools/parameters/gx_allow_uri_if_protocol.xml +++ b/test/functional/tools/parameters/gx_allow_uri_if_protocol.xml @@ -1,6 +1,15 @@ '$output' + ## We should handle the case where the input must be treated as a URI with a specific protocol. + #if $input.is_deferred: + ## Here, the input is a deferred dataset which source URI has the protocol 'https'. + ## The ouput will be the source URI of the input. 
+ echo '$input' > '$output' + #else: + ## Here, the input is a regular dataset or a materialized dataset in case of a + ## deferred dataset which source URI has a protocol different than 'https'. + ## The output will be a copy of the input content. + cp '$input' '$output' ]]> @@ -8,6 +17,4 @@ echo '$input' > '$output' - - From 53e8a237472fd0a7afeb6d44962a03dd4337c295 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Sun, 3 Nov 2024 12:43:24 +0100 Subject: [PATCH 13/14] Refactor deferred source URI handling in DatasetInstance and ToolEvaluator --- lib/galaxy/model/__init__.py | 2 +- lib/galaxy/tools/evaluation.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 7656a98fbed1..02be7ec606da 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -4688,7 +4688,7 @@ def has_deferred_data(self): @property def deferred_source_uri(self): - if self.has_deferred_data: + if self.has_deferred_data and self.sources: # Assuming the first source is the deferred source return self.sources[0].source_uri return None diff --git a/lib/galaxy/tools/evaluation.py b/lib/galaxy/tools/evaluation.py index 8e9328c71408..582bd65be06e 100644 --- a/lib/galaxy/tools/evaluation.py +++ b/lib/galaxy/tools/evaluation.py @@ -335,7 +335,7 @@ def _should_materialize_deferred_input(self, input_name: str, input_value: Defer """ deferred_input = self.tool.inputs.get(input_name) if isinstance(deferred_input, DataToolParameter) and isinstance(input_value, model.DatasetInstance): - source_uri = input_value.sources[0].source_uri or "" + source_uri = input_value.deferred_source_uri or "" for prefix in deferred_input.allow_uri_if_protocol: if prefix == "*" or source_uri.startswith(prefix): return False From 2bdcec0dda2d9e862fa7a043de6b0207223e43ca Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Sun, 3 Nov 2024 
12:47:15 +0100 Subject: [PATCH 14/14] Add test for gx_allow_uri_if_protocol with deferred collections --- lib/galaxy_test/api/test_tools.py | 47 +++++++++++++++++++ .../parameters/gx_allow_uri_if_protocol.xml | 25 +++++----- 2 files changed, 61 insertions(+), 11 deletions(-) diff --git a/lib/galaxy_test/api/test_tools.py b/lib/galaxy_test/api/test_tools.py index 41d610f7b29e..5842fb6a4e9c 100644 --- a/lib/galaxy_test/api/test_tools.py +++ b/lib/galaxy_test/api/test_tools.py @@ -2591,6 +2591,53 @@ def test_allow_uri_if_protocol_on_deferred_input(self, history_id): output_content = self.dataset_populator.get_history_dataset_content(history_id, dataset=output) assert output_content.strip() == source_uri.strip() + @skip_without_tool("gx_allow_uri_if_protocol") + def test_allow_uri_if_protocol_on_collection_with_deferred(self, history_id): + source_uris = [ + "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/simple_line.txt", + "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/simple_line_alternative.txt", + ] + elements = [ + { + "src": "url", + "url": source_uri, + "deferred": True, + "ext": "txt", + } + for source_uri in source_uris + ] + targets = [ + { + "destination": {"type": "hdca"}, + "elements": elements, + "collection_type": "list", + "name": "deferred list", + } + ] + payload = { + "history_id": history_id, + "targets": json.dumps(targets), + } + fetch_response = self.dataset_populator.fetch(payload, wait=True) + dataset_collection = self.dataset_collection_populator.wait_for_fetched_collection(fetch_response) + hdca_id = dataset_collection["id"] + inputs = { + "input1": {"batch": True, "values": [{"src": "hdca", "id": hdca_id}]}, + } + run_response = self.dataset_populator.run_tool( + tool_id="gx_allow_uri_if_protocol", inputs=inputs, history_id=history_id + ) + hdca_id = run_response["implicit_collections"][0]["id"] + dataset_collection = self.dataset_populator.get_history_collection_details(history_id, id=hdca_id) + 
elements = dataset_collection["elements"] + assert len(elements) == 2 + for element in elements: + object = element["object"] + assert isinstance(object, dict) + assert object["state"] == "ok" + output_content = self.dataset_populator.get_history_dataset_content(history_id, dataset=object) + assert output_content.strip() in source_uris + @skip_without_tool("cat1") def test_run_deferred_mapping(self, history_id: str): elements = [ diff --git a/test/functional/tools/parameters/gx_allow_uri_if_protocol.xml b/test/functional/tools/parameters/gx_allow_uri_if_protocol.xml index 7821b49718fb..b6cd4d6d4205 100644 --- a/test/functional/tools/parameters/gx_allow_uri_if_protocol.xml +++ b/test/functional/tools/parameters/gx_allow_uri_if_protocol.xml @@ -1,18 +1,21 @@ '$output' - #else: - ## Here, the input is a regular dataset or a materialized dataset in case of a - ## deferred dataset which source URI has a protocol different than 'https'. - ## The output will be a copy of the input content. - cp '$input' '$output' + #for $input in $input1: + ## We should handle the case where the input must be treated as a URI with a specific protocol. + #if $input.is_deferred: + ## Here, the input is a deferred dataset which source URI has the protocol 'https'. + ## We append the URI to the output file. + echo '$input' >> '$output' + #else: + ## Here, the input is a regular dataset or a materialized dataset in case of a + ## deferred dataset which source URI has a protocol different than 'https'. + ## We append the content of the dataset to the output file. + cat '$input' >> '$output' + #end if + #end for ]]> - +