From 3e31b44af3d64b2a97f3ca6f7674bc82ce147399 Mon Sep 17 00:00:00 2001
From: davelopez <46503462+davelopez@users.noreply.github.com>
Date: Tue, 29 Oct 2024 18:33:53 +0100
Subject: [PATCH 01/14] Allow deferred datasets to behave as URI
---
lib/galaxy/model/__init__.py | 7 +++++++
lib/galaxy/tool_util/xsd/galaxy.xsd | 19 +++++++++++++++++++
lib/galaxy/tools/evaluation.py | 16 +++++++++++++++-
lib/galaxy/tools/parameters/basic.py | 7 +++++++
lib/galaxy/tools/wrappers.py | 5 +++++
5 files changed, 53 insertions(+), 1 deletion(-)
diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py
index bcf956d2aae4..cc3252aadd76 100644
--- a/lib/galaxy/model/__init__.py
+++ b/lib/galaxy/model/__init__.py
@@ -4685,6 +4685,13 @@ def ext(self):
def has_deferred_data(self):
return self.dataset.state == Dataset.states.DEFERRED
+ @property
+ def deferred_source_uri(self):
+ if self.has_deferred_data:
+ # Assuming the first source is the deferred source
+ return self.sources[0].source_uri
+ return None
+
@property
def state(self):
# self._state holds state that should only affect this particular dataset association, not the dataset state itself
diff --git a/lib/galaxy/tool_util/xsd/galaxy.xsd b/lib/galaxy/tool_util/xsd/galaxy.xsd
index c8090c919568..c4dfe86e0da0 100644
--- a/lib/galaxy/tool_util/xsd/galaxy.xsd
+++ b/lib/galaxy/tool_util/xsd/galaxy.xsd
@@ -4314,6 +4314,25 @@ template if the parameter is ``false`` or not checked by the user. Used only
when the ``type`` attribute value is ``boolean``.
+
+
+
+```
+
+You can specify multiple prefixes separated by `|`. The URI will be passed to the tool
+as is if it starts with any of the specified prefixes.
+
+This attribute is only valid for `data` parameters.
+ ]]>
+
+
diff --git a/lib/galaxy/tools/evaluation.py b/lib/galaxy/tools/evaluation.py
index 28a4b27c4b0d..67d486627622 100644
--- a/lib/galaxy/tools/evaluation.py
+++ b/lib/galaxy/tools/evaluation.py
@@ -314,7 +314,8 @@ def _deferred_objects(
deferred_objects: Dict[str, DeferrableObjectsT] = {}
for key, value in input_datasets.items():
if value is not None and value.state == model.Dataset.states.DEFERRED:
- deferred_objects[key] = value
+ if self._should_materialize_deferred_input(key, value):
+ deferred_objects[key] = value
def find_deferred_collections(input, value, context, prefixed_name=None, **kwargs):
if (
@@ -327,6 +328,19 @@ def find_deferred_collections(input, value, context, prefixed_name=None, **kwarg
return deferred_objects
+ def _should_materialize_deferred_input(self, input_name: str, input_value: DeferrableObjectsT) -> bool:
+ """
+ We can skip materializing some deferred datasets if the input can work with URIs that are prefixed
+ with a known prefix set in `allow_uri_if_prefixed`.
+ """
+ deferred_input = self.tool.inputs[input_name]
+ if isinstance(deferred_input, DataToolParameter) and isinstance(input_value, model.DatasetInstance):
+ source_uri = input_value.sources[0].source_uri or ""
+ for prefix in deferred_input.allow_uri_if_prefixed:
+ if source_uri.startswith(prefix):
+ return False
+ return True
+
def __walk_inputs(self, inputs, input_values, func):
def do_walk(inputs, input_values):
"""
diff --git a/lib/galaxy/tools/parameters/basic.py b/lib/galaxy/tools/parameters/basic.py
index 7ed9c657691a..bc41cdf0bd75 100644
--- a/lib/galaxy/tools/parameters/basic.py
+++ b/lib/galaxy/tools/parameters/basic.py
@@ -2068,6 +2068,7 @@ def __init__(self, tool, input_source, trans=None):
self.tag = tag
self.is_dynamic = True
self._parse_options(input_source)
+ self._parse_allow_uri_if_prefixed(input_source)
# Load conversions required for the dataset input
self.conversions = []
self.default_object = input_source.parse_default()
@@ -2089,6 +2090,12 @@ def __init__(self, tool, input_source, trans=None):
)
self.conversions.append((name, conv_extension, [conv_type]))
+ def _parse_allow_uri_if_prefixed(self, input_source):
+ # In case of deferred datasets, if the source URI is prefixed with one of the values in this list,
+ # the dataset will behave as an URI and will not be materialized into a file path.
+ allow_uri_if_prefixed = input_source.get("allow_uri_if_prefixed", None)
+ self.allow_uri_if_prefixed = allow_uri_if_prefixed.split("|") if allow_uri_if_prefixed else []
+
def from_json(self, value, trans, other_values=None):
session = trans.sa_session
diff --git a/lib/galaxy/tools/wrappers.py b/lib/galaxy/tools/wrappers.py
index 6dd47e7f9bb4..6ad72645d7bf 100644
--- a/lib/galaxy/tools/wrappers.py
+++ b/lib/galaxy/tools/wrappers.py
@@ -471,6 +471,11 @@ def is_of_type(self, *exts: str) -> bool:
return self.dataset.datatype.matches_any(datatypes)
def __str__(self) -> str:
+ return self._path_or_uri()
+
+ def _path_or_uri(self) -> str:
+ if self.unsanitized.has_deferred_data:
+ return self.unsanitized.deferred_source_uri or ""
if self.false_path is not None:
return self.false_path
else:
From 154a39047836dcaec449b00c4f552253cc0e0b76 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?David=20L=C3=B3pez?=
<46503462+davelopez@users.noreply.github.com>
Date: Tue, 29 Oct 2024 19:01:16 +0100
Subject: [PATCH 02/14] Fix typo
Co-authored-by: Nicola Soranzo
---
lib/galaxy/tool_util/xsd/galaxy.xsd | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/galaxy/tool_util/xsd/galaxy.xsd b/lib/galaxy/tool_util/xsd/galaxy.xsd
index c4dfe86e0da0..8b9a185a4c17 100644
--- a/lib/galaxy/tool_util/xsd/galaxy.xsd
+++ b/lib/galaxy/tool_util/xsd/galaxy.xsd
@@ -4317,7 +4317,7 @@ when the ``type`` attribute value is ``boolean``.
Date: Wed, 30 Oct 2024 10:10:17 +0100
Subject: [PATCH 03/14] Rename allow_uri_if_prefixed to allow_uri_if_protocol
Co-authored-by: Nicola Soranzo
---
lib/galaxy/tool_util/xsd/galaxy.xsd | 6 +++---
lib/galaxy/tools/evaluation.py | 4 ++--
lib/galaxy/tools/parameters/basic.py | 8 ++++----
3 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/lib/galaxy/tool_util/xsd/galaxy.xsd b/lib/galaxy/tool_util/xsd/galaxy.xsd
index 8b9a185a4c17..3cfb3823b5da 100644
--- a/lib/galaxy/tool_util/xsd/galaxy.xsd
+++ b/lib/galaxy/tool_util/xsd/galaxy.xsd
@@ -4314,16 +4314,16 @@ template if the parameter is ``false`` or not checked by the user. Used only
when the ``type`` attribute value is ``boolean``.
-
+
+
```
You can specify multiple prefixes separated by `|`. The URI will be passed to the tool
diff --git a/lib/galaxy/tools/evaluation.py b/lib/galaxy/tools/evaluation.py
index 67d486627622..144a2d1cfaa9 100644
--- a/lib/galaxy/tools/evaluation.py
+++ b/lib/galaxy/tools/evaluation.py
@@ -331,12 +331,12 @@ def find_deferred_collections(input, value, context, prefixed_name=None, **kwarg
def _should_materialize_deferred_input(self, input_name: str, input_value: DeferrableObjectsT) -> bool:
"""
We can skip materializing some deferred datasets if the input can work with URIs that are prefixed
- with a known prefix set in `allow_uri_if_prefixed`.
+ with a known prefix set in `allow_uri_if_protocol`.
"""
deferred_input = self.tool.inputs[input_name]
if isinstance(deferred_input, DataToolParameter) and isinstance(input_value, model.DatasetInstance):
source_uri = input_value.sources[0].source_uri or ""
- for prefix in deferred_input.allow_uri_if_prefixed:
+ for prefix in deferred_input.allow_uri_if_protocol:
if source_uri.startswith(prefix):
return False
return True
diff --git a/lib/galaxy/tools/parameters/basic.py b/lib/galaxy/tools/parameters/basic.py
index bc41cdf0bd75..ce9c6d086837 100644
--- a/lib/galaxy/tools/parameters/basic.py
+++ b/lib/galaxy/tools/parameters/basic.py
@@ -2068,7 +2068,7 @@ def __init__(self, tool, input_source, trans=None):
self.tag = tag
self.is_dynamic = True
self._parse_options(input_source)
- self._parse_allow_uri_if_prefixed(input_source)
+ self._parse_allow_uri_if_protocol(input_source)
# Load conversions required for the dataset input
self.conversions = []
self.default_object = input_source.parse_default()
@@ -2090,11 +2090,11 @@ def __init__(self, tool, input_source, trans=None):
)
self.conversions.append((name, conv_extension, [conv_type]))
- def _parse_allow_uri_if_prefixed(self, input_source):
+ def _parse_allow_uri_if_protocol(self, input_source):
# In case of deferred datasets, if the source URI is prefixed with one of the values in this list,
# the dataset will behave as an URI and will not be materialized into a file path.
- allow_uri_if_prefixed = input_source.get("allow_uri_if_prefixed", None)
- self.allow_uri_if_prefixed = allow_uri_if_prefixed.split("|") if allow_uri_if_prefixed else []
+ allow_uri_if_protocol = input_source.get("allow_uri_if_protocol", None)
+ self.allow_uri_if_protocol = allow_uri_if_protocol.split("|") if allow_uri_if_protocol else []
def from_json(self, value, trans, other_values=None):
session = trans.sa_session
From f1daa44d473b430311d2aeb670cf9a132b5d8d6c Mon Sep 17 00:00:00 2001
From: davelopez <46503462+davelopez@users.noreply.github.com>
Date: Wed, 30 Oct 2024 10:16:50 +0100
Subject: [PATCH 04/14] Use commas to list protocols in allow_uri_if_protocol
Co-authored-by: Bjoern Gruening
---
lib/galaxy/tool_util/xsd/galaxy.xsd | 4 ++--
lib/galaxy/tools/parameters/basic.py | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/lib/galaxy/tool_util/xsd/galaxy.xsd b/lib/galaxy/tool_util/xsd/galaxy.xsd
index 3cfb3823b5da..9de63fe28d73 100644
--- a/lib/galaxy/tool_util/xsd/galaxy.xsd
+++ b/lib/galaxy/tool_util/xsd/galaxy.xsd
@@ -4323,10 +4323,10 @@ can be used to avoid materialization and pass the URI as is to the tool.
This is useful when the tool can handle the URI directly. Example:
```xml
-
+
```
-You can specify multiple prefixes separated by `|`. The URI will be passed to the tool
+You can specify multiple prefixes separated by comma. The URI will be passed to the tool
as is if it starts with any of the specified prefixes.
This attribute is only valid for `data` parameters.
diff --git a/lib/galaxy/tools/parameters/basic.py b/lib/galaxy/tools/parameters/basic.py
index ce9c6d086837..d3bb7f119c58 100644
--- a/lib/galaxy/tools/parameters/basic.py
+++ b/lib/galaxy/tools/parameters/basic.py
@@ -2094,7 +2094,7 @@ def _parse_allow_uri_if_protocol(self, input_source):
# In case of deferred datasets, if the source URI is prefixed with one of the values in this list,
# the dataset will behave as an URI and will not be materialized into a file path.
allow_uri_if_protocol = input_source.get("allow_uri_if_protocol", None)
- self.allow_uri_if_protocol = allow_uri_if_protocol.split("|") if allow_uri_if_protocol else []
+ self.allow_uri_if_protocol = allow_uri_if_protocol.split(",") if allow_uri_if_protocol else []
def from_json(self, value, trans, other_values=None):
session = trans.sa_session
From b3a70863fcb3850511757d2026d6d9fcb400410e Mon Sep 17 00:00:00 2001
From: davelopez <46503462+davelopez@users.noreply.github.com>
Date: Wed, 30 Oct 2024 11:41:01 +0100
Subject: [PATCH 05/14] Add test for parsing allow_uri_if_protocol
---
test/unit/app/tools/test_parameter_parsing.py | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/test/unit/app/tools/test_parameter_parsing.py b/test/unit/app/tools/test_parameter_parsing.py
index 842e7df91844..e33e183fc962 100644
--- a/test/unit/app/tools/test_parameter_parsing.py
+++ b/test/unit/app/tools/test_parameter_parsing.py
@@ -452,3 +452,12 @@ def test_tool_collection(self):
)
assert param.type == "data_collection"
assert param.collection_types == ["list", "list:paired"]
+
+ def test_data_allow_uri_if_protocol(self):
+ param = self._parameter_for(
+ xml="""
+
+
+ """
+ )
+ assert param.allow_uri_if_protocol == ["https", "s3"]
From 305dd2b181562b9a94dc0a514ce88f0cef53db5d Mon Sep 17 00:00:00 2001
From: davelopez <46503462+davelopez@users.noreply.github.com>
Date: Wed, 30 Oct 2024 11:45:53 +0100
Subject: [PATCH 06/14] Add `has_deferred_data` flag to MockDataset and
FakeDatasetAssociation
---
lib/galaxy/model/__init__.py | 1 +
test/unit/app/tools/test_wrappers.py | 1 +
2 files changed, 2 insertions(+)
diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py
index cc3252aadd76..7656a98fbed1 100644
--- a/lib/galaxy/model/__init__.py
+++ b/lib/galaxy/model/__init__.py
@@ -2737,6 +2737,7 @@ class FakeDatasetAssociation:
def __init__(self, dataset: Optional["Dataset"] = None) -> None:
self.dataset = dataset
self.metadata: Dict = {}
+ self.has_deferred_data = False
def get_file_name(self, sync_cache: bool = True) -> str:
assert self.dataset
diff --git a/test/unit/app/tools/test_wrappers.py b/test/unit/app/tools/test_wrappers.py
index 746fef91f480..7fc20e8fa061 100644
--- a/test/unit/app/tools/test_wrappers.py
+++ b/test/unit/app/tools/test_wrappers.py
@@ -310,6 +310,7 @@ def __init__(self):
self.extra_files_path = MOCK_DATASET_EXTRA_FILES_PATH
self.ext = MOCK_DATASET_EXT
self.tags = []
+ self.has_deferred_data = False
def get_file_name(self, sync_cache=True):
return MOCK_DATASET_PATH
From fd14e606e84c7e9cc0a6eecd91840369d6212dd0 Mon Sep 17 00:00:00 2001
From: davelopez <46503462+davelopez@users.noreply.github.com>
Date: Wed, 30 Oct 2024 12:11:16 +0100
Subject: [PATCH 07/14] Fix handling for missing deferred input in
ToolEvaluator
---
lib/galaxy/tools/evaluation.py | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/lib/galaxy/tools/evaluation.py b/lib/galaxy/tools/evaluation.py
index 144a2d1cfaa9..5b278b91d9e6 100644
--- a/lib/galaxy/tools/evaluation.py
+++ b/lib/galaxy/tools/evaluation.py
@@ -333,7 +333,11 @@ def _should_materialize_deferred_input(self, input_name: str, input_value: Defer
We can skip materializing some deferred datasets if the input can work with URIs that are prefixed
with a known prefix set in `allow_uri_if_protocol`.
"""
- deferred_input = self.tool.inputs[input_name]
+ deferred_input = self.tool.inputs.get(input_name)
+ if not deferred_input:
+ # TODO: Can this ever happen? It seems like it happens in the test suite.
+ # For example, in test_metadata_validator_on_deferred_input
+ return True
if isinstance(deferred_input, DataToolParameter) and isinstance(input_value, model.DatasetInstance):
source_uri = input_value.sources[0].source_uri or ""
for prefix in deferred_input.allow_uri_if_protocol:
From 678caa415f3167c8c6d6a2c3909058ba2a246479 Mon Sep 17 00:00:00 2001
From: davelopez <46503462+davelopez@users.noreply.github.com>
Date: Wed, 30 Oct 2024 16:27:55 +0100
Subject: [PATCH 08/14] Add test for allow_uri_if_protocol with deferred input
---
lib/galaxy_test/api/test_tools.py | 18 ++++++++++++++++++
.../parameters/gx_allow_uri_if_protocol.xml | 13 +++++++++++++
2 files changed, 31 insertions(+)
create mode 100644 test/functional/tools/parameters/gx_allow_uri_if_protocol.xml
diff --git a/lib/galaxy_test/api/test_tools.py b/lib/galaxy_test/api/test_tools.py
index 002b9d873389..41d610f7b29e 100644
--- a/lib/galaxy_test/api/test_tools.py
+++ b/lib/galaxy_test/api/test_tools.py
@@ -2573,6 +2573,24 @@ def test_metadata_validator_can_fail_on_deferred_input(self, history_id):
job_details = self.dataset_populator.get_job_details(job_id=job_id).json()
assert job_details["state"] == "failed"
+ @skip_without_tool("gx_allow_uri_if_protocol")
+ def test_allow_uri_if_protocol_on_deferred_input(self, history_id):
+ source_uri = "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/simple_line.txt"
+ deferred_hda = self.dataset_populator.create_deferred_hda(history_id, source_uri, ext="txt")
+
+ inputs = {"input1": dataset_to_param(deferred_hda)}
+ # The tool just returns the URI (or file path if it was materialized) as the output content
+ run_response = self.dataset_populator.run_tool(
+ tool_id="gx_allow_uri_if_protocol", inputs=inputs, history_id=history_id
+ )
+ output = run_response["outputs"][0]
+ output_details = self.dataset_populator.get_history_dataset_details(
+ history_id, dataset=output, wait=True, assert_ok=True
+ )
+ assert output_details["state"] == "ok"
+ output_content = self.dataset_populator.get_history_dataset_content(history_id, dataset=output)
+ assert output_content.strip() == source_uri.strip()
+
@skip_without_tool("cat1")
def test_run_deferred_mapping(self, history_id: str):
elements = [
diff --git a/test/functional/tools/parameters/gx_allow_uri_if_protocol.xml b/test/functional/tools/parameters/gx_allow_uri_if_protocol.xml
new file mode 100644
index 000000000000..ab554b470f70
--- /dev/null
+++ b/test/functional/tools/parameters/gx_allow_uri_if_protocol.xml
@@ -0,0 +1,13 @@
+
+ '$output'
+ ]]>
+
+
+
+
+
+
+
+
+
From 18fb0a785e03e0463c92b056500d5525b4f229a2 Mon Sep 17 00:00:00 2001
From: davelopez <46503462+davelopez@users.noreply.github.com>
Date: Thu, 31 Oct 2024 11:49:49 +0100
Subject: [PATCH 09/14] Enhance allow_uri_if_protocol to support wildcard '*'
for deferred datasets
---
lib/galaxy/tool_util/xsd/galaxy.xsd | 4 ++--
lib/galaxy/tools/evaluation.py | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/lib/galaxy/tool_util/xsd/galaxy.xsd b/lib/galaxy/tool_util/xsd/galaxy.xsd
index 9de63fe28d73..376ff0e8d1e5 100644
--- a/lib/galaxy/tool_util/xsd/galaxy.xsd
+++ b/lib/galaxy/tool_util/xsd/galaxy.xsd
@@ -4326,8 +4326,8 @@ This is useful when the tool can handle the URI directly. Example:
```
-You can specify multiple prefixes separated by comma. The URI will be passed to the tool
-as is if it starts with any of the specified prefixes.
+You can specify multiple prefixes separated by commas or use the wildcard '*' to always
+treat deferred datasets as URIs. The source URI will be passed to the tool as is.
This attribute is only valid for `data` parameters.
]]>
diff --git a/lib/galaxy/tools/evaluation.py b/lib/galaxy/tools/evaluation.py
index 5b278b91d9e6..73dbeceaff39 100644
--- a/lib/galaxy/tools/evaluation.py
+++ b/lib/galaxy/tools/evaluation.py
@@ -341,7 +341,7 @@ def _should_materialize_deferred_input(self, input_name: str, input_value: Defer
if isinstance(deferred_input, DataToolParameter) and isinstance(input_value, model.DatasetInstance):
source_uri = input_value.sources[0].source_uri or ""
for prefix in deferred_input.allow_uri_if_protocol:
- if source_uri.startswith(prefix):
+ if prefix == "*" or source_uri.startswith(prefix):
return False
return True
From 72735ebf961b56cf5dea4a4dab7b77d0462d020c Mon Sep 17 00:00:00 2001
From: davelopez <46503462+davelopez@users.noreply.github.com>
Date: Thu, 31 Oct 2024 11:57:47 +0100
Subject: [PATCH 10/14] Remove redundant check for missing deferred input in
ToolEvaluator
There might be cases with additional deferred inputs outside of the input definitions; see test_metadata_validator_on_deferred_input.
---
lib/galaxy/tools/evaluation.py | 4 ----
1 file changed, 4 deletions(-)
diff --git a/lib/galaxy/tools/evaluation.py b/lib/galaxy/tools/evaluation.py
index 73dbeceaff39..8e9328c71408 100644
--- a/lib/galaxy/tools/evaluation.py
+++ b/lib/galaxy/tools/evaluation.py
@@ -334,10 +334,6 @@ def _should_materialize_deferred_input(self, input_name: str, input_value: Defer
with a known prefix set in `allow_uri_if_protocol`.
"""
deferred_input = self.tool.inputs.get(input_name)
- if not deferred_input:
- # TODO: Can this ever happen? It seems like it happens in the test suite.
- # For example, in test_metadata_validator_on_deferred_input
- return True
if isinstance(deferred_input, DataToolParameter) and isinstance(input_value, model.DatasetInstance):
source_uri = input_value.sources[0].source_uri or ""
for prefix in deferred_input.allow_uri_if_protocol:
From 557e7bed8dcd9de8ed7aa2088a33eeeb34a4e2c4 Mon Sep 17 00:00:00 2001
From: davelopez <46503462+davelopez@users.noreply.github.com>
Date: Sun, 3 Nov 2024 10:37:33 +0100
Subject: [PATCH 11/14] Add is_deferred property to DatasetFilenameWrapper
For improved handling of deferred data
---
lib/galaxy/tools/wrappers.py | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/lib/galaxy/tools/wrappers.py b/lib/galaxy/tools/wrappers.py
index 6ad72645d7bf..eb6ef0beeee0 100644
--- a/lib/galaxy/tools/wrappers.py
+++ b/lib/galaxy/tools/wrappers.py
@@ -456,6 +456,10 @@ def serialize(self, invalid_chars: Sequence[str] = ("/",)) -> Dict[str, Any]:
def is_collection(self) -> bool:
return False
+ @property
+ def is_deferred(self) -> bool:
+ return self.unsanitized.has_deferred_data
+
def is_of_type(self, *exts: str) -> bool:
datatypes = []
if not self.datatypes_registry:
@@ -474,7 +478,7 @@ def __str__(self) -> str:
return self._path_or_uri()
def _path_or_uri(self) -> str:
- if self.unsanitized.has_deferred_data:
+ if self.is_deferred:
return self.unsanitized.deferred_source_uri or ""
if self.false_path is not None:
return self.false_path
From 605389f1f2dd35d82f369c6a78d799947d30ad19 Mon Sep 17 00:00:00 2001
From: davelopez <46503462+davelopez@users.noreply.github.com>
Date: Sun, 3 Nov 2024 11:06:25 +0100
Subject: [PATCH 12/14] Expand documentation on allow_uri_if_protocol
---
lib/galaxy/tool_util/xsd/galaxy.xsd | 22 +++++++++++++++++++
.../parameters/gx_allow_uri_if_protocol.xml | 13 ++++++++---
2 files changed, 32 insertions(+), 3 deletions(-)
diff --git a/lib/galaxy/tool_util/xsd/galaxy.xsd b/lib/galaxy/tool_util/xsd/galaxy.xsd
index 376ff0e8d1e5..69e35696617b 100644
--- a/lib/galaxy/tool_util/xsd/galaxy.xsd
+++ b/lib/galaxy/tool_util/xsd/galaxy.xsd
@@ -4330,6 +4330,28 @@ You can specify multiple prefixes separated by comma or use the wildcard '*' to
treat deferred datasets as URIs. The source URI will be passed to the tool as is.
This attribute is only valid for `data` parameters.
+
+### Handling the input in the tool
+
+Since the input can be a regular file or a URI, the tool should be able to handle both cases.
+The tool should check if the input ``is_deferred`` and, if so, treat it as a URI; otherwise,
+it should treat it as a regular file. Please note that only deferred datasets with the specified
+protocol will be passed as URIs; the rest will be materialized as files.
+
+Here is an example command section that handles the above sample input:
+
+```python
+
+ ## We should handle the case where the input must be treated as a URI with a specific protocol.
+ #if $input.is_deferred:
+ ## Here, the input is a deferred dataset whose source URI has the protocol 'https', 'http' or 's3'.
+ echo '$input' > '$output' ## The output will be the source URI of the input.
+ #else:
+ ## Here, the input is a regular dataset or a materialized dataset in case of a
+ ## deferred dataset whose source URI has a protocol other than 'https', 'http' or 's3'.
+ cp '$input' '$output' ## The output will be a copy of the input content.
+
+```
]]>
diff --git a/test/functional/tools/parameters/gx_allow_uri_if_protocol.xml b/test/functional/tools/parameters/gx_allow_uri_if_protocol.xml
index ab554b470f70..7821b49718fb 100644
--- a/test/functional/tools/parameters/gx_allow_uri_if_protocol.xml
+++ b/test/functional/tools/parameters/gx_allow_uri_if_protocol.xml
@@ -1,6 +1,15 @@
'$output'
+ ## We should handle the case where the input must be treated as a URI with a specific protocol.
+ #if $input.is_deferred:
+ ## Here, the input is a deferred dataset which source URI has the protocol 'https'.
+ ## The ouput will be the source URI of the input.
+ echo '$input' > '$output'
+ #else:
+ ## Here, the input is a regular dataset or a materialized dataset in case of a
+ ## deferred dataset which source URI has a protocol different than 'https'.
+ ## The output will be a copy of the input content.
+ cp '$input' '$output'
]]>
@@ -8,6 +17,4 @@ echo '$input' > '$output'
-
-
From 53e8a237472fd0a7afeb6d44962a03dd4337c295 Mon Sep 17 00:00:00 2001
From: davelopez <46503462+davelopez@users.noreply.github.com>
Date: Sun, 3 Nov 2024 12:43:24 +0100
Subject: [PATCH 13/14] Refactor deferred source URI handling in
DatasetInstance and ToolEvaluator
---
lib/galaxy/model/__init__.py | 2 +-
lib/galaxy/tools/evaluation.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py
index 7656a98fbed1..02be7ec606da 100644
--- a/lib/galaxy/model/__init__.py
+++ b/lib/galaxy/model/__init__.py
@@ -4688,7 +4688,7 @@ def has_deferred_data(self):
@property
def deferred_source_uri(self):
- if self.has_deferred_data:
+ if self.has_deferred_data and self.sources:
# Assuming the first source is the deferred source
return self.sources[0].source_uri
return None
diff --git a/lib/galaxy/tools/evaluation.py b/lib/galaxy/tools/evaluation.py
index 8e9328c71408..582bd65be06e 100644
--- a/lib/galaxy/tools/evaluation.py
+++ b/lib/galaxy/tools/evaluation.py
@@ -335,7 +335,7 @@ def _should_materialize_deferred_input(self, input_name: str, input_value: Defer
"""
deferred_input = self.tool.inputs.get(input_name)
if isinstance(deferred_input, DataToolParameter) and isinstance(input_value, model.DatasetInstance):
- source_uri = input_value.sources[0].source_uri or ""
+ source_uri = input_value.deferred_source_uri or ""
for prefix in deferred_input.allow_uri_if_protocol:
if prefix == "*" or source_uri.startswith(prefix):
return False
From 2bdcec0dda2d9e862fa7a043de6b0207223e43ca Mon Sep 17 00:00:00 2001
From: davelopez <46503462+davelopez@users.noreply.github.com>
Date: Sun, 3 Nov 2024 12:47:15 +0100
Subject: [PATCH 14/14] Add test for gx_allow_uri_if_protocol with deferred
collections
---
lib/galaxy_test/api/test_tools.py | 47 +++++++++++++++++++
.../parameters/gx_allow_uri_if_protocol.xml | 25 +++++-----
2 files changed, 61 insertions(+), 11 deletions(-)
diff --git a/lib/galaxy_test/api/test_tools.py b/lib/galaxy_test/api/test_tools.py
index 41d610f7b29e..5842fb6a4e9c 100644
--- a/lib/galaxy_test/api/test_tools.py
+++ b/lib/galaxy_test/api/test_tools.py
@@ -2591,6 +2591,53 @@ def test_allow_uri_if_protocol_on_deferred_input(self, history_id):
output_content = self.dataset_populator.get_history_dataset_content(history_id, dataset=output)
assert output_content.strip() == source_uri.strip()
+ @skip_without_tool("gx_allow_uri_if_protocol")
+ def test_allow_uri_if_protocol_on_collection_with_deferred(self, history_id):
+ source_uris = [
+ "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/simple_line.txt",
+ "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/simple_line_alternative.txt",
+ ]
+ elements = [
+ {
+ "src": "url",
+ "url": source_uri,
+ "deferred": True,
+ "ext": "txt",
+ }
+ for source_uri in source_uris
+ ]
+ targets = [
+ {
+ "destination": {"type": "hdca"},
+ "elements": elements,
+ "collection_type": "list",
+ "name": "deferred list",
+ }
+ ]
+ payload = {
+ "history_id": history_id,
+ "targets": json.dumps(targets),
+ }
+ fetch_response = self.dataset_populator.fetch(payload, wait=True)
+ dataset_collection = self.dataset_collection_populator.wait_for_fetched_collection(fetch_response)
+ hdca_id = dataset_collection["id"]
+ inputs = {
+ "input1": {"batch": True, "values": [{"src": "hdca", "id": hdca_id}]},
+ }
+ run_response = self.dataset_populator.run_tool(
+ tool_id="gx_allow_uri_if_protocol", inputs=inputs, history_id=history_id
+ )
+ hdca_id = run_response["implicit_collections"][0]["id"]
+ dataset_collection = self.dataset_populator.get_history_collection_details(history_id, id=hdca_id)
+ elements = dataset_collection["elements"]
+ assert len(elements) == 2
+ for element in elements:
+ object = element["object"]
+ assert isinstance(object, dict)
+ assert object["state"] == "ok"
+ output_content = self.dataset_populator.get_history_dataset_content(history_id, dataset=object)
+ assert output_content.strip() in source_uris
+
@skip_without_tool("cat1")
def test_run_deferred_mapping(self, history_id: str):
elements = [
diff --git a/test/functional/tools/parameters/gx_allow_uri_if_protocol.xml b/test/functional/tools/parameters/gx_allow_uri_if_protocol.xml
index 7821b49718fb..b6cd4d6d4205 100644
--- a/test/functional/tools/parameters/gx_allow_uri_if_protocol.xml
+++ b/test/functional/tools/parameters/gx_allow_uri_if_protocol.xml
@@ -1,18 +1,21 @@
'$output'
- #else:
- ## Here, the input is a regular dataset or a materialized dataset in case of a
- ## deferred dataset which source URI has a protocol different than 'https'.
- ## The output will be a copy of the input content.
- cp '$input' '$output'
+ #for $input in $input1:
+ ## We should handle the case where the input must be treated as a URI with a specific protocol.
+ #if $input.is_deferred:
+ ## Here, the input is a deferred dataset whose source URI has the protocol 'https'.
+ ## We append the URI to the output file.
+ echo '$input' >> '$output'
+ #else:
+ ## Here, the input is a regular dataset or a materialized dataset in case of a
+ ## deferred dataset whose source URI has a protocol other than 'https'.
+ ## We append the content of the dataset to the output file.
+ cat '$input' >> '$output'
+ #end if
+ #end for
]]>
-
+