diff --git a/.github/workflows/cwl_conformance.yaml b/.github/workflows/cwl_conformance.yaml
index 10fb215bf042..03aaf4825b23 100644
--- a/.github/workflows/cwl_conformance.yaml
+++ b/.github/workflows/cwl_conformance.yaml
@@ -18,7 +18,6 @@ concurrency:
jobs:
test:
name: Test
- if: ${{ false }}
runs-on: ubuntu-latest
continue-on-error: ${{ startsWith(matrix.marker, 'red') }}
strategy:
@@ -26,7 +25,10 @@ jobs:
matrix:
python-version: ['3.8']
marker: ['green', 'red and required', 'red and not required']
- conformance-version: ['cwl_conformance_v1_0'] #, 'cwl_conformance_v1_1', 'cwl_conformance_v1_2']
+ conformance-version: ['cwl_conformance_v1_0', 'cwl_conformance_v1_1', 'cwl_conformance_v1_2']
+ exclude:
+ - marker: red and required
+ conformance-version: cwl_conformance_v1_0
services:
postgres:
image: postgres:13
diff --git a/client/src/api/datasets.ts b/client/src/api/datasets.ts
index 6b8f0606537c..fb4809c8487a 100644
--- a/client/src/api/datasets.ts
+++ b/client/src/api/datasets.ts
@@ -67,6 +67,7 @@ export async function copyDataset(
// TODO: Investigate. These should be optional, but the API requires explicit null values?
type,
copy_elements: null,
+ fields: null,
hide_source_items: null,
instance_type: null,
},
diff --git a/client/src/api/schema/schema.ts b/client/src/api/schema/schema.ts
index 1e9ab944ad2e..1b441d740f2b 100644
--- a/client/src/api/schema/schema.ts
+++ b/client/src/api/schema/schema.ts
@@ -6955,6 +6955,12 @@ export interface components {
* @description List of elements that should be in the new collection.
*/
element_identifiers?: components["schemas"]["CollectionElementIdentifier"][] | null;
+ /**
+ * Fields
+ * @description List of fields to create for this collection. Set to 'auto' to guess fields from identifiers.
+ * @default []
+ */
+ fields: string | components["schemas"]["FieldDict"][] | null;
/**
* Folder Id
* @description The ID of the library folder that will contain the collection. Required if `instance_type=library`.
@@ -7147,6 +7153,12 @@ export interface components {
* @description List of elements that should be in the new collection.
*/
element_identifiers?: components["schemas"]["CollectionElementIdentifier"][] | null;
+ /**
+ * Fields
+ * @description List of fields to create for this collection. Set to 'auto' to guess fields from identifiers.
+ * @default []
+ */
+ fields: string | components["schemas"]["FieldDict"][] | null;
/**
* Folder Id
* @description The ID of the library folder that will contain the collection. Required if `instance_type=library`.
@@ -9097,6 +9109,13 @@ export interface components {
/** Hash Value */
hash_value: string;
};
+ /** FieldDict */
+ FieldDict: {
+ /** Name */
+ name: string;
+ /** Type */
+ type: string;
+ };
/** FileDataElement */
FileDataElement: {
/** Md5 */
diff --git a/client/src/components/History/model/queries.ts b/client/src/components/History/model/queries.ts
index 5cad0faee73e..dbd7bb44ba9b 100644
--- a/client/src/components/History/model/queries.ts
+++ b/client/src/components/History/model/queries.ts
@@ -86,6 +86,7 @@ export async function createDatasetCollection(history: HistorySummary, inputs =
copy_elements: true,
name: "list",
element_identifiers: [],
+ fields: "auto",
hide_source_items: true,
};
const payload = Object.assign({}, defaults, inputs);
diff --git a/doc/source/dev/cwl.md b/doc/source/dev/cwl.md
new file mode 100644
index 000000000000..a1cd882c3fd6
--- /dev/null
+++ b/doc/source/dev/cwl.md
@@ -0,0 +1,24 @@
+CWL import in Galaxy
+====================
+
+What is supported
+-----------------
+
+What is not supported
+---------------------
+
+Some CWL Expressions / Parameter references that do math on `$(resources.cores)`
+or similar will likely not work.
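+
+For example, arithmetic on the reported core count, as in the hypothetical
+fragment below, may not evaluate correctly: Galaxy builds the command line with
+a placeholder core count and substitutes `$GALAXY_SLOTS` only into the finished
+command line, so values computed from the placeholder are not translated back.
+
+```yaml
+# hypothetical fragment; the arithmetic needs InlineJavascriptRequirement
+requirements:
+  InlineJavascriptRequirement: {}
+arguments:
+  - valueFrom: $(runtime.cores * 2)
+    position: 1
+```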
+
+How to enable it?
+-----------------
+
+1. List paths to CWL tools in `tool_conf.xml` (see the example entry after this list).
+2. Set the following in `galaxy.yml`:
+
+ ```yaml
+ enable_beta_tool_formats: true
+ enable_beta_workflow_modules: true
+ check_upload_content: false
+ strict_cwl_validation: false
+ ```
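+
+A `tool_conf.xml` entry pointing at a CWL tool might look like the following
+(the section id/name and the tool path are placeholders for your own setup):
+
+```xml
+<section id="cwl_tools" name="CWL Tools">
+    <tool file="/path/to/tools/my_tool.cwl" />
+</section>
+```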
diff --git a/lib/galaxy/config/__init__.py b/lib/galaxy/config/__init__.py
index 1dc273bc7240..fddb34058b9d 100644
--- a/lib/galaxy/config/__init__.py
+++ b/lib/galaxy/config/__init__.py
@@ -931,6 +931,9 @@ def _process_config(self, kwargs: Dict[str, Any]) -> None:
else None
)
+ # TODO: migrate to schema.
+ # Whether CWL artifacts should be loaded with strict validation enabled.
+ self.strict_cwl_validation = string_as_bool(kwargs.get("strict_cwl_validation", "True"))
# These are not even beta - just experiments - don't use them unless
# you want yours tools to be broken in the future.
self.enable_beta_tool_formats = string_as_bool(kwargs.get("enable_beta_tool_formats", "False"))
diff --git a/lib/galaxy/config/sample/datatypes_conf.xml.sample b/lib/galaxy/config/sample/datatypes_conf.xml.sample
index 89024e5ace7b..ba6fb15af152 100644
--- a/lib/galaxy/config/sample/datatypes_conf.xml.sample
+++ b/lib/galaxy/config/sample/datatypes_conf.xml.sample
@@ -301,7 +301,7 @@
-
+
diff --git a/lib/galaxy/datatypes/converters/tar_to_directory.xml b/lib/galaxy/datatypes/converters/tar_to_directory.xml
index 0160746283da..850a4f67e640 100644
--- a/lib/galaxy/datatypes/converters/tar_to_directory.xml
+++ b/lib/galaxy/datatypes/converters/tar_to_directory.xml
@@ -1,18 +1,23 @@
-
+
galaxy-util
-
- mkdir '$output1.files_path';
- cd '$output1.files_path';
- python -c "from galaxy.util.compression_utils import CompressedFile; CompressedFile('$input1').extract('.');"
-
+
+
+ {"output1": {"created_from_basename": "${input1.created_from_basename}"}}
+
+
-
+
@@ -20,6 +25,6 @@
-
-
+
diff --git a/lib/galaxy/datatypes/registry.py b/lib/galaxy/datatypes/registry.py
index 0789b0dd9ee5..45b9eaf766c4 100644
--- a/lib/galaxy/datatypes/registry.py
+++ b/lib/galaxy/datatypes/registry.py
@@ -72,6 +72,7 @@ def __init__(self, config=None):
self.config = config
self.edam = edam
self.datatypes_by_extension: Dict[str, Data] = {}
+ self.datatypes_by_format = {}
self.datatypes_by_suffix_inferences = {}
self.mimetypes_by_extension = {}
self.datatype_converters = {}
@@ -269,13 +270,25 @@ def __import_module(full_path: str, datatype_module: str):
upload_warning_template = Template(upload_warning_el.text or "")
datatype_instance = datatype_class()
self.datatypes_by_extension[extension] = datatype_instance
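+ # Also index datatypes by EDAM format ("edam:" prefixed) so CWL format
+ # ontology URIs can be mapped to extensions; register only the datatype class
+ # that itself declares the edam_format, not subclasses that merely inherit it.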
+ if not datatype_class.is_subclass:
+ edam_format = datatype_class.edam_format
+ prefixed_format = f"edam:{edam_format}"
+ if prefixed_format not in self.datatypes_by_format:
+ register_datatype_by_format = True
+ for super_klass in datatype_class.__mro__[1:-1]:
+ super_edam_format = getattr(super_klass, "edam_format", None)
+ if super_edam_format == edam_format:
+ register_datatype_by_format = False
+ break
+ if register_datatype_by_format:
+ self.datatypes_by_format[prefixed_format] = datatype_instance
if mimetype is None:
# Use default mimetype per datatype specification.
mimetype = self.datatypes_by_extension[extension].get_mime()
self.mimetypes_by_extension[extension] = mimetype
if datatype_class.track_type:
self.available_tracks.append(extension)
- if display_in_upload and extension not in self.upload_file_formats:
+ if display_in_upload:
self.upload_file_formats.append(extension)
# Max file size cut off for setting optional metadata.
self.datatypes_by_extension[extension].max_optional_metadata_filesize = elem.get(
@@ -413,6 +426,7 @@ def __import_module(full_path: str, datatype_module: str):
override=override,
compressed_sniffers=compressed_sniffers,
)
+ self.upload_file_formats = list(set(self.upload_file_formats))
self.upload_file_formats.sort()
# Load build sites
if use_build_sites:
@@ -613,6 +627,20 @@ def get_datatype_by_extension(self, ext) -> Optional["Data"]:
"""Returns a datatype object based on an extension"""
return self.datatypes_by_extension.get(ext, None)
+ def get_datatype_by_format_ontology(self, ontology: str):
+ """Returns a datatype by format ontology"""
+ if "edamontology.org/" in ontology:
+ ontology = f"edam:{ontology.split('edamontology.org/')[1]}"
+ return self.datatypes_by_format.get(ontology)
+
+ def get_datatype_ext_by_format_ontology(self, ontology: str, only_uploadable: bool = False) -> Optional[str]:
+ """Returns a datatype by format ontology"""
+ datatype = self.get_datatype_by_format_ontology(ontology)
+ if datatype:
+ if not only_uploadable or datatype.file_ext in self.upload_file_formats:
+ return datatype.file_ext
+ return None
+
def change_datatype(self, data, ext):
if data.extension != ext:
data.extension = ext
diff --git a/lib/galaxy/jobs/__init__.py b/lib/galaxy/jobs/__init__.py
index 9e4d3b65376c..340080121983 100644
--- a/lib/galaxy/jobs/__init__.py
+++ b/lib/galaxy/jobs/__init__.py
@@ -1144,7 +1144,7 @@ def can_split(self):
@property
def is_cwl_job(self):
- return self.tool.tool_type == "cwl"
+ return self.tool.tool_type in ["galactic_cwl", "cwl"]
def get_job_runner_url(self):
log.warning(f"({self.job_id}) Job runner URLs are deprecated, use destinations instead.")
@@ -1776,8 +1776,9 @@ def _finish_dataset(
dataset.mark_unhidden()
elif not purged:
# If the tool was expected to set the extension, attempt to retrieve it
- if dataset.ext == "auto":
- dataset.extension = context.get("ext", "data")
+ context_ext = context.get("ext", "data")
+ if dataset.ext == "auto" or (dataset.ext == "data" and context_ext != "data"):
+ dataset.extension = context_ext
dataset.init_meta(copy_from=dataset)
# if a dataset was copied, it won't appear in our dictionary:
# either use the metadata from originating output dataset, or call set_meta on the copies
diff --git a/lib/galaxy/jobs/command_factory.py b/lib/galaxy/jobs/command_factory.py
index 3e920fa7f52b..9f60c9852263 100644
--- a/lib/galaxy/jobs/command_factory.py
+++ b/lib/galaxy/jobs/command_factory.py
@@ -100,19 +100,26 @@ def build_command(
external_command_shell = container.shell
else:
external_command_shell = shell
- externalized_commands = __externalize_commands(
- job_wrapper, external_command_shell, commands_builder, remote_command_params, container=container
- )
if container and modify_command_for_container:
- # Stop now and build command before handling metadata and copying
- # working directory files back. These should always happen outside
- # of docker container - no security implications when generating
- # metadata and means no need for Galaxy to be available to container
- # and not copying workdir outputs back means on can be more restrictive
- # of where container can write to in some circumstances.
- run_in_container_command = container.containerize_command(externalized_commands)
+ if job_wrapper.tool and not job_wrapper.tool.may_use_container_entry_point:
+ externalized_commands = __externalize_commands(
+ job_wrapper, external_command_shell, commands_builder, remote_command_params, container=container
+ )
+ # Stop now and build command before handling metadata and copying
+ # working directory files back. These should always happen outside
+ # of docker container - no security implications when generating
+ # metadata and means no need for Galaxy to be available to container
+ # and not copying workdir outputs back means one can be more restrictive
+ # of where container can write to in some circumstances.
+ run_in_container_command = container.containerize_command(externalized_commands)
+ else:
+ tool_commands = commands_builder.build()
+ run_in_container_command = container.containerize_command(tool_commands)
commands_builder = CommandsBuilder(run_in_container_command)
else:
+ externalized_commands = __externalize_commands(
+ job_wrapper, external_command_shell, commands_builder, remote_command_params, container=container
+ )
commands_builder = CommandsBuilder(externalized_commands)
# Galaxy writes I/O files to outputs, Pulsar uses metadata. metadata seems like
@@ -130,7 +137,13 @@ def build_command(
# Copy working and outputs before job submission so that these can be restored on resubmission
# xref https://github.com/galaxyproject/galaxy/issues/3289
- commands_builder.prepend_command(PREPARE_DIRS)
+ if not job_wrapper.is_cwl_job:
+ commands_builder.prepend_command(PREPARE_DIRS)
+ else:
+ # Can't do the rm -rf working for CWL jobs since we may have staged outputs
+ # into that directory. This does mean CWL is incompatible with job manager triggered
+ # retries - what can we do with that information?
+ commands_builder.prepend_command("mkdir -p outputs; cd working")
__handle_remote_command_line_building(commands_builder, job_wrapper, for_pulsar=for_pulsar)
diff --git a/lib/galaxy/jobs/runners/local.py b/lib/galaxy/jobs/runners/local.py
index eb1853eb01bf..5726080859aa 100644
--- a/lib/galaxy/jobs/runners/local.py
+++ b/lib/galaxy/jobs/runners/local.py
@@ -4,6 +4,7 @@
import datetime
import logging
+import math
import os
import subprocess
import tempfile
@@ -67,7 +68,16 @@ def _command_line(self, job_wrapper: "MinimalJobWrapper") -> Tuple[str, str]:
if slots:
slots_statement = f'GALAXY_SLOTS="{int(slots)}"; export GALAXY_SLOTS; GALAXY_SLOTS_CONFIGURED="1"; export GALAXY_SLOTS_CONFIGURED;'
else:
- slots_statement = 'GALAXY_SLOTS="1"; export GALAXY_SLOTS;'
+ cores_min = 1
+ if job_wrapper.tool:
+ try:
+ # In CWL 1.2 this can be a float, which we round up to the next whole number
+ cores_min = math.ceil(float(job_wrapper.tool.cores_min))
+ except ValueError:
+ # TODO: in CWL this can be an expression referencing runtime
+ # parameters, e.g. `$(inputs.special_file.size)`
+ pass
+ slots_statement = f'GALAXY_SLOTS="{cores_min}"; export GALAXY_SLOTS;'
job_id = job_wrapper.get_id_tag()
job_file = JobState.default_job_file(job_wrapper.working_directory, job_id)
diff --git a/lib/galaxy/managers/collections.py b/lib/galaxy/managers/collections.py
index a809f0214289..dd459064269a 100644
--- a/lib/galaxy/managers/collections.py
+++ b/lib/galaxy/managers/collections.py
@@ -175,6 +175,7 @@ def create(
flush=True,
completed_job=None,
output_name=None,
+ fields=None,
):
"""
PRECONDITION: security checks on ability to add to parent
@@ -199,6 +200,7 @@ def create(
hide_source_items=hide_source_items,
copy_elements=copy_elements,
history=history,
+ fields=fields,
)
implicit_inputs = []
@@ -242,8 +244,11 @@ def _create_instance_for_collection(
name=name,
)
assert isinstance(dataset_collection_instance, model.HistoryDatasetCollectionAssociation)
+
if implicit_inputs:
for input_name, input_collection in implicit_inputs:
+ if getattr(input_collection, "ephemeral", False):
+ input_collection = input_collection.persistent_object
dataset_collection_instance.add_implicit_input_collection(input_name, input_collection)
if implicit_output_name:
@@ -285,17 +290,20 @@ def create_dataset_collection(
hide_source_items=None,
copy_elements=False,
history=None,
+ fields=None,
):
# Make sure at least one of these is None.
assert element_identifiers is None or elements is None
-
if element_identifiers is None and elements is None:
raise RequestParameterInvalidException(ERROR_INVALID_ELEMENTS_SPECIFICATION)
if not collection_type:
raise RequestParameterInvalidException(ERROR_NO_COLLECTION_TYPE)
- collection_type_description = self.collection_type_descriptions.for_collection_type(collection_type)
+ collection_type_description = self.collection_type_descriptions.for_collection_type(
+ collection_type, fields=fields
+ )
has_subcollections = collection_type_description.has_subcollections()
+
# If we have elements, this is an internal request, don't need to load
# objects from identifiers.
if elements is None:
@@ -319,8 +327,9 @@ def create_dataset_collection(
if elements is not self.ELEMENTS_UNINITIALIZED:
type_plugin = collection_type_description.rank_type_plugin()
- dataset_collection = builder.build_collection(type_plugin, elements)
+ dataset_collection = builder.build_collection(type_plugin, elements, fields=fields)
else:
+ # TODO: Pass fields here - need test case first.
dataset_collection = model.DatasetCollection(populated=False)
dataset_collection.collection_type = collection_type
return dataset_collection
@@ -400,6 +409,8 @@ def _append_tags(self, dataset_collection_instance, implicit_inputs=None, tags=N
tags = tags or {}
implicit_inputs = implicit_inputs or []
for _, v in implicit_inputs:
+ if getattr(v, "ephemeral", False):
+ v = v.persistent_object
for tag in v.auto_propagated_tags:
tags[tag.value] = tag
for _, tag in tags.items():
diff --git a/lib/galaxy/managers/collections_util.py b/lib/galaxy/managers/collections_util.py
index d0ca89f61626..0302de9c5fdb 100644
--- a/lib/galaxy/managers/collections_util.py
+++ b/lib/galaxy/managers/collections_util.py
@@ -39,6 +39,7 @@ def api_payload_to_create_params(payload):
name=payload.get("name", None),
hide_source_items=string_as_bool(payload.get("hide_source_items", False)),
copy_elements=string_as_bool(payload.get("copy_elements", False)),
+ fields=payload.get("fields", None),
)
return params
diff --git a/lib/galaxy/managers/datasets.py b/lib/galaxy/managers/datasets.py
index 9dd9f87c0638..2645af3db524 100644
--- a/lib/galaxy/managers/datasets.py
+++ b/lib/galaxy/managers/datasets.py
@@ -666,6 +666,7 @@ def add_serializers(self):
"genome_build": lambda item, key, **context: str(item.dbkey) if item.dbkey is not None else None,
# derived (not mapped) attributes
"data_type": lambda item, key, **context: f"{item.datatype.__class__.__module__}.{item.datatype.__class__.__name__}",
+ "cwl_formats": lambda item, key, **context: item.cwl_formats,
"converted": self.serialize_converted_datasets,
# TODO: metadata/extra files
}
diff --git a/lib/galaxy/managers/executables.py b/lib/galaxy/managers/executables.py
index 0993b32856b1..fc42724e490f 100644
--- a/lib/galaxy/managers/executables.py
+++ b/lib/galaxy/managers/executables.py
@@ -29,10 +29,10 @@ def artifact_class(trans, as_dict: Dict[str, Any], allow_in_directory: Optional[
as_dict = yaml.safe_load(f)
artifact_class = as_dict.get("class", None)
+ target_object = None
if artifact_class is None and "$graph" in as_dict:
object_id = object_id or "main"
graph = as_dict["$graph"]
- target_object = None
if isinstance(graph, dict):
target_object = graph.get(object_id)
else:
@@ -40,11 +40,14 @@ def artifact_class(trans, as_dict: Dict[str, Any], allow_in_directory: Optional[
found_id = item.get("id")
if found_id == object_id or found_id == f"#{object_id}":
target_object = item
+ break
if target_object and target_object.get("class"):
artifact_class = target_object["class"]
+ if artifact_class in ("CommandLineTool", "ExpressionTool"):
+ target_object["cwlVersion"] = as_dict["cwlVersion"]
- return artifact_class, as_dict, object_id
+ return artifact_class, as_dict, object_id, target_object
__all__ = ("artifact_class",)
diff --git a/lib/galaxy/managers/hdas.py b/lib/galaxy/managers/hdas.py
index 5e2c08347f31..1a9eff0f508d 100644
--- a/lib/galaxy/managers/hdas.py
+++ b/lib/galaxy/managers/hdas.py
@@ -527,6 +527,7 @@ def __init__(self, app: StructuredApp):
"file_name",
"display_apps",
"display_types",
+ "cwl_formats",
"validated_state",
"validated_state_message",
# 'url',
diff --git a/lib/galaxy/managers/tools.py b/lib/galaxy/managers/tools.py
index c6dbe471dc84..180c080d4534 100644
--- a/lib/galaxy/managers/tools.py
+++ b/lib/galaxy/managers/tools.py
@@ -68,9 +68,10 @@ def create_tool(self, trans, tool_payload, allow_load=True):
if not dynamic_tool:
src = tool_payload.get("src", "representation")
is_path = src == "from_path"
+ target_object = None
if is_path:
- tool_format, representation, _ = artifact_class(None, tool_payload)
+ tool_format, representation, _, target_object = artifact_class(None, tool_payload)
else:
assert src == "representation"
representation = tool_payload.get("representation")
@@ -81,7 +82,10 @@ def create_tool(self, trans, tool_payload, allow_load=True):
if not tool_format:
raise exceptions.ObjectAttributeMissingException("Current tool representations require 'class'.")
- tool_path = tool_payload.get("path")
+ # Set tool_path to None so that in ToolBox.create_dynamic_tool()
+ # the tool source is by default recovered using
+ # get_tool_source_from_representation()
+ tool_path = None
tool_directory = tool_payload.get("tool_directory")
if tool_format == "GalaxyTool":
tool_id = representation.get("id")
@@ -89,7 +93,11 @@ def create_tool(self, trans, tool_payload, allow_load=True):
tool_id = str(uuid)
elif tool_format in ("CommandLineTool", "ExpressionTool"):
# CWL tools
- if is_path:
+ if target_object is not None:
+ representation = {"raw_process_reference": target_object, "uuid": str(uuid), "class": tool_format}
+ proxy = tool_proxy(tool_object=target_object, tool_directory=tool_directory, uuid=uuid)
+ elif is_path:
+ tool_path = tool_payload.get("path")
proxy = tool_proxy(tool_path=tool_path, uuid=uuid)
else:
# Build a tool proxy so that we can convert to the persistable
diff --git a/lib/galaxy/managers/workflows.py b/lib/galaxy/managers/workflows.py
index 20354c029198..c6202eddaf6f 100644
--- a/lib/galaxy/managers/workflows.py
+++ b/lib/galaxy/managers/workflows.py
@@ -1,6 +1,7 @@
import json
import logging
import os
+import tempfile
import uuid
from typing import (
Any,
@@ -92,6 +93,7 @@
from galaxy.schema.invocation import InvocationCancellationUserRequest
from galaxy.schema.schema import WorkflowIndexQueryPayload
from galaxy.structured_app import MinimalManagerApp
+from galaxy.tool_util.cwl import workflow_proxy
from galaxy.tools.parameters import (
params_to_incoming,
visit_input_values,
@@ -613,7 +615,7 @@ def read_workflow_from_path(self, app, user, path, allow_in_directory=None) -> m
trans = WorkRequestContext(app=self.app, user=user)
as_dict = {"src": "from_path", "path": path}
- workflow_class, as_dict, object_id = artifact_class(trans, as_dict, allow_in_directory=allow_in_directory)
+ workflow_class, as_dict, object_id, _ = artifact_class(trans, as_dict, allow_in_directory=allow_in_directory)
assert workflow_class == "GalaxyWorkflow"
# Format 2 Galaxy workflow.
galaxy_interface = Format2ConverterGalaxyInterface()
@@ -643,7 +645,7 @@ def normalize_workflow_format(self, trans, as_dict):
workflow_path = as_dict.get("path")
workflow_directory = os.path.normpath(os.path.dirname(workflow_path))
- workflow_class, as_dict, object_id = artifact_class(trans, as_dict)
+ workflow_class, as_dict, object_id, _ = artifact_class(trans, as_dict)
if workflow_class == "GalaxyWorkflow" or "yaml_content" in as_dict:
# Format 2 Galaxy workflow.
galaxy_interface = Format2ConverterGalaxyInterface()
@@ -655,6 +657,37 @@ def normalize_workflow_format(self, trans, as_dict):
)
except yaml.scanner.ScannerError as e:
raise exceptions.MalformedContents(str(e))
+ elif workflow_class == "Workflow":
+ from galaxy.tool_util.cwl import workflow_proxy
+
+ # create a temporary file for the workflow if it is provided
+ # as JSON, to make it parseable by the WorkflowProxy
+ if workflow_path is None:
+ with tempfile.NamedTemporaryFile(mode="w+", delete=False) as f:
+ json.dump(as_dict, f)
+ workflow_path = f.name
+ if object_id:
+ workflow_path += "#" + object_id
+ wf_proxy = workflow_proxy(workflow_path)
+ os.unlink(f.name)
+ else:
+ # TODO: consume and use object_id...
+ if object_id:
+ workflow_path += "#" + object_id
+ wf_proxy = workflow_proxy(workflow_path)
+ tool_reference_proxies = wf_proxy.tool_reference_proxies()
+ for tool_reference_proxy in tool_reference_proxies:
+ # TODO: Namespace IDs in workflows.
+ representation = tool_reference_proxy.to_persistent_representation()
+ self.app.dynamic_tool_manager.create_tool(
+ trans,
+ {
+ "uuid": tool_reference_proxy.uuid,
+ "representation": representation,
+ },
+ allow_load=True,
+ )
+ as_dict = wf_proxy.to_dict()
return RawWorkflowDescription(as_dict, workflow_path)
@@ -806,6 +839,10 @@ def _workflow_from_raw_description(
data = raw_workflow_description.as_dict
if isinstance(data, str):
data = json.loads(data)
+ if "src" in data:
+ assert data["src"] == "path"
+ wf_proxy = workflow_proxy(data["path"])
+ data = wf_proxy.to_dict()
# Create new workflow from source data
workflow = model.Workflow()
@@ -1831,6 +1868,22 @@ def __module_from_dict(
)
step.temp_input_connections = temp_input_connections # type: ignore[assignment]
+ if "inputs" in step_dict:
+ for input_dict in step_dict["inputs"]:
+ step_input = model.WorkflowStepInput(step)
+ step_input.name = input_dict["name"]
+ step_input.merge_type = input_dict.get("merge_type", step_input.default_merge_type)
+ step_input.scatter_type = input_dict.get("scatter_type", step_input.default_scatter_type)
+ value_from = input_dict.get("value_from", None)
+ # if value_from is None:
+ # # Super hacky - we probably need distinct value from and
+ # # default handling.
+ # value_from = input_dict.get("default")
+ step_input.value_from = value_from
+ step_input.default_value = input_dict.get("default")
+ if step_input.default_value:
+ step_input.default_value_set = True
+
# Create the model class for the step
steps.append(step)
external_id = step_dict["id"]
diff --git a/lib/galaxy/metadata/__init__.py b/lib/galaxy/metadata/__init__.py
index 1f44e272ab8a..13ba1226ea88 100644
--- a/lib/galaxy/metadata/__init__.py
+++ b/lib/galaxy/metadata/__init__.py
@@ -207,6 +207,7 @@ def _metadata_path(what):
"max_discovered_files": max_discovered_files,
"outputs": outputs,
"change_datatype_actions": job.get_change_datatype_actions(),
+ "job_id_tag": job.get_id_tag(),
}
# export model objects and object store configuration for extended metadata also.
@@ -246,7 +247,6 @@ def _metadata_path(what):
metadata_params["tool"] = tool_as_dict
metadata_params["link_data_only"] = link_data_only
metadata_params["tool_path"] = tool.config_file
- metadata_params["job_id_tag"] = job.get_id_tag()
metadata_params["implicit_collection_jobs_association_id"] = (
job.implicit_collection_jobs_association and job.implicit_collection_jobs_association.id
)
diff --git a/lib/galaxy/metadata/set_metadata.py b/lib/galaxy/metadata/set_metadata.py
index 38f3f7917729..009a66a54941 100644
--- a/lib/galaxy/metadata/set_metadata.py
+++ b/lib/galaxy/metadata/set_metadata.py
@@ -134,6 +134,13 @@ def set_meta_with_tool_provided(
dataset_instance.metadata.__extension__ = extension
except Exception:
log.exception("Problem sniffing datatype.")
+ elif extension == "data" and file_dict.get("format"):
+ format = file_dict["format"]
+ mapped_ext = datatypes_registry.get_datatype_ext_by_format_ontology(format)
+ if mapped_ext:
+ extension = mapped_ext
+ dataset_instance.metadata.__extension__ = extension
+ dataset_instance.extension = extension
for metadata_name, metadata_value in file_dict.get("metadata", {}).items():
setattr(dataset_instance.metadata, metadata_name, metadata_value)
diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py
index 4dc18f87b1c9..2d4264c7b535 100644
--- a/lib/galaxy/model/__init__.py
+++ b/lib/galaxy/model/__init__.py
@@ -4755,6 +4755,10 @@ def deferred_source_uri(self):
return self.sources[0].source_uri
return None
+ @property
+ def cwl_formats(self):
+ return [f"http://edamontology.org/{self.datatype.edam_format}"]
+
@property
def state(self):
# self._state holds state that should only affect this particular dataset association, not the dataset state itself
@@ -4793,6 +4797,7 @@ def set_skipped(self, object_store_populator: "ObjectStorePopulator") -> None:
self.extension = "expression.json"
self.state = self.states.OK
self.blurb = "skipped"
+ self.peek = json.dumps(None)
self.visible = False
null = json.dumps(None)
with open(self.dataset.get_file_name(), "w") as out:
@@ -5598,6 +5603,7 @@ def to_dict(self, view="collection", expose_dataset_path=False):
uuid=(lambda uuid: str(uuid) if uuid else None)(hda.dataset.uuid),
hid=hda.hid,
file_ext=hda.ext,
+ cwl_formats=hda.cwl_formats,
peek=unicodify(hda.display_peek()) if hda.peek and hda.peek != "no peek" else None,
model_class=self.__class__.__name__,
name=hda.name,
@@ -6533,12 +6539,21 @@ class DatasetCollection(Base, Dictifiable, UsesAnnotations, Serializable):
populated_states = DatasetCollectionPopulatedState
- def __init__(self, id=None, collection_type=None, populated=True, element_count=None):
+ def __init__(
+ self,
+ id=None,
+ collection_type=None,
+ populated=True,
+ element_count=None,
+ fields=None,
+ ):
self.id = id
self.collection_type = collection_type
if not populated:
self.populated_state = DatasetCollection.populated_states.NEW
self.element_count = element_count
+ # TODO: persist fields...
+ self.fields = fields
def _build_nested_collection_attributes_stmt(
self,
@@ -6713,6 +6728,10 @@ def populated_optimized(self):
return self._populated_optimized
+ @property
+ def allow_implicit_mapping(self):
+ return self.collection_type != "record"
+
@property
def populated(self):
top_level_populated = self.populated_state == DatasetCollection.populated_states.OK
@@ -8145,6 +8164,7 @@ class WorkflowStep(Base, RepresentById, UsesCreateAndUpdateTime):
DEFAULT_POSITION = {"left": 0, "top": 0}
def __init__(self):
+ self.position = WorkflowStep.DEFAULT_POSITION
self.uuid = uuid4()
self._input_connections_by_name = None
self._inputs_by_name = None
@@ -8441,6 +8461,9 @@ class WorkflowStepInput(Base, RepresentById):
cascade_backrefs=False,
)
+ default_merge_type = "merge_flattened"
+ default_scatter_type = "dotproduct"
+
def __init__(self, workflow_step):
add_object_to_object_session(self, workflow_step)
self.workflow_step = workflow_step
@@ -8513,6 +8536,9 @@ def copy(self):
copied_connection.output_name = self.output_name
return copied_connection
+ def log_str(self):
+ return f"WorkflowStepConnection[output_step_id={self.output_step_id},output_name={self.output_name},input_step_id={self.input_step_id},input_name={self.input_name}]"
+
class WorkflowOutput(Base, Serializable):
__tablename__ = "workflow_output"
@@ -9465,6 +9491,8 @@ def is_new(self):
return self.state == self.states.NEW
def add_output(self, output_name, output_object):
+ if getattr(output_object, "ephemeral", False):
+ return
if output_object.history_content_type == "dataset":
output_assoc = WorkflowInvocationStepOutputDatasetAssociation()
output_assoc.workflow_invocation_step = self
diff --git a/lib/galaxy/model/dataset_collections/builder.py b/lib/galaxy/model/dataset_collections/builder.py
index 2ae001f33a22..7c640a028d3d 100644
--- a/lib/galaxy/model/dataset_collections/builder.py
+++ b/lib/galaxy/model/dataset_collections/builder.py
@@ -1,28 +1,37 @@
+from typing import (
+ Any,
+ Dict,
+ List,
+)
+
from galaxy import model
from galaxy.model.orm.util import add_object_to_object_session
+from galaxy.schema.schema import FieldDict
from galaxy.util.oset import OrderedSet
from .type_description import COLLECTION_TYPE_DESCRIPTION_FACTORY
-def build_collection(type, dataset_instances, collection=None, associated_identifiers=None):
+def build_collection(type, dataset_instances, collection=None, associated_identifiers=None, fields=None):
"""
Build DatasetCollection with populated DatasetcollectionElement objects
corresponding to the supplied dataset instances or throw exception if
this is not a valid collection of the specified type.
"""
- dataset_collection = collection or model.DatasetCollection()
+ dataset_collection = collection or model.DatasetCollection(fields=fields)
associated_identifiers = associated_identifiers or set()
- set_collection_elements(dataset_collection, type, dataset_instances, associated_identifiers)
+ set_collection_elements(dataset_collection, type, dataset_instances, associated_identifiers, fields=fields)
return dataset_collection
-def set_collection_elements(dataset_collection, type, dataset_instances, associated_identifiers):
+def set_collection_elements(dataset_collection, type, dataset_instances, associated_identifiers, fields=None):
new_element_keys = OrderedSet(dataset_instances.keys()) - associated_identifiers
new_dataset_instances = {k: dataset_instances[k] for k in new_element_keys}
dataset_collection.element_count = dataset_collection.element_count or 0
element_index = dataset_collection.element_count
elements = []
- for element in type.generate_elements(new_dataset_instances):
+ if fields == "auto":
+ fields = guess_fields(dataset_instances)
+ for element in type.generate_elements(new_dataset_instances, fields=fields):
element.element_index = element_index
add_object_to_object_session(element, dataset_collection)
element.collection = dataset_collection
@@ -35,6 +44,16 @@ def set_collection_elements(dataset_collection, type, dataset_instances, associa
return dataset_collection
+def guess_fields(dataset_instances: Dict[str, Any]) -> List[FieldDict]:
+ fields: List[FieldDict] = []
+ for identifier, element in dataset_instances.items():
+ # TODO: Make generic enough to handle nested record types.
+ assert element.history_content_type == "dataset"
+ fields.append({"type": "File", "name": identifier})
+
+ return fields
+
+
class CollectionBuilder:
"""Purely functional builder pattern for building a dataset collection."""
diff --git a/lib/galaxy/model/dataset_collections/matching.py b/lib/galaxy/model/dataset_collections/matching.py
index 948e317ec69a..64161620b9ea 100644
--- a/lib/galaxy/model/dataset_collections/matching.py
+++ b/lib/galaxy/model/dataset_collections/matching.py
@@ -17,8 +17,10 @@ class CollectionsToMatch:
def __init__(self):
self.collections = {}
+ self.uses_ephemeral_collections = False
def add(self, input_name, hdca, subcollection_type=None, linked=True):
+ self.uses_ephemeral_collections = self.uses_ephemeral_collections or not hasattr(hdca, "hid")
self.collections[input_name] = bunch.Bunch(
hdca=hdca,
subcollection_type=subcollection_type,
@@ -49,6 +51,7 @@ def __init__(self):
self.subcollection_types = {}
self.action_tuples = {}
self.when_values = None
+ self.uses_ephemeral_collections = False
def __attempt_add_to_linked_match(self, input_name, hdca, collection_type_description, subcollection_type):
structure = get_structure(hdca, collection_type_description, leaf_subcollection_type=subcollection_type)
@@ -91,12 +94,21 @@ def map_over_action_tuples(self, input_name):
def is_mapped_over(self, input_name):
return input_name in self.collections
+ @property
+ def implicit_inputs(self):
+ if not self.uses_ephemeral_collections:
+ # Consider doing something smarter here.
+ return list(self.collections.items())
+ else:
+ return []
+
@staticmethod
def for_collections(collections_to_match, collection_type_descriptions) -> Optional["MatchingCollections"]:
if not collections_to_match.has_collections():
return None
matching_collections = MatchingCollections()
+ matching_collections.uses_ephemeral_collections = collections_to_match.uses_ephemeral_collections
for input_key, to_match in sorted(collections_to_match.items()):
hdca = to_match.hdca
collection_type_description = collection_type_descriptions.for_collection_type(
diff --git a/lib/galaxy/model/dataset_collections/registry.py b/lib/galaxy/model/dataset_collections/registry.py
index 9c849dfdad6f..bd148edafd2d 100644
--- a/lib/galaxy/model/dataset_collections/registry.py
+++ b/lib/galaxy/model/dataset_collections/registry.py
@@ -2,9 +2,14 @@
from .types import (
list,
paired,
+ record,
)
-PLUGIN_CLASSES = [list.ListDatasetCollectionType, paired.PairedDatasetCollectionType]
+PLUGIN_CLASSES = [
+ list.ListDatasetCollectionType,
+ paired.PairedDatasetCollectionType,
+ record.RecordDatasetCollectionType,
+]
class DatasetCollectionTypesRegistry:
@@ -14,13 +19,13 @@ def __init__(self):
def get(self, plugin_type):
return self.__plugins[plugin_type]
- def prototype(self, plugin_type):
+ def prototype(self, plugin_type, fields=None):
plugin_type_object = self.get(plugin_type)
if not hasattr(plugin_type_object, "prototype_elements"):
raise Exception(f"Cannot pre-determine structure for collection of type {plugin_type}")
dataset_collection = model.DatasetCollection()
- for e in plugin_type_object.prototype_elements():
+ for e in plugin_type_object.prototype_elements(fields=fields):
e.collection = dataset_collection
return dataset_collection
diff --git a/lib/galaxy/model/dataset_collections/type_description.py b/lib/galaxy/model/dataset_collections/type_description.py
index cade102453ca..9233faf3e4f9 100644
--- a/lib/galaxy/model/dataset_collections/type_description.py
+++ b/lib/galaxy/model/dataset_collections/type_description.py
@@ -9,9 +9,9 @@ def __init__(self, type_registry=DATASET_COLLECTION_TYPES_REGISTRY):
# I think.
self.type_registry = type_registry
- def for_collection_type(self, collection_type):
+ def for_collection_type(self, collection_type, fields=None):
assert collection_type is not None
- return CollectionTypeDescription(collection_type, self)
+ return CollectionTypeDescription(collection_type, self, fields=fields)
class CollectionTypeDescription:
@@ -47,12 +47,15 @@ class CollectionTypeDescription:
collection_type: str
- def __init__(self, collection_type: Union[str, "CollectionTypeDescription"], collection_type_description_factory):
+ def __init__(
+ self, collection_type: Union[str, "CollectionTypeDescription"], collection_type_description_factory, fields=None
+ ):
if isinstance(collection_type, CollectionTypeDescription):
self.collection_type = collection_type.collection_type
else:
self.collection_type = collection_type
self.collection_type_description_factory = collection_type_description_factory
+ self.fields = fields
self.__has_subcollections = self.collection_type.find(":") > 0
def child_collection_type(self):
@@ -90,9 +93,13 @@ def has_subcollections_of_type(self, other_collection_type):
collection_type = self.collection_type
return collection_type.endswith(other_collection_type) and collection_type != other_collection_type
- def is_subcollection_of_type(self, other_collection_type):
+ def is_subcollection_of_type(self, other_collection_type, proper=True):
+ """If proper is False, than a type is consider a subcollection of itself."""
if not hasattr(other_collection_type, "collection_type"):
other_collection_type = self.collection_type_description_factory.for_collection_type(other_collection_type)
+ if not proper and self.can_match_type(other_collection_type):
+ return True
+
return other_collection_type.has_subcollections_of_type(self)
def can_match_type(self, other_collection_type):
diff --git a/lib/galaxy/model/dataset_collections/types/__init__.py b/lib/galaxy/model/dataset_collections/types/__init__.py
index bfcf7bae79a6..c294f6957be6 100644
--- a/lib/galaxy/model/dataset_collections/types/__init__.py
+++ b/lib/galaxy/model/dataset_collections/types/__init__.py
@@ -11,7 +11,7 @@
class DatasetCollectionType(metaclass=ABCMeta):
@abstractmethod
- def generate_elements(self, dataset_instances):
+ def generate_elements(self, dataset_instances: dict, **kwds):
"""Generate DatasetCollectionElements with corresponding
to the supplied dataset instances or throw exception if
this is not a valid collection of the specified type.
diff --git a/lib/galaxy/model/dataset_collections/types/list.py b/lib/galaxy/model/dataset_collections/types/list.py
index 18ce4db76537..d4421d009c34 100644
--- a/lib/galaxy/model/dataset_collections/types/list.py
+++ b/lib/galaxy/model/dataset_collections/types/list.py
@@ -7,8 +7,8 @@ class ListDatasetCollectionType(BaseDatasetCollectionType):
collection_type = "list"
- def generate_elements(self, elements):
- for identifier, element in elements.items():
+ def generate_elements(self, dataset_instances, **kwds):
+ for identifier, element in dataset_instances.items():
association = DatasetCollectionElement(
element=element,
element_identifier=identifier,
diff --git a/lib/galaxy/model/dataset_collections/types/paired.py b/lib/galaxy/model/dataset_collections/types/paired.py
index 4ae95a1442a2..e774ab67aace 100644
--- a/lib/galaxy/model/dataset_collections/types/paired.py
+++ b/lib/galaxy/model/dataset_collections/types/paired.py
@@ -15,21 +15,21 @@ class PairedDatasetCollectionType(BaseDatasetCollectionType):
collection_type = "paired"
- def generate_elements(self, elements):
- if forward_dataset := elements.get(FORWARD_IDENTIFIER):
+ def generate_elements(self, dataset_instances, **kwds):
+ if forward_dataset := dataset_instances.get(FORWARD_IDENTIFIER):
left_association = DatasetCollectionElement(
element=forward_dataset,
element_identifier=FORWARD_IDENTIFIER,
)
yield left_association
- if reverse_dataset := elements.get(REVERSE_IDENTIFIER):
+ if reverse_dataset := dataset_instances.get(REVERSE_IDENTIFIER):
right_association = DatasetCollectionElement(
element=reverse_dataset,
element_identifier=REVERSE_IDENTIFIER,
)
yield right_association
- def prototype_elements(self):
+ def prototype_elements(self, **kwds):
left_association = DatasetCollectionElement(
element=HistoryDatasetAssociation(),
element_identifier=FORWARD_IDENTIFIER,
diff --git a/lib/galaxy/model/dataset_collections/types/record.py b/lib/galaxy/model/dataset_collections/types/record.py
new file mode 100644
index 000000000000..193509f439ee
--- /dev/null
+++ b/lib/galaxy/model/dataset_collections/types/record.py
@@ -0,0 +1,45 @@
+from galaxy.exceptions import RequestParameterMissingException
+from galaxy.model import (
+ DatasetCollectionElement,
+ HistoryDatasetAssociation,
+)
+from ..types import BaseDatasetCollectionType
+
+
+class RecordDatasetCollectionType(BaseDatasetCollectionType):
+ """Arbitrary CWL-style record type."""
+
+ collection_type = "record"
+
+ def generate_elements(self, dataset_instances, **kwds):
+ fields = kwds.get("fields", None)
+ if fields is None:
+ raise RequestParameterMissingException("Missing or null parameter 'fields' required for record types.")
+ if len(dataset_instances) != len(fields):
+ self._validation_failed("Supplied elements do not match fields.")
+ index = 0
+ for identifier, element in dataset_instances.items():
+ field = fields[index]
+ if field["name"] != identifier:
+ self._validation_failed("Supplied elements do not match fields.")
+
+ # TODO: validate type and such.
+ association = DatasetCollectionElement(
+ element=element,
+ element_identifier=identifier,
+ )
+ yield association
+ index += 1
+
+ def prototype_elements(self, fields=None, **kwds):
+ if fields is None:
+ raise RequestParameterMissingException("Missing or null parameter 'fields' required for record types.")
+ for field in fields:
+ name = field.get("name", None)
+ assert name
+ assert field.get("type", "File") # NS: this assert doesn't make sense as it is
+ field_dataset = DatasetCollectionElement(
+ element=HistoryDatasetAssociation(),
+ element_identifier=name,
+ )
+ yield field_dataset
diff --git a/lib/galaxy/model/deferred.py b/lib/galaxy/model/deferred.py
index 784e4e9a8ba3..9381186a7bb7 100644
--- a/lib/galaxy/model/deferred.py
+++ b/lib/galaxy/model/deferred.py
@@ -36,7 +36,9 @@
from galaxy.objectstore import (
ObjectStore,
ObjectStorePopulator,
+ persist_extra_files,
)
+from galaxy.util.compression_utils import CompressedFile
from galaxy.util.hash_util import verify_hash
log = logging.getLogger(__name__)
@@ -141,6 +143,13 @@ def ensure_materialized(
object_store_populator.set_dataset_object_store_id(materialized_dataset)
try:
path = self._stream_source(target_source, dataset_instance.datatype, materialized_dataset)
+ if dataset_instance.ext == "directory":
+ CompressedFile(path).extract(materialized_dataset.extra_files_path)
+ persist_extra_files(
+ object_store=object_store,
+ src_extra_files_path=materialized_dataset.extra_files_path,
+ primary_data=dataset_instance,
+ )
object_store.update_from_file(materialized_dataset, file_name=path)
materialized_dataset.set_size()
except Exception as e:
@@ -153,7 +162,7 @@ def ensure_materialized(
# TODO: take into account transform and ensure we are and are not modifying the file as appropriate.
try:
path = self._stream_source(target_source, dataset_instance.datatype, materialized_dataset)
- shutil.move(path, transient_paths.external_filename)
+ shutil.copy(path, transient_paths.external_filename)
materialized_dataset.external_filename = transient_paths.external_filename
except Exception as e:
exception_materializing = e
diff --git a/lib/galaxy/model/store/discover.py b/lib/galaxy/model/store/discover.py
index 4bee49056c61..533f61701c99 100644
--- a/lib/galaxy/model/store/discover.py
+++ b/lib/galaxy/model/store/discover.py
@@ -870,6 +870,7 @@ def collect_elements_for_history(elements):
final_job_state=state,
storage_callbacks=storage_callbacks,
)
+ model_persistence_context.add_output_dataset_association("__unnamed_outputs", dataset)
dataset.discovered = True
if not hda_id:
datasets.append(dataset)
diff --git a/lib/galaxy/schema/schema.py b/lib/galaxy/schema/schema.py
index 3febee546896..90dac7c73036 100644
--- a/lib/galaxy/schema/schema.py
+++ b/lib/galaxy/schema/schema.py
@@ -33,6 +33,7 @@
from typing_extensions import (
Annotated,
Literal,
+ TypedDict,
)
from galaxy.schema import partial_model
@@ -1647,6 +1648,11 @@ class CollectionElementIdentifier(Model):
)
+class FieldDict(TypedDict):
+ name: str
+ type: str
+
+
class CreateNewCollectionPayload(Model):
collection_type: Optional[CollectionType] = OptionalCollectionTypeField
element_identifiers: Optional[List[CollectionElementIdentifier]] = Field(
@@ -1682,6 +1688,11 @@ class CreateNewCollectionPayload(Model):
default=None,
description="The ID of the library folder that will contain the collection. Required if `instance_type=library`.",
)
+ fields_: Optional[Union[str, List[FieldDict]]] = Field(
+ default=[],
+ description="List of fields to create for this collection. Set to 'auto' to guess fields from identifiers.",
+ alias="fields",
+ )
class ModelStoreFormat(str, Enum):
diff --git a/lib/galaxy/tool_util/client/staging.py b/lib/galaxy/tool_util/client/staging.py
index 3d4d6ca73080..c4f6558dee2a 100644
--- a/lib/galaxy/tool_util/client/staging.py
+++ b/lib/galaxy/tool_util/client/staging.py
@@ -59,20 +59,19 @@ def _post(self, api_path: str, payload: Dict[str, Any]) -> Dict[str, Any]:
def _attach_file(self, path: str) -> BinaryIO:
return open(path, "rb")
- def _tools_post(self, payload: Dict[str, Any]) -> Dict[str, Any]:
+ def _job_details_from_tool_response(self, tool_response: Dict[str, Any]) -> List[Dict[str, Any]]:
+ return [self._handle_job(job) for job in tool_response.get("jobs", [])]
+
+ def _tools_post(self, payload: Dict[str, Any]) -> List[Dict[str, Any]]:
tool_response = self._post("tools", payload)
- for job in tool_response.get("jobs", []):
- self._handle_job(job)
- return tool_response
+ return self._job_details_from_tool_response(tool_response)
- def _fetch_post(self, payload: Dict[str, Any]) -> Dict[str, Any]:
+ def _fetch_post(self, payload: Dict[str, Any]) -> List[Dict[str, Any]]:
tool_response = self._post("tools/fetch", payload)
- for job in tool_response.get("jobs", []):
- self._handle_job(job)
- return tool_response
+ return self._job_details_from_tool_response(tool_response)
@abc.abstractmethod
- def _handle_job(self, job_response: Dict[str, Any]):
+ def _handle_job(self, job_response: Dict[str, Any]) -> Dict[str, Any]:
"""Implementer can decide if to wait for job(s) individually or not here."""
def stage(
@@ -143,10 +142,9 @@ def _attach_file(upload_payload: Dict[str, Any], uri: str, index: int = 0) -> Di
fetch_payload["targets"][0]["elements"][0]["tags"] = tags
elif isinstance(upload_target, DirectoryUploadTarget):
fetch_payload = _fetch_payload(history_id, file_type="directory")
- fetch_payload["targets"][0].pop("elements")
tar_path = upload_target.tar_path
src = _attach_file(fetch_payload, tar_path)
- fetch_payload["targets"][0]["elements_from"] = src
+ fetch_payload["targets"][0]["elements"][0].update(src)
elif isinstance(upload_target, ObjectUploadTarget):
content = json.dumps(upload_target.object)
fetch_payload = _fetch_payload(history_id, file_type="expression.json")
@@ -161,7 +159,7 @@ def _attach_file(upload_payload: Dict[str, Any], uri: str, index: int = 0) -> Di
fetch_payload["targets"][0]["elements"][0]["tags"] = tags
else:
raise ValueError(f"Unsupported type for upload_target: {type(upload_target)}")
- return self._fetch_post(fetch_payload)
+ return self._fetch_post(fetch_payload)[0]
# Save legacy upload_func to target older Galaxy servers
def upload_func(upload_target: UploadTarget) -> Dict[str, Any]:
@@ -176,9 +174,14 @@ def _attach_file(upload_payload: Dict[str, Any], uri: str, index: int = 0) -> No
if isinstance(upload_target, FileUploadTarget):
file_path = upload_target.path
- file_type = upload_target.properties.get("filetype", None) or DEFAULT_FILE_TYPE
+ file_type = DEFAULT_FILE_TYPE
dbkey = upload_target.properties.get("dbkey", None) or DEFAULT_DBKEY
- upload_payload = _upload_payload(history_id, file_type=file_type, to_posix_lines=dbkey)
+ upload_payload = _upload_payload(
+ history_id,
+ file_type=file_type,
+ to_posix_lines=dbkey,
+ cwl_format=upload_target.properties.get("filetype"),
+ )
name = _file_path_to_name(file_path)
upload_payload["inputs"]["files_0|auto_decompress"] = False
upload_payload["inputs"]["auto_decompress"] = False
@@ -201,12 +204,12 @@ def _attach_file(upload_payload: Dict[str, Any], uri: str, index: int = 0) -> No
_attach_file(upload_payload, composite_data, index=i)
self._log(f"upload_payload is {upload_payload}")
- return self._tools_post(upload_payload)
+ return self._tools_post(upload_payload)[0]
elif isinstance(upload_target, FileLiteralTarget):
# For file literals - take them as is - never convert line endings.
payload = _upload_payload(history_id, file_type="auto", auto_decompress=False, to_posix_lines=False)
payload["inputs"]["files_0|url_paste"] = upload_target.contents
- return self._tools_post(payload)
+ return self._tools_post(payload)[0]
elif isinstance(upload_target, DirectoryUploadTarget):
tar_path = upload_target.tar_path
@@ -216,20 +219,21 @@ def _attach_file(upload_payload: Dict[str, Any], uri: str, index: int = 0) -> No
)
upload_payload["inputs"]["files_0|auto_decompress"] = False
_attach_file(upload_payload, tar_path)
- tar_upload_response = self._tools_post(upload_payload)
+ tar_upload_first_job_details = self._tools_post(upload_payload)[0]
+ tar_upload_first_dataset_id = next(iter(tar_upload_first_job_details["outputs"].values()))["id"]
convert_payload = dict(
tool_id="CONVERTER_tar_to_directory",
- tool_inputs={"input1": {"src": "hda", "id": tar_upload_response["outputs"][0]["id"]}},
+ tool_inputs={"input1": {"src": "hda", "id": tar_upload_first_dataset_id}},
history_id=history_id,
)
- convert_response = self._tools_post(convert_payload)
+ convert_response = self._tools_post(convert_payload)[0]
assert "outputs" in convert_response, convert_response
return convert_response
elif isinstance(upload_target, ObjectUploadTarget):
content = json.dumps(upload_target.object)
payload = _upload_payload(history_id, file_type="expression.json")
payload["files_0|url_paste"] = content
- return self._tools_post(payload)
+ return self._tools_post(payload)[0]
else:
raise ValueError(f"Unsupported type for upload_target: {type(upload_target)}")
@@ -285,8 +289,9 @@ def _post(self, api_path: str, payload: Dict[str, Any]) -> Dict[str, Any]:
assert response.status_code == 200, response.text
return response.json()
- def _handle_job(self, job_response: Dict[str, Any]):
+ def _handle_job(self, job_response: Dict[str, Any]) -> Dict[str, Any]:
self.galaxy_interactor.wait_for_job(job_response["id"])
+ return self.galaxy_interactor.get_job_stdio(job_response["id"])
@property
def use_fetch_api(self):
@@ -317,6 +322,8 @@ def _upload_payload(
tool_input["files_0|space_to_tab"] = "Yes"
if "file_name" in kwd:
tool_input["files_0|NAME"] = kwd["file_name"]
+ if kwd.get("cwl_format"):
+ tool_input["cwl_format"] = kwd["cwl_format"]
tool_input["files_0|type"] = "upload_dataset"
payload["inputs"] = tool_input
payload["__files"] = {}
diff --git a/lib/galaxy/tool_util/cwl/parser.py b/lib/galaxy/tool_util/cwl/parser.py
index e4a4ff83fb20..981e4d8ed37f 100644
--- a/lib/galaxy/tool_util/cwl/parser.py
+++ b/lib/galaxy/tool_util/cwl/parser.py
@@ -35,7 +35,6 @@
beta_relaxed_fmt_check,
ensure_cwltool_available,
getdefault,
- normalizeFilesDirs,
pathmapper,
process,
ref_resolver,
@@ -71,6 +70,7 @@
"EnvVarRequirement",
"InitialWorkDirRequirement",
"InlineJavascriptRequirement",
+ "LoadListingRequirement",
"ResourceRequirement",
"ShellCommandRequirement",
"ScatterFeatureRequirement",
@@ -83,6 +83,9 @@
SUPPORTED_WORKFLOW_REQUIREMENTS = SUPPORTED_TOOL_REQUIREMENTS + []
+PERSISTED_REPRESENTATION = "cwl_tool_object"
+SENTINEL_GALAXY_SLOTS_VALUE = 1.480231396
+
ToolStateType = Dict[str, Union[None, str, bool, Dict[str, str]]]
@@ -334,7 +337,13 @@ def _ensure_cwl_job_initialized(self):
args = [RuntimeContext(job_args)]
kwargs: Dict[str, str] = {}
- self._cwl_job = next(self._tool_proxy._tool.job(self._input_dict, self._output_callback, *args, **kwargs))
+ # The job method modifies inputs_record_schema in place to match the job definition
+ # This breaks subsequent use with other job definitions, so create a shallow copy of
+ # the CommandLineTool instance and add a deepcopy of the inputs_record_schema
+ # (instead of globally manipulating self._tool_proxy._tool, which is likely not thread-safe).
+ cwl_tool_instance = copy.copy(self._tool_proxy._tool)
+ cwl_tool_instance.inputs_record_schema = copy.deepcopy(cwl_tool_instance.inputs_record_schema)
+ self._cwl_job = next(cwl_tool_instance.job(self._input_dict, self._output_callback, *args, **kwargs))
self._is_command_line_job = hasattr(self._cwl_job, "command_line")
def _normalize_job(self):
@@ -350,7 +359,6 @@ def pathToLoc(p):
process.fill_in_defaults(self._tool_proxy._tool.tool["inputs"], self._input_dict, fs_access)
visit_class(self._input_dict, ("File", "Directory"), pathToLoc)
# TODO: Why doesn't fillInDefault fill in locations instead of paths?
- normalizeFilesDirs(self._input_dict)
# TODO: validate like cwltool process _init_job.
# validate.validate_ex(self.names.get_name("input_record_schema", ""), builder.job,
# strict=False, logger=_logger_validation_warnings)
@@ -397,13 +405,19 @@ def stage_recursive(value):
def _select_resources(self, request, runtime_context=None):
new_request = request.copy()
- new_request["cores"] = "$GALAXY_SLOTS"
+ # TODO: we really need to find a better solution to set cores here.
+ # This could be to delay building the cwl job until we're at the worker node,
+ # (see https://github.com/galaxyproject/galaxy/pull/12459 for an attempt)
+ # or guessing what the value of $GALAXY_SLOTS will be when preparing the job.
+ new_request["cores"] = SENTINEL_GALAXY_SLOTS_VALUE
return new_request
@property
def command_line(self):
if self.is_command_line_job:
- return self.cwl_job().command_line
+ command_line = self.cwl_job().command_line
+ # Undo the SENTINEL_GALAXY_SLOTS_VALUE hack above
+ return [fragment.replace(str(SENTINEL_GALAXY_SLOTS_VALUE), "$GALAXY_SLOTS") for fragment in command_line]
else:
return ["true"]
diff --git a/lib/galaxy/tool_util/cwl/representation.py b/lib/galaxy/tool_util/cwl/representation.py
index 740954fd8754..dfb89c69046b 100644
--- a/lib/galaxy/tool_util/cwl/representation.py
+++ b/lib/galaxy/tool_util/cwl/representation.py
@@ -4,9 +4,12 @@
import json
import logging
import os
+import tarfile
+import uuid
from enum import Enum
from typing import (
Any,
+ Dict,
NamedTuple,
Optional,
)
@@ -184,6 +187,7 @@ def dataset_wrapper_to_file_json(inputs_dir, dataset_wrapper):
# Verify it isn't a NoneDataset
if dataset_wrapper.unsanitized:
raw_file_object["size"] = int(dataset_wrapper.get_size())
+ raw_file_object["format"] = str(dataset_wrapper.cwl_formats[0])
set_basename_and_derived_properties(
raw_file_object, str(dataset_wrapper.created_from_basename or dataset_wrapper.name)
@@ -205,6 +209,9 @@ def dataset_wrapper_to_directory_json(inputs_dir, dataset_wrapper):
except Exception:
archive_location = None
+ extra_params = getattr(dataset_wrapper.unsanitized, "extra_params", {})
+ # We need to resolve path to location if there is a listing
+
directory_json = {
"location": dataset_wrapper.extra_files_path,
"class": "Directory",
@@ -214,7 +221,52 @@ def dataset_wrapper_to_directory_json(inputs_dir, dataset_wrapper):
"archive_nameroot": nameroot,
}
- return directory_json
+ def tar_to_directory(directory_item):
+ # TODO: Should we just make sure that archive exists in extra_files_path ??
+ tar_file_location = directory_item["archive_location"]
+ directory_name = directory_item["name"]
+
+ assert os.path.exists(tar_file_location), tar_file_location
+
+ tmp_dir = os.path.join(inputs_dir, "direx", str(uuid.uuid4())) # direx for "DIR EXtract"
+ directory_location = os.path.join(tmp_dir, directory_name)
+
+ os.makedirs(tmp_dir)
+
+ assert os.path.exists(tmp_dir), tmp_dir
+
+ # TODO: safe version of this!
+ bkp_cwd = os.getcwd()
+ os.chdir(tmp_dir)
+ tar = tarfile.open(tar_file_location)
+ tar.extractall(directory_location)
+ tar.close()
+ os.chdir(bkp_cwd)
+
+ assert os.path.exists(directory_location), directory_location
+
+ directory_item["location"] = directory_location
+ directory_item["nameext"] = "None"
+ directory_item["nameroot"] = directory_name
+ directory_item["basename"] = directory_name
+
+ tar_to_directory(directory_json)
+ extra_params.update(directory_json)
+
+ entry_to_location(extra_params, extra_params["location"])
+ return extra_params
+
+
+def entry_to_location(entry: Dict[str, Any], parent_location: str):
+ # TODO unit test
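+    # Recursively rewrite relative "path" entries into absolute "location" entries,
+    # resolved against the parent directory's location; File entries also get their
+    # size filled in from disk.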
+ if entry["class"] == "File" and "path" in entry and "location" not in entry:
+ entry["location"] = os.path.join(parent_location, entry.pop("path"))
+ entry["size"] = os.path.getsize(entry["location"])
+ elif entry["class"] == "Directory" and "listing" in entry:
+ if "location" not in entry and "path" in entry:
+ entry["location"] = os.path.join(parent_location, entry.pop("path"))
+ for listing_entry in entry["listing"]:
+ entry_to_location(listing_entry, parent_location=entry["location"])
def collection_wrapper_to_array(inputs_dir, wrapped_value):
@@ -231,6 +283,106 @@ def collection_wrapper_to_record(inputs_dir, wrapped_value):
return rval
+def galactic_flavored_to_cwl_job(tool, param_dict, local_working_directory):
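+    # Build a CWL job input dict from Galaxy tool state for tools that declare a
+    # gx:interface: values are converted to CWL File/Directory/primitive objects,
+    # with data inputs nested under the path given by their mapTo declaration.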
+ def simple_value(input, param_dict_value, type_representation_name=None):
+ type_representation = type_representation_from_name(type_representation_name)
+ # Hmm... cwl_type isn't really the cwl type in every case,
+ # like in the case of json for instance.
+
+ if type_representation.galaxy_param_type == NO_GALAXY_INPUT:
+ assert param_dict_value is None
+ return None
+
+ if type_representation.name == "file":
+ dataset_wrapper = param_dict_value
+ return dataset_wrapper_to_file_json(inputs_dir, dataset_wrapper)
+ elif type_representation.name == "directory":
+ dataset_wrapper = param_dict_value
+ return dataset_wrapper_to_directory_json(inputs_dir, dataset_wrapper)
+ elif type_representation.name == "integer":
+ return int(str(param_dict_value))
+ elif type_representation.name == "long":
+ return int(str(param_dict_value))
+ elif type_representation.name in ["float", "double"]:
+ return float(str(param_dict_value))
+ elif type_representation.name == "boolean":
+ return string_as_bool(param_dict_value)
+ elif type_representation.name == "text":
+ return str(param_dict_value)
+ elif type_representation.name == "enum":
+ return str(param_dict_value)
+ elif type_representation.name == "json":
+ raw_value = param_dict_value.value
+ return json.loads(raw_value)
+ elif type_representation.name == "field":
+ if param_dict_value is None:
+ return None
+ if hasattr(param_dict_value, "value"):
+ # Is InputValueWrapper
+ rval = param_dict_value.value
+ if isinstance(rval, dict) and "src" in rval and rval["src"] == "json":
+ # needed for wf_step_connect_undeclared_param, so non-file defaults?
+ return rval["value"]
+ return rval
+ elif not param_dict_value.is_collection:
+ # Is DatasetFilenameWrapper
+ return dataset_wrapper_to_file_json(inputs_dir, param_dict_value)
+ else:
+ # Is DatasetCollectionWrapper
+ hdca_wrapper = param_dict_value
+ if hdca_wrapper.collection.collection_type == "list":
+ # TODO: generalize to lists of lists and lists of non-files...
+ return collection_wrapper_to_array(inputs_dir, hdca_wrapper)
+            elif hdca_wrapper.collection.collection_type == "record":
+ return collection_wrapper_to_record(inputs_dir, hdca_wrapper)
+
+ elif type_representation.name == "array":
+ # TODO: generalize to lists of lists and lists of non-files...
+ return collection_wrapper_to_array(inputs_dir, param_dict_value)
+ elif type_representation.name == "record":
+ return collection_wrapper_to_record(inputs_dir, param_dict_value)
+ else:
+ return str(param_dict_value)
+
+ inputs_dir = os.path.join(local_working_directory, "_inputs")
+
+ inputs = {}
+
+ # TODO: walk tree
+ for input_name, input_param in tool.inputs.items():
+ if input_param.type == "data":
+ # Probably need to be passing in the wrappers and using them - this seems to be
+ # an HDA.
+ map_to = input_param.map_to
+ inputs_at_depth = inputs
+ if map_to:
+ while "/" in map_to:
+ first, map_to = map_to.split("/", 1)
+ if first not in inputs_at_depth:
+ inputs_at_depth[first] = {}
+ inputs_at_depth = inputs_at_depth[first]
+ else:
+ map_to = input_param.name
+ inputs_at_depth[map_to] = dataset_wrapper_to_file_json(inputs_dir, param_dict[input_name])
+ else:
+ matched_field = None
+ for field in tool._cwl_tool_proxy.input_fields():
+ if field["name"] == input_name: # CWL <=> Galaxy
+ matched_field = field
+ field_type = field_to_field_type(matched_field)
+ if isinstance(field_type, list):
+ assert USE_FIELD_TYPES
+ type_descriptions = [FIELD_TYPE_REPRESENTATION]
+ else:
+ type_descriptions = type_descriptions_for_field_types([field_type])
+ assert len(type_descriptions) == 1
+ type_description_name = type_descriptions[0].name
+
+ inputs[input_name] = simple_value(input_param, param_dict[input_name], type_description_name)
+
+ return inputs
+
+
def to_cwl_job(tool, param_dict, local_working_directory):
"""tool is Galaxy's representation of the tool and param_dict is the
parameter dictionary with wrapped values.
@@ -288,10 +440,10 @@ def simple_value(input, param_dict_value, type_representation_name=None):
else:
# Is DatasetCollectionWrapper
hdca_wrapper = param_dict_value
- if hdca_wrapper.collection_type == "list":
+ if hdca_wrapper.collection.collection_type == "list":
# TODO: generalize to lists of lists and lists of non-files...
return collection_wrapper_to_array(inputs_dir, hdca_wrapper)
- elif hdca_wrapper.collection_type.collection_type == "record":
+ elif hdca_wrapper.collection.collection_type == "record":
return collection_wrapper_to_record(inputs_dir, hdca_wrapper)
elif type_representation.name == "array":
diff --git a/lib/galaxy/tool_util/cwl/runtime_actions.py b/lib/galaxy/tool_util/cwl/runtime_actions.py
index d25d50740cfc..e9b9970123e9 100644
--- a/lib/galaxy/tool_util/cwl/runtime_actions.py
+++ b/lib/galaxy/tool_util/cwl/runtime_actions.py
@@ -156,9 +156,9 @@ def move_output(output, target_path, output_name=None):
with open(os.path.join(secondary_files_dir, "..", SECONDARY_FILES_INDEX_PATH), "w") as f:
json.dump(index_contents, f)
- return {"created_from_basename": output["basename"]}
+ return {"created_from_basename": output["basename"], "ext": "data", "format": output.get("format")}
- def handle_known_output(output, output_key, output_name):
+ def handle_known_output(output, output_name):
# if output["class"] != "File":
# # This case doesn't seem like it would be reached - why is this here?
# provided_metadata[output_name] = {
@@ -188,13 +188,13 @@ def handle_known_output_json(output, output_name):
for output_name, output in outputs.items():
handled_outputs.append(output_name)
if isinstance(output, dict) and "location" in output:
- handle_known_output(output, output_name, output_name)
+ handle_known_output(output, output_name)
elif isinstance(output, dict):
prefix = f"{output_name}|__part__|"
for record_key, record_value in output.items():
record_value_output_key = f"{prefix}{record_key}"
if isinstance(record_value, dict) and "class" in record_value:
- handle_known_output(record_value, record_value_output_key, output_name)
+ handle_known_output(record_value, record_value_output_key)
else:
# param_evaluation_noexpr
handle_known_output_json(output, output_name)
@@ -222,6 +222,8 @@ def handle_known_output_json(output, output_name):
handle_known_output_json(None, output_name)
job_metadata = os.path.join(job_directory, cwl_metadata_params["job_metadata"])
+ # We may have moved away the tool working directory
+ os.makedirs(tool_working_directory, exist_ok=True)
with open(job_metadata, "w") as f:
json.dump(provided_metadata, f)
diff --git a/lib/galaxy/tool_util/cwl/util.py b/lib/galaxy/tool_util/cwl/util.py
index 2f3edde16dba..a22659e4f39c 100644
--- a/lib/galaxy/tool_util/cwl/util.py
+++ b/lib/galaxy/tool_util/cwl/util.py
@@ -8,6 +8,8 @@
import io
import json
import os
+import pathlib
+import shutil
import tarfile
import tempfile
import urllib.parse
@@ -57,6 +59,7 @@ def set_basename_and_derived_properties(properties, basename):
"secondaryFiles": List[Any],
"checksum": str,
"size": int,
+ "format": Optional[str],
},
total=False,
)
@@ -67,6 +70,8 @@ def output_properties(
content: Optional[bytes] = None,
basename=None,
pseudo_location=False,
+ cwl_formats: Optional[List[str]] = None,
+ download_url="",
) -> OutputPropertiesType:
checksum = hashlib.sha1()
properties: OutputPropertiesType = {"class": "File", "checksum": "", "size": 0}
@@ -90,14 +95,16 @@ def output_properties(
f.close()
properties["checksum"] = f"sha1${checksum.hexdigest()}"
properties["size"] = filesize
+ properties["format"] = cwl_formats[0] if cwl_formats else None
set_basename_and_derived_properties(properties, basename)
- _handle_pseudo_location(properties, pseudo_location)
+ _handle_pseudo_location(properties, pseudo_location, download_url)
return properties
-def _handle_pseudo_location(properties, pseudo_location):
+def _handle_pseudo_location(properties, pseudo_location, download_url):
if pseudo_location:
- properties["location"] = properties["basename"]
+ # TODO: should be a URI to the dataset on the server
+ properties["location"] = pseudo_location.rstrip("/api") + download_url
def abs_path_or_uri(path_or_uri: str, relative_to: str) -> str:
@@ -150,10 +157,7 @@ def galactic_job_json(
dataset_collections: List[Dict[str, Any]] = []
def response_to_hda(target: UploadTarget, upload_response: Dict[str, Any]) -> Dict[str, str]:
- assert isinstance(upload_response, dict), upload_response
- assert "outputs" in upload_response, upload_response
- assert len(upload_response["outputs"]) > 0, upload_response
- dataset = upload_response["outputs"][0]
+ dataset = next(iter(upload_response["outputs"].values()))
datasets.append(dataset)
dataset_id = dataset["id"]
return {"src": "hda", "id": dataset_id}
@@ -281,18 +285,39 @@ def replacement_file(value):
def replacement_directory(value: Dict[str, Any]) -> Dict[str, Any]:
file_path = value.get("location", None) or value.get("path", None)
- if file_path is None:
- return value
+ temp_dir = None
+ try:
+ if file_path is None:
+ # Probably a directory literal
+ # Make directory, create tar, put listing
+ temp_dir = tempfile.mkdtemp(prefix="file_literal_upload_dir")
+ base_dir = pathlib.Path(temp_dir) / value["basename"]
+ base_dir.mkdir()
+ for entry in value["listing"]:
+ if entry["class"] == "File":
+ if "contents" in entry:
+ with open(base_dir / entry["basename"], "w") as fh:
+ fh.write(entry["contents"])
+ elif "path" in entry:
+                        # os.path.join ignores test_data_directory when entry["path"] is absolute, so no abspath check is needed
+ entry_path = os.path.join(test_data_directory, entry["path"])
+ os.symlink(entry_path, str((base_dir / entry["path"]).resolve()))
+ else:
+ raise Exception(f"{entry['class']} unimplemented")
+ file_path = str(base_dir.resolve())
- if not os.path.isabs(file_path):
file_path = os.path.join(test_data_directory, file_path)
- tmp = tempfile.NamedTemporaryFile(delete=False)
- tf = tarfile.open(fileobj=tmp, mode="w:")
- tf.add(file_path, ".")
- tf.close()
-
- return upload_tar(tmp.name)
+ tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".tar")
+ tf = tarfile.open(fileobj=tmp, mode="w:", dereference=True)
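+        # dereference=True follows symlinks (e.g. the ones created above for listed
+        # files) so the real file contents end up in the tar.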
+ tf.add(file_path, ".")
+ tf.close()
+ finally:
+ if temp_dir:
+ shutil.rmtree(temp_dir)
+ upload_response = upload_tar(tmp.name)
+ upload_response.update(value)
+ return upload_response
def replacement_list(value) -> Dict[str, str]:
collection_element_identifiers = []
@@ -517,7 +542,12 @@ def dataset_dict_to_json_content(dataset_dict):
if file_or_directory == "File":
dataset_dict = get_dataset(output_metadata)
- properties = output_properties(pseudo_location=pseudo_location, **dataset_dict)
+ properties = output_properties(
+ pseudo_location=pseudo_location,
+ cwl_formats=output_metadata["cwl_formats"],
+ download_url=output_metadata["download_url"],
+ **dataset_dict,
+ )
basename = properties["basename"]
extra_files = get_extra_files(output_metadata)
found_index = False
@@ -543,7 +573,13 @@ def dir_listing(dir_path):
if extra_file_class == "File":
ec = get_dataset(output_metadata, filename=path)
ec["basename"] = extra_file_basename
- ec_properties = output_properties(pseudo_location=pseudo_location, **ec)
+ filename = f"{dir_path}/{extra_file_basename}"
+ _download_url = (
+ output_metadata["download_url"] + f"?filename={urllib.parse.quote_plus(filename)}"
+ )
+ ec_properties = output_properties(
+ pseudo_location=pseudo_location, download_url=_download_url, **ec
+ )
elif extra_file_class == "Directory":
ec_properties = {}
ec_properties["class"] = "Directory"
@@ -571,7 +607,12 @@ def dir_listing(dir_path):
if extra_file_class == "File":
ec = get_dataset(output_metadata, filename=path)
ec["basename"] = ec_basename
- ec_properties = output_properties(pseudo_location=pseudo_location, **ec)
+ download_url = (
+ output_metadata["download_url"] + f"?filename={urllib.parse.quote_plus(path)}"
+ )
+ ec_properties = output_properties(
+ pseudo_location=pseudo_location, download_url=download_url, **ec
+ )
elif extra_file_class == "Directory":
ec_properties = {}
ec_properties["class"] = "Directory"
@@ -592,6 +633,11 @@ def dir_listing(dir_path):
"basename": basename,
"listing": listing,
}
+ _handle_pseudo_location(
+ properties,
+ pseudo_location=pseudo_location,
+ download_url=output_metadata["download_url"] + "?to_ext=directory",
+ )
extra_files = get_extra_files(output_metadata)
for extra_file in extra_files:
@@ -623,13 +669,6 @@ def dir_listing(dir_path):
raise NotImplementedError("Unknown history content type encountered")
-def download_output(galaxy_output, get_metadata, get_dataset, get_extra_files, output_path):
- output_metadata = get_metadata(galaxy_output.history_content_type, galaxy_output.history_content_id)
- dataset_dict = get_dataset(output_metadata)
- with open(output_path, "wb") as fh:
- fh.write(dataset_dict["content"])
-
-
def guess_artifact_type(path):
tool_or_workflow = "workflow"
path, object_id = urllib.parse.urldefrag(path)
diff --git a/lib/galaxy/tool_util/parser/cwl.py b/lib/galaxy/tool_util/parser/cwl.py
index 26d2fdfdc16c..5bb9f822d6c2 100644
--- a/lib/galaxy/tool_util/parser/cwl.py
+++ b/lib/galaxy/tool_util/parser/cwl.py
@@ -7,6 +7,7 @@
from galaxy.tool_util.cwl.parser import (
tool_proxy,
+ tool_proxy_from_persistent_representation,
ToolProxy,
)
from galaxy.tool_util.deps import requirements
@@ -17,24 +18,59 @@
ToolSource,
)
from .output_actions import ToolOutputActionGroup
-from .output_objects import ToolOutput
+from .output_collection_def import dataset_collector_descriptions_from_list
+from .output_objects import (
+ ToolOutput,
+ ToolOutputCollection,
+ ToolOutputCollectionStructure,
+)
from .stdio import (
StdioErrorLevel,
ToolStdioExitCode,
)
-from .yaml import YamlInputSource
+from .yaml import (
+ YamlInputSource,
+ YamlPageSource,
+)
+
+GX_INTERFACE_NAMESPACE = "http://galaxyproject.org/cwl#interface"
+
+CWL_DEFAULT_FILE_OUTPUT = "data" # set to _sniff_ to sniff output types automatically.
log = logging.getLogger(__name__)
+def strip_namespace(ordered_dict, namespace):
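+    # Recursively remove the given namespace prefix from dict keys (e.g.
+    # "http://galaxyproject.org/cwl#") so the hint content can be parsed like a
+    # plain YAML tool definition.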
+ if isinstance(ordered_dict, dict):
+ value = {}
+ for k, v in ordered_dict.items():
+ if k.startswith(namespace):
+ k = k[len(namespace) :]
+ value[k] = strip_namespace(v, namespace)
+ return value
+ elif isinstance(ordered_dict, list):
+ return [strip_namespace(v, namespace) for v in ordered_dict]
+ return ordered_dict
+
+
class CwlToolSource(ToolSource):
language = "yaml"
- def __init__(self, tool_file=None, strict_cwl_validation=True, tool_proxy: Optional[ToolProxy] = None):
- self._cwl_tool_file = tool_file
- self._tool_proxy = tool_proxy
+ def __init__(
+ self,
+ tool_file=None,
+ tool_object=None,
+ strict_cwl_validation: bool = True,
+ tool_directory=None,
+ uuid=None,
+ tool_proxy: Optional[ToolProxy] = None,
+ ):
self._source_path = tool_file
+ self._source_object = tool_object
+ self._tool_proxy = tool_proxy
self._strict_cwl_validation = strict_cwl_validation
+ self._tool_directory = tool_directory
+ self._uuid = uuid
@property
def source_path(self):
@@ -43,11 +79,34 @@ def source_path(self):
@property
def tool_proxy(self) -> ToolProxy:
if self._tool_proxy is None:
- self._tool_proxy = tool_proxy(self._source_path, strict_cwl_validation=self._strict_cwl_validation)
+ if self._source_path is not None:
+ self._tool_proxy = tool_proxy(
+ self._source_path,
+ strict_cwl_validation=self._strict_cwl_validation,
+ tool_directory=self._tool_directory,
+ uuid=self._uuid,
+ )
+ else:
+ assert "uuid" in self._source_object
+ self._tool_proxy = tool_proxy_from_persistent_representation(
+ self._source_object,
+ strict_cwl_validation=self._strict_cwl_validation,
+ tool_directory=self._tool_directory,
+ )
return self._tool_proxy
+ def _get_gx_interface(self):
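+        # Return the namespace-stripped gx:interface hint if the tool declares one;
+        # tools with an interface are treated as "galactic_cwl" tools.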
+ rval = None
+ for h in self.tool_proxy.hints_or_requirements_of_class(GX_INTERFACE_NAMESPACE):
+ rval = strip_namespace(h, GX_INTERFACE_NAMESPACE[: -len("interface")])
+
+ return rval
+
def parse_tool_type(self):
- return "cwl"
+ if self._get_gx_interface() is not None:
+ return "galactic_cwl"
+ else:
+ return "cwl"
def parse_id(self):
return self.tool_proxy.galaxy_id()
@@ -95,16 +154,36 @@ def parse_strict_shell(self):
def parse_stdio(self):
# TODO: remove duplication with YAML
- # New format - starting out just using exit code.
- exit_code_lower = ToolStdioExitCode()
- exit_code_lower.range_start = -math.inf
- exit_code_lower.range_end = -1
- exit_code_lower.error_level = StdioErrorLevel.FATAL
- exit_code_high = ToolStdioExitCode()
- exit_code_high.range_start = 1
- exit_code_high.range_end = math.inf
- exit_code_lower.error_level = StdioErrorLevel.FATAL
- return [exit_code_lower, exit_code_high], []
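+        # Translate the tool's CWL successCodes into Galaxy exit-code ranges: runs of
+        # consecutive success codes are collapsed and every gap around them becomes a
+        # FATAL range.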
+ exit_codes = []
+
+ success_codes = sorted(set(self.tool_proxy._tool.tool.get("successCodes") or [0]))
+
+ last_success_code = None
+
+ for success_code in success_codes:
+ if last_success_code is not None and success_code == last_success_code + 1:
+ last_success_code = success_code
+ continue
+
+ exit_code = ToolStdioExitCode()
+ range_start = -math.inf
+ if last_success_code is not None:
+ range_start = last_success_code + 1
+
+ exit_code.range_start = range_start
+ exit_code.range_end = success_code - 1
+ exit_code.error_level = StdioErrorLevel.FATAL
+ exit_codes.append(exit_code)
+
+ last_success_code = success_code
+
+ exit_code = ToolStdioExitCode()
+ exit_code.range_start = last_success_code + 1
+ exit_code.range_end = math.inf
+ exit_code.error_level = StdioErrorLevel.FATAL
+ exit_codes.append(exit_code)
+
+ return exit_codes, []
def parse_interpreter(self):
return None
@@ -119,26 +198,47 @@ def parse_interactivetool(self):
return []
def parse_input_pages(self) -> PagesSource:
- page_source = CwlPageSource(self.tool_proxy)
+ gx_interface = self._get_gx_interface()
+ if gx_interface is None:
+ page_source: PageSource = CwlPageSource(self.tool_proxy)
+ else:
+ page_source = YamlPageSource(gx_interface["inputs"])
return PagesSource([page_source])
def parse_outputs(self, tool):
output_instances = self.tool_proxy.output_instances()
outputs = {}
+ output_collections = {}
output_defs = []
for output_instance in output_instances:
output_defs.append(self._parse_output(tool, output_instance))
+
# TODO: parse outputs collections
for output_def in output_defs:
- outputs[output_def.name] = output_def
- return outputs, {}
+            outputs[output_def.name] = output_def
+            if not isinstance(output_def, ToolOutput):
+                output_collections[output_def.name] = output_def
+ return outputs, output_collections
def _parse_output(self, tool, output_instance):
+ output_type = output_instance.output_data_type
+ if isinstance(output_type, dict) and output_type.get("type") == "record":
+ return self._parse_output_record(tool, output_instance)
+ elif isinstance(output_type, dict) and output_type.get("type") == "array":
+ return self._parse_output_array(tool, output_instance)
+ else:
+ return self._parse_output_data(tool, output_instance)
+
+ def _parse_output_data(self, tool, output_instance):
name = output_instance.name
# TODO: handle filters, actions, change_format
output = ToolOutput(name)
if "File" in output_instance.output_data_type:
- output.format = "_sniff_"
+ output.format = CWL_DEFAULT_FILE_OUTPUT
+ elif "Directory" in output_instance.output_data_type:
+ output.format = "directory"
else:
output.format = "expression.json"
output.change_format = []
@@ -154,6 +254,35 @@ def _parse_output(self, tool, output_instance):
output.actions = ToolOutputActionGroup(output, None)
return output
+ def _parse_output_record(self, tool, output_instance):
+ name = output_instance.name
+ # TODO: clean output bindings and other non-structure information
+ # from this.
+ fields = output_instance.output_data_type.get("fields")
+ output_collection = ToolOutputCollection(
+ name,
+ ToolOutputCollectionStructure(
+ collection_type="record",
+ fields=fields,
+ ),
+ )
+ return output_collection
+
+ def _parse_output_array(self, tool, output_instance):
+ name = output_instance.name
+ # TODO: Handle nested arrays and such...
+ dataset_collector_descriptions = dataset_collector_descriptions_from_list(
+ [{"from_provided_metadata": True}],
+ )
+ output_collection = ToolOutputCollection(
+ name,
+ ToolOutputCollectionStructure(
+ collection_type="list",
+ dataset_collector_descriptions=dataset_collector_descriptions,
+ ),
+ )
+ return output_collection
+
def parse_requirements_and_containers(self):
containers = []
docker_identifier = self.tool_proxy.docker_identifier()
@@ -174,6 +303,16 @@ def parse_profile(self):
def parse_xrefs(self):
return []
+ def parse_provided_metadata_style(self):
+ return "default"
+
+ def parse_cores_min(self):
+ for h in self.tool_proxy.hints_or_requirements_of_class("ResourceRequirement"):
+ cores_min = h.get("coresMin")
+ if cores_min:
+ return cores_min
+ return 1
+
def parse_license(self):
return None
diff --git a/lib/galaxy/tool_util/parser/factory.py b/lib/galaxy/tool_util/parser/factory.py
index ce403638f7f0..16c07b969d71 100644
--- a/lib/galaxy/tool_util/parser/factory.py
+++ b/lib/galaxy/tool_util/parser/factory.py
@@ -7,9 +7,11 @@
List,
Optional,
)
+from uuid import uuid4
from yaml import safe_load
+from galaxy.tool_util.cwl.parser import tool_proxy_from_persistent_representation
from galaxy.tool_util.loader import load_tool_with_refereces
from galaxy.util import (
ElementTree,
@@ -17,10 +19,7 @@
)
from galaxy.util.path import StrPath
from galaxy.util.yaml_util import ordered_load
-from .cwl import (
- CwlToolSource,
- tool_proxy,
-)
+from .cwl import CwlToolSource
from .interface import (
InputSource,
ToolSource,
@@ -42,9 +41,8 @@ def build_xml_tool_source(xml_string: str) -> XmlToolSource:
return XmlToolSource(parse_xml_string_to_etree(xml_string))
-def build_cwl_tool_source(yaml_string: str) -> CwlToolSource:
- proxy = tool_proxy(tool_object=safe_load(yaml_string))
- # regular CwlToolSource sets basename as tool id, but that's not going to cut it in production
+def build_cwl_tool_source(persistent_representation: str) -> CwlToolSource:
+ proxy = tool_proxy_from_persistent_representation(persistent_representation)
return CwlToolSource(tool_proxy=proxy)
@@ -65,6 +63,8 @@ def get_tool_source(
enable_beta_formats: bool = True,
tool_location_fetcher: Optional[ToolLocationFetcher] = None,
macro_paths: Optional[List[str]] = None,
+ strict_cwl_validation: bool = True,
+ uuid: Optional[str] = None,
tool_source_class: Optional[str] = None,
raw_tool_source: Optional[str] = None,
) -> ToolSource:
@@ -100,21 +100,33 @@ def get_tool_source(
return YamlToolSource(as_dict, source_path=config_file)
elif config_file.endswith(".json") or config_file.endswith(".cwl"):
log.info(
- "Loading CWL tool - this is experimental - tool likely will not function in future at least in same way."
+ "Loading CWL tool [%s]. This is experimental - tool likely will not function in future at least in same way.",
+ config_file,
)
- return CwlToolSource(config_file)
+ uuid = uuid or str(uuid4())
+ return CwlToolSource(config_file, strict_cwl_validation=strict_cwl_validation, uuid=uuid)
else:
tree, macro_paths = load_tool_with_refereces(config_file)
return XmlToolSource(tree, source_path=config_file, macro_paths=macro_paths)
-def get_tool_source_from_representation(tool_format, tool_representation):
+def get_tool_source_from_representation(
+ tool_format, tool_representation, strict_cwl_validation=True, tool_directory=None, uuid=None
+):
+ # TODO: PRE-MERGE - ensure strict_cwl_validation is being set on caller - ignored right now.
# TODO: make sure whatever is consuming this method uses ordered load.
log.info("Loading dynamic tool - this is experimental - tool may not function in future.")
if tool_format == "GalaxyTool":
if "version" not in tool_representation:
tool_representation["version"] = "1.0.0" # Don't require version for embedded tools.
return YamlToolSource(tool_representation)
+ elif tool_format in ["CommandLineTool", "ExpressionTool"]:
+ return CwlToolSource(
+ tool_object=tool_representation,
+ strict_cwl_validation=strict_cwl_validation,
+ tool_directory=tool_directory,
+ uuid=uuid,
+ )
else:
raise Exception(f"Unknown tool representation format [{tool_format}].")
diff --git a/lib/galaxy/tool_util/parser/interface.py b/lib/galaxy/tool_util/parser/interface.py
index af72bf4a4825..695841cbc629 100644
--- a/lib/galaxy/tool_util/parser/interface.py
+++ b/lib/galaxy/tool_util/parser/interface.py
@@ -375,6 +375,10 @@ def parse_python_template_version(self) -> Optional[packaging.version.Version]:
Return minimum python version that the tool template has been developed against.
"""
+ def parse_cores_min(self) -> Union[float, int, str]:
+ """Return minimum number of cores required to run this tool."""
+ return 1
+
def parse_creator(self):
"""Return list of metadata relating to creator/author of tool.
diff --git a/lib/galaxy/tool_util/parser/output_objects.py b/lib/galaxy/tool_util/parser/output_objects.py
index 63148c1fb946..7825d6308197 100644
--- a/lib/galaxy/tool_util/parser/output_objects.py
+++ b/lib/galaxy/tool_util/parser/output_objects.py
@@ -402,12 +402,14 @@ def __init__(
collection_type_from_rules: Optional[str] = None,
structured_like: Optional[str] = None,
dataset_collector_descriptions: Optional[List[DatasetCollectionDescription]] = None,
+ fields=None,
) -> None:
self.collection_type = collection_type
self.collection_type_source = collection_type_source
self.collection_type_from_rules = collection_type_from_rules
self.structured_like = structured_like
self.dataset_collector_descriptions = dataset_collector_descriptions or []
+ self.fields = fields
if collection_type and collection_type_source:
raise ValueError("Cannot set both type and type_source on collection output.")
if (
@@ -424,6 +426,10 @@ def __init__(
raise ValueError(
"Cannot specify dynamic structure (discover_datasets) and collection type attributes structured_like or collection_type_from_rules."
)
+ if collection_type == "record" and fields is None:
+ raise ValueError("If record outputs are defined, fields must be defined as well.")
+ if fields is not None and collection_type != "record":
+ raise ValueError("If fields are specified for outputs, the collection type must be record.")
self.dynamic = bool(dataset_collector_descriptions)
def collection_prototype(self, inputs, type_registry):
@@ -433,7 +439,7 @@ def collection_prototype(self, inputs, type_registry):
else:
collection_type = self.collection_type
assert collection_type
- collection_prototype = type_registry.prototype(collection_type)
+ collection_prototype = type_registry.prototype(collection_type, fields=self.fields)
collection_prototype.collection_type = collection_type
return collection_prototype
diff --git a/lib/galaxy/tool_util/parser/yaml.py b/lib/galaxy/tool_util/parser/yaml.py
index 88be9c72846a..d7564916ca3a 100644
--- a/lib/galaxy/tool_util/parser/yaml.py
+++ b/lib/galaxy/tool_util/parser/yaml.py
@@ -401,6 +401,9 @@ def parse_default(self) -> Optional[Dict[str, Any]]:
default_def = input_dict.get("default", None)
return default_def
+ def parse_map_to(self):
+ return self.input_dict.get("mapTo")
+
def _ensure_has(dict, defaults):
for key, value in defaults.items():
diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py
index 2e352e7c0f39..75f39467307d 100644
--- a/lib/galaxy/tools/__init__.py
+++ b/lib/galaxy/tools/__init__.py
@@ -8,6 +8,7 @@
import math
import os
import re
+import shlex
import tarfile
import tempfile
from collections.abc import MutableMapping
@@ -54,6 +55,10 @@
from galaxy.model.dataset_collections.matching import MatchingCollections
from galaxy.tool_shed.util.repository_util import get_installed_repository
from galaxy.tool_shed.util.shed_util_common import set_image_paths
+from galaxy.tool_util.cwl import (
+ needs_shell_quoting,
+ to_galaxy_parameters,
+)
from galaxy.tool_util.deps import (
build_dependency_manager,
CachedDependencyManager,
@@ -163,6 +168,7 @@
parse_xml_string,
parse_xml_string_to_etree,
rst_to_html,
+ safe_makedirs,
string_as_bool,
unicodify,
UNKNOWN,
@@ -256,6 +262,7 @@
"CONVERTER_gff_to_interval_index_0",
"CONVERTER_maf_to_fasta_0",
"CONVERTER_maf_to_interval_0",
+ "CONVERTER_tar_to_directory", # WORKAROUND FOR CWL BRANCH! GOTTA FIX THIS I GUESS
# Tools improperly migrated to the tool shed (devteam)
"qualityFilter",
"pileup_interval",
@@ -590,6 +597,7 @@ def get_expanded_tool_source(self, config_file, **kwargs):
config_file,
enable_beta_formats=getattr(self.app.config, "enable_beta_tool_formats", False),
tool_location_fetcher=self.tool_location_fetcher,
+ strict_cwl_validation=getattr(self.app.config, "strict_cwl_validation", True),
**kwargs,
)
except Exception as e:
@@ -605,10 +613,26 @@ def create_dynamic_tool(self, dynamic_tool, **kwds):
tool_representation = dynamic_tool.value
if "name" not in tool_representation:
tool_representation["name"] = f"dynamic tool {dynamic_tool.uuid}"
- tool_source = get_tool_source_from_representation(
+ strict_cwl_validation = getattr(self.app.config, "strict_cwl_validation", True)
+ get_source_kwds = dict(
tool_format=tool_format,
tool_representation=tool_representation,
+ strict_cwl_validation=strict_cwl_validation,
+ uuid=dynamic_tool.uuid,
)
+ if dynamic_tool.tool_directory:
+ get_source_kwds["tool_directory"] = dynamic_tool.tool_directory
+ if dynamic_tool.tool_path:
+ config_file = dynamic_tool.tool_path
+ # TODO: uuid probably needed here...
+ tool_source = get_tool_source(
+ config_file,
+ enable_beta_formats=getattr(self.app.config, "enable_beta_tool_formats", True),
+ tool_location_fetcher=self.tool_location_fetcher,
+ strict_cwl_validation=strict_cwl_validation,
+ )
+ else:
+ tool_source = get_tool_source_from_representation(**get_source_kwds)
kwds["dynamic"] = True
tool = self._create_tool_from_source(tool_source, **kwds)
tool.dynamic_tool = dynamic_tool
@@ -778,6 +802,7 @@ class Tool(UsesDictVisibleKeys):
__help: Optional[Template]
job_search: "JobSearch"
version: str
+ may_use_container_entry_point = False
def __init__(
self,
@@ -1253,6 +1278,7 @@ def parse(self, tool_source: ToolSource, guid: Optional[str] = None, dynamic: bo
self.ports = tool_source.parse_interactivetool()
self._is_workflow_compatible = self.check_workflow_compatible(self.tool_source)
+ self.cores_min = tool_source.parse_cores_min()
def __parse_legacy_features(self, tool_source: ToolSource):
self.code_namespace: Dict[str, str] = {}
@@ -1782,6 +1808,22 @@ def check_workflow_compatible(self, tool_source):
# outputs?
return True
+ def inputs_from_dict(self, as_dict):
+ """Extra inputs from input dictionary (e.g. API payload).
+
+ Translate for tool type as needed.
+ """
+ inputs = as_dict.get("inputs", {})
+ if not isinstance(inputs, dict):
+ raise exceptions.RequestParameterInvalidException(f"inputs invalid [{inputs}]")
+ inputs_representation = as_dict.get("inputs_representation", "galaxy")
+ if inputs_representation != "galaxy":
+ raise exceptions.RequestParameterInvalidException(
+ "Only galaxy inputs representation is allowed for normal tools."
+ )
+ # TODO: Consider <>.
+ return inputs
+
def new_state(self, trans):
"""
Create a new `DefaultToolState` for this tool. It will be initialized
@@ -2566,7 +2608,9 @@ def to_dict(self, trans, link_details=False, io_details=False, tool_help=False):
tool_class = self.__class__
# FIXME: the Tool class should declare directly, instead of ad hoc inspection
- regular_form = tool_class == Tool or isinstance(self, (DatabaseOperationTool, InteractiveTool))
+ regular_form = tool_class == Tool or isinstance(
+ self, (DatabaseOperationTool, InteractiveTool, CwlCommandBindingTool)
+ )
tool_dict["form_style"] = "regular" if regular_form else "special"
if tool_help:
# create tool help
@@ -3211,6 +3255,134 @@ def job_failed(self, job_wrapper, message, exception=False):
self.__remove_interactivetool_by_job(job)
+class CwlCommandBindingTool(Tool):
+ """Tools that use CWL to bind parameters to command-line descriptions."""
+
+ def exec_before_job(self, app, inp_data, out_data, param_dict=None):
+ super().exec_before_job(app, inp_data, out_data, param_dict=param_dict)
+        if param_dict is None:
+            raise Exception("Internal error - param_dict is empty.")
+        # Working directory on Galaxy server (instead of remote compute).
+        local_working_directory = param_dict["__local_working_directory__"]
+        log.info("exec_before_job for CWL tool")
+
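+        # Convert the wrapped Galaxy parameters into a CWL job JSON, hand it to the
+        # CWL job proxy, and stash the resulting concrete command line in param_dict
+        # so the evaluator can emit it verbatim.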
+ input_json = self.param_dict_to_cwl_inputs(param_dict, local_working_directory)
+
+ output_dict = {}
+ for name, dataset in out_data.items():
+ output_dict[name] = {
+ "id": str(getattr(dataset.dataset, dataset.dataset.store_by)),
+ "path": dataset.get_file_name(),
+ }
+
+        # prevent an unset optional File input from triggering a 'ValidationException'
+ input_json = {
+ k: v
+ for k, v in input_json.items()
+ if not (isinstance(v, dict) and v.get("class") == "File" and v.get("location") == "None")
+ }
+
+ # prevent empty string
+ # this really seems wrong -John
+ input_json = {k: v for k, v in input_json.items() if v != ""}
+
+ cwl_job_proxy = self._cwl_tool_proxy.job_proxy(
+ input_json,
+ output_dict,
+ local_working_directory,
+ )
+ cwl_command_line = cwl_job_proxy.command_line
+ cwl_stdin = cwl_job_proxy.stdin
+ cwl_stdout = cwl_job_proxy.stdout
+ cwl_stderr = cwl_job_proxy.stderr
+ env = cwl_job_proxy.environment
+
+ def needs_shell_quoting_hack(arg):
+ if arg == "$GALAXY_SLOTS":
+ return False
+ else:
+ return needs_shell_quoting(arg)
+
+ command_line = " ".join(shlex.quote(arg) if needs_shell_quoting_hack(arg) else arg for arg in cwl_command_line)
+ if cwl_stdin:
+ command_line += ' < "' + cwl_stdin + '"'
+ if cwl_stdout:
+ command_line += ' > "' + cwl_stdout + '"'
+ if cwl_stderr:
+ command_line += ' 2> "' + cwl_stderr + '"'
+ cwl_job_state = {
+ "args": cwl_command_line,
+ "stdin": cwl_stdin,
+ "stdout": cwl_stdout,
+ "stderr": cwl_stderr,
+ "env": env,
+ }
+ tool_working_directory = os.path.join(local_working_directory, "working")
+ # Move to prepare...
+ safe_makedirs(tool_working_directory)
+ cwl_job_proxy.stage_files()
+
+ cwl_job_proxy.rewrite_inputs_for_staging()
+ # Write representation to disk that can be reloaded at runtime
+ # and outputs collected before Galaxy metadata is gathered.
+ cwl_job_proxy.save_job()
+
+ param_dict["__cwl_command"] = command_line
+ param_dict["__cwl_command_state"] = cwl_job_state
+ param_dict["__cwl_command_version"] = 1
+ log.info("CwlTool.exec_before_job() generated command_line %s", command_line)
+
+ def parse(self, tool_source, guid=None, dynamic=False):
+ super().parse(tool_source, guid=guid, dynamic=dynamic)
+ cwl_tool_proxy = getattr(tool_source, "tool_proxy", None)
+ if cwl_tool_proxy is None:
+ raise Exception("parse() called on tool source not defining a proxy object to underlying CWL tool.")
+ self._cwl_tool_proxy = cwl_tool_proxy
+
+ def param_dict_to_cwl_inputs(self, param_dict, local_working_directory):
+ """Map Galaxy API inputs description to a CWL job json."""
+ raise NotImplementedError()
+
+
+class GalacticCwlTool(CwlCommandBindingTool):
+ """A CWL tool with a gx:Interface defined so Galaxy tool state can be used."""
+
+ tool_type = "galactic_cwl"
+
+ def param_dict_to_cwl_inputs(self, param_dict, local_working_directory):
+ from galaxy.tool_util.cwl.representation import galactic_flavored_to_cwl_job
+
+ input_json = galactic_flavored_to_cwl_job(self, param_dict, local_working_directory)
+ return input_json
+
+
+class CwlTool(CwlCommandBindingTool):
+ tool_type = "cwl"
+ may_use_container_entry_point = True
+
+ def param_dict_to_cwl_inputs(self, param_dict, local_working_directory):
+ """Map Galaxy API inputs description to a CWL job json."""
+ from galaxy.tool_util.cwl import to_cwl_job
+
+ input_json = to_cwl_job(self, param_dict, local_working_directory)
+ return input_json
+
+ def inputs_from_dict(self, as_dict):
+ """Extra inputs from input dictionary (e.g. API payload).
+
+ Translate for tool type as needed.
+ """
+ inputs = as_dict.get("inputs", {})
+ inputs_representation = as_dict.get("inputs_representation", "galaxy")
+ if inputs_representation not in ["galaxy", "cwl"]:
+ raise exceptions.RequestParameterInvalidException("Inputs representation must be galaxy or cwl.")
+
+ if inputs_representation == "cwl":
+ inputs = to_galaxy_parameters(self, inputs)
+
+ return inputs
+
+
class DataManagerTool(OutputParameterJSONTool):
tool_type = "manage_data"
default_tool_action = DataManagerToolAction
@@ -4169,6 +4341,8 @@ def produce_outputs(self, trans, out_data, output_collections, incoming, history
BuildListCollectionTool,
ExtractDatasetCollectionTool,
DataDestinationTool,
+ CwlTool,
+ GalacticCwlTool,
]
tool_types = {tool_class.tool_type: tool_class for tool_class in TOOL_CLASSES}
diff --git a/lib/galaxy/tools/actions/__init__.py b/lib/galaxy/tools/actions/__init__.py
index 841eea988d49..50563eb55bcf 100644
--- a/lib/galaxy/tools/actions/__init__.py
+++ b/lib/galaxy/tools/actions/__init__.py
@@ -153,7 +153,9 @@ def record_permission(action, role_id):
def visitor(input, value, prefix, prefixed_name: str, parent=None, **kwargs):
def process_dataset(data, formats=None):
- if not data or isinstance(data, RuntimeValue):
+ # default file coming from a workflow
+ is_workflow_default = isinstance(data, dict) and data.get("class") == "File"
+ if not data or isinstance(data, RuntimeValue) or is_workflow_default:
return None
if formats is None:
formats = input.formats
@@ -679,7 +681,10 @@ def handle_output(name, output, hidden=None):
assert not element_identifiers # known_outputs must have been empty
element_kwds = dict(elements=collections_manager.ELEMENTS_UNINITIALIZED)
else:
- element_kwds = dict(element_identifiers=element_identifiers)
+ element_kwds = dict(
+ element_identifiers=element_identifiers,
+ fields=output.structure.fields,
+ )
output_collections.create_collection(
output=output, name=name, completed_job=completed_job, **element_kwds
)
@@ -694,7 +699,7 @@ def handle_output(name, output, hidden=None):
)
# Add all the top-level (non-child) datasets to the history unless otherwise specified
for name, data in out_data.items():
- if name not in incoming and name not in child_dataset_names:
+ if getattr(data, "hid", None) is None or (name not in incoming and name not in child_dataset_names):
# don't add already existing datasets, i.e. async created
history.stage_addition(data)
history.add_pending_items(set_output_hid=set_output_hid)
@@ -927,6 +932,9 @@ def _record_inputs(self, trans, tool, job, incoming, inp_data, inp_dataset_colle
reductions[name] = []
reductions[name].append(dataset_collection)
+ if getattr(dataset_collection, "ephemeral", False):
+ dataset_collection = dataset_collection.persistent_object
+
# TODO: verify can have multiple with same name, don't want to lose traceability
if isinstance(dataset_collection, model.HistoryDatasetCollectionAssociation):
job.add_input_dataset_collection(name, dataset_collection)
diff --git a/lib/galaxy/tools/actions/upload.py b/lib/galaxy/tools/actions/upload.py
index c758c96b2ae7..8fe7b7cae4d8 100644
--- a/lib/galaxy/tools/actions/upload.py
+++ b/lib/galaxy/tools/actions/upload.py
@@ -63,6 +63,11 @@ def execute(
assert dataset_upload_inputs, Exception("No dataset upload groups were found.")
persisting_uploads_timer = ExecutionTimer()
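+    # An explicit CWL format ontology term can select the Galaxy datatype when the
+    # user left file_type as "auto".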
+ if incoming.get("file_type") == "auto" and incoming.get("cwl_format"):
+ cwl_format = incoming["cwl_format"]
+ ext = trans.app.datatypes_registry.get_datatype_ext_by_format_ontology(cwl_format, only_uploadable=True)
+ if ext:
+ incoming["file_type"] = ext
incoming = upload_common.persist_uploads(incoming, trans)
log.debug(f"Persisted uploads {persisting_uploads_timer}")
rval = self._setup_job(tool, trans, incoming, dataset_upload_inputs, history)
diff --git a/lib/galaxy/tools/actions/upload_common.py b/lib/galaxy/tools/actions/upload_common.py
index 924bdc64a3b5..f6cdb8507d4a 100644
--- a/lib/galaxy/tools/actions/upload_common.py
+++ b/lib/galaxy/tools/actions/upload_common.py
@@ -39,7 +39,13 @@
def validate_datatype_extension(datatypes_registry, ext):
if ext and ext not in ("auto", "data") and not datatypes_registry.get_datatype_by_extension(ext):
+        # also allow specifying the datatype by its format ontology term; might want to model this as a separate input
+ if ":" in ext:
+ ext_by_ontology = datatypes_registry.get_datatype_ext_by_format_ontology(ext)
+ if ext_by_ontology:
+ return ext_by_ontology
raise RequestParameterInvalidException(f"Requested extension '{ext}' unknown, cannot upload dataset.")
+ return ext
def persist_uploads(params, trans):
diff --git a/lib/galaxy/tools/data_fetch.py b/lib/galaxy/tools/data_fetch.py
index 8c25990ea73b..0f117b55ad5c 100644
--- a/lib/galaxy/tools/data_fetch.py
+++ b/lib/galaxy/tools/data_fetch.py
@@ -331,11 +331,11 @@ def _resolve_item_with_primary(item):
elif not link_data_only:
path = upload_config.ensure_in_working_directory(path, purge_source, in_place)
+ extra_files_path = f"{path}_extra"
extra_files = item.get("extra_files")
if extra_files:
# TODO: optimize to just copy the whole directory to extra files instead.
assert not upload_config.link_data_only, "linking composite dataset files not yet implemented"
- extra_files_path = f"{path}_extra"
staged_extra_files = extra_files_path
os.mkdir(extra_files_path)
@@ -375,6 +375,10 @@ def walk_extra_files(items, prefix=""):
assert path
datatype.groom_dataset_content(path)
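+    # "directory" datasets are fetched as an archive; expand it into the dataset's
+    # extra files path so the directory contents are available.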
+ if ext == "directory" and not deferred and path:
+ CompressedFile(path).extract(extra_files_path)
+ staged_extra_files = extra_files_path
+
if len(transform) > 0:
source_dict["transform"] = transform
elif not error_message:
diff --git a/lib/galaxy/tools/evaluation.py b/lib/galaxy/tools/evaluation.py
index 582bd65be06e..4fce36d8f43e 100644
--- a/lib/galaxy/tools/evaluation.py
+++ b/lib/galaxy/tools/evaluation.py
@@ -40,6 +40,7 @@
from galaxy.tools.parameters.basic import (
DataCollectionToolParameter,
DataToolParameter,
+ FieldTypeToolParameter,
SelectToolParameter,
)
from galaxy.tools.parameters.grouping import (
@@ -75,6 +76,7 @@
from galaxy.tools import Tool
log = logging.getLogger(__name__)
+CWL_TOOL_TYPES = ("galactic_cwl", "cwl")
class ToolErrorLog:
@@ -208,7 +210,12 @@ def build_param_dict(self, incoming, input_datasets, output_datasets, output_col
compute_environment = self.compute_environment
job_working_directory = compute_environment.working_directory()
- param_dict = TreeDict(self.param_dict)
+ if self.tool.tool_type == "cwl":
+ param_dict: Union[Dict[str, Any], TreeDict] = self.param_dict
+ else:
+            # TreeDict provides a way to access parameters without their fully qualified path;
+            # only Galaxy tools need this.
+ param_dict = TreeDict(self.param_dict)
param_dict["__datatypes_config__"] = param_dict["GALAXY_DATATYPES_CONF_FILE"] = os.path.join(
job_working_directory, "registry.xml"
@@ -222,6 +229,10 @@ def build_param_dict(self, incoming, input_datasets, output_datasets, output_col
self.__populate_wrappers(param_dict, input_datasets, job_working_directory)
self.__populate_input_dataset_wrappers(param_dict, input_datasets)
+ if self.tool.tool_type == "cwl":
+ # don't need the outputs or the sanitization:
+ param_dict["__local_working_directory__"] = self.local_working_directory
+ return param_dict
self.__populate_output_dataset_wrappers(param_dict, output_datasets, job_working_directory)
self.__populate_output_collection_wrappers(param_dict, output_collections, job_working_directory)
self.__populate_unstructured_path_rewrites(param_dict)
@@ -235,6 +246,9 @@ def build_param_dict(self, incoming, input_datasets, output_datasets, output_col
if self.job.tool_id == "upload1":
param_dict["paramfile"] = os.path.join(job_working_directory, "upload_params.json")
+ if not isinstance(param_dict, TreeDict):
+ return param_dict
+
if "input" not in param_dict.data:
def input():
@@ -401,6 +415,34 @@ def wrap_input(input_values, input):
)
wrapper = DatasetCollectionWrapper(job_working_directory, dataset_collection, **wrapper_kwds)
input_values[input.name] = wrapper
+ elif isinstance(input, FieldTypeToolParameter):
+ field_wrapper: Optional[Union[InputValueWrapper, DatasetFilenameWrapper, DatasetCollectionWrapper]] = (
+ None
+ )
+ if value:
+ assert "value" in value, value
+ assert "src" in value, value
+ src = value["src"]
+ if src == "json":
+ field_wrapper = InputValueWrapper(input, value, param_dict)
+ elif src == "hda":
+ field_wrapper = DatasetFilenameWrapper(
+ value["value"],
+ datatypes_registry=self.app.datatypes_registry,
+ tool=self.tool,
+ name=input.name,
+ )
+ elif src == "hdca":
+ field_wrapper = DatasetCollectionWrapper(
+ job_working_directory=job_working_directory,
+ has_collection=value["value"],
+ datatypes_registry=self.app.datatypes_registry,
+ tool=self.tool,
+ name=input.name,
+ )
+ else:
+ raise ValueError(f"src should be 'json' or 'hda' or 'hdca' but is '{src}'")
+ input_values[input.name] = field_wrapper
elif isinstance(input, SelectToolParameter):
if input.multiple:
value = listify(value)
@@ -643,22 +685,28 @@ def _build_command_line(self):
command_line = None
if not command:
return
- try:
- # Substituting parameters into the command
- command_line = fill_template(
- command, context=param_dict, python_template_version=self.tool.python_template_version
- )
- cleaned_command_line = []
- # Remove leading and trailing whitespace from each line for readability.
- for line in command_line.split("\n"):
- cleaned_command_line.append(line.strip())
- command_line = "\n".join(cleaned_command_line)
- # Remove newlines from command line, and any leading/trailing white space
- command_line = command_line.replace("\n", " ").replace("\r", " ").strip()
- except Exception:
- # Modify exception message to be more clear
- # e.args = ( 'Error substituting into command line. Params: %r, Command: %s' % ( param_dict, self.command ), )
- raise
+
+        # TODO: this approach specifies a command block as $__cwl_command / $__cwl_command_state,
+        # and that other approach needs to be unraveled.
+ if self.tool.tool_type in CWL_TOOL_TYPES and "__cwl_command" in param_dict:
+ command_line = param_dict["__cwl_command"]
+ else:
+ try:
+ # Substituting parameters into the command
+ command_line = fill_template(
+ command, context=param_dict, python_template_version=self.tool.python_template_version
+ )
+ cleaned_command_line = []
+ # Remove leading and trailing whitespace from each line for readability.
+ for line in command_line.split("\n"):
+ cleaned_command_line.append(line.strip())
+ command_line = "\n".join(cleaned_command_line)
+ # Remove newlines from command line, and any leading/trailing white space
+ command_line = command_line.replace("\n", " ").replace("\r", " ").strip()
+ except Exception:
+ # Modify exception message to be more clear
+ # e.args = ( 'Error substituting into command line. Params: %r, Command: %s' % ( param_dict, self.command ), )
+ raise
if interpreter:
# TODO: path munging for cluster/dataset server relocatability
executable = command_line.split()[0]
@@ -679,8 +727,11 @@ def _build_config_files(self):
"""
Build temporary file for file based parameter transfer if needed
"""
+ config_filenames: List[str] = []
+ if self.tool.tool_type in CWL_TOOL_TYPES:
+            # config file templating never happens for CWL tools
+ return config_filenames
param_dict = self.param_dict
- config_filenames = []
for name, filename, content in self.tool.config_files:
config_text, is_template = self.__build_config_file_text(content)
# If a particular filename was forced by the config use it
@@ -700,7 +751,12 @@ def _build_config_files(self):
def _build_environment_variables(self):
param_dict = self.param_dict
environment_variables = self.environment_variables
- for environment_variable_def in self.tool.environment_variables:
+ environment_variables_raw = self.tool.environment_variables
+ for key, value in param_dict.get("__cwl_command_state", {}).get("env", {}).items():
+ environment_variable = dict(name=key, template=value)
+ environment_variables_raw.append(environment_variable)
+
+ for environment_variable_def in environment_variables_raw:
directory = self.local_working_directory
environment_variable = environment_variable_def.copy()
environment_variable_template = environment_variable_def["template"]
@@ -729,7 +785,8 @@ def _build_environment_variables(self):
environment_variable_template = ""
is_template = False
else:
- is_template = True
+ # cwl tools should not template out values
+ is_template = self.tool.tool_type not in CWL_TOOL_TYPES
with tempfile.NamedTemporaryFile(dir=directory, prefix="tool_env_", delete=False) as temp:
config_filename = temp.name
self.__write_workdir_file(
diff --git a/lib/galaxy/tools/execute.py b/lib/galaxy/tools/execute.py
index 7cd2d814bac2..a5e144b19f1b 100644
--- a/lib/galaxy/tools/execute.py
+++ b/lib/galaxy/tools/execute.py
@@ -325,9 +325,11 @@ def record_error(self, error):
def on_text(self) -> Optional[str]:
collection_info = self.collection_info
if self._on_text is None and collection_info is not None:
- collection_names = [f"collection {c.hid}" for c in collection_info.collections.values()]
- self._on_text = on_text_for_names(collection_names)
-
+ if not collection_info.uses_ephemeral_collections:
+ collection_names = [f"collection {c.hid}" for c in collection_info.collections.values()]
+ self._on_text = on_text_for_names(collection_names)
+ else:
+ self._on_text = "implicitly created collection from inputs"
return self._on_text
def output_name(self, trans, history, params, output):
diff --git a/lib/galaxy/tools/parameters/basic.py b/lib/galaxy/tools/parameters/basic.py
index 2a150806d60b..804ab0cae6ef 100644
--- a/lib/galaxy/tools/parameters/basic.py
+++ b/lib/galaxy/tools/parameters/basic.py
@@ -199,6 +199,10 @@ def __init__(self, tool, input_source, context=None):
else:
self.sanitizer = None
self.validators = validation.to_validators(tool.app if tool else None, input_source.parse_validators())
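+        # map_to (CWL "mapTo") records where this parameter's value should be nested
+        # in the CWL job object; not every input source defines it, hence the hasattr check.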
+ if hasattr(input_source, "parse_map_to"):
+ self.map_to = input_source.parse_map_to()
+ else:
+ self.map_to = None
@property
def visible(self) -> bool:
@@ -1918,6 +1922,12 @@ def get_initial_value(self, trans, other_values):
return hdca
def to_json(self, value, app, use_security):
+ if getattr(value, "ephemeral", False):
+ # wf_wc_scatter_multiple_flattened
+ value = value.persistent_object
+ if value.id is None:
+ app.model.context.add(value)
+ app.model.context.flush()
if value not in [None, "", "None"]:
if isinstance(value, list) and len(value) > 0:
@@ -2696,7 +2706,9 @@ def raw_to_galaxy(
object_class = as_dict_value["class"]
if object_class == "File":
# TODO: relative_to = "/"
- location = as_dict_value.get("location")
+ location = as_dict_value.get("location") or as_dict_value.get("path")
+ assert location
+        assert os.path.exists(location[len("file://"):] if location.startswith("file://") else location)
name = (
as_dict_value.get("identifier")
or as_dict_value.get("basename")
@@ -2777,6 +2789,88 @@ def write_elements_to_collection(has_elements, collection_builder):
return hdca
+class FieldTypeToolParameter(ToolParameter):
+ """CWL field type defined parameter source."""
+
+ def __init__(self, tool, input_source, context=None):
+ input_source = ensure_input_source(input_source)
+ super().__init__(tool, input_source)
+ # self.field_type = input_source.parse_field_type()
+
+ def from_json(self, value, trans, other_values=None):
+ if trans.workflow_building_mode is workflow_building_modes.ENABLED:
+ return None
+
+ if value is None:
+ return None
+
+ if not isinstance(value, dict) or "src" not in value:
+ value = {"src": "json", "value": value}
+ elif value.get("class") == "File":
+ return raw_to_galaxy(trans.app, trans.history, value)
+ return self.to_python(value, trans.app)
+
+ def to_json(self, value, app, use_security):
+ """Convert a value to a string representation suitable for persisting"""
+ assert isinstance(value, dict)
+ assert "src" in value
+ return value
+
+ def to_python(self, value, app):
+ """Convert a value created with to_json back to an object representation"""
+ if value is None:
+ return None
+ # return super(FieldTypeToolParameter, self).to_python(value, app)
+ if not isinstance(value, dict):
+ value = json.loads(value)
+ assert isinstance(value, dict)
+ assert "src" in value or "class" in value
+ if "src" in value:
+ src = value["src"]
+ if "value" in value:
+ # We have an expanded value, not an ID
+ return value
+ elif src in ["hda", "hdca", "dce"]:
+ id = value["id"] if isinstance(value["id"], int) else app.security.decode_id(value["id"])
+ if src == "dce":
+ value = app.model.context.query(app.model.DatasetCollectionElement).get(id)
+ elif src == "hdca":
+ value = app.model.context.query(app.model.HistoryDatasetCollectionAssociation).get(id)
+ else:
+ value = app.model.context.query(app.model.HistoryDatasetAssociation).get(id)
+
+ return {"src": src, "value": value}
+        # Reaching this if we have a default file
+ return value
+
+ def value_to_basic(self, value, app, use_security=False):
+ log.info(f"value_to_basic of {value} ({type(value)})")
+ if is_runtime_value(value):
+ return runtime_to_json(value)
+
+ if value is None:
+ return None
+
+ if isinstance(value, dict):
+ if "src" in value:
+ src = value["src"]
+ if src in ["hda", "hdca", "dce"]:
+ id = value["value"].id if not use_security else app.security.encode_id(value["value"].id)
+ value = {"src": src, "id": id}
+ else:
+ # Default file
+ assert "class" in value
+
+ else:
+ value = {"src": "json", "value": value}
+
+ return json.dumps(value)
+
+ def value_from_basic(self, value, app, ignore_errors=False):
+ return super().value_from_basic(value, app, ignore_errors)
+ # return json.loads(value)
+
+
parameter_types = dict(
text=TextToolParameter,
integer=IntegerToolParameter,
@@ -2797,6 +2891,7 @@ def write_elements_to_collection(has_elements, collection_builder):
rules=RulesListToolParameter,
directory_uri=DirectoryUriToolParameter,
drill_down=DrillDownSelectToolParameter,
+ field=FieldTypeToolParameter,
)
@@ -2824,4 +2919,7 @@ def history_item_to_json(value, app, use_security):
src = "hda"
if src is not None:
object_id = cached_id(value)
- return {"id": app.security.encode_id(object_id) if use_security else object_id, "src": src}
+ new_val = getattr(value, "extra_params", {})
+ new_val["id"] = app.security.encode_id(object_id) if use_security else object_id
+ new_val["src"] = src
+ return new_val
diff --git a/lib/galaxy/tools/parameters/grouping.py b/lib/galaxy/tools/parameters/grouping.py
index 26b4e171c0de..46f754bd4e35 100644
--- a/lib/galaxy/tools/parameters/grouping.py
+++ b/lib/galaxy/tools/parameters/grouping.py
@@ -295,11 +295,14 @@ def get_composite_dataset_name(self, context):
if dataset_name is None:
filenames = []
for composite_file in context.get("files", []):
+ if dataset_name is None and composite_file.get("NAME", None) is not None:
+ dataset_name = composite_file.get("NAME")
if not composite_file.get("ftp_files", ""):
filenames.append((composite_file.get("file_data") or {}).get("filename", ""))
else:
filenames.append(composite_file.get("ftp_files", [])[0])
- dataset_name = os.path.commonprefix(filenames).rstrip(".") or None
+ if dataset_name is None:
+ dataset_name = os.path.commonprefix(filenames).rstrip(".") or None
if dataset_name is None:
dataset_name = f"Uploaded Composite Dataset ({self.get_file_type(context)})"
return dataset_name
@@ -830,6 +833,10 @@ def nested_to_dict(input):
cond_dict["test_param"] = nested_to_dict(self.test_param)
return cond_dict
+ @property
+ def case_strings(self):
+ return [c.value for c in self.cases]
+
class ConditionalWhen(UsesDictVisibleKeys):
dict_collection_visible_keys = ["value"]
diff --git a/lib/galaxy/tools/parameters/wrapped.py b/lib/galaxy/tools/parameters/wrapped.py
index d23e9be5edf9..4b3c4bbc9b94 100644
--- a/lib/galaxy/tools/parameters/wrapped.py
+++ b/lib/galaxy/tools/parameters/wrapped.py
@@ -10,6 +10,7 @@
from galaxy.tools.parameters.basic import (
DataCollectionToolParameter,
DataToolParameter,
+ FieldTypeToolParameter,
SelectToolParameter,
)
from galaxy.tools.parameters.grouping import (
@@ -134,6 +135,32 @@ def wrap_values(self, inputs, input_values, skip_missing_values=False):
tool=tool,
name=input.name,
)
+ elif isinstance(input, FieldTypeToolParameter):
+ if value is None:
+ return None
+
+ if not isinstance(value, dict):
+ raise Exception(f"Simple values [{input}] need to be wrapped in a JSON envelope")
+
+ assert "value" in value, value
+ assert "src" in value
+ src = value["src"]
+ if src == "json":
+ input_values[input.name] = InputValueWrapper(input, value["value"], incoming)
+ elif src == "hda":
+ input_values[input.name] = DatasetFilenameWrapper(
+ value["value"], datatypes_registry=trans.app.datatypes_registry, tool=tool, name=input.name
+ )
+ elif src == "hdca":
+ input_values[input.name] = DatasetCollectionWrapper(
+ None,
+ value["value"],
+ datatypes_registry=trans.app.datatypes_registry,
+ tool=tool,
+ name=input.name,
+ )
+ else:
+ raise AssertionError(f"Unknown src encountered [{src}] for field type value [{value}]")
else:
input_values[input.name] = InputValueWrapper(input, value, incoming, tool.profile)
diff --git a/lib/galaxy/tools/wrappers.py b/lib/galaxy/tools/wrappers.py
index 12d3d779cafc..93edcbc2805f 100644
--- a/lib/galaxy/tools/wrappers.py
+++ b/lib/galaxy/tools/wrappers.py
@@ -8,6 +8,7 @@
Any,
cast,
Dict,
+ ItemsView,
Iterable,
Iterator,
KeysView,
@@ -69,7 +70,7 @@ class ToolParameterValueWrapper:
Base class for object that Wraps a Tool Parameter and Value.
"""
- value: Optional[Union[str, List[str]]]
+ value: Any
input: "ToolParameter"
def __bool__(self) -> bool:
@@ -123,7 +124,7 @@ class InputValueWrapper(ToolParameterValueWrapper):
def __init__(
self,
input: "ToolParameter",
- value: Optional[str],
+ value: Any,
other_values: Optional[Dict[str, str]] = None,
profile: Optional[float] = None,
) -> None:
@@ -698,6 +699,9 @@ def keys(self) -> Union[List[str], KeysView[Any]]:
return []
return self.__element_instances.keys()
+ def items(self) -> ItemsView[str, Union["DatasetCollectionWrapper", DatasetFilenameWrapper]]:
+ return self.__element_instances.items()
+
@property
def is_collection(self) -> bool:
return True
diff --git a/lib/galaxy/util/compression_utils.py b/lib/galaxy/util/compression_utils.py
index 576e62623f99..d9fd3bd67310 100644
--- a/lib/galaxy/util/compression_utils.py
+++ b/lib/galaxy/util/compression_utils.py
@@ -214,15 +214,16 @@ def common_prefix_dir(self) -> str:
common_prefix = os.path.commonprefix([self.getname(item) for item in contents])
# If the common_prefix does not end with a slash, check that is a
# directory and all other files are contained in it
- common_prefix_member = self.getmember(common_prefix)
- if (
- len(common_prefix) >= 1
- and not common_prefix.endswith(os.sep)
- and common_prefix_member
- and self.isdir(common_prefix_member)
- and all(self.getname(item).startswith(common_prefix + os.sep) for item in contents if self.isfile(item))
- ):
- common_prefix += os.sep
+ if len(common_prefix) >= 1 and not common_prefix.endswith(os.sep):
+ common_prefix_member = self.getmember(common_prefix)
+ if (
+ common_prefix_member is not None
+ and self.isdir(common_prefix_member)
+ and all(
+ self.getname(item).startswith(common_prefix + os.sep) for item in contents if self.isfile(item)
+ )
+ ):
+ common_prefix += os.sep
if not common_prefix.endswith(os.sep):
common_prefix = ""
return common_prefix
diff --git a/lib/galaxy/webapps/galaxy/api/workflows.py b/lib/galaxy/webapps/galaxy/api/workflows.py
index 9348e25a977b..118104eb46e7 100644
--- a/lib/galaxy/webapps/galaxy/api/workflows.py
+++ b/lib/galaxy/webapps/galaxy/api/workflows.py
@@ -240,6 +240,7 @@ def create(self, trans: GalaxyWebTransaction, payload=None, **kwd):
archive_source = payload.get("archive_source")
archive_file = payload.get("archive_file")
archive_data = None
+ uploaded_file_name = None
if archive_source:
validate_uri_access(archive_source, trans.user_is_admin, trans.app.config.fetch_url_allowlist_ips)
if archive_source.startswith("file://"):
@@ -265,15 +266,17 @@ def create(self, trans: GalaxyWebTransaction, payload=None, **kwd):
raise exceptions.MessageException(f"Failed to open URL '{archive_source}'.")
elif hasattr(archive_file, "file"):
uploaded_file = archive_file.file
- uploaded_file_name = uploaded_file.name
- if os.path.getsize(os.path.abspath(uploaded_file_name)) > 0:
+ uploaded_file_name = os.path.abspath(uploaded_file.name)
+ if os.path.getsize(uploaded_file_name) > 0:
archive_data = util.unicodify(uploaded_file.read())
import_source = "uploaded file"
else:
raise exceptions.MessageException("You attempted to upload an empty file.")
else:
raise exceptions.MessageException("Please provide a URL or file.")
- return self.__api_import_from_archive(trans, archive_data, import_source, payload=payload)
+ return self.__api_import_from_archive(
+ trans, archive_data, import_source, payload=payload, from_path=uploaded_file_name
+ )
if "from_history_id" in payload:
from_history_id = payload.get("from_history_id")
@@ -573,10 +576,14 @@ def get_tool_predictions(self, trans: ProvidesUserContext, payload, **kwd):
#
# -- Helper methods --
#
- def __api_import_from_archive(self, trans: GalaxyWebTransaction, archive_data, source=None, payload=None):
+ def __api_import_from_archive(
+ self, trans: GalaxyWebTransaction, archive_data, source=None, payload=None, from_path=None
+ ):
payload = payload or {}
try:
data = json.loads(archive_data)
+ if from_path is not None:
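+ # Record the uploaded file's path so the workflow can also be (re)loaded from disk if needed.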
+ data.update({"src": "from_path", "path": from_path})
except Exception:
if "GalaxyWorkflow" in archive_data:
data = {"yaml_content": archive_data}
diff --git a/lib/galaxy/webapps/galaxy/controllers/tool_runner.py b/lib/galaxy/webapps/galaxy/controllers/tool_runner.py
index e6bf157400b1..1fcdbc853fa1 100644
--- a/lib/galaxy/webapps/galaxy/controllers/tool_runner.py
+++ b/lib/galaxy/webapps/galaxy/controllers/tool_runner.py
@@ -72,7 +72,7 @@ def __tool_404__():
if not tool.allow_user_access(trans.user):
return __tool_404__()
# FIXME: Tool class should define behavior
- if tool.tool_type in ["default", "interactivetool"]:
+ if tool.tool_type in ["default", "interactivetool", "cwl", "galactic_cwl"]:
return trans.response.send_redirect(url_for(controller="root", tool_id=tool_id))
# execute tool without displaying form
diff --git a/lib/galaxy/webapps/galaxy/services/_fetch_util.py b/lib/galaxy/webapps/galaxy/services/_fetch_util.py
index ba5f33f3c756..e4723b231c57 100644
--- a/lib/galaxy/webapps/galaxy/services/_fetch_util.py
+++ b/lib/galaxy/webapps/galaxy/services/_fetch_util.py
@@ -59,7 +59,9 @@ def validate_and_normalize_targets(trans, payload):
payload["check_content"] = trans.app.config.check_upload_content
def check_src(item):
- validate_datatype_extension(datatypes_registry=trans.app.datatypes_registry, ext=item.get("ext"))
+ item_ext = validate_datatype_extension(datatypes_registry=trans.app.datatypes_registry, ext=item.get("ext"))
+ if item_ext:
+ item["ext"] = item_ext
# Normalize file:// URLs into paths.
if item["src"] == "url":
diff --git a/lib/galaxy/webapps/galaxy/services/dataset_collections.py b/lib/galaxy/webapps/galaxy/services/dataset_collections.py
index e37335d829f0..2c21909e095d 100644
--- a/lib/galaxy/webapps/galaxy/services/dataset_collections.py
+++ b/lib/galaxy/webapps/galaxy/services/dataset_collections.py
@@ -122,7 +122,7 @@ def create(self, trans: ProvidesHistoryContext, payload: CreateNewCollectionPayl
:returns: element view of new dataset collection
"""
# TODO: Error handling...
- create_params = api_payload_to_create_params(payload.dict(exclude_unset=True))
+ create_params = api_payload_to_create_params(payload.dict(exclude_unset=True, by_alias=True))
if payload.instance_type == "history":
if payload.history_id is None:
raise exceptions.RequestParameterInvalidException("Parameter history_id is required.")
diff --git a/lib/galaxy/webapps/galaxy/services/tools.py b/lib/galaxy/webapps/galaxy/services/tools.py
index 6897965d112f..69c28e7a27b3 100644
--- a/lib/galaxy/webapps/galaxy/services/tools.py
+++ b/lib/galaxy/webapps/galaxy/services/tools.py
@@ -142,9 +142,7 @@ def _create(self, trans: ProvidesHistoryContext, payload, **kwd):
target_history = None
# Set up inputs.
- inputs = payload.get("inputs", {})
- if not isinstance(inputs, dict):
- raise exceptions.RequestParameterInvalidException(f"inputs invalid {inputs}")
+ inputs = tool.inputs_from_dict(payload)
# Find files coming in as multipart file data and add to inputs.
for k, v in payload.items():
diff --git a/lib/galaxy/workflow/modules.py b/lib/galaxy/workflow/modules.py
index f5c2da9b6dda..3a5f435c4fe3 100644
--- a/lib/galaxy/workflow/modules.py
+++ b/lib/galaxy/workflow/modules.py
@@ -77,6 +77,7 @@
BooleanToolParameter,
DataCollectionToolParameter,
DataToolParameter,
+ FieldTypeToolParameter,
FloatToolParameter,
HiddenToolParameter,
IntegerToolParameter,
@@ -141,7 +142,7 @@ class ConditionalStepWhen(BooleanToolParameter):
pass
-def to_cwl(value, hda_references, step):
+def to_cwl(value, hda_references, step, require_ok=True):
element_identifier = None
if isinstance(value, model.HistoryDatasetCollectionAssociation):
value = value.collection
@@ -154,7 +155,8 @@ def to_cwl(value, hda_references, step):
if not value.dataset.in_ready_state():
why = f"dataset [{value.id}] is needed for valueFrom expression and is non-ready"
raise DelayedWorkflowEvaluation(why=why)
- if not value.is_ok:
+ if require_ok and not value.is_ok:
+ # TODO: materialize the dataset and delay evaluation instead?
raise FailWorkflowEvaluation(
why=InvocationFailureDatasetFailed(
reason=FailureReason.dataset_failed, hda_id=value.id, workflow_step_id=step.id
@@ -275,6 +277,7 @@ class WorkflowModule:
def __init__(self, trans, content_id=None, **kwds):
self.trans = trans
+ self.app = trans.app
self.content_id = content_id
self.state = DefaultToolState()
@@ -550,12 +553,34 @@ def _find_collections_to_match(self, progress: "WorkflowProgress", step, all_inp
for input_dict in all_inputs:
name = input_dict["name"]
+
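+ # Scatter behavior is declared per step input; only "dotproduct" (the default)
+ # and "disabled" are supported here.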
+ step_input = step.inputs_by_name.get(name, None)
+ scatter_type = "dotproduct"
+ if step_input and step_input.scatter_type:
+ scatter_type = step_input.scatter_type
+ assert scatter_type in ["dotproduct", "disabled"], f"Unimplemented scatter type [{scatter_type}]"
+
+ subworkflow_structure = progress.subworkflow_structure
+ if subworkflow_structure and subworkflow_structure.is_leaf and scatter_type == "disabled":
+ continue
+
data = progress.replacement_for_input(self.trans, step, input_dict)
- can_map_over = hasattr(data, "collection") # and data.collection.allow_implicit_mapping
+ can_map_over = hasattr(data, "collection") and data.collection.allow_implicit_mapping
if not can_map_over:
continue
+ if subworkflow_structure and not subworkflow_structure.is_leaf:
+ if not subworkflow_structure.collection_type_description.is_subcollection_of_type(
+ data.collection.collection_type, proper=False
+ ):
+ template = "Workflow input replacement of collection type [%s] is not a super collection of workflow collection type [%s]."
+ message = template % (
+ data.collection.collection_type,
+ subworkflow_structure.collection_type_description,
+ )
+ raise Exception(message)
+
is_data_param = input_dict["input_type"] == "dataset"
is_data_collection_param = input_dict["input_type"] == "dataset_collection"
if is_data_param or is_data_collection_param:
@@ -626,6 +651,46 @@ def _find_collections_to_match(self, progress: "WorkflowProgress", step, all_inp
return collections_to_match
+def build_extra_step_state(
+ trans,
+ step: WorkflowStep,
+ progress: "WorkflowProgress",
+ iteration_elements,
+ execution_state=None,
+ all_inputs_by_name: Optional[Dict[str, Dict[str, Any]]] = None,
+):
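+ """Collect extra step state for expression evaluation from the execution slice, resolved inputs, or step connections."""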
+ extra_step_state = {}
+ for step_input in step.inputs:
+ step_input_name = step_input.name
+ if iteration_elements and step_input_name in iteration_elements:
+ # extra state is set in the execution slice, use it
+ extra_step_state[step_input_name] = iteration_elements[step_input_name]
+ continue
+ input_in_execution_state = execution_state and step_input_name not in execution_state.inputs
+ if input_in_execution_state and all_inputs_by_name is not None:
+ if step_input_name in all_inputs_by_name:
+ # extra state value comes from input state
+ extra_step_state[step_input_name] = progress.replacement_for_input(
+ trans, step, all_inputs_by_name[step_input_name]
+ )
+ continue
+ # Might be needed someday...
+ # elif step_input.default_value_set:
+ # extra_step_state[step_input_name] = step_input.default_value
+ else:
+ # extra state value comes from connection
+ extra_step_state[step_input_name] = progress.replacement_for_connection(
+ step_input.connections[0], is_data=True
+ )
+ continue
+ if execution_state is None:
+ extra_step_state[step_input_name] = progress.replacement_for_connection(
+ step_input.connections[0], is_data=True
+ )
+
+ return extra_step_state
+
+
class SubWorkflowModule(WorkflowModule):
# Two step improvements to build runtime inputs for subworkflow modules
# - First pass verify nested workflow doesn't have an RuntimeInputs
@@ -675,6 +740,7 @@ def get_all_inputs(self, data_only=False, connectable_only=False):
if hasattr(self.subworkflow, "input_steps"):
for step in self.subworkflow.input_steps:
name = step.label
+ step_module = module_factory.from_workflow_step(self.trans, step)
if not name:
step_module = module_factory.from_workflow_step(self.trans, step)
name = f"{step.order_index}:{step_module.get_name()}"
@@ -692,6 +758,13 @@ def get_all_inputs(self, data_only=False, connectable_only=False):
if step_type == "parameter_input":
input["type"] = step.tool_inputs["parameter_type"]
input["optional"] = step.tool_inputs.get("optional", False)
+
+ # This predated the lines above in the CWL branch but looks odd now;
+ # it is not hurting anything per se, but should ideally be cleaned up.
+ collection_type = getattr(step_module, "collection_type", None)
+ if collection_type:
+ input["collection_types"] = [collection_type]
+
inputs.append(input)
return inputs
@@ -804,15 +877,9 @@ def execute(
else:
# Got a conditional step and we could potentially run it,
# so we have to build the step state and evaluate the expression
- extra_step_state = {}
- for step_input in step.inputs:
- step_input_name = step_input.name
- if iteration_elements and step_input_name in iteration_elements: # noqa: B023
- value = iteration_elements[step_input_name] # noqa: B023
- else:
- value = progress.replacement_for_connection(step_input.connections[0], is_data=True)
- extra_step_state[step_input_name] = value
-
+ extra_step_state = build_extra_step_state(
+ trans, step, progress=progress, iteration_elements=iteration_elements
+ )
when_values.append(
evaluate_value_from_expressions(
progress, step, execution_state={}, extra_step_state=extra_step_state
@@ -1211,6 +1278,7 @@ def get_inputs(self):
{"value": "boolean", "label": "Boolean (True or False)"},
{"value": "color", "label": "Color"},
{"value": "directory_uri", "label": "Directory URI"},
+ {"value": "field", "label": "Field"},
]
input_parameter_type = SelectToolParameter(None, select_source)
# encode following loop in description above instead
@@ -2258,13 +2326,47 @@ def decode_runtime_state(self, step, runtime_state):
if self.tool:
state = super().decode_runtime_state(step, runtime_state)
if RUNTIME_STEP_META_STATE_KEY in runtime_state:
- self.__restore_step_meta_runtime_state(json.loads(runtime_state[RUNTIME_STEP_META_STATE_KEY]))
+ self.__restore_step_meta_runtime_state(safe_loads(runtime_state[RUNTIME_STEP_META_STATE_KEY]))
return state
else:
raise ToolMissingException(
f"Tool {self.tool_id} missing. Cannot recover runtime state.", tool_id=self.tool_id
)
+ def evaluate_value_from_expressions(self, progress, step, execution_state, extra_step_state):
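+ """Evaluate valueFrom expressions (and step default values) against CWL-ified step state, returning replacements keyed by input name."""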
+ value_from_expressions = {}
+ replacements: Dict = {}
+
+ for key, value in execution_state.inputs.items():
+ step_input = step.inputs_by_name.get(key)
+ if step_input:
+ if step_input.value_from is not None:
+ value_from_expressions[key] = step_input.value_from
+ elif step_input.default_value is not None and value is None:
+ value_from_expressions[key] = step_input.default_value
+
+ if not value_from_expressions:
+ return replacements
+
+ hda_references: List[model.HistoryDatasetAssociation] = []
+ step_state = {}
+ for key, value in extra_step_state.items():
+ step_state[key] = to_cwl(value, hda_references=hda_references, step=step)
+ for key, value in execution_state.inputs.items():
+ # require_ok=False for deferred datasets; we might instead need to materialize them?
+ step_state[key] = to_cwl(value, hda_references=hda_references, step=step, require_ok=False)
+
+ for key, value_from in value_from_expressions.items():
+ as_cwl_value = do_eval(
+ value_from,
+ step_state,
+ context=step_state[key],
+ )
+ new_val = from_cwl(as_cwl_value, hda_references=hda_references, progress=progress)
+ replacements[key] = new_val
+
+ return replacements
+
def execute(
self, trans, progress: "WorkflowProgress", invocation_step, use_cached_job: bool = False
) -> Optional[bool]:
@@ -2310,20 +2412,44 @@ def execute(
def callback(input, prefixed_name: str, **kwargs):
input_dict = all_inputs_by_name[prefixed_name]
- replacement: Union[model.Dataset, NoReplacement] = NO_REPLACEMENT
+ replacement: Any = NO_REPLACEMENT
if iteration_elements and prefixed_name in iteration_elements: # noqa: B023
replacement = iteration_elements[prefixed_name] # noqa: B023
else:
replacement = progress.replacement_for_input(trans, step, input_dict)
if replacement is not NO_REPLACEMENT:
+ # We need to check if the replacement is an expression tool null,
+ # since that would mean that we have to pick a possible default value
+ dataset_instance: Optional[model.DatasetInstance] = None
+ if isinstance(replacement, model.DatasetCollectionElement):
+ dataset_instance = replacement.hda
+ elif isinstance(replacement, model.DatasetInstance):
+ dataset_instance = replacement
+
+ if dataset_instance and dataset_instance.extension == "expression.json":
+ # We could do this only if there is a default value on a step
+ if not dataset_instance.dataset.in_ready_state():
+ why = f"dataset [{dataset_instance.id}] is needed for non-data connection and is non-ready"
+ raise DelayedWorkflowEvaluation(why=why)
+
+ if not dataset_instance.is_ok:
+ raise CancelWorkflowEvaluation(
+ why=InvocationFailureDatasetFailed(
+ reason=FailureReason.dataset_failed,
+ workflow_step_id=step.id,
+ hda_id=dataset_instance.id,
+ dependent_workflow_step_id=None,
+ )
+ )
+
+ with open(dataset_instance.get_file_name()) as f:
+ replacement = json.load(f)
+ if replacement is None:
+ return None
+
if not isinstance(input, BaseDataToolParameter):
# Probably a parameter that can be replaced
- dataset_instance: Optional[model.DatasetInstance] = None
- if isinstance(replacement, model.DatasetCollectionElement):
- dataset_instance = replacement.hda
- elif isinstance(replacement, model.DatasetInstance):
- dataset_instance = replacement
if dataset_instance and dataset_instance.extension == "expression.json":
with open(dataset_instance.get_file_name()) as f:
replacement = json.load(f)
@@ -2334,6 +2460,41 @@ def callback(input, prefixed_name: str, **kwargs):
if isinstance(input, ConditionalStepWhen) and bool(replacement) is False:
raise SkipWorkflowStepEvaluation
+ is_data = (
+ isinstance(input, DataToolParameter)
+ or isinstance(input, DataCollectionToolParameter)
+ or isinstance(input, FieldTypeToolParameter)
+ )
+ if (
+ not is_data
+ and isinstance(replacement, model.HistoryDatasetAssociation)
+ and replacement.ext == "expression.json"
+ ):
+ if not replacement.dataset.in_ready_state():
+ why = f"dataset [{replacement.id}] is needed for non-data connection and is non-ready"
+ raise DelayedWorkflowEvaluation(why=why)
+
+ if not replacement.is_ok:
+ raise CancelWorkflowEvaluation(
+ why=InvocationFailureDatasetFailed(
+ reason=FailureReason.dataset_failed,
+ workflow_step_id=step.id,
+ hda_id=replacement.id,
+ dependent_workflow_step_id=None,
+ )
+ )
+
+ with open(replacement.get_file_name()) as f:
+ replacement = safe_loads(f.read())
+
+ if isinstance(input, FieldTypeToolParameter):
+ if isinstance(replacement, model.HistoryDatasetAssociation):
+ return {"src": "hda", "value": replacement}
+ elif isinstance(replacement, model.HistoryDatasetCollectionAssociation):
+ return {"src": "hdca", "value": replacement}
+ elif replacement is not NO_REPLACEMENT:
+ return {"src": "json", "value": replacement}
+
return replacement
try:
@@ -2349,34 +2510,19 @@ def callback(input, prefixed_name: str, **kwargs):
message = f"Error due to input mapping of '{unicodify(k)}' in tool '{tool.id}'. A common cause of this is conditional outputs that cannot be determined until runtime, please review workflow step {step.order_index + 1}."
raise exceptions.MessageException(message)
+ extra_step_state = build_extra_step_state(
+ trans,
+ step,
+ progress=progress,
+ iteration_elements=iteration_elements,
+ execution_state=execution_state,
+ all_inputs_by_name=all_inputs_by_name,
+ )
+
if step.when_expression and when_value is not False:
- extra_step_state = {}
- for step_input in step.inputs:
- step_input_name = step_input.name
- input_in_execution_state = step_input_name not in execution_state.inputs
- if input_in_execution_state:
- if step_input_name in all_inputs_by_name:
- if iteration_elements and step_input_name in iteration_elements: # noqa: B023
- value = iteration_elements[step_input_name] # noqa: B023
- else:
- value = progress.replacement_for_input(trans, step, all_inputs_by_name[step_input_name])
- # TODO: only do this for values... is everything with a default
- # this way a field parameter? I guess not?
- extra_step_state[step_input_name] = value
- # Might be needed someday...
- # elif step_input.default_value_set:
- # extra_step_state[step_input_name] = step_input.default_value
- else:
- if iteration_elements and step_input_name in iteration_elements: # noqa: B023
- value = iteration_elements[step_input_name] # noqa: B023
- else:
- value = progress.replacement_for_connection(step_input.connections[0], is_data=True)
- extra_step_state[step_input_name] = value
-
- if when_value is not False:
- when_value = evaluate_value_from_expressions(
- progress, step, execution_state=execution_state, extra_step_state=extra_step_state
- )
+ when_value = evaluate_value_from_expressions(
+ progress, step, execution_state=execution_state, extra_step_state=extra_step_state
+ )
if when_value is not None:
# Track this more formally ?
execution_state.inputs["__when_value__"] = when_value
@@ -2385,6 +2531,36 @@ def callback(input, prefixed_name: str, **kwargs):
if unmatched_input_connections:
log.warning(f"Failed to use input connections for inputs [{unmatched_input_connections}]")
+ expression_replacements = self.evaluate_value_from_expressions(
+ progress,
+ step,
+ execution_state,
+ extra_step_state,
+ )
+
+ def expression_callback(input, prefixed_name, **kwargs):
+ history = trans.history
+ app = trans.app
+ if prefixed_name in expression_replacements: # noqa: B023
+ expression_replacement = expression_replacements[prefixed_name] # noqa: B023
+ if isinstance(input, FieldTypeToolParameter):
+ return {"src": "json", "value": expression_replacement}
+ else:
+ return expression_replacement
+ # This is not the right spot to fill in a tool default,
+ # but at this point we know there was no replacement via step defaults or value_from
+ value = kwargs["value"]
+ if isinstance(value, dict) and value.get("class") == "File":
+ value = raw_to_galaxy(app, history, kwargs["value"])
+ return {"src": "hda", "value": value}
+
+ return NO_REPLACEMENT
+
+ # Replace expression values with those calculated...
+ visit_input_values(
+ tool.inputs, execution_state.inputs, expression_callback, no_replacement_value=NO_REPLACEMENT
+ )
+
param_combinations.append(execution_state.inputs)
complete = False
@@ -2597,6 +2773,34 @@ def load_module_sections(trans):
return module_sections
+class EphemeralCollection:
+ """Interface for collecting datasets together in workflows and treating as collections.
+
+ These aren't real collections in the database - just datasets groupped together
+ in someway by workflows for passing data around as collections.
+ """
+
+ # Frequently used to distinguish datasets from collections.
+ ephemeral = True
+ history_content_type = "dataset_collection"
+ name = "Dynamically generated collection"
+
+ def __init__(self, collection, history):
+ self.collection = collection
+ self.history = history
+
+ hdca = model.HistoryDatasetCollectionAssociation(
+ collection=collection,
+ history=history,
+ )
+ history.add_dataset_collection(hdca)
+ self.persistent_object = hdca
+
+ @property
+ def elements(self):
+ return self.collection.elements
+
+
class DelayedWorkflowEvaluation(Exception):
def __init__(self, why=None):
self.why = why
@@ -2639,6 +2843,7 @@ def inject(self, step: WorkflowStep, step_args=None, steps=None, **kwargs):
step.upgrade_messages = {}
# Make connection information available on each step by input name.
+ step.setup_inputs_by_name()
step.setup_input_connections_by_name()
# Populate module.
diff --git a/lib/galaxy/workflow/run.py b/lib/galaxy/workflow/run.py
index d634f678999a..155eec2372fa 100644
--- a/lib/galaxy/workflow/run.py
+++ b/lib/galaxy/workflow/run.py
@@ -395,6 +395,10 @@ def maximum_jobs_to_schedule_or_none(self) -> Optional[int]:
else:
return None
+ @property
+ def trans(self):
+ return self.module_injector.trans
+
def record_executed_job_count(self, job_count: int) -> None:
self.jobs_scheduled_this_iteration += job_count
@@ -430,6 +434,102 @@ def remaining_steps(
remaining_steps.append((step, invocation_step))
return remaining_steps
+ def replacement_for_input_connections(self, step: "WorkflowStep", input_dict: Dict[str, Any], connections):
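+ """Resolve the connections feeding a single input; multiple connections are merged into an ephemeral collection according to the step input's merge_type."""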
+ replacement = modules.NO_REPLACEMENT
+
+ prefixed_name = input_dict["name"]
+ step_input = step.inputs_by_name.get(prefixed_name, None)
+
+ merge_type = model.WorkflowStepInput.default_merge_type
+ if step_input:
+ merge_type = step_input.merge_type
+
+ is_data = input_dict["input_type"] in ["dataset", "dataset_collection"]
+ if len(connections) == 1:
+ replacement = self.replacement_for_connection(connections[0], is_data=is_data)
+ else:
+ # We've mapped multiple individual inputs to a single parameter,
+ # promote output to a collection.
+ inputs = []
+ input_history_content_type = None
+ input_collection_type = None
+ for i, c in enumerate(connections):
+ input_from_connection = self.replacement_for_connection(c, is_data=is_data)
+ is_data = hasattr(input_from_connection, "history_content_type")
+ if is_data:
+ input_history_content_type = input_from_connection.history_content_type
+ if i == 0:
+ if input_history_content_type == "dataset_collection":
+ input_collection_type = input_from_connection.collection.collection_type
+ else:
+ input_collection_type = None
+ else:
+ if input_collection_type is None:
+ if input_history_content_type != "dataset":
+ raise MessageException("Cannot map over a combination of datasets and collections.")
+ else:
+ if input_history_content_type != "dataset_collection":
+ raise MessageException("Cannot merge over combinations of datasets and collections.")
+ elif input_from_connection.collection.collection_type != input_collection_type:
+ raise MessageException("Cannot merge collections of different collection types.")
+
+ inputs.append(input_from_connection)
+
+ if input_dict["input_type"] == "dataset_collection":
+ # TODO: Implement more nested types here...
+ if input_dict.get("collection_types") != ["list"]:
+ return self.replacement_for_connection(connections[0], is_data=is_data)
+
+ collection = model.DatasetCollection()
+ # If individual datasets were provided (type is None), promote to a list.
+ collection.collection_type = input_collection_type or "list"
+
+ next_index = 0
+ if input_collection_type is None:
+ if merge_type == "merge_nested":
+ raise NotImplementedError()
+
+ for input in inputs:
+ model.DatasetCollectionElement(
+ collection=collection,
+ element=input,
+ element_index=next_index,
+ element_identifier=str(next_index),
+ )
+ next_index += 1
+
+ elif input_collection_type == "list":
+ if merge_type == "merge_flattened":
+ for input in inputs:
+ for dataset_instance in input.dataset_instances:
+ model.DatasetCollectionElement(
+ collection=collection,
+ element=dataset_instance,
+ element_index=next_index,
+ element_identifier=str(next_index),
+ )
+ next_index += 1
+ elif merge_type == "merge_nested":
+ # Increase nested level of collection
+ collection.collection_type = f"list:{input_collection_type}"
+ for input in inputs:
+ model.DatasetCollectionElement(
+ collection=collection,
+ element=input.collection,
+ element_index=next_index,
+ element_identifier=str(next_index),
+ )
+ next_index += 1
+ else:
+ raise NotImplementedError()
+
+ return modules.EphemeralCollection(
+ collection=collection,
+ history=self.workflow_invocation.history,
+ )
+
+ return replacement
+
def replacement_for_input(self, trans, step: "WorkflowStep", input_dict: Dict[str, Any]):
replacement: Union[
modules.NoReplacement,
@@ -455,16 +555,32 @@ def replacement_for_input(self, trans, step: "WorkflowStep", input_dict: Dict[st
else:
replacement = temp
else:
- replacement = self.replacement_for_connection(connection[0], is_data=is_data)
+ if is_data:
+ for step_input in step.inputs:
+ if step_input.name == prefixed_name and step_input.value_from:
+ # This might not be correct since value_from might be an expression to evaluate,
+ # and default values need to be applied before expressions.
+ # TODO: check if any tests fail because of this?
+ return raw_to_galaxy(trans.app, trans.history, step_input.value_from)
+ replacement = self.replacement_for_input_connections(
+ step,
+ input_dict,
+ connection,
+ )
elif step.state and (state_input := get_path(step.state.inputs, nested_key_to_path(prefixed_name), None)):
# workflow submitted with step parameters populates state directly
# via populate_module_and_state
replacement = state_input
else:
for step_input in step.inputs:
- if step_input.name == prefixed_name and step_input.default_value_set:
+ if step_input.name == prefixed_name and (step_input.default_value or step_input.value_from):
if is_data:
- replacement = raw_to_galaxy(trans.app, trans.history, step_input.default_value)
+ # as above, this might not be correct since the default value needs to be applied
+ # before the value_from evaluation occurs.
+ # TODO: check if any tests fail because of this?
+ replacement = raw_to_galaxy(
+ trans.app, trans.history, step_input.value_from or step_input.default_value
+ )
return replacement
def replacement_for_connection(self, connection: "WorkflowStepConnection", is_data: bool = True):
@@ -486,14 +602,20 @@ def replacement_for_connection(self, connection: "WorkflowStepConnection", is_da
try:
replacement = step_outputs[output_name]
except KeyError:
- raise modules.FailWorkflowEvaluation(
- why=InvocationFailureOutputNotFound(
- reason=FailureReason.output_not_found,
- workflow_step_id=connection.input_step_id,
- output_name=output_name,
- dependent_workflow_step_id=output_step_id,
+ if connection.non_data_connection:
+ replacement = modules.NO_REPLACEMENT
+ else:
+ # If this is not an implicit connection (the state of which is checked earlier in `check_implicitly_dependent_steps`),
+ # we must report this as a failure.
+ raise modules.FailWorkflowEvaluation(
+ why=InvocationFailureOutputNotFound(
+ reason=FailureReason.output_not_found,
+ workflow_step_id=connection.input_step_id,
+ output_name=output_name,
+ dependent_workflow_step_id=output_step_id,
+ )
)
- )
+
if isinstance(replacement, model.HistoryDatasetCollectionAssociation):
if not replacement.collection.populated:
if not replacement.waiting_for_elements:
@@ -593,6 +715,14 @@ def set_outputs_for_input(
if step.label and step.type == "parameter_input" and "output" in outputs:
self.runtime_replacements[step.label] = str(outputs["output"])
+
+ output = outputs.get("output")
+ # TODO: handle extra files, directory types, collections, and the remaining output kinds.
+ if output and isinstance(output, dict) and output.get("class") == "File":
+ primary_data = self.raw_to_galaxy(output)
+ outputs["output"] = primary_data
+
+ log.debug("outputs are %s", outputs)
self.set_step_outputs(invocation_step, outputs, already_persisted=already_persisted)
def effective_replacement_dict(self):
@@ -699,20 +829,24 @@ def subworkflow_progress(
subworkflow = subworkflow_invocation.workflow
subworkflow_inputs = {}
for input_subworkflow_step in subworkflow.input_steps:
- connection_found = False
+ connections = []
subworkflow_step_id = input_subworkflow_step.id
for input_connection in step.input_connections:
if input_connection.input_subworkflow_step_id == subworkflow_step_id:
- is_data = input_connection.output_step.type != "parameter_input"
- replacement = self.replacement_for_connection(
- input_connection,
- is_data=is_data,
- )
- subworkflow_inputs[subworkflow_step_id] = replacement
- connection_found = True
- break
+ connections.append(input_connection)
+
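+ # All connections feeding this subworkflow input are resolved together so that
+ # multiple connections can be promoted to a collection.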
+ if connections:
+ replacement = self.replacement_for_input_connections(
+ step=step,
+ input_dict={
+ "name": input_subworkflow_step.label, # TODO: only module knows this unfortunately
+ "input_type": input_subworkflow_step.input_type,
+ },
+ connections=connections,
+ )
+ subworkflow_inputs[subworkflow_step_id] = replacement
- if not connection_found and not input_subworkflow_step.input_optional:
+ if not connections and not input_subworkflow_step.input_optional:
raise modules.FailWorkflowEvaluation(
InvocationFailureOutputNotFound(
reason=FailureReason.output_not_found,
@@ -721,7 +855,6 @@ def subworkflow_progress(
dependent_workflow_step_id=input_connection.output_step.id,
)
)
-
return WorkflowProgress(
subworkflow_invocation,
subworkflow_inputs,
diff --git a/lib/galaxy/workflow/run_request.py b/lib/galaxy/workflow/run_request.py
index baeb5492dd6d..f1e640683445 100644
--- a/lib/galaxy/workflow/run_request.py
+++ b/lib/galaxy/workflow/run_request.py
@@ -366,7 +366,10 @@ def build_workflow_run_configs(
if step.type == "parameter_input":
if normalized_key in param_map:
value = param_map.pop(normalized_key)
- normalized_inputs[normalized_key] = value["input"]
+ input_value = value["input"]
+ if isinstance(input_value, dict) and input_value.get("src") == "json":
+ input_value = input_value.get("value")
+ normalized_inputs[normalized_key] = input_value
steps_by_id = workflow.steps_by_id
# Set workflow inputs.
@@ -384,7 +387,12 @@ def build_workflow_run_configs(
raise exceptions.RequestParameterInvalidException(
f"{step.label or step.order_index + 1}: {e.message_suffix}"
)
- continue
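+ # Field parameters referencing a dataset (a dict containing an "id") fall through to the src validation below.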
+ if (
+ step.tool_inputs["parameter_type"] != "field"
+ or not isinstance(input_dict, dict)
+ or "id" not in input_dict
+ ):
+ continue
if "src" not in input_dict:
raise exceptions.RequestParameterInvalidException(
f"Not input source type defined for input '{input_dict}'."
diff --git a/lib/galaxy/workflow/workflow_parameter_input_definitions.py b/lib/galaxy/workflow/workflow_parameter_input_definitions.py
index ee59a87d42a9..69923bcbc3fc 100644
--- a/lib/galaxy/workflow/workflow_parameter_input_definitions.py
+++ b/lib/galaxy/workflow/workflow_parameter_input_definitions.py
@@ -8,13 +8,14 @@
BooleanToolParameter,
ColorToolParameter,
DirectoryUriToolParameter,
+ FieldTypeToolParameter,
FloatToolParameter,
IntegerToolParameter,
TextToolParameter,
)
-INPUT_PARAMETER_TYPES = Literal["text", "integer", "float", "boolean", "color", "directory_uri"]
-default_source_type = Dict[str, Union[int, float, bool, str]]
+INPUT_PARAMETER_TYPES = Literal["text", "integer", "float", "boolean", "color", "directory_uri", "field"]
+default_source_type = Dict[str, Union[None, int, float, bool, str]]
tool_param_type = Union[
TextToolParameter,
IntegerToolParameter,
@@ -22,6 +23,7 @@
BooleanToolParameter,
ColorToolParameter,
DirectoryUriToolParameter,
+ FieldTypeToolParameter,
]
@@ -43,4 +45,6 @@ def get_default_parameter(param_type: INPUT_PARAMETER_TYPES) -> tool_param_type:
input_default_value = ColorToolParameter(None, default_source)
elif param_type == "directory_uri":
input_default_value = DirectoryUriToolParameter(None, default_source)
+ elif param_type == "field":
+ input_default_value = FieldTypeToolParameter(None, default_source)
return input_default_value
diff --git a/lib/galaxy_ext/cwl/__init__.py b/lib/galaxy_ext/cwl/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/lib/galaxy_ext/cwl/handle_outputs.py b/lib/galaxy_ext/cwl/handle_outputs.py
new file mode 100644
index 000000000000..9d38fbab0796
--- /dev/null
+++ b/lib/galaxy_ext/cwl/handle_outputs.py
@@ -0,0 +1,18 @@
+"""
+"""
+
+import logging
+import os
+import sys
+
+# insert *this* galaxy before all others on sys.path
+sys.path.insert(1, os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)))
+
+from galaxy.tool_util.cwl import handle_outputs
+
+logging.basicConfig()
+log = logging.getLogger(__name__)
+
+
+def relocate_dynamic_outputs():
+ handle_outputs()
diff --git a/lib/galaxy_test/api/test_dataset_collections.py b/lib/galaxy_test/api/test_dataset_collections.py
index d7710c57b2fa..372d693d4fd0 100644
--- a/lib/galaxy_test/api/test_dataset_collections.py
+++ b/lib/galaxy_test/api/test_dataset_collections.py
@@ -1,3 +1,4 @@
+import json
import zipfile
from io import BytesIO
from typing import List
@@ -100,6 +101,106 @@ def test_create_list_of_new_pairs(self):
pair_1_element_1 = pair_elements[0]
assert pair_1_element_1["element_index"] == 0
+ def test_create_record(self, history_id):
+ contents = [
+ ("condition", "1\t2\t3"),
+ ("control1", "4\t5\t6"),
+ ("control2", "7\t8\t9"),
+ ]
+ record_identifiers = self.dataset_collection_populator.list_identifiers(history_id, contents)
+ fields = [
+ {"name": "condition", "type": "File"},
+ {"name": "control1", "type": "File"},
+ {"name": "control2", "type": "File"},
+ ]
+ payload = dict(
+ name="a record",
+ instance_type="history",
+ history_id=history_id,
+ element_identifiers=record_identifiers,
+ collection_type="record",
+ fields=fields,
+ )
+ create_response = self._post("dataset_collections", payload, json=True)
+ dataset_collection = self._check_create_response(create_response)
+ assert dataset_collection["collection_type"] == "record"
+ assert dataset_collection["name"] == "a record"
+ returned_collections = dataset_collection["elements"]
+ assert len(returned_collections) == 3, dataset_collection
+ record_pos_0_element = returned_collections[0]
+ self._assert_has_keys(record_pos_0_element, "element_index")
+ record_pos_0_object = record_pos_0_element["object"]
+ self._assert_has_keys(record_pos_0_object, "name", "history_content_type")
+
+ def test_record_requires_fields(self, history_id):
+ contents = [
+ ("condition", "1\t2\t3"),
+ ("control1", "4\t5\t6"),
+ ("control2", "7\t8\t9"),
+ ]
+ record_identifiers = self.dataset_collection_populator.list_identifiers(history_id, contents)
+ payload = dict(
+ name="a record",
+ instance_type="history",
+ history_id=history_id,
+ element_identifiers=json.dumps(record_identifiers),
+ collection_type="record",
+ )
+ create_response = self._post("dataset_collections", payload)
+ self._assert_status_code_is(create_response, 400)
+
+ def test_record_auto_fields(self, history_id):
+ contents = [
+ ("condition", "1\t2\t3"),
+ ("control1", "4\t5\t6"),
+ ("control2", "7\t8\t9"),
+ ]
+ record_identifiers = self.dataset_collection_populator.list_identifiers(history_id, contents)
+ payload = dict(
+ name="a record",
+ instance_type="history",
+ history_id=history_id,
+ element_identifiers=record_identifiers,
+ collection_type="record",
+ fields="auto",
+ )
+ create_response = self._post("dataset_collections", payload, json=True)
+ self._check_create_response(create_response)
+
+ def test_record_field_validation(self, history_id):
+ contents = [
+ ("condition", "1\t2\t3"),
+ ("control1", "4\t5\t6"),
+ ("control2", "7\t8\t9"),
+ ]
+ record_identifiers = self.dataset_collection_populator.list_identifiers(history_id, contents)
+ too_few_fields = [
+ {"name": "condition", "type": "File"},
+ {"name": "control1", "type": "File"},
+ ]
+ too_many_fields = [
+ {"name": "condition", "type": "File"},
+ {"name": "control1", "type": "File"},
+ {"name": "control2", "type": "File"},
+ {"name": "control3", "type": "File"},
+ ]
+ wrong_name_fields = [
+ {"name": "condition", "type": "File"},
+ {"name": "control1", "type": "File"},
+ {"name": "control3", "type": "File"},
+ ]
+ for fields in [too_few_fields, too_many_fields, wrong_name_fields]:
+ payload = dict(
+ name="a record",
+ instance_type="history",
+ history_id=history_id,
+ element_identifiers=json.dumps(record_identifiers),
+ collection_type="record",
+ fields=json.dumps(fields),
+ )
+ create_response = self._post("dataset_collections", payload)
+ self._assert_status_code_is(create_response, 400)
+
def test_list_download(self):
with self.dataset_populator.test_history(require_new=False) as history_id:
fetch_response = self.dataset_collection_populator.create_list_in_history(
diff --git a/lib/galaxy_test/api/test_tools.py b/lib/galaxy_test/api/test_tools.py
index 41d982d246c9..ec6704f4727a 100644
--- a/lib/galaxy_test/api/test_tools.py
+++ b/lib/galaxy_test/api/test_tools.py
@@ -87,6 +87,7 @@ def _run(
use_cached_job=False,
wait_for_job=False,
input_format="legacy",
+ inputs_representation=None,
):
if inputs is None:
inputs = {}
@@ -97,6 +98,7 @@ def _run(
inputs=inputs,
history_id=history_id,
input_format=input_format,
+ inputs_representation=inputs_representation,
)
if tool_uuid:
payload["tool_uuid"] = tool_uuid
diff --git a/lib/galaxy_test/api/test_tools_cwl.py b/lib/galaxy_test/api/test_tools_cwl.py
new file mode 100644
index 000000000000..76f6346922d4
--- /dev/null
+++ b/lib/galaxy_test/api/test_tools_cwl.py
@@ -0,0 +1,397 @@
+"""Test CWL Tool Execution via the API."""
+
+from typing import (
+ Any,
+ Dict,
+ Optional,
+)
+
+from typing_extensions import Literal
+
+from galaxy.tool_util.cwl.representation import USE_FIELD_TYPES
+from galaxy_test.api._framework import ApiTestCase
+from galaxy_test.base.populators import (
+ CwlPopulator,
+ CwlToolRun,
+ DatasetPopulator,
+ skip_without_tool,
+ WorkflowPopulator,
+)
+
+
+class TestCwlTools(ApiTestCase):
+ """Test CWL Tool Execution via the API."""
+
+ dataset_populator: DatasetPopulator
+
+ require_admin_user = True
+
+ def setUp(self):
+ """Setup dataset populator."""
+ super().setUp()
+ self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
+ workflow_populator = WorkflowPopulator(self.galaxy_interactor)
+ self.cwl_populator = CwlPopulator(self.dataset_populator, workflow_populator)
+
+ @skip_without_tool("cat1-tool.cwl")
+ def test_cat1_number(self, history_id: str) -> None:
+ """Test execution of cat1 using the "normal" Galaxy job API representation."""
+ hda1 = _dataset_to_param(self.dataset_populator.new_dataset(history_id, content="1\n2\n3", name="test1"))
+ if not USE_FIELD_TYPES:
+ inputs = {
+ "file1": hda1,
+ "numbering|_cwl__type_": "boolean",
+ "numbering|_cwl__value_": True,
+ }
+ else:
+ inputs = {
+ "file1": hda1,
+ "numbering": {"src": "json", "value": True},
+ }
+ stdout = self._run_and_get_stdout("cat1-tool.cwl", history_id, inputs, assert_ok=True)
+ assert stdout == " 1\t1\n 2\t2\n 3\t3\n"
+
+ @skip_without_tool("cat1-tool.cwl")
+ def test_cat1_number_cwl_json(self, history_id: str) -> None:
+ """Test execution of cat1 using the "CWL" Galaxy job API representation."""
+ hda1 = _dataset_to_param(self.dataset_populator.new_dataset(history_id, content="1\n2\n3"))
+ inputs = {
+ "file1": hda1,
+ "numbering": True,
+ }
+ stdout = self._run_and_get_stdout(
+ "cat1-tool.cwl", history_id, inputs, assert_ok=True, inputs_representation="cwl"
+ )
+ assert stdout == " 1\t1\n 2\t2\n 3\t3\n"
+
+ @skip_without_tool("cat1-tool.cwl")
+ def test_cat1_number_cwl_json_file(self) -> None:
+ """Test execution of cat1 using the CWL job definition file."""
+ run_object = self.cwl_populator.run_cwl_job(
+ "cat1-tool.cwl", "test/functional/tools/cwl_tools/v1.0_custom/cat-job.json"
+ )
+ assert isinstance(run_object, CwlToolRun)
+ stdout = self._get_job_stdout(run_object.job_id)
+ assert stdout == "Hello world!\n"
+
+ @skip_without_tool("cat1-tool.cwl")
+ def test_cat1_number_cwl_n_json_file(self) -> None:
+ run_object = self.cwl_populator.run_cwl_job(
+ "cat1-tool.cwl", "test/functional/tools/cwl_tools/v1.0_custom/cat-n-job.json"
+ )
+ assert isinstance(run_object, CwlToolRun)
+ stdout = self._get_job_stdout(run_object.job_id)
+ assert stdout == " 1\tHello world!\n"
+
+ @skip_without_tool("cat2-tool.cwl")
+ def test_cat2(self) -> None:
+ run_object = self.cwl_populator.run_cwl_job(
+ "cat2-tool.cwl", "test/functional/tools/cwl_tools/v1.0_custom/cat-job.json"
+ )
+ assert isinstance(run_object, CwlToolRun)
+ stdout = self._get_job_stdout(run_object.job_id)
+ assert stdout == "Hello world!\n"
+
+ @skip_without_tool("galactic_cat.cwl#galactic_cat")
+ def test_galactic_cat_1(self, history_id: str) -> None:
+ hda_id = self.dataset_populator.new_dataset(history_id, name="test_dataset.txt")["id"]
+ self.dataset_populator.wait_for_history(history_id, assert_ok=True)
+ inputs = {"input1": {"src": "hda", "id": hda_id}}
+ run_response = self._run("galactic_cat.cwl#galactic_cat", history_id, inputs, assert_ok=True)
+ dataset = run_response["outputs"][0]
+ content = self.dataset_populator.get_history_dataset_content(history_id, dataset=dataset)
+ assert content.strip() == "TestData123", content
+
+ @skip_without_tool("galactic_record_input.cwl#galactic_record_input")
+ def test_galactic_record_input(self, history_id: str) -> None:
+ hda1_id = self.dataset_populator.new_dataset(history_id, content="moo", name="test_dataset.txt")["id"]
+ hda2_id = self.dataset_populator.new_dataset(history_id, content="cow dog foo", name="test_dataset.txt")["id"]
+ self.dataset_populator.wait_for_history(history_id, assert_ok=True)
+ inputs = {
+ "input1": {"src": "hda", "id": hda1_id},
+ "input2": {"src": "hda", "id": hda2_id},
+ }
+ run_response = self._run("galactic_record_input.cwl#galactic_record_input", history_id, inputs, assert_ok=True)
+ dataset = run_response["outputs"][0]
+ content = self.dataset_populator.get_history_dataset_content(history_id, dataset=dataset)
+ assert content.strip() == "moo", content
+
+ dataset = run_response["outputs"][1]
+ content = self.dataset_populator.get_history_dataset_content(history_id, dataset=dataset)
+ assert content.strip() == "cow dog foo", content
+
+ def _run_and_get_stdout(self, tool_id: str, history_id: str, inputs: Dict[str, Any], **kwds) -> str:
+ response = self._run(tool_id, history_id, inputs, **kwds)
+ assert "jobs" in response
+ job = response["jobs"][0]
+ job_id = job["id"]
+ final_state = self.dataset_populator.wait_for_job(job_id)
+ assert final_state == "ok"
+ return self._get_job_stdout(job_id)
+
+ def _get_job_stdout(self, job_id: str) -> str:
+ job_details = self.dataset_populator.get_job_details(job_id, full=True)
+ stdout = job_details.json()["tool_stdout"]
+ return stdout
+
+ @skip_without_tool("cat3-tool.cwl")
+ def test_cat3(self, history_id: str) -> None:
+ hda1 = _dataset_to_param(self.dataset_populator.new_dataset(history_id, content="1\t2\t3"))
+ inputs = {
+ "f1": hda1,
+ }
+ response = self._run("cat3-tool.cwl", history_id, inputs, assert_ok=True)
+ output1 = response["outputs"][0]
+ output1_details = self.dataset_populator.get_history_dataset_details(history_id, dataset=output1)
+ assert "created_from_basename" in output1_details, output1_details.keys()
+ assert output1_details["created_from_basename"] == "output.txt", output1_details["created_from_basename"]
+ output1_content = self.dataset_populator.get_history_dataset_content(history_id, dataset=output1)
+ assert output1_content == "1\t2\t3\n", output1_content
+
+ @skip_without_tool("sorttool.cwl")
+ def test_sorttool(self, history_id: str) -> None:
+ hda1 = _dataset_to_param(self.dataset_populator.new_dataset(history_id, content="1\n2\n3"))
+ inputs = {"reverse": False, "input": hda1}
+ response = self._run("sorttool.cwl", history_id, inputs, assert_ok=True)
+ output1 = response["outputs"][0]
+ output1_content = self.dataset_populator.get_history_dataset_content(history_id, dataset=output1)
+ assert output1_content == "1\n2\n3\n", output1_content
+
+ @skip_without_tool("sorttool.cwl")
+ def test_sorttool_reverse(self, history_id: str) -> None:
+ hda1 = _dataset_to_param(self.dataset_populator.new_dataset(history_id, content="1\n2\n3"))
+ inputs = {"reverse": True, "input": hda1}
+ response = self._run("sorttool.cwl", history_id, inputs, assert_ok=True)
+ output1 = response["outputs"][0]
+ output1_content = self.dataset_populator.get_history_dataset_content(history_id, dataset=output1)
+ assert output1_content == "3\n2\n1\n", output1_content
+
+ @skip_without_tool("env-tool1.cwl")
+ def test_env_tool1(self, history_id: str) -> None:
+ inputs = {
+ "in": "Hello World",
+ }
+ response = self._run("env-tool1.cwl", history_id, inputs, assert_ok=True)
+ output1 = response["outputs"][0]
+ output1_content = self.dataset_populator.get_history_dataset_content(history_id, dataset=output1)
+ assert output1_content == "Hello World\n"
+
+ @skip_without_tool("optional-output.cwl")
+ def test_optional_output(self) -> None:
+ run_object = self.cwl_populator.run_cwl_job(
+ "optional-output.cwl", "test/functional/tools/cwl_tools/v1.0/v1.0/cat-job.json"
+ )
+ output_file_id = run_object._output_name_to_object("output_file").history_content_id
+ optional_file_id = run_object._output_name_to_object("optional_file").history_content_id
+ output_content = self.dataset_populator.get_history_dataset_content(
+ run_object.history_id, dataset_id=output_file_id
+ )
+ optional_content = self.dataset_populator.get_history_dataset_content(
+ run_object.history_id, dataset_id=optional_file_id
+ )
+ assert output_content == "Hello world!\n"
+ assert optional_content == "null"
+
+ @skip_without_tool("optional-output2.cwl")
+ def test_optional_output2_on(self) -> None:
+ run_object = self.cwl_populator.run_cwl_job(
+ "optional-output2.cwl",
+ job={
+ "produce": "do_write",
+ },
+ test_data_directory="test/functional/tools/cwl_tools/v1.0_custom",
+ )
+ output_content = self.dataset_populator.get_history_dataset_content(run_object.history_id)
+ assert output_content == "bees\n"
+
+ @skip_without_tool("optional-output2.cwl")
+ def test_optional_output2_off(self) -> None:
+ run_object = self.cwl_populator.run_cwl_job(
+ "optional-output2.cwl",
+ job={
+ "produce": "dont_write",
+ },
+ test_data_directory="test/functional/tools/cwl_tools/v1.0_custom",
+ )
+ output_content = self.dataset_populator.get_history_dataset_content(run_object.history_id)
+ assert output_content == "null"
+
+ @skip_without_tool("index1.cwl")
+ @skip_without_tool("showindex1.cwl")
+ def test_index1(self) -> None:
+ run_object = self.cwl_populator.run_cwl_job(
+ "index1.cwl",
+ job={
+ "file": {"class": "File", "path": "whale.txt"},
+ },
+ test_data_directory="test/functional/tools/cwl_tools/v1.0_custom",
+ )
+ output1 = self.dataset_populator.get_history_dataset_details(run_object.history_id)
+ run_object = self.cwl_populator.run_cwl_job(
+ "showindex1.cwl",
+ job={
+ "file": {
+ "src": "hda",
+ "id": output1["id"],
+ },
+ },
+ test_data_directory="test/functional/tools/cwl_tools/v1.0_custom",
+ history_id=run_object.history_id,
+ skip_input_staging=True,
+ )
+ output1_content = self.dataset_populator.get_history_dataset_content(run_object.history_id)
+ assert "call: 1\n" in output1_content, output1_content
+
+ @skip_without_tool("any1.cwl")
+ def test_any1_0(self) -> None:
+ run_object = self.cwl_populator.run_cwl_job(
+ "any1.cwl",
+ job={"bar": 7},
+ test_data_directory="test/functional/tools/cwl_tools/v1.0/v1.0/",
+ )
+ output1_content = self.dataset_populator.get_history_dataset_content(run_object.history_id)
+ assert output1_content == "7", output1_content
+
+ @skip_without_tool("any1.cwl")
+ def test_any1_1(self) -> None:
+ run_object = self.cwl_populator.run_cwl_job(
+ "any1.cwl",
+ job={"bar": "7"},
+ test_data_directory="test/functional/tools/cwl_tools/v1.0/v1.0/",
+ )
+ output1_content = self.dataset_populator.get_history_dataset_content(run_object.history_id)
+ assert output1_content == '"7"', output1_content
+
+ @skip_without_tool("any1.cwl")
+ def test_any1_file(self) -> None:
+ run_object = self.cwl_populator.run_cwl_job(
+ "any1.cwl",
+ job={
+ "bar": {
+ "class": "File",
+ "location": "whale.txt",
+ }
+ },
+ test_data_directory="test/functional/tools/cwl_tools/v1.0/v1.0/",
+ )
+ output1_content = self.dataset_populator.get_history_dataset_content(run_object.history_id)
+ self.dataset_populator._summarize_history(run_object.history_id)
+ assert output1_content == '"File"'
+
+ @skip_without_tool("any1.cwl")
+ def test_any1_2(self) -> None:
+ run_object = self.cwl_populator.run_cwl_job(
+ "any1.cwl",
+ job={"bar": {"Cow": ["Turkey"]}},
+ test_data_directory="test/functional/tools/cwl_tools/v1.0/v1.0/",
+ )
+ output1_content = self.dataset_populator.get_history_dataset_content(run_object.history_id)
+ assert output1_content == '{"Cow": ["Turkey"]}', output1_content
+
+ @skip_without_tool("null-expression2-tool.cwl")
+ def test_null_expression_any_bad_1(self) -> None:
+ """Test explicitly passing null to Any type without a default value fails."""
+ run_object = self.cwl_populator.run_cwl_job(
+ "null-expression2-tool.cwl",
+ "test/functional/tools/cwl_tools/v1.0/v1.0/null-expression1-job.json",
+ assert_ok=False,
+ )
+ with self.assertRaises(AssertionError):
+ run_object.wait()
+
+ @skip_without_tool("null-expression2-tool.cwl")
+ def test_null_expression_any_bad_2(self) -> None:
+ """Test Any without defaults can be unspecified."""
+ run_object = self.cwl_populator.run_cwl_job(
+ "null-expression2-tool.cwl", "test/functional/tools/cwl_tools/v1.0/v1.0/empty.json", assert_ok=False
+ )
+ with self.assertRaises(AssertionError):
+ run_object.wait()
+
+ @skip_without_tool("default_path_custom_1.cwl")
+ def test_default_path(self) -> None:
+ # Produces no output datasets - just check that the job runs okay
+ # and that its standard output mentions the default test file.
+ run_object = self.cwl_populator.run_cwl_job("default_path_custom_1.cwl", job={})
+ assert isinstance(run_object, CwlToolRun)
+ stdout = self._get_job_stdout(run_object.job_id)
+ assert "this is the test file that will be used when calculating an md5sum" in stdout
+
+ @skip_without_tool("parseInt-tool.cwl")
+ def test_parse_int_tool(self) -> None:
+ run_object = self.cwl_populator.run_cwl_job(
+ "parseInt-tool.cwl", "test/functional/tools/cwl_tools/v1.0/v1.0/parseInt-job.json"
+ )
+ output1 = self.dataset_populator.get_history_dataset_details(run_object.history_id, hid=2)
+ assert output1["state"] == "ok"
+ output1_content = self.dataset_populator.get_history_dataset_content(run_object.history_id, hid=2)
+ assert output1_content == "42"
+ assert output1["extension"] == "expression.json"
+
+ @skip_without_tool("record-output.cwl")
+ def test_record_output(self) -> None:
+ run_object = self.cwl_populator.run_cwl_job(
+ "record-output.cwl", "test/functional/tools/cwl_tools/v1.0/v1.0/record-output-job.json"
+ )
+ assert isinstance(run_object, CwlToolRun)
+ result_record = run_object.output_collection(0)
+ assert result_record["collection_type"] == "record"
+ record_elements = result_record["elements"]
+ first_element = record_elements[0]
+ assert first_element["element_identifier"] == "ofoo"
+ first_hda = first_element["object"]
+ output1_content = self.dataset_populator.get_history_dataset_content(
+ run_object.history_id, hid=first_hda["hid"]
+ )
+ assert "Call me Ishmael." in output1_content, f"Expected contents of whale.txt, got [{output1_content}]"
+
+ # def test_dynamic_tool_execution(self) -> None:
+ # workflow_tool_json = {
+ # "inputs": [{"inputBinding": {}, "type": "File", "id": "file:///home/john/workspace/galaxy/test/unit/tools/cwl_tools/v1.0/v1.0/count-lines2-wf.cwl#step1/wc/wc_file1"}],
+ # "stdout": "output.txt",
+ # "id": "file:///home/john/workspace/galaxy/test/unit/tools/cwl_tools/v1.0/v1.0/count-lines2-wf.cwl#step1/wc",
+ # "outputs": [{"outputBinding": {"glob": "output.txt"}, "type": "File", "id": "file:///home/john/workspace/galaxy/test/unit/tools/cwl_tools/v1.0/v1.0/count-lines2-wf.cwl#step1/wc/wc_output"}],
+ # "baseCommand": "wc",
+ # "class": "CommandLineTool",
+ # }
+
+ # create_payload = dict(
+ # representation=json.dumps(workflow_tool_json),
+ # )
+ # create_response = self._post("dynamic_tools", data=create_payload, admin=True)
+ # self._assert_status_code_is(create_response, 200)
+
+ # TODO: Use mixin so this can be shared with tools test case.
+ def _run(
+ self,
+ tool_id: str,
+ history_id: str,
+ inputs: Dict[str, Any],
+ assert_ok: bool = False,
+ tool_version: Optional[str] = None,
+ inputs_representation: Optional[Literal["cwl", "galaxy"]] = None,
+ ):
+ payload = self.dataset_populator.run_tool_payload(
+ tool_id=tool_id,
+ inputs=inputs,
+ history_id=history_id,
+ inputs_representation=inputs_representation,
+ )
+ if tool_version is not None:
+ payload["tool_version"] = tool_version
+ create_response = self._post("tools", data=payload)
+ if assert_ok:
+ self._assert_status_code_is(create_response, 200)
+ create = create_response.json()
+ self._assert_has_keys(create, "outputs")
+ return create
+ else:
+ return create_response
+
+
+def whale_text() -> str:
+ return open("test/functional/tools/cwl_tools/v1.0/v1.0/whale.txt").read()
+
+
+def _dataset_to_param(dataset: Dict) -> Dict[str, str]:
+ return dict(src="hda", id=dataset["id"])
diff --git a/lib/galaxy_test/api/test_workflows.py b/lib/galaxy_test/api/test_workflows.py
index 6d14cb49e7df..57de8ac69550 100644
--- a/lib/galaxy_test/api/test_workflows.py
+++ b/lib/galaxy_test/api/test_workflows.py
@@ -66,6 +66,7 @@
WORKFLOW_WITH_OUTPUT_COLLECTION_MAPPING,
WORKFLOW_WITH_RULES_1,
WORKFLOW_WITH_STEP_DEFAULT_FILE_DATASET_INPUT,
+ WORKFLOW_WITH_STEP_DEFAULT_FILE_DATASET_INPUT_OVERRIDES_TOOL_DEFAULT,
)
from ._framework import ApiTestCase
from .sharable import SharingApiTests
@@ -5180,6 +5181,17 @@ def test_run_with_default_file_in_step_inline(self):
content = self.dataset_populator.get_history_dataset_content(history_id)
assert "chr1" in content
+ def test_run_with_default_file_in_step_inline_overrides_tool_default_file(self):
+ with self.dataset_populator.test_history() as history_id:
+ self._run_workflow(
+ WORKFLOW_WITH_STEP_DEFAULT_FILE_DATASET_INPUT_OVERRIDES_TOOL_DEFAULT,
+ history_id=history_id,
+ wait=True,
+ assert_ok=True,
+ )
+ content = self.dataset_populator.get_history_dataset_content(history_id)
+ assert ">hg17" in content
+
def test_conditional_flat_crossproduct_subworkflow(self):
parent = yaml.safe_load(
"""
diff --git a/lib/galaxy_test/api/test_workflows_cwl.py b/lib/galaxy_test/api/test_workflows_cwl.py
index b9af5cbc1e2f..c5ed2b5b8c79 100644
--- a/lib/galaxy_test/api/test_workflows_cwl.py
+++ b/lib/galaxy_test/api/test_workflows_cwl.py
@@ -1,7 +1,18 @@
"""Test CWL workflow functionality."""
+import os
+import re
+
from galaxy_test.api.test_workflows import BaseWorkflowsApiTestCase
-from galaxy_test.base.populators import CwlPopulator
+from galaxy_test.base.populators import (
+ CWL_TOOL_DIRECTORY,
+ CwlPopulator,
+ CwlWorkflowRun,
+)
+
+
+def resolve_path(rel_path: str) -> str:
+ return os.path.join(CWL_TOOL_DIRECTORY, rel_path)
class BaseCwlWorkflowsApiTestCase(BaseWorkflowsApiTestCase):
@@ -11,3 +22,136 @@ class BaseCwlWorkflowsApiTestCase(BaseWorkflowsApiTestCase):
def setUp(self):
super().setUp()
self.cwl_populator = CwlPopulator(self.dataset_populator, self.workflow_populator)
+
+
+class TestCwlWorkflows(BaseCwlWorkflowsApiTestCase):
+ """Test case encompassing CWL workflow tests."""
+
+ def test_simplest_wf(self, history_id: str) -> None:
+ """Test simplest workflow."""
+ workflow_id = self._load_workflow("v1.0_custom/just-wc-wf.cwl")
+ workflow_content = self._download_workflow(workflow_id)
+ for step in workflow_content["steps"].values():
+ if "tool_representation" in step:
+ del step["tool_representation"]
+
+ hda1 = self.dataset_populator.new_dataset(
+ history_id, content="hello world\nhello all\nhello all in world\nhello"
+ )
+ inputs = {"file1": {"src": "hda", "id": hda1["id"]}}
+ invocation_id = self.workflow_populator.invoke_workflow_and_assert_ok(
+ workflow_id, history_id, inputs, inputs_by="name"
+ )
+ self.workflow_populator.wait_for_invocation_and_jobs(history_id, workflow_id, invocation_id)
+ output = self.dataset_populator.get_history_dataset_content(history_id, hid=2)
+ assert re.search(r"\s+4\s+9\s+47\s+", output)
+
+ def test_load_ids(self) -> None:
+ workflow_id = self._load_workflow("v1.0/v1.0/search.cwl#main")
+ workflow_content = self._download_workflow(workflow_id)
+ for step in workflow_content["steps"].values():
+ if "tool_representation" in step:
+ del step["tool_representation"]
+
+ steps = workflow_content["steps"]
+ step_3 = steps["3"]
+ step_4 = steps["4"]
+
+ assert step_3["label"] == "index", step_3
+ assert step_4["label"] == "search", step_4
+
+ def test_count_line1_v1(self, history_id: str) -> None:
+ """Test simple workflow v1.0/v1.0/count-lines1-wf.cwl."""
+ self._run_count_lines_wf("v1.0/v1.0/count-lines1-wf.cwl", history_id)
+
+ def test_count_line1_v1_json(self, history_id: str) -> None:
+ run_object = self.cwl_populator.run_cwl_job(
+ resolve_path("v1.0/v1.0/count-lines1-wf.cwl"),
+ resolve_path("v1.0/v1.0/wc-job.json"),
+ history_id=history_id,
+ )
+ assert isinstance(run_object, CwlWorkflowRun)
+ self._check_countlines_wf(history_id, run_object.invocation_id, run_object.workflow_id, expected_count=16)
+
+ def test_count_line2_v1(self, history_id: str) -> None:
+ """Test simple workflow v1.0/v1.0/count-lines2-wf.cwl."""
+ self._run_count_lines_wf("v1.0/v1.0/count-lines2-wf.cwl", history_id)
+
+ def test_count_lines3_v1(self, history_id: str) -> None:
+ workflow_id = self._load_workflow("v1.0/v1.0/count-lines3-wf.cwl")
+ fetch_response = self.dataset_collection_populator.create_list_in_history(history_id).json()
+ hdca = self.dataset_collection_populator.wait_for_fetched_collection(fetch_response)
+ inputs = {"file1": {"src": "hdca", "id": hdca["id"]}}
+ invocation_id = self.workflow_populator.invoke_workflow_and_assert_ok(
+ workflow_id, history_id, inputs, inputs_by="name"
+ )
+ self.workflow_populator.wait_for_invocation_and_jobs(history_id, workflow_id, invocation_id)
+ hdca = self.dataset_populator.get_history_collection_details(history_id, hid=5)
+ assert hdca["collection_type"] == "list"
+ elements = hdca["elements"]
+ assert len(elements) == 3
+ element0 = elements[0]["object"]
+ assert element0["history_content_type"] == "dataset"
+ assert element0["state"] == "ok"
+ assert element0["file_ext"] == "expression.json"
+ # TODO: ensure this looks like an int[] - it doesn't currently...
+
+ def test_count_lines4_v1(self, history_id: str) -> None:
+ workflow_id = self._load_workflow("v1.0/v1.0/count-lines4-wf.cwl")
+ hda1 = self.dataset_populator.new_dataset(
+ history_id, content="hello world\nhello all\nhello all in world\nhello"
+ )
+ hda2 = self.dataset_populator.new_dataset(history_id, content="moo\ncow\nthat\nis\nall")
+ inputs = {"file1": {"src": "hda", "id": hda1["id"]}, "file2": {"src": "hda", "id": hda2["id"]}}
+ invocation_id = self.workflow_populator.invoke_workflow_and_assert_ok(
+ workflow_id, history_id, inputs, inputs_by="name"
+ )
+ self.workflow_populator.wait_for_invocation_and_jobs(history_id, workflow_id, invocation_id)
+ self.dataset_populator.get_history_collection_details(history_id, hid=4)
+
+ def test_count_lines4_json(self, history_id: str) -> None:
+ self.cwl_populator.run_cwl_job(
+ resolve_path("v1.0/v1.0/count-lines4-wf.cwl"),
+ resolve_path("v1.0/v1.0/count-lines4-job.json"),
+ history_id=history_id,
+ )
+ self.dataset_populator.get_history_collection_details(history_id, hid=4)
+
+ def test_scatter_wf1_v1(self, history_id: str) -> None:
+ self.cwl_populator.run_cwl_job(
+ resolve_path("v1.0/v1.0/scatter-wf1.cwl"),
+ resolve_path("v1.0/v1.0/scatter-job1.json"),
+ history_id=history_id,
+ )
+ self.dataset_populator.get_history_collection_details(history_id, hid=5)
+
+ def _run_count_lines_wf(self, wf_path: str, history_id: str) -> None:
+ workflow_id = self._load_workflow(wf_path)
+ hda1 = self.dataset_populator.new_dataset(
+ history_id, content="hello world\nhello all\nhello all in world\nhello"
+ )
+ inputs = {"file1": {"src": "hda", "id": hda1["id"]}}
+ invocation_id = self.workflow_populator.invoke_workflow_and_assert_ok(
+ workflow_id, history_id, inputs, inputs_by="name"
+ )
+ self._check_countlines_wf(history_id, invocation_id, workflow_id)
+
+ def _check_countlines_wf(
+ self, history_id: str, invocation_id: str, workflow_id: str, expected_count: int = 4
+ ) -> None:
+ self.workflow_populator.wait_for_invocation_and_jobs(history_id, workflow_id, invocation_id)
+ output = self.dataset_populator.get_history_dataset_content(history_id, hid=3)
+ assert int(output) == expected_count, output
+
+ def _load_workflow(self, rel_path: str) -> str:
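+ """Import the workflow at rel_path (relative to the CWL tool directory, ignoring any #fragment) and return its ID."""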
+ rel_path = rel_path.split("#", 1)[0]
+ path = resolve_path(rel_path)
+ data = dict(
+ from_path=path,
+ )
+ route = "workflows"
+ upload_response = self._post(route, data=data)
+ self._assert_status_code_is(upload_response, 200)
+ workflow = upload_response.json()
+ workflow_id = workflow["id"]
+ return workflow_id
diff --git a/lib/galaxy_test/base/cwl_location_rewriter.py b/lib/galaxy_test/base/cwl_location_rewriter.py
new file mode 100644
index 000000000000..e8743e1dd4e8
--- /dev/null
+++ b/lib/galaxy_test/base/cwl_location_rewriter.py
@@ -0,0 +1,146 @@
+import functools
+import json
+import logging
+import os
+import urllib.parse
+
+from cwltool.context import LoadingContext
+from cwltool.load_tool import default_loader
+from cwltool.pack import pack
+from cwltool.utils import visit_field
+
+log = logging.getLogger(__name__)
+
+# https://github.com/common-workflow-language/cwltool/issues/1937
+PACKED_WORKFLOWS_CWL_BUG = {
+ "conflict-wf.cwl": """
+$graph:
+- baseCommand: echo
+ class: CommandLineTool
+ id: echo
+ inputs:
+ text:
+ inputBinding: {}
+ type: string
+ outputs:
+ fileout:
+ outputBinding: {glob: out.txt}
+ type: File
+ stdout: out.txt
+- baseCommand: cat
+ class: CommandLineTool
+ id: cat
+ inputs:
+ file1:
+ inputBinding: {position: 1}
+ type: File
+ file2:
+ inputBinding: {position: 2}
+ type: File
+ outputs:
+ fileout:
+ outputBinding: {glob: out.txt}
+ type: File
+ stdout: out.txt
+- class: Workflow
+ id: collision
+ inputs: {input_1: string, input_2: string}
+ outputs:
+ fileout: {outputSource: cat_step/fileout, type: File}
+ steps:
+ cat_step:
+ in:
+ file1: {source: echo_1/fileout}
+ file2: {source: echo_2/fileout}
+ out: [fileout]
+ run: '#cat'
+ echo_1:
+ in: {text: input_1}
+ out: [fileout]
+ run: '#echo'
+ echo_2:
+ in: {text: input_2}
+ out: [fileout]
+ run: '#echo'
+cwlVersion: v1.1
+""",
+ "js-expr-req-wf.cwl": """
+$graph:
+- arguments: [echo, $(foo())]
+ class: CommandLineTool
+ hints:
+ ResourceRequirement: {ramMin: 8}
+ id: tool
+ inputs: []
+ outputs: {out: stdout}
+ requirements:
+ InlineJavascriptRequirement:
+ expressionLib: ['function foo() { return 2; }']
+ stdout: whatever.txt
+- class: Workflow
+ id: wf
+ inputs: []
+ outputs:
+ out: {outputSource: tool/out, type: File}
+ requirements:
+ InlineJavascriptRequirement:
+ expressionLib: ['function bar() { return 1; }']
+ steps:
+ tool:
+ in: {}
+ out: [out]
+ run: '#tool'
+cwlVersion: v1.0
+""",
+}
+
+
+def get_cwl_test_url(cwl_version):
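+ """Return the base URL for the conformance tests of the given CWL version."""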
+ branch = "main"
+ if cwl_version == "1.0":
+ repo_name = "common-workflow-language"
+ tests_dir = "v1.0/v1.0"
+ else:
+ repo_name = f"cwl-v{cwl_version}"
+ tests_dir = "tests"
+ if cwl_version == "1.2.1":
+ branch = "1.2.1_proposed"
+ return f"https://raw.githubusercontent.com/common-workflow-language/{repo_name}/{branch}/{tests_dir}"
+
+
+def get_url(item, cwl_version, base_dir):
+ # Quick hack: rewrite local File locations to conformance-test URLs. Would it be more useful to upload files/directories/paths to the Galaxy instance instead?
+ if isinstance(item, dict) and item.get("class") == "File":
+ location = item.pop("path", None)
+ if not location:
+ parse_result = urllib.parse.urlparse(item["location"])
+ if parse_result.scheme == "file":
+ location = urllib.parse.unquote(parse_result.path)
+ if base_dir not in location:
+ return item
+ location = os.path.relpath(location, base_dir)
+ url = f"{get_cwl_test_url(cwl_version)}/{location}"
+ log.debug("Rewrote location from '%s' to '%s'", location, url)
+ item["location"] = url
+ return item
+
+
+def rewrite_locations(workflow_path: str, output_path: str):
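+ """Pack the CWL workflow at workflow_path, rewriting local default File locations to public conformance-test URLs, and write the result to output_path."""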
+ workflow_path_basename = os.path.basename(workflow_path)
+ if workflow_path_basename in PACKED_WORKFLOWS_CWL_BUG:
+ with open(output_path, "w") as output:
+ output.write(PACKED_WORKFLOWS_CWL_BUG[workflow_path_basename])
+ return
+ loading_context = LoadingContext()
+ loading_context.loader = default_loader()
+ workflow_obj = pack(loading_context, workflow_path)
+ cwl_version = workflow_path.split("test/functional/tools/cwl_tools/v")[1].split("/")[0]
+ # deps = find_deps(workflow_obj, loading_context.loader, uri)
+ # basedir=os.path.dirname(workflow_path)
+ visit_field(
+ workflow_obj,
+ ("default"),
+ functools.partial(get_url, cwl_version=cwl_version, base_dir=os.path.normpath(os.path.dirname(workflow_path))),
+ )
+ with open(output_path, "w") as output:
+ json.dump(workflow_obj, output)
diff --git a/lib/galaxy_test/base/populators.py b/lib/galaxy_test/base/populators.py
index 6503446c8485..c866eaec17b7 100644
--- a/lib/galaxy_test/base/populators.py
+++ b/lib/galaxy_test/base/populators.py
@@ -95,7 +95,6 @@
)
from galaxy.tool_util.client.staging import InteractorStaging
from galaxy.tool_util.cwl.util import (
- download_output,
GalaxyOutput,
guess_artifact_type,
invocation_to_output,
@@ -110,9 +109,11 @@
)
from galaxy.util import (
DEFAULT_SOCKET_TIMEOUT,
+ download_to_file,
galaxy_root_path,
UNKNOWN,
)
+from galaxy.util.compression_utils import CompressedFile
from galaxy.util.path import StrPath
from galaxy.util.resources import resource_string
from galaxy.util.unittest_utils import skip_if_site_down
@@ -295,6 +296,75 @@ def conformance_tests_gen(directory, filename="conformance_tests.yaml"):
yield conformance_test
+def to_local_location(listing, location):
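+ """Rewrite (possibly nested) listing entries so their locations point inside the local file:// directory given by location."""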
+ for item in listing:
+ if "basename" in item:
+ item["location"] = f"file://{os.path.join(location[len('file://'):], item['basename'])}"
+ if "listing" in item:
+ to_local_location(item["listing"], location=item["location"])
+
+
+def fix_conflicts(path):
+ # Find the first path that does not clash with an existing file:
+ # start with "{path}_2" and keep incrementing the suffix until
+ # there is no clash.
+ i = 2
+ tgt = f"{path}_{i}"
+ while os.path.exists(tgt):
+ i += 1
+ tgt = f"{path}_{i}"
+ return tgt
+
+
+def output_to_disk(output, download_folder):
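+ """Download a CWL output (File, Directory, record, or list thereof) into download_folder and rewrite its path/location to point at the local copy."""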
+ if isinstance(output, list):
+ return [output_to_disk(item, download_folder=download_folder) for item in output]
+ if isinstance(output, dict):
+ if "secondaryFiles" in output:
+ output["secondaryFiles"] = [
+ output_to_disk(secondary, download_folder=download_folder) for secondary in output["secondaryFiles"]
+ ]
+ if "basename" in output:
+ download_path = os.path.join(download_folder, output["basename"])
+ if os.path.exists(download_path):
+ download_path = fix_conflicts(download_path)
+ if output["class"] == "Directory":
+ zip_path = f"{download_path}.zip"
+ download_to_file(output["location"], zip_path)
+ CompressedFile(zip_path).extract(download_folder)
+ os.remove(zip_path)
+ else:
+ download_to_file(output["location"], download_path)
+ output["path"] = download_path
+ output["location"] = f"file://{download_path}"
+ if "listing" in output:
+ to_local_location(output["listing"], output["location"])
+
+ return output
+ elif output.get("class") == "Directory":
+ # Directory in secondary files
+ download_folder = os.path.join(download_folder, output["location"])
+ os.makedirs(download_folder, exist_ok=True)
+ output["location"] = download_folder
+ new_listing = [
+ output_to_disk(secondary, download_folder=download_folder) for secondary in output["listing"]
+ ]
+ output["listing"] = new_listing
+ return output
+ else:
+ new_output = {}
+ for key, value in output.items():
+ if isinstance(value, dict) and "basename" in value:
+ new_dir = os.path.join(download_folder, key)
+ os.makedirs(new_dir, exist_ok=True)
+ new_output[key] = output_to_disk(value, download_folder=new_dir)
+ else:
+ new_output[key] = value
+ return new_output
+ else:
+ return output
+
+
class CwlRun:
def __init__(self, dataset_populator, history_id):
self.dataset_populator = dataset_populator
@@ -343,14 +413,13 @@ def get_extra_files(dataset_details):
get_metadata,
get_dataset,
get_extra_files,
- pseudo_location=True,
+ pseudo_location=self.dataset_populator.galaxy_interactor.api_url,
)
if download_folder:
- if isinstance(output, dict) and "basename" in output:
- download_path = os.path.join(download_folder, output["basename"])
- download_output(galaxy_output, get_metadata, get_dataset, get_extra_files, download_path)
- output["path"] = download_path
- output["location"] = f"file://{download_path}"
+ return output_to_disk(
+ output,
+ download_folder=download_folder,
+ )
return output
@abstractmethod
@@ -369,6 +438,12 @@ def __init__(self, dataset_populator, history_id, run_response):
def job_id(self):
return self.run_response.json()["jobs"][0]["id"]
+ def output(self, output_index):
+ return self.run_response.json()["outputs"][output_index]
+
+ def output_collection(self, output_index):
+ return self.run_response.json()["output_collections"][output_index]
+
def _output_name_to_object(self, output_name):
return tool_response_to_output(self.run_response.json(), self.history_id, output_name)
@@ -1002,6 +1077,10 @@ def run_tool_payload(self, tool_id: Optional[str], inputs: dict, history_id: str
kwds["__files"][key] = value
del inputs[key]
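+ # Do not send an explicit null inputs_representation to the tools API.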
+ ir = kwds.get("inputs_representation")
+ if ir is None and "inputs_representation" in kwds:
+ del kwds["inputs_representation"]
+
return dict(tool_id=tool_id, inputs=json.dumps(inputs), history_id=history_id, **kwds)
def build_tool_state(self, tool_id: str, history_id: str):
@@ -2625,7 +2704,11 @@ def _run_cwl_workflow_job(
assert_ok: bool = True,
):
workflow_path, object_id = urllib.parse.urldefrag(workflow_path)
- workflow_id = self.workflow_populator.import_workflow_from_path(workflow_path, object_id)
+ from .cwl_location_rewriter import rewrite_locations
+
+ with tempfile.NamedTemporaryFile() as temp:
+ rewrite_locations(workflow_path=workflow_path, output_path=temp.name)
+ workflow_id = self.workflow_populator.import_workflow_from_path(temp.name, object_id)
request = {
# TODO: rework tool state corrections so more of these are valid in Galaxy
@@ -2644,6 +2727,7 @@ def run_cwl_job(
job: Optional[Dict] = None,
test_data_directory: Optional[str] = None,
history_id: Optional[str] = None,
+ skip_input_staging: bool = False,
assert_ok: bool = True,
) -> CwlRun:
"""
@@ -2668,16 +2752,18 @@ def run_cwl_job(
job = yaml.safe_load(f)
elif job is None:
job = {}
- _, datasets = stage_inputs(
- self.dataset_populator.galaxy_interactor,
- history_id,
- job,
- use_fetch_api=False,
- tool_or_workflow=tool_or_workflow,
- job_dir=test_data_directory,
- )
- if datasets:
- self.dataset_populator.wait_for_history(history_id=history_id, assert_ok=True)
+ if not skip_input_staging:
+ _, datasets = stage_inputs(
+ self.dataset_populator.galaxy_interactor,
+ history_id,
+ job,
+ use_fetch_api=True,
+ tool_or_workflow=tool_or_workflow,
+ job_dir=test_data_directory,
+ use_path_paste=False,
+ )
+ if datasets:
+ self.dataset_populator.wait_for_history(history_id=history_id, assert_ok=True)
if tool_or_workflow == "tool":
run_object = self._run_cwl_tool_job(
artifact,
@@ -2722,12 +2808,13 @@ def run_conformance_test(self, version: str, doc: str):
expected_outputs = test["output"]
for key, value in expected_outputs.items():
- try:
- actual_output = run.get_output_as_object(key)
- cwltest.compare.compare(value, actual_output)
- except Exception:
- self.dataset_populator._summarize_history(run.history_id)
- raise
+ with tempfile.TemporaryDirectory() as tmpdir:
+ try:
+ actual_output = run.get_output_as_object(key, download_folder=tmpdir)
+ cwltest.compare.compare(value, actual_output)
+ except Exception:
+ self.dataset_populator._summarize_history(run.history_id)
+ raise
class LibraryPopulator:
diff --git a/lib/galaxy_test/base/workflow_fixtures.py b/lib/galaxy_test/base/workflow_fixtures.py
index 7b4eb6186b99..ce556c530afe 100644
--- a/lib/galaxy_test/base/workflow_fixtures.py
+++ b/lib/galaxy_test/base/workflow_fixtures.py
@@ -1179,6 +1179,20 @@
location: https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/1.bed
"""
+WORKFLOW_WITH_STEP_DEFAULT_FILE_DATASET_INPUT_OVERRIDES_TOOL_DEFAULT = """
+class: GalaxyWorkflow
+steps:
+ cat1:
+ tool_id: cat_default
+ in:
+ input1:
+ default:
+ class: File
+ basename: a file
+ format: txt
+ location: https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/1.fasta
+"""
+
WORKFLOW_FLAT_CROSS_PRODUCT = """
class: GalaxyWorkflow
inputs:
diff --git a/lib/galaxy_test/driver/driver_util.py b/lib/galaxy_test/driver/driver_util.py
index 43f096a963e0..d4254ee93fcf 100644
--- a/lib/galaxy_test/driver/driver_util.py
+++ b/lib/galaxy_test/driver/driver_util.py
@@ -238,6 +238,7 @@ def setup_galaxy_config(
job_handler_monitor_sleep=0.2,
job_runner_monitor_sleep=0.2,
workflow_monitor_sleep=0.2,
+ strict_cwl_validation=False,
)
if default_shed_tool_data_table_config:
config["shed_tool_data_table_config"] = default_shed_tool_data_table_config
diff --git a/run_tests.sh b/run_tests.sh
index 11c146353909..0163e92f1593 100755
--- a/run_tests.sh
+++ b/run_tests.sh
@@ -335,6 +335,7 @@ do
GALAXY_TEST_TOOL_CONF="lib/galaxy/config/sample/tool_conf.xml.sample,test/functional/tools/sample_tool_conf.xml"
marker="not cwl_conformance"
report_file="./run_api_tests.html"
+ generate_cwl_conformance_tests=1
if [ $# -gt 1 ]; then
api_script=$2
shift 2
@@ -407,6 +408,8 @@ do
marker="tool"
report_file="run_framework_tests.html"
framework_test=1
+ # CWL tools don't have embedded tests, so no need to set
+ # generate_cwl_conformance_tests=1
shift 1
;;
-w|--framework-workflows)
diff --git a/scripts/cwl-runner b/scripts/cwl-runner
new file mode 100755
index 000000000000..36931e87b375
--- /dev/null
+++ b/scripts/cwl-runner
@@ -0,0 +1,7 @@
+#!/bin/bash
+
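+# Run a CWL tool or workflow on a Galaxy instance by delegating to scripts/run_cwl.py.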
+CWD=$(pwd)
+cd "$(dirname "$0")/.."
+. ./scripts/common_startup_functions.sh >&2
+setup_python >&2
+python ./scripts/run_cwl.py --cwd="$CWD" "$@"
diff --git a/scripts/cwl_conformance_to_test_cases.py b/scripts/cwl_conformance_to_test_cases.py
index 1a440c00ab52..db15dfe44816 100644
--- a/scripts/cwl_conformance_to_test_cases.py
+++ b/scripts/cwl_conformance_to_test_cases.py
@@ -36,9 +36,6 @@ class TestCwlConformance(BaseCwlWorkflowsApiTestCase):
RED_TESTS = {
"v1.0": [
- # required
- "step_input_default_value_overriden_2nd_step_noexp",
- "step_input_default_value_overriden_noexp",
# not required
"clt_any_input_with_mixed_array_provided",
"directory_secondaryfiles",
@@ -55,7 +52,6 @@ class TestCwlConformance(BaseCwlWorkflowsApiTestCase):
"job_input_secondary_subdirs",
"job_input_subdir_primary_and_secondary_subdirs",
"resreq_step_overrides_wf",
- "step_input_default_value_overriden",
"step_input_default_value_overriden_2nd_step",
"valuefrom_wf_step",
"valuefrom_wf_step_multiple",
@@ -90,8 +86,6 @@ class TestCwlConformance(BaseCwlWorkflowsApiTestCase):
"stage_file_array_to_dir",
"stage_file_array_to_dir_basename",
"stage_file_array_to_dir_basename_entryname",
- "step_input_default_value_overriden_2nd_step_noexp",
- "step_input_default_value_overriden_noexp",
# not required
"clt_any_input_with_mixed_array_provided",
"cwl_requirements_addition",
@@ -124,7 +118,6 @@ class TestCwlConformance(BaseCwlWorkflowsApiTestCase):
"stage_array_dirs",
"stage_null_array",
"stdin_shorcut",
- "step_input_default_value_overriden",
"step_input_default_value_overriden_2nd_step",
"symlink_to_file_out_of_workdir_illegal",
"timelimit_expressiontool",
@@ -168,8 +161,6 @@ class TestCwlConformance(BaseCwlWorkflowsApiTestCase):
"stage_file_array",
"stage_file_array_basename",
"stage_file_array_entryname_overrides",
- "step_input_default_value_overriden_2nd_step_noexp",
- "step_input_default_value_overriden_noexp",
# not required
"initial_work_dir_for_null_and_arrays",
"initial_work_dir_for_array_dirs",
@@ -193,16 +184,11 @@ class TestCwlConformance(BaseCwlWorkflowsApiTestCase):
"conditionals_multi_scatter_nojs",
"conditionals_nested_cross_scatter",
"conditionals_nested_cross_scatter_nojs",
- "conditionals_non_boolean_fail",
- "conditionals_non_boolean_fail_nojs",
"cwl_requirements_addition",
"cwl_requirements_override_expression",
"cwl_requirements_override_static",
"cwloutput_nolimit",
"direct_optional_nonnull_result_nojs",
- "direct_optional_null_result",
- "direct_required",
- "direct_required_nojs",
"directory_secondaryfiles",
"docker_entrypoint",
"dockeroutputdir",
@@ -243,7 +229,6 @@ class TestCwlConformance(BaseCwlWorkflowsApiTestCase):
"scatter_on_scattered_conditional",
"scatter_on_scattered_conditional_nojs",
"secondary_files_in_named_records",
- "step_input_default_value_overriden",
"step_input_default_value_overriden_2nd_step",
"storage_float",
"the_only_non_null_single_true",
@@ -277,6 +262,44 @@ class TestCwlConformance(BaseCwlWorkflowsApiTestCase):
"workflow_input_loadContents_without_inputBinding",
"workflow_integer_input_optional_unspecified",
"workflow_step_in_loadContents",
+ "capture_dirs",
+ "capture_files",
+ "capture_files_and_dirs",
+ "colon_in_output_path",
+ "colon_in_paths",
+ "default_with_falsey_value",
+ "directory_literal_with_literal_file_in_subdir_nostdin",
+ "dotproduct_dotproduct_scatter",
+ "dotproduct_simple_scatter",
+ "filename_with_hash_mark",
+ "flat_crossproduct_flat_crossproduct_scatter",
+ "flat_crossproduct_simple_scatter",
+ "iwd_subdir",
+ "js_input_record",
+ "length_for_non_array",
+ "mixed_version_v12_wf",
+ "multiple_input_feature_requirement",
+ "nested_crossproduct_nested_crossproduct_scatter",
+ "nested_crossproduct_simple_scatter",
+ "nested_types",
+ "output_reference_workflow_input",
+ "paramref_arguments_inputs",
+ "paramref_arguments_self",
+ "params_broken_null",
+ "record_order_with_input_bindings",
+ "record_outputeval",
+ "record_outputeval_nojs",
+ "record_with_default",
+ "runtime_outdir",
+ "schemadef_types_with_import",
+ "simple_dotproduct_scatter",
+ "simple_flat_crossproduct_scatter",
+ "simple_nested_crossproduct_scatter",
+ "simple_simple_scatter",
+ "stdout_chained_commands",
+ "user_defined_length_in_parameter_reference",
+ "very_big_and_very_floats",
+ "very_big_and_very_floats_nojs",
],
}
diff --git a/scripts/run_cwl.py b/scripts/run_cwl.py
new file mode 100644
index 000000000000..bfa4b4b5ee2f
--- /dev/null
+++ b/scripts/run_cwl.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+
+import argparse
+import json
+import os
+import sys
+from typing import (
+ Any,
+ Dict,
+ List,
+)
+
+from bioblend import galaxy
+
+sys.path.insert(1, os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, "lib")))
+
+from galaxy.tool_util.cwl.runnable import get_outputs
+from galaxy.version import VERSION
+from galaxy_test.base.populators import ( # noqa: I100,I202
+ CwlPopulator,
+ CwlRun,
+ GiDatasetPopulator,
+ GiWorkflowPopulator,
+)
+
+DESCRIPTION = """Simple CWL runner script."""
+
+
+def collect_outputs(cwl_run: CwlRun, output_names: List[str], outdir: str) -> Dict[str, Any]:
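+ """Download each named output of the run into outdir and return the CWL output objects keyed by name."""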
+ outputs = {}
+ for output_name in output_names:
+ cwl_output = cwl_run.get_output_as_object(output_name, download_folder=outdir)
+ outputs[output_name] = cwl_output
+ return outputs
+
+
+def main(argv=None):
+ """Entry point for workflow driving."""
+ arg_parser = argparse.ArgumentParser(description=DESCRIPTION)
+ arg_parser.add_argument("--api_key", required=True)
+ arg_parser.add_argument("--host", default="http://localhost:8080/")
+ arg_parser.add_argument("--outdir", default=".")
+ arg_parser.add_argument("--quiet", action="store_true")
+ arg_parser.add_argument("--version", action="version", version=f"%(prog)s {VERSION}~CWL")
+ arg_parser.add_argument("--cwd", default=os.getcwd())
+ arg_parser.add_argument("tool", metavar="TOOL", help="tool or workflow")
+ arg_parser.add_argument("job", metavar="JOB", help="job")
+
+ args = arg_parser.parse_args(argv)
+
+ gi = galaxy.GalaxyInstance(args.host, args.api_key)
+ dataset_populator = GiDatasetPopulator(gi)
+ workflow_populator = GiWorkflowPopulator(gi)
+ cwl_populator = CwlPopulator(dataset_populator, workflow_populator)
+
+ abs_cwd = os.path.abspath(args.cwd)
+
+ tool = args.tool
+ if not os.path.isabs(tool):
+ tool = os.path.join(abs_cwd, tool)
+
+ job = args.job
+ if not os.path.isabs(job):
+ job = os.path.join(abs_cwd, job)
+
+ run = cwl_populator.run_cwl_job(tool, job, skip_input_staging=True)
+
+ output_definitions = get_outputs(tool)
+ output_names = [o.get_id() for o in output_definitions]
+ collected_outputs = collect_outputs(run, output_names, outdir=args.outdir)
+ print(json.dumps(collected_outputs, indent=4))
+ # for output_dataset in output_datasets.values():
+ # name = output_dataset.name
+ # print(run.get_output_as_object(name))
+
+
+if __name__ == "__main__":
+ main()
diff --git a/test/functional/tools/cat.json b/test/functional/tools/cat.json
new file mode 100644
index 000000000000..ff4d73f82509
--- /dev/null
+++ b/test/functional/tools/cat.json
@@ -0,0 +1,3 @@
+input1:
+ class: File
+ location: cat.json
diff --git a/test/functional/tools/cwl_tools/v1.0_custom/any1.cwl b/test/functional/tools/cwl_tools/v1.0_custom/any1.cwl
new file mode 100644
index 000000000000..0a1cca295273
--- /dev/null
+++ b/test/functional/tools/cwl_tools/v1.0_custom/any1.cwl
@@ -0,0 +1,17 @@
+#!/usr/bin/env cwl-runner
+class: CommandLineTool
+cwlVersion: v1.0
+requirements:
+ - class: InlineJavascriptRequirement
+
+inputs:
+ - id: bar
+ type: Any
+
+outputs:
+ - id: t1
+ type: Any
+ outputBinding:
+ outputEval: $(inputs.bar.class || inputs.bar)
+
+baseCommand: "true"
diff --git a/test/functional/tools/cwl_tools/v1.0_custom/cat-default.cwl b/test/functional/tools/cwl_tools/v1.0_custom/cat-default.cwl
new file mode 100755
index 000000000000..8feec95f2e2c
--- /dev/null
+++ b/test/functional/tools/cwl_tools/v1.0_custom/cat-default.cwl
@@ -0,0 +1,22 @@
+#!/usr/bin/env cwl-runner
+cwlVersion: "v1.0"
+class: CommandLineTool
+doc: "Print the contents of a file to stdout using 'cat' running in a docker container."
+hints:
+ DockerRequirement:
+ dockerPull: debian:wheezy
+inputs:
+ file1:
+ label: Input File
+ doc: "The file that will be copied using 'cat'"
+ type: File
+ default:
+ class: File
+ path: hello.txt
+ inputBinding: {position: 1}
+baseCommand: cat
+stdout: output.txt
+outputs:
+ output_file:
+ type: File
+ outputBinding: {glob: output.txt}
diff --git a/test/functional/tools/cwl_tools/v1.0_custom/cat-job.json b/test/functional/tools/cwl_tools/v1.0_custom/cat-job.json
new file mode 100644
index 000000000000..09f16ba7b173
--- /dev/null
+++ b/test/functional/tools/cwl_tools/v1.0_custom/cat-job.json
@@ -0,0 +1,6 @@
+{
+ "file1": {
+ "class": "File",
+ "path": "hello.txt"
+ }
+}
diff --git a/test/functional/tools/cwl_tools/v1.0_custom/cat-n-job.json b/test/functional/tools/cwl_tools/v1.0_custom/cat-n-job.json
new file mode 100644
index 000000000000..ee6416857d39
--- /dev/null
+++ b/test/functional/tools/cwl_tools/v1.0_custom/cat-n-job.json
@@ -0,0 +1,7 @@
+{
+ "file1": {
+ "class": "File",
+ "path": "hello.txt"
+ },
+ "numbering": true
+}
diff --git a/test/functional/tools/cwl_tools/v1.0_custom/cat1-tool.cwl b/test/functional/tools/cwl_tools/v1.0_custom/cat1-tool.cwl
new file mode 100755
index 000000000000..ecd2cacd6636
--- /dev/null
+++ b/test/functional/tools/cwl_tools/v1.0_custom/cat1-tool.cwl
@@ -0,0 +1,18 @@
+#!/usr/bin/env cwl-runner
+cwlVersion: v1.0
+class: CommandLineTool
+doc: "Print the contents of a file to stdout using 'cat' running in a docker container."
+hints:
+ DockerRequirement:
+ dockerPull: debian:wheezy
+inputs:
+ file1:
+ type: File
+ inputBinding: {position: 1}
+ numbering:
+ type: boolean?
+ inputBinding:
+ position: 0
+ prefix: -n
+baseCommand: cat
+outputs: {}
diff --git a/test/functional/tools/cwl_tools/v1.0_custom/cat2-tool.cwl b/test/functional/tools/cwl_tools/v1.0_custom/cat2-tool.cwl
new file mode 100755
index 000000000000..4a0e229eec7f
--- /dev/null
+++ b/test/functional/tools/cwl_tools/v1.0_custom/cat2-tool.cwl
@@ -0,0 +1,12 @@
+#!/usr/bin/env cwl-runner
+cwlVersion: v1.0
+class: CommandLineTool
+doc: "Print the contents of a file to stdout using 'cat' running in a docker container."
+hints:
+ DockerRequirement:
+ dockerPull: debian:wheezy
+inputs:
+ file1: File
+stdin: $(inputs.file1.path)
+baseCommand: cat
+outputs: {}
diff --git a/test/functional/tools/cwl_tools/v1.0_custom/default_path_custom_1.cwl b/test/functional/tools/cwl_tools/v1.0_custom/default_path_custom_1.cwl
new file mode 100644
index 000000000000..68dbd9f4a96c
--- /dev/null
+++ b/test/functional/tools/cwl_tools/v1.0_custom/default_path_custom_1.cwl
@@ -0,0 +1,10 @@
+cwlVersion: v1.0
+class: CommandLineTool
+inputs:
+ - id: "file1"
+ type: File
+ default:
+ class: File
+ path: md5sum.input
+outputs: []
+arguments: [cat,$(inputs.file1.path)]
diff --git a/test/functional/tools/cwl_tools/v1.0_custom/hello.txt b/test/functional/tools/cwl_tools/v1.0_custom/hello.txt
new file mode 100644
index 000000000000..cd0875583aab
--- /dev/null
+++ b/test/functional/tools/cwl_tools/v1.0_custom/hello.txt
@@ -0,0 +1 @@
+Hello world!
diff --git a/test/functional/tools/cwl_tools/v1.0_custom/index.py b/test/functional/tools/cwl_tools/v1.0_custom/index.py
new file mode 100644
index 000000000000..21aaabd8f949
--- /dev/null
+++ b/test/functional/tools/cwl_tools/v1.0_custom/index.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+
+# Toy program to generate an inverted index of word to line number.
+# Takes the input text file as its first argument and writes the index to <input>.idx1.
+
+import os
+import sys
+
+words = {}
+
+mainfile = sys.argv[1]
+indexfile = sys.argv[1] + ".idx1"
+
+main = open(mainfile)
+index = open(indexfile, "w")
+
+linenum = 0
+for line in main:
+ linenum += 1
+ line = line.rstrip().lower().replace(".", "").replace(",", "").replace(";", "").replace("-", " ")
+ for w in line.split(" "):
+ if w:
+ if w not in words:
+ words[w] = set()
+ words[w].add(linenum)
+
+for w in sorted(words.keys()):
+ index.write(f"{w}: {', '.join(str(i) for i in words[w])}\n")
+
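+# Also create a few extra, empty index files alongside the primary index.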
+open(os.path.splitext(sys.argv[1])[0] + ".idx2", "w")
+open(sys.argv[1] + ".idx3", "w")
+open(sys.argv[1] + ".idx4", "w")
+open(sys.argv[1] + ".idx5", "w")
diff --git a/test/functional/tools/cwl_tools/v1.0_custom/index1.cwl b/test/functional/tools/cwl_tools/v1.0_custom/index1.cwl
new file mode 100755
index 000000000000..92346a4008be
--- /dev/null
+++ b/test/functional/tools/cwl_tools/v1.0_custom/index1.cwl
@@ -0,0 +1,28 @@
+#!/usr/bin/env cwl-runner
+cwlVersion: "v1.0"
+class: CommandLineTool
+requirements:
+ InitialWorkDirRequirement:
+ listing:
+ - entryname: input.txt
+ entry: $(inputs.file)
+inputs:
+ file: File
+ index.py:
+ type: File
+ default:
+ class: File
+ path: index.py
+ inputBinding:
+ position: 0
+baseCommand: python
+arguments:
+- valueFrom: input.txt
+ position: 1
+outputs:
+ result:
+ type: File
+ secondaryFiles:
+ - ".idx1"
+ outputBinding:
+ glob: input.txt
diff --git a/test/functional/tools/cwl_tools/v1.0_custom/just-wc-wf.cwl b/test/functional/tools/cwl_tools/v1.0_custom/just-wc-wf.cwl
new file mode 100644
index 000000000000..537c4db9b014
--- /dev/null
+++ b/test/functional/tools/cwl_tools/v1.0_custom/just-wc-wf.cwl
@@ -0,0 +1,32 @@
+#!/usr/bin/env cwl-runner
+class: Workflow
+cwlVersion: v1.0
+
+inputs:
+ file1:
+ type: File
+
+outputs:
+ count_output:
+ type: File
+ outputSource: step1/wc_output
+
+steps:
+ step1:
+ in:
+ wc_file1: file1
+ out: [wc_output]
+ run:
+ id: wc
+ class: CommandLineTool
+ inputs:
+ wc_file1:
+ type: File
+ inputBinding: {}
+ outputs:
+ wc_output:
+ type: File
+ outputBinding:
+ glob: output.txt
+ stdout: output.txt
+ baseCommand: wc
diff --git a/test/functional/tools/cwl_tools/v1.0_custom/md5sum.input b/test/functional/tools/cwl_tools/v1.0_custom/md5sum.input
new file mode 100644
index 000000000000..3c5e4cdbdcfd
--- /dev/null
+++ b/test/functional/tools/cwl_tools/v1.0_custom/md5sum.input
@@ -0,0 +1 @@
+this is the test file that will be used when calculating an md5sum
\ No newline at end of file
diff --git a/test/functional/tools/cwl_tools/v1.0_custom/md5sum_non_strict.cwl b/test/functional/tools/cwl_tools/v1.0_custom/md5sum_non_strict.cwl
new file mode 100644
index 000000000000..3656fe1176c7
--- /dev/null
+++ b/test/functional/tools/cwl_tools/v1.0_custom/md5sum_non_strict.cwl
@@ -0,0 +1,50 @@
+#!/usr/bin/env cwl-runner
+
+class: CommandLineTool
+id: Md5sum
+label: Simple md5sum tool
+cwlVersion: v1.0
+
+$namespaces:
+ dct: http://purl.org/dc/terms/
+ foaf: http://xmlns.com/foaf/0.1/
+
+doc: |
+ [![Docker Repository on Quay.io](https://quay.io/repository/briandoconnor/dockstore-tool-md5sum/status "Docker Repository on Quay.io")](https://quay.io/repository/briandoconnor/dockstore-tool-md5sum)
+ [![Build Status](https://travis-ci.org/briandoconnor/dockstore-tool-md5sum.svg)](https://travis-ci.org/briandoconnor/dockstore-tool-md5sum)
+ A very, very simple Docker container for the md5sum command. See the [README](https://github.com/briandoconnor/dockstore-tool-md5sum/blob/master/README.md) for more information.
+
+
+dct:creator:
+ '@id': http://orcid.org/0000-0002-7681-6415
+ foaf:name: Brian O'Connor
+ foaf:mbox: briandoconnor@gmail.com
+
+requirements:
+- class: DockerRequirement
+ dockerPull: quay.io/briandoconnor/dockstore-tool-md5sum:1.0.2
+- class: InlineJavascriptRequirement
+
+hints:
+- class: ResourceRequirement
+ coresMin: 1
+ ramMin: 1024
+ outdirMin: 512000
+ description: the command really requires very little resources.
+
+inputs:
+ input_file:
+ type: File
+ inputBinding:
+ position: 1
+ doc: The file that will have its md5sum calculated.
+
+outputs:
+ output_file:
+ type: File
+ format: http://edamontology.org/data_3671
+ outputBinding:
+ glob: md5sum.txt
+ doc: A text file that contains a single line that is the md5sum of the input file.
+
+baseCommand: [/bin/my_md5sum]
diff --git a/test/functional/tools/cwl_tools/v1.0_custom/optional-output2.cwl b/test/functional/tools/cwl_tools/v1.0_custom/optional-output2.cwl
new file mode 100755
index 000000000000..f0e266a952b3
--- /dev/null
+++ b/test/functional/tools/cwl_tools/v1.0_custom/optional-output2.cwl
@@ -0,0 +1,25 @@
+#!/usr/bin/env cwl-runner
+cwlVersion: "v1.0"
+class: CommandLineTool
+doc: "Print the contents of a file to stdout using 'cat' running in a docker container."
+hints:
+ DockerRequirement:
+ dockerPull: docker.io/python:3-slim
+inputs:
+ optionaloutput.py:
+ type: File
+ default:
+ class: File
+ path: optionaloutput.py
+ inputBinding:
+ position: 0
+ produce:
+ type: string
+ inputBinding:
+ position: 1
+baseCommand: python3
+outputs:
+ optional_file:
+ type: File?
+ outputBinding:
+ glob: bumble.txt
diff --git a/test/functional/tools/cwl_tools/v1.0_custom/optionaloutput.py b/test/functional/tools/cwl_tools/v1.0_custom/optionaloutput.py
new file mode 100644
index 000000000000..07c26d19776b
--- /dev/null
+++ b/test/functional/tools/cwl_tools/v1.0_custom/optionaloutput.py
@@ -0,0 +1,6 @@
+#!/usr/bin/env python
+
+import sys
+
+if sys.argv[1] == "do_write":
+ open("bumble.txt", "w").write("bees\n")
diff --git a/test/functional/tools/cwl_tools/v1.0_custom/showindex.py b/test/functional/tools/cwl_tools/v1.0_custom/showindex.py
new file mode 100644
index 000000000000..819dae03eebf
--- /dev/null
+++ b/test/functional/tools/cwl_tools/v1.0_custom/showindex.py
@@ -0,0 +1,11 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import sys
+
+indexfile = sys.argv[1] + ".idx1"
+
+index = open(indexfile, "r").read()
+
+print(index)
diff --git a/test/functional/tools/cwl_tools/v1.0_custom/showindex1.cwl b/test/functional/tools/cwl_tools/v1.0_custom/showindex1.cwl
new file mode 100755
index 000000000000..e0bbae424757
--- /dev/null
+++ b/test/functional/tools/cwl_tools/v1.0_custom/showindex1.cwl
@@ -0,0 +1,24 @@
+#!/usr/bin/env cwl-runner
+cwlVersion: "v1.0"
+class: CommandLineTool
+inputs:
+ file:
+ type: File
+ secondaryFiles:
+ - ".idx1"
+ inputBinding:
+ position: 1
+ showindex.py:
+ type: File
+ default:
+ class: File
+ path: showindex.py
+ inputBinding:
+ position: 0
+baseCommand: python
+stdout: output.txt
+outputs:
+ output_txt:
+ type: File
+ outputBinding:
+ glob: output.txt
diff --git a/test/functional/tools/cwl_tools/v1.0_custom/whale.txt b/test/functional/tools/cwl_tools/v1.0_custom/whale.txt
new file mode 100644
index 000000000000..425d1ed02c8d
--- /dev/null
+++ b/test/functional/tools/cwl_tools/v1.0_custom/whale.txt
@@ -0,0 +1,16 @@
+Call me Ishmael. Some years ago--never mind how long precisely--having
+little or no money in my purse, and nothing particular to interest me on
+shore, I thought I would sail about a little and see the watery part of
+the world. It is a way I have of driving off the spleen and regulating
+the circulation. Whenever I find myself growing grim about the mouth;
+whenever it is a damp, drizzly November in my soul; whenever I find
+myself involuntarily pausing before coffin warehouses, and bringing up
+the rear of every funeral I meet; and especially whenever my hypos get
+such an upper hand of me, that it requires a strong moral principle to
+prevent me from deliberately stepping into the street, and methodically
+knocking people's hats off--then, I account it high time to get to sea
+as soon as I can. This is my substitute for pistol and ball. With a
+philosophical flourish Cato throws himself upon his sword; I quietly
+take to the ship. There is nothing surprising in this. If they but knew
+it, almost all men in their degree, some time or other, cherish very
+nearly the same feelings towards the ocean with me.
diff --git a/test/functional/tools/galactic_cat.cwl b/test/functional/tools/galactic_cat.cwl
new file mode 100644
index 000000000000..c577bd854221
--- /dev/null
+++ b/test/functional/tools/galactic_cat.cwl
@@ -0,0 +1,31 @@
+#!/usr/bin/env cwl-runner
+$namespaces:
+ gx: "http://galaxyproject.org/cwl#"
+cwlVersion: v1.0
+class: CommandLineTool
+id: "galactic_cat"
+gx:version: '1.2'
+doc: |
+ Galactic Cat.
+inputs:
+ - id: input1
+ type: File
+ inputBinding:
+ position: 1
+outputs:
+ - id: output1
+ type: File
+ outputBinding:
+ glob: out
+baseCommand: ["cat"]
+arguments: []
+stdout: out
+hints:
+ gx:interface:
+ gx:inputs:
+ - gx:name: input1
+ gx:type: data
+ gx:format: 'txt'
+ gx:outputs:
+ output1:
+ gx:format: 'txt'
diff --git a/test/functional/tools/galactic_record_input.cwl b/test/functional/tools/galactic_record_input.cwl
new file mode 100644
index 000000000000..f0b098540e9c
--- /dev/null
+++ b/test/functional/tools/galactic_record_input.cwl
@@ -0,0 +1,57 @@
+#!/usr/bin/env cwl-runner
+$namespaces:
+ gx: "http://galaxyproject.org/cwl#"
+class: CommandLineTool
+cwlVersion: v1.0
+requirements:
+ - class: ShellCommandRequirement
+id: galactic_record_input
+gx:version: '1.2'
+inputs:
+ irec:
+ type:
+ name: irec
+ type: record
+ fields:
+ - name: ifoo
+ type: File
+ inputBinding:
+ position: 2
+ - name: ibar
+ type: File
+ inputBinding:
+ position: 6
+
+outputs:
+ - id: ofoo
+ type: File
+ outputBinding:
+ glob: foo
+ - id: obar
+ type: File
+ outputBinding:
+ glob: bar
+
+arguments:
+ - {valueFrom: "cat", position: 1}
+ - {valueFrom: "> foo", position: 3, shellQuote: false}
+ - {valueFrom: "&&", position: 4, shellQuote: false}
+ - {valueFrom: "cat", position: 5}
+ - {valueFrom: "> bar", position: 7, shellQuote: false}
+
+hints:
+ gx:interface:
+ gx:inputs:
+ - gx:name: input1
+ gx:type: data
+ gx:format: 'txt'
+ gx:mapTo: 'irec/ifoo'
+ - gx:name: input2
+ gx:type: data
+ gx:format: 'txt'
+ gx:mapTo: 'irec/ibar'
+ gx:outputs:
+ ofoo:
+ gx:format: 'txt'
+ obar:
+ gx:format: 'txt'
diff --git a/test/functional/tools/sample_tool_conf.xml b/test/functional/tools/sample_tool_conf.xml
index 0918d7dc92b4..7f44db4820fc 100644
--- a/test/functional/tools/sample_tool_conf.xml
+++ b/test/functional/tools/sample_tool_conf.xml
@@ -249,6 +249,26 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+