diff --git a/.github/workflows/cwl_conformance.yaml b/.github/workflows/cwl_conformance.yaml index 10fb215bf042..03aaf4825b23 100644 --- a/.github/workflows/cwl_conformance.yaml +++ b/.github/workflows/cwl_conformance.yaml @@ -18,7 +18,6 @@ concurrency: jobs: test: name: Test - if: ${{ false }} runs-on: ubuntu-latest continue-on-error: ${{ startsWith(matrix.marker, 'red') }} strategy: @@ -26,7 +25,10 @@ jobs: matrix: python-version: ['3.8'] marker: ['green', 'red and required', 'red and not required'] - conformance-version: ['cwl_conformance_v1_0'] #, 'cwl_conformance_v1_1', 'cwl_conformance_v1_2'] + conformance-version: ['cwl_conformance_v1_0', 'cwl_conformance_v1_1', 'cwl_conformance_v1_2'] + exclude: + - marker: red and required + conformance-version: cwl_conformance_v1_0 services: postgres: image: postgres:13 diff --git a/client/src/api/datasets.ts b/client/src/api/datasets.ts index 6b8f0606537c..fb4809c8487a 100644 --- a/client/src/api/datasets.ts +++ b/client/src/api/datasets.ts @@ -67,6 +67,7 @@ export async function copyDataset( // TODO: Investigate. These should be optional, but the API requires explicit null values? type, copy_elements: null, + fields: null, hide_source_items: null, instance_type: null, }, diff --git a/client/src/api/schema/schema.ts b/client/src/api/schema/schema.ts index 1e9ab944ad2e..1b441d740f2b 100644 --- a/client/src/api/schema/schema.ts +++ b/client/src/api/schema/schema.ts @@ -6955,6 +6955,12 @@ export interface components { * @description List of elements that should be in the new collection. */ element_identifiers?: components["schemas"]["CollectionElementIdentifier"][] | null; + /** + * Fields + * @description List of fields to create for this collection. Set to 'auto' to guess fields from identifiers. + * @default [] + */ + fields: string | components["schemas"]["FieldDict"][] | null; /** * Folder Id * @description The ID of the library folder that will contain the collection. Required if `instance_type=library`. @@ -7147,6 +7153,12 @@ export interface components { * @description List of elements that should be in the new collection. */ element_identifiers?: components["schemas"]["CollectionElementIdentifier"][] | null; + /** + * Fields + * @description List of fields to create for this collection. Set to 'auto' to guess fields from identifiers. + * @default [] + */ + fields: string | components["schemas"]["FieldDict"][] | null; /** * Folder Id * @description The ID of the library folder that will contain the collection. Required if `instance_type=library`. 
@@ -9097,6 +9109,13 @@ export interface components { /** Hash Value */ hash_value: string; }; + /** FieldDict */ + FieldDict: { + /** Name */ + name: string; + /** Type */ + type: string; + }; /** FileDataElement */ FileDataElement: { /** Md5 */ diff --git a/client/src/components/History/model/queries.ts b/client/src/components/History/model/queries.ts index 5cad0faee73e..dbd7bb44ba9b 100644 --- a/client/src/components/History/model/queries.ts +++ b/client/src/components/History/model/queries.ts @@ -86,6 +86,7 @@ export async function createDatasetCollection(history: HistorySummary, inputs = copy_elements: true, name: "list", element_identifiers: [], + fields: "auto", hide_source_items: true, }; const payload = Object.assign({}, defaults, inputs); diff --git a/doc/source/dev/cwl.md b/doc/source/dev/cwl.md new file mode 100644 index 000000000000..a1cd882c3fd6 --- /dev/null +++ b/doc/source/dev/cwl.md @@ -0,0 +1,24 @@ +CWL import in Galaxy +==================== + +What is supported +----------------- + +What is not supported +--------------------- + +Some CWL Expressions / Parameter references that do math on `$(resources.cores)` +or similar will likely not work. + +How to enable it? +----------------- + +1. List paths to CWL tools in `tool_conf.xml` . +2. Set the following in `galaxy.yml`: + + ```yaml + enable_beta_tool_formats: true + enable_beta_workflow_modules: true + check_upload_content: false + strict_cwl_validation: false + ``` diff --git a/lib/galaxy/config/__init__.py b/lib/galaxy/config/__init__.py index 1dc273bc7240..fddb34058b9d 100644 --- a/lib/galaxy/config/__init__.py +++ b/lib/galaxy/config/__init__.py @@ -931,6 +931,9 @@ def _process_config(self, kwargs: Dict[str, Any]) -> None: else None ) + # TODO: migrate to schema. + # Should CWL artifacts be loaded with strict validation enabled. + self.strict_cwl_validation = string_as_bool(kwargs.get("strict_cwl_validation", "True")) # These are not even beta - just experiments - don't use them unless # you want yours tools to be broken in the future. 
self.enable_beta_tool_formats = string_as_bool(kwargs.get("enable_beta_tool_formats", "False")) diff --git a/lib/galaxy/config/sample/datatypes_conf.xml.sample b/lib/galaxy/config/sample/datatypes_conf.xml.sample index 89024e5ace7b..ba6fb15af152 100644 --- a/lib/galaxy/config/sample/datatypes_conf.xml.sample +++ b/lib/galaxy/config/sample/datatypes_conf.xml.sample @@ -301,7 +301,7 @@ - + diff --git a/lib/galaxy/datatypes/converters/tar_to_directory.xml b/lib/galaxy/datatypes/converters/tar_to_directory.xml index 0160746283da..850a4f67e640 100644 --- a/lib/galaxy/datatypes/converters/tar_to_directory.xml +++ b/lib/galaxy/datatypes/converters/tar_to_directory.xml @@ -1,18 +1,23 @@ - + galaxy-util - - mkdir '$output1.files_path'; - cd '$output1.files_path'; - python -c "from galaxy.util.compression_utils import CompressedFile; CompressedFile('$input1').extract('.');" - + + + {"output1": {"created_from_basename": "${input1.created_from_basename}"}} + + - + @@ -20,6 +25,6 @@ - - + diff --git a/lib/galaxy/datatypes/registry.py b/lib/galaxy/datatypes/registry.py index 0789b0dd9ee5..45b9eaf766c4 100644 --- a/lib/galaxy/datatypes/registry.py +++ b/lib/galaxy/datatypes/registry.py @@ -72,6 +72,7 @@ def __init__(self, config=None): self.config = config self.edam = edam self.datatypes_by_extension: Dict[str, Data] = {} + self.datatypes_by_format = {} self.datatypes_by_suffix_inferences = {} self.mimetypes_by_extension = {} self.datatype_converters = {} @@ -269,13 +270,25 @@ def __import_module(full_path: str, datatype_module: str): upload_warning_template = Template(upload_warning_el.text or "") datatype_instance = datatype_class() self.datatypes_by_extension[extension] = datatype_instance + if not datatype_class.is_subclass: + edam_format = datatype_class.edam_format + prefixed_format = f"edam:{edam_format}" + if prefixed_format not in self.datatypes_by_format: + register_datatype_by_format = True + for super_klass in datatype_class.__mro__[1:-1]: + super_edam_format = getattr(super_klass, "edam_format", None) + if super_edam_format == edam_format: + register_datatype_by_format = False + break + if register_datatype_by_format: + self.datatypes_by_format[prefixed_format] = datatype_instance if mimetype is None: # Use default mimetype per datatype specification. mimetype = self.datatypes_by_extension[extension].get_mime() self.mimetypes_by_extension[extension] = mimetype if datatype_class.track_type: self.available_tracks.append(extension) - if display_in_upload and extension not in self.upload_file_formats: + if display_in_upload: self.upload_file_formats.append(extension) # Max file size cut off for setting optional metadata. 
self.datatypes_by_extension[extension].max_optional_metadata_filesize = elem.get( @@ -413,6 +426,7 @@ def __import_module(full_path: str, datatype_module: str): override=override, compressed_sniffers=compressed_sniffers, ) + self.upload_file_formats = list(set(self.upload_file_formats)) self.upload_file_formats.sort() # Load build sites if use_build_sites: @@ -613,6 +627,20 @@ def get_datatype_by_extension(self, ext) -> Optional["Data"]: """Returns a datatype object based on an extension""" return self.datatypes_by_extension.get(ext, None) + def get_datatype_by_format_ontology(self, ontology: str): + """Returns a datatype by format ontology""" + if "edamontology.org/" in ontology: + ontology = f"edam:{ontology.split('edamontology.org/')[1]}" + return self.datatypes_by_format.get(ontology) + + def get_datatype_ext_by_format_ontology(self, ontology: str, only_uploadable: bool = False) -> Optional[str]: + """Returns a datatype by format ontology""" + datatype = self.get_datatype_by_format_ontology(ontology) + if datatype: + if not only_uploadable or datatype.file_ext in self.upload_file_formats: + return datatype.file_ext + return None + def change_datatype(self, data, ext): if data.extension != ext: data.extension = ext diff --git a/lib/galaxy/jobs/__init__.py b/lib/galaxy/jobs/__init__.py index 9e4d3b65376c..340080121983 100644 --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -1144,7 +1144,7 @@ def can_split(self): @property def is_cwl_job(self): - return self.tool.tool_type == "cwl" + return self.tool.tool_type in ["galactic_cwl", "cwl"] def get_job_runner_url(self): log.warning(f"({self.job_id}) Job runner URLs are deprecated, use destinations instead.") @@ -1776,8 +1776,9 @@ def _finish_dataset( dataset.mark_unhidden() elif not purged: # If the tool was expected to set the extension, attempt to retrieve it - if dataset.ext == "auto": - dataset.extension = context.get("ext", "data") + context_ext = context.get("ext", "data") + if dataset.ext == "auto" or (dataset.ext == "data" and context_ext != "data"): + dataset.extension = context_ext dataset.init_meta(copy_from=dataset) # if a dataset was copied, it won't appear in our dictionary: # either use the metadata from originating output dataset, or call set_meta on the copies diff --git a/lib/galaxy/jobs/command_factory.py b/lib/galaxy/jobs/command_factory.py index 3e920fa7f52b..9f60c9852263 100644 --- a/lib/galaxy/jobs/command_factory.py +++ b/lib/galaxy/jobs/command_factory.py @@ -100,19 +100,26 @@ def build_command( external_command_shell = container.shell else: external_command_shell = shell - externalized_commands = __externalize_commands( - job_wrapper, external_command_shell, commands_builder, remote_command_params, container=container - ) if container and modify_command_for_container: - # Stop now and build command before handling metadata and copying - # working directory files back. These should always happen outside - # of docker container - no security implications when generating - # metadata and means no need for Galaxy to be available to container - # and not copying workdir outputs back means on can be more restrictive - # of where container can write to in some circumstances. 
- run_in_container_command = container.containerize_command(externalized_commands) + if job_wrapper.tool and not job_wrapper.tool.may_use_container_entry_point: + externalized_commands = __externalize_commands( + job_wrapper, external_command_shell, commands_builder, remote_command_params, container=container + ) + # Stop now and build command before handling metadata and copying + # working directory files back. These should always happen outside + # of docker container - no security implications when generating + # metadata and means no need for Galaxy to be available to container + # and not copying workdir outputs back means one can be more restrictive + # of where container can write to in some circumstances. + run_in_container_command = container.containerize_command(externalized_commands) + else: + tool_commands = commands_builder.build() + run_in_container_command = container.containerize_command(tool_commands) commands_builder = CommandsBuilder(run_in_container_command) else: + externalized_commands = __externalize_commands( + job_wrapper, external_command_shell, commands_builder, remote_command_params, container=container + ) commands_builder = CommandsBuilder(externalized_commands) # Galaxy writes I/O files to outputs, Pulsar uses metadata. metadata seems like @@ -130,7 +137,13 @@ def build_command( # Copy working and outputs before job submission so that these can be restored on resubmission # xref https://github.com/galaxyproject/galaxy/issues/3289 - commands_builder.prepend_command(PREPARE_DIRS) + if not job_wrapper.is_cwl_job: + commands_builder.prepend_command(PREPARE_DIRS) + else: + # Can't do the `rm -rf working` for CWL jobs since we may have staged outputs + # into that directory. This does mean CWL is incompatible with job manager triggered + # retries - what can we do with that information? + commands_builder.prepend_command("mkdir -p outputs; cd working") __handle_remote_command_line_building(commands_builder, job_wrapper, for_pulsar=for_pulsar) diff --git a/lib/galaxy/jobs/runners/local.py b/lib/galaxy/jobs/runners/local.py index eb1853eb01bf..5726080859aa 100644 --- a/lib/galaxy/jobs/runners/local.py +++ b/lib/galaxy/jobs/runners/local.py @@ -4,6 +4,7 @@ import datetime import logging +import math import os import subprocess import tempfile @@ -67,7 +68,16 @@ def _command_line(self, job_wrapper: "MinimalJobWrapper") -> Tuple[str, str]: if slots: slots_statement = f'GALAXY_SLOTS="{int(slots)}"; export GALAXY_SLOTS; GALAXY_SLOTS_CONFIGURED="1"; export GALAXY_SLOTS_CONFIGURED;' else: - slots_statement = 'GALAXY_SLOTS="1"; export GALAXY_SLOTS;' + cores_min = 1 + if job_wrapper.tool: + try: + # In CWL 1.2 it can be a float that can be rounded to the next whole number + cores_min = math.ceil(float(job_wrapper.tool.cores_min)) + except ValueError: + # TODO: in CWL this can be an expression referencing runtime + # parameters, e.g.
`$(inputs.special_file.size)` + pass + slots_statement = f'GALAXY_SLOTS="{cores_min}"; export GALAXY_SLOTS;' job_id = job_wrapper.get_id_tag() job_file = JobState.default_job_file(job_wrapper.working_directory, job_id) diff --git a/lib/galaxy/managers/collections.py b/lib/galaxy/managers/collections.py index a809f0214289..dd459064269a 100644 --- a/lib/galaxy/managers/collections.py +++ b/lib/galaxy/managers/collections.py @@ -175,6 +175,7 @@ def create( flush=True, completed_job=None, output_name=None, + fields=None, ): """ PRECONDITION: security checks on ability to add to parent @@ -199,6 +200,7 @@ def create( hide_source_items=hide_source_items, copy_elements=copy_elements, history=history, + fields=fields, ) implicit_inputs = [] @@ -242,8 +244,11 @@ def _create_instance_for_collection( name=name, ) assert isinstance(dataset_collection_instance, model.HistoryDatasetCollectionAssociation) + if implicit_inputs: for input_name, input_collection in implicit_inputs: + if getattr(input_collection, "ephemeral", False): + input_collection = input_collection.persistent_object dataset_collection_instance.add_implicit_input_collection(input_name, input_collection) if implicit_output_name: @@ -285,17 +290,20 @@ def create_dataset_collection( hide_source_items=None, copy_elements=False, history=None, + fields=None, ): # Make sure at least one of these is None. assert element_identifiers is None or elements is None - if element_identifiers is None and elements is None: raise RequestParameterInvalidException(ERROR_INVALID_ELEMENTS_SPECIFICATION) if not collection_type: raise RequestParameterInvalidException(ERROR_NO_COLLECTION_TYPE) - collection_type_description = self.collection_type_descriptions.for_collection_type(collection_type) + collection_type_description = self.collection_type_descriptions.for_collection_type( + collection_type, fields=fields + ) has_subcollections = collection_type_description.has_subcollections() + # If we have elements, this is an internal request, don't need to load # objects from identifiers. if elements is None: @@ -319,8 +327,9 @@ def create_dataset_collection( if elements is not self.ELEMENTS_UNINITIALIZED: type_plugin = collection_type_description.rank_type_plugin() - dataset_collection = builder.build_collection(type_plugin, elements) + dataset_collection = builder.build_collection(type_plugin, elements, fields=fields) else: + # TODO: Pass fields here - need test case first. 
dataset_collection = model.DatasetCollection(populated=False) dataset_collection.collection_type = collection_type return dataset_collection @@ -400,6 +409,8 @@ def _append_tags(self, dataset_collection_instance, implicit_inputs=None, tags=N tags = tags or {} implicit_inputs = implicit_inputs or [] for _, v in implicit_inputs: + if getattr(v, "ephemeral", False): + v = v.persistent_object for tag in v.auto_propagated_tags: tags[tag.value] = tag for _, tag in tags.items(): diff --git a/lib/galaxy/managers/collections_util.py b/lib/galaxy/managers/collections_util.py index d0ca89f61626..0302de9c5fdb 100644 --- a/lib/galaxy/managers/collections_util.py +++ b/lib/galaxy/managers/collections_util.py @@ -39,6 +39,7 @@ def api_payload_to_create_params(payload): name=payload.get("name", None), hide_source_items=string_as_bool(payload.get("hide_source_items", False)), copy_elements=string_as_bool(payload.get("copy_elements", False)), + fields=payload.get("fields", None), ) return params diff --git a/lib/galaxy/managers/datasets.py b/lib/galaxy/managers/datasets.py index 9dd9f87c0638..2645af3db524 100644 --- a/lib/galaxy/managers/datasets.py +++ b/lib/galaxy/managers/datasets.py @@ -666,6 +666,7 @@ def add_serializers(self): "genome_build": lambda item, key, **context: str(item.dbkey) if item.dbkey is not None else None, # derived (not mapped) attributes "data_type": lambda item, key, **context: f"{item.datatype.__class__.__module__}.{item.datatype.__class__.__name__}", + "cwl_formats": lambda item, key, **context: item.cwl_formats, "converted": self.serialize_converted_datasets, # TODO: metadata/extra files } diff --git a/lib/galaxy/managers/executables.py b/lib/galaxy/managers/executables.py index 0993b32856b1..fc42724e490f 100644 --- a/lib/galaxy/managers/executables.py +++ b/lib/galaxy/managers/executables.py @@ -29,10 +29,10 @@ def artifact_class(trans, as_dict: Dict[str, Any], allow_in_directory: Optional[ as_dict = yaml.safe_load(f) artifact_class = as_dict.get("class", None) + target_object = None if artifact_class is None and "$graph" in as_dict: object_id = object_id or "main" graph = as_dict["$graph"] - target_object = None if isinstance(graph, dict): target_object = graph.get(object_id) else: @@ -40,11 +40,14 @@ def artifact_class(trans, as_dict: Dict[str, Any], allow_in_directory: Optional[ found_id = item.get("id") if found_id == object_id or found_id == f"#{object_id}": target_object = item + break if target_object and target_object.get("class"): artifact_class = target_object["class"] + if artifact_class in ("CommandLineTool", "ExpressionTool"): + target_object["cwlVersion"] = as_dict["cwlVersion"] - return artifact_class, as_dict, object_id + return artifact_class, as_dict, object_id, target_object __all__ = ("artifact_class",) diff --git a/lib/galaxy/managers/hdas.py b/lib/galaxy/managers/hdas.py index 5e2c08347f31..1a9eff0f508d 100644 --- a/lib/galaxy/managers/hdas.py +++ b/lib/galaxy/managers/hdas.py @@ -527,6 +527,7 @@ def __init__(self, app: StructuredApp): "file_name", "display_apps", "display_types", + "cwl_formats", "validated_state", "validated_state_message", # 'url', diff --git a/lib/galaxy/managers/tools.py b/lib/galaxy/managers/tools.py index c6dbe471dc84..180c080d4534 100644 --- a/lib/galaxy/managers/tools.py +++ b/lib/galaxy/managers/tools.py @@ -68,9 +68,10 @@ def create_tool(self, trans, tool_payload, allow_load=True): if not dynamic_tool: src = tool_payload.get("src", "representation") is_path = src == "from_path" + target_object = None if is_path: - tool_format, 
representation, _ = artifact_class(None, tool_payload) + tool_format, representation, _, target_object = artifact_class(None, tool_payload) else: assert src == "representation" representation = tool_payload.get("representation") @@ -81,7 +82,10 @@ def create_tool(self, trans, tool_payload, allow_load=True): if not tool_format: raise exceptions.ObjectAttributeMissingException("Current tool representations require 'class'.") - tool_path = tool_payload.get("path") + # Set tool_path to None so that in ToolBox.create_dynamic_tool() + # the tool source is by default recovered using + # get_tool_source_from_representation() + tool_path = None tool_directory = tool_payload.get("tool_directory") if tool_format == "GalaxyTool": tool_id = representation.get("id") @@ -89,7 +93,11 @@ def create_tool(self, trans, tool_payload, allow_load=True): tool_id = str(uuid) elif tool_format in ("CommandLineTool", "ExpressionTool"): # CWL tools - if is_path: + if target_object is not None: + representation = {"raw_process_reference": target_object, "uuid": str(uuid), "class": tool_format} + proxy = tool_proxy(tool_object=target_object, tool_directory=tool_directory, uuid=uuid) + elif is_path: + tool_path = tool_payload.get("path") proxy = tool_proxy(tool_path=tool_path, uuid=uuid) else: # Build a tool proxy so that we can convert to the persistable diff --git a/lib/galaxy/managers/workflows.py b/lib/galaxy/managers/workflows.py index 20354c029198..c6202eddaf6f 100644 --- a/lib/galaxy/managers/workflows.py +++ b/lib/galaxy/managers/workflows.py @@ -1,6 +1,7 @@ import json import logging import os +import tempfile import uuid from typing import ( Any, @@ -92,6 +93,7 @@ from galaxy.schema.invocation import InvocationCancellationUserRequest from galaxy.schema.schema import WorkflowIndexQueryPayload from galaxy.structured_app import MinimalManagerApp +from galaxy.tool_util.cwl import workflow_proxy from galaxy.tools.parameters import ( params_to_incoming, visit_input_values, @@ -613,7 +615,7 @@ def read_workflow_from_path(self, app, user, path, allow_in_directory=None) -> m trans = WorkRequestContext(app=self.app, user=user) as_dict = {"src": "from_path", "path": path} - workflow_class, as_dict, object_id = artifact_class(trans, as_dict, allow_in_directory=allow_in_directory) + workflow_class, as_dict, object_id, _ = artifact_class(trans, as_dict, allow_in_directory=allow_in_directory) assert workflow_class == "GalaxyWorkflow" # Format 2 Galaxy workflow. galaxy_interface = Format2ConverterGalaxyInterface() @@ -643,7 +645,7 @@ def normalize_workflow_format(self, trans, as_dict): workflow_path = as_dict.get("path") workflow_directory = os.path.normpath(os.path.dirname(workflow_path)) - workflow_class, as_dict, object_id = artifact_class(trans, as_dict) + workflow_class, as_dict, object_id, _ = artifact_class(trans, as_dict) if workflow_class == "GalaxyWorkflow" or "yaml_content" in as_dict: # Format 2 Galaxy workflow. 
galaxy_interface = Format2ConverterGalaxyInterface() @@ -655,6 +657,37 @@ def normalize_workflow_format(self, trans, as_dict): ) except yaml.scanner.ScannerError as e: raise exceptions.MalformedContents(str(e)) + elif workflow_class == "Workflow": + from galaxy.tool_util.cwl import workflow_proxy + + # create a temporary file for the workflow if it is provided + # as JSON, to make it parseable by the WorkflowProxy + if workflow_path is None: + with tempfile.NamedTemporaryFile(mode="w+", delete=False) as f: + json.dump(as_dict, f) + workflow_path = f.name + if object_id: + workflow_path += "#" + object_id + wf_proxy = workflow_proxy(workflow_path) + os.unlink(f.name) + else: + # TODO: consume and use object_id... + if object_id: + workflow_path += "#" + object_id + wf_proxy = workflow_proxy(workflow_path) + tool_reference_proxies = wf_proxy.tool_reference_proxies() + for tool_reference_proxy in tool_reference_proxies: + # TODO: Namespace IDS in workflows. + representation = tool_reference_proxy.to_persistent_representation() + self.app.dynamic_tool_manager.create_tool( + trans, + { + "uuid": tool_reference_proxy.uuid, + "representation": representation, + }, + allow_load=True, + ) + as_dict = wf_proxy.to_dict() return RawWorkflowDescription(as_dict, workflow_path) @@ -806,6 +839,10 @@ def _workflow_from_raw_description( data = raw_workflow_description.as_dict if isinstance(data, str): data = json.loads(data) + if "src" in data: + assert data["src"] == "path" + wf_proxy = workflow_proxy(data["path"]) + data = wf_proxy.to_dict() # Create new workflow from source data workflow = model.Workflow() @@ -1831,6 +1868,22 @@ def __module_from_dict( ) step.temp_input_connections = temp_input_connections # type: ignore[assignment] + if "inputs" in step_dict: + for input_dict in step_dict["inputs"]: + step_input = model.WorkflowStepInput(step) + step_input.name = input_dict["name"] + step_input.merge_type = input_dict.get("merge_type", step_input.default_merge_type) + step_input.scatter_type = input_dict.get("scatter_type", step_input.default_scatter_type) + value_from = input_dict.get("value_from", None) + # if value_from is None: + # # Super hacky - we probably need distinct value from and + # # default handling. + # value_from = input_dict.get("default") + step_input.value_from = value_from + step_input.default_value = input_dict.get("default") + if step_input.default_value: + step_input.default_value_set = True + # Create the model class for the step steps.append(step) external_id = step_dict["id"] diff --git a/lib/galaxy/metadata/__init__.py b/lib/galaxy/metadata/__init__.py index 1f44e272ab8a..13ba1226ea88 100644 --- a/lib/galaxy/metadata/__init__.py +++ b/lib/galaxy/metadata/__init__.py @@ -207,6 +207,7 @@ def _metadata_path(what): "max_discovered_files": max_discovered_files, "outputs": outputs, "change_datatype_actions": job.get_change_datatype_actions(), + "job_id_tag": job.get_id_tag(), } # export model objects and object store configuration for extended metadata also. 
@@ -246,7 +247,6 @@ def _metadata_path(what): metadata_params["tool"] = tool_as_dict metadata_params["link_data_only"] = link_data_only metadata_params["tool_path"] = tool.config_file - metadata_params["job_id_tag"] = job.get_id_tag() metadata_params["implicit_collection_jobs_association_id"] = ( job.implicit_collection_jobs_association and job.implicit_collection_jobs_association.id ) diff --git a/lib/galaxy/metadata/set_metadata.py b/lib/galaxy/metadata/set_metadata.py index 38f3f7917729..009a66a54941 100644 --- a/lib/galaxy/metadata/set_metadata.py +++ b/lib/galaxy/metadata/set_metadata.py @@ -134,6 +134,13 @@ def set_meta_with_tool_provided( dataset_instance.metadata.__extension__ = extension except Exception: log.exception("Problem sniffing datatype.") + elif extension == "data" and file_dict.get("format"): + format = file_dict["format"] + mapped_ext = datatypes_registry.get_datatype_ext_by_format_ontology(format) + if mapped_ext: + extension = mapped_ext + dataset_instance.metadata.__extension__ = extension + dataset_instance.extension = extension for metadata_name, metadata_value in file_dict.get("metadata", {}).items(): setattr(dataset_instance.metadata, metadata_name, metadata_value) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 4dc18f87b1c9..2d4264c7b535 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -4755,6 +4755,10 @@ def deferred_source_uri(self): return self.sources[0].source_uri return None + @property + def cwl_formats(self): + return [f"http://edamontology.org/{self.datatype.edam_format}"] + @property def state(self): # self._state holds state that should only affect this particular dataset association, not the dataset state itself @@ -4793,6 +4797,7 @@ def set_skipped(self, object_store_populator: "ObjectStorePopulator") -> None: self.extension = "expression.json" self.state = self.states.OK self.blurb = "skipped" + self.peek = json.dumps(None) self.visible = False null = json.dumps(None) with open(self.dataset.get_file_name(), "w") as out: @@ -5598,6 +5603,7 @@ def to_dict(self, view="collection", expose_dataset_path=False): uuid=(lambda uuid: str(uuid) if uuid else None)(hda.dataset.uuid), hid=hda.hid, file_ext=hda.ext, + cwl_formats=hda.cwl_formats, peek=unicodify(hda.display_peek()) if hda.peek and hda.peek != "no peek" else None, model_class=self.__class__.__name__, name=hda.name, @@ -6533,12 +6539,21 @@ class DatasetCollection(Base, Dictifiable, UsesAnnotations, Serializable): populated_states = DatasetCollectionPopulatedState - def __init__(self, id=None, collection_type=None, populated=True, element_count=None): + def __init__( + self, + id=None, + collection_type=None, + populated=True, + element_count=None, + fields=None, + ): self.id = id self.collection_type = collection_type if not populated: self.populated_state = DatasetCollection.populated_states.NEW self.element_count = element_count + # TODO: persist fields... 
+ self.fields = fields def _build_nested_collection_attributes_stmt( self, @@ -6713,6 +6728,10 @@ def populated_optimized(self): return self._populated_optimized + @property + def allow_implicit_mapping(self): + return self.collection_type != "record" + @property def populated(self): top_level_populated = self.populated_state == DatasetCollection.populated_states.OK @@ -8145,6 +8164,7 @@ class WorkflowStep(Base, RepresentById, UsesCreateAndUpdateTime): DEFAULT_POSITION = {"left": 0, "top": 0} def __init__(self): + self.position = WorkflowStep.DEFAULT_POSITION self.uuid = uuid4() self._input_connections_by_name = None self._inputs_by_name = None @@ -8441,6 +8461,9 @@ class WorkflowStepInput(Base, RepresentById): cascade_backrefs=False, ) + default_merge_type = "merge_flattened" + default_scatter_type = "dotproduct" + def __init__(self, workflow_step): add_object_to_object_session(self, workflow_step) self.workflow_step = workflow_step @@ -8513,6 +8536,9 @@ def copy(self): copied_connection.output_name = self.output_name return copied_connection + def log_str(self): + return f"WorkflowStepConnection[output_step_id={self.output_step_id},output_name={self.output_name},input_step_id={self.input_step_id},input_name={self.input_name}]" + class WorkflowOutput(Base, Serializable): __tablename__ = "workflow_output" @@ -9465,6 +9491,8 @@ def is_new(self): return self.state == self.states.NEW def add_output(self, output_name, output_object): + if getattr(output_object, "ephemeral", False): + return if output_object.history_content_type == "dataset": output_assoc = WorkflowInvocationStepOutputDatasetAssociation() output_assoc.workflow_invocation_step = self diff --git a/lib/galaxy/model/dataset_collections/builder.py b/lib/galaxy/model/dataset_collections/builder.py index 2ae001f33a22..7c640a028d3d 100644 --- a/lib/galaxy/model/dataset_collections/builder.py +++ b/lib/galaxy/model/dataset_collections/builder.py @@ -1,28 +1,37 @@ +from typing import ( + Any, + Dict, + List, +) + from galaxy import model from galaxy.model.orm.util import add_object_to_object_session +from galaxy.schema.schema import FieldDict from galaxy.util.oset import OrderedSet from .type_description import COLLECTION_TYPE_DESCRIPTION_FACTORY -def build_collection(type, dataset_instances, collection=None, associated_identifiers=None): +def build_collection(type, dataset_instances, collection=None, associated_identifiers=None, fields=None): """ Build DatasetCollection with populated DatasetcollectionElement objects corresponding to the supplied dataset instances or throw exception if this is not a valid collection of the specified type. 
""" - dataset_collection = collection or model.DatasetCollection() + dataset_collection = collection or model.DatasetCollection(fields=fields) associated_identifiers = associated_identifiers or set() - set_collection_elements(dataset_collection, type, dataset_instances, associated_identifiers) + set_collection_elements(dataset_collection, type, dataset_instances, associated_identifiers, fields=fields) return dataset_collection -def set_collection_elements(dataset_collection, type, dataset_instances, associated_identifiers): +def set_collection_elements(dataset_collection, type, dataset_instances, associated_identifiers, fields=None): new_element_keys = OrderedSet(dataset_instances.keys()) - associated_identifiers new_dataset_instances = {k: dataset_instances[k] for k in new_element_keys} dataset_collection.element_count = dataset_collection.element_count or 0 element_index = dataset_collection.element_count elements = [] - for element in type.generate_elements(new_dataset_instances): + if fields == "auto": + fields = guess_fields(dataset_instances) + for element in type.generate_elements(new_dataset_instances, fields=fields): element.element_index = element_index add_object_to_object_session(element, dataset_collection) element.collection = dataset_collection @@ -35,6 +44,16 @@ def set_collection_elements(dataset_collection, type, dataset_instances, associa return dataset_collection +def guess_fields(dataset_instances: Dict[str, Any]) -> List[FieldDict]: + fields: List[FieldDict] = [] + for identifier, element in dataset_instances.items(): + # TODO: Make generic enough to handle nested record types. + assert element.history_content_type == "dataset" + fields.append({"type": "File", "name": identifier}) + + return fields + + class CollectionBuilder: """Purely functional builder pattern for building a dataset collection.""" diff --git a/lib/galaxy/model/dataset_collections/matching.py b/lib/galaxy/model/dataset_collections/matching.py index 948e317ec69a..64161620b9ea 100644 --- a/lib/galaxy/model/dataset_collections/matching.py +++ b/lib/galaxy/model/dataset_collections/matching.py @@ -17,8 +17,10 @@ class CollectionsToMatch: def __init__(self): self.collections = {} + self.uses_ephemeral_collections = False def add(self, input_name, hdca, subcollection_type=None, linked=True): + self.uses_ephemeral_collections = self.uses_ephemeral_collections or not hasattr(hdca, "hid") self.collections[input_name] = bunch.Bunch( hdca=hdca, subcollection_type=subcollection_type, @@ -49,6 +51,7 @@ def __init__(self): self.subcollection_types = {} self.action_tuples = {} self.when_values = None + self.uses_ephemeral_collections = False def __attempt_add_to_linked_match(self, input_name, hdca, collection_type_description, subcollection_type): structure = get_structure(hdca, collection_type_description, leaf_subcollection_type=subcollection_type) @@ -91,12 +94,21 @@ def map_over_action_tuples(self, input_name): def is_mapped_over(self, input_name): return input_name in self.collections + @property + def implicit_inputs(self): + if not self.uses_ephemeral_collections: + # Consider doing something smarter here. 
+ return list(self.collections.items()) + else: + return [] + @staticmethod def for_collections(collections_to_match, collection_type_descriptions) -> Optional["MatchingCollections"]: if not collections_to_match.has_collections(): return None matching_collections = MatchingCollections() + matching_collections.uses_ephemeral_collections = collections_to_match.uses_ephemeral_collections for input_key, to_match in sorted(collections_to_match.items()): hdca = to_match.hdca collection_type_description = collection_type_descriptions.for_collection_type( diff --git a/lib/galaxy/model/dataset_collections/registry.py b/lib/galaxy/model/dataset_collections/registry.py index 9c849dfdad6f..bd148edafd2d 100644 --- a/lib/galaxy/model/dataset_collections/registry.py +++ b/lib/galaxy/model/dataset_collections/registry.py @@ -2,9 +2,14 @@ from .types import ( list, paired, + record, ) -PLUGIN_CLASSES = [list.ListDatasetCollectionType, paired.PairedDatasetCollectionType] +PLUGIN_CLASSES = [ + list.ListDatasetCollectionType, + paired.PairedDatasetCollectionType, + record.RecordDatasetCollectionType, +] class DatasetCollectionTypesRegistry: @@ -14,13 +19,13 @@ def __init__(self): def get(self, plugin_type): return self.__plugins[plugin_type] - def prototype(self, plugin_type): + def prototype(self, plugin_type, fields=None): plugin_type_object = self.get(plugin_type) if not hasattr(plugin_type_object, "prototype_elements"): raise Exception(f"Cannot pre-determine structure for collection of type {plugin_type}") dataset_collection = model.DatasetCollection() - for e in plugin_type_object.prototype_elements(): + for e in plugin_type_object.prototype_elements(fields=fields): e.collection = dataset_collection return dataset_collection diff --git a/lib/galaxy/model/dataset_collections/type_description.py b/lib/galaxy/model/dataset_collections/type_description.py index cade102453ca..9233faf3e4f9 100644 --- a/lib/galaxy/model/dataset_collections/type_description.py +++ b/lib/galaxy/model/dataset_collections/type_description.py @@ -9,9 +9,9 @@ def __init__(self, type_registry=DATASET_COLLECTION_TYPES_REGISTRY): # I think. 
self.type_registry = type_registry - def for_collection_type(self, collection_type): + def for_collection_type(self, collection_type, fields=None): assert collection_type is not None - return CollectionTypeDescription(collection_type, self) + return CollectionTypeDescription(collection_type, self, fields=fields) class CollectionTypeDescription: @@ -47,12 +47,15 @@ class CollectionTypeDescription: collection_type: str - def __init__(self, collection_type: Union[str, "CollectionTypeDescription"], collection_type_description_factory): + def __init__( + self, collection_type: Union[str, "CollectionTypeDescription"], collection_type_description_factory, fields=None + ): if isinstance(collection_type, CollectionTypeDescription): self.collection_type = collection_type.collection_type else: self.collection_type = collection_type self.collection_type_description_factory = collection_type_description_factory + self.fields = fields self.__has_subcollections = self.collection_type.find(":") > 0 def child_collection_type(self): @@ -90,9 +93,13 @@ def has_subcollections_of_type(self, other_collection_type): collection_type = self.collection_type return collection_type.endswith(other_collection_type) and collection_type != other_collection_type - def is_subcollection_of_type(self, other_collection_type): + def is_subcollection_of_type(self, other_collection_type, proper=True): + """If proper is False, then a type is considered a subcollection of itself.""" if not hasattr(other_collection_type, "collection_type"): other_collection_type = self.collection_type_description_factory.for_collection_type(other_collection_type) + if not proper and self.can_match_type(other_collection_type): + return True + return other_collection_type.has_subcollections_of_type(self) def can_match_type(self, other_collection_type): diff --git a/lib/galaxy/model/dataset_collections/types/__init__.py b/lib/galaxy/model/dataset_collections/types/__init__.py index bfcf7bae79a6..c294f6957be6 100644 --- a/lib/galaxy/model/dataset_collections/types/__init__.py +++ b/lib/galaxy/model/dataset_collections/types/__init__.py @@ -11,7 +11,7 @@ class DatasetCollectionType(metaclass=ABCMeta): @abstractmethod - def generate_elements(self, dataset_instances): + def generate_elements(self, dataset_instances: dict, **kwds): """Generate DatasetCollectionElements with corresponding to the supplied dataset instances or throw exception if this is not a valid collection of the specified type.
diff --git a/lib/galaxy/model/dataset_collections/types/list.py b/lib/galaxy/model/dataset_collections/types/list.py index 18ce4db76537..d4421d009c34 100644 --- a/lib/galaxy/model/dataset_collections/types/list.py +++ b/lib/galaxy/model/dataset_collections/types/list.py @@ -7,8 +7,8 @@ class ListDatasetCollectionType(BaseDatasetCollectionType): collection_type = "list" - def generate_elements(self, elements): - for identifier, element in elements.items(): + def generate_elements(self, dataset_instances, **kwds): + for identifier, element in dataset_instances.items(): association = DatasetCollectionElement( element=element, element_identifier=identifier, diff --git a/lib/galaxy/model/dataset_collections/types/paired.py b/lib/galaxy/model/dataset_collections/types/paired.py index 4ae95a1442a2..e774ab67aace 100644 --- a/lib/galaxy/model/dataset_collections/types/paired.py +++ b/lib/galaxy/model/dataset_collections/types/paired.py @@ -15,21 +15,21 @@ class PairedDatasetCollectionType(BaseDatasetCollectionType): collection_type = "paired" - def generate_elements(self, elements): - if forward_dataset := elements.get(FORWARD_IDENTIFIER): + def generate_elements(self, dataset_instances, **kwds): + if forward_dataset := dataset_instances.get(FORWARD_IDENTIFIER): left_association = DatasetCollectionElement( element=forward_dataset, element_identifier=FORWARD_IDENTIFIER, ) yield left_association - if reverse_dataset := elements.get(REVERSE_IDENTIFIER): + if reverse_dataset := dataset_instances.get(REVERSE_IDENTIFIER): right_association = DatasetCollectionElement( element=reverse_dataset, element_identifier=REVERSE_IDENTIFIER, ) yield right_association - def prototype_elements(self): + def prototype_elements(self, **kwds): left_association = DatasetCollectionElement( element=HistoryDatasetAssociation(), element_identifier=FORWARD_IDENTIFIER, diff --git a/lib/galaxy/model/dataset_collections/types/record.py b/lib/galaxy/model/dataset_collections/types/record.py new file mode 100644 index 000000000000..193509f439ee --- /dev/null +++ b/lib/galaxy/model/dataset_collections/types/record.py @@ -0,0 +1,45 @@ +from galaxy.exceptions import RequestParameterMissingException +from galaxy.model import ( + DatasetCollectionElement, + HistoryDatasetAssociation, +) +from ..types import BaseDatasetCollectionType + + +class RecordDatasetCollectionType(BaseDatasetCollectionType): + """Arbitrary CWL-style record type.""" + + collection_type = "record" + + def generate_elements(self, dataset_instances, **kwds): + fields = kwds.get("fields", None) + if fields is None: + raise RequestParameterMissingException("Missing or null parameter 'fields' required for record types.") + if len(dataset_instances) != len(fields): + self._validation_failed("Supplied elements do not match fields.") + index = 0 + for identifier, element in dataset_instances.items(): + field = fields[index] + if field["name"] != identifier: + self._validation_failed("Supplied elements do not match fields.") + + # TODO: validate type and such.
+ association = DatasetCollectionElement( + element=element, + element_identifier=identifier, + ) + yield association + index += 1 + + def prototype_elements(self, fields=None, **kwds): + if fields is None: + raise RequestParameterMissingException("Missing or null parameter 'fields' required for record types.") + for field in fields: + name = field.get("name", None) + assert name + assert field.get("type", "File") # NS: this assert doesn't make sense as it is + field_dataset = DatasetCollectionElement( + element=HistoryDatasetAssociation(), + element_identifier=name, + ) + yield field_dataset diff --git a/lib/galaxy/model/deferred.py b/lib/galaxy/model/deferred.py index 784e4e9a8ba3..9381186a7bb7 100644 --- a/lib/galaxy/model/deferred.py +++ b/lib/galaxy/model/deferred.py @@ -36,7 +36,9 @@ from galaxy.objectstore import ( ObjectStore, ObjectStorePopulator, + persist_extra_files, ) +from galaxy.util.compression_utils import CompressedFile from galaxy.util.hash_util import verify_hash log = logging.getLogger(__name__) @@ -141,6 +143,13 @@ def ensure_materialized( object_store_populator.set_dataset_object_store_id(materialized_dataset) try: path = self._stream_source(target_source, dataset_instance.datatype, materialized_dataset) + if dataset_instance.ext == "directory": + CompressedFile(path).extract(materialized_dataset.extra_files_path) + persist_extra_files( + object_store=object_store, + src_extra_files_path=materialized_dataset.extra_files_path, + primary_data=dataset_instance, + ) object_store.update_from_file(materialized_dataset, file_name=path) materialized_dataset.set_size() except Exception as e: @@ -153,7 +162,7 @@ def ensure_materialized( # TODO: take into account transform and ensure we are and are not modifying the file as appropriate. try: path = self._stream_source(target_source, dataset_instance.datatype, materialized_dataset) - shutil.move(path, transient_paths.external_filename) + shutil.copy(path, transient_paths.external_filename) materialized_dataset.external_filename = transient_paths.external_filename except Exception as e: exception_materializing = e diff --git a/lib/galaxy/model/store/discover.py b/lib/galaxy/model/store/discover.py index 4bee49056c61..533f61701c99 100644 --- a/lib/galaxy/model/store/discover.py +++ b/lib/galaxy/model/store/discover.py @@ -870,6 +870,7 @@ def collect_elements_for_history(elements): final_job_state=state, storage_callbacks=storage_callbacks, ) + model_persistence_context.add_output_dataset_association("__unnamed_outputs", dataset) dataset.discovered = True if not hda_id: datasets.append(dataset) diff --git a/lib/galaxy/schema/schema.py b/lib/galaxy/schema/schema.py index 3febee546896..90dac7c73036 100644 --- a/lib/galaxy/schema/schema.py +++ b/lib/galaxy/schema/schema.py @@ -33,6 +33,7 @@ from typing_extensions import ( Annotated, Literal, + TypedDict, ) from galaxy.schema import partial_model @@ -1647,6 +1648,11 @@ class CollectionElementIdentifier(Model): ) +class FieldDict(TypedDict): + name: str + type: str + + class CreateNewCollectionPayload(Model): collection_type: Optional[CollectionType] = OptionalCollectionTypeField element_identifiers: Optional[List[CollectionElementIdentifier]] = Field( @@ -1682,6 +1688,11 @@ class CreateNewCollectionPayload(Model): default=None, description="The ID of the library folder that will contain the collection. Required if `instance_type=library`.", ) + fields_: Optional[Union[str, List[FieldDict]]] = Field( + default=[], + description="List of fields to create for this collection. 
Set to 'auto' to guess fields from identifiers.", + alias="fields", + ) class ModelStoreFormat(str, Enum): diff --git a/lib/galaxy/tool_util/client/staging.py b/lib/galaxy/tool_util/client/staging.py index 3d4d6ca73080..c4f6558dee2a 100644 --- a/lib/galaxy/tool_util/client/staging.py +++ b/lib/galaxy/tool_util/client/staging.py @@ -59,20 +59,19 @@ def _post(self, api_path: str, payload: Dict[str, Any]) -> Dict[str, Any]: def _attach_file(self, path: str) -> BinaryIO: return open(path, "rb") - def _tools_post(self, payload: Dict[str, Any]) -> Dict[str, Any]: + def _job_details_from_tool_response(self, tool_response: Dict[str, Any]) -> List[Dict[str, Any]]: + return [self._handle_job(job) for job in tool_response.get("jobs", [])] + + def _tools_post(self, payload: Dict[str, Any]) -> List[Dict[str, Any]]: tool_response = self._post("tools", payload) - for job in tool_response.get("jobs", []): - self._handle_job(job) - return tool_response + return self._job_details_from_tool_response(tool_response) - def _fetch_post(self, payload: Dict[str, Any]) -> Dict[str, Any]: + def _fetch_post(self, payload: Dict[str, Any]) -> List[Dict[str, Any]]: tool_response = self._post("tools/fetch", payload) - for job in tool_response.get("jobs", []): - self._handle_job(job) - return tool_response + return self._job_details_from_tool_response(tool_response) @abc.abstractmethod - def _handle_job(self, job_response: Dict[str, Any]): + def _handle_job(self, job_response: Dict[str, Any]) -> Dict[str, Any]: """Implementer can decide if to wait for job(s) individually or not here.""" def stage( @@ -143,10 +142,9 @@ def _attach_file(upload_payload: Dict[str, Any], uri: str, index: int = 0) -> Di fetch_payload["targets"][0]["elements"][0]["tags"] = tags elif isinstance(upload_target, DirectoryUploadTarget): fetch_payload = _fetch_payload(history_id, file_type="directory") - fetch_payload["targets"][0].pop("elements") tar_path = upload_target.tar_path src = _attach_file(fetch_payload, tar_path) - fetch_payload["targets"][0]["elements_from"] = src + fetch_payload["targets"][0]["elements"][0].update(src) elif isinstance(upload_target, ObjectUploadTarget): content = json.dumps(upload_target.object) fetch_payload = _fetch_payload(history_id, file_type="expression.json") @@ -161,7 +159,7 @@ def _attach_file(upload_payload: Dict[str, Any], uri: str, index: int = 0) -> Di fetch_payload["targets"][0]["elements"][0]["tags"] = tags else: raise ValueError(f"Unsupported type for upload_target: {type(upload_target)}") - return self._fetch_post(fetch_payload) + return self._fetch_post(fetch_payload)[0] # Save legacy upload_func to target older Galaxy servers def upload_func(upload_target: UploadTarget) -> Dict[str, Any]: @@ -176,9 +174,14 @@ def _attach_file(upload_payload: Dict[str, Any], uri: str, index: int = 0) -> No if isinstance(upload_target, FileUploadTarget): file_path = upload_target.path - file_type = upload_target.properties.get("filetype", None) or DEFAULT_FILE_TYPE + file_type = DEFAULT_FILE_TYPE dbkey = upload_target.properties.get("dbkey", None) or DEFAULT_DBKEY - upload_payload = _upload_payload(history_id, file_type=file_type, to_posix_lines=dbkey) + upload_payload = _upload_payload( + history_id, + file_type=file_type, + to_posix_lines=dbkey, + cwl_format=upload_target.properties.get("filetype"), + ) name = _file_path_to_name(file_path) upload_payload["inputs"]["files_0|auto_decompress"] = False upload_payload["inputs"]["auto_decompress"] = False @@ -201,12 +204,12 @@ def _attach_file(upload_payload: Dict[str, Any], 
uri: str, index: int = 0) -> No _attach_file(upload_payload, composite_data, index=i) self._log(f"upload_payload is {upload_payload}") - return self._tools_post(upload_payload) + return self._tools_post(upload_payload)[0] elif isinstance(upload_target, FileLiteralTarget): # For file literals - take them as is - never convert line endings. payload = _upload_payload(history_id, file_type="auto", auto_decompress=False, to_posix_lines=False) payload["inputs"]["files_0|url_paste"] = upload_target.contents - return self._tools_post(payload) + return self._tools_post(payload)[0] elif isinstance(upload_target, DirectoryUploadTarget): tar_path = upload_target.tar_path @@ -216,20 +219,21 @@ def _attach_file(upload_payload: Dict[str, Any], uri: str, index: int = 0) -> No ) upload_payload["inputs"]["files_0|auto_decompress"] = False _attach_file(upload_payload, tar_path) - tar_upload_response = self._tools_post(upload_payload) + tar_upload_first_job_details = self._tools_post(upload_payload)[0] + tar_upload_first_dataset_id = next(iter(tar_upload_first_job_details["outputs"].values()))["id"] convert_payload = dict( tool_id="CONVERTER_tar_to_directory", - tool_inputs={"input1": {"src": "hda", "id": tar_upload_response["outputs"][0]["id"]}}, + tool_inputs={"input1": {"src": "hda", "id": tar_upload_first_dataset_id}}, history_id=history_id, ) - convert_response = self._tools_post(convert_payload) + convert_response = self._tools_post(convert_payload)[0] assert "outputs" in convert_response, convert_response return convert_response elif isinstance(upload_target, ObjectUploadTarget): content = json.dumps(upload_target.object) payload = _upload_payload(history_id, file_type="expression.json") payload["files_0|url_paste"] = content - return self._tools_post(payload) + return self._tools_post(payload)[0] else: raise ValueError(f"Unsupported type for upload_target: {type(upload_target)}") @@ -285,8 +289,9 @@ def _post(self, api_path: str, payload: Dict[str, Any]) -> Dict[str, Any]: assert response.status_code == 200, response.text return response.json() - def _handle_job(self, job_response: Dict[str, Any]): + def _handle_job(self, job_response: Dict[str, Any]) -> Dict[str, Any]: self.galaxy_interactor.wait_for_job(job_response["id"]) + return self.galaxy_interactor.get_job_stdio(job_response["id"]) @property def use_fetch_api(self): @@ -317,6 +322,8 @@ def _upload_payload( tool_input["files_0|space_to_tab"] = "Yes" if "file_name" in kwd: tool_input["files_0|NAME"] = kwd["file_name"] + if kwd.get("cwl_format"): + tool_input["cwl_format"] = kwd["cwl_format"] tool_input["files_0|type"] = "upload_dataset" payload["inputs"] = tool_input payload["__files"] = {} diff --git a/lib/galaxy/tool_util/cwl/parser.py b/lib/galaxy/tool_util/cwl/parser.py index e4a4ff83fb20..981e4d8ed37f 100644 --- a/lib/galaxy/tool_util/cwl/parser.py +++ b/lib/galaxy/tool_util/cwl/parser.py @@ -35,7 +35,6 @@ beta_relaxed_fmt_check, ensure_cwltool_available, getdefault, - normalizeFilesDirs, pathmapper, process, ref_resolver, @@ -71,6 +70,7 @@ "EnvVarRequirement", "InitialWorkDirRequirement", "InlineJavascriptRequirement", + "LoadListingRequirement", "ResourceRequirement", "ShellCommandRequirement", "ScatterFeatureRequirement", @@ -83,6 +83,9 @@ SUPPORTED_WORKFLOW_REQUIREMENTS = SUPPORTED_TOOL_REQUIREMENTS + [] +PERSISTED_REPRESENTATION = "cwl_tool_object" +SENTINEL_GALAXY_SLOTS_VALUE = 1.480231396 + ToolStateType = Dict[str, Union[None, str, bool, Dict[str, str]]] @@ -334,7 +337,13 @@ def _ensure_cwl_job_initialized(self): args = 
[RuntimeContext(job_args)] kwargs: Dict[str, str] = {} - self._cwl_job = next(self._tool_proxy._tool.job(self._input_dict, self._output_callback, *args, **kwargs)) + # The job method modifies inputs_record_schema in place to match the job definition + # This breaks subsequent use with other job definitions, so create a shallow copy of + # the CommandLineTool instance and add a deepcopy of the inputs_record_schema + # (instead of globally manipulating self._tool_proxy._tool, which is likely not thread-safe). + cwl_tool_instance = copy.copy(self._tool_proxy._tool) + cwl_tool_instance.inputs_record_schema = copy.deepcopy(cwl_tool_instance.inputs_record_schema) + self._cwl_job = next(cwl_tool_instance.job(self._input_dict, self._output_callback, *args, **kwargs)) self._is_command_line_job = hasattr(self._cwl_job, "command_line") def _normalize_job(self): @@ -350,7 +359,6 @@ def pathToLoc(p): process.fill_in_defaults(self._tool_proxy._tool.tool["inputs"], self._input_dict, fs_access) visit_class(self._input_dict, ("File", "Directory"), pathToLoc) # TODO: Why doesn't fillInDefault fill in locations instead of paths? - normalizeFilesDirs(self._input_dict) # TODO: validate like cwltool process _init_job. # validate.validate_ex(self.names.get_name("input_record_schema", ""), builder.job, # strict=False, logger=_logger_validation_warnings) @@ -397,13 +405,19 @@ def stage_recursive(value): def _select_resources(self, request, runtime_context=None): new_request = request.copy() - new_request["cores"] = "$GALAXY_SLOTS" + # TODO: we really need to find a better solution to set cores here. + # This could be to delay building the cwl job until we're at the worker node, + # (see https://github.com/galaxyproject/galaxy/pull/12459 for an attempt) + # or guessing what the value of $GALAXY_SLOTS will be when preparing the job. 
+ new_request["cores"] = SENTINEL_GALAXY_SLOTS_VALUE return new_request @property def command_line(self): if self.is_command_line_job: - return self.cwl_job().command_line + command_line = self.cwl_job().command_line + # Undo the SENTINEL_GALAXY_SLOTS_VALUE hack above + return [fragment.replace(str(SENTINEL_GALAXY_SLOTS_VALUE), "$GALAXY_SLOTS") for fragment in command_line] else: return ["true"] diff --git a/lib/galaxy/tool_util/cwl/representation.py b/lib/galaxy/tool_util/cwl/representation.py index 740954fd8754..dfb89c69046b 100644 --- a/lib/galaxy/tool_util/cwl/representation.py +++ b/lib/galaxy/tool_util/cwl/representation.py @@ -4,9 +4,12 @@ import json import logging import os +import tarfile +import uuid from enum import Enum from typing import ( Any, + Dict, NamedTuple, Optional, ) @@ -184,6 +187,7 @@ def dataset_wrapper_to_file_json(inputs_dir, dataset_wrapper): # Verify it isn't a NoneDataset if dataset_wrapper.unsanitized: raw_file_object["size"] = int(dataset_wrapper.get_size()) + raw_file_object["format"] = str(dataset_wrapper.cwl_formats[0]) set_basename_and_derived_properties( raw_file_object, str(dataset_wrapper.created_from_basename or dataset_wrapper.name) @@ -205,6 +209,9 @@ def dataset_wrapper_to_directory_json(inputs_dir, dataset_wrapper): except Exception: archive_location = None + extra_params = getattr(dataset_wrapper.unsanitized, "extra_params", {}) + # We need to resolve path to location if there is a listing + directory_json = { "location": dataset_wrapper.extra_files_path, "class": "Directory", @@ -214,7 +221,52 @@ def dataset_wrapper_to_directory_json(inputs_dir, dataset_wrapper): "archive_nameroot": nameroot, } - return directory_json + def tar_to_directory(directory_item): + # TODO: Should we just make sure that archive exists in extra_files_path ?? + tar_file_location = directory_item["archive_location"] + directory_name = directory_item["name"] + + assert os.path.exists(tar_file_location), tar_file_location + + tmp_dir = os.path.join(inputs_dir, "direx", str(uuid.uuid4())) # direx for "DIR EXtract" + directory_location = os.path.join(tmp_dir, directory_name) + + os.makedirs(tmp_dir) + + assert os.path.exists(tmp_dir), tmp_dir + + # TODO: safe version of this! 
+ bkp_cwd = os.getcwd() + os.chdir(tmp_dir) + tar = tarfile.open(tar_file_location) + tar.extractall(directory_location) + tar.close() + os.chdir(bkp_cwd) + + assert os.path.exists(directory_location), directory_location + + directory_item["location"] = directory_location + directory_item["nameext"] = "None" + directory_item["nameroot"] = directory_name + directory_item["basename"] = directory_name + + tar_to_directory(directory_json) + extra_params.update(directory_json) + + entry_to_location(extra_params, extra_params["location"]) + return extra_params + + +def entry_to_location(entry: Dict[str, Any], parent_location: str): + # TODO unit test + if entry["class"] == "File" and "path" in entry and "location" not in entry: + entry["location"] = os.path.join(parent_location, entry.pop("path")) + entry["size"] = os.path.getsize(entry["location"]) + elif entry["class"] == "Directory" and "listing" in entry: + if "location" not in entry and "path" in entry: + entry["location"] = os.path.join(parent_location, entry.pop("path")) + for listing_entry in entry["listing"]: + entry_to_location(listing_entry, parent_location=entry["location"]) def collection_wrapper_to_array(inputs_dir, wrapped_value): @@ -231,6 +283,106 @@ def collection_wrapper_to_record(inputs_dir, wrapped_value): return rval +def galactic_flavored_to_cwl_job(tool, param_dict, local_working_directory): + def simple_value(input, param_dict_value, type_representation_name=None): + type_representation = type_representation_from_name(type_representation_name) + # Hmm... cwl_type isn't really the cwl type in every case, + # like in the case of json for instance. + + if type_representation.galaxy_param_type == NO_GALAXY_INPUT: + assert param_dict_value is None + return None + + if type_representation.name == "file": + dataset_wrapper = param_dict_value + return dataset_wrapper_to_file_json(inputs_dir, dataset_wrapper) + elif type_representation.name == "directory": + dataset_wrapper = param_dict_value + return dataset_wrapper_to_directory_json(inputs_dir, dataset_wrapper) + elif type_representation.name == "integer": + return int(str(param_dict_value)) + elif type_representation.name == "long": + return int(str(param_dict_value)) + elif type_representation.name in ["float", "double"]: + return float(str(param_dict_value)) + elif type_representation.name == "boolean": + return string_as_bool(param_dict_value) + elif type_representation.name == "text": + return str(param_dict_value) + elif type_representation.name == "enum": + return str(param_dict_value) + elif type_representation.name == "json": + raw_value = param_dict_value.value + return json.loads(raw_value) + elif type_representation.name == "field": + if param_dict_value is None: + return None + if hasattr(param_dict_value, "value"): + # Is InputValueWrapper + rval = param_dict_value.value + if isinstance(rval, dict) and "src" in rval and rval["src"] == "json": + # needed for wf_step_connect_undeclared_param, so non-file defaults? + return rval["value"] + return rval + elif not param_dict_value.is_collection: + # Is DatasetFilenameWrapper + return dataset_wrapper_to_file_json(inputs_dir, param_dict_value) + else: + # Is DatasetCollectionWrapper + hdca_wrapper = param_dict_value + if hdca_wrapper.collection.collection_type == "list": + # TODO: generalize to lists of lists and lists of non-files... 
+ return collection_wrapper_to_array(inputs_dir, hdca_wrapper) + elif hdca_wrapper.collection.collection_type == "record": + return collection_wrapper_to_record(inputs_dir, hdca_wrapper) + + elif type_representation.name == "array": + # TODO: generalize to lists of lists and lists of non-files... + return collection_wrapper_to_array(inputs_dir, param_dict_value) + elif type_representation.name == "record": + return collection_wrapper_to_record(inputs_dir, param_dict_value) + else: + return str(param_dict_value) + + inputs_dir = os.path.join(local_working_directory, "_inputs") + + inputs = {} + + # TODO: walk tree + for input_name, input_param in tool.inputs.items(): + if input_param.type == "data": + # Probably need to be passing in the wrappers and using them - this seems to be + # an HDA. + map_to = input_param.map_to + inputs_at_depth = inputs + if map_to: + while "/" in map_to: + first, map_to = map_to.split("/", 1) + if first not in inputs_at_depth: + inputs_at_depth[first] = {} + inputs_at_depth = inputs_at_depth[first] + else: + map_to = input_param.name + inputs_at_depth[map_to] = dataset_wrapper_to_file_json(inputs_dir, param_dict[input_name]) + else: + matched_field = None + for field in tool._cwl_tool_proxy.input_fields(): + if field["name"] == input_name: # CWL <=> Galaxy + matched_field = field + field_type = field_to_field_type(matched_field) + if isinstance(field_type, list): + assert USE_FIELD_TYPES + type_descriptions = [FIELD_TYPE_REPRESENTATION] + else: + type_descriptions = type_descriptions_for_field_types([field_type]) + assert len(type_descriptions) == 1 + type_description_name = type_descriptions[0].name + + inputs[input_name] = simple_value(input_param, param_dict[input_name], type_description_name) + + return inputs + + def to_cwl_job(tool, param_dict, local_working_directory): """tool is Galaxy's representation of the tool and param_dict is the parameter dictionary with wrapped values. @@ -288,10 +440,10 @@ def simple_value(input, param_dict_value, type_representation_name=None): else: # Is DatasetCollectionWrapper hdca_wrapper = param_dict_value - if hdca_wrapper.collection_type == "list": + if hdca_wrapper.collection.collection_type == "list": # TODO: generalize to lists of lists and lists of non-files... return collection_wrapper_to_array(inputs_dir, hdca_wrapper) - elif hdca_wrapper.collection_type.collection_type == "record": + elif hdca_wrapper.collection.collection_type == "record": return collection_wrapper_to_record(inputs_dir, hdca_wrapper) elif type_representation.name == "array": diff --git a/lib/galaxy/tool_util/cwl/runtime_actions.py b/lib/galaxy/tool_util/cwl/runtime_actions.py index d25d50740cfc..e9b9970123e9 100644 --- a/lib/galaxy/tool_util/cwl/runtime_actions.py +++ b/lib/galaxy/tool_util/cwl/runtime_actions.py @@ -156,9 +156,9 @@ def move_output(output, target_path, output_name=None): with open(os.path.join(secondary_files_dir, "..", SECONDARY_FILES_INDEX_PATH), "w") as f: json.dump(index_contents, f) - return {"created_from_basename": output["basename"]} + return {"created_from_basename": output["basename"], "ext": "data", "format": output.get("format")} - def handle_known_output(output, output_key, output_name): + def handle_known_output(output, output_name): # if output["class"] != "File": # # This case doesn't seem like it would be reached - why is this here?
# provided_metadata[output_name] = { @@ -188,13 +188,13 @@ def handle_known_output_json(output, output_name): for output_name, output in outputs.items(): handled_outputs.append(output_name) if isinstance(output, dict) and "location" in output: - handle_known_output(output, output_name, output_name) + handle_known_output(output, output_name) elif isinstance(output, dict): prefix = f"{output_name}|__part__|" for record_key, record_value in output.items(): record_value_output_key = f"{prefix}{record_key}" if isinstance(record_value, dict) and "class" in record_value: - handle_known_output(record_value, record_value_output_key, output_name) + handle_known_output(record_value, record_value_output_key) else: # param_evaluation_noexpr handle_known_output_json(output, output_name) @@ -222,6 +222,8 @@ def handle_known_output_json(output, output_name): handle_known_output_json(None, output_name) job_metadata = os.path.join(job_directory, cwl_metadata_params["job_metadata"]) + # We may have moved away the tool working directory + os.makedirs(tool_working_directory, exist_ok=True) with open(job_metadata, "w") as f: json.dump(provided_metadata, f) diff --git a/lib/galaxy/tool_util/cwl/util.py b/lib/galaxy/tool_util/cwl/util.py index 2f3edde16dba..a22659e4f39c 100644 --- a/lib/galaxy/tool_util/cwl/util.py +++ b/lib/galaxy/tool_util/cwl/util.py @@ -8,6 +8,8 @@ import io import json import os +import pathlib +import shutil import tarfile import tempfile import urllib.parse @@ -57,6 +59,7 @@ def set_basename_and_derived_properties(properties, basename): "secondaryFiles": List[Any], "checksum": str, "size": int, + "format": Optional[str], }, total=False, ) @@ -67,6 +70,8 @@ def output_properties( content: Optional[bytes] = None, basename=None, pseudo_location=False, + cwl_formats: Optional[List[str]] = None, + download_url="", ) -> OutputPropertiesType: checksum = hashlib.sha1() properties: OutputPropertiesType = {"class": "File", "checksum": "", "size": 0} @@ -90,14 +95,16 @@ def output_properties( f.close() properties["checksum"] = f"sha1${checksum.hexdigest()}" properties["size"] = filesize + properties["format"] = cwl_formats[0] if cwl_formats else None set_basename_and_derived_properties(properties, basename) - _handle_pseudo_location(properties, pseudo_location) + _handle_pseudo_location(properties, pseudo_location, download_url) return properties -def _handle_pseudo_location(properties, pseudo_location): +def _handle_pseudo_location(properties, pseudo_location, download_url): if pseudo_location: - properties["location"] = properties["basename"] + # TODO: should be a URI to the dataset on the server + properties["location"] = pseudo_location.rstrip("/api") + download_url def abs_path_or_uri(path_or_uri: str, relative_to: str) -> str: @@ -150,10 +157,7 @@ def galactic_job_json( dataset_collections: List[Dict[str, Any]] = [] def response_to_hda(target: UploadTarget, upload_response: Dict[str, Any]) -> Dict[str, str]: - assert isinstance(upload_response, dict), upload_response - assert "outputs" in upload_response, upload_response - assert len(upload_response["outputs"]) > 0, upload_response - dataset = upload_response["outputs"][0] + dataset = next(iter(upload_response["outputs"].values())) datasets.append(dataset) dataset_id = dataset["id"] return {"src": "hda", "id": dataset_id} @@ -281,18 +285,39 @@ def replacement_file(value): def replacement_directory(value: Dict[str, Any]) -> Dict[str, Any]: file_path = value.get("location", None) or value.get("path", None) - if file_path is None: - return value + 
temp_dir = None + try: + if file_path is None: + # Probably a directory literal + # Make directory, create tar, put listing + temp_dir = tempfile.mkdtemp(prefix="file_literal_upload_dir") + base_dir = pathlib.Path(temp_dir) / value["basename"] + base_dir.mkdir() + for entry in value["listing"]: + if entry["class"] == "File": + if "contents" in entry: + with open(base_dir / entry["basename"], "w") as fh: + fh.write(entry["contents"]) + elif "path" in entry: + # if path is abspath test_data_directory is ignored, so no need to check if path is abspath + entry_path = os.path.join(test_data_directory, entry["path"]) + os.symlink(entry_path, str((base_dir / entry["path"]).resolve())) + else: + raise Exception(f"{entry['class']} unimplemented") + file_path = str(base_dir.resolve()) - if not os.path.isabs(file_path): file_path = os.path.join(test_data_directory, file_path) - tmp = tempfile.NamedTemporaryFile(delete=False) - tf = tarfile.open(fileobj=tmp, mode="w:") - tf.add(file_path, ".") - tf.close() - - return upload_tar(tmp.name) + tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".tar") + tf = tarfile.open(fileobj=tmp, mode="w:", dereference=True) + tf.add(file_path, ".") + tf.close() + finally: + if temp_dir: + shutil.rmtree(temp_dir) + upload_response = upload_tar(tmp.name) + upload_response.update(value) + return upload_response def replacement_list(value) -> Dict[str, str]: collection_element_identifiers = [] @@ -517,7 +542,12 @@ def dataset_dict_to_json_content(dataset_dict): if file_or_directory == "File": dataset_dict = get_dataset(output_metadata) - properties = output_properties(pseudo_location=pseudo_location, **dataset_dict) + properties = output_properties( + pseudo_location=pseudo_location, + cwl_formats=output_metadata["cwl_formats"], + download_url=output_metadata["download_url"], + **dataset_dict, + ) basename = properties["basename"] extra_files = get_extra_files(output_metadata) found_index = False @@ -543,7 +573,13 @@ def dir_listing(dir_path): if extra_file_class == "File": ec = get_dataset(output_metadata, filename=path) ec["basename"] = extra_file_basename - ec_properties = output_properties(pseudo_location=pseudo_location, **ec) + filename = f"{dir_path}/{extra_file_basename}" + _download_url = ( + output_metadata["download_url"] + f"?filename={urllib.parse.quote_plus(filename)}" + ) + ec_properties = output_properties( + pseudo_location=pseudo_location, download_url=_download_url, **ec + ) elif extra_file_class == "Directory": ec_properties = {} ec_properties["class"] = "Directory" @@ -571,7 +607,12 @@ def dir_listing(dir_path): if extra_file_class == "File": ec = get_dataset(output_metadata, filename=path) ec["basename"] = ec_basename - ec_properties = output_properties(pseudo_location=pseudo_location, **ec) + download_url = ( + output_metadata["download_url"] + f"?filename={urllib.parse.quote_plus(path)}" + ) + ec_properties = output_properties( + pseudo_location=pseudo_location, download_url=download_url, **ec + ) elif extra_file_class == "Directory": ec_properties = {} ec_properties["class"] = "Directory" @@ -592,6 +633,11 @@ def dir_listing(dir_path): "basename": basename, "listing": listing, } + _handle_pseudo_location( + properties, + pseudo_location=pseudo_location, + download_url=output_metadata["download_url"] + "?to_ext=directory", + ) extra_files = get_extra_files(output_metadata) for extra_file in extra_files: @@ -623,13 +669,6 @@ def dir_listing(dir_path): raise NotImplementedError("Unknown history content type encountered") -def 
download_output(galaxy_output, get_metadata, get_dataset, get_extra_files, output_path): - output_metadata = get_metadata(galaxy_output.history_content_type, galaxy_output.history_content_id) - dataset_dict = get_dataset(output_metadata) - with open(output_path, "wb") as fh: - fh.write(dataset_dict["content"]) - - def guess_artifact_type(path): tool_or_workflow = "workflow" path, object_id = urllib.parse.urldefrag(path) diff --git a/lib/galaxy/tool_util/parser/cwl.py b/lib/galaxy/tool_util/parser/cwl.py index 26d2fdfdc16c..5bb9f822d6c2 100644 --- a/lib/galaxy/tool_util/parser/cwl.py +++ b/lib/galaxy/tool_util/parser/cwl.py @@ -7,6 +7,7 @@ from galaxy.tool_util.cwl.parser import ( tool_proxy, + tool_proxy_from_persistent_representation, ToolProxy, ) from galaxy.tool_util.deps import requirements @@ -17,24 +18,59 @@ ToolSource, ) from .output_actions import ToolOutputActionGroup -from .output_objects import ToolOutput +from .output_collection_def import dataset_collector_descriptions_from_list +from .output_objects import ( + ToolOutput, + ToolOutputCollection, + ToolOutputCollectionStructure, +) from .stdio import ( StdioErrorLevel, ToolStdioExitCode, ) -from .yaml import YamlInputSource +from .yaml import ( + YamlInputSource, + YamlPageSource, +) + +GX_INTERFACE_NAMESPACE = "http://galaxyproject.org/cwl#interface" + +CWL_DEFAULT_FILE_OUTPUT = "data" # set to _sniff_ to sniff output types automatically. log = logging.getLogger(__name__) +def strip_namespace(ordered_dict, namespace): + if isinstance(ordered_dict, dict): + value = {} + for k, v in ordered_dict.items(): + if k.startswith(namespace): + k = k[len(namespace) :] + value[k] = strip_namespace(v, namespace) + return value + elif isinstance(ordered_dict, list): + return [strip_namespace(v, namespace) for v in ordered_dict] + return ordered_dict + + class CwlToolSource(ToolSource): language = "yaml" - def __init__(self, tool_file=None, strict_cwl_validation=True, tool_proxy: Optional[ToolProxy] = None): - self._cwl_tool_file = tool_file - self._tool_proxy = tool_proxy + def __init__( + self, + tool_file=None, + tool_object=None, + strict_cwl_validation: bool = True, + tool_directory=None, + uuid=None, + tool_proxy: Optional[ToolProxy] = None, + ): self._source_path = tool_file + self._source_object = tool_object + self._tool_proxy = tool_proxy self._strict_cwl_validation = strict_cwl_validation + self._tool_directory = tool_directory + self._uuid = uuid @property def source_path(self): @@ -43,11 +79,34 @@ def source_path(self): @property def tool_proxy(self) -> ToolProxy: if self._tool_proxy is None: - self._tool_proxy = tool_proxy(self._source_path, strict_cwl_validation=self._strict_cwl_validation) + if self._source_path is not None: + self._tool_proxy = tool_proxy( + self._source_path, + strict_cwl_validation=self._strict_cwl_validation, + tool_directory=self._tool_directory, + uuid=self._uuid, + ) + else: + assert "uuid" in self._source_object + self._tool_proxy = tool_proxy_from_persistent_representation( + self._source_object, + strict_cwl_validation=self._strict_cwl_validation, + tool_directory=self._tool_directory, + ) return self._tool_proxy + def _get_gx_interface(self): + rval = None + for h in self.tool_proxy.hints_or_requirements_of_class(GX_INTERFACE_NAMESPACE): + rval = strip_namespace(h, GX_INTERFACE_NAMESPACE[: -len("interface")]) + + return rval + def parse_tool_type(self): - return "cwl" + if self._get_gx_interface() is not None: + return "galactic_cwl" + else: + return "cwl" def parse_id(self): return 
self.tool_proxy.galaxy_id() @@ -95,16 +154,36 @@ def parse_strict_shell(self): def parse_stdio(self): # TODO: remove duplication with YAML - # New format - starting out just using exit code. - exit_code_lower = ToolStdioExitCode() - exit_code_lower.range_start = -math.inf - exit_code_lower.range_end = -1 - exit_code_lower.error_level = StdioErrorLevel.FATAL - exit_code_high = ToolStdioExitCode() - exit_code_high.range_start = 1 - exit_code_high.range_end = math.inf - exit_code_lower.error_level = StdioErrorLevel.FATAL - return [exit_code_lower, exit_code_high], [] + exit_codes = [] + + success_codes = sorted(set(self.tool_proxy._tool.tool.get("successCodes") or [0])) + + last_success_code = None + + for success_code in success_codes: + if last_success_code is not None and success_code == last_success_code + 1: + last_success_code = success_code + continue + + exit_code = ToolStdioExitCode() + range_start = -math.inf + if last_success_code is not None: + range_start = last_success_code + 1 + + exit_code.range_start = range_start + exit_code.range_end = success_code - 1 + exit_code.error_level = StdioErrorLevel.FATAL + exit_codes.append(exit_code) + + last_success_code = success_code + + exit_code = ToolStdioExitCode() + exit_code.range_start = last_success_code + 1 + exit_code.range_end = math.inf + exit_code.error_level = StdioErrorLevel.FATAL + exit_codes.append(exit_code) + + return exit_codes, [] def parse_interpreter(self): return None @@ -119,26 +198,47 @@ def parse_interactivetool(self): return [] def parse_input_pages(self) -> PagesSource: - page_source = CwlPageSource(self.tool_proxy) + gx_interface = self._get_gx_interface() + if gx_interface is None: + page_source: PageSource = CwlPageSource(self.tool_proxy) + else: + page_source = YamlPageSource(gx_interface["inputs"]) return PagesSource([page_source]) def parse_outputs(self, tool): output_instances = self.tool_proxy.output_instances() outputs = {} + output_collections = {} output_defs = [] for output_instance in output_instances: output_defs.append(self._parse_output(tool, output_instance)) + # TODO: parse outputs collections for output_def in output_defs: - outputs[output_def.name] = output_def - return outputs, {} + if isinstance(output_def, ToolOutput): + outputs[output_def.name] = output_def + else: + outputs[output_def.name] = output_def + output_collections[output_def.name] = output_def + return outputs, output_collections def _parse_output(self, tool, output_instance): + output_type = output_instance.output_data_type + if isinstance(output_type, dict) and output_type.get("type") == "record": + return self._parse_output_record(tool, output_instance) + elif isinstance(output_type, dict) and output_type.get("type") == "array": + return self._parse_output_array(tool, output_instance) + else: + return self._parse_output_data(tool, output_instance) + + def _parse_output_data(self, tool, output_instance): name = output_instance.name # TODO: handle filters, actions, change_format output = ToolOutput(name) if "File" in output_instance.output_data_type: - output.format = "_sniff_" + output.format = CWL_DEFAULT_FILE_OUTPUT + elif "Directory" in output_instance.output_data_type: + output.format = "directory" else: output.format = "expression.json" output.change_format = [] @@ -154,6 +254,35 @@ def _parse_output(self, tool, output_instance): output.actions = ToolOutputActionGroup(output, None) return output + def _parse_output_record(self, tool, output_instance): + name = output_instance.name + # TODO: clean output bindings and other 
non-structure information + # from this. + fields = output_instance.output_data_type.get("fields") + output_collection = ToolOutputCollection( + name, + ToolOutputCollectionStructure( + collection_type="record", + fields=fields, + ), + ) + return output_collection + + def _parse_output_array(self, tool, output_instance): + name = output_instance.name + # TODO: Handle nested arrays and such... + dataset_collector_descriptions = dataset_collector_descriptions_from_list( + [{"from_provided_metadata": True}], + ) + output_collection = ToolOutputCollection( + name, + ToolOutputCollectionStructure( + collection_type="list", + dataset_collector_descriptions=dataset_collector_descriptions, + ), + ) + return output_collection + def parse_requirements_and_containers(self): containers = [] docker_identifier = self.tool_proxy.docker_identifier() @@ -174,6 +303,16 @@ def parse_profile(self): def parse_xrefs(self): return [] + def parse_provided_metadata_style(self): + return "default" + + def parse_cores_min(self): + for h in self.tool_proxy.hints_or_requirements_of_class("ResourceRequirement"): + cores_min = h.get("coresMin") + if cores_min: + return cores_min + return 1 + def parse_license(self): return None diff --git a/lib/galaxy/tool_util/parser/factory.py b/lib/galaxy/tool_util/parser/factory.py index ce403638f7f0..16c07b969d71 100644 --- a/lib/galaxy/tool_util/parser/factory.py +++ b/lib/galaxy/tool_util/parser/factory.py @@ -7,9 +7,11 @@ List, Optional, ) +from uuid import uuid4 from yaml import safe_load +from galaxy.tool_util.cwl.parser import tool_proxy_from_persistent_representation from galaxy.tool_util.loader import load_tool_with_refereces from galaxy.util import ( ElementTree, @@ -17,10 +19,7 @@ ) from galaxy.util.path import StrPath from galaxy.util.yaml_util import ordered_load -from .cwl import ( - CwlToolSource, - tool_proxy, -) +from .cwl import CwlToolSource from .interface import ( InputSource, ToolSource, @@ -42,9 +41,8 @@ def build_xml_tool_source(xml_string: str) -> XmlToolSource: return XmlToolSource(parse_xml_string_to_etree(xml_string)) -def build_cwl_tool_source(yaml_string: str) -> CwlToolSource: - proxy = tool_proxy(tool_object=safe_load(yaml_string)) - # regular CwlToolSource sets basename as tool id, but that's not going to cut it in production +def build_cwl_tool_source(persistent_representation: str) -> CwlToolSource: + proxy = tool_proxy_from_persistent_representation(persistent_representation) return CwlToolSource(tool_proxy=proxy) @@ -65,6 +63,8 @@ def get_tool_source( enable_beta_formats: bool = True, tool_location_fetcher: Optional[ToolLocationFetcher] = None, macro_paths: Optional[List[str]] = None, + strict_cwl_validation: bool = True, + uuid: Optional[str] = None, tool_source_class: Optional[str] = None, raw_tool_source: Optional[str] = None, ) -> ToolSource: @@ -100,21 +100,33 @@ def get_tool_source( return YamlToolSource(as_dict, source_path=config_file) elif config_file.endswith(".json") or config_file.endswith(".cwl"): log.info( - "Loading CWL tool - this is experimental - tool likely will not function in future at least in same way." + "Loading CWL tool [%s]. 
This is experimental - tool likely will not function in future at least in same way.", + config_file, ) - return CwlToolSource(config_file) + uuid = uuid or str(uuid4()) + return CwlToolSource(config_file, strict_cwl_validation=strict_cwl_validation, uuid=uuid) else: tree, macro_paths = load_tool_with_refereces(config_file) return XmlToolSource(tree, source_path=config_file, macro_paths=macro_paths) -def get_tool_source_from_representation(tool_format, tool_representation): +def get_tool_source_from_representation( + tool_format, tool_representation, strict_cwl_validation=True, tool_directory=None, uuid=None +): + # TODO: PRE-MERGE - ensure strict_cwl_validation is being set on caller - ignored right now. # TODO: make sure whatever is consuming this method uses ordered load. log.info("Loading dynamic tool - this is experimental - tool may not function in future.") if tool_format == "GalaxyTool": if "version" not in tool_representation: tool_representation["version"] = "1.0.0" # Don't require version for embedded tools. return YamlToolSource(tool_representation) + elif tool_format in ["CommandLineTool", "ExpressionTool"]: + return CwlToolSource( + tool_object=tool_representation, + strict_cwl_validation=strict_cwl_validation, + tool_directory=tool_directory, + uuid=uuid, + ) else: raise Exception(f"Unknown tool representation format [{tool_format}].") diff --git a/lib/galaxy/tool_util/parser/interface.py b/lib/galaxy/tool_util/parser/interface.py index af72bf4a4825..695841cbc629 100644 --- a/lib/galaxy/tool_util/parser/interface.py +++ b/lib/galaxy/tool_util/parser/interface.py @@ -375,6 +375,10 @@ def parse_python_template_version(self) -> Optional[packaging.version.Version]: Return minimum python version that the tool template has been developed against. """ + def parse_cores_min(self) -> Union[float, int, str]: + """Return minimum number of cores required to run this tool.""" + return 1 + def parse_creator(self): """Return list of metadata relating to creator/author of tool. diff --git a/lib/galaxy/tool_util/parser/output_objects.py b/lib/galaxy/tool_util/parser/output_objects.py index 63148c1fb946..7825d6308197 100644 --- a/lib/galaxy/tool_util/parser/output_objects.py +++ b/lib/galaxy/tool_util/parser/output_objects.py @@ -402,12 +402,14 @@ def __init__( collection_type_from_rules: Optional[str] = None, structured_like: Optional[str] = None, dataset_collector_descriptions: Optional[List[DatasetCollectionDescription]] = None, + fields=None, ) -> None: self.collection_type = collection_type self.collection_type_source = collection_type_source self.collection_type_from_rules = collection_type_from_rules self.structured_like = structured_like self.dataset_collector_descriptions = dataset_collector_descriptions or [] + self.fields = fields if collection_type and collection_type_source: raise ValueError("Cannot set both type and type_source on collection output.") if ( @@ -424,6 +426,10 @@ def __init__( raise ValueError( "Cannot specify dynamic structure (discover_datasets) and collection type attributes structured_like or collection_type_from_rules." 
) + if collection_type == "record" and fields is None: + raise ValueError("If record outputs are defined, fields must be defined as well.") + if fields is not None and collection_type != "record": + raise ValueError("If fields are specified for outputs, the collection type must be record.") self.dynamic = bool(dataset_collector_descriptions) def collection_prototype(self, inputs, type_registry): @@ -433,7 +439,7 @@ def collection_prototype(self, inputs, type_registry): else: collection_type = self.collection_type assert collection_type - collection_prototype = type_registry.prototype(collection_type) + collection_prototype = type_registry.prototype(collection_type, fields=self.fields) collection_prototype.collection_type = collection_type return collection_prototype diff --git a/lib/galaxy/tool_util/parser/yaml.py b/lib/galaxy/tool_util/parser/yaml.py index 88be9c72846a..d7564916ca3a 100644 --- a/lib/galaxy/tool_util/parser/yaml.py +++ b/lib/galaxy/tool_util/parser/yaml.py @@ -401,6 +401,9 @@ def parse_default(self) -> Optional[Dict[str, Any]]: default_def = input_dict.get("default", None) return default_def + def parse_map_to(self): + return self.input_dict.get("mapTo") + def _ensure_has(dict, defaults): for key, value in defaults.items(): diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py index 2e352e7c0f39..75f39467307d 100644 --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -8,6 +8,7 @@ import math import os import re +import shlex import tarfile import tempfile from collections.abc import MutableMapping @@ -54,6 +55,10 @@ from galaxy.model.dataset_collections.matching import MatchingCollections from galaxy.tool_shed.util.repository_util import get_installed_repository from galaxy.tool_shed.util.shed_util_common import set_image_paths +from galaxy.tool_util.cwl import ( + needs_shell_quoting, + to_galaxy_parameters, +) from galaxy.tool_util.deps import ( build_dependency_manager, CachedDependencyManager, @@ -163,6 +168,7 @@ parse_xml_string, parse_xml_string_to_etree, rst_to_html, + safe_makedirs, string_as_bool, unicodify, UNKNOWN, @@ -256,6 +262,7 @@ "CONVERTER_gff_to_interval_index_0", "CONVERTER_maf_to_fasta_0", "CONVERTER_maf_to_interval_0", + "CONVERTER_tar_to_directory", # WORKAROUND FOR CWL BRANCH! GOTTA FIX THIS I GUESS # Tools improperly migrated to the tool shed (devteam) "qualityFilter", "pileup_interval", @@ -590,6 +597,7 @@ def get_expanded_tool_source(self, config_file, **kwargs): config_file, enable_beta_formats=getattr(self.app.config, "enable_beta_tool_formats", False), tool_location_fetcher=self.tool_location_fetcher, + strict_cwl_validation=getattr(self.app.config, "strict_cwl_validation", True), **kwargs, ) except Exception as e: @@ -605,10 +613,26 @@ def create_dynamic_tool(self, dynamic_tool, **kwds): tool_representation = dynamic_tool.value if "name" not in tool_representation: tool_representation["name"] = f"dynamic tool {dynamic_tool.uuid}" - tool_source = get_tool_source_from_representation( + strict_cwl_validation = getattr(self.app.config, "strict_cwl_validation", True) + get_source_kwds = dict( tool_format=tool_format, tool_representation=tool_representation, + strict_cwl_validation=strict_cwl_validation, + uuid=dynamic_tool.uuid, ) + if dynamic_tool.tool_directory: + get_source_kwds["tool_directory"] = dynamic_tool.tool_directory + if dynamic_tool.tool_path: + config_file = dynamic_tool.tool_path + # TODO: uuid probably needed here... 
+ tool_source = get_tool_source( + config_file, + enable_beta_formats=getattr(self.app.config, "enable_beta_tool_formats", True), + tool_location_fetcher=self.tool_location_fetcher, + strict_cwl_validation=strict_cwl_validation, + ) + else: + tool_source = get_tool_source_from_representation(**get_source_kwds) kwds["dynamic"] = True tool = self._create_tool_from_source(tool_source, **kwds) tool.dynamic_tool = dynamic_tool @@ -778,6 +802,7 @@ class Tool(UsesDictVisibleKeys): __help: Optional[Template] job_search: "JobSearch" version: str + may_use_container_entry_point = False def __init__( self, @@ -1253,6 +1278,7 @@ def parse(self, tool_source: ToolSource, guid: Optional[str] = None, dynamic: bo self.ports = tool_source.parse_interactivetool() self._is_workflow_compatible = self.check_workflow_compatible(self.tool_source) + self.cores_min = tool_source.parse_cores_min() def __parse_legacy_features(self, tool_source: ToolSource): self.code_namespace: Dict[str, str] = {} @@ -1782,6 +1808,22 @@ def check_workflow_compatible(self, tool_source): # outputs? return True + def inputs_from_dict(self, as_dict): + """Extra inputs from input dictionary (e.g. API payload). + + Translate for tool type as needed. + """ + inputs = as_dict.get("inputs", {}) + if not isinstance(inputs, dict): + raise exceptions.RequestParameterInvalidException(f"inputs invalid [{inputs}]") + inputs_representation = as_dict.get("inputs_representation", "galaxy") + if inputs_representation != "galaxy": + raise exceptions.RequestParameterInvalidException( + "Only galaxy inputs representation is allowed for normal tools." + ) + # TODO: Consider <>. + return inputs + def new_state(self, trans): """ Create a new `DefaultToolState` for this tool. It will be initialized @@ -2566,7 +2608,9 @@ def to_dict(self, trans, link_details=False, io_details=False, tool_help=False): tool_class = self.__class__ # FIXME: the Tool class should declare directly, instead of ad hoc inspection - regular_form = tool_class == Tool or isinstance(self, (DatabaseOperationTool, InteractiveTool)) + regular_form = tool_class == Tool or isinstance( + self, (DatabaseOperationTool, InteractiveTool, CwlCommandBindingTool) + ) tool_dict["form_style"] = "regular" if regular_form else "special" if tool_help: # create tool help @@ -3211,6 +3255,134 @@ def job_failed(self, job_wrapper, message, exception=False): self.__remove_interactivetool_by_job(job) +class CwlCommandBindingTool(Tool): + """Tools that use CWL to bind parameters to command-line descriptions.""" + + def exec_before_job(self, app, inp_data, out_data, param_dict=None): + super().exec_before_job(app, inp_data, out_data, param_dict=param_dict) + # Working directory on Galaxy server (instead of remote compute). 
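+ # Input staging and CWL command-line binding below run on the Galaxy host before the job is dispatched to the compute resource.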
+ local_working_directory = param_dict["__local_working_directory__"] + log.info("exec_before_job for CWL tool") + if param_dict is None: + raise Exception("Internal error - param_dict is empty.") + + input_json = self.param_dict_to_cwl_inputs(param_dict, local_working_directory) + + output_dict = {} + for name, dataset in out_data.items(): + output_dict[name] = { + "id": str(getattr(dataset.dataset, dataset.dataset.store_by)), + "path": dataset.get_file_name(), + } + + # prevent unset optional file to trigger 'ValidationException' exception + input_json = { + k: v + for k, v in input_json.items() + if not (isinstance(v, dict) and v.get("class") == "File" and v.get("location") == "None") + } + + # prevent empty string + # this really seems wrong -John + input_json = {k: v for k, v in input_json.items() if v != ""} + + cwl_job_proxy = self._cwl_tool_proxy.job_proxy( + input_json, + output_dict, + local_working_directory, + ) + cwl_command_line = cwl_job_proxy.command_line + cwl_stdin = cwl_job_proxy.stdin + cwl_stdout = cwl_job_proxy.stdout + cwl_stderr = cwl_job_proxy.stderr + env = cwl_job_proxy.environment + + def needs_shell_quoting_hack(arg): + if arg == "$GALAXY_SLOTS": + return False + else: + return needs_shell_quoting(arg) + + command_line = " ".join(shlex.quote(arg) if needs_shell_quoting_hack(arg) else arg for arg in cwl_command_line) + if cwl_stdin: + command_line += ' < "' + cwl_stdin + '"' + if cwl_stdout: + command_line += ' > "' + cwl_stdout + '"' + if cwl_stderr: + command_line += ' 2> "' + cwl_stderr + '"' + cwl_job_state = { + "args": cwl_command_line, + "stdin": cwl_stdin, + "stdout": cwl_stdout, + "stderr": cwl_stderr, + "env": env, + } + tool_working_directory = os.path.join(local_working_directory, "working") + # Move to prepare... + safe_makedirs(tool_working_directory) + cwl_job_proxy.stage_files() + + cwl_job_proxy.rewrite_inputs_for_staging() + # Write representation to disk that can be reloaded at runtime + # and outputs collected before Galaxy metadata is gathered. 
+ cwl_job_proxy.save_job() + + param_dict["__cwl_command"] = command_line + param_dict["__cwl_command_state"] = cwl_job_state + param_dict["__cwl_command_version"] = 1 + log.info("CwlTool.exec_before_job() generated command_line %s", command_line) + + def parse(self, tool_source, guid=None, dynamic=False): + super().parse(tool_source, guid=guid, dynamic=dynamic) + cwl_tool_proxy = getattr(tool_source, "tool_proxy", None) + if cwl_tool_proxy is None: + raise Exception("parse() called on tool source not defining a proxy object to underlying CWL tool.") + self._cwl_tool_proxy = cwl_tool_proxy + + def param_dict_to_cwl_inputs(self, param_dict, local_working_directory): + """Map Galaxy API inputs description to a CWL job json.""" + raise NotImplementedError() + + +class GalacticCwlTool(CwlCommandBindingTool): + """A CWL tool with a gx:Interface defined so Galaxy tool state can be used.""" + + tool_type = "galactic_cwl" + + def param_dict_to_cwl_inputs(self, param_dict, local_working_directory): + from galaxy.tool_util.cwl.representation import galactic_flavored_to_cwl_job + + input_json = galactic_flavored_to_cwl_job(self, param_dict, local_working_directory) + return input_json + + +class CwlTool(CwlCommandBindingTool): + tool_type = "cwl" + may_use_container_entry_point = True + + def param_dict_to_cwl_inputs(self, param_dict, local_working_directory): + """Map Galaxy API inputs description to a CWL job json.""" + from galaxy.tool_util.cwl import to_cwl_job + + input_json = to_cwl_job(self, param_dict, local_working_directory) + return input_json + + def inputs_from_dict(self, as_dict): + """Extra inputs from input dictionary (e.g. API payload). + + Translate for tool type as needed. + """ + inputs = as_dict.get("inputs", {}) + inputs_representation = as_dict.get("inputs_representation", "galaxy") + if inputs_representation not in ["galaxy", "cwl"]: + raise exceptions.RequestParameterInvalidException("Inputs representation must be galaxy or cwl.") + + if inputs_representation == "cwl": + inputs = to_galaxy_parameters(self, inputs) + + return inputs + + class DataManagerTool(OutputParameterJSONTool): tool_type = "manage_data" default_tool_action = DataManagerToolAction @@ -4169,6 +4341,8 @@ def produce_outputs(self, trans, out_data, output_collections, incoming, history BuildListCollectionTool, ExtractDatasetCollectionTool, DataDestinationTool, + CwlTool, + GalacticCwlTool, ] tool_types = {tool_class.tool_type: tool_class for tool_class in TOOL_CLASSES} diff --git a/lib/galaxy/tools/actions/__init__.py b/lib/galaxy/tools/actions/__init__.py index 841eea988d49..50563eb55bcf 100644 --- a/lib/galaxy/tools/actions/__init__.py +++ b/lib/galaxy/tools/actions/__init__.py @@ -153,7 +153,9 @@ def record_permission(action, role_id): def visitor(input, value, prefix, prefixed_name: str, parent=None, **kwargs): def process_dataset(data, formats=None): - if not data or isinstance(data, RuntimeValue): + # default file coming from a workflow + is_workflow_default = isinstance(data, dict) and data.get("class") == "File" + if not data or isinstance(data, RuntimeValue) or is_workflow_default: return None if formats is None: formats = input.formats @@ -679,7 +681,10 @@ def handle_output(name, output, hidden=None): assert not element_identifiers # known_outputs must have been empty element_kwds = dict(elements=collections_manager.ELEMENTS_UNINITIALIZED) else: - element_kwds = dict(element_identifiers=element_identifiers) + element_kwds = dict( + element_identifiers=element_identifiers, + 
fields=output.structure.fields, + ) output_collections.create_collection( output=output, name=name, completed_job=completed_job, **element_kwds ) @@ -694,7 +699,7 @@ def handle_output(name, output, hidden=None): ) # Add all the top-level (non-child) datasets to the history unless otherwise specified for name, data in out_data.items(): - if name not in incoming and name not in child_dataset_names: + if getattr(data, "hid", None) is None or (name not in incoming and name not in child_dataset_names): # don't add already existing datasets, i.e. async created history.stage_addition(data) history.add_pending_items(set_output_hid=set_output_hid) @@ -927,6 +932,9 @@ def _record_inputs(self, trans, tool, job, incoming, inp_data, inp_dataset_colle reductions[name] = [] reductions[name].append(dataset_collection) + if getattr(dataset_collection, "ephemeral", False): + dataset_collection = dataset_collection.persistent_object + # TODO: verify can have multiple with same name, don't want to lose traceability if isinstance(dataset_collection, model.HistoryDatasetCollectionAssociation): job.add_input_dataset_collection(name, dataset_collection) diff --git a/lib/galaxy/tools/actions/upload.py b/lib/galaxy/tools/actions/upload.py index c758c96b2ae7..8fe7b7cae4d8 100644 --- a/lib/galaxy/tools/actions/upload.py +++ b/lib/galaxy/tools/actions/upload.py @@ -63,6 +63,11 @@ def execute( assert dataset_upload_inputs, Exception("No dataset upload groups were found.") persisting_uploads_timer = ExecutionTimer() + if incoming.get("file_type") == "auto" and incoming.get("cwl_format"): + cwl_format = incoming["cwl_format"] + ext = trans.app.datatypes_registry.get_datatype_ext_by_format_ontology(cwl_format, only_uploadable=True) + if ext: + incoming["file_type"] = ext incoming = upload_common.persist_uploads(incoming, trans) log.debug(f"Persisted uploads {persisting_uploads_timer}") rval = self._setup_job(tool, trans, incoming, dataset_upload_inputs, history) diff --git a/lib/galaxy/tools/actions/upload_common.py b/lib/galaxy/tools/actions/upload_common.py index 924bdc64a3b5..f6cdb8507d4a 100644 --- a/lib/galaxy/tools/actions/upload_common.py +++ b/lib/galaxy/tools/actions/upload_common.py @@ -39,7 +39,13 @@ def validate_datatype_extension(datatypes_registry, ext): if ext and ext not in ("auto", "data") and not datatypes_registry.get_datatype_by_extension(ext): + # also allow putting in data ontology ... might want to model this as a separate input + if ":" in ext: + ext_by_ontology = datatypes_registry.get_datatype_ext_by_format_ontology(ext) + if ext_by_ontology: + return ext_by_ontology raise RequestParameterInvalidException(f"Requested extension '{ext}' unknown, cannot upload dataset.") + return ext def persist_uploads(params, trans): diff --git a/lib/galaxy/tools/data_fetch.py b/lib/galaxy/tools/data_fetch.py index 8c25990ea73b..0f117b55ad5c 100644 --- a/lib/galaxy/tools/data_fetch.py +++ b/lib/galaxy/tools/data_fetch.py @@ -331,11 +331,11 @@ def _resolve_item_with_primary(item): elif not link_data_only: path = upload_config.ensure_in_working_directory(path, purge_source, in_place) + extra_files_path = f"{path}_extra" extra_files = item.get("extra_files") if extra_files: # TODO: optimize to just copy the whole directory to extra files instead. 
assert not upload_config.link_data_only, "linking composite dataset files not yet implemented" - extra_files_path = f"{path}_extra" staged_extra_files = extra_files_path os.mkdir(extra_files_path) @@ -375,6 +375,10 @@ def walk_extra_files(items, prefix=""): assert path datatype.groom_dataset_content(path) + if ext == "directory" and not deferred and path: + CompressedFile(path).extract(extra_files_path) + staged_extra_files = extra_files_path + if len(transform) > 0: source_dict["transform"] = transform elif not error_message: diff --git a/lib/galaxy/tools/evaluation.py b/lib/galaxy/tools/evaluation.py index 582bd65be06e..4fce36d8f43e 100644 --- a/lib/galaxy/tools/evaluation.py +++ b/lib/galaxy/tools/evaluation.py @@ -40,6 +40,7 @@ from galaxy.tools.parameters.basic import ( DataCollectionToolParameter, DataToolParameter, + FieldTypeToolParameter, SelectToolParameter, ) from galaxy.tools.parameters.grouping import ( @@ -75,6 +76,7 @@ from galaxy.tools import Tool log = logging.getLogger(__name__) +CWL_TOOL_TYPES = ("galactic_cwl", "cwl") class ToolErrorLog: @@ -208,7 +210,12 @@ def build_param_dict(self, incoming, input_datasets, output_datasets, output_col compute_environment = self.compute_environment job_working_directory = compute_environment.working_directory() - param_dict = TreeDict(self.param_dict) + if self.tool.tool_type == "cwl": + param_dict: Union[Dict[str, Any], TreeDict] = self.param_dict + else: + # TreeDict provides a way to access parameters without their fully qualified path, + # we only need this for Galaxy tools. + param_dict = TreeDict(self.param_dict) param_dict["__datatypes_config__"] = param_dict["GALAXY_DATATYPES_CONF_FILE"] = os.path.join( job_working_directory, "registry.xml" @@ -222,6 +229,10 @@ def build_param_dict(self, incoming, input_datasets, output_datasets, output_col self.__populate_wrappers(param_dict, input_datasets, job_working_directory) self.__populate_input_dataset_wrappers(param_dict, input_datasets) + if self.tool.tool_type == "cwl": + # don't need the outputs or the sanitization: + param_dict["__local_working_directory__"] = self.local_working_directory + return param_dict self.__populate_output_dataset_wrappers(param_dict, output_datasets, job_working_directory) self.__populate_output_collection_wrappers(param_dict, output_collections, job_working_directory) self.__populate_unstructured_path_rewrites(param_dict) @@ -235,6 +246,9 @@ def build_param_dict(self, incoming, input_datasets, output_datasets, output_col if self.job.tool_id == "upload1": param_dict["paramfile"] = os.path.join(job_working_directory, "upload_params.json") + if not isinstance(param_dict, TreeDict): + return param_dict + if "input" not in param_dict.data: def input(): @@ -401,6 +415,34 @@ def wrap_input(input_values, input): ) wrapper = DatasetCollectionWrapper(job_working_directory, dataset_collection, **wrapper_kwds) input_values[input.name] = wrapper + elif isinstance(input, FieldTypeToolParameter): + field_wrapper: Optional[Union[InputValueWrapper, DatasetFilenameWrapper, DatasetCollectionWrapper]] = ( + None + ) + if value: + assert "value" in value, value + assert "src" in value, value + src = value["src"] + if src == "json": + field_wrapper = InputValueWrapper(input, value, param_dict) + elif src == "hda": + field_wrapper = DatasetFilenameWrapper( + value["value"], + datatypes_registry=self.app.datatypes_registry, + tool=self.tool, + name=input.name, + ) + elif src == "hdca": + field_wrapper = DatasetCollectionWrapper( + job_working_directory=job_working_directory, + 
has_collection=value["value"], + datatypes_registry=self.app.datatypes_registry, + tool=self.tool, + name=input.name, + ) + else: + raise ValueError(f"src should be 'json' or 'hda' or 'hdca' but is '{src}'") + input_values[input.name] = field_wrapper elif isinstance(input, SelectToolParameter): if input.multiple: value = listify(value) @@ -643,22 +685,28 @@ def _build_command_line(self): command_line = None if not command: return - try: - # Substituting parameters into the command - command_line = fill_template( - command, context=param_dict, python_template_version=self.tool.python_template_version - ) - cleaned_command_line = [] - # Remove leading and trailing whitespace from each line for readability. - for line in command_line.split("\n"): - cleaned_command_line.append(line.strip()) - command_line = "\n".join(cleaned_command_line) - # Remove newlines from command line, and any leading/trailing white space - command_line = command_line.replace("\n", " ").replace("\r", " ").strip() - except Exception: - # Modify exception message to be more clear - # e.args = ( 'Error substituting into command line. Params: %r, Command: %s' % ( param_dict, self.command ), ) - raise + + # TODO: this approach replaces specifies a command block as $__cwl_command_state + # and that other approach needs to be unraveled. + if self.tool.tool_type in CWL_TOOL_TYPES and "__cwl_command" in param_dict: + command_line = param_dict["__cwl_command"] + else: + try: + # Substituting parameters into the command + command_line = fill_template( + command, context=param_dict, python_template_version=self.tool.python_template_version + ) + cleaned_command_line = [] + # Remove leading and trailing whitespace from each line for readability. + for line in command_line.split("\n"): + cleaned_command_line.append(line.strip()) + command_line = "\n".join(cleaned_command_line) + # Remove newlines from command line, and any leading/trailing white space + command_line = command_line.replace("\n", " ").replace("\r", " ").strip() + except Exception: + # Modify exception message to be more clear + # e.args = ( 'Error substituting into command line. 
Params: %r, Command: %s' % ( param_dict, self.command ), ) + raise if interpreter: # TODO: path munging for cluster/dataset server relocatability executable = command_line.split()[0] @@ -679,8 +727,11 @@ def _build_config_files(self): """ Build temporary file for file based parameter transfer if needed """ + config_filenames: List[str] = [] + if self.tool.tool_type in CWL_TOOL_TYPES: + # will never happen for cwl tools + return config_filenames param_dict = self.param_dict - config_filenames = [] for name, filename, content in self.tool.config_files: config_text, is_template = self.__build_config_file_text(content) # If a particular filename was forced by the config use it @@ -700,7 +751,12 @@ def _build_config_files(self): def _build_environment_variables(self): param_dict = self.param_dict environment_variables = self.environment_variables - for environment_variable_def in self.tool.environment_variables: + environment_variables_raw = self.tool.environment_variables + for key, value in param_dict.get("__cwl_command_state", {}).get("env", {}).items(): + environment_variable = dict(name=key, template=value) + environment_variables_raw.append(environment_variable) + + for environment_variable_def in environment_variables_raw: directory = self.local_working_directory environment_variable = environment_variable_def.copy() environment_variable_template = environment_variable_def["template"] @@ -729,7 +785,8 @@ def _build_environment_variables(self): environment_variable_template = "" is_template = False else: - is_template = True + # cwl tools should not template out values + is_template = self.tool.tool_type not in CWL_TOOL_TYPES with tempfile.NamedTemporaryFile(dir=directory, prefix="tool_env_", delete=False) as temp: config_filename = temp.name self.__write_workdir_file( diff --git a/lib/galaxy/tools/execute.py b/lib/galaxy/tools/execute.py index 7cd2d814bac2..a5e144b19f1b 100644 --- a/lib/galaxy/tools/execute.py +++ b/lib/galaxy/tools/execute.py @@ -325,9 +325,11 @@ def record_error(self, error): def on_text(self) -> Optional[str]: collection_info = self.collection_info if self._on_text is None and collection_info is not None: - collection_names = [f"collection {c.hid}" for c in collection_info.collections.values()] - self._on_text = on_text_for_names(collection_names) - + if not collection_info.uses_ephemeral_collections: + collection_names = [f"collection {c.hid}" for c in collection_info.collections.values()] + self._on_text = on_text_for_names(collection_names) + else: + self._on_text = "implicitly created collection from inputs" return self._on_text def output_name(self, trans, history, params, output): diff --git a/lib/galaxy/tools/parameters/basic.py b/lib/galaxy/tools/parameters/basic.py index 2a150806d60b..804ab0cae6ef 100644 --- a/lib/galaxy/tools/parameters/basic.py +++ b/lib/galaxy/tools/parameters/basic.py @@ -199,6 +199,10 @@ def __init__(self, tool, input_source, context=None): else: self.sanitizer = None self.validators = validation.to_validators(tool.app if tool else None, input_source.parse_validators()) + if hasattr(input_source, "parse_map_to"): + self.map_to = input_source.parse_map_to() + else: + self.map_to = None @property def visible(self) -> bool: @@ -1918,6 +1922,12 @@ def get_initial_value(self, trans, other_values): return hdca def to_json(self, value, app, use_security): + if getattr(value, "ephemeral", False): + # wf_wc_scatter_multiple_flattened + value = value.persistent_object + if value.id is None: + app.model.context.add(value) + 
app.model.context.flush() if value not in [None, "", "None"]: if isinstance(value, list) and len(value) > 0: @@ -2696,7 +2706,9 @@ def raw_to_galaxy( object_class = as_dict_value["class"] if object_class == "File": # TODO: relative_to = "/" - location = as_dict_value.get("location") + location = as_dict_value.get("location") or as_dict_value.get("path") + assert location + assert os.path.exists(location[len("file://")]) name = ( as_dict_value.get("identifier") or as_dict_value.get("basename") @@ -2777,6 +2789,88 @@ def write_elements_to_collection(has_elements, collection_builder): return hdca +class FieldTypeToolParameter(ToolParameter): + """CWL field type defined parameter source.""" + + def __init__(self, tool, input_source, context=None): + input_source = ensure_input_source(input_source) + super().__init__(tool, input_source) + # self.field_type = input_source.parse_field_type() + + def from_json(self, value, trans, other_values=None): + if trans.workflow_building_mode is workflow_building_modes.ENABLED: + return None + + if value is None: + return None + + if not isinstance(value, dict) or "src" not in value: + value = {"src": "json", "value": value} + elif value.get("class") == "File": + return raw_to_galaxy(trans.app, trans.history, value) + return self.to_python(value, trans.app) + + def to_json(self, value, app, use_security): + """Convert a value to a string representation suitable for persisting""" + assert isinstance(value, dict) + assert "src" in value + return value + + def to_python(self, value, app): + """Convert a value created with to_json back to an object representation""" + if value is None: + return None + # return super(FieldTypeToolParameter, self).to_python(value, app) + if not isinstance(value, dict): + value = json.loads(value) + assert isinstance(value, dict) + assert "src" in value or "class" in value + if "src" in value: + src = value["src"] + if "value" in value: + # We have an expanded value, not an ID + return value + elif src in ["hda", "hdca", "dce"]: + id = value["id"] if isinstance(value["id"], int) else app.security.decode_id(value["id"]) + if src == "dce": + value = app.model.context.query(app.model.DatasetCollectionElement).get(id) + elif src == "hdca": + value = app.model.context.query(app.model.HistoryDatasetCollectionAssociation).get(id) + else: + value = app.model.context.query(app.model.HistoryDatasetAssociation).get(id) + + return {"src": src, "value": value} + # Reaching this if we have a default filex + return value + + def value_to_basic(self, value, app, use_security=False): + log.info(f"value_to_basic of {value} ({type(value)})") + if is_runtime_value(value): + return runtime_to_json(value) + + if value is None: + return None + + if isinstance(value, dict): + if "src" in value: + src = value["src"] + if src in ["hda", "hdca", "dce"]: + id = value["value"].id if not use_security else app.security.encode_id(value["value"].id) + value = {"src": src, "id": id} + else: + # Default file + assert "class" in value + + else: + value = {"src": "json", "value": value} + + return json.dumps(value) + + def value_from_basic(self, value, app, ignore_errors=False): + return super().value_from_basic(value, app, ignore_errors) + # return json.loads(value) + + parameter_types = dict( text=TextToolParameter, integer=IntegerToolParameter, @@ -2797,6 +2891,7 @@ def write_elements_to_collection(has_elements, collection_builder): rules=RulesListToolParameter, directory_uri=DirectoryUriToolParameter, drill_down=DrillDownSelectToolParameter, + 
field=FieldTypeToolParameter, ) @@ -2824,4 +2919,7 @@ def history_item_to_json(value, app, use_security): src = "hda" if src is not None: object_id = cached_id(value) - return {"id": app.security.encode_id(object_id) if use_security else object_id, "src": src} + new_val = getattr(value, "extra_params", {}) + new_val["id"] = app.security.encode_id(object_id) if use_security else object_id + new_val["src"] = src + return new_val diff --git a/lib/galaxy/tools/parameters/grouping.py b/lib/galaxy/tools/parameters/grouping.py index 26b4e171c0de..46f754bd4e35 100644 --- a/lib/galaxy/tools/parameters/grouping.py +++ b/lib/galaxy/tools/parameters/grouping.py @@ -295,11 +295,14 @@ def get_composite_dataset_name(self, context): if dataset_name is None: filenames = [] for composite_file in context.get("files", []): + if dataset_name is None and composite_file.get("NAME", None) is not None: + dataset_name = composite_file.get("NAME") if not composite_file.get("ftp_files", ""): filenames.append((composite_file.get("file_data") or {}).get("filename", "")) else: filenames.append(composite_file.get("ftp_files", [])[0]) - dataset_name = os.path.commonprefix(filenames).rstrip(".") or None + if dataset_name is None: + dataset_name = os.path.commonprefix(filenames).rstrip(".") or None if dataset_name is None: dataset_name = f"Uploaded Composite Dataset ({self.get_file_type(context)})" return dataset_name @@ -830,6 +833,10 @@ def nested_to_dict(input): cond_dict["test_param"] = nested_to_dict(self.test_param) return cond_dict + @property + def case_strings(self): + return [c.value for c in self.cases] + class ConditionalWhen(UsesDictVisibleKeys): dict_collection_visible_keys = ["value"] diff --git a/lib/galaxy/tools/parameters/wrapped.py b/lib/galaxy/tools/parameters/wrapped.py index d23e9be5edf9..4b3c4bbc9b94 100644 --- a/lib/galaxy/tools/parameters/wrapped.py +++ b/lib/galaxy/tools/parameters/wrapped.py @@ -10,6 +10,7 @@ from galaxy.tools.parameters.basic import ( DataCollectionToolParameter, DataToolParameter, + FieldTypeToolParameter, SelectToolParameter, ) from galaxy.tools.parameters.grouping import ( @@ -134,6 +135,32 @@ def wrap_values(self, inputs, input_values, skip_missing_values=False): tool=tool, name=input.name, ) + elif isinstance(input, FieldTypeToolParameter): + if value is None: + return None + + if not isinstance(value, dict): + raise Exception(f"Simple values [{input}] need to be wrapped in a JSON envelope") + + assert "value" in value, value + assert "src" in value + src = value["src"] + if src == "json": + input_values[input.name] = InputValueWrapper(input, value["value"], incoming) + elif src == "hda": + input_values[input.name] = DatasetFilenameWrapper( + value["value"], datatypes_registry=trans.app.datatypes_registry, tool=tool, name=input.name + ) + elif src == "hdca": + input_values[input.name] = DatasetCollectionWrapper( + None, + value["value"], + datatypes_registry=trans.app.datatypes_registry, + tool=tool, + name=input.name, + ) + else: + raise AssertionError(f"Unknown src encountered [{src}] for field type value [{value}]") else: input_values[input.name] = InputValueWrapper(input, value, incoming, tool.profile) diff --git a/lib/galaxy/tools/wrappers.py b/lib/galaxy/tools/wrappers.py index 12d3d779cafc..93edcbc2805f 100644 --- a/lib/galaxy/tools/wrappers.py +++ b/lib/galaxy/tools/wrappers.py @@ -8,6 +8,7 @@ Any, cast, Dict, + ItemsView, Iterable, Iterator, KeysView, @@ -69,7 +70,7 @@ class ToolParameterValueWrapper: Base class for object that Wraps a Tool Parameter and Value. 
""" - value: Optional[Union[str, List[str]]] + value: Any input: "ToolParameter" def __bool__(self) -> bool: @@ -123,7 +124,7 @@ class InputValueWrapper(ToolParameterValueWrapper): def __init__( self, input: "ToolParameter", - value: Optional[str], + value: Any, other_values: Optional[Dict[str, str]] = None, profile: Optional[float] = None, ) -> None: @@ -698,6 +699,9 @@ def keys(self) -> Union[List[str], KeysView[Any]]: return [] return self.__element_instances.keys() + def items(self) -> ItemsView[str, Union["DatasetCollectionWrapper", DatasetFilenameWrapper]]: + return self.__element_instances.items() + @property def is_collection(self) -> bool: return True diff --git a/lib/galaxy/util/compression_utils.py b/lib/galaxy/util/compression_utils.py index 576e62623f99..d9fd3bd67310 100644 --- a/lib/galaxy/util/compression_utils.py +++ b/lib/galaxy/util/compression_utils.py @@ -214,15 +214,16 @@ def common_prefix_dir(self) -> str: common_prefix = os.path.commonprefix([self.getname(item) for item in contents]) # If the common_prefix does not end with a slash, check that is a # directory and all other files are contained in it - common_prefix_member = self.getmember(common_prefix) - if ( - len(common_prefix) >= 1 - and not common_prefix.endswith(os.sep) - and common_prefix_member - and self.isdir(common_prefix_member) - and all(self.getname(item).startswith(common_prefix + os.sep) for item in contents if self.isfile(item)) - ): - common_prefix += os.sep + if len(common_prefix) >= 1 and not common_prefix.endswith(os.sep): + common_prefix_member = self.getmember(common_prefix) + if ( + common_prefix_member is not None + and self.isdir(common_prefix_member) + and all( + self.getname(item).startswith(common_prefix + os.sep) for item in contents if self.isfile(item) + ) + ): + common_prefix += os.sep if not common_prefix.endswith(os.sep): common_prefix = "" return common_prefix diff --git a/lib/galaxy/webapps/galaxy/api/workflows.py b/lib/galaxy/webapps/galaxy/api/workflows.py index 9348e25a977b..118104eb46e7 100644 --- a/lib/galaxy/webapps/galaxy/api/workflows.py +++ b/lib/galaxy/webapps/galaxy/api/workflows.py @@ -240,6 +240,7 @@ def create(self, trans: GalaxyWebTransaction, payload=None, **kwd): archive_source = payload.get("archive_source") archive_file = payload.get("archive_file") archive_data = None + uploaded_file_name = None if archive_source: validate_uri_access(archive_source, trans.user_is_admin, trans.app.config.fetch_url_allowlist_ips) if archive_source.startswith("file://"): @@ -265,15 +266,17 @@ def create(self, trans: GalaxyWebTransaction, payload=None, **kwd): raise exceptions.MessageException(f"Failed to open URL '{archive_source}'.") elif hasattr(archive_file, "file"): uploaded_file = archive_file.file - uploaded_file_name = uploaded_file.name - if os.path.getsize(os.path.abspath(uploaded_file_name)) > 0: + uploaded_file_name = os.path.abspath(uploaded_file.name) + if os.path.getsize(uploaded_file_name) > 0: archive_data = util.unicodify(uploaded_file.read()) import_source = "uploaded file" else: raise exceptions.MessageException("You attempted to upload an empty file.") else: raise exceptions.MessageException("Please provide a URL or file.") - return self.__api_import_from_archive(trans, archive_data, import_source, payload=payload) + return self.__api_import_from_archive( + trans, archive_data, import_source, payload=payload, from_path=uploaded_file_name + ) if "from_history_id" in payload: from_history_id = payload.get("from_history_id") @@ -573,10 +576,14 @@ def 
get_tool_predictions(self, trans: ProvidesUserContext, payload, **kwd): # # -- Helper methods -- # - def __api_import_from_archive(self, trans: GalaxyWebTransaction, archive_data, source=None, payload=None): + def __api_import_from_archive( + self, trans: GalaxyWebTransaction, archive_data, source=None, payload=None, from_path=None + ): payload = payload or {} try: data = json.loads(archive_data) + if from_path is not None: + data.update({"src": "from_path", "path": from_path}) except Exception: if "GalaxyWorkflow" in archive_data: data = {"yaml_content": archive_data} diff --git a/lib/galaxy/webapps/galaxy/controllers/tool_runner.py b/lib/galaxy/webapps/galaxy/controllers/tool_runner.py index e6bf157400b1..1fcdbc853fa1 100644 --- a/lib/galaxy/webapps/galaxy/controllers/tool_runner.py +++ b/lib/galaxy/webapps/galaxy/controllers/tool_runner.py @@ -72,7 +72,7 @@ def __tool_404__(): if not tool.allow_user_access(trans.user): return __tool_404__() # FIXME: Tool class should define behavior - if tool.tool_type in ["default", "interactivetool"]: + if tool.tool_type in ["default", "interactivetool", "cwl", "galactic_cwl"]: return trans.response.send_redirect(url_for(controller="root", tool_id=tool_id)) # execute tool without displaying form diff --git a/lib/galaxy/webapps/galaxy/services/_fetch_util.py b/lib/galaxy/webapps/galaxy/services/_fetch_util.py index ba5f33f3c756..e4723b231c57 100644 --- a/lib/galaxy/webapps/galaxy/services/_fetch_util.py +++ b/lib/galaxy/webapps/galaxy/services/_fetch_util.py @@ -59,7 +59,9 @@ def validate_and_normalize_targets(trans, payload): payload["check_content"] = trans.app.config.check_upload_content def check_src(item): - validate_datatype_extension(datatypes_registry=trans.app.datatypes_registry, ext=item.get("ext")) + item_ext = validate_datatype_extension(datatypes_registry=trans.app.datatypes_registry, ext=item.get("ext")) + if item_ext: + item["ext"] = item_ext # Normalize file:// URLs into paths. if item["src"] == "url": diff --git a/lib/galaxy/webapps/galaxy/services/dataset_collections.py b/lib/galaxy/webapps/galaxy/services/dataset_collections.py index e37335d829f0..2c21909e095d 100644 --- a/lib/galaxy/webapps/galaxy/services/dataset_collections.py +++ b/lib/galaxy/webapps/galaxy/services/dataset_collections.py @@ -122,7 +122,7 @@ def create(self, trans: ProvidesHistoryContext, payload: CreateNewCollectionPayl :returns: element view of new dataset collection """ # TODO: Error handling... - create_params = api_payload_to_create_params(payload.dict(exclude_unset=True)) + create_params = api_payload_to_create_params(payload.dict(exclude_unset=True, by_alias=True)) if payload.instance_type == "history": if payload.history_id is None: raise exceptions.RequestParameterInvalidException("Parameter history_id is required.") diff --git a/lib/galaxy/webapps/galaxy/services/tools.py b/lib/galaxy/webapps/galaxy/services/tools.py index 6897965d112f..69c28e7a27b3 100644 --- a/lib/galaxy/webapps/galaxy/services/tools.py +++ b/lib/galaxy/webapps/galaxy/services/tools.py @@ -142,9 +142,7 @@ def _create(self, trans: ProvidesHistoryContext, payload, **kwd): target_history = None # Set up inputs. - inputs = payload.get("inputs", {}) - if not isinstance(inputs, dict): - raise exceptions.RequestParameterInvalidException(f"inputs invalid {inputs}") + inputs = tool.inputs_from_dict(payload) # Find files coming in as multipart file data and add to inputs. 
for k, v in payload.items(): diff --git a/lib/galaxy/workflow/modules.py b/lib/galaxy/workflow/modules.py index f5c2da9b6dda..3a5f435c4fe3 100644 --- a/lib/galaxy/workflow/modules.py +++ b/lib/galaxy/workflow/modules.py @@ -77,6 +77,7 @@ BooleanToolParameter, DataCollectionToolParameter, DataToolParameter, + FieldTypeToolParameter, FloatToolParameter, HiddenToolParameter, IntegerToolParameter, @@ -141,7 +142,7 @@ class ConditionalStepWhen(BooleanToolParameter): pass -def to_cwl(value, hda_references, step): +def to_cwl(value, hda_references, step, require_ok=True): element_identifier = None if isinstance(value, model.HistoryDatasetCollectionAssociation): value = value.collection @@ -154,7 +155,8 @@ def to_cwl(value, hda_references, step): if not value.dataset.in_ready_state(): why = f"dataset [{value.id}] is needed for valueFrom expression and is non-ready" raise DelayedWorkflowEvaluation(why=why) - if not value.is_ok: + if require_ok and not value.is_ok: + # materialize and delay ? raise FailWorkflowEvaluation( why=InvocationFailureDatasetFailed( reason=FailureReason.dataset_failed, hda_id=value.id, workflow_step_id=step.id @@ -275,6 +277,7 @@ class WorkflowModule: def __init__(self, trans, content_id=None, **kwds): self.trans = trans + self.app = trans.app self.content_id = content_id self.state = DefaultToolState() @@ -550,12 +553,34 @@ def _find_collections_to_match(self, progress: "WorkflowProgress", step, all_inp for input_dict in all_inputs: name = input_dict["name"] + + step_input = step.inputs_by_name.get(name, None) + scatter_type = "dotproduct" + if step_input and step_input.scatter_type: + scatter_type = step_input.scatter_type + assert scatter_type in ["dotproduct", "disabled"], f"Unimplemented scatter type [{scatter_type}]" + + subworkflow_structure = progress.subworkflow_structure + if subworkflow_structure and subworkflow_structure.is_leaf and scatter_type == "disabled": + continue + data = progress.replacement_for_input(self.trans, step, input_dict) - can_map_over = hasattr(data, "collection") # and data.collection.allow_implicit_mapping + can_map_over = hasattr(data, "collection") and data.collection.allow_implicit_mapping if not can_map_over: continue + if subworkflow_structure and not subworkflow_structure.is_leaf: + if not subworkflow_structure.collection_type_description.is_subcollection_of_type( + data.collection.collection_type, proper=False + ): + template = "Workflow input replacement of collection type [%s] is not a super collection of workflow collection type [%s]." 
+ message = template % ( + data.collection.collection_type, + subworkflow_structure.collection_type_description, + ) + raise Exception(message) + is_data_param = input_dict["input_type"] == "dataset" is_data_collection_param = input_dict["input_type"] == "dataset_collection" if is_data_param or is_data_collection_param: @@ -626,6 +651,46 @@ def _find_collections_to_match(self, progress: "WorkflowProgress", step, all_inp return collections_to_match +def build_extra_step_state( + trans, + step: WorkflowStep, + progress: "WorkflowProgress", + iteration_elements, + execution_state=None, + all_inputs_by_name: Optional[Dict[str, Dict[str, Any]]] = None, +): + extra_step_state = {} + for step_input in step.inputs: + step_input_name = step_input.name + if iteration_elements and step_input_name in iteration_elements: # noqa: B023 + # extra state is set in execution slice, use it + extra_step_state[step_input_name] = iteration_elements[step_input_name] # noqa: B023 + continue + input_in_execution_state = execution_state and step_input_name not in execution_state.inputs + if input_in_execution_state and all_inputs_by_name is not None: + if step_input_name in all_inputs_by_name: + # extra state value comes from input state + extra_step_state[step_input_name] = progress.replacement_for_input( + trans, step, all_inputs_by_name[step_input_name] + ) + continue + # Might be needed someday... + # elif step_input.default_value_set: + # extra_step_state[step_input_name] = step_input.default_value + else: + # extra state value comes from connection + extra_step_state[step_input_name] = progress.replacement_for_connection( + step_input.connections[0], is_data=True + ) + continue + if execution_state is None: + extra_step_state[step_input_name] = progress.replacement_for_connection( + step_input.connections[0], is_data=True + ) + + return extra_step_state + + class SubWorkflowModule(WorkflowModule): # Two step improvements to build runtime inputs for subworkflow modules # - First pass verify nested workflow doesn't have an RuntimeInputs @@ -675,6 +740,7 @@ def get_all_inputs(self, data_only=False, connectable_only=False): if hasattr(self.subworkflow, "input_steps"): for step in self.subworkflow.input_steps: name = step.label + step_module = module_factory.from_workflow_step(self.trans, step) if not name: step_module = module_factory.from_workflow_step(self.trans, step) name = f"{step.order_index}:{step_module.get_name()}" @@ -692,6 +758,13 @@ def get_all_inputs(self, data_only=False, connectable_only=False): if step_type == "parameter_input": input["type"] = step.tool_inputs["parameter_type"] input["optional"] = step.tool_inputs.get("optional", False) + + # This predated the above lines in the CWL branch but looks odd now... + # not hurting anything per se but should be fixed up ideally... 
+ collection_type = getattr(step_module, "collection_type", None) + if collection_type: + input["collection_types"] = [collection_type] + inputs.append(input) return inputs @@ -804,15 +877,9 @@ def execute( else: # Got a conditional step and we could potentially run it, # so we have to build the step state and evaluate the expression - extra_step_state = {} - for step_input in step.inputs: - step_input_name = step_input.name - if iteration_elements and step_input_name in iteration_elements: # noqa: B023 - value = iteration_elements[step_input_name] # noqa: B023 - else: - value = progress.replacement_for_connection(step_input.connections[0], is_data=True) - extra_step_state[step_input_name] = value - + extra_step_state = build_extra_step_state( + trans, step, progress=progress, iteration_elements=iteration_elements + ) when_values.append( evaluate_value_from_expressions( progress, step, execution_state={}, extra_step_state=extra_step_state @@ -1211,6 +1278,7 @@ def get_inputs(self): {"value": "boolean", "label": "Boolean (True or False)"}, {"value": "color", "label": "Color"}, {"value": "directory_uri", "label": "Directory URI"}, + {"value": "field", "label": "Field"}, ] input_parameter_type = SelectToolParameter(None, select_source) # encode following loop in description above instead @@ -2258,13 +2326,47 @@ def decode_runtime_state(self, step, runtime_state): if self.tool: state = super().decode_runtime_state(step, runtime_state) if RUNTIME_STEP_META_STATE_KEY in runtime_state: - self.__restore_step_meta_runtime_state(json.loads(runtime_state[RUNTIME_STEP_META_STATE_KEY])) + self.__restore_step_meta_runtime_state(safe_loads(runtime_state[RUNTIME_STEP_META_STATE_KEY])) return state else: raise ToolMissingException( f"Tool {self.tool_id} missing. Cannot recover runtime state.", tool_id=self.tool_id ) + def evaluate_value_from_expressions(self, progress, step, execution_state, extra_step_state): + value_from_expressions = {} + replacements: Dict = {} + + for key, value in execution_state.inputs.items(): + step_input = step.inputs_by_name.get(key) + if step_input: + if step_input.value_from is not None: + value_from_expressions[key] = step_input.value_from + elif step_input.default_value is not None and value is None: + value_from_expressions[key] = step_input.default_value + + if not value_from_expressions: + return replacements + + hda_references: List[model.HistoryDatasetAssociation] = [] + step_state = {} + for key, value in extra_step_state.items(): + step_state[key] = to_cwl(value, hda_references=hda_references, step=step) + for key, value in execution_state.inputs.items(): + # require_ok = False for deferred datasets ... might instead need to materialize ?? 
+ step_state[key] = to_cwl(value, hda_references=hda_references, step=step, require_ok=False) + + for key, value_from in value_from_expressions.items(): + as_cwl_value = do_eval( + value_from, + step_state, + context=step_state[key], + ) + new_val = from_cwl(as_cwl_value, hda_references=hda_references, progress=progress) + replacements[key] = new_val + + return replacements + def execute( self, trans, progress: "WorkflowProgress", invocation_step, use_cached_job: bool = False ) -> Optional[bool]: @@ -2310,20 +2412,44 @@ def execute( def callback(input, prefixed_name: str, **kwargs): input_dict = all_inputs_by_name[prefixed_name] - replacement: Union[model.Dataset, NoReplacement] = NO_REPLACEMENT + replacement: Any = NO_REPLACEMENT if iteration_elements and prefixed_name in iteration_elements: # noqa: B023 replacement = iteration_elements[prefixed_name] # noqa: B023 else: replacement = progress.replacement_for_input(trans, step, input_dict) if replacement is not NO_REPLACEMENT: + # We need to check if the replacement is an expression tool null, + # since that would mean that we have to pick a possible default value + dataset_instance: Optional[model.DatasetInstance] = None + if isinstance(replacement, model.DatasetCollectionElement): + dataset_instance = replacement.hda + elif isinstance(replacement, model.DatasetInstance): + dataset_instance = replacement + + if dataset_instance and dataset_instance.extension == "expression.json": + # We could do this only if there is a default value on a step + if not dataset_instance.dataset.in_ready_state(): + why = f"dataset [{dataset_instance.id}] is needed for non-data connection and is non-ready" + raise DelayedWorkflowEvaluation(why=why) + + if not dataset_instance.is_ok: + raise CancelWorkflowEvaluation( + why=InvocationFailureDatasetFailed( + reason=FailureReason.dataset_failed, + workflow_step_id=step.id, + hda_id=dataset_instance.id, + dependent_workflow_step_id=None, + ) + ) + + with open(dataset_instance.get_file_name()) as f: + replacement = json.loads(dataset_instance.peek) + if replacement is None: + return None + if not isinstance(input, BaseDataToolParameter): # Probably a parameter that can be replaced - dataset_instance: Optional[model.DatasetInstance] = None - if isinstance(replacement, model.DatasetCollectionElement): - dataset_instance = replacement.hda - elif isinstance(replacement, model.DatasetInstance): - dataset_instance = replacement if dataset_instance and dataset_instance.extension == "expression.json": with open(dataset_instance.get_file_name()) as f: replacement = json.load(f) @@ -2334,6 +2460,41 @@ def callback(input, prefixed_name: str, **kwargs): if isinstance(input, ConditionalStepWhen) and bool(replacement) is False: raise SkipWorkflowStepEvaluation + is_data = ( + isinstance(input, DataToolParameter) + or isinstance(input, DataCollectionToolParameter) + or isinstance(input, FieldTypeToolParameter) + ) + if ( + not is_data + and isinstance(replacement, model.HistoryDatasetAssociation) + and replacement.ext == "expression.json" + ): + if not replacement.dataset.in_ready_state(): + why = f"dataset [{replacement.id}] is needed for non-data connection and is non-ready" + raise DelayedWorkflowEvaluation(why=why) + + if not replacement.is_ok: + raise CancelWorkflowEvaluation( + why=InvocationFailureDatasetFailed( + reason=FailureReason.dataset_failed, + workflow_step_id=step.id, + hda_id=replacement.id, + dependent_workflow_step_id=None, + ) + ) + + with open(replacement.get_file_name()) as f: + replacement = 
safe_loads(f.read()) + + if isinstance(input, FieldTypeToolParameter): + if isinstance(replacement, model.HistoryDatasetAssociation): + return {"src": "hda", "value": replacement} + elif isinstance(replacement, model.HistoryDatasetCollectionAssociation): + return {"src": "hdca", "value": replacement} + elif replacement is not NO_REPLACEMENT: + return {"src": "json", "value": replacement} + return replacement try: @@ -2349,34 +2510,19 @@ def callback(input, prefixed_name: str, **kwargs): message = f"Error due to input mapping of '{unicodify(k)}' in tool '{tool.id}'. A common cause of this is conditional outputs that cannot be determined until runtime, please review workflow step {step.order_index + 1}." raise exceptions.MessageException(message) + extra_step_state = build_extra_step_state( + trans, + step, + progress=progress, + iteration_elements=iteration_elements, + execution_state=execution_state, + all_inputs_by_name=all_inputs_by_name, + ) + if step.when_expression and when_value is not False: - extra_step_state = {} - for step_input in step.inputs: - step_input_name = step_input.name - input_in_execution_state = step_input_name not in execution_state.inputs - if input_in_execution_state: - if step_input_name in all_inputs_by_name: - if iteration_elements and step_input_name in iteration_elements: # noqa: B023 - value = iteration_elements[step_input_name] # noqa: B023 - else: - value = progress.replacement_for_input(trans, step, all_inputs_by_name[step_input_name]) - # TODO: only do this for values... is everything with a default - # this way a field parameter? I guess not? - extra_step_state[step_input_name] = value - # Might be needed someday... - # elif step_input.default_value_set: - # extra_step_state[step_input_name] = step_input.default_value - else: - if iteration_elements and step_input_name in iteration_elements: # noqa: B023 - value = iteration_elements[step_input_name] # noqa: B023 - else: - value = progress.replacement_for_connection(step_input.connections[0], is_data=True) - extra_step_state[step_input_name] = value - - if when_value is not False: - when_value = evaluate_value_from_expressions( - progress, step, execution_state=execution_state, extra_step_state=extra_step_state - ) + when_value = evaluate_value_from_expressions( + progress, step, execution_state=execution_state, extra_step_state=extra_step_state + ) if when_value is not None: # Track this more formally ? 
execution_state.inputs["__when_value__"] = when_value @@ -2385,6 +2531,36 @@ def callback(input, prefixed_name: str, **kwargs): if unmatched_input_connections: log.warning(f"Failed to use input connections for inputs [{unmatched_input_connections}]") + expression_replacements = self.evaluate_value_from_expressions( + progress, + step, + execution_state, + extra_step_state, + ) + + def expression_callback(input, prefixed_name, **kwargs): + history = trans.history + app = trans.app + if prefixed_name in expression_replacements: # noqa: B023 + expression_replacement = expression_replacements[prefixed_name] # noqa: B023 + if isinstance(input, FieldTypeToolParameter): + return {"src": "json", "value": expression_replacement} + else: + return expression_replacement + # This is not the right spot to fill in a tool default, + # but at this point we know there was no replacement via step defaults or value_from + value = kwargs["value"] + if isinstance(value, dict) and value.get("class") == "File": + value = raw_to_galaxy(app, history, kwargs["value"]) + return {"src": "hda", "value": value} + + return NO_REPLACEMENT + + # Replace expression values with those calculated... + visit_input_values( + tool.inputs, execution_state.inputs, expression_callback, no_replacement_value=NO_REPLACEMENT + ) + param_combinations.append(execution_state.inputs) complete = False @@ -2597,6 +2773,34 @@ def load_module_sections(trans): return module_sections +class EphemeralCollection: + """Interface for collecting datasets together in workflows and treating as collections. + + These aren't real collections in the database - just datasets grouped together + in some way by workflows for passing data around as collections. + """ + + # Used to distinguish between datasets and collections frequently. + ephemeral = True + history_content_type = "dataset_collection" + name = "Dynamically generated collection" + + def __init__(self, collection, history): + self.collection = collection + self.history = history + + hdca = model.HistoryDatasetCollectionAssociation( + collection=collection, + history=history, + ) + history.add_dataset_collection(hdca) + self.persistent_object = hdca + + @property + def elements(self): + return self.collection.elements + + class DelayedWorkflowEvaluation(Exception): def __init__(self, why=None): self.why = why @@ -2639,6 +2843,7 @@ def inject(self, step: WorkflowStep, step_args=None, steps=None, **kwargs): step.upgrade_messages = {} # Make connection information available on each step by input name. + step.setup_inputs_by_name() step.setup_input_connections_by_name() # Populate module.
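Side note (not part of the patch): the `FieldTypeToolParameter` handling above expects field-typed values to arrive in a small `{"src": ..., "value": ...}` envelope, with `src` one of `json`, `hda`, or `hdca`. This is the same shape the API tests later in this diff post for CWL tool inputs. A minimal sketch of the literal-value case follows; the helper name `as_field_json_value` is illustrative only and is not part of Galaxy.

```python
# Illustrative only: field-type inputs carry plain JSON values wrapped as
# {"src": "json", "value": ...}; datasets and collections use "hda"/"hdca" as src instead.
def as_field_json_value(value):
    """Wrap a literal Python value the way field-typed inputs are posted in the tests below."""
    return {"src": "json", "value": value}


# e.g. cat1-tool.cwl's "numbering" flag is sent as {"numbering": {"src": "json", "value": True}}
assert as_field_json_value(True) == {"src": "json", "value": True}
```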
diff --git a/lib/galaxy/workflow/run.py b/lib/galaxy/workflow/run.py index d634f678999a..155eec2372fa 100644 --- a/lib/galaxy/workflow/run.py +++ b/lib/galaxy/workflow/run.py @@ -395,6 +395,10 @@ def maximum_jobs_to_schedule_or_none(self) -> Optional[int]: else: return None + @property + def trans(self): + return self.module_injector.trans + def record_executed_job_count(self, job_count: int) -> None: self.jobs_scheduled_this_iteration += job_count @@ -430,6 +434,102 @@ def remaining_steps( remaining_steps.append((step, invocation_step)) return remaining_steps + def replacement_for_input_connections(self, step: "WorkflowStep", input_dict: Dict[str, Any], connections): + replacement = modules.NO_REPLACEMENT + + prefixed_name = input_dict["name"] + step_input = step.inputs_by_name.get(prefixed_name, None) + + merge_type = model.WorkflowStepInput.default_merge_type + if step_input: + merge_type = step_input.merge_type + + is_data = input_dict["input_type"] in ["dataset", "dataset_collection"] + if len(connections) == 1: + replacement = self.replacement_for_connection(connections[0], is_data=is_data) + else: + # We've mapped multiple individual inputs to a single parameter, + # promote output to a collection. + inputs = [] + input_history_content_type = None + input_collection_type = None + for i, c in enumerate(connections): + input_from_connection = self.replacement_for_connection(c, is_data=is_data) + is_data = hasattr(input_from_connection, "history_content_type") + if is_data: + input_history_content_type = input_from_connection.history_content_type + if i == 0: + if input_history_content_type == "dataset_collection": + input_collection_type = input_from_connection.collection.collection_type + else: + input_collection_type = None + else: + if input_collection_type is None: + if input_history_content_type != "dataset": + raise MessageException("Cannot map over a combination of datasets and collections.") + else: + if input_history_content_type != "dataset_collection": + raise MessageException("Cannot merge over combinations of datasets and collections.") + elif input_from_connection.collection.collection_type != input_collection_type: + raise MessageException("Cannot merge collections of different collection types.") + + inputs.append(input_from_connection) + + if input_dict["input_type"] == "dataset_collection": + # TODO: Implement more nested types here... + if input_dict.get("collection_types") != ["list"]: + return self.replacement_for_connection(connections[0], is_data=is_data) + + collection = model.DatasetCollection() + # If individual datasets provided (type is None) - promote to a list.
+ collection.collection_type = input_collection_type or "list" + + next_index = 0 + if input_collection_type is None: + if merge_type == "merge_nested": + raise NotImplementedError() + + for input in inputs: + model.DatasetCollectionElement( + collection=collection, + element=input, + element_index=next_index, + element_identifier=str(next_index), + ) + next_index += 1 + + elif input_collection_type == "list": + if merge_type == "merge_flattened": + for input in inputs: + for dataset_instance in input.dataset_instances: + model.DatasetCollectionElement( + collection=collection, + element=dataset_instance, + element_index=next_index, + element_identifier=str(next_index), + ) + next_index += 1 + elif merge_type == "merge_nested": + # Increase nested level of collection + collection.collection_type = f"list:{input_collection_type}" + for input in inputs: + model.DatasetCollectionElement( + collection=collection, + element=input.collection, + element_index=next_index, + element_identifier=str(next_index), + ) + next_index += 1 + else: + raise NotImplementedError() + + return modules.EphemeralCollection( + collection=collection, + history=self.workflow_invocation.history, + ) + + return replacement + def replacement_for_input(self, trans, step: "WorkflowStep", input_dict: Dict[str, Any]): replacement: Union[ modules.NoReplacement, @@ -455,16 +555,32 @@ def replacement_for_input(self, trans, step: "WorkflowStep", input_dict: Dict[st else: replacement = temp else: - replacement = self.replacement_for_connection(connection[0], is_data=is_data) + if is_data: + for step_input in step.inputs: + if step_input.name == prefixed_name and step_input.value_from: + # This might not be correct since value_from might be an expression to evaluate, + # and default values need to be applied before expressions. + # TODO: check if any tests fail because of this ? + return raw_to_galaxy(trans.app, trans.history, step_input.value_from) + replacement = self.replacement_for_input_connections( + step, + input_dict, + connection, + ) elif step.state and (state_input := get_path(step.state.inputs, nested_key_to_path(prefixed_name), None)): # workflow submitted with step parameters populates state directly # via populate_module_and_state replacement = state_input else: for step_input in step.inputs: - if step_input.name == prefixed_name and step_input.default_value_set: + if step_input.name == prefixed_name and (step_input.default_value or step_input.value_from): if is_data: - replacement = raw_to_galaxy(trans.app, trans.history, step_input.default_value) + # as above, this might not be correct since the default value needs to be applied + # before the value_from evaluation occurs. + # TODO: check if any tests fail because of this ? 
+ replacement = raw_to_galaxy( + trans.app, trans.history, step_input.value_from or step_input.default_value + ) return replacement def replacement_for_connection(self, connection: "WorkflowStepConnection", is_data: bool = True): @@ -486,14 +602,20 @@ def replacement_for_connection(self, connection: "WorkflowStepConnection", is_da try: replacement = step_outputs[output_name] except KeyError: - raise modules.FailWorkflowEvaluation( - why=InvocationFailureOutputNotFound( - reason=FailureReason.output_not_found, - workflow_step_id=connection.input_step_id, - output_name=output_name, - dependent_workflow_step_id=output_step_id, + if connection.non_data_connection: + replacement = modules.NO_REPLACEMENT + else: + # If this is not an implicit connection (the state of which is checked before in `check_implicitly_dependent_steps`) + # we must resolve this. + raise modules.FailWorkflowEvaluation( + why=InvocationFailureOutputNotFound( + reason=FailureReason.output_not_found, + workflow_step_id=connection.input_step_id, + output_name=output_name, + dependent_workflow_step_id=output_step_id, + ) ) - ) + if isinstance(replacement, model.HistoryDatasetCollectionAssociation): if not replacement.collection.populated: if not replacement.waiting_for_elements: @@ -593,6 +715,14 @@ def set_outputs_for_input( if step.label and step.type == "parameter_input" and "output" in outputs: self.runtime_replacements[step.label] = str(outputs["output"]) + + output = outputs.get("output") + # TODO: handle extra files and directory types and collections and all the stuff... + if output and isinstance(output, dict) and output.get("class") == "File": + primary_data = self.raw_to_galaxy(output) + outputs["output"] = primary_data + + log.debug("outputs are %s", outputs) self.set_step_outputs(invocation_step, outputs, already_persisted=already_persisted) def effective_replacement_dict(self): @@ -699,20 +829,24 @@ def subworkflow_progress( subworkflow = subworkflow_invocation.workflow subworkflow_inputs = {} for input_subworkflow_step in subworkflow.input_steps: - connection_found = False + connections = [] subworkflow_step_id = input_subworkflow_step.id for input_connection in step.input_connections: if input_connection.input_subworkflow_step_id == subworkflow_step_id: - is_data = input_connection.output_step.type != "parameter_input" - replacement = self.replacement_for_connection( - input_connection, - is_data=is_data, - ) - subworkflow_inputs[subworkflow_step_id] = replacement - connection_found = True - break + connections.append(input_connection) + + if connections: + replacement = self.replacement_for_input_connections( + step=step, + input_dict={ + "name": input_subworkflow_step.label, # TODO: only module knows this unfortunately + "input_type": input_subworkflow_step.input_type, + }, + connections=connections, + ) + subworkflow_inputs[subworkflow_step_id] = replacement - if not connection_found and not input_subworkflow_step.input_optional: + if not connections and not input_subworkflow_step.input_optional: raise modules.FailWorkflowEvaluation( InvocationFailureOutputNotFound( reason=FailureReason.output_not_found, @@ -721,7 +855,6 @@ def subworkflow_progress( dependent_workflow_step_id=input_connection.output_step.id, ) ) - return WorkflowProgress( subworkflow_invocation, subworkflow_inputs, diff --git a/lib/galaxy/workflow/run_request.py b/lib/galaxy/workflow/run_request.py index baeb5492dd6d..f1e640683445 100644 --- a/lib/galaxy/workflow/run_request.py +++ b/lib/galaxy/workflow/run_request.py @@ -366,7 +366,10 @@ def
build_workflow_run_configs( if step.type == "parameter_input": if normalized_key in param_map: value = param_map.pop(normalized_key) - normalized_inputs[normalized_key] = value["input"] + input_value = value["input"] + if isinstance(input_value, dict) and input_value.get("src") == "json": + input_value = input_value.get("value") + normalized_inputs[normalized_key] = input_value steps_by_id = workflow.steps_by_id # Set workflow inputs. @@ -384,7 +387,12 @@ def build_workflow_run_configs( raise exceptions.RequestParameterInvalidException( f"{step.label or step.order_index + 1}: {e.message_suffix}" ) - continue + if ( + step.tool_inputs["parameter_type"] != "field" + or not isinstance(input_dict, dict) + or "id" not in input_dict + ): + continue if "src" not in input_dict: raise exceptions.RequestParameterInvalidException( f"Not input source type defined for input '{input_dict}'." diff --git a/lib/galaxy/workflow/workflow_parameter_input_definitions.py b/lib/galaxy/workflow/workflow_parameter_input_definitions.py index ee59a87d42a9..69923bcbc3fc 100644 --- a/lib/galaxy/workflow/workflow_parameter_input_definitions.py +++ b/lib/galaxy/workflow/workflow_parameter_input_definitions.py @@ -8,13 +8,14 @@ BooleanToolParameter, ColorToolParameter, DirectoryUriToolParameter, + FieldTypeToolParameter, FloatToolParameter, IntegerToolParameter, TextToolParameter, ) -INPUT_PARAMETER_TYPES = Literal["text", "integer", "float", "boolean", "color", "directory_uri"] -default_source_type = Dict[str, Union[int, float, bool, str]] +INPUT_PARAMETER_TYPES = Literal["text", "integer", "float", "boolean", "color", "directory_uri", "field"] +default_source_type = Dict[str, Union[None, int, float, bool, str]] tool_param_type = Union[ TextToolParameter, IntegerToolParameter, @@ -22,6 +23,7 @@ BooleanToolParameter, ColorToolParameter, DirectoryUriToolParameter, + FieldTypeToolParameter, ] @@ -43,4 +45,6 @@ def get_default_parameter(param_type: INPUT_PARAMETER_TYPES) -> tool_param_type: input_default_value = ColorToolParameter(None, default_source) elif param_type == "directory_uri": input_default_value = DirectoryUriToolParameter(None, default_source) + elif param_type == "field": + input_default_value = FieldTypeToolParameter(None, default_source) return input_default_value diff --git a/lib/galaxy_ext/cwl/__init__.py b/lib/galaxy_ext/cwl/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/lib/galaxy_ext/cwl/handle_outputs.py b/lib/galaxy_ext/cwl/handle_outputs.py new file mode 100644 index 000000000000..9d38fbab0796 --- /dev/null +++ b/lib/galaxy_ext/cwl/handle_outputs.py @@ -0,0 +1,18 @@ +""" +""" + +import logging +import os +import sys + +# insert *this* galaxy before all others on sys.path +sys.path.insert(1, os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir))) + +from galaxy.tool_util.cwl import handle_outputs + +logging.basicConfig() +log = logging.getLogger(__name__) + + +def relocate_dynamic_outputs(): + handle_outputs() diff --git a/lib/galaxy_test/api/test_dataset_collections.py b/lib/galaxy_test/api/test_dataset_collections.py index d7710c57b2fa..372d693d4fd0 100644 --- a/lib/galaxy_test/api/test_dataset_collections.py +++ b/lib/galaxy_test/api/test_dataset_collections.py @@ -1,3 +1,4 @@ +import json import zipfile from io import BytesIO from typing import List @@ -100,6 +101,106 @@ def test_create_list_of_new_pairs(self): pair_1_element_1 = pair_elements[0] assert pair_1_element_1["element_index"] == 0 + def test_create_record(self, history_id): + 
contents = [ + ("condition", "1\t2\t3"), + ("control1", "4\t5\t6"), + ("control2", "7\t8\t9"), + ] + record_identifiers = self.dataset_collection_populator.list_identifiers(history_id, contents) + fields = [ + {"name": "condition", "type": "File"}, + {"name": "control1", "type": "File"}, + {"name": "control2", "type": "File"}, + ] + payload = dict( + name="a record", + instance_type="history", + history_id=history_id, + element_identifiers=record_identifiers, + collection_type="record", + fields=fields, + ) + create_response = self._post("dataset_collections", payload, json=True) + dataset_collection = self._check_create_response(create_response) + assert dataset_collection["collection_type"] == "record" + assert dataset_collection["name"] == "a record" + returned_collections = dataset_collection["elements"] + assert len(returned_collections) == 3, dataset_collection + record_pos_0_element = returned_collections[0] + self._assert_has_keys(record_pos_0_element, "element_index") + record_pos_0_object = record_pos_0_element["object"] + self._assert_has_keys(record_pos_0_object, "name", "history_content_type") + + def test_record_requires_fields(self, history_id): + contents = [ + ("condition", "1\t2\t3"), + ("control1", "4\t5\t6"), + ("control2", "7\t8\t9"), + ] + record_identifiers = self.dataset_collection_populator.list_identifiers(history_id, contents) + payload = dict( + name="a record", + instance_type="history", + history_id=history_id, + element_identifiers=json.dumps(record_identifiers), + collection_type="record", + ) + create_response = self._post("dataset_collections", payload) + self._assert_status_code_is(create_response, 400) + + def test_record_auto_fields(self, history_id): + contents = [ + ("condition", "1\t2\t3"), + ("control1", "4\t5\t6"), + ("control2", "7\t8\t9"), + ] + record_identifiers = self.dataset_collection_populator.list_identifiers(history_id, contents) + payload = dict( + name="a record", + instance_type="history", + history_id=history_id, + element_identifiers=record_identifiers, + collection_type="record", + fields="auto", + ) + create_response = self._post("dataset_collections", payload, json=True) + self._check_create_response(create_response) + + def test_record_field_validation(self, history_id): + contents = [ + ("condition", "1\t2\t3"), + ("control1", "4\t5\t6"), + ("control2", "7\t8\t9"), + ] + record_identifiers = self.dataset_collection_populator.list_identifiers(history_id, contents) + too_few_fields = [ + {"name": "condition", "type": "File"}, + {"name": "control1", "type": "File"}, + ] + too_many_fields = [ + {"name": "condition", "type": "File"}, + {"name": "control1", "type": "File"}, + {"name": "control2", "type": "File"}, + {"name": "control3", "type": "File"}, + ] + wrong_name_fields = [ + {"name": "condition", "type": "File"}, + {"name": "control1", "type": "File"}, + {"name": "control3", "type": "File"}, + ] + for fields in [too_few_fields, too_many_fields, wrong_name_fields]: + payload = dict( + name="a record", + instance_type="history", + history_id=history_id, + element_identifiers=json.dumps(record_identifiers), + collection_type="record", + fields=json.dumps(fields), + ) + create_response = self._post("dataset_collections", payload) + self._assert_status_code_is(create_response, 400) + def test_list_download(self): with self.dataset_populator.test_history(require_new=False) as history_id: fetch_response = self.dataset_collection_populator.create_list_in_history( diff --git a/lib/galaxy_test/api/test_tools.py 
b/lib/galaxy_test/api/test_tools.py index 41d982d246c9..ec6704f4727a 100644 --- a/lib/galaxy_test/api/test_tools.py +++ b/lib/galaxy_test/api/test_tools.py @@ -87,6 +87,7 @@ def _run( use_cached_job=False, wait_for_job=False, input_format="legacy", + inputs_representation=None, ): if inputs is None: inputs = {} @@ -97,6 +98,7 @@ def _run( inputs=inputs, history_id=history_id, input_format=input_format, + inputs_representation=inputs_representation, ) if tool_uuid: payload["tool_uuid"] = tool_uuid diff --git a/lib/galaxy_test/api/test_tools_cwl.py b/lib/galaxy_test/api/test_tools_cwl.py new file mode 100644 index 000000000000..76f6346922d4 --- /dev/null +++ b/lib/galaxy_test/api/test_tools_cwl.py @@ -0,0 +1,397 @@ +"""Test CWL Tool Execution via the API.""" + +from typing import ( + Any, + Dict, + Optional, +) + +from typing_extensions import Literal + +from galaxy.tool_util.cwl.representation import USE_FIELD_TYPES +from galaxy_test.api._framework import ApiTestCase +from galaxy_test.base.populators import ( + CwlPopulator, + CwlToolRun, + DatasetPopulator, + skip_without_tool, + WorkflowPopulator, +) + + +class TestCwlTools(ApiTestCase): + """Test CWL Tool Execution via the API.""" + + dataset_populator: DatasetPopulator + + require_admin_user = True + + def setUp(self): + """Setup dataset populator.""" + super().setUp() + self.dataset_populator = DatasetPopulator(self.galaxy_interactor) + workflow_populator = WorkflowPopulator(self.galaxy_interactor) + self.cwl_populator = CwlPopulator(self.dataset_populator, workflow_populator) + + @skip_without_tool("cat1-tool.cwl") + def test_cat1_number(self, history_id: str) -> None: + """Test execution of cat1 using the "normal" Galaxy job API representation.""" + hda1 = _dataset_to_param(self.dataset_populator.new_dataset(history_id, content="1\n2\n3", name="test1")) + if not USE_FIELD_TYPES: + inputs = { + "file1": hda1, + "numbering|_cwl__type_": "boolean", + "numbering|_cwl__value_": True, + } + else: + inputs = { + "file1": hda1, + "numbering": {"src": "json", "value": True}, + } + stdout = self._run_and_get_stdout("cat1-tool.cwl", history_id, inputs, assert_ok=True) + assert stdout == " 1\t1\n 2\t2\n 3\t3\n" + + @skip_without_tool("cat1-tool.cwl") + def test_cat1_number_cwl_json(self, history_id: str) -> None: + """Test execution of cat1 using the "CWL" Galaxy job API representation.""" + hda1 = _dataset_to_param(self.dataset_populator.new_dataset(history_id, content="1\n2\n3")) + inputs = { + "file1": hda1, + "numbering": True, + } + stdout = self._run_and_get_stdout( + "cat1-tool.cwl", history_id, inputs, assert_ok=True, inputs_representation="cwl" + ) + assert stdout == " 1\t1\n 2\t2\n 3\t3\n" + + @skip_without_tool("cat1-tool.cwl") + def test_cat1_number_cwl_json_file(self) -> None: + """Test execution of cat1 using the CWL job definition file.""" + run_object = self.cwl_populator.run_cwl_job( + "cat1-tool.cwl", "test/functional/tools/cwl_tools/v1.0_custom/cat-job.json" + ) + assert isinstance(run_object, CwlToolRun) + stdout = self._get_job_stdout(run_object.job_id) + assert stdout == "Hello world!\n" + + @skip_without_tool("cat1-tool.cwl") + def test_cat1_number_cwl_n_json_file(self) -> None: + run_object = self.cwl_populator.run_cwl_job( + "cat1-tool.cwl", "test/functional/tools/cwl_tools/v1.0_custom/cat-n-job.json" + ) + assert isinstance(run_object, CwlToolRun) + stdout = self._get_job_stdout(run_object.job_id) + assert stdout == " 1\tHello world!\n" + + @skip_without_tool("cat2-tool.cwl") + def test_cat2(self) -> None: + run_object 
= self.cwl_populator.run_cwl_job( + "cat2-tool.cwl", "test/functional/tools/cwl_tools/v1.0_custom/cat-job.json" + ) + assert isinstance(run_object, CwlToolRun) + stdout = self._get_job_stdout(run_object.job_id) + assert stdout == "Hello world!\n" + + @skip_without_tool("galactic_cat.cwl#galactic_cat") + def test_galactic_cat_1(self, history_id: str) -> None: + hda_id = self.dataset_populator.new_dataset(history_id, name="test_dataset.txt")["id"] + self.dataset_populator.wait_for_history(history_id, assert_ok=True) + inputs = {"input1": {"src": "hda", "id": hda_id}} + run_response = self._run("galactic_cat.cwl#galactic_cat", history_id, inputs, assert_ok=True) + dataset = run_response["outputs"][0] + content = self.dataset_populator.get_history_dataset_content(history_id, dataset=dataset) + assert content.strip() == "TestData123", content + + @skip_without_tool("galactic_record_input.cwl#galactic_record_input") + def test_galactic_record_input(self, history_id: str) -> None: + hda1_id = self.dataset_populator.new_dataset(history_id, content="moo", name="test_dataset.txt")["id"] + hda2_id = self.dataset_populator.new_dataset(history_id, content="cow dog foo", name="test_dataset.txt")["id"] + self.dataset_populator.wait_for_history(history_id, assert_ok=True) + inputs = { + "input1": {"src": "hda", "id": hda1_id}, + "input2": {"src": "hda", "id": hda2_id}, + } + run_response = self._run("galactic_record_input.cwl#galactic_record_input", history_id, inputs, assert_ok=True) + dataset = run_response["outputs"][0] + content = self.dataset_populator.get_history_dataset_content(history_id, dataset=dataset) + assert content.strip() == "moo", content + + dataset = run_response["outputs"][1] + content = self.dataset_populator.get_history_dataset_content(history_id, dataset=dataset) + assert content.strip() == "cow dog foo", content + + def _run_and_get_stdout(self, tool_id: str, history_id: str, inputs: Dict[str, Any], **kwds) -> str: + response = self._run(tool_id, history_id, inputs, **kwds) + assert "jobs" in response + job = response["jobs"][0] + job_id = job["id"] + final_state = self.dataset_populator.wait_for_job(job_id) + assert final_state == "ok" + return self._get_job_stdout(job_id) + + def _get_job_stdout(self, job_id: str) -> str: + job_details = self.dataset_populator.get_job_details(job_id, full=True) + stdout = job_details.json()["tool_stdout"] + return stdout + + @skip_without_tool("cat3-tool.cwl") + def test_cat3(self, history_id: str) -> None: + hda1 = _dataset_to_param(self.dataset_populator.new_dataset(history_id, content="1\t2\t3")) + inputs = { + "f1": hda1, + } + response = self._run("cat3-tool.cwl", history_id, inputs, assert_ok=True) + output1 = response["outputs"][0] + output1_details = self.dataset_populator.get_history_dataset_details(history_id, dataset=output1) + assert "created_from_basename" in output1_details, output1_details.keys() + assert output1_details["created_from_basename"] == "output.txt", output1_details["created_from_basename"] + output1_content = self.dataset_populator.get_history_dataset_content(history_id, dataset=output1) + assert output1_content == "1\t2\t3\n", output1_content + + @skip_without_tool("sorttool.cwl") + def test_sorttool(self, history_id: str) -> None: + hda1 = _dataset_to_param(self.dataset_populator.new_dataset(history_id, content="1\n2\n3")) + inputs = {"reverse": False, "input": hda1} + response = self._run("sorttool.cwl", history_id, inputs, assert_ok=True) + output1 = response["outputs"][0] + output1_content = 
self.dataset_populator.get_history_dataset_content(history_id, dataset=output1) + assert output1_content == "1\n2\n3\n", output1_content + + @skip_without_tool("sorttool.cwl") + def test_sorttool_reverse(self, history_id: str) -> None: + hda1 = _dataset_to_param(self.dataset_populator.new_dataset(history_id, content="1\n2\n3")) + inputs = {"reverse": True, "input": hda1} + response = self._run("sorttool.cwl", history_id, inputs, assert_ok=True) + output1 = response["outputs"][0] + output1_content = self.dataset_populator.get_history_dataset_content(history_id, dataset=output1) + assert output1_content == "3\n2\n1\n", output1_content + + @skip_without_tool("env-tool1.cwl") + def test_env_tool1(self, history_id: str) -> None: + inputs = { + "in": "Hello World", + } + response = self._run("env-tool1.cwl", history_id, inputs, assert_ok=True) + output1 = response["outputs"][0] + output1_content = self.dataset_populator.get_history_dataset_content(history_id, dataset=output1) + assert output1_content == "Hello World\n" + + @skip_without_tool("optional-output.cwl") + def test_optional_output(self) -> None: + run_object = self.cwl_populator.run_cwl_job( + "optional-output.cwl", "test/functional/tools/cwl_tools/v1.0/v1.0/cat-job.json" + ) + output_file_id = run_object._output_name_to_object("output_file").history_content_id + optional_file_id = run_object._output_name_to_object("optional_file").history_content_id + output_content = self.dataset_populator.get_history_dataset_content( + run_object.history_id, dataset_id=output_file_id + ) + optional_content = self.dataset_populator.get_history_dataset_content( + run_object.history_id, dataset_id=optional_file_id + ) + assert output_content == "Hello world!\n" + assert optional_content == "null" + + @skip_without_tool("optional-output2.cwl") + def test_optional_output2_on(self) -> None: + run_object = self.cwl_populator.run_cwl_job( + "optional-output2.cwl", + job={ + "produce": "do_write", + }, + test_data_directory="test/functional/tools/cwl_tools/v1.0_custom", + ) + output_content = self.dataset_populator.get_history_dataset_content(run_object.history_id) + assert output_content == "bees\n" + + @skip_without_tool("optional-output2.cwl") + def test_optional_output2_off(self) -> None: + run_object = self.cwl_populator.run_cwl_job( + "optional-output2.cwl", + job={ + "produce": "dont_write", + }, + test_data_directory="test/functional/tools/cwl_tools/v1.0_custom", + ) + output_content = self.dataset_populator.get_history_dataset_content(run_object.history_id) + assert output_content == "null" + + @skip_without_tool("index1.cwl") + @skip_without_tool("showindex1.cwl") + def test_index1(self) -> None: + run_object = self.cwl_populator.run_cwl_job( + "index1.cwl", + job={ + "file": {"class": "File", "path": "whale.txt"}, + }, + test_data_directory="test/functional/tools/cwl_tools/v1.0_custom", + ) + output1 = self.dataset_populator.get_history_dataset_details(run_object.history_id) + run_object = self.cwl_populator.run_cwl_job( + "showindex1.cwl", + job={ + "file": { + "src": "hda", + "id": output1["id"], + }, + }, + test_data_directory="test/functional/tools/cwl_tools/v1.0_custom", + history_id=run_object.history_id, + skip_input_staging=True, + ) + output1_content = self.dataset_populator.get_history_dataset_content(run_object.history_id) + assert "call: 1\n" in output1_content, output1_content + + @skip_without_tool("any1.cwl") + def test_any1_0(self) -> None: + run_object = self.cwl_populator.run_cwl_job( + "any1.cwl", + job={"bar": 7}, + 
test_data_directory="test/functional/tools/cwl_tools/v1.0/v1.0/", + ) + output1_content = self.dataset_populator.get_history_dataset_content(run_object.history_id) + assert output1_content == "7", output1_content + + @skip_without_tool("any1.cwl") + def test_any1_1(self) -> None: + run_object = self.cwl_populator.run_cwl_job( + "any1.cwl", + job={"bar": "7"}, + test_data_directory="test/functional/tools/cwl_tools/v1.0/v1.0/", + ) + output1_content = self.dataset_populator.get_history_dataset_content(run_object.history_id) + assert output1_content == '"7"', output1_content + + @skip_without_tool("any1.cwl") + def test_any1_file(self) -> None: + run_object = self.cwl_populator.run_cwl_job( + "any1.cwl", + job={ + "bar": { + "class": "File", + "location": "whale.txt", + } + }, + test_data_directory="test/functional/tools/cwl_tools/v1.0/v1.0/", + ) + output1_content = self.dataset_populator.get_history_dataset_content(run_object.history_id) + self.dataset_populator._summarize_history(run_object.history_id) + assert output1_content == '"File"' + + @skip_without_tool("any1.cwl") + def test_any1_2(self) -> None: + run_object = self.cwl_populator.run_cwl_job( + "any1.cwl", + job={"bar": {"Cow": ["Turkey"]}}, + test_data_directory="test/functional/tools/cwl_tools/v1.0/v1.0/", + ) + output1_content = self.dataset_populator.get_history_dataset_content(run_object.history_id) + assert output1_content == '{"Cow": ["Turkey"]}', output1_content + + @skip_without_tool("null-expression2-tool.cwl") + def test_null_expression_any_bad_1(self) -> None: + """Test explicitly passing null to Any type without a default value fails.""" + run_object = self.cwl_populator.run_cwl_job( + "null-expression2-tool.cwl", + "test/functional/tools/cwl_tools/v1.0/v1.0/null-expression1-job.json", + assert_ok=False, + ) + with self.assertRaises(AssertionError): + run_object.wait() + + @skip_without_tool("null-expression2-tool.cwl") + def test_null_expression_any_bad_2(self) -> None: + """Test Any without defaults can be unspecified.""" + run_object = self.cwl_populator.run_cwl_job( + "null-expression2-tool.cwl", "test/functional/tools/cwl_tools/v1.0/v1.0/empty.json", assert_ok=False + ) + with self.assertRaises(AssertionError): + run_object.wait() + + @skip_without_tool("default_path_custom_1.cwl") + def test_default_path(self) -> None: + # produces no output - just test the job runs okay. + # later come back and verify standard output of the job. 
+ run_object = self.cwl_populator.run_cwl_job("default_path_custom_1.cwl", job={}) + assert isinstance(run_object, CwlToolRun) + stdout = self._get_job_stdout(run_object.job_id) + assert "this is the test file that will be used when calculating an md5sum" in stdout + + @skip_without_tool("parseInt-tool.cwl") + def test_parse_int_tool(self) -> None: + run_object = self.cwl_populator.run_cwl_job( + "parseInt-tool.cwl", "test/functional/tools/cwl_tools/v1.0/v1.0/parseInt-job.json" + ) + output1 = self.dataset_populator.get_history_dataset_details(run_object.history_id, hid=2) + assert output1["state"] == "ok" + output1_content = self.dataset_populator.get_history_dataset_content(run_object.history_id, hid=2) + assert output1_content == "42" + assert output1["extension"] == "expression.json" + + @skip_without_tool("record-output.cwl") + def test_record_output(self) -> None: + run_object = self.cwl_populator.run_cwl_job( + "record-output.cwl", "test/functional/tools/cwl_tools/v1.0/v1.0/record-output-job.json" + ) + assert isinstance(run_object, CwlToolRun) + result_record = run_object.output_collection(0) + assert result_record["collection_type"] == "record" + record_elements = result_record["elements"] + first_element = record_elements[0] + assert first_element["element_identifier"] == "ofoo" + first_hda = first_element["object"] + output1_content = self.dataset_populator.get_history_dataset_content( + run_object.history_id, hid=first_hda["hid"] + ) + assert "Call me Ishmael." in output1_content, f"Expected contents of whale.txt, got [{output1_content}]" + + # def test_dynamic_tool_execution(self) -> None: + # workflow_tool_json = { + # "inputs": [{"inputBinding": {}, "type": "File", "id": "file:///home/john/workspace/galaxy/test/unit/tools/cwl_tools/v1.0/v1.0/count-lines2-wf.cwl#step1/wc/wc_file1"}], + # "stdout": "output.txt", + # "id": "file:///home/john/workspace/galaxy/test/unit/tools/cwl_tools/v1.0/v1.0/count-lines2-wf.cwl#step1/wc", + # "outputs": [{"outputBinding": {"glob": "output.txt"}, "type": "File", "id": "file:///home/john/workspace/galaxy/test/unit/tools/cwl_tools/v1.0/v1.0/count-lines2-wf.cwl#step1/wc/wc_output"}], + # "baseCommand": "wc", + # "class": "CommandLineTool", + # } + + # create_payload = dict( + # representation=json.dumps(workflow_tool_json), + # ) + # create_response = self._post("dynamic_tools", data=create_payload, admin=True) + # self._assert_status_code_is(create_response, 200) + + # TODO: Use mixin so this can be shared with tools test case. 
+ def _run( + self, + tool_id: str, + history_id: str, + inputs: Dict[str, Any], + assert_ok: bool = False, + tool_version: Optional[str] = None, + inputs_representation: Optional[Literal["cwl", "galaxy"]] = None, + ): + payload = self.dataset_populator.run_tool_payload( + tool_id=tool_id, + inputs=inputs, + history_id=history_id, + inputs_representation=inputs_representation, + ) + if tool_version is not None: + payload["tool_version"] = tool_version + create_response = self._post("tools", data=payload) + if assert_ok: + self._assert_status_code_is(create_response, 200) + create = create_response.json() + self._assert_has_keys(create, "outputs") + return create + else: + return create_response + + +def whale_text() -> str: + return open("test/functional/tools/cwl_tools/v1.0/v1.0/whale.txt").read() + + +def _dataset_to_param(dataset: Dict) -> Dict[str, str]: + return dict(src="hda", id=dataset["id"]) diff --git a/lib/galaxy_test/api/test_workflows.py b/lib/galaxy_test/api/test_workflows.py index 6d14cb49e7df..57de8ac69550 100644 --- a/lib/galaxy_test/api/test_workflows.py +++ b/lib/galaxy_test/api/test_workflows.py @@ -66,6 +66,7 @@ WORKFLOW_WITH_OUTPUT_COLLECTION_MAPPING, WORKFLOW_WITH_RULES_1, WORKFLOW_WITH_STEP_DEFAULT_FILE_DATASET_INPUT, + WORKFLOW_WITH_STEP_DEFAULT_FILE_DATASET_INPUT_OVERRIDES_TOOL_DEFAULT, ) from ._framework import ApiTestCase from .sharable import SharingApiTests @@ -5180,6 +5181,17 @@ def test_run_with_default_file_in_step_inline(self): content = self.dataset_populator.get_history_dataset_content(history_id) assert "chr1" in content + def test_run_with_default_file_in_step_inline_overrides_tool_default_file(self): + with self.dataset_populator.test_history() as history_id: + self._run_workflow( + WORKFLOW_WITH_STEP_DEFAULT_FILE_DATASET_INPUT_OVERRIDES_TOOL_DEFAULT, + history_id=history_id, + wait=True, + assert_ok=True, + ) + content = self.dataset_populator.get_history_dataset_content(history_id) + assert ">hg17" in content + def test_conditional_flat_crossproduct_subworkflow(self): parent = yaml.safe_load( """ diff --git a/lib/galaxy_test/api/test_workflows_cwl.py b/lib/galaxy_test/api/test_workflows_cwl.py index b9af5cbc1e2f..c5ed2b5b8c79 100644 --- a/lib/galaxy_test/api/test_workflows_cwl.py +++ b/lib/galaxy_test/api/test_workflows_cwl.py @@ -1,7 +1,18 @@ """Test CWL workflow functionality.""" +import os +import re + from galaxy_test.api.test_workflows import BaseWorkflowsApiTestCase -from galaxy_test.base.populators import CwlPopulator +from galaxy_test.base.populators import ( + CWL_TOOL_DIRECTORY, + CwlPopulator, + CwlWorkflowRun, +) + + +def resolve_path(rel_path: str) -> str: + return os.path.join(CWL_TOOL_DIRECTORY, rel_path) class BaseCwlWorkflowsApiTestCase(BaseWorkflowsApiTestCase): @@ -11,3 +22,136 @@ class BaseCwlWorkflowsApiTestCase(BaseWorkflowsApiTestCase): def setUp(self): super().setUp() self.cwl_populator = CwlPopulator(self.dataset_populator, self.workflow_populator) + + +class TestCwlWorkflows(BaseCwlWorkflowsApiTestCase): + """Test case encompassing CWL workflow tests.""" + + def test_simplest_wf(self, history_id: str) -> None: + """Test simplest workflow.""" + workflow_id = self._load_workflow("v1.0_custom/just-wc-wf.cwl") + workflow_content = self._download_workflow(workflow_id) + for step in workflow_content["steps"].values(): + if "tool_representation" in step: + del step["tool_representation"] + + hda1 = self.dataset_populator.new_dataset( + history_id, content="hello world\nhello all\nhello all in world\nhello" + ) + inputs = {"file1": 
{"src": "hda", "id": hda1["id"]}} + invocation_id = self.workflow_populator.invoke_workflow_and_assert_ok( + workflow_id, history_id, inputs, inputs_by="name" + ) + self.workflow_populator.wait_for_invocation_and_jobs(history_id, workflow_id, invocation_id) + output = self.dataset_populator.get_history_dataset_content(history_id, hid=2) + assert re.search(r"\s+4\s+9\s+47\s+", output) + + def test_load_ids(self) -> None: + workflow_id = self._load_workflow("v1.0/v1.0/search.cwl#main") + workflow_content = self._download_workflow(workflow_id) + for step in workflow_content["steps"].values(): + if "tool_representation" in step: + del step["tool_representation"] + + steps = workflow_content["steps"] + step_3 = steps["3"] + step_4 = steps["4"] + + assert step_3["label"] == "index", step_3 + assert step_4["label"] == "search", step_4 + + def test_count_line1_v1(self, history_id: str) -> None: + """Test simple workflow v1.0/v1.0/count-lines1-wf.cwl.""" + self._run_count_lines_wf("v1.0/v1.0/count-lines1-wf.cwl", history_id) + + def test_count_line1_v1_json(self, history_id: str) -> None: + run_object = self.cwl_populator.run_cwl_job( + resolve_path("v1.0/v1.0/count-lines1-wf.cwl"), + resolve_path("v1.0/v1.0/wc-job.json"), + history_id=history_id, + ) + assert isinstance(run_object, CwlWorkflowRun) + self._check_countlines_wf(history_id, run_object.invocation_id, run_object.workflow_id, expected_count=16) + + def test_count_line2_v1(self, history_id: str) -> None: + """Test simple workflow v1.0/v1.0/count-lines2-wf.cwl.""" + self._run_count_lines_wf("v1.0/v1.0/count-lines2-wf.cwl", history_id) + + def test_count_lines3_v1(self, history_id: str) -> None: + workflow_id = self._load_workflow("v1.0/v1.0/count-lines3-wf.cwl") + fetch_response = self.dataset_collection_populator.create_list_in_history(history_id).json() + hdca = self.dataset_collection_populator.wait_for_fetched_collection(fetch_response) + inputs = {"file1": {"src": "hdca", "id": hdca["id"]}} + invocation_id = self.workflow_populator.invoke_workflow_and_assert_ok( + workflow_id, history_id, inputs, inputs_by="name" + ) + self.workflow_populator.wait_for_invocation_and_jobs(history_id, workflow_id, invocation_id) + hdca = self.dataset_populator.get_history_collection_details(history_id, hid=5) + assert hdca["collection_type"] == "list" + elements = hdca["elements"] + assert len(elements) == 3 + element0 = elements[0]["object"] + assert element0["history_content_type"] == "dataset" + assert element0["state"] == "ok" + assert element0["file_ext"] == "expression.json" + # TODO: ensure this looks like an int[] - it doesn't currently... 
+ + def test_count_lines4_v1(self, history_id: str) -> None: + workflow_id = self._load_workflow("v1.0/v1.0/count-lines4-wf.cwl") + hda1 = self.dataset_populator.new_dataset( + history_id, content="hello world\nhello all\nhello all in world\nhello" + ) + hda2 = self.dataset_populator.new_dataset(history_id, content="moo\ncow\nthat\nis\nall") + inputs = {"file1": {"src": "hda", "id": hda1["id"]}, "file2": {"src": "hda", "id": hda2["id"]}} + invocation_id = self.workflow_populator.invoke_workflow_and_assert_ok( + workflow_id, history_id, inputs, inputs_by="name" + ) + self.workflow_populator.wait_for_invocation_and_jobs(history_id, workflow_id, invocation_id) + self.dataset_populator.get_history_collection_details(history_id, hid=4) + + def test_count_lines4_json(self, history_id: str) -> None: + self.cwl_populator.run_cwl_job( + resolve_path("v1.0/v1.0/count-lines4-wf.cwl"), + resolve_path("v1.0/v1.0/count-lines4-job.json"), + history_id=history_id, + ) + self.dataset_populator.get_history_collection_details(history_id, hid=4) + + def test_scatter_wf1_v1(self, history_id: str) -> None: + self.cwl_populator.run_cwl_job( + resolve_path("v1.0/v1.0/scatter-wf1.cwl"), + resolve_path("v1.0/v1.0/scatter-job1.json"), + history_id=history_id, + ) + self.dataset_populator.get_history_collection_details(history_id, hid=5) + + def _run_count_lines_wf(self, wf_path: str, history_id: str) -> None: + workflow_id = self._load_workflow(wf_path) + hda1 = self.dataset_populator.new_dataset( + history_id, content="hello world\nhello all\nhello all in world\nhello" + ) + inputs = {"file1": {"src": "hda", "id": hda1["id"]}} + invocation_id = self.workflow_populator.invoke_workflow_and_assert_ok( + workflow_id, history_id, inputs, inputs_by="name" + ) + self._check_countlines_wf(history_id, invocation_id, workflow_id) + + def _check_countlines_wf( + self, history_id: str, invocation_id: str, workflow_id: str, expected_count: int = 4 + ) -> None: + self.workflow_populator.wait_for_invocation_and_jobs(history_id, workflow_id, invocation_id) + output = self.dataset_populator.get_history_dataset_content(history_id, hid=3) + assert int(output) == expected_count, output + + def _load_workflow(self, rel_path: str) -> str: + rel_path = rel_path.split("#", 1)[0] + path = resolve_path(rel_path) + data = dict( + from_path=path, + ) + route = "workflows" + upload_response = self._post(route, data=data) + self._assert_status_code_is(upload_response, 200) + workflow = upload_response.json() + workflow_id = workflow["id"] + return workflow_id diff --git a/lib/galaxy_test/base/cwl_location_rewriter.py b/lib/galaxy_test/base/cwl_location_rewriter.py new file mode 100644 index 000000000000..e8743e1dd4e8 --- /dev/null +++ b/lib/galaxy_test/base/cwl_location_rewriter.py @@ -0,0 +1,146 @@ +import functools +import json +import logging +import os +import urllib.parse + +from cwltool.context import LoadingContext +from cwltool.load_tool import default_loader +from cwltool.pack import pack +from cwltool.utils import visit_field + +log = logging.getLogger(__name__) + +# https://github.com/common-workflow-language/cwltool/issues/1937 +PACKED_WORKFLOWS_CWL_BUG = { + "conflict-wf.cwl": """ +$graph: +- baseCommand: echo + class: CommandLineTool + id: echo + inputs: + text: + inputBinding: {} + type: string + outputs: + fileout: + outputBinding: {glob: out.txt} + type: File + stdout: out.txt +- baseCommand: cat + class: CommandLineTool + id: cat + inputs: + file1: + inputBinding: {position: 1} + type: File + file2: + inputBinding: {position: 
2} + type: File + outputs: + fileout: + outputBinding: {glob: out.txt} + type: File + stdout: out.txt +- class: Workflow + id: collision + inputs: {input_1: string, input_2: string} + outputs: + fileout: {outputSource: cat_step/fileout, type: File} + steps: + cat_step: + in: + file1: {source: echo_1/fileout} + file2: {source: echo_2/fileout} + out: [fileout] + run: '#cat' + echo_1: + in: {text: input_1} + out: [fileout] + run: '#echo' + echo_2: + in: {text: input_2} + out: [fileout] + run: '#echo' +cwlVersion: v1.1 +""", + "js-expr-req-wf.cwl": """ +$graph: +- arguments: [echo, $(foo())] + class: CommandLineTool + hints: + ResourceRequirement: {ramMin: 8} + id: tool + inputs: [] + outputs: {out: stdout} + requirements: + InlineJavascriptRequirement: + expressionLib: ['function foo() { return 2; }'] + stdout: whatever.txt +- class: Workflow + id: wf + inputs: [] + outputs: + out: {outputSource: tool/out, type: File} + requirements: + InlineJavascriptRequirement: + expressionLib: ['function bar() { return 1; }'] + steps: + tool: + in: {} + out: [out] + run: '#tool' +cwlVersion: v1.0 +""", +} + + +def get_cwl_test_url(cwl_version): + branch = "main" + if cwl_version == "1.0": + repo_name = "common-workflow-language" + tests_dir = "v1.0/v1.0" + else: + repo_name = f"cwl-v{cwl_version}" + tests_dir = "tests" + if cwl_version == "1.2.1": + branch = "1.2.1_proposed" + return f"https://raw.githubusercontent.com/common-workflow-language/{repo_name}/{branch}/{tests_dir}" + + +def get_url(item, cwl_version, base_dir): + # quick hack, to make it more useful upload files/directories/paths to Galaxy instance ? + if isinstance(item, dict) and item.get("class") == "File": + location = item.pop("path", None) + if not location: + parse_result = urllib.parse.urlparse(item["location"]) + if parse_result.scheme == "file": + location = urllib.parse.unquote(parse_result.path) + if base_dir not in location: + return item + location = os.path.relpath(location, base_dir) + url = f"{get_cwl_test_url(cwl_version)}/{location}" + log.debug("Rewrote location from '%s' to '%s'", location, url) + item["location"] = url + return item + + +def rewrite_locations(workflow_path: str, output_path: str): + workflow_path_basename = os.path.basename(workflow_path) + if workflow_path_basename in PACKED_WORKFLOWS_CWL_BUG: + with open(output_path, "w") as output: + output.write(PACKED_WORKFLOWS_CWL_BUG[workflow_path_basename]) + return + loading_context = LoadingContext() + loading_context.loader = default_loader() + workflow_obj = pack(loading_context, workflow_path) + cwl_version = workflow_path.split("test/functional/tools/cwl_tools/v")[1].split("/")[0] + # deps = find_deps(workflow_obj, loading_context.loader, uri) + # basedir=os.path.dirname(workflow_path) + visit_field( + workflow_obj, + ("default"), + functools.partial(get_url, cwl_version=cwl_version, base_dir=os.path.normpath(os.path.dirname(workflow_path))), + ) + with open(output_path, "w") as output: + json.dump(workflow_obj, output) diff --git a/lib/galaxy_test/base/populators.py b/lib/galaxy_test/base/populators.py index 6503446c8485..c866eaec17b7 100644 --- a/lib/galaxy_test/base/populators.py +++ b/lib/galaxy_test/base/populators.py @@ -95,7 +95,6 @@ ) from galaxy.tool_util.client.staging import InteractorStaging from galaxy.tool_util.cwl.util import ( - download_output, GalaxyOutput, guess_artifact_type, invocation_to_output, @@ -110,9 +109,11 @@ ) from galaxy.util import ( DEFAULT_SOCKET_TIMEOUT, + download_to_file, galaxy_root_path, UNKNOWN, ) +from 
galaxy.util.compression_utils import CompressedFile from galaxy.util.path import StrPath from galaxy.util.resources import resource_string from galaxy.util.unittest_utils import skip_if_site_down @@ -295,6 +296,75 @@ def conformance_tests_gen(directory, filename="conformance_tests.yaml"): yield conformance_test +def to_local_location(listing, location): + for item in listing: + if "basename" in item: + item["location"] = f"file://{os.path.join(location[len('file://'):], item['basename'])}" + if "listing" in item: + to_local_location(item["listing"], location=item["location"]) + + +def fix_conflicts(path): + # find first key that does not clash with an existing entry in targets + # start with entry.target + '_' + 2 and then keep incrementing + # the number till there is no clash + i = 2 + tgt = f"{path}_{i}" + while os.path.exists(tgt): + i += 1 + tgt = f"{path}_{i}" + return tgt + + +def output_to_disk(output, download_folder): + if isinstance(output, list): + return [output_to_disk(item, download_folder=download_folder) for item in output] + if isinstance(output, dict): + if "secondaryFiles" in output: + output["secondaryFiles"] = [ + output_to_disk(secondary, download_folder=download_folder) for secondary in output["secondaryFiles"] + ] + if "basename" in output: + download_path = os.path.join(download_folder, output["basename"]) + if os.path.exists(download_path): + download_path = fix_conflicts(download_path) + if output["class"] == "Directory": + zip_path = f"{download_path}.zip" + download_to_file(output["location"], zip_path) + CompressedFile(zip_path).extract(download_folder) + os.remove(zip_path) + else: + download_to_file(output["location"], download_path) + output["path"] = download_path + output["location"] = f"file://{download_path}" + if "listing" in output: + to_local_location(output["listing"], output["location"]) + + return output + elif output.get("class") == "Directory": + # Directory in secondary files + download_folder = os.path.join(download_folder, output["location"]) + os.makedirs(download_folder, exist_ok=True) + output["location"] = download_folder + new_listing = [ + output_to_disk(secondary, download_folder=download_folder) for secondary in output["listing"] + ] + output["listing"] = new_listing + return output + else: + new_output = {} + for key, value in output.items(): + if isinstance(value, dict) and "basename" in value: + new_dir = os.path.join(download_folder, key) + os.makedirs(new_dir, exist_ok=True) + new_output[key] = output_to_disk(value, download_folder=new_dir) + else: + new_output[key] = value + return new_output + else: + return output + + class CwlRun: def __init__(self, dataset_populator, history_id): self.dataset_populator = dataset_populator @@ -343,14 +413,13 @@ def get_extra_files(dataset_details): get_metadata, get_dataset, get_extra_files, - pseudo_location=True, + pseudo_location=self.dataset_populator.galaxy_interactor.api_url, ) if download_folder: - if isinstance(output, dict) and "basename" in output: - download_path = os.path.join(download_folder, output["basename"]) - download_output(galaxy_output, get_metadata, get_dataset, get_extra_files, download_path) - output["path"] = download_path - output["location"] = f"file://{download_path}" + return output_to_disk( + output, + download_folder=download_folder, + ) return output @abstractmethod @@ -369,6 +438,12 @@ def __init__(self, dataset_populator, history_id, run_response): def job_id(self): return self.run_response.json()["jobs"][0]["id"] + def output(self, output_index): + return 
self.run_response.json()["outputs"][output_index] + + def output_collection(self, output_index): + return self.run_response.json()["output_collections"][output_index] + def _output_name_to_object(self, output_name): return tool_response_to_output(self.run_response.json(), self.history_id, output_name) @@ -1002,6 +1077,10 @@ def run_tool_payload(self, tool_id: Optional[str], inputs: dict, history_id: str kwds["__files"][key] = value del inputs[key] + ir = kwds.get("inputs_representation") + if ir is None and "inputs_representation" in kwds: + del kwds["inputs_representation"] + return dict(tool_id=tool_id, inputs=json.dumps(inputs), history_id=history_id, **kwds) def build_tool_state(self, tool_id: str, history_id: str): @@ -2625,7 +2704,11 @@ def _run_cwl_workflow_job( assert_ok: bool = True, ): workflow_path, object_id = urllib.parse.urldefrag(workflow_path) - workflow_id = self.workflow_populator.import_workflow_from_path(workflow_path, object_id) + from .cwl_location_rewriter import rewrite_locations + + with tempfile.NamedTemporaryFile() as temp: + rewrite_locations(workflow_path=workflow_path, output_path=temp.name) + workflow_id = self.workflow_populator.import_workflow_from_path(temp.name, object_id) request = { # TODO: rework tool state corrections so more of these are valid in Galaxy @@ -2644,6 +2727,7 @@ def run_cwl_job( job: Optional[Dict] = None, test_data_directory: Optional[str] = None, history_id: Optional[str] = None, + skip_input_staging: bool = False, assert_ok: bool = True, ) -> CwlRun: """ @@ -2668,16 +2752,18 @@ def run_cwl_job( job = yaml.safe_load(f) elif job is None: job = {} - _, datasets = stage_inputs( - self.dataset_populator.galaxy_interactor, - history_id, - job, - use_fetch_api=False, - tool_or_workflow=tool_or_workflow, - job_dir=test_data_directory, - ) - if datasets: - self.dataset_populator.wait_for_history(history_id=history_id, assert_ok=True) + if not skip_input_staging: + _, datasets = stage_inputs( + self.dataset_populator.galaxy_interactor, + history_id, + job, + use_fetch_api=True, + tool_or_workflow=tool_or_workflow, + job_dir=test_data_directory, + use_path_paste=False, + ) + if datasets: + self.dataset_populator.wait_for_history(history_id=history_id, assert_ok=True) if tool_or_workflow == "tool": run_object = self._run_cwl_tool_job( artifact, @@ -2722,12 +2808,13 @@ def run_conformance_test(self, version: str, doc: str): expected_outputs = test["output"] for key, value in expected_outputs.items(): - try: - actual_output = run.get_output_as_object(key) - cwltest.compare.compare(value, actual_output) - except Exception: - self.dataset_populator._summarize_history(run.history_id) - raise + with tempfile.TemporaryDirectory() as tmpdir: + try: + actual_output = run.get_output_as_object(key, download_folder=tmpdir) + cwltest.compare.compare(value, actual_output) + except Exception: + self.dataset_populator._summarize_history(run.history_id) + raise class LibraryPopulator: diff --git a/lib/galaxy_test/base/workflow_fixtures.py b/lib/galaxy_test/base/workflow_fixtures.py index 7b4eb6186b99..ce556c530afe 100644 --- a/lib/galaxy_test/base/workflow_fixtures.py +++ b/lib/galaxy_test/base/workflow_fixtures.py @@ -1179,6 +1179,20 @@ location: https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/1.bed """ +WORKFLOW_WITH_STEP_DEFAULT_FILE_DATASET_INPUT_OVERRIDES_TOOL_DEFAULT = """ +class: GalaxyWorkflow +steps: + cat1: + tool_id: cat_default + in: + input1: + default: + class: File + basename: a file + format: txt + location: 
https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/1.fasta +""" + WORKFLOW_FLAT_CROSS_PRODUCT = """ class: GalaxyWorkflow inputs: diff --git a/lib/galaxy_test/driver/driver_util.py b/lib/galaxy_test/driver/driver_util.py index 43f096a963e0..d4254ee93fcf 100644 --- a/lib/galaxy_test/driver/driver_util.py +++ b/lib/galaxy_test/driver/driver_util.py @@ -238,6 +238,7 @@ def setup_galaxy_config( job_handler_monitor_sleep=0.2, job_runner_monitor_sleep=0.2, workflow_monitor_sleep=0.2, + strict_cwl_validation=False, ) if default_shed_tool_data_table_config: config["shed_tool_data_table_config"] = default_shed_tool_data_table_config diff --git a/run_tests.sh b/run_tests.sh index 11c146353909..0163e92f1593 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -335,6 +335,7 @@ do GALAXY_TEST_TOOL_CONF="lib/galaxy/config/sample/tool_conf.xml.sample,test/functional/tools/sample_tool_conf.xml" marker="not cwl_conformance" report_file="./run_api_tests.html" + generate_cwl_conformance_tests=1 if [ $# -gt 1 ]; then api_script=$2 shift 2 @@ -407,6 +408,8 @@ do marker="tool" report_file="run_framework_tests.html" framework_test=1 + # CWL tools don't have embedded tests, so no need to set + # generate_cwl_conformance_tests=1 shift 1 ;; -w|--framework-workflows) diff --git a/scripts/cwl-runner b/scripts/cwl-runner new file mode 100755 index 000000000000..36931e87b375 --- /dev/null +++ b/scripts/cwl-runner @@ -0,0 +1,7 @@ +#!/bin/bash + +CWD=`pwd` +cd `dirname $0`/.. +. ./scripts/common_startup_functions.sh >&2 +setup_python >&2 +python ./scripts/run_cwl.py --cwd="$CWD" "$@" diff --git a/scripts/cwl_conformance_to_test_cases.py b/scripts/cwl_conformance_to_test_cases.py index 1a440c00ab52..db15dfe44816 100644 --- a/scripts/cwl_conformance_to_test_cases.py +++ b/scripts/cwl_conformance_to_test_cases.py @@ -36,9 +36,6 @@ class TestCwlConformance(BaseCwlWorkflowsApiTestCase): RED_TESTS = { "v1.0": [ - # required - "step_input_default_value_overriden_2nd_step_noexp", - "step_input_default_value_overriden_noexp", # not required "clt_any_input_with_mixed_array_provided", "directory_secondaryfiles", @@ -55,7 +52,6 @@ class TestCwlConformance(BaseCwlWorkflowsApiTestCase): "job_input_secondary_subdirs", "job_input_subdir_primary_and_secondary_subdirs", "resreq_step_overrides_wf", - "step_input_default_value_overriden", "step_input_default_value_overriden_2nd_step", "valuefrom_wf_step", "valuefrom_wf_step_multiple", @@ -90,8 +86,6 @@ class TestCwlConformance(BaseCwlWorkflowsApiTestCase): "stage_file_array_to_dir", "stage_file_array_to_dir_basename", "stage_file_array_to_dir_basename_entryname", - "step_input_default_value_overriden_2nd_step_noexp", - "step_input_default_value_overriden_noexp", # not required "clt_any_input_with_mixed_array_provided", "cwl_requirements_addition", @@ -124,7 +118,6 @@ class TestCwlConformance(BaseCwlWorkflowsApiTestCase): "stage_array_dirs", "stage_null_array", "stdin_shorcut", - "step_input_default_value_overriden", "step_input_default_value_overriden_2nd_step", "symlink_to_file_out_of_workdir_illegal", "timelimit_expressiontool", @@ -168,8 +161,6 @@ class TestCwlConformance(BaseCwlWorkflowsApiTestCase): "stage_file_array", "stage_file_array_basename", "stage_file_array_entryname_overrides", - "step_input_default_value_overriden_2nd_step_noexp", - "step_input_default_value_overriden_noexp", # not required "initial_work_dir_for_null_and_arrays", "initial_work_dir_for_array_dirs", @@ -193,16 +184,11 @@ class TestCwlConformance(BaseCwlWorkflowsApiTestCase): 
"conditionals_multi_scatter_nojs", "conditionals_nested_cross_scatter", "conditionals_nested_cross_scatter_nojs", - "conditionals_non_boolean_fail", - "conditionals_non_boolean_fail_nojs", "cwl_requirements_addition", "cwl_requirements_override_expression", "cwl_requirements_override_static", "cwloutput_nolimit", "direct_optional_nonnull_result_nojs", - "direct_optional_null_result", - "direct_required", - "direct_required_nojs", "directory_secondaryfiles", "docker_entrypoint", "dockeroutputdir", @@ -243,7 +229,6 @@ class TestCwlConformance(BaseCwlWorkflowsApiTestCase): "scatter_on_scattered_conditional", "scatter_on_scattered_conditional_nojs", "secondary_files_in_named_records", - "step_input_default_value_overriden", "step_input_default_value_overriden_2nd_step", "storage_float", "the_only_non_null_single_true", @@ -277,6 +262,44 @@ class TestCwlConformance(BaseCwlWorkflowsApiTestCase): "workflow_input_loadContents_without_inputBinding", "workflow_integer_input_optional_unspecified", "workflow_step_in_loadContents", + "capture_dirs", + "capture_files", + "capture_files_and_dirs", + "colon_in_output_path", + "colon_in_paths", + "default_with_falsey_value", + "directory_literal_with_literal_file_in_subdir_nostdin", + "dotproduct_dotproduct_scatter", + "dotproduct_simple_scatter", + "filename_with_hash_mark", + "flat_crossproduct_flat_crossproduct_scatter", + "flat_crossproduct_simple_scatter", + "iwd_subdir", + "js_input_record", + "length_for_non_array", + "mixed_version_v12_wf", + "multiple_input_feature_requirement", + "nested_crossproduct_nested_crossproduct_scatter", + "nested_crossproduct_simple_scatter", + "nested_types", + "output_reference_workflow_input", + "paramref_arguments_inputs", + "paramref_arguments_self", + "params_broken_null", + "record_order_with_input_bindings", + "record_outputeval", + "record_outputeval_nojs", + "record_with_default", + "runtime_outdir", + "schemadef_types_with_import", + "simple_dotproduct_scatter", + "simple_flat_crossproduct_scatter", + "simple_nested_crossproduct_scatter", + "simple_simple_scatter", + "stdout_chained_commands", + "user_defined_length_in_parameter_reference", + "very_big_and_very_floats", + "very_big_and_very_floats_nojs", ], } diff --git a/scripts/run_cwl.py b/scripts/run_cwl.py new file mode 100644 index 000000000000..bfa4b4b5ee2f --- /dev/null +++ b/scripts/run_cwl.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python + +import argparse +import json +import os +import sys +from typing import ( + Any, + Dict, + List, +) + +from bioblend import galaxy + +sys.path.insert(1, os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, "lib"))) + +from galaxy.tool_util.cwl.runnable import get_outputs +from galaxy.version import VERSION +from galaxy_test.base.populators import ( # noqa: I100,I202 + CwlPopulator, + CwlRun, + GiDatasetPopulator, + GiWorkflowPopulator, +) + +DESCRIPTION = """Simple CWL runner script.""" + + +def collect_outputs(cwl_run: CwlRun, output_names: List[str], outdir: str) -> Dict[str, Any]: + outputs = {} + for output_name in output_names: + cwl_output = cwl_run.get_output_as_object(output_name, download_folder=outdir) + outputs[output_name] = cwl_output + return outputs + + +def main(argv=None): + """Entry point for workflow driving.""" + arg_parser = argparse.ArgumentParser(description=DESCRIPTION) + arg_parser.add_argument("--api_key", required=True) + arg_parser.add_argument("--host", default="http://localhost:8080/") + arg_parser.add_argument("--outdir", default=".") + arg_parser.add_argument("--quiet", 
action="store_true") + arg_parser.add_argument("--version", action="version", version=f"%(prog)s {VERSION}~CWL") + arg_parser.add_argument("--cwd", default=os.getcwd()) + arg_parser.add_argument("tool", metavar="TOOL", help="tool or workflow") + arg_parser.add_argument("job", metavar="JOB", help="job") + + args = arg_parser.parse_args(argv) + + gi = galaxy.GalaxyInstance(args.host, args.api_key) + dataset_populator = GiDatasetPopulator(gi) + workflow_populator = GiWorkflowPopulator(gi) + cwl_populator = CwlPopulator(dataset_populator, workflow_populator) + + abs_cwd = os.path.abspath(args.cwd) + + tool = args.tool + if not os.path.isabs(tool): + tool = os.path.join(abs_cwd, tool) + + job = args.job + if not os.path.isabs(job): + job = os.path.join(abs_cwd, job) + + run = cwl_populator.run_cwl_job(tool, job, skip_input_staging=True) + + outputs = get_outputs(tool) + output_names = [o.get_id() for o in outputs] + outputs = collect_outputs(run, output_names, outdir=args.outdir) + print(json.dumps(outputs, indent=4)) + # for output_dataset in output_datasets.values(): + # name = output_dataset.name + # print(run.get_output_as_object(name)) + + +if __name__ == "__main__": + main() diff --git a/test/functional/tools/cat.json b/test/functional/tools/cat.json new file mode 100644 index 000000000000..ff4d73f82509 --- /dev/null +++ b/test/functional/tools/cat.json @@ -0,0 +1,3 @@ +input1: + class: File + location: cat.json diff --git a/test/functional/tools/cwl_tools/v1.0_custom/any1.cwl b/test/functional/tools/cwl_tools/v1.0_custom/any1.cwl new file mode 100644 index 000000000000..0a1cca295273 --- /dev/null +++ b/test/functional/tools/cwl_tools/v1.0_custom/any1.cwl @@ -0,0 +1,17 @@ +#!/usr/bin/env cwl-runner +class: CommandLineTool +cwlVersion: v1.0 +requirements: + - class: InlineJavascriptRequirement + +inputs: + - id: bar + type: Any + +outputs: + - id: t1 + type: Any + outputBinding: + outputEval: $(inputs.bar.class || inputs.bar) + +baseCommand: "true" diff --git a/test/functional/tools/cwl_tools/v1.0_custom/cat-default.cwl b/test/functional/tools/cwl_tools/v1.0_custom/cat-default.cwl new file mode 100755 index 000000000000..8feec95f2e2c --- /dev/null +++ b/test/functional/tools/cwl_tools/v1.0_custom/cat-default.cwl @@ -0,0 +1,22 @@ +#!/usr/bin/env cwl-runner +cwlVersion: "v1.0" +class: CommandLineTool +doc: "Print the contents of a file to stdout using 'cat' running in a docker container." 
+hints: + DockerRequirement: + dockerPull: debian:wheezy +inputs: + file1: + label: Input File + doc: "The file that will be copied using 'cat'" + type: File + default: + class: File + path: hello.txt + inputBinding: {position: 1} +baseCommand: cat +stdout: output.txt +outputs: + output_file: + type: File + outputBinding: {glob: output.txt} diff --git a/test/functional/tools/cwl_tools/v1.0_custom/cat-job.json b/test/functional/tools/cwl_tools/v1.0_custom/cat-job.json new file mode 100644 index 000000000000..09f16ba7b173 --- /dev/null +++ b/test/functional/tools/cwl_tools/v1.0_custom/cat-job.json @@ -0,0 +1,6 @@ +{ + "file1": { + "class": "File", + "path": "hello.txt" + } +} diff --git a/test/functional/tools/cwl_tools/v1.0_custom/cat-n-job.json b/test/functional/tools/cwl_tools/v1.0_custom/cat-n-job.json new file mode 100644 index 000000000000..ee6416857d39 --- /dev/null +++ b/test/functional/tools/cwl_tools/v1.0_custom/cat-n-job.json @@ -0,0 +1,7 @@ +{ + "file1": { + "class": "File", + "path": "hello.txt" + }, + "numbering": true +} diff --git a/test/functional/tools/cwl_tools/v1.0_custom/cat1-tool.cwl b/test/functional/tools/cwl_tools/v1.0_custom/cat1-tool.cwl new file mode 100755 index 000000000000..ecd2cacd6636 --- /dev/null +++ b/test/functional/tools/cwl_tools/v1.0_custom/cat1-tool.cwl @@ -0,0 +1,18 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.0 +class: CommandLineTool +doc: "Print the contents of a file to stdout using 'cat' running in a docker container." +hints: + DockerRequirement: + dockerPull: debian:wheezy +inputs: + file1: + type: File + inputBinding: {position: 1} + numbering: + type: boolean? + inputBinding: + position: 0 + prefix: -n +baseCommand: cat +outputs: {} diff --git a/test/functional/tools/cwl_tools/v1.0_custom/cat2-tool.cwl b/test/functional/tools/cwl_tools/v1.0_custom/cat2-tool.cwl new file mode 100755 index 000000000000..4a0e229eec7f --- /dev/null +++ b/test/functional/tools/cwl_tools/v1.0_custom/cat2-tool.cwl @@ -0,0 +1,12 @@ +#!/usr/bin/env cwl-runner +cwlVersion: v1.0 +class: CommandLineTool +doc: "Print the contents of a file to stdout using 'cat' running in a docker container." +hints: + DockerRequirement: + dockerPull: debian:wheezy +inputs: + file1: File +stdin: $(inputs.file1.path) +baseCommand: cat +outputs: {} diff --git a/test/functional/tools/cwl_tools/v1.0_custom/default_path_custom_1.cwl b/test/functional/tools/cwl_tools/v1.0_custom/default_path_custom_1.cwl new file mode 100644 index 000000000000..68dbd9f4a96c --- /dev/null +++ b/test/functional/tools/cwl_tools/v1.0_custom/default_path_custom_1.cwl @@ -0,0 +1,10 @@ +cwlVersion: v1.0 +class: CommandLineTool +inputs: + - id: "file1" + type: File + default: + class: File + path: md5sum.input +outputs: [] +arguments: [cat,$(inputs.file1.path)] diff --git a/test/functional/tools/cwl_tools/v1.0_custom/hello.txt b/test/functional/tools/cwl_tools/v1.0_custom/hello.txt new file mode 100644 index 000000000000..cd0875583aab --- /dev/null +++ b/test/functional/tools/cwl_tools/v1.0_custom/hello.txt @@ -0,0 +1 @@ +Hello world! diff --git a/test/functional/tools/cwl_tools/v1.0_custom/index.py b/test/functional/tools/cwl_tools/v1.0_custom/index.py new file mode 100644 index 000000000000..21aaabd8f949 --- /dev/null +++ b/test/functional/tools/cwl_tools/v1.0_custom/index.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python + +# Toy program to generate inverted index of word to line. +# Takes input text file on stdin and prints output index on stdout. 
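+# (In this copy the input file is passed as the first command-line argument rather than
+# stdin, and the inverted index is written to <input>.idx1, with empty .idx2-.idx5 side files.)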
+ +import os +import sys + +words = {} + +mainfile = sys.argv[1] +indexfile = sys.argv[1] + ".idx1" + +main = open(mainfile) +index = open(indexfile, "w") + +linenum = 0 +for line in main: + linenum += 1 + line = line.rstrip().lower().replace(".", "").replace(",", "").replace(";", "").replace("-", " ") + for w in line.split(" "): + if w: + if w not in words: + words[w] = set() + words[w].add(linenum) + +for w in sorted(words.keys()): + index.write(f"{w}: {', '.join(str(i) for i in words[w])}\n") + +open(os.path.splitext(sys.argv[1])[0] + ".idx2", "w") +open(sys.argv[1] + ".idx3", "w") +open(sys.argv[1] + ".idx4", "w") +open(sys.argv[1] + ".idx5", "w") diff --git a/test/functional/tools/cwl_tools/v1.0_custom/index1.cwl b/test/functional/tools/cwl_tools/v1.0_custom/index1.cwl new file mode 100755 index 000000000000..92346a4008be --- /dev/null +++ b/test/functional/tools/cwl_tools/v1.0_custom/index1.cwl @@ -0,0 +1,28 @@ +#!/usr/bin/env cwl-runner +cwlVersion: "v1.0" +class: CommandLineTool +requirements: + InitialWorkDirRequirement: + listing: + - entryname: input.txt + entry: $(inputs.file) +inputs: + file: File + index.py: + type: File + default: + class: File + path: index.py + inputBinding: + position: 0 +baseCommand: python +arguments: +- valueFrom: input.txt + position: 1 +outputs: + result: + type: File + secondaryFiles: + - ".idx1" + outputBinding: + glob: input.txt diff --git a/test/functional/tools/cwl_tools/v1.0_custom/just-wc-wf.cwl b/test/functional/tools/cwl_tools/v1.0_custom/just-wc-wf.cwl new file mode 100644 index 000000000000..537c4db9b014 --- /dev/null +++ b/test/functional/tools/cwl_tools/v1.0_custom/just-wc-wf.cwl @@ -0,0 +1,32 @@ +#!/usr/bin/env cwl-runner +class: Workflow +cwlVersion: v1.0 + +inputs: + file1: + type: File + +outputs: + count_output: + type: File + outputSource: step1/wc_output + +steps: + step1: + in: + wc_file1: file1 + out: [wc_output] + run: + id: wc + class: CommandLineTool + inputs: + wc_file1: + type: File + inputBinding: {} + outputs: + wc_output: + type: File + outputBinding: + glob: output.txt + stdout: output.txt + baseCommand: wc diff --git a/test/functional/tools/cwl_tools/v1.0_custom/md5sum.input b/test/functional/tools/cwl_tools/v1.0_custom/md5sum.input new file mode 100644 index 000000000000..3c5e4cdbdcfd --- /dev/null +++ b/test/functional/tools/cwl_tools/v1.0_custom/md5sum.input @@ -0,0 +1 @@ +this is the test file that will be used when calculating an md5sum \ No newline at end of file diff --git a/test/functional/tools/cwl_tools/v1.0_custom/md5sum_non_strict.cwl b/test/functional/tools/cwl_tools/v1.0_custom/md5sum_non_strict.cwl new file mode 100644 index 000000000000..3656fe1176c7 --- /dev/null +++ b/test/functional/tools/cwl_tools/v1.0_custom/md5sum_non_strict.cwl @@ -0,0 +1,50 @@ +#!/usr/bin/env cwl-runner + +class: CommandLineTool +id: Md5sum +label: Simple md5sum tool +cwlVersion: v1.0 + +$namespaces: + dct: http://purl.org/dc/terms/ + foaf: http://xmlns.com/foaf/0.1/ + +doc: | + [![Docker Repository on Quay.io](https://quay.io/repository/briandoconnor/dockstore-tool-md5sum/status "Docker Repository on Quay.io")](https://quay.io/repository/briandoconnor/dockstore-tool-md5sum) + [![Build Status](https://travis-ci.org/briandoconnor/dockstore-tool-md5sum.svg)](https://travis-ci.org/briandoconnor/dockstore-tool-md5sum) + A very, very simple Docker container for the md5sum command. See the [README](https://github.com/briandoconnor/dockstore-tool-md5sum/blob/master/README.md) for more information. 
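+# (The ResourceRequirement hint below carries a non-standard 'description' field;
+# loading this tool relies on strict_cwl_validation being disabled.)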
+ + +dct:creator: + '@id': http://orcid.org/0000-0002-7681-6415 + foaf:name: Brian O'Connor + foaf:mbox: briandoconnor@gmail.com + +requirements: +- class: DockerRequirement + dockerPull: quay.io/briandoconnor/dockstore-tool-md5sum:1.0.2 +- class: InlineJavascriptRequirement + +hints: +- class: ResourceRequirement + coresMin: 1 + ramMin: 1024 + outdirMin: 512000 + description: the command really requires very little resources. + +inputs: + input_file: + type: File + inputBinding: + position: 1 + doc: The file that will have its md5sum calculated. + +outputs: + output_file: + type: File + format: http://edamontology.org/data_3671 + outputBinding: + glob: md5sum.txt + doc: A text file that contains a single line that is the md5sum of the input file. + +baseCommand: [/bin/my_md5sum] diff --git a/test/functional/tools/cwl_tools/v1.0_custom/optional-output2.cwl b/test/functional/tools/cwl_tools/v1.0_custom/optional-output2.cwl new file mode 100755 index 000000000000..f0e266a952b3 --- /dev/null +++ b/test/functional/tools/cwl_tools/v1.0_custom/optional-output2.cwl @@ -0,0 +1,25 @@ +#!/usr/bin/env cwl-runner +cwlVersion: "v1.0" +class: CommandLineTool +doc: "Print the contents of a file to stdout using 'cat' running in a docker container." +hints: + DockerRequirement: + dockerPull: docker.io/python:3-slim +inputs: + optionaloutput.py: + type: File + default: + class: File + path: optionaloutput.py + inputBinding: + position: 0 + produce: + type: string + inputBinding: + position: 1 +baseCommand: python3 +outputs: + optional_file: + type: File? + outputBinding: + glob: bumble.txt diff --git a/test/functional/tools/cwl_tools/v1.0_custom/optionaloutput.py b/test/functional/tools/cwl_tools/v1.0_custom/optionaloutput.py new file mode 100644 index 000000000000..07c26d19776b --- /dev/null +++ b/test/functional/tools/cwl_tools/v1.0_custom/optionaloutput.py @@ -0,0 +1,6 @@ +#!/usr/bin/env python + +import sys + +if sys.argv[1] == "do_write": + open("bumble.txt", "w").write("bees\n") diff --git a/test/functional/tools/cwl_tools/v1.0_custom/showindex.py b/test/functional/tools/cwl_tools/v1.0_custom/showindex.py new file mode 100644 index 000000000000..819dae03eebf --- /dev/null +++ b/test/functional/tools/cwl_tools/v1.0_custom/showindex.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python + +from __future__ import print_function + +import sys + +indexfile = sys.argv[1] + ".idx1" + +index = open(indexfile, "r").read() + +print(index) diff --git a/test/functional/tools/cwl_tools/v1.0_custom/showindex1.cwl b/test/functional/tools/cwl_tools/v1.0_custom/showindex1.cwl new file mode 100755 index 000000000000..e0bbae424757 --- /dev/null +++ b/test/functional/tools/cwl_tools/v1.0_custom/showindex1.cwl @@ -0,0 +1,24 @@ +#!/usr/bin/env cwl-runner +cwlVersion: "v1.0" +class: CommandLineTool +inputs: + file: + type: File + secondaryFiles: + - ".idx1" + inputBinding: + position: 1 + showindex.py: + type: File + default: + class: File + path: showindex.py + inputBinding: + position: 0 +baseCommand: python +stdout: output.txt +outputs: + output_txt: + type: File + outputBinding: + glob: output.txt diff --git a/test/functional/tools/cwl_tools/v1.0_custom/whale.txt b/test/functional/tools/cwl_tools/v1.0_custom/whale.txt new file mode 100644 index 000000000000..425d1ed02c8d --- /dev/null +++ b/test/functional/tools/cwl_tools/v1.0_custom/whale.txt @@ -0,0 +1,16 @@ +Call me Ishmael. 
Some years ago--never mind how long precisely--having +little or no money in my purse, and nothing particular to interest me on +shore, I thought I would sail about a little and see the watery part of +the world. It is a way I have of driving off the spleen and regulating +the circulation. Whenever I find myself growing grim about the mouth; +whenever it is a damp, drizzly November in my soul; whenever I find +myself involuntarily pausing before coffin warehouses, and bringing up +the rear of every funeral I meet; and especially whenever my hypos get +such an upper hand of me, that it requires a strong moral principle to +prevent me from deliberately stepping into the street, and methodically +knocking people's hats off--then, I account it high time to get to sea +as soon as I can. This is my substitute for pistol and ball. With a +philosophical flourish Cato throws himself upon his sword; I quietly +take to the ship. There is nothing surprising in this. If they but knew +it, almost all men in their degree, some time or other, cherish very +nearly the same feelings towards the ocean with me. diff --git a/test/functional/tools/galactic_cat.cwl b/test/functional/tools/galactic_cat.cwl new file mode 100644 index 000000000000..c577bd854221 --- /dev/null +++ b/test/functional/tools/galactic_cat.cwl @@ -0,0 +1,31 @@ +#!/usr/bin/env cwl-runner +$namespaces: + gx: "http://galaxyproject.org/cwl#" +cwlVersion: v1.0 +class: CommandLineTool +id: "galactic_cat" +gx:version: '1.2' +doc: | + Galactic Cat. +inputs: + - id: input1 + type: File + inputBinding: + position: 1 +outputs: + - id: output1 + type: File + outputBinding: + glob: out +baseCommand: ["cat"] +arguments: [] +stdout: out +hints: + gx:interface: + gx:inputs: + - gx:name: input1 + gx:type: data + gx:format: 'txt' + gx:outputs: + output1: + gx:format: 'txt' diff --git a/test/functional/tools/galactic_record_input.cwl b/test/functional/tools/galactic_record_input.cwl new file mode 100644 index 000000000000..f0b098540e9c --- /dev/null +++ b/test/functional/tools/galactic_record_input.cwl @@ -0,0 +1,57 @@ +#!/usr/bin/env cwl-runner +$namespaces: + gx: "http://galaxyproject.org/cwl#" +class: CommandLineTool +cwlVersion: v1.0 +requirements: + - class: ShellCommandRequirement +id: galactic_record_input +gx:version: '1.2' +inputs: + irec: + type: + name: irec + type: record + fields: + - name: ifoo + type: File + inputBinding: + position: 2 + - name: ibar + type: File + inputBinding: + position: 6 + +outputs: + - id: ofoo + type: File + outputBinding: + glob: foo + - id: obar + type: File + outputBinding: + glob: bar + +arguments: + - {valueFrom: "cat", position: 1} + - {valueFrom: "> foo", position: 3, shellQuote: false} + - {valueFrom: "&&", position: 4, shellQuote: false} + - {valueFrom: "cat", position: 5} + - {valueFrom: "> bar", position: 7, shellQuote: false} + +hints: + gx:interface: + gx:inputs: + - gx:name: input1 + gx:type: data + gx:format: 'txt' + gx:mapTo: 'irec/ifoo' + - gx:name: input2 + gx:type: data + gx:format: 'txt' + gx:mapTo: 'irec/ibar' + gx:outputs: + ofoo: + gx:format: 'txt' + obar: + gx:format: 'txt' diff --git a/test/functional/tools/sample_tool_conf.xml b/test/functional/tools/sample_tool_conf.xml index 0918d7dc92b4..7f44db4820fc 100644 --- a/test/functional/tools/sample_tool_conf.xml +++ b/test/functional/tools/sample_tool_conf.xml @@ -249,6 +249,26 @@ + + + + + + + + + + + + + + + + + + + +
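
For reference, the new `scripts/cwl-runner` wrapper drives a running Galaxy instance through `scripts/run_cwl.py` and prints the collected CWL outputs as JSON. A minimal invocation sketch is below; it assumes a Galaxy server is already running at the default host with the CWL options from `doc/source/dev/cwl.md` enabled, and the `GALAXY_API_KEY` variable and output directory are placeholders. The tool and job files referenced are the fixtures added in this patch.

```sh
# Run a CWL tool against a local Galaxy instance and collect its outputs.
# --api_key is required; --host defaults to http://localhost:8080/.
scripts/cwl-runner \
    --api_key "$GALAXY_API_KEY" \
    --outdir /tmp/cwl-out \
    test/functional/tools/cwl_tools/v1.0_custom/cat-default.cwl \
    test/functional/tools/cwl_tools/v1.0_custom/cat-job.json
```

The wrapper resolves relative tool and job paths against the caller's working directory (via `--cwd`), stages nothing itself (`skip_input_staging=True` in `run_cwl.py`), and downloads each declared output into `--outdir` before printing the output object map.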