From 694d667248557c5ffa41b5892861cadda62a0e37 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Thu, 26 Oct 2023 11:29:45 +0200 Subject: [PATCH 1/7] Add test coverage for sorting null size datasets --- test/integration/test_storage_cleaner.py | 42 +++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/test/integration/test_storage_cleaner.py b/test/integration/test_storage_cleaner.py index c358536c3244..51485f0ede2e 100644 --- a/test/integration/test_storage_cleaner.py +++ b/test/integration/test_storage_cleaner.py @@ -6,7 +6,10 @@ from uuid import uuid4 from galaxy_test.base.decorators import requires_new_history -from galaxy_test.base.populators import DatasetPopulator +from galaxy_test.base.populators import ( + DatasetPopulator, + skip_without_tool, +) from galaxy_test.driver import integration_util @@ -39,6 +42,43 @@ def test_discarded_datasets_monitoring_and_cleanup(self): "datasets", test_datasets, dataset_ids, delete_resource_uri=f"histories/{history_id}/contents" ) + @requires_new_history + @skip_without_tool("cat_data_and_sleep") + def test_discarded_datasets_with_null_size_are_sorted_correctly(self): + history_id = self.dataset_populator.new_history(f"History for discarded datasets {uuid4()}") + test_datasets = [ + StoredItemDataForTests(name=f"TestDataset01_{uuid4()}", size=10), + StoredItemDataForTests(name=f"TestDataset02_{uuid4()}", size=50), + ] + dataset_ids = self._create_datasets_in_history_with(history_id, test_datasets) + + # Run a tool on the first dataset and delete the output before completing the job + # so it has a null size in the database + inputs = { + "input1": {"src": "hda", "id": dataset_ids[0]}, + "sleep_time": 10, + } + run_response = self.dataset_populator.run_tool_raw( + "cat_data_and_sleep", + inputs, + history_id, + ) + null_size_dataset = run_response.json()["outputs"][0] + self.dataset_populator.delete_dataset(history_id, null_size_dataset["id"], stop_job=True) + # delete the other datasets too + for dataset_id in dataset_ids: + self.dataset_populator.delete_dataset(history_id, dataset_id) + + # Check the dataset size sorting is correct [0, 10, 50] + item_names_forward_order = [null_size_dataset["name"], test_datasets[0].name, test_datasets[1].name] + item_names_reverse_order = list(reversed(item_names_forward_order)) + expected_order_by_map = { + "size-asc": item_names_forward_order, + "size-dsc": item_names_reverse_order, + } + for order_by, expected_ordered_names in expected_order_by_map.items(): + self._assert_order_is_expected("storage/datasets/discarded", order_by, expected_ordered_names) + @requires_new_history def test_archived_histories_monitoring_and_cleanup(self): test_histories = self._build_test_items(resource_name="History") From 3950785c9cd5b707875dba10d43d7ff5df18728b Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Thu, 26 Oct 2023 11:56:37 +0200 Subject: [PATCH 2/7] Sort null size values accordingly --- lib/galaxy/managers/hdas.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/galaxy/managers/hdas.py b/lib/galaxy/managers/hdas.py index 17abe5063337..09354dd71ebc 100644 --- a/lib/galaxy/managers/hdas.py +++ b/lib/galaxy/managers/hdas.py @@ -21,6 +21,8 @@ desc, false, func, + nulls_first, + nulls_last, select, true, ) @@ -347,8 +349,8 @@ def __init__(self, hda_manager: HDAManager, dataset_manager: datasets.DatasetMan self.sort_map = { StoredItemOrderBy.NAME_ASC: 
asc(model.HistoryDatasetAssociation.name), StoredItemOrderBy.NAME_DSC: desc(model.HistoryDatasetAssociation.name), - StoredItemOrderBy.SIZE_ASC: asc(model.Dataset.total_size), - StoredItemOrderBy.SIZE_DSC: desc(model.Dataset.total_size), + StoredItemOrderBy.SIZE_ASC: nulls_first(asc(model.Dataset.total_size)), + StoredItemOrderBy.SIZE_DSC: nulls_last(desc(model.Dataset.total_size)), StoredItemOrderBy.UPDATE_TIME_ASC: asc(model.HistoryDatasetAssociation.update_time), StoredItemOrderBy.UPDATE_TIME_DSC: desc(model.HistoryDatasetAssociation.update_time), } From ffed1573ffb33fa54ca0263ed7cb18d8ad9d48b5 Mon Sep 17 00:00:00 2001 From: Dannon Baker Date: Thu, 26 Oct 2023 09:38:17 -0400 Subject: [PATCH 3/7] When exporting a workflow, include owner's annotation if the exporting user doesn't have an overriding one. --- lib/galaxy/managers/workflows.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/galaxy/managers/workflows.py b/lib/galaxy/managers/workflows.py index 6106d0043517..8449c70f04d7 100644 --- a/lib/galaxy/managers/workflows.py +++ b/lib/galaxy/managers/workflows.py @@ -1337,7 +1337,12 @@ def _workflow_to_dict_export(self, trans, stored=None, workflow=None, internal=F tag_str = "" if stored is not None: if stored.id: - annotation_str = self.get_item_annotation_str(trans.sa_session, trans.user, stored) or "" + # if the active user doesn't have an annotation on the workflow, default to the owner's annotation. + annotation_str = ( + self.get_item_annotation_str(trans.sa_session, trans.user, stored) + or self.get_item_annotation_str(trans.sa_session, stored.user, stored) + or "" + ) tag_str = stored.make_tag_string_list() else: # dry run with flushed workflow objects, just use the annotation From e48e490b3a9e6659b9a5a23973a44597dd4a788c Mon Sep 17 00:00:00 2001 From: Dannon Baker Date: Thu, 26 Oct 2023 14:36:59 -0400 Subject: [PATCH 4/7] Also get owner annotations for individual steps, unless trans.user has one. It's still unclear how one would, so I'm being cautious here. --- lib/galaxy/managers/workflows.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/galaxy/managers/workflows.py b/lib/galaxy/managers/workflows.py index 8449c70f04d7..58bc7e52a2b6 100644 --- a/lib/galaxy/managers/workflows.py +++ b/lib/galaxy/managers/workflows.py @@ -1335,12 +1335,14 @@ def _workflow_to_dict_export(self, trans, stored=None, workflow=None, internal=F """ annotation_str = "" tag_str = "" + annotation_owner = None if stored is not None: if stored.id: # if the active user doesn't have an annotation on the workflow, default to the owner's annotation. + annotation_owner = stored.user annotation_str = ( self.get_item_annotation_str(trans.sa_session, trans.user, stored) - or self.get_item_annotation_str(trans.sa_session, stored.user, stored) + or self.get_item_annotation_str(trans.sa_session, annotation_owner, stored) or "" ) tag_str = stored.make_tag_string_list() @@ -1375,8 +1377,10 @@ def _workflow_to_dict_export(self, trans, stored=None, workflow=None, internal=F module = module_factory.from_workflow_step(trans, step) if not module: raise exceptions.MessageException(f"Unrecognized step type: {step.type}") - # Get user annotation. + # Get user annotation if it exists, otherwise get owner annotation. 
             annotation_str = self.get_item_annotation_str(trans.sa_session, trans.user, step) or ""
+            if not annotation_str and annotation_owner:
+                annotation_str = self.get_item_annotation_str(trans.sa_session, annotation_owner, step) or ""
             content_id = module.get_content_id() if allow_upgrade else step.content_id
             # Export differences for backward compatibility
             tool_state = module.get_export_state()

From 6372253e88478d246104f924faced23cec1b803e Mon Sep 17 00:00:00 2001
From: Dannon Baker
Date: Thu, 26 Oct 2023 15:04:17 -0400
Subject: [PATCH 5/7] Add execute back to published workflows list

---
 client/src/components/Workflow/WorkflowList.vue | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/client/src/components/Workflow/WorkflowList.vue b/client/src/components/Workflow/WorkflowList.vue
index e702bdf3ef40..473184f71d40 100644
--- a/client/src/components/Workflow/WorkflowList.vue
+++ b/client/src/components/Workflow/WorkflowList.vue
@@ -160,7 +160,7 @@ const EXECUTE_FIELD = { key: "execute", label: "Run", thStyle: { width: "10%" }
 const OWNER_FIELD = { key: "owner", label: _l("Owner"), sortable: false, thStyle: { width: "15%" } };
 
 const PERSONAL_FIELDS = [NAME_FIELD, TAGS_FIELD, UPDATED_FIELD, SHARING_FIELD, BOOKMARKED_FIELD, EXECUTE_FIELD];
-const PUBLISHED_FIELDS = [NAME_FIELD, TAGS_FIELD, UPDATED_FIELD, OWNER_FIELD];
+const PUBLISHED_FIELDS = [NAME_FIELD, TAGS_FIELD, UPDATED_FIELD, OWNER_FIELD, EXECUTE_FIELD];
 
 export default {
     components: {

From 69ca24f67d4b4e6289398df01b1fdc12209f0dc7 Mon Sep 17 00:00:00 2001
From: mvdbeek
Date: Fri, 27 Oct 2023 10:02:39 +0200
Subject: [PATCH 6/7] Skip state filtering in __MERGE_COLLECTION__ tool

Fixes inputs that are NEW, QUEUED, etc. getting skipped. If you're after
filtering out errored datasets, you should use the
`__FILTER_FAILED_DATASETS__` tool.
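
For reference, that filtering alternative would look roughly like this in the
same GalaxyWorkflow test YAML used by the new test in this patch (step names
and the filter tool's input/output parameter names are assumed here for
illustration):

    class: GalaxyWorkflow
    inputs:
      collection:
        type: collection
        collection_type: list
    outputs:
      merged:
        outputSource: merge/output
    steps:
      filter_failed:
        tool_id: __FILTER_FAILED_DATASETS__
        in:
          input: collection
      merge:
        tool_id: __MERGE_COLLECTION__
        in:
          inputs_0|input: filter_failed/output

The filter step drops failed elements up front, so the merge only ever sees
datasets that reached an ok state and no longer needs to inspect dataset
states itself.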
--- lib/galaxy/tools/__init__.py | 51 ++++++++++----------------- lib/galaxy_test/api/test_workflows.py | 41 +++++++++++++++++++++ 2 files changed, 60 insertions(+), 32 deletions(-) diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py index 5a92fd960e4b..98abc079e416 100644 --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -3376,40 +3376,27 @@ def produce_outputs(self, trans, out_data, output_collections, incoming, history for copy, input_list in enumerate(input_lists): for dce in input_list.collection.elements: element = dce.element_object - valid = False - - # dealing with a single element - if hasattr(element, "is_ok"): - if element.is_ok: - valid = True - elif hasattr(element, "dataset_instances"): - # we are probably a list:paired dataset, both need to be in non error state - forward_o, reverse_o = element.dataset_instances - if forward_o.is_ok and reverse_o.is_ok: - valid = True + element_identifier = dce.element_identifier + identifier_seen = element_identifier in new_element_structure + appearances = identifiers_map[element_identifier] + add_suffix = False + if dupl_actions == "suffix_every": + add_suffix = True + elif dupl_actions == "suffix_conflict" and len(appearances) > 1: + add_suffix = True + elif dupl_actions == "suffix_conflict_rest" and len(appearances) > 1 and appearances[0] != copy: + add_suffix = True + + if dupl_actions == "keep_first" and identifier_seen: + continue - if valid: - element_identifier = dce.element_identifier - identifier_seen = element_identifier in new_element_structure - appearances = identifiers_map[element_identifier] - add_suffix = False - if dupl_actions == "suffix_every": - add_suffix = True - elif dupl_actions == "suffix_conflict" and len(appearances) > 1: - add_suffix = True - elif dupl_actions == "suffix_conflict_rest" and len(appearances) > 1 and appearances[0] != copy: - add_suffix = True - - if dupl_actions == "keep_first" and identifier_seen: - continue - - if add_suffix and suffix_pattern: - suffix = suffix_pattern.replace("#", str(copy + 1)) - effective_identifer = f"{element_identifier}{suffix}" - else: - effective_identifer = element_identifier + if add_suffix and suffix_pattern: + suffix = suffix_pattern.replace("#", str(copy + 1)) + effective_identifer = f"{element_identifier}{suffix}" + else: + effective_identifer = element_identifier - new_element_structure[effective_identifer] = element + new_element_structure[effective_identifer] = element # Don't copy until we know everything is fine and we have the structure of the list ready to go. 
new_elements = {} diff --git a/lib/galaxy_test/api/test_workflows.py b/lib/galaxy_test/api/test_workflows.py index 393546d95ec6..6de9b4f7d79b 100644 --- a/lib/galaxy_test/api/test_workflows.py +++ b/lib/galaxy_test/api/test_workflows.py @@ -2976,6 +2976,47 @@ def test_export_invocation_ro_crate(self): workflow = crate.mainEntity assert workflow + @skip_without_tool("__MERGE_COLLECTION__") + def test_merge_collection_scheduling(self, history_id): + summary = self._run_workflow( + """ +class: GalaxyWorkflow +inputs: + collection: + type: collection + collection_type: list +outputs: + merge_out: + outputSource: merge/output +steps: + sleep: + tool_id: cat_data_and_sleep + in: + input1: collection + state: + sleep_time: 5 + merge: + tool_id: __MERGE_COLLECTION__ + in: + inputs_1|input: sleep/out_file1 + inputs_0|input: sleep/out_file1 +test_data: + collection: + collection_type: list + elements: + - identifier: 1 + content: A +""", + history_id=history_id, + wait=True, + assert_ok=True, + ) + invocation = self.workflow_populator.get_invocation(summary.invocation_id, step_details=True) + merge_out_id = invocation["output_collections"]["merge_out"]["id"] + merge_out = self.dataset_populator.get_history_collection_details(history_id, content_id=merge_out_id) + assert merge_out["element_count"] == 1 + assert merge_out["elements"][0]["object"]["state"] == "ok" + @skip_without_tool("__MERGE_COLLECTION__") @skip_without_tool("cat_collection") @skip_without_tool("head") From 21fb9be7fd327769925e5078335cca982efc6690 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Fri, 27 Oct 2023 10:56:05 +0200 Subject: [PATCH 7/7] Add regression test for #16931 --- lib/galaxy_test/api/test_workflows.py | 33 +++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/lib/galaxy_test/api/test_workflows.py b/lib/galaxy_test/api/test_workflows.py index 6de9b4f7d79b..8dba0706a3fe 100644 --- a/lib/galaxy_test/api/test_workflows.py +++ b/lib/galaxy_test/api/test_workflows.py @@ -5354,6 +5354,39 @@ def test_run_build_list_change_datatype_collection_output(self): # Also check that we don't overwrite the original HDA's datatype assert details2["elements"][0]["object"]["file_ext"] == "fasta" + @skip_without_tool("__EXTRACT_DATASET__") + def test_run_build_list_change_datatype_new_metadata_file_parameter(self): + # Regression test for changing datatype to a datatype with a MetadataFileParameter + with self.dataset_populator.test_history() as history_id: + self._run_workflow( + """ +class: GalaxyWorkflow +inputs: + input1: data +steps: + build_list: + tool_id: __BUILD_LIST__ + in: + datasets_0|input: input1 + extract_dataset: + tool_id: __EXTRACT_DATASET__ + in: + input: build_list/output + outputs: + output: + change_datatype: vcf_bgzip +""", + test_data=""" +input1: + value: test.vcf.gz + type: File + file_type: vcf_bgzip +""", + history_id=history_id, + assert_ok=True, + wait=True, + ) + @skip_without_tool("__BUILD_LIST__") def test_run_build_list_rename_collection_output(self): with self.dataset_populator.test_history() as history_id:
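
As background for the ordering change in PATCH 2/7, here is a minimal,
self-contained sketch of how nulls_first/nulls_last combine with asc/desc.
It uses plain SQLAlchemy against an in-memory SQLite database and a simplified
stand-in for the Dataset model (not Galaxy's own), so treat it as an
illustration of the idea rather than Galaxy code:

    from sqlalchemy import Column, Integer, String, asc, create_engine, desc, nulls_first, nulls_last, select
    from sqlalchemy.orm import Session, declarative_base

    Base = declarative_base()


    class Dataset(Base):
        __tablename__ = "dataset"
        id = Column(Integer, primary_key=True)
        name = Column(String)
        # total_size stays NULL if it was never computed,
        # e.g. when the output was deleted before the job completed.
        total_size = Column(Integer, nullable=True)


    engine = create_engine("sqlite://")  # NULLS FIRST/LAST needs SQLite >= 3.30
    Base.metadata.create_all(engine)

    with Session(engine) as session:
        session.add_all(
            [
                Dataset(name="a", total_size=50),
                Dataset(name="b", total_size=None),
                Dataset(name="c", total_size=10),
            ]
        )
        session.commit()

        # Ascending by size, NULLs treated as the smallest values: b, c, a
        asc_names = session.scalars(select(Dataset.name).order_by(nulls_first(asc(Dataset.total_size)))).all()
        # Descending by size, NULLs still treated as the smallest values: a, c, b
        desc_names = session.scalars(select(Dataset.name).order_by(nulls_last(desc(Dataset.total_size)))).all()
        print(asc_names, desc_names)

Without the explicit NULLS FIRST / NULLS LAST, where NULL rows land is
backend-dependent (PostgreSQL, for example, puts NULLs last on ASC and first
on DESC), which is why datasets with a null size could end up in the wrong
position before this change.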