From 694d667248557c5ffa41b5892861cadda62a0e37 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Thu, 26 Oct 2023 11:29:45 +0200 Subject: [PATCH 1/7] Add test coverage for sorting null size datasets --- test/integration/test_storage_cleaner.py | 42 +++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/test/integration/test_storage_cleaner.py b/test/integration/test_storage_cleaner.py index c358536c3244..51485f0ede2e 100644 --- a/test/integration/test_storage_cleaner.py +++ b/test/integration/test_storage_cleaner.py @@ -6,7 +6,10 @@ from uuid import uuid4 from galaxy_test.base.decorators import requires_new_history -from galaxy_test.base.populators import DatasetPopulator +from galaxy_test.base.populators import ( + DatasetPopulator, + skip_without_tool, +) from galaxy_test.driver import integration_util @@ -39,6 +42,43 @@ def test_discarded_datasets_monitoring_and_cleanup(self): "datasets", test_datasets, dataset_ids, delete_resource_uri=f"histories/{history_id}/contents" ) + @requires_new_history + @skip_without_tool("cat_data_and_sleep") + def test_discarded_datasets_with_null_size_are_sorted_correctly(self): + history_id = self.dataset_populator.new_history(f"History for discarded datasets {uuid4()}") + test_datasets = [ + StoredItemDataForTests(name=f"TestDataset01_{uuid4()}", size=10), + StoredItemDataForTests(name=f"TestDataset02_{uuid4()}", size=50), + ] + dataset_ids = self._create_datasets_in_history_with(history_id, test_datasets) + + # Run a tool on the first dataset and delete the output before completing the job + # so it has a null size in the database + inputs = { + "input1": {"src": "hda", "id": dataset_ids[0]}, + "sleep_time": 10, + } + run_response = self.dataset_populator.run_tool_raw( + "cat_data_and_sleep", + inputs, + history_id, + ) + null_size_dataset = run_response.json()["outputs"][0] + self.dataset_populator.delete_dataset(history_id, null_size_dataset["id"], stop_job=True) + # delete the other datasets too + for dataset_id in dataset_ids: + self.dataset_populator.delete_dataset(history_id, dataset_id) + + # Check the dataset size sorting is correct [0, 10, 50] + item_names_forward_order = [null_size_dataset["name"], test_datasets[0].name, test_datasets[1].name] + item_names_reverse_order = list(reversed(item_names_forward_order)) + expected_order_by_map = { + "size-asc": item_names_forward_order, + "size-dsc": item_names_reverse_order, + } + for order_by, expected_ordered_names in expected_order_by_map.items(): + self._assert_order_is_expected("storage/datasets/discarded", order_by, expected_ordered_names) + @requires_new_history def test_archived_histories_monitoring_and_cleanup(self): test_histories = self._build_test_items(resource_name="History") From 3950785c9cd5b707875dba10d43d7ff5df18728b Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Thu, 26 Oct 2023 11:56:37 +0200 Subject: [PATCH 2/7] Sort null size values accordingly --- lib/galaxy/managers/hdas.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/galaxy/managers/hdas.py b/lib/galaxy/managers/hdas.py index 17abe5063337..09354dd71ebc 100644 --- a/lib/galaxy/managers/hdas.py +++ b/lib/galaxy/managers/hdas.py @@ -21,6 +21,8 @@ desc, false, func, + nulls_first, + nulls_last, select, true, ) @@ -347,8 +349,8 @@ def __init__(self, hda_manager: HDAManager, dataset_manager: datasets.DatasetMan self.sort_map = { StoredItemOrderBy.NAME_ASC: 
asc(model.HistoryDatasetAssociation.name), StoredItemOrderBy.NAME_DSC: desc(model.HistoryDatasetAssociation.name), - StoredItemOrderBy.SIZE_ASC: asc(model.Dataset.total_size), - StoredItemOrderBy.SIZE_DSC: desc(model.Dataset.total_size), + StoredItemOrderBy.SIZE_ASC: nulls_first(asc(model.Dataset.total_size)), + StoredItemOrderBy.SIZE_DSC: nulls_last(desc(model.Dataset.total_size)), StoredItemOrderBy.UPDATE_TIME_ASC: asc(model.HistoryDatasetAssociation.update_time), StoredItemOrderBy.UPDATE_TIME_DSC: desc(model.HistoryDatasetAssociation.update_time), } From ffed1573ffb33fa54ca0263ed7cb18d8ad9d48b5 Mon Sep 17 00:00:00 2001 From: Dannon Baker Date: Thu, 26 Oct 2023 09:38:17 -0400 Subject: [PATCH 3/7] When exporting a workflow, include owner's annotation if the exporting user doesn't have an overriding one. --- lib/galaxy/managers/workflows.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/galaxy/managers/workflows.py b/lib/galaxy/managers/workflows.py index 6106d0043517..8449c70f04d7 100644 --- a/lib/galaxy/managers/workflows.py +++ b/lib/galaxy/managers/workflows.py @@ -1337,7 +1337,12 @@ def _workflow_to_dict_export(self, trans, stored=None, workflow=None, internal=F tag_str = "" if stored is not None: if stored.id: - annotation_str = self.get_item_annotation_str(trans.sa_session, trans.user, stored) or "" + # if the active user doesn't have an annotation on the workflow, default to the owner's annotation. + annotation_str = ( + self.get_item_annotation_str(trans.sa_session, trans.user, stored) + or self.get_item_annotation_str(trans.sa_session, stored.user, stored) + or "" + ) tag_str = stored.make_tag_string_list() else: # dry run with flushed workflow objects, just use the annotation From e48e490b3a9e6659b9a5a23973a44597dd4a788c Mon Sep 17 00:00:00 2001 From: Dannon Baker Date: Thu, 26 Oct 2023 14:36:59 -0400 Subject: [PATCH 4/7] Also get owner annotations for individual steps, unless trans.user has one. It's still unclear how one would, so I'm being cautious here. --- lib/galaxy/managers/workflows.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/galaxy/managers/workflows.py b/lib/galaxy/managers/workflows.py index 8449c70f04d7..58bc7e52a2b6 100644 --- a/lib/galaxy/managers/workflows.py +++ b/lib/galaxy/managers/workflows.py @@ -1335,12 +1335,14 @@ def _workflow_to_dict_export(self, trans, stored=None, workflow=None, internal=F """ annotation_str = "" tag_str = "" + annotation_owner = None if stored is not None: if stored.id: # if the active user doesn't have an annotation on the workflow, default to the owner's annotation. + annotation_owner = stored.user annotation_str = ( self.get_item_annotation_str(trans.sa_session, trans.user, stored) - or self.get_item_annotation_str(trans.sa_session, stored.user, stored) + or self.get_item_annotation_str(trans.sa_session, annotation_owner, stored) or "" ) tag_str = stored.make_tag_string_list() @@ -1375,8 +1377,10 @@ def _workflow_to_dict_export(self, trans, stored=None, workflow=None, internal=F module = module_factory.from_workflow_step(trans, step) if not module: raise exceptions.MessageException(f"Unrecognized step type: {step.type}") - # Get user annotation. + # Get user annotation if it exists, otherwise get owner annotation. 
             annotation_str = self.get_item_annotation_str(trans.sa_session, trans.user, step) or ""
+            if not annotation_str and annotation_owner:
+                annotation_str = self.get_item_annotation_str(trans.sa_session, annotation_owner, step) or ""
             content_id = module.get_content_id() if allow_upgrade else step.content_id
             # Export differences for backward compatibility
             tool_state = module.get_export_state()

From 6372253e88478d246104f924faced23cec1b803e Mon Sep 17 00:00:00 2001
From: Dannon Baker
Date: Thu, 26 Oct 2023 15:04:17 -0400
Subject: [PATCH 5/7] Add execute back to published workflows list

---
 client/src/components/Workflow/WorkflowList.vue | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/client/src/components/Workflow/WorkflowList.vue b/client/src/components/Workflow/WorkflowList.vue
index e702bdf3ef40..473184f71d40 100644
--- a/client/src/components/Workflow/WorkflowList.vue
+++ b/client/src/components/Workflow/WorkflowList.vue
@@ -160,7 +160,7 @@ const EXECUTE_FIELD = { key: "execute", label: "Run", thStyle: { width: "10%" }
 const OWNER_FIELD = { key: "owner", label: _l("Owner"), sortable: false, thStyle: { width: "15%" } };
 
 const PERSONAL_FIELDS = [NAME_FIELD, TAGS_FIELD, UPDATED_FIELD, SHARING_FIELD, BOOKMARKED_FIELD, EXECUTE_FIELD];
-const PUBLISHED_FIELDS = [NAME_FIELD, TAGS_FIELD, UPDATED_FIELD, OWNER_FIELD];
+const PUBLISHED_FIELDS = [NAME_FIELD, TAGS_FIELD, UPDATED_FIELD, OWNER_FIELD, EXECUTE_FIELD];
 
 export default {
     components: {

From 69ca24f67d4b4e6289398df01b1fdc12209f0dc7 Mon Sep 17 00:00:00 2001
From: mvdbeek
Date: Fri, 27 Oct 2023 10:02:39 +0200
Subject: [PATCH 6/7] Skip state filtering in __MERGE_COLLECTION__ tool

Fixes inputs that are NEW, QUEUED, etc. getting skipped. If you're after
filtering out errored datasets, you should use the
`__FILTER_FAILED_DATASETS__` tool.
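
For reference, that filtering alternative would look roughly like this in the
same GalaxyWorkflow test YAML used by the new test in this patch (step names
and the filter tool's input/output parameter names are assumed here for
illustration):

    class: GalaxyWorkflow
    inputs:
      collection:
        type: collection
        collection_type: list
    outputs:
      merged:
        outputSource: merge/output
    steps:
      filter_failed:
        tool_id: __FILTER_FAILED_DATASETS__
        in:
          input: collection
      merge:
        tool_id: __MERGE_COLLECTION__
        in:
          inputs_0|input: filter_failed/output

The filter step drops failed elements up front, so the merge only ever sees
datasets that reached an ok state and no longer needs to inspect dataset
states itself.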
--- lib/galaxy/tools/__init__.py | 51 ++++++++++----------------- lib/galaxy_test/api/test_workflows.py | 41 +++++++++++++++++++++ 2 files changed, 60 insertions(+), 32 deletions(-) diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py index 5a92fd960e4b..98abc079e416 100644 --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -3376,40 +3376,27 @@ def produce_outputs(self, trans, out_data, output_collections, incoming, history for copy, input_list in enumerate(input_lists): for dce in input_list.collection.elements: element = dce.element_object - valid = False - - # dealing with a single element - if hasattr(element, "is_ok"): - if element.is_ok: - valid = True - elif hasattr(element, "dataset_instances"): - # we are probably a list:paired dataset, both need to be in non error state - forward_o, reverse_o = element.dataset_instances - if forward_o.is_ok and reverse_o.is_ok: - valid = True + element_identifier = dce.element_identifier + identifier_seen = element_identifier in new_element_structure + appearances = identifiers_map[element_identifier] + add_suffix = False + if dupl_actions == "suffix_every": + add_suffix = True + elif dupl_actions == "suffix_conflict" and len(appearances) > 1: + add_suffix = True + elif dupl_actions == "suffix_conflict_rest" and len(appearances) > 1 and appearances[0] != copy: + add_suffix = True + + if dupl_actions == "keep_first" and identifier_seen: + continue - if valid: - element_identifier = dce.element_identifier - identifier_seen = element_identifier in new_element_structure - appearances = identifiers_map[element_identifier] - add_suffix = False - if dupl_actions == "suffix_every": - add_suffix = True - elif dupl_actions == "suffix_conflict" and len(appearances) > 1: - add_suffix = True - elif dupl_actions == "suffix_conflict_rest" and len(appearances) > 1 and appearances[0] != copy: - add_suffix = True - - if dupl_actions == "keep_first" and identifier_seen: - continue - - if add_suffix and suffix_pattern: - suffix = suffix_pattern.replace("#", str(copy + 1)) - effective_identifer = f"{element_identifier}{suffix}" - else: - effective_identifer = element_identifier + if add_suffix and suffix_pattern: + suffix = suffix_pattern.replace("#", str(copy + 1)) + effective_identifer = f"{element_identifier}{suffix}" + else: + effective_identifer = element_identifier - new_element_structure[effective_identifer] = element + new_element_structure[effective_identifer] = element # Don't copy until we know everything is fine and we have the structure of the list ready to go. 
new_elements = {} diff --git a/lib/galaxy_test/api/test_workflows.py b/lib/galaxy_test/api/test_workflows.py index 393546d95ec6..6de9b4f7d79b 100644 --- a/lib/galaxy_test/api/test_workflows.py +++ b/lib/galaxy_test/api/test_workflows.py @@ -2976,6 +2976,47 @@ def test_export_invocation_ro_crate(self): workflow = crate.mainEntity assert workflow + @skip_without_tool("__MERGE_COLLECTION__") + def test_merge_collection_scheduling(self, history_id): + summary = self._run_workflow( + """ +class: GalaxyWorkflow +inputs: + collection: + type: collection + collection_type: list +outputs: + merge_out: + outputSource: merge/output +steps: + sleep: + tool_id: cat_data_and_sleep + in: + input1: collection + state: + sleep_time: 5 + merge: + tool_id: __MERGE_COLLECTION__ + in: + inputs_1|input: sleep/out_file1 + inputs_0|input: sleep/out_file1 +test_data: + collection: + collection_type: list + elements: + - identifier: 1 + content: A +""", + history_id=history_id, + wait=True, + assert_ok=True, + ) + invocation = self.workflow_populator.get_invocation(summary.invocation_id, step_details=True) + merge_out_id = invocation["output_collections"]["merge_out"]["id"] + merge_out = self.dataset_populator.get_history_collection_details(history_id, content_id=merge_out_id) + assert merge_out["element_count"] == 1 + assert merge_out["elements"][0]["object"]["state"] == "ok" + @skip_without_tool("__MERGE_COLLECTION__") @skip_without_tool("cat_collection") @skip_without_tool("head") From 21fb9be7fd327769925e5078335cca982efc6690 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Fri, 27 Oct 2023 10:56:05 +0200 Subject: [PATCH 7/7] Add regression test for #16931 --- lib/galaxy_test/api/test_workflows.py | 33 +++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/lib/galaxy_test/api/test_workflows.py b/lib/galaxy_test/api/test_workflows.py index 6de9b4f7d79b..8dba0706a3fe 100644 --- a/lib/galaxy_test/api/test_workflows.py +++ b/lib/galaxy_test/api/test_workflows.py @@ -5354,6 +5354,39 @@ def test_run_build_list_change_datatype_collection_output(self): # Also check that we don't overwrite the original HDA's datatype assert details2["elements"][0]["object"]["file_ext"] == "fasta" + @skip_without_tool("__EXTRACT_DATASET__") + def test_run_build_list_change_datatype_new_metadata_file_parameter(self): + # Regression test for changing datatype to a datatype with a MetadataFileParameter + with self.dataset_populator.test_history() as history_id: + self._run_workflow( + """ +class: GalaxyWorkflow +inputs: + input1: data +steps: + build_list: + tool_id: __BUILD_LIST__ + in: + datasets_0|input: input1 + extract_dataset: + tool_id: __EXTRACT_DATASET__ + in: + input: build_list/output + outputs: + output: + change_datatype: vcf_bgzip +""", + test_data=""" +input1: + value: test.vcf.gz + type: File + file_type: vcf_bgzip +""", + history_id=history_id, + assert_ok=True, + wait=True, + ) + @skip_without_tool("__BUILD_LIST__") def test_run_build_list_rename_collection_output(self): with self.dataset_populator.test_history() as history_id:
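
As background for the ordering change in PATCH 2/7, here is a minimal,
self-contained sketch of how nulls_first/nulls_last combine with asc/desc.
It uses plain SQLAlchemy against an in-memory SQLite database and a simplified
stand-in for the Dataset model (not Galaxy's own), so treat it as an
illustration of the idea rather than Galaxy code:

    from sqlalchemy import Column, Integer, String, asc, create_engine, desc, nulls_first, nulls_last, select
    from sqlalchemy.orm import Session, declarative_base

    Base = declarative_base()


    class Dataset(Base):
        __tablename__ = "dataset"
        id = Column(Integer, primary_key=True)
        name = Column(String)
        # total_size stays NULL if it was never computed,
        # e.g. when the output was deleted before the job completed.
        total_size = Column(Integer, nullable=True)


    engine = create_engine("sqlite://")  # NULLS FIRST/LAST needs SQLite >= 3.30
    Base.metadata.create_all(engine)

    with Session(engine) as session:
        session.add_all(
            [
                Dataset(name="a", total_size=50),
                Dataset(name="b", total_size=None),
                Dataset(name="c", total_size=10),
            ]
        )
        session.commit()

        # Ascending by size, NULLs treated as the smallest values: b, c, a
        asc_names = session.scalars(select(Dataset.name).order_by(nulls_first(asc(Dataset.total_size)))).all()
        # Descending by size, NULLs still treated as the smallest values: a, c, b
        desc_names = session.scalars(select(Dataset.name).order_by(nulls_last(desc(Dataset.total_size)))).all()
        print(asc_names, desc_names)

Without the explicit NULLS FIRST / NULLS LAST, where NULL rows land is
backend-dependent (PostgreSQL, for example, puts NULLs last on ASC and first
on DESC), which is why datasets with a null size could end up in the wrong
position before this change.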