Merge pull request #18798 from mvdbeek/fix_wrong_extension_on_pick_data_output

[24.1] Fix wrong extension on pick data output
mvdbeek authored Sep 11, 2024
2 parents a5da8ac + ee63214 commit a6d8851
Showing 9 changed files with 87 additions and 10 deletions.
10 changes: 10 additions & 0 deletions client/src/api/schema/schema.ts
@@ -5704,6 +5704,11 @@ export interface components {
* @description TODO
*/
api_type?: "file" | null;
/**
* Copied From History Dataset Association Id
* @description ID of HDA this HDA was copied from.
*/
copied_from_history_dataset_association_id?: string | null;
/** Copied From Ldda Id */
copied_from_ldda_id?: string | null;
/**
@@ -5948,6 +5953,11 @@ export interface components {
* @enum {string}
*/
api_type?: "file";
/**
* Copied From History Dataset Association Id
* @description ID of HDA this HDA was copied from.
*/
copied_from_history_dataset_association_id?: string | null;
/** Copied From Ldda Id */
copied_from_ldda_id?: string | null;
/**
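With the regenerated schema, detailed dataset responses expose the new provenance field to API clients. A minimal sketch of the shape such a response might take (the field name comes from the diff; the id values are illustrative, not from the commit):

detailed_hda = {
    "id": "f2db41e1fa331b3e",  # illustrative encoded id
    "state": "ok",
    "extension": "txt",
    # New field: encoded id of the HDA this one was copied from, or None.
    "copied_from_history_dataset_association_id": "a799d38679e985db",
}
assert detailed_hda["copied_from_history_dataset_association_id"] is not None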
2 changes: 1 addition & 1 deletion lib/galaxy/managers/datasets.py
@@ -648,7 +648,7 @@ def add_serializers(self):
"file_size": lambda item, key, **context: self.serializers["size"](item, key, **context),
"nice_size": lambda item, key, **context: item.get_size(nice_size=True, calculate_size=False),
# common to lddas and hdas - from mapping.py
- "copied_from_history_dataset_association_id": self.serialize_id,
+ "copied_from_history_dataset_association_id": lambda item, key, **context: item.id,
"copied_from_library_dataset_dataset_association_id": self.serialize_id,
"info": lambda item, key, **context: item.info.strip() if isinstance(item.info, str) else item.info,
"blurb": lambda item, key, **context: item.blurb,
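The serializer stops encoding the id in the manager layer and hands back a raw integer instead; with the field now declared as an EncodedDatabaseIdField in the Pydantic schema (see lib/galaxy/schema/schema.py below), encoding happens once at the schema boundary. A toy sketch of that division of labor, with a hypothetical encode_id helper standing in for Galaxy's id encoding:

def encode_id(raw_id: int) -> str:
    # Hypothetical stand-in for Galaxy's id encoding helper.
    return format(raw_id * 2654435761 % 2**32, "x")

def manager_serialize(raw_id: int) -> int:
    # Manager layer: return the raw database id unchanged ...
    return raw_id

def schema_serialize(raw_id: int) -> str:
    # ... schema layer: encode exactly once for the API response.
    return encode_id(manager_serialize(raw_id))

assert isinstance(manager_serialize(42), int)
assert isinstance(schema_serialize(42), str)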
1 change: 1 addition & 0 deletions lib/galaxy/model/__init__.py
@@ -5273,6 +5273,7 @@ def copy_from(self, other_hda, new_dataset=None, include_tags=True, include_meta
if include_tags and self.history:
self.copy_tags_from(self.user, other_hda)
self.dataset = new_dataset or other_hda.dataset
self.copied_from_history_dataset_association_id = other_hda.id
if old_dataset:
old_dataset.full_delete()

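Recording other_hda.id at copy time gives each copy a link back to its source, so a chain of copies can be walked to the original. A minimal sketch of that traversal over stand-in objects (FakeHDA and walk_copy_chain are illustrative, not Galaxy code):

from dataclasses import dataclass
from typing import Dict, List, Optional

@dataclass
class FakeHDA:
    id: int
    copied_from_history_dataset_association_id: Optional[int] = None

def walk_copy_chain(hda: FakeHDA, by_id: Dict[int, FakeHDA]) -> List[int]:
    # Follow copied_from links until reaching an HDA with no parent.
    chain = [hda.id]
    while hda.copied_from_history_dataset_association_id is not None:
        hda = by_id[hda.copied_from_history_dataset_association_id]
        chain.append(hda.id)
    return chain

original = FakeHDA(id=1)
first_copy = FakeHDA(id=2, copied_from_history_dataset_association_id=1)
second_copy = FakeHDA(id=3, copied_from_history_dataset_association_id=2)
by_id = {h.id: h for h in (original, first_copy, second_copy)}
assert walk_copy_chain(second_copy, by_id) == [3, 2, 1]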
3 changes: 3 additions & 0 deletions lib/galaxy/schema/schema.py
@@ -902,6 +902,9 @@ class HDADetailed(HDASummary, WithModelClass):
description="The list of sources associated with this dataset.",
),
]
copied_from_history_dataset_association_id: Annotated[
Optional[EncodedDatabaseIdField], Field(None, description="ID of HDA this HDA was copied from.")
]


class HDAExtended(HDADetailed):
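Because the field is optional with a None default, older payloads without it still validate. A self-contained Pydantic sketch of the same Annotated pattern, with plain str standing in for EncodedDatabaseIdField:

from typing import Annotated, Optional
from pydantic import BaseModel, Field

class HDADetailedSketch(BaseModel):
    # str stands in for Galaxy's EncodedDatabaseIdField in this sketch.
    copied_from_history_dataset_association_id: Annotated[
        Optional[str], Field(None, description="ID of HDA this HDA was copied from.")
    ]

assert HDADetailedSketch().copied_from_history_dataset_association_id is None
assert (
    HDADetailedSketch(copied_from_history_dataset_association_id="abc123")
    .copied_from_history_dataset_association_id == "abc123"
)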
12 changes: 9 additions & 3 deletions lib/galaxy/tools/__init__.py
@@ -2880,7 +2880,9 @@ def exec_after_process(self, app, inp_data, out_data, param_dict, job=None, **kw
# if change_datatype PJA is associated with expression tool output the new output already has
# the desired datatype, so we use it. If the extension is "data" there's no change_datatype PJA and
# we want to use the existing extension.
- new_ext = output.extension if output.extension != "data" else copy_object.extension
+ new_ext = (
+     output.extension if output.extension not in ("data", "expression.json") else copy_object.extension
+ )
require_metadata_regeneration = copy_object.extension != new_ext
output.copy_from(copy_object, include_metadata=not require_metadata_regeneration)
output.extension = new_ext
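An expression-tool output feeding pick_value carries a placeholder extension ("data", or "expression.json" after this fix) unless a change_datatype PJA already set a real one, so in the placeholder cases the copied dataset's extension must win. A standalone restatement of the selection rule from the hunk above (pick_new_extension is an illustrative name, not Galaxy's):

def pick_new_extension(output_ext: str, copy_object_ext: str) -> str:
    # Placeholder extensions mean no change_datatype PJA ran;
    # inherit the extension of the dataset being copied.
    if output_ext in ("data", "expression.json"):
        return copy_object_ext
    # Otherwise a change_datatype PJA chose the datatype; keep it.
    return output_ext

assert pick_new_extension("expression.json", "txt") == "txt"
assert pick_new_extension("data", "txt") == "txt"
assert pick_new_extension("tabular", "txt") == "tabular"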
@@ -2895,8 +2897,12 @@ def exec_after_process(self, app, inp_data, out_data, param_dict, job=None, **kw
else:
# TODO: move exec_after_process into metadata script so this doesn't run on the headnode ?
output.init_meta()
- output.set_meta()
- output.set_metadata_success_state()
+ try:
+     output.set_meta()
+     output.set_metadata_success_state()
+ except Exception:
+     output.state = model.HistoryDatasetAssociation.states.FAILED_METADATA
+     log.exception("Exception occurred while setting metadata")

def parse_environment_variables(self, tool_source):
"""Setup environment variable for inputs file."""
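The second hunk turns a metadata failure from a hard error on the head node into a dataset-level failed_metadata state. A toy sketch of that guard (FakeOutput simulates an HDA whose set_meta raises; it is not Galaxy's model):

class FakeOutput:
    FAILED_METADATA = "failed_metadata"

    def __init__(self) -> None:
        self.state = "ok"

    def set_meta(self) -> None:
        raise ValueError("simulated metadata failure")

output = FakeOutput()
try:
    output.set_meta()
except Exception:
    # Mirror of the new guard: record the failure on the dataset
    # instead of letting the post-job hook crash.
    output.state = FakeOutput.FAILED_METADATA

assert output.state == "failed_metadata"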
6 changes: 5 additions & 1 deletion lib/galaxy/webapps/galaxy/services/datasets.py
@@ -403,7 +403,11 @@ def show(
# Default: return dataset as dict.
if hda_ldda == DatasetSourceType.hda:
return self.hda_serializer.serialize_to_view(
- dataset, view=serialization_params.view or "detailed", user=trans.user, trans=trans
+ dataset,
+ view=serialization_params.view or "detailed",
+ keys=serialization_params.keys,
+ user=trans.user,
+ trans=trans,
)
else:
dataset_dict = dataset.to_dict()
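Forwarding serialization_params.keys lets API callers request fields that no default view serializes, which is exactly what the new integration test needs. A hedged sketch of such a request with the requests library (the URL, API key, and dataset id are placeholders):

import requests

GALAXY_URL = "https://galaxy.example.org"  # placeholder
DATASET_ID = "f2db41e1fa331b3e"            # placeholder encoded id

response = requests.get(
    f"{GALAXY_URL}/api/datasets/{DATASET_ID}",
    params={"keys": "state,extension,copied_from_history_dataset_association_id"},
    headers={"x-api-key": "YOUR_API_KEY"},  # placeholder
)
response.raise_for_status()
print(response.json().get("copied_from_history_dataset_association_id"))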
11 changes: 7 additions & 4 deletions lib/galaxy_test/base/populators.py
@@ -1009,14 +1009,17 @@ def get_history_dataset_source_transform_actions(self, history_id: str, **kwd) -
assert isinstance(transform, list)
return {t["action"] for t in transform}

- def get_history_dataset_details(self, history_id: str, **kwds) -> Dict[str, Any]:
+ def get_history_dataset_details(self, history_id: str, keys: Optional[str] = None, **kwds) -> Dict[str, Any]:
      dataset_id = self.__history_content_id(history_id, **kwds)
- details_response = self.get_history_dataset_details_raw(history_id, dataset_id)
+ details_response = self.get_history_dataset_details_raw(history_id, dataset_id, keys=keys)
details_response.raise_for_status()
return details_response.json()

- def get_history_dataset_details_raw(self, history_id: str, dataset_id: str) -> Response:
- details_response = self._get_contents_request(history_id, f"/datasets/{dataset_id}")
+ def get_history_dataset_details_raw(self, history_id: str, dataset_id: str, keys: Optional[str] = None) -> Response:
+     data = None
+     if keys:
+         data = {"keys": keys}
+     details_response = self._get_contents_request(history_id, f"/datasets/{dataset_id}", data=data)
return details_response

def get_history_dataset_extra_files(self, history_id: str, **kwds) -> list:
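The raw helper only attaches a payload when keys is given, so existing callers keep their old request shape. A minimal restatement of that plumbing (build_request_data is an illustrative name, not the populator's):

from typing import Optional

def build_request_data(keys: Optional[str] = None) -> Optional[dict]:
    # Only send a "keys" parameter when the caller asked for one.
    return {"keys": keys} if keys else None

assert build_request_data() is None
assert build_request_data("state,extension") == {"keys": "state,extension"}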
50 changes: 50 additions & 0 deletions test/integration/test_workflow_invocation.py
@@ -69,6 +69,56 @@ def test_run_workflow_optional_data_skips_step(self) -> None:
if step["workflow_step_label"] == "cat1":
assert sum(1 for j in step["jobs"] if j["state"] == "skipped") == 1

def test_pick_value_preserves_datatype_and_inheritance_chain(self):
self.install_repository("iuc", "pick_value", "b19e21af9c52")
with self.dataset_populator.test_history() as history_id:
summary = self.workflow_populator.run_workflow(
"""
class: GalaxyWorkflow
inputs:
input_dataset: data
outputs:
the_output:
outputSource: pick_value/data_param
steps:
skip_step:
tool_id: job_properties
when: $(false)
pick_value:
tool_id: toolshed.g2.bx.psu.edu/repos/iuc/pick_value/pick_value/0.2.0
in:
style_cond|type_cond|pick_from_0|value:
source: skip_step/out_file1
style_cond|type_cond|pick_from_1|value:
source: input_dataset
tool_state:
style_cond:
pick_style: first
type_cond:
param_type: data
pick_from:
- value:
__class__: RuntimeValue
- value:
__class__: RuntimeValue
test_data:
input_dataset:
value: 1.txt
type: File
""",
history_id=history_id,
)
invocation = self.workflow_populator.get_invocation(summary.invocation_id)
output = self.dataset_populator.get_history_dataset_details(
history_id,
content_id=invocation["outputs"]["the_output"]["id"],
# copied_from_history_dataset_association_id is not in any of the default serializers
keys="state,extension,copied_from_history_dataset_association_id",
)
assert output["state"] == "ok"
assert output["extension"] == "txt"
assert output["copied_from_history_dataset_association_id"]

def test_run_workflow_optional_data_provided_runs_step(self) -> None:
self.install_repository("iuc", "map_param_value", "5ac8a4bf7a8d")
with self.dataset_populator.test_history() as history_id:
2 changes: 1 addition & 1 deletion test/unit/app/managers/test_HDAManager.py
@@ -412,7 +412,7 @@ def test_serializers(self):
assert isinstance(serialized["file_size"], int)
assert isinstance(serialized["nice_size"], str)
# TODO: these should be tested w/copy
- self.assertNullableEncodedId(serialized["copied_from_history_dataset_association_id"])
+ assert isinstance(serialized["copied_from_history_dataset_association_id"], int)
self.assertNullableEncodedId(serialized["copied_from_library_dataset_dataset_association_id"])
self.assertNullableBasestring(serialized["info"])
self.assertNullableBasestring(serialized["blurb"])
