From 91660b9d5eff9320d2faaa36d49aff859f827bba Mon Sep 17 00:00:00 2001
From: mvdbeek
Date: Tue, 10 Dec 2024 18:54:07 +0100
Subject: [PATCH 1/2] Fix target_type in auto-conversion context

This means we collect and store the converted dataset as the job input.
We don't need to wait for the galaxy.json collection; we already know the
exact target type. That fixes the retrieval in
`get_converted_files_by_type`.

I believe this was always the intention, and it is how things work if the
converted dataset already exists (i.e. on a re-run) and for all converters
that don't use galaxy.json.

The converter records the dataset that the user chose, so there's no gap
in the provenance either.

This somewhat addresses
https://github.com/galaxyproject/total-perspective-vortex/issues/141, so
you can (reliably) differentiate your rules on the combination of input
datatype and file size.
---
 lib/galaxy/datatypes/data.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/lib/galaxy/datatypes/data.py b/lib/galaxy/datatypes/data.py
index 05f5adbad6bf..f5d15fa406a5 100644
--- a/lib/galaxy/datatypes/data.py
+++ b/lib/galaxy/datatypes/data.py
@@ -847,7 +847,11 @@ def convert_dataset(
     job, converted_datasets, *_ = converter.execute(
         trans, incoming=params, set_output_hid=visible, history=history, flush_job=False
     )
+    # We should only have a single converted output, but let's be defensive here
+    n_converted_datasets = len(converted_datasets)
     for converted_dataset in converted_datasets.values():
+        if converted_dataset.extension == "auto" and n_converted_datasets == 1:
+            converted_dataset.extension = target_type
         original_dataset.attach_implicitly_converted_dataset(trans.sa_session, converted_dataset, target_type)
     trans.app.job_manager.enqueue(job, tool=converter)
     if len(params) > 0:

From a75bca59b45777b6df4769791bab6badc2267bb6 Mon Sep 17 00:00:00 2001
From: mvdbeek
Date: Wed, 11 Dec 2024 12:18:14 +0100
Subject: [PATCH 2/2] Add test

---
 lib/galaxy_test/api/test_tools.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/lib/galaxy_test/api/test_tools.py b/lib/galaxy_test/api/test_tools.py
index 56fea13f8285..4547e7d13317 100644
--- a/lib/galaxy_test/api/test_tools.py
+++ b/lib/galaxy_test/api/test_tools.py
@@ -2511,6 +2511,24 @@ def test_multi_param_column_nested_list_fails_on_invalid_column(self):
             exception_raised = e
         assert exception_raised, "Expected invalid column selection to fail job"
 
+    @skip_without_tool("implicit_conversion_format_input")
+    def test_implicit_conversion_input_dataset_tracking(self):
+        with self.dataset_populator.test_history() as history_id:
+            compressed_path = self.test_data_resolver.get_filename("1.fastqsanger.gz")
+            with open(compressed_path, "rb") as fh:
+                dataset = self.dataset_populator.new_dataset(
+                    history_id, content=fh, file_type="fastqsanger.gz", wait=True
+                )
+            outputs = self._run(
+                "Grep1", history_id=history_id, inputs={"data": {"src": "hda", "id": dataset["id"]}}, assert_ok=True
+            )
+            job_details = self.dataset_populator.get_job_details(outputs["jobs"][0]["id"], full=True).json()
+            assert job_details["inputs"]["input"]["id"] != dataset["id"]
+            converted_input = self.dataset_populator.get_history_dataset_details(
+                history_id=history_id, content_id=job_details["inputs"]["input"]["id"]
+            )
+            assert converted_input["extension"] == "fastqsanger"
+
     @skip_without_tool("column_multi_param")
     def test_implicit_conversion_and_reduce(self):
         with self.dataset_populator.test_history() as history_id:
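
Editor's note (not part of the patches): the retrieval the first commit
message refers to is the lookup of an already-converted dataset by its
target type. A simplified sketch of that lookup, modeled on
`HistoryDatasetAssociation.get_converted_files_by_type` in
lib/galaxy/model/__init__.py; the exact signature and fields may differ
between Galaxy versions:

    def get_converted_files_by_type(self, file_type):
        # Walk the stored implicit-conversion associations and return the
        # dataset that was previously converted to the requested type, if any.
        for assoc in self.implicitly_converted_datasets:
            if not assoc.deleted and assoc.type == file_type:
                return assoc.dataset
        return None

With the fix in PATCH 1/2, the dataset this returns (and the dataset stored
as the job input) carries its real extension, e.g. "fastqsanger", as soon as
the conversion job is created, rather than the "auto" placeholder. That is
what lets schedulers such as total-perspective-vortex branch reliably on the
input datatype and file size.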