From c62b2d9d20c6d3cad5b417da1ce9acc23da4f5a8 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Sun, 15 Oct 2023 13:00:42 +0200 Subject: [PATCH 1/6] Raise exception if persisting HDA will null file_size --- lib/galaxy/model/__init__.py | 1 + lib/galaxy/model/base.py | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 6652ff72814d..cb3601e279fd 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -3870,6 +3870,7 @@ class Dataset(Base, StorableObject, Serializable): non_ready_states = (states.NEW, states.UPLOAD, states.QUEUED, states.RUNNING, states.SETTING_METADATA) ready_states = tuple(set(states.__members__.values()) - set(non_ready_states)) valid_input_states = tuple(set(states.__members__.values()) - {states.ERROR, states.DISCARDED}) + no_data_states = (states.PAUSED, states.DEFERRED, states.DISCARDED, *non_ready_states) terminal_states = ( states.OK, states.EMPTY, diff --git a/lib/galaxy/model/base.py b/lib/galaxy/model/base.py index d5326c785b20..bcd76da2c6a8 100644 --- a/lib/galaxy/model/base.py +++ b/lib/galaxy/model/base.py @@ -152,9 +152,13 @@ def versioned_objects_strict(iter): # These should get some other type of permanent storage, perhaps UserDatasetAssociation ? # Everything else needs to have a hid and a history if not obj.history and not obj.history_id: - raise Exception(f"HistoryDatsetAssociation {obj} without history detected, this is not valid") + raise Exception(f"HistoryDatasetAssociation {obj} without history detected, this is not valid") elif not obj.hid: - raise Exception(f"HistoryDatsetAssociation {obj} without has no hid, this is not valid") + raise Exception(f"HistoryDatasetAssociation {obj} without hid, this is not valid") + elif obj.dataset.file_size is None and obj.dataset.state not in obj.dataset.no_data_states: + raise Exception( + f"HistoryDatasetAssociation {obj} in state {obj.dataset.state} with null file size, this is not valid" + ) yield obj From 7a06271971eeee7a5ee932fc1068103a6f258130 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Sun, 15 Oct 2023 14:37:38 +0200 Subject: [PATCH 2/6] Fix and prevent persisting null file_size --- lib/galaxy/metadata/set_metadata.py | 1 + lib/galaxy/model/store/discover.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/lib/galaxy/metadata/set_metadata.py b/lib/galaxy/metadata/set_metadata.py index ff9380ece0b4..b512f3f905b5 100644 --- a/lib/galaxy/metadata/set_metadata.py +++ b/lib/galaxy/metadata/set_metadata.py @@ -449,6 +449,7 @@ def set_meta(new_dataset_instance, file_dict): partial(push_if_necessary, object_store, dataset, external_filename) ) object_store_update_actions.append(partial(reset_external_filename, dataset)) + object_store_update_actions.append(partial(dataset.set_total_size)) object_store_update_actions.append(partial(export_store.add_dataset, dataset)) if dataset_instance_id not in unnamed_id_to_path: object_store_update_actions.append(partial(collect_extra_files, object_store, dataset, ".")) diff --git a/lib/galaxy/model/store/discover.py b/lib/galaxy/model/store/discover.py index 832357acb45b..a213327f9618 100644 --- a/lib/galaxy/model/store/discover.py +++ b/lib/galaxy/model/store/discover.py @@ -265,6 +265,8 @@ def set_datasets_metadata(datasets, datasets_attributes=None): except Exception: log.exception("Exception occured while setting dataset peek") + primary_data.set_total_size() + def populate_collection_elements( self, collection, From 1003351485f3f531b0849bccc9b05f883ace148d Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Sun, 15 Oct 2023 16:50:27 +0200 Subject: [PATCH 3/6] Skip unnecessary flush when applying permissions --- lib/galaxy/job_execution/output_collect.py | 2 +- lib/galaxy/model/security.py | 4 ++-- lib/galaxy/model/store/discover.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/galaxy/job_execution/output_collect.py b/lib/galaxy/job_execution/output_collect.py index b9e254150271..3b15f7feace6 100644 --- a/lib/galaxy/job_execution/output_collect.py +++ b/lib/galaxy/job_execution/output_collect.py @@ -91,7 +91,7 @@ def set_default_hda_permissions(self, primary_data): self._security_agent.set_all_dataset_permissions(primary_data.dataset, permissions, new=True, flush=False) def copy_dataset_permissions(self, init_from, primary_data): - self._security_agent.copy_dataset_permissions(init_from.dataset, primary_data.dataset) + self._security_agent.copy_dataset_permissions(init_from.dataset, primary_data.dataset, flush=False) class MetadataSourceProvider(AbstractMetadataSourceProvider): diff --git a/lib/galaxy/model/security.py b/lib/galaxy/model/security.py index 84c3eb5fb976..bb07ea071efe 100644 --- a/lib/galaxy/model/security.py +++ b/lib/galaxy/model/security.py @@ -998,12 +998,12 @@ def get_permissions(self, item): permissions[action] = [item_permission.role] return permissions - def copy_dataset_permissions(self, src, dst): + def copy_dataset_permissions(self, src, dst, flush=True): if not isinstance(src, self.model.Dataset): src = src.dataset if not isinstance(dst, self.model.Dataset): dst = dst.dataset - self.set_all_dataset_permissions(dst, self.get_permissions(src)) + self.set_all_dataset_permissions(dst, self.get_permissions(src), flush=flush) def privately_share_dataset(self, dataset, users=None): dataset.ensure_shareable() diff --git a/lib/galaxy/model/store/discover.py b/lib/galaxy/model/store/discover.py index a213327f9618..634ce6048eec 100644 --- a/lib/galaxy/model/store/discover.py +++ b/lib/galaxy/model/store/discover.py @@ -125,7 +125,7 @@ def create_dataset( if init_from: self.permission_provider.copy_dataset_permissions(init_from, primary_data) - primary_data.state = init_from.state + primary_data.raw_set_dataset_state(init_from.state) else: self.permission_provider.set_default_hda_permissions(primary_data) else: From 6d68b78f151f3bc99f802d00572c82168a31616f Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Sun, 15 Oct 2023 22:06:47 +0200 Subject: [PATCH 4/6] Set dataset size also when failing jobs --- lib/galaxy/jobs/__init__.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/galaxy/jobs/__init__.py b/lib/galaxy/jobs/__init__.py index f70d7250bad4..2b2a19ad34db 100644 --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -1430,8 +1430,6 @@ def fail( dataset.state = dataset.states.ERROR dataset.blurb = "tool error" dataset.info = message - dataset.set_size() - dataset.dataset.set_total_size() dataset.mark_unhidden() if dataset.ext == "auto": dataset.extension = "data" @@ -1738,7 +1736,6 @@ def _finish_dataset(self, output_name, dataset, job, context, final_job_state, r # Ensure white space between entries dataset.info = f"{dataset.info.rstrip()}\n{context['stderr'].strip()}" dataset.tool_version = self.version_string - dataset.set_size() if "uuid" in context: dataset.dataset.uuid = context["uuid"] self.__update_output(job, dataset) @@ -2423,6 +2420,7 @@ def __update_output(self, job, hda, clean_only=False): cleaned up if the dataset has been purged. """ dataset = hda.dataset + dataset.set_total_size() if dataset not in job.output_library_datasets: purged = dataset.purged if not purged and not clean_only: From 23d938f7f90546006e789ede77e2cb6dd80dfd18 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Sun, 15 Oct 2023 22:10:44 +0200 Subject: [PATCH 5/6] Fix dataset collection unit tests --- test/unit/app/tools/test_collect_primary_datasets.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/unit/app/tools/test_collect_primary_datasets.py b/test/unit/app/tools/test_collect_primary_datasets.py index 08bceb0be90b..40618ca9207b 100644 --- a/test/unit/app/tools/test_collect_primary_datasets.py +++ b/test/unit/app/tools/test_collect_primary_datasets.py @@ -447,6 +447,9 @@ class MockObjectStore: def __init__(self): self.created_datasets = {} + def get_store_by(self, obj, **kwargs): + return "uuid" + def update_from_file(self, dataset, file_name, create): if create: self.created_datasets[dataset] = file_name @@ -458,7 +461,7 @@ def size(self, dataset): def exists(self, *args, **kwargs): return True - def get_filename(self, dataset): + def get_filename(self, dataset, **kwargs): return self.created_datasets[dataset] From 7e41cce56d1ec59f7ad53333b4ea12248492c31c Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Sun, 15 Oct 2023 23:25:24 +0200 Subject: [PATCH 6/6] Set size for null datasets --- lib/galaxy/tools/actions/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/galaxy/tools/actions/__init__.py b/lib/galaxy/tools/actions/__init__.py index fd658c163a07..249b34f2b484 100644 --- a/lib/galaxy/tools/actions/__init__.py +++ b/lib/galaxy/tools/actions/__init__.py @@ -675,6 +675,7 @@ def handle_output(name, output, hidden=None): data.visible = False with open(data.dataset.file_name, "w") as out: out.write(json.dumps(None)) + data.set_total_size() job.preferred_object_store_id = preferred_object_store_id self._record_inputs(trans, tool, job, incoming, inp_data, inp_dataset_collections) self._record_outputs(job, out_data, output_collections)