diff --git a/lib/galaxy/job_execution/output_collect.py b/lib/galaxy/job_execution/output_collect.py index b9e254150271..3b15f7feace6 100644 --- a/lib/galaxy/job_execution/output_collect.py +++ b/lib/galaxy/job_execution/output_collect.py @@ -91,7 +91,7 @@ def set_default_hda_permissions(self, primary_data): self._security_agent.set_all_dataset_permissions(primary_data.dataset, permissions, new=True, flush=False) def copy_dataset_permissions(self, init_from, primary_data): - self._security_agent.copy_dataset_permissions(init_from.dataset, primary_data.dataset) + self._security_agent.copy_dataset_permissions(init_from.dataset, primary_data.dataset, flush=False) class MetadataSourceProvider(AbstractMetadataSourceProvider): diff --git a/lib/galaxy/jobs/__init__.py b/lib/galaxy/jobs/__init__.py index f70d7250bad4..2b2a19ad34db 100644 --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -1430,8 +1430,6 @@ def fail( dataset.state = dataset.states.ERROR dataset.blurb = "tool error" dataset.info = message - dataset.set_size() - dataset.dataset.set_total_size() dataset.mark_unhidden() if dataset.ext == "auto": dataset.extension = "data" @@ -1738,7 +1736,6 @@ def _finish_dataset(self, output_name, dataset, job, context, final_job_state, r # Ensure white space between entries dataset.info = f"{dataset.info.rstrip()}\n{context['stderr'].strip()}" dataset.tool_version = self.version_string - dataset.set_size() if "uuid" in context: dataset.dataset.uuid = context["uuid"] self.__update_output(job, dataset) @@ -2423,6 +2420,7 @@ def __update_output(self, job, hda, clean_only=False): cleaned up if the dataset has been purged. """ dataset = hda.dataset + dataset.set_total_size() if dataset not in job.output_library_datasets: purged = dataset.purged if not purged and not clean_only: diff --git a/lib/galaxy/metadata/set_metadata.py b/lib/galaxy/metadata/set_metadata.py index ff9380ece0b4..b512f3f905b5 100644 --- a/lib/galaxy/metadata/set_metadata.py +++ b/lib/galaxy/metadata/set_metadata.py @@ -449,6 +449,7 @@ def set_meta(new_dataset_instance, file_dict): partial(push_if_necessary, object_store, dataset, external_filename) ) object_store_update_actions.append(partial(reset_external_filename, dataset)) + object_store_update_actions.append(partial(dataset.set_total_size)) object_store_update_actions.append(partial(export_store.add_dataset, dataset)) if dataset_instance_id not in unnamed_id_to_path: object_store_update_actions.append(partial(collect_extra_files, object_store, dataset, ".")) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 6652ff72814d..cb3601e279fd 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -3870,6 +3870,7 @@ class Dataset(Base, StorableObject, Serializable): non_ready_states = (states.NEW, states.UPLOAD, states.QUEUED, states.RUNNING, states.SETTING_METADATA) ready_states = tuple(set(states.__members__.values()) - set(non_ready_states)) valid_input_states = tuple(set(states.__members__.values()) - {states.ERROR, states.DISCARDED}) + no_data_states = (states.PAUSED, states.DEFERRED, states.DISCARDED, *non_ready_states) terminal_states = ( states.OK, states.EMPTY, diff --git a/lib/galaxy/model/base.py b/lib/galaxy/model/base.py index d5326c785b20..bcd76da2c6a8 100644 --- a/lib/galaxy/model/base.py +++ b/lib/galaxy/model/base.py @@ -152,9 +152,13 @@ def versioned_objects_strict(iter): # These should get some other type of permanent storage, perhaps UserDatasetAssociation ? # Everything else needs to have a hid and a history if not obj.history and not obj.history_id: - raise Exception(f"HistoryDatsetAssociation {obj} without history detected, this is not valid") + raise Exception(f"HistoryDatasetAssociation {obj} without history detected, this is not valid") elif not obj.hid: - raise Exception(f"HistoryDatsetAssociation {obj} without has no hid, this is not valid") + raise Exception(f"HistoryDatasetAssociation {obj} without hid, this is not valid") + elif obj.dataset.file_size is None and obj.dataset.state not in obj.dataset.no_data_states: + raise Exception( + f"HistoryDatasetAssociation {obj} in state {obj.dataset.state} with null file size, this is not valid" + ) yield obj diff --git a/lib/galaxy/model/security.py b/lib/galaxy/model/security.py index 84c3eb5fb976..bb07ea071efe 100644 --- a/lib/galaxy/model/security.py +++ b/lib/galaxy/model/security.py @@ -998,12 +998,12 @@ def get_permissions(self, item): permissions[action] = [item_permission.role] return permissions - def copy_dataset_permissions(self, src, dst): + def copy_dataset_permissions(self, src, dst, flush=True): if not isinstance(src, self.model.Dataset): src = src.dataset if not isinstance(dst, self.model.Dataset): dst = dst.dataset - self.set_all_dataset_permissions(dst, self.get_permissions(src)) + self.set_all_dataset_permissions(dst, self.get_permissions(src), flush=flush) def privately_share_dataset(self, dataset, users=None): dataset.ensure_shareable() diff --git a/lib/galaxy/model/store/discover.py b/lib/galaxy/model/store/discover.py index 832357acb45b..634ce6048eec 100644 --- a/lib/galaxy/model/store/discover.py +++ b/lib/galaxy/model/store/discover.py @@ -125,7 +125,7 @@ def create_dataset( if init_from: self.permission_provider.copy_dataset_permissions(init_from, primary_data) - primary_data.state = init_from.state + primary_data.raw_set_dataset_state(init_from.state) else: self.permission_provider.set_default_hda_permissions(primary_data) else: @@ -265,6 +265,8 @@ def set_datasets_metadata(datasets, datasets_attributes=None): except Exception: log.exception("Exception occured while setting dataset peek") + primary_data.set_total_size() + def populate_collection_elements( self, collection, diff --git a/lib/galaxy/tools/actions/__init__.py b/lib/galaxy/tools/actions/__init__.py index fd658c163a07..249b34f2b484 100644 --- a/lib/galaxy/tools/actions/__init__.py +++ b/lib/galaxy/tools/actions/__init__.py @@ -675,6 +675,7 @@ def handle_output(name, output, hidden=None): data.visible = False with open(data.dataset.file_name, "w") as out: out.write(json.dumps(None)) + data.set_total_size() job.preferred_object_store_id = preferred_object_store_id self._record_inputs(trans, tool, job, incoming, inp_data, inp_dataset_collections) self._record_outputs(job, out_data, output_collections) diff --git a/test/unit/app/tools/test_collect_primary_datasets.py b/test/unit/app/tools/test_collect_primary_datasets.py index 08bceb0be90b..40618ca9207b 100644 --- a/test/unit/app/tools/test_collect_primary_datasets.py +++ b/test/unit/app/tools/test_collect_primary_datasets.py @@ -447,6 +447,9 @@ class MockObjectStore: def __init__(self): self.created_datasets = {} + def get_store_by(self, obj, **kwargs): + return "uuid" + def update_from_file(self, dataset, file_name, create): if create: self.created_datasets[dataset] = file_name @@ -458,7 +461,7 @@ def size(self, dataset): def exists(self, *args, **kwargs): return True - def get_filename(self, dataset): + def get_filename(self, dataset, **kwargs): return self.created_datasets[dataset]