From c0a7cdc2dd91c820610ea31c1c37c5e8274f169a Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Fri, 22 Nov 2024 19:18:40 +0000 Subject: [PATCH] Don't calculate dataset hash for datasets in non-OK state Also: - Check again that the dataset hasn't been purged right before hash calculation starts. Address comments in https://github.com/galaxyproject/galaxy/pull/19181/files#r1853605980 --- lib/galaxy/jobs/__init__.py | 2 +- lib/galaxy/managers/datasets.py | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/galaxy/jobs/__init__.py b/lib/galaxy/jobs/__init__.py index 22a909466b41..9e4d3b65376c 100644 --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -2042,7 +2042,7 @@ def fail(message=job.info, exception=None): # Calculate dataset hash for dataset_assoc in output_dataset_associations: dataset = dataset_assoc.dataset.dataset - if not dataset.purged and dataset.state != Dataset.states.DEFERRED and not dataset.hashes: + if not dataset.purged and dataset.state == Dataset.states.OK and not dataset.hashes: if self.app.config.calculate_dataset_hash == "always" or ( self.app.config.calculate_dataset_hash == "upload" and job.tool_id in ("upload1", "__DATA_FETCH__") ): diff --git a/lib/galaxy/managers/datasets.py b/lib/galaxy/managers/datasets.py index 59137af76e9a..9dd9f87c0638 100644 --- a/lib/galaxy/managers/datasets.py +++ b/lib/galaxy/managers/datasets.py @@ -161,8 +161,11 @@ def update_object_store_id(self, trans, dataset, object_store_id: str): sa_session.commit() def compute_hash(self, request: ComputeDatasetHashTaskRequest): - # For files in extra_files_path dataset = self.by_id(request.dataset_id) + if dataset.purged: + log.warning("Unable to calculate hash for purged dataset [%s].", dataset.id) + return + # For files in extra_files_path extra_files_path = request.extra_files_path if extra_files_path: extra_dir = dataset.extra_files_path_name @@ -192,7 +195,7 @@ def compute_hash(self, request: ComputeDatasetHashTaskRequest): f"Re-calculated dataset hash for dataset [{dataset.id}] and new hash value [{calculated_hash_value}] does not equal previous hash value [{old_hash_value}]." ) else: - log.debug("Duplicated dataset hash request, no update to the database.") + log.debug("Duplicated dataset hash request for dataset [%s], no update to the database.", dataset.id) # TODO: implement above for groups # TODO: datatypes?