Don't calculate dataset hash for datasets in non-OK state
Also:
- Check again that the dataset hasn't been purged right before
  hash calculation starts.

Address comments in
https://github.com/galaxyproject/galaxy/pull/19181/files#r1853605980
nsoranzo committed Nov 22, 2024
1 parent 1d99e6d commit c0a7cdc
Showing 2 changed files with 6 additions and 3 deletions.
lib/galaxy/jobs/__init__.py (2 changes: 1 addition & 1 deletion)

@@ -2042,7 +2042,7 @@ def fail(message=job.info, exception=None):
         # Calculate dataset hash
         for dataset_assoc in output_dataset_associations:
             dataset = dataset_assoc.dataset.dataset
-            if not dataset.purged and dataset.state != Dataset.states.DEFERRED and not dataset.hashes:
+            if not dataset.purged and dataset.state == Dataset.states.OK and not dataset.hashes:
                 if self.app.config.calculate_dataset_hash == "always" or (
                     self.app.config.calculate_dataset_hash == "upload" and job.tool_id in ("upload1", "__DATA_FETCH__")
                 ):
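The predicate change above is stricter than it may look: the old test, state != Dataset.states.DEFERRED, still admitted datasets in other non-OK states (error, discarded, ...), while the new test, state == Dataset.states.OK, restricts hashing to healthy datasets, matching the commit title. A minimal standalone sketch, using a simplified stand-in for Dataset.states rather than Galaxy's actual enum, shows which states each predicate admits:

from enum import Enum

class States(str, Enum):
    # Simplified stand-in for Dataset.states; the real enum has more members.
    OK = "ok"
    DEFERRED = "deferred"
    ERROR = "error"
    DISCARDED = "discarded"

for state in States:
    old_rule = state != States.DEFERRED  # pre-change condition
    new_rule = state == States.OK        # post-change condition
    print(f"{state.value}: old={old_rule}, new={new_rule}")

# Prints:
# ok: old=True, new=True
# deferred: old=False, new=False
# error: old=True, new=False      (hashing no longer attempted)
# discarded: old=True, new=False  (hashing no longer attempted)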
lib/galaxy/managers/datasets.py (7 changes: 5 additions & 2 deletions)

@@ -161,8 +161,11 @@ def update_object_store_id(self, trans, dataset, object_store_id: str):
         sa_session.commit()
 
     def compute_hash(self, request: ComputeDatasetHashTaskRequest):
-        # For files in extra_files_path
         dataset = self.by_id(request.dataset_id)
+        if dataset.purged:
+            log.warning("Unable to calculate hash for purged dataset [%s].", dataset.id)
+            return
+        # For files in extra_files_path
         extra_files_path = request.extra_files_path
         if extra_files_path:
             extra_dir = dataset.extra_files_path_name
@@ -192,7 +195,7 @@ def compute_hash(self, request: ComputeDatasetHashTaskRequest):
                     f"Re-calculated dataset hash for dataset [{dataset.id}] and new hash value [{calculated_hash_value}] does not equal previous hash value [{old_hash_value}]."
                 )
             else:
-                log.debug("Duplicated dataset hash request, no update to the database.")
+                log.debug("Duplicated dataset hash request for dataset [%s], no update to the database.", dataset.id)
 
         # TODO: implement above for groups
         # TODO: datatypes?
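The new purged guard matters because compute_hash appears to run as a queued task (the request type is ComputeDatasetHashTaskRequest): the dataset can be purged between the moment the task is enqueued and the moment it actually runs, so the task re-loads the dataset and, per the commit message, checks again that it hasn't been purged right before hash calculation starts. A hypothetical, self-contained sketch of that re-check pattern (FakeDataset, DB, and compute_hash_task are illustrative stand-ins, not Galaxy code):

import logging
from dataclasses import dataclass

log = logging.getLogger(__name__)

@dataclass
class FakeDataset:
    # Illustrative stand-in for a persisted dataset row.
    id: int
    purged: bool = False

DB = {1: FakeDataset(id=1)}  # illustrative stand-in for the database session

def compute_hash_task(dataset_id: int) -> None:
    dataset = DB[dataset_id]  # re-load the current state at run time
    if dataset.purged:
        # The dataset may have been purged after the task was enqueued.
        log.warning("Unable to calculate hash for purged dataset [%s].", dataset.id)
        return
    ...  # safe to open the dataset's file and hash it here

DB[1].purged = True   # dataset is purged while the task sits in the queue
compute_hash_task(1)  # logs a warning and returns instead of failing mid-hash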
