Merge pull request #16855 from mvdbeek/file_size_debug

[23.1] Fix and prevent persisting null file_size

mvdbeek authored Oct 16, 2023
2 parents 8082c2d + 7e41cce commit d01a21a

Showing 9 changed files with 20 additions and 10 deletions.

lib/galaxy/job_execution/output_collect.py (1 addition, 1 deletion)

```diff
@@ -91,7 +91,7 @@ def set_default_hda_permissions(self, primary_data):
         self._security_agent.set_all_dataset_permissions(primary_data.dataset, permissions, new=True, flush=False)
 
     def copy_dataset_permissions(self, init_from, primary_data):
-        self._security_agent.copy_dataset_permissions(init_from.dataset, primary_data.dataset)
+        self._security_agent.copy_dataset_permissions(init_from.dataset, primary_data.dataset, flush=False)
 
 
 class MetadataSourceProvider(AbstractMetadataSourceProvider):
```
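
The `flush=False` keeps the permission copy from flushing the SQLAlchemy session while the new dataset row is still half-built (its `file_size` is not yet set). A minimal standalone sketch of that pattern, using an illustrative model rather than Galaxy's real ones:

```python
# Sketch: why a flush=False flag matters. Every premature flush persists the
# object in whatever state it is in, so the flush is deferred until the row
# is complete. Model and helper names here are illustrative, not Galaxy's.
from sqlalchemy import Column, Integer, create_engine
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()

class Dataset(Base):
    __tablename__ = "dataset"
    id = Column(Integer, primary_key=True)
    file_size = Column(Integer)

def set_permissions(session: Session, dataset: Dataset, flush: bool = True) -> None:
    # ... permission rows would be added here ...
    if flush:
        session.flush()  # would persist dataset now, while file_size is still None

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)
with Session(engine) as session:
    ds = Dataset()
    session.add(ds)
    set_permissions(session, ds, flush=False)  # defer: size not computed yet
    ds.file_size = 1024                        # complete the row first
    session.commit()                           # one flush, with a valid size
```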

lib/galaxy/jobs/__init__.py (1 addition, 3 deletions)

```diff
@@ -1430,8 +1430,6 @@ def fail(
             dataset.state = dataset.states.ERROR
             dataset.blurb = "tool error"
             dataset.info = message
-            dataset.set_size()
-            dataset.dataset.set_total_size()
             dataset.mark_unhidden()
             if dataset.ext == "auto":
                 dataset.extension = "data"
@@ -1738,7 +1736,6 @@ def _finish_dataset(self, output_name, dataset, job, context, final_job_state, r
             # Ensure white space between entries
             dataset.info = f"{dataset.info.rstrip()}\n{context['stderr'].strip()}"
         dataset.tool_version = self.version_string
-        dataset.set_size()
         if "uuid" in context:
             dataset.dataset.uuid = context["uuid"]
         self.__update_output(job, dataset)
@@ -2423,6 +2420,7 @@ def __update_output(self, job, hda, clean_only=False):
         cleaned up if the dataset has been purged.
         """
         dataset = hda.dataset
+        dataset.set_total_size()
         if dataset not in job.output_library_datasets:
             purged = dataset.purged
             if not purged and not clean_only:
```
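
The scattered size calls in `fail()` and `_finish_dataset()` move into `__update_output()`, the single place every output dataset passes through before being persisted, so the size can no longer be skipped on any code path. A rough sketch of that centralization, with hypothetical stand-in classes:

```python
# Sketch of the idea: rather than each caller remembering to compute sizes,
# the one persistence choke point does it. FakeDataset is a stand-in, and
# this set_total_size only stats the primary file; Galaxy's also counts
# extra files.
import os

class FakeDataset:
    def __init__(self, file_name: str):
        self.file_name = file_name
        self.file_size = None
        self.total_size = None

    def set_total_size(self) -> None:
        size = os.path.getsize(self.file_name) if os.path.exists(self.file_name) else 0
        self.file_size = size
        self.total_size = size

def update_output(dataset: FakeDataset) -> None:
    dataset.set_total_size()  # always runs before the dataset is persisted
    # ... persistence would happen here ...
```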

lib/galaxy/metadata/set_metadata.py (1 addition)

```diff
@@ -449,6 +449,7 @@ def set_meta(new_dataset_instance, file_dict):
                     partial(push_if_necessary, object_store, dataset, external_filename)
                 )
                 object_store_update_actions.append(partial(reset_external_filename, dataset))
+                object_store_update_actions.append(partial(dataset.set_total_size))
                 object_store_update_actions.append(partial(export_store.add_dataset, dataset))
                 if dataset_instance_id not in unnamed_id_to_path:
                     object_store_update_actions.append(partial(collect_extra_files, object_store, dataset, "."))
```
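
`dataset.set_total_size` joins the queue of deferred actions built with `functools.partial`, so the size is recomputed once the object store is safe to touch and before the dataset is exported. A self-contained sketch of that queue-of-partials pattern (stand-in names, not Galaxy's API):

```python
# Sketch: callables are queued with their arguments bound at queue time and
# executed in order later. Dataset and push_if_necessary are stand-ins.
from functools import partial

class Dataset:
    def set_total_size(self):
        print("computing total size before export")

def push_if_necessary(store, dataset):
    print(f"pushing {dataset!r} to {store!r}")

dataset = Dataset()
object_store_update_actions = []
object_store_update_actions.append(partial(push_if_necessary, "object_store", dataset))
object_store_update_actions.append(partial(dataset.set_total_size))

# Later, once it is safe to touch the object store, run everything in order:
for action in object_store_update_actions:
    action()
```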

lib/galaxy/model/__init__.py (1 addition)

```diff
@@ -3870,6 +3870,7 @@ class Dataset(Base, StorableObject, Serializable):
     non_ready_states = (states.NEW, states.UPLOAD, states.QUEUED, states.RUNNING, states.SETTING_METADATA)
     ready_states = tuple(set(states.__members__.values()) - set(non_ready_states))
     valid_input_states = tuple(set(states.__members__.values()) - {states.ERROR, states.DISCARDED})
+    no_data_states = (states.PAUSED, states.DEFERRED, states.DISCARDED, *non_ready_states)
     terminal_states = (
         states.OK,
         states.EMPTY,
```
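
`no_data_states` enumerates the states in which a dataset legitimately has no bytes on disk yet (or ever), so a null `file_size` is only tolerated there. A small illustration with a simplified stand-in enum:

```python
# Sketch with a reduced state set; Galaxy's Dataset.states has more members.
from enum import Enum

class States(str, Enum):
    NEW = "new"
    QUEUED = "queued"
    RUNNING = "running"
    OK = "ok"
    PAUSED = "paused"
    DEFERRED = "deferred"
    DISCARDED = "discarded"

non_ready_states = (States.NEW, States.QUEUED, States.RUNNING)
no_data_states = (States.PAUSED, States.DEFERRED, States.DISCARDED, *non_ready_states)

def null_size_is_valid(state: States, file_size) -> bool:
    # A null size is only acceptable while no data can exist yet (or ever).
    return file_size is not None or state in no_data_states

assert null_size_is_valid(States.RUNNING, None)   # job still producing data
assert not null_size_is_valid(States.OK, None)    # finished dataset must have a size
```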

lib/galaxy/model/base.py (6 additions, 2 deletions)

```diff
@@ -152,9 +152,13 @@ def versioned_objects_strict(iter):
             # These should get some other type of permanent storage, perhaps UserDatasetAssociation ?
             # Everything else needs to have a hid and a history
             if not obj.history and not obj.history_id:
-                raise Exception(f"HistoryDatsetAssociation {obj} without history detected, this is not valid")
+                raise Exception(f"HistoryDatasetAssociation {obj} without history detected, this is not valid")
             elif not obj.hid:
-                raise Exception(f"HistoryDatsetAssociation {obj} without has no hid, this is not valid")
+                raise Exception(f"HistoryDatasetAssociation {obj} without hid, this is not valid")
+            elif obj.dataset.file_size is None and obj.dataset.state not in obj.dataset.no_data_states:
+                raise Exception(
+                    f"HistoryDatasetAssociation {obj} in state {obj.dataset.state} with null file size, this is not valid"
+                )
             yield obj
 
 
```
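
Alongside the exception-message typo fixes, `versioned_objects_strict` gains a third check: an HDA headed for a flush whose dataset has a null `file_size` while in a data-bearing state is now rejected outright. A compact sketch of the same fail-fast filter, with an illustrative class in place of Galaxy's models:

```python
# Sketch of a strict pre-flush filter in the same spirit: iterate the objects
# bound for the session flush and fail fast on invalid ones.
def versioned_objects_strict(objects):
    for obj in objects:
        if obj.file_size is None and obj.state not in obj.no_data_states:
            raise Exception(f"{obj!r} in state {obj.state} with null file size, this is not valid")
        yield obj

class Hda:  # illustrative stand-in for a HistoryDatasetAssociation
    no_data_states = ("new", "queued", "running", "paused", "deferred", "discarded")

    def __init__(self, state, file_size=None):
        self.state, self.file_size = state, file_size

    def __repr__(self):
        return f"Hda(state={self.state!r}, file_size={self.file_size!r})"

list(versioned_objects_strict([Hda("running")]))   # fine: no data produced yet
# list(versioned_objects_strict([Hda("ok")]))      # raises: finished but sizeless
```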

lib/galaxy/model/security.py (2 additions, 2 deletions)

```diff
@@ -998,12 +998,12 @@ def get_permissions(self, item):
                 permissions[action] = [item_permission.role]
         return permissions
 
-    def copy_dataset_permissions(self, src, dst):
+    def copy_dataset_permissions(self, src, dst, flush=True):
         if not isinstance(src, self.model.Dataset):
             src = src.dataset
         if not isinstance(dst, self.model.Dataset):
             dst = dst.dataset
-        self.set_all_dataset_permissions(dst, self.get_permissions(src))
+        self.set_all_dataset_permissions(dst, self.get_permissions(src), flush=flush)
 
     def privately_share_dataset(self, dataset, users=None):
         dataset.ensure_shareable()
```
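
`copy_dataset_permissions` now threads a `flush` keyword through to `set_all_dataset_permissions`, defaulting to `True` so existing callers keep their behavior while batch callers (like the output-collection path above) can defer. A toy sketch of the flag-threading pattern, with dicts standing in for the real models:

```python
# Sketch: the caller at the top of the stack decides when the flush happens;
# each layer just forwards the flag. Names are illustrative.
def set_all_dataset_permissions(dst, permissions, flush=True):
    dst["permissions"] = list(permissions)
    if flush:
        print("flushing session now")

def copy_dataset_permissions(src, dst, flush=True):
    # Default preserves old behavior; batch callers pass flush=False.
    set_all_dataset_permissions(dst, src["permissions"], flush=flush)

src, dst = {"permissions": ["manage", "access"]}, {}
copy_dataset_permissions(src, dst, flush=False)  # defer to one flush later
```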

lib/galaxy/model/store/discover.py (3 additions, 1 deletion)

```diff
@@ -125,7 +125,7 @@ def create_dataset(
 
             if init_from:
                 self.permission_provider.copy_dataset_permissions(init_from, primary_data)
-                primary_data.state = init_from.state
+                primary_data.raw_set_dataset_state(init_from.state)
             else:
                 self.permission_provider.set_default_hda_permissions(primary_data)
         else:
@@ -265,6 +265,8 @@ def set_datasets_metadata(datasets, datasets_attributes=None):
             except Exception:
                 log.exception("Exception occured while setting dataset peek")
 
+            primary_data.set_total_size()
+
     def populate_collection_elements(
         self,
         collection,
```
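
Two changes here: copying the state via `raw_set_dataset_state` avoids whatever side effects assigning through the `state` property would trigger, and `set_total_size()` runs after the peek is set so discovered datasets never persist a null size. A sketch of the raw-setter idea, with an illustrative class rather than Galaxy's:

```python
# Sketch: a property setter can have side effects (session bookkeeping,
# cascaded updates), while a raw method just writes the underlying attribute.
class DatasetInstance:
    def __init__(self):
        self._state = "new"

    @property
    def state(self):
        return self._state

    @state.setter
    def state(self, value):
        print("setter side effects run (session add, cascaded updates, ...)")
        self._state = value

    def raw_set_dataset_state(self, value):
        self._state = value  # no side effects, no premature persistence

hda = DatasetInstance()
hda.raw_set_dataset_state("ok")  # copies the state without firing the setter
```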

lib/galaxy/tools/actions/__init__.py (1 addition)

```diff
@@ -675,6 +675,7 @@ def handle_output(name, output, hidden=None):
                 data.visible = False
                 with open(data.dataset.file_name, "w") as out:
                     out.write(json.dumps(None))
+                data.set_total_size()
         job.preferred_object_store_id = preferred_object_store_id
         self._record_inputs(trans, tool, job, incoming, inp_data, inp_dataset_collections)
         self._record_outputs(job, out_data, output_collections)
```
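
Even the placeholder output, a JSON `null` written for skipped outputs, now gets its size recorded immediately after the file is written. A tiny sketch of the bookkeeping (the path handling is illustrative):

```python
# Sketch: write the placeholder, then record its size right away so the row
# never persists with file_size = NULL.
import json
import os
import tempfile

file_name = os.path.join(tempfile.mkdtemp(), "dataset.dat")
with open(file_name, "w") as out:
    out.write(json.dumps(None))  # the literal 4-byte string "null"

file_size = os.path.getsize(file_name)
assert file_size == 4
```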

test/unit/app/tools/test_collect_primary_datasets.py (4 additions, 1 deletion)

```diff
@@ -447,6 +447,9 @@ class MockObjectStore:
     def __init__(self):
         self.created_datasets = {}
 
+    def get_store_by(self, obj, **kwargs):
+        return "uuid"
+
     def update_from_file(self, dataset, file_name, create):
         if create:
             self.created_datasets[dataset] = file_name
@@ -458,7 +461,7 @@ def size(self, dataset):
     def exists(self, *args, **kwargs):
         return True
 
-    def get_filename(self, dataset):
+    def get_filename(self, dataset, **kwargs):
         return self.created_datasets[dataset]
```
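
The mock grows `get_store_by` and accepts extra keyword arguments in `get_filename`, mirroring what the new `set_total_size` code paths ask of the object store. A sketch of how a test might drive the extended mock (the usage below is illustrative, not taken from the test file):

```python
# Sketch: the extended mock tolerates the keyword arguments the new code
# paths pass through to the object store.
class MockObjectStore:
    def __init__(self):
        self.created_datasets = {}

    def get_store_by(self, obj, **kwargs):
        return "uuid"

    def update_from_file(self, dataset, file_name, create):
        if create:
            self.created_datasets[dataset] = file_name

    def get_filename(self, dataset, **kwargs):
        return self.created_datasets[dataset]

store = MockObjectStore()
store.update_from_file("hda1", "/tmp/out.dat", create=True)
assert store.get_filename("hda1", extra_dir=None) == "/tmp/out.dat"
assert store.get_store_by("hda1") == "uuid"
```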
