Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[23.1] Fix and prevent persisting null file_size #16855

Merged
6 commits merged on Oct 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lib/galaxy/job_execution/output_collect.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def set_default_hda_permissions(self, primary_data):
self._security_agent.set_all_dataset_permissions(primary_data.dataset, permissions, new=True, flush=False)

def copy_dataset_permissions(self, init_from, primary_data):
self._security_agent.copy_dataset_permissions(init_from.dataset, primary_data.dataset)
self._security_agent.copy_dataset_permissions(init_from.dataset, primary_data.dataset, flush=False)


class MetadataSourceProvider(AbstractMetadataSourceProvider):
Expand Down
4 changes: 1 addition & 3 deletions lib/galaxy/jobs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1430,8 +1430,6 @@ def fail(
dataset.state = dataset.states.ERROR
dataset.blurb = "tool error"
dataset.info = message
dataset.set_size()
dataset.dataset.set_total_size()
dataset.mark_unhidden()
if dataset.ext == "auto":
dataset.extension = "data"
Expand Down Expand Up @@ -1738,7 +1736,6 @@ def _finish_dataset(self, output_name, dataset, job, context, final_job_state, r
# Ensure white space between entries
dataset.info = f"{dataset.info.rstrip()}\n{context['stderr'].strip()}"
dataset.tool_version = self.version_string
dataset.set_size()
if "uuid" in context:
dataset.dataset.uuid = context["uuid"]
self.__update_output(job, dataset)
Expand Down Expand Up @@ -2423,6 +2420,7 @@ def __update_output(self, job, hda, clean_only=False):
cleaned up if the dataset has been purged.
"""
dataset = hda.dataset
dataset.set_total_size()
if dataset not in job.output_library_datasets:
purged = dataset.purged
if not purged and not clean_only:
Expand Down
1 change: 1 addition & 0 deletions lib/galaxy/metadata/set_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,7 @@ def set_meta(new_dataset_instance, file_dict):
partial(push_if_necessary, object_store, dataset, external_filename)
)
object_store_update_actions.append(partial(reset_external_filename, dataset))
object_store_update_actions.append(partial(dataset.set_total_size))
object_store_update_actions.append(partial(export_store.add_dataset, dataset))
if dataset_instance_id not in unnamed_id_to_path:
object_store_update_actions.append(partial(collect_extra_files, object_store, dataset, "."))
Expand Down
1 change: 1 addition & 0 deletions lib/galaxy/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3870,6 +3870,7 @@ class Dataset(Base, StorableObject, Serializable):
non_ready_states = (states.NEW, states.UPLOAD, states.QUEUED, states.RUNNING, states.SETTING_METADATA)
ready_states = tuple(set(states.__members__.values()) - set(non_ready_states))
valid_input_states = tuple(set(states.__members__.values()) - {states.ERROR, states.DISCARDED})
no_data_states = (states.PAUSED, states.DEFERRED, states.DISCARDED, *non_ready_states)
terminal_states = (
states.OK,
states.EMPTY,
Expand Down
8 changes: 6 additions & 2 deletions lib/galaxy/model/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,9 +152,13 @@ def versioned_objects_strict(iter):
# These should get some other type of permanent storage, perhaps UserDatasetAssociation ?
# Everything else needs to have a hid and a history
if not obj.history and not obj.history_id:
raise Exception(f"HistoryDatsetAssociation {obj} without history detected, this is not valid")
raise Exception(f"HistoryDatasetAssociation {obj} without history detected, this is not valid")
elif not obj.hid:
raise Exception(f"HistoryDatsetAssociation {obj} without has no hid, this is not valid")
raise Exception(f"HistoryDatasetAssociation {obj} without hid, this is not valid")
elif obj.dataset.file_size is None and obj.dataset.state not in obj.dataset.no_data_states:
raise Exception(
f"HistoryDatasetAssociation {obj} in state {obj.dataset.state} with null file size, this is not valid"
)
yield obj


Expand Down
4 changes: 2 additions & 2 deletions lib/galaxy/model/security.py
Original file line number Diff line number Diff line change
Expand Up @@ -998,12 +998,12 @@ def get_permissions(self, item):
permissions[action] = [item_permission.role]
return permissions

def copy_dataset_permissions(self, src, dst):
def copy_dataset_permissions(self, src, dst, flush=True):
if not isinstance(src, self.model.Dataset):
src = src.dataset
if not isinstance(dst, self.model.Dataset):
dst = dst.dataset
self.set_all_dataset_permissions(dst, self.get_permissions(src))
self.set_all_dataset_permissions(dst, self.get_permissions(src), flush=flush)

def privately_share_dataset(self, dataset, users=None):
dataset.ensure_shareable()
Expand Down
4 changes: 3 additions & 1 deletion lib/galaxy/model/store/discover.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def create_dataset(

if init_from:
self.permission_provider.copy_dataset_permissions(init_from, primary_data)
primary_data.state = init_from.state
primary_data.raw_set_dataset_state(init_from.state)
else:
self.permission_provider.set_default_hda_permissions(primary_data)
else:
Expand Down Expand Up @@ -265,6 +265,8 @@ def set_datasets_metadata(datasets, datasets_attributes=None):
except Exception:
log.exception("Exception occured while setting dataset peek")

primary_data.set_total_size()

def populate_collection_elements(
self,
collection,
Expand Down
1 change: 1 addition & 0 deletions lib/galaxy/tools/actions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -675,6 +675,7 @@ def handle_output(name, output, hidden=None):
data.visible = False
with open(data.dataset.file_name, "w") as out:
out.write(json.dumps(None))
data.set_total_size()
job.preferred_object_store_id = preferred_object_store_id
self._record_inputs(trans, tool, job, incoming, inp_data, inp_dataset_collections)
self._record_outputs(job, out_data, output_collections)
Expand Down
5 changes: 4 additions & 1 deletion test/unit/app/tools/test_collect_primary_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,9 @@ class MockObjectStore:
def __init__(self):
self.created_datasets = {}

def get_store_by(self, obj, **kwargs):
return "uuid"

def update_from_file(self, dataset, file_name, create):
if create:
self.created_datasets[dataset] = file_name
Expand All @@ -458,7 +461,7 @@ def size(self, dataset):
def exists(self, *args, **kwargs):
return True

def get_filename(self, dataset):
def get_filename(self, dataset, **kwargs):
return self.created_datasets[dataset]


Expand Down
Loading