Commit

Merge pull request galaxyproject#17474 from mvdbeek/fix_failed_metadata_state

[23.2] Set metadata states on dataset association, not dataset
mvdbeek authored Feb 15, 2024
2 parents 3cfe20c + 513ca74 commit 44487a5
Showing 11 changed files with 43 additions and 44 deletions.
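
In short: previously a metadata failure wrote FAILED_METADATA to the shared Dataset row, so every HDA/LDDA pointing at that dataset appeared failed. After this change, FAILED_METADATA and SETTING_METADATA are held in the association-level _state attribute and merely overlay the shared state for that one association; a standalone sketch of this pattern follows the lib/galaxy/model/__init__.py diff below.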
5 changes: 3 additions & 2 deletions lib/galaxy/celery/tasks.py

@@ -197,10 +197,11 @@ def set_metadata(
             hda_manager.overwrite_metadata(dataset_instance)
         dataset_instance.datatype.set_meta(dataset_instance)
         dataset_instance.set_peek()
-        dataset_instance.dataset.state = dataset_instance.dataset.states.OK
+        # Reset SETTING_METADATA state so the dataset instance getter picks the dataset state
+        dataset_instance.set_metadata_success_state()
     except Exception as e:
         log.info(f"Setting metadata failed on {model_class} {dataset_instance.id}: {str(e)}")
-        dataset_instance.dataset.state = dataset_instance.dataset.states.FAILED_METADATA
+        dataset_instance.state = dataset_instance.states.FAILED_METADATA
     with transaction(sa_session):
         sa_session.commit()
4 changes: 2 additions & 2 deletions lib/galaxy/jobs/__init__.py

@@ -1769,7 +1769,7 @@ def _finish_dataset(self, output_name, dataset, job, context, final_job_state, r
             )
             if not metadata_set_successfully:
                 if self.tool.tool_type == "expression":
-                    dataset._state = model.Dataset.states.OK
+                    dataset.set_metadata_success_state()
                 elif retry_internally:
                     # If Galaxy was expected to sniff type and didn't - do so.
                     if dataset.ext == "_sniff_":
@@ -1781,7 +1781,7 @@ def _finish_dataset(self, output_name, dataset, job, context, final_job_state, r
                     # call datatype.set_meta directly for the initial set_meta call during dataset creation
                     dataset.datatype.set_meta(dataset, overwrite=False)
                 else:
-                    dataset._state = model.Dataset.states.FAILED_METADATA
+                    dataset.state = model.HistoryDatasetAssociation.states.FAILED_METADATA
             else:
                 self.external_output_metadata.load_metadata(
                     dataset,
4 changes: 3 additions & 1 deletion lib/galaxy/jobs/handler.py

@@ -620,7 +620,9 @@ def __filter_jobs_with_invalid_input_states(self, jobs):
                         )
                     ),
                     input_association.deleted == true(),
-                    input_association._state == input_association.states.FAILED_METADATA,
+                    input_association._state.in_(
+                        (input_association.states.FAILED_METADATA, input_association.states.SETTING_METADATA)
+                    ),
                 )
             )
             .all()
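
The handler hunk above widens a scalar equality into SQLAlchemy's in_() operator, so inputs that are still in SETTING_METADATA, not just ones in FAILED_METADATA, now disqualify a job. A self-contained sketch of the same filter shape, using a toy table and made-up state strings rather than Galaxy's actual ORM models:

from sqlalchemy import Column, Integer, String, create_engine, select
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()


class InputAssociation(Base):
    __tablename__ = "input_association"
    id = Column(Integer, primary_key=True)
    # Association-level state column; None means "defer to the dataset state".
    _state = Column("state", String, nullable=True)


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add_all(
        [
            InputAssociation(_state="failed_metadata"),
            InputAssociation(_state="setting_metadata"),
            InputAssociation(_state=None),
        ]
    )
    session.commit()
    # The old filter matched only FAILED_METADATA; the new one also catches
    # inputs whose metadata is still being set.
    stmt = select(InputAssociation).where(
        InputAssociation._state.in_(("failed_metadata", "setting_metadata"))
    )
    assert len(session.scalars(stmt).all()) == 2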
41 changes: 19 additions & 22 deletions lib/galaxy/model/__init__.py

@@ -4310,7 +4310,7 @@ class DatasetInstance(RepresentById, UsesCreateAndUpdateTime, _HasTable):
     """A base class for all 'dataset instances', HDAs, LDAs, etc"""

     states = Dataset.states
-    _state: str
+    _state: Optional[str]
     conversion_messages = Dataset.conversion_messages
     permitted_actions = Dataset.permitted_actions
     purged: bool
@@ -4394,32 +4394,29 @@ def ext(self):

     @property
     def has_deferred_data(self):
-        return self.get_dataset_state() == Dataset.states.DEFERRED
+        return self.dataset.state == Dataset.states.DEFERRED

-    def get_dataset_state(self):
-        # self._state is currently only used when setting metadata externally
-        # leave setting the state as-is, we'll currently handle this specially in the external metadata code
+    @property
+    def state(self):
+        # self._state holds state that should only affect this particular dataset association, not the dataset state itself
         if self._state:
             return self._state
         return self.dataset.state

-    def raw_set_dataset_state(self, state):
-        if state != self.dataset.state:
-            self.dataset.state = state
-            return True
-        else:
-            return False
-
-    def set_dataset_state(self, state):
-        if self.raw_set_dataset_state(state):
-            sa_session = object_session(self)
-            if sa_session:
-                object_session(self).add(self.dataset)
-                session = object_session(self)
-                with transaction(session):
-                    session.commit()  # flush here, because hda.flush() won't flush the Dataset object
-
-    state = property(get_dataset_state, set_dataset_state)
+    @state.setter
+    def state(self, state: Optional[DatasetState]):
+        if state != self.state:
+            if state in (DatasetState.FAILED_METADATA, DatasetState.SETTING_METADATA):
+                self._state = state
+            else:
+                self.set_metadata_success_state()
+                sa_session = object_session(self)
+                if sa_session:
+                    sa_session.add(self.dataset)
+                self.dataset.state = state
+
+    def set_metadata_success_state(self):
+        self._state = None

     def get_file_name(self, sync_cache=True) -> str:
         if self.dataset.purged:
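
To make the new semantics concrete, here is a minimal runnable sketch of the overlay pattern introduced above (plain Python stand-ins, not the real Dataset/DatasetInstance classes; session bookkeeping and the no-op guard are omitted):

from typing import Optional

OK = "ok"
FAILED_METADATA = "failed_metadata"
SETTING_METADATA = "setting_metadata"


class Dataset:
    """Stand-in for the shared dataset row that many associations can point at."""

    def __init__(self) -> None:
        self.state = OK


class DatasetAssociation:
    """Stand-in for an HDA/LDDA: metadata states overlay the shared dataset state."""

    def __init__(self, dataset: Dataset) -> None:
        self.dataset = dataset
        self._state: Optional[str] = None  # only metadata-related states live here

    @property
    def state(self) -> str:
        # An association-level override (FAILED_METADATA / SETTING_METADATA) wins;
        # otherwise fall back to the shared dataset's state.
        if self._state:
            return self._state
        return self.dataset.state

    @state.setter
    def state(self, state: str) -> None:
        if state in (FAILED_METADATA, SETTING_METADATA):
            self._state = state  # the shared dataset row is left untouched
        else:
            self.set_metadata_success_state()  # clear the overlay ...
            self.dataset.state = state  # ... and write the real state through

    def set_metadata_success_state(self) -> None:
        self._state = None


shared = Dataset()
hda_a = DatasetAssociation(shared)
hda_b = DatasetAssociation(shared)

hda_a.state = FAILED_METADATA
assert hda_a.state == FAILED_METADATA  # only this association reports the failure
assert hda_b.state == OK  # a sibling association sharing the dataset is unaffected

hda_a.state = OK  # clears the overlay and writes through to the shared dataset
assert hda_a.state == OK and shared.state == OK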
7 changes: 3 additions & 4 deletions lib/galaxy/model/store/__init__.py

@@ -620,10 +620,9 @@ def handle_dataset_object_edit(dataset_instance, dataset_attrs):
             discarded_data = self.import_options.discarded_data
             dataset_state = dataset_attrs.get("state", dataset_instance.states.OK)
             if dataset_state == dataset_instance.states.DEFERRED:
-                dataset_instance._state = dataset_instance.states.DEFERRED
+                dataset_instance.state = dataset_instance.states.DEFERRED
                 dataset_instance.deleted = False
                 dataset_instance.purged = False
-                dataset_instance.dataset.state = dataset_instance.states.DEFERRED
                 dataset_instance.dataset.deleted = False
                 dataset_instance.dataset.purged = False
             elif (
@@ -635,7 +634,7 @@ def handle_dataset_object_edit(dataset_instance, dataset_attrs):
                 target_state = (
                     dataset_instance.states.DISCARDED if is_discarded else dataset_instance.states.DEFERRED
                 )
-                dataset_instance._state = target_state
+                dataset_instance.state = target_state
                 deleted = is_discarded and (discarded_data == ImportDiscardedDataType.FORBID)
                 dataset_instance.deleted = deleted
                 dataset_instance.purged = deleted
@@ -708,7 +707,7 @@ def handle_dataset_object_edit(dataset_instance, dataset_attrs):
                     dataset_instance.datatype.set_meta(dataset_instance)
                 except Exception:
                     log.debug(f"Metadata setting failed on {dataset_instance}", exc_info=True)
-                    dataset_instance.dataset.state = dataset_instance.dataset.states.FAILED_METADATA
+                    dataset_instance.state = dataset_instance.dataset.states.FAILED_METADATA

                 if model_class == "HistoryDatasetAssociation":
                     if not isinstance(dataset_instance, model.HistoryDatasetAssociation):
4 changes: 2 additions & 2 deletions lib/galaxy/model/store/discover.py

@@ -128,7 +128,7 @@ def create_dataset(

         if init_from:
             self.permission_provider.copy_dataset_permissions(init_from, primary_data)
-            primary_data.raw_set_dataset_state(init_from.state)
+            primary_data.state = init_from.state
         else:
             self.permission_provider.set_default_hda_permissions(primary_data)
     else:
@@ -147,7 +147,7 @@ def create_dataset(

         self.add_library_dataset_to_folder(library_folder, ld)
         primary_data = ldda
-        primary_data.raw_set_dataset_state(final_job_state)
+        primary_data.state = final_job_state
     if final_job_state == galaxy.model.Job.states.ERROR and not self.get_implicit_collection_jobs_association_id():
         primary_data.visible = True
4 changes: 2 additions & 2 deletions lib/galaxy/tools/__init__.py

@@ -3020,7 +3020,7 @@ def exec_after_process(self, app, inp_data, out_data, param_dict, job=None, **kw
                 metadata_set_successfully = False
                 log.exception("Exception occurred while loading metadata results")
             if not metadata_set_successfully:
-                dataset._state = model.Dataset.states.FAILED_METADATA
+                dataset.state = model.DatasetInstance.states.FAILED_METADATA
                 self.sa_session.add(dataset)
                 with transaction(self.sa_session):
                     self.sa_session.commit()
@@ -3033,7 +3033,7 @@ def exec_after_process(self, app, inp_data, out_data, param_dict, job=None, **kw
                 dataset.state = param_dict.get("__ORIGINAL_DATASET_STATE__")
             else:
                 # Revert dataset.state to fall back to dataset.dataset.state
-                dataset._state = None
+                dataset.set_metadata_success_state()
             # Need to reset the peek, which may rely on metadata
             # TODO: move this into metadata setting, setting the peek requires dataset access,
             # and large chunks of the dataset may be read here.
2 changes: 1 addition & 1 deletion lib/galaxy/tools/actions/metadata.py

@@ -145,7 +145,7 @@ def execute_via_app(
             job.add_input_library_dataset(dataset_name, dataset)
         # Need a special state here to show that metadata is being set and also allow the job to run
         # i.e. if state was set to 'running' the set metadata job would never run, as it would wait for input (the dataset to set metadata on) to be in a ready state
-        dataset._state = dataset.states.SETTING_METADATA
+        dataset.state = dataset.states.SETTING_METADATA
         job.state = start_job_state  # job inputs have been configured, restore initial job state
         with transaction(sa_session):
             sa_session.commit()
4 changes: 2 additions & 2 deletions lib/galaxy/webapps/galaxy/services/history_contents.py

@@ -1477,13 +1477,13 @@ def _change_item_datatype(
         self.hda_manager.ensure_can_change_datatype(item)
         self.hda_manager.ensure_can_set_metadata(item)
         is_deferred = item.has_deferred_data
-        item.dataset.state = item.dataset.states.SETTING_METADATA
+        item.state = item.dataset.states.SETTING_METADATA
         if is_deferred:
             if params.datatype == "auto":  # if `auto` just keep the original guessed datatype
                 item.update()  # TODO: remove this `update` when we can properly track the operation results to notify the history
             else:
                 trans.app.datatypes_registry.change_datatype(item, params.datatype)
-            item.dataset.state = item.dataset.states.DEFERRED
+            item.state = item.dataset.states.DEFERRED
         else:
             return change_datatype.si(
                 dataset_id=item.id, datatype=params.datatype, task_user_id=getattr(trans.user, "id", None)
2 changes: 1 addition & 1 deletion test/unit/app/tools/test_column_parameters.py

@@ -87,7 +87,7 @@ def build_ready_hda(self):
                 extension="interval", create_dataset=True, sa_session=self.app.model.context
             )
         )
-        ready_hda.set_dataset_state("ok")
+        ready_hda.state = "ok"
         return ready_hda

     @property
10 changes: 5 additions & 5 deletions test/unit/app/tools/test_parameter_validation.py

@@ -226,11 +226,11 @@ def test_DatasetOkValidator(self):
         ok_hda = hist.add_dataset(
             HistoryDatasetAssociation(id=1, extension="interval", create_dataset=True, sa_session=sa_session)
         )
-        ok_hda.set_dataset_state(Dataset.states.OK)
+        ok_hda.state = Dataset.states.OK
         notok_hda = hist.add_dataset(
             HistoryDatasetAssociation(id=2, extension="interval", create_dataset=True, sa_session=sa_session)
         )
-        notok_hda.set_dataset_state(Dataset.states.EMPTY)
+        notok_hda.state = Dataset.states.EMPTY

         p = self._parameter_for(
             xml="""
@@ -349,7 +349,7 @@ def test_MetadataValidator(self):
                 dataset=Dataset(external_filename=get_test_fname("1.bed")),
             )
         )
-        hda.set_dataset_state(Dataset.states.OK)
+        hda.state = Dataset.states.OK
         hda.set_meta()
         hda.metadata.strandCol = hda.metadata.spec["strandCol"].no_value

@@ -409,12 +409,12 @@ def test_UnspecifiedBuildValidator(self):
         has_dbkey_hda = hist.add_dataset(
             HistoryDatasetAssociation(id=1, extension="interval", create_dataset=True, sa_session=sa_session)
         )
-        has_dbkey_hda.set_dataset_state(Dataset.states.OK)
+        has_dbkey_hda.state = Dataset.states.OK
         has_dbkey_hda.metadata.dbkey = "hg19"
         has_no_dbkey_hda = hist.add_dataset(
             HistoryDatasetAssociation(id=2, extension="interval", create_dataset=True, sa_session=sa_session)
         )
-        has_no_dbkey_hda.set_dataset_state(Dataset.states.OK)
+        has_no_dbkey_hda.state = Dataset.states.OK

         p = self._parameter_for(
             xml="""
