[24.0] Fix History Dataset Association creation so that hid is always set #18036

Merged
7 changes: 4 additions & 3 deletions client/src/api/schema/schema.ts
@@ -3751,7 +3751,7 @@ export interface components {
* Element Type
* @description The type of the element. Used to interpret the `object` field.
*/
element_type: components["schemas"]["DCEType"];
element_type?: components["schemas"]["DCEType"] | null;
/**
* Dataset Collection Element ID
* @example 0123456789ABCDEF
@@ -3767,10 +3767,11 @@
* Object
* @description The element's specific data depending on the value of `element_type`.
*/
object:
object?:
| components["schemas"]["HDAObject"]
| components["schemas"]["HDADetailed"]
| components["schemas"]["DCObject"];
| components["schemas"]["DCObject"]
| null;
};
/**
* DCEType
6 changes: 5 additions & 1 deletion lib/galaxy/datatypes/data.py
@@ -842,7 +842,11 @@ def convert_dataset(
# Make the target datatype available to the converter
params["__target_datatype__"] = target_type
# Run converter, job is dispatched through Queue
job, converted_datasets, *_ = converter.execute(trans, incoming=params, set_output_hid=visible, history=history)
job, converted_datasets, *_ = converter.execute(
trans, incoming=params, set_output_hid=visible, history=history, flush_job=False
)
for converted_dataset in converted_datasets.values():
original_dataset.attach_implicitly_converted_dataset(trans.sa_session, converted_dataset, target_type)
trans.app.job_manager.enqueue(job, tool=converter)
if len(params) > 0:
trans.log_event(f"Converter params: {str(params)}", tool_id=converter.id)
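
The key change here is flush_job=False: the converter's outputs are no longer persisted before the implicit-conversion association exists, and the job is enqueued only once everything is attached to the history. A toy sketch of why the ordering matters for hid assignment (simplified stand-ins, not Galaxy's models):

from dataclasses import dataclass, field
from typing import List, Optional


@dataclass
class Dataset:
    name: str
    hid: Optional[int] = None


@dataclass
class History:
    hid_counter: int = 1
    datasets: List[Dataset] = field(default_factory=list)

    def add_dataset(self, dataset: Dataset) -> None:
        # The only place a hid is handed out; flushing a dataset before this
        # call would persist it with hid=None.
        dataset.hid = self.hid_counter
        self.hid_counter += 1
        self.datasets.append(dataset)


history = History()
converted = Dataset("converted.bam")  # flush_job=False: nothing persisted yet
history.add_dataset(converted)        # association attached before enqueueing
assert converted.hid == 1
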
18 changes: 1 addition & 17 deletions lib/galaxy/datatypes/display_applications/parameters.py
@@ -11,7 +11,6 @@

from galaxy.datatypes.data import Data
from galaxy.model import DatasetInstance
from galaxy.model.base import transaction
from galaxy.schema.schema import DatasetState
from galaxy.util import string_as_bool
from galaxy.util.template import fill_template
@@ -182,22 +181,7 @@ def prepare(self, other_values, dataset_hash, user_hash, trans):
if target_ext and not converted_dataset:
if isinstance(data, DisplayDataValueWrapper):
data = data.value
new_data = next(
iter(
data.datatype.convert_dataset(
trans, data, target_ext, return_output=True, visible=False
).values()
)
)
new_data.hid = data.hid
new_data.name = data.name
trans.sa_session.add(new_data)
assoc = trans.app.model.ImplicitlyConvertedDatasetAssociation(
parent=data, file_type=target_ext, dataset=new_data, metadata_safe=False
)
trans.sa_session.add(assoc)
with transaction(trans.sa_session):
trans.sa_session.commit()
data.datatype.convert_dataset(trans, data, target_ext, return_output=True, visible=False)
elif converted_dataset and converted_dataset.state == DatasetState.ERROR:
raise Exception(f"Dataset conversion failed for data parameter: {self.name}")
return self.get_value(other_values, dataset_hash, user_hash, trans)
2 changes: 2 additions & 0 deletions lib/galaxy/datatypes/protocols.py
@@ -85,5 +85,7 @@ def has_data(self) -> bool: ...

def set_peek(self) -> None: ...

def attach_implicitly_converted_dataset(self, session, new_dataset, target_ext: str) -> None: ...


class DatasetHasHidProtocol(DatasetProtocol, HasHid, Protocol): ...
8 changes: 4 additions & 4 deletions lib/galaxy/managers/collections.py
@@ -95,7 +95,7 @@ def precreate_dataset_collection_instance(
# TODO: prebuild all required HIDs and send them in so no need to flush in between.
dataset_collection = self.precreate_dataset_collection(
structure,
allow_unitialized_element=implicit_output_name is not None,
allow_uninitialized_element=implicit_output_name is not None,
completed_collection=completed_collection,
implicit_output_name=implicit_output_name,
)
@@ -112,10 +112,10 @@
return instance

def precreate_dataset_collection(
self, structure, allow_unitialized_element=True, completed_collection=None, implicit_output_name=None
self, structure, allow_uninitialized_element=True, completed_collection=None, implicit_output_name=None
):
has_structure = not structure.is_leaf and structure.children_known
if not has_structure and allow_unitialized_element:
if not has_structure and allow_uninitialized_element:
dataset_collection = model.DatasetCollectionElement.UNINITIALIZED_ELEMENT
elif not has_structure:
collection_type_description = structure.collection_type_description
@@ -143,7 +143,7 @@ def precreate_dataset_collection(
element = model.DatasetCollectionElement.UNINITIALIZED_ELEMENT
else:
element = self.precreate_dataset_collection(
substructure, allow_unitialized_element=allow_unitialized_element
substructure, allow_uninitialized_element=allow_uninitialized_element
)

element = model.DatasetCollectionElement(
9 changes: 2 additions & 7 deletions lib/galaxy/managers/hdas.py
@@ -202,7 +202,7 @@ def copy(
if not isinstance(item, model.HistoryDatasetAssociation):
raise TypeError()
hda = item
copy = hda.copy(parent_id=kwargs.get("parent_id"), copy_hid=False, copy_tags=hda.tags, flush=flush)
copy = hda.copy(parent_id=kwargs.get("parent_id"), copy_hid=False, copy_tags=hda.tags, flush=False)
if hide_copy:
copy.visible = False
if history:
@@ -221,12 +221,6 @@

return copy

def copy_ldda(self, history, ldda, **kwargs):
"""
Copy this HDA as a LDDA and return.
"""
return ldda.to_history_dataset_association(history, add_to_history=True)

# .... deletion and purging
def purge(self, hda, flush=True, **kwargs):
if self.app.config.enable_celery_tasks:
@@ -561,6 +555,7 @@ def add_serializers(self):
annotatable.AnnotatableSerializerMixin.add_serializers(self)

serializers: Dict[str, base.Serializer] = {
"hid": lambda item, key, **context: item.hid if item.hid is not None else -1,
"model_class": lambda item, key, **context: "HistoryDatasetAssociation",
"history_content_type": lambda item, key, **context: "dataset",
"hda_ldda": lambda item, key, **context: "hda",
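
The new "hid" serializer guards against HDAs that have not yet been assigned a hid (for example, a copy that has not been flushed into its target history). A minimal sketch of the fallback, with a stand-in object rather than a real HDA:

from typing import Any, Optional


def serialize_hid(item: Any, key: str, **context) -> int:
    # Mirrors the lambda above: None becomes the sentinel -1.
    return item.hid if item.hid is not None else -1


class FakeHDA:
    def __init__(self, hid: Optional[int]):
        self.hid = hid


assert serialize_hid(FakeHDA(3), "hid") == 3
assert serialize_hid(FakeHDA(None), "hid") == -1  # not yet assigned
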
20 changes: 9 additions & 11 deletions lib/galaxy/model/__init__.py
@@ -4736,7 +4736,7 @@ def get_converted_dataset(self, trans, target_ext, target_context=None, history=
raise NoConverterException(f"A dependency ({dependency}) is missing a converter.")
except KeyError:
pass # No deps
new_dataset = next(
return next(
iter(
self.datatype.convert_dataset(
trans,
@@ -4750,7 +4750,6 @@
).values()
)
)
return self.attach_implicitly_converted_dataset(trans.sa_session, new_dataset, target_ext)

def attach_implicitly_converted_dataset(self, session, new_dataset, target_ext: str):
new_dataset.name = self.name
@@ -4760,9 +4759,6 @@ def attach_implicitly_converted_dataset(self, new_dataset, target_ext:
)
session.add(new_dataset)
session.add(assoc)
with transaction(session):
session.commit()
return new_dataset

def copy_attributes(self, new_dataset):
"""
@@ -5134,7 +5130,8 @@ def copy_tags_to(self, copy_tags=None):
self.tags.append(copied_tag)

def copy_attributes(self, new_dataset):
new_dataset.hid = self.hid
if new_dataset.hid is None:
new_dataset.hid = self.hid

def to_library_dataset_dataset_association(
self,
@@ -5821,11 +5818,9 @@ def to_history_dataset_association(self, target_history, parent_id=None, add_to_

tag_manager = galaxy.model.tags.GalaxyTagHandler(sa_session)
src_ldda_tags = tag_manager.get_tags_str(self.tags)
tag_manager.apply_item_tags(user=self.user, item=hda, tags_str=src_ldda_tags)
tag_manager.apply_item_tags(user=self.user, item=hda, tags_str=src_ldda_tags, flush=False)
sa_session.add(hda)
with transaction(sa_session):
sa_session.commit()
hda.metadata = self.metadata # need to set after flushed, as MetadataFiles require dataset.id
hda.metadata = self.metadata
if add_to_history and target_history:
target_history.add_dataset(hda)
with transaction(sa_session):
Expand Down Expand Up @@ -6362,6 +6357,8 @@ def has_deferred_data(self):
@property
def populated_optimized(self):
if not hasattr(self, "_populated_optimized"):
if not self.id:
return self.populated
_populated_optimized = True
if ":" not in self.collection_type:
_populated_optimized = self.populated_state == DatasetCollection.populated_states.OK
@@ -7149,7 +7146,8 @@ def __init__(
self.element_identifier = element_identifier or str(element_index)

def __strict_check_before_flush__(self):
assert self.element_object, "Dataset Collection Element without child entity detected, this is not valid"
if self.collection.populated_optimized:
assert self.element_object, "Dataset Collection Element without child entity detected, this is not valid"

@property
def element_type(self):
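
The model changes above share one principle: don't clobber or depend on state that only exists after a flush. For copy_attributes, that means a hid already assigned by the target history wins over the parent's hid, which now only serves as a fallback. A toy illustration (not Galaxy code):

class DatasetInstanceSketch:
    def __init__(self, hid=None):
        self.hid = hid

    def copy_attributes(self, new_dataset):
        # Mirrors the guarded assignment in the diff.
        if new_dataset.hid is None:
            new_dataset.hid = self.hid


parent = DatasetInstanceSketch(hid=7)

placed = DatasetInstanceSketch(hid=12)  # hid already set by History.add_dataset
parent.copy_attributes(placed)
assert placed.hid == 12                 # preserved, not overwritten

fresh = DatasetInstanceSketch()
parent.copy_attributes(fresh)
assert fresh.hid == 7                   # fallback to the parent's hid
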
32 changes: 23 additions & 9 deletions lib/galaxy/model/base.py
@@ -174,18 +174,32 @@ def versioned_objects_strict(iter):
yield obj


if os.environ.get("GALAXY_TEST_RAISE_EXCEPTION_ON_HISTORYLESS_HDA"):
log.debug("Using strict flush checks")
versioned_objects = versioned_objects_strict # noqa: F811
def get_before_flush_handler():
if os.environ.get("GALAXY_TEST_RAISE_EXCEPTION_ON_HISTORYLESS_HDA"):
log.debug("Using strict flush checks")

def before_flush(session, flush_context, instances):
for obj in session.new:

Inline review thread on this line:

Member: Isn't that what versioned_objects_strict() does? If so, why not do something like this:

for obj in versioned_objects_strict(session.new):
    pass

mvdbeek (Member, Author), Apr 22, 2024: It creates a version, which doesn't make sense for new objects.

mvdbeek (Member, Author): Ah, yes, I could do that, though it's not related to versioning objects, which is what threw me off in your comment.

Member: And then, if I'm not mistaken, you wouldn't need this if/then that checks os.environ and contains two similar function definitions, and could just use the existing logic, I think? I.e., setting versioned_objects = versioned_objects_strict would still work?

mvdbeek (Member, Author): I don't think one is better than the other, except that this one is slightly better since we don't run the os.environ check at module scope.

if hasattr(obj, "__strict_check_before_flush__"):
obj.__strict_check_before_flush__()
for obj in versioned_objects_strict(session.dirty):
obj.__create_version__(session)
for obj in versioned_objects_strict(session.deleted):
obj.__create_version__(session, deleted=True)

else:

def before_flush(session, flush_context, instances):
for obj in versioned_objects(session.dirty):
obj.__create_version__(session)
for obj in versioned_objects(session.deleted):
obj.__create_version__(session, deleted=True)

return before_flush


def versioned_session(session):
@event.listens_for(session, "before_flush")
def before_flush(session, flush_context, instances):
for obj in versioned_objects(session.dirty):
obj.__create_version__(session)
for obj in versioned_objects(session.deleted):
obj.__create_version__(session, deleted=True)
event.listens_for(session, "before_flush")(get_before_flush_handler())


def ensure_object_added_to_session(object_to_add, *, object_in_session=None, session=None) -> bool:
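
versioned_session now builds its handler once via get_before_flush_handler() and registers it with the undecorated form of event.listens_for, so the os.environ check runs at registration time instead of at import. A runnable sketch with plain SQLAlchemy (toy Item model, not Galaxy's; the versioning branch is stubbed out):

import os

from sqlalchemy import Column, Integer, String, create_engine, event
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()


class Item(Base):
    __tablename__ = "item"
    id = Column(Integer, primary_key=True)
    name = Column(String)

    def __strict_check_before_flush__(self):
        assert self.name, "Item without a name detected, this is not valid"


def get_before_flush_handler():
    # Branch chosen once, when the handler is built.
    if os.environ.get("GALAXY_TEST_RAISE_EXCEPTION_ON_HISTORYLESS_HDA"):

        def before_flush(session, flush_context, instances):
            for obj in session.new:
                if hasattr(obj, "__strict_check_before_flush__"):
                    obj.__strict_check_before_flush__()

    else:

        def before_flush(session, flush_context, instances):
            pass  # versioning of dirty/deleted objects would go here

    return before_flush


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)
session = Session(engine)
event.listens_for(session, "before_flush")(get_before_flush_handler())

session.add(Item(name="ok"))
session.commit()  # the strict check (when enabled) runs during this flush
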
2 changes: 1 addition & 1 deletion lib/galaxy/model/deferred.py
@@ -165,7 +165,7 @@ def ensure_materialized(
if transient_paths:
metadata_tmp_files_dir = transient_paths.metadata_files_dir
else:
# If metadata_tmp_files_dir is set we generate a MetdataTempFile,
# If metadata_tmp_files_dir is set we generate a MetadataTempFile,
# which we don't want when we're generating an attached materialized dataset instance
metadata_tmp_files_dir = None
materialized_dataset_instance.set_meta(metadata_tmp_files_dir=metadata_tmp_files_dir)
6 changes: 2 additions & 4 deletions lib/galaxy/model/metadata.py
@@ -670,7 +670,7 @@ def from_external_value(self, value, parent, path_rewriter=None):
# directory. Correct.
file_name = path_rewriter(file_name)
mf.update_from_file(file_name)
value = mf.id
value = str(mf.uuid)
return value

def to_external_value(self, value):
@@ -692,11 +692,9 @@ def new_file(self, dataset=None, metadata_tmp_files_dir=None, **kwds):
sa_session = object_session(dataset)
if sa_session:
sa_session.add(mf)
with transaction(sa_session):
sa_session.commit() # commit to assign id
return mf
else:
# we need to make a tmp file that is accessable to the head node,
# we need to make a tmp file that is accessible to the head node,
# we will be copying its contents into the MetadataFile objects filename after restoring from JSON
# we do not include 'dataset' in the kwds passed, as from_JSON_value() will handle this for us
return MetadataTempFile(metadata_tmp_files_dir=metadata_tmp_files_dir, **kwds)
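
With the commit removed from new_file, a brand-new MetadataFile has no database id yet by the time from_external_value runs, so the external value switches to the uuid, which is assigned in Python when the object is created. A toy illustration of the distinction (not Galaxy's MetadataFile):

import uuid
from typing import Optional


class MetadataFileSketch:
    def __init__(self):
        self.id: Optional[int] = None  # set by the database, only after a flush
        self.uuid = uuid.uuid4()       # available immediately, no commit needed


mf = MetadataFileSketch()
assert mf.id is None           # the old `value = mf.id` would have yielded None
value = str(mf.uuid)           # a stable key that works pre-flush
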
8 changes: 4 additions & 4 deletions lib/galaxy/schema/schema.py
@@ -971,13 +971,13 @@ class DCESummary(Model, WithModelClass):
title="Element Identifier",
description="The actual name of this element.",
)
element_type: DCEType = Field(
...,
element_type: Optional[DCEType] = Field(
None,
title="Element Type",
description="The type of the element. Used to interpret the `object` field.",
)
object: Union[HDAObject, HDADetailed, DCObject] = Field(
...,
object: Optional[Union[HDAObject, HDADetailed, DCObject]] = Field(
None,
title="Object",
description="The element's specific data depending on the value of `element_type`.",
)
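
DCESummary is relaxed to match: an uninitialized collection element, i.e. the placeholder precreated before its underlying dataset exists, has neither an element_type nor an object. A trimmed stand-in for the model (plain str/dict in place of the real enums and unions) showing that such an element now validates:

from typing import Optional

from pydantic import BaseModel


class DCESummarySketch(BaseModel):
    element_identifier: str
    element_type: Optional[str] = None  # DCEType in the real schema
    object: Optional[dict] = None       # HDAObject | HDADetailed | DCObject


# A placeholder element created before its HDA exists:
uninitialized = DCESummarySketch(element_identifier="forward")
assert uninitialized.element_type is None and uninitialized.object is None
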
7 changes: 0 additions & 7 deletions test/unit/app/managers/test_HDAManager.py
@@ -133,13 +133,6 @@ def test_copy_from_hda(self):
hda3_annotation = self.hda_manager.annotation(hda3)
assert annotation == hda3_annotation

# def test_copy_from_ldda( self ):
# owner = self.user_manager.create( self.trans, **user2_data )
# history1 = self.history_mgr.create( self.trans, name='history1', user=owner )
#
# self.log( "should be able to copy an HDA" )
# hda2 = self.hda_manager.copy_ldda( history1, hda1 )

def test_delete(self):
owner = self.user_manager.create(**user2_data)
history1 = self.history_manager.create(name="history1", user=owner)