diff --git a/lib/galaxy/managers/histories.py b/lib/galaxy/managers/histories.py index 5c0b62e1a08a..99a7c70d80ff 100644 --- a/lib/galaxy/managers/histories.py +++ b/lib/galaxy/managers/histories.py @@ -18,6 +18,7 @@ from sqlalchemy import ( asc, desc, + exists, false, func, select, @@ -340,13 +341,12 @@ def get_sharing_extra_information( return extra def is_history_shared_with(self, history: model.History, user: model.User) -> bool: - stmt = ( - select(HistoryUserShareAssociation.id) + stmt = select( + exists() .where(HistoryUserShareAssociation.user_id == user.id) .where(HistoryUserShareAssociation.history_id == history.id) - .limit(1) ) - return bool(self.session().execute(stmt).first()) + return self.session().scalar(stmt) def make_members_public(self, trans, item): """Make the non-purged datasets in history public. diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 4cdf328771a3..60794fbdcbe3 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -6118,7 +6118,7 @@ def __init__(self, id=None, collection_type=None, populated=True, element_count= self.populated_state = DatasetCollection.populated_states.NEW self.element_count = element_count - def _get_nested_collection_attributes( + def _build_nested_collection_attributes_stmt( self, collection_attributes: Optional[Iterable[str]] = None, element_attributes: Optional[Iterable[str]] = None, @@ -6145,10 +6145,8 @@ def _get_nested_collection_attributes( dataset_permission_attributes = dataset_permission_attributes or () return_entities = return_entities or () dataset_collection = self - db_session = object_session(self) dc = alias(DatasetCollection) dce = alias(DatasetCollectionElement) - depth_collection_type = dataset_collection.collection_type order_by_columns = [dce.c.element_index] nesting_level = 0 @@ -6158,7 +6156,7 @@ def attribute_columns(column_collection, attributes, nesting_level=None): return [getattr(column_collection, a).label(f"{a}{label_fragment}") for a in attributes] q = ( - db_session.query( + select( *attribute_columns(dce.c, element_attributes, nesting_level), *attribute_columns(dc.c, collection_attributes, nesting_level), ) @@ -6166,6 +6164,7 @@ def attribute_columns(column_collection, attributes, nesting_level=None): .join(dce, dce.c.dataset_collection_id == dc.c.id) .filter(dc.c.id == dataset_collection.id) ) + while ":" in depth_collection_type: nesting_level += 1 inner_dc = alias(DatasetCollection) @@ -6200,18 +6199,28 @@ def attribute_columns(column_collection, attributes, nesting_level=None): .add_columns(*attribute_columns(DatasetPermissions, dataset_permission_attributes)) ) for entity in return_entities: - q = q.add_entity(entity) + q = q.add_columns(entity) if entity == DatasetCollectionElement: q = q.filter(entity.id == dce.c.id) - return q.distinct().order_by(*order_by_columns) + + q = q.order_by(*order_by_columns) + return q @property def dataset_states_and_extensions_summary(self): if not hasattr(self, "_dataset_states_and_extensions_summary"): - q = self._get_nested_collection_attributes(hda_attributes=("extension",), dataset_attributes=("state",)) + stmt = self._build_nested_collection_attributes_stmt( + hda_attributes=("extension",), dataset_attributes=("state",) + ) + # With DISTINCT, all columns that appear in the ORDER BY clause must appear in the SELECT clause. + stmt = stmt.add_columns(*stmt._order_by_clauses) + stmt = stmt.distinct() + + tuples = object_session(self).execute(stmt) + extensions = set() states = set() - for extension, state in q: + for extension, state, *_ in tuples: # we discard the added columns from the order-by clause states.add(state) extensions.add(extension) @@ -6225,8 +6234,9 @@ def has_deferred_data(self): has_deferred_data = False if object_session(self): # TODO: Optimize by just querying without returning the states... - q = self._get_nested_collection_attributes(dataset_attributes=("state",)) - for (state,) in q: + stmt = self._build_nested_collection_attributes_stmt(dataset_attributes=("state",)) + tuples = object_session(self).execute(stmt) + for (state,) in tuples: if state == Dataset.states.DEFERRED: has_deferred_data = True break @@ -6247,13 +6257,16 @@ def populated_optimized(self): if ":" not in self.collection_type: _populated_optimized = self.populated_state == DatasetCollection.populated_states.OK else: - q = self._get_nested_collection_attributes( + stmt = self._build_nested_collection_attributes_stmt( collection_attributes=("populated_state",), inner_filter=InnerCollectionFilter( "populated_state", operator.__ne__, DatasetCollection.populated_states.OK ), ) - _populated_optimized = q.session.query(~exists(q.subquery())).scalar() + stmt = stmt.subquery() + stmt = select(~exists(stmt)) + session = object_session(self) + _populated_optimized = session.scalar(stmt) self._populated_optimized = _populated_optimized @@ -6269,37 +6282,25 @@ def populated(self): @property def dataset_action_tuples(self): if not hasattr(self, "_dataset_action_tuples"): - q = self._get_nested_collection_attributes(dataset_permission_attributes=("action", "role_id")) - _dataset_action_tuples = [] - for _dataset_action_tuple in q: - if _dataset_action_tuple[0] is None: - continue - _dataset_action_tuples.append(_dataset_action_tuple) - - self._dataset_action_tuples = _dataset_action_tuples - + stmt = self._build_nested_collection_attributes_stmt(dataset_permission_attributes=("action", "role_id")) + tuples = object_session(self).execute(stmt) + self._dataset_action_tuples = [(action, role_id) for action, role_id in tuples if action is not None] return self._dataset_action_tuples - @property - def element_identifiers_extensions_and_paths(self): - q = self._get_nested_collection_attributes( - element_attributes=("element_identifier",), hda_attributes=("extension",), return_entities=(Dataset,) - ) - return [(row[:-2], row.extension, row.Dataset.get_file_name()) for row in q] - @property def element_identifiers_extensions_paths_and_metadata_files( self, ) -> List[List[Any]]: results = [] if object_session(self): - q = self._get_nested_collection_attributes( + stmt = self._build_nested_collection_attributes_stmt( element_attributes=("element_identifier",), hda_attributes=("extension",), return_entities=(HistoryDatasetAssociation, Dataset), ) + tuples = object_session(self).execute(stmt) # element_identifiers, extension, path - for row in q: + for row in tuples: result = [row[:-3], row.extension, row.Dataset.get_file_name()] hda = row.HistoryDatasetAssociation result.append(hda.get_metadata_file_paths_and_extensions()) @@ -6344,7 +6345,9 @@ def finalize(self, collection_type_description): def dataset_instances(self): db_session = object_session(self) if db_session and self.id: - return self._get_nested_collection_attributes(return_entities=(HistoryDatasetAssociation,)).all() + stmt = self._build_nested_collection_attributes_stmt(return_entities=(HistoryDatasetAssociation,)) + tuples = db_session.execute(stmt).all() + return [tuple[0] for tuple in tuples] else: # Sessionless context instances = [] @@ -6360,7 +6363,9 @@ def dataset_instances(self): def dataset_elements(self): db_session = object_session(self) if db_session and self.id: - return self._get_nested_collection_attributes(return_entities=(DatasetCollectionElement,)).all() + stmt = self._build_nested_collection_attributes_stmt(return_entities=(DatasetCollectionElement,)) + tuples = db_session.execute(stmt).all() + return [tuple[0] for tuple in tuples] elements = [] for element in self.elements: if element.is_collection: @@ -6445,9 +6450,11 @@ def copy( return new_collection def replace_failed_elements(self, replacements): - hda_id_to_element = dict( - self._get_nested_collection_attributes(return_entities=[DatasetCollectionElement], hda_attributes=["id"]) + stmt = self._build_nested_collection_attributes_stmt( + return_entities=[DatasetCollectionElement], hda_attributes=["id"] ) + tuples = object_session(self).execute(stmt).all() + hda_id_to_element = dict(tuples) for failed, replacement in replacements.items(): element = hda_id_to_element.get(failed.id) if element: @@ -6712,10 +6719,12 @@ def job_state_summary_dict(self): @property def dataset_dbkeys_and_extensions_summary(self): if not hasattr(self, "_dataset_dbkeys_and_extensions_summary"): - rows = self.collection._get_nested_collection_attributes(hda_attributes=("_metadata", "extension")) + stmt = self.collection._build_nested_collection_attributes_stmt(hda_attributes=("_metadata", "extension")) + tuples = object_session(self).execute(stmt) + extensions = set() dbkeys = set() - for row in rows: + for row in tuples: if row is not None: dbkey_field = row._metadata.get("dbkey") if isinstance(dbkey_field, list): diff --git a/scripts/check_model.py b/scripts/check_model.py index 3f411963270a..0c2ffa47f629 100644 --- a/scripts/check_model.py +++ b/scripts/check_model.py @@ -47,8 +47,9 @@ def load_indexes(metadata): # create EMPTY metadata, then load from database db_url = get_config(sys.argv)["db_url"] - metadata = MetaData(bind=create_engine(db_url)) - metadata.reflect() + metadata = MetaData() + engine = create_engine(db_url) + metadata.reflect(bind=engine) indexes_in_db = load_indexes(metadata) all_indexes = set(mapping_indexes.keys()) | set(tsi_mapping_indexes.keys()) diff --git a/test/unit/data/test_galaxy_mapping.py b/test/unit/data/test_galaxy_mapping.py index c7a2d4eb8366..4a0f4a16b663 100644 --- a/test/unit/data/test_galaxy_mapping.py +++ b/test/unit/data/test_galaxy_mapping.py @@ -356,6 +356,22 @@ def test_collections_in_library_folders(self): # assert len(loaded_dataset_collection.datasets) == 2 # assert loaded_dataset_collection.collection_type == "pair" + def test_dataset_action_tuples(self): + u = model.User(email="foo", password="foo") + h1 = model.History(user=u) + hda1 = model.HistoryDatasetAssociation(history=h1, create_dataset=True, sa_session=self.model.session) + hda2 = model.HistoryDatasetAssociation(history=h1, create_dataset=True, sa_session=self.model.session) + r1 = model.Role() + dp1 = model.DatasetPermissions(action="action1", dataset=hda1.dataset, role=r1) + dp2 = model.DatasetPermissions(action=None, dataset=hda1.dataset, role=r1) + dp3 = model.DatasetPermissions(action="action3", dataset=hda1.dataset, role=r1) + c1 = model.DatasetCollection(collection_type="type1") + dce1 = model.DatasetCollectionElement(collection=c1, element=hda1) + dce2 = model.DatasetCollectionElement(collection=c1, element=hda2) + self.model.session.add_all([u, h1, hda1, hda2, r1, dp1, dp2, dp3, c1, dce1, dce2]) + self.model.session.flush() + assert c1.dataset_action_tuples == [("action1", r1.id), ("action3", r1.id)] + def test_nested_collection_attributes(self): u = model.User(email="mary2@example.com", password="password") h1 = model.History(name="History 1", user=u) @@ -392,18 +408,31 @@ def test_nested_collection_attributes(self): ) self.model.session.add_all([d1, d2, c1, dce1, dce2, c2, dce3, c3, c4, dce4]) self.model.session.flush() - q = c2._get_nested_collection_attributes( + + stmt = c2._build_nested_collection_attributes_stmt( element_attributes=("element_identifier",), hda_attributes=("extension",), dataset_attributes=("state",) ) - assert [(r._fields) for r in q] == [ + result = self.model.session.execute(stmt).all() + assert [(r._fields) for r in result] == [ ("element_identifier_0", "element_identifier_1", "extension", "state"), ("element_identifier_0", "element_identifier_1", "extension", "state"), ] - assert q.all() == [("inner_list", "forward", "bam", "new"), ("inner_list", "reverse", "txt", "new")] - q = c2._get_nested_collection_attributes(return_entities=(model.HistoryDatasetAssociation,)) - assert q.all() == [d1, d2] - q = c2._get_nested_collection_attributes(return_entities=(model.HistoryDatasetAssociation, model.Dataset)) - assert q.all() == [(d1, d1.dataset), (d2, d2.dataset)] + + stmt = c2._build_nested_collection_attributes_stmt( + element_attributes=("element_identifier",), hda_attributes=("extension",), dataset_attributes=("state",) + ) + result = self.model.session.execute(stmt).all() + assert result == [("inner_list", "forward", "bam", "new"), ("inner_list", "reverse", "txt", "new")] + + stmt = c2._build_nested_collection_attributes_stmt(return_entities=(model.HistoryDatasetAssociation,)) + result = self.model.session.execute(stmt).all() + assert result == [(d1,), (d2,)] + + stmt = c2._build_nested_collection_attributes_stmt( + return_entities=(model.HistoryDatasetAssociation, model.Dataset) + ) + result = self.model.session.execute(stmt).all() + assert result == [(d1, d1.dataset), (d2, d2.dataset)] # Assert properties that use _get_nested_collection_attributes return correct content assert c2.dataset_instances == [d1, d2] assert c2.dataset_elements == [dce1, dce2] @@ -422,13 +451,14 @@ def test_nested_collection_attributes(self): assert c3.dataset_instances == [] assert c3.dataset_elements == [] assert c3.dataset_states_and_extensions_summary == (set(), set()) - q = c4._get_nested_collection_attributes(element_attributes=("element_identifier",)) - assert q.all() == [("outer_list", "inner_list", "forward"), ("outer_list", "inner_list", "reverse")] - assert c4.dataset_elements == [dce1, dce2] - assert c4.element_identifiers_extensions_and_paths == [ - (("outer_list", "inner_list", "forward"), "bam", "mock_dataset_14.dat"), - (("outer_list", "inner_list", "reverse"), "txt", "mock_dataset_14.dat"), + + stmt = c4._build_nested_collection_attributes_stmt(element_attributes=("element_identifier",)) + result = self.model.session.execute(stmt).all() + assert result == [ + ("outer_list", "inner_list", "forward"), + ("outer_list", "inner_list", "reverse"), ] + assert c4.dataset_elements == [dce1, dce2] def test_dataset_dbkeys_and_extensions_summary(self): u = model.User(email="mary2@example.com", password="password")