From fa5adb0f5218c2751a3857766d5376ace3abb0bf Mon Sep 17 00:00:00 2001 From: John Davis Date: Thu, 26 Oct 2023 11:49:32 -0400 Subject: [PATCH 01/16] SA2.0: Do not bind MetaData: must use engine explicitly --- scripts/check_model.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/check_model.py b/scripts/check_model.py index 3f411963270a..0c2ffa47f629 100644 --- a/scripts/check_model.py +++ b/scripts/check_model.py @@ -47,8 +47,9 @@ def load_indexes(metadata): # create EMPTY metadata, then load from database db_url = get_config(sys.argv)["db_url"] - metadata = MetaData(bind=create_engine(db_url)) - metadata.reflect() + metadata = MetaData() + engine = create_engine(db_url) + metadata.reflect(bind=engine) indexes_in_db = load_indexes(metadata) all_indexes = set(mapping_indexes.keys()) | set(tsi_mapping_indexes.keys()) From 81d571cbc2f6f12defe13d6b0df847009b185560 Mon Sep 17 00:00:00 2001 From: John Davis Date: Tue, 14 Nov 2023 14:28:29 -0500 Subject: [PATCH 02/16] Use `exists` for improved readability I double checked the performance of "exists().where(criteria)" vs. "select(foo).where(criteria).limit(1)" with explain analyze. While the startup cost is 40% higher for exists, the total costs are identical. In terms of readability, "exists" is more succinct and straightforward. --- lib/galaxy/managers/histories.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/galaxy/managers/histories.py b/lib/galaxy/managers/histories.py index 5c0b62e1a08a..99a7c70d80ff 100644 --- a/lib/galaxy/managers/histories.py +++ b/lib/galaxy/managers/histories.py @@ -18,6 +18,7 @@ from sqlalchemy import ( asc, desc, + exists, false, func, select, @@ -340,13 +341,12 @@ def get_sharing_extra_information( return extra def is_history_shared_with(self, history: model.History, user: model.User) -> bool: - stmt = ( - select(HistoryUserShareAssociation.id) + stmt = select( + exists() .where(HistoryUserShareAssociation.user_id == user.id) .where(HistoryUserShareAssociation.history_id == history.id) - .limit(1) ) - return bool(self.session().execute(stmt).first()) + return self.session().scalar(stmt) def make_members_public(self, trans, item): """Make the non-purged datasets in history public. From f5c93cd7042b42bae2c53e51f57c7ed40418a723 Mon Sep 17 00:00:00 2001 From: John Davis Date: Thu, 16 Nov 2023 13:41:33 -0500 Subject: [PATCH 03/16] Use add_columns instead of add_entity This is a step towards converting the _get_nested_collection_attributes method from Query to Select. The add_entity method does not exist on Select; add_column should work the same way, in theory... --- lib/galaxy/model/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 4cdf328771a3..01f72df78c01 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -6200,7 +6200,7 @@ def attribute_columns(column_collection, attributes, nesting_level=None): .add_columns(*attribute_columns(DatasetPermissions, dataset_permission_attributes)) ) for entity in return_entities: - q = q.add_entity(entity) + q = q.add_columns(entity) if entity == DatasetCollectionElement: q = q.filter(entity.id == dce.c.id) return q.distinct().order_by(*order_by_columns) From c0f6b1d2c8eba5657f6c4d154551810bbeeb531e Mon Sep 17 00:00:00 2001 From: John Davis Date: Fri, 17 Nov 2023 11:05:09 -0500 Subject: [PATCH 04/16] Refactor, upgrade get_nested_collection_attributes 1. Upgrade Query to Select 2. Factor out query-building logic. The previous version returned tuples of items OR models (ORM objects), depending on the calling code (several similar data access methods were combined into this one generic method in PR #12056). The Query object would "magically" convert tuples of ORM objects to ORM objects. The new unified Select object does not do that. As as result, with Select, this method would return tuples of items or tuples of models (not models): result1 = session.execute(statement2) result1 == [("element_identifier_0", "element_identifier_1", "extension", "state"), ...] result2 = session.execute(statement2) result2 == [(dataset1,), (dataset2,) ...] Factoring out the query-building logic and having the caller execute it depending on the expected data structure solves this. --- lib/galaxy/model/__init__.py | 66 +++++++++++++++++---------- test/unit/data/test_galaxy_mapping.py | 34 ++++++++++---- 2 files changed, 68 insertions(+), 32 deletions(-) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 01f72df78c01..cc1f9bc03c32 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -6118,7 +6118,7 @@ def __init__(self, id=None, collection_type=None, populated=True, element_count= self.populated_state = DatasetCollection.populated_states.NEW self.element_count = element_count - def _get_nested_collection_attributes( + def _build_nested_collection_attributes_stmt( self, collection_attributes: Optional[Iterable[str]] = None, element_attributes: Optional[Iterable[str]] = None, @@ -6145,10 +6145,8 @@ def _get_nested_collection_attributes( dataset_permission_attributes = dataset_permission_attributes or () return_entities = return_entities or () dataset_collection = self - db_session = object_session(self) dc = alias(DatasetCollection) dce = alias(DatasetCollectionElement) - depth_collection_type = dataset_collection.collection_type order_by_columns = [dce.c.element_index] nesting_level = 0 @@ -6158,7 +6156,7 @@ def attribute_columns(column_collection, attributes, nesting_level=None): return [getattr(column_collection, a).label(f"{a}{label_fragment}") for a in attributes] q = ( - db_session.query( + select( *attribute_columns(dce.c, element_attributes, nesting_level), *attribute_columns(dc.c, collection_attributes, nesting_level), ) @@ -6166,6 +6164,7 @@ def attribute_columns(column_collection, attributes, nesting_level=None): .join(dce, dce.c.dataset_collection_id == dc.c.id) .filter(dc.c.id == dataset_collection.id) ) + while ":" in depth_collection_type: nesting_level += 1 inner_dc = alias(DatasetCollection) @@ -6203,15 +6202,21 @@ def attribute_columns(column_collection, attributes, nesting_level=None): q = q.add_columns(entity) if entity == DatasetCollectionElement: q = q.filter(entity.id == dce.c.id) - return q.distinct().order_by(*order_by_columns) + + q = q.distinct().order_by(*order_by_columns) + return q @property def dataset_states_and_extensions_summary(self): if not hasattr(self, "_dataset_states_and_extensions_summary"): - q = self._get_nested_collection_attributes(hda_attributes=("extension",), dataset_attributes=("state",)) + stmt = self._build_nested_collection_attributes_stmt( + hda_attributes=("extension",), dataset_attributes=("state",) + ) + col_attrs = object_session(self).execute(stmt) + extensions = set() states = set() - for extension, state in q: + for extension, state in col_attrs: states.add(state) extensions.add(extension) @@ -6225,8 +6230,10 @@ def has_deferred_data(self): has_deferred_data = False if object_session(self): # TODO: Optimize by just querying without returning the states... - q = self._get_nested_collection_attributes(dataset_attributes=("state",)) - for (state,) in q: + stmt = self._build_nested_collection_attributes_stmt(dataset_attributes=("state",)) + col_attrs = object_session(self).execute(stmt) + + for (state,) in col_attrs: if state == Dataset.states.DEFERRED: has_deferred_data = True break @@ -6247,13 +6254,16 @@ def populated_optimized(self): if ":" not in self.collection_type: _populated_optimized = self.populated_state == DatasetCollection.populated_states.OK else: - q = self._get_nested_collection_attributes( + stmt = self._build_nested_collection_attributes_stmt( collection_attributes=("populated_state",), inner_filter=InnerCollectionFilter( "populated_state", operator.__ne__, DatasetCollection.populated_states.OK ), ) - _populated_optimized = q.session.query(~exists(q.subquery())).scalar() + stmt = stmt.subquery() + stmt = select(~exists(stmt)) + session = object_session(self) + _populated_optimized = session.scalar(stmt) self._populated_optimized = _populated_optimized @@ -6269,9 +6279,11 @@ def populated(self): @property def dataset_action_tuples(self): if not hasattr(self, "_dataset_action_tuples"): - q = self._get_nested_collection_attributes(dataset_permission_attributes=("action", "role_id")) + stmt = self._build_nested_collection_attributes_stmt(dataset_permission_attributes=("action", "role_id")) + col_attrs = object_session(self).execute(stmt) + _dataset_action_tuples = [] - for _dataset_action_tuple in q: + for _dataset_action_tuple in col_attrs: if _dataset_action_tuple[0] is None: continue _dataset_action_tuples.append(_dataset_action_tuple) @@ -6282,10 +6294,11 @@ def dataset_action_tuples(self): @property def element_identifiers_extensions_and_paths(self): - q = self._get_nested_collection_attributes( + stmt = self._build_nested_collection_attributes_stmt( element_attributes=("element_identifier",), hda_attributes=("extension",), return_entities=(Dataset,) ) - return [(row[:-2], row.extension, row.Dataset.get_file_name()) for row in q] + col_attrs = object_session(self).execute(stmt) + return [(row[:-2], row.extension, row.Dataset.get_file_name()) for row in col_attrs] @property def element_identifiers_extensions_paths_and_metadata_files( @@ -6293,13 +6306,14 @@ def element_identifiers_extensions_paths_and_metadata_files( ) -> List[List[Any]]: results = [] if object_session(self): - q = self._get_nested_collection_attributes( + stmt = self._build_nested_collection_attributes_stmt( element_attributes=("element_identifier",), hda_attributes=("extension",), return_entities=(HistoryDatasetAssociation, Dataset), ) + col_attrs = object_session(self).execute(stmt) # element_identifiers, extension, path - for row in q: + for row in col_attrs: result = [row[:-3], row.extension, row.Dataset.get_file_name()] hda = row.HistoryDatasetAssociation result.append(hda.get_metadata_file_paths_and_extensions()) @@ -6344,7 +6358,8 @@ def finalize(self, collection_type_description): def dataset_instances(self): db_session = object_session(self) if db_session and self.id: - return self._get_nested_collection_attributes(return_entities=(HistoryDatasetAssociation,)).all() + stmt = self._build_nested_collection_attributes_stmt(return_entities=(HistoryDatasetAssociation,)) + return db_session.scalars(stmt).all() else: # Sessionless context instances = [] @@ -6360,7 +6375,8 @@ def dataset_instances(self): def dataset_elements(self): db_session = object_session(self) if db_session and self.id: - return self._get_nested_collection_attributes(return_entities=(DatasetCollectionElement,)).all() + stmt = self._build_nested_collection_attributes_stmt(return_entities=(DatasetCollectionElement,)) + return db_session.scalars(stmt).all() elements = [] for element in self.elements: if element.is_collection: @@ -6445,9 +6461,11 @@ def copy( return new_collection def replace_failed_elements(self, replacements): - hda_id_to_element = dict( - self._get_nested_collection_attributes(return_entities=[DatasetCollectionElement], hda_attributes=["id"]) + stmt = self._build_nested_collection_attributes_stmt( + return_entities=[DatasetCollectionElement], hda_attributes=["id"] ) + col_attrs = object_session(self).execute(stmt).all() + hda_id_to_element = dict(col_attrs) for failed, replacement in replacements.items(): element = hda_id_to_element.get(failed.id) if element: @@ -6712,10 +6730,12 @@ def job_state_summary_dict(self): @property def dataset_dbkeys_and_extensions_summary(self): if not hasattr(self, "_dataset_dbkeys_and_extensions_summary"): - rows = self.collection._get_nested_collection_attributes(hda_attributes=("_metadata", "extension")) + stmt = self.collection._build_nested_collection_attributes_stmt(hda_attributes=("_metadata", "extension")) + col_attrs = object_session(self).execute(stmt) + extensions = set() dbkeys = set() - for row in rows: + for row in col_attrs: if row is not None: dbkey_field = row._metadata.get("dbkey") if isinstance(dbkey_field, list): diff --git a/test/unit/data/test_galaxy_mapping.py b/test/unit/data/test_galaxy_mapping.py index c7a2d4eb8366..d2b4dbf7bd7b 100644 --- a/test/unit/data/test_galaxy_mapping.py +++ b/test/unit/data/test_galaxy_mapping.py @@ -356,6 +356,7 @@ def test_collections_in_library_folders(self): # assert len(loaded_dataset_collection.datasets) == 2 # assert loaded_dataset_collection.collection_type == "pair" + # TODO breakup this test into separate tests that test the model's public attributes, not the internal query-building logic def test_nested_collection_attributes(self): u = model.User(email="mary2@example.com", password="password") h1 = model.History(name="History 1", user=u) @@ -392,18 +393,31 @@ def test_nested_collection_attributes(self): ) self.model.session.add_all([d1, d2, c1, dce1, dce2, c2, dce3, c3, c4, dce4]) self.model.session.flush() - q = c2._get_nested_collection_attributes( + + stmt = c2._build_nested_collection_attributes_stmt( element_attributes=("element_identifier",), hda_attributes=("extension",), dataset_attributes=("state",) ) - assert [(r._fields) for r in q] == [ + result = self.model.session.execute(stmt).all() + assert [(r._fields) for r in result] == [ ("element_identifier_0", "element_identifier_1", "extension", "state"), ("element_identifier_0", "element_identifier_1", "extension", "state"), ] - assert q.all() == [("inner_list", "forward", "bam", "new"), ("inner_list", "reverse", "txt", "new")] - q = c2._get_nested_collection_attributes(return_entities=(model.HistoryDatasetAssociation,)) - assert q.all() == [d1, d2] - q = c2._get_nested_collection_attributes(return_entities=(model.HistoryDatasetAssociation, model.Dataset)) - assert q.all() == [(d1, d1.dataset), (d2, d2.dataset)] + + stmt = c2._build_nested_collection_attributes_stmt( + element_attributes=("element_identifier",), hda_attributes=("extension",), dataset_attributes=("state",) + ) + result = self.model.session.execute(stmt).all() + assert result == [("inner_list", "forward", "bam", "new"), ("inner_list", "reverse", "txt", "new")] + + stmt = c2._build_nested_collection_attributes_stmt(return_entities=(model.HistoryDatasetAssociation,)) + result = self.model.session.scalars(stmt).all() + assert result == [d1, d2] + + stmt = c2._build_nested_collection_attributes_stmt( + return_entities=(model.HistoryDatasetAssociation, model.Dataset) + ) + result = self.model.session.execute(stmt).all() + assert result == [(d1, d1.dataset), (d2, d2.dataset)] # Assert properties that use _get_nested_collection_attributes return correct content assert c2.dataset_instances == [d1, d2] assert c2.dataset_elements == [dce1, dce2] @@ -422,8 +436,10 @@ def test_nested_collection_attributes(self): assert c3.dataset_instances == [] assert c3.dataset_elements == [] assert c3.dataset_states_and_extensions_summary == (set(), set()) - q = c4._get_nested_collection_attributes(element_attributes=("element_identifier",)) - assert q.all() == [("outer_list", "inner_list", "forward"), ("outer_list", "inner_list", "reverse")] + + stmt = c4._build_nested_collection_attributes_stmt(element_attributes=("element_identifier",)) + result = self.model.session.execute(stmt).all() + assert result == [("outer_list", "inner_list", "forward"), ("outer_list", "inner_list", "reverse")] assert c4.dataset_elements == [dce1, dce2] assert c4.element_identifiers_extensions_and_paths == [ (("outer_list", "inner_list", "forward"), "bam", "mock_dataset_14.dat"), From 0256f3c0cbd21af60783e1f3549dc19dd3e7687a Mon Sep 17 00:00:00 2001 From: John Davis Date: Mon, 27 Nov 2023 15:25:50 -0500 Subject: [PATCH 05/16] Fix dataset_action_tuples --- lib/galaxy/model/__init__.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index cc1f9bc03c32..c095dcbae5c2 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -6280,16 +6280,8 @@ def populated(self): def dataset_action_tuples(self): if not hasattr(self, "_dataset_action_tuples"): stmt = self._build_nested_collection_attributes_stmt(dataset_permission_attributes=("action", "role_id")) - col_attrs = object_session(self).execute(stmt) - - _dataset_action_tuples = [] - for _dataset_action_tuple in col_attrs: - if _dataset_action_tuple[0] is None: - continue - _dataset_action_tuples.append(_dataset_action_tuple) - - self._dataset_action_tuples = _dataset_action_tuples - + tuples = object_session(self).execute(stmt) + self._dataset_action_tuples = [(action, role_id) for action, role_id, *_ in tuples if action is not None] return self._dataset_action_tuples @property From 762f119d9f7e010f17b7c2e1d6c77d1c32db8d90 Mon Sep 17 00:00:00 2001 From: John Davis Date: Mon, 27 Nov 2023 15:43:32 -0500 Subject: [PATCH 06/16] Fix dataset_states_and_extensions_summary --- lib/galaxy/model/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index c095dcbae5c2..d8be6e9d251e 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -6212,11 +6212,11 @@ def dataset_states_and_extensions_summary(self): stmt = self._build_nested_collection_attributes_stmt( hda_attributes=("extension",), dataset_attributes=("state",) ) - col_attrs = object_session(self).execute(stmt) + tuples = object_session(self).execute(stmt) extensions = set() states = set() - for extension, state in col_attrs: + for extension, state, *_ in tuples: states.add(state) extensions.add(extension) From 67d73183075503ec3048b91dcae812e2e2468abb Mon Sep 17 00:00:00 2001 From: John Davis Date: Mon, 27 Nov 2023 15:50:37 -0500 Subject: [PATCH 07/16] Fix has_deferred_data --- lib/galaxy/model/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index d8be6e9d251e..1ef385876e34 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -6231,9 +6231,9 @@ def has_deferred_data(self): if object_session(self): # TODO: Optimize by just querying without returning the states... stmt = self._build_nested_collection_attributes_stmt(dataset_attributes=("state",)) - col_attrs = object_session(self).execute(stmt) + tuples = object_session(self).execute(stmt) - for (state,) in col_attrs: + for state, *_ in tuples: if state == Dataset.states.DEFERRED: has_deferred_data = True break From 851334e40ea8380da5f369ffd31644ff0b035a4d Mon Sep 17 00:00:00 2001 From: John Davis Date: Mon, 27 Nov 2023 16:17:06 -0500 Subject: [PATCH 08/16] Drop unused property --- lib/galaxy/model/__init__.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 1ef385876e34..b4c5b198fe52 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -6284,14 +6284,6 @@ def dataset_action_tuples(self): self._dataset_action_tuples = [(action, role_id) for action, role_id, *_ in tuples if action is not None] return self._dataset_action_tuples - @property - def element_identifiers_extensions_and_paths(self): - stmt = self._build_nested_collection_attributes_stmt( - element_attributes=("element_identifier",), hda_attributes=("extension",), return_entities=(Dataset,) - ) - col_attrs = object_session(self).execute(stmt) - return [(row[:-2], row.extension, row.Dataset.get_file_name()) for row in col_attrs] - @property def element_identifiers_extensions_paths_and_metadata_files( self, From 8ece42025b4f496c51fcb808b7b6bfd403da11bd Mon Sep 17 00:00:00 2001 From: John Davis Date: Mon, 27 Nov 2023 17:33:14 -0500 Subject: [PATCH 09/16] Fix element_identifiers_extensions_paths_and_metadata_files --- lib/galaxy/model/__init__.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index b4c5b198fe52..acabba1ea144 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -6288,6 +6288,21 @@ def dataset_action_tuples(self): def element_identifiers_extensions_paths_and_metadata_files( self, ) -> List[List[Any]]: + def find_identifiers(row): + # Assume row has this structure: [id1, id2, ..., idn, extension, model, anything]; + # model is an instance of a Dataset or a HistoryDatasetAssociation; + # extension is a string that always preceeds the model; + # we look for the first model, then pick the preceeding items minus the extention. + identifiers = [] + i = 0 + while i + 1 < len(row): + item, next_item = row[i], row[i + 1] + if isinstance(next_item, HistoryDatasetAssociation) or isinstance(next_item, Dataset): + break + identifiers.append(item) + i += 1 + return tuple(identifiers) + results = [] if object_session(self): stmt = self._build_nested_collection_attributes_stmt( @@ -6295,10 +6310,10 @@ def element_identifiers_extensions_paths_and_metadata_files( hda_attributes=("extension",), return_entities=(HistoryDatasetAssociation, Dataset), ) - col_attrs = object_session(self).execute(stmt) + tuples = object_session(self).execute(stmt) # element_identifiers, extension, path - for row in col_attrs: - result = [row[:-3], row.extension, row.Dataset.get_file_name()] + for row in tuples: + result = [find_identifiers(row), row.extension, row.Dataset.get_file_name()] hda = row.HistoryDatasetAssociation result.append(hda.get_metadata_file_paths_and_extensions()) results.append(result) From 408f6e9ec7dd72c30dcc21cb71b6ebf374a20381 Mon Sep 17 00:00:00 2001 From: John Davis Date: Mon, 27 Nov 2023 17:38:17 -0500 Subject: [PATCH 10/16] Fix dataset_instances --- lib/galaxy/model/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index acabba1ea144..b68230fcab94 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -6358,7 +6358,8 @@ def dataset_instances(self): db_session = object_session(self) if db_session and self.id: stmt = self._build_nested_collection_attributes_stmt(return_entities=(HistoryDatasetAssociation,)) - return db_session.scalars(stmt).all() + tuples = db_session.execute(stmt).all() + return [tuple[0] for tuple in tuples] else: # Sessionless context instances = [] From 5b312488ed24434a15b62b411ebd45dd390b0f62 Mon Sep 17 00:00:00 2001 From: John Davis Date: Mon, 27 Nov 2023 17:39:07 -0500 Subject: [PATCH 11/16] Fix dataset_elements --- lib/galaxy/model/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index b68230fcab94..18e242f73cf1 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -6376,7 +6376,8 @@ def dataset_elements(self): db_session = object_session(self) if db_session and self.id: stmt = self._build_nested_collection_attributes_stmt(return_entities=(DatasetCollectionElement,)) - return db_session.scalars(stmt).all() + tuples = db_session.execute(stmt).all() + return [tuple[0] for tuple in tuples] elements = [] for element in self.elements: if element.is_collection: From 007c83f7c1c7158d67dd04d08dc58ef74c512a3b Mon Sep 17 00:00:00 2001 From: John Davis Date: Mon, 27 Nov 2023 17:51:23 -0500 Subject: [PATCH 12/16] Fix replace_failed_elements --- lib/galaxy/model/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 18e242f73cf1..f966f5935cf4 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -6465,8 +6465,9 @@ def replace_failed_elements(self, replacements): stmt = self._build_nested_collection_attributes_stmt( return_entities=[DatasetCollectionElement], hda_attributes=["id"] ) - col_attrs = object_session(self).execute(stmt).all() - hda_id_to_element = dict(col_attrs) + tuples = object_session(self).execute(stmt).all() + tuples = [(element, id) for element, id, *_ in tuples] + hda_id_to_element = dict(tuples) for failed, replacement in replacements.items(): element = hda_id_to_element.get(failed.id) if element: From b9c0176b26da0ae4ebd0ec488fd31ec89675e2fe Mon Sep 17 00:00:00 2001 From: John Davis Date: Mon, 27 Nov 2023 17:58:24 -0500 Subject: [PATCH 13/16] Rename local vars --- lib/galaxy/model/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index f966f5935cf4..57b32f3adbf4 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -6733,11 +6733,11 @@ def job_state_summary_dict(self): def dataset_dbkeys_and_extensions_summary(self): if not hasattr(self, "_dataset_dbkeys_and_extensions_summary"): stmt = self.collection._build_nested_collection_attributes_stmt(hda_attributes=("_metadata", "extension")) - col_attrs = object_session(self).execute(stmt) + tuples = object_session(self).execute(stmt) extensions = set() dbkeys = set() - for row in col_attrs: + for row in tuples: if row is not None: dbkey_field = row._metadata.get("dbkey") if isinstance(dbkey_field, list): From 836131392fe28273ba39fedc30ee3cdf36d4ffb3 Mon Sep 17 00:00:00 2001 From: John Davis Date: Mon, 27 Nov 2023 17:58:45 -0500 Subject: [PATCH 14/16] Add columns from ORDER BY clause to SELECT clause Required if using DISTINCT --- lib/galaxy/model/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 57b32f3adbf4..0c91431f4e3a 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -6204,6 +6204,8 @@ def attribute_columns(column_collection, attributes, nesting_level=None): q = q.filter(entity.id == dce.c.id) q = q.distinct().order_by(*order_by_columns) + # With DISTINCT, all columns that appear in the ORDER BY clause must appear in the SELECT clause. + q = q.add_columns(*order_by_columns) return q @property From f45e6d1e46e8dbe053052b4666e6a8a6e3d77fad Mon Sep 17 00:00:00 2001 From: John Davis Date: Mon, 27 Nov 2023 18:05:13 -0500 Subject: [PATCH 15/16] Add unit test; adjust tests to account for order-by columns --- test/unit/data/test_galaxy_mapping.py | 38 ++++++++++++++++++--------- 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/test/unit/data/test_galaxy_mapping.py b/test/unit/data/test_galaxy_mapping.py index d2b4dbf7bd7b..e8b27e140c8f 100644 --- a/test/unit/data/test_galaxy_mapping.py +++ b/test/unit/data/test_galaxy_mapping.py @@ -356,7 +356,22 @@ def test_collections_in_library_folders(self): # assert len(loaded_dataset_collection.datasets) == 2 # assert loaded_dataset_collection.collection_type == "pair" - # TODO breakup this test into separate tests that test the model's public attributes, not the internal query-building logic + def test_dataset_action_tuples(self): + u = model.User(email="foo", password="foo") + h1 = model.History(user=u) + hda1 = model.HistoryDatasetAssociation(history=h1, create_dataset=True, sa_session=self.model.session) + hda2 = model.HistoryDatasetAssociation(history=h1, create_dataset=True, sa_session=self.model.session) + r1 = model.Role() + dp1 = model.DatasetPermissions(action="action1", dataset=hda1.dataset, role=r1) + dp2 = model.DatasetPermissions(action=None, dataset=hda1.dataset, role=r1) + dp3 = model.DatasetPermissions(action="action3", dataset=hda1.dataset, role=r1) + c1 = model.DatasetCollection(collection_type="type1") + dce1 = model.DatasetCollectionElement(collection=c1, element=hda1) + dce2 = model.DatasetCollectionElement(collection=c1, element=hda2) + self.model.session.add_all([u, h1, hda1, hda2, r1, dp1, dp2, dp3, c1, dce1, dce2]) + self.model.session.flush() + assert c1.dataset_action_tuples == [("action1", r1.id), ("action3", r1.id)] + def test_nested_collection_attributes(self): u = model.User(email="mary2@example.com", password="password") h1 = model.History(name="History 1", user=u) @@ -399,25 +414,25 @@ def test_nested_collection_attributes(self): ) result = self.model.session.execute(stmt).all() assert [(r._fields) for r in result] == [ - ("element_identifier_0", "element_identifier_1", "extension", "state"), - ("element_identifier_0", "element_identifier_1", "extension", "state"), + ("element_identifier_0", "element_identifier_1", "extension", "state", "element_index", "element_index_1"), + ("element_identifier_0", "element_identifier_1", "extension", "state", "element_index", "element_index_1"), ] stmt = c2._build_nested_collection_attributes_stmt( element_attributes=("element_identifier",), hda_attributes=("extension",), dataset_attributes=("state",) ) result = self.model.session.execute(stmt).all() - assert result == [("inner_list", "forward", "bam", "new"), ("inner_list", "reverse", "txt", "new")] + assert result == [("inner_list", "forward", "bam", "new", 0, 0), ("inner_list", "reverse", "txt", "new", 0, 1)] stmt = c2._build_nested_collection_attributes_stmt(return_entities=(model.HistoryDatasetAssociation,)) - result = self.model.session.scalars(stmt).all() - assert result == [d1, d2] + result = self.model.session.execute(stmt).all() + assert result == [(d1, 0, 0), (d2, 0, 1)] stmt = c2._build_nested_collection_attributes_stmt( return_entities=(model.HistoryDatasetAssociation, model.Dataset) ) result = self.model.session.execute(stmt).all() - assert result == [(d1, d1.dataset), (d2, d2.dataset)] + assert result == [(d1, d1.dataset, 0, 0), (d2, d2.dataset, 0, 1)] # Assert properties that use _get_nested_collection_attributes return correct content assert c2.dataset_instances == [d1, d2] assert c2.dataset_elements == [dce1, dce2] @@ -439,12 +454,11 @@ def test_nested_collection_attributes(self): stmt = c4._build_nested_collection_attributes_stmt(element_attributes=("element_identifier",)) result = self.model.session.execute(stmt).all() - assert result == [("outer_list", "inner_list", "forward"), ("outer_list", "inner_list", "reverse")] - assert c4.dataset_elements == [dce1, dce2] - assert c4.element_identifiers_extensions_and_paths == [ - (("outer_list", "inner_list", "forward"), "bam", "mock_dataset_14.dat"), - (("outer_list", "inner_list", "reverse"), "txt", "mock_dataset_14.dat"), + assert result == [ + ("outer_list", "inner_list", "forward", 0, 0, 0), + ("outer_list", "inner_list", "reverse", 0, 0, 1), ] + assert c4.dataset_elements == [dce1, dce2] def test_dataset_dbkeys_and_extensions_summary(self): u = model.User(email="mary2@example.com", password="password") From eeeaf7b4cc28f42db874d3f7cd77ac966c2586b0 Mon Sep 17 00:00:00 2001 From: John Davis Date: Tue, 28 Nov 2023 14:46:16 -0500 Subject: [PATCH 16/16] Use distinct only for dataset states/extensions summary --- lib/galaxy/model/__init__.py | 33 ++++++++------------------- test/unit/data/test_galaxy_mapping.py | 14 ++++++------ 2 files changed, 16 insertions(+), 31 deletions(-) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 0c91431f4e3a..60794fbdcbe3 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -6203,9 +6203,7 @@ def attribute_columns(column_collection, attributes, nesting_level=None): if entity == DatasetCollectionElement: q = q.filter(entity.id == dce.c.id) - q = q.distinct().order_by(*order_by_columns) - # With DISTINCT, all columns that appear in the ORDER BY clause must appear in the SELECT clause. - q = q.add_columns(*order_by_columns) + q = q.order_by(*order_by_columns) return q @property @@ -6214,11 +6212,15 @@ def dataset_states_and_extensions_summary(self): stmt = self._build_nested_collection_attributes_stmt( hda_attributes=("extension",), dataset_attributes=("state",) ) + # With DISTINCT, all columns that appear in the ORDER BY clause must appear in the SELECT clause. + stmt = stmt.add_columns(*stmt._order_by_clauses) + stmt = stmt.distinct() + tuples = object_session(self).execute(stmt) extensions = set() states = set() - for extension, state, *_ in tuples: + for extension, state, *_ in tuples: # we discard the added columns from the order-by clause states.add(state) extensions.add(extension) @@ -6234,8 +6236,7 @@ def has_deferred_data(self): # TODO: Optimize by just querying without returning the states... stmt = self._build_nested_collection_attributes_stmt(dataset_attributes=("state",)) tuples = object_session(self).execute(stmt) - - for state, *_ in tuples: + for (state,) in tuples: if state == Dataset.states.DEFERRED: has_deferred_data = True break @@ -6283,28 +6284,13 @@ def dataset_action_tuples(self): if not hasattr(self, "_dataset_action_tuples"): stmt = self._build_nested_collection_attributes_stmt(dataset_permission_attributes=("action", "role_id")) tuples = object_session(self).execute(stmt) - self._dataset_action_tuples = [(action, role_id) for action, role_id, *_ in tuples if action is not None] + self._dataset_action_tuples = [(action, role_id) for action, role_id in tuples if action is not None] return self._dataset_action_tuples @property def element_identifiers_extensions_paths_and_metadata_files( self, ) -> List[List[Any]]: - def find_identifiers(row): - # Assume row has this structure: [id1, id2, ..., idn, extension, model, anything]; - # model is an instance of a Dataset or a HistoryDatasetAssociation; - # extension is a string that always preceeds the model; - # we look for the first model, then pick the preceeding items minus the extention. - identifiers = [] - i = 0 - while i + 1 < len(row): - item, next_item = row[i], row[i + 1] - if isinstance(next_item, HistoryDatasetAssociation) or isinstance(next_item, Dataset): - break - identifiers.append(item) - i += 1 - return tuple(identifiers) - results = [] if object_session(self): stmt = self._build_nested_collection_attributes_stmt( @@ -6315,7 +6301,7 @@ def find_identifiers(row): tuples = object_session(self).execute(stmt) # element_identifiers, extension, path for row in tuples: - result = [find_identifiers(row), row.extension, row.Dataset.get_file_name()] + result = [row[:-3], row.extension, row.Dataset.get_file_name()] hda = row.HistoryDatasetAssociation result.append(hda.get_metadata_file_paths_and_extensions()) results.append(result) @@ -6468,7 +6454,6 @@ def replace_failed_elements(self, replacements): return_entities=[DatasetCollectionElement], hda_attributes=["id"] ) tuples = object_session(self).execute(stmt).all() - tuples = [(element, id) for element, id, *_ in tuples] hda_id_to_element = dict(tuples) for failed, replacement in replacements.items(): element = hda_id_to_element.get(failed.id) diff --git a/test/unit/data/test_galaxy_mapping.py b/test/unit/data/test_galaxy_mapping.py index e8b27e140c8f..4a0f4a16b663 100644 --- a/test/unit/data/test_galaxy_mapping.py +++ b/test/unit/data/test_galaxy_mapping.py @@ -414,25 +414,25 @@ def test_nested_collection_attributes(self): ) result = self.model.session.execute(stmt).all() assert [(r._fields) for r in result] == [ - ("element_identifier_0", "element_identifier_1", "extension", "state", "element_index", "element_index_1"), - ("element_identifier_0", "element_identifier_1", "extension", "state", "element_index", "element_index_1"), + ("element_identifier_0", "element_identifier_1", "extension", "state"), + ("element_identifier_0", "element_identifier_1", "extension", "state"), ] stmt = c2._build_nested_collection_attributes_stmt( element_attributes=("element_identifier",), hda_attributes=("extension",), dataset_attributes=("state",) ) result = self.model.session.execute(stmt).all() - assert result == [("inner_list", "forward", "bam", "new", 0, 0), ("inner_list", "reverse", "txt", "new", 0, 1)] + assert result == [("inner_list", "forward", "bam", "new"), ("inner_list", "reverse", "txt", "new")] stmt = c2._build_nested_collection_attributes_stmt(return_entities=(model.HistoryDatasetAssociation,)) result = self.model.session.execute(stmt).all() - assert result == [(d1, 0, 0), (d2, 0, 1)] + assert result == [(d1,), (d2,)] stmt = c2._build_nested_collection_attributes_stmt( return_entities=(model.HistoryDatasetAssociation, model.Dataset) ) result = self.model.session.execute(stmt).all() - assert result == [(d1, d1.dataset, 0, 0), (d2, d2.dataset, 0, 1)] + assert result == [(d1, d1.dataset), (d2, d2.dataset)] # Assert properties that use _get_nested_collection_attributes return correct content assert c2.dataset_instances == [d1, d2] assert c2.dataset_elements == [dce1, dce2] @@ -455,8 +455,8 @@ def test_nested_collection_attributes(self): stmt = c4._build_nested_collection_attributes_stmt(element_attributes=("element_identifier",)) result = self.model.session.execute(stmt).all() assert result == [ - ("outer_list", "inner_list", "forward", 0, 0, 0), - ("outer_list", "inner_list", "reverse", 0, 0, 1), + ("outer_list", "inner_list", "forward"), + ("outer_list", "inner_list", "reverse"), ] assert c4.dataset_elements == [dce1, dce2]