Skip to content

Commit

Permalink
Use distinct only for dataset states/extensions summary
Browse files Browse the repository at this point in the history
  • Loading branch information
jdavcs committed Nov 28, 2023
1 parent f45e6d1 commit eeeaf7b
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 31 deletions.
33 changes: 9 additions & 24 deletions lib/galaxy/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6203,9 +6203,7 @@ def attribute_columns(column_collection, attributes, nesting_level=None):
if entity == DatasetCollectionElement:
q = q.filter(entity.id == dce.c.id)

q = q.distinct().order_by(*order_by_columns)
# With DISTINCT, all columns that appear in the ORDER BY clause must appear in the SELECT clause.
q = q.add_columns(*order_by_columns)
q = q.order_by(*order_by_columns)
return q

@property
Expand All @@ -6214,11 +6212,15 @@ def dataset_states_and_extensions_summary(self):
stmt = self._build_nested_collection_attributes_stmt(
hda_attributes=("extension",), dataset_attributes=("state",)
)
# With DISTINCT, all columns that appear in the ORDER BY clause must appear in the SELECT clause.
stmt = stmt.add_columns(*stmt._order_by_clauses)
stmt = stmt.distinct()

tuples = object_session(self).execute(stmt)

extensions = set()
states = set()
for extension, state, *_ in tuples:
for extension, state, *_ in tuples: # we discard the added columns from the order-by clause
states.add(state)
extensions.add(extension)

Expand All @@ -6234,8 +6236,7 @@ def has_deferred_data(self):
# TODO: Optimize by just querying without returning the states...
stmt = self._build_nested_collection_attributes_stmt(dataset_attributes=("state",))
tuples = object_session(self).execute(stmt)

for state, *_ in tuples:
for (state,) in tuples:
if state == Dataset.states.DEFERRED:
has_deferred_data = True
break
Expand Down Expand Up @@ -6283,28 +6284,13 @@ def dataset_action_tuples(self):
if not hasattr(self, "_dataset_action_tuples"):
stmt = self._build_nested_collection_attributes_stmt(dataset_permission_attributes=("action", "role_id"))
tuples = object_session(self).execute(stmt)
self._dataset_action_tuples = [(action, role_id) for action, role_id, *_ in tuples if action is not None]
self._dataset_action_tuples = [(action, role_id) for action, role_id in tuples if action is not None]
return self._dataset_action_tuples

@property
def element_identifiers_extensions_paths_and_metadata_files(
self,
) -> List[List[Any]]:
def find_identifiers(row):
# Assume row has this structure: [id1, id2, ..., idn, extension, model, anything];
# model is an instance of a Dataset or a HistoryDatasetAssociation;
# extension is a string that always precedes the model;
# we look for the first model, then pick the preceding items minus the extension.
identifiers = []
i = 0
while i + 1 < len(row):
item, next_item = row[i], row[i + 1]
if isinstance(next_item, HistoryDatasetAssociation) or isinstance(next_item, Dataset):
break
identifiers.append(item)
i += 1
return tuple(identifiers)

results = []
if object_session(self):
stmt = self._build_nested_collection_attributes_stmt(
Expand All @@ -6315,7 +6301,7 @@ def find_identifiers(row):
tuples = object_session(self).execute(stmt)
# element_identifiers, extension, path
for row in tuples:
result = [find_identifiers(row), row.extension, row.Dataset.get_file_name()]
result = [row[:-3], row.extension, row.Dataset.get_file_name()]
hda = row.HistoryDatasetAssociation
result.append(hda.get_metadata_file_paths_and_extensions())
results.append(result)
Expand Down Expand Up @@ -6468,7 +6454,6 @@ def replace_failed_elements(self, replacements):
return_entities=[DatasetCollectionElement], hda_attributes=["id"]
)
tuples = object_session(self).execute(stmt).all()
tuples = [(element, id) for element, id, *_ in tuples]
hda_id_to_element = dict(tuples)
for failed, replacement in replacements.items():
element = hda_id_to_element.get(failed.id)
Expand Down
14 changes: 7 additions & 7 deletions test/unit/data/test_galaxy_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,25 +414,25 @@ def test_nested_collection_attributes(self):
)
result = self.model.session.execute(stmt).all()
assert [(r._fields) for r in result] == [
("element_identifier_0", "element_identifier_1", "extension", "state", "element_index", "element_index_1"),
("element_identifier_0", "element_identifier_1", "extension", "state", "element_index", "element_index_1"),
("element_identifier_0", "element_identifier_1", "extension", "state"),
("element_identifier_0", "element_identifier_1", "extension", "state"),
]

stmt = c2._build_nested_collection_attributes_stmt(
element_attributes=("element_identifier",), hda_attributes=("extension",), dataset_attributes=("state",)
)
result = self.model.session.execute(stmt).all()
assert result == [("inner_list", "forward", "bam", "new", 0, 0), ("inner_list", "reverse", "txt", "new", 0, 1)]
assert result == [("inner_list", "forward", "bam", "new"), ("inner_list", "reverse", "txt", "new")]

stmt = c2._build_nested_collection_attributes_stmt(return_entities=(model.HistoryDatasetAssociation,))
result = self.model.session.execute(stmt).all()
assert result == [(d1, 0, 0), (d2, 0, 1)]
assert result == [(d1,), (d2,)]

stmt = c2._build_nested_collection_attributes_stmt(
return_entities=(model.HistoryDatasetAssociation, model.Dataset)
)
result = self.model.session.execute(stmt).all()
assert result == [(d1, d1.dataset, 0, 0), (d2, d2.dataset, 0, 1)]
assert result == [(d1, d1.dataset), (d2, d2.dataset)]
# Assert properties that use _get_nested_collection_attributes return correct content
assert c2.dataset_instances == [d1, d2]
assert c2.dataset_elements == [dce1, dce2]
Expand All @@ -455,8 +455,8 @@ def test_nested_collection_attributes(self):
stmt = c4._build_nested_collection_attributes_stmt(element_attributes=("element_identifier",))
result = self.model.session.execute(stmt).all()
assert result == [
("outer_list", "inner_list", "forward", 0, 0, 0),
("outer_list", "inner_list", "reverse", 0, 0, 1),
("outer_list", "inner_list", "forward"),
("outer_list", "inner_list", "reverse"),
]
assert c4.dataset_elements == [dce1, dce2]

Expand Down

0 comments on commit eeeaf7b

Please sign in to comment.