From c0692738b5be6aa20052fba21142cd35935b8249 Mon Sep 17 00:00:00 2001
From: Harshad
Date: Thu, 16 Nov 2023 16:18:06 -0600
Subject: [PATCH] Implemented `entities_metadata_statements()` for `SQLImplementation()` (#679)

* Added `entities_metadata_statements` to SQLImplementation
* Moved test to core compliance tests
* included axiom annotations (optional)
* refactored to follow format
* formatted
* added expected results
* formatted
* added more cases
* formatted
* refactor include_all_triples to include_nested_metadata
* added kwargs
* Added **kwargs
---
 .../sqldb/sql_implementation.py               |  41 ++++++
 .../interfaces/basic_ontology_interface.py    |   6 +-
 tests/test_implementations/__init__.py        | 126 ++++++++++++++++++
 tests/test_implementations/test_sqldb.py      |   3 +
 4 files changed, 175 insertions(+), 1 deletion(-)

diff --git a/src/oaklib/implementations/sqldb/sql_implementation.py b/src/oaklib/implementations/sqldb/sql_implementation.py
index a3655c972..1838a2aa0 100644
--- a/src/oaklib/implementations/sqldb/sql_implementation.py
+++ b/src/oaklib/implementations/sqldb/sql_implementation.py
@@ -127,6 +127,7 @@
     DEFINITION,
     LANGUAGE_TAG,
     METADATA_MAP,
+    METADATA_STATEMENT,
     PRED_CURIE,
     PREFIX_MAP,
     RELATIONSHIP,
@@ -671,6 +672,46 @@ def entity_metadata_map(self, curie: CURIE, include_all_triples=False) -> METADA
         self.add_missing_property_values(curie, m)
         return dict(m)
 
+    def entities_metadata_statements(
+        self,
+        curies: Iterable[CURIE],
+        predicates: Optional[List[PRED_CURIE]] = None,
+        include_nested_metadata=False,
+        **kwargs,
+    ) -> Iterator[METADATA_STATEMENT]:
+        """
+        Yield metadata statements for the given entities by querying ``Statements``.
+
+        :param curies: subjects whose statements are returned; any iterable is accepted
+        :param predicates: if not None, restrict results to these predicates
+        :param include_nested_metadata: if False, restrict results to predicates that are
+            typed as ``owl:AnnotationProperty`` or listed in STANDARD_ANNOTATION_PROPERTIES
+        :param kwargs: accepted for signature compatibility; not used here
+        :return: (subject, predicate, value, datatype, axiom_annotations) tuples; the
+            last element is always an empty dict in this implementation
+        """
+        # in_() is documented to take a list of values, while callers may hand us any
+        # iterable (set, generator, ...), so materialise the arguments first.
+        curies = tuple(curies)
+        q = self.session.query(Statements)
+        if not include_nested_metadata:
+            subquery = self.session.query(RdfTypeStatement.subject).filter(
+                RdfTypeStatement.object == "owl:AnnotationProperty"
+            )
+            annotation_properties = {row.subject for row in subquery}
+            annotation_properties = annotation_properties.union(STANDARD_ANNOTATION_PROPERTIES)
+            q = q.filter(Statements.predicate.in_(tuple(annotation_properties)))
+        q = q.filter(Statements.subject.in_(curies))
+        if predicates is not None:
+            q = q.filter(Statements.predicate.in_(tuple(predicates)))
+        for row in q:
+            if row.value is not None:
+                v = _python_value(row.value, row.datatype)
+            else:
+                # a row without a literal value carries an object CURIE (or nothing at all)
+                v = row.object
+            yield row.subject, row.predicate, v, row.datatype, {}
+
     def ontologies(self) -> Iterable[CURIE]:
         for row in self.session.query(OntologyNode):
             yield row.id
diff --git a/src/oaklib/interfaces/basic_ontology_interface.py b/src/oaklib/interfaces/basic_ontology_interface.py
index 2a11cfc28..4e4322a1b 100644
--- a/src/oaklib/interfaces/basic_ontology_interface.py
+++ b/src/oaklib/interfaces/basic_ontology_interface.py
@@ -1419,7 +1419,11 @@ def entity_metadata_map(self, curie: CURIE) -> METADATA_MAP:
         raise NotImplementedError
 
     def entities_metadata_statements(
-        self, curies: Iterable[CURIE], predicates: Optional[List[PRED_CURIE]] = None
+        self,
+        curies: Iterable[CURIE],
+        predicates: Optional[List[PRED_CURIE]] = None,
+        include_nested_metadata=False,
+        **kwargs,
     ) -> Iterator[METADATA_STATEMENT]:
         """
         Retrieve metadata statements (entity annotations) for a collection of entities.
diff --git a/tests/test_implementations/__init__.py b/tests/test_implementations/__init__.py
index 7362e8e18..269eb093f 100644
--- a/tests/test_implementations/__init__.py
+++ b/tests/test_implementations/__init__.py
@@ -112,6 +112,7 @@
     INTRACELLULAR,
     INTRACELLULAR_ORGANELLE,
     MAMMALIA,
+    MEMBRANE,
     NUCLEAR_ENVELOPE,
     NUCLEAR_MEMBRANE,
     NUCLEUS,
@@ -1880,3 +1881,128 @@ def test_annotate_text(self, oi: TextAnnotatorInterface):
                 test.assertEqual(object_label, ann.object_label)
                 test.assertEqual(subject_start, ann.subject_start)
                 test.assertEqual(subject_end, ann.subject_end)
+
+    def test_entities_metadata_statements(self, oi: BasicOntologyInterface):
+        """
+        Check the statements returned for known GO terms, with and without a predicate filter.
+        """
+        test = self.test
+
+        cases = [
+            (
+                [MEMBRANE],
+                [OIO_CREATION_DATE],
+                [("GO:0016020", "oio:creation_date", "2014-03-06T11:37:54Z", "xsd:string", {})],
+            ),
+            (
+                [MEMBRANE],
+                None,
+                [
+                    (
+                        "GO:0016020",
+                        "IAO:0000115",
+                        "A lipid bilayer along with all the proteins and protein complexes embedded in it an attached to it.",  # noqa:E501
+                        "xsd:string",
+                        {},
+                    ),
+                    ("GO:0016020", "oio:creation_date", "2014-03-06T11:37:54Z", "xsd:string", {}),
+                    ("GO:0016020", "oio:hasAlternativeId", "GO:0098589", "xsd:string", {}),
+                    ("GO:0016020", "oio:hasAlternativeId", "GO:0098805", "xsd:string", {}),
+                    (
+                        "GO:0016020",
+                        "oio:hasDbXref",
+                        "Wikipedia:Biological_membrane",
+                        "xsd:string",
+                        {},
+                    ),
+                    ("GO:0016020", "oio:hasNarrowSynonym", "membrane region", "xsd:string", {}),
+                    ("GO:0016020", "oio:hasNarrowSynonym", "region of membrane", "xsd:string", {}),
+                    ("GO:0016020", "oio:hasNarrowSynonym", "whole membrane", "xsd:string", {}),
+                    ("GO:0016020", "oio:hasOBONamespace", "cellular_component", "xsd:string", {}),
+                    ("GO:0016020", "oio:id", "GO:0016020", "xsd:string", {}),
+                    ("GO:0016020", "oio:inSubset", "obo:go#goslim_yeast", None, {}),
+                    ("GO:0016020", "oio:inSubset", "obo:go#goslim_plant", None, {}),
+                    ("GO:0016020", "oio:inSubset", "obo:go#goslim_pir", None, {}),
+                    ("GO:0016020", "oio:inSubset", "obo:go#goslim_metagenomics", None, {}),
+                    ("GO:0016020", "oio:inSubset", "obo:go#goslim_flybase_ribbon", None, {}),
+                    ("GO:0016020", "oio:inSubset", "obo:go#goslim_chembl", None, {}),
+                    ("GO:0016020", "oio:inSubset", "obo:go#goslim_candida", None, {}),
+                    ("GO:0016020", "oio:inSubset", "obo:go#goslim_aspergillus", None, {}),
+                    ("GO:0016020", "rdfs:label", "membrane", "xsd:string", {}),
+                ],
+            ),
+            (
+                [MEMBRANE],
+                ["oio:inSubset"],
+                [
+                    ("GO:0016020", "oio:inSubset", "obo:go#goslim_yeast", None, {}),
+                    ("GO:0016020", "oio:inSubset", "obo:go#goslim_plant", None, {}),
+                    ("GO:0016020", "oio:inSubset", "obo:go#goslim_pir", None, {}),
+                    ("GO:0016020", "oio:inSubset", "obo:go#goslim_metagenomics", None, {}),
+                    ("GO:0016020", "oio:inSubset", "obo:go#goslim_flybase_ribbon", None, {}),
+                    ("GO:0016020", "oio:inSubset", "obo:go#goslim_chembl", None, {}),
+                    ("GO:0016020", "oio:inSubset", "obo:go#goslim_candida", None, {}),
+                    ("GO:0016020", "oio:inSubset", "obo:go#goslim_aspergillus", None, {}),
+                ],
+            ),
+            (
+                [NUCLEUS],
+                None,
+                [
+                    (
+                        "GO:0005634",
+                        "IAO:0000115",
+                        "A membrane-bounded organelle of eukaryotic cells in which chromosomes are housed and replicated. In most cells, the nucleus contains all of the cell's chromosomes except the organellar chromosomes, and is the site of RNA synthesis and processing. In some species, or in specialized cell types, RNA metabolism or DNA replication may be absent.",  # noqa:E501
+                        "xsd:string",
+                        {},
+                    ),
+                    (
+                        "GO:0005634",
+                        "oio:hasDbXref",
+                        "NIF_Subcellular:sao1702920020",
+                        "xsd:string",
+                        {},
+                    ),
+                    ("GO:0005634", "oio:hasDbXref", "Wikipedia:Cell_nucleus", "xsd:string", {}),
+                    ("GO:0005634", "oio:hasExactSynonym", "cell nucleus", "xsd:string", {}),
+                    ("GO:0005634", "oio:hasNarrowSynonym", "horsetail nucleus", "xsd:string", {}),
+                    ("GO:0005634", "oio:hasOBONamespace", "cellular_component", "xsd:string", {}),
+                    ("GO:0005634", "oio:id", "GO:0005634", "xsd:string", {}),
+                    ("GO:0005634", "oio:inSubset", "obo:go#goslim_yeast", None, {}),
+                    ("GO:0005634", "oio:inSubset", "obo:go#goslim_plant", None, {}),
+                    ("GO:0005634", "oio:inSubset", "obo:go#goslim_pir", None, {}),
+                    ("GO:0005634", "oio:inSubset", "obo:go#goslim_mouse", None, {}),
+                    ("GO:0005634", "oio:inSubset", "obo:go#goslim_metagenomics", None, {}),
+                    ("GO:0005634", "oio:inSubset", "obo:go#goslim_generic", None, {}),
+                    ("GO:0005634", "oio:inSubset", "obo:go#goslim_flybase_ribbon", None, {}),
+                    ("GO:0005634", "oio:inSubset", "obo:go#goslim_drosophila", None, {}),
+                    ("GO:0005634", "oio:inSubset", "obo:go#goslim_chembl", None, {}),
+                    ("GO:0005634", "oio:inSubset", "obo:go#goslim_candida", None, {}),
+                    ("GO:0005634", "oio:inSubset", "obo:go#goslim_aspergillus", None, {}),
+                    ("GO:0005634", "oio:inSubset", "obo:go#goslim_agr", None, {}),
+                    ("GO:0005634", "rdfs:label", "nucleus", "xsd:string", {}),
+                ],
+            ),
+            (
+                [NUCLEUS],
+                ["oio:inSubset"],
+                [
+                    ("GO:0005634", "oio:inSubset", "obo:go#goslim_yeast", None, {}),
+                    ("GO:0005634", "oio:inSubset", "obo:go#goslim_plant", None, {}),
+                    ("GO:0005634", "oio:inSubset", "obo:go#goslim_pir", None, {}),
+                    ("GO:0005634", "oio:inSubset", "obo:go#goslim_mouse", None, {}),
+                    ("GO:0005634", "oio:inSubset", "obo:go#goslim_metagenomics", None, {}),
+                    ("GO:0005634", "oio:inSubset", "obo:go#goslim_generic", None, {}),
+                    ("GO:0005634", "oio:inSubset", "obo:go#goslim_flybase_ribbon", None, {}),
+                    ("GO:0005634", "oio:inSubset", "obo:go#goslim_drosophila", None, {}),
+                    ("GO:0005634", "oio:inSubset", "obo:go#goslim_chembl", None, {}),
+                    ("GO:0005634", "oio:inSubset", "obo:go#goslim_candida", None, {}),
+                    ("GO:0005634", "oio:inSubset", "obo:go#goslim_aspergillus", None, {}),
+                    ("GO:0005634", "oio:inSubset", "obo:go#goslim_agr", None, {}),
+                ],
+            ),
+        ]
+        for curies, predicates, expected_result in cases:
+            results = list(oi.entities_metadata_statements(curies=curies, predicates=predicates))
+            # No ordering is promised (the SQL backend has no ORDER BY): compare as multisets.
+            test.assertCountEqual(results, expected_result)
diff --git a/tests/test_implementations/test_sqldb.py b/tests/test_implementations/test_sqldb.py
index e56116c47..42feb32a9 100644
--- a/tests/test_implementations/test_sqldb.py
+++ b/tests/test_implementations/test_sqldb.py
@@ -899,3 +899,6 @@ def test_transitive_object_properties(self):
 
     def test_simple_subproperty_of_chains(self):
         self.compliance_tester.test_simple_subproperty_of_chains(self.oi)
+
+    def test_entities_metadata_statements(self):
+        self.compliance_tester.test_entities_metadata_statements(self.oi)