From 664154f23e1bb8f5d4eabc701603f88bda46c1e9 Mon Sep 17 00:00:00 2001 From: Kevin Schaper Date: Wed, 16 Oct 2024 09:39:00 -0700 Subject: [PATCH 1/2] remove hpoa ingests that have been moved to out to their own repo --- src/monarch_ingest/ingests.yaml | 20 --- .../hpoa/disease_mode_of_inheritance.py | 85 --------- .../hpoa/disease_mode_of_inheritance.yaml | 60 ------- .../ingests/hpoa/disease_to_phenotype.py | 117 ------------ .../ingests/hpoa/disease_to_phenotype.yaml | 67 ------- .../ingests/hpoa/gene_to_disease.py | 43 ----- .../ingests/hpoa/gene_to_disease.yaml | 28 --- .../ingests/hpoa/gene_to_phenotype.py | 35 ---- .../ingests/hpoa/gene_to_phenotype.yaml | 32 ---- .../ingests/hpoa/hpoa-translation.yaml | 50 ------ src/monarch_ingest/ingests/hpoa/hpoa_utils.py | 168 ------------------ 11 files changed, 705 deletions(-) delete mode 100644 src/monarch_ingest/ingests/hpoa/disease_mode_of_inheritance.py delete mode 100644 src/monarch_ingest/ingests/hpoa/disease_mode_of_inheritance.yaml delete mode 100644 src/monarch_ingest/ingests/hpoa/disease_to_phenotype.py delete mode 100644 src/monarch_ingest/ingests/hpoa/disease_to_phenotype.yaml delete mode 100644 src/monarch_ingest/ingests/hpoa/gene_to_disease.py delete mode 100644 src/monarch_ingest/ingests/hpoa/gene_to_disease.yaml delete mode 100644 src/monarch_ingest/ingests/hpoa/gene_to_phenotype.py delete mode 100644 src/monarch_ingest/ingests/hpoa/gene_to_phenotype.yaml delete mode 100644 src/monarch_ingest/ingests/hpoa/hpoa-translation.yaml delete mode 100644 src/monarch_ingest/ingests/hpoa/hpoa_utils.py diff --git a/src/monarch_ingest/ingests.yaml b/src/monarch_ingest/ingests.yaml index ff6702a6..64a1dcdd 100644 --- a/src/monarch_ingest/ingests.yaml +++ b/src/monarch_ingest/ingests.yaml @@ -50,8 +50,6 @@ biogrid: config: 'ingests/biogrid/biogrid.yaml' ctd_chemical_to_disease: config: 'ingests/ctd/chemical_to_disease.yaml' -# flybase_publication_to_gene:: -# config: 'ingests/flybase/publication_to_gene.yaml' dictybase_gene: config: 'ingests/dictybase/gene.yaml' dictybase_gene_to_phenotype: @@ -60,16 +58,6 @@ go_annotation: config: 'ingests/go/annotation.yaml' hgnc_gene: config: 'ingests/hgnc/gene.yaml' -hpoa_disease_to_phenotype: - config: 'ingests/hpoa/disease_to_phenotype.yaml' -hpoa_gene_to_disease: - config: 'ingests/hpoa/gene_to_disease.yaml' -hpoa_disease_mode_of_inheritance: - config: 'ingests/hpoa/disease_mode_of_inheritance.yaml' -hpoa_gene_to_phenotype: - config: 'ingests/hpoa/gene_to_phenotype.yaml' -# mgi_publication_to_gene: -# config: 'ingests/mgi/publication_to_gene.yaml' panther_genome_orthologs: config: 'ingests/panther/genome_orthologs.yaml' pombase_gene: @@ -82,10 +70,6 @@ reactome_gene_to_pathway: config: 'ingests/reactome/gene_to_pathway.yaml' reactome_pathway: config: 'ingests/reactome/pathway.yaml' -# rgd_publication_to_gene: -# config: 'ingests/rgd/publication_to_gene.yaml' -# sgd_publication_to_gene: -# config: 'ingests/sgd/publication_to_gene.yaml' string_protein_links: config: 'ingests/string/protein_links.yaml' xenbase_gene_to_phenotype: @@ -94,10 +78,6 @@ xenbase_orthologs: config: 'ingests/xenbase/orthologs.yaml' xenbase_non_entrez_orthologs: config: 'ingests/xenbase/non_entrez_orthologs.yaml' -# xenbase_publication_to_gene: -# config: 'ingests/xenbase/publication_to_gene.yaml' zfin_gene_to_phenotype: config: 'ingests/zfin/gene_to_phenotype.yaml' -# zfin_publication_to_gene: -# config: 'ingests/zfin/publication_to_gene.yaml' diff --git a/src/monarch_ingest/ingests/hpoa/disease_mode_of_inheritance.py b/src/monarch_ingest/ingests/hpoa/disease_mode_of_inheritance.py deleted file mode 100644 index 0b3d56ba..00000000 --- a/src/monarch_ingest/ingests/hpoa/disease_mode_of_inheritance.py +++ /dev/null @@ -1,85 +0,0 @@ -""" -The [Human Phenotype Ontology](http://human-phenotype-ontology.org) group -curates and assembles over 115,000 annotations to hereditary diseases -using the HPO ontology. Here we create Biolink associations -between diseases and their mode of inheritance. - -This parser only processes out the "inheritance" (aspect == 'I') annotation records. - -filters: - - inclusion: 'include' - column: 'aspect' - filter_code: 'eq' - value: 'I' - -Usage: -poetry run koza transform \ - --global-table src/monarch_ingest/translation_table.yaml \ - --local-table src/monarch_ingest/ingests/hpoa/hpoa-translation.yaml \ - --source src/monarch_ingest/ingests/hpoa/disease_mode_of_inheritance.yaml \ - --output-format tsv -""" - -from typing import List - -import uuid - -from koza.cli_utils import get_koza_app - -from biolink_model.datamodel.pydanticmodel_v2 import ( - DiseaseOrPhenotypicFeatureToGeneticInheritanceAssociation, - KnowledgeLevelEnum, - AgentTypeEnum, -) - -from loguru import logger - -koza_app = get_koza_app("hpoa_disease_mode_of_inheritance") - -while (row := koza_app.get_row()) is not None: - - # Object: Actually a Genetic Inheritance (as should be specified by a suitable HPO term) - # TODO: perhaps load the proper (Genetic Inheritance) node concepts into the Monarch Graph (simply as Ontology terms?). - hpo_id = row["hpo_id"] - - # We ignore records that don't map to a known HPO term for Genetic Inheritance - # (as recorded in the locally bound 'hpoa-modes-of-inheritance' table) - if hpo_id and hpo_id in koza_app.translation_table.local_table: - - # Nodes - - # Subject: Disease - disease_id = row["database_id"] - - # Predicate (canonical direction) - predicate = "biolink:has_mode_of_inheritance" - - # Annotations - - # Three letter ECO code to ECO class based on HPO documentation - evidence_curie = koza_app.translation_table.resolve_term(row["evidence"]) - - # Publications - publications_field: str = row["reference"] - publications: List[str] = publications_field.split(";") - - # Filter out some weird NCBI web endpoints - publications = [p for p in publications if not p.startswith("http")] - - # Association/Edge - association = DiseaseOrPhenotypicFeatureToGeneticInheritanceAssociation( - id="uuid:" + str(uuid.uuid1()), - subject=disease_id, - predicate=predicate, - object=hpo_id, - publications=publications, - has_evidence=[evidence_curie], - aggregator_knowledge_source=["infores:monarchinitiative"], - primary_knowledge_source="infores:hpo-annotations", - knowledge_level=KnowledgeLevelEnum.knowledge_assertion, - agent_type=AgentTypeEnum.manual_agent, - ) - koza_app.write(association) - - else: - logger.warning(f"HPOA ID field value '{str(hpo_id)}' is missing or an invalid disease mode of inheritance?") diff --git a/src/monarch_ingest/ingests/hpoa/disease_mode_of_inheritance.yaml b/src/monarch_ingest/ingests/hpoa/disease_mode_of_inheritance.yaml deleted file mode 100644 index 545fc58d..00000000 --- a/src/monarch_ingest/ingests/hpoa/disease_mode_of_inheritance.yaml +++ /dev/null @@ -1,60 +0,0 @@ -name: 'hpoa_disease_mode_of_inheritance' - -metadata: - ingest_title: 'Human Phenotype Ontology Annotations' - ingest_url: 'https://hpo.jax.org/app/' - description: 'Ingest of HPO disease to phenotype associations' - rights: 'https://hpo.jax.org/app/license' - -# List of files to be ingested -files: - - './data/hpoa/phenotype.hpoa' - -# Format options csv, json -format: 'csv' - -# Specify a delimiter for csv formats -delimiter: '\t' - -# If the source_file has metadata lines to skip, use -header: 4 - -global_table: './src/monarch_ingest/translation_table.yaml' -local_table: './src/monarch_ingest/ingests/hpoa/hpoa-translation.yaml' - -# We only concern ourselves with Aspect == 'I' (inheritance) records in this ingest. -filters: - - inclusion: 'include' - column: 'aspect' - filter_code: 'eq' - value: 'I' - -# For a csv/tsv file, list expected columns -columns: - - 'database_id' - - 'disease_name' - - 'qualifier' - - 'hpo_id' - - 'reference' - - 'evidence' - - 'onset' - - 'frequency' - - 'sex' - - 'modifier' - - 'aspect' - - 'biocuration' - -# Disease subject with annotated 'Mode of Inheritance' -# DiseaseOrPhenotypicFeatureToModeOfGeneticInheritanceAssociation -edge_properties: - - 'id' - - 'category' - - 'subject' - - 'predicate' - - 'object' - - 'publications' - - 'has_evidence' - - 'aggregator_knowledge_source' - - 'primary_knowledge_source' - - 'knowledge_level' - - 'agent_type' diff --git a/src/monarch_ingest/ingests/hpoa/disease_to_phenotype.py b/src/monarch_ingest/ingests/hpoa/disease_to_phenotype.py deleted file mode 100644 index 6b027778..00000000 --- a/src/monarch_ingest/ingests/hpoa/disease_to_phenotype.py +++ /dev/null @@ -1,117 +0,0 @@ -""" -The [Human Phenotype Ontology](http://human-phenotype-ontology.org) group -curates and assembles over 115,000 annotations to hereditary diseases -using the HPO ontology. Here we create Biolink associations -between diseases and phenotypic features, together with their evidence, -and age of onset and frequency (if known). - -The parser currently only processes the "abnormal" annotations. -Association to "remarkable normality" will be added in the near future. - -filters: - - inclusion: 'include' - column: 'Aspect' - filter_code: 'eq' - value: 'P' - -We are only keeping 'P' == 'phenotypic anomaly' records. - -Usage: -poetry run koza transform \ - --global-table src/monarch_ingest/translation_table.yaml \ - --local-table src/monarch_ingest/ingests/hpoa/hpoa-translation.yaml \ - --source src/monarch_ingest/ingests/hpoa/disease_phenotype.yaml \ - --output-format tsv -""" - -from typing import Optional, List - -import uuid - -from koza.cli_utils import get_koza_app - -from biolink_model.datamodel.pydanticmodel_v2 import ( - DiseaseToPhenotypicFeatureAssociation, - KnowledgeLevelEnum, - AgentTypeEnum, -) -from monarch_ingest.ingests.hpoa.hpoa_utils import phenotype_frequency_to_hpo_term, Frequency - - -def get_primary_knowledge_source(disease_id: str) -> str: - if disease_id.startswith("OMIM"): - return "infores:omim" - elif disease_id.startswith("ORPHA") or "orpha" in disease_id.lower(): - return "infores:orphanet" - elif disease_id.startswith("DECIPHER"): - return "infores:decipher" - else: - raise ValueError(f"Unknown disease ID prefix for {disease_id}, can't set primary_knowledge_source") - - -koza_app = get_koza_app("hpoa_disease_to_phenotype") - -while (row := koza_app.get_row()) is not None: - - # Nodes - disease_id = row["database_id"] - - predicate = "biolink:has_phenotype" - - hpo_id = row["hpo_id"] - assert hpo_id, "HPOA Disease to Phenotype has missing HP ontology ('HPO_ID') field identifier?" - - # Predicate negation - negated: Optional[bool] - if row["qualifier"] == "NOT": - negated = True - else: - negated = False - - # Annotations - - # Translations to curies - # Three letter ECO code to ECO class based on hpo documentation - evidence_curie = koza_app.translation_table.resolve_term(row["evidence"]) - - # female -> PATO:0000383 - # male -> PATO:0000384 - sex: Optional[str] = row["sex"] # may be translated by local table - sex_qualifier = koza_app.translation_table.resolve_term(sex) if sex else None - - onset = row["onset"] - - # Raw frequencies - HPO term curies, ratios, percentages - normalized to HPO terms - frequency: Frequency = phenotype_frequency_to_hpo_term(row["frequency"]) - - # Publications - publications_field: str = row["reference"] - publications: List[str] = publications_field.split(";") - - # don't populate the reference with the database_id / disease id - publications = [p for p in publications if not p == row["database_id"]] - - primary_knowledge_source = get_primary_knowledge_source(disease_id) - - # Association/Edge - association = DiseaseToPhenotypicFeatureAssociation( - id="uuid:" + str(uuid.uuid1()), - subject=disease_id.replace("ORPHA:", "Orphanet:"), # match `Orphanet` as used in Mondo SSSOM - predicate=predicate, - negated=negated, - object=hpo_id, - publications=publications, - has_evidence=[evidence_curie], - sex_qualifier=sex_qualifier, - onset_qualifier=onset, - has_percentage=frequency.has_percentage, - has_quotient=frequency.has_quotient, - frequency_qualifier=frequency.frequency_qualifier if frequency.frequency_qualifier else None, - has_count=frequency.has_count, - has_total=frequency.has_total, - aggregator_knowledge_source=["infores:monarchinitiative", "infores:hpo-annotations"], - primary_knowledge_source=primary_knowledge_source, - knowledge_level=KnowledgeLevelEnum.knowledge_assertion, - agent_type=AgentTypeEnum.manual_agent, - ) - koza_app.write(association) diff --git a/src/monarch_ingest/ingests/hpoa/disease_to_phenotype.yaml b/src/monarch_ingest/ingests/hpoa/disease_to_phenotype.yaml deleted file mode 100644 index 8c3c87aa..00000000 --- a/src/monarch_ingest/ingests/hpoa/disease_to_phenotype.yaml +++ /dev/null @@ -1,67 +0,0 @@ -name: 'hpoa_disease_to_phenotype' - -metadata: - ingest_title: 'Human Phenotype Ontology Annotations' - ingest_url: 'https://hpo.jax.org/app/' - description: 'Ingest of HPO disease to phenotype associations' - rights: 'https://hpo.jax.org/app/license' - -# List of files to be ingested -files: - - './data/hpoa/phenotype.hpoa' - -# Format options csv, json -format: 'csv' - -# Specify a delimiter for csv formats -delimiter: '\t' - -# If the source_file has metadata lines to skip, use -header: 4 - -global_table: './src/monarch_ingest/translation_table.yaml' -local_table: './src/monarch_ingest/ingests/hpoa/hpoa-translation.yaml' - -# We only concern ourselves with Aspect == 'P' (phenotypic anomalies) records in this ingest. -filters: - - inclusion: 'include' - column: 'aspect' - filter_code: 'eq' - value: 'P' - -# For a csv/tsv file, list expected columns -columns: - - 'database_id' - - 'disease_name' - - 'qualifier' - - 'hpo_id' - - 'reference' - - 'evidence' - - 'onset' - - 'frequency' - - 'sex' - - 'modifier' - - 'aspect' - - 'biocuration' - -# DiseaseToPhenotypicFeatureAssociation -edge_properties: - - 'id' - - 'category' - - 'subject' - - 'predicate' - - 'negated' - - 'object' - - 'publications' - - 'onset_qualifier' - - 'frequency_qualifier' - - 'has_count' - - 'has_total' - - 'has_percentage' - - 'has_quotient' - - 'sex_qualifier' - - 'has_evidence' - - 'aggregator_knowledge_source' - - 'primary_knowledge_source' - - 'knowledge_level' - - 'agent_type' \ No newline at end of file diff --git a/src/monarch_ingest/ingests/hpoa/gene_to_disease.py b/src/monarch_ingest/ingests/hpoa/gene_to_disease.py deleted file mode 100644 index 54f8cc19..00000000 --- a/src/monarch_ingest/ingests/hpoa/gene_to_disease.py +++ /dev/null @@ -1,43 +0,0 @@ -import uuid - -from biolink_model.datamodel.pydanticmodel_v2 import ( - CausalGeneToDiseaseAssociation, - CorrelatedGeneToDiseaseAssociation, - KnowledgeLevelEnum, - AgentTypeEnum, -) -from koza.cli_utils import get_koza_app - -from monarch_ingest.constants import INFORES_MONARCHINITIATIVE, BIOLINK_CAUSES -from monarch_ingest.ingests.hpoa.hpoa_utils import get_knowledge_sources, get_predicate - -koza_app = get_koza_app("hpoa_gene_to_disease") - - -while (row := koza_app.get_row()) is not None: - gene_id = row["ncbi_gene_id"] - disease_id = row["disease_id"].replace("ORPHA:", "Orphanet:") - - predicate = get_predicate(row["association_type"]) - - primary_knowledge_source, aggregator_knowledge_source = get_knowledge_sources( - row["source"], INFORES_MONARCHINITIATIVE - ) - - if predicate == BIOLINK_CAUSES: - association_class = CausalGeneToDiseaseAssociation - else: - association_class = CorrelatedGeneToDiseaseAssociation - - association = association_class( - id="uuid:" + str(uuid.uuid1()), - subject=gene_id, - predicate=predicate, - object=disease_id, - primary_knowledge_source=primary_knowledge_source, - aggregator_knowledge_source=aggregator_knowledge_source, - knowledge_level=KnowledgeLevelEnum.knowledge_assertion, - agent_type=AgentTypeEnum.manual_agent, - ) - - koza_app.write(association) diff --git a/src/monarch_ingest/ingests/hpoa/gene_to_disease.yaml b/src/monarch_ingest/ingests/hpoa/gene_to_disease.yaml deleted file mode 100644 index 22998dd8..00000000 --- a/src/monarch_ingest/ingests/hpoa/gene_to_disease.yaml +++ /dev/null @@ -1,28 +0,0 @@ -name: 'hpoa_gene_to_disease' - -files: - - './data/hpoa/genes_to_disease.txt' - -format: 'csv' - -delimiter: '\t' - -columns: - - 'ncbi_gene_id' - - 'gene_symbol' - - 'association_type' - - 'disease_id' - - 'source' - -edge_properties: - - 'id' - - 'category' - - 'subject' - - 'predicate' - - 'object' - - 'aggregator_knowledge_source' - - 'primary_knowledge_source' - - 'knowledge_level' - - 'agent_type' - -transform_mode: 'flat' diff --git a/src/monarch_ingest/ingests/hpoa/gene_to_phenotype.py b/src/monarch_ingest/ingests/hpoa/gene_to_phenotype.py deleted file mode 100644 index 1ba53908..00000000 --- a/src/monarch_ingest/ingests/hpoa/gene_to_phenotype.py +++ /dev/null @@ -1,35 +0,0 @@ -""" -Usage: -poetry run koza transform \ - --source src/monarch_ingest/ingests/hpoa/gene_to_phenotype.yaml \ - --output-format tsv -""" - -import uuid - -from koza.cli_utils import get_koza_app - -from biolink_model.datamodel.pydanticmodel_v2 import ( - GeneToPhenotypicFeatureAssociation, - KnowledgeLevelEnum, - AgentTypeEnum, -) - -koza_app = get_koza_app("hpoa_gene_to_phenotype") - -while (row := koza_app.get_row()) is not None: - gene_id = "NCBIGene:" + row["ncbi_gene_id"] - phenotype_id = row["hpo_id"] - - association = GeneToPhenotypicFeatureAssociation( - id="uuid:" + str(uuid.uuid1()), - subject=gene_id, - predicate="biolink:has_phenotype", - object=phenotype_id, - aggregator_knowledge_source=["infores:monarchinitiative"], - primary_knowledge_source="infores:hpo-annotations", - knowledge_level=KnowledgeLevelEnum.logical_entailment, - agent_type=AgentTypeEnum.automated_agent, - ) - - koza_app.write(association) diff --git a/src/monarch_ingest/ingests/hpoa/gene_to_phenotype.yaml b/src/monarch_ingest/ingests/hpoa/gene_to_phenotype.yaml deleted file mode 100644 index f8e8a0bf..00000000 --- a/src/monarch_ingest/ingests/hpoa/gene_to_phenotype.yaml +++ /dev/null @@ -1,32 +0,0 @@ -name: 'hpoa_gene_to_phenotype' - -files: - - './data/hpoa/genes_to_phenotype.txt' - -header: 'none' - -format: 'csv' - -delimiter: '\t' - -columns: - - 'ncbi_gene_id' - - 'gene_symbol' - - 'hpo_id' - - 'hpo_name' - - 'frequency' - - 'disease_id' - -edge_properties: - - 'id' - - 'category' - - 'subject' - - 'predicate' - - 'object' - - 'qualifiers' - - 'aggregator_knowledge_source' - - 'primary_knowledge_source' - - 'knowledge_level' - - 'agent_type' - -transform_mode: 'flat' diff --git a/src/monarch_ingest/ingests/hpoa/hpoa-translation.yaml b/src/monarch_ingest/ingests/hpoa/hpoa-translation.yaml deleted file mode 100644 index 3b79c0ad..00000000 --- a/src/monarch_ingest/ingests/hpoa/hpoa-translation.yaml +++ /dev/null @@ -1,50 +0,0 @@ ---- -# HPO Annotations - -# Evidence Code translations - -"IEA": "evidence used in automatic assertion" # "ECO:0000501", -"PCS": "experimental evidence used in manual assertion" # "ECO:0000269", -"TAS": "author statement supported by traceable reference used in manual assertion" # "ECO:0000304", -"ITM": "computational combinatorial evidence used in automatic assertion" # "ECO:0000246", -"ICE": "curator inference used in manual assertion" # "ECO:0000305", should be gone?? - -# Sex (right now both all uppercase and all lowercase -"male": "male" -"MALE": "male" -"female": "female" -"FEMALE": "female" - -# HPO "Mode of Inheritance" terms -"HP:0001417": "X-linked inheritance" -"HP:0000005": "Mode of inheritance" -"HP:0001423": "X-linked dominant inheritance" -"HP:0010982": "Polygenic inheritance" -"HP:0010984": "Digenic inheritance" -"HP:0001450": "Y-linked inheritance" -"HP:0001475": "Male-limited autosomal dominant" -"HP:0032384": "Uniparental isodisomy" -"HP:0001426": "Multifactorial inheritance" -"HP:0000006": "Autosomal dominant inheritance" -"HP:0032113": "Semidominant inheritance" -"HP:0032382": "Uniparental disomy" -"HP:0032383": "Uniparental heterodisomy" -"HP:0001452": "Autosomal dominant contiguous gene syndrome" -"HP:0003745": "Sporadic" -"HP:0001425": "Heterogeneous" -"HP:0001466": "Contiguous gene syndrome" -"HP:0003744": "Genetic anticipation with paternal anticipation bias" -"HP:0012274": "Autosomal dominant inheritance with paternal imprinting" -"HP:0000007": "Autosomal recessive inheritance" -"HP:0003743": "Genetic anticipation" -"HP:0001419": "X-linked recessive inheritance" -"HP:0001442": "Somatic mosaicism" -"HP:0001428": "Somatic mutation" -"HP:0010983": "Oligogenic inheritance" -"HP:0001444": "Autosomal dominant somatic cell mutation" -"HP:0031362": "Sex-limited autosomal recessive inheritance" -"HP:0025352": "Autosomal dominant germline de novo mutation" -"HP:0001470": "Sex-limited autosomal dominant" -"HP:0012275": "Autosomal dominant inheritance with maternal imprinting" -"HP:0001427": "Mitochondrial inheritance" -"HP:0010985": "Gonosomal inheritance" diff --git a/src/monarch_ingest/ingests/hpoa/hpoa_utils.py b/src/monarch_ingest/ingests/hpoa/hpoa_utils.py deleted file mode 100644 index 6e2efbf7..00000000 --- a/src/monarch_ingest/ingests/hpoa/hpoa_utils.py +++ /dev/null @@ -1,168 +0,0 @@ -""" -HPOA processing utility methods -""" - -from typing import Optional, List, Dict - - -from loguru import logger -from pydantic import BaseModel - -from monarch_ingest.constants import ( - INFORES_MEDGEN, - INFORES_OMIM, - INFORES_ORPHANET, - BIOLINK_CAUSES, - BIOLINK_CONTRIBUTES_TO, - BIOLINK_GENE_ASSOCIATED_WITH_CONDITION, -) - - -class FrequencyHpoTerm(BaseModel): - curie: str - name: str - lower: float - upper: float - - -class Frequency(BaseModel): - frequency_qualifier: Optional[str] = None - has_percentage: Optional[float] = None - has_quotient: Optional[float] = None - has_count: Optional[int] = None - has_total: Optional[int] = None - # convert the fields above to pydantic field declarations - - -# HPO "HP:0040279": representing the frequency of phenotypic abnormalities within a patient cohort. -hpo_term_to_frequency: Dict = { - "HP:0040280": FrequencyHpoTerm( - curie="HP:0040280", name="Obligate", lower=100.0, upper=100.0 - ), # Always present,i.e. in 100% of the cases. - "HP:0040281": FrequencyHpoTerm( - curie="HP:0040281", name="Very frequent", lower=80.0, upper=99.0 - ), # Present in 80% to 99% of the cases. - "HP:0040282": FrequencyHpoTerm( - curie="HP:0040282", name="Frequent", lower=30.0, upper=79.0 - ), # Present in 30% to 79% of the cases. - "HP:0040283": FrequencyHpoTerm( - curie="HP:0040283", name="Occasional", lower=5.0, upper=29.0 - ), # Present in 5% to 29% of the cases. - "HP:0040284": FrequencyHpoTerm( - curie="HP:0040284", name="Very rare", lower=1.0, upper=4.0 - ), # Present in 1% to 4% of the cases. - "HP:0040285": FrequencyHpoTerm( - curie="HP:0040285", name="Excluded", lower=0.0, upper=0.0 - ), # Present in 0% of the cases. -} - - -def get_hpo_term(hpo_id: str) -> Optional[FrequencyHpoTerm]: - if hpo_id: - return hpo_term_to_frequency[hpo_id] if hpo_id in hpo_term_to_frequency else None - else: - return None - - -def map_percentage_frequency_to_hpo_term(percentage_or_quotient: float) -> Optional[FrequencyHpoTerm]: - """ - Map phenotypic percentage frequency to a corresponding HPO term corresponding to (HP:0040280 to HP:0040285). - - :param percentage_or_quotient: int, should be in range 0.0 to 100.0 - :return: str, HPO term mapping onto percentage range of term definition; None if outside range - """ - for hpo_id, details in hpo_term_to_frequency.items(): - if details.lower <= percentage_or_quotient <= details.upper: - return details - - return None - - -def phenotype_frequency_to_hpo_term(frequency_field: Optional[str]) -> Frequency: - """ - Maps a raw frequency field onto HPO, for consistency. This is needed since the **phenotypes.hpoa** - file field #8 which tracks phenotypic frequency, has a variable values. There are three allowed options for this field: - - 1. A term-id from the HPO-sub-ontology below the term “Frequency” (HP:0040279). (since December 2016 ; before was a mixture of values). The terms for frequency are in alignment with Orphanet; - 2. A percentage value such as 17%. - 3. A count of patients affected within a cohort. For instance, 7/13 would indicate that 7 of the 13 patients with the specified disease were found to have the phenotypic abnormality referred to by the HPO term in question in the study referred to by the DB_Reference; - - :param frequency_field: str, raw frequency value in one of the three above forms - :return: Optional[FrequencyHpoTerm, float, float], raw frequency mapped to its HPO term, quotient or percentage - respectively (as applicable); return None if unmappable; - percentage and/or quotient returned are also None, if not applicable - """ - hpo_term: Optional[FrequencyHpoTerm] = None - quotient: Optional[float] = None - percentage: Optional[float] = None - has_count: Optional[int] = None - has_total: Optional[int] = None - if frequency_field: - try: - - if frequency_field.startswith("HP:"): - hpo_term = get_hpo_term(hpo_id=frequency_field) - - elif frequency_field.endswith("%"): - percentage = float(frequency_field.removesuffix("%")) - quotient = percentage / 100.0 - - else: - # assume a ratio - ratio_parts = frequency_field.split("/") - has_count = int(ratio_parts[0]) - has_total = int(ratio_parts[1]) - quotient = float(has_count / has_total) - percentage = quotient * 100.0 - - except Exception: - # expected ratio not recognized - logger.error(f"hpoa_frequency(): invalid frequency ratio '{frequency_field}'") - frequency_field = None - else: - # may be None, if original field was empty or has an invalid value - return Frequency() - - return Frequency( - frequency_qualifier=hpo_term.curie if hpo_term else None, - has_percentage=percentage, - has_quotient=quotient, - has_count=has_count, - has_total=has_total, - ) - - -def get_knowledge_sources(original_source: str, additional_source: str) -> (str, List[str]): - """ - Return a tuple of the primary_knowledge_source and original_knowledge_source - """ - _primary_knowledge_source: str = "" - _aggregator_knowledge_source: List[str] = [] - - if additional_source is not None: - _aggregator_knowledge_source.append(additional_source) - - if "medgen" in original_source: - _aggregator_knowledge_source.append(INFORES_MEDGEN) - _primary_knowledge_source = INFORES_OMIM - elif "orphadata" in original_source: - _primary_knowledge_source = INFORES_ORPHANET - - if _primary_knowledge_source == "": - raise ValueError(f"Unknown knowledge source: {original_source}") - - return _primary_knowledge_source, _aggregator_knowledge_source - - -def get_predicate(original_predicate: str) -> str: - """ - Convert the association column into a Biolink Model predicate - """ - if original_predicate == 'MENDELIAN': - return BIOLINK_CAUSES - elif original_predicate == 'POLYGENIC': - return BIOLINK_CONTRIBUTES_TO - elif original_predicate == 'UNKNOWN': - return BIOLINK_GENE_ASSOCIATED_WITH_CONDITION - else: - raise ValueError(f"Unknown predicate: {original_predicate}") From 7722f9f829fd336280b2861dcb4963fca8639329 Mon Sep 17 00:00:00 2001 From: Kevin Schaper Date: Wed, 16 Oct 2024 13:46:07 -0700 Subject: [PATCH 2/2] small config refactor & cleanup --- src/monarch_ingest/ingests.yaml | 5 +- src/monarch_ingest/ingests/hgnc/gene.py | 17 -- .../test_hpoa_disease_mode_of_inheritance.py | 48 ---- .../unit/hpoa/test_hpoa_disease_phenotype.py | 245 ------------------ tests/unit/hpoa/test_hpoa_gene_to_disease.py | 83 ------ .../unit/hpoa/test_hpoa_gene_to_phenotype.py | 68 ----- tests/unit/hpoa/test_hpoa_utils.py | 53 ---- 7 files changed, 1 insertion(+), 518 deletions(-) delete mode 100644 tests/unit/hpoa/test_hpoa_disease_mode_of_inheritance.py delete mode 100644 tests/unit/hpoa/test_hpoa_disease_phenotype.py delete mode 100644 tests/unit/hpoa/test_hpoa_gene_to_disease.py delete mode 100644 tests/unit/hpoa/test_hpoa_gene_to_phenotype.py delete mode 100644 tests/unit/hpoa/test_hpoa_utils.py diff --git a/src/monarch_ingest/ingests.yaml b/src/monarch_ingest/ingests.yaml index 64a1dcdd..45d5716c 100644 --- a/src/monarch_ingest/ingests.yaml +++ b/src/monarch_ingest/ingests.yaml @@ -16,14 +16,11 @@ clinvar_variant: url: - 'https://github.com/monarch-initiative/clinvar-ingest/releases/latest/download/clinvar_variant_nodes.tsv' - 'https://github.com/monarch-initiative/clinvar-ingest/releases/latest/download/clinvar_variant_edges.tsv' -hpoa_disease_mode_of_inheritance: +hpoa: url: - 'https://github.com/monarch-initiative/monarch-phenotype-profile-ingest/releases/latest/download/hpoa_disease_mode_of_inheritance_edges.tsv' -hpoa_disease_to_phenotype: - 'https://github.com/monarch-initiative/monarch-phenotype-profile-ingest/releases/latest/download/hpoa_disease_to_phenotype_edges.tsv' -hpoa_gene_to_disease: - 'https://github.com/monarch-initiative/monarch-phenotype-profile-ingest/releases/latest/download/hpoa_gene_to_disease_edges.tsv' -hpoa_gene_to_phenotype: - 'https://github.com/monarch-initiative/monarch-phenotype-profile-ingest/releases/latest/download/hpoa_gene_to_phenotype_edges.tsv' ncbi_gene: url: diff --git a/src/monarch_ingest/ingests/hgnc/gene.py b/src/monarch_ingest/ingests/hgnc/gene.py index 591ef851..b35b5ac2 100644 --- a/src/monarch_ingest/ingests/hgnc/gene.py +++ b/src/monarch_ingest/ingests/hgnc/gene.py @@ -40,21 +40,4 @@ in_taxon_label=in_taxon_label, provided_by=["infores:hgnc"], ) - - # Excluding pub to gene associations for now - # pubmed_id_list = row["pubmed_id"].split('|') - # for each_id in pubmed_id_list: - # publication_id = "PMID:" + each_id - # publication = Publication( - # id=publication_id, - # type=koza_app.translation_table.resolve_term("publication"), - # ) - # relation = koza_app.translation_table.resolve_term("mentions"), - # association = InformationContentEntityToNamedThingAssociation( - # id="uuid:" + str(uuid.uuid1()), - # subject=gene.id, - # predicate=Predicate.mentions, - # object=publication.id, - # ) - koza_app.write(gene) diff --git a/tests/unit/hpoa/test_hpoa_disease_mode_of_inheritance.py b/tests/unit/hpoa/test_hpoa_disease_mode_of_inheritance.py deleted file mode 100644 index 476afaa1..00000000 --- a/tests/unit/hpoa/test_hpoa_disease_mode_of_inheritance.py +++ /dev/null @@ -1,48 +0,0 @@ -import pytest -from biolink_model.datamodel.pydanticmodel_v2 import DiseaseOrPhenotypicFeatureToGeneticInheritanceAssociation -from koza.utils.testing_utils import mock_koza # noqa: F401 - - -@pytest.fixture -def d2moi_entities(mock_koza, global_table): - row = { - "database_id": "OMIM:300425", - "disease_name": "Autism susceptibility, X-linked 1", - "qualifier": "", - "hpo_id": "HP:0001417", - "reference": "OMIM:300425", - "evidence": "IEA", - "onset": "", - "frequency": "", - "sex": "", - "modifier": "", - "aspect": "I", # assert 'Inheritance' test record - "biocuration": "HPO:iea[2009-02-17]", - } - - return mock_koza( - name="hpoa_disease_mode_of_inheritance", - data=row, - transform_code="./src/monarch_ingest/ingests/hpoa/disease_mode_of_inheritance.py", - global_table=global_table, - local_table="./src/monarch_ingest/ingests/hpoa/hpoa-translation.yaml", - ) - - -def test_disease_to_mode_of_inheritance_transform(d2moi_entities): - assert d2moi_entities - assert len(d2moi_entities) == 1 - association = [ - entity - for entity in d2moi_entities - if isinstance(entity, DiseaseOrPhenotypicFeatureToGeneticInheritanceAssociation) - ][0] - assert association.subject == "OMIM:300425" - - assert association.predicate == "biolink:has_mode_of_inheritance" - - assert association.object == "HP:0001417" - assert "OMIM:300425" in association.publications - assert "ECO:0000501" in association.has_evidence # from local HPOA translation table - assert association.primary_knowledge_source == "infores:hpo-annotations" - assert "infores:monarchinitiative" in association.aggregator_knowledge_source diff --git a/tests/unit/hpoa/test_hpoa_disease_phenotype.py b/tests/unit/hpoa/test_hpoa_disease_phenotype.py deleted file mode 100644 index 1143f295..00000000 --- a/tests/unit/hpoa/test_hpoa_disease_phenotype.py +++ /dev/null @@ -1,245 +0,0 @@ -import pytest -from biolink_model.datamodel.pydanticmodel_v2 import DiseaseToPhenotypicFeatureAssociation -from koza.utils.testing_utils import mock_koza # noqa: F401 - - -@pytest.fixture -def d2pf_entities_1(mock_koza, global_table): - row = { - "database_id": "OMIM:614856", - "disease_name": "Osteogenesis imperfecta, type XIII", - "qualifier": "NOT", - "hpo_id": "HP:0000343", - "reference": "OMIM:614856", - "evidence": "TAS", - "onset": "HP:0003593", - "frequency": "1/1", - "sex": "FEMALE", - "modifier": "", - "aspect": "C", # assert 'Clinical' test record - "biocuration": "HPO:skoehler[2012-11-16]", - } - return mock_koza( - name="hpoa_disease_to_phenotype", - data=row, - transform_code="./src/monarch_ingest/ingests/hpoa/disease_to_phenotype.py", - global_table=global_table, - local_table="./src/monarch_ingest/ingests/hpoa/hpoa-translation.yaml", - ) - - -def test_disease_to_phenotype_transform_1(d2pf_entities_1): - assert d2pf_entities_1 - assert len(d2pf_entities_1) == 1 - association = [entity for entity in d2pf_entities_1 if isinstance(entity, DiseaseToPhenotypicFeatureAssociation)][0] - assert association.subject == "OMIM:614856" - assert association.predicate == "biolink:has_phenotype" - assert association.negated - assert association.object == "HP:0000343" - assert len(association.publications) == 0 - assert "ECO:0000304" in association.has_evidence # from local HPOA translation table - assert association.sex_qualifier == "PATO:0000383" - assert association.onset_qualifier == "HP:0003593" - assert association.has_count == 1 - assert association.has_total == 1 - assert association.has_quotient == 1.0 # '1/1' implies Always present, i.e. in 100% of the cases. - assert association.has_percentage == 100.0 - assert association.frequency_qualifier is None # No implied frequency qualifier based on the '1/1' ratio. - assert association.primary_knowledge_source == "infores:omim" - assert association.aggregator_knowledge_source == ["infores:monarchinitiative", "infores:hpo-annotations"] - - -@pytest.fixture -def d2pf_entities_2(mock_koza, global_table): - row = { - "database_id": "OMIM:117650", - "disease_name": "Cerebrocostomandibular syndrome", - "qualifier": "", - "hpo_id": "HP:0001249", - "reference": "OMIM:117650", - "evidence": "TAS", - "onset": "", - "frequency": "50%", - "sex": "", - "modifier": "", - "aspect": "P", - "biocuration": "HPO:probinson[2009-02-17]", - } - return mock_koza( - name="hpoa_disease_to_phenotype", - data=row, - transform_code="./src/monarch_ingest/ingests/hpoa/disease_to_phenotype.py", - global_table=global_table, - local_table="./src/monarch_ingest/ingests/hpoa/hpoa-translation.yaml", - ) - - -def test_disease_to_phenotype_transform_2(d2pf_entities_2): - assert d2pf_entities_2 - assert len(d2pf_entities_2) == 1 - association = [entity for entity in d2pf_entities_2 if isinstance(entity, DiseaseToPhenotypicFeatureAssociation)][0] - assert association.subject == "OMIM:117650" - assert association.predicate == "biolink:has_phenotype" - assert not association.negated - assert association.object == "HP:0001249" - assert len(association.publications) == 0 - assert "ECO:0000304" in association.has_evidence # from local HPOA translation table - assert not association.sex_qualifier - assert not association.onset_qualifier - assert association.has_percentage == 50.0 # '50%' implies Present in 30% to 79% of the cases. - assert association.has_quotient == 0.5 - assert association.frequency_qualifier is None # No implied frequency qualifier based on the '50%' ratio. - assert association.primary_knowledge_source == "infores:omim" - assert association.aggregator_knowledge_source == ["infores:monarchinitiative", "infores:hpo-annotations"] - - -@pytest.fixture -def d2pf_entities_3(mock_koza, global_table): - row = { - "database_id": "OMIM:117650", - "disease_name": "Cerebrocostomandibular syndrome", - "qualifier": "", - "hpo_id": "HP:0001545", - "reference": "OMIM:117650;PMID:12345", - "evidence": "TAS", - "onset": "", - "frequency": "HP:0040283", - "sex": "", - "modifier": "", - "aspect": "P", - "biocuration": "HPO:skoehler[2017-07-13]", - } - return mock_koza( - name="hpoa_disease_to_phenotype", - data=row, - transform_code="./src/monarch_ingest/ingests/hpoa/disease_to_phenotype.py", - global_table=global_table, - local_table="./src/monarch_ingest/ingests/hpoa/hpoa-translation.yaml", - ) - - -def test_disease_to_phenotype_transform_3(d2pf_entities_3): - assert d2pf_entities_3 - assert len(d2pf_entities_3) == 1 - association = [entity for entity in d2pf_entities_3 if isinstance(entity, DiseaseToPhenotypicFeatureAssociation)][0] - assert association.subject == "OMIM:117650" - assert association.predicate == "biolink:has_phenotype" - assert not association.negated - assert association.object == "HP:0001545" - assert len(association.publications) == 1 - assert "PMID:12345" in association.publications - assert "ECO:0000304" in association.has_evidence # from local HPOA translation table - assert not association.sex_qualifier - assert not association.onset_qualifier - assert association.has_count is None - assert association.has_total is None - assert association.has_percentage is None - assert association.has_quotient is None - assert association.frequency_qualifier == "HP:0040283" # "HP:0040283" implies Present in 5% to 29% of the cases. - assert association.primary_knowledge_source == "infores:omim" - assert association.aggregator_knowledge_source == ["infores:monarchinitiative", "infores:hpo-annotations"] - - -@pytest.fixture -def d2pf_frequency_fraction_entities(mock_koza, global_table, d2pf_entities_1): - row = { - "database_id": "OMIM:117650", - "disease_name": "Cerebrocostomandibular syndrome", - "qualifier": "", - "hpo_id": "HP:0001545", - "reference": "OMIM:117650", - "evidence": "TAS", - "onset": "", - "frequency": "3/20", - "sex": "", - "modifier": "", - "aspect": "P", - "biocuration": "HPO:skoehler[2017-07-13]", - } - return mock_koza( - name="hpoa_disease_to_phenotype", - data=row, - transform_code="./src/monarch_ingest/ingests/hpoa/disease_to_phenotype.py", - global_table=global_table, - local_table="./src/monarch_ingest/ingests/hpoa/hpoa-translation.yaml", - ) - - -def test_disease_to_phenotype_transform_frequency_fraction(d2pf_frequency_fraction_entities): - assert d2pf_frequency_fraction_entities - assert len(d2pf_frequency_fraction_entities) == 1 - association = [ - entity - for entity in d2pf_frequency_fraction_entities - if isinstance(entity, DiseaseToPhenotypicFeatureAssociation) - ][0] - assert association.has_count == 3 - assert association.has_total == 20 - assert association.has_quotient == 0.15 - assert association.has_percentage == 15.0 - - -@pytest.fixture -def count_zero_entities(mock_koza, global_table): - row = { - 'database_id': 'OMIM:615654', - 'disease_name': 'Deafness, autosomal dominant 58', - 'qualifier': '', - 'hpo_id': 'HP:0007663', - 'reference': 'PMID:32337552', - 'evidence': 'PCS', - 'onset': '', - 'frequency': '0/20', - 'sex': '', - 'modifier': '', - 'aspect': 'P', - 'biocuration': 'HPO:probinson[2024-03-15];HPO:probinson[2024-03-15]', - } - - return mock_koza( - name="hpoa_disease_to_phenotype", - data=[row], - transform_code="./src/monarch_ingest/ingests/hpoa/disease_to_phenotype.py", - global_table=global_table, - local_table="./src/monarch_ingest/ingests/hpoa/hpoa-translation.yaml", - ) - - -def test_zero_fraction(count_zero_entities): - entities = count_zero_entities - assert len(entities) == 1 - association = [entity for entity in entities if isinstance(entity, DiseaseToPhenotypicFeatureAssociation)][0] - assert association.has_count == 0 - assert association.has_total == 20 - - -@pytest.fixture -def orphanet_entities(mock_koza, global_table): - row = { - 'database_id': 'ORPHA:79474', - 'disease_name': 'Atypical Werner syndrome', - 'qualifier': '', - 'hpo_id': 'HP:0000347', - 'reference': 'ORPHA:79474', - 'evidence': 'TAS', - 'onset': '', - 'frequency': 'HP:0040281', - 'sex': '', - 'modifier': '', - 'aspect': 'P', - 'biocuration': 'ORPHA:orphadata[2024-06-25]', - } - return mock_koza( - name="hpoa_disease_to_phenotype", - data=[row], - transform_code="./src/monarch_ingest/ingests/hpoa/disease_to_phenotype.py", - global_table=global_table, - local_table="./src/monarch_ingest/ingests/hpoa/hpoa-translation.yaml", - ) - - -def test_orphanet_entities(orphanet_entities): - entities = orphanet_entities - assert len(entities) == 1 - association = [entity for entity in entities if isinstance(entity, DiseaseToPhenotypicFeatureAssociation)][0] - assert association.primary_knowledge_source == "infores:orphanet" diff --git a/tests/unit/hpoa/test_hpoa_gene_to_disease.py b/tests/unit/hpoa/test_hpoa_gene_to_disease.py deleted file mode 100644 index 8f209ef8..00000000 --- a/tests/unit/hpoa/test_hpoa_gene_to_disease.py +++ /dev/null @@ -1,83 +0,0 @@ -from typing import List - -import pytest -from biolink_model.datamodel.pydanticmodel_v2 import CausalGeneToDiseaseAssociation -from koza.utils.testing_utils import mock_koza # noqa: F401 - -from monarch_ingest.constants import ( - BIOLINK_CAUSES, - BIOLINK_CONTRIBUTES_TO, - BIOLINK_GENE_ASSOCIATED_WITH_CONDITION, - INFORES_MEDGEN, - INFORES_MONARCHINITIATIVE, - INFORES_OMIM, - INFORES_ORPHANET, -) -from monarch_ingest.ingests.hpoa.hpoa_utils import get_knowledge_sources, get_predicate - - -@pytest.mark.parametrize( - ("original_source", "expected_primary_knowledge_source", "expected_aggregator_knowledge_source"), - [ - ( - "ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/mim2gene_medgen", - INFORES_OMIM, - [INFORES_MEDGEN, INFORES_MONARCHINITIATIVE], - ), - ("http://www.orphadata.org/data/xml/en_product6.xml", INFORES_ORPHANET, [INFORES_MONARCHINITIATIVE]), - ], -) -def test_knowledge_source( - original_source: str, expected_primary_knowledge_source: str, expected_aggregator_knowledge_source: List[str] -): - primary_knowledge_source, aggregator_knowledge_source = get_knowledge_sources( - original_source, INFORES_MONARCHINITIATIVE - ) - - assert primary_knowledge_source == expected_primary_knowledge_source - assert aggregator_knowledge_source.sort() == expected_aggregator_knowledge_source.sort() - - -@pytest.mark.parametrize( - ("association", "expected_predicate"), - [ - ("MENDELIAN", BIOLINK_CAUSES), - ("POLYGENIC", BIOLINK_CONTRIBUTES_TO), - ("UNKNOWN", BIOLINK_GENE_ASSOCIATED_WITH_CONDITION), - ], -) -def test_predicate(association: str, expected_predicate: str): - predicate = get_predicate(association) - - assert predicate == expected_predicate - - -@pytest.fixture -def row(): - return { - 'association_type': 'MENDELIAN', - 'disease_id': 'OMIM:212050', - 'gene_symbol': 'CARD9', - 'ncbi_gene_id': 'NCBIGene:64170', - 'source': 'ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/mim2gene_medgen', - } - - -@pytest.fixture -def basic_g2d_entities(mock_koza, row): - return mock_koza( - name="hpoa_gene_to_disease", - data=row, - transform_code="./src/monarch_ingest/ingests/hpoa/gene_to_disease.py", - ) - - -def test_hpoa_gene_to_disease(basic_g2d_entities): - assert len(basic_g2d_entities) == 1 - association = basic_g2d_entities[0] - assert isinstance(association, CausalGeneToDiseaseAssociation) - assert association.subject == "NCBIGene:64170" - assert association.object == "OMIM:212050" - assert association.predicate == "biolink:causes" - assert association.primary_knowledge_source == "infores:omim" - assert association.aggregator_knowledge_source.sort() == ["infores:medgen", "infores:monarchinitiative"].sort() diff --git a/tests/unit/hpoa/test_hpoa_gene_to_phenotype.py b/tests/unit/hpoa/test_hpoa_gene_to_phenotype.py deleted file mode 100644 index ab2fdd0b..00000000 --- a/tests/unit/hpoa/test_hpoa_gene_to_phenotype.py +++ /dev/null @@ -1,68 +0,0 @@ -import pytest -from biolink_model.datamodel.pydanticmodel_v2 import GeneToPhenotypicFeatureAssociation -from koza.utils.testing_utils import mock_koza # noqa: F401 - - -@pytest.fixture -def source_name(): - """ - :return: string source name of HPOA Gene to Phenotype ingest - """ - return "hpoa_gene_to_phenotype" - - -@pytest.fixture -def script(): - """ - :return: string path to HPOA Gene to Phenotype ingest script - """ - return "./src/monarch_ingest/ingests/hpoa/gene_to_phenotype.py" - - -@pytest.fixture -def test_row(): - """ - :return: Test HPOA Gene to Phenotype data row. - """ - return { - "ncbi_gene_id": "8192", - "gene_symbol": "CLPP", - "hpo_id": "HP:0000252", - "hpo_name": "Microcephaly", - } - - -@pytest.fixture -def basic_hpoa(mock_koza, source_name, script, test_row): - """ - Mock Koza run for HPOA Gene to Phenotype ingest. - - :param mock_koza: - :param source_name: - :param test_row: - :param script: - - :return: mock_koza application - """ - return mock_koza(name=source_name, data=test_row, transform_code=script) - - -@pytest.mark.parametrize("cls", [GeneToPhenotypicFeatureAssociation]) -def test_confirm_one_of_each_classes(cls, basic_hpoa): - class_entities = [entity for entity in basic_hpoa if isinstance(entity, cls)] - assert class_entities - assert len(class_entities) == 1 - assert class_entities[0] - - -def test_hpoa_g2p_association(basic_hpoa): - assert basic_hpoa - assert len(basic_hpoa) == 1 - association = [entity for entity in basic_hpoa if isinstance(entity, GeneToPhenotypicFeatureAssociation)][0] - assert len(basic_hpoa) == 1 - assert basic_hpoa[0] - assert basic_hpoa[0].subject == "NCBIGene:8192" - assert basic_hpoa[0].object == "HP:0000252" - assert basic_hpoa[0].predicate == "biolink:has_phenotype" - assert association.primary_knowledge_source == "infores:hpo-annotations" - assert "infores:monarchinitiative" in association.aggregator_knowledge_source diff --git a/tests/unit/hpoa/test_hpoa_utils.py b/tests/unit/hpoa/test_hpoa_utils.py deleted file mode 100644 index aa510e26..00000000 --- a/tests/unit/hpoa/test_hpoa_utils.py +++ /dev/null @@ -1,53 +0,0 @@ -""" -Tests of HPOA Utils methods -""" - -import pytest - -from monarch_ingest.ingests.hpoa.hpoa_utils import ( - FrequencyHpoTerm, - get_hpo_term, - phenotype_frequency_to_hpo_term, -) - - -def test_get_hpo_term(): - assert get_hpo_term("HP:0040282") == FrequencyHpoTerm(curie="HP:0040282", name="Frequent", lower=30, upper=79) - - -@pytest.mark.parametrize( - "raw_value, frequency_qualifier, percentage, quotient, count, total", - [ - # basic guard rails - (None, None, None, None, None, None), - ("", None, None, None, None, None), - # frequencies given as percentages should be mapped to HPO terms - ("0%", None, 0.0, 0.0, None, None), - ("3%", None, 3.0, 0.03, None, None), - ("20%", None, 20.0, 0.2, None, None), - ("60%", None, 60.0, 0.6, None, None), - ("90%", None, 90.0, 0.9, None, None), - ("100%", None, 100.0, 1.0, None, None), - # frequencies given as fractions should be mapped to HPO terms, and have percentages and quotients - ("0/100", None, 0.0, 0.0, 0, 100), - ("3/100", None, 3.0, 0.03, 3, 100), - ("5/20", None, 25.0, 0.25, 5, 20), - ("60/100", None, 60.0, 0.6, 60, 100), - ("90/100", None, 90.0, 0.9, 90, 100), - ("100/100", None, 100.0, 1.0, 100, 100), - # frequencies given as HPO qualifiers should be mapped to percentages only for 0 and 100 - ("HP:0040285", "HP:0040285", None, None, None, None), - ("HP:0040284", "HP:0040284", None, None, None, None), - ("HP:0040283", "HP:0040283", None, None, None, None), - ("HP:0040282", "HP:0040282", None, None, None, None), - ("HP:0040281", "HP:0040281", None, None, None, None), - ("HP:0040280", "HP:0040280", None, None, None, None), - ], -) -def test_frequency_result(raw_value, frequency_qualifier, percentage, quotient, count, total): - frequency = phenotype_frequency_to_hpo_term(raw_value) - assert frequency.frequency_qualifier == frequency_qualifier - assert frequency.has_percentage == percentage - assert frequency.has_quotient == quotient - assert frequency.has_count == count - assert frequency.has_total == total