Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New annotations from opencravat #188

Merged
merged 10 commits into from
Dec 11, 2024
4 changes: 2 additions & 2 deletions backend/src/core/annotation_unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,10 @@ def increment_delay_count(self):

def delay_count_exceeds(self):
"""
Checks if the annotation unit has exceeded the delay count within the queue.
Checks if the annotation unit has exceeded the delay count within the queue
Delay count limit is set as a magic number (15; previously 10).
"""
if self.dataset['delay_count'] < 10:
if self.dataset['delay_count'] < 15:
return False
return True

Expand Down
27 changes: 27 additions & 0 deletions backend/tests/unit/core/test_annotation_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@ def test_extraction_forge_gene_linkout_dataset(forge_annotation_task_gene):
assert extracted_annotations[0]['value'] == 'https://www.ncbi.nlm.nih.gov/gene?Db=gene&Cmd=DetailsSearch&Term=45614'


def test_extraction_forge_hgvs_variant_without_transcript_version(hgvs_without_transcript_version_annotation_task):
    """Verifies the jq query forges the HGVS variant with the version removed from its transcript"""
    task = hgvs_without_transcript_version_annotation_task
    # Forge the annotation first, then run the jq extraction over the forged result.
    extracted = task.extract(task.annotate())
    assert extracted[0]['value'] == "NM_170707:c.745C>T"


def test_annotation_extraction_for_transcript_id_dataset(http_annotation_transcript_id, transcript_annotation_response):
"""Verifying genomic unit extraction for a transcript using the transcript ID dataset"""
actual_extractions = http_annotation_transcript_id.extract(transcript_annotation_response)
Expand Down Expand Up @@ -250,6 +257,26 @@ def fixture_transcript_id_dataset():
}


@pytest.fixture(name="hgvs_without_transcript_version_annotation_task")
def fixture_hgvs_without_transcript_version(hgvs_variant_genomic_unit):
    """
    A forge annotation task for the 'hgvs_variant_without_transcript_version' dataset, used to exercise
    jq parsing that splits the HGVS variant string and rebuilds it without the transcript's version suffix.
    """
    dataset = {
        "data_set": "hgvs_variant_without_transcript_version",
        "data_source": "Rosalution",
        "annotation_source_type": "forge",
        "genomic_unit_type": "hgvs_variant",
        "base_string": "{hgvs_variant}",
        # jq: split on ':', drop everything after the first '.' in the transcript, then rejoin with ':'
        "attribute":
            ".hgvs_variant_without_transcript_version | split(\":\") as $transcript_split | $transcript_split[0] | split(\".\")[0] | . + \":\" + $transcript_split[1] | {\"hgvs_variant_without_transcript_version\": .}",  # pylint: disable=line-too-long
        "versioning_type": "rosalution",
    }
    return ForgeAnnotationTask(AnnotationUnit(hgvs_variant_genomic_unit, dataset))


@pytest.fixture(name="hgvs_variant_annotation_unit")
def fixture_hgvs_variant_annotation_unit(hgvs_variant_genomic_unit, transcript_id_dataset):
"""
Expand Down
2 changes: 1 addition & 1 deletion backend/tests/unit/core/test_annotation_unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,5 +64,5 @@ def fixture_annotation_unit_lmna_has_dependencies(annotation_unit_lmna):
@pytest.fixture(name="annotation_unit_lmna_exceeded_delay_count")
def fixture_annotation_unit_lmna_with_annotated_dependency(annotation_unit_lmna):
"""Returns the annotation unit for the genomic unit LMNA and the dataset Clingen gene url with its delay count set past the limit"""
annotation_unit_lmna.dataset['delay_count'] = 10
annotation_unit_lmna.dataset['delay_count'] = 20
return annotation_unit_lmna
161 changes: 134 additions & 27 deletions etc/fixtures/initial-seed/annotations-config.json
Original file line number Diff line number Diff line change
@@ -1,24 +1,37 @@
[
{
"data_set": "hgvs_variant_without_transcript_version",
"data_source": "Rosalution",
"annotation_source_type": "forge",
"genomic_unit_type": "hgvs_variant",
"base_string": "{hgvs_variant}",
"attribute": ".hgvs_variant_without_transcript_version | split(\":\") as $transcript_split | $transcript_split[0] | split(\".\")[0] | . + \":\" + $transcript_split[1] | {\"hgvs_variant_without_transcript_version\": .}",
"versioning_type": "rosalution"
},
{
"data_set": "transcript_id",
"data_source": "Ensembl",
"genomic_unit_type": "hgvs_variant",
"transcript": true,
"annotation_source_type": "http",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;",
"attribute": ".[].transcript_consequences[] | { transcript_id: .transcript_id }",
"dependencies": ["hgvs_variant_without_transcript_version"],
"versioning_type": "rest",
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
},
},
{
"data_set": "HPO_NCBI_GENE_ID",
"data_source": "HPO",
"genomic_unit_type": "gene",
"data_set": "ensembl_vep_vcf_string",
"data_source": "Ensembl",
"genomic_unit_type": "hgvs_variant",
"annotation_source_type": "http",
"url": "https://ontology.jax.org/api/network/search/GENE?q={gene}&page=0&limit=10",
"attribute": "{ \"HPO_NCBI_GENE_ID\": .results[] | select( .name == \"{gene}\") | .id }",
"versioning_type": "date"
"url": "https://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;vcf_string=1;",
"attribute": ".[] | {\"ensembl_vep_vcf_string\": .vcf_string} ",
"versioning_type": "rest",
"dependencies": ["hgvs_variant_without_transcript_version"],
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
},
{
"data_set": "Ensembl Gene Id",
Expand Down Expand Up @@ -48,8 +61,9 @@
"genomic_unit_type": "hgvs_variant",
"annotation_source_type": "http",
"attribute": ".[] | select(.colocated_variants != null) | .colocated_variants[] | select(.var_synonyms != null) | .var_synonyms | select( .ClinVar != null ) | select(.ClinVar != []) | .ClinVar[] | select(contains(\"VCV\")) | sub(\"VCV0+\"; \"\") | {\"ClinVar_Variantion_Id\": . } ",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;clinvar=1;",
"versioning_type": "rosalution"
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;clinvar=1;",
"versioning_type": "rosalution",
"dependencies": ["hgvs_variant_without_transcript_version"]
},
{
"data_set": "OMIM_gene_search_url",
Expand Down Expand Up @@ -214,6 +228,44 @@
"dependencies": ["Ensembl Gene Id"],
"versioning_type": "rosalution"
},
{
"data_set": "gnomAD_variant_url",
"data_source": "Rosalution",
"genomic_unit_type": "hgvs_variant",
"annotation_source_type": "forge",
"base_string": "https://gnomad.broadinstitute.org/variant/{ensembl_vep_vcf_string}?dataset=gnomad_r4",
"attribute": "{ \"gnomAD_variant_url\": .gnomAD_variant_url }",
"dependencies": ["ensembl_vep_vcf_string"],
"versioning_type": "rosalution"
},
{
"data_set": "gnomAD4",
"data_source": "OpenCravat",
"genomic_unit_type": "hgvs_variant",
"annotation_source_type": "http",
"base_string": "https://run.opencravat.org/submit/annotate?{opencravat_search_variant_vcf_string}&annotators=gnomad4",
"attribute": "{ \"gnomad4\": .gnomad4 }",
"dependencies": ["opencravat_search_variant_vcf_string"],
"versioning_type": "date"
},
{
"data_set": "COSMIC_gene_url",
"data_source": "Rosalution",
"genomic_unit_type": "gene",
"annotation_source_type": "forge",
"base_string": "https://cancer.sanger.ac.uk/cosmic/gene/analysis?ln={gene}",
"attribute": "{ \"COSMIC_gene_url\": .COSMIC_gene_url }",
"versioning_type": "rosalution"
},
{
"data_set": "HPO_NCBI_GENE_ID",
"data_source": "HPO",
"genomic_unit_type": "gene",
"annotation_source_type": "http",
"url": "https://ontology.jax.org/api/network/search/GENE?q={gene}&page=0&limit=10",
"attribute": "{ \"HPO_NCBI_GENE_ID\": .results[] | select( .name == \"{gene}\") | .id }",
"versioning_type": "date"
},
{
"data_set": "OMIM",
"data_source": "HPO",
Expand Down Expand Up @@ -384,9 +436,10 @@
"genomic_unit_type": "hgvs_variant",
"transcript": true,
"annotation_source_type": "http",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;",
"attribute": ".[].transcript_consequences[] | { polyphen_prediction: .polyphen_prediction, transcript_id: .transcript_id }",
"versioning_type": "rest",
"dependencies": ["hgvs_variant_without_transcript_version"],
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
},
Expand All @@ -396,9 +449,10 @@
"genomic_unit_type": "hgvs_variant",
"transcript": true,
"annotation_source_type": "http",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;",
"attribute": ".[].transcript_consequences[] | { polyphen_score: .polyphen_score, transcript_id: .transcript_id }",
"versioning_type": "rest",
"dependencies": ["hgvs_variant_without_transcript_version"],
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
},
Expand All @@ -408,9 +462,10 @@
"genomic_unit_type": "hgvs_variant",
"transcript": true,
"annotation_source_type": "http",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;",
"attribute": ".[].transcript_consequences[] | { sift_prediction: .sift_prediction, transcript_id: .transcript_id }",
"versioning_type": "rest",
"dependencies": ["hgvs_variant_without_transcript_version"],
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
},
Expand All @@ -420,9 +475,11 @@
"genomic_unit_type": "hgvs_variant",
"transcript": true,
"annotation_source_type": "http",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;",
"attribute": ".[].transcript_consequences[] | { sift_score: .sift_score, transcript_id: .transcript_id }",
"versioning_type": "rest",
"dependencies": ["hgvs_variant_without_transcript_version"],

"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
},
Expand All @@ -432,20 +489,57 @@
"genomic_unit_type": "hgvs_variant",
"transcript": true,
"annotation_source_type": "http",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;",
"attribute": ".[].transcript_consequences[] | { consequence_terms: .consequence_terms, transcript_id: .transcript_id }",
"dependencies": ["hgvs_variant_without_transcript_version"],
"versioning_type": "rest",
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
},
{
"data_set": "CADD",
"data_source": "Ensembl",
"genomic_unit_type": "hgvs_variant",
"annotation_source_type": "http",
"url": "https://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;CADD=1;refseq=1;",
"url": "https://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;CADD=1;refseq=1;",
"attribute": ".[].transcript_consequences[] | select( .transcript_id | contains(\"{transcript}\") ) | { CADD: .cadd_phred }",
"dependencies": ["transcript"],
"dependencies": ["transcript","hgvs_variant_without_transcript_version"],
"versioning_type": "rest",
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
},
{
"data_set": "alphamissense_pathogenicity",
"data_source": "Ensembl",
"genomic_unit_type": "hgvs_variant",
"annotation_source_type": "http",
"url": "https://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;AlphaMissense=1;refseq=1;",
"attribute": ".[].transcript_consequences[] | select( .transcript_id | contains(\"{transcript}\") ) | { alphamissense: .alphamissense.am_pathogenicity }",
"dependencies": ["transcript","hgvs_variant_without_transcript_version"],
"versioning_type": "rest",
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
},
{
"data_set": "alphamissense_classification",
"data_source": "Ensembl",
"genomic_unit_type": "hgvs_variant",
"annotation_source_type": "http",
"url": "https://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;AlphaMissense=1;refseq=1;",
"attribute": ".[].transcript_consequences[] | select( .transcript_id | contains(\"{transcript}\") ) | { alphamissense: .alphamissense.am_class }",
"dependencies": ["transcript","hgvs_variant_without_transcript_version"],
"versioning_type": "rest",
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
},
{
"data_set": "revel",
"data_source": "Ensembl",
"genomic_unit_type": "hgvs_variant",
"annotation_source_type": "http",
"url": "https://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;AlphaMissense=1;refseq=1;",
"attribute": ".[].transcript_consequences[] | select( .transcript_id | contains(\"{transcript}\") ) | { revel: .revel }",
"dependencies": ["transcript","hgvs_variant_without_transcript_version"],
"versioning_type": "rest",
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
Expand All @@ -456,9 +550,10 @@
"genomic_unit_type": "hgvs_variant",
"transcript": true,
"annotation_source_type": "http",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;",
"attribute": ".[].transcript_consequences[] | { impact: .impact, transcript_id: .transcript_id }",
"versioning_type": "rest",
"dependencies": ["hgvs_variant_without_transcript_version"],
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
},
Expand Down Expand Up @@ -538,6 +633,18 @@
"version_url": "https://www.alliancegenome.org/api/releaseInfo",
"version_attribute": ".releaseVersion"
},
{
"data_set": "C-Elegens_Worm_Base_url",
"data_source": "Alliance Genome",
"annotation_source_type": "http",
"genomic_unit_type": "gene",
"url": "https://www.alliancegenome.org/api/gene/{C-Elegens Gene Identifier}",
"attribute": "{ \"C-Elegens_Worm_Base_url\": .modCrossRefCompleteUrl }",
"dependencies": ["C-Elegens Gene Identifier"],
"versioning_type": "rest",
"version_url": "https://www.alliancegenome.org/api/releaseInfo",
"version_attribute": ".releaseVersion"
},
{
"data_set": "C-Elegens_Alliance_Genome_url",
"data_source": "Rosalution",
Expand All @@ -549,15 +656,15 @@
"versioning_type": "rosalution"
},
{
"data_set": "C-Elegens_Worm_Base_url",
"data_source": "Alliance Genome",
"data_set": "opencravat_search_variant_vcf_string",
"data_source": "Ensembl",
"genomic_unit_type": "hgvs_variant",
"annotation_source_type": "http",
"genomic_unit_type": "gene",
"url": "https://www.alliancegenome.org/api/gene/{C-Elegens Gene Identifier}",
"attribute": "{ \"C-Elegens_Worm_Base_url\": .modCrossRefCompleteUrl }",
"dependencies": ["C-Elegens Gene Identifier"],
"url": "https://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;CADD=1;refseq=1;vcf_string=1;",
"attribute": ".[] | .vcf_string | split(\"-\") | \"chrom=chr\" + .[0] + \"&pos=\" + .[1] + \"&ref_base=\" + .[2] + \"&alt_base=\" + .[3] | {\"opencravat_search_variant_vcf_string\": . }",
"versioning_type": "rest",
"version_url": "https://www.alliancegenome.org/api/releaseInfo",
"version_attribute": ".releaseVersion"
"dependencies": ["hgvs_variant_without_transcript_version"],
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
}
]
46 changes: 46 additions & 0 deletions etc/fixtures/initial-seed/genomic-units.json
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,34 @@
}
]
},
{
"alphamissense_classification": [
{
"data_source": "Ensembl",
"version": 112,
"value": "ambiguous"
}
]
},
{
"alphamissense_pathogenicity": [
{
"data_source": "Ensembl",
"version": 112,
"value": 0.5303
}
]
},
{
"alphamissense": [{
"data_source": "Ensembl",
"version": 112,
"value": {
"am_class": "ambiguous",
"am_pathogenicity": "0.5303"
}
}]
},
{
"ClinVar_variant_url": [
{
Expand All @@ -465,6 +493,24 @@
"value": "https://www.ncbi.nlm.nih.gov/clinvar/variation/581244"
}
]
},
{
"ensembl_vep_vcf_string": [
{
"data_source": "Rosalution",
"version": "rosalution-manifest-00",
"value": "X-151404916-G-T"
}
]
},
{
"gnomAD_variant_url": [
{
"data_source": "Rosalution",
"version": "rosalution-manifest-00",
"value": "https://gnomad.broadinstitute.org/variant/X-151404916-G-T?dataset=gnomad_r4"
}
]
}
]
},
Expand Down
Binary file added frontend/src/assets/cosmic_logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading