Skip to content

Commit

Permalink
New annotations from opencravat (#188)
Browse files Browse the repository at this point in the history
* Rendering some of the new linkouts and datasets, with some fixture updates

* Fixed linting and added gnomAD datasets as annotations, even though they are not visualized yet. Some additional linkout datasets are also included.
  • Loading branch information
SeriousHorncat committed Dec 11, 2024
1 parent bc739b6 commit 95499a8
Show file tree
Hide file tree
Showing 9 changed files with 280 additions and 36 deletions.
4 changes: 2 additions & 2 deletions backend/src/core/annotation_unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,10 @@ def increment_delay_count(self):

def delay_count_exceeds(self, max_delay_count=15):
    """
    Checks if the annotation unit has exceeded the delay count within the queue.

    The limit defaults to 15, the threshold this method previously hard-coded.

    Args:
        max_delay_count: Delay count at which the unit is considered to have exceeded the limit.

    Returns:
        False while the dataset's 'delay_count' is below the limit, True once it reaches or passes it.
    """
    # Reaching the limit already counts as exceeding it, mirroring the original `< limit` guard.
    return self.dataset['delay_count'] >= max_delay_count

Expand Down
27 changes: 27 additions & 0 deletions backend/tests/unit/core/test_annotation_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@ def test_extraction_forge_gene_linkout_dataset(forge_annotation_task_gene):
assert extracted_annotations[0]['value'] == 'https://www.ncbi.nlm.nih.gov/gene?Db=gene&Cmd=DetailsSearch&Term=45614'


def test_extraction_forge_hgvs_variant_without_transcript_version(hgvs_without_transcript_version_annotation_task):
    """Checks that the forge jq query rebuilds the variant's HGVS string without the transcript version"""
    task = hgvs_without_transcript_version_annotation_task
    # Forge the annotation first, then run the extraction on the forged result.
    extracted = task.extract(task.annotate())
    first_value = extracted[0]['value']
    assert first_value == "NM_170707:c.745C>T"


def test_annotation_extraction_for_transcript_id_dataset(http_annotation_transcript_id, transcript_annotation_response):
"""Verifying genomic unit extraction for a transcript using the transcript ID dataset"""
actual_extractions = http_annotation_transcript_id.extract(transcript_annotation_response)
Expand Down Expand Up @@ -250,6 +257,26 @@ def fixture_transcript_id_dataset():
}


@pytest.fixture(name="hgvs_without_transcript_version_annotation_task")
def fixture_hgvs_without_transcript_version(hgvs_variant_genomic_unit):
    """A forge annotation task for exercising jq parsing that rebuilds a string within a result."""
    # The jq filter splits the HGVS string on ':', strips everything after the
    # first '.' of the leading part, and joins the two pieces back with ':'.
    jq_attribute = (
        ".hgvs_variant_without_transcript_version | split(\":\") as $transcript_split"
        " | $transcript_split[0] | split(\".\")[0] | . + \":\" + $transcript_split[1]"
        " | {\"hgvs_variant_without_transcript_version\": .}"
    )
    dataset = {
        "data_set": "hgvs_variant_without_transcript_version",
        "data_source": "Rosalution",
        "annotation_source_type": "forge",
        "genomic_unit_type": "hgvs_variant",
        "base_string": "{hgvs_variant}",
        "attribute": jq_attribute,
        "versioning_type": "rosalution",
    }
    return ForgeAnnotationTask(AnnotationUnit(hgvs_variant_genomic_unit, dataset))


@pytest.fixture(name="hgvs_variant_annotation_unit")
def fixture_hgvs_variant_annotation_unit(hgvs_variant_genomic_unit, transcript_id_dataset):
"""
Expand Down
2 changes: 1 addition & 1 deletion backend/tests/unit/core/test_annotation_unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,5 +64,5 @@ def fixture_annotation_unit_lmna_has_dependencies(annotation_unit_lmna):
@pytest.fixture(name="annotation_unit_lmna_exceeded_delay_count")
def fixture_annotation_unit_lmna_with_annotated_dependency(annotation_unit_lmna):
    """Returns the LMNA annotation unit with its dataset's delay count set to 20"""
    # Mutates the shared fixture's dataset in place; 20 is the value the
    # exceeded-delay-count tests rely on.
    annotation_unit_lmna.dataset['delay_count'] = 20
    return annotation_unit_lmna
161 changes: 134 additions & 27 deletions etc/fixtures/initial-seed/annotations-config.json
Original file line number Diff line number Diff line change
@@ -1,24 +1,37 @@
[
{
"data_set": "hgvs_variant_without_transcript_version",
"data_source": "Rosalution",
"annotation_source_type": "forge",
"genomic_unit_type": "hgvs_variant",
"base_string": "{hgvs_variant}",
"attribute": ".hgvs_variant_without_transcript_version | split(\":\") as $transcript_split | $transcript_split[0] | split(\".\")[0] | . + \":\" + $transcript_split[1] | {\"hgvs_variant_without_transcript_version\": .}",
"versioning_type": "rosalution"
},
{
"data_set": "transcript_id",
"data_source": "Ensembl",
"genomic_unit_type": "hgvs_variant",
"transcript": true,
"annotation_source_type": "http",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;",
"attribute": ".[].transcript_consequences[] | { transcript_id: .transcript_id }",
"dependencies": ["hgvs_variant_without_transcript_version"],
"versioning_type": "rest",
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
},
},
{
"data_set": "HPO_NCBI_GENE_ID",
"data_source": "HPO",
"genomic_unit_type": "gene",
"data_set": "ensembl_vep_vcf_string",
"data_source": "Ensembl",
"genomic_unit_type": "hgvs_variant",
"annotation_source_type": "http",
"url": "https://ontology.jax.org/api/network/search/GENE?q={gene}&page=0&limit=10",
"attribute": "{ \"HPO_NCBI_GENE_ID\": .results[] | select( .name == \"{gene}\") | .id }",
"versioning_type": "date"
"url": "https://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;vcf_string=1;",
"attribute": ".[] | {\"ensembl_vep_vcf_string\": .vcf_string} ",
"versioning_type": "rest",
"dependencies": ["hgvs_variant_without_transcript_version"],
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
},
{
"data_set": "Ensembl Gene Id",
Expand Down Expand Up @@ -48,8 +61,9 @@
"genomic_unit_type": "hgvs_variant",
"annotation_source_type": "http",
"attribute": ".[] | select(.colocated_variants != null) | .colocated_variants[] | select(.var_synonyms != null) | .var_synonyms | select( .ClinVar != null ) | select(.ClinVar != []) | .ClinVar[] | select(contains(\"VCV\")) | sub(\"VCV0+\"; \"\") | {\"ClinVar_Variantion_Id\": . } ",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;clinvar=1;",
"versioning_type": "rosalution"
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;clinvar=1;",
"versioning_type": "rosalution",
"dependencies": ["hgvs_variant_without_transcript_version"]
},
{
"data_set": "OMIM_gene_search_url",
Expand Down Expand Up @@ -214,6 +228,44 @@
"dependencies": ["Ensembl Gene Id"],
"versioning_type": "rosalution"
},
{
"data_set": "gnomAD_variant_url",
"data_source": "Rosalution",
"genomic_unit_type": "hgvs_variant",
"annotation_source_type": "forge",
"base_string": "https://gnomad.broadinstitute.org/variant/{ensembl_vep_vcf_string}?dataset=gnomad_r4",
"attribute": "{ \"gnomAD_variant_url\": .gnomAD_variant_url }",
"dependencies": ["ensembl_vep_vcf_string"],
"versioning_type": "rosalution"
},
{
"data_set": "gnomAD4",
"data_source": "OpenCravat",
"genomic_unit_type": "hgvs_variant",
"annotation_source_type": "http",
"base_string": "https://run.opencravat.org/submit/annotate?{opencravat_search_variant_vcf_string}&annotators=gnomad4",
"attribute": "{ \"gnomad4\": .gnomad4 }",
"dependencies": ["opencravat_search_variant_vcf_string"],
"versioning_type": "date"
},
{
"data_set": "COSMIC_gene_url",
"data_source": "Rosalution",
"genomic_unit_type": "gene",
"annotation_source_type": "forge",
"base_string": "https://cancer.sanger.ac.uk/cosmic/gene/analysis?ln={gene}",
"attribute": "{ \"COSMIC_gene_url\": .COSMIC_gene_url }",
"versioning_type": "rosalution"
},
{
"data_set": "HPO_NCBI_GENE_ID",
"data_source": "HPO",
"genomic_unit_type": "gene",
"annotation_source_type": "http",
"url": "https://ontology.jax.org/api/network/search/GENE?q={gene}&page=0&limit=10",
"attribute": "{ \"HPO_NCBI_GENE_ID\": .results[] | select( .name == \"{gene}\") | .id }",
"versioning_type": "date"
},
{
"data_set": "OMIM",
"data_source": "HPO",
Expand Down Expand Up @@ -384,9 +436,10 @@
"genomic_unit_type": "hgvs_variant",
"transcript": true,
"annotation_source_type": "http",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;",
"attribute": ".[].transcript_consequences[] | { polyphen_prediction: .polyphen_prediction, transcript_id: .transcript_id }",
"versioning_type": "rest",
"dependencies": ["hgvs_variant_without_transcript_version"],
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
},
Expand All @@ -396,9 +449,10 @@
"genomic_unit_type": "hgvs_variant",
"transcript": true,
"annotation_source_type": "http",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;",
"attribute": ".[].transcript_consequences[] | { polyphen_score: .polyphen_score, transcript_id: .transcript_id }",
"versioning_type": "rest",
"dependencies": ["hgvs_variant_without_transcript_version"],
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
},
Expand All @@ -408,9 +462,10 @@
"genomic_unit_type": "hgvs_variant",
"transcript": true,
"annotation_source_type": "http",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;",
"attribute": ".[].transcript_consequences[] | { sift_prediction: .sift_prediction, transcript_id: .transcript_id }",
"versioning_type": "rest",
"dependencies": ["hgvs_variant_without_transcript_version"],
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
},
Expand All @@ -420,9 +475,11 @@
"genomic_unit_type": "hgvs_variant",
"transcript": true,
"annotation_source_type": "http",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;",
"attribute": ".[].transcript_consequences[] | { sift_score: .sift_score, transcript_id: .transcript_id }",
"versioning_type": "rest",
"dependencies": ["hgvs_variant_without_transcript_version"],

"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
},
Expand All @@ -432,20 +489,57 @@
"genomic_unit_type": "hgvs_variant",
"transcript": true,
"annotation_source_type": "http",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;",
"attribute": ".[].transcript_consequences[] | { consequence_terms: .consequence_terms, transcript_id: .transcript_id }",
"dependencies": ["hgvs_variant_without_transcript_version"],
"versioning_type": "rest",
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
},
{
"data_set": "CADD",
"data_source": "Ensembl",
"genomic_unit_type": "hgvs_variant",
"annotation_source_type": "http",
"url": "https://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;CADD=1;refseq=1;",
"url": "https://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;CADD=1;refseq=1;",
"attribute": ".[].transcript_consequences[] | select( .transcript_id | contains(\"{transcript}\") ) | { CADD: .cadd_phred }",
"dependencies": ["transcript"],
"dependencies": ["transcript","hgvs_variant_without_transcript_version"],
"versioning_type": "rest",
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
},
{
"data_set": "alphamissense_pathogenicity",
"data_source": "Ensembl",
"genomic_unit_type": "hgvs_variant",
"annotation_source_type": "http",
"url": "https://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;AlphaMissense=1;refseq=1;",
"attribute": ".[].transcript_consequences[] | select( .transcript_id | contains(\"{transcript}\") ) | { alphamissense: .alphamissense.am_pathogenicity }",
"dependencies": ["transcript","hgvs_variant_without_transcript_version"],
"versioning_type": "rest",
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
},
{
"data_set": "alphamissense_classification",
"data_source": "Ensembl",
"genomic_unit_type": "hgvs_variant",
"annotation_source_type": "http",
"url": "https://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;AlphaMissense=1;refseq=1;",
"attribute": ".[].transcript_consequences[] | select( .transcript_id | contains(\"{transcript}\") ) | { alphamissense: .alphamissense.am_class }",
"dependencies": ["transcript","hgvs_variant_without_transcript_version"],
"versioning_type": "rest",
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
},
{
"data_set": "revel",
"data_source": "Ensembl",
"genomic_unit_type": "hgvs_variant",
"annotation_source_type": "http",
"url": "https://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;AlphaMissense=1;refseq=1;",
"attribute": ".[].transcript_consequences[] | select( .transcript_id | contains(\"{transcript}\") ) | { revel: .revel }",
"dependencies": ["transcript","hgvs_variant_without_transcript_version"],
"versioning_type": "rest",
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
Expand All @@ -456,9 +550,10 @@
"genomic_unit_type": "hgvs_variant",
"transcript": true,
"annotation_source_type": "http",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;",
"attribute": ".[].transcript_consequences[] | { impact: .impact, transcript_id: .transcript_id }",
"versioning_type": "rest",
"dependencies": ["hgvs_variant_without_transcript_version"],
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
},
Expand Down Expand Up @@ -538,6 +633,18 @@
"version_url": "https://www.alliancegenome.org/api/releaseInfo",
"version_attribute": ".releaseVersion"
},
{
"data_set": "C-Elegens_Worm_Base_url",
"data_source": "Alliance Genome",
"annotation_source_type": "http",
"genomic_unit_type": "gene",
"url": "https://www.alliancegenome.org/api/gene/{C-Elegens Gene Identifier}",
"attribute": "{ \"C-Elegens_Worm_Base_url\": .modCrossRefCompleteUrl }",
"dependencies": ["C-Elegens Gene Identifier"],
"versioning_type": "rest",
"version_url": "https://www.alliancegenome.org/api/releaseInfo",
"version_attribute": ".releaseVersion"
},
{
"data_set": "C-Elegens_Alliance_Genome_url",
"data_source": "Rosalution",
Expand All @@ -549,15 +656,15 @@
"versioning_type": "rosalution"
},
{
"data_set": "C-Elegens_Worm_Base_url",
"data_source": "Alliance Genome",
"data_set": "opencravat_search_variant_vcf_string",
"data_source": "Ensembl",
"genomic_unit_type": "hgvs_variant",
"annotation_source_type": "http",
"genomic_unit_type": "gene",
"url": "https://www.alliancegenome.org/api/gene/{C-Elegens Gene Identifier}",
"attribute": "{ \"C-Elegens_Worm_Base_url\": .modCrossRefCompleteUrl }",
"dependencies": ["C-Elegens Gene Identifier"],
"url": "https://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;CADD=1;refseq=1;vcf_string=1;",
"attribute": ".[] | .vcf_string | split(\"-\") | \"chrom=chr\" + .[0] + \"&pos=\" + .[1] + \"&ref_base=\" + .[2] + \"&alt_base=\" + .[3] | {\"opencravat_search_variant_vcf_string\": . }",
"versioning_type": "rest",
"version_url": "https://www.alliancegenome.org/api/releaseInfo",
"version_attribute": ".releaseVersion"
"dependencies": ["hgvs_variant_without_transcript_version"],
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
}
]
46 changes: 46 additions & 0 deletions etc/fixtures/initial-seed/genomic-units.json
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,34 @@
}
]
},
{
"alphamissense_classification": [
{
"data_source": "Ensembl",
"version": 112,
"value": "ambiguous"
}
]
},
{
"alphamissense_pathogenicity": [
{
"data_source": "Ensembl",
"version": 112,
"value": 0.5303
}
]
},
{
"alphamissense": [{
"data_source": "Ensembl",
"version": 112,
"value": {
"am_class": "ambiguous",
"am_pathogenicity": "0.5303"
}
}]
},
{
"ClinVar_variant_url": [
{
Expand All @@ -465,6 +493,24 @@
"value": "https://www.ncbi.nlm.nih.gov/clinvar/variation/581244"
}
]
},
{
"ensembl_vep_vcf_string": [
{
"data_source": "Rosalution",
"version": "rosalution-manifest-00",
"value": "X-151404916-G-T"
}
]
},
{
"gnomAD_variant_url": [
{
"data_source": "Rosalution",
"version": "rosalution-manifest-00",
"value": "https://gnomad.broadinstitute.org/variant/X-151404916-G-T?dataset=gnomad_r4"
}
]
}
]
},
Expand Down
Binary file added frontend/src/assets/cosmic_logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit 95499a8

Please sign in to comment.