Skip to content

Commit

Permalink
work in progress for new annotations
Browse files Browse the repository at this point in the history
  • Loading branch information
SeriousHorncat committed Dec 10, 2024
1 parent 969453a commit 3e23ed5
Show file tree
Hide file tree
Showing 5 changed files with 155 additions and 28 deletions.
1 change: 1 addition & 0 deletions backend/src/core/annotation_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ def extract(self, incomming_json):
))
jq_result = next(jq_results, None)
while jq_result is not None:
print(jq_result)
result_keys = list(jq_result.keys())

if 'transcript' in self.annotation_unit.dataset:
Expand Down
21 changes: 21 additions & 0 deletions backend/tests/unit/core/test_annotation_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@ def test_extraction_forge_gene_linkout_dataset(forge_annotation_task_gene):
assert extracted_annotations[0]['value'] == 'https://www.ncbi.nlm.nih.gov/gene?Db=gene&Cmd=DetailsSearch&Term=45614'


def test_extraction_forge_hgvs_variant_without_transcript_version(hgvs_without_transcript_version_annotation_task):
    """Verifies the jq query that forges the HGVS variant dataset without a transcript version.

    The forge task is annotated, the annotation is passed to ``extract``, and the
    value of the first extracted annotation is compared with the expected string.
    """
    forge_annotation = hgvs_without_transcript_version_annotation_task.annotate()
    actual_extractions = hgvs_without_transcript_version_annotation_task.extract(forge_annotation)
    # NOTE: `assert(x, y)` asserts a non-empty tuple, which is always truthy, so the
    # previous form could never fail. Compare the extracted value explicitly instead.
    # Assumes extract() returns a list of dicts carrying a 'value' key — TODO confirm.
    assert actual_extractions[0]['value'] == "NM_170707:c.745C>T"


def test_annotation_extraction_for_transcript_id_dataset(http_annotation_transcript_id, transcript_annotation_response):
"""Verifying genomic unit extraction for a transcript using the the transcript ID dataset"""
actual_extractions = http_annotation_transcript_id.extract(transcript_annotation_response)
Expand Down Expand Up @@ -249,6 +256,20 @@ def fixture_transcript_id_dataset():
"attribute": ".[].transcript_consequences[] | { transcript_id: .transcript_id }",
}

@pytest.fixture(name="hgvs_without_transcript_version_annotation_task")
def fixture_hgvs_without_transcript_version(hgvs_variant_genomic_unit):
    """Provides a forge annotation task for the 'hgvs_variant_without_transcript_version' dataset."""
    # Dataset configuration handed to the annotation unit; the jq 'attribute' filter
    # splits on ":" and "." and re-joins the pieces under the dataset's key.
    dataset_config = {
        "data_set": "hgvs_variant_without_transcript_version",
        "data_source": "Rosalution",
        "annotation_source_type": "forge",
        "genomic_unit_type": "hgvs_variant",
        "base_string": "{hgvs_variant}",
        "attribute": ".hgvs_variant_without_transcript_version | split(\":\") as $transcript_split | $transcript_split[0] | split(\".\")[0] | . + \":\" + $transcript_split[1] | {\"hgvs_variant_without_transcript_version\": .}",
        "versioning_type": "rosalution"
    }
    return ForgeAnnotationTask(AnnotationUnit(hgvs_variant_genomic_unit, dataset_config))


@pytest.fixture(name="hgvs_variant_annotation_unit")
def fixture_hgvs_variant_annotation_unit(hgvs_variant_genomic_unit, transcript_id_dataset):
Expand Down
104 changes: 78 additions & 26 deletions etc/fixtures/initial-seed/annotations-config.json
Original file line number Diff line number Diff line change
@@ -1,24 +1,37 @@
[
{
"data_set": "hgvs_variant_without_transcript_version",
"data_source": "Rosalution",
"annotation_source_type": "forge",
"genomic_unit_type": "hgvs_variant",
"base_string": "{hgvs_variant}",
"attribute": ".hgvs_variant_without_transcript_version | split(\":\") as $transcript_split | $transcript_split[0] | split(\".\")[0] | . + \":\" + $transcript_split[1] | {\"hgvs_variant_without_transcript_version\": .}",
"versioning_type": "rosalution"
},
{
"data_set": "transcript_id",
"data_source": "Ensembl",
"genomic_unit_type": "hgvs_variant",
"transcript": true,
"annotation_source_type": "http",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;",
"attribute": ".[].transcript_consequences[] | { transcript_id: .transcript_id }",
"dependencies": ["hgvs_variant_without_transcript_version"],
"versioning_type": "rest",
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
},
},
{
"data_set": "HPO_NCBI_GENE_ID",
"data_source": "HPO",
"genomic_unit_type": "gene",
"data_set": "ensembl_vep_vcf_string",
"data_source": "Ensembl",
"genomic_unit_type": "hgvs_variant",
"annotation_source_type": "http",
"url": "https://ontology.jax.org/api/network/search/GENE?q={gene}&page=0&limit=10",
"attribute": "{ \"HPO_NCBI_GENE_ID\": .results[] | select( .name == \"{gene}\") | .id }",
"versioning_type": "date"
"url": "https://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;vcf_string=1;",
"attribute": ".[] | .vcf_string",
"versioning_type": "rest",
"dependencies": ["hgvs_variant_without_transcript_version"],
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
},
{
"data_set": "Ensembl Gene Id",
Expand Down Expand Up @@ -48,8 +61,9 @@
"genomic_unit_type": "hgvs_variant",
"annotation_source_type": "http",
"attribute": ".[] | select(.colocated_variants != null) | .colocated_variants[] | select(.var_synonyms != null) | .var_synonyms | select( .ClinVar != null ) | select(.ClinVar != []) | .ClinVar[] | select(contains(\"VCV\")) | sub(\"VCV0+\"; \"\") | {\"ClinVar_Variantion_Id\": . } ",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;clinvar=1;",
"versioning_type": "rosalution"
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;clinvar=1;",
"versioning_type": "rosalution",
"dependencies": ["hgvs_variant_without_transcript_version"]
},
{
"data_set": "OMIM_gene_search_url",
Expand Down Expand Up @@ -214,6 +228,25 @@
"dependencies": ["Ensembl Gene Id"],
"versioning_type": "rosalution"
},
{
"data_set": "gnomAD_variant_url",
"data_source": "Rosalution",
"genomic_unit_type": "gene",
"annotation_source_type": "forge",
"base_string": "https://gnomad.broadinstitute.org/gene/{ensembl_vep_vcf_string}?dataset=gnomad_r4",
"attribute": "{ \"gnomAD_variant_url\": .gnomAD_variant_url }",
"dependencies": ["Ensembl Gene Id"],
"versioning_type": "rosalution"
},
{
"data_set": "HPO_NCBI_GENE_ID",
"data_source": "HPO",
"genomic_unit_type": "gene",
"annotation_source_type": "http",
"url": "https://ontology.jax.org/api/network/search/GENE?q={gene}&page=0&limit=10",
"attribute": "{ \"HPO_NCBI_GENE_ID\": .results[] | select( .name == \"{gene}\") | .id }",
"versioning_type": "date"
},
{
"data_set": "OMIM",
"data_source": "HPO",
Expand Down Expand Up @@ -384,9 +417,10 @@
"genomic_unit_type": "hgvs_variant",
"transcript": true,
"annotation_source_type": "http",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;",
"attribute": ".[].transcript_consequences[] | { polyphen_prediction: .polyphen_prediction, transcript_id: .transcript_id }",
"versioning_type": "rest",
"dependencies": ["hgvs_variant_without_transcript_version"],
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
},
Expand All @@ -396,9 +430,10 @@
"genomic_unit_type": "hgvs_variant",
"transcript": true,
"annotation_source_type": "http",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;",
"attribute": ".[].transcript_consequences[] | { polyphen_score: .polyphen_score, transcript_id: .transcript_id }",
"versioning_type": "rest",
"dependencies": ["hgvs_variant_without_transcript_version"],
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
},
Expand All @@ -408,9 +443,10 @@
"genomic_unit_type": "hgvs_variant",
"transcript": true,
"annotation_source_type": "http",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;",
"attribute": ".[].transcript_consequences[] | { sift_prediction: .sift_prediction, transcript_id: .transcript_id }",
"versioning_type": "rest",
"dependencies": ["hgvs_variant_without_transcript_version"],
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
},
Expand All @@ -420,9 +456,11 @@
"genomic_unit_type": "hgvs_variant",
"transcript": true,
"annotation_source_type": "http",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;",
"attribute": ".[].transcript_consequences[] | { sift_score: .sift_score, transcript_id: .transcript_id }",
"versioning_type": "rest",
"dependencies": ["hgvs_variant_without_transcript_version"],

"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
},
Expand All @@ -432,8 +470,9 @@
"genomic_unit_type": "hgvs_variant",
"transcript": true,
"annotation_source_type": "http",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;",
"attribute": ".[].transcript_consequences[] | { consequence_terms: .consequence_terms, transcript_id: .transcript_id }",
"dependencies": ["hgvs_variant_without_transcript_version"],
"versioning_type": "rest",
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
Expand All @@ -443,9 +482,9 @@
"data_source": "Ensembl",
"genomic_unit_type": "hgvs_variant",
"annotation_source_type": "http",
"url": "https://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;CADD=1;refseq=1;",
"url": "https://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;CADD=1;refseq=1;",
"attribute": ".[].transcript_consequences[] | select( .transcript_id | contains(\"{transcript}\") ) | { CADD: .cadd_phred }",
"dependencies": ["transcript"],
"dependencies": ["transcript","hgvs_variant_without_transcript_version"],
"versioning_type": "rest",
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
Expand All @@ -456,9 +495,10 @@
"genomic_unit_type": "hgvs_variant",
"transcript": true,
"annotation_source_type": "http",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;",
"url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;",
"attribute": ".[].transcript_consequences[] | { impact: .impact, transcript_id: .transcript_id }",
"versioning_type": "rest",
"dependencies": ["hgvs_variant_without_transcript_version"],
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
},
Expand Down Expand Up @@ -538,6 +578,18 @@
"version_url": "https://www.alliancegenome.org/api/releaseInfo",
"version_attribute": ".releaseVersion"
},
{
"data_set": "C-Elegens_Worm_Base_url",
"data_source": "Alliance Genome",
"annotation_source_type": "http",
"genomic_unit_type": "gene",
"url": "https://www.alliancegenome.org/api/gene/{C-Elegens Gene Identifier}",
"attribute": "{ \"C-Elegens_Worm_Base_url\": .modCrossRefCompleteUrl }",
"dependencies": ["C-Elegens Gene Identifier"],
"versioning_type": "rest",
"version_url": "https://www.alliancegenome.org/api/releaseInfo",
"version_attribute": ".releaseVersion"
},
{
"data_set": "C-Elegens_Alliance_Genome_url",
"data_source": "Rosalution",
Expand All @@ -549,15 +601,15 @@
"versioning_type": "rosalution"
},
{
"data_set": "C-Elegens_Worm_Base_url",
"data_source": "Alliance Genome",
"data_set": "opencravat_search_variant_vcf_string",
"data_source": "Ensembl",
"genomic_unit_type": "hgvs_variant",
"annotation_source_type": "http",
"genomic_unit_type": "gene",
"url": "https://www.alliancegenome.org/api/gene/{C-Elegens Gene Identifier}",
"attribute": "{ \"C-Elegens_Worm_Base_url\": .modCrossRefCompleteUrl }",
"dependencies": ["C-Elegens Gene Identifier"],
"url": "https://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;CADD=1;refseq=1;vcf_string=1;",
"attribute": ".[] | .vcf_string | split(\"-\") | \"chrom=chr\" + .[0] + \"&pos=\" + .[1] + \"&ref_base=\" + .[2] + \"&alt_base=\" + .[3] ",
"versioning_type": "rest",
"version_url": "https://www.alliancegenome.org/api/releaseInfo",
"version_attribute": ".releaseVersion"
"dependencies": ["hgvs_variant_without_transcript_version"],
"version_url": "https://rest.ensembl.org/info/data/?content-type=application/json",
"version_attribute": ".releases[]"
}
]
18 changes: 18 additions & 0 deletions etc/fixtures/initial-seed/genomic-units.json
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,24 @@
"value": "https://www.ncbi.nlm.nih.gov/clinvar/variation/581244"
}
]
},
{
"ensembl_vep_vcf_string": [
{
"data_source": "Rosalution",
"version": "rosalution-manifest-00",
"value": "X-151404916-G-T"
}
]
},
{
"gnomAD_variant_url": [
{
"data_source": "Rosalution",
"version": "rosalution-manifest-00",
"value": "https://gnomad.broadinstitute.org/gene/X-151404916-G-T?dataset=gnomad_r4"
}
]
}
]
},
Expand Down
39 changes: 37 additions & 2 deletions frontend/src/models/analyses.js
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ const annotationRenderingTemporary = [
'type': 'icon-linkout-dataset',
'props': {
'imageFilename': 'gnomad-logo.png',
'altText': 'Genome Aggregation Database (gnomAD) from Broad Institute',
'altText': 'Genome Aggregation Database (gnomAD) for variants from Broad Institute',
},
}],
'rows': [
Expand Down Expand Up @@ -368,7 +368,14 @@ const annotationRenderingTemporary = [
'class': '',
'header': 'variant',
'anchor': 'Variant',
'header_datasets': [],
'header_datasets': [{
'dataset': 'gnomAD_variant_url',
'type': 'icon-linkout-dataset',
'props': {
'imageFilename': 'gnomad-logo.png',
'altText': 'Genome Aggregation Database (gnomAD) for genes from Broad Institute',
},
}],
'rows': [
{
'class': '',
Expand Down Expand Up @@ -401,6 +408,34 @@ const annotationRenderingTemporary = [
'cutoff': 1,
},
},
{
'dataset': 'REVEL',
'type': 'score-dataset',
'props': {
'label': 'REVEL',
'minimum': 0,
'maximum': 1,
'bounds': {
'lowerBound': 0.5,
'upperBound': 0.79,
},
'cutoff': 1,
},
},
{
'dataset': 'AlphaMissense',
'type': 'score-dataset',
'props': {
'label': 'REVEL',
'minimum': 0,
'maximum': 1,
'bounds': {
'lowerBound': 0.34,
'upperBound': 0.564,
},
'cutoff': 1,
},
},
// {
// 'dataset': 'Phylop100',
// 'type': 'score-dataset',
Expand Down

0 comments on commit 3e23ed5

Please sign in to comment.