diff --git a/backend/src/core/annotation_task.py b/backend/src/core/annotation_task.py index df86b3ea..b0921a1b 100644 --- a/backend/src/core/annotation_task.py +++ b/backend/src/core/annotation_task.py @@ -100,6 +100,7 @@ def extract(self, incomming_json): )) jq_result = next(jq_results, None) while jq_result is not None: + print(jq_result) result_keys = list(jq_result.keys()) if 'transcript' in self.annotation_unit.dataset: diff --git a/backend/tests/unit/core/test_annotation_task.py b/backend/tests/unit/core/test_annotation_task.py index 3338a66e..f19b18f7 100644 --- a/backend/tests/unit/core/test_annotation_task.py +++ b/backend/tests/unit/core/test_annotation_task.py @@ -47,6 +47,13 @@ def test_extraction_forge_gene_linkout_dataset(forge_annotation_task_gene): assert extracted_annotations[0]['value'] == 'https://www.ncbi.nlm.nih.gov/gene?Db=gene&Cmd=DetailsSearch&Term=45614' +def test_extraction_forge_hgvs_variant_without_transcript_version(hgvs_without_transcript_version_annotation_task): + """Verifies the jq query used by forge to create the HGVS variant without a transcript version dataset""" + forge_annotation = hgvs_without_transcript_version_annotation_task.annotate() + actual_extractions = hgvs_without_transcript_version_annotation_task.extract(forge_annotation) + assert actual_extractions[0]['value'] == "NM_170707:c.745C>T" + + def test_annotation_extraction_for_transcript_id_dataset(http_annotation_transcript_id, transcript_annotation_response): """Verifying genomic unit extraction for a transcript using the the transcript ID dataset""" actual_extractions = http_annotation_transcript_id.extract(transcript_annotation_response) @@ -249,6 +256,20 @@ def fixture_transcript_id_dataset(): "attribute": ".[].transcript_consequences[] | { transcript_id: .transcript_id }", } +@pytest.fixture(name="hgvs_without_transcript_version_annotation_task") +def fixture_hgvs_without_transcript_version(hgvs_variant_genomic_unit): + annotation_unit = 
AnnotationUnit(hgvs_variant_genomic_unit, { + "data_set": "hgvs_variant_without_transcript_version", + "data_source": "Rosalution", + "annotation_source_type": "forge", + "genomic_unit_type": "hgvs_variant", + "base_string": "{hgvs_variant}", + "attribute": ".hgvs_variant_without_transcript_version | split(\":\") as $transcript_split | $transcript_split[0] | split(\".\")[0] | . + \":\" + $transcript_split[1] | {\"hgvs_variant_without_transcript_version\": .}", + "versioning_type": "rosalution" + }) + task = ForgeAnnotationTask(annotation_unit) + return task + @pytest.fixture(name="hgvs_variant_annotation_unit") def fixture_hgvs_variant_annotation_unit(hgvs_variant_genomic_unit, transcript_id_dataset): diff --git a/etc/fixtures/initial-seed/annotations-config.json b/etc/fixtures/initial-seed/annotations-config.json index 1fe945cd..5cfc206c 100644 --- a/etc/fixtures/initial-seed/annotations-config.json +++ b/etc/fixtures/initial-seed/annotations-config.json @@ -1,24 +1,37 @@ [ + { + "data_set": "hgvs_variant_without_transcript_version", + "data_source": "Rosalution", + "annotation_source_type": "forge", + "genomic_unit_type": "hgvs_variant", + "base_string": "{hgvs_variant}", + "attribute": ".hgvs_variant_without_transcript_version | split(\":\") as $transcript_split | $transcript_split[0] | split(\".\")[0] | . 
+ \":\" + $transcript_split[1] | {\"hgvs_variant_without_transcript_version\": .}", + "versioning_type": "rosalution" + }, { "data_set": "transcript_id", "data_source": "Ensembl", "genomic_unit_type": "hgvs_variant", "transcript": true, "annotation_source_type": "http", - "url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;", + "url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;", "attribute": ".[].transcript_consequences[] | { transcript_id: .transcript_id }", + "dependencies": ["hgvs_variant_without_transcript_version"], "versioning_type": "rest", "version_url": "https://rest.ensembl.org/info/data/?content-type=application/json", "version_attribute": ".releases[]" - }, + }, { - "data_set": "HPO_NCBI_GENE_ID", - "data_source": "HPO", - "genomic_unit_type": "gene", + "data_set": "ensembl_vep_vcf_string", + "data_source": "Ensembl", + "genomic_unit_type": "hgvs_variant", "annotation_source_type": "http", - "url": "https://ontology.jax.org/api/network/search/GENE?q={gene}&page=0&limit=10", - "attribute": "{ \"HPO_NCBI_GENE_ID\": .results[] | select( .name == \"{gene}\") | .id }", - "versioning_type": "date" + "url": "https://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;vcf_string=1;", + "attribute": ".[] | { \"ensembl_vep_vcf_string\": .vcf_string }", + "versioning_type": "rest", + "dependencies": ["hgvs_variant_without_transcript_version"], + "version_url": "https://rest.ensembl.org/info/data/?content-type=application/json", + "version_attribute": ".releases[]" }, { "data_set": "Ensembl Gene Id", @@ -48,8 +61,9 @@ "genomic_unit_type": "hgvs_variant", "annotation_source_type": "http", "attribute": ".[] | select(.colocated_variants != null) | .colocated_variants[] | select(.var_synonyms != null) | .var_synonyms | select( .ClinVar != null ) | select(.ClinVar != []) | .ClinVar[] | select(contains(\"VCV\")) | 
sub(\"VCV0+\"; \"\") | {\"ClinVar_Variantion_Id\": . } ", - "url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;clinvar=1;", - "versioning_type": "rosalution" + "url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;clinvar=1;", + "versioning_type": "rosalution", + "dependencies": ["hgvs_variant_without_transcript_version"] }, { "data_set": "OMIM_gene_search_url", @@ -214,6 +228,25 @@ "dependencies": ["Ensembl Gene Id"], "versioning_type": "rosalution" }, + { + "data_set": "gnomAD_variant_url", + "data_source": "Rosalution", + "genomic_unit_type": "hgvs_variant", + "annotation_source_type": "forge", + "base_string": "https://gnomad.broadinstitute.org/variant/{ensembl_vep_vcf_string}?dataset=gnomad_r4", + "attribute": "{ \"gnomAD_variant_url\": .gnomAD_variant_url }", + "dependencies": ["ensembl_vep_vcf_string"], + "versioning_type": "rosalution" + }, + { + "data_set": "HPO_NCBI_GENE_ID", + "data_source": "HPO", + "genomic_unit_type": "gene", + "annotation_source_type": "http", + "url": "https://ontology.jax.org/api/network/search/GENE?q={gene}&page=0&limit=10", + "attribute": "{ \"HPO_NCBI_GENE_ID\": .results[] | select( .name == \"{gene}\") | .id }", + "versioning_type": "date" + }, { "data_set": "OMIM", "data_source": "HPO", @@ -384,9 +417,10 @@ "genomic_unit_type": "hgvs_variant", "transcript": true, "annotation_source_type": "http", - "url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;", + "url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;", "attribute": ".[].transcript_consequences[] | { polyphen_prediction: .polyphen_prediction, transcript_id: .transcript_id }", "versioning_type": "rest", + "dependencies": ["hgvs_variant_without_transcript_version"], "version_url": "https://rest.ensembl.org/info/data/?content-type=application/json", 
"version_attribute": ".releases[]" }, @@ -396,9 +430,10 @@ "genomic_unit_type": "hgvs_variant", "transcript": true, "annotation_source_type": "http", - "url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;", + "url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;", "attribute": ".[].transcript_consequences[] | { polyphen_score: .polyphen_score, transcript_id: .transcript_id }", "versioning_type": "rest", + "dependencies": ["hgvs_variant_without_transcript_version"], "version_url": "https://rest.ensembl.org/info/data/?content-type=application/json", "version_attribute": ".releases[]" }, @@ -408,9 +443,10 @@ "genomic_unit_type": "hgvs_variant", "transcript": true, "annotation_source_type": "http", - "url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;", + "url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;", "attribute": ".[].transcript_consequences[] | { sift_prediction: .sift_prediction, transcript_id: .transcript_id }", "versioning_type": "rest", + "dependencies": ["hgvs_variant_without_transcript_version"], "version_url": "https://rest.ensembl.org/info/data/?content-type=application/json", "version_attribute": ".releases[]" }, @@ -420,9 +456,11 @@ "genomic_unit_type": "hgvs_variant", "transcript": true, "annotation_source_type": "http", - "url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;", + "url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;", "attribute": ".[].transcript_consequences[] | { sift_score: .sift_score, transcript_id: .transcript_id }", "versioning_type": "rest", + "dependencies": ["hgvs_variant_without_transcript_version"], + "version_url": 
"https://rest.ensembl.org/info/data/?content-type=application/json", "version_attribute": ".releases[]" }, @@ -432,8 +470,9 @@ "genomic_unit_type": "hgvs_variant", "transcript": true, "annotation_source_type": "http", - "url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;", + "url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;", "attribute": ".[].transcript_consequences[] | { consequence_terms: .consequence_terms, transcript_id: .transcript_id }", + "dependencies": ["hgvs_variant_without_transcript_version"], "versioning_type": "rest", "version_url": "https://rest.ensembl.org/info/data/?content-type=application/json", "version_attribute": ".releases[]" @@ -443,9 +482,9 @@ "data_source": "Ensembl", "genomic_unit_type": "hgvs_variant", "annotation_source_type": "http", - "url": "https://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;CADD=1;refseq=1;", + "url": "https://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;CADD=1;refseq=1;", "attribute": ".[].transcript_consequences[] | select( .transcript_id | contains(\"{transcript}\") ) | { CADD: .cadd_phred }", - "dependencies": ["transcript"], + "dependencies": ["transcript","hgvs_variant_without_transcript_version"], "versioning_type": "rest", "version_url": "https://rest.ensembl.org/info/data/?content-type=application/json", "version_attribute": ".releases[]" @@ -456,9 +495,10 @@ "genomic_unit_type": "hgvs_variant", "transcript": true, "annotation_source_type": "http", - "url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;", + "url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;refseq=1;", "attribute": ".[].transcript_consequences[] | { impact: .impact, transcript_id: .transcript_id }", 
"versioning_type": "rest", + "dependencies": ["hgvs_variant_without_transcript_version"], "version_url": "https://rest.ensembl.org/info/data/?content-type=application/json", "version_attribute": ".releases[]" }, @@ -538,6 +578,18 @@ "version_url": "https://www.alliancegenome.org/api/releaseInfo", "version_attribute": ".releaseVersion" }, + { + "data_set": "C-Elegens_Worm_Base_url", + "data_source": "Alliance Genome", + "annotation_source_type": "http", + "genomic_unit_type": "gene", + "url": "https://www.alliancegenome.org/api/gene/{C-Elegens Gene Identifier}", + "attribute": "{ \"C-Elegens_Worm_Base_url\": .modCrossRefCompleteUrl }", + "dependencies": ["C-Elegens Gene Identifier"], + "versioning_type": "rest", + "version_url": "https://www.alliancegenome.org/api/releaseInfo", + "version_attribute": ".releaseVersion" + }, { "data_set": "C-Elegens_Alliance_Genome_url", "data_source": "Rosalution", @@ -549,15 +601,15 @@ "versioning_type": "rosalution" }, { - "data_set": "C-Elegens_Worm_Base_url", - "data_source": "Alliance Genome", + "data_set": "opencravat_search_variant_vcf_string", + "data_source": "Ensembl", + "genomic_unit_type": "hgvs_variant", "annotation_source_type": "http", - "genomic_unit_type": "gene", - "url": "https://www.alliancegenome.org/api/gene/{C-Elegens Gene Identifier}", - "attribute": "{ \"C-Elegens_Worm_Base_url\": .modCrossRefCompleteUrl }", - "dependencies": ["C-Elegens Gene Identifier"], + "url": "https://rest.ensembl.org/vep/human/hgvs/{hgvs_variant_without_transcript_version}?content-type=application/json;CADD=1;refseq=1;vcf_string=1;", + "attribute": ".[] | .vcf_string | split(\"-\") | \"chrom=chr\" + .[0] + \"&pos=\" + .[1] + \"&ref_base=\" + .[2] + \"&alt_base=\" + .[3] ", "versioning_type": "rest", - "version_url": "https://www.alliancegenome.org/api/releaseInfo", - "version_attribute": ".releaseVersion" + "dependencies": ["hgvs_variant_without_transcript_version"], + "version_url": 
"https://rest.ensembl.org/info/data/?content-type=application/json", + "version_attribute": ".releases[]" } ] \ No newline at end of file diff --git a/etc/fixtures/initial-seed/genomic-units.json b/etc/fixtures/initial-seed/genomic-units.json index f5806761..844ea907 100644 --- a/etc/fixtures/initial-seed/genomic-units.json +++ b/etc/fixtures/initial-seed/genomic-units.json @@ -465,6 +465,24 @@ "value": "https://www.ncbi.nlm.nih.gov/clinvar/variation/581244" } ] + }, + { + "ensembl_vep_vcf_string": [ + { + "data_source": "Rosalution", + "version": "rosalution-manifest-00", + "value": "X-151404916-G-T" + } + ] + }, + { + "gnomAD_variant_url": [ + { + "data_source": "Rosalution", + "version": "rosalution-manifest-00", + "value": "https://gnomad.broadinstitute.org/gene/X-151404916-G-T?dataset=gnomad_r4" + } + ] } ] }, diff --git a/frontend/src/models/analyses.js b/frontend/src/models/analyses.js index cafeee46..1b254643 100644 --- a/frontend/src/models/analyses.js +++ b/frontend/src/models/analyses.js @@ -321,7 +321,7 @@ const annotationRenderingTemporary = [ 'type': 'icon-linkout-dataset', 'props': { 'imageFilename': 'gnomad-logo.png', - 'altText': 'Genome Aggregation Database (gnomAD) from Broad Institute', + 'altText': 'Genome Aggregation Database (gnomAD) for variants from Broad Institute', }, }], 'rows': [ @@ -368,7 +368,14 @@ const annotationRenderingTemporary = [ 'class': '', 'header': 'variant', 'anchor': 'Variant', - 'header_datasets': [], + 'header_datasets': [{ + 'dataset': 'gnomAD_variant_url', + 'type': 'icon-linkout-dataset', + 'props': { + 'imageFilename': 'gnomad-logo.png', + 'altText': 'Genome Aggregation Database (gnomAD) for genes from Broad Institute', + }, + }], 'rows': [ { 'class': '', @@ -401,6 +408,34 @@ const annotationRenderingTemporary = [ 'cutoff': 1, }, }, + { + 'dataset': 'REVEL', + 'type': 'score-dataset', + 'props': { + 'label': 'REVEL', + 'minimum': 0, + 'maximum': 1, + 'bounds': { + 'lowerBound': 0.5, + 'upperBound': 0.79, + }, + 
'cutoff': 1, + }, + }, + { + 'dataset': 'AlphaMissense', + 'type': 'score-dataset', + 'props': { + 'label': 'AlphaMissense', + 'minimum': 0, + 'maximum': 1, + 'bounds': { + 'lowerBound': 0.34, + 'upperBound': 0.564, + }, + 'cutoff': 1, + }, + }, // { // 'dataset': 'Phylop100', // 'type': 'score-dataset',