From 4d277ab12b400d7a0caa5664819250dca119a7d4 Mon Sep 17 00:00:00 2001 From: Rabab Fatima Date: Thu, 12 Sep 2024 16:08:06 -0500 Subject: [PATCH] Able to retrieve version for 'rest' versioning type. Hardcoded 'rosalution' type version for rosalution's manifest. Paired with Angelina on Wednesday to create a couple of helper functions for testing. Thursday - Rabab worked on combining & testing all 3 versioning types in one test. --- backend/src/core/annotation_task.py | 59 +++++++------ backend/tests/unit/conftest.py | 31 ++++++- .../tests/unit/core/test_annotation_task.py | 84 +++++++++++++++++-- 3 files changed, 139 insertions(+), 35 deletions(-) diff --git a/backend/src/core/annotation_task.py b/backend/src/core/annotation_task.py index a130ba5c..566a68db 100644 --- a/backend/src/core/annotation_task.py +++ b/backend/src/core/annotation_task.py @@ -31,7 +31,7 @@ class AnnotationTaskInterface: def __init__(self, annotation_unit: AnnotationUnit): self.annotation_unit = annotation_unit - def aggregate_string_replacements(self, base_string): + def aggregate_string_replacements(self, base_string) -> str: """ Replaces the content 'base_string' where strings within the pattern {item} are replaced, 'item' can be the genomic unit's type such as @@ -39,6 +39,20 @@ def aggregate_string_replacements(self, base_string): The follow are examples of the genomic_unit's dict's attributes like genomic_unit['gene'] or genomic_unit['Entrez Gene Id'] + + example base string: + https://grch37.rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;CADD=1;refseq=1; + return value: https://grch37.rest.ensembl.org/vep/human/hgvs/NM_001017980.3:c.164G>T?content-type=application/json;CADD=1;refseq=1; + + example base string: + .[].transcript_consequences[] | select( .transcript_id | contains(\"{transcript}\") ) | { CADD: .cadd_phred } + return value: .[].transcript_consequences[] | select( .transcript_id | contains(\"NM_001017980\") ) | { CADD: .cadd_phred } + + genomic unit within the annotation unit in this task to be + { + 'hgvs_variant': "hgvs_variant", + 'transcript': 'NM_001017980', + } """ genomic_unit_string = f"{{{self.annotation_unit.get_genomic_unit_type()}}}" replace_string = base_string.replace(genomic_unit_string, self.annotation_unit.get_genomic_unit()) @@ -194,24 +208,6 @@ def annotate(self): json_result = result.json() return json_result - def base_url(self): - """ - Creates the base url for the annotation according to the configuration. Searches for string {genomic_unit_type} - within the 'url' attribute and replaces it with the genomic_unit being annotated. - """ - string_to_replace = f"{{{self.annotation_unit.dataset['genomic_unit_type']}}}" - replace_string = self.annotation_unit.dataset['url'].replace( - string_to_replace, self.annotation_unit.get_genomic_unit() - ) - - if 'dependencies' in self.annotation_unit.dataset: - for depedency in self.annotation_unit.dataset['dependencies']: - depedency_replace_string = f"{{{depedency}}}" - replace_string = replace_string.replace( - depedency_replace_string, self.annotation_unit.genomic_unit[depedency] - ) - return replace_string - def build_url(self): """ Builds the URL from the base_url and then appends the list of query parameters for the list of datasets. @@ -234,7 +230,8 @@ def __init__(self, annotation_unit): def annotate(self): """Gets version by versioning type and returns the version data to the annotation unit""" - version_type = self.annotation_unit.dataset["versioning_type"] + + version_type = self.annotation_unit.dataset['versioning_type'] version = "" if version_type not in self.version_types: @@ -242,27 +239,37 @@ def annotate(self): return {} version = self.version_types[version_type]() + print("THIS IS THE VERSION RETREIVED FROM VERSION URL FOR " + version_type + " TYPE") + print(version) return version def get_annotation_version_from_rest(self): """Gets version for rest type and returns the version data""" - version_from_rest = "rosalution-temp-manifest-00" - return version_from_rest + version = {"rest": "rosalution-temp-manifest-00"} + + url_to_query = self.build_versioning_url() + result = requests.get(url_to_query, verify=False, headers={"Accept": "application/json"}, timeout=30) + version = result.json() + return version + + def build_versioning_url(self): + """ + Builds the version URL from aggregate_string_replacements and then appends the list of query parameters for the list of datasets. + """ + return self.aggregate_string_replacements(self.annotation_unit.dataset['version_url']) def get_annotation_version_from_rosalution(self): """Gets version for rosalution type and returns the version data""" - version_from_rosalution = "rosalution-temp-manifest-00" - version = {"rosalution": "rosalution-temp-manifest-00"} + version = {"rosalution": "rosalution-manifest-00"} return version def get_annotation_version_from_date(self): """Gets version for date type and returns the version data""" - version_from_date = "rosalution-temp-manifest-00" # getting version from date version = {"date": "rosalution-temp-manifest-00"} - return version_from_date + return version class AnnotationTaskFactory: diff --git a/backend/tests/unit/conftest.py b/backend/tests/unit/conftest.py index 55ebad27..9c7e2aef 100644 --- a/backend/tests/unit/conftest.py +++ b/backend/tests/unit/conftest.py @@ -3,6 +3,7 @@ from unittest.mock import Mock import pytest +from backend.src.enums import GenomicUnitType from src.core.annotation_unit import AnnotationUnit from src.config import Settings from src.core.annotation import AnnotationService @@ -137,12 +138,38 @@ def _create_dataset_manifest(analysis_name, dataset_name): return _create_dataset_manifest +@pytest.fixture(name="cpam0046_analysis") +def fixture_cpam0046_analysis(cpam0046_analysis_json): + """Returns the Analysis for CPAM0046 to verify creating annotation tasks""" + return Analysis(**cpam0046_analysis_json) + + +@pytest.fixture(name="genomic_units_with_types") +def fixture_genomic_units_with_types(analysis_collection_json): + """Returns the multiple analyses being mocked as an array""" + + def get_units(analysis_json): + analysis = Analysis(**analysis_json) + return analysis.units_to_annotate() + + genomic_units_lists = list(map(get_units, analysis_collection_json)) + flattened_list = [unit for analysis_units in genomic_units_lists for unit in analysis_units] + types = {unit['unit']: unit['type'] for unit in flattened_list} + return types + + @pytest.fixture(name='get_annotation_unit') -def get_standard_annotation_unit(annotation_config_collection_json): +def get_standard_annotation_unit(annotation_config_collection_json, genomic_units_with_types): """Fixture factory method to create an AnnotationUnit from the genomic unit information and name of the datset.""" - def _create_annotation_unit(genomic_unit_name, genomic_unit_type, dataset_name): + # units = { + # 'VMA21': GenomicUnitType.GENE, + # 'NM_001017980.3:c.164G>T': GenomicUnitType.HGVS_VARIANT + # } + + def _create_annotation_unit(genomic_unit_name, dataset_name): """Method to create the Annotation Unit""" + genomic_unit_type = genomic_units_with_types[genomic_unit_name] genomic_unit = {'unit': genomic_unit_name, 'type': genomic_unit_type} dataset_config = next((unit for unit in annotation_config_collection_json if unit['data_set'] == dataset_name), None) diff --git a/backend/tests/unit/core/test_annotation_task.py b/backend/tests/unit/core/test_annotation_task.py index 48008a6a..d10d9c7d 100644 --- a/backend/tests/unit/core/test_annotation_task.py +++ b/backend/tests/unit/core/test_annotation_task.py @@ -1,17 +1,12 @@ """Tests Annotation Tasks and the creation of them""" import pytest +from unittest.mock import patch -from src.core.annotation_task import AnnotationTaskFactory, ForgeAnnotationTask, HttpAnnotationTask +from src.core.annotation_task import AnnotationTaskFactory, ForgeAnnotationTask, HttpAnnotationTask, VersionAnnotationTask from src.enums import GenomicUnitType from src.core.annotation_unit import AnnotationUnit -def test_http_annotation_base_url(http_annotation_transcript_id): - """Verifies if the HTTP annotation creates the base url using the url and genomic_unit as expected.""" - actual = http_annotation_transcript_id.base_url() - assert actual == "http://grch37.rest.ensembl.org/vep/human/hgvs/NM_170707.3:c.745C>T?content-type=application/json;refseq=1;" # pylint: disable=line-too-long - - def test_http_annotation_task_build_url(http_annotation_transcript_id): """Verifies that the HTTP annotation task creates the base url using the 'url' and the genomic unit""" actual = http_annotation_transcript_id.build_url() @@ -104,9 +99,84 @@ def test_annotation_extraction_value_error_exception(http_annotation_task_gene, assert len(actual_extractions) == 0 +@pytest.mark.parametrize( + "test_case", [('VMA21', GenomicUnitType.GENE, 'Entrez Gene Id'), + ('NM_001017980.3:c.164G>T', GenomicUnitType.HGVS_VARIANT, 'ClinVar_Variantion_Id')] +) +def test_annotation_versioning_task_created(test_case, get_annotation_unit): + """Verifies that the annotation task factory creates the correct version annotation task for the annotation unit""" + genomic_unit, genomic_unit_type, dataset_name = test_case + annotation_unit = get_annotation_unit(genomic_unit, genomic_unit_type, dataset_name) + actual_task = AnnotationTaskFactory.create_version_task(annotation_unit) + assert isinstance(actual_task, VersionAnnotationTask) + + +@pytest.mark.parametrize( + "genomic_unit,genomic_unit_type,dataset_name,expected", [ + ('VMA21', GenomicUnitType.GENE, 'Entrez Gene Id', {"rosalution": "rosalution-manifest-00"}), + ( + 'NM_001017980.3:c.164G>T', GenomicUnitType.HGVS_VARIANT, 'ClinVar_Variantion_Id', + {"rosalution": "rosalution-manifest-00"} + ), + ('VMA21', GenomicUnitType.GENE, 'Ensembl Gene Id', {"releases": [112]}), + ('NM_001017980.3:c.164G>T', GenomicUnitType.HGVS_VARIANT, 'Polyphen Prediction', {"releases": [112]}), + ('VMA21', GenomicUnitType.GENE, 'HPO_NCBI_GENE_ID', {"date": "rosalution-temp-manifest-00"}), + ('LMNA', GenomicUnitType.GENE, 'OMIM', {"date": "rosalution-temp-manifest-00"}), + ] +) +def test_process_annotation_versioning_all_types( + genomic_unit, genomic_unit_type, dataset_name, expected, get_version_task +): + """Verifies that Version Annotation Tasks process and annotate for all 3 versioning types- date, rest, rosalution""" + task = get_version_task(genomic_unit, dataset_name) + actual_version_json = task.annotate() + assert actual_version_json == expected + + +@pytest.mark.parametrize( + "test_case", + [ + ( + 'VMA21', + GenomicUnitType.GENE, + 'Entrez Gene Id', + ), + # ('NM_001017980.3:c.164G>T', GenomicUnitType.HGVS_VARIANT, 'ClinVar_Variantion_Id') + ] +) +def test_version_extraction(test_case, get_annotation_unit): + genomic_unit, genomic_unit_type, dataset_name = test_case + annotation_unit = get_annotation_unit(genomic_unit, genomic_unit_type, dataset_name) + version_task = AnnotationTaskFactory.create_version_task(annotation_unit) + version_json = version_task.annotate() + actual_version_extraction = version_task.extract_version(version_json) + assert actual_version_extraction == "rosalution-temp-manifest-00" + + ## Fixtures ## +@pytest.fixture(name="get_version_task") +def get_version_annotation_task(get_annotation_unit, genomic_units_with_types): + """creating version task""" + + def _create_version_task(genomic_unit, dataset_name): + + annotation_unit = get_annotation_unit(genomic_unit, dataset_name) + + return VersionAnnotationTask(annotation_unit) + + return _create_version_task + + +# @pytest.fixture(name="get_patch") +# def fixture_requests_patch(): +# with( +# patch("requests.get") as requests_get +# ): +# yield: requests_get + + @pytest.fixture(name="gene_ncbi_linkout_dataset") def fixture_ncbi_linkout_dataset(): """