Skip to content

Commit

Permalink
Able to retrieve version for 'rest' versioning type. Hardcoded 'rosal…
Browse files Browse the repository at this point in the history
…ution' type version for rosalution's manifest. Paired with Angelina on Wednesday to create a couple of helper functions for testing. Thursday - Rabab worked on combining & testing all 3 versioning types in one test.
  • Loading branch information
fatimarabab committed Sep 12, 2024
1 parent 4e4ab98 commit 4d277ab
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 35 deletions.
59 changes: 33 additions & 26 deletions backend/src/core/annotation_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,28 @@ class AnnotationTaskInterface:
def __init__(self, annotation_unit: AnnotationUnit):
    """Keeps a reference to the annotation unit that this task works on."""
    self.annotation_unit = annotation_unit

def aggregate_string_replacements(self, base_string):
def aggregate_string_replacements(self, base_string) -> str:
"""
Replaces the content 'base_string' where strings within the pattern
{item} are replaced, 'item' can be the genomic unit's type such as
{gene} or {hgvs_variant} or a dataset dependency, such as {Entrez Gene Id}.
The following are examples of the genomic unit dict's attributes, such as
genomic_unit['gene'] or genomic_unit['Entrez Gene Id']
example base string:
https://grch37.rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;CADD=1;refseq=1;
return value: https://grch37.rest.ensembl.org/vep/human/hgvs/NM_001017980.3:c.164G>T?content-type=application/json;CADD=1;refseq=1;
example base string:
.[].transcript_consequences[] | select( .transcript_id | contains(\"{transcript}\") ) | { CADD: .cadd_phred }
return value: .[].transcript_consequences[] | select( .transcript_id | contains(\"NM_001017980\") ) | { CADD: .cadd_phred }
genomic unit within the annotation unit in this task to be
{
'hgvs_variant': "hgvs_variant",
'transcript': 'NM_001017980',
}
"""
genomic_unit_string = f"{{{self.annotation_unit.get_genomic_unit_type()}}}"
replace_string = base_string.replace(genomic_unit_string, self.annotation_unit.get_genomic_unit())
Expand Down Expand Up @@ -194,24 +208,6 @@ def annotate(self):
json_result = result.json()
return json_result

def base_url(self):
    """
    Produces the base url for the annotation from the dataset configuration.

    The '{<genomic_unit_type>}' placeholder within the dataset's 'url' is
    substituted with the genomic unit being annotated. When the dataset lists
    'dependencies', each '{<dependency>}' placeholder is substituted with the
    matching value stored on the annotation unit's genomic unit.
    """
    dataset = self.annotation_unit.dataset
    unit_placeholder = f"{{{dataset['genomic_unit_type']}}}"
    url = dataset['url'].replace(unit_placeholder, self.annotation_unit.get_genomic_unit())

    # A dataset without a 'dependencies' entry has nothing further to substitute
    for dependency in dataset.get('dependencies', []):
        dependency_placeholder = f"{{{dependency}}}"
        url = url.replace(dependency_placeholder, self.annotation_unit.genomic_unit[dependency])

    return url

def build_url(self):
"""
Builds the URL from the base_url and then appends the list of query parameters for the list of datasets.
Expand All @@ -234,35 +230,46 @@ def __init__(self, annotation_unit):

def annotate(self):
    """
    Gets version by versioning type and returns the version data to the annotation unit

    The dataset's 'versioning_type' selects the handler registered in
    'self.version_types'; the handler is called and its result is returned.
    Returns an empty dict when no handler is registered for the versioning type.
    """
    version_type = self.annotation_unit.dataset['versioning_type']

    if version_type not in self.version_types:
        # The format string and its argument are passed separately so that the logger interpolates the
        # versioning type; wrapping both in one tuple logs the raw tuple instead of the message.
        logger.error('Failed versioning: "%s" is an Invalid Version Type', version_type)
        return {}

    version = self.version_types[version_type]()
    logger.debug('Retrieved version for "%s" versioning type: %s', version_type, version)
    return version

def get_annotation_version_from_rest(self):
    """
    Gets version for rest type and returns the version data

    Requests the URL produced by 'build_versioning_url' and returns the response body decoded as JSON.
    """
    url_to_query = self.build_versioning_url()
    # NOTE(review): verify=False turns off TLS certificate verification for this request; it is kept
    # unchanged here - confirm whether the version endpoints really require it.
    result = requests.get(url_to_query, verify=False, headers={"Accept": "application/json"}, timeout=30)
    return result.json()

def build_versioning_url(self):
    """
    Builds the version URL by running the dataset's 'version_url' through aggregate_string_replacements.
    """
    version_url_template = self.annotation_unit.dataset['version_url']
    return self.aggregate_string_replacements(version_url_template)

def get_annotation_version_from_rosalution(self):
    """
    Gets version for rosalution type and returns the version data

    The version is the hardcoded rosalution manifest version, keyed by 'rosalution'.
    """
    return {"rosalution": "rosalution-manifest-00"}

def get_annotation_version_from_date(self):
    """
    Gets version for date type and returns the version data

    Returns the version data as a dict keyed by 'date', the same shape the
    rosalution versioning type returns.
    """
    # NOTE(review): the value looks like a placeholder until the version is derived from a date - confirm
    return {"date": "rosalution-temp-manifest-00"}


class AnnotationTaskFactory:
Expand Down
31 changes: 29 additions & 2 deletions backend/tests/unit/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from unittest.mock import Mock
import pytest

from backend.src.enums import GenomicUnitType
from src.core.annotation_unit import AnnotationUnit
from src.config import Settings
from src.core.annotation import AnnotationService
Expand Down Expand Up @@ -137,12 +138,38 @@ def _create_dataset_manifest(analysis_name, dataset_name):
return _create_dataset_manifest


@pytest.fixture(name="cpam0046_analysis")
def fixture_cpam0046_analysis(cpam0046_analysis_json):
    """Provides the CPAM0046 Analysis, built from its JSON, to verify creating annotation tasks"""
    analysis = Analysis(**cpam0046_analysis_json)
    return analysis


@pytest.fixture(name="genomic_units_with_types")
def fixture_genomic_units_with_types(analysis_collection_json):
    """Maps every genomic unit to annotate, across all of the mocked analyses, to its genomic unit type"""
    unit_types = {}
    for analysis_json in analysis_collection_json:
        # A unit that appears in several analyses keeps the type from the last analysis listing it
        for unit in Analysis(**analysis_json).units_to_annotate():
            unit_types[unit['unit']] = unit['type']
    return unit_types


@pytest.fixture(name='get_annotation_unit')
def get_standard_annotation_unit(annotation_config_collection_json):
def get_standard_annotation_unit(annotation_config_collection_json, genomic_units_with_types):
"""Fixture factory method to create an AnnotationUnit from the genomic unit information and name of the dataset."""

def _create_annotation_unit(genomic_unit_name, genomic_unit_type, dataset_name):
# units = {
# 'VMA21': GenomicUnitType.GENE,
# 'NM_001017980.3:c.164G>T': GenomicUnitType.HGVS_VARIANT
# }

def _create_annotation_unit(genomic_unit_name, dataset_name):
"""Method to create the Annotation Unit"""
genomic_unit_type = genomic_units_with_types[genomic_unit_name]
genomic_unit = {'unit': genomic_unit_name, 'type': genomic_unit_type}
dataset_config = next((unit for unit in annotation_config_collection_json if unit['data_set'] == dataset_name),
None)
Expand Down
84 changes: 77 additions & 7 deletions backend/tests/unit/core/test_annotation_task.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,12 @@
"""Tests Annotation Tasks and the creation of them"""
import pytest
from unittest.mock import patch

from src.core.annotation_task import AnnotationTaskFactory, ForgeAnnotationTask, HttpAnnotationTask
from src.core.annotation_task import AnnotationTaskFactory, ForgeAnnotationTask, HttpAnnotationTask, VersionAnnotationTask
from src.enums import GenomicUnitType
from src.core.annotation_unit import AnnotationUnit


def test_http_annotation_base_url(http_annotation_transcript_id):
    """Verifies that the HTTP annotation builds its base url from the configured url and the genomic unit."""
    expected_url = (
        "http://grch37.rest.ensembl.org/vep/human/hgvs/NM_170707.3:c.745C>T"
        "?content-type=application/json;refseq=1;"
    )
    assert http_annotation_transcript_id.base_url() == expected_url


def test_http_annotation_task_build_url(http_annotation_transcript_id):
"""Verifies that the HTTP annotation task creates the base url using the 'url' and the genomic unit"""
actual = http_annotation_transcript_id.build_url()
Expand Down Expand Up @@ -104,9 +99,84 @@ def test_annotation_extraction_value_error_exception(http_annotation_task_gene,
assert len(actual_extractions) == 0


@pytest.mark.parametrize(
    "test_case", [('VMA21', GenomicUnitType.GENE, 'Entrez Gene Id'),
                  ('NM_001017980.3:c.164G>T', GenomicUnitType.HGVS_VARIANT, 'ClinVar_Variantion_Id')]
)
def test_annotation_versioning_task_created(test_case, get_annotation_unit):
    """Verifies that the annotation task factory creates the correct version annotation task for the annotation unit"""
    # The 'get_annotation_unit' factory takes only the genomic unit and the dataset name; it looks the
    # genomic unit type up itself, so the type in the test case is not passed along.
    genomic_unit, _genomic_unit_type, dataset_name = test_case
    annotation_unit = get_annotation_unit(genomic_unit, dataset_name)
    actual_task = AnnotationTaskFactory.create_version_task(annotation_unit)
    assert isinstance(actual_task, VersionAnnotationTask)


@pytest.mark.parametrize(
    "genomic_unit,genomic_unit_type,dataset_name,expected", [
        ('VMA21', GenomicUnitType.GENE, 'Entrez Gene Id', {"rosalution": "rosalution-manifest-00"}),
        (
            'NM_001017980.3:c.164G>T', GenomicUnitType.HGVS_VARIANT, 'ClinVar_Variantion_Id',
            {"rosalution": "rosalution-manifest-00"}
        ),
        ('VMA21', GenomicUnitType.GENE, 'Ensembl Gene Id', {"releases": [112]}),
        ('NM_001017980.3:c.164G>T', GenomicUnitType.HGVS_VARIANT, 'Polyphen Prediction', {"releases": [112]}),
        ('VMA21', GenomicUnitType.GENE, 'HPO_NCBI_GENE_ID', {"date": "rosalution-temp-manifest-00"}),
        ('LMNA', GenomicUnitType.GENE, 'OMIM', {"date": "rosalution-temp-manifest-00"}),
    ]
)
def test_process_annotation_versioning_all_types(
    genomic_unit, genomic_unit_type, dataset_name, expected, get_version_task
):
    """Verifies that Version Annotation Tasks process and annotate for all 3 versioning types- date, rest, rosalution"""
    task = get_version_task(genomic_unit, dataset_name)

    # The 'rest' versioning type queries a remote service; the request is patched so this unit test does
    # not depend on the network or on the release that the live service currently reports.
    with patch("requests.get") as requests_get:
        requests_get.return_value.json.return_value = {"releases": [112]}
        actual_version_json = task.annotate()

    assert actual_version_json == expected


@pytest.mark.parametrize(
    "test_case",
    [
        (
            'VMA21',
            GenomicUnitType.GENE,
            'Entrez Gene Id',
        ),
        # ('NM_001017980.3:c.164G>T', GenomicUnitType.HGVS_VARIANT, 'ClinVar_Variantion_Id')
    ]
)
def test_version_extraction(test_case, get_annotation_unit):
    """Verifies that the version is extracted from the version data that the version task's annotate returns"""
    # The 'get_annotation_unit' factory takes only the genomic unit and the dataset name; it looks the
    # genomic unit type up itself, so the type in the test case is not passed along.
    genomic_unit, _genomic_unit_type, dataset_name = test_case
    annotation_unit = get_annotation_unit(genomic_unit, dataset_name)
    version_task = AnnotationTaskFactory.create_version_task(annotation_unit)
    version_json = version_task.annotate()
    actual_version_extraction = version_task.extract_version(version_json)
    # NOTE(review): the expected value is kept as written; confirm it still matches what 'extract_version'
    # produces for the version data returned for this dataset's versioning type.
    assert actual_version_extraction == "rosalution-temp-manifest-00"


## Fixtures ##


@pytest.fixture(name="get_version_task")
def get_version_annotation_task(get_annotation_unit, genomic_units_with_types):
    """Fixture factory method to create a VersionAnnotationTask from a genomic unit and the name of a dataset"""

    def _create_version_task(genomic_unit, dataset_name):
        """Creates the version task for the annotation unit of the genomic unit and dataset"""
        return VersionAnnotationTask(get_annotation_unit(genomic_unit, dataset_name))

    return _create_version_task


# @pytest.fixture(name="get_patch")
# def fixture_requests_patch():
# with(
# patch("requests.get") as requests_get
# ):
# yield: requests_get


@pytest.fixture(name="gene_ncbi_linkout_dataset")
def fixture_ncbi_linkout_dataset():
"""
Expand Down

0 comments on commit 4d277ab

Please sign in to comment.