Skip to content

Commit

Permalink
WIP to get annotation by analysis name
Browse files Browse the repository at this point in the history
  • Loading branch information
SeriousHorncat committed Sep 9, 2024
1 parent f7395cb commit 0c9d6c1
Show file tree
Hide file tree
Showing 11 changed files with 100 additions and 21 deletions.
6 changes: 3 additions & 3 deletions backend/src/core/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import logging
import queue

from backend.src.repository.analysis_collection import AnalysisCollection
from backend.src.repository.genomic_unit_collection import GenomicUnitCollection
from ..repository.analysis_collection import AnalysisCollection
from ..repository.genomic_unit_collection import GenomicUnitCollection

from .annotation_task import AnnotationTaskFactory, VersionAnnotationTask
from ..models.analysis import Analysis
Expand Down Expand Up @@ -108,7 +108,7 @@ def process_tasks(annotation_queue: AnnotationQueue, analysis_name: str, genomic
missing_dependencies = annotation_unit.get_missing_dependencies()
for missing_dataset_name in missing_dependencies:
# missing_dataset_name
dependency_dataset = analysis_collection.get_manifest_dataset(analysis_name, missing_dataset_name)
dependency_dataset = analysis_collection.get_manifest_dataset_config(analysis_name, missing_dataset_name)
dependency_annotation_unit = AnnotationUnit(annotation_unit.genomic_unit, dependency_dataset)
annotation_value = genomic_unit_collection.find_genomic_unit_annotation_value(
dependency_annotation_unit
Expand Down
2 changes: 1 addition & 1 deletion backend/src/core/annotation_unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class AnnotationUnit:
def __init__(self, genomic_unit, dataset):
self.genomic_unit = copy.deepcopy(genomic_unit)
self.dataset = dataset
self.version = "" if not dataset['version'] else dataset['version']
self.version = ""

def get_genomic_unit(self):
"""Returs 'unit' from genomic_unit"""
Expand Down
9 changes: 9 additions & 0 deletions backend/src/core/dataset_manifest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@


class DatasetManifest:
    """A helper to streamline getting the config for a dataset.

    Wraps an analysis manifest. The analysis fixtures store the manifest as a
    list of single-entry mappings, each keyed by a dataset name whose value
    records the 'data_source' and 'version' for that dataset.
    """

    def __init__(self, manifest):
        self.manifest = manifest

    def get_dataset_config(self, dataset_name):
        """Returns the config recorded for the dataset, or None when the manifest has no entry for it.

        The returned dict carries the keys 'data_set', 'data_source' and 'version'.
        """
        # A manifest that was never populated (None) is treated as having no entries.
        for entry in self.manifest or []:
            if dataset_name in entry:
                recorded = entry[dataset_name]
                return {
                    "data_set": dataset_name,
                    "data_source": recorded['data_source'],
                    "version": recorded['version'],
                }

        return None


21 changes: 18 additions & 3 deletions backend/src/repository/analysis_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from pymongo import ReturnDocument

from backend.src.core.annotation_unit import AnnotationUnit
from ..core.annotation_unit import AnnotationUnit

from ..models.analysis import Section
from ..models.event import Event
Expand Down Expand Up @@ -148,13 +148,28 @@ def add_dataset_to_manifest(self, analysis_name: str, annotation_unit: Annotatio

return updated_document['manifest']

def get_manifest_dataset_config(self, analysis_name: str, dataset_name: str):
    """Returns the dataset's manifest config for the analysis, or None when it is not recorded.

    The returned dict carries the keys 'data_set', 'data_source' and 'version'.
    """
    dataset_attribute = f"manifest.{dataset_name}"
    analysis = self.collection.find_one({
        "name": analysis_name,
        dataset_attribute: {'$exists': True},
    })

    if not analysis:
        return None

    # find_one hands back the whole analysis document, so the dataset's entry has to be picked
    # out of its 'manifest' list (a list of single-key dicts in the analysis fixtures) rather
    # than read off the document root.
    manifest_entry = next((entry for entry in analysis.get('manifest', []) if dataset_name in entry), None)
    if manifest_entry is None:
        return None

    recorded = manifest_entry[dataset_name]
    return {
        "data_set": dataset_name,
        "data_source": recorded['data_source'],
        "version": recorded['version'],
    }

def get_dataset_manifest(self, analysis_name: str):
    """Returns the manifest stored on the named analysis, or None when no analysis has that name"""
    analysis = self.find_by_name(analysis_name)
    if analysis is None:
        # Explicit None keeps both exits of the method returning a value.
        return None

    return analysis['manifest']

def create_analysis(self, analysis_data: dict):
"""Creates a new analysis if the name does not already exist"""
Expand Down
15 changes: 10 additions & 5 deletions backend/src/routers/annotation_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def annotate_analysis(
return {"name": f"{name} annotations queued."}


@router.get("/gene/{gene}")
@router.get("/gene/{gene}/analysis/{analysis_name}")
def get_annotations_by_gene(gene, repositories=Depends(database)):
"""Returns annotations data by calling method to find annotations by gene"""

Expand All @@ -72,8 +72,8 @@ def get_annotations_by_gene(gene, repositories=Depends(database)):
return annotations


@router.get("/hgvsVariant/{variant}/analysis/CPAM0002")
def get_annotations_by_hgvs_variant(variant: str, repositories=Depends(database)):
@router.get("/hgvsVariant/{variant}/analysis/{analysis_name}")
def get_annotations_by_hgvs_variant(variant: str, analysis_name: str, repositories=Depends(database)):
"""Returns annotations data by calling method to find annotations for variant and relevant transcripts
by HGVS Variant"""

Expand All @@ -82,6 +82,7 @@ def get_annotations_by_hgvs_variant(variant: str, repositories=Depends(database)
'unit': variant,
}

dataset_manifest = repositories["analyses"].get_dataset_manifest(analysis_name)
queried_genomic_unit = repositories["genomic_unit"].find_genomic_unit(genomic_unit)

if queried_genomic_unit is None:
Expand All @@ -90,8 +91,12 @@ def get_annotations_by_hgvs_variant(variant: str, repositories=Depends(database)
annotations = {}
for annotation in queried_genomic_unit['annotations']:
for dataset in annotation:
if len(annotation[dataset]) > 0:
annotations[dataset] = annotation[dataset][0]['value']
dataset_config = next((config for config in dataset_manifest if dataset in config), None)
if dataset_config is None:
continue
"annotation[dataset][0]['value']"
found_dataset = next((by_version for by_version in annotation[dataset] if dataset in dataset_manifest), None)
annotations[dataset] =

transcript_annotation_list = []
for transcript_annotation in queried_genomic_unit['transcripts']:
Expand Down
5 changes: 5 additions & 0 deletions backend/tests/fixtures/analysis-CPAM0002.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@
"data_source": "Ensembl",
"version": "rosalution-manifest-00"
}
},{
"HGNC_ID": {
"data_source": "Ensembl",
"version": "rosalution-manifest-00"
}
}
],
"genomic_units": [
Expand Down
7 changes: 6 additions & 1 deletion backend/tests/fixtures/analysis-CPAM0046.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,12 @@
"data_source": "Ensembl",
"version": "rosalution-manifest-00"
}
}],
},{
"HGNC_ID": {
"data_source": "Ensembl",
"version": "rosalution-manifest-00"
}
}],
"genomic_units":[
{
"gene":"LMNA",
Expand Down
7 changes: 6 additions & 1 deletion backend/tests/fixtures/analysis-CPAM0047.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,12 @@
"data_source": "Ensembl",
"version": "rosalution-manifest-00"
}
}],
},{
"HGNC_ID": {
"data_source": "Ensembl",
"version": "rosalution-manifest-00"
}
}],
"genomic_units":[
{
"gene":"SBF1",
Expand Down
5 changes: 5 additions & 0 deletions backend/tests/fixtures/analysis-CPAM0112.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@
"data_source": "Ensembl",
"version": "rosalution-manifest-00"
}
},{
"HGNC_ID": {
"data_source": "Ensembl",
"version": "rosalution-manifest-00"
}
}],
"genomic_units": [
{
Expand Down
22 changes: 22 additions & 0 deletions backend/tests/unit/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,28 @@ def fixture_annotation_config_collection(annotation_config_collection_json):
mock_collection.find_one = Mock(return_value=read_test_fixture("annotations-config.json"))
return AnnotationConfigCollection(mock_collection)

@pytest.fixture(name='get_dataset_manifest_config')
def get_dataset_manifest_config(analysis_collection_json):
    """Fixture factory that looks up a dataset's manifest config within one of the analysis fixtures."""

    def _create_dataset_manifest(analysis_name, dataset_name):
        """Builds the 'data_set'/'data_source'/'version' config for the dataset from the analysis manifest"""

        analysis_json = next((item for item in analysis_collection_json if item['name'] == analysis_name), None)
        if analysis_json is None:
            # Fail with the offending name instead of a TypeError from unpacking None below.
            raise ValueError(f"No analysis fixture named '{analysis_name}'")

        analysis = Analysis(**analysis_json)
        dataset_manifest = next((item for item in analysis.manifest if dataset_name in item), None)
        if dataset_manifest is None:
            raise ValueError(f"Dataset '{dataset_name}' is not in the manifest of analysis '{analysis_name}'")

        recorded = dataset_manifest[dataset_name]
        return {
            "data_set": dataset_name,
            "data_source": recorded['data_source'],
            "version": recorded['version'],
        }

    return _create_dataset_manifest

@pytest.fixture(name='get_annotation_unit')
def get_standard_annotation_unit(annotation_config_collection_json):
Expand Down
22 changes: 15 additions & 7 deletions backend/tests/unit/core/test_annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
from unittest.mock import Mock, patch
import pytest

from src.core.annotation_unit import AnnotationUnit
from src.core.annotation import AnnotationService
from src.enums import GenomicUnitType
from src.repository.analysis_collection import AnalysisCollection
from src.repository.genomic_unit_collection import GenomicUnitCollection


Expand Down Expand Up @@ -81,7 +83,7 @@ def fixture_cpam0046_hgvs_variant(cpam0046_analysis):


@pytest.fixture(name="process_cpam0002_tasks")
def fixture_extract_and_annotate_cpam0002(cpam0002_annotation_queue):
def fixture_extract_and_annotate_cpam0002(cpam0002_annotation_queue, get_dataset_manifest_config):
"""
Emulates processing the annotations for the configured genomic unit's datasets within the CPAM0002 analysis.
"""
Expand All @@ -101,12 +103,14 @@ def fixture_extract_and_annotate_cpam0002(cpam0002_annotation_queue):
):
skip_depends = SkipDepedencies()
mock_genomic_unit_collection = Mock(spec=GenomicUnitCollection)
mock_analysis_collection = Mock(spec=AnalysisCollection)
mock_genomic_unit_collection.find_genomic_unit_annotation_value.side_effect = (
skip_depends.skip_hgncid_get_value_first_time_mock
)
mock_analysis_collection.get_manifest_dataset_config.return_value = get_dataset_manifest_config("CPAM0002",'HGNC_ID')
mock_genomic_unit_collection.annotation_exist.return_value = False

AnnotationService.process_tasks(cpam0002_annotation_queue, mock_genomic_unit_collection)
AnnotationService.process_tasks(cpam0002_annotation_queue, "CPAM0002", mock_genomic_unit_collection,mock_analysis_collection)
yield {
'extract': extract_task_annotate, 'version': version_task_annotate, 'http': http_task_annotate,
'none': none_task_annotate, 'forge': forge_task_annotate,
Expand All @@ -126,14 +130,15 @@ def __init__(self, dependencies_to_skip=None):

def skip_hgncid_get_value_first_time_mock(self, *args):
""" Mock method that tracks if the provided dependencies are one of the ones indicated to skip"""
unit, name = args

annotation_unit = args[0]
name = annotation_unit.get_dataset_name()
genomic_unit = annotation_unit.get_genomic_unit()
should_skip = (name in self.to_skip and name not in self.skip_tracker)
return self.skip_tracker.setdefault(name, None) if should_skip else f"{unit['unit']}-{name}-value"
return self.skip_tracker.setdefault(name, None) if should_skip else f"{genomic_unit}-{name}-value"


@pytest.fixture(name="process_cpam0046_tasks")
def fixture_extract_and_annotate_cpam0046(cpam0046_annotation_queue):
def fixture_extract_and_annotate_cpam0046(cpam0046_annotation_queue, get_dataset_manifest_config):
"""
Emulates processing the annotations for the configured genomic unit's datasets within the CPAM0046 analysis.
"""
Expand All @@ -153,12 +158,15 @@ def fixture_extract_and_annotate_cpam0046(cpam0046_annotation_queue):
):
skip_depends = SkipDepedencies()
mock_genomic_unit_collection = Mock(spec=GenomicUnitCollection)
mock_analysis_collection = Mock(spec=AnalysisCollection)
mock_genomic_unit_collection.find_genomic_unit_annotation_value.side_effect = (
skip_depends.skip_hgncid_get_value_first_time_mock
)
dependency_dataset = get_dataset_manifest_config("CPAM0046", 'HGNC_ID')
mock_analysis_collection.get_manifest_dataset_config.return_value = dependency_dataset
mock_genomic_unit_collection.annotation_exist.return_value = False

AnnotationService.process_tasks(cpam0046_annotation_queue, mock_genomic_unit_collection)
AnnotationService.process_tasks(cpam0046_annotation_queue, "CPAM0046", mock_genomic_unit_collection, mock_analysis_collection)
yield {
'extract': extract_task_annotate, 'version': version_task_annotate, 'http': http_task_annotate,
'none': none_task_annotate, 'forge': forge_task_annotate,
Expand Down

0 comments on commit 0c9d6c1

Please sign in to comment.