diff --git a/backend/src/core/annotation.py b/backend/src/core/annotation.py index a480b5f8..1dbf4e95 100644 --- a/backend/src/core/annotation.py +++ b/backend/src/core/annotation.py @@ -83,7 +83,7 @@ def queue_annotation_tasks(self, analysis: Analysis, annotation_task_queue: Anno annotation_task_queue.put(annotation_unit_queued) @staticmethod - def process_tasks(annotation_queue, genomic_unit_collection): # pylint: disable=too-many-locals + def process_tasks(annotation_queue, genomic_unit_collection): # pylint: disable=too-many-branches """Processes items that have been added to the queue""" logger.info("%s Processing annotation tasks queue ...", annotation_log_label()) @@ -92,11 +92,10 @@ def process_tasks(annotation_queue, genomic_unit_collection): # pylint: disable while not annotation_queue.empty(): annotation_unit = annotation_queue.get() - if not annotation_unit.version_exists(): # version = "" + if not annotation_unit.version_exists(): version_task = AnnotationTaskFactory.create_version_task(annotation_unit) logger.info('%s Creating Task To Version...', format_annotation_logging(annotation_unit)) annotation_task_futures[executor.submit(version_task.annotate)] = version_task - version_task_created = True else: logger.info( '%s Version queried according to configuration, now to check if it exists in database', @@ -108,15 +107,14 @@ def process_tasks(annotation_queue, genomic_unit_collection): # pylint: disable if annotation_unit.has_dependencies(): missing_dependencies = annotation_unit.get_missing_dependencies() - for missing in missing_dependencies: + for missing_dataset_name in missing_dependencies: annotation_value = genomic_unit_collection.find_genomic_unit_annotation_value( - annotation_unit.genomic_unit, missing + annotation_unit.genomic_unit, missing_dataset_name ) if annotation_value: - annotation_unit.set_annotation_for_dependency(missing, annotation_value) - ready_with_all_dependencies = annotation_unit.ready_for_annotation() + 
annotation_unit.set_annotation_for_dependency(missing_dataset_name, annotation_value) - if not ready_with_all_dependencies: + if not annotation_unit.conditions_met_to_gather_annotation(): if annotation_unit.should_continue_annotation(): logger.info( '%s Delaying Annotation, Missing %s Dependencies...', diff --git a/backend/src/core/annotation_task.py b/backend/src/core/annotation_task.py index 3cf45f42..9d23a339 100644 --- a/backend/src/core/annotation_task.py +++ b/backend/src/core/annotation_task.py @@ -65,7 +65,8 @@ def extract(self, json_result): if 'attribute' in self.annotation_unit.dataset: # pylint: disable=too-many-nested-blocks annotation_unit_json = { "data_set": self.annotation_unit.dataset['data_set'], - "data_source": self.annotation_unit.dataset['data_source'], "value": "", "version": "" + "data_source": self.annotation_unit.dataset['data_source'], "value": "", + "version": self.annotation_unit.version } replaced_attributes = self.aggregate_string_replacements(self.annotation_unit.dataset['attribute']) diff --git a/backend/src/core/annotation_unit.py b/backend/src/core/annotation_unit.py index 00b19e3e..f2c5637a 100644 --- a/backend/src/core/annotation_unit.py +++ b/backend/src/core/annotation_unit.py @@ -39,13 +39,17 @@ def get_missing_dependencies(self): if they are missing from the genomic_unit """ missing_dependencies = [] + + if 'dependencies' not in self.dataset: + return missing_dependencies + for dependency in self.dataset['dependencies']: if dependency not in self.genomic_unit: missing_dependencies = [dependency] return missing_dependencies - def ready_for_annotation(self): + def conditions_met_to_gather_annotation(self): """ Checks for annotation unit is ready for annotation and calls the assign_annotation_value_to_dependency() function if ready @@ -53,11 +57,11 @@ def ready_for_annotation(self): missing_dependencies = self.get_missing_dependencies() return len(missing_dependencies) == 0 - def set_annotation_for_dependency(self, 
missing_dependency, dependency_annotation): + def set_annotation_for_dependency(self, missing_dependency_name, dependency_annotation_value): """ Assigns annotation value to the genomic unit's missing dependency """ - self.genomic_unit[missing_dependency] = dependency_annotation + self.genomic_unit[missing_dependency_name] = dependency_annotation_value def should_continue_annotation(self): """ diff --git a/backend/src/repository/genomic_unit_collection.py b/backend/src/repository/genomic_unit_collection.py index 58756897..cba6e142 100644 --- a/backend/src/repository/genomic_unit_collection.py +++ b/backend/src/repository/genomic_unit_collection.py @@ -23,7 +23,7 @@ def all(self): """ Returns all genomic units that are currently stored """ return self.collection.find() - def annotation_exist(self, annotation_unitt): + def annotation_exist(self, genomic_unit, dataset): """ Returns true if the genomic_unit already has that dataset annotated """ data_set_name = dataset['data_set'] find_query = { @@ -109,6 +109,9 @@ def annotate_genomic_unit(self, genomic_unit, genomic_annotation): that can be sent to mongo to update the genomic unit's document in the collection """ + # logger.info(f"{genomic_unit}") + # logger.info(f"{genomic_annotation}") + # logger.info("Updating with the above information") annotation_data_set = { genomic_annotation['data_set']: [{ 'data_source': genomic_annotation['data_source'], diff --git a/backend/tests/integration/test_analysis_routers.py b/backend/tests/integration/test_analysis_routers.py index 846bdb43..54671f65 100644 --- a/backend/tests/integration/test_analysis_routers.py +++ b/backend/tests/integration/test_analysis_routers.py @@ -67,7 +67,7 @@ def test_import_analysis_with_phenotips_json( # pylint: disable=too-many-argumen phenotips_file.close() - assert mock_annotation_queue.put.call_count == 49 + assert mock_annotation_queue.put.call_count == 7 mock_background_add_task.assert_called_once_with( AnnotationService.process_tasks, 
mock_annotation_queue, mock_repositories['genomic_unit'] diff --git a/backend/tests/unit/core/test_annotate.py b/backend/tests/unit/core/test_annotate.py index 9ccc7bb8..9ee26214 100644 --- a/backend/tests/unit/core/test_annotate.py +++ b/backend/tests/unit/core/test_annotate.py @@ -46,12 +46,16 @@ def test_processing_cpam0002_annotations_tasks(process_cpam0002_tasks): def test_processing_cpam0002_annotation_tasks_for_datasets_with_dependencies(process_cpam0002_tasks): - """Tests that the dependencies will put the annotation task back onto the processing queue when its missing a depedency""" + """ + Tests that the dependencies will put the annotation task back onto the processing queue when it's missing a + dependency + """ assert process_cpam0002_tasks['genomic_unit_collection'].find_genomic_unit_annotation_value.call_count == 4 def test_processing_cpam0002_datasets_with_dependencies(cpam0002_annotation_queue, process_cpam0002_tasks): + """ Confirms that the datasets with dependencies configured to annotate for analysis CPAM0002 are processed """ assert cpam0002_annotation_queue.empty() assert process_cpam0002_tasks['http'].call_count == 5 @@ -62,6 +66,7 @@ def test_processing_cpam0002_datasets_with_dependencies(cpam0002_annotation_queu def test_processing_cpam0002_version_annotation_tasks(process_cpam0002_tasks): + """ Asserts that each dataset configured to annotate for analysis CPAM0002 calculates the dataset's version. """ assert process_cpam0002_tasks['version'].call_count == 7 @@ -76,8 +81,12 @@ def fixture_cpam0046_hgvs_variant(cpam0046_analysis): return unit + @pytest.fixture(name="process_cpam0002_tasks") def fixture_extract_and_annotate_cpam0002(cpam0002_annotation_queue): + """ + Emulates processing the annotations for the configured genomic unit's datasets within the CPAM0002 analysis. 
+ """ mock_extract_result = [{ 'data_set': 'mock_datset', 'data_source': 'mock_source', @@ -85,43 +94,51 @@ def fixture_extract_and_annotate_cpam0002(cpam0002_annotation_queue): 'value': '9000', }] - with( - patch("src.core.annotation_task.AnnotationTaskInterface.extract", return_value=mock_extract_result) as extract_task_annotate, - patch("src.core.annotation_task.VersionAnnotationTask.annotate") as version_task_annotate, - patch("src.core.annotation_task.ForgeAnnotationTask.annotate") as forge_task_annotate, + with ( + patch("src.core.annotation_task.AnnotationTaskInterface.extract", return_value=mock_extract_result) as + extract_task_annotate, patch("src.core.annotation_task.VersionAnnotationTask.annotate") as + version_task_annotate, patch("src.core.annotation_task.ForgeAnnotationTask.annotate") as forge_task_annotate, patch("src.core.annotation_task.HttpAnnotationTask.annotate") as http_task_annotate, patch("src.core.annotation_task.NoneAnnotationTask.annotate") as none_task_annotate ): skip_depends = SkipDepedencies() mock_genomic_unit_collection = Mock(spec=GenomicUnitCollection) - mock_genomic_unit_collection.find_genomic_unit_annotation_value.side_effect = skip_depends.skip_hgncid_get_value_first_time_mock + mock_genomic_unit_collection.find_genomic_unit_annotation_value.side_effect = ( + skip_depends.skip_hgncid_get_value_first_time_mock + ) mock_genomic_unit_collection.annotation_exist.return_value = False AnnotationService.process_tasks(cpam0002_annotation_queue, mock_genomic_unit_collection) yield { - 'extract': extract_task_annotate, - 'version': version_task_annotate, - 'http': http_task_annotate, - 'none': none_task_annotate, - 'forge': forge_task_annotate, + 'extract': extract_task_annotate, 'version': version_task_annotate, 'http': http_task_annotate, + 'none': none_task_annotate, 'forge': forge_task_annotate, 'genomic_unit_collection': mock_genomic_unit_collection } -class SkipDepedencies: +# Disabling PyLint due to this being a simple Mock 
adapter as a simple test harness for emulating missing a dependency +class SkipDepedencies: # pylint: disable=too-few-public-methods + """ A skip annotation dependencies helper class that allows tester to dictate which datasets to skip once to + emulate a dependency not existing the first time when preparing an Annotation Task for annotation.""" - def __init__(self, dependencies_to_skip=["HGNC_ID"]): + def __init__(self, dependencies_to_skip=None): + """ Dictating the list of dataset names to emulate that dataset annotation not existing.""" self.skip_tracker = {} - self.to_skip = dependencies_to_skip + self.to_skip = dependencies_to_skip if dependencies_to_skip else ["HGNC_ID"] def skip_hgncid_get_value_first_time_mock(self, *args): + """ Mock method that tracks if the provided dependencies are one of the ones indicated to skip""" unit, name = args should_skip = (name in self.to_skip and name not in self.skip_tracker) return self.skip_tracker.setdefault(name, None) if should_skip else f"{unit['unit']}-{name}-value" + @pytest.fixture(name="process_cpam0046_tasks") def fixture_extract_and_annotate_cpam0046(cpam0046_annotation_queue): + """ + Emulates processing the annotations for the configured genomic unit's datasets within the CPAM0046 analysis. 
+ """ mock_extract_result = [{ 'data_set': 'mock_datset', 'data_source': 'mock_source', @@ -129,24 +146,23 @@ def fixture_extract_and_annotate_cpam0046(cpam0046_annotation_queue): 'value': '9000', }] - with( - patch("src.core.annotation_task.AnnotationTaskInterface.extract", return_value=mock_extract_result) as extract_task_annotate, - patch("src.core.annotation_task.VersionAnnotationTask.annotate") as version_task_annotate, - patch("src.core.annotation_task.ForgeAnnotationTask.annotate") as forge_task_annotate, + with ( + patch("src.core.annotation_task.AnnotationTaskInterface.extract", return_value=mock_extract_result) as + extract_task_annotate, patch("src.core.annotation_task.VersionAnnotationTask.annotate") as + version_task_annotate, patch("src.core.annotation_task.ForgeAnnotationTask.annotate") as forge_task_annotate, patch("src.core.annotation_task.HttpAnnotationTask.annotate") as http_task_annotate, patch("src.core.annotation_task.NoneAnnotationTask.annotate") as none_task_annotate ): skip_depends = SkipDepedencies() mock_genomic_unit_collection = Mock(spec=GenomicUnitCollection) - mock_genomic_unit_collection.find_genomic_unit_annotation_value.side_effect = skip_depends.skip_hgncid_get_value_first_time_mock + mock_genomic_unit_collection.find_genomic_unit_annotation_value.side_effect = ( + skip_depends.skip_hgncid_get_value_first_time_mock + ) mock_genomic_unit_collection.annotation_exist.return_value = False AnnotationService.process_tasks(cpam0046_annotation_queue, mock_genomic_unit_collection) yield { - 'extract': extract_task_annotate, - 'version': version_task_annotate, - 'http': http_task_annotate, - 'none': none_task_annotate, - 'forge': forge_task_annotate, + 'extract': extract_task_annotate, 'version': version_task_annotate, 'http': http_task_annotate, + 'none': none_task_annotate, 'forge': forge_task_annotate, 'genomic_unit_collection': mock_genomic_unit_collection } diff --git a/backend/tests/unit/core/test_annotation_unit.py 
b/backend/tests/unit/core/test_annotation_unit.py index b8a4ff2c..e68d3c3e 100644 --- a/backend/tests/unit/core/test_annotation_unit.py +++ b/backend/tests/unit/core/test_annotation_unit.py @@ -1,5 +1,4 @@ """Tests for annotation unit class""" -from unittest.mock import Mock import pytest from src.core.annotation_unit import AnnotationUnit @@ -11,31 +10,17 @@ def test_annotation_unit_gets_missing_dependencies(annotation_unit_lmna): assert actual == ['HGNC_ID'] -def test_annotation_unit_ready_for_annotation(annotation_unit_lmna): +def test_annotation_unit_ready_for_annotation(annotation_unit_has_dependency): """Verifies if the annotation unit is ready for annotation""" - missing = annotation_unit_lmna.get_missing_dependencies() - missing_dependency = missing[0] - mock_genomic_unit_collection = Mock() - mock_genomic_unit_collection.find_genomic_unit_annotation_value = Mock() - dependency_annotation = mock_genomic_unit_collection.find_genomic_unit_annotation_value( - annotation_unit_lmna.genomic_unit, missing_dependency - ) - - actual = annotation_unit_lmna.ready_for_annotation(dependency_annotation, missing_dependency) + actual = annotation_unit_has_dependency.conditions_met_to_gather_annotation() assert actual is True def test_annotation_unit_not_ready_for_annotation(annotation_unit_lmna): """Verifies if the annotation unit is not ready for annotation""" - missing = annotation_unit_lmna.get_missing_dependencies() - missing_dependency = missing[0] - mock_genomic_unit_collection = Mock() - mock_genomic_unit_collection.find_genomic_unit_annotation_value = Mock() - dependency_annotation = "" - - actual = annotation_unit_lmna.ready_for_annotation(dependency_annotation, missing_dependency) + actual = annotation_unit_lmna.conditions_met_to_gather_annotation() assert actual is False @@ -69,13 +54,15 @@ def fixture_annotation_unit_lmna(): return AnnotationUnit(genomic_unit, dataset) +@pytest.fixture(name="annotation_unit_has_dependency") +def 
fixture_annotation_unit_lmna_has_dependencies(annotation_unit_lmna): + """Provides annotation unit that has all of its dependencies gathered""" + annotation_unit_lmna.set_annotation_for_dependency("HGNC_ID", "FAKE_HGNC_ID_VALUE") + return annotation_unit_lmna + + @pytest.fixture(name="annotation_unit_lmna_exceeded_delay_count") -def fixture_annotation_unit_lmna_with_annotated_dependency(): +def fixture_annotation_unit_lmna_with_annotated_dependency(annotation_unit_lmna): """Returns the annotation unit for the genomic unit LMNA and the dataset Clingen gene url""" - genomic_unit = {'unit': 'LMNA'} - dataset = { - "data_set": "ClinGen_gene_url", "data_source": "Rosalution", "genomic_unit_type": "gene", - "annotation_source_type": "forge", "base_string": "https://search.clinicalgenome.org/kb/genes/{HGNC_ID}", - "attribute": "{ \"ClinGen_gene_url\": .ClinGen_gene_url }", "dependencies": ["HGNC_ID"], "delay_count": 10 - } - return AnnotationUnit(genomic_unit, dataset) + annotation_unit_lmna.dataset['delay_count'] = 10 + return annotation_unit_lmna diff --git a/backend/tests/unit/repository/test_annotation_collection.py b/backend/tests/unit/repository/test_annotation_collection.py index 70a32c34..57f942e4 100644 --- a/backend/tests/unit/repository/test_annotation_collection.py +++ b/backend/tests/unit/repository/test_annotation_collection.py @@ -8,14 +8,14 @@ def test_get_datasets_configuration_by_type(annotation_config_collection): """Tests getting the datasets for the provided types of genomic units""" types = set({GenomicUnitType.GENE, GenomicUnitType.HGVS_VARIANT}) datasets = annotation_config_collection.datasets_to_annotate_by_type(types) - assert len(datasets) == 49 + assert len(datasets) == 7 def test_get_datasets_to_annotate_for_units(annotation_config_collection, genomic_units_for_annotation): """Tests if the configuration for datasets is return as expected""" actual_configuration = 
annotation_config_collection.datasets_to_annotate_for_units(genomic_units_for_annotation) - assert len(actual_configuration["gene"]) == 39 - assert len(actual_configuration["hgvs_variant"]) == 10 + assert len(actual_configuration["gene"]) == 6 + assert len(actual_configuration["hgvs_variant"]) == 1 @pytest.fixture(name="genomic_units_for_annotation")