Added back the feature to append annotations to an existing dataset within genomic units
uab-cgds-worthey · Sep 26, 2024 · a60be14 · a60be14
1 parent 955ebdc
commit a60be14
Show file tree

Hide file tree

Showing 6 changed files with 126 additions and 33 deletions.
diff --git a/backend/src/core/annotation_unit.py b/backend/src/core/annotation_unit.py
@@ -12,7 +12,7 @@ def __init__(self, genomic_unit, dataset):
         self.version = ""
 
     def get_genomic_unit(self):
-        """Returs 'unit' from genomic_unit"""
+        """Returns 'unit' from genomic_unit"""
         return self.genomic_unit['unit']
 
     def get_dataset_name(self):

diff --git a/backend/src/repository/genomic_unit_collection.py b/backend/src/repository/genomic_unit_collection.py
@@ -26,6 +26,18 @@ def transcript_unit_exist(dataset, data_source, version, annotation):
     return annotation_unit_match is not None
 
 
+def create_annotation_entry(dataset_name, processed_annotation):
+    """ Helper method that restructures a dataset and the queried annotation into an entry for MongoDB"""
+    annotation_entry = {
+        'data_source': processed_annotation['data_source'],
+        'version': processed_annotation['version'],
+        'value': processed_annotation['value'],
+    }
+    new_dataset_entry = {dataset_name: [annotation_entry]}
+
+    return new_dataset_entry, annotation_entry
+
+
 class GenomicUnitCollection:
     """ Repository for managing genomic units and their annotations """
 
@@ -134,7 +146,7 @@ def add_transcript_to_genomic_unit(self, genomic_unit, transcript_id):
             {'$addToSet': {'transcripts': {'transcript_id': transcript_id, 'annotations': []}}},
         )
 
-    def update_genomic_unit_annotation_by_mongo_id(self, genomic_unit_document):
+    def __update_genomic_unit_by_mongo_id(self, genomic_unit_document):
         """ Takes a genomic unit and overwrites the existing object based on the object's id """
         genomic_unit_id = genomic_unit_document['_id']
 
@@ -145,42 +157,58 @@ def update_genomic_unit_annotation_by_mongo_id(self, genomic_unit_document):
     def annotate_genomic_unit(self, genomic_unit, genomic_annotation):
         """
         Takes a genomic_unit from an annotation task as well as a genomic_annotation and arranges them in a pattern
-        that can be sent to mongo to update the genomic unit's document in the collection
-        """
-
-        annotation_data_set = {
-            genomic_annotation['data_set']: [{
-                'data_source': genomic_annotation['data_source'],
-                'version': genomic_annotation['version'],
-                'value': genomic_annotation['value'],
+        that can be sent to mongo to update the genomic unit's document in the collection. Each annotation is saved
+        under its dataset name in the structure shown below.
+
+        example:
+        {
+            'Entrez Gene Id': [{
+                'data_source': 'Rosalution',
+                'version': 'rosalution-manifest-00',
+                'value': 203547   
             }]
         }
-
-        updated_document = None
+        """
 
         if 'transcript_id' in genomic_annotation:
-            genomic_unit_document = self.find_genomic_unit_with_transcript_id(
-                genomic_unit, genomic_annotation['transcript_id']
-            )
+            transcript_id = genomic_annotation['transcript_id']
+            updated_document = self.__annotate_transcript_dataset(genomic_unit, transcript_id, genomic_annotation)
+            return updated_document
+
+        dataset_name = genomic_annotation['data_set']
+
+        genomic_unit_json = self.find_genomic_unit(genomic_unit)
+        self.__add_to_annotations_from_document(genomic_unit_json['annotations'], dataset_name, genomic_annotation)
+
+        updated_document = self.__update_genomic_unit_by_mongo_id(genomic_unit_json)
+
+        return updated_document
+
+    def __annotate_transcript_dataset(self, genomic_unit, transcript_id: str, genomic_annotation):
+        dataset_name = genomic_annotation['data_set']
+
+        genomic_unit_document = self.find_genomic_unit_with_transcript_id(genomic_unit, transcript_id)
 
-            if not genomic_unit_document:
-                self.add_transcript_to_genomic_unit(genomic_unit, genomic_annotation['transcript_id'])
-                genomic_unit_document = self.find_genomic_unit_with_transcript_id(
-                    genomic_unit, genomic_annotation['transcript_id']
-                )
+        if not genomic_unit_document:
+            self.add_transcript_to_genomic_unit(genomic_unit, transcript_id)
+            genomic_unit_document = self.find_genomic_unit_with_transcript_id(genomic_unit, transcript_id)
 
-            for transcript in genomic_unit_document['transcripts']:
-                if transcript['transcript_id'] == genomic_annotation['transcript_id']:
-                    transcript['annotations'].append(annotation_data_set)
+        for transcript in genomic_unit_document['transcripts']:
+            if transcript["transcript_id"] == transcript_id:
+                self.__add_to_annotations_from_document(transcript['annotations'], dataset_name, genomic_annotation)
 
-            updated_document = self.update_genomic_unit_annotation_by_mongo_id(genomic_unit_document)
+        return self.__update_genomic_unit_by_mongo_id(genomic_unit_document)
 
+    def __add_to_annotations_from_document(self, list_of_annotations, dataset_name, genomic_annotation):
+        new_dataset_entry, annotation_entry = create_annotation_entry(dataset_name, genomic_annotation)
+
+        existing_dataset = next((dataset for dataset in list_of_annotations if dataset_name in dataset), None)
+        if existing_dataset:
+            existing_dataset[dataset_name].append(annotation_entry)
         else:
-            genomic_unit_document = self.find_genomic_unit(genomic_unit)
-            genomic_unit_document['annotations'].append(annotation_data_set)
-            updated_document = self.update_genomic_unit_annotation_by_mongo_id(genomic_unit_document)
+            list_of_annotations.append(new_dataset_entry)
 
-        return updated_document
+        return list_of_annotations
 
     def annotate_genomic_unit_with_file(self, genomic_unit, genomic_annotation):
         """ Ensures that an annotation is created for the annotation image upload and only one image is allowed """
@@ -191,7 +219,7 @@ def annotate_genomic_unit_with_file(self, genomic_unit, genomic_annotation):
         for annotation in genomic_unit_document['annotations']:
             if data_set in annotation:
                 annotation[data_set][0]['value'].append(genomic_annotation['value'])
-                return self.update_genomic_unit_annotation_by_mongo_id(genomic_unit_document)
+                return self.__update_genomic_unit_by_mongo_id(genomic_unit_document)
 
         annotation_data_set = {
             genomic_annotation['data_set']: [{
@@ -202,7 +230,7 @@ def annotate_genomic_unit_with_file(self, genomic_unit, genomic_annotation):
         }
 
         genomic_unit_document['annotations'].append(annotation_data_set)
-        return self.update_genomic_unit_annotation_by_mongo_id(genomic_unit_document)
+        return self.__update_genomic_unit_by_mongo_id(genomic_unit_document)
 
     def update_genomic_unit_file_annotation(self, genomic_unit, data_set, annotation_value, file_id_old):
         """ Replaces existing annotation image with new image """
@@ -217,7 +245,7 @@ def update_genomic_unit_file_annotation(self, genomic_unit, data_set, annotation
                         annotation[data_set][0]['value'].append(annotation_value)
                         break
 
-        self.update_genomic_unit_annotation_by_mongo_id(genomic_unit_document)
+        self.__update_genomic_unit_by_mongo_id(genomic_unit_document)
 
         return
 
@@ -233,7 +261,7 @@ def remove_genomic_unit_file_annotation(self, genomic_unit, data_set, file_id):
                         annotation[data_set][0]['value'].pop(i)
                         break
 
-        return self.update_genomic_unit_annotation_by_mongo_id(genomic_unit_document)
+        return self.__update_genomic_unit_by_mongo_id(genomic_unit_document)
 
     def create_genomic_unit(self, genomic_unit):
         """

diff --git a/backend/tests/fixtures/annotations-NM001017980_3_c_164G_T.json b/backend/tests/fixtures/annotations-NM001017980_3_c_164G_T.json
@@ -1,4 +1,5 @@
 {
+  "_id": "62fbfa5f616a9799131174c9",
   "hgvs_variant": "NM_001017980.3:c.164G>T",
   "transcripts": 
   [

diff --git a/backend/tests/fixtures/annotations-VMA21.json b/backend/tests/fixtures/annotations-VMA21.json
@@ -1,4 +1,5 @@
 {
+  "_id": "62fbfa5f616a979913117477",
   "gene_symbol": "VMA21",
   "gene": "VMA21",
   "annotations": [

diff --git a/backend/tests/unit/repository/test_genomic_unit_collection.py b/backend/tests/unit/repository/test_genomic_unit_collection.py
@@ -175,6 +175,42 @@ def test_annotate_transcript_genomic_unit(genomic_unit_collection):
                                                                                    return_document=ReturnDocument.AFTER)
 
 
+@pytest.mark.parametrize(
+    "prepare_test_annotate", [('VMA21', 'Entrez Gene Id', "rosalution-manifest-01", 203547, False),
+                              ('VMA21', 'Entrez Gene Id', "rosalution-manifest-00", 203550, True),
+                              ('NM_001017980.3:c.164G>T', 'ClinVar_Variantion_Id', "2024-09-06", "581270", False)],
+    indirect=True,
+    ids=["new_VMA21_annotation_for_dataset", "new_VMA21_dataset", "new_variant_annotation"]
+)
+def test_annotate_genomic_unit(prepare_test_annotate, genomic_unit_collection):
+    """
+    Tests if a genomic unit's new annotation either adds a new dataset or adds the annotation to an existing dataset.
+    """
+
+    genomic_unit_json, dataset_name, genomic_annotation, annotation_unit, expected_amount = prepare_test_annotate
+
+    genomic_unit_collection.collection.find_one.return_value = genomic_unit_json
+
+    genomic_unit_collection.annotate_genomic_unit(annotation_unit.genomic_unit, genomic_annotation)
+
+    updated_genomic_unit = genomic_unit_collection.collection.find_one_and_update.call_args_list[0][0][1]['$set']
+    annotations = updated_genomic_unit['annotations']
+    actual_updated_dataset = next((dataset for dataset in annotations if dataset_name in dataset), None)
+
+    assert actual_updated_dataset is not None
+
+    assert len(actual_updated_dataset[dataset_name]) == expected_amount
+
+    def is_entry(entry):
+        return entry['data_source'] == genomic_annotation['data_source'] and entry['version'] == genomic_annotation[
+            'version']
+
+    actual_updated_annotation = next((entry for entry in actual_updated_dataset[dataset_name] if is_entry(entry)), None)
+
+    assert actual_updated_annotation is not None
+    assert actual_updated_annotation['value'] == genomic_annotation['value']
+
+
 def test_annotation_genomic_unit_with_file(genomic_unit_collection, get_annotation_json):
     """ Accepts a file and adds it as an annotation to the given genomic unit """
     genomic_unit = get_annotation_json('NM_001017980.3:c.164G>T', GenomicUnitType.HGVS_VARIANT)
@@ -270,6 +306,33 @@ def test_remove_existing_genomic_unit_file_annotation(genomic_unit_collection, g
     genomic_unit_collection.update_genomic_unit_annotation_by_mongo_id.assert_called_once_with(expected_genomic_unit)
 
 
+@pytest.fixture(name="prepare_test_annotate", scope="function")
+def prepare_test_annotate_genomic_units(request, get_annotation_unit, get_annotation_json):
+    """ Builds the genomic unit JSON, annotation, and expected entry count used by test_annotate_genomic_unit"""
+
+    genomic_unit, dataset_name, version, value, remove_dataset = request.param
+
+    annotation_unit = get_annotation_unit(genomic_unit, dataset_name)
+    annotation_unit.set_latest_version(version)
+
+    genomic_unit_json = get_annotation_json(genomic_unit, annotation_unit.genomic_unit['type'])
+    if remove_dataset:
+        genomic_unit_json['annotations'] = [
+            dataset for dataset in genomic_unit_json['annotations'] if dataset_name not in dataset
+        ]
+
+    genomic_annotation = {
+        "data_set": annotation_unit.get_dataset_name(),
+        "data_source": annotation_unit.get_dataset_source(),
+        "version": annotation_unit.get_version(),
+        "value": value,
+    }
+
+    expected_annotation_amount = 1 if remove_dataset else 2
+
+    return (genomic_unit_json, dataset_name, genomic_annotation, annotation_unit, expected_annotation_amount)
+
+
 @pytest.fixture(name="transcript_annotation_unit", scope="function")
 def variant_with_datasets_annotation_unit(request, get_annotation_unit, get_annotation_json):
     """ Fixture that creates generates the test data for verifying transcript annotation operations"""

diff --git a/etc/fixtures/initial-seed/genomic-units.json b/etc/fixtures/initial-seed/genomic-units.json
@@ -5897,7 +5897,7 @@
             "Polyphen Prediction": [
               {
                 "data_source": "Ensembl",
-                "version": 112,
+                "version": 100,
                 "value": "benign"
               }
             ]