Skip to content

Commit

Permalink
Added back the feature to append annotations to an existing dataset within genomic units
Browse files Browse the repository at this point in the history
  • Loading branch information
SeriousHorncat committed Sep 26, 2024
1 parent 955ebdc commit a60be14
Show file tree
Hide file tree
Showing 6 changed files with 126 additions and 33 deletions.
2 changes: 1 addition & 1 deletion backend/src/core/annotation_unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def __init__(self, genomic_unit, dataset):
self.version = ""

def get_genomic_unit(self):
"""Returs 'unit' from genomic_unit"""
"""Returns 'unit' from genomic_unit"""
return self.genomic_unit['unit']

def get_dataset_name(self):
Expand Down
90 changes: 59 additions & 31 deletions backend/src/repository/genomic_unit_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,18 @@ def transcript_unit_exist(dataset, data_source, version, annotation):
return annotation_unit_match is not None


def create_annotation_entry(dataset_name, processed_annotation):
    """
    Helper method that builds the MongoDB-ready structures for a single processed annotation.

    Returns a tuple of two items: a new dataset entry mapping `dataset_name` to a one-item list that holds the
    annotation entry, and the annotation entry itself, which carries only the 'data_source', 'version', and
    'value' of `processed_annotation`. Both results reference the same annotation entry dict.
    """
    entry_fields = ('data_source', 'version', 'value')
    annotation_entry = {field: processed_annotation[field] for field in entry_fields}

    return {dataset_name: [annotation_entry]}, annotation_entry


class GenomicUnitCollection:
""" Repository for managing genomic units and their annotations """

Expand Down Expand Up @@ -134,7 +146,7 @@ def add_transcript_to_genomic_unit(self, genomic_unit, transcript_id):
{'$addToSet': {'transcripts': {'transcript_id': transcript_id, 'annotations': []}}},
)

def update_genomic_unit_annotation_by_mongo_id(self, genomic_unit_document):
def __update_genomic_unit_by_mongo_id(self, genomic_unit_document):
""" Takes a genomic unit and overwrites the existing object based on the object's id """
genomic_unit_id = genomic_unit_document['_id']

Expand All @@ -145,42 +157,58 @@ def update_genomic_unit_annotation_by_mongo_id(self, genomic_unit_document):
def annotate_genomic_unit(self, genomic_unit, genomic_annotation):
"""
Takes a genomic_unit from an annotation task as well as a genomic_annotation and arranges them in a pattern
that can be sent to mongo to update the genomic unit's document in the collection
"""

annotation_data_set = {
genomic_annotation['data_set']: [{
'data_source': genomic_annotation['data_source'],
'version': genomic_annotation['version'],
'value': genomic_annotation['value'],
that can be sent to mongo to update the genomic unit's document in the collection. Saves annotation as the
following example
example:
{
'Entrez Gene Id': [{
'data_source': 'Rosalution',
'version': 'rosalution-manifest-00',
'value': 203547
}]
}

updated_document = None
"""

if 'transcript_id' in genomic_annotation:
genomic_unit_document = self.find_genomic_unit_with_transcript_id(
genomic_unit, genomic_annotation['transcript_id']
)
transcript_id = genomic_annotation['transcript_id']
updated_document = self.__annotate_transcript_dataset(genomic_unit, transcript_id, genomic_annotation)
return updated_document

dataset_name = genomic_annotation['data_set']

genomic_unit_json = self.find_genomic_unit(genomic_unit)
self.__add_to_annotations_from_document(genomic_unit_json['annotations'], dataset_name, genomic_annotation)

updated_document = self.__update_genomic_unit_by_mongo_id(genomic_unit_json)

return updated_document

def __annotate_transcript_dataset(self, genomic_unit, transcript_id: str, genomic_annotation):
dataset_name = genomic_annotation['data_set']

genomic_unit_document = self.find_genomic_unit_with_transcript_id(genomic_unit, transcript_id)

if not genomic_unit_document:
self.add_transcript_to_genomic_unit(genomic_unit, genomic_annotation['transcript_id'])
genomic_unit_document = self.find_genomic_unit_with_transcript_id(
genomic_unit, genomic_annotation['transcript_id']
)
if not genomic_unit_document:
self.add_transcript_to_genomic_unit(genomic_unit, transcript_id)
genomic_unit_document = self.find_genomic_unit_with_transcript_id(genomic_unit, transcript_id)

for transcript in genomic_unit_document['transcripts']:
if transcript['transcript_id'] == genomic_annotation['transcript_id']:
transcript['annotations'].append(annotation_data_set)
for transcript in genomic_unit_document['transcripts']:
if transcript["transcript_id"] == transcript_id:
self.__add_to_annotations_from_document(transcript['annotations'], dataset_name, genomic_annotation)

updated_document = self.update_genomic_unit_annotation_by_mongo_id(genomic_unit_document)
return self.__update_genomic_unit_by_mongo_id(genomic_unit_document)

def __add_to_annotations_from_document(self, list_of_annotations, dataset_name, genomic_annotation):
new_dataset_entry, annotation_entry = create_annotation_entry(dataset_name, genomic_annotation)

existing_dataset = next((dataset for dataset in list_of_annotations if dataset_name in dataset), None)
if existing_dataset:
existing_dataset[dataset_name].append(annotation_entry)
else:
genomic_unit_document = self.find_genomic_unit(genomic_unit)
genomic_unit_document['annotations'].append(annotation_data_set)
updated_document = self.update_genomic_unit_annotation_by_mongo_id(genomic_unit_document)
list_of_annotations.append(new_dataset_entry)

return updated_document
return list_of_annotations

def annotate_genomic_unit_with_file(self, genomic_unit, genomic_annotation):
""" Ensures that an annotation is created for the annotation image upload and only one image is allowed """
Expand All @@ -191,7 +219,7 @@ def annotate_genomic_unit_with_file(self, genomic_unit, genomic_annotation):
for annotation in genomic_unit_document['annotations']:
if data_set in annotation:
annotation[data_set][0]['value'].append(genomic_annotation['value'])
return self.update_genomic_unit_annotation_by_mongo_id(genomic_unit_document)
return self.__update_genomic_unit_by_mongo_id(genomic_unit_document)

annotation_data_set = {
genomic_annotation['data_set']: [{
Expand All @@ -202,7 +230,7 @@ def annotate_genomic_unit_with_file(self, genomic_unit, genomic_annotation):
}

genomic_unit_document['annotations'].append(annotation_data_set)
return self.update_genomic_unit_annotation_by_mongo_id(genomic_unit_document)
return self.__update_genomic_unit_by_mongo_id(genomic_unit_document)

def update_genomic_unit_file_annotation(self, genomic_unit, data_set, annotation_value, file_id_old):
""" Replaces existing annotation image with new image """
Expand All @@ -217,7 +245,7 @@ def update_genomic_unit_file_annotation(self, genomic_unit, data_set, annotation
annotation[data_set][0]['value'].append(annotation_value)
break

self.update_genomic_unit_annotation_by_mongo_id(genomic_unit_document)
self.__update_genomic_unit_by_mongo_id(genomic_unit_document)

return

Expand All @@ -233,7 +261,7 @@ def remove_genomic_unit_file_annotation(self, genomic_unit, data_set, file_id):
annotation[data_set][0]['value'].pop(i)
break

return self.update_genomic_unit_annotation_by_mongo_id(genomic_unit_document)
return self.__update_genomic_unit_by_mongo_id(genomic_unit_document)

def create_genomic_unit(self, genomic_unit):
"""
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"_id": "62fbfa5f616a9799131174c9",
"hgvs_variant": "NM_001017980.3:c.164G>T",
"transcripts":
[
Expand Down
1 change: 1 addition & 0 deletions backend/tests/fixtures/annotations-VMA21.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"_id": "62fbfa5f616a979913117477",
"gene_symbol": "VMA21",
"gene": "VMA21",
"annotations": [
Expand Down
63 changes: 63 additions & 0 deletions backend/tests/unit/repository/test_genomic_unit_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,42 @@ def test_annotate_transcript_genomic_unit(genomic_unit_collection):
return_document=ReturnDocument.AFTER)


@pytest.mark.parametrize(
    "prepare_test_annotate", [('VMA21', 'Entrez Gene Id', "rosalution-manifest-01", 203547, False),
                              ('VMA21', 'Entrez Gene Id', "rosalution-manifest-00", 203550, True),
                              ('NM_001017980.3:c.164G>T', 'ClinVar_Variantion_Id', "2024-09-06", "581270", False)],
    indirect=True,
    ids=["new_VMA21_annotation_for_dataset", "new_VMA21_dataset", "new_variant_annotation"]
)
def test_annotate_genomic_unit(prepare_test_annotate, genomic_unit_collection):
    """
    Tests that annotating a genomic unit either adds a new dataset for the annotation or appends the annotation
    to a dataset that already exists on the genomic unit.
    """
    genomic_unit_json, dataset_name, genomic_annotation, annotation_unit, expected_amount = prepare_test_annotate

    collection = genomic_unit_collection.collection
    collection.find_one.return_value = genomic_unit_json

    genomic_unit_collection.annotate_genomic_unit(annotation_unit.genomic_unit, genomic_annotation)

    # Inspect the '$set' document found in the second positional argument of the first find_one_and_update call
    first_update_args = collection.find_one_and_update.call_args_list[0][0]
    saved_annotations = first_update_args[1]['$set']['annotations']

    actual_updated_dataset = None
    for dataset in saved_annotations:
        if dataset_name in dataset:
            actual_updated_dataset = dataset
            break

    assert actual_updated_dataset is not None

    dataset_entries = actual_updated_dataset[dataset_name]
    assert len(dataset_entries) == expected_amount

    # The added annotation is identified within the dataset by its data source and version
    actual_updated_annotation = None
    for entry in dataset_entries:
        same_source = entry['data_source'] == genomic_annotation['data_source']
        if same_source and entry['version'] == genomic_annotation['version']:
            actual_updated_annotation = entry
            break

    assert actual_updated_annotation is not None
    assert actual_updated_annotation['value'] == genomic_annotation['value']


def test_annotation_genomic_unit_with_file(genomic_unit_collection, get_annotation_json):
""" Accepts a file and adds it as an annotation to the given genomic unit """
genomic_unit = get_annotation_json('NM_001017980.3:c.164G>T', GenomicUnitType.HGVS_VARIANT)
Expand Down Expand Up @@ -270,6 +306,33 @@ def test_remove_existing_genomic_unit_file_annotation(genomic_unit_collection, g
genomic_unit_collection.update_genomic_unit_annotation_by_mongo_id.assert_called_once_with(expected_genomic_unit)


@pytest.fixture(name="prepare_test_annotate", scope="function")
def prepare_test_annotate_genomic_units(request, get_annotation_unit, get_annotation_json):
    """
    Prepares the test data for annotating a genomic unit from the indirect parameters in `request.param`, ordered
    as (genomic unit, dataset name, version, value, remove dataset flag).

    Returns a tuple of the genomic unit JSON, the dataset name, the annotation to add, the annotation unit, and
    the amount of annotations expected within the dataset: 1 when the dataset was removed from the genomic unit
    JSON beforehand, otherwise 2.
    """
    genomic_unit, dataset_name, version, value, remove_dataset = request.param

    annotation_unit = get_annotation_unit(genomic_unit, dataset_name)
    annotation_unit.set_latest_version(version)

    unit_type = annotation_unit.genomic_unit['type']
    genomic_unit_json = get_annotation_json(genomic_unit, unit_type)

    if remove_dataset:
        # Drop every entry for the dataset so that annotating must create the dataset from scratch
        genomic_unit_json['annotations'] = list(
            filter(lambda dataset: dataset_name not in dataset, genomic_unit_json['annotations'])
        )
        expected_annotation_amount = 1
    else:
        expected_annotation_amount = 2

    genomic_annotation = {
        "data_set": annotation_unit.get_dataset_name(),
        "data_source": annotation_unit.get_dataset_source(),
        "version": annotation_unit.get_version(),
        "value": value,
    }

    return (genomic_unit_json, dataset_name, genomic_annotation, annotation_unit, expected_annotation_amount)


@pytest.fixture(name="transcript_annotation_unit", scope="function")
def variant_with_datasets_annotation_unit(request, get_annotation_unit, get_annotation_json):
""" Fixture that creates generates the test data for verifying transcript annotation operations"""
Expand Down
2 changes: 1 addition & 1 deletion etc/fixtures/initial-seed/genomic-units.json
Original file line number Diff line number Diff line change
Expand Up @@ -5897,7 +5897,7 @@
"Polyphen Prediction": [
{
"data_source": "Ensembl",
"version": 112,
"version": 100,
"value": "benign"
}
]
Expand Down

0 comments on commit a60be14

Please sign in to comment.