From b518b45cf17e1bc0d5e7360be9d6d77dc722bd32 Mon Sep 17 00:00:00 2001 From: Angelina Uno-Antonison Date: Mon, 23 Sep 2024 14:52:30 -0500 Subject: [PATCH] wrapping up first draft of migration script; and tidying up (#183) * wrapping up first draft of migration script; and tidying up feature * added a port for local mongodb developer so vscode can connect to mongodb in the container; fixed documentation in script for the example run command --- backend/src/core/annotation.py | 2 +- backend/src/core/annotation_task.py | 2 +- .../tests/unit/core/test_annotation_task.py | 2 +- docker-compose.yml | 2 + etc/fixtures/create-annotation-manifest.js | 112 +++++++++++++----- 5 files changed, 89 insertions(+), 31 deletions(-) diff --git a/backend/src/core/annotation.py b/backend/src/core/annotation.py index 1bed4315..a599e407 100644 --- a/backend/src/core/annotation.py +++ b/backend/src/core/annotation.py @@ -158,6 +158,7 @@ def process_tasks( logger.info( '%s Version Calculated %s...', format_annotation_logging(annotation_unit), version ) + analysis_collection.add_dataset_to_manifest(analysis_name, annotation_unit) annotation_queue.put(annotation_unit) else: for annotation in task.extract(task_process_result): @@ -170,7 +171,6 @@ def process_tasks( genomic_unit_collection.annotate_genomic_unit( task.annotation_unit.genomic_unit, annotation ) - analysis_collection.add_dataset_to_manifest(analysis_name, annotation_unit) except FileNotFoundError as error: logger.info( diff --git a/backend/src/core/annotation_task.py b/backend/src/core/annotation_task.py index bfca75b2..df86b3ea 100644 --- a/backend/src/core/annotation_task.py +++ b/backend/src/core/annotation_task.py @@ -246,7 +246,7 @@ def annotate(self): def get_annotation_version_from_rest(self): """Gets version for rest type and returns the version data""" - version = {"rest": "rosalution-temp-manifest-00"} + version = {"rest": "rosalution-manifest-00"} url_to_query = self.annotation_unit.dataset['version_url'] result = 
requests.get(url_to_query, verify=False, headers={"Accept": "application/json"}, timeout=30) diff --git a/backend/tests/unit/core/test_annotation_task.py b/backend/tests/unit/core/test_annotation_task.py index ccb8362b..3338a66e 100644 --- a/backend/tests/unit/core/test_annotation_task.py +++ b/backend/tests/unit/core/test_annotation_task.py @@ -141,7 +141,7 @@ def test_process_annotation_versioning_all_types(genomic_unit, dataset_name, exp "genomic_unit,dataset_name,version_to_extract,expected", [ ('VMA21', 'Entrez Gene Id', {"rosalution": "rosalution-manifest-00"}, "rosalution-manifest-00"), ('VMA21', 'Ensembl Gene Id', {"releases": [112]}, 112), - ('LMNA', 'OMIM', {"date": "rosalution-temp-manifest-00"}, "rosalution-temp-manifest-00"), + ('LMNA', 'OMIM', {"date": "rosalution-manifest-00"}, "rosalution-manifest-00"), ] ) def test_version_extraction(genomic_unit, dataset_name, expected, version_to_extract, get_version_task): diff --git a/docker-compose.yml b/docker-compose.yml index da8515b3..fcc611ae 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -77,6 +77,8 @@ services: volumes: - ./etc/fixtures/initial-seed/initial-db-seed.sh:/docker-entrypoint-initdb.d/initial-db-seed.sh - ./etc/fixtures/:/tmp/fixtures + ports: + - 27017:27017 networks: - rosalution-network environment: diff --git a/etc/fixtures/create-annotation-manifest.js b/etc/fixtures/create-annotation-manifest.js index ddf26838..0b75ab8a 100755 --- a/etc/fixtures/create-annotation-manifest.js +++ b/etc/fixtures/create-annotation-manifest.js @@ -2,17 +2,22 @@ const usage = ` mongosh /tmp/database/create-annotation-manifests.js Script Options: help: if true print this help message + overwrite: replace the existing analysis' annotation manifest + databaseName: different name of the database if needed + Run mongosh help for mongosh connection and authentication usage. 
Example: - mongosh --host localhost --port 27017 --eval "var help=true;" /tmp/fixtures/create-annotation-manifest.js - - docker exec -it mongosh /tmp/fixtures/create-annotation-manifest.js + mongosh --host localhost --port 27017 --file /tmp/fixtures/create-annotation-manifest.js overwrite --eval="databaseName='rosalution_db'" + docker exec -it mongosh --file /tmp/fixtures/create-annotation-manifest.js overwrite --eval="databaseName='rosalution_db'" ` -if (help === true) { +overwrite = process.argv.includes('overwrite') +help = process.argv.includes('help') + +if (help) { print(usage); quit(1); } @@ -32,55 +37,106 @@ const annotationSectionsRename = { } let yourDate = new Date() -yourDate.toISOString().split('T')[0] +yourDateString = yourDate.toISOString().split('T')[0].split('T')[0] -version = { - 'rest': 'rosalution-temp-manifest-00', - 'rosalution': 'rosalution-temp-manifest-00', - 'date': 'rosalution-temp-manifest-00', +versionMapping = { + 'rosalution': 'rosalution-manifest-00', + 'date': yourDateString, + 'Ensembl': 112, + 'Alliance Genome': '7.3.0', + 'Alliance genome': '7.3.0', } +function createAnalysisManifestEntry(incomingDataset) { + datasetManifest = { + } + + datasetName = incomingDataset['data_set'] + versionType = incomingDataset['versioning_type'] + annotationSource = incomingDataset['data_source'] + versionString = versionType in versionMapping ? 
versionMapping[versionType] : versionMapping[annotationSource] + datasetManifest[datasetName] = { + 'data_source': incomingDataset['data_source'], + 'version': versionString, + } -usernameMapping = { - 'amoss01': '00bqd0d6r0dqpwptyuo8p6h3mnnumzee', - 'afoksin': 'h3bnr2uh1bo4oe0cs1rdyh3n5hrg71ej', - 'aeunoant': '00bqd0d6r0dqpwptyuo8p6h3mnnumzee', + return datasetManifest } -usersToUpdate = Object.keys(usernameMapping); +function getAnalysisGenomicUnits(genomicUnits) { + function getHgvsVariants(variants) { + return variants.flatMap( variant => [variant.hgvs_variant && { 'hgvs_variant': variant.hgvs_variant}]); + } + return genomicUnits.flatMap( unit => ('variants' in unit ? [{'gene': unit['gene'] }, ...getHgvsVariants(unit['variants'])]: unit['gene'])) +} +function getVersionFromManifest(manifest, datasetName) { + const found = manifest.find((config) => Object.hasOwn(config, datasetName)) + if(!found) + return undefined + return found[datasetName].version +} try { const analyses = db.analyses.find(); analyses.forEach(element => { - print(`Creating analysis' '${element.name}' manifest ...`); + print(`Creating analysis: '${element.name}' manifest...`); const annotation_configuration = db.annotations_config.find(); - if (!('manifest' in element)) + if( overwrite ) { element['manifest']= [] + } + annotation_configuration.forEach(dataset => { - dataset_name = dataset['data_set'] - if (element.manifest.some(e => dataset_name in e)){ - print("Dataset already in manifest", dataset_name) + datasetName = dataset['data_set'] + datasetManifest = createAnalysisManifestEntry(dataset) + + if (!overwrite && element.manifest.some(e => datasetName in e)){ + print("Dataset already in manifest", datasetName) return } + element['manifest'].push(datasetManifest) - dataset_manifest = { - } - version_string = version[config.versioning_type] - dataset_manifest[dataset_name] = { - 'data_source': dataset['data_source'], - 'version': 'rosalution-temp-manifest-00', + }); + + 
analysisGenomicUnits = getAnalysisGenomicUnits(element['genomic_units']) + analysisGenomicUnits.forEach((genomicUnit) => { + // db.genomic_units.findOne({'hgvs_variant': 'NM_001360016.2:c.563C>T'}); + // db.genomic_units.findOne({'gene': 'VMA21'}); + genomicUnitDocument = db.genomic_units.findOne(genomicUnit) + + genomicUnitDocument.annotations.forEach( annotation => { + for (const annotationUnitDataset of Object.keys(annotation)) { + annotation[annotationUnitDataset].forEach(annotationUnit => { + annotationUnit.version = getVersionFromManifest(element['manifest'], annotationUnitDataset) + }) + } + }); + + if( Object.hasOwn(genomicUnitDocument, 'transcripts')) { + genomicUnitDocument.transcripts.forEach( transcript => { + transcript.annotations.forEach((annotation) => { + for (const annotationUnitDataset of Object.keys(annotation)) { + annotation[annotationUnitDataset].forEach(annotationUnit => { + annotationUnit.version = getVersionFromManifest(element['manifest'], annotationUnitDataset) + }); + } + }); + }); } - element['manifest'].push(dataset_manifest) + db.genomic_units.updateOne( + {'_id': genomicUnitDocument._id}, + {'$set': genomicUnitDocument} + ) }); - print(element['manifest']) + print('Updating analysis: ' + element.name + "...") const updated = db.analyses.updateOne( {'_id': element._id}, {'$set': element} ) - print(updated) + result_text = updated.modifiedCount == 1 ? 'Success': 'Existed' + print('Updating analysis: ' + element.name + "..." + result_text) }); } catch (err) { console.log(err.stack);