diff --git a/backend/src/core/annotation.py b/backend/src/core/annotation.py index a599e407..1399bba5 100644 --- a/backend/src/core/annotation.py +++ b/backend/src/core/annotation.py @@ -120,6 +120,7 @@ def process_tasks( annotation_unit.genomic_unit, analysis_manifest_dataset ) dependency_annotation_unit.set_latest_version(analysis_manifest_dataset['version']) + annotation_value = genomic_unit_collection.find_genomic_unit_annotation_value( dependency_annotation_unit ) diff --git a/backend/src/repository/analysis_collection.py b/backend/src/repository/analysis_collection.py index b505e83c..3c876bed 100644 --- a/backend/src/repository/analysis_collection.py +++ b/backend/src/repository/analysis_collection.py @@ -150,8 +150,10 @@ def add_dataset_to_manifest(self, analysis_name: str, annotation_unit: Annotatio def get_manifest_dataset_config(self, analysis_name: str, dataset_name: str): """ Returns an individual dataset manifest """ dataset_attribute = f"manifest.{dataset_name}" + projection = {"manifest.$": 1} - analysis = self.collection.find_one({"name": analysis_name, dataset_attribute: {'$exists': True}}, projection) + query = {"name": analysis_name, dataset_attribute: {'$exists': True}} + analysis = self.collection.find_one(query, projection) if not analysis: return None @@ -176,7 +178,6 @@ def create_analysis(self, analysis_data: dict): if self.collection.find_one({"name": analysis_data["name"]}) is not None: raise ValueError(f"Analysis with name {analysis_data['name']} already exists") - # returns an instance of InsertOneResult. return self.collection.insert_one(analysis_data) def attach_third_party_link(self, analysis_name: str, third_party_enum: str, link: str): diff --git a/backend/src/routers/analysis_router.py b/backend/src/routers/analysis_router.py index 8485a4f0..3b977f76 100644 --- a/backend/src/routers/analysis_router.py +++ b/backend/src/routers/analysis_router.py @@ -77,7 +77,11 @@ async def create_file( @router.get("/{analysis_name}", tags=["analysis"], response_model=Analysis, response_model_exclude_none=True) def get_analysis_by_name(analysis_name: str, repositories=Depends(database)): """Returns analysis case data by calling method to find case by it's analysis_name""" - return repositories["analysis"].find_by_name(analysis_name) + analysis = repositories["analysis"].find_by_name(analysis_name) + + if analysis is None: + raise HTTPException(status_code=404, detail=f"{analysis_name} does not exist.") + return analysis @router.get("/{analysis_name}/genomic_units", tags=["analysis"]) diff --git a/backend/src/routers/annotation_router.py b/backend/src/routers/annotation_router.py index c725f34e..051a3c84 100644 --- a/backend/src/routers/annotation_router.py +++ b/backend/src/routers/annotation_router.py @@ -44,7 +44,7 @@ def annotate_analysis( analysis = Analysis(**analysis_json) annotation_service = AnnotationService(repositories["annotation_config"]) annotation_service.queue_annotation_tasks(analysis, annotation_task_queue) - background_tasks.add_task(AnnotationService.process_tasks, annotation_task_queue, repositories['genomic_unit']) + background_tasks.add_task(AnnotationService.process_tasks, annotation_task_queue, analysis.name, repositories['genomic_unit'], repositories["analysis"]) return {"name": f"{name} annotations queued."} diff --git a/etc/database/screen-capture-annotate/screen_capture_annotate.py b/etc/database/screen-capture-annotate/screen_capture_annotate.py index 9580a6d9..a67d0521 100644 --- a/etc/database/screen-capture-annotate/screen_capture_annotate.py +++ b/etc/database/screen-capture-annotate/screen_capture_annotate.py @@ -38,47 +38,86 @@ import requests - urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) config = dotenv_values('.env') -FORCE_APPEND = 'FORCE_APPEND' in config and ( - config['FORCE_APPEND'].lower() in ('true', '1', 't', 'on') -) +FORCE_APPEND = 'FORCE_APPEND' in config and (config['FORCE_APPEND'].lower() in ('true', '1', 't', 'on')) OVERWRITE_LOCAL_SAVE = 'OVERWRITE_LOCAL_SAVE' in config and ( config['OVERWRITE_LOCAL_SAVE'].lower() in ('true', '1', 't', 'on') ) DATASETS = { - "gene": [{ - "dataset": "Druggability", "url": "https://pharos.nih.gov/targets/{gene}#ppi", "dom_attribute": "ppi", - "extra_dom_element_wait": "circle", "dependencies": [], "selenium_by": By.ID, "popup_selectors": [ - '.shepherd-cancel-icon', - '.shepherd-cancel-icon', - ] - }, { - "dataset": "Gene Expression", - "url": "https://gtexportal.org/home/gene/{gene}", - "dom_attribute": "geneExpression", - "dependencies": [], - "selenium_by": By.ID, - }, { - "dataset": "Orthology", - "url": "https://www.alliancegenome.org/gene/{HGNC_ID}", - "dom_attribute": "a#orthology", - "dependencies": ["HGNC_ID"], - "selenium_by": By.CSS_SELECTOR, - }, { - "dataset": "Human_Gene_vs_Protein_Expression_Profile", - "url": "https://www.proteinatlas.org/{Ensembl Gene Id}-{gene}/tissue", - "dom_attribute": - "//table[@class='main_table']/tbody/tr/td[2]/div[@class='tissue_summary menu_margin']/table[@class='darkheader_white'][1]/tbody/tr[@class='roundbottom']", #pylint: disable=line-too-long - "dependencies": ["Ensembl Gene Id"], - "selenium_by": By.XPATH, - }], - "hgvsVariant": [], + "gene": [ + { + "dataset": "Druggability", "url": "https://pharos.nih.gov/targets/{gene}", + "dom_attribute": "scrollspy-main", "extra_dom_element_wait": "pharos-radar-chart", "dependencies": [], + "selenium_by": By.ID, "popup_selectors": [ + '.shepherd-cancel-icon', + '.shepherd-cancel-icon', + ], "additional_script_execution": + """ + document.querySelectorAll('mat-card').forEach(el => { + if (el.id !== 'development' && el.id !== 'ppi' && !el.classList.contains('target-card')) { + el.remove(); + } + }); + """ + }, + { + "dataset": "Human_Gene_Expression", + "url": "https://gtexportal.org/home/gene/{gene}", + "dom_attribute": "geneExpression", + "extra_dom_element_wait": "svg", + "file_postfix": "-0", + "dependencies": [], + "selenium_by": By.ID, + }, + { + "dataset": "Human_Gene_Expression", + "url": "https://www.proteinatlas.org/{Ensembl Gene Id}-{gene}/single+cell+type", + "dependencies": ["Ensembl Gene Id"], + "selenium_by": By.XPATH, + "file_postfix": "-1", + "dom_attribute": "(//table[contains(@class, \"main_table\")])", #pylint: disable=line-too-long + "additional_script_execution": + """ + let tables = document.evaluate( + '(//table[contains(@class, "main_table")])[1]//table', + document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null + ); + for (let i = 3; i < tables.snapshotLength; i++) tables.snapshotItem(i).remove(); + """ + }, + { + "dataset": "Orthology", + "url": "https://www.alliancegenome.org/gene/{HGNC_ID}", + "dom_attribute": "a#orthology", + "dependencies": ["HGNC_ID"], + "selenium_by": By.CSS_SELECTOR, + }, + { + "dataset": "Human_Gene_versus_Protein_Expression_Profile", + "url": "https://www.proteinatlas.org/{Ensembl Gene Id}-{gene}/tissue", + "dom_attribute": + "//table[@class='main_table']/tbody/tr/td[2]/div[@class='tissue_summary menu_margin']/table[@class='darkheader_white'][1]/tbody/tr[@class='roundbottom']", #pylint: disable=line-too-long + "dependencies": ["Ensembl Gene Id"], + "selenium_by": By.XPATH, + } + ], + "hgvsVariant": [ + # Disabling this annotaiton until additional time can be investigated to support + # more then just SNV variants for this. + # { + # "dataset": "GeneHomology_Multi-SequenceAlignment", + # "url": "https://marrvel.org/human/variant/{hgvsVariant}", + # "dom_attribute": + # "app-diopt-alignment", #pylint: disable=line-too-long + # "selenium_by": By.TAG_NAME , + # "dependencies": [], + # } + ], } @@ -92,6 +131,7 @@ def aggregate_string_replacement(key, value, base_string): genomic_unit_string = f"{{{key}}}" return base_string.replace(genomic_unit_string, value) + class ScreenCaptureDatasets(contextlib.ExitStack): """Manages Selenium WebDriver to capture screenshots of genomic datasets.""" @@ -119,7 +159,7 @@ def __enter__(self): self.path = os.path.dirname(os.path.abspath(__file__)) return self - def __exit__(self, *exec): # pylint: disable=redefined-builtin + def __exit__(self, *exec): # pylint: disable=redefined-builtin """Clean up WebDriver resources on exit.""" super().__exit__(*exec) self.driver.__exit__(*exec) @@ -129,9 +169,8 @@ def click_popup(self, url, selector): start_time = time.time() try: print(f'{url}: Checking for Popup Element', end='\r', flush=True) - popup_element = WebDriverWait(self.driver, 22).until( - lambda driver: driver.find_element(By.CSS_SELECTOR, selector) - ) + popup_element = WebDriverWait(self.driver, + 22).until(lambda driver: driver.find_element(By.CSS_SELECTOR, selector)) print(f'{url}: Found Popup element {selector}', end='\r', flush=True) if popup_element: popup_element.click() @@ -152,9 +191,9 @@ def screencapture_dataset(self, unit_type, unit, unit_annotations, dataset): print(url, end='\r', flush=True) # Get the current timestamp for the image name - today = datetime.now() - image_name = today.strftime("%Y-%m-%d") - image_name = f"{unit}-{dataset['dataset']}-{image_name}" + postfix = '' if 'file_postfix' not in dataset else dataset['file_postfix'] + image_name = datetime.now().strftime("%Y-%m-%d") + image_name = f"{unit}-{dataset['dataset']}-{image_name}{postfix}" file_name = f"tmp/{image_name}.png" if os.path.exists(file_name) and (not OVERWRITE_LOCAL_SAVE): @@ -167,10 +206,10 @@ def screencapture_dataset(self, unit_type, unit, unit_annotations, dataset): self.driver.get(url) width = self.driver.execute_script( - "return Math.max( document.body.scrollWidth, document.body.offsetWidth, document.documentElement.clientWidth, document.documentElement.scrollWidth, document.documentElement.offsetWidth );" # pylint: disable=line-too-long + "return Math.max( document.body.scrollWidth, document.body.offsetWidth, document.documentElement.clientWidth, document.documentElement.scrollWidth, document.documentElement.offsetWidth );" # pylint: disable=line-too-long ) height = self.driver.execute_script( - "return Math.max( document.body.scrollHeight, document.body.offsetHeight, document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight );" # pylint: disable=line-too-long + "return Math.max( document.body.scrollHeight, document.body.offsetHeight, document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight );" # pylint: disable=line-too-long ) self.driver.set_window_size(width, height) @@ -180,18 +219,25 @@ def screencapture_dataset(self, unit_type, unit, unit_annotations, dataset): self.click_popup(url, selector) try: - WebDriverWait(self.driver,30).until( + WebDriverWait(self.driver, 200).until( EC.presence_of_element_located((dataset['selenium_by'], dataset["dom_attribute"])) ) if "extra_dom_element_wait" in dataset: - WebDriverWait(self.driver, 50).until(EC.presence_of_element_located((By.TAG_NAME, "circle"))) + WebDriverWait(self.driver, 60).until( + EC.presence_of_element_located((By.TAG_NAME, dataset['extra_dom_element_wait'])) + ) + time.sleep(20) except TimeoutException as err: print(f'{url}: Failed to locate visualization {err}', end='\r', flush=True) return None page_element = self.driver.find_element(dataset['selenium_by'], dataset["dom_attribute"]) print(f'{url}: Found visualization, saving image', end='\r', flush=True) + + if 'additional_script_execution' in dataset: + self.driver.execute_script(dataset['additional_script_execution']) + page_element.screenshot(file_name) print(f'{url}: Saving Operation Complete ', end='\n', flush=True) @@ -203,6 +249,7 @@ class RosalutionAnalysis(): Representing a Rosalution analysisf for genomic units and annotations, and screencaptures datasets to save. """ + def __init__(self, analysis_name_string, rosalution_auth_header): """Initialize the Rosalution screencapture analyses""" self.analysis_name = analysis_name_string @@ -214,10 +261,9 @@ def get_genomic_units(self): Retrieve the genomic units from the analysis. """ response = requests.get( - f"{config['ROSALUTION_API_URL']}analysis/{self.analysis_name}/genomic_units", - verify=False, - timeout=20 + f"{config['ROSALUTION_API_URL']}analysis/{self.analysis_name}/genomic_units", verify=False, timeout=20 ) + genomic_units_json = response.json() genomic_units = { "gene": list(genomic_units_json['genes'].keys()), @@ -228,9 +274,7 @@ def get_genomic_units(self): def get_annotations(self, unit_type, unit): """Fetch annotations for a genomic unit.""" response_unit_annotations = requests.get( - f"{config['ROSALUTION_API_URL']}annotation/{unit_type}/{unit}", - verify=False, - timeout=20 + f"{config['ROSALUTION_API_URL']}analysis/{self.analysis_name}/{unit_type}/{unit}", verify=False, timeout=20 ) return response_unit_annotations.json() @@ -240,14 +284,21 @@ def capture_analysis(self, screen_capture): for genomic_unit_type, genomic_unit in genomic_units.items(): for unit in genomic_unit: unit_annotations = self.get_annotations(genomic_unit_type, unit) + if unit == "EXOC3L2": + unit_annotations["Ensembl Gene Id"] = "ENSG00000283632" + if unit == "FOPNL": + unit_annotations["HGNC_ID"] = "HGNC:26435" + unit_annotations["Ensembl Gene Id"] = "ENSG00000133393" for genomic_unit_dataset in DATASETS[genomic_unit_type]: captured_dataset_filepath = screen_capture.screencapture_dataset( genomic_unit_type, unit, unit_annotations, genomic_unit_dataset ) if captured_dataset_filepath: + extra_in_set = (genomic_unit_dataset['file_postfix'] + ) if 'file_postfix' in genomic_unit_dataset else () self.captured_datasets[ - (genomic_unit_type, unit, genomic_unit_dataset['dataset']) - ] = captured_dataset_filepath + (genomic_unit_type, unit, genomic_unit_dataset['dataset'], + *extra_in_set)] = captured_dataset_filepath def save_to_rosalution(self): """Save captured dataset screenshots to the Rosalution API.""" @@ -255,29 +306,25 @@ def save_to_rosalution(self): annotations = {} for each_unit in types_and_genes: unit_type, unit = each_unit - annotations[(type, unit)] = self.get_annotations(unit_type, unit) + annotations[(unit_type, unit)] = self.get_annotations(unit_type, unit) for entry, file_path in self.captured_datasets.items(): - unit_type, unit, dataset = entry - not_empty = dataset in annotations[(unit_type, unit)] + unit_type, unit, dataset, *post = entry #pylint: disable=unused-variable + not_empty = dataset in annotations[(unit_type, unit)] and len(annotations[(unit_type, unit)][dataset]) > 0 if not_empty and (not FORCE_APPEND): print(f'{entry}: Upload Operation Skipped, Annotation Exists ', end='\n', flush=True) continue - self.upload_file_to_rosalution(entry, file_path) def upload_file_to_rosalution(self, entry: tuple, file_path: str): """Uploads spceific file to the dataset entry""" - unit_type, unit, dataset = entry + unit_type, unit, dataset, *post = entry #pylint: disable=unused-variable api_url = \ f"{config['ROSALUTION_API_URL']}annotation/{unit}/{dataset}/attachment?genomic_unit_type={unit_type}" filename = file_path.strip('/')[1] with open(file_path, 'rb') as captured_dataset: - files = { - 'upload_file': - (filename, captured_dataset, 'application/png', {'Expires': '0'}) - } + files = {'upload_file': (filename, captured_dataset, 'application/png', {'Expires': '0'})} print(f'{entry}: Upload Operation Begin ', end='\r', flush=True) response = requests.post(api_url, headers=self.auth_header, files=files, verify=False, timeout=20) @@ -285,23 +332,29 @@ def upload_file_to_rosalution(self, entry: tuple, file_path: str): print(f'{entry}: Upload Operation {result_text} ', end='\n', flush=True) +def rosalution_authenticate(): + """ Authenticates and returns the request header to authenticate with Rosalution """ + print('🔒 Authenticating with Rosalution...', end='\r', flush=True) + client_id, client_secret = config['ROSALUTION_CLIENT_ID'], config['ROSALUTION_CLIENT_SECRET'] + auth_headers = {'Content-Type': 'application/x-www-form-urlencoded'} + auth_data = \ + f"grant_type=&scope=&client_id={client_id}&client_secret={client_secret}" + auth_response = requests.post( + f"{config['ROSALUTION_API_URL']}auth/token", headers=auth_headers, data=auth_data, verify=False, timeout=20 + ) + auth_response_json = auth_response.json() + if 'access_token' not in auth_response_json: + print('🔒 Authenticating with Rosalution...Failed', end='\n', flush=True) + print(auth_response_json) + sys.exit(2) + + print('🔓 Authenticating with Rosalution...Complete', end='\n', flush=True) + return {'Authorization': f"Bearer {auth_response_json['access_token']}"} + + rosalution_analyses = sys.argv[1:] -print('🔒 Authenticating with Rosalution...', end='\r', flush=True) -auth_headers = {'Content-Type': 'application/x-www-form-urlencoded'} -auth_data = \ - f"grant_type=&scope=&client_id={config['ROSALUTION_CLIENT_ID']}&client_secret={config['ROSALUTION_CLIENT_SECRET']}" -auth_response = requests.post( - f"{config['ROSALUTION_API_URL']}auth/token", headers=auth_headers, data=auth_data, verify=False, timeout=20 -) -auth_response_json = auth_response.json() -if 'access_token' not in auth_response_json: - print('🔒 Authenticating with Rosalution...Failed', end='\n', flush=True) - print(auth_response_json) - sys.exit(2) - -print('🔓 Authenticating with Rosalution...Complete', end='\n', flush=True) -rosalution_header = {'Authorization': f"Bearer {auth_response_json['access_token']}"} +rosalution_header = rosalution_authenticate() print("📷 Capturing Rosalution Analyses") print(*[f" 🧬 {analysis}" for analysis in rosalution_analyses], sep="\n") diff --git a/etc/database/set-new-annotations-configuration.sh b/etc/database/set-new-annotations-configuration.sh index 643f263d..c32d04c9 100755 --- a/etc/database/set-new-annotations-configuration.sh +++ b/etc/database/set-new-annotations-configuration.sh @@ -23,7 +23,7 @@ usage() { echo " " } -target_backup_path=/home/centos/backups/rosalution-annotation-configuration-backup +target_backup_path=/home/angelina docker_container_name="" docker_exec_prefix="" connection_string="localhost:27017" diff --git a/etc/fixtures/initial-seed/annotations-config.json b/etc/fixtures/initial-seed/annotations-config.json index eafacbc2..1fe945cd 100644 --- a/etc/fixtures/initial-seed/annotations-config.json +++ b/etc/fixtures/initial-seed/annotations-config.json @@ -1,14 +1,16 @@ [ { - "data_set": "Entrez Gene Id", - "data_source": "Rosalution", - "genomic_unit_type": "gene", - "annotation_source_type": "forge", - "base_string": "{HPO_NCBI_GENE_ID}", - "attribute": "{ \"Entrez Gene Id\": .\"Entrez Gene Id\"| sub( \".*:\"; \"\") }", - "dependencies": ["HPO_NCBI_GENE_ID"], - "versioning_type": "rosalution" - }, + "data_set": "transcript_id", + "data_source": "Ensembl", + "genomic_unit_type": "hgvs_variant", + "transcript": true, + "annotation_source_type": "http", + "url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;", + "attribute": ".[].transcript_consequences[] | { transcript_id: .transcript_id }", + "versioning_type": "rest", + "version_url": "https://rest.ensembl.org/info/data/?content-type=application/json", + "version_attribute": ".releases[]" + }, { "data_set": "HPO_NCBI_GENE_ID", "data_source": "HPO", @@ -23,10 +25,10 @@ "data_source": "Ensembl", "genomic_unit_type": "gene", "annotation_source_type": "http", - "url": "http://grch37.rest.ensembl.org/lookup/symbol/homo_sapiens/{gene}?content-type=application/json", + "url": "http://rest.ensembl.org/lookup/symbol/homo_sapiens/{gene}?content-type=application/json", "attribute": "{ \"Ensemble Gene Id\": .id }", "versioning_type": "rest", - "version_url": "https://grch37.rest.ensembl.org/info/data/?content-type=application/json", + "version_url": "https://rest.ensembl.org/info/data/?content-type=application/json", "version_attribute": ".releases[]" }, { @@ -37,9 +39,129 @@ "url": "http://rest.genenames.org/fetch/symbol/{gene}", "attribute": ".response | .docs[] | select( .symbol | contains(\"{gene}\")) | { HGNC_ID : .\"hgnc_id\"}", "versioning_type": "rest", - "version_url": "https://grch37.rest.ensembl.org/info/data/?content-type=application/json", + "version_url": "https://rest.ensembl.org/info/data/?content-type=application/json", "version_attribute": ".releases[]" }, + { + "data_set": "ClinVar_Variantion_Id", + "data_source": "Rosalution", + "genomic_unit_type": "hgvs_variant", + "annotation_source_type": "http", + "attribute": ".[] | select(.colocated_variants != null) | .colocated_variants[] | select(.var_synonyms != null) | .var_synonyms | select( .ClinVar != null ) | select(.ClinVar != []) | .ClinVar[] | select(contains(\"VCV\")) | sub(\"VCV0+\"; \"\") | {\"ClinVar_Variantion_Id\": . } ", + "url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;clinvar=1;", + "versioning_type": "rosalution" + }, + { + "data_set": "OMIM_gene_search_url", + "data_source": "Rosalution", + "genomic_unit_type": "gene", + "annotation_source_type": "forge", + "base_string": "https://www.omim.org/search?index=entry&start=1&sort=score+desc%2C+prefix_sort+desc&search={gene}", + "attribute": "{ \"OMIM_gene_search_url\": .OMIM_gene_search_url }", + "versioning_type": "rosalution" + }, + { + "data_set": "HPO_gene_search_url", + "data_source": "Rosalution", + "genomic_unit_type": "gene", + "annotation_source_type": "forge", + "base_string": "https://hpo.jax.org/app/browse/search?q={gene}&navFilter=all", + "attribute": "{ \"HPO_gene_search_url\": .HPO_gene_search_url }", + "versioning_type": "rosalution" + }, + { + "data_set": "Rat Gene Identifier", + "data_source": "Alliance Genome", + "genomic_unit_type": "gene", + "annotation_source_type": "http", + "url": "https://www.alliancegenome.org/api/search_autocomplete?q={gene}", + "attribute": ".results[] | { \"name_key\": .name_key, \"name\": .name, \"primaryKey\": .primaryKey, \"searchKey\": \"{gene} (Rno)\" } | . + { \"searchKey\": .searchKey | ascii_downcase, \"name_key\": .name_key | ascii_downcase } | select( .name_key == .searchKey) | { \"Rat Gene Identifier\": .primaryKey }", + "versioning_type": "rest", + "version_url": "https://www.alliancegenome.org/api/releaseInfo", + "version_attribute": ".releaseVersion" + }, + { + "data_set": "Mouse Gene Identifier", + "data_source": "Alliance Genome", + "genomic_unit_type": "gene", + "annotation_source_type": "http", + "url": "https://www.alliancegenome.org/api/search_autocomplete?q={gene}", + "attribute": ".results[] | { \"name_key\": .name_key, \"name\": .name, \"primaryKey\": .primaryKey, \"searchKey\": \"{gene} (Mmu)\" } | . + { \"searchKey\": .searchKey | ascii_downcase, \"name_key\": .name_key | ascii_downcase } | select( .name_key == .searchKey) | { \"Mouse Gene Identifier\": .primaryKey }", + "versioning_type": "rest", + "version_url": "https://www.alliancegenome.org/api/releaseInfo", + "version_attribute": ".releaseVersion" + }, + { + "data_set": "Zebrafish Gene Identifier", + "data_source": "Alliance Genome", + "genomic_unit_type": "gene", + "annotation_source_type": "http", + "url": "https://www.alliancegenome.org/api/search_autocomplete?q={gene}", + "attribute": ".results[] | { \"name_key\": .name_key, \"name\": .name, \"primaryKey\": .primaryKey, \"searchKey\": \"{gene} (Dre)\" } | . + { \"searchKey\": .searchKey | ascii_downcase, \"name_key\": .name_key | ascii_downcase } | select( .name_key == .searchKey) | { \"Zebrafish Gene Identifier\": .primaryKey }", + "versioning_type": "rest", + "version_url": "https://www.alliancegenome.org/api/releaseInfo", + "version_attribute": ".releaseVersion" + }, + { + "data_set": "C-Elegens Gene Identifier", + "data_source": "Alliance Genome", + "genomic_unit_type": "gene", + "annotation_source_type": "http", + "url": "https://www.alliancegenome.org/api/search_autocomplete?q={gene}", + "attribute": ".results[] | { \"name_key\": .name_key, \"name\": .name, \"primaryKey\": .primaryKey, \"searchKey\": \"{gene} (Cel)\" } | . + { \"searchKey\": .searchKey | ascii_downcase, \"name_key\": .name_key | ascii_downcase } | select( .name_key == .searchKey) | { \"C-Elegens Gene Identifier\": .primaryKey }", + "versioning_type": "rest", + "version_url": "https://www.alliancegenome.org/api/releaseInfo", + "version_attribute": ".releaseVersion" + }, + + { + "data_set": "Frog_General_Xenbase_Database_url", + "data_source": "Alliance Genome", + "annotation_source_type": "http", + "genomic_unit_type": "gene", + "url": "https://www.alliancegenome.org/api/search?q={gene}", + "attribute": "[.results[] | select ( .species | contains(\"Xenopus laevis\")) | .modCrossRefCompleteUrl][0] | { \"Frog_General_Xenbase_Database_url\": .}", + "versioning_type": "rest", + "version_url": "https://www.alliancegenome.org/api/releaseInfo", + "version_attribute": ".releaseVersion" + }, + { + "data_set": "GTEx_Human_Gene_Expression_url", + "data_source": "Rosalution", + "genomic_unit_type": "gene", + "annotation_source_type": "forge", + "base_string": "https://gtexportal.org/home/gene/{gene}", + "attribute": "{ \"GTEx_Human_Gene_Expression_url\": .GTEx_Human_Gene_Expression_url }", + "versioning_type": "rosalution" + }, + { + "data_set": "Human_Protein_Atlas_Protein_Gene_Search_url", + "data_source": "Rosalution", + "genomic_unit_type": "gene", + "annotation_source_type": "forge", + "base_string": "https://www.proteinatlas.org/search/{gene}", + "attribute": "{ \"Human_Protein_Atlas_Protein_Gene_Search_url\": .Human_Protein_Atlas_Protein_Gene_Search_url }", + "versioning_type": "rosalution" + }, + { + "data_set": "Pharos_Target_url", + "data_source": "Rosalution", + "genomic_unit_type": "gene", + "annotation_source_type": "forge", + "base_string": "https://pharos.nih.gov/targets/{gene}", + "attribute": "{ \"Pharos_Target_url\": .Pharos_Target_url }", + "versioning_type": "rosalution" + }, + { + "data_set": "Entrez Gene Id", + "data_source": "Rosalution", + "genomic_unit_type": "gene", + "annotation_source_type": "forge", + "base_string": "{HPO_NCBI_GENE_ID}", + "attribute": "{ \"Entrez Gene Id\": .\"Entrez Gene Id\"| sub( \".*:\"; \"\") }", + "dependencies": ["HPO_NCBI_GENE_ID"], + "versioning_type": "rosalution" + }, { "data_set": "Gene Summary", "data_source": "Alliance Genome", @@ -72,15 +194,6 @@ "dependencies": ["ClinVar_Variantion_Id"], "versioning_type": "rosalution" }, - { - "data_set": "ClinVar_Variantion_Id", - "data_source": "Rosalution", - "genomic_unit_type": "hgvs_variant", - "annotation_source_type": "http", - "attribute": ".[] | select(.colocated_variants != null) | .colocated_variants[] | select(.var_synonyms != null) | .var_synonyms | select( .ClinVar != null ) | select(.ClinVar != []) | .ClinVar[] | select(contains(\"VCV\")) | sub(\"VCV0+\"; \"\") | {\"ClinVar_Variantion_Id\": . } ", - "url": "http://grch37.rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;clinvar=1;", - "versioning_type": "rosalution" - }, { "data_set": "NCBI_gene_url", "data_source": "Rosalution", @@ -111,15 +224,6 @@ "dependencies": ["HPO_NCBI_GENE_ID"], "versioning_type": "date" }, - { - "data_set": "OMIM_gene_search_url", - "data_source": "Rosalution", - "genomic_unit_type": "gene", - "annotation_source_type": "forge", - "base_string": "https://www.omim.org/search?index=entry&start=1&sort=score+desc%2C+prefix_sort+desc&search={gene}", - "attribute": "{ \"OMIM_gene_search_url\": .OMIM_gene_search_url }", - "versioning_type": "rosalution" - }, { "data_set": "HPO", "data_source": "HPO", @@ -130,26 +234,6 @@ "dependencies": ["HPO_NCBI_GENE_ID"], "versioning_type": "date" }, - { - "data_set": "HPO_gene_search_url", - "data_source": "Rosalution", - "genomic_unit_type": "gene", - "annotation_source_type": "forge", - "base_string": "https://hpo.jax.org/app/browse/search?q={gene}&navFilter=all", - "attribute": "{ \"HPO_gene_search_url\": .HPO_gene_search_url }", - "versioning_type": "rosalution" - }, - { - "data_set": "Rat Gene Identifier", - "data_source": "Alliance Genome", - "genomic_unit_type": "gene", - "annotation_source_type": "http", - "url": "https://www.alliancegenome.org/api/search_autocomplete?q={gene}", - "attribute": ".results[] | { \"name_key\": .name_key, \"name\": .name, \"primaryKey\": .primaryKey, \"searchKey\": \"{gene} (Rno)\" } | . + { \"searchKey\": .searchKey | ascii_downcase, \"name_key\": .name_key | ascii_downcase } | select( .name_key == .searchKey) | { \"Rat Gene Identifier\": .primaryKey }", - "versioning_type": "rest", - "version_url": "https://www.alliancegenome.org/api/releaseInfo", - "version_attribute": ".releaseVersion" - }, { "data_set": "Rat_Alliance_Genome_Automated_Summary", "data_source": "Alliance Genome", @@ -186,17 +270,6 @@ "version_url": "https://www.alliancegenome.org/api/releaseInfo", "version_attribute": ".releaseVersion" }, - { - "data_set": "Mouse Gene Identifier", - "data_source": "Alliance Genome", - "genomic_unit_type": "gene", - "annotation_source_type": "http", - "url": "https://www.alliancegenome.org/api/search_autocomplete?q={gene}", - "attribute": ".results[] | { \"name_key\": .name_key, \"name\": .name, \"primaryKey\": .primaryKey, \"searchKey\": \"{gene} (Mmu)\" } | . + { \"searchKey\": .searchKey | ascii_downcase, \"name_key\": .name_key | ascii_downcase } | select( .name_key == .searchKey) | { \"Mouse Gene Identifier\": .primaryKey }", - "versioning_type": "rest", - "version_url": "https://www.alliancegenome.org/api/releaseInfo", - "version_attribute": ".releaseVersion" - }, { "data_set": "Mouse_Alliance_Genome_Automated_Summary", "data_source": "Alliance Genome", @@ -233,17 +306,6 @@ "version_url": "https://www.alliancegenome.org/api/releaseInfo", "version_attribute": ".releaseVersion" }, - { - "data_set": "Zebrafish Gene Identifier", - "data_source": "Alliance Genome", - "genomic_unit_type": "gene", - "annotation_source_type": "http", - "url": "https://www.alliancegenome.org/api/search_autocomplete?q={gene}", - "attribute": ".results[] | { \"name_key\": .name_key, \"name\": .name, \"primaryKey\": .primaryKey, \"searchKey\": \"{gene} (Dre)\" } | . + { \"searchKey\": .searchKey | ascii_downcase, \"name_key\": .name_key | ascii_downcase } | select( .name_key == .searchKey) | { \"Zebrafish Gene Identifier\": .primaryKey }", - "versioning_type": "rest", - "version_url": "https://www.alliancegenome.org/api/releaseInfo", - "version_attribute": ".releaseVersion" - }, { "data_set": "Zebrafish_Alliance_Genome_Automated_Summary", "data_source": "Alliance Genome", @@ -280,17 +342,6 @@ "version_url": "https://www.alliancegenome.org/api/releaseInfo", "version_attribute": ".releaseVersion" }, - { - "data_set": "C-Elegens Gene Identifier", - "data_source": "Alliance Genome", - "genomic_unit_type": "gene", - "annotation_source_type": "http", - "url": "https://www.alliancegenome.org/api/search_autocomplete?q={gene}", - "attribute": ".results[] | { \"name_key\": .name_key, \"name\": .name, \"primaryKey\": .primaryKey, \"searchKey\": \"{gene} (Cel)\" } | . + { \"searchKey\": .searchKey | ascii_downcase, \"name_key\": .name_key | ascii_downcase } | select( .name_key == .searchKey) | { \"C-Elegens Gene Identifier\": .primaryKey }", - "versioning_type": "rest", - "version_url": "https://www.alliancegenome.org/api/releaseInfo", - "version_attribute": ".releaseVersion" - }, { "data_set": "C-Elegens_Alliance_Genome_Automated_Summary", "data_source": "Alliance Genome", @@ -327,28 +378,16 @@ "version_url": "https://www.alliancegenome.org/api/releaseInfo", "version_attribute": ".releaseVersion" }, - { - "data_set": "transcript_id", - "data_source": "Ensembl", - "genomic_unit_type": "hgvs_variant", - "transcript": true, - "annotation_source_type": "http", - "url": "http://grch37.rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;", - "attribute": ".[].transcript_consequences[] | { transcript_id: .transcript_id }", - "versioning_type": "rest", - "version_url": "https://grch37.rest.ensembl.org/info/data/?content-type=application/json", - "version_attribute": ".releases[]" - }, { "data_set": "Polyphen Prediction", "data_source": "Ensembl", "genomic_unit_type": "hgvs_variant", "transcript": true, "annotation_source_type": "http", - "url": "http://grch37.rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;", + "url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;", "attribute": ".[].transcript_consequences[] | { polyphen_prediction: .polyphen_prediction, transcript_id: .transcript_id }", "versioning_type": "rest", - "version_url": "https://grch37.rest.ensembl.org/info/data/?content-type=application/json", + "version_url": "https://rest.ensembl.org/info/data/?content-type=application/json", "version_attribute": ".releases[]" }, { @@ -357,10 +396,10 @@ "genomic_unit_type": "hgvs_variant", "transcript": true, "annotation_source_type": "http", - "url": "http://grch37.rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;", + "url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;", "attribute": ".[].transcript_consequences[] | { polyphen_score: .polyphen_score, transcript_id: .transcript_id }", "versioning_type": "rest", - "version_url": "https://grch37.rest.ensembl.org/info/data/?content-type=application/json", + "version_url": "https://rest.ensembl.org/info/data/?content-type=application/json", "version_attribute": ".releases[]" }, { @@ -369,10 +408,10 @@ "genomic_unit_type": "hgvs_variant", "transcript": true, "annotation_source_type": "http", - "url": "http://grch37.rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;", + "url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;", "attribute": ".[].transcript_consequences[] | { sift_prediction: .sift_prediction, transcript_id: .transcript_id }", "versioning_type": "rest", - "version_url": "https://grch37.rest.ensembl.org/info/data/?content-type=application/json", + "version_url": "https://rest.ensembl.org/info/data/?content-type=application/json", "version_attribute": ".releases[]" }, { @@ -381,10 +420,10 @@ "genomic_unit_type": "hgvs_variant", "transcript": true, "annotation_source_type": "http", - "url": "http://grch37.rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;", + "url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;", "attribute": ".[].transcript_consequences[] | { sift_score: .sift_score, transcript_id: .transcript_id }", "versioning_type": "rest", - "version_url": "https://grch37.rest.ensembl.org/info/data/?content-type=application/json", + "version_url": "https://rest.ensembl.org/info/data/?content-type=application/json", "version_attribute": ".releases[]" }, { @@ -393,10 +432,10 @@ "genomic_unit_type": "hgvs_variant", "transcript": true, "annotation_source_type": "http", - "url": "http://grch37.rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;", + "url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;", "attribute": ".[].transcript_consequences[] | { consequence_terms: .consequence_terms, transcript_id: .transcript_id }", "versioning_type": "rest", - "version_url": "https://grch37.rest.ensembl.org/info/data/?content-type=application/json", + "version_url": "https://rest.ensembl.org/info/data/?content-type=application/json", "version_attribute": ".releases[]" }, { @@ -404,11 +443,11 @@ "data_source": "Ensembl", "genomic_unit_type": "hgvs_variant", "annotation_source_type": "http", - "url": "https://grch37.rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;CADD=1;refseq=1;", + "url": "https://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;CADD=1;refseq=1;", "attribute": ".[].transcript_consequences[] | select( .transcript_id | contains(\"{transcript}\") ) | { CADD: .cadd_phred }", "dependencies": ["transcript"], "versioning_type": "rest", - "version_url": "https://grch37.rest.ensembl.org/info/data/?content-type=application/json", + "version_url": "https://rest.ensembl.org/info/data/?content-type=application/json", "version_attribute": ".releases[]" }, { @@ -417,10 +456,10 @@ "genomic_unit_type": "hgvs_variant", "transcript": true, "annotation_source_type": "http", - "url": "http://grch37.rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;", + "url": "http://rest.ensembl.org/vep/human/hgvs/{hgvs_variant}?content-type=application/json;refseq=1;", "attribute": ".[].transcript_consequences[] | { impact: .impact, transcript_id: .transcript_id }", "versioning_type": "rest", - "version_url": "https://grch37.rest.ensembl.org/info/data/?content-type=application/json", + "version_url": "https://rest.ensembl.org/info/data/?content-type=application/json", "version_attribute": ".releases[]" }, { @@ -520,43 +559,5 @@ "versioning_type": "rest", "version_url": "https://www.alliancegenome.org/api/releaseInfo", "version_attribute": ".releaseVersion" - }, - { - "data_set": "Frog_General_Xenbase_Database_url", - "data_source": "Alliance Genome", - "annotation_source_type": "http", - "genomic_unit_type": "gene", - "url": "https://www.alliancegenome.org/api/search?q={gene}", - "attribute": "[.results[] | select ( .species | contains(\"Xenopus laevis\")) | .modCrossRefCompleteUrl][0] | { \"Frog_General_Xenbase_Database_url\": .}", - "versioning_type": "rest", - "version_url": "https://www.alliancegenome.org/api/releaseInfo", - "version_attribute": ".releaseVersion" - }, - { - "data_set": "GTEx_Human_Gene_Expression_url", - "data_source": "Rosalution", - "genomic_unit_type": "gene", - "annotation_source_type": "forge", - "base_string": "https://gtexportal.org/home/gene/{gene}", - "attribute": "{ \"GTEx_Human_Gene_Expression_url\": .GTEx_Human_Gene_Expression_url }", - "versioning_type": "rosalution" - }, - { - "data_set": "Human_Protein_Atlas_Protein_Gene_Search_url", - "data_source": "Rosalution", - "genomic_unit_type": "gene", - "annotation_source_type": "forge", - "base_string": "https://www.proteinatlas.org/search/{gene}", - "attribute": "{ \"Human_Protein_Atlas_Protein_Gene_Search_url\": .Human_Protein_Atlas_Protein_Gene_Search_url }", - "versioning_type": "rosalution" - }, - { - "data_set": "Pharos_Target_url", - "data_source": "Rosalution", - "genomic_unit_type": "gene", - "annotation_source_type": "forge", - "base_string": "https://pharos.nih.gov/targets/{gene}", - "attribute": "{ \"Pharos_Target_url\": .Pharos_Target_url }", - "versioning_type": "rosalution" } ] \ No newline at end of file