-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #151 from sanger-tol/add_ensembl_metadata_check
Add ensembl metadata check
- Loading branch information
Showing
6 changed files
with
139 additions
and
3 deletions.
There are no files selected for viewing
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import os | ||
import sys | ||
import requests | ||
import argparse | ||
|
||
|
||
def parse_args(args=None):
    """Parse the command-line options of the Ensembl metadata fetcher.

    Args:
        args: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``.

    Returns:
        An ``argparse.Namespace`` with the ``taxon_id`` and ``output``
        attributes.
    """
    description = "Query the Ensembl Metadata API to pull out annotation information required by a genome note."
    epilog = "Example usage: python fetch_ensembl_metadata.py --taxon_id --output"

    parser = argparse.ArgumentParser(description=description, epilog=epilog)
    parser.add_argument("--taxon_id", required=True, help="The species taxon id")
    parser.add_argument("--output", required=True, help="Output file path")
    parser.add_argument("--version", action="version", version="%(prog)s 1.0")

    # Forward ``args`` so that an explicitly supplied argument list is honoured
    # instead of always reading the process command line.
    return parser.parse_args(args)
|
||
|
||
def make_dir(path):
    """Create the directory ``path``, including any missing parents.

    An empty ``path`` is ignored, and an already existing directory is not
    treated as an error.
    """
    if len(path) == 0:
        # Nothing to create for an empty path.
        return
    os.makedirs(path, exist_ok=True)
|
||
|
||
def fetch_ensembl_data(taxon, output_file):
    """Query the Ensembl Metadata API for a species and write the result as CSV.

    The species taxon id is used to ask the Ensembl GraphQL endpoint whether
    the species has been annotated. When the first returned genome has an
    assembly accession, that accession and a URL to the species page on the
    Ensembl beta website are recorded. The output file is always written,
    containing only the header line when no annotation data was found.

    Args:
        taxon: Species taxon id, sent as the ``$taxon`` GraphQL variable.
        output_file: Path of the CSV file to write. Its parent directory is
            created when it does not exist.

    Returns:
        The ``output_file`` path.
    """
    url = "https://beta.ensembl.org/data/graphql"
    variables = {"taxon": taxon}
    query = """
    query Annotation($taxon: String)
    {
        genomes(by_keyword: {species_taxonomy_id: $taxon }) {
            assembly_accession
            scientific_name
            tol_id
            dataset {
                name
                type
                dataset_type
            }
            genome_id
        }
    }
    """

    # Bound before the request so the header-only file can still be written
    # when the API call does not succeed.
    param_list = []

    # Without a timeout requests waits indefinitely on an unresponsive server.
    response = requests.post(
        url=url,
        json={"query": query, "variables": variables},
        timeout=60,
    )

    if response.status_code == 200:
        # "data" may be null (GraphQL error) and "genomes" may be null or an
        # empty list when the taxon is unknown; treat all as "no annotation".
        payload = response.json().get("data") or {}
        genomes = payload.get("genomes") or []

        if genomes:
            genome = genomes[0]
            accession = genome.get("assembly_accession")

            if accession:
                # Values are wrapped in double quotes in the output file.
                param_list.append(("ANNOT_ACCESSION", f'"{accession}"'))
                species_id = genome["genome_id"]
                annot_url = f"https://beta.ensembl.org/species/{species_id}"
                param_list.append(("ANNOT_URL", f'"{annot_url}"'))
    else:
        print(
            f"Warning: Ensembl Metadata API returned HTTP {response.status_code}; "
            "no annotation data written.",
            file=sys.stderr,
        )

    # Write out file even if there is no annotation data to write
    out_dir = os.path.dirname(output_file)
    make_dir(out_dir)  # Create directory if it does not exist

    with open(output_file, "w") as fout:
        # Write header
        fout.write(",".join(["#paramName", "paramValue"]) + "\n")

        for param_pair in param_list:
            fout.write(",".join(param_pair) + "\n")

    return output_file
|
||
|
||
def main(args=None):
    """Script entry point: parse the options and fetch the Ensembl metadata.

    Args:
        args: Optional argument list handed to ``parse_args``.
    """
    options = parse_args(args)
    fetch_ensembl_data(options.taxon_id, options.output)


if __name__ == "__main__":
    # main() returns None, so a successful run exits with status 0.
    sys.exit(main())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
// Runs fetch_ensembl_metadata.py for one assembly/taxon pair and emits the
// resulting CSV file together with a versions.yml record.
process FETCH_ENSEMBL_METADATA {
    tag "$assembly"
    label 'process_single'

    // fetch_ensembl_metadata.py imports `requests`, which is not part of the
    // Python standard library, so it has to be installed with the interpreter.
    conda "conda-forge::python=3.9.1 conda-forge::requests=2.26.0"

    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/requests:2.26.0':
        'quay.io/biocontainers/requests:2.26.0'}"

    input:
    tuple val(assembly), val(taxon_id)

    output:
    path "*.csv", emit: file_path
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def script_name = "fetch_ensembl_metadata.py"
    def output_file = "${assembly}_ensembl_annotation.csv"

    """
    $script_name --taxon_id $taxon_id --output $output_file
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        fetch_ensembl_metadata.py: \$(fetch_ensembl_metadata.py --version | cut -d' ' -f2)
    END_VERSIONS
    """
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters