Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
dada2: shed.yml switch owner
Browse files Browse the repository at this point in the history
bernt-matthias committed Jul 6, 2019
1 parent 07616d6 commit eb326e3
Showing 2 changed files with 26 additions and 27 deletions.
51 changes: 25 additions & 26 deletions data_managers/data_manager_dada2/data_manager/data_manager.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,21 @@
import argparse
import json
import os
import shutil
import sys
import zipfile
try:
# For Python 3.0 and later
from urllib.request import Request, urlopen
except ImportError:
# Fall back to Python 2 imports
from urllib2 import Request, urlopen

DEFAULT_TAXLEVELS="Kingdom,Phylum,Class,Order,Family,Genus,Species"
DEFAULT_TAXLEVELS = "Kingdom,Phylum,Class,Order,Family,Genus,Species"

FILE2NAME = {
"silva_132":"Silva version 132",
"silva_128":"Silva version 128",
"rdp_16":"RDP trainset 16",
"rdp_14":"RDP trainset 14",
"greengenes_13.84":"GreenGenes version 13.84",
"silva_132": "Silva version 132",
"silva_128": "Silva version 128",
"rdp_16": "RDP trainset 16",
"rdp_14": "RDP trainset 14",
"greengenes_13.84": "GreenGenes version 13.84",
"unite_8.0_fungi": "UNITE: General Fasta release 8.0 for Fungi",
"unite_8.0_fungi_singletons": "UNITE: General Fasta release 8.0 for Fungi including global and 97% singletons",
"RefSeq_RDP_2018_05": "NCBI RefSeq 16S rRNA database supplemented by RDP (05/2018)",
@@ -29,13 +26,13 @@
}

FILE2TAXURL = {
"silva_132":"https://zenodo.org/record/1172783/files/silva_nr_v132_train_set.fa.gz?download=1",
"silva_128":"https://zenodo.org/record/824551/files/silva_nr_v128_train_set.fa.gz?download=1",
"rdp_16":"https://zenodo.org/record/801828/files/rdp_train_set_16.fa.gz?download=1",
"rdp_14":"https://zenodo.org/record/158955/files/rdp_train_set_14.fa.gz?download=1",
"silva_132": "https://zenodo.org/record/1172783/files/silva_nr_v132_train_set.fa.gz?download=1",
"silva_128": "https://zenodo.org/record/824551/files/silva_nr_v128_train_set.fa.gz?download=1",
"rdp_16": "https://zenodo.org/record/801828/files/rdp_train_set_16.fa.gz?download=1",
"rdp_14": "https://zenodo.org/record/158955/files/rdp_train_set_14.fa.gz?download=1",
"unite_8.0_fungi": "https://files.plutof.ut.ee/public/orig/EB/0C/EB0CCB3A871B77EA75E472D13926271076904A588D2E1C1EA5AFCF7397D48378.zip",
"unite_8.0_fungi_singletons": "https://files.plutof.ut.ee/doi/06/A2/06A2C86256EED64085670EB0C54B7115F6DAC8F311C656A9CB33E386CFABA0D0.zip",
"greengenes_13.84":"https://zenodo.org/record/158955/files/gg_13_8_train_set_97.fa.gz?download=1",
"greengenes_13.84": "https://zenodo.org/record/158955/files/gg_13_8_train_set_97.fa.gz?download=1",
"RefSeq_RDP_2018_05": "https://zenodo.org/record/2541239/files/RefSeq-RDP16S_v2_May2018.fa.gz?download=1",
"gtdb_2018_11": "https://zenodo.org/record/2541239/files/GTDB_bac-arc_ssu_r86.fa.gz?download=1",
"hitdb_1": "https://zenodo.org/record/159205/files/hitdb_v1.00.fa.gz?download=1",
@@ -44,16 +41,17 @@
}

FILE2SPECIESURL = {
"silva_132":"https://zenodo.org/record/1172783/files/silva_species_assignment_v132.fa.gz?download=1",
"silva_128":"https://zenodo.org/record/824551/files/silva_species_assignment_v128.fa.gz?download=1",
"rdp_16":"https://zenodo.org/record/801828/files/rdp_species_assignment_16.fa.gz?download=1",
"rdp_14":"https://zenodo.org/record/158955/files/rdp_species_assignment_14.fa.gz?download=1"
"silva_132": "https://zenodo.org/record/1172783/files/silva_species_assignment_v132.fa.gz?download=1",
"silva_128": "https://zenodo.org/record/824551/files/silva_species_assignment_v128.fa.gz?download=1",
"rdp_16": "https://zenodo.org/record/801828/files/rdp_species_assignment_16.fa.gz?download=1",
"rdp_14": "https://zenodo.org/record/158955/files/rdp_species_assignment_14.fa.gz?download=1"
}

# Comma-separated taxonomic level names for datasets that do not use
# DEFAULT_TAXLEVELS; datasets missing from this mapping fall back to
# DEFAULT_TAXLEVELS via FILE2TAXLEVELS.get(dataset, DEFAULT_TAXLEVELS).
FILE2TAXLEVELS = {
"PR2_4.11.1": "Kingdom,Supergroup,Division,Class,Order,Family,Genus,Species"
}


def url_download(url, fname, workdir):
"""
download url to workdir/fname
@@ -77,7 +75,7 @@ def url_download(url, fname, workdir):
if src:
src.close()

#special treatment of UNITE DBs: they are zip files containing two fasta (xyz.fasta and developer/xyz.fasta)
# special treatment of UNITE DBs: they are zip files containing two fasta (xyz.fasta and developer/xyz.fasta)
if fname.startswith("unite"):
import glob
import gzip
@@ -88,9 +86,9 @@ def url_download(url, fname, workdir):
zip_ref.extractall(workdir)
zip_ref.close()
# gzip top level fasta file
fastas = glob.glob("%s/*fasta"%workdir)
fastas = glob.glob("%s/*fasta" % workdir)
if len(fastas) != 1:
msg = "UNITE download %s contained %d fasta file(s): %s"%(url, len(fastas), " ".join(fastas))
msg = "UNITE download %s contained %d fasta file(s): %s" % (url, len(fastas), " ".join(fastas))
raise Exception(msg)
with open(fastas[0], 'rb') as f_in:
with gzip.open(file_path, 'wb') as f_out:
@@ -104,27 +102,28 @@ def remote_dataset(dataset, outjson):

workdir = params['output_data'][0]['extra_files_path']
os.mkdir(workdir)
url_download( FILE2TAXURL[dataset], dataset+".taxonomy", workdir)
url_download( FILE2TAXURL[dataset], dataset + ".taxonomy", workdir)

data_manager_json = {"data_tables":{}}
data_manager_json = {"data_tables": {}}
data_manager_entry = {}
data_manager_entry['value'] = dataset
data_manager_entry['name'] = FILE2NAME[dataset]
data_manager_entry['path'] = dataset+".taxonomy"
data_manager_entry['path'] = dataset + ".taxonomy"
data_manager_entry['taxlevels'] = FILE2TAXLEVELS.get(dataset, DEFAULT_TAXLEVELS)
data_manager_json["data_tables"]["dada2_taxonomy"] = data_manager_entry

if FILE2SPECIESURL.get(dataset, False ):
url_download( FILE2SPECIESURL[dataset], dataset+".species", workdir)
url_download( FILE2SPECIESURL[dataset], dataset + ".species", workdir)
data_manager_entry = {}
data_manager_entry['value'] = dataset
data_manager_entry['name'] = FILE2NAME[dataset]
data_manager_entry['path'] = dataset+".species"
data_manager_entry['path'] = dataset + ".species"
data_manager_json["data_tables"]["dada2_species"] = data_manager_entry

with file(outjson, 'w') as jf:
jf.write(json.dumps(data_manager_json))


if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Create data manager json.')
parser.add_argument('--out', action='store', help='JSON filename')
2 changes: 1 addition & 1 deletion tools/dada2/.shed.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: dada2
owner: matthias
owner: iuc
categories:
- Metagenomics
description: DADA2 wrappers

0 comments on commit eb326e3

Please sign in to comment.