From eb326e3ad56ebe4d14a5e2ca9fabd8b0fa2687ae Mon Sep 17 00:00:00 2001 From: M Bernt Date: Fri, 5 Jul 2019 14:59:12 +0200 Subject: [PATCH] dada2: shed.yml switch owner --- .../data_manager/data_manager.py | 51 +++++++++---------- tools/dada2/.shed.yml | 2 +- 2 files changed, 26 insertions(+), 27 deletions(-) diff --git a/data_managers/data_manager_dada2/data_manager/data_manager.py b/data_managers/data_manager_dada2/data_manager/data_manager.py index e1e0c977723..5a760b8cd70 100644 --- a/data_managers/data_manager_dada2/data_manager/data_manager.py +++ b/data_managers/data_manager_dada2/data_manager/data_manager.py @@ -1,9 +1,6 @@ import argparse import json import os -import shutil -import sys -import zipfile try: # For Python 3.0 and later from urllib.request import Request, urlopen @@ -11,14 +8,14 @@ # Fall back to Python 2 imports from urllib2 import Request, urlopen -DEFAULT_TAXLEVELS="Kingdom,Phylum,Class,Order,Family,Genus,Species" +DEFAULT_TAXLEVELS = "Kingdom,Phylum,Class,Order,Family,Genus,Species" FILE2NAME = { - "silva_132":"Silva version 132", - "silva_128":"Silva version 128", - "rdp_16":"RDP trainset 16", - "rdp_14":"RDP trainset 14", - "greengenes_13.84":"GreenGenes version 13.84", + "silva_132": "Silva version 132", + "silva_128": "Silva version 128", + "rdp_16": "RDP trainset 16", + "rdp_14": "RDP trainset 14", + "greengenes_13.84": "GreenGenes version 13.84", "unite_8.0_fungi": "UNITE: General Fasta release 8.0 for Fungi", "unite_8.0_fungi_singletons": "UNITE: General Fasta release 8.0 for Fungi including global and 97% singletons", "RefSeq_RDP_2018_05": "NCBI RefSeq 16S rRNA database supplemented by RDP (05/2018)", @@ -29,13 +26,13 @@ } FILE2TAXURL = { - "silva_132":"https://zenodo.org/record/1172783/files/silva_nr_v132_train_set.fa.gz?download=1", - "silva_128":"https://zenodo.org/record/824551/files/silva_nr_v128_train_set.fa.gz?download=1", - "rdp_16":"https://zenodo.org/record/801828/files/rdp_train_set_16.fa.gz?download=1", - "rdp_14":"https://zenodo.org/record/158955/files/rdp_train_set_14.fa.gz?download=1", + "silva_132": "https://zenodo.org/record/1172783/files/silva_nr_v132_train_set.fa.gz?download=1", + "silva_128": "https://zenodo.org/record/824551/files/silva_nr_v128_train_set.fa.gz?download=1", + "rdp_16": "https://zenodo.org/record/801828/files/rdp_train_set_16.fa.gz?download=1", + "rdp_14": "https://zenodo.org/record/158955/files/rdp_train_set_14.fa.gz?download=1", "unite_8.0_fungi": "https://files.plutof.ut.ee/public/orig/EB/0C/EB0CCB3A871B77EA75E472D13926271076904A588D2E1C1EA5AFCF7397D48378.zip", "unite_8.0_fungi_singletons": "https://files.plutof.ut.ee/doi/06/A2/06A2C86256EED64085670EB0C54B7115F6DAC8F311C656A9CB33E386CFABA0D0.zip", - "greengenes_13.84":"https://zenodo.org/record/158955/files/gg_13_8_train_set_97.fa.gz?download=1", + "greengenes_13.84": "https://zenodo.org/record/158955/files/gg_13_8_train_set_97.fa.gz?download=1", "RefSeq_RDP_2018_05": "https://zenodo.org/record/2541239/files/RefSeq-RDP16S_v2_May2018.fa.gz?download=1", "gtdb_2018_11": "https://zenodo.org/record/2541239/files/GTDB_bac-arc_ssu_r86.fa.gz?download=1", "hitdb_1": "https://zenodo.org/record/159205/files/hitdb_v1.00.fa.gz?download=1", @@ -44,16 +41,17 @@ } FILE2SPECIESURL = { - "silva_132":"https://zenodo.org/record/1172783/files/silva_species_assignment_v132.fa.gz?download=1", - "silva_128":"https://zenodo.org/record/824551/files/silva_species_assignment_v128.fa.gz?download=1", - "rdp_16":"https://zenodo.org/record/801828/files/rdp_species_assignment_16.fa.gz?download=1", - "rdp_14":"https://zenodo.org/record/158955/files/rdp_species_assignment_14.fa.gz?download=1" + "silva_132": "https://zenodo.org/record/1172783/files/silva_species_assignment_v132.fa.gz?download=1", + "silva_128": "https://zenodo.org/record/824551/files/silva_species_assignment_v128.fa.gz?download=1", + "rdp_16": "https://zenodo.org/record/801828/files/rdp_species_assignment_16.fa.gz?download=1", + "rdp_14": "https://zenodo.org/record/158955/files/rdp_species_assignment_14.fa.gz?download=1" } FILE2TAXLEVELS = { "PR2_4.11.1": "Kingdom,Supergroup,Division,Class,Order,Family,Genus,Species" } + def url_download(url, fname, workdir): """ download url to workdir/fname @@ -77,7 +75,7 @@ def url_download(url, fname, workdir): if src: src.close() - #special treatment of UNITE DBs: they are zip files containing two fasta (xyz.fasta and developer/xyz.fasta) +# special treatment of UNITE DBs: they are zip files containing two fasta (xyz.fasta and developer/xyz.fasta) if fname.startswith("unite"): import glob import gzip @@ -88,9 +86,9 @@ def url_download(url, fname, workdir): zip_ref.extractall(workdir) zip_ref.close() # gzip top level fasta file - fastas = glob.glob("%s/*fasta"%workdir) + fastas = glob.glob("%s/*fasta" % workdir) if len(fastas) != 1: - msg = "UNITE download %s contained %d fasta file(s): %s"%(url, len(fastas), " ".join(fastas)) + msg = "UNITE download %s contained %d fasta file(s): %s" % (url, len(fastas), " ".join(fastas)) raise Exception(msg) with open(fastas[0], 'rb') as f_in: with gzip.open(file_path, 'wb') as f_out: @@ -104,27 +102,28 @@ def remote_dataset(dataset, outjson): workdir = params['output_data'][0]['extra_files_path'] os.mkdir(workdir) - url_download( FILE2TAXURL[dataset], dataset+".taxonomy", workdir) + url_download( FILE2TAXURL[dataset], dataset + ".taxonomy", workdir) - data_manager_json = {"data_tables":{}} + data_manager_json = {"data_tables": {}} data_manager_entry = {} data_manager_entry['value'] = dataset data_manager_entry['name'] = FILE2NAME[dataset] - data_manager_entry['path'] = dataset+".taxonomy" + data_manager_entry['path'] = dataset + ".taxonomy" data_manager_entry['taxlevels'] = FILE2TAXLEVELS.get(dataset, DEFAULT_TAXLEVELS) data_manager_json["data_tables"]["dada2_taxonomy"] = data_manager_entry if FILE2SPECIESURL.get(dataset, False ): - url_download( FILE2SPECIESURL[dataset], dataset+".species", workdir) + url_download( FILE2SPECIESURL[dataset], dataset + ".species", workdir) data_manager_entry = {} data_manager_entry['value'] = dataset data_manager_entry['name'] = FILE2NAME[dataset] - data_manager_entry['path'] = dataset+".species" + data_manager_entry['path'] = dataset + ".species" data_manager_json["data_tables"]["dada2_species"] = data_manager_entry with file(outjson, 'w') as jf: jf.write(json.dumps(data_manager_json)) + if __name__ == '__main__': parser = argparse.ArgumentParser(description='Create data manager json.') parser.add_argument('--out', action='store', help='JSON filename') diff --git a/tools/dada2/.shed.yml b/tools/dada2/.shed.yml index b8a961e4fc9..b86e26a2a61 100644 --- a/tools/dada2/.shed.yml +++ b/tools/dada2/.shed.yml @@ -1,5 +1,5 @@ name: dada2 -owner: matthias +owner: iuc categories: - Metagenomics description: DADA2 wrappers