Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convenience PR for Viewer code #48

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ distribute-*.egg
dammit.egg-info/
*.so
*~
.tags*
6 changes: 5 additions & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,9 @@ include setup.cfg
include LICENSE
include distribute_setup.py
include dammit/VERSION
include bin/dammit
include bin/dammit*
recursive-include dammit .*.json
recursive-include dammit/viewer/templates *.html
recursive-include dammit/viewer/templates *.js
recursive-include dammit/viewer/static *.css
recursive-include dammit/viewer/static *.js
28 changes: 28 additions & 0 deletions bin/dammit-view
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/usr/bin/env python
from __future__ import print_function

import argparse
from flask import Flask, g
from dammit import common
from dammit.viewer import transcript_pane
from dammit.viewer.database import db
from dammit.viewer import static_folder, template_folder
import os

# Upper-case module globals are read by ``app.config.from_object(__name__)``
# below; they stay ``None`` until the command line has been parsed.
DIRECTORY = None
ZODB_STORAGE = None

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Run the dammit viewer web app for one directory.')
    # Required: without a value, os.path.abspath(None) below would fail with
    # an opaque TypeError instead of a usage message.
    parser.add_argument('--directory', required=True,
                        help='directory holding the dammit database file')
    args = parser.parse_args()

    DIRECTORY = os.path.abspath(args.directory)
    # The database file name comes from the packaged dammit configuration.
    ZODB_STORAGE = 'file://' + os.path.join(
        DIRECTORY, common.CONFIG['settings']['database_filename'])

    app = Flask(__name__,
                static_folder=static_folder,
                template_folder=template_folder)
    # Copies the upper-case names of this module (DIRECTORY, ZODB_STORAGE)
    # into the Flask configuration.
    app.config.from_object(__name__)
    app.register_blueprint(transcript_pane.views)
    db.init_app(app)

    # NOTE(review): debug=True together with host='0.0.0.0' exposes the
    # Werkzeug interactive debugger on every network interface; confirm this
    # is only ever run on a trusted machine before shipping.
    app.run(host='0.0.0.0', port=5001, debug=True)
4 changes: 3 additions & 1 deletion dammit/.config.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
"dammit_dir": ".dammit",
"dep_dir": "dependencies",
"db_dir": "databases",

"summary_filename": "dammit.summary.json",
"database_filename": "dammit.database.fs",

"blast": {
"evalue": 0.000001,
"params": ""
Expand Down
7 changes: 7 additions & 0 deletions dammit/.databases.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,13 @@
"url": "ftp://cegg.unige.ch/OrthoDB8/Eukaryotes/Genes_to_OGs/ODB8_EukOGs_genes_ALL_levels.txt.gz"
},

"ncbi.tax": {
"access": "download",
"db_type": "json",
"filename": "ncbi_taxonomy.json",
"url": "https://s3-us-west-1.amazonaws.com/json-taxonomies/ncbi_taxonomy.json"
},

"busco": {

"metazoa": {
Expand Down
20 changes: 11 additions & 9 deletions dammit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,18 @@
from .hits import BestHits
from .crbl import CRBL

import parsers
import gff
import blast
import tasks
import fileio

import annotate
import databases
import dependencies
import common
import report
from . import parsers
from . import gff
from . import blast
from . import tasks

from . import annotate
from . import databases
from . import dependencies
from . import common
from . import report

import os
rel_path = os.path.dirname(__file__)
Expand Down
109 changes: 64 additions & 45 deletions dammit/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@
from platform import system
import sys

from doit.task import Task

from . import common
from .log import LogReporter
#from .crbl import CRBL
from .report import get_report_tasks
from .tasks import get_transcriptome_stats_task, \
from .tasks import get_summary_task, \
get_busco_task, \
get_group_task, \
get_link_file_task, \
Expand All @@ -24,6 +26,7 @@
get_sanitize_fasta_task, \
get_rename_transcriptome_task, \
get_transeq_task, \
get_create_zodb_task, \
print_tasks

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -61,16 +64,14 @@ def _init_filenames(self):
else:
out_dir = self.args.output_dir
self.directory = os.path.abspath(out_dir)

self.stats_fn = self.transcriptome_fn + '.stats.json'

self.busco_basename = '{0}.{1}.busco.results'.format(self.transcriptome_fn,
self.args.busco_group)
self.busco_dir = 'run_{0}'.format(self.busco_basename)
busco_summary_fn = 'short_summary_{0}'.format(self.transcriptome_fn)
busco_summary_fn = 'short_summary_{0}'.format(self.busco_basename)
self.busco_summary_fn = os.path.join(self.busco_dir,
busco_summary_fn)

self.translated_fn = '{0}.pep'.format(self.transcriptome_fn)

self.transdecoder_dir = '{0}.transdecoder_dir'.format(self.transcriptome_fn)
Expand All @@ -81,13 +82,20 @@ def _init_filenames(self):
self.transdecoder_pfam_fn = '{0}.pfam.tbl'.format(self.transdecoder_orf_fn)
self.transdecoder_pep_fn = '{0}.transdecoder.pep'.format(self.transcriptome_fn)
self.transdecoder_gff3_fn = '{0}.transdecoder.gff3'.format(self.transcriptome_fn)

self.pfam_fn = '{0}.pfam.csv'.format(self.transcriptome_fn)
self.rfam_fn = '{0}.rfam.tbl'.format(self.transcriptome_fn)

self.orthodb_fn = '{0}.x.orthodb.maf'.format(self.transcriptome_fn)
self.uniref_fn = '{0}.x.uniref.maf'.format(self.transcriptome_fn)

self.final_gff3_fn = '{0}.dammit.gff3'.format(self.transcriptome_fn)
self.final_fasta_fn = '{0}.dammit.fasta'.format(self.transcriptome_fn)
self.final_transcript_fn = '{0}.dammit.json'.format(self.transcriptome_fn)
self.transcript_info_fn = '{0}.dammit.info.csv'.format(self.transcriptome_fn)
self.summary_fn = common.CONFIG['settings']['summary_filename']
self.database_fn = common.CONFIG['settings']['database_filename']

self.user_pep_fn_dict = {}


Expand Down Expand Up @@ -117,8 +125,8 @@ def run_tasks(self, doit_args=['run']):
os.makedirs(self.directory)
os.chdir(self.directory)

common.run_tasks(self.tasks,
doit_args,
common.run_tasks(self.tasks,
doit_args,
config=self.doit_config)
finally:
self.logger.debug('chdir: {0}'.format(cwd))
Expand All @@ -130,27 +138,17 @@ def rename_task(self):
self.names_fn,
self.args.name)

def stats_task(self):
'''Calculate assembly information. First it runs some basic stats like N50 and
number of contigs, and uses the HyperLogLog counter from khmer to
estimate unique k-mers for checking redundancy. Then it runs BUSCO to
assess completeness. These tasks are grouped under the 'assess' task.
'''

return get_transcriptome_stats_task(self.transcriptome_fn,
self.stats_fn)

def busco_task(self):
'''BUSCO assesses completeness using a series of curated databases of core
conserved genes.
'''

busco_cfg = common.CONFIG['settings']['busco']
return get_busco_task(self.transcriptome_fn,
self.busco_basename,
return get_busco_task(self.transcriptome_fn,
self.busco_basename,
self.database_dict['BUSCO'],
'trans',
self.args.n_threads,
'trans',
self.args.n_threads,
busco_cfg)

def transeq_task(self):
Expand All @@ -170,22 +168,22 @@ def transdecoder_tasks(self):


orf_cfg = common.CONFIG['settings']['transdecoder']['longorfs']
yield get_transdecoder_orf_task(self.transcriptome_fn,
yield get_transdecoder_orf_task(self.transcriptome_fn,
orf_cfg)

yield get_hmmscan_task(self.transdecoder_orf_fn,
yield get_hmmscan_task(self.transdecoder_orf_fn,
self.transdecoder_pfam_fn,
self.database_dict['PFAM'],
self.database_dict['PFAM'],
self.args.evalue,
self.args.n_threads,
self.args.n_threads,
common.CONFIG['settings']['hmmer']['hmmscan'])

yield get_remap_hmmer_task(self.transdecoder_pfam_fn,
self.transdecoder_orf_gff3_fn,
self.pfam_fn)

predict_cfg = common.CONFIG['settings']['transdecoder']['predict']
yield get_transdecoder_predict_task(self.transcriptome_fn,
yield get_transdecoder_predict_task(self.transcriptome_fn,
self.transdecoder_pfam_fn,
predict_cfg)

Expand All @@ -196,26 +194,26 @@ def cmscan_task(self):
'''

cmscan_cfg = common.CONFIG['settings']['infernal']['cmscan']
return get_cmscan_task(self.transcriptome_fn,
return get_cmscan_task(self.transcriptome_fn,
self.rfam_fn,
self.database_dict['RFAM'],
self.database_dict['RFAM'],
self.args.evalue,
self.args.n_threads,
self.args.n_threads,
cmscan_cfg)

def orthodb_task(self):
'''Run LAST to get homologies with OrthoDB. We use LAST here because
it is much faster than BLAST+, and OrthoDB is pretty huge.
'''

lastal_cfg = common.CONFIG['settings']['last']['lastal']
orthodb = self.database_dict['ORTHODB']
return get_lastal_task(self.transcriptome_fn,
orthodb,
self.orthodb_fn,
return get_lastal_task(self.transcriptome_fn,
orthodb,
self.orthodb_fn,
True,
self.args.evalue,
self.args.n_threads,
self.args.n_threads,
lastal_cfg)

def uniref_task(self):
Expand Down Expand Up @@ -243,17 +241,37 @@ def user_crb_tasks(self):

fn = '{0}.x.{1}.crbb.tsv'.format(self.transcriptome_fn, key)
self.user_pep_fn_dict[key] = fn
yield get_crb_blast_task(self.transcriptome_fn,
key,
fn,
yield get_crb_blast_task(self.transcriptome_fn,
key,
fn,
self.args.evalue,
crb_blast_cfg,
crb_blast_cfg,
self.args.n_threads)

def summary_task(self):
    '''Return the summary task for this annotation run.

    Delegates to get_summary_task, passing (in order) the final FASTA
    filename, the names filename, the working directory, the final GFF3
    filename, the BUSCO summary filename, the summary filename, and the
    transcript info filename.
    '''

    summary_args = (
        self.final_fasta_fn,
        self.names_fn,
        self.directory,
        self.final_gff3_fn,
        self.busco_summary_fn,
        self.summary_fn,
        self.transcript_info_fn,
    )
    return get_summary_task(*summary_args)

def create_zodb_task(self):
    '''Return the task built by get_create_zodb_task.

    It is given the final GFF3 filename, the transcript info filename,
    and the database filename, in that order.
    '''

    gff3_fn = self.final_gff3_fn
    info_fn = self.transcript_info_fn
    database_fn = self.database_fn
    return get_create_zodb_task(gff3_fn, info_fn, database_fn)

def get_tasks(self):

yield self.rename_task()
yield self.stats_task()
#yield self.stats_task()
yield self.busco_task()
#yield self.transeq_task()
for task in self.transdecoder_tasks():
Expand All @@ -265,10 +283,11 @@ def get_tasks(self):
for task in self.user_crb_tasks():
yield task

self.outputs, report_tasks = get_report_tasks(self.transcriptome_fn,
self,
self.database_dict,
n_threads=self.args.n_threads)
report_tasks = get_report_tasks(self.transcriptome_fn,
self,
self.database_dict,
n_threads=self.args.n_threads)
for task in report_tasks:
yield task

yield self.summary_task()
yield self.create_zodb_task()
Loading