Skip to content

Commit

Permalink
version 2.1.10
Browse files Browse the repository at this point in the history
  • Loading branch information
PollyTikhonova committed Jan 29, 2020
1 parent f14702f commit a6ba13c
Show file tree
Hide file tree
Showing 17 changed files with 217 additions and 43 deletions.
5 changes: 2 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# Phigaro v2.1.9
Phigaro is a command-line tool for predicting and annotating phages and prophages from nucleic acid sequences (including metagenome assemblies) and is based on identifying phage genes via pVOG profile HMMs and a smoothing window algorithm. https://doi.org/10.1101/598243
# Phigaro v2.1.10


## Requirements
Expand Down Expand Up @@ -135,4 +134,4 @@ Phigaro is tested on Linux systems. For MacOS, you may need to add the following
## Publication
Elizaveta V. Starikova, Polina O. Tikhonova, Nikita A. Prianichnikov, Chris M. Rands, Evgeny M. Zdobnov, Vadim M. Govorun (2019), Phigaro: high throughput prophage sequence annotation, bioRxiv 598243; doi: https://doi.org/10.1101/598243

(c) E.Starikova, P. Tikhonova, N.Pryanichnikov, 2019
(c) E.Starikova, P. Tikhonova, N.Pryanichnikov, 2019
Binary file added dist_old_versions/phigaro-0.2.1.8.tar.gz
Binary file not shown.
Binary file added dist_old_versions/phigaro-2.1.9.tar.gz
Binary file not shown.
2 changes: 1 addition & 1 deletion phigaro/_version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Version information."""

# The following line *must* be the last in the module, exactly as formatted:
__version__ = "0.2.1.8_3"
__version__ ="2.1.10"
18 changes: 11 additions & 7 deletions phigaro/batch/task/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@

from .base import AbstractTask
from Bio import SeqIO
from phigaro.context import Context

class PreprocessTask(AbstractTask):
task_name = 'input_file'

def __init__(self, input):
super().__init__()
self.input = input
self.context = Context.instance()

def check_fastafile(self):
def get_users_answer(question):
Expand All @@ -32,17 +34,19 @@ def get_users_answer(question):
if len(record) < 20000:
sequences_to_delete.append(record.id)
else:
self.context.scaffolds_info[record.id] = len(record)
records_to_save.append(record)
SeqIO.write(records_to_save, self.output(), "fasta")
del records_to_save

if len(sequences_to_delete) > 0:
print(
'Error! Your fasta file contains at least one sequence length < 20000. The short sequences are: ')
print('\n'.join(sequences_to_delete))
if not get_users_answer('Do you want to start Phigaro without these sequences?'):
self.clean()
exit(1)
if not self.config['phigaro']['delete_shorts']:
if len(sequences_to_delete) > 0:
print(
'Error! Your fasta file contains at least one sequence length < 20000. The short sequences are: ')
print('\n'.join(sequences_to_delete))
if not get_users_answer('Do you want to start Phigaro without these sequences?'):
self.clean()
exit(1)

def output(self):
return self.file('{}.fasta'.format(self.sample))
Expand Down
49 changes: 48 additions & 1 deletion phigaro/batch/task/run_phigaro.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from Bio.SeqRecord import SeqRecord
from phigaro.to_html.preprocess import plot_html, form_sequence, if_transposable
from phigaro.to_html.html_formation import form_html_document
from phigaro.context import Context

class RunPhigaroTask(AbstractTask):
task_name = 'run_phigaro'
Expand All @@ -39,6 +40,7 @@ def _prepare(self):
self._save_fasta = self.config['phigaro'].get('save_fasta', False)
self._output = self.config['phigaro'].get('output', False)
self._uuid = self.config['phigaro'].get('uuid', False)
self.context = Context.instance()

def output(self):
return self.file('{}.tsv'.format(self.sample))
Expand All @@ -47,13 +49,22 @@ def run(self):
max_evalue = self.config['hmmer']['e_value_threshold']
penalty_black = self.config['phigaro']['penalty_black']
penalty_white = self.config['phigaro']['penalty_white']
gff = self.config['phigaro']['gff']
bed = self.config['phigaro']['bed']

pvogs_black_list = const.DEFAULT_BLACK_LIST
pvogs_white_list = const.DEFAULT_WHITE_LIST

scaffold_set = read_prodigal_output(self.prodigal_task.output())
hmmer_result = read_hmmer_output(self.hmmer_task.output())

gff_base = ['##gff-version 3.2.1']
gff_scaffold = []
gff_prophage = []
gff_gene = []
bed_prophage = []
bed_gene = []

with open(self.output(), 'w') as of:
writer = csv.writer(of, delimiter='\t')

Expand All @@ -67,22 +78,50 @@ def run(self):
transposables_status = []
phage_num = 0
for scaffold in scaffold_set:
if gff:
gff_scaffold.append('##sequence-region %s 1 %d'%(scaffold.name, self.context.scaffolds_info[scaffold.name]))
phage_info.append([scaffold.name, []])
genes = list(scaffold) # type: list[Gene]
npn = hmmer_res_to_npn(scaffold, hmmer_result, max_evalue=max_evalue,
penalty_black = penalty_black, penalty_white = penalty_white,
pvogs_black_list = pvogs_black_list, pvogs_white_list = pvogs_white_list)
gc = hmmer_res_to_gc(scaffold, hmmer_result, max_evalue=max_evalue)

phages = self.finder.find_phages(npn, gc)
for phage in phages:
phage_num += 1
begin = genes[phage.begin].begin
end = genes[phage.end].end
phage_genes = genes[phage.begin:(phage.end+1)]
if gff:
gff_prophage.append('\t'.join([scaffold.name, '.', 'prophage',
str(begin+1), str(end+1),
'.', '.', '.',
'ID=prophage%d'%phage_num]))
if bed:
bed_prophage.append('\t'.join([scaffold.name,
str(begin), str(end),
'prophage%d' % phage_num,
'.', '.']))
hmmer_records = [
hmmer_result.min_record(hmmer_result.get_records(scaffold.name, gene.name))
for gene in genes[phage.begin: phage.end]
]
if gff or bed:
for record in phage_genes:
hmmer_record = hmmer_result.min_record(hmmer_result.get_records(scaffold.name, record.name))
pvog = '.' if hmmer_record is None else hmmer_record.vog_name
evalue = '.' if hmmer_record is None else hmmer_record.evalue
if gff:
gff_gene.append('\t'.join([scaffold.name, '.', 'gene',
str(record.begin+1), str(record.end+1),
str(evalue), '+' if record.strand>0 else '-', '.',
'ID=gene%s;Parent=prophage%d;pvog=%s'%(record.name, phage_num, pvog)]))
if bed:
bed_gene.append('\t'.join([scaffold.name,
str(record.begin), str(record.end+1),
'gene%s' % (record.name),
str(evalue), '+' if record.strand > 0 else '-']))

hmmer_pvogs_records = (
record.vog_name
for record in hmmer_records
Expand Down Expand Up @@ -112,6 +151,14 @@ def run(self):
if (not self._no_html) and (self._output):
plotly_plots.append(plot_html(hmmer_records, begin, end))
phage_info = phage_info if len(phage_info[-1][1]) > 0 else phage_info[:-1]

if gff:
with open(self._output+'.gff3', 'w') as f:
f.write('\n'.join(gff_base+gff_scaffold+gff_prophage+gff_gene))
if bed:
with open(self._output+'.bed', 'w') as f:
f.write('\n'.join(gff_base+bed_prophage+bed_gene))

if (len(phage_info) > 0) and (not self._no_html) and (self._output):
html = form_html_document(phage_info, transposables_status, plotly_plots, self._filename, self._uuid)
with open(self._output+'.html', 'w') as f:
Expand Down
19 changes: 14 additions & 5 deletions phigaro/cli/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,10 @@ def main():
version='%(prog)s {version}'.format(version=__version__))
parser.add_argument('-f', '--fasta-file', help='Assembly scaffolds/contigs or full genomes, required',
required=True)
parser.add_argument('-c', '--config', default=default_config_path, help='Config file, not required')
parser.add_argument('-c', '--config', default=default_config_path, help='Path to the config file, not required')
parser.add_argument('-v', '--verbose', action='store_true', help=argparse.SUPPRESS)
parser.add_argument('-p', '--print-vogs', help='Print phage vogs for each region', action='store_true')
parser.add_argument('-e', '--extension', default=['html'], nargs='+', help='Type of the output: html, txt or stdout. Default is html. You can specify several file formats with a space as a separator. Example: -e txt html stdout.')
parser.add_argument('-e', '--extension', default=['html'], nargs='+', help='Type of the output: html, txt, gff, bed or stdout. Default is html. You can specify several file formats with a space as a separator. Example: -e txt html stdout.')
parser.add_argument('-o', '--output', default=False, help='Output filename for html and txt outputs. Required by default, but not required for stdout only output.')
parser.add_argument('--not-open', help='Do not open html file automatically, if html output type is specified.', action='store_true')
parser.add_argument('-t', '--threads',
Expand All @@ -77,7 +77,8 @@ def main():
help='Num of threads ('
'default is num of CPUs={})'.format(multiprocessing.cpu_count()))
parser.add_argument('--no-cleanup', action='store_true', help=argparse.SUPPRESS)
parser.add_argument('-S', '--substitute-output', action='append', help=argparse.SUPPRESS)
parser.add_argument('-S', '--substitute-output', action='append', help='If you have precomputed prodigal and/or hmmer data you can provide paths to the files in the following format: program:address/to/the/file. In place of program you should write hmmer or prodigal. If you need to provide both files you should pass them separetely as two parametres.')
parser.add_argument('-d', '--delete-shorts', action='store_true', help='Exclude sequences with length < 20000 automatically.')

args = parser.parse_args()

Expand Down Expand Up @@ -106,13 +107,20 @@ def main():
config['phigaro']['not_open'] = args.not_open
config['phigaro']['output'] = args.output
config['phigaro']['uuid'] = uuid.uuid4().hex

config['phigaro']['delete_shorts'] = args.delete_shorts
config['phigaro']['gff'] = True if ('gff' in args.extension) else False
config['phigaro']['bed'] = True if ('bed' in args.extension) else False
filename = args.fasta_file
sample = '{}-{}'.format(
sample_name(filename),
config['phigaro']['uuid']
)

if config['phigaro']['output'] is not None:
fold = '/'.join(config['phigaro']['output'].replace('\\', '/').split('/')[:-1])
if not os.path.isdir(fold):
os.mkdir(fold)

Context.initialize(
sample=sample,
config=config,
Expand Down Expand Up @@ -147,8 +155,9 @@ def main():

if ('txt' in args.extension) or ('stdout' in args.extension):
with open(task_output_file) as f:
f = list(f)
if 'txt' in args.extension:
out_f = open(args.output, 'w')
out_f = open(args.output+'.tsv', 'w')
for line in f:
out_f.write(line)
if 'stdout' in args.extension:
Expand Down
3 changes: 2 additions & 1 deletion phigaro/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@
class Context(object):
def __init__(self, sample, threads, config):
"""
:type sample: str
:type config: dict
:type threads: int
:type threads: dict
"""
self.threads = threads
self.sample = sample
self.config = config
self.scaffolds_info = {}
12 changes: 7 additions & 5 deletions phigaro/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,12 @@ def hmmer_res_to_gc(scaffold, hmmer_result, max_evalue):


class Gene(object):
def __init__(self, name, begin, end, scaffold=None):
def __init__(self, name, begin, end, strand, scaffold=None):
# type: (str, int, int, int, Scaffold|None)->Gene
self.name = name
self.begin = begin
self.end = end
self.strand = strand
self.scaffold = scaffold


Expand Down Expand Up @@ -166,9 +167,9 @@ def parse_line(line):
vog_name=tokens[2],
evalue=float(tokens[4]),
gc_cont=gc_cont,
begin = int(begin),
end = int(end),
strand = int(strand)
begin=int(begin),
end=int(end),
strand=int(strand)
)

with open(file_path) as f:
Expand All @@ -192,7 +193,8 @@ def extract_coords_and_name(gene_str):
return Gene(
name=tokens[0],
begin=int(tokens[1]),
end=int(tokens[2])
end=int(tokens[2]),
strand=int(tokens[3])
)

def parse_gene_records(gene_records):
Expand Down
19 changes: 19 additions & 0 deletions prepare_and_publish_package.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
echo "Did you change git action and updated the package?"
pause
git pull
move /-Y dist\*.* dist_old_versions\
C:\Users\tikho\Anaconda3\python.exe setup_version.py
pause
C:\Users\tikho\Anaconda3\python.exe setup.py sdist
pause
git add --all
git status
pause
set /P commit_name="Please, make up the commit name:"
set /p tag=<tag_name
git commit -m %commit_name%
git tag %tag%
git push
git push --tags
"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe" https://github.com/bobeobibo/phigaro/actions
pause
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
description='Phigaro is a scalable command-line tool for predictions phages and prophages '
'from nucleid acid sequences (including metagenomes) and '
'is based on phage genes HMMs and a smoothing window algorithm.',
version="2.1.9",
version="2.1.10",
license='MIT',
author='E.Starikova, N.Pryanichnikov, P.Tikhonova',
author_email='[email protected]',
Expand All @@ -15,7 +15,7 @@
'phigaro.batch',
'phigaro.batch.task',
'phigaro.misc',
'phigaro.to_html',
'phigaro.to_html',
],
package_data = {
'': ['*.pickle', 'README.md']
Expand All @@ -26,4 +26,4 @@
'phigaro-setup = phigaro.cli.helper:main',
]
}, install_requires=['numpy', 'six>=1.7.0', 'pandas>=0.23.4','sh', 'singleton', 'PyYAML>=5.1', 'future', 'argparse', 'numpy', 'plotly', 'bs4', 'beautifulsoup4>=4.4.0', 'lxml','biopython']
)
)
Loading

0 comments on commit a6ba13c

Please sign in to comment.