Skip to content

Commit

Permalink
v1.7
Browse files Browse the repository at this point in the history
  • Loading branch information
miguelpmachado committed Aug 31, 2016
1 parent 5d7d77e commit cdc055c
Show file tree
Hide file tree
Showing 251 changed files with 2,152 additions and 1,142 deletions.
8 changes: 4 additions & 4 deletions INNUca.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
Copyright (C) 2016 Miguel Machado <[email protected]>
Last modified: August 25, 2016
Last modified: August 31, 2016
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -39,7 +39,7 @@


def main():
version = '1.6'
version = '1.7'
args = utils.parseArguments(version)

general_start_time = time.time()
Expand Down Expand Up @@ -94,7 +94,7 @@ def main():
if not args.skipTrimmomatic:
programs_version_dictionary['trimmomatic-0.36.jar'] = ['-version', '==', '0.36']
if not args.skipSPAdes:
programs_version_dictionary['spades.py'] = ['--version', '>=', '3.7.1']
programs_version_dictionary['spades.py'] = ['--version', '>=', '3.9.0']
if not args.skipPilon and not args.skipSPAdes:
programs_version_dictionary['bowtie2'] = ['--version', '>=', '2.2.9']
programs_version_dictionary['samtools'] = ['--version', '==', '1.3.1']
Expand Down Expand Up @@ -257,7 +257,7 @@ def run_INNUca(sampleName, outdir, fastq_files, args, script_path, scheme):

# Run SPAdes
if not args.skipSPAdes:
run_successfully, pass_qc, time_taken, failing, contigs = spades.runSpades(sampleName, outdir, threads, fastq_files, args.spadesNotUseCareful, args.spadesMaxMemory, args.spadesMinCoverage, args.spadesMinContigsLength, genomeSize, args.spadesKmers, maximumReadsLength, args.spadesSaveReport, args.spadesDefaultKmers)
run_successfully, pass_qc, time_taken, failing, contigs = spades.runSpades(sampleName, outdir, threads, fastq_files, args.spadesNotUseCareful, args.spadesMaxMemory, args.spadesMinCoverageAssembly, args.spadesMinContigsLength, genomeSize, args.spadesKmers, maximumReadsLength, args.spadesSaveReport, args.spadesDefaultKmers, args.spadesMinCoverageContigs)
runs['SPAdes'] = [run_successfully, pass_qc, time_taken, failing]

if run_successfully:
Expand Down
12 changes: 8 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ Dependencies
- *FastQC* = v0.11.5
- *Trimmomatic* = v0.36 (make sure the .jar file is executable and it is
in your PATH)
- *SPAdes* >= v3.7.1
- *SPAdes* >= v3.9.0
- *Pilon* = v1.18
- *Bowtie2* >= v2.2.9
- *Samtools* = v1.3.1
Expand All @@ -55,8 +55,8 @@ Usage
[--trimLeading N] [--trimTrailing N] [--trimKeepFiles]
[--spadesNotUseCareful] [--spadesMinContigsLength N]
[--spadesKmers 55 77 | --spadesDefaultKmers]
[--spadesMaxMemory N] [--spadesMinCoverage 10]
[--spadesSaveReport]
[--spadesMaxMemory N] [--spadesMinCoverageAssembly 10]
[--spadesMinCoverageContigs N] [--spadesSaveReport]
[--pilonKeepFiles] [--pilonKeepSPAdesAssembly]

INNUca - Reads Control and Assembly
Expand Down Expand Up @@ -144,10 +144,14 @@ Usage
this value (default: 200)
--spadesMaxMemory N The maximum amount of RAM Gb for SPAdes to use
(default: 25)
--spadesMinCoverage 10
--spadesMinCoverageAssembly 10
Coverage cutoff used by SPAdes during the assembly
(passed to SPAdes as --cov-cutoff). Must be a positive
integer, auto or off (default: 'off')
--spadesMinCoverageContigs N
Minimum contigs coverage. After assembly only keep
contigs with reported coverage equal to or above this
value (default: 5)
--spadesSaveReport Tells INNUca to store the number of contigs and
assembled nucleotides for each sample
SPAdes k-mers options (one of the following):
Expand Down
28 changes: 16 additions & 12 deletions modules/spades.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import utils
import os
import shutil
from functools import partial


# Run Spades
def spades(spades_folder, threads, fastq_files, notUseCareful, maxMemory, minCoverage, kmers):
def spades(spades_folder, threads, fastq_files, notUseCareful, maxMemory, minCoverageAssembly, kmers):
contigs = os.path.join(spades_folder, 'contigs.fasta')

command = ['spades.py', '', '--only-assembler', '--threads', str(threads), '--memory', str(maxMemory), '--cov-cutoff', str(minCoverage), '', '-1', fastq_files[0], '-2', fastq_files[1], '-o', spades_folder]
command = ['spades.py', '', '--only-assembler', '--threads', str(threads), '--memory', str(maxMemory), '--cov-cutoff', str(minCoverageAssembly), '', '-1', fastq_files[0], '-2', fastq_files[1], '-o', spades_folder]

if not notUseCareful:
command[1] = '--careful'
Expand All @@ -22,7 +23,7 @@ def spades(spades_folder, threads, fastq_files, notUseCareful, maxMemory, minCov


# Rename contigs and write the sample's contigs.fasta file while filtering by contig length and coverage
def renameFilterContigs(sampleName, outdir, spadesContigs, minContigsLength):
def renameFilterContigs(sampleName, outdir, spadesContigs, minContigsLength, minCoverageContigs):
newContigsFile = os.path.join(outdir, str(sampleName + '.contigs.fasta'))
number_contigs = 0
number_bases = 0
Expand All @@ -34,18 +35,20 @@ def renameFilterContigs(sampleName, outdir, spadesContigs, minContigsLength):
for line in contigs:
if line[0] == '>':
if contigHeader != "":
if len(contigSequence) >= minContigsLength:
writer.write(contigHeader + "\n")
items = contigHeader.split('_')
if len(contigSequence) >= minContigsLength and float(items[5]) >= minCoverageContigs:
writer.write(">" + sampleName + "_" + contigHeader + "\n")
writer.write(contigSequence + "\n")
number_bases = number_bases + len(contigSequence)
number_contigs = number_contigs + 1
contigHeader = ""
contigSequence = ""
contigHeader = ">" + sampleName + "_" + line[1:].splitlines()[0]
contigHeader = line[1:].splitlines()[0]
else:
contigSequence = contigSequence + line.splitlines()[0]
if len(contigSequence) >= minContigsLength:
writer.write(contigHeader + "\n")
items = contigHeader.split('_')
if len(contigSequence) >= minContigsLength and float(items[5]) >= minCoverageContigs:
writer.write(">" + sampleName + "_" + contigHeader + "\n")
writer.write(contigSequence + "\n")
number_bases = number_bases + len(contigSequence)
number_contigs = number_contigs + 1
Expand All @@ -68,7 +71,7 @@ def define_kmers(kmers, maximumReadsLength):

# Run SPAdes procedure
@spades_timer
def runSpades(sampleName, outdir, threads, fastq_files, notUseCareful, maxMemory, minCoverage, minContigsLength, estimatedGenomeSizeMb, kmers, maximumReadsLength, saveReport, defaultKmers):
def runSpades(sampleName, outdir, threads, fastq_files, notUseCareful, maxMemory, minCoverageAssembly, minContigsLength, estimatedGenomeSizeMb, kmers, maximumReadsLength, saveReport, defaultKmers, minCoverageContigs):
pass_qc = False
failing = {}
failing['sample'] = False
Expand All @@ -89,11 +92,12 @@ def runSpades(sampleName, outdir, threads, fastq_files, notUseCareful, maxMemory
else:
print 'SPAdes will use the following k-mers: ' + str(kmers)

run_successfully, contigs = spades(spades_folder, threads, fastq_files, notUseCareful, maxMemory, minCoverage, kmers)
run_successfully, contigs = spades(spades_folder, threads, fastq_files, notUseCareful, maxMemory, minCoverageAssembly, kmers)

if run_successfully:
print 'Filtering for contigs with at least ' + str(minContigsLength) + ' nucleotides'
contigsFiltered, number_contigs, number_bases = renameFilterContigs(sampleName, outdir, contigs, minContigsLength)
shutil.copyfile(contigs, os.path.join(outdir, 'SPAdes_original_assembly.contigs.fasta'))
print 'Filtering for contigs with at least ' + str(minContigsLength) + ' nucleotides and a coverage of ' + str(minCoverageContigs)
contigsFiltered, number_contigs, number_bases = renameFilterContigs(sampleName, outdir, contigs, minContigsLength, minCoverageContigs)
print str(number_bases) + ' assembled nucleotides in ' + str(number_contigs) + ' contigs'

if saveReport:
Expand Down
9 changes: 5 additions & 4 deletions modules/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ def parseArguments(version):
spades_options.add_argument('--spadesNotUseCareful', action='store_true', help='Tells SPAdes to only perform the assembly without the --careful option')
spades_options.add_argument('--spadesMinContigsLength', type=int, metavar='N', help='Filter SPAdes contigs for length greater or equal than this value', required=False, default=200)
spades_options.add_argument('--spadesMaxMemory', type=int, metavar='N', help='The maximum amount of RAM Gb for SPAdes to use', required=False, default=25)
spades_options.add_argument('--spadesMinCoverage', type=spades_cov_cutoff, metavar='10', help='The minimum number of reads to consider an edge in the de Bruijn graph (or path I am not sure). Can also be auto or off', required=False, default='off')
spades_options.add_argument('--spadesMinCoverageAssembly', type=spades_cov_cutoff, metavar='10', help='The minimum number of reads to consider an edge in the de Bruijn graph (or path I am not sure). Can also be auto or off', required=False, default='off')
spades_options.add_argument('--spadesMinCoverageContigs', type=int, metavar='N', help='Minimum contigs coverage. After assembly only keep contigs with reported coverage equal or above this value', required=False, default=5)
spades_options.add_argument('--spadesSaveReport', action='store_true', help='Tells INNUca to store the number of contigs and assembled nucleotides for each sample')

spades_kmers_options = parser.add_mutually_exclusive_group()
Expand Down Expand Up @@ -98,9 +99,9 @@ def spades_cov_cutoff(argument):
if argument > 0:
return argument
else:
argparse.ArgumentParser.error('--spadesMinCoverage must be positive integer, auto or off')
argparse.ArgumentParser.error('--spadesMinCoverageAssembly must be positive integer, auto or off')
except:
argparse.ArgumentParser.error('--spadesMinCoverage must be positive integer, auto or off')
argparse.ArgumentParser.error('--spadesMinCoverageAssembly must be positive integer, auto or off')


def runCommandPopenCommunicate(command, shell_True, timeout_sec_None):
Expand Down Expand Up @@ -214,7 +215,7 @@ def setPATHvariable(doNotUseProvidedSoftware, script_path):
if not doNotUseProvidedSoftware:
fastQC = os.path.join(script_folder, 'src', 'fastqc_v0.11.5')
trimmomatic = os.path.join(script_folder, 'src', 'Trimmomatic-0.36')
spades = os.path.join(script_folder, 'src', 'SPAdes-3.7.1-Linux', 'bin')
spades = os.path.join(script_folder, 'src', 'SPAdes-3.9.0-Linux', 'bin')
bowtie2 = os.path.join(script_folder, 'src', 'bowtie2-2.2.9')
samtools = os.path.join(script_folder, 'src', 'samtools-1.3.1', 'bin')
pilon = os.path.join(script_folder, 'src', 'pilon_v1.18')
Expand Down
Binary file removed src/SPAdes-3.7.1-Linux/bin/bwa-spades
Binary file not shown.
Binary file removed src/SPAdes-3.7.1-Linux/bin/corrector
Binary file not shown.
Binary file removed src/SPAdes-3.7.1-Linux/bin/dipspades
Binary file not shown.
Binary file removed src/SPAdes-3.7.1-Linux/bin/hammer
Binary file not shown.
Binary file removed src/SPAdes-3.7.1-Linux/bin/ionhammer
Binary file not shown.
Binary file removed src/SPAdes-3.7.1-Linux/bin/scaffold_correction
Binary file not shown.
Binary file removed src/SPAdes-3.7.1-Linux/bin/spades
Binary file not shown.
38 changes: 0 additions & 38 deletions src/SPAdes-3.7.1-Linux/bin/spades_init.py

This file was deleted.

1 change: 0 additions & 1 deletion src/SPAdes-3.7.1-Linux/share/spades/VERSION

This file was deleted.

This file was deleted.

Loading

0 comments on commit cdc055c

Please sign in to comment.