Skip to content

Commit

Permalink
RAM usage change
Browse files Browse the repository at this point in the history
Remove bzip2 requirement.
Change SPAdes maximum memory determination.
Add .jar files maximum memory setting (--jarMaxMemory option).
Save fastq files size after Trimmomatic.
Update README.
  • Loading branch information
miguelpmachado committed Oct 3, 2016
1 parent 335a4ef commit 3cce932
Show file tree
Hide file tree
Showing 7 changed files with 116 additions and 55 deletions.
30 changes: 19 additions & 11 deletions INNUca.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,7 @@ def main():

# Check programms
programs_version_dictionary = {}
if not args.skipEstimatedCoverage:
programs_version_dictionary['bunzip2'] = ['--version', '>=', '1.0.6']
programs_version_dictionary['gunzip'] = ['--version', '>=', '1.6']
programs_version_dictionary['gunzip'] = ['--version', '>=', '1.6']
if not (args.skipFastQC and args.skipTrimmomatic and args.skipPilon):
programs_version_dictionary['java'] = ['-version', '>=', '1.8']
if not args.skipFastQC:
Expand Down Expand Up @@ -133,11 +131,18 @@ def main():
if not args.skipMLST:
scheme = mlst.getScheme(args.speciesExpected)

# Memory
available_memory_GB = utils.get_free_memory() / (1024.0 ** 2)
# Determine SPAdes maximum memory
spadesMaxMemory = None
if not args.skipSPAdes:
print ''
spadesMaxMemory = spades.define_memory(args.spadesMaxMemory, args.threads)
spadesMaxMemory = spades.define_memory(args.spadesMaxMemory, args.threads, available_memory_GB)
# Determine .jar maximum memory
jarMaxMemory = 'off'
if not (args.skipTrimmomatic and (args.skipSPAdes or args.skipPilon)):
print ''
jarMaxMemory = utils.define_jar_max_memory(args.jarMaxMemory, args.threads, available_memory_GB)

# Run comparisons for each sample
for sample in samples:
Expand All @@ -161,7 +166,7 @@ def main():
print str(fastq_files) + '\n'

# Run INNUca.py analysis
run_successfully, pass_qc, run_report = run_INNUca(sample, sample_outdir, fastq_files, args, script_path, scheme, spadesMaxMemory, jar_path_trimmomatic, jar_path_pilon)
run_successfully, pass_qc, run_report = run_INNUca(sample, sample_outdir, fastq_files, args, script_path, scheme, spadesMaxMemory, jar_path_trimmomatic, jar_path_pilon, jarMaxMemory)

# Save sample fail report
fail_report_path = os.path.join(sample_outdir, 'fail_report.txt')
Expand Down Expand Up @@ -198,7 +203,7 @@ def main():
sys.exit('No samples run successfully!')


def run_INNUca(sampleName, outdir, fastq_files, args, script_path, scheme, spadesMaxMemory, jar_path_trimmomatic, jar_path_pilon):
def run_INNUca(sampleName, outdir, fastq_files, args, script_path, scheme, spadesMaxMemory, jar_path_trimmomatic, jar_path_pilon, jarMaxMemory):
threads = args.threads
adaptersFasta = args.adapters
if adaptersFasta is not None:
Expand Down Expand Up @@ -238,8 +243,8 @@ def run_INNUca(sampleName, outdir, fastq_files, args, script_path, scheme, spade

# Run Trimmomatic
if not args.skipTrimmomatic:
run_successfully, not_empty_fastq, time_taken, failing, paired_reads, trimmomatic_folder = trimmomatic.runTrimmomatic(jar_path_trimmomatic, sampleName, outdir, threads, adaptersFasta, script_path, args.doNotSearchAdapters, fastq_files, maximumReadsLength, args.doNotTrimCrops, args.trimCrop, args.trimHeadCrop, args.trimLeading, args.trimTrailing, args.trimSlidingWindow, args.trimMinLength, nts2clip_based_ntsContent)
runs['Trimmomatic'] = [run_successfully, not_empty_fastq, time_taken, failing]
run_successfully, not_empty_fastq, time_taken, failing, paired_reads, trimmomatic_folder, fileSize = trimmomatic.runTrimmomatic(jar_path_trimmomatic, sampleName, outdir, threads, adaptersFasta, script_path, args.doNotSearchAdapters, fastq_files, maximumReadsLength, args.doNotTrimCrops, args.trimCrop, args.trimHeadCrop, args.trimLeading, args.trimTrailing, args.trimSlidingWindow, args.trimMinLength, nts2clip_based_ntsContent, jarMaxMemory)
runs['Trimmomatic'] = [run_successfully, not_empty_fastq, time_taken, failing, fileSize]

if run_successfully and not_empty_fastq:
fastq_files = paired_reads
Expand All @@ -265,7 +270,7 @@ def run_INNUca(sampleName, outdir, fastq_files, args, script_path, scheme, spade

else:
print '--skipTrimmomatic set. Skipping Trimmomatic, but also Second FastQC analysis and Second Estimated Coverage analysis'
runs['Trimmomatic'] = skipped
runs['Trimmomatic'] = skipped + ['NA']
runs['second_Coverage'] = skipped
runs['second_FastQC'] = skipped

Expand All @@ -279,7 +284,7 @@ def run_INNUca(sampleName, outdir, fastq_files, args, script_path, scheme, spade
contigs = contigs_spades

if not args.skipPilon:
run_successfully, _, time_taken, failing, assembly_polished, bam_file, pilon_folder = pilon.runPilon(jar_path_pilon, contigs_spades, fastq_files, threads, outdir)
run_successfully, _, time_taken, failing, assembly_polished, bam_file, pilon_folder = pilon.runPilon(jar_path_pilon, contigs_spades, fastq_files, threads, outdir, jarMaxMemory)
runs['Pilon'] = [run_successfully, None, time_taken, failing]

if run_successfully:
Expand Down Expand Up @@ -334,7 +339,10 @@ def run_INNUca(sampleName, outdir, fastq_files, args, script_path, scheme, spade
else:
print 'Moving to the next sample'
for step in ('first_Coverage', 'first_FastQC', 'Trimmomatic', 'second_Coverage', 'second_FastQC', 'SPAdes', 'Pilon', 'Assembly_Mapping', 'MLST'):
runs[step] = not_run
if step == 'Trimmomatic':
runs[step] = not_run + ['NA']
else:
runs[step] = not_run

# Remove Trimmomatic directory with cleaned reads
if not args.trimKeepFiles:
Expand Down
30 changes: 17 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ Dependencies
- *mlst* (https://github.com/tseemann/mlst) >= v2.4 (it is recommended
to use a mlst version with updated databases)
- *gzip* >= v1.6 (normally found in Linux OS)
- *bzip2* >= v1.0.6 (normally found in Linux OS)

**Optional**
(executables are provided, but user's own executables can be used with `--doNotUseProvidedSoftware` option)
Expand All @@ -47,8 +46,9 @@ Usage
-g 2.1
[-o /output/directory/] [-j N] [--doNotUseProvidedSoftware]
[--pairEnd_filesSeparation "_left/rigth.fq.gz" "_left/rigth.fq.gz"]
[--skipEstimatedCoverage] [--skipFastQC] [--skipTrimmomatic]
[--skipSPAdes] [--skipPilon] [--skipAssemblyMapping] [--skipMLST]
[--jarMaxMemory] [--skipEstimatedCoverage] [--skipFastQC]
[--skipTrimmomatic] [--skipSPAdes] [--skipPilon]
[--skipAssemblyMapping] [--skipMLST]
[--adapters adaptersFile.fasta | --doNotSearchAdapters]
[--doNotTrimCrops | [[--trimCrop N] [--trimHeadCrop N]]]
[--trimSlidingWindow window:meanQuality] [--trimMinLength N]
Expand Down Expand Up @@ -79,6 +79,10 @@ Usage
-o /output/directory/, --outdir /output/directory/
Path for output directory (default: .)
-j N, --threads N Number of threads (default: 1)
--jarMaxMemory 10 Sets the maximum RAM Gb usage by jar files (Trimmomatic
and Pilon). Can also be auto or off. When auto is set,
1 Gb per thread will be used up to the free available memory
(default: off)
--doNotUseProvidedSoftware
Tells the software to not use FastQC, Trimmomatic,
SPAdes and Samtools that are provided with INNUca.py
Expand Down Expand Up @@ -146,16 +150,16 @@ Usage
Filter SPAdes contigs for length greater or equal than
this value (default: maximum reads size or 200 bp)
--spadesMaxMemory N The maximum amount of RAM Gb for SPAdes to use (default:
free memory available at the beginning of INNUca)
1 Gb per thread will be used up to the free available
memory)
--spadesMinCoverageAssembly 10
The minimum number of reads to consider an edge in the
de Bruijn graph (or path I am not sure). Can also be

auto or off (default: 'off')
The minimum number of reads to consider an edge in the
de Bruijn graph during the assembly. Can also be auto
or off (default: 2)
--spadesMinCoverageContigs N
Minimum contigs coverage. After assembly only keep
contigs with reported coverage equal or above this
value (default: 5)
Minimum contigs coverage. After assembly only keep contigs
with reported k-mer coverage equal or above this value
(default: 5)
SPAdes k-mers options (one of the following):
--spadesKmers 55,77 Manually sets SPAdes k-mers lengths (all values must
be odd, less than 128) (default: 55, 77, 99, 113,
Expand All @@ -167,8 +171,8 @@ Usage
--pilonKeepFiles Tells INNUca.py to not remove the output of Pilon
(default: False)
--pilonKeepSPAdesAssembly
Tells INNUca.py to not remove the unpolished SPAdes
assembly (default: False)
Tells INNUca.py to not remove the filtered but unpolished
SPAdes assembly (default: False)

Assembly Mapping options:
--assemblyMinCoverageContigs
Expand Down
6 changes: 3 additions & 3 deletions modules/assembly_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,8 @@ def runAssemblyMapping(alignment_file, reference_file, threads, outdir, minCover

assembly_filtered = None

pilon_run_successfuly = True if assembly_pilon is not None else False

# Get assembly coverage
sample_coverage_no_problems, mean_coverage_data = sample_coverage(reference_file, alignment_file, assemblyMapping_folder, threads)
if sample_coverage_no_problems:
Expand All @@ -289,16 +291,14 @@ def runAssemblyMapping(alignment_file, reference_file, threads, outdir, minCover

assembly = reference_file if assembly_pilon is None else assembly_pilon

pilon_run_successfuly = True if assembly_pilon is not None else False

subsampleContigs(assembly, sequences_2_keep, assembly_filtered, pilon_run_successfuly)

if not pass_qc_coverage:
failing['Coverage'] = [failing_reason]
else:
failing['Coverage'] = ['Did not run']

if not keep_pilon_assembly:
if pilon_run_successfuly and not keep_pilon_assembly:
os.remove(assembly_pilon)

# Save mapping statistics
Expand Down
10 changes: 6 additions & 4 deletions modules/pilon.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,11 @@ def indexAlignment(alignment_file):
return run_successfully


def pilon(jar_path_pilon, assembly, bam_file, outdir):
def pilon(jar_path_pilon, assembly, bam_file, outdir, jarMaxMemory):
assembly_polished = os.path.splitext(assembly)[0] + '.polished.fasta'
command = ['java', '-jar', jar_path_pilon, '--genome', assembly, '--frags', bam_file, '--outdir', outdir, '--output', os.path.basename(os.path.splitext(assembly_polished)[0]), '--changes', '--vcf']
command = ['java', '', '-jar', jar_path_pilon, '--genome', assembly, '--frags', bam_file, '--outdir', outdir, '--output', os.path.basename(os.path.splitext(assembly_polished)[0]), '--changes', '--vcf']
if str(jarMaxMemory) != 'off':
command[1] = '-Xmx' + str(int(round(jarMaxMemory * 1024, 0))) + 'M'
run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, False, None, True)
if not run_successfully:
assembly_polished = None
Expand Down Expand Up @@ -89,7 +91,7 @@ def parsePilonResult(assembly_polished, outdir):


@pilon_timer
def runPilon(jar_path_pilon, assembly, fastq_files, threads, outdir):
def runPilon(jar_path_pilon, assembly, fastq_files, threads, outdir, jarMaxMemory):
failing = {}
failing['sample'] = False

Expand Down Expand Up @@ -119,7 +121,7 @@ def runPilon(jar_path_pilon, assembly, fastq_files, threads, outdir):
run_successfully = indexAlignment(bam_file)

if run_successfully:
run_successfully, assembly_polished = pilon(jar_path_pilon, assembly_link, bam_file, pilon_folder)
run_successfully, assembly_polished = pilon(jar_path_pilon, assembly_link, bam_file, pilon_folder, jarMaxMemory)

if run_successfully:
parsePilonResult(assembly_polished, outdir)
Expand Down
5 changes: 1 addition & 4 deletions modules/spades.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,11 @@ def define_minContigsLength(maximumReadsLength, minContigsLength):
return minimum_length


def define_memory(maxMemory, threads):
def define_memory(maxMemory, threads, available_memory_GB):
GB_per_thread = 1024 / 1024.0

minimum_required_memory_GB = GB_per_thread * threads

available_memory_GB = utils.get_free_memory()
if available_memory_GB == 0:
print 'WARNING: it was not possible to determine the free available memory!'
if maxMemory is not None:
Expand All @@ -93,8 +92,6 @@ def define_memory(maxMemory, threads):
else:
print 'Trying use the minimum memory required for SPAdes to run'
available_memory_GB = minimum_required_memory_GB
else:
available_memory_GB = available_memory_GB / (1024.0 ** 2)

if maxMemory is None:
if minimum_required_memory_GB > available_memory_GB:
Expand Down
34 changes: 21 additions & 13 deletions modules/trimmomatic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,54 +4,57 @@


# Run Trimmomatic
def trimmomatic(jar_path_trimmomatic, sampleName, trimmomatic_folder, threads, adaptersFasta, script_path, doNotSearchAdapters, fastq_files, maxReadsLength, doNotTrimCrops, crop, headCrop, leading, trailing, slidingWindow, minLength, nts2clip_based_ntsContent):
def trimmomatic(jar_path_trimmomatic, sampleName, trimmomatic_folder, threads, adaptersFasta, script_path, doNotSearchAdapters, fastq_files, maxReadsLength, doNotTrimCrops, crop, headCrop, leading, trailing, slidingWindow, minLength, nts2clip_based_ntsContent, jarMaxMemory):
fastq = sorted(fastq_files)[0]

# Run Trimmomatic
command = ['java', '-jar', jar_path_trimmomatic, 'PE', '-threads', str(threads), '-basein', fastq, '-baseout', os.path.join(trimmomatic_folder, str(sampleName + '.fastq.gz')), '', '', '', str('SLIDINGWINDOW:' + slidingWindow), str('LEADING:' + str(leading)), str('TRAILING:' + str(trailing)), str('MINLEN:' + str(minLength)), 'TOPHRED33', '']
command = ['java', '', '-jar', jar_path_trimmomatic, 'PE', '-threads', str(threads), '-basein', fastq, '-baseout', os.path.join(trimmomatic_folder, str(sampleName + '.fastq.gz')), '', '', '', str('SLIDINGWINDOW:' + slidingWindow), str('LEADING:' + str(leading)), str('TRAILING:' + str(trailing)), str('MINLEN:' + str(minLength)), 'TOPHRED33', '']

if str(jarMaxMemory) != 'off':
command[1] = '-Xmx' + str(int(round(jarMaxMemory * 1024, 0))) + 'M'

if not doNotTrimCrops:
if maxReadsLength is not None:
if crop is not None:
crop = maxReadsLength - crop[0]
command[10] = str('CROP:' + str(crop))
command[11] = str('CROP:' + str(crop))
else:
if nts2clip_based_ntsContent is not None:
crop = nts2clip_based_ntsContent[1]
print str(crop) + ' nucleotides will be clipped at the end of reads'
crop = maxReadsLength - crop
command[10] = str('CROP:' + str(crop))
command[11] = str('CROP:' + str(crop))
else:
print 'Because FastQC did not run successfully, --trimCrop option will not be considered'

if headCrop is not None:
command[11] = str('HEADCROP:' + str(headCrop[0]))
command[12] = str('HEADCROP:' + str(headCrop[0]))
else:
if nts2clip_based_ntsContent is not None:
headCrop = nts2clip_based_ntsContent[0]
print str(headCrop) + ' nucleotides will be clipped at the beginning of reads'
command[11] = str('HEADCROP:' + str(headCrop))
command[12] = str('HEADCROP:' + str(headCrop))

if not doNotSearchAdapters:
if adaptersFasta is not None:
print 'Removing adapters contamination using ' + adaptersFasta
command[12] = 'ILLUMINACLIP:' + adaptersFasta + ':3:30:10:6:true'
command[13] = 'ILLUMINACLIP:' + adaptersFasta + ':3:30:10:6:true'
else:
trimmomatic_adapters_folder = os.path.join(os.path.dirname(script_path), 'src', 'Trimmomatic-0.36', 'adapters')
adapters_files = [os.path.join(trimmomatic_adapters_folder, 'NexteraPE-PE.fa'), os.path.join(trimmomatic_adapters_folder, 'TruSeq2-PE.fa'), os.path.join(trimmomatic_adapters_folder, 'TruSeq3-PE-2.fa')]
print 'Removing adapters contamination using ' + str(adapters_files)
adaptersFasta = concatenateFastaFiles(adapters_files, trimmomatic_folder, 'concatenated_adaptersFile.fasta')
command[12] = 'ILLUMINACLIP:' + adaptersFasta + ':3:30:10:6:true'
command[13] = 'ILLUMINACLIP:' + adaptersFasta + ':3:30:10:6:true'

run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, False, None, True)

if not run_successfully:
print 'Trimmomatic fail! Trying run with Phred+33 enconding defined...'
command[18] = '-phred33'
command[19] = '-phred33'
run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, False, None, True)
if not run_successfully:
print 'Trimmomatic fail again! Trying run with Phred+64 enconding defined...'
command[18] = '-phred64'
command[19] = '-phred64'
run_successfully, stdout, stderr = utils.runCommandPopenCommunicate(command, False, None, True)

return run_successfully
Expand Down Expand Up @@ -115,29 +118,34 @@ def controlForZeroReads(fastq_files):

# Run Trimmomatic procedure
@trim_timer
def runTrimmomatic(jar_path_trimmomatic, sampleName, outdir, threads, adaptersFasta, script_path, doNotSearchAdapters, fastq_files, maxReadsLength, doNotTrimCrops, crop, headCrop, leading, trailing, slidingWindow, minLength, nts2clip_based_ntsContent):
def runTrimmomatic(jar_path_trimmomatic, sampleName, outdir, threads, adaptersFasta, script_path, doNotSearchAdapters, fastq_files, maxReadsLength, doNotTrimCrops, crop, headCrop, leading, trailing, slidingWindow, minLength, nts2clip_based_ntsContent, jarMaxMemory):
failing = {}
failing['sample'] = False
not_empty_fastq = False

paired_reads = None
fileSize = 'NA'

# Create Trimmomatic output directory
trimmomatic_folder = os.path.join(outdir, 'trimmomatic', '')
utils.removeDirectory(trimmomatic_folder)
os.mkdir(trimmomatic_folder)

run_successfully = trimmomatic(jar_path_trimmomatic, sampleName, trimmomatic_folder, threads, adaptersFasta, script_path, doNotSearchAdapters, fastq_files, maxReadsLength, doNotTrimCrops, crop, headCrop, leading, trailing, slidingWindow, minLength, nts2clip_based_ntsContent)
run_successfully = trimmomatic(jar_path_trimmomatic, sampleName, trimmomatic_folder, threads, adaptersFasta, script_path, doNotSearchAdapters, fastq_files, maxReadsLength, doNotTrimCrops, crop, headCrop, leading, trailing, slidingWindow, minLength, nts2clip_based_ntsContent, jarMaxMemory)

if run_successfully:
paired_reads = getTrimmomaticPairedReads(trimmomatic_folder)
not_empty_fastq = controlForZeroReads(paired_reads)

# Get raw reads files size
fileSize = sum(os.path.getsize(fastq) for fastq in paired_reads)

if not not_empty_fastq:
failing['sample'] = 'Zero reads after Trimmomatic'
print failing['sample']

else:
failing['sample'] = 'Did not run'
print failing['sample']

return run_successfully, not_empty_fastq, failing, paired_reads, trimmomatic_folder
return run_successfully, not_empty_fastq, failing, paired_reads, trimmomatic_folder, fileSize
Loading

0 comments on commit 3cce932

Please sign in to comment.