Merge pull request #92 from sanger-tol/resource_optimisation
Resource optimisation
muffato authored Nov 24, 2023
2 parents 273bdf5 + a5d21f4 commit 48f7738
Showing 4 changed files with 97 additions and 62 deletions.
127 changes: 84 additions & 43 deletions conf/base.config
@@ -2,66 +2,107 @@
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
sanger-tol/genomenote Nextflow base config file
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
A 'blank slate' config file, appropriate for general use on most high performance
compute environments. Assumes that all software is installed and available on
the PATH. Runs in `local` mode - all jobs will be run on the logged in environment.
----------------------------------------------------------------------------------------
*/

process {
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Increasing the number of CPUs often gives diminishing returns, so we increase it
following a logarithmic curve. Example:
- 0 < value <= 1: start + step
- 1 < value <= 2: start + 2*step
- 2 < value <= 4: start + 3*step
- 4 < value <= 8: start + 4*step
In order to support re-runs, the step increase may be multiplied by the attempt
number prior to calling this function.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

// Modified logarithm function that doesn't return negative numbers
def positive_log(value, base) {
if (value <= 1) {
return 0
} else {
return Math.log(value)/Math.log(base)
}
}

def log_increase_cpus(start, step, value, base) {
return check_max(start + step * (1 + Math.ceil(positive_log(value, base))), 'cpus')
}
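
For illustration only (not part of this commit), the scaling rule can be checked with a standalone Groovy sketch, ignoring the check_max() cap:

    // Standalone sketch of the CPU scaling rule above; illustrative only.
    def positive_log_sketch = { value, base -> value <= 1 ? 0 : Math.log(value) / Math.log(base) }
    def log_increase_sketch = { start, step, value, base -> (int) (start + step * (1 + Math.ceil(positive_log_sketch(value, base)))) }

    assert log_increase_sketch(4, 2, 1, 2) == 6   // 0 < value <= 1 -> start + step
    assert log_increase_sketch(4, 2, 2, 2) == 8   // 1 < value <= 2 -> start + 2*step
    assert log_increase_sketch(4, 2, 4, 2) == 10  // 2 < value <= 4 -> start + 3*step
    assert log_increase_sketch(4, 2, 8, 2) == 12  // 4 < value <= 8 -> start + 4*step
    assert log_increase_sketch(4, 4, 1, 2) == 8   // second attempt: callers below pass step = 2*task.attempt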

cpus = { check_max( 1 * task.attempt, 'cpus' ) }
memory = { check_max( 6.GB * task.attempt, 'memory' ) }
time = { check_max( 4.h * task.attempt, 'time' ) }

process {

errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
maxRetries = 5
maxRetries = 3
maxErrors = '-1'

// Process-specific resource requirements
// NOTE - Please try and re-use the labels below as much as possible.
// These labels are used and recognised by default in DSL2 files hosted on nf-core/modules.
// If possible, it would be nice to keep the same label naming convention when
// adding in your local modules too.
// See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
withLabel:process_single {
cpus = { check_max( 1 , 'cpus' ) }
memory = { check_max( 6.GB * task.attempt, 'memory' ) }
time = { check_max( 4.h * task.attempt, 'time' ) }
// In this configuration file, we assign minimal resources by default and
// explicitly bump them up for specific processes.
// All rules should still increase resources on every attempt so that the
// pipeline can self-heal from MEMLIMIT/RUNLIMIT failures.

// Default
cpus = 1
memory = { check_max( 50.MB * task.attempt, 'memory' ) }
time = { check_max( 30.min * task.attempt, 'time' ) }

// These processes typically complete within 30 min to 1.5 hours.
withName: 'BED_SORT|BEDTOOLS_BAMTOBED|COOLER_CLOAD|COOLER_ZOOMIFY|FILTER_BED' {
time = { check_max( 4.hour * task.attempt, 'time' ) }
}
withLabel:process_low {
cpus = { check_max( 2 * task.attempt, 'cpus' ) }

// These processes may take a few hours.
withName: 'FILTER_SORT|SAMTOOLS_VIEW' {
time = { check_max( 8.hour * task.attempt, 'time' ) }
}

withName: SAMTOOLS_VIEW {
memory = { check_max( 1.GB * task.attempt, 'memory' ) }
}

withName: FASTK_FASTK {
memory = { check_max( 12.GB * task.attempt, 'memory' ) }
time = { check_max( 4.h * task.attempt, 'time' ) }
cpus = { log_increase_cpus(4, 2*task.attempt, 1, 2) }
}
withLabel:process_medium {
cpus = { check_max( 6 * task.attempt, 'cpus' ) }
memory = { check_max( 36.GB * task.attempt, 'memory' ) }
time = { check_max( 8.h * task.attempt, 'time' ) }

withName: MERQURYFK_MERQURYFK {
// Memory usage seems to follow two different linear rules:
// - 1 GB for every 60 Mbp for genomes < 840 Mbp
// - 2 GB for every 1 Gbp for genomes >= 840 Mbp, with a 12 GB offset to match the previous rule
memory = { check_max( 1.GB + ((meta.genome_size < 840000000) ? (Math.ceil(meta.genome_size/60000000) * 1.GB * task.attempt) : (Math.ceil(meta.genome_size/1000000000) * 2.GB * task.attempt + 12.GB)), 'memory' ) }
cpus = { log_increase_cpus(4, 2*task.attempt, 1, 2) }
}
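
As a rough first-attempt illustration (not part of this commit, ignoring the check_max() cap), where meta.genome_size is the uncompressed FASTA file size set in workflows/genomenote.nf:

    // Standalone sketch of the MERQURYFK memory rule above, in GB; illustrative only.
    def merquryfk_mem_gb = { long genomeSize ->
        ( genomeSize < 840000000
            ? 1 + Math.ceil(genomeSize / 60000000)              // 1 GB per 60 Mbp, on top of 1 GB
            : 1 + Math.ceil(genomeSize / 1000000000) * 2 + 12 ) // 2 GB per Gbp, plus the 12 GB offset
    }
    assert merquryfk_mem_gb(600_000_000)   == 11   // ~600 Mbp assembly
    assert merquryfk_mem_gb(2_400_000_000) == 19   // ~2.4 Gbp assembly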
withLabel:process_high {
cpus = { check_max( 12 * task.attempt, 'cpus' ) }
memory = { check_max( 72.GB * task.attempt, 'memory' ) }
time = { check_max( 16.h * task.attempt, 'time' ) }

withName: BUSCO {
// Memory usage grows irregularly with genome size. The formula below fits the observed usage to avoid BUSCO being killed.
memory = { check_max(1.GB * Math.max(Math.pow(2, positive_log(meta.genome_size/1000000, 10)+task.attempt), Math.floor(meta.genome_size/1000000000) * 16 * task.attempt), 'memory' ) }
cpus = { log_increase_cpus(4, 2*task.attempt, Math.ceil(meta.genome_size/1000000000), 2) }
time = { check_max( 2.h * Math.ceil(meta.genome_size/1000000000), 'time') }
}
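
For a sense of scale, a standalone sketch of the memory rule (illustrative only, not part of this commit; the CPU and time figures in the comments follow from the cpus and time rules above):

    // Standalone sketch of the BUSCO memory rule above, in GB; illustrative only.
    def positive_log_sketch = { v, b -> v <= 1 ? 0 : Math.log(v) / Math.log(b) }
    def busco_mem_gb = { long genomeSize, int attempt ->
        Math.max(Math.pow(2, positive_log_sketch(genomeSize / 1e6, 10) + attempt),
                 Math.floor(genomeSize / 1e9) * 16 * attempt)
    }
    println busco_mem_gb(100_000_000, 1)    // ~8  GB, with 6 CPUs and 2 h
    println busco_mem_gb(1_000_000_000, 1)  // ~16 GB, with 6 CPUs and 2 h
    println busco_mem_gb(3_000_000_000, 1)  // ~48 GB, with 10 CPUs and 6 h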
withLabel:process_long {
time = { check_max( 20.h * task.attempt, 'time' ) }

withName: 'BED_SORT|FILTER_SORT' {
cpus = { log_increase_cpus(2, 2*task.attempt, 1, 2) }
memory = { check_max( 16.GB * task.attempt, 'memory' ) }
}
withLabel:process_high_memory {
memory = { check_max( 200.GB * task.attempt, 'memory' ) }

withName: COOLER_CLOAD {
memory = { check_max( 6.GB * task.attempt, 'memory' ) }
}
withName:"COOLER_.*" {
cpus = { check_max( 6 * task.attempt, 'cpus' ) }
memory = { check_max( 36.GB * task.attempt, 'memory' ) }
time = { check_max( 8.h * task.attempt, 'time' ) }

withName: COOLER_DUMP {
memory = { check_max( 100.MB * task.attempt, 'memory' ) }
}
withLabel:error_ignore {
errorStrategy = 'ignore'

withName: COOLER_ZOOMIFY {
cpus = { log_increase_cpus(2, 2*task.attempt, 1, 2) }
memory = { check_max( (meta.genome_size < 1000000000 ? 16.GB : 24.GB) * task.attempt, 'memory' ) }
}
withLabel:error_retry {
errorStrategy = 'retry'
maxRetries = 2

withName: MULTIQC {
memory = { check_max( 150.MB * task.attempt, 'memory' ) }
}

withName:CUSTOM_DUMPSOFTWAREVERSIONS {
cache = false
}
1 change: 1 addition & 0 deletions nextflow.config
@@ -207,6 +207,7 @@ report {
trace {
enabled = true
file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt"
fields = 'task_id,hash,native_id,process,tag,status,exit,cpus,memory,time,attempt,submit,start,complete,duration,%cpu,%mem,peak_rss,rchar,wchar'
}
dag {
enabled = true
21 changes: 5 additions & 16 deletions subworkflows/local/genome_statistics.nf
@@ -73,12 +73,14 @@ workflow GENOME_STATISTICS {
| join ( FASTK_FASTK.out.ktab )
| set { ch_combo }

ch_grab = GrabFiles ( ch_pacbio.dir )
ch_pacbio.dir
| map { meta, dir -> [ meta, [dir.listFiles().findAll { it.toString().endsWith(".hist") }], [dir.listFiles().findAll { it.toString().contains(".ktab") }] ] }
| set { ch_grab }

ch_combo
| mix ( ch_grab )
| combine ( genome )
| map { meta, hist, ktab, meta2, fasta -> [ meta, hist, ktab, fasta, [] ] }
| map { meta, hist, ktab, meta2, fasta -> [ meta + [genome_size: meta2.genome_size], hist, ktab, fasta, [] ] }
| set { ch_merq }
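
The genome_size entry merged into the meta map here is what the MERQURYFK_MERQURYFK memory rule in conf/base.config reads as meta.genome_size. A hypothetical element of ch_merq after this step (names and size invented for illustration):

    // [ [id: 'sample1', genome_size: 1300000000], sample1.hist, sample1.ktab, assembly.fasta, [] ]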


@@ -98,9 +100,9 @@

MERQURYFK_MERQURYFK.out.qv
| join ( MERQURYFK_MERQURYFK.out.stats )
| ifEmpty ( [ [], [], [] ] )
| map { meta, qv, comp -> [ meta + [ id: "merq" ], qv, comp ] }
| groupTuple ()
| ifEmpty ( [ [], [], [] ] )
| set { ch_merqury }

CREATETABLE ( ch_summary, ch_busco, ch_merqury, flagstat )
@@ -119,16 +121,3 @@

}


process GrabFiles {
tag "${meta.id}"
executor 'local'

input:
tuple val(meta), path("in")

output:
tuple val(meta), path("in/*.hist"), path("in/*.ktab*", hidden:true)

"true"
}
10 changes: 7 additions & 3 deletions workflows/genomenote.nf
@@ -97,16 +97,20 @@ workflow GENOMENOTE {
// MODULE: Uncompress fasta file if needed
//
ch_fasta
| map { file -> [ [ 'id': file.baseName.tokenize('.')[0..1].join('.') ], file ] }
| map { file -> [ [ 'id': file.baseName ], file ] }
| set { ch_genome }

if ( params.fasta.endsWith('.gz') ) {
ch_fasta = GUNZIP ( ch_genome ).gunzip
ch_unzipped = GUNZIP ( ch_genome ).gunzip
ch_versions = ch_versions.mix ( GUNZIP.out.versions.first() )
} else {
ch_fasta = ch_genome
ch_unzipped = ch_genome
}

ch_unzipped
| map { meta, fa -> [ meta + [id: fa.baseName, genome_size: fa.size()], fa] }
| set { ch_fasta }
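
Note that genome_size here is the byte count of the uncompressed FASTA, which the downstream resource rules in conf/base.config (e.g. BUSCO, MERQURYFK_MERQURYFK) use as a proxy for the assembly length in bases. A hypothetical example of the resulting tuple (name and size invented for illustration):

    // e.g. [ [id: 'assembly1', genome_size: 1300000000], /path/to/assembly1.fasta ]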


//
// SUBWORKFLOW: Create contact map matrices from HiC alignment files
