Skip to content

Commit

Permalink
Merge pull request sanger-tol#9 from reichan1998/cram_handling
Browse files Browse the repository at this point in the history
Merge cram_handling and 1.3.1
  • Loading branch information
tkchafin authored Oct 1, 2024
2 parents c39bb79 + b089fa8 commit f72917c
Show file tree
Hide file tree
Showing 6 changed files with 132 additions and 24 deletions.
11 changes: 11 additions & 0 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ process {
time = { check_max( 2.h * Math.ceil( meta.read_count / 100000000 ) * task.attempt / log_increase_cpus(2, 6*task.attempt, 1, 2), 'time' ) }
}

// Resource requests for the SAMTOOLS_ADDREPLACERG process.
// All three directives are closures, so they are re-evaluated for every task
// and grow with task.attempt (retries) and with meta.read_count.
// NOTE(review): check_max and log_increase_cpus are helpers defined outside
// this section; presumably check_max caps the value at the configured maximum
// for the named resource and log_increase_cpus returns a CPU count - confirm.
withName: SAMTOOLS_ADDREPLACERG {
// CPU count comes straight from log_increase_cpus (not wrapped in check_max here).
cpus = { log_increase_cpus(2, 6*task.attempt, 1, 2) }
// 4 GB base + 850 MB x (CPU count) x attempt + 0.6 GB x ceil(read_count / 100,000,000).
memory = { check_max( 4.GB + 850.MB * log_increase_cpus(2, 6*task.attempt, 1, 2) * task.attempt + 0.6.GB * Math.ceil( meta.read_count / 100000000 ), 'memory' ) }
// 2 h x ceil(read_count / 100,000,000) x attempt, divided by the same CPU count used above.
time = { check_max( 2.h * Math.ceil( meta.read_count / 100000000 ) * task.attempt / log_increase_cpus(2, 6*task.attempt, 1, 2), 'time' ) }
}

withName: BLAST_BLASTN {
time = { check_max( 2.hour * Math.ceil( meta.read_count / 1000000 ) * task.attempt, 'time' ) }
memory = { check_max( 100.MB + 20.MB * Math.ceil( meta.read_count / 1000000 ) * task.attempt, 'memory' ) }
Expand Down Expand Up @@ -114,6 +120,11 @@ process {
memory = { check_max( 1.GB * Math.ceil( 30 * fasta.size() / 1e+9 ) * task.attempt, 'memory' ) }
}

// Resource requests for the GENERATE_CRAM_CSV process.
// Both values scale linearly with task.attempt and do not depend on the input size.
// NOTE(review): check_max is defined outside this section; presumably it caps
// the request at the configured maximum for the named resource - confirm.
withName: GENERATE_CRAM_CSV {
// 4 CPUs on the first attempt, 8 on the second, and so on.
cpus = { check_max( 4 * task.attempt, 'cpus' ) }
// 16 GB per attempt.
memory = { check_max( 16.GB * task.attempt, 'memory' ) }
}

withName: CRUMBLE {
// No correlation between memory usage and the number of reads or the genome size.
// Most genomes seem happy with 1 GB, then some with 2 GB, then some with 5 GB.
Expand Down
32 changes: 24 additions & 8 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,6 @@ process {
ext.args = 'include=f'
}

withName: '.*:.*:ALIGN_HIC:BWAMEM2_MEM' {
ext.args = { "-5SPCp -R ${meta.read_group}" }
}

withName: '.*:.*:ALIGN_ILLUMINA:BWAMEM2_MEM' {
ext.args = { "-p -R ${meta.read_group}" }
}

withName: SAMTOOLS_MERGE {
beforeScript = { "export REF_PATH=spoof"}
ext.args = { "-c -p" }
Expand Down Expand Up @@ -119,6 +111,30 @@ process {
ext.args = { "-ax map-ont -R ${meta.read_group} -I" + Math.ceil(meta2.genome_size/1e9) + 'G' }
}

// Command-line arguments for CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT when it
// runs anywhere under the ALIGN_HIFI subworkflow (the selector is a regex on the
// fully qualified process name).
// NOTE(review): which tool receives each ext.argsN is decided by the module
// script, which is not shown here; the flags suggest args2 goes to minimap2
// (-ax preset) - confirm against the module.
withName: ".*:ALIGN_HIFI:.*:CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT" {
// Explicitly empty so no default argument string is picked up.
ext.args = ""
ext.args1 = { "-F 0x200 -nt" }
// Uses the map-hifi preset with --cs=short; the -I value is meta.genome_size
// rounded up to whole units of 1e9, with a 'G' suffix.
ext.args2 = { "-ax map-hifi --cs=short -I" + Math.ceil(meta.genome_size/1e9) + 'G' }
ext.args3 = "-mpu"
ext.args4 = { "--write-index -l1" }
}

// Command-line arguments for CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT when it
// runs anywhere under the ALIGN_CLR subworkflow (the selector is a regex on the
// fully qualified process name).
// NOTE(review): which tool receives each ext.argsN is decided by the module
// script, which is not shown here; the flags suggest args2 goes to minimap2
// (-ax preset) - confirm against the module.
withName: ".*:ALIGN_CLR:.*:CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT" {
// Explicitly empty so no default argument string is picked up.
ext.args = ""
ext.args1 = { "-F 0x200 -nt" }
// Uses the map-pb preset; the -I value is meta.genome_size rounded up to
// whole units of 1e9, with a 'G' suffix.
ext.args2 = { "-ax map-pb -I" + Math.ceil(meta.genome_size/1e9) + 'G' }
ext.args3 = "-mpu"
ext.args4 = { "--write-index -l1" }
}

// Command-line arguments for CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT when it
// runs anywhere under the ALIGN_ONT subworkflow (the selector is a regex on the
// fully qualified process name).
// NOTE(review): which tool receives each ext.argsN is decided by the module
// script, which is not shown here; the flags suggest args2 goes to minimap2
// (-ax preset) - confirm against the module.
withName: ".*:ALIGN_ONT:.*:CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT" {
// Explicitly empty so no default argument string is picked up.
ext.args = ""
ext.args1 = { "-F 0x200 -nt" }
// Uses the map-ont preset; the -I value is meta.genome_size rounded up to
// whole units of 1e9, with a 'G' suffix.
ext.args2 = { "-ax map-ont -I" + Math.ceil(meta.genome_size/1e9) + 'G' }
ext.args3 = "-mpu"
ext.args4 = { "--write-index -l1" }
}

withName: '.*:CONVERT_STATS:SAMTOOLS_CRAM' {
beforeScript = { "export REF_PATH=spoof"}
ext.prefix = { "${fasta.baseName}.${meta.datatype}.${meta.id}" }
Expand Down
Empty file removed seq_cache_populate.pl
Empty file.
55 changes: 48 additions & 7 deletions subworkflows/local/align_ont.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@
// Align Nanopore read files against the genome
//

include { MINIMAP2_ALIGN } from '../../modules/nf-core/minimap2/align/main'
include { SAMTOOLS_ADDREPLACERG } from '../../modules/local/samtools_addreplacerg'
include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main'
include { GENERATE_CRAM_CSV } from '../../modules/local/generate_cram_csv'
include { MINIMAP2_MAPREDUCE } from '../../subworkflows/local/minimap2_mapreduce'
include { SAMTOOLS_SORMADUP as CONVERT_CRAM } from '../../modules/local/samtools_sormadup'
include { SAMTOOLS_MERGE } from '../../modules/nf-core/samtools/merge/main'


Expand All @@ -14,17 +18,54 @@ workflow ALIGN_ONT {

main:
ch_versions = Channel.empty()
ch_merged_bam = Channel.empty()

// Convert FASTQ to CRAM
CONVERT_CRAM ( reads, fasta )
ch_versions = ch_versions.mix ( CONVERT_CRAM.out.versions )

// Align Fastq to Genome with minimap2. bam_format is set to true, making the output a *sorted* BAM
MINIMAP2_ALIGN ( reads, fasta, true, "bai", false, false )
ch_versions = ch_versions.mix ( MINIMAP2_ALIGN.out.versions.first() )
SAMTOOLS_ADDREPLACERG ( CONVERT_CRAM.out.bam )
ch_versions = ch_versions.mix ( SAMTOOLS_ADDREPLACERG.out.versions )

SAMTOOLS_ADDREPLACERG.out.cram
| set { ch_reads_cram }

// Collect all alignment output by sample name
MINIMAP2_ALIGN.out.bam
// Index the CRAM file
SAMTOOLS_INDEX ( ch_reads_cram )
ch_versions = ch_versions.mix( SAMTOOLS_INDEX.out.versions )

ch_reads_cram
| join ( SAMTOOLS_INDEX.out.crai )
| set { ch_reads_cram_crai }


//
// MODULE: generate a CRAM CSV file containing the required parameters for CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT
//
GENERATE_CRAM_CSV( ch_reads_cram_crai )
ch_versions = ch_versions.mix( GENERATE_CRAM_CSV.out.versions )

//
// SUBWORKFLOW: mapping ONT reads using minimap2
//
MINIMAP2_MAPREDUCE (
fasta,
GENERATE_CRAM_CSV.out.csv
)
ch_versions = ch_versions.mix( MINIMAP2_MAPREDUCE.out.versions )
ch_merged_bam = ch_merged_bam.mix(MINIMAP2_MAPREDUCE.out.mergedbam)


ch_merged_bam
| combine( ch_reads_cram_crai )
| map { meta_bam, bam, meta_cram, cram, crai -> [ meta_cram, bam ] }
| set { ch_merged_bam }


// Collect all BAM output by sample name
ch_merged_bam
| map { meta, bam -> [['id': meta.id.split('_')[0..-2].join('_'), 'datatype': meta.datatype], meta.read_count, bam] }
| groupTuple ( by: [0] )
| groupTuple( by: [0] )
| map { meta, read_counts, bams -> [meta + [read_count: read_counts.sum()], bams] }
| branch {
meta, bams ->
Expand Down
55 changes: 47 additions & 8 deletions subworkflows/local/align_pacbio.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@
//

include { FILTER_PACBIO } from '../../subworkflows/local/filter_pacbio'
include { MINIMAP2_ALIGN } from '../../modules/nf-core/minimap2/align/main'
include { SAMTOOLS_ADDREPLACERG } from '../../modules/local/samtools_addreplacerg'
include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main'
include { GENERATE_CRAM_CSV } from '../../modules/local/generate_cram_csv'
include { MINIMAP2_MAPREDUCE } from '../../subworkflows/local/minimap2_mapreduce'
include { SAMTOOLS_SORMADUP as CONVERT_CRAM } from '../../modules/local/samtools_sormadup'
include { SAMTOOLS_MERGE } from '../../modules/nf-core/samtools/merge/main'


Expand All @@ -16,21 +20,56 @@ workflow ALIGN_PACBIO {

main:
ch_versions = Channel.empty()

ch_merged_bam = Channel.empty()

// Filter BAM and output as FASTQ
FILTER_PACBIO ( reads, db )
ch_versions = ch_versions.mix ( FILTER_PACBIO.out.versions )

// Convert FASTQ to CRAM
CONVERT_CRAM ( FILTER_PACBIO.out.fastq, fasta )
ch_versions = ch_versions.mix ( CONVERT_CRAM.out.versions )

SAMTOOLS_ADDREPLACERG ( CONVERT_CRAM.out.bam )
ch_versions = ch_versions.mix ( SAMTOOLS_ADDREPLACERG.out.versions )

SAMTOOLS_ADDREPLACERG.out.cram
| set { ch_reads_cram }

// Index the CRAM file
SAMTOOLS_INDEX ( ch_reads_cram )
ch_versions = ch_versions.mix( SAMTOOLS_INDEX.out.versions )

ch_reads_cram
| join ( SAMTOOLS_INDEX.out.crai )
| set { ch_reads_cram_crai }


//
// MODULE: generate a CRAM CSV file containing the required parameters for CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT
//
GENERATE_CRAM_CSV( ch_reads_cram_crai )
ch_versions = ch_versions.mix( GENERATE_CRAM_CSV.out.versions )

//
// SUBWORKFLOW: mapping pacbio reads using minimap2
//
MINIMAP2_MAPREDUCE (
fasta,
GENERATE_CRAM_CSV.out.csv
)
ch_versions = ch_versions.mix( MINIMAP2_MAPREDUCE.out.versions )
ch_merged_bam = ch_merged_bam.mix(MINIMAP2_MAPREDUCE.out.mergedbam)

// Align Fastq to Genome with minimap2. bam_format is set to true, making the output a *sorted* BAM
MINIMAP2_ALIGN ( FILTER_PACBIO.out.fastq, fasta, true, "bai", false, false )
ch_versions = ch_versions.mix ( MINIMAP2_ALIGN.out.versions.first() )
ch_merged_bam
| combine( ch_reads_cram_crai )
| map { meta_bam, bam, meta_cram, cram, crai -> [ meta_cram, bam ] }
| set { ch_merged_bam }

// Collect all alignment output by sample name
MINIMAP2_ALIGN.out.bam
// Collect all BAM output by sample name
ch_merged_bam
| map { meta, bam -> [['id': meta.id.split('_')[0..-2].join('_'), 'datatype': meta.datatype], meta.read_count, bam] }
| groupTuple ( by: [0] )
| groupTuple( by: [0] )
| map { meta, read_counts, bams -> [meta + [read_count: read_counts.sum()], bams] }
| branch {
meta, bams ->
Expand Down
3 changes: 2 additions & 1 deletion subworkflows/local/minimap2_mapreduce.nf
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ workflow MINIMAP2_MAPREDUCE {
.map{ cram_id, cram_info, ref_id, ref_dir, mmi_id, mmi_path->
tuple([
id: cram_id.id,
chunk_id: cram_id.id + "_" + cram_info[5]
chunk_id: cram_id.id + "_" + cram_info[5],
genome_size: ref_id.genome_size
],
file(cram_info[0]),
cram_info[1],
Expand Down

0 comments on commit f72917c

Please sign in to comment.