Skip to content

Commit

Permalink
generalise mapreduce to illumina
Browse files Browse the repository at this point in the history
tkchafin committed Sep 16, 2024
1 parent 17bbbc7 commit a7db737
Showing 10 changed files with 92 additions and 164 deletions.
36 changes: 18 additions & 18 deletions conf/modules.config
Original file line number Diff line number Diff line change
@@ -15,14 +15,6 @@ process {
ext.args = '-F 0x200 -nt'
}

withName: '.*:.*:ALIGN_HIC:BWAMEM2_MEM' {
ext.args = { "-5SPCp -R ${meta.read_group}" }
}

withName: '.*:.*:ALIGN_ILLUMINA:BWAMEM2_MEM' {
ext.args = { "-R ${meta.read_group}" }
}

withName: SAMTOOLS_MERGE {
ext.args = { "-c -p" }
ext.prefix = { "${meta.id}.merge" }
@@ -50,39 +42,47 @@ process {
ext.args = "--output-fmt cram"
}

withName: ".*:.*:ILLUMINA_BWAMEM2:CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT" {
withName: '.*:.*:ALIGN_HIC:BWAMEM2_MEM' {
ext.args = { "-5SPCp -R ${meta.read_group}" }
}

withName: '.*:.*:ALIGN_ILLUMINA:BWAMEM2_MEM' {
ext.args = { "-p -R ${meta.read_group}" }
}

withName: ".*:ALIGN_ILLUMINA:.*:CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT" {
ext.args = ""
ext.args1 = { "" }
ext.args2 = { "-R '${rglines}'" }
ext.args1 = { "-F 0x200 -nt" }
ext.args2 = { "-p -R '${rglines}'" }
ext.args3 = "-mpu"
ext.args4 = { "--write-index -l1" }
}

withName: ".*:.*:ILLUMINA_MINIMAP2:CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT" {
withName: ".*:ALIGN_ILLUMINA:.*:CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT" {
ext.args = ""
ext.args1 = { "" }
ext.args1 = { "-F 0x200 -nt" }
ext.args2 = { "-ax sr" }
ext.args3 = "-mpu"
ext.args4 = { "--write-index -l1" }
}

withName: ".*:.*:HIC_BWAMEM2:CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT" {
withName: ".*:ALIGN_HIC:.*:CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT" {
ext.args = ""
ext.args1 = { "" }
ext.args1 = { "-F 0x200 -nt" }
ext.args2 = { "-5SPCp -R '${rglines}'" }
ext.args3 = "-mpu"
ext.args4 = { "--write-index -l1" }
}

withName: ".*:.*:HIC_MINIMAP2:CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT" {
withName: ".*:ALIGN_HIC:.*:CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT" {
ext.args = ""
ext.args1 = { "" }
ext.args2 = { "-ax sr" }
ext.args3 = "-mpu"
ext.args4 = { "--write-index -l1" }
}

withName: ".*:.*:HIC_MINIMAP2:MINIMAP2_INDEX" {
withName: "MINIMAP2_INDEX" {
ext.args = { "${fasta.size() > 2.5e9 ? (" -I " + Math.ceil(fasta.size()/1e9)+"G") : ""} "}
}

@@ -123,7 +123,7 @@ process {

withName: SAMTOOLS_ADDREPLACERG {
ext.prefix = { "${input.baseName}_addRG" }
ext.args = { "-r ${meta.read_group}" }
ext.args = { "-r ${meta.read_group} --no-PG" }
}

withName: SAMTOOLS_STATS {
2 changes: 1 addition & 1 deletion conf/test.config
Original file line number Diff line number Diff line change
@@ -26,7 +26,7 @@ params {
outdir = "${projectDir}/results"

// Aligner
hic_aligner = "minimap2"
short_aligner = "minimap2"

// Fasta references
fasta = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/assembly/release/mMelMel3.1_paternal_haplotype/GCA_922984935.2.subset.fasta.gz"
2 changes: 1 addition & 1 deletion modules/local/generate_cram_csv.nf
Original file line number Diff line number Diff line change
@@ -3,7 +3,7 @@ process GENERATE_CRAM_CSV {
label 'process_single'

container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'quay.io/sanger-tol/cramfilter_bwamem2_minimap2_samtools_perl:0.001-c1' :
'https://quay.io/sanger-tol/cramfilter_bwamem2_minimap2_samtools_perl:0.001-c1' :
'sanger-tol/cramfilter_bwamem2_minimap2_samtools_perl:0.001-c1' }"

input:
2 changes: 1 addition & 1 deletion modules/local/samtools_addreplacerg.nf
Original file line number Diff line number Diff line change
@@ -27,7 +27,7 @@ process SAMTOOLS_ADDREPLACERG {
"""
samtools \\
addreplacerg \\
--threads ${task.cpus-1} \\
--threads $task.cpus \\
$args \\
-o ${prefix}.${file_type} \\
$input
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
@@ -15,6 +15,7 @@ params {
bwamem2_index = null
fasta = null
header = null

// Aligner option
short_aligner = "minimap2" // Choose either "minimap2" or "bwamem2"
chunk_size = 10000
86 changes: 64 additions & 22 deletions subworkflows/local/align_short.nf
Original file line number Diff line number Diff line change
@@ -2,21 +2,26 @@
// Align short read (HiC and Illumina) data against the genome
//

include { SAMTOOLS_FASTQ } from '../../modules/nf-core/samtools/fastq/main'
include { BWAMEM2_MEM } from '../../modules/nf-core/bwamem2/mem/main'
include { SAMTOOLS_MERGE } from '../../modules/nf-core/samtools/merge/main'
include { SAMTOOLS_SORMADUP } from '../../modules/local/samtools_sormadup'

include { SAMTOOLS_FASTQ } from '../../modules/nf-core/samtools/fastq/main'
include { SAMTOOLS_MERGE } from '../../modules/nf-core/samtools/merge/main'
include { SAMTOOLS_SORMADUP } from '../../modules/local/samtools_sormadup'
include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main'
include { GENERATE_CRAM_CSV } from '../../modules/local/generate_cram_csv'
include { SAMTOOLS_SORMADUP as CONVERT_CRAM } from '../../modules/local/samtools_sormadup'
include { SAMTOOLS_ADDREPLACERG } from '../../modules/local/samtools_addreplacerg'
include { MINIMAP2_MAPREDUCE } from '../../subworkflows/local/minimap2_mapreduce'
include { BWAMEM2_MAPREDUCE } from '../../subworkflows/local/bwamem2_mapreduce'

workflow ALIGN_SHORT {
take:
fasta // channel: [ val(meta), /path/to/fasta ]
fasta // channel: [ val(meta), /path/to/fasta ] reference_tuple
index // channel: [ val(meta), /path/to/bwamem2/ ]
reads // channel: [ val(meta), /path/to/datafile ]
reads // channel: [ val(meta), /path/to/datafile ] hic_reads_path


main:
ch_versions = Channel.empty()
ch_merged_bam = Channel.empty()

// Check file types and branch
reads
@@ -28,23 +33,60 @@ workflow ALIGN_SHORT {
| set { ch_reads }


// Convert from CRAM to FASTQ only if CRAM files were provided as input
SAMTOOLS_FASTQ ( ch_reads.cram, false )
ch_versions = ch_versions.mix ( SAMTOOLS_FASTQ.out.versions.first() )


SAMTOOLS_FASTQ.out.fastq
| mix ( ch_reads.fastq )
| set { ch_reads_fastq }

// Convert FASTQ to CRAM only if FASTQ files were provided as input
CONVERT_CRAM ( ch_reads.fastq, fasta )
ch_versions = ch_versions.mix ( CONVERT_CRAM.out.versions )

SAMTOOLS_ADDREPLACERG ( CONVERT_CRAM.out.bam )
ch_versions = ch_versions.mix ( SAMTOOLS_ADDREPLACERG.out.versions )

SAMTOOLS_ADDREPLACERG.out.cram
| mix ( ch_reads.cram )
| set { ch_reads_cram }

// Index the CRAM file
SAMTOOLS_INDEX ( ch_reads_cram )
ch_versions = ch_versions.mix( SAMTOOLS_INDEX.out.versions )

ch_reads_cram
| join ( SAMTOOLS_INDEX.out.crai )
| set { ch_reads_cram_crai }


//
// MODULE: generate a CRAM CSV file containing the required parameters for the chunked alignment step (minimap2 or bwamem2 mapreduce)
//
GENERATE_CRAM_CSV( ch_reads_cram_crai )
ch_versions = ch_versions.mix( GENERATE_CRAM_CSV.out.versions )

//
// SUBWORKFLOW: map short reads (HiC or Illumina) using minimap2 or bwamem2
//
if (params.short_aligner.startsWith("minimap")) {
MINIMAP2_MAPREDUCE (
fasta,
GENERATE_CRAM_CSV.out.csv
)
ch_versions = ch_versions.mix( MINIMAP2_MAPREDUCE.out.versions )
ch_merged_bam = ch_merged_bam.mix(MINIMAP2_MAPREDUCE.out.mergedbam)
} else {
BWAMEM2_MAPREDUCE (
fasta,
GENERATE_CRAM_CSV.out.csv,
index
)
ch_versions = ch_versions.mix( BWAMEM2_MAPREDUCE.out.versions )
ch_merged_bam = ch_merged_bam.mix(BWAMEM2_MAPREDUCE.out.mergedbam)
}

// Align Fastq to Genome and output sorted BAM
BWAMEM2_MEM ( ch_reads_fastq, index, fasta, true )
ch_versions = ch_versions.mix ( BWAMEM2_MEM.out.versions.first() )
ch_merged_bam
| combine( ch_reads_cram_crai )
| map { meta_bam, bam, meta_cram, cram, crai -> [ meta_cram, bam ] }
| set { ch_merged_bam }


// Collect all BWAMEM2 output by sample name
BWAMEM2_MEM.out.bam
// Collect all BAM output by sample name
ch_merged_bam
| map { meta, bam -> [['id': meta.id.split('_')[0..-2].join('_'), 'datatype': meta.datatype], meta.read_count, bam] }
| groupTuple( by: [0] )
| map { meta, read_counts, bams -> [meta + [read_count: read_counts.sum()], bams] }
@@ -58,7 +100,7 @@ workflow ALIGN_SHORT {

// Merge, but only if there is more than 1 file
SAMTOOLS_MERGE ( ch_bams.multi_bams, [ [], [] ], [ [], [] ] )
ch_versions = ch_versions.mix ( SAMTOOLS_MERGE.out.versions.first() )
ch_versions = ch_versions.mix ( SAMTOOLS_MERGE.out.versions )


SAMTOOLS_MERGE.out.bam
116 changes: 0 additions & 116 deletions subworkflows/local/align_short_hic.nf

This file was deleted.

3 changes: 2 additions & 1 deletion subworkflows/local/bwamem2_mapreduce.nf
Original file line number Diff line number Diff line change
@@ -36,8 +36,9 @@ workflow BWAMEM2_MAPREDUCE {
}
.set { ch_filtering_input }


//
// MODULE: map hic reads by 10,000 container per time using bwamem2
// MODULE: map short reads (HiC or Illumina) in each chunk using bwamem2
//
CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT (
ch_filtering_input,
6 changes: 3 additions & 3 deletions subworkflows/local/prepare_genome.nf
Original file line number Diff line number Diff line change
@@ -30,7 +30,7 @@ workflow PREPARE_GENOME {

// Unmask genome fasta
UNMASK ( ch_fasta )
ch_versions = ch_versions.mix ( UNMASK.out.versions.first() )
ch_versions = ch_versions.mix ( UNMASK.out.versions )

// Generate BWA index
if ( checkShortReads( params.input ) ) {
@@ -42,14 +42,14 @@ workflow PREPARE_GENOME {

if ( params.bwamem2_index.endsWith('.tar.gz') ) {
ch_bwamem2_index = UNTAR ( ch_bwamem ).untar
ch_versions = ch_versions.mix ( UNTAR.out.versions.first() )
ch_versions = ch_versions.mix ( UNTAR.out.versions )
} else {
ch_bwamem2_index = ch_bwamem
}

} else {
ch_bwamem2_index = BWAMEM2_INDEX ( UNMASK.out.fasta ).index
ch_versions = ch_versions.mix ( BWAMEM2_INDEX.out.versions.first() )
ch_versions = ch_versions.mix ( BWAMEM2_INDEX.out.versions )
}
} else {
ch_bwamem2_index = Channel.empty()
2 changes: 1 addition & 1 deletion workflows/readmapping.nf
Original file line number Diff line number Diff line change
@@ -18,7 +18,7 @@ include { SAMTOOLS_REHEADER } from '../modules/local/samtools_replaceh

include { INPUT_CHECK } from '../subworkflows/local/input_check'
include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome'
include { ALIGN_SHORT_HIC as ALIGN_HIC } from '../subworkflows/local/align_short_hic'
include { ALIGN_SHORT as ALIGN_HIC } from '../subworkflows/local/align_short'
include { ALIGN_SHORT as ALIGN_ILLUMINA } from '../subworkflows/local/align_short'
include { ALIGN_PACBIO as ALIGN_HIFI } from '../subworkflows/local/align_pacbio'
include { ALIGN_PACBIO as ALIGN_CLR } from '../subworkflows/local/align_pacbio'

0 comments on commit a7db737

Please sign in to comment.