move the branching for FASTQ files inside of align_short.nf

sanger-tol · Jul 9, 2024 · 4804307 · 4804307
1 parent dce6de7
commit 4804307
Show file tree

Hide file tree

Showing 4 changed files with 24 additions and 82 deletions.
diff --git a/docs/usage.md b/docs/usage.md
@@ -42,7 +42,7 @@ sample1_T5,pacbio,pacbio2.bam,pacbio2
 | ---------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `sample`   | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (\_).                                                 |
 | `datatype` | Type of sequencing data. Must be one of `hic`, `Illumina`, `pacbio`, or `ont`.                                                                                                                                                        |
-| `datafile` | Full path to read data file. Must be `bam` or `cram` for `hic`. Must be `cram` or `fastq.gz` or `fq.gz` for `Illumina`. Must be `bam` for `pacbio`. Must be `fastq.gz` or `fq.gz` for `ont`.                                          |
+| `datafile` | Full path to read data file. Must be `bam` or `cram` or `fastq.gz` or `fq.gz` for `Illumina` and `hic`. Must be `bam` for `pacbio`. Must be `fastq.gz` or `fq.gz` for `ont`.                                                          |
 | `library`  | (Optional) The library value is a unique identifier which is assigned to read group (`@RG`) ID. If the library name is not specified, the pipeline will auto-create library name using the data filename provided in the samplesheet. |
 
 An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.

diff --git a/subworkflows/local/align_illumina_fastq.nf b/subworkflows/local/align_illumina_fastq.nf
diff --git a/subworkflows/local/align_short.nf b/subworkflows/local/align_short.nf
@@ -18,15 +18,31 @@ workflow ALIGN_SHORT {
     main:
     ch_versions = Channel.empty()
 
+    // Check file types and branch
+    reads
+    | branch {
+        meta, reads ->
+            fastq : reads.findAll { it.getName().toLowerCase() =~ /.*f.*\.gz/ }
+            cram : true
+    }
+    | set { ch_reads }
+
 
-    // Convert from CRAM to FASTQ
-    SAMTOOLS_FASTQ ( reads, false )
-    ch_versions = ch_versions.mix ( SAMTOOLS_FASTQ.out.versions.first() )
+    if ( ch_reads.cram ) {
+        // Convert from CRAM to FASTQ
+        SAMTOOLS_FASTQ ( ch_reads.cram, false )
+        ch_versions = ch_versions.mix ( SAMTOOLS_FASTQ.out.versions.first() )
 
 
-    // Align Fastq to Genome and output sorted BAM
-    BWAMEM2_MEM ( SAMTOOLS_FASTQ.out.fastq, index, true )
-    ch_versions = ch_versions.mix ( BWAMEM2_MEM.out.versions.first() )
+        // Align Fastq to Genome and output sorted BAM
+        BWAMEM2_MEM ( SAMTOOLS_FASTQ.out.fastq, index, true )
+        ch_versions = ch_versions.mix ( BWAMEM2_MEM.out.versions.first() )
+
+
+    } else {
+        BWAMEM2_MEM ( ch_reads.fastq, index, true )
+        ch_versions = ch_versions.mix ( BWAMEM2_MEM.out.versions.first() )
+    }
 
 
     // Collect all BWAMEM2 output by sample name

diff --git a/workflows/readmapping.nf b/workflows/readmapping.nf
@@ -33,7 +33,6 @@ include { ALIGN_PACBIO as ALIGN_HIFI    } from '../subworkflows/local/align_pacb
 include { ALIGN_PACBIO as ALIGN_CLR     } from '../subworkflows/local/align_pacbio'
 include { ALIGN_ONT                     } from '../subworkflows/local/align_ont'
 include { CONVERT_STATS                 } from '../subworkflows/local/convert_stats'
-include { ALIGN_ILLUMINA_FASTQ          } from '../subworkflows/local/align_illumina_fastq'
 
 
 /*
@@ -77,13 +76,6 @@ workflow READMAPPING {
 
     ch_versions = ch_versions.mix ( INPUT_CHECK.out.versions )
 
-    ch_reads.illumina
-    | branch {
-        meta, reads ->
-            fastq : reads.findAll { it.getName().toLowerCase() =~ /.*f.*\.gz/ }
-            cram : reads.findAll { it.getName().toLowerCase() =~ /.*cram/ }
-    }
-    | set { ch_illumina }
 
     //
     // SUBWORKFLOW: Uncompress and prepare reference genome files
@@ -121,12 +113,9 @@ workflow READMAPPING {
     ALIGN_HIC ( PREPARE_GENOME.out.fasta, PREPARE_GENOME.out.bwaidx, ch_reads.hic )
     ch_versions = ch_versions.mix ( ALIGN_HIC.out.versions )
 
-    ALIGN_ILLUMINA ( PREPARE_GENOME.out.fasta, PREPARE_GENOME.out.bwaidx, ch_illumina.cram )
+    ALIGN_ILLUMINA ( PREPARE_GENOME.out.fasta, PREPARE_GENOME.out.bwaidx, ch_reads.illumina )
     ch_versions = ch_versions.mix ( ALIGN_ILLUMINA.out.versions )
 
-    ALIGN_ILLUMINA_FASTQ ( PREPARE_GENOME.out.fasta, PREPARE_GENOME.out.bwaidx, ch_illumina.fastq )
-    ch_versions = ch_versions.mix ( ALIGN_ILLUMINA_FASTQ.out.versions )
-
     ALIGN_HIFI ( PREPARE_GENOME.out.fasta, ch_reads.pacbio, ch_vector_db )
     ch_versions = ch_versions.mix ( ALIGN_HIFI.out.versions )