Skip to content

Commit

Permalink
move the branching for FASTQ files inside of align_short.nf
Browse files Browse the repository at this point in the history
  • Loading branch information
reichan1998 committed Jul 9, 2024
1 parent dce6de7 commit 4804307
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 82 deletions.
2 changes: 1 addition & 1 deletion docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ sample1_T5,pacbio,pacbio2.bam,pacbio2
| ---------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (\_). |
| `datatype` | Type of sequencing data. Must be one of `hic`, `Illumina`, `pacbio`, or `ont`. |
| `datafile` | Full path to read data file. Must be `bam` or `cram` for `hic`. Must be `cram` or `fastq.gz` or `fq.gz` for `Illumina`. Must be `bam` for `pacbio`. Must be `fastq.gz` or `fq.gz` for `ont`. |
| `datafile` | Full path to read data file. Must be `bam`, `cram`, `fastq.gz`, or `fq.gz` for `Illumina` and `hic`. Must be `bam` for `pacbio`. Must be `fastq.gz` or `fq.gz` for `ont`. |
| `library` | (Optional) The library value is a unique identifier which is assigned to read group (`@RG`) ID. If the library name is not specified, the pipeline will auto-create library name using the data filename provided in the samplesheet. |

An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.
Expand Down
63 changes: 0 additions & 63 deletions subworkflows/local/align_illumina_fastq.nf

This file was deleted.

28 changes: 22 additions & 6 deletions subworkflows/local/align_short.nf
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,31 @@ workflow ALIGN_SHORT {
main:
ch_versions = Channel.empty()

// Check file types and branch
reads
| branch {
meta, reads ->
fastq : reads.findAll { it.getName().toLowerCase() =~ /.*f.*\.gz/ }
cram : true
}
| set { ch_reads }


// Convert from CRAM to FASTQ
SAMTOOLS_FASTQ ( reads, false )
ch_versions = ch_versions.mix ( SAMTOOLS_FASTQ.out.versions.first() )
if ( ch_reads.cram ) {
// Convert from CRAM to FASTQ
SAMTOOLS_FASTQ ( ch_reads.cram, false )
ch_versions = ch_versions.mix ( SAMTOOLS_FASTQ.out.versions.first() )


// Align Fastq to Genome and output sorted BAM
BWAMEM2_MEM ( SAMTOOLS_FASTQ.out.fastq, index, true )
ch_versions = ch_versions.mix ( BWAMEM2_MEM.out.versions.first() )
// Align Fastq to Genome and output sorted BAM
BWAMEM2_MEM ( SAMTOOLS_FASTQ.out.fastq, index, true )
ch_versions = ch_versions.mix ( BWAMEM2_MEM.out.versions.first() )


} else {
BWAMEM2_MEM ( ch_reads.fastq, index, true )
ch_versions = ch_versions.mix ( BWAMEM2_MEM.out.versions.first() )
}


// Collect all BWAMEM2 output by sample name
Expand Down
13 changes: 1 addition & 12 deletions workflows/readmapping.nf
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ include { ALIGN_PACBIO as ALIGN_HIFI } from '../subworkflows/local/align_pacb
include { ALIGN_PACBIO as ALIGN_CLR } from '../subworkflows/local/align_pacbio'
include { ALIGN_ONT } from '../subworkflows/local/align_ont'
include { CONVERT_STATS } from '../subworkflows/local/convert_stats'
include { ALIGN_ILLUMINA_FASTQ } from '../subworkflows/local/align_illumina_fastq'


/*
Expand Down Expand Up @@ -77,13 +76,6 @@ workflow READMAPPING {

ch_versions = ch_versions.mix ( INPUT_CHECK.out.versions )

ch_reads.illumina
| branch {
meta, reads ->
fastq : reads.findAll { it.getName().toLowerCase() =~ /.*f.*\.gz/ }
cram : reads.findAll { it.getName().toLowerCase() =~ /.*cram/ }
}
| set { ch_illumina }

//
// SUBWORKFLOW: Uncompress and prepare reference genome files
Expand Down Expand Up @@ -121,12 +113,9 @@ workflow READMAPPING {
ALIGN_HIC ( PREPARE_GENOME.out.fasta, PREPARE_GENOME.out.bwaidx, ch_reads.hic )
ch_versions = ch_versions.mix ( ALIGN_HIC.out.versions )

ALIGN_ILLUMINA ( PREPARE_GENOME.out.fasta, PREPARE_GENOME.out.bwaidx, ch_illumina.cram )
ALIGN_ILLUMINA ( PREPARE_GENOME.out.fasta, PREPARE_GENOME.out.bwaidx, ch_reads.illumina )
ch_versions = ch_versions.mix ( ALIGN_ILLUMINA.out.versions )

ALIGN_ILLUMINA_FASTQ ( PREPARE_GENOME.out.fasta, PREPARE_GENOME.out.bwaidx, ch_illumina.fastq )
ch_versions = ch_versions.mix ( ALIGN_ILLUMINA_FASTQ.out.versions )

ALIGN_HIFI ( PREPARE_GENOME.out.fasta, ch_reads.pacbio, ch_vector_db )
ch_versions = ch_versions.mix ( ALIGN_HIFI.out.versions )

Expand Down

0 comments on commit 4804307

Please sign in to comment.