Commit dcb3273

update
dfornika committed Jul 15, 2024
2 parents 2ffc33b + 1a2f220 commit dcb3273
Showing 7 changed files with 98 additions and 55 deletions.
2 changes: 1 addition & 1 deletion .github/scripts/run_pipeline.sh
@@ -15,7 +15,7 @@ if [ -z "${GITHUB_ACTIONS}" ]; then
else
echo "In GitHub Actions environment. Modifying nextflow.config and FluViewer.nf."
sed -i 's/cpus = 8/cpus = 4/g' nextflow.config
sed -i '/memory/d' modules/fluviewer.nf
sed -i "s/memory = '50 GB'/memory = '8 GB'/g" nextflow.config
fi

nextflow -log artifacts/nextflow.log \
9 changes: 5 additions & 4 deletions README.md
@@ -54,10 +54,11 @@ Short read Illumina sequences, files ending in '.fastq.gz', '.fq.gz', '.fastq',
For a full list of optional arguments, see: https://github.com/BCCDC-PHL/FluViewer

| Argument | Description | Default Value |
|-----------------------|--------------------------------------------------------------------------------------------------|----------------:|
| `--min_depth` | Minimum read depth for base calling [default: 20] | 20 |
| `--min_q` | Minimum PHRED score for base quality and mapping quality [default: 20] | 20 |
| `--min_cov` | Minimum coverage of database reference sequence by contig, percentage [default: 25] | 25 |
|----------------------------|--------------------------------------------------------------------------------------------------|----------------:|
| `--target_depth` | Depth to normalize coverage to, where sufficient depth is available in inputs. | 200 |
| `--min_depth` | Minimum read depth for base calling. | 20 |
| `--min_q` | Minimum PHRED score for base quality and mapping quality. | 20 |
| `--min_cov` | Minimum coverage of database reference sequence by contig, percentage. | 25 |
| `--min_ident` | Minimum nucleotide sequence identity between database reference sequence and contig (percentage) | 95 |

**Example command:**
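The README's own example command is not visible in this hunk. Purely as an illustration of how the arguments in the table above might be combined, a hypothetical invocation could look like the following (the repository name, the conda profile, and all paths are assumptions, not taken from this commit):

nextflow run BCCDC-PHL/fluviewer-nf \
  -profile conda \
  --fastq_input /path/to/fastqs \
  --db /path/to/FluViewer_db.fa \
  --target_depth 200 \
  --min_depth 20 \
  --min_q 20 \
  --min_cov 25 \
  --min_ident 95 \
  --outdir /path/to/output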
2 changes: 1 addition & 1 deletion environments/fluviewer.yml
@@ -18,4 +18,4 @@ dependencies:
- pip=24.0
- setuptools=69
- pip:
- git+https://github.com/BCCDC-PHL/[email protected]4
- git+https://github.com/BCCDC-PHL/[email protected]6
25 changes: 14 additions & 11 deletions main.nf
@@ -12,6 +12,7 @@ include { pipeline_provenance } from './modules/provenance.nf'
include { collect_provenance } from './modules/provenance.nf'
include { fastp } from './modules/fastp.nf'
include { cutadapt} from './modules/cutadapt.nf'
include { normalize_depth } from './modules/fluviewer.nf'
include { fluviewer } from './modules/fluviewer.nf'
include { multiqc } from './modules/multiqc.nf'
include { fastqc } from './modules/fastqc.nf'
@@ -66,7 +67,6 @@ workflow {

ch_reference_db = Channel.of([file(params.blastx_subtype_db).parent, file(params.blastx_subtype_db).name]).first()


main:

if (params.fastq_input == 'NO_FILE' && params.samplesheet_input == 'NO_FILE') {
@@ -88,15 +88,17 @@
cutadapt(fastp.out.trimmed_reads.combine(ch_primers))
fastqc(cutadapt.out.primer_trimmed_reads)

normalize_depth(cutadapt.out.primer_trimmed_reads)

// Run FluViewer
fluviewer(cutadapt.out.primer_trimmed_reads.combine(ch_db))
fluviewer(normalize_depth.out.normalized_reads.combine(ch_db))

// Collect all the relevant files for multiqc
ch_fastqc_collected = fastqc.out.zip.map{ it -> [it[1], it[2]]}.collect()
multiqc(fastp.out.json.mix( cutadapt.out.log, ch_fastqc_collected ).collect().ifEmpty([]) )

//Call clades for H1 and H3 samples
clade_calling(fluviewer.out.consensus_seqs)
clade_calling(fluviewer.out.ha_consensus_seq)

snp_calling(fluviewer.out.consensus_main, ch_reference_db)

@@ -112,14 +114,15 @@
// The basic idea is to build up a channel with the following structure:
// [sample_id, [provenance_file_1.yml, provenance_file_2.yml, provenance_file_3.yml...]]
// ...and then concatenate them all together in the 'collect_provenance' process.
ch_provenance = ch_provenance.combine(ch_pipeline_provenance).map{ it -> [it[0], [it[1]]] }
ch_provenance = ch_provenance.join(hash_files.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(fastp.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(cutadapt.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(fluviewer.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(clade_calling.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(snp_calling.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(genoflu.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.combine(ch_pipeline_provenance).map{ it -> [it[0], [it[1]]] }
ch_provenance = ch_provenance.join(hash_files.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(fastp.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(cutadapt.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(normalize_depth.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(fluviewer.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(clade_calling.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(snp_calling.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(genoflu.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
collect_provenance(ch_provenance)

}
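The comment block in the main.nf hunk above describes the shape of ch_provenance ([sample_id, [provenance_file_1.yml, ...]]). As a minimal, self-contained sketch of the join/accumulate idiom used there (toy sample ID and file names, not the pipeline's real provenance outputs):

// provenance_sketch.nf -- hypothetical values, for illustration only
nextflow.enable.dsl = 2

workflow {
    ch_acc  = Channel.of(['sampleA', ['pipeline_provenance.yml']])
    ch_step = Channel.of(['sampleA', 'sampleA_fastp_provenance.yml'])

    ch_acc
        .join(ch_step)                         // ['sampleA', [pipeline_provenance.yml], sampleA_fastp_provenance.yml]
        .map{ it -> [it[0], it[1] << it[2]] }  // append the per-step file to the accumulated list
        .view()                                // ['sampleA', [pipeline_provenance.yml, sampleA_fastp_provenance.yml]]
}

Each additional .join(...).map{ ... } stage appends one more per-sample provenance file to the accumulated list.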
59 changes: 31 additions & 28 deletions modules/clade_calling.nf
@@ -10,11 +10,11 @@ process clade_calling {
publishDir "${params.outdir}/${sample_id}/clade-calls", pattern: "${sample_id}*translation.fasta.gz", mode:'copy'

input:
tuple val(sample_id), path(consensus_seqs)
tuple val(sample_id), path(ha_consensus_seq)

output:
tuple val(sample_id), path("*nextclade*"), emit: nextclade, optional: true
tuple val(sample_id), path("${sample_id}_clade_calling_provenance.yml"), emit: provenance, optional: true
tuple val(sample_id), path("*nextclade*"), emit: nextclade
tuple val(sample_id), path("${sample_id}_clade_calling_provenance.yml"), emit: provenance

script:
"""
@@ -24,40 +24,43 @@ process clade_calling {
printf -- " tool_version: \$(nextclade --version 2>&1 | cut -d ' ' -f 2)\\n" >> ${sample_id}_clade_calling_provenance.yml
printf -- " subcommand: run\\n" >> ${sample_id}_clade_calling_provenance.yml
[ ! -f ${sample_id}_HA_consensus.fa ] && ln -sf *HA_consensus.fa ${sample_id}_HA_consensus.fa
FOUND=true
if [ `grep "H1" ${sample_id}_HA_consensus.fa` ]; then
dataset=${params.h1_dataset}
elif [ `grep "H3" ${sample_id}_HA_consensus.fa` ]; then
dataset=${params.h3_dataset}
elif [ `grep "H5" ${sample_id}_HA_consensus.fa` ]; then
dataset=${params.h5_dataset}
if [ `grep "H1" ${ha_consensus_seq}` ]; then
if [ ${params.h1_dataset} == "NO_FILE" ]; then
dataset="flu_h1n1pdm_ha"
reference="CY121680"
nextclade dataset get --name \$dataset --reference \$reference --output-dir \$dataset
else
dataset=${params.h1_dataset}
fi
elif [ `grep "H3" ${ha_consensus_seq}` ]; then
if [ ${params.h3_dataset} == "NO_FILE" ]; then
dataset="flu_h3n2_ha"
reference="CY163680"
nextclade dataset get --name \$dataset --reference \$reference --output-dir \$dataset
else
dataset=${params.h3_dataset}
fi
elif [ `grep "H5" ${ha_consensus_seq}` ]; then
if [ ${params.h5_dataset} == "NO_FILE" ]; then
echo "WARNING: H5 subtype detected in the HA consensus file, but no H5 dataset provided. Please provide an H5 nextclade dataset with the --h5_dataset flag."
exit 10
else
dataset=${params.h5_dataset}
fi
else
echo "WARNING: None of H1, H3, or H5 were detected in the HA consensus file. No dataset available. Exiting."
FOUND=false
dataset="NONE"
fi
if [ \$FOUND == true ]; then
exit 10
fi
nextclade run --input-dataset \$dataset \
nextclade run \
--input-dataset \$dataset \
--output-fasta=${sample_id}_nextclade.aligned.fasta.gz \
--output-json=${sample_id}_nextclade.json \
--output-ndjson=${sample_id}_nextclade.ndjson \
--output-csv=${sample_id}_nextclade.csv \
--output-tsv=${sample_id}_nextclade.tsv \
--output-tree=${sample_id}_nextclade_auspice.json \
--output-translations=${sample_id}_nextclade_{gene}.translation.fasta.gz \
${sample_id}_HA_consensus.fa
LOCATION="\${dataset}\\n"
VERSION="\$(grep "tag" \${dataset}/tag.json)\\n"
else
LOCATION="NONE_INVALID_HA_TYPE\\n"
VERSION="NONE_INVALID_HA_TYPE\\n"
fi
${ha_consensus_seq}
"""
}
44 changes: 37 additions & 7 deletions modules/fluviewer.nf
@@ -1,18 +1,46 @@
process normalize_depth {
tag { sample_id }

input:
tuple val(sample_id), path(reads_1), path(reads_2)

output:
tuple val(sample_id), path("${sample_id}-normalized_R1.fastq.gz"), path("${sample_id}-normalized_R2.fastq.gz"), emit: normalized_reads
tuple val(sample_id), path("${sample_id}_normalize_depth_provenance.yml"), emit: provenance

script:
max_memory_gb = task.memory.toString().split(" ")[0]
"""
printf -- "- process_name: normalize_depth\\n" >> ${sample_id}_normalize_depth_provenance.yml
printf -- " tools:\\n" >> ${sample_id}_normalize_depth_provenance.yml
printf -- " - tool_name: bbnorm\\n" >> ${sample_id}_normalize_depth_provenance.yml
printf -- " tool_version: \$(bbnorm.sh --version)\\n" >> ${sample_id}_normalize_depth_provenance.yml
bbnorm.sh \
-Xmx${max_memory_gb}g \
in1=${reads_1} \
in2=${reads_2} \
out1=${sample_id}-normalized_R1.fastq \
out2=${sample_id}-normalized_R2.fastq \
target=${params.target_depth}
gzip ${sample_id}-normalized_R1.fastq
gzip ${sample_id}-normalized_R2.fastq
"""

}

process fluviewer {

tag { sample_id }

memory { 50.GB * task.attempt }
errorStrategy { (task.exitStatus == 2 && task.attempt <= maxRetries) ? 'retry' : 'ignore' }
maxRetries 5
errorStrategy 'ignore'

publishDir "${params.outdir}/${sample_id}", pattern: "${sample_id}*", mode:'copy', saveAs: { filename -> filename.split("/").last() }
publishDir "${params.outdir}/${sample_id}", pattern: "*tsv", mode:'copy', saveAs: { filename -> filename.split("/").last() }
//publishDir "${params.outdir}/${sample_id}", pattern: "${sample_id}_fluviewer/spades_output", mode:'copy', saveAs: { filename -> "spades_output" }
publishDir "${params.outdir}/${sample_id}", pattern: ".*", mode:'copy'
publishDir "${params.outdir}/${sample_id}", pattern: "logs", mode:'copy', saveAs: { filename -> "fluviewer_logs" }



input:
tuple val(sample_id), path(reads_1), path(reads_2), path(db)

@@ -21,6 +49,7 @@ process fluviewer {
tuple val(sample_id), path("${sample_id}*.bam.bai"), emit: alignmentindex, optional: true
tuple val(sample_id), path("${sample_id}*report.tsv"), emit: reports, optional: true
tuple val(sample_id), path("${sample_id}*_consensus.fa"), emit: consensus_seqs, optional: true
tuple val(sample_id), path("${sample_id}_HA_consensus.fa"), emit: ha_consensus_seq, optional: true
tuple val(sample_id), path("${sample_id}*consensus_seqs.fa"), emit: consensus_main
tuple val(sample_id), path("${sample_id}*_HPAI.tsv"), emit: HPAI, optional: true
tuple val(sample_id), path("${sample_id}*_cov.png"), emit: coverage_plot, optional: true
@@ -55,6 +84,7 @@ process fluviewer {
--min-identity ${params.min_ident} \
--max-memory 40 \
--disable-garbage-collection \
--skip-depth-normalization \
--force && EXITCODE=\$?) \
|| EXITCODE=\$?
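One detail from the new normalize_depth process that may not be obvious: the JVM heap passed to bbnorm.sh is derived from the process memory directive. A small Groovy sketch of that conversion (the '50 GB' value is the one set for normalize_depth in nextflow.config below; the snippet is illustrative only):

def memory = '50 GB'                      // stand-in for task.memory.toString()
def max_memory_gb = memory.split(" ")[0]  // -> "50"
println "-Xmx${max_memory_gb}g"           // -> -Xmx50g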
12 changes: 9 additions & 3 deletions nextflow.config
@@ -22,13 +22,14 @@ params {
pipeline_short_name = parsePipelineName(manifest.toMap().get('name'))
pipeline_minor_version = parseMinorVersion(manifest.toMap().get('version'))
run_name = parseRunName( fastq_input )
target_depth = 200
min_depth = 10
min_q = 30
min_cov = 25
min_ident = 95
h1_dataset = ''
h3_dataset = ''
h5_dataset = ''
h1_dataset = 'NO_FILE'
h3_dataset = 'NO_FILE'
h5_dataset = 'NO_FILE'
db = 'NO_FILE'
blastx_subtype_db = "${projectDir}/assets/blastx/blastx_subtype_db.fasta"
genoflu_cache = "${projectDir}/assets/genoflu"
Expand Down Expand Up @@ -78,6 +79,11 @@ profiles {
}

process {
withName: normalize_depth {
conda = "$baseDir/environments/fluviewer.yml"
memory = '50 GB'
}

withName: fluviewer {
conda = "$baseDir/environments/fluviewer.yml"
cpus = 8
