From f478d472163e3daf65f0970c4a7739143ba62050 Mon Sep 17 00:00:00 2001
From: Dan Fornika
Date: Fri, 18 Dec 2020 13:46:56 -0800
Subject: [PATCH] Add Reference Alignment + UTR Trimming, Dehosting and iVar update (#16)

* Index reference before samtools and ivar access the reference in callVariants step

* Try indexing the reference before callVariants

* Qc update (#56)

* Update qc.py to reflect changed standards

* Filter out Illumina Undetermined_S0

* Make sure to negate the matcher

* Add minReadsPerBarcode parameter for Nanopore workflow (#57)

The barcode directories of de-plexed Fastq input are filtered to exclude any
containing fewer than 5 files (guppy makes all of the barcode directories,
whether you used the barcodes or not). This patch changes the filter to scan
the files and count the records within to 1) avoid excluding barcodes where
many reads are in fewer than 5 files and 2) avoid including barcodes where
there are very few reads in more than 5 files.

This threshold can be changed with the new optional parameter
minReadsPerBarcode, which defaults to 100 reads.

Co-authored-by: Keith James

* Remove unnecessary samtools view -bS (#48)

Co-authored-by: ac55-sanger

* Added libxcb, default Qt library backend for graphics rendering, to dependencies (#60)

* use ivar default of 30 for illuminaKeepLen, and comment (#61)

Co-authored-by: David K. Jackson

* add basic host filter step

* remove hardcoded paths

* move host filter to within workflow

* synchronize host removal with SIGNAL

* limit host removal cpus

* Move performHostFilter to illuminaNcov workflow

* Add pysam dependency for filter_non_human_reads.py

* update ivar to 1.2.3

* increase minimum read length after trimming

* add step for filtering residual adapters missed by trimming

* fix samtools/pysam version in environment

* lower required match length for adapter check

* restrict short adapter matches to the suffix of the read

* update to ivar 1.3, add option for amplicon filtering

* remove CLIMB-specific modules/calls

* Align to ref (#15)

* Add alignment step using MAFFT and UTR-trimming step

* Remove extra = char

* Tidy up illumina environment

* Remove local executor definition

* Remove addition of whitespace

* Remove deletion of whitespace

* Remove addition of whitespace

* Publish dehosted fastqs

* Fix process name for alignment step in illumina workflow

* Set default viral_contig_name

* Add sampleId tag to performHostFilter process

* Enable testing against development branch

* Remove testing scripts not in use

* index bam files and publish indexes (#23)

* index bam files and publish indexes

* Move hostFilter to sequenceAnalysis, publish dehosted reads and consensus to nml_upload (#25)

Co-authored-by: Michael Laszloffy
Co-authored-by: Michael Laszloffy
Co-authored-by: Matt Bull
Co-authored-by: Keith James <47220353+kjsanger@users.noreply.github.com>
Co-authored-by: Keith James
Co-authored-by: Ashwini Chhipa <58439277+ac55-sanger@users.noreply.github.com>
Co-authored-by: ac55-sanger
Co-authored-by: nodrogluap
Co-authored-by: dkj
Co-authored-by: David K. Jackson
Co-authored-by: Jared Simpson
Co-authored-by: Lawrence Heisler
---
 .github/workflows/pull_request.yml    |   1 +
 bin/filter_non_human_reads.py         |  67 ++++++++++++++++
 bin/filter_residual_adapters.py       |  85 ++++++++++++++++++++
 conf/base.config                      |  21 ++---
 conf/illumina.config                  |   2 +-
 environments/extras.yml               |   2 +-
 environments/illumina/environment.yml |   6 +-
 modules/illumina.nf                   | 108 +++++++++++++++++++++++---
 modules/qc.nf                         |   2 +-
 modules/upload.nf                     |  24 +-----
 modules/utils.nf                      |  20 +++++
 workflows/articNcovNanopore.nf        |  14 ----
 workflows/illuminaNcov.nf             |  37 ++++-----
 workflows/upload.nf                   |  14 ----
 14 files changed, 307 insertions(+), 96 deletions(-)
 create mode 100755 bin/filter_non_human_reads.py
 create mode 100755 bin/filter_residual_adapters.py
 create mode 100644 modules/utils.nf
 delete mode 100644 workflows/upload.nf

diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
index 09d2f1e4..d76b441d 100644
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@@ -2,6 +2,7 @@ on:
   pull_request:
     branches:
       - master
+      - development
 name: master Pull Request
 jobs:
   test:
diff --git a/bin/filter_non_human_reads.py b/bin/filter_non_human_reads.py
new file mode 100755
index 00000000..7eb1c16f
--- /dev/null
+++ b/bin/filter_non_human_reads.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+import pysam
+import sys
+import argparse
+
+def filter_reads(contig_name, input_sam_fp, output_bam_fp):
+
+    # use streams if args are None
+    if input_sam_fp:
+        input_sam = pysam.AlignmentFile(input_sam_fp, 'r')
+    else:
+        input_sam = pysam.AlignmentFile('-', 'r')
+
+    if output_bam_fp:
+        output_bam = pysam.AlignmentFile(output_bam_fp, 'wb',
+                                         template=input_sam)
+    else:
+        output_bam = pysam.AlignmentFile('-', 'wb', template=input_sam)
+
+    # keep a read if it isn't mapped or is mapped to the viral reference contig name
+    viral_reads = 0
+    human_reads = 0
+    unmapped_reads = 0
+
+    # iterate over input from BWA
+    for read in input_sam:
+        # only look at primary alignments
+        if not read.is_supplementary and not read.is_secondary:
+            if read.reference_name == contig_name:
+                output_bam.write(read)
+                viral_reads += 1
+            elif read.is_unmapped:
+                output_bam.write(read)
+                unmapped_reads += 1
+            else:
+                human_reads += 1
+
+    total_reads = viral_reads + human_reads + unmapped_reads
+
+    print(f"viral read count = {viral_reads} "
+          f"({viral_reads/total_reads * 100:.2f}%)", file=sys.stderr)
+    print(f"human read count = {human_reads} "
+          f"({human_reads/total_reads * 100:.2f}%)", file=sys.stderr)
+    print(f"unmapped read count = {unmapped_reads} "
+          f"({unmapped_reads/total_reads * 100:.2f}%)", file=sys.stderr)
+
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser(description="Get all reads that are "
+                                                 "unmapped or map to a "
+                                                 "specific reference "
+                                                 "contig")
+    parser.add_argument('-i', '--input', required=False, default=False,
+                        help="Input SAM formatted file (stdin used if "
+                             "not specified)")
+
+    parser.add_argument('-o', '--output', required=False, default=False,
+                        help="Output BAM formatted file (stdout used if not "
+                             "specified)")
+
+    parser.add_argument('-c', '--contig_name', required=False,
+                        default="MN908947.3",
+                        help="Contig name to retain e.g. viral")
+
+    args = parser.parse_args()
+
+    filter_reads(args.contig_name, args.input, args.output)
diff --git a/bin/filter_residual_adapters.py b/bin/filter_residual_adapters.py
new file mode 100755
index 00000000..9d124292
--- /dev/null
+++ b/bin/filter_residual_adapters.py
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+import gzip
+import pysam
+import sys
+import argparse
+
+# require that filename ends with the specified extension
+def require_extension(filename, ext):
+    assert(filename[-len(ext):] == ext)
+
+def contains_adapter(read_sequence, adapter_sequence, min_match_length):
+    # discard all reads that contain the full adapter
+    if adapter_sequence in read_sequence:
+        return True
+
+    # discard all reads where the suffix of length min_match_length is contained within the adapter
+    # this handles the case where there is a fairly good adapter match that is truncated
+    # by the end of the read
+    rl = len(read_sequence)
+    if rl >= min_match_length:
+        read_suffix = read_sequence[(rl - min_match_length):]
+        if read_suffix in adapter_sequence:
+            return True
+    return False
+
+def filter_reads(filter_sequences, min_match_length, input_R1_fp, input_R2_fp, output_R1_fp, output_R2_fp):
+
+    input_R1 = pysam.FastxFile(input_R1_fp)
+    input_R2 = pysam.FastxFile(input_R2_fp)
+
+    output_R1 = gzip.open(output_R1_fp, 'w')
+    output_R2 = gzip.open(output_R2_fp, 'w')
+
+    reads_filtered = 0
+    reads_kept = 0
+    for R1, R2 in zip(input_R1, input_R2):
+
+        discard = False
+        for f in filter_sequences:
+            if contains_adapter(R1.sequence, f, min_match_length) or contains_adapter(R2.sequence, f, min_match_length):
+                discard = True
+                break
+
+        if discard:
+            reads_filtered += 1
+            continue
+        else:
+            reads_kept += 1
+
+        fq1 = str(R1) + "\n"
+        fq2 = str(R2) + "\n"
+        output_R1.write(fq1.encode('utf-8'))
+        output_R2.write(fq2.encode('utf-8'))
+
+    print(f"reads kept: {reads_kept}, reads filtered: {reads_filtered}")
+
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser(description="Discard read pairs that contain sequencing adapters that are missed by a trimmer")
+    parser.add_argument('--input_R1', required=True, default=False,
+                        help="Input fasta/fastq file for first half of pair")
+
+    parser.add_argument('--input_R2', required=True, default=False,
+                        help="Input fasta/fastq file for second half of pair")
+
+    args = parser.parse_args()
+
+    # this is designed to only work in the nextflow pipeline so we put strict requirements on the input/output names
+    in_ext = ".fq.gz"
+    require_extension(args.input_R1, in_ext)
+    require_extension(args.input_R2, in_ext)
+
+    out_ext = "_posttrim_filter.fq.gz"
+    output_R1 = args.input_R1.replace(in_ext, out_ext)
+    output_R2 = args.input_R2.replace(in_ext, out_ext)
+
+    # Illumina adapters that are occasionally leftover in reads
+    S7 = "CCGAGCCCACGAGAC"
+    P7 = "ATCTCGTATGCCGTCTTCTGCTTG"
+
+    # require a minimum match between read and adapter
+    min_length = 10
+    filter_sequences = [ S7, P7 ]
+
+    filter_reads(filter_sequences, min_length, args.input_R1, args.input_R2, output_R1, output_R2)
diff --git a/conf/base.config b/conf/base.config
index 4ae74c3e..e6fdfda0 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -10,6 +10,10 @@ params{
   ref = false
   bed = false
   profile = false
+
+  // host filtering
+  composite_ref = false
+  viral_contig_name = 'MN908947.3'
 
   // Repo to download your primer scheme from
   schemeRepoURL = 'https://github.com/artic-network/artic-ncov2019.git'
@@ -21,7 +25,10 @@ params{
   scheme = 'nCoV-2019'
 
   // Scheme version
-  schemeVersion = 'V2'
+  schemeVersion = 'V3'
+
+  // For ivar's amplicon filter
+  primer_pairs_tsv = false
 
   // Run experimental medaka pipeline? Specify in the command using "--medaka"
   medaka = false
@@ -31,18 +38,6 @@ params{
 
   // Run nanopolish pipeline
   nanopolish = false
-
-  // Upload data to CLIMB?
-  upload = false
-
-  // CLIMB username
-  CLIMBUser = false
-
-  // CLIMB SSH pubkey
-  CLIMBkey = false
-
-  // CLIMB hostname
-  CLIMBHostname = false
 }
diff --git a/conf/illumina.config b/conf/illumina.config
index 4b39d48a..e5458111 100644
--- a/conf/illumina.config
+++ b/conf/illumina.config
@@ -28,7 +28,7 @@ params {
   allowNoprimer = true
 
   // Length of illumina reads to keep after primer trimming
-  illuminaKeepLen = 20
+  illuminaKeepLen = 50
 
   // Sliding window quality threshold for keeping reads after primer trimming (illumina)
   illuminaQualThreshold = 20
diff --git a/environments/extras.yml b/environments/extras.yml
index ba81b210..ced40cad 100644
--- a/environments/extras.yml
+++ b/environments/extras.yml
@@ -6,5 +6,5 @@ dependencies:
   - biopython
   - matplotlib
   - pandas
-  - samtools=1.10-0
+  - samtools
   - libxcb
diff --git a/environments/illumina/environment.yml b/environments/illumina/environment.yml
index 45eeae2c..311a53cf 100644
--- a/environments/illumina/environment.yml
+++ b/environments/illumina/environment.yml
@@ -8,6 +8,8 @@ dependencies:
   - biopython=1.74
   - pandas=0.23.0=py36_1
   - bwa=0.7.17=pl5.22.0_2
-  - samtools=1.9
+  - samtools=1.10
  - trim-galore=0.6.5
-  - ivar=1.2.2
+  - ivar=1.3
+  - pysam=0.16.0.1
+  - mafft=7.471
diff --git a/modules/illumina.nf b/modules/illumina.nf
index be08c1d3..131bef2f 100644
--- a/modules/illumina.nf
+++ b/modules/illumina.nf
@@ -27,6 +27,35 @@ process readTrimming {
     """
 }
 
+process filterResidualAdapters {
+    /**
+    * Discard reads that contain residual adapter sequences that indicate trimming may have failed
+    * @input tuple(sampleName, path(forward), path(reverse))
+    * @output untrim_filter_out tuple(sampleName, path("*1_posttrim_filter.fq.gz"), path("*2_posttrim_filter.fq.gz"))
+    */
+
+    tag { sampleName }
+
+    publishDir "${params.outdir}/${task.process.replaceAll(":","_")}", pattern: '*{1,2}_posttrim_filter.fq.gz', mode: 'copy'
+
+    cpus 2
+
+    input:
+    tuple(sampleName, path(forward), path(reverse))
+
+    output:
+    tuple(sampleName, path("*1_posttrim_filter.fq.gz"), path("*2_posttrim_filter.fq.gz")) optional true
+
+    script:
+    """
+    if [[ \$(gunzip -c ${forward} | head -n4 | wc -l) -eq 0 ]]; then
+      exit 0
+    else
+      filter_residual_adapters.py --input_R1 $forward --input_R2 $reverse
+    fi
+    """
+}
+
 process indexReference {
     /**
     * Indexes reference fasta file in the scheme repo using bwa.
@@ -59,18 +88,19 @@ process readMapping {
 
     label 'largecpu'
 
-    publishDir "${params.outdir}/${task.process.replaceAll(":","_")}", pattern: "${sampleName}.sorted.bam", mode: 'copy'
+    publishDir "${params.outdir}/${task.process.replaceAll(":","_")}", pattern: "${sampleName}.sorted{.bam,.bam.bai}", mode: 'copy'
 
     input:
     tuple sampleName, path(forward), path(reverse), path(ref), path("*")
 
     output:
-    tuple(sampleName, path("${sampleName}.sorted.bam"))
+    tuple(sampleName, path("${sampleName}.sorted.bam"), path("${sampleName}.sorted.bam.bai"))
 
     script:
     """
     bwa mem -t ${task.cpus} ${ref} ${forward} ${reverse} | \
     samtools sort -o ${sampleName}.sorted.bam
+    samtools index ${sampleName}.sorted.bam
     """
 }
 
@@ -78,15 +108,15 @@ process trimPrimerSequences {
 
     tag { sampleName }
 
-    publishDir "${params.outdir}/${task.process.replaceAll(":","_")}", pattern: "${sampleName}.mapped.bam", mode: 'copy'
-    publishDir "${params.outdir}/${task.process.replaceAll(":","_")}", pattern: "${sampleName}.mapped.primertrimmed.sorted.bam", mode: 'copy'
+    publishDir "${params.outdir}/${task.process.replaceAll(":","_")}", pattern: "${sampleName}.mapped{.bam,.bam.bai}", mode: 'copy'
+    publishDir "${params.outdir}/${task.process.replaceAll(":","_")}", pattern: "${sampleName}.mapped.primertrimmed.sorted{.bam,.bam.bai}", mode: 'copy'
 
     input:
-    tuple sampleName, path(bam), path(bedfile)
+    tuple sampleName, path(bam), path(bam_index), path(bedfile)
 
     output:
-    tuple sampleName, path("${sampleName}.mapped.bam"), emit: mapped
-    tuple sampleName, path("${sampleName}.mapped.primertrimmed.sorted.bam" ), emit: ptrim
+    tuple sampleName, path("${sampleName}.mapped.bam"), path("${sampleName}.mapped.bam.bai"), emit: mapped
+    tuple sampleName, path("${sampleName}.mapped.primertrimmed.sorted.bam"), path("${sampleName}.mapped.primertrimmed.sorted.bam.bai"), emit: ptrim
 
     script:
     if (params.allowNoprimer){
@@ -97,8 +127,9 @@ process trimPrimerSequences {
     """
     samtools view -F4 -o ${sampleName}.mapped.bam ${bam}
     samtools index ${sampleName}.mapped.bam
-    ${ivarCmd} -i ${sampleName}.mapped.bam -b ${bedfile} -m ${params.illuminaKeepLen} -q ${params.illuminaQualThreshold} -p ivar.out
+    ${ivarCmd} -i ${sampleName}.mapped.bam -b ${bedfile} -m ${params.illuminaKeepLen} -q ${params.illuminaQualThreshold} ${params.primer_pairs_tsv ? "-f ${params.primer_pairs_tsv}" : ""} -p ivar.out
     samtools sort -o ${sampleName}.mapped.primertrimmed.sorted.bam ivar.out.bam
+    samtools index ${sampleName}.mapped.primertrimmed.sorted.bam
     """
 }
 
@@ -109,13 +140,14 @@ process callVariants {
 
     publishDir "${params.outdir}/${task.process.replaceAll(":","_")}", pattern: "${sampleName}.variants.tsv", mode: 'copy'
 
     input:
-    tuple(sampleName, path(bam), path(ref))
+    tuple(sampleName, path(bam), path(bam_index), path(ref))
 
     output:
     tuple sampleName, path("${sampleName}.variants.tsv")
 
     script:
     """
+    samtools faidx ${ref}
     samtools mpileup -A -d 0 --reference ${ref} -B -Q 0 ${bam} |\
     ivar variants -r ${ref} -m ${params.ivarMinDepth} -p ${sampleName}.variants -q ${params.ivarMinVariantQuality} -t ${params.ivarMinFreqThreshold}
     """
 }
 
@@ -128,7 +160,7 @@ process makeConsensus {
 
     publishDir "${params.outdir}/${task.process.replaceAll(":","_")}", pattern: "${sampleName}.primertrimmed.consensus.fa", mode: 'copy'
 
     input:
-    tuple(sampleName, path(bam))
+    tuple(sampleName, path(bam), path(bam_index))
 
     output:
     tuple(sampleName, path("${sampleName}.primertrimmed.consensus.fa"))
 
@@ -163,3 +195,59 @@ process cramToFastq {
     """
 }
 
+process alignConsensusToReference {
+    /**
+    * Aligns consensus sequence against reference using mafft. Uses the --keeplength
+    * flag to guarantee that all alignments remain the same length as the reference.
+    */
+
+    tag { sampleName }
+
+    publishDir "${params.outdir}/${task.process.replaceAll(":","_")}", pattern: "${sampleName}.primertrimmed.consensus.aln.fa", mode: 'copy'
+
+    input:
+    tuple(sampleName, path(consensus), path(reference))
+
+    output:
+    tuple(sampleName, path("${sampleName}.primertrimmed.consensus.aln.fa"))
+
+    script:
+    // Convert multi-line fasta to single line
+    awk_string = '/^>/ {printf("\\n%s\\n", $0); next; } { printf("%s", $0); } END { printf("\\n"); }'
+    """
+    mafft \
+        --preservecase \
+        --keeplength \
+        --add \
+        ${consensus} \
+        ${reference} \
+        > ${sampleName}.with_ref.multi_line.alignment.fa
+    awk '${awk_string}' ${sampleName}.with_ref.multi_line.alignment.fa > ${sampleName}.with_ref.single_line.alignment.fa
+    tail -n 2 ${sampleName}.with_ref.single_line.alignment.fa > ${sampleName}.primertrimmed.consensus.aln.fa
+    """
+}
+
+process trimUTRFromAlignment {
+    /**
+    * Trim the aligned consensus to remove 5' and 3' UTR sequences.
+    */
+
+    tag { sampleName }
+
+    publishDir "${params.outdir}/${task.process.replaceAll(":","_")}", pattern: "${sampleName}.primertrimmed.consensus.aln.utr_trimmed.fa", mode: 'copy'
+
+    input:
+    tuple(sampleName, path(alignment))
+
+    output:
+    tuple(sampleName, path("${sampleName}.primertrimmed.consensus.aln.utr_trimmed.fa"))
+
+    script:
+    awk_string = '/^>/ { printf("%s\\n", $0); next; } { printf("%s", $0); } END { printf("\\n"); }'
+    """
+    echo -e "\$(head -n 1 ${alignment} | cut -c 2-):266-29674" > non_utr.txt
+    samtools faidx ${alignment}
+    samtools faidx -r non_utr.txt ${alignment} > ${sampleName}.primertrimmed.consensus.aln.utr_trimmed.multi_line.fa
+    awk '${awk_string}' ${sampleName}.primertrimmed.consensus.aln.utr_trimmed.multi_line.fa > ${sampleName}.primertrimmed.consensus.aln.utr_trimmed.fa
+    """
+}
diff --git a/modules/qc.nf b/modules/qc.nf
index a1db6fc8..b854db1c 100644
--- a/modules/qc.nf
+++ b/modules/qc.nf
@@ -6,7 +6,7 @@ process makeQCCSV {
     publishDir "${params.outdir}/qc_plots", pattern: "${sampleName}.depth.png", mode: 'copy'
 
     input:
-    tuple sampleName, path(bam), path(fasta), path(ref)
+    tuple sampleName, path(bam), path(bam_index), path(fasta), path(ref)
 
     output:
     path "${params.prefix}.${sampleName}.qc.csv", emit: csv
diff --git a/modules/upload.nf b/modules/upload.nf
index 6b9dceaf..da5f26cc 100644
--- a/modules/upload.nf
+++ b/modules/upload.nf
@@ -1,18 +1,17 @@
 process collateSamples {
     tag { sampleName }
 
-    publishDir "${params.outdir}/qc_pass_climb_upload/${params.prefix}", pattern: "${sampleName}", mode: 'copy'
+    publishDir "${params.outdir}/nml_upload/", pattern: "${params.prefix}/${sampleName}*", mode: 'copy'
 
     input:
-    tuple(sampleName, path(bam), path(fasta))
+    tuple(sampleName, path(fasta), path(fastq_r1), path(fastq_r2))
 
     output:
-    path("${sampleName}")
+    path("${params.prefix}/${sampleName}*")
 
     script:
     """
-    mkdir ${sampleName}
-    mv ${bam} ${fasta} ${sampleName}
+    mkdir ${params.prefix} && cp ${sampleName}* ${params.prefix}
     """
 }
 
@@ -31,18 +30,3 @@ process prepareUploadDirectory {
     """
 }
-
-process uploadToCLIMB {
-    tag { params.prefix }
-
-    input:
-    tuple(path(sshkey), path(uploadDir))
-
-    output:
-
-    script:
-    """
-    rsync -Lav -e "ssh -i ${sshkey} -l ${params.CLIMBUser}" ${uploadDir} ${params.CLIMBHostname}:upload/
-    """
-}
-
diff --git a/modules/utils.nf b/modules/utils.nf
new file mode 100644
index 00000000..881f0ed8
--- /dev/null
+++ b/modules/utils.nf
@@ -0,0 +1,20 @@
+process performHostFilter {
+    cpus 4
+
+    tag { sampleId }
+
+    publishDir "${params.outdir}/${task.process.replaceAll(":","_")}", pattern: "${sampleId}_hostfiltered_R*.fastq.gz", mode: 'copy'
+
+    input:
+    tuple(val(sampleId), path(forward), path(reverse))
+    output:
+    tuple sampleId, path("${sampleId}_hostfiltered_R1.fastq.gz"), path("${sampleId}_hostfiltered_R2.fastq.gz"), emit: fastqPairs
+
+    script:
+    """
+    bwa mem -t ${task.cpus} ${params.composite_ref} ${forward} ${reverse} | \
+        filter_non_human_reads.py -c ${params.viral_contig_name} > ${sampleId}.viral_and_nonmapping_reads.bam
+    samtools sort -n ${sampleId}.viral_and_nonmapping_reads.bam | \
+        samtools fastq -1 ${sampleId}_hostfiltered_R1.fastq.gz -2 ${sampleId}_hostfiltered_R2.fastq.gz -s ${sampleId}_singletons.fastq.gz -
+    """
+}
diff --git a/workflows/articNcovNanopore.nf b/workflows/articNcovNanopore.nf
index 9c2e8f53..2eebf235 100644
--- a/workflows/articNcovNanopore.nf
+++ b/workflows/articNcovNanopore.nf
@@ -17,11 +17,6 @@ include {bamToCram} from '../modules/out.nf'
 
 include {collateSamples} from '../modules/upload.nf'
 
-
-// import subworkflows
-include {CLIMBrsync} from './upload.nf'
-
-
 // workflow component for artic pipeline
 workflow sequenceAnalysisNanopolish {
     take:
@@ -132,13 +127,4 @@ workflow articNcovNanopore {
     } else if ( params.medaka ) {
         sequenceAnalysisMedaka(ch_fastqDirs)
     }
-
-    if ( params.upload ) {
-
-      Channel.fromPath("${params.CLIMBkey}")
-            .set{ ch_CLIMBkey }
-
-      CLIMBrsync(sequenceAnalysis.out.qc_pass, ch_CLIMBkey )
-    }
 }
-
diff --git a/workflows/illuminaNcov.nf b/workflows/illuminaNcov.nf
index 0eeeb036..26a90ba3 100644
--- a/workflows/illuminaNcov.nf
+++ b/workflows/illuminaNcov.nf
@@ -6,12 +6,16 @@ nextflow.preview.dsl = 2
 // import modules
 include {articDownloadScheme } from '../modules/artic.nf'
 include {readTrimming} from '../modules/illumina.nf'
+include {filterResidualAdapters} from '../modules/illumina.nf'
 include {indexReference} from '../modules/illumina.nf'
 include {readMapping} from '../modules/illumina.nf'
 include {trimPrimerSequences} from '../modules/illumina.nf'
 include {callVariants} from '../modules/illumina.nf'
 include {makeConsensus} from '../modules/illumina.nf'
 include {cramToFastq} from '../modules/illumina.nf'
+include {alignConsensusToReference} from '../modules/illumina.nf'
+include {trimUTRFromAlignment} from '../modules/illumina.nf'
+include {performHostFilter} from '../modules/utils'
 
 include {makeQCCSV} from '../modules/qc.nf'
 include {writeQCSummaryCSV} from '../modules/qc.nf'
@@ -20,10 +24,6 @@ include {bamToCram} from '../modules/out.nf'
 
 include {collateSamples} from '../modules/upload.nf'
 
-// import subworkflows
-include {CLIMBrsync} from './upload.nf'
-
-
 workflow prepareReferenceFiles {
     // Get reference fasta
     if (params.ref) {
@@ -87,9 +87,14 @@ workflow sequenceAnalysis {
       ch_bedFile
 
     main:
-      readTrimming(ch_filePairs)
 
-      readMapping(readTrimming.out.combine(ch_preparedRef))
+      performHostFilter(ch_filePairs)
+
+      readTrimming(performHostFilter.out)
+
+      filterResidualAdapters(readTrimming.out)
+
+      readMapping(filterResidualAdapters.out.combine(ch_preparedRef))
 
       trimPrimerSequences(readMapping.out.combine(ch_bedFile))
 
@@ -97,6 +102,10 @@ workflow sequenceAnalysis {
 
       makeConsensus(trimPrimerSequences.out.ptrim)
 
+      alignConsensusToReference(makeConsensus.out.combine(ch_preparedRef.map{ it[0] }))
+
+      trimUTRFromAlignment(alignConsensusToReference.out)
+
       makeQCCSV(trimPrimerSequences.out.ptrim.join(makeConsensus.out, by: 0)
                                    .combine(ch_preparedRef.map{ it[0] }))
 
@@ -111,9 +120,7 @@ workflow sequenceAnalysis {
       writeQCSummaryCSV(qc.header.concat(qc.pass).concat(qc.fail).toList())
 
-      collateSamples(qc.pass.map{ it[0] }
-                     .join(makeConsensus.out, by: 0)
-                     .join(trimPrimerSequences.out.mapped))
+      collateSamples(makeConsensus.out.join(performHostFilter.out.fastqPairs))
 
       if (params.outCram) {
         bamToCram(trimPrimerSequences.out.mapped.map{it[0] }
@@ -132,19 +139,9 @@ workflow ncovIllumina {
     main:
       // Build or download fasta, index and bedfile as required
      prepareReferenceFiles()
-
+
       // Actually do analysis
       sequenceAnalysis(ch_filePairs, prepareReferenceFiles.out.bwaindex, prepareReferenceFiles.out.bedfile)
-
-    // Upload files to CLIMB
-    if ( params.upload ) {
-
-      Channel.fromPath("${params.CLIMBkey}")
-            .set{ ch_CLIMBkey }
-
-      CLIMBrsync(sequenceAnalysis.out.qc_pass, ch_CLIMBkey )
-    }
-
 }
 
 workflow ncovIlluminaCram {
diff --git a/workflows/upload.nf b/workflows/upload.nf
deleted file mode 100644
index f645c41d..00000000
--- a/workflows/upload.nf
+++ /dev/null
@@ -1,14 +0,0 @@
-
-include {prepareUploadDirectory} from '../modules/upload.nf'
-include {uploadToCLIMB} from '../modules/upload.nf'
-
-workflow CLIMBrsync {
-    take:
-      ch_uploadDirectories
-      ch_CLIMBkey
-
-    main:
-      prepareUploadDirectory(ch_uploadDirectories.collect())
-      uploadToCLIMB(ch_CLIMBkey.combine(prepareUploadDirectory.out))
-}
-