From 4a26067455b3f4d38332d0d638189dd121d76d4e Mon Sep 17 00:00:00 2001
From: Dan Fornika
Date: Tue, 29 Aug 2023 12:28:23 -0700
Subject: [PATCH] Robust low coverage (#37)

* Handle low-coverage samples

* Fix duplicated sample ID
---
NOTE(review): the line breaks and indentation of this patch were restored
from a copy in which all whitespace had been collapsed. Hunk line counts
match the hunk headers, but leading indentation, the placement of blank
context lines, and the number of spaces inside the quoted printf strings
are reconstructed and must be confirmed against the target tree before
applying (consider `git apply --ignore-whitespace`).

Summary of the change, as visible in the hunks below:
  - bin/check_for_empty_assembly.py: new script; when the file given by
    --assembly is zero bytes it is overwritten with a one-record
    placeholder FASTA (">1" / "N").
  - modules/unicycler.nf: runs that script on assembly.fasta before the
    contig headers are rewritten with sed.
  - modules/quast.nf: passes `--min-contig 0` to quast and records the
    parameter in the provenance file.
  - modules/long_read_qc.nf: sets `errorStrategy 'ignore'` on the bandage
    process.

 bin/check_for_empty_assembly.py | 16 ++++++++++++++++
 modules/long_read_qc.nf         |  1 +
 modules/quast.nf                |  3 +++
 modules/unicycler.nf            |  1 +
 4 files changed, 21 insertions(+)
 create mode 100755 bin/check_for_empty_assembly.py

diff --git a/bin/check_for_empty_assembly.py b/bin/check_for_empty_assembly.py
new file mode 100755
index 0000000..09d3896
--- /dev/null
+++ b/bin/check_for_empty_assembly.py
@@ -0,0 +1,16 @@
+#!/usr/bin/env python3
+
+import argparse
+import os
+
+def main(args):
+    input_num_bytes = os.path.getsize(args.assembly)
+    if input_num_bytes == 0:
+        with open(args.assembly, 'w') as f:
+            f.write('>1\nN\n')
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--assembly', help='Input assembly to check', required=True)
+    args = parser.parse_args()
+    main(args)
diff --git a/modules/long_read_qc.nf b/modules/long_read_qc.nf
index a8ce0f5..4c3b745 100644
--- a/modules/long_read_qc.nf
+++ b/modules/long_read_qc.nf
@@ -80,6 +80,7 @@ process bandage {
     tag { sample_id + ' / ' + assembly_mode }
 
     executor 'local'
+    errorStrategy 'ignore'
 
     publishDir params.versioned_outdir ? "${params.outdir}/${sample_id}/${params.pipeline_short_name}-v${params.minor_version}-output" : "${params.outdir}/${sample_id}", pattern: "${sample_id}_${assembler}_${assembly_mode}_bandage.png", mode: 'copy'
 
diff --git a/modules/quast.nf b/modules/quast.nf
index 649d309..90cc93d 100644
--- a/modules/quast.nf
+++ b/modules/quast.nf
@@ -20,11 +20,14 @@ process quast {
     printf -- " value: null\\n" >> ${sample_id}_${assembler}_${assembly_mode}_quast_provenance.yml
     printf -- " - parameter: --fast\\n" >> ${sample_id}_${assembler}_${assembly_mode}_quast_provenance.yml
     printf -- " value: null\\n" >> ${sample_id}_${assembler}_${assembly_mode}_quast_provenance.yml
+    printf -- " - parameter: --min-contig\\n" >> ${sample_id}_${assembler}_${assembly_mode}_quast_provenance.yml
+    printf -- " value: 0\\n" >> ${sample_id}_${assembler}_${assembly_mode}_quast_provenance.yml
 
     quast \
         --threads ${task.cpus} \
         --space-efficient \
         --fast \
+        --min-contig 0 \
         --output-dir ${sample_id} \
         ${assembly}
 
diff --git a/modules/unicycler.nf b/modules/unicycler.nf
index 0b24947..23c73cd 100644
--- a/modules/unicycler.nf
+++ b/modules/unicycler.nf
@@ -29,6 +29,7 @@ process unicycler {
         ${long_reads} \
         -o ${sample_id}_assembly
 
+    check_for_empty_assembly.py --assembly ${sample_id}_assembly/assembly.fasta
     sed 's/^>/>${sample_id}_/' ${sample_id}_assembly/assembly.fasta > ${sample_id}_unicycler_${assembly_mode}.fa
     cp ${sample_id}_assembly/assembly.gfa ${sample_id}_unicycler_${assembly_mode}.gfa
     cp ${sample_id}_assembly/unicycler.log ${sample_id}_unicycler_${assembly_mode}.log