From f18c3da328ec438f551169315831310e0adfda29 Mon Sep 17 00:00:00 2001 From: skchronicles Date: Tue, 6 Feb 2024 12:30:42 -0500 Subject: [PATCH] Patch: Using tmpdir/lscratch for fastqc due to gpfs filesystem issue. --- rna-seek | 2 +- workflow/rules/paired-end.smk | 55 +++++++++++++++++++++++++++++++---- workflow/rules/single-end.smk | 51 ++++++++++++++++++++++++++++++-- 3 files changed, 99 insertions(+), 9 deletions(-) diff --git a/rna-seek b/rna-seek index 7eebc8b..77ca193 100755 --- a/rna-seek +++ b/rna-seek @@ -21,7 +21,7 @@ import argparse # potential python3 3rd party package, added in python/3.5 # Pipeline Metadata and globals __author__ = 'Skyler Kuhn' -__version__ = 'v1.9.3' +__version__ = 'v1.9.4' __email__ = 'kuhnsa@nih.gov' __home__ = os.path.dirname(os.path.abspath(__file__)) _name = os.path.basename(sys.argv[0]) diff --git a/workflow/rules/paired-end.smk b/workflow/rules/paired-end.smk index d70c66b..8576df0 100644 --- a/workflow/rules/paired-end.smk +++ b/workflow/rules/paired-end.smk @@ -54,11 +54,35 @@ rule rawfastqc: params: rname='pl:rawfastqc', outdir=join(workpath,"rawQC"), + tmpdir=tmpdir, threads: int(allocated("threads", "rawfastqc", cluster)), envmodules: config['bin'][pfamily]['tool_versions']['FASTQCVER'] container: config['images']['fastqc'] shell: """ - fastqc {input.R1} {input.R2} -t {threads} -o {params.outdir}; + # Sets up a temporary directory for + # intermediate files with built-in + # mechanism for deletion on exit + if [ ! -d "{params.tmpdir}" ]; then mkdir -p "{params.tmpdir}"; fi + tmp=$(mktemp -d -p "{params.tmpdir}") + trap 'rm -rf "${{tmp}}"' EXIT + + # Running fastqc with local + # disk or a tmpdir, fastqc + # has been observed to lock + # up gpfs filesystems, adding + # this on request by HPC staff. 
+ fastqc \\ + {input.R1} \\ + {input.R2} \\ + -t {threads} \\ + -o "${{tmp}}" + + # Copy output files from tmpdir + # to output directory + find "${{tmp}}" \\ + -type f \\ + \\( -name '*.html' -o -name '*.zip' \\) \\ + -exec cp {{}} {params.outdir} \\; """ @@ -76,8 +100,6 @@ rule trim_pe: file1=join(workpath,"{name}.R1.fastq.gz"), file2=join(workpath,"{name}.R2.fastq.gz"), output: - #out1=temp(join(workpath,trim_dir,"{name}.R1.trim.fastq.gz")), - #out2=temp(join(workpath,trim_dir,"{name}.R2.trim.fastq.gz")) out1=join(workpath,trim_dir,"{name}.R1.trim.fastq.gz"), out2=join(workpath,trim_dir,"{name}.R2.trim.fastq.gz") params: @@ -119,12 +141,35 @@ rule fastqc: params: rname='pl:fastqc', outdir=join(workpath,"QC"), - getrl=join("workflow", "scripts", "get_read_length.py"), + tmpdir=tmpdir, threads: int(allocated("threads", "fastqc", cluster)), envmodules: config['bin'][pfamily]['tool_versions']['FASTQCVER'] container: config['images']['fastqc'] shell: """ - fastqc {input.R1} {input.R2} -t {threads} -o {params.outdir}; + # Sets up a temporary directory for + # intermediate files with built-in + # mechanism for deletion on exit + if [ ! -d "{params.tmpdir}" ]; then mkdir -p "{params.tmpdir}"; fi + tmp=$(mktemp -d -p "{params.tmpdir}") + trap 'rm -rf "${{tmp}}"' EXIT + + # Running fastqc with local + # disk or a tmpdir, fastqc + # has been observed to lock + # up gpfs filesystems, adding + # this on request by HPC staff. 
+ fastqc \\ + {input.R1} \\ + {input.R2} \\ + -t {threads} \\ + -o "${{tmp}}" + + # Copy output files from tmpdir + # to output directory + find "${{tmp}}" \\ + -type f \\ + \\( -name '*.html' -o -name '*.zip' \\) \\ + -exec cp {{}} {params.outdir} \\; """ diff --git a/workflow/rules/single-end.smk b/workflow/rules/single-end.smk index bc02783..251ab66 100644 --- a/workflow/rules/single-end.smk +++ b/workflow/rules/single-end.smk @@ -49,11 +49,34 @@ rule rawfastqc: params: rname='pl:rawfastqc', outdir=join(workpath,"rawQC"), + tmpdir=tmpdir, threads: int(allocated("threads", "rawfastqc", cluster)), envmodules: config['bin'][pfamily]['tool_versions']['FASTQCVER'] container: config['images']['fastqc'] shell: """ - fastqc {input.R1} -t {threads} -o {params.outdir}; + # Sets up a temporary directory for + # intermediate files with built-in + # mechanism for deletion on exit + if [ ! -d "{params.tmpdir}" ]; then mkdir -p "{params.tmpdir}"; fi + tmp=$(mktemp -d -p "{params.tmpdir}") + trap 'rm -rf "${{tmp}}"' EXIT + + # Running fastqc with local + # disk or a tmpdir, fastqc + # has been observed to lock + # up gpfs filesystems, adding + # this on request by HPC staff + fastqc \\ + {input.R1} \\ + -t {threads} \\ + -o "${{tmp}}" + + # Copy output files from tmpdir + # to output directory + find "${{tmp}}" \\ + -type f \\ + \\( -name '*.html' -o -name '*.zip' \\) \\ + -exec cp {{}} {params.outdir} \\; """ if config['options']['small_rna']: @@ -141,12 +164,34 @@ rule fastqc: params: rname='pl:fastqc', outdir=join(workpath,"QC"), - getrl=join("workflow", "scripts", "get_read_length.py"), + tmpdir = tmpdir, threads: int(allocated("threads", "fastqc", cluster)), envmodules: config['bin'][pfamily]['tool_versions']['FASTQCVER'] container: config['images']['fastqc'] shell: """ - fastqc {input} -t {threads} -o {params.outdir}; + # Sets up a temporary directory for + # intermediate files with built-in + # mechanism for deletion on exit + if [ ! 
-d "{params.tmpdir}" ]; then mkdir -p "{params.tmpdir}"; fi + tmp=$(mktemp -d -p "{params.tmpdir}") + trap 'rm -rf "${{tmp}}"' EXIT + + # Running fastqc with local + # disk or a tmpdir, fastqc + # has been observed to lock + # up gpfs filesystems, adding + # this on request by HPC staff + fastqc \\ + {input} \\ + -t {threads} \\ + -o "${{tmp}}" + + # Copy output files from tmpdir + # to output directory + find "${{tmp}}" \\ + -type f \\ + \\( -name '*.html' -o -name '*.zip' \\) \\ + -exec cp {{}} {params.outdir} \\; """ rule fastq_screen: