From 152b7734e59f3dc6556157961e2820538042dba6 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Wed, 11 Sep 2024 20:34:35 +0100 Subject: [PATCH 1/3] Introduced the use_work_dir_as_temp parameter like in the other pipelines --- .github/workflows/sanger_test.yml | 1 + CHANGELOG.md | 2 ++ conf/modules.config | 2 +- conf/test.config | 3 +++ conf/test_full.config | 3 +++ conf/test_raw.config | 3 +++ nextflow.config | 3 +++ nextflow_schema.json | 18 ++++++++++++++++++ 8 files changed, 34 insertions(+), 1 deletion(-) diff --git a/.github/workflows/sanger_test.yml b/.github/workflows/sanger_test.yml index 32849b2e..cc2f2a19 100644 --- a/.github/workflows/sanger_test.yml +++ b/.github/workflows/sanger_test.yml @@ -23,6 +23,7 @@ jobs: parameters: | { "outdir": "${{ secrets.TOWER_WORKDIR_PARENT }}/results/${{ github.repository }}/results-${{ env.REVISION }}", + "use_work_dir_as_temp": true, } profiles: test,sanger,singularity,cleanup - uses: actions/upload-artifact@v3 diff --git a/CHANGELOG.md b/CHANGELOG.md index 0cc1a361..78527774 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ The pipeline has now been validated for draft (unpublished) assemblies. for the original Yaml configuration files of the Snakemake version. - New option `--skip_taxon_filtering` to skip the taxon filtering in blast searches. Mostly relevant for draft assemblies. +- Introduced the `--use_work_dir_as_temp` parameter to avoid leaving files in `/tmp`. ### Parameters @@ -25,6 +26,7 @@ The pipeline has now been validated for draft (unpublished) assemblies. | --yaml | | | | --busco_lineages | | | --skip_taxon_filtering | +| | --use_work_dir_as_temp | > **NB:** Parameter has been **updated** if both old and new parameter information is present.
**NB:** Parameter has been **added** if just the new parameter information is present.
**NB:** Parameter has been **removed** if new parameter information isn't present. diff --git a/conf/modules.config b/conf/modules.config index d4b3f897..33424d9f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -81,7 +81,7 @@ process { } withName: "BUSCO" { - scratch = true + scratch = { !params.use_work_dir_as_temp } ext.args = { 'test' in workflow.profile.tokenize(',') ? // Additional configuration to speed processes up during testing. // Note: BUSCO *must* see the double-quotes around the parameters diff --git a/conf/test.config b/conf/test.config index 623cf3f9..8ad70cae 100644 --- a/conf/test.config +++ b/conf/test.config @@ -35,4 +35,7 @@ params { blastp = "${projectDir}/assets/test/mMelMel3.1.buscogenes.dmnd" blastx = "${projectDir}/assets/test/mMelMel3.1.buscoregions.dmnd" blastn = "${projectDir}/assets/test/nt_mMelMel3.1" + + // Need to be set to avoid overfilling /tmp + use_work_dir_as_temp = true } diff --git a/conf/test_full.config b/conf/test_full.config index 6af9eecb..ca78130e 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -30,4 +30,7 @@ params { blastp = "${projectDir}/assets/test_full/gfLaeSulp1.1.buscogenes.dmnd" blastx = "${projectDir}/assets/test_full/gfLaeSulp1.1.buscoregions.dmnd" blastn = "${projectDir}/assets/test_full/nt_gfLaeSulp1.1" + + // Need to be set to avoid overfilling /tmp + use_work_dir_as_temp = true } diff --git a/conf/test_raw.config b/conf/test_raw.config index 47cc4267..0cf1d16f 100644 --- a/conf/test_raw.config +++ b/conf/test_raw.config @@ -36,4 +36,7 @@ params { blastp = "${projectDir}/assets/test/mMelMel3.1.buscogenes.dmnd" blastx = "${projectDir}/assets/test/mMelMel3.1.buscoregions.dmnd" blastn = "${projectDir}/assets/test/nt_mMelMel3.1/" + + // Need to be set to avoid overfilling /tmp + use_work_dir_as_temp = true } diff --git a/nextflow.config b/nextflow.config index 319b9b3d..d08bd163 100644 --- a/nextflow.config +++ b/nextflow.config @@ -35,6 +35,9 @@ params { blastn = null 
skip_taxon_filtering = false + // Execution options + use_work_dir_as_temp = false + // MultiQC options multiqc_config = null multiqc_title = null diff --git a/nextflow_schema.json b/nextflow_schema.json index e838ab04..e722369d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -148,6 +148,21 @@ } } }, + "execution": { + "title": "Execution", + "type": "object", + "description": "Control the execution of the pipeline.", + "default": "", + "properties": { + "use_work_dir_as_temp": { + "type": "boolean", + "description": "Set to true to make tools (e.g. sort, FastK, MerquryFK) use the work directory for their temporary files, rather than the system default.", + "fa_icon": "fas fa-arrow-circle-down", + "hidden": true + } + }, + "fa_icon": "fas fa-running" + }, "institutional_config_options": { "title": "Institutional config options", "type": "object", @@ -353,6 +368,9 @@ { "$ref": "#/definitions/databases" }, + { + "$ref": "#/definitions/execution" + }, { "$ref": "#/definitions/institutional_config_options" }, From 3ddee538238aeaa6a098e4a384ba2724d9e3a1f5 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Wed, 11 Sep 2024 20:34:57 +0100 Subject: [PATCH 2/3] For large genomes, don't run Busco off /tmp --- conf/modules.config | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 33424d9f..1193cf5f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -81,7 +81,8 @@ process { } withName: "BUSCO" { - scratch = { !params.use_work_dir_as_temp } + // Only obey "use_work_dir_as_temp" for large genomes (genome_size >= 2000000000); smaller ones always use scratch + scratch = { !params.use_work_dir_as_temp || (meta.genome_size < 2000000000) } ext.args = { 'test' in workflow.profile.tokenize(',') ? // Additional configuration to speed processes up during testing. 
// Note: BUSCO *must* see the double-quotes around the parameters From 5a581b1ec5bda739218fb5909be6d4ec12fc044c Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Wed, 11 Sep 2024 20:43:28 +0100 Subject: [PATCH 3/3] Getting ready for the release --- CHANGELOG.md | 2 +- nextflow.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 78527774..050399ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## – Bellsprout – [] +## [[0.6.0](https://github.com/sanger-tol/blobtoolkit/releases/tag/0.6.0)] – Bellsprout – [2024-09-13] The pipeline has now been validated for draft (unpublished) assemblies. diff --git a/nextflow.config b/nextflow.config index d08bd163..18994df9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -249,7 +249,7 @@ manifest { description = """Quality assessment of genome assemblies""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '0.6.0-dev' + version = '0.6.0' doi = '10.5281/zenodo.7949058' }