From 4a6d9e3ead685aba698b477e4c366bbc5f43b92f Mon Sep 17 00:00:00 2001 From: Maxime Borry Date: Wed, 13 Mar 2024 15:20:55 +0000 Subject: [PATCH 1/5] feat: allow adna co-binning --- lib/WorkflowMag.groovy | 3 --- subworkflows/local/ancient_dna.nf | 9 ++++++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/lib/WorkflowMag.groovy b/lib/WorkflowMag.groovy index cd1a4456..2a7521bc 100755 --- a/lib/WorkflowMag.groovy +++ b/lib/WorkflowMag.groovy @@ -19,9 +19,6 @@ class WorkflowMag { if (params.coassemble_group && params.binning_map_mode == 'own') { Nextflow.error("Invalid combination of parameter '--binning_map_mode own' and parameter '--coassemble_group'. Select either 'all' or 'group' mapping mode when performing group-wise co-assembly.") } - if (params.ancient_dna && params.binning_map_mode != 'own') { - Nextflow.error("Invalid combination of parameter '--binning_map_mode' and parameter '--ancient_dna'. Ancient DNA mode can only be executed with --binning_map_mode own. You supplied: --binning_map_mode ${params.binning_map_mode}") - } // Check if specified cpus for SPAdes are available if ( params.spades_fix_cpus > params.max_cpus ) { diff --git a/subworkflows/local/ancient_dna.nf b/subworkflows/local/ancient_dna.nf index 3aa7814e..868b16ee 100644 --- a/subworkflows/local/ancient_dna.nf +++ b/subworkflows/local/ancient_dna.nf @@ -12,7 +12,14 @@ workflow ANCIENT_DNA_ASSEMBLY_VALIDATION { main: ch_versions = Channel.empty() - PYDAMAGE_ANALYZE(input.map {item -> [item[0], item[2], item[3]]}) + PYDAMAGE_ANALYZE( + input.map { + meta, contigs, bam, bai -> [ + meta, bam[0], bai[0] + ] + } + ) + PYDAMAGE_FILTER(PYDAMAGE_ANALYZE.out.csv) ch_versions = ch_versions.mix(PYDAMAGE_ANALYZE.out.versions.first()) From 8344d182e614b5a13b1d43ab9ab9441ebd946945 Mon Sep 17 00:00:00 2001 From: Maxime Borry Date: Wed, 13 Mar 2024 15:24:50 +0000 Subject: [PATCH 2/5] chore: update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md 
b/CHANGELOG.md index 64fd254c..5c6816a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` - [#581](https://github.com/nf-core/mag/pull/581) - Added explicit licence text to headers of all custom scripts (reported by @FriederikeHanssen and @maxibor, fix by @jfy133) +- [#602](https://github.com/nf-core/mag/pull/602) - Enables co-binning when using aDNA mode (added by @maxibor) ### `Fixed` From 7a9508b90e958bb8fb7ace9d05f90fcc19760629 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Tue, 26 Mar 2024 10:36:05 +0000 Subject: [PATCH 3/5] Fix gtdbtk classifywf crash when mashdb given --- CHANGELOG.md | 2 + modules.json | 2 +- modules/nf-core/gtdbtk/classifywf/main.nf | 20 +- .../gtdbtk/classifywf/tests/main.nf.test | 43 ++++ .../gtdbtk/classifywf/tests/main.nf.test.snap | 199 ++++++++++++++++++ .../nf-core/gtdbtk/classifywf/tests/tags.yml | 2 + 6 files changed, 261 insertions(+), 7 deletions(-) create mode 100644 modules/nf-core/gtdbtk/classifywf/tests/main.nf.test create mode 100644 modules/nf-core/gtdbtk/classifywf/tests/main.nf.test.snap create mode 100644 modules/nf-core/gtdbtk/classifywf/tests/tags.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 64fd254c..1cfe2143 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` +- [#606](https://github.com/nf-core/mag/pull/606) - Prevent pipeline crashing when premade mashdb given to or no alignments found with GTDB-TK_CLASSIFYWF (reported by @cedwardson4, fix by @jfy133) + ### `Dependencies` ### `Deprecated` diff --git a/modules.json b/modules.json index c8023704..c62a1a2c 100644 --- a/modules.json +++ b/modules.json @@ -118,7 +118,7 @@ }, "gtdbtk/classifywf": { "branch": "master", - "git_sha": "9bbc6a88ce3004ae4bc9f84cef762484dc2c95e5", + "git_sha": "0735b6d2b509cbb5cf71d15fda819cd7392722fe", 
"installed_by": ["modules"] }, "gunc/downloaddb": { diff --git a/modules/nf-core/gtdbtk/classifywf/main.nf b/modules/nf-core/gtdbtk/classifywf/main.nf index 6d9733ba..14dd28a4 100644 --- a/modules/nf-core/gtdbtk/classifywf/main.nf +++ b/modules/nf-core/gtdbtk/classifywf/main.nf @@ -51,11 +51,19 @@ process GTDBTK_CLASSIFYWF { --min_perc_aa $params.gtdbtk_min_perc_aa \\ --min_af $params.gtdbtk_min_af - mv classify/* . + ## If mash db given, classify/ and identify/ directories won't be created + if [[ -d classify/ ]]; then + mv classify/* . + fi - mv identify/* . + if [[ -d identify/ ]]; then + mv identify/* . + fi - mv align/* .\ + ## If nothing aligns, no output, so only run + if [[ -d align/ ]]; then + mv align/* . + fi mv gtdbtk.log "gtdbtk.${prefix}.log" @@ -74,10 +82,10 @@ process GTDBTK_CLASSIFYWF { prefix = task.ext.prefix ?: "${meta.id}" """ touch gtdbtk.${prefix}.stub.summary.tsv - touch gtdbtk.${prefix}.stub.classify.tree.gz + echo "" | gzip > gtdbtk.${prefix}.stub.classify.tree.gz touch gtdbtk.${prefix}.stub.markers_summary.tsv - touch gtdbtk.${prefix}.stub.msa.fasta.gz - touch gtdbtk.${prefix}.stub.user_msa.fasta.gz + echo "" | gzip > gtdbtk.${prefix}.stub.msa.fasta.gz + echo "" | gzip > gtdbtk.${prefix}.stub.user_msa.fasta.gz touch gtdbtk.${prefix}.stub.filtered.tsv touch gtdbtk.${prefix}.log touch gtdbtk.${prefix}.warnings.log diff --git a/modules/nf-core/gtdbtk/classifywf/tests/main.nf.test b/modules/nf-core/gtdbtk/classifywf/tests/main.nf.test new file mode 100644 index 00000000..60bedb68 --- /dev/null +++ b/modules/nf-core/gtdbtk/classifywf/tests/main.nf.test @@ -0,0 +1,43 @@ +nextflow_process { + + name "Test Process GTDBTK_CLASSIFYWF" + script "../main.nf" + process "GTDBTK_CLASSIFYWF" + + tag "modules" + tag "modules_nfcore" + tag "gtdbtk" + tag "gtdbtk/classifywf" + + // Only stub test is possible due to very large required database (>70GB) + test("sarscov2 - genome fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ 
+ [ id:'test', single_end:false, assembler:'SPADES' ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/contigs.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fasta/scaffolds.fasta', checkIfExists: true), + ] + ] + input[1] = [[], []] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gtdbtk/classifywf/tests/main.nf.test.snap b/modules/nf-core/gtdbtk/classifywf/tests/main.nf.test.snap new file mode 100644 index 00000000..e821084c --- /dev/null +++ b/modules/nf-core/gtdbtk/classifywf/tests/main.nf.test.snap @@ -0,0 +1,199 @@ +{ + "sarscov2 - genome fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false, + "assembler": "SPADES" + }, + "gtdbtk.test.stub.summary.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false, + "assembler": "SPADES" + }, + "gtdbtk.test.stub.classify.tree.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false, + "assembler": "SPADES" + }, + "gtdbtk.test.stub.markers_summary.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false, + "assembler": "SPADES" + }, + "gtdbtk.test.stub.msa.fasta.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false, + "assembler": "SPADES" + }, + "gtdbtk.test.stub.user_msa.fasta.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false, + "assembler": "SPADES" + }, + "gtdbtk.test.stub.filtered.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false, + "assembler": "SPADES" + }, + 
"gtdbtk.test.failed_genomes.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": false, + "assembler": "SPADES" + }, + "gtdbtk.test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": false, + "assembler": "SPADES" + }, + "gtdbtk.test.warnings.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "9": [ + "versions.yml:md5,a8ab755bce9f17684f235d49ab99f6d2" + ], + "failed": [ + [ + { + "id": "test", + "single_end": false, + "assembler": "SPADES" + }, + "gtdbtk.test.failed_genomes.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "filtered": [ + [ + { + "id": "test", + "single_end": false, + "assembler": "SPADES" + }, + "gtdbtk.test.stub.filtered.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false, + "assembler": "SPADES" + }, + "gtdbtk.test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "markers": [ + [ + { + "id": "test", + "single_end": false, + "assembler": "SPADES" + }, + "gtdbtk.test.stub.markers_summary.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "msa": [ + [ + { + "id": "test", + "single_end": false, + "assembler": "SPADES" + }, + "gtdbtk.test.stub.msa.fasta.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "summary": [ + [ + { + "id": "test", + "single_end": false, + "assembler": "SPADES" + }, + "gtdbtk.test.stub.summary.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tree": [ + [ + { + "id": "test", + "single_end": false, + "assembler": "SPADES" + }, + "gtdbtk.test.stub.classify.tree.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "user_msa": [ + [ + { + "id": "test", + "single_end": false, + "assembler": "SPADES" + }, + "gtdbtk.test.stub.user_msa.fasta.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,a8ab755bce9f17684f235d49ab99f6d2" + ], + "warnings": [ + [ + { + "id": "test", + "single_end": false, + "assembler": "SPADES" + }, + 
"gtdbtk.test.warnings.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-26T09:39:21.632259941" + } +} \ No newline at end of file diff --git a/modules/nf-core/gtdbtk/classifywf/tests/tags.yml b/modules/nf-core/gtdbtk/classifywf/tests/tags.yml new file mode 100644 index 00000000..5d8badac --- /dev/null +++ b/modules/nf-core/gtdbtk/classifywf/tests/tags.yml @@ -0,0 +1,2 @@ +gtdbtk/classifywf: + - "modules/nf-core/gtdbtk/classifywf/**" From 6f870d9b475593caadeeb618215dc4e42c39a5c3 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Tue, 26 Mar 2024 10:37:11 +0000 Subject: [PATCH 4/5] improve changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1cfe2143..86bb12aa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` -- [#606](https://github.com/nf-core/mag/pull/606) - Prevent pipeline crashing when premade mashdb given to or no alignments found with GTDB-TK_CLASSIFYWF (reported by @cedwardson4, fix by @jfy133) +- [#606](https://github.com/nf-core/mag/pull/606) - Prevent pipeline crash when premade mashdb given to or no alignments found with GTDB-TK_CLASSIFYWF (reported by @cedwardson4, fix by @jfy133) ### `Dependencies` From 2f62ecdf29baf7c119a99dc1d0ddc85559a3b6b6 Mon Sep 17 00:00:00 2001 From: "James A.
Fellows Yates" Date: Thu, 18 Apr 2024 11:43:31 +0200 Subject: [PATCH 5/5] Apply suggestions from code review --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c6816a5..cc0f448a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,7 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` - [#581](https://github.com/nf-core/mag/pull/581) - Added explicit licence text to headers of all custom scripts (reported by @FriederikeHanssen and @maxibor, fix by @jfy133) -- [#602](https://github.com/nf-core/mag/pull/602) - Enables co-binning when using aDNA mode (added by @maxibor) +- [#602](https://github.com/nf-core/mag/pull/602) - Co-binning when using aDNA mode now enabled (added by @maxibor) ### `Fixed`