diff --git a/CHANGELOG.md b/CHANGELOG.md index a902cd7b..f46adf9c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,10 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- [#708](https://github.com/nf-core/mag/pull/708) - Added `--exclude_unbins_from_postbinning` parameter to exclude unbinned contigs from post-binning processes, speeding up Prokka in some cases (added by @dialvarezs) + ### `Changed` ### `Fixed` +- [#708](https://github.com/nf-core/mag/pull/708) - Fixed channel passed as GUNC input (added by @dialvarezs) + ### `Dependencies` ### `Deprecated` diff --git a/nextflow.config b/nextflow.config index 07325acf..1ecabbdc 100644 --- a/nextflow.config +++ b/nextflow.config @@ -121,6 +121,7 @@ params { refine_bins_dastool = false refine_bins_dastool_threshold = 0.5 postbinning_input = 'raw_bins_only' + exclude_unbins_from_postbinning = false // Bin QC skip_binqc = false diff --git a/nextflow_schema.json b/nextflow_schema.json index ceb3ac08..0875606d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -704,6 +704,12 @@ "type": "integer", "default": 3000, "description": "Minimum contig length for Tiara to use for domain classification. For accurate classification, should be longer than 3000 bp." 
+ }, + "exclude_unbins_from_postbinning": { + "type": "boolean", + "description": "Exclude unbinned contigs in the post-binning steps (bin QC, taxonomic classification, and annotation steps).", + "help": "If you're not interested in assembly results that are not considered 'genome level', excluding unbinned contigs can greatly speed up downstream steps such as Prokka, which can be quite slow and spin up many tasks.", + "default": false } } }, diff --git a/workflows/mag.nf b/workflows/mag.nf index 7afb4316..356c39fe 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -769,7 +769,11 @@ workflow MAG { ch_input_for_postbinning_bins_unbins = ch_binning_results_bins.mix(ch_binning_results_unbins) } - DEPTHS(ch_input_for_postbinning_bins_unbins, BINNING.out.metabat2depths, ch_short_reads) + ch_input_for_postbinning = params.exclude_unbins_from_postbinning + ? ch_input_for_postbinning_bins + : ch_input_for_postbinning_bins_unbins + + DEPTHS(ch_input_for_postbinning, BINNING.out.metabat2depths, ch_short_reads) ch_input_for_binsummary = DEPTHS.out.depths_summary ch_versions = ch_versions.mix(DEPTHS.out.versions) @@ -777,7 +781,7 @@ workflow MAG { * Bin QC subworkflows: for checking bin completeness with either BUSCO, CHECKM, and/or GUNC */ - ch_input_bins_for_qc = ch_input_for_postbinning_bins_unbins.transpose() + ch_input_bins_for_qc = ch_input_for_postbinning.transpose() if (!params.skip_binqc && params.binqc_tool == 'busco') { /* @@ -821,16 +825,16 @@ workflow MAG { ch_versions = ch_versions.mix(GUNC_QC.out.versions) } else if (params.run_gunc) { - ch_input_bins_for_gunc = ch_input_for_postbinning_bins_unbins.filter { meta, bins -> + ch_input_bins_for_gunc = ch_input_for_postbinning.filter { meta, bins -> meta.domain != "eukarya" } - GUNC_QC(ch_input_bins_for_qc, ch_gunc_db, []) + GUNC_QC(ch_input_bins_for_gunc, ch_gunc_db, []) ch_versions = ch_versions.mix(GUNC_QC.out.versions) } ch_quast_bins_summary = Channel.empty() if (!params.skip_quast) { - 
ch_input_for_quast_bins = ch_input_for_postbinning_bins_unbins + ch_input_for_quast_bins = ch_input_for_postbinning .groupTuple() .map { meta, bins -> def new_bins = bins.flatten() @@ -859,7 +863,7 @@ workflow MAG { ch_cat_db = CAT_DB_GENERATE.out.db } CAT( - ch_input_for_postbinning_bins_unbins, + ch_input_for_postbinning, ch_cat_db ) // Group all classification results for each sample in a single file @@ -890,7 +894,7 @@ workflow MAG { ch_gtdbtk_summary = Channel.empty() if (gtdb) { - ch_gtdb_bins = ch_input_for_postbinning_bins_unbins.filter { meta, bins -> + ch_gtdb_bins = ch_input_for_postbinning.filter { meta, bins -> meta.domain != "eukarya" } @@ -925,7 +929,7 @@ workflow MAG { */ if (!params.skip_prokka) { - ch_bins_for_prokka = ch_input_for_postbinning_bins_unbins + ch_bins_for_prokka = ch_input_for_postbinning .transpose() .map { meta, bin -> def meta_new = meta + [id: bin.getBaseName()] @@ -944,7 +948,7 @@ workflow MAG { } if (!params.skip_metaeuk && (params.metaeuk_db || params.metaeuk_mmseqs_db)) { - ch_bins_for_metaeuk = ch_input_for_postbinning_bins_unbins + ch_bins_for_metaeuk = ch_input_for_postbinning .transpose() .filter { meta, bin -> meta.domain in ["eukarya", "unclassified"]