diff --git a/workflows/VGP-assembly-v2/Purge-duplicates-one-haplotype-VGP6b/CHANGELOG.md b/workflows/VGP-assembly-v2/Purge-duplicates-one-haplotype-VGP6b/CHANGELOG.md index 10ed96a6a..788cbec76 100644 --- a/workflows/VGP-assembly-v2/Purge-duplicates-one-haplotype-VGP6b/CHANGELOG.md +++ b/workflows/VGP-assembly-v2/Purge-duplicates-one-haplotype-VGP6b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## [0.7.1] 2024-08-13 + +- Expose BUSCO parameters: + - Lineage database + - Lineage + ## [0.7] 2024-07-24 ### Added diff --git a/workflows/VGP-assembly-v2/Purge-duplicates-one-haplotype-VGP6b/Purging-duplicates-one-haplotype-VGP6b-tests.yml b/workflows/VGP-assembly-v2/Purge-duplicates-one-haplotype-VGP6b/Purging-duplicates-one-haplotype-VGP6b-tests.yml index 473b556b5..b6e202663 100644 --- a/workflows/VGP-assembly-v2/Purge-duplicates-one-haplotype-VGP6b/Purging-duplicates-one-haplotype-VGP6b-tests.yml +++ b/workflows/VGP-assembly-v2/Purge-duplicates-one-haplotype-VGP6b/Purging-duplicates-one-haplotype-VGP6b-tests.yml @@ -29,6 +29,8 @@ location: https://zenodo.org/records/10632760/files/Trimmed_yeast_reads_sub1.fastq.gz?download=1 Name of un-altered assembly: Hap2 Name of purged assembly: Hap1 + Database for Busco Lineage: v5 + Lineage: vertebrata_odb10 outputs: Removed haplotigs: asserts: diff --git a/workflows/VGP-assembly-v2/Purge-duplicates-one-haplotype-VGP6b/Purging-duplicates-one-haplotype-VGP6b.ga b/workflows/VGP-assembly-v2/Purge-duplicates-one-haplotype-VGP6b/Purging-duplicates-one-haplotype-VGP6b.ga index 41128c4e8..f85038526 100644 --- a/workflows/VGP-assembly-v2/Purge-duplicates-one-haplotype-VGP6b/Purging-duplicates-one-haplotype-VGP6b.ga +++ b/workflows/VGP-assembly-v2/Purge-duplicates-one-haplotype-VGP6b/Purging-duplicates-one-haplotype-VGP6b.ga @@ -15,7 +15,7 @@ ], "format-version": "0.1", "license": "CC-BY-4.0", - "release": "0.7", + "release": "0.7.1", "name": "Purging-duplicates-one-haplotype-VGP6b ", "report": { "markdown": "\n# Workflow Execution 
Report\n\nTime workflow was invoked\n\n```galaxy\ninvocation_time()\n```\n\n```galaxy\ngenerate_galaxy_version()\n```\n\n## Merqury results\n\nMerqury QV:\n\n```galaxy\nhistory_dataset_as_table(output=\"merqury_QV\")\n```\n\nMerqury completeness:\n\n```galaxy\nhistory_dataset_as_table(output=\"merqury_stats\")\n```\n\nMerqury plots:\n\nspectra-cn:\n\n\n```galaxy\nhistory_dataset_as_image(output=\"output_merqury.spectra-cn.fl\")\n```\n\n\nspectra-asm:\n\n\n```galaxy\nhistory_dataset_as_image(output=\"output_merqury.spectra-asm.fl\")\n```\n\n\nhap1 spectra-cn:\n\n\n```galaxy\nhistory_dataset_as_image(output=\"output_merqury.assembly_01.spectra-cn.fl\")\n```\n\n\nhap2 spectra-cn:\n\n```galaxy\nhistory_dataset_as_image(output=\"output_merqury.assembly_02.spectra-cn.fl\")\n```\n\n\n\n\n## BUSCO results (Vertebrata database)\n\nPurged Assembly\n\n\n```galaxy\nhistory_dataset_as_image(output=\"Busco on Purged Primary assembly: summary image\")\n```\n\n\n## Assembly statistics\n\n\n```galaxy\nhistory_dataset_as_table(output=\"clean_stats\")\n```\n\n\n## Nx and Size plots\n\n\n```galaxy\nhistory_dataset_as_image(output=\"Nx Plot\")\n```\n\n\n```galaxy\nhistory_dataset_as_image(output=\"Size Plot\")\n```\n\n\n\n## Current Workflow\n```galaxy\nworkflow_display()\n```\n" @@ -189,6 +189,60 @@ "errors": null, "id": 6, "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "Database for Busco Lineage" + } + ], + "label": "Database for Busco Lineage", + "name": "Input parameter", + "outputs": [], + "position": { + "left": 1705.6584960887728, + "top": 1674.0155168754275 + }, + "tool_id": null, + "tool_state": "{\"restrictOnConnections\": true, \"parameter_type\": \"text\", \"optional\": false}", + "tool_version": null, + "type": "parameter_input", + "uuid": "1562a9b8-e3b5-473f-9f77-e09693d1a98f", + "when": null, + "workflow_outputs": [] + }, + "7": { + "annotation": "Taxonomic lineage for the organism being assembled for Busco analysis", + "content_id": null, + 
"errors": null, + "id": 7, + "input_connections": {}, + "inputs": [ + { + "description": "Taxonomic lineage for the organism being assembled for Busco analysis", + "name": "Lineage" + } + ], + "label": "Lineage", + "name": "Input parameter", + "outputs": [], + "position": { + "left": 1718.7343010232019, + "top": 1810.02258129551 + }, + "tool_id": null, + "tool_state": "{\"restrictOnConnections\": true, \"parameter_type\": \"text\", \"optional\": false}", + "tool_version": null, + "type": "parameter_input", + "uuid": "f95d611e-99bd-43a3-8455-b87df2071d01", + "when": null, + "workflow_outputs": [] + }, + "8": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 8, + "input_connections": {}, "inputs": [ { "description": "", @@ -210,11 +264,11 @@ "when": null, "workflow_outputs": [] }, - "7": { + "9": { "annotation": "", "content_id": null, "errors": null, - "id": 7, + "id": 9, "input_connections": {}, "inputs": [ { @@ -237,11 +291,11 @@ "when": null, "workflow_outputs": [] }, - "8": { + "10": { "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/column_maker/Add_a_column1/2.0", "errors": null, - "id": 8, + "id": 10, "input_connections": { "input": { "id": 0, @@ -282,11 +336,11 @@ "when": null, "workflow_outputs": [] }, - "9": { + "11": { "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/minimap2/minimap2/2.28+galaxy0", "errors": null, - "id": 9, + "id": 11, "input_connections": { "fastq_input|fastq_input1": { "id": 1, @@ -338,11 +392,11 @@ "when": null, "workflow_outputs": [] }, - "10": { + "12": { "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/purge_dups/purge_dups/1.2.6+galaxy0", "errors": null, - "id": 10, + "id": 12, "input_connections": { "function_select|input": { "id": 2, @@ -383,11 +437,11 @@ "when": null, "workflow_outputs": [] }, - "11": { + "13": { "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/gfastats/gfastats/1.3.6+galaxy0", "errors": null, - "id": 11, + "id": 
13, "input_connections": { "input_file": { "id": 4, @@ -428,11 +482,11 @@ "when": null, "workflow_outputs": [] }, - "12": { + "14": { "annotation": "", "content_id": "param_value_from_file", "errors": null, - "id": 12, + "id": 14, "input_connections": { "input1": { "id": 5, @@ -474,14 +528,14 @@ "when": null, "workflow_outputs": [] }, - "13": { + "15": { "annotation": "", "content_id": "Cut1", "errors": null, - "id": 13, + "id": 15, "input_connections": { "input": { - "id": 8, + "id": 10, "output_name": "out_file1" } }, @@ -513,14 +567,14 @@ "when": null, "workflow_outputs": [] }, - "14": { + "16": { "annotation": "", "content_id": "Cut1", "errors": null, - "id": 14, + "id": 16, "input_connections": { "input": { - "id": 8, + "id": 10, "output_name": "out_file1" } }, @@ -552,18 +606,18 @@ "when": null, "workflow_outputs": [] }, - "15": { + "17": { "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/minimap2/minimap2/2.28+galaxy0", "errors": null, - "id": 15, + "id": 17, "input_connections": { "fastq_input|fastq_input1": { - "id": 10, + "id": 12, "output_name": "split_fasta" }, "reference_source|ref_file": { - "id": 10, + "id": 12, "output_name": "split_fasta" } }, @@ -603,12 +657,12 @@ "when": null, "workflow_outputs": [] }, - "16": { + "18": { "annotation": "", - "id": 16, + "id": 18, "input_connections": { "gfa_stats": { - "id": 11, + "id": 13, "input_subworkflow_step_id": 0, "output_name": "stats" } @@ -980,18 +1034,18 @@ "when": null, "workflow_outputs": [] }, - "17": { + "19": { "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/gfastats/gfastats/1.3.6+galaxy0", "errors": null, - "id": 17, + "id": 19, "input_connections": { "input_file": { "id": 4, "output_name": "output" }, "mode_condition|statistics_condition|expected_genomesize": { - "id": 12, + "id": 14, "output_name": "integer_param" } }, @@ -1029,14 +1083,14 @@ "when": null, "workflow_outputs": [] }, - "18": { + "20": { "annotation": "", "content_id": 
"param_value_from_file", "errors": null, - "id": 18, + "id": 20, "input_connections": { "input1": { - "id": 13, + "id": 15, "output_name": "out_file1" } }, @@ -1089,14 +1143,14 @@ "when": null, "workflow_outputs": [] }, - "19": { + "21": { "annotation": "", "content_id": "param_value_from_file", "errors": null, - "id": 19, + "id": 21, "input_connections": { "input1": { - "id": 14, + "id": 16, "output_name": "out_file1" } }, @@ -1128,14 +1182,14 @@ "when": null, "workflow_outputs": [] }, - "20": { + "22": { "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_awk_tool/9.3+galaxy1", "errors": null, - "id": 20, + "id": 22, "input_connections": { "infile": { - "id": 17, + "id": 19, "output_name": "stats" } }, @@ -1173,22 +1227,22 @@ "when": null, "workflow_outputs": [] }, - "21": { + "23": { "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/purge_dups/purge_dups/1.2.6+galaxy0", "errors": null, - "id": 21, + "id": 23, "input_connections": { "function_select|input": { - "id": 9, + "id": 11, "output_name": "alignment_output" }, "function_select|section_calcuts|transition": { - "id": 19, + "id": 21, "output_name": "integer_param" }, "function_select|section_calcuts|upper_depth": { - "id": 18, + "id": 20, "output_name": "integer_param" } }, @@ -1294,22 +1348,22 @@ } ] }, - "22": { + "24": { "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/purge_dups/purge_dups/1.2.6+galaxy0", "errors": null, - "id": 22, + "id": 24, "input_connections": { "function_select|coverage": { - "id": 21, + "id": 23, "output_name": "pbcstat_cov" }, "function_select|cutoffs": { - "id": 21, + "id": 23, "output_name": "calcuts_cutoff" }, "function_select|input": { - "id": 15, + "id": 17, "output_name": "alignment_output" } }, @@ -1360,14 +1414,14 @@ "when": null, "workflow_outputs": [] }, - "23": { + "25": { "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_grep_tool/9.3+galaxy1", "errors": 
null, - "id": 23, + "id": 25, "input_connections": { "infile": { - "id": 22, + "id": 24, "output_name": "purge_dups_bed" } }, @@ -1407,14 +1461,14 @@ "when": null, "workflow_outputs": [] }, - "24": { + "26": { "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/purge_dups/purge_dups/1.2.6+galaxy0", "errors": null, - "id": 24, + "id": 26, "input_connections": { "function_select|bed_input": { - "id": 23, + "id": 25, "output_name": "output" }, "function_select|fasta_input": { @@ -1501,14 +1555,14 @@ } ] }, - "25": { + "27": { "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/merqury/merqury/1.3+galaxy4", "errors": null, - "id": 25, + "id": 27, "input_connections": { "mode|assembly_options|assembly_01": { - "id": 24, + "id": 26, "output_name": "get_seqs_purged" }, "mode|assembly_options|assembly_02": { @@ -1635,14 +1689,14 @@ } ] }, - "26": { + "28": { "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/gfastats/gfastats/1.3.6+galaxy0", "errors": null, - "id": 26, + "id": 28, "input_connections": { "input_file": { - "id": 24, + "id": 26, "output_name": "get_seqs_purged" } }, @@ -1687,15 +1741,23 @@ "when": null, "workflow_outputs": [] }, - "27": { + "29": { "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/busco/busco/5.5.0+galaxy0", "errors": null, - "id": 27, + "id": 29, "input_connections": { "input": { - "id": 24, + "id": 26, "output_name": "get_seqs_purged" + }, + "lineage_conditional|cached_db": { + "id": 6, + "output_name": "output" + }, + "lineage|lineage_dataset": { + "id": 7, + "output_name": "output" } }, "inputs": [], @@ -1771,7 +1833,7 @@ "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu" }, - "tool_state": "{\"adv\": {\"evalue\": \"0.001\", \"limit\": \"3\", \"contig_break\": \"10\"}, \"busco_mode\": {\"mode\": \"geno\", \"__current_case__\": 0, \"miniprot\": false, \"use_augustus\": {\"use_augustus_selector\": \"no\", \"__current_case__\": 0}}, \"input\": {\"__class__\": 
\"ConnectedValue\"}, \"lineage\": {\"lineage_mode\": \"select_lineage\", \"__current_case__\": 1, \"lineage_dataset\": \"vertebrata_odb10\"}, \"lineage_conditional\": {\"selector\": \"download\", \"__current_case__\": 1}, \"outputs\": [\"short_summary\", \"missing\", \"image\", \"gff\"], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_state": "{\"adv\": {\"evalue\": \"0.001\", \"limit\": \"3\", \"contig_break\": \"10\"}, \"busco_mode\": {\"mode\": \"geno\", \"__current_case__\": 0, \"miniprot\": false, \"use_augustus\": {\"use_augustus_selector\": \"no\", \"__current_case__\": 0}}, \"input\": {\"__class__\": \"RuntimeValue\"}, \"lineage\": {\"lineage_mode\": \"select_lineage\", \"__current_case__\": 1, \"lineage_dataset\": {\"__class__\": \"ConnectedValue\"}}, \"lineage_conditional\": {\"selector\": \"cached\", \"__current_case__\": 0, \"cached_db\": {\"__class__\": \"ConnectedValue\"}}, \"outputs\": [\"short_summary\", \"missing\", \"image\", \"gff\"], \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "5.5.0+galaxy0", "type": "tool", "uuid": "73b62ef3-0002-418e-9f10-22076c1e48f3", @@ -1789,14 +1851,14 @@ } ] }, - "28": { + "30": { "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/gfastats/gfastats/1.3.6+galaxy0", "errors": null, - "id": 28, + "id": 30, "input_connections": { "input_file": { - "id": 24, + "id": 26, "output_name": "get_seqs_purged" } }, @@ -1849,18 +1911,18 @@ } ] }, - "29": { + "31": { "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/gfastats/gfastats/1.3.6+galaxy0", "errors": null, - "id": 29, + "id": 31, "input_connections": { "input_file": { - "id": 24, + "id": 26, "output_name": "get_seqs_purged" }, "mode_condition|statistics_condition|expected_genomesize": { - "id": 12, + "id": 14, "output_name": "integer_param" } }, @@ -1906,14 +1968,14 @@ } ] }, - "30": { + "32": { "annotation": "", "content_id": "__EXTRACT_DATASET__", "errors": null, - "id": 30, + "id": 32, 
"input_connections": { "input": { - "id": 25, + "id": 27, "output_name": "qv_files" } }, @@ -1951,14 +2013,14 @@ } ] }, - "31": { + "33": { "annotation": "", "content_id": "__EXTRACT_DATASET__", "errors": null, - "id": 31, + "id": 33, "input_connections": { "input": { - "id": 25, + "id": 27, "output_name": "png_files" } }, @@ -2003,14 +2065,14 @@ } ] }, - "32": { + "34": { "annotation": "", "content_id": "__EXTRACT_DATASET__", "errors": null, - "id": 32, + "id": 34, "input_connections": { "input": { - "id": 25, + "id": 27, "output_name": "png_files" } }, @@ -2055,14 +2117,14 @@ } ] }, - "33": { + "35": { "annotation": "", "content_id": "__EXTRACT_DATASET__", "errors": null, - "id": 33, + "id": 35, "input_connections": { "input": { - "id": 25, + "id": 27, "output_name": "png_files" } }, @@ -2107,14 +2169,14 @@ } ] }, - "34": { + "36": { "annotation": "", "content_id": "__EXTRACT_DATASET__", "errors": null, - "id": 34, + "id": 36, "input_connections": { "input": { - "id": 25, + "id": 27, "output_name": "stats_files" } }, @@ -2152,14 +2214,14 @@ } ] }, - "35": { + "37": { "annotation": "", "content_id": "__EXTRACT_DATASET__", "errors": null, - "id": 35, + "id": 37, "input_connections": { "input": { - "id": 25, + "id": 27, "output_name": "png_files" } }, @@ -2204,12 +2266,12 @@ } ] }, - "36": { + "38": { "annotation": "", - "id": 36, + "id": 38, "input_connections": { "gfa_stats": { - "id": 26, + "id": 28, "input_subworkflow_step_id": 0, "output_name": "stats" } @@ -2581,14 +2643,14 @@ "when": null, "workflow_outputs": [] }, - "37": { + "39": { "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_awk_tool/9.3+galaxy1", "errors": null, - "id": 37, + "id": 39, "input_connections": { "infile": { - "id": 29, + "id": 31, "output_name": "stats" } }, @@ -2626,27 +2688,27 @@ "when": null, "workflow_outputs": [] }, - "38": { + "40": { "annotation": "", - "id": 38, + "id": 40, "input_connections": { "Alternate data": { - "id": 16, + "id": 
18, "input_subworkflow_step_id": 1, "output_name": "gfastats data for plotting" }, "Name of alternate assembly": { - "id": 7, + "id": 9, "input_subworkflow_step_id": 3, "output_name": "output" }, "Name of primary assembly": { - "id": 6, + "id": 8, "input_subworkflow_step_id": 2, "output_name": "output" }, "Primary data": { - "id": 36, + "id": 38, "input_subworkflow_step_id": 0, "output_name": "gfastats data for plotting" } @@ -3164,18 +3226,18 @@ } ] }, - "39": { + "41": { "annotation": "", "content_id": "join1", "errors": null, - "id": 39, + "id": 41, "input_connections": { "input1": { - "id": 37, + "id": 39, "output_name": "outfile" }, "input2": { - "id": 20, + "id": 22, "output_name": "outfile" } }, @@ -3207,14 +3269,14 @@ "when": null, "workflow_outputs": [] }, - "40": { + "42": { "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_cut_tool/9.3+galaxy1", "errors": null, - "id": 40, + "id": 42, "input_connections": { "input": { - "id": 39, + "id": 41, "output_name": "out_file1" } }, @@ -3267,14 +3329,14 @@ } ] }, - "41": { + "43": { "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_find_and_replace/9.3+galaxy1", "errors": null, - "id": 41, + "id": 43, "input_connections": { "infile": { - "id": 40, + "id": 42, "output_name": "output" } }, @@ -3322,6 +3384,6 @@ "tags": [ "VGP_curated" ], - "uuid": "7d187438-e7da-494b-ba1f-993da4174e79", - "version": 14 + "uuid": "727b2b8c-3ff1-4736-952d-b2c215c0faa1", + "version": 1 } \ No newline at end of file diff --git a/workflows/VGP-assembly-v2/Purge-duplicates-one-haplotype-VGP6b/README.md b/workflows/VGP-assembly-v2/Purge-duplicates-one-haplotype-VGP6b/README.md index 3c58d2ee4..ad3b26735 100644 --- a/workflows/VGP-assembly-v2/Purge-duplicates-one-haplotype-VGP6b/README.md +++ b/workflows/VGP-assembly-v2/Purge-duplicates-one-haplotype-VGP6b/README.md @@ -6,13 +6,15 @@ This workflow is the 6th workflow of the VGP pipeline. 
It is meant to be run aft ## Inputs 1. Genomescope model parameters [txt] (Generated by the k-mer profiling workflow) -1. Hifi long reads - trimmed [fastq] (Generated by Cutadapt in the contigging workflow) -2. Assembly to purge (e.g. hap1) [fasta] (Generated by the contigging workflow) -3. K-mer database [meryldb] (Generated by the k-mer profiling workflow) -4. Estimated Genome Size [txt] +2. Hifi long reads - trimmed [fastq] (Generated by Cutadapt in the contigging workflow) +3. Assembly to purge (e.g. hap1) [fasta] (Generated by the contigging workflow) +4. K-mer database [meryldb] (Generated by the k-mer profiling workflow) 5. Assembly to leave alone (used for merqury statistics) (e.g. hap2) [fasta] (Generated by the contigging workflow) -6. Name of un-altered assembly -7. Name of purged assembly +6. Estimated Genome Size [txt] +7. Database for BUSCO lineage (recommended: latest) +8. BUSCO lineage (recommended: vertebrata, e.g. `vertebrata_odb10`) +9. Name of un-altered assembly +10. Name of purged assembly ## Outputs