From cdd853fda2208aea29478295b6d4158312a96fc8 Mon Sep 17 00:00:00 2001 From: Sage Wright <sage.wright@theiagen.com> Date: Fri, 13 Dec 2024 15:04:09 -0500 Subject: [PATCH] [TheiaProk] Update default versions for TB-Profiler and tbp-parser (#673) * new branch so tbp-parser is compatible * use tbprofiler 6.2.0 * update tbp-parser accordingly * int -> float * change to float again and update version so it says whov2 * change to float * remove typos ahh bad keyboard! * clean up clean up everybody everywhere * enable californiaDPH tbprofiler database * enable ability to alter tbprofiler docker * update docker * update to latest docker * add cycloserine parameter * additional params * expose input var * update tbprofiler & tbp-parser defaults * add updates * update version * organize * update md5sums * more md5sums so sad * fix genome_percent_coverage * rename tbprofiler_additional_outputs to call_tbp_parser * clarify output * md5sum * reorder trimmomatic base crop stuff * add lane concatenation to theiaproks * rewrite so it works * update documentation for concatenate_lanes * bump version and update docs * update last known changes * fix md5sums * version * update doc dockers * bump version * alphabetize things and fix description * move to appropriate type * fix disk type * remove files from test * finish updating versions everywhere * add sample-level to fetch_srr_accession thing --- .dockstore.yml | 7 +- .github/PULL_REQUEST_TEMPLATE.md | 3 +- .../genomic_characterization/theiacov.md | 2 +- .../genomic_characterization/theiaeuk.md | 2 +- .../genomic_characterization/theiameta.md | 2 +- .../genomic_characterization/theiaprok.md | 44 ++++-- .../genomic_characterization/vadr_update.md | 2 +- .../phylogenetic_construction/augur.md | 2 +- .../snippy_streamline.md | 2 +- .../snippy_streamline_fasta.md | 2 +- .../fetch_srr_accession.md | 2 +- .../mercury_prep_n_batch.md | 2 +- .../public_data_sharing/terra_2_ncbi.md | 2 +- .../standalone/concatenate_illumina_lanes.md | 47 
+++++++ docs/workflows/standalone/kraken2.md | 2 +- docs/workflows/standalone/tbprofiler_tngs.md | 4 +- .../workflows_alphabetically.md | 32 ++--- docs/workflows_overview/workflows_kingdom.md | 39 +++--- docs/workflows_overview/workflows_type.md | 31 ++--- mkdocs.yml | 5 +- .../read_filtering/task_trimmomatic.wdl | 4 +- .../mycobacterium/task_tbp_parser.wdl | 25 ++-- .../mycobacterium/task_tbprofiler.wdl | 127 ++++++++---------- .../file_handling/task_cat_lanes.wdl | 54 ++++++++ .../test_wf_theiaprok_illumina_pe.yml | 83 +----------- .../test_wf_theiaprok_illumina_se.yml | 76 +---------- .../standalone_modules/wf_tbprofiler_tngs.wdl | 2 +- .../theiaprok/wf_theiaprok_illumina_pe.wdl | 45 +++++-- .../theiaprok/wf_theiaprok_illumina_se.wdl | 27 +++- workflows/theiaprok/wf_theiaprok_ont.wdl | 2 +- .../wf_concatenate_illumina_lanes.wdl | 42 ++++++ workflows/utilities/wf_merlin_magic.wdl | 44 ++++-- 32 files changed, 425 insertions(+), 340 deletions(-) create mode 100644 docs/workflows/standalone/concatenate_illumina_lanes.md create mode 100644 tasks/utilities/file_handling/task_cat_lanes.wdl create mode 100644 workflows/utilities/file_handling/wf_concatenate_illumina_lanes.wdl diff --git a/.dockstore.yml b/.dockstore.yml index 146638eb7..aa87c3835 100644 --- a/.dockstore.yml +++ b/.dockstore.yml @@ -287,4 +287,9 @@ workflows: subclass: WDL primaryDescriptorPath: /workflows/phylogenetics/wf_snippy_streamline_fasta.wdl testParameterFiles: - - /tests/inputs/empty.json \ No newline at end of file + - /tests/inputs/empty.json + - name: Concatenate_Illumina_Lanes_PHB + subclass: WDL + primaryDescriptorPath: /workflows/utilities/file_handling/wf_concatenate_illumina_lanes.wdl + testParameterFiles: + - /tests/inputs/empty.json \ No newline at end of file diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 3aebe21d9..96f294e54 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -45,7 +45,8 @@ This PR uses an 
element that could cause duplicate runs to have different result - [ ] The workflow/task has been tested and results, including file contents, are as anticipated - [ ] The CI/CD has been adjusted and tests are passing (Theiagen developers) - [ ] Code changes follow the [style guide](https://theiagen.notion.site/Style-Guide-WDL-Workflow-Development-51b66a47dde54c798f35d673fff80249) -- [ ] Documentation and/or workflow diagrams have been updated if applicable (Theiagen developers only) +- [ ] Documentation and/or workflow diagrams have been updated if applicable + - [ ] You have updated the latest version for any affected workflows in the respective workflow documentation page and for every entry in the three `workflows_overview` tables. ## 🎯 Reviewer Checklist <!-- Indicate NA when not applicable --> diff --git a/docs/workflows/genomic_characterization/theiacov.md b/docs/workflows/genomic_characterization/theiacov.md index 3e2ad8956..480bfbf04 100644 --- a/docs/workflows/genomic_characterization/theiacov.md +++ b/docs/workflows/genomic_characterization/theiacov.md @@ -4,7 +4,7 @@ | **Workflow Type** | **Applicable Kingdom** | **Last Known Changes** | **Command-line Compatibility** | **Workflow Level** | |---|---|---|---|---| -| [Genomic Characterization](../../workflows_overview/workflows_type.md/#genomic-characterization) | [Viral](../../workflows_overview/workflows_kingdom.md/#viral) | PHB v2.2.0 | Yes, some optional features incompatible | Sample-level | +| [Genomic Characterization](../../workflows_overview/workflows_type.md/#genomic-characterization) | [Viral](../../workflows_overview/workflows_kingdom.md/#viral) | PHB v2.3.0 | Yes, some optional features incompatible | Sample-level | ## TheiaCoV Workflows diff --git a/docs/workflows/genomic_characterization/theiaeuk.md b/docs/workflows/genomic_characterization/theiaeuk.md index cc9cba9c1..2bfc2e6cf 100644 --- a/docs/workflows/genomic_characterization/theiaeuk.md +++ 
b/docs/workflows/genomic_characterization/theiaeuk.md @@ -4,7 +4,7 @@ | **Workflow Type** | **Applicable Kingdom** | **Last Known Changes** | **Command-line Compatibliity** | **Workflow Level** | |---|---|---|---|---| -| [Genomic Characterization](../../workflows_overview/workflows_type.md/#genomic-characterization) | [Mycotics](../../workflows_overview/workflows_kingdom.md/#mycotics) | PHB v2.2.0 | Yes | Sample-level | +| [Genomic Characterization](../../workflows_overview/workflows_type.md/#genomic-characterization) | [Mycotics](../../workflows_overview/workflows_kingdom.md/#mycotics) | PHB v2.3.0 | Yes | Sample-level | ## TheiaEuk Workflows diff --git a/docs/workflows/genomic_characterization/theiameta.md b/docs/workflows/genomic_characterization/theiameta.md index d6b55e80a..fad3c359a 100644 --- a/docs/workflows/genomic_characterization/theiameta.md +++ b/docs/workflows/genomic_characterization/theiameta.md @@ -4,7 +4,7 @@ | **Workflow Type** | **Applicable Kingdom** | **Last Known Changes** | **Command-line Compatibility** | **Workflow Level** | |---|---|---|---|---| -| [Genomic Characterization](../../workflows_overview/workflows_type.md/#genomic-characterization) | [Any Taxa](../../workflows_overview/workflows_kingdom.md/#any-taxa) | PHB v2.2.0 | Yes | Sample-level | +| [Genomic Characterization](../../workflows_overview/workflows_type.md/#genomic-characterization) | [Any Taxa](../../workflows_overview/workflows_kingdom.md/#any-taxa) | PHB v2.3.0 | Yes | Sample-level | ## TheiaMeta Workflows diff --git a/docs/workflows/genomic_characterization/theiaprok.md b/docs/workflows/genomic_characterization/theiaprok.md index 8808caab2..d3dbb55b0 100644 --- a/docs/workflows/genomic_characterization/theiaprok.md +++ b/docs/workflows/genomic_characterization/theiaprok.md @@ -4,7 +4,7 @@ | **Workflow Type** | **Applicable Kingdom** | **Last Known Changes** | **Command-line Compatibility** | **Workflow Level** | |---|---|---|---|---| -| [Genomic 
Characterization](../../workflows_overview/workflows_type.md/#genomic-characterization) | [Bacteria](../../workflows_overview/workflows_kingdom.md/#bacteria) | PHB v2.2.0 | Yes, some optional features incompatible | Sample-level | +| [Genomic Characterization](../../workflows_overview/workflows_type.md/#genomic-characterization) | [Bacteria](../../workflows_overview/workflows_kingdom.md/#bacteria) | PHB v2.3.0 | Yes, some optional features incompatible | Sample-level | ## TheiaProk Workflows @@ -78,6 +78,12 @@ All input reads are processed through "[core tasks](#core-tasks-performed-for-al | *workflow name | **originating_lab** | String | Will be used in the "originating_lab" column in any taxon-specific tables created in the Export Taxon Tables task | | Optional | FASTA, ONT, PE, SE | | *workflow name | **perform_characterization** | Boolean | Set to "false" if you want to only generate an assembly and relevant QC metrics and skip all characterization tasks | TRUE | Optional | FASTA, ONT, PE, SE | | *workflow name | **qc_check_table** | File | TSV value with taxons for rows and QC values for columns; internal cells represent user-determined QC thresholds; if provided, turns on the QC Check task.<br>Click on the variable name for an example QC Check table | | Optional | FASTA, ONT, PE, SE | +| *workflow name | **read1_lane2** | File | If provided, the Concatenate_Illumina_Lanes subworkflow will concatenate all files from the same lane before doing any subsequent analysis | | Optional | PE, SE | +| *workflow name | **read1_lane3** | File | If provided, the Concatenate_Illumina_Lanes subworkflow will concatenate all files from the same lane before doing any subsequent analysis | | Optional | PE, SE | +| *workflow name | **read1_lane4** | File | If provided, the Concatenate_Illumina_Lanes subworkflow will concatenate all files from the same lane before doing any subsequent analysis | | Optional | PE, SE | +| *workflow name | **read2_lane2** | File | If provided, the 
Concatenate_Illumina_Lanes subworkflow will concatenate all files from the same lane before doing any subsequent analysis | | Optional | PE, SE | +| *workflow name | **read2_lane3** | File | If provided, the Concatenate_Illumina_Lanes subworkflow will concatenate all files from the same lane before doing any subsequent analysis | | Optional | PE, SE | +| *workflow name | **read2_lane4** | File | If provided, the Concatenate_Illumina_Lanes subworkflow will concatenate all files from the same lane before doing any subsequent analysis | | Optional | PE, SE | | *workflow name | **run_id** | String | Will be used in the "run_id" column in any taxon-specific tables created in the Export Taxon Tables task | | Optional | FASTA, ONT, PE, SE | | *workflow name | **seq_method** | String | Will be used in the "seq_id" column in any taxon-specific tables created in the Export Taxon Tables task | | Optional | FASTA, ONT, PE, SE | | *workflow name | **skip_mash** | Boolean | If true, skips estimation of genome size and coverage in read screening steps. As a result, providing true also prevents screening using these parameters. | TRUE | Optional | ONT, SE | @@ -301,6 +307,7 @@ All input reads are processed through "[core tasks](#core-tasks-performed-for-al | merlin_magic | **call_poppunk** | Boolean | If "true", runs PopPUNK for GPSC cluster designation for S. pneumoniae | TRUE | Optional | FASTA, ONT, PE, SE | | merlin_magic | **call_shigeifinder_reads_input** | Boolean | If set to "true", the ShigEiFinder task will run again but using read files as input instead of the assembly file. Input is shown but not used for TheiaProk_FASTA. | FALSE | Optional | FASTA, ONT, PE, SE | | merlin_magic | **call_stxtyper** | Boolean | If set to "true", the StxTyper task will run on all samples regardless of the `gambit_predicted_taxon` output. Useful if you suspect a non-E.coli or non-Shigella sample contains stx genes. 
| FALSE | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **call_tbp_parser** | Boolean | If set to "true", activates the tbp_parser module and results in more outputs, including tbp_parser_looker_report_csv, tbp_parser_laboratorian_report_csv, tbp_parser_lims_report_csv, tbp_parser_coverage_report, and tbp_parser_genome_percent_coverage | FALSE | Optional | FASTA, ONT, PE, SE | | merlin_magic | **cauris_cladetyper_docker_image** | String | Internal component, do not modify | | Do not modify, Optional | FASTA, ONT, PE, SE | | merlin_magic | **cladetyper_kmer_size** | Int | Internal component, do not modify | | Do not modify, Optional | FASTA, ONT, PE, SE | | merlin_magic | **cladetyper_ref_clade1** | File | *Provide an empty file if running TheiaProk on the command-line | | Do not modify, Optional | FASTA, ONT, PE, SE | @@ -407,27 +414,33 @@ All input reads are processed through "[core tasks](#core-tasks-performed-for-al | merlin_magic | **stxtyper_enable_debug** | Boolean | When enabled, additional messages are printed and files in `$TMPDIR` are not removed after running | FALSE | Optional | FASTA, ONT, PE, SE | | merlin_magic | **stxtyper_memory** | Int | Amount of memory (in GB) to allocate to the task | 4 | Optional | FASTA, ONT, PE, SE | | merlin_magic | **staphopia_sccmec_docker_image** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/biocontainers/staphopia-sccmec:1.0.0--hdfd78af_0 | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbp_parser_add_cs_lims** | Boolean | Set to true to add cycloserine results to the LIMS report | FALSE | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_coverage_regions_bed** | File | A bed file that lists the regions to be considered for QC | | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_coverage_threshold** | Int | The minimum coverage for a region to pass QC in tbp_parser | 100 | Optional | FASTA, ONT, PE, SE | -| merlin_magic | **tbp_parser_debug** | 
Boolean | Activate the debug mode on tbp_parser; increases logging outputs | FALSE | Optional | FASTA, ONT, PE, SE | -| merlin_magic | **tbp_parser_docker_image** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.6.0 | Optional | FASTA, ONT, PE, SE | -| merlin_magic | **tbp_parser_docker_image** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.4.0 | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbp_parser_debug** | Boolean | Activate the debug mode on tbp_parser; increases logging outputs | TRUE | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbp_parser_docker_image** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:2.1.1 | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbp_parser_etha237_frequency** | Float | Minimum frequency for a mutation in ethA at protein position 237 to pass QC in tbp-parser | 0.1 | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbp_parser_expert_rule_regions_bed** | File | A file that contains the regions where R mutations and expert rules are applied | | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_min_depth** | Int | Minimum depth for a variant to pass QC in tbp_parser | 10 | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_min_frequency** | Int | The minimum frequency for a mutation to pass QC | 0.1 | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_min_read_support** | Int | The minimum read support for a mutation to pass QC | 10 | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_operator** | String | Fills the "operator" field in the tbp_parser output files | Operator not provided | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbp_parser_output_seq_method_type** | String | Fills out the "seq_method" field in the tbp_parser output files | Sequencing method not provided | 
Optional | FASTA, ONT, PE, SE | -| merlin_magic | **tbprofiler_additional_outputs** | Boolean | If set to "true", activates the tbp_parser module and results in more outputs, including tbp_parser_looker_report_csv, tbp_parser_laboratorian_report_csv, tbp_parser_lims_report_csv, tbp_parser_coverage_report, and tbp_parser_genome_percent_coverage | FALSE | Optional | FASTA, ONT, PE, SE | -| merlin_magic | **tbprofiler_cov_frac_threshold** | Int | A cutoff used to calculate the fraction of the region covered by ≤ this value | 1 | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbp_parser_rpob449_frequency** | Float | Minimum frequency for a mutation at protein position 449 to pass QC in tbp-parser | 0.1 | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbp_parser_rrl_frequency** | Float | Minimum frequency for a mutation in rrl to pass QC in tbp-parser | 0.1 | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbp_parser_rrl_read_support** | Int | Minimum read support for a mutation in rrl to pass QC in tbp-parser | 10 | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbp_parser_rrs_frequency** | Float | Minimum frequency for a mutation in rrs to pass QC in tbp-parser | 0.1 | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbp_parser_rrs_read_support** | Int | Minimum read support for a mutation in rrs to pass QC in tbp-parser | 10 | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbp_parser_tngs_data** | Boolean | Set to true to enable tNGS-specific parameters and runs in tbp-parser | FALSE | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbprofiler_custom_db** | File | TBProfiler uses by default the TBDB database; if you have a custom database you wish to use, you must provide a custom database in this field and set tbprofiler_run_custom_db to true | | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbprofiler_docker_image** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/staphb/tbprofiler:4.4.2 | 
Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbprofiler_mapper** | String | The mapping tool used in TBProfiler to align the reads to the reference genome; see TBProfiler’s original documentation for available options. | bwa | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbprofiler_min_af** | Float | The minimum allele frequency to call a variant | 0.1 | Optional | FASTA, ONT, PE, SE | -| merlin_magic | **tbprofiler_min_af_pred** | Float | The minimum allele frequency to use a variant for resistance prediction | 0.1 | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbprofiler_min_depth** | Int | The minimum depth for a variant to be called. | 10 | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbprofiler_run_cdph_db** | Boolean | TBProfiler uses by default the TBDB database; set this value to "true" to use the WHO v2 database with customizations for CDPH | FALSE | Optional | FASTA, ONT, PE, SE | | merlin_magic | **tbprofiler_run_custom_db** | Boolean | TBProfiler uses by default the TBDB database; if you have a custom database you wish to use, you must set this value to true and provide a custom database in the tbprofiler_custom_db field | FALSE | Optional | FASTA, ONT, PE, SE | -| merlin_magic | **tbprofiler_variant_caller** | String | Select a different variant caller for TBProfiler to use by writing it in this block; see TBProfiler’s original documentation for available options. | freebayes | Optional | FASTA, ONT, PE, SE | -| merlin_magic | **tbprofiler_variant_calling_params** | String | Enter additional variant calling parameters in this free text input to customize how the variant caller works in TBProfiler | None | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbprofiler_variant_caller** | String | Select a different variant caller for TBProfiler to use by writing it in this block; see TBProfiler’s original documentation for available options. 
| GATK | Optional | FASTA, ONT, PE, SE | +| merlin_magic | **tbprofiler_variant_calling_params** | String | Enter additional variant calling parameters in this free text input to customize how the variant caller works in TBProfiler | | Optional | FASTA, ONT, PE, SE | | merlin_magic | **theiaeuk** | Boolean | Internal component, do not modify | | Do not modify, Optional | FASTA, ONT, PE, SE | | merlin_magic | **virulencefinder_coverage_threshold** | Float | The threshold for minimum coverage | | Optional | FASTA, ONT, PE, SE | | merlin_magic | **virulencefinder_database** | String | The specific database to use | virulence_ecoli | Optional | FASTA, ONT, PE, SE | @@ -594,6 +607,17 @@ All input reads are processed through "[core tasks](#core-tasks-performed-for-al | --- | --- | | Task | [task_versioning.wdl](https://github.com/theiagen/public_health_bioinformatics/blob/main/tasks/task_versioning.wdl) | +??? task "`concatenate_illumina_lanes`: Concatenate Multi-Lane Illumina FASTQs ==_for Illumina only_==" + + The `concatenate_illumina_lanes` task concatenates Illumina FASTQ files from multiple lanes into a single file. This task only runs if the `read1_lane2` input file has been provided. All read1 lanes are concatenated together and are used in subsequent tasks, as are the read2 lanes. These concatenated files are also provided as output. + + !!! techdetails "Concatenate Illumina Lanes Technical Details" + The `concatenate_illumina_lanes` task is run before any downstream steps take place. + + | | Links | + | --- | --- | + | Task | [wf_concatenate_illumina_lanes.wdl](https://github.com/theiagen/public_health_bioinformatics/blob/main/workflows/utilities/file_handling/wf_concatenate_illumina_lanes.wdl) | + ??? 
task "`screen`: Total Raw Read Quantification and Genome Size Estimation" The [`screen`](https://github.com/theiagen/public_health_bioinformatics/blob/main/tasks/quality_control/comparisons/task_screen.wdl) task ensures the quantity of sequence data is sufficient to undertake genomic analysis. It uses [`fastq-scan`](https://github.com/rpetit3/fastq-scan) and bash commands for quantification of reads and base pairs, and [mash](https://mash.readthedocs.io/en/latest/index.html) sketching to estimate the genome size and its coverage. At each step, the results are assessed relative to pass/fail criteria and thresholds that may be defined by optional user inputs. Samples that do not meet these criteria will not be processed further by the workflow: @@ -2022,7 +2046,7 @@ The TheiaProk workflows automatically activate taxa-specific sub-workflows after | tbp_parser_version | String | Optional output. The version of tbp-parser | ONT, PE | | tbprofiler_dr_type | String | Drug resistance type predicted by TB-Profiler (sensitive, Pre-MDR, MDR, Pre-XDR, XDR) | ONT, PE, SE | | tbprofiler_main_lineage | String | Lineage(s) predicted by TBProfiler | ONT, PE, SE | -| tbprofiler_median_coverage | Int | The median coverage of the H37Rv TB reference genome | ONT, PE | +| tbprofiler_median_depth | Int | The median depth of the H37Rv TB reference genome covered by the sample | ONT, PE | | tbprofiler_output_bai | File | Index BAM file generated by mapping sequencing reads to reference genome by TBProfiler | ONT, PE, SE | | tbprofiler_output_bam | File | BAM alignment file produced by TBProfiler | ONT, PE, SE | | tbprofiler_output_file | File | CSV report from TBProfiler | ONT, PE, SE | diff --git a/docs/workflows/genomic_characterization/vadr_update.md b/docs/workflows/genomic_characterization/vadr_update.md index ceaa45fa8..b3d706d72 100644 --- a/docs/workflows/genomic_characterization/vadr_update.md +++ b/docs/workflows/genomic_characterization/vadr_update.md @@ -5,7 +5,7 @@ | 
**Workflow Type** | **Applicable Kingdom** | **Last Known Changes** | **Command-line Compatibility** | **Workflow Level** | |---|---|---|---|---| -| [Genomic Characterization](../../workflows_overview/workflows_type.md/#genomic-characterization) | [Viral](../../workflows_overview/workflows_kingdom.md/#viral) | PHB v1.2.1 | Yes | Sample-level | +| [Genomic Characterization](../../workflows_overview/workflows_type.md/#genomic-characterization) | [Viral](../../workflows_overview/workflows_kingdom.md/#viral) | PHB v2.2.0 | Yes | Sample-level | ## Vadr_Update_PHB diff --git a/docs/workflows/phylogenetic_construction/augur.md b/docs/workflows/phylogenetic_construction/augur.md index 88adb3c9f..45d92ad5b 100644 --- a/docs/workflows/phylogenetic_construction/augur.md +++ b/docs/workflows/phylogenetic_construction/augur.md @@ -4,7 +4,7 @@ | **Workflow Type** | **Applicable Kingdom** | **Last Known Changes** | **Command-line Compatibility** | **Workflow Level** | |---|---|---|---|---| -| [Phylogenetic Construction](../../workflows_overview/workflows_type.md/#phylogenetic-construction) | [Viral](../../workflows_overview/workflows_kingdom.md/#viral) | PHB v2.1.0 | Yes | Sample-level, Set-level | +| [Phylogenetic Construction](../../workflows_overview/workflows_type.md/#phylogenetic-construction) | [Viral](../../workflows_overview/workflows_kingdom.md/#viral) | PHB v2.3.0 | Yes | Sample-level, Set-level | ## Augur Workflows diff --git a/docs/workflows/phylogenetic_construction/snippy_streamline.md b/docs/workflows/phylogenetic_construction/snippy_streamline.md index aa04198b3..facc3e1c4 100644 --- a/docs/workflows/phylogenetic_construction/snippy_streamline.md +++ b/docs/workflows/phylogenetic_construction/snippy_streamline.md @@ -4,7 +4,7 @@ | **Workflow Type** | **Applicable Kingdom** | **Last Known Changes** | **Command-line Compatibility** | **Workflow Level** | |---|---|---|---|---| -| [Phylogenetic 
Construction](../../workflows_overview/workflows_type.md/#phylogenetic-construction) | [Bacteria](../../workflows_overview/workflows_kingdom.md/#bacteria) | PHB v2.2.0 | Yes; some optional features incompatible | Set-level | +| [Phylogenetic Construction](../../workflows_overview/workflows_type.md/#phylogenetic-construction) | [Bacteria](../../workflows_overview/workflows_kingdom.md/#bacteria) | PHB v2.3.0 | Yes; some optional features incompatible | Set-level | ## Snippy_Streamline_PHB diff --git a/docs/workflows/phylogenetic_construction/snippy_streamline_fasta.md b/docs/workflows/phylogenetic_construction/snippy_streamline_fasta.md index 890674b3f..118c66588 100644 --- a/docs/workflows/phylogenetic_construction/snippy_streamline_fasta.md +++ b/docs/workflows/phylogenetic_construction/snippy_streamline_fasta.md @@ -4,7 +4,7 @@ | **Workflow Type** | **Applicable Kingdom** | **Last Known Changes** | **Command-line Compatibility** | **Workflow Level** | |---|---|---|---|---| -| [Phylogenetic Construction](../../workflows_overview/workflows_type.md/#phylogenetic-construction) | [Bacteria](../../workflows_overview/workflows_kingdom.md/#bacteria) | PHB v2.2.0 | Yes; some optional features incompatible | Set-level | +| [Phylogenetic Construction](../../workflows_overview/workflows_type.md/#phylogenetic-construction) | [Bacteria](../../workflows_overview/workflows_kingdom.md/#bacteria) | PHB v2.3.0 | Yes; some optional features incompatible | Set-level | ## Snippy_Streamline_FASTA_PHB diff --git a/docs/workflows/public_data_sharing/fetch_srr_accession.md b/docs/workflows/public_data_sharing/fetch_srr_accession.md index aa18c6438..df432d1ca 100644 --- a/docs/workflows/public_data_sharing/fetch_srr_accession.md +++ b/docs/workflows/public_data_sharing/fetch_srr_accession.md @@ -4,7 +4,7 @@ | **Workflow Type** | **Applicable Kingdom** | **Last Known Changes** | **Command-line Compatibility** | **Workflow Level** | |---|---|---|---|---| -| [Public Data 
Sharing](../../workflows_overview/workflows_type.md/#public-data-sharing) | [Any Taxa](../../workflows_overview/workflows_kingdom.md/#any-taxa) | PHB v2.3.0 | Yes | Sample-level | +| [Data Import](../../workflows_overview/workflows_type.md/#data-import) | [Any Taxa](../../workflows_overview/workflows_kingdom.md/#any-taxa) | PHB v2.3.0 | Yes | Sample-level | ## Fetch SRR Accession diff --git a/docs/workflows/public_data_sharing/mercury_prep_n_batch.md b/docs/workflows/public_data_sharing/mercury_prep_n_batch.md index 4fcc48d36..56e169e82 100644 --- a/docs/workflows/public_data_sharing/mercury_prep_n_batch.md +++ b/docs/workflows/public_data_sharing/mercury_prep_n_batch.md @@ -4,7 +4,7 @@ | **Workflow Type** | **Applicable Kingdom** | **Last Known Changes** | **Command-line Compatibility** | **Workflow Level** | |---|---|---|---|---| -| [Public Data Sharing](../../workflows_overview/workflows_type.md/#public-data-sharing) | [Viral](../../workflows_overview/workflows_kingdom.md/#viral) | PHB v2.2.0 | Yes | Set-level | +| [Public Data Sharing](../../workflows_overview/workflows_type.md/#public-data-sharing) | [Viral](../../workflows_overview/workflows_kingdom.md/#viral) | PHB v2.3.0 | Yes | Set-level | ## Mercury_Prep_N_Batch_PHB diff --git a/docs/workflows/public_data_sharing/terra_2_ncbi.md b/docs/workflows/public_data_sharing/terra_2_ncbi.md index 0fa48e50e..54e17aa9d 100644 --- a/docs/workflows/public_data_sharing/terra_2_ncbi.md +++ b/docs/workflows/public_data_sharing/terra_2_ncbi.md @@ -4,7 +4,7 @@ | **Workflow Type** | **Applicable Kingdom** | **Last Known Changes** | **Command-line Compatibility** | **Workflow Level** | |---|---|---|---|---| -| [Public Data Sharing](../../workflows_overview/workflows_type.md/#public-data-sharing) | [Bacteria](../../workflows_overview/workflows_kingdom.md#bacteria), [Mycotics](../../workflows_overview/workflows_kingdom.md#mycotics) [Viral](../../workflows_overview/workflows_kingdom.md/#viral) | PHB v2.1.0 | No | Set-level | +| 
[Public Data Sharing](../../workflows_overview/workflows_type.md/#public-data-sharing) | [Bacteria](../../workflows_overview/workflows_kingdom.md#bacteria), [Mycotics](../../workflows_overview/workflows_kingdom.md#mycotics) [Viral](../../workflows_overview/workflows_kingdom.md/#viral) | PHB v2.3.0 | No | Set-level | ## Terra_2_NCBI_PHB diff --git a/docs/workflows/standalone/concatenate_illumina_lanes.md b/docs/workflows/standalone/concatenate_illumina_lanes.md new file mode 100644 index 000000000..282844fa4 --- /dev/null +++ b/docs/workflows/standalone/concatenate_illumina_lanes.md @@ -0,0 +1,47 @@ +# Concatenate Illumina Lanes + +## Quick Facts + +| **Workflow Type** | **Applicable Kingdom** | **Last Known Changes** | **Command-line Compatibility** | **Workflow Level** | +|---|---|---|---|---| +| [Standalone](../../workflows_overview/workflows_type.md/#standalone) | [Any Taxa](../../workflows_overview/workflows_kingdom.md/#any-taxa) | PHB v2.3.0 | Yes | Sample-level | + +## Concatenate_Illumina_Lanes_PHB + +Some Illumina machines produce multi-lane FASTQ files for a single sample. This workflow concatenates the multiple lanes into a single FASTQ file per read type (forward or reverse). 
+ +### Inputs + +| **Terra Task Name** | **Variable** | **Type** | **Description** | **Default Value** | **Terra Status** | +|---|---|---|---|---|---| +| concatenate_illumina_lanes | **read1_lane1** | File | The first lane for the forward reads | | Required | +| concatenate_illumina_lanes | **read1_lane2** | File | The second lane for the forward reads | | Required | +| concatenate_illumina_lanes | **samplename** | String | The name of the sample, used to name the output files | | Required | +| cat_lanes | **cpu** | Int | Number of CPUs to allocate to the task | 2 | Optional | +| cat_lanes | **disk_size** | Int | Amount of storage (in GB) to allocate to the task | 50 | Optional | +| cat_lanes | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/utility:1.2" | Optional | +| cat_lanes | **memory** | Int | Amount of memory/RAM (in GB) to allocate to the task | 4 | Optional | +| concatenate_illumina_lanes | **read1_lane3** | File | The third lane for the forward reads | | Optional | +| concatenate_illumina_lanes | **read1_lane4** | File | The fourth lane for the forward reads | | Optional | +| concatenate_illumina_lanes | **read2_lane1** | File | The first lane for the reverse reads | | Optional | +| concatenate_illumina_lanes | **read2_lane2** | File | The second lane for the reverse reads | | Optional | +| concatenate_illumina_lanes | **read2_lane3** | File | The third lane for the reverse reads | | Optional | +| concatenate_illumina_lanes | **read2_lane4** | File | The fourth lane for the reverse reads | | Optional | +| version_capture | **docker** | String | The Docker container to use for the task | "us-docker.pkg.dev/general-theiagen/theiagen/alpine-plus-bash:3.20.0" | Optional | +| version_capture | **timezone** | String | Set the time zone to get an accurate date of analysis (uses UTC by default) | | Optional | + +### Workflow Tasks + +This workflow concatenates the Illumina lanes for forward and (if 
provided) reverse reads. The output files are named as follows: + +- Forward reads: `<samplename>_merged_R1.fastq.gz` +- Reverse reads: `<samplename>_merged_R2.fastq.gz` + +### Outputs + +| **Variable** | **Type** | **Description** | +|---|---|---| +| concatenate_illumina_lanes_analysis_date | String | Date of analysis | +| concatenate_illumina_lanes_version | String | Version of PHB used for the analysis | +| read1_concatenated | File | Concatenated forward reads | +| read2_concatenated | File | Concatenated reverse reads | diff --git a/docs/workflows/standalone/kraken2.md b/docs/workflows/standalone/kraken2.md index df36e56a1..95c86c216 100644 --- a/docs/workflows/standalone/kraken2.md +++ b/docs/workflows/standalone/kraken2.md @@ -4,7 +4,7 @@ | **Workflow Type** | **Applicable Kingdom** | **Last Known Changes** | **Command-line Compatibility** | **Workflow Level** | |---|---|---|---|---| -| [Standalone](../../workflows_overview/workflows_type.md/#standalone) | [Any Taxa](../../workflows_overview/workflows_kingdom.md/#any-taxa) | PHB v2.0.0 | Yes | Sample-level | +| [Standalone](../../workflows_overview/workflows_type.md/#standalone) | [Any Taxa](../../workflows_overview/workflows_kingdom.md/#any-taxa) | PHB v2.3.0 | Yes | Sample-level | ## Kraken2 Workflows diff --git a/docs/workflows/standalone/tbprofiler_tngs.md b/docs/workflows/standalone/tbprofiler_tngs.md index d0061fdd7..96f29e0bf 100644 --- a/docs/workflows/standalone/tbprofiler_tngs.md +++ b/docs/workflows/standalone/tbprofiler_tngs.md @@ -4,7 +4,7 @@ | **Workflow Type** | **Applicable Kingdom** | **Last Known Changes** | **Command-line Compatibility** | **Workflow Level** | |---|---|---|---|---| -| [Standalone](../../workflows_overview/workflows_type.md/#standalone) | [Bacteria](../../workflows_overview/workflows_kingdom.md/#bacteria) | PHB v2.0.0 | Yes | Sample-level | +| [Standalone](../../workflows_overview/workflows_type.md/#standalone) |
[Bacteria](../../workflows_overview/workflows_kingdom.md/#bacteria) | PHB v2.3.0 | Yes | Sample-level | ## TBProfiler_tNGS_PHB @@ -23,7 +23,7 @@ This workflow is still in experimental research stages. Documentation is minimal | tbp_parser | **coverage_threshold** | Int | The minimum percentage of a region to exceed the minimum depth for a region to pass QC in tbp_parser | 100 | Optional | | tbp_parser | **cpu** | Int | Number of CPUs to allocate to the task | 1 | Optional | | tbp_parser | **disk_size** | Int | Amount of storage (in GB) to allocate to the task | 100 | Optional | -| tbp_parser | **docker** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.6.0 | Optional | +| tbp_parser | **docker** | String | The Docker container to use for the task | us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:2.1.1 | Optional | | tbp_parser | **etha237_frequency** | Float | Minimum frequency for a mutation in ethA at protein position 237 to pass QC in tbp-parser | 0.1 | Optional | | tbp_parser | **expert_rule_regions_bed** | File | A file that contains the regions where R mutations and expert rules are applied | | Optional | | tbp_parser | **memory** | Int | Amount of memory/RAM (in GB) to allocate to the task | 4 | Optional | diff --git a/docs/workflows_overview/workflows_alphabetically.md b/docs/workflows_overview/workflows_alphabetically.md index c937e815b..cc90bf439 100644 --- a/docs/workflows_overview/workflows_alphabetically.md +++ b/docs/workflows_overview/workflows_alphabetically.md @@ -11,44 +11,46 @@ title: Alphabetical Workflows | **Name** | **Description** | **Applicable Kingdom** | **Workflow Level** | **Command-line Compatibility**[^1] | **Last Known Changes** | **Dockstore** | |---|---|---|---|---|---|---| | [**Assembly_Fetch**](../workflows/data_import/assembly_fetch.md) | Download assemblies from NCBI, after optionally identifying the closest RefSeq reference genome to your own draft assembly | 
Any taxa | Sample-level | Yes | v1.3.0 | [Assembly_Fetch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Assembly_Fetch_PHB:main?tab=info) | -| [**Augur**](../workflows/phylogenetic_construction/augur.md) | Phylogenetic analysis for viral pathogens | Viral | Sample-level, Set-level | Yes | v2.1.0 | [Augur_Prep_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Augur_Prep_PHB:main?tab=info), [Augur_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Augur_PHB:main?tab=info) | +| [**Augur**](../workflows/phylogenetic_construction/augur.md) | Phylogenetic analysis for viral pathogens | Viral | Sample-level, Set-level | Yes | v2.3.0 | [Augur_Prep_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Augur_Prep_PHB:main?tab=info), [Augur_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Augur_PHB:main?tab=info) | | [**BaseSpace_Fetch**](../workflows/data_import/basespace_fetch.md)| Import data from BaseSpace into Terra | Any taxa | Sample-level | Yes | v2.0.0 | [BaseSpace_Fetch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/BaseSpace_Fetch_PHB:main?tab=info) | | [**Cauris_CladeTyper**](../workflows/standalone/cauris_cladetyper.md)| C. 
auris clade assignment | Mycotics | Sample-level | Yes | v1.0.0 | [Cauris_CladeTyper_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Cauris_CladeTyper_PHB:main?tab=info) | | [**Concatenate_Column_Content**](../workflows/data_export/concatenate_column_content.md) | Concatenate contents of a specified Terra data table column for many samples ("entities") | Any taxa | Set-level | Yes | v2.1.0 | [Concatenate_Column_Content_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Concatenate_Column_Content_PHB:main?tab=info) | +| [**Concatenate_Illumina_Lanes**](../workflows/standalone/concatenate_illumina_lanes.md)| Concatenate Illumina lanes for a single sample | Any taxa | Sample-level | Yes | v2.3.0 | [Concatenate_Illumina_Lanes_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Concatenate_Illumina_Lanes_PHB:main?tab=info) | | [**Core_Gene_SNP**](../workflows/phylogenetic_construction/core_gene_snp.md) | Pangenome analysis | Bacteria | Set-level | Some optional features incompatible, Yes | v2.1.0 | [Core_Gene_SNP_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Core_Gene_SNP_PHB:main?tab=info) | | [**Create_Terra_Table**](../workflows/data_import/create_terra_table.md)| Upload data to Terra and then run this workflow to have the table automatically created | Any taxa | | Yes | v2.2.0 | [Create_Terra_Table_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Create_Terra_Table_PHB:main?tab=info) | | [**CZGenEpi_Prep**](../workflows/phylogenetic_construction/czgenepi_prep.md)| Prepare metadata and fasta files for easy upload to the CZ GEN EPI platform. 
| Monkeypox virus, SARS-CoV-2, Viral | Set-level | No | v1.3.0 | [CZGenEpi_Prep_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/CZGenEpi_Prep_PHB:main?tab=info) | +| [**Fetch_SRR_Accession**](../workflows/public_data_sharing/fetch_srr_accession.md)| Provided a BioSample accession, identify any associated SRR accession(s) | Any taxa | Sample-level | Yes | v2.3.0 | [Fetch_SRR_Accession_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Fetch_SRR_Accession_PHB:main?tab=info) | | [**Find_Shared_Variants**](../workflows/phylogenetic_construction/find_shared_variants.md)| Combines and reshapes variant data from Snippy_Variants to illustrate variants shared across multiple samples | Bacteria, Mycotics | Set-level | Yes | v2.0.0 | [Find_Shared_Variants_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Find_Shared_Variants_PHB:main?tab=info) | | [**Freyja Workflow Series**](../workflows/genomic_characterization/freyja.md)| Recovers relative lineage abundances from mixed sample data and generates visualizations | SARS-CoV-2, Viral | Sample-level, Set-level | Yes | v2.2.0 | [Freyja_FASTQ_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_FASTQ_PHB:main?tab=info), [Freyja_Plot_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Plot_PHB:main?tab=info), [Freyja_Dashboard_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Dashboard_PHB:main?tab=info), [Freyja_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Update_PHB:main?tab=info) | | [**GAMBIT_Query**](../workflows/standalone/gambit_query.md)| Taxon identification of genome assembly using GAMBIT | Bacteria, Mycotics | Sample-level | Yes | v2.0.0 |
[Gambit_Query_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Gambit_Query_PHB:main?tab=info) | -| [**Kraken2**](../workflows/standalone/kraken2.md) | Taxa identification from reads | Any taxa | Sample-level | Yes | v2.0.0 | [Kraken2_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_PE_PHB:main?tab=info), [Kraken2_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_SE_PHB:main?tab=info) | +| [**Kraken2**](../workflows/standalone/kraken2.md) | Taxa identification from reads | Any taxa | Sample-level | Yes | v2.3.0 | [Kraken2_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_PE_PHB:main?tab=info), [Kraken2_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_SE_PHB:main?tab=info) | | [**kSNP3**](../workflows/phylogenetic_construction/ksnp3.md)| SNP-based phylogenetic analysis from assemblies | Bacteria, Mycotics, Viral | Set-level | Some optional features incompatible, Yes | v2.1.0 | [kSNP3_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/kSNP3_PHB:main?tab=info) | | [**Lyve_SET**](../workflows/phylogenetic_construction/lyve_set.md)| Alignment of reads to a reference genome, SNP calling, curation of high quality SNPs, phylogenetic analysis | Bacteria | Set-level | Yes | v2.1.0 | [Lyve_SET_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Lyve_SET_PHB:main?tab=info) | | [**MashTree_FASTA**](../workflows/phylogenetic_construction/mashtree_fasta.md)| Mash-distance based phylogenetic analysis from assemblies | Bacteria, Mycotics, Viral | Set-level | Some optional features incompatible, Yes | v2.1.0 | [MashTree_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/MashTree_FASTA_PHB:main?tab=info) | -| 
[**Mercury_Prep_N_Batch**](../workflows/public_data_sharing/mercury_prep_n_batch.md)| Prepare metadata and sequence data for submission to NCBI and GISAID | Influenza, Monkeypox virus, SARS-CoV-2, Viral | Set-level | No | v2.2.0 | [Mercury_Prep_N_Batch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Mercury_Prep_N_Batch_PHB:main?tab=info) | +| [**Mercury_Prep_N_Batch**](../workflows/public_data_sharing/mercury_prep_n_batch.md)| Prepare metadata and sequence data for submission to NCBI and GISAID | Influenza, Monkeypox virus, SARS-CoV-2, Viral | Set-level | No | v2.3.0 | [Mercury_Prep_N_Batch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Mercury_Prep_N_Batch_PHB:main?tab=info) | | [**NCBI-AMRFinderPlus**](../workflows/standalone/ncbi_amrfinderplus.md)| Runs NCBI's AMRFinderPlus on genome assemblies (bacterial and fungal) | Bacteria, Mycotics | Sample-level | Yes | v2.0.0 | [NCBI-AMRFinderPlus_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI-AMRFinderPlus_PHB:main?tab=info) | | [**NCBI_Scrub**](../workflows/standalone/ncbi_scrub.md)| Runs NCBI's HRRT on Illumina FASTQs | Any taxa | Sample-level | Yes | v2.2.1 | [NCBI_Scrub_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI_Scrub_PE_PHB:main?tab=info), [NCBI_Scrub_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI_Scrub_SE_PHB:main?tab=info) | | [**Pangolin_Update**](../workflows/genomic_characterization/pangolin_update.md) | Update Pangolin assignments | SARS-CoV-2, Viral | Sample-level | Yes | v2.0.0 | [Pangolin_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Pangolin_Update_PHB:main?tab=info) | | [**RASUSA**](../workflows/standalone/rasusa.md)| Randomly subsample sequencing reads to a specified coverage | Any taxa | Sample-level | Yes | v2.0.0 | 
[RASUSA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/RASUSA_PHB:main?tab=info) | | [**Rename_FASTQ**](../workflows/standalone/rename_fastq.md)| Rename paired-end or single-end read files in a Terra data table in a non-destructive way | Any taxa | Sample-level | Yes | v2.1.0 | [Rename_FASTQ_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Rename_FASTQ_PHB:im-utilities-rename-files?tab=info) | | [**Samples_to_Ref_Tree**](../workflows/phylogenetic_placement/samples_to_ref_tree.md)| Use Nextclade to rapidly and accurately place your samples on any existing phylogenetic tree | Monkeypox virus, SARS-CoV-2, Viral | Sample-level, Set-level | Yes | v2.1.0 | [Samples_to_Ref_Tree_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Samples_to_Ref_Tree_PHB:main?tab=info) | -| [**Snippy_Streamline**](../workflows/phylogenetic_construction/snippy_streamline.md)| Implementation of Snippy workflows for phylogenetic analysis from reads, with optional dynamic reference selection | Bacteria | Set-level | Yes | v2.2.0 | [Snippy_Streamline_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Streamline_PHB:main?tab=info) | -| [**Snippy_Streamline_FASTA**](../workflows/phylogenetic_construction/snippy_streamline_fasta.md)| Implementation of Snippy workflows for phylogenetic analysis from assembled genomes (in FASTA format), with optional dynamic reference selection | Bacteria | Set-level | Yes | v2.2.0 | [Snippy_Streamline_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Streamline_FASTA_PHB:im-snippy-fasta-dev?tab=info) | -| [**Snippy_Tree**](../workflows/phylogenetic_construction/snippy_tree.md)| SNP-based phylogenetic analysis from reads, with option to mask recombination | Bacteria | Set-level | Some optional features incompatible, Yes | v2.1.0 | 
[Snippy_Tree_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Tree_PHB:main?tab=info) | -| [**Snippy_Variants**](../workflows/phylogenetic_construction/snippy_variants.md)| Alignment of reads to a reference genome, then SNP calling | Bacteria, Mycotics, Viral | Sample-level | Yes | v2.2.0 | [Snippy_Variants_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Variants_PHB:main?tab=info) | +| [**Snippy_Streamline**](../workflows/phylogenetic_construction/snippy_streamline.md)| Implementation of Snippy workflows for phylogenetic analysis from reads, with optional dynamic reference selection | Bacteria | Set-level | Yes | v2.3.0 | [Snippy_Streamline_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Streamline_PHB:main?tab=info) | +| [**Snippy_Streamline_FASTA**](../workflows/phylogenetic_construction/snippy_streamline_fasta.md)| Implementation of Snippy workflows for phylogenetic analysis from assembled genomes (in FASTA format), with optional dynamic reference selection | Bacteria | Set-level | Yes | v2.3.0 | [Snippy_Streamline_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Streamline_FASTA_PHB:im-snippy-fasta-dev?tab=info) | +| [**Snippy_Tree**](../workflows/phylogenetic_construction/snippy_tree.md)| SNP-based phylogenetic analysis from reads, with option to mask recombination | Bacteria | Set-level | Some optional features incompatible, Yes | v2.3.0 | [Snippy_Tree_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Tree_PHB:main?tab=info) | +| [**Snippy_Variants**](../workflows/phylogenetic_construction/snippy_variants.md)| Alignment of reads to a reference genome, then SNP calling | Bacteria, Mycotics, Viral | Sample-level | Yes | v2.3.0 | 
[Snippy_Variants_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Variants_PHB:main?tab=info) | | [**SRA_Fetch**](../workflows/data_import/sra_fetch.md)| Import publicly available reads from SRA using SRR#, ERR# or DRR# | Any taxa | Sample-level | Yes | v2.2.0 | [SRA_Fetch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/SRA_Fetch_PHB:main?tab=info) | -| [**TBProfiler_tNGS**](../workflows/standalone/tbprofiler_tngs.md)| Performs in silico antimicrobial susceptibility testing on Mycobacterium tuberculosis targeted-NGS samples with TBProfiler and tbp-parser | Bacteria, TB | Sample-level | Yes | v2.0.0 | [TBProfiler_tNGS_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TBProfiler_tNGS_PHB:smw-tngs-tbprofiler-dev?tab=info) | +| [**TBProfiler_tNGS**](../workflows/standalone/tbprofiler_tngs.md)| Performs in silico antimicrobial susceptibility testing on Mycobacterium tuberculosis targeted-NGS samples with TBProfiler and tbp-parser | Bacteria, TB | Sample-level | Yes | v2.3.0 | [TBProfiler_tNGS_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TBProfiler_tNGS_PHB:smw-tngs-tbprofiler-dev?tab=info) | | [**Terra_2_GISAID**](../workflows/public_data_sharing/terra_2_gisaid.md)| Upload of assembly data to GISAID | SARS-CoV-2, Viral | Set-level | Yes | v1.2.1 | [Terra_2_GISAID_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Terra_2_GISAID_PHB:main?tab=info) | -| [**Terra_2_NCBI**](../workflows/public_data_sharing/terra_2_ncbi.md)| Upload of sequence data to NCBI | Bacteria, Mycotics, Viral | Set-level | No | v2.1.0 | [Terra_2_NCBI_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Terra_2_NCBI_PHB:main?tab=info) | -| [**TheiaCov Workflow Series**](../workflows/genomic_characterization/theiacov.md) | Viral genome assembly, QC and characterization from 
amplicon sequencing | HIV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | Sample-level, Set-level | Some optional features incompatible, Yes | v2.2.0 | [TheiaCoV_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_Illumina_PE_PHB:main?tab=info), [TheiaCoV_Illumina_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_Illumina_SE_PHB:main?tab=info), [TheiaCoV_ONT_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_ONT_PHB:main?tab=info), [TheiaCoV_ClearLabs_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_ClearLabs_PHB:main?tab=info), [TheiaCoV_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_FASTA_PHB:main?tab=info), [TheiaCoV_FASTA_Batch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_FASTA_Batch_PHB:main?tab=info) | -| [**TheiaEuk**](../workflows/genomic_characterization/theiaeuk.md) | Mycotic genome assembly, QC and characterization from WGS data | Mycotics | Sample-level | Some optional features incompatible, Yes | v2.0.1 | [TheiaEuk_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaEuk_Illumina_PE_PHB:main?tab=info) | -| [**TheiaMeta**](../workflows/genomic_characterization/theiameta.md) | Genome assembly and QC from metagenomic sequencing | Any taxa | Sample-level | Yes | v2.0.0 | [TheiaMeta_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaMeta_Illumina_PE_PHB:main?tab=info) | -| [**TheiaProk Workflow Series**](../workflows/genomic_characterization/theiaprok.md) | Bacterial genome assembly, QC and characterization from WGS data | Bacteria | Sample-level | Some optional features incompatible, Yes | v2.2.0 | 
[TheiaProk_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_PE_PHB:main?tab=info), [TheiaProk_Illumina_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_SE_PHB:main?tab=info), [TheiaProk_ONT_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_ONT_PHB:main?tab=info), [TheiaProk_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_FASTA_PHB:main?tab=info) | +| [**Terra_2_NCBI**](../workflows/public_data_sharing/terra_2_ncbi.md)| Upload of sequence data to NCBI | Bacteria, Mycotics, Viral | Set-level | No | v2.3.0 | [Terra_2_NCBI_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Terra_2_NCBI_PHB:main?tab=info) | +| [**TheiaCov Workflow Series**](../workflows/genomic_characterization/theiacov.md) | Viral genome assembly, QC and characterization from amplicon sequencing | HIV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | Sample-level, Set-level | Some optional features incompatible, Yes | v2.3.0 | [TheiaCoV_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_Illumina_PE_PHB:main?tab=info), [TheiaCoV_Illumina_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_Illumina_SE_PHB:main?tab=info), [TheiaCoV_ONT_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_ONT_PHB:main?tab=info), [TheiaCoV_ClearLabs_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_ClearLabs_PHB:main?tab=info), [TheiaCoV_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_FASTA_PHB:main?tab=info), 
[TheiaCoV_FASTA_Batch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_FASTA_Batch_PHB:main?tab=info) | +| [**TheiaEuk**](../workflows/genomic_characterization/theiaeuk.md) | Mycotic genome assembly, QC and characterization from WGS data | Mycotics | Sample-level | Some optional features incompatible, Yes | v2.3.0 | [TheiaEuk_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaEuk_Illumina_PE_PHB:main?tab=info) | +| [**TheiaMeta**](../workflows/genomic_characterization/theiameta.md) | Genome assembly and QC from metagenomic sequencing | Any taxa | Sample-level | Yes | v2.3.0 | [TheiaMeta_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaMeta_Illumina_PE_PHB:main?tab=info) | +| [**TheiaProk Workflow Series**](../workflows/genomic_characterization/theiaprok.md) | Bacterial genome assembly, QC and characterization from WGS data | Bacteria | Sample-level | Some optional features incompatible, Yes | v2.3.0 | [TheiaProk_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_PE_PHB:main?tab=info), [TheiaProk_Illumina_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_SE_PHB:main?tab=info), [TheiaProk_ONT_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_ONT_PHB:main?tab=info), [TheiaProk_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_FASTA_PHB:main?tab=info) | | [**TheiaValidate**](../workflows/standalone/theiavalidate.md)| This workflow performs basic comparisons between user-designated columns in two separate tables. 
| Any taxa | | No | v2.0.0 | [TheiaValidate_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaValidate_PHB:main?tab=info) | | [**Transfer_Column_Content**](../workflows/data_export/transfer_column_content.md)| Transfer contents of a specified Terra data table column for many samples ("entities") to a GCP storage bucket location | Any taxa | Set-level | Yes | v1.3.0 | [Transfer_Column_Content_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Transfer_Column_Content_PHB:main?tab=info) | | [**Samples_to_Ref_Tree**](../workflows/phylogenetic_placement/usher.md)| Use UShER to rapidly and accurately place your samples on any existing phylogenetic tree | Monkeypox virus, SARS-CoV-2, Viral | Sample-level, Set-level | Yes | v2.1.0 | [Usher_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Usher_PHB:main?tab=info) | -| [**Fetch_SRR_Accession**](../workflows/public_data_sharing/fetch_srr_accession.md)| Update SRR metadata in a Terra data table at the sample level | Any taxa | | Yes | v2.3.0 | [*Fetch_SRR_Accession_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Fetch_SRR_Accession_PHB:main?tab=info) | -| [**Usher_PHB**](../workflows/genomic_characterization/vadr_update.md)| Update VADR assignments | HAV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | Sample-level | Yes | v1.2.1 | [VADR_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/VADR_Update_PHB:main?tab=info) | +| [**Usher_PHB**](../workflows/genomic_characterization/vadr_update.md)| Update VADR assignments | HAV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | Sample-level | Yes | v2.1.0 | [VADR_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/VADR_Update_PHB:main?tab=info) | +| 
[**VADR_Update**](../workflows/genomic_characterization/vadr_update.md)| Update VADR assignments | HAV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | Sample-level | Yes | v2.2.0 | [VADR_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/VADR_Update_PHB:main?tab=info) | | [**Zip_Column_Content**](../workflows/data_export/zip_column_content.md)| Zip contents of a specified Terra data table column for many samples ("entities") | Any taxa | Set-level | Yes | v2.1.0 | [Zip_Column_Content_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Zip_Column_Content_PHB:main?tab=info) | </div> diff --git a/docs/workflows_overview/workflows_kingdom.md b/docs/workflows_overview/workflows_kingdom.md index d10fa2afd..9d8ffc719 100644 --- a/docs/workflows_overview/workflows_kingdom.md +++ b/docs/workflows_overview/workflows_kingdom.md @@ -15,16 +15,17 @@ title: Workflows by Kingdom | [**Assembly_Fetch**](../workflows/data_import/assembly_fetch.md) | Download assemblies from NCBI, after optionally identifying the closest RefSeq reference genome to your own draft assembly | Any taxa | Sample-level | Yes | v1.3.0 | [Assembly_Fetch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Assembly_Fetch_PHB:main?tab=info) | | [**BaseSpace_Fetch**](../workflows/data_import/basespace_fetch.md)| Import data from BaseSpace into Terra | Any taxa | Sample-level | Yes | v2.0.0 | [BaseSpace_Fetch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/BaseSpace_Fetch_PHB:main?tab=info) | | [**Concatenate_Column_Content**](../workflows/data_export/concatenate_column_content.md) | Concatenate contents of a specified Terra data table column for many samples ("entities") | Any taxa | Set-level | Yes | v2.1.0 | 
[Concatenate_Column_Content_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Concatenate_Column_Content_PHB:main?tab=info) | +| [**Concatenate_Illumina_Lanes**](../workflows/standalone/concatenate_illumina_lanes.md)| Concatenate Illumina lanes for a single sample | Any taxa | Sample-level | Yes | v2.3.0 | [Concatenate_Illumina_Lanes_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Concatenate_Illumina_Lanes_PHB:main?tab=info) | | [**Create_Terra_Table**](../workflows/data_import/create_terra_table.md)| Upload data to Terra and then run this workflow to have the table automatically created | Any taxa | | Yes | v2.2.0 | [Create_Terra_Table_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Create_Terra_Table_PHB:main?tab=info) | -| [**Kraken2**](../workflows/standalone/kraken2.md) | Taxa identification from reads | Any taxa | Sample-level | Yes | v2.0.0 | [Kraken2_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_PE_PHB:main?tab=info), [Kraken2_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_SE_PHB:main?tab=info) | +| [**Fetch_SRR_Accession**](../workflows/public_data_sharing/fetch_srr_accession.md)| Provided a BioSample accession, identify any associated SRR accession(s) | Any taxa | Sample-level | Yes | v2.3.0 | [Fetch_SRR_Accession_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Fetch_SRR_Accession_PHB:main?tab=info) | +| [**Kraken2**](../workflows/standalone/kraken2.md) | Taxa identification from reads | Any taxa | Sample-level | Yes | v2.3.0 | [Kraken2_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_PE_PHB:main?tab=info), [Kraken2_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_SE_PHB:main?tab=info) | | 
[**NCBI_Scrub**](../workflows/standalone/ncbi_scrub.md)| Runs NCBI's HRRT on Illumina FASTQs | Any taxa | Sample-level | Yes | v2.2.1 | [NCBI_Scrub_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI_Scrub_PE_PHB:main?tab=info), [NCBI_Scrub_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI_Scrub_SE_PHB:main?tab=info) | | [**RASUSA**](../workflows/standalone/rasusa.md)| Randomly subsample sequencing reads to a specified coverage | Any taxa | Sample-level | Yes | v2.0.0 | [RASUSA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/RASUSA_PHB:main?tab=info) | | [**Rename_FASTQ**](../workflows/standalone/rename_fastq.md)| Rename paired-end or single-end read files in a Terra data table in a non-destructive way | Any taxa | Sample-level | Yes | v2.1.0 | [Rename_FASTQ_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Rename_FASTQ_PHB:im-utilities-rename-files?tab=info) | | [**SRA_Fetch**](../workflows/data_import/sra_fetch.md)| Import publicly available reads from SRA using SRR#, ERR# or DRR# | Any taxa | Sample-level | Yes | v2.2.0 | [SRA_Fetch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/SRA_Fetch_PHB:main?tab=info) | -| [**TheiaMeta**](../workflows/genomic_characterization/theiameta.md) | Genome assembly and QC from metagenomic sequencing | Any taxa | Sample-level | Yes | v2.0.0 | [TheiaMeta_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaMeta_Illumina_PE_PHB:main?tab=info) | +| [**TheiaMeta**](../workflows/genomic_characterization/theiameta.md) | Genome assembly and QC from metagenomic sequencing | Any taxa | Sample-level | Yes | v2.3.0 | [TheiaMeta_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaMeta_Illumina_PE_PHB:main?tab=info) | | 
[**TheiaValidate**](../workflows/standalone/theiavalidate.md)| This workflow performs basic comparisons between user-designated columns in two separate tables. | Any taxa | | No | v2.0.0 | [TheiaValidate_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaValidate_PHB:main?tab=info) | | [**Transfer_Column_Content**](../workflows/data_export/transfer_column_content.md)| Transfer contents of a specified Terra data table column for many samples ("entities") to a GCP storage bucket location | Any taxa | Set-level | Yes | v1.3.0 | [Transfer_Column_Content_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Transfer_Column_Content_PHB:main?tab=info) | -| [**Fetch_SRR_Accession**](../workflows/public_data_sharing/fetch_srr_accession.md)| Update SRR metadata in a Terra data table at the sample level | Any taxa | Set-level | Yes | v2.3.0 | [Fetch_SRR_Accession_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Fetch_SRR_Accession_PHB:main?tab=info) | | [**Zip_Column_Content**](../workflows/data_export/zip_column_content.md)| Zip contents of a specified Terra data table column for many samples ("entities") | Any taxa | Set-level | Yes | v2.1.0 | [Zip_Column_Content_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Zip_Column_Content_PHB:main?tab=info) | </div> @@ -42,13 +43,13 @@ title: Workflows by Kingdom | [**Lyve_SET**](../workflows/phylogenetic_construction/lyve_set.md)| Alignment of reads to a reference genome, SNP calling, curation of high quality SNPs, phylogenetic analysis | Bacteria | Set-level | Yes | v2.1.0 | [Lyve_SET_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Lyve_SET_PHB:main?tab=info) | | [**MashTree_FASTA**](../workflows/phylogenetic_construction/mashtree_fasta.md)| Mash-distance based phylogenetic analysis from assemblies | Bacteria, Mycotics, Viral | Set-level | Some 
optional features incompatible, Yes | v2.1.0 | [MashTree_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/MashTree_FASTA_PHB:main?tab=info) | | [**NCBI-AMRFinderPlus**](../workflows/standalone/ncbi_amrfinderplus.md)| Runs NCBI's AMRFinderPlus on genome assemblies (bacterial and fungal) | Bacteria, Mycotics | Sample-level | Yes | v2.0.0 | [NCBI-AMRFinderPlus_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI-AMRFinderPlus_PHB:main?tab=info) | -| [**Snippy_Streamline**](../workflows/phylogenetic_construction/snippy_streamline.md)| Implementation of Snippy workflows for phylogenetic analysis from reads, with optional dynamic reference selection | Bacteria | Set-level | Yes | v2.2.0 | [Snippy_Streamline_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Streamline_PHB:main?tab=info) | -| [**Snippy_Streamline_FASTA**](../workflows/phylogenetic_construction/snippy_streamline_fasta.md)| Implementation of Snippy workflows for phylogenetic analysis from assembled genomes (in FASTA format), with optional dynamic reference selection | Bacteria | Set-level | Yes | v2.2.0 | [Snippy_Streamline_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Streamline_FASTA_PHB:im-snippy-fasta-dev?tab=info) | -| [**Snippy_Tree**](../workflows/phylogenetic_construction/snippy_tree.md)| SNP-based phylogenetic analysis from reads, with option to mask recombination | Bacteria | Set-level | Some optional features incompatible, Yes | v2.1.0 | [Snippy_Tree_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Tree_PHB:main?tab=info) | -| [**Snippy_Variants**](../workflows/phylogenetic_construction/snippy_variants.md)| Alignment of reads to a reference genome, then SNP calling | Bacteria, Mycotics, Viral | Sample-level | Yes | v2.2.0 | 
[Snippy_Variants_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Variants_PHB:main?tab=info) | -| [**TBProfiler_tNGS**](../workflows/standalone/tbprofiler_tngs.md)| Performs in silico antimicrobial susceptibility testing on Mycobacterium tuberculosis targeted-NGS samples with TBProfiler and tbp-parser | Bacteria, TB | Sample-level | Yes | v2.0.0 | [TBProfiler_tNGS_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TBProfiler_tNGS_PHB:smw-tngs-tbprofiler-dev?tab=info) | +| [**Snippy_Streamline**](../workflows/phylogenetic_construction/snippy_streamline.md)| Implementation of Snippy workflows for phylogenetic analysis from reads, with optional dynamic reference selection | Bacteria | Set-level | Yes | v2.3.0 | [Snippy_Streamline_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Streamline_PHB:main?tab=info) | +| [**Snippy_Streamline_FASTA**](../workflows/phylogenetic_construction/snippy_streamline_fasta.md)| Implementation of Snippy workflows for phylogenetic analysis from assembled genomes (in FASTA format), with optional dynamic reference selection | Bacteria | Set-level | Yes | v2.3.0 | [Snippy_Streamline_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Streamline_FASTA_PHB:im-snippy-fasta-dev?tab=info) | +| [**Snippy_Tree**](../workflows/phylogenetic_construction/snippy_tree.md)| SNP-based phylogenetic analysis from reads, with option to mask recombination | Bacteria | Set-level | Some optional features incompatible, Yes | v2.3.0 | [Snippy_Tree_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Tree_PHB:main?tab=info) | +| [**Snippy_Variants**](../workflows/phylogenetic_construction/snippy_variants.md)| Alignment of reads to a reference genome, then SNP calling | Bacteria, Mycotics, Viral | Sample-level | Yes | v2.3.0 | 
[Snippy_Variants_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Variants_PHB:main?tab=info) | +| [**TBProfiler_tNGS**](../workflows/standalone/tbprofiler_tngs.md)| Performs in silico antimicrobial susceptibility testing on Mycobacterium tuberculosis targeted-NGS samples with TBProfiler and tbp-parser | Bacteria, TB | Sample-level | Yes | v2.3.0 | [TBProfiler_tNGS_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TBProfiler_tNGS_PHB:smw-tngs-tbprofiler-dev?tab=info) | | [**Terra_2_NCBI**](../workflows/public_data_sharing/terra_2_ncbi.md)| Upload of sequence data to NCBI | Bacteria, Mycotics, Viral | Set-level | No | v2.1.0 | [Terra_2_NCBI_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Terra_2_NCBI_PHB:main?tab=info) | -| [**TheiaProk Workflow Series**](../workflows/genomic_characterization/theiaprok.md) | Bacterial genome assembly, QC and characterization from WGS data | Bacteria | Sample-level | Some optional features incompatible, Yes | v2.2.0 | [TheiaProk_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_PE_PHB:main?tab=info), [TheiaProk_Illumina_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_SE_PHB:main?tab=info), [TheiaProk_ONT_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_ONT_PHB:main?tab=info), [TheiaProk_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_FASTA_PHB:main?tab=info) | +| [**TheiaProk Workflow Series**](../workflows/genomic_characterization/theiaprok.md) | Bacterial genome assembly, QC and characterization from WGS data | Bacteria | Sample-level | Some optional features incompatible, Yes | v2.3.0 | 
[TheiaProk_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_PE_PHB:main?tab=info), [TheiaProk_Illumina_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_SE_PHB:main?tab=info), [TheiaProk_ONT_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_ONT_PHB:main?tab=info), [TheiaProk_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_FASTA_PHB:main?tab=info) | </div> @@ -64,9 +65,9 @@ title: Workflows by Kingdom | [**kSNP3**](../workflows/phylogenetic_construction/ksnp3.md)| SNP-based phylogenetic analysis from assemblies | Bacteria, Mycotics, Viral | Set-level | Some optional features incompatible, Yes | v2.1.0 | [kSNP3_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/kSNP3_PHB:main?tab=info) | | [**MashTree_FASTA**](../workflows/phylogenetic_construction/mashtree_fasta.md)| Mash-distance based phylogenetic analysis from assemblies | Bacteria, Mycotics, Viral | Set-level | Some optional features incompatible, Yes | v2.1.0 | [MashTree_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/MashTree_FASTA_PHB:main?tab=info) | | [**NCBI-AMRFinderPlus**](../workflows/standalone/ncbi_amrfinderplus.md)| Runs NCBI's AMRFinderPlus on genome assemblies (bacterial and fungal) | Bacteria, Mycotics | Sample-level | Yes | v2.0.0 | [NCBI-AMRFinderPlus_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI-AMRFinderPlus_PHB:main?tab=info) | -| [**Snippy_Variants**](../workflows/phylogenetic_construction/snippy_variants.md)| Alignment of reads to a reference genome, then SNP calling | Bacteria, Mycotics, Viral | Sample-level | Yes | v2.2.0 | 
[Snippy_Variants_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Variants_PHB:main?tab=info) | -| [**Terra_2_NCBI**](../workflows/public_data_sharing/terra_2_ncbi.md)| Upload of sequence data to NCBI | Bacteria, Mycotics, Viral | Set-level | No | v2.1.0 | [Terra_2_NCBI_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Terra_2_NCBI_PHB:main?tab=info) | -| [**TheiaEuk**](../workflows/genomic_characterization/theiaeuk.md) | Mycotic genome assembly, QC and characterization from WGS data | Mycotics | Sample-level | Some optional features incompatible, Yes | v2.0.1 | [TheiaEuk_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaEuk_Illumina_PE_PHB:main?tab=info) | +| [**Snippy_Variants**](../workflows/phylogenetic_construction/snippy_variants.md)| Alignment of reads to a reference genome, then SNP calling | Bacteria, Mycotics, Viral | Sample-level | Yes | v2.3.0 | [Snippy_Variants_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Variants_PHB:main?tab=info) | +| [**Terra_2_NCBI**](../workflows/public_data_sharing/terra_2_ncbi.md)| Upload of sequence data to NCBI | Bacteria, Mycotics, Viral | Set-level | No | v2.3.0 | [Terra_2_NCBI_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Terra_2_NCBI_PHB:main?tab=info) | +| [**TheiaEuk**](../workflows/genomic_characterization/theiaeuk.md) | Mycotic genome assembly, QC and characterization from WGS data | Mycotics | Sample-level | Some optional features incompatible, Yes | v2.3.0 | [TheiaEuk_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaEuk_Illumina_PE_PHB:main?tab=info) | </div> @@ -76,20 +77,20 @@ title: Workflows by Kingdom | **Name** | **Description** | **Applicable Kingdom** | **Workflow Level** | **Command-line Compatibility**[^1] | **Last Known Changes** | 
**Dockstore** | |---|---|---|---|---|---|---| -| [**Augur**](../workflows/phylogenetic_construction/augur.md) | Phylogenetic analysis for viral pathogens | Viral | Sample-level, Set-level | Yes | v2.1.0 | [Augur_Prep_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Augur_Prep_PHB:main?tab=info), [Augur_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Augur_PHB:main?tab=info) | +| [**Augur**](../workflows/phylogenetic_construction/augur.md) | Phylogenetic analysis for viral pathogens | Viral | Sample-level, Set-level | Yes | v2.3.0 | [Augur_Prep_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Augur_Prep_PHB:main?tab=info), [Augur_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Augur_PHB:main?tab=info) | | [**CZGenEpi_Prep**](../workflows/phylogenetic_construction/czgenepi_prep.md)| Prepare metadata and fasta files for easy upload to the CZ GEN EPI platform. 
| Monkeypox virus, SARS-CoV-2, Viral | Set-level | No | v1.3.0 | [CZGenEpi_Prep_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/CZGenEpi_Prep_PHB:main?tab=info) | -| [**Freyja Workflow Series**](../workflows/genomic_characterization/freyja.md)| Recovers relative lineage abundances from mixed sample data and generates visualizations | SARS-CoV-2, Viral | Sample-level, Set-level | Yes | v2.2.0 | [Freyja_FASTQ_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_FASTQ_PHB:main?tab=info), [Freyja_Plot_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Plot_PHB:main?tab=info), [Freyja_Dashboard_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Dashboard_PHB:main?tab=info), [Freyja_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Update_PHB:main?tab=info) | +| [**Freyja Workflow Series**](../workflows/genomic_characterization/freyja.md)| Recovers relative lineage abundances from mixed sample data and generates visualizations | SARS-CoV-2, Viral | Sample-level, Set-level | Yes | v2.3.0 | [Freyja_FASTQ_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_FASTQ_PHB:main?tab=info), [Freyja_Plot_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Plot_PHB:main?tab=info), [Freyja_Dashboard_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Dashboard_PHB:main?tab=info), [Freyja_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Update_PHB:main?tab=info) | | [**kSNP3**](../workflows/phylogenetic_construction/ksnp3.md)| SNP-based phylogenetic analysis from assemblies | Bacteria, Mycotics, Viral | Set-level | Some optional features incompatible, Yes | v2.1.0 | 
[kSNP3_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/kSNP3_PHB:main?tab=info) | | [**MashTree_FASTA**](../workflows/phylogenetic_construction/mashtree_fasta.md)| Mash-distance based phylogenetic analysis from assemblies | Bacteria, Mycotics, Viral | Set-level | Some optional features incompatible, Yes | v2.1.0 | [MashTree_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/MashTree_FASTA_PHB:main?tab=info) | -| [**Mercury_Prep_N_Batch**](../workflows/public_data_sharing/mercury_prep_n_batch.md)| Prepare metadata and sequence data for submission to NCBI and GISAID | Influenza, Monkeypox virus, SARS-CoV-2, Viral | Set-level | No | v2.2.0 | [Mercury_Prep_N_Batch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Mercury_Prep_N_Batch_PHB:main?tab=info) | +| [**Mercury_Prep_N_Batch**](../workflows/public_data_sharing/mercury_prep_n_batch.md)| Prepare metadata and sequence data for submission to NCBI and GISAID | Influenza, Monkeypox virus, SARS-CoV-2, Viral | Set-level | No | v2.3.0 | [Mercury_Prep_N_Batch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Mercury_Prep_N_Batch_PHB:main?tab=info) | | [**Pangolin_Update**](../workflows/genomic_characterization/pangolin_update.md) | Update Pangolin assignments | SARS-CoV-2, Viral | Sample-level | Yes | v2.0.0 | [Pangolin_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Pangolin_Update_PHB:main?tab=info) | | [**Samples_to_Ref_Tree**](../workflows/phylogenetic_placement/samples_to_ref_tree.md)| Use Nextclade to rapidly and accurately place your samples on any existing phylogenetic tree | Monkeypox virus, SARS-CoV-2, Viral | Sample-level, Set-level | Yes | v2.1.0 | [Samples_to_Ref_Tree_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Samples_to_Ref_Tree_PHB:main?tab=info) | -| 
[**Snippy_Variants**](../workflows/phylogenetic_construction/snippy_variants.md)| Alignment of reads to a reference genome, then SNP calling | Bacteria, Mycotics, Viral | Sample-level | Yes | v2.2.0 | [Snippy_Variants_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Variants_PHB:main?tab=info) | +| [**Snippy_Variants**](../workflows/phylogenetic_construction/snippy_variants.md)| Alignment of reads to a reference genome, then SNP calling | Bacteria, Mycotics, Viral | Sample-level | Yes | v2.3.0 | [Snippy_Variants_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Variants_PHB:main?tab=info) | | [**Terra_2_GISAID**](../workflows/public_data_sharing/terra_2_gisaid.md)| Upload of assembly data to GISAID | SARS-CoV-2, Viral | Set-level | Yes | v1.2.1 | [Terra_2_GISAID_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Terra_2_GISAID_PHB:main?tab=info) | -| [**Terra_2_NCBI**](../workflows/public_data_sharing/terra_2_ncbi.md)| Upload of sequence data to NCBI | Bacteria, Mycotics, Viral | Set-level | No | v2.1.0 | [Terra_2_NCBI_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Terra_2_NCBI_PHB:main?tab=info) | -| [**TheiaCov Workflow Series**](../workflows/genomic_characterization/theiacov.md) | Viral genome assembly, QC and characterization from amplicon sequencing | HIV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | Sample-level, Set-level | Some optional features incompatible, Yes | v2.2.0 | [TheiaCoV_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_Illumina_PE_PHB:main?tab=info), [TheiaCoV_Illumina_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_Illumina_SE_PHB:main?tab=info), 
[TheiaCoV_ONT_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_ONT_PHB:main?tab=info), [TheiaCoV_ClearLabs_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_ClearLabs_PHB:main?tab=info), [TheiaCoV_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_FASTA_PHB:main?tab=info), [TheiaCoV_FASTA_Batch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_FASTA_Batch_PHB:main?tab=info) | +| [**Terra_2_NCBI**](../workflows/public_data_sharing/terra_2_ncbi.md)| Upload of sequence data to NCBI | Bacteria, Mycotics, Viral | Set-level | No | v2.3.0 | [Terra_2_NCBI_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Terra_2_NCBI_PHB:main?tab=info) | +| [**TheiaCov Workflow Series**](../workflows/genomic_characterization/theiacov.md) | Viral genome assembly, QC and characterization from amplicon sequencing | HIV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | Sample-level, Set-level | Some optional features incompatible, Yes | v2.3.0 | [TheiaCoV_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_Illumina_PE_PHB:main?tab=info), [TheiaCoV_Illumina_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_Illumina_SE_PHB:main?tab=info), [TheiaCoV_ONT_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_ONT_PHB:main?tab=info), [TheiaCoV_ClearLabs_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_ClearLabs_PHB:main?tab=info), [TheiaCoV_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_FASTA_PHB:main?tab=info), 
[TheiaCoV_FASTA_Batch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_FASTA_Batch_PHB:main?tab=info) | | [**Usher_PHB**](../workflows/phylogenetic_placement/usher.md)| Use UShER to rapidly and accurately place your samples on any existing phylogenetic tree | Monkeypox virus, SARS-CoV-2, Viral | Sample-level, Set-level | Yes | v2.1.0 | [Usher_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Usher_PHB:main?tab=info) | -| [**VADR_Update**](../workflows/genomic_characterization/vadr_update.md)| Update VADR assignments | HAV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | Sample-level | Yes | v1.2.1 | [VADR_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/VADR_Update_PHB:main?tab=info) | +| [**VADR_Update**](../workflows/genomic_characterization/vadr_update.md)| Update VADR assignments | HAV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | Sample-level | Yes | v2.2.0 | [VADR_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/VADR_Update_PHB:main?tab=info) | </div> diff --git a/docs/workflows_overview/workflows_type.md b/docs/workflows_overview/workflows_type.md index 14f23fd92..e43a3f7c0 100644 --- a/docs/workflows_overview/workflows_type.md +++ b/docs/workflows_overview/workflows_type.md @@ -15,6 +15,7 @@ title: Workflows by Type | [**Assembly_Fetch**](../workflows/data_import/assembly_fetch.md) | Download assemblies from NCBI, after optionally identifying the closest RefSeq reference genome to your own draft assembly | Any taxa | Sample-level | Yes | v1.3.0 | [Assembly_Fetch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Assembly_Fetch_PHB:main?tab=info) | | [**BaseSpace_Fetch**](../workflows/data_import/basespace_fetch.md)| Import data from BaseSpace into Terra | Any taxa | Sample-level | Yes | v2.0.0 | 
[BaseSpace_Fetch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/BaseSpace_Fetch_PHB:main?tab=info) | | [**Create_Terra_Table**](../workflows/data_import/create_terra_table.md)| Upload data to Terra and then run this workflow to have the table automatically created | Any taxa | | Yes | v2.2.0 | [Create_Terra_Table_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Create_Terra_Table_PHB:main?tab=info) | +| [**Fetch_SRR_Accession**](../workflows/public_data_sharing/fetch_srr_accession.md)| Provided a BioSample accession, identify any associated SRR accession(s) | Any taxa | Sample-level | Yes | v2.3.0 | [Fetch_SRR_Accession_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Fetch_SRR_Accession_PHB:main?tab=info) | | [**SRA_Fetch**](../workflows/data_import/sra_fetch.md)| Import publicly available reads from SRA using SRR#, ERR# or DRR# | Any taxa | Sample-level | Yes | v2.2.0 | [SRA_Fetch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/SRA_Fetch_PHB:main?tab=info) | </div> @@ -25,13 +26,13 @@ title: Workflows by Type | **Name** | **Description** | **Applicable Kingdom** | **Workflow Level** | **Command-line Compatibility**[^1] | **Last Known Changes** | **Dockstore** | |---|---|---|---|---|---|---| -| [**Freyja Workflow Series**](../workflows/genomic_characterization/freyja.md)| Recovers relative lineage abundances from mixed sample data and generates visualizations | SARS-CoV-2, Viral | Sample-level, Set-level | Yes | v2.2.0 | [Freyja_FASTQ_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_FASTQ_PHB:main?tab=info), [Freyja_Plot_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Plot_PHB:main?tab=info), 
[Freyja_Dashboard_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Dashboard_PHB:main?tab=info), [Freyja_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Update_PHB:main?tab=info) | +| [**Freyja Workflow Series**](../workflows/genomic_characterization/freyja.md)| Recovers relative lineage abundances from mixed sample data and generates visualizations | SARS-CoV-2, Viral | Sample-level, Set-level | Yes | v2.3.0 | [Freyja_FASTQ_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_FASTQ_PHB:main?tab=info), [Freyja_Plot_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Plot_PHB:main?tab=info), [Freyja_Dashboard_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Dashboard_PHB:main?tab=info), [Freyja_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Freyja_Update_PHB:main?tab=info) | | [**Pangolin_Update**](../workflows/genomic_characterization/pangolin_update.md) | Update Pangolin assignments | SARS-CoV-2, Viral | Sample-level | Yes | v2.0.0 | [Pangolin_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Pangolin_Update_PHB:main?tab=info) | -| [**TheiaCov Workflow Series**](../workflows/genomic_characterization/theiacov.md) | Viral genome assembly, QC and characterization from amplicon sequencing | HIV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | Sample-level, Set-level | Some optional features incompatible, Yes | v2.2.0 | [TheiaCoV_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_Illumina_PE_PHB:main?tab=info), [TheiaCoV_Illumina_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_Illumina_SE_PHB:main?tab=info), 
[TheiaCoV_ONT_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_ONT_PHB:main?tab=info), [TheiaCoV_ClearLabs_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_ClearLabs_PHB:main?tab=info), [TheiaCoV_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_FASTA_PHB:main?tab=info), [TheiaCoV_FASTA_Batch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_FASTA_Batch_PHB:main?tab=info) | -| [**TheiaEuk**](../workflows/genomic_characterization/theiaeuk.md) | Mycotic genome assembly, QC and characterization from WGS data | Mycotics | Sample-level | Some optional features incompatible, Yes | v2.0.1 | [TheiaEuk_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaEuk_Illumina_PE_PHB:main?tab=info) | -| [**TheiaMeta**](../workflows/genomic_characterization/theiameta.md) | Genome assembly and QC from metagenomic sequencing | Any taxa | Sample-level | Yes | v2.0.0 | [TheiaMeta_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaMeta_Illumina_PE_PHB:main?tab=info) | -| [**TheiaProk Workflow Series**](../workflows/genomic_characterization/theiaprok.md) | Bacterial genome assembly, QC and characterization from WGS data | Bacteria | Sample-level | Some optional features incompatible, Yes | v2.2.0 | [TheiaProk_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_PE_PHB:main?tab=info), [TheiaProk_Illumina_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_SE_PHB:main?tab=info), [TheiaProk_ONT_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_ONT_PHB:main?tab=info), 
[TheiaProk_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_FASTA_PHB:main?tab=info) | -| [**VADR_Update**](../workflows/genomic_characterization/vadr_update.md)| Update VADR assignments | HAV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | Sample-level | Yes | v1.2.1 | [VADR_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/VADR_Update_PHB:main?tab=info) | +| [**TheiaCov Workflow Series**](../workflows/genomic_characterization/theiacov.md) | Viral genome assembly, QC and characterization from amplicon sequencing | HIV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | Sample-level, Set-level | Some optional features incompatible, Yes | v2.3.0 | [TheiaCoV_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_Illumina_PE_PHB:main?tab=info), [TheiaCoV_Illumina_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_Illumina_SE_PHB:main?tab=info), [TheiaCoV_ONT_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_ONT_PHB:main?tab=info), [TheiaCoV_ClearLabs_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_ClearLabs_PHB:main?tab=info), [TheiaCoV_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_FASTA_PHB:main?tab=info), [TheiaCoV_FASTA_Batch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaCoV_FASTA_Batch_PHB:main?tab=info) | +| [**TheiaEuk**](../workflows/genomic_characterization/theiaeuk.md) | Mycotic genome assembly, QC and characterization from WGS data | Mycotics | Sample-level | Some optional features incompatible, Yes | v2.3.0 | 
[TheiaEuk_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaEuk_Illumina_PE_PHB:main?tab=info) | +| [**TheiaMeta**](../workflows/genomic_characterization/theiameta.md) | Genome assembly and QC from metagenomic sequencing | Any taxa | Sample-level | Yes | v2.3.0 | [TheiaMeta_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaMeta_Illumina_PE_PHB:main?tab=info) | +| [**TheiaProk Workflow Series**](../workflows/genomic_characterization/theiaprok.md) | Bacterial genome assembly, QC and characterization from WGS data | Bacteria | Sample-level | Some optional features incompatible, Yes | v2.3.0 | [TheiaProk_Illumina_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_PE_PHB:main?tab=info), [TheiaProk_Illumina_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_Illumina_SE_PHB:main?tab=info), [TheiaProk_ONT_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_ONT_PHB:main?tab=info), [TheiaProk_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaProk_FASTA_PHB:main?tab=info) | +| [**VADR_Update**](../workflows/genomic_characterization/vadr_update.md)| Update VADR assignments | HAV, Influenza, Monkeypox virus, RSV-A, RSV-B, SARS-CoV-2, Viral, WNV | Sample-level | Yes | v2.2.1 | [VADR_Update_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/VADR_Update_PHB:main?tab=info) | </div> @@ -41,17 +42,17 @@ title: Workflows by Type | **Name** | **Description** | **Applicable Kingdom** | **Workflow Level** | **Command-line Compatibility**[^1] | **Last Known Changes** | **Dockstore** | |---|---|---|---|---|---|---| -| [**Augur**](../workflows/phylogenetic_construction/augur.md) | Phylogenetic analysis for viral pathogens | Viral | Sample-level, Set-level | 
Yes | v2.1.0 | [Augur_Prep_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Augur_Prep_PHB:main?tab=info), [Augur_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Augur_PHB:main?tab=info) | +| [**Augur**](../workflows/phylogenetic_construction/augur.md) | Phylogenetic analysis for viral pathogens | Viral | Sample-level, Set-level | Yes | v2.3.0 | [Augur_Prep_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Augur_Prep_PHB:main?tab=info), [Augur_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Augur_PHB:main?tab=info) | | [**Core_Gene_SNP**](../workflows/phylogenetic_construction/core_gene_snp.md) | Pangenome analysis | Bacteria | Set-level | Some optional features incompatible, Yes | v2.1.0 | [Core_Gene_SNP_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Core_Gene_SNP_PHB:main?tab=info) | | [**CZGenEpi_Prep**](../workflows/phylogenetic_construction/czgenepi_prep.md)| Prepare metadata and fasta files for easy upload to the CZ GEN EPI platform. 
| Monkeypox virus, SARS-CoV-2, Viral | Set-level | No | v1.3.0 | [CZGenEpi_Prep_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/CZGenEpi_Prep_PHB:main?tab=info) | | [**Find_Shared_Variants**](../workflows/phylogenetic_construction/find_shared_variants.md)| Combines and reshapes variant data from Snippy_Variants to illustrate variants shared across multiple samples | Bacteria, Mycotics | Set-level | Yes | v2.0.0 | [Find_Shared_Variants_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Find_Shared_Variants_PHB:main?tab=info) | | [**kSNP3**](../workflows/phylogenetic_construction/ksnp3.md)| SNP-based phylogenetic analysis from assemblies | Bacteria, Mycotics, Viral | Set-level | Some optional features incompatible, Yes | v2.1.0 | [kSNP3_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/kSNP3_PHB:main?tab=info) | | [**Lyve_SET**](../workflows/phylogenetic_construction/lyve_set.md)| Alignment of reads to a reference genome, SNP calling, curation of high quality SNPs, phylogenetic analysis | Bacteria | Set-level | Yes | v2.1.0 | [Lyve_SET_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Lyve_SET_PHB:main?tab=info) | | [**MashTree_FASTA**](../workflows/phylogenetic_construction/mashtree_fasta.md)| Mash-distance based phylogenetic analysis from assemblies | Bacteria, Mycotics, Viral | Set-level | Some optional features incompatible, Yes | v2.1.0 | [MashTree_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/MashTree_FASTA_PHB:main?tab=info) | -| [**Snippy_Streamline**](../workflows/phylogenetic_construction/snippy_streamline.md)| Implementation of Snippy workflows for phylogenetic analysis from reads, with optional dynamic reference selection | Bacteria | Set-level | Yes | v2.2.0 | 
[Snippy_Streamline_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Streamline_PHB:main?tab=info) | -| [**Snippy_Streamline_FASTA**](../workflows/phylogenetic_construction/snippy_streamline_fasta.md)| Implementation of Snippy workflows for phylogenetic analysis from assembled genomes (in FASTA format), with optional dynamic reference selection | Bacteria | Set-level | Yes | v2.2.0 | [Snippy_Streamline_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Streamline_FASTA_PHB:im-snippy-fasta-dev?tab=info) | -| [**Snippy_Tree**](../workflows/phylogenetic_construction/snippy_tree.md)| SNP-based phylogenetic analysis from reads, with option to mask recombination | Bacteria | Set-level | Some optional features incompatible, Yes | v2.1.0 | [Snippy_Tree_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Tree_PHB:main?tab=info) | -| [**Snippy_Variants**](../workflows/phylogenetic_construction/snippy_variants.md)| Alignment of reads to a reference genome, then SNP calling | Bacteria, Mycotics, Viral | Sample-level | Yes | v2.2.0 | [Snippy_Variants_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Variants_PHB:main?tab=info) | +| [**Snippy_Streamline**](../workflows/phylogenetic_construction/snippy_streamline.md)| Implementation of Snippy workflows for phylogenetic analysis from reads, with optional dynamic reference selection | Bacteria | Set-level | Yes | v2.3.0 | [Snippy_Streamline_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Streamline_PHB:main?tab=info) | +| [**Snippy_Streamline_FASTA**](../workflows/phylogenetic_construction/snippy_streamline_fasta.md)| Implementation of Snippy workflows for phylogenetic analysis from assembled genomes (in FASTA format), with optional dynamic reference selection | Bacteria | Set-level | Yes | v2.3.0 | 
[Snippy_Streamline_FASTA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Streamline_FASTA_PHB:im-snippy-fasta-dev?tab=info) | +| [**Snippy_Tree**](../workflows/phylogenetic_construction/snippy_tree.md)| SNP-based phylogenetic analysis from reads, with option to mask recombination | Bacteria | Set-level | Some optional features incompatible, Yes | v2.3.0 | [Snippy_Tree_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Tree_PHB:main?tab=info) | +| [**Snippy_Variants**](../workflows/phylogenetic_construction/snippy_variants.md)| Alignment of reads to a reference genome, then SNP calling | Bacteria, Mycotics, Viral | Sample-level | Yes | v2.3.0 | [Snippy_Variants_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Snippy_Variants_PHB:main?tab=info) | </div> @@ -72,10 +73,9 @@ title: Workflows by Type | **Name** | **Description** | **Applicable Kingdom** | **Workflow Level** | **Command-line Compatibility**[^1] | **Last Known Changes** | **Dockstore** | |---|---|---|---|---|---|---| -| [**Mercury_Prep_N_Batch**](../workflows/public_data_sharing/mercury_prep_n_batch.md)| Prepare metadata and sequence data for submission to NCBI and GISAID | Influenza, Monkeypox virus, SARS-CoV-2, Viral | Set-level | No | v2.2.0 | [Mercury_Prep_N_Batch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Mercury_Prep_N_Batch_PHB:main?tab=info) | +| [**Mercury_Prep_N_Batch**](../workflows/public_data_sharing/mercury_prep_n_batch.md)| Prepare metadata and sequence data for submission to NCBI and GISAID | Influenza, Monkeypox virus, SARS-CoV-2, Viral | Set-level | No | v2.3.0 | [Mercury_Prep_N_Batch_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Mercury_Prep_N_Batch_PHB:main?tab=info) | | [**Terra_2_GISAID**](../workflows/public_data_sharing/terra_2_gisaid.md)| Upload of assembly data to GISAID | 
SARS-CoV-2, Viral | Set-level | Yes | v1.2.1 | [Terra_2_GISAID_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Terra_2_GISAID_PHB:main?tab=info) | | [**Terra_2_NCBI**](../workflows/public_data_sharing/terra_2_ncbi.md)| Upload of sequence data to NCBI | Bacteria, Mycotics, Viral | Set-level | No | v2.1.0 | [Terra_2_NCBI_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Terra_2_NCBI_PHB:main?tab=info) | -| [**Fetch_SRR_Accession**](../workflows/public_data_sharing/fetch_srr_accession.md)| Update SRR metadata in a Terra data table at the sample level | Any taxa | | Yes | v2.3.0 | [Fetch_SRR_Accession_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Fetch_SRR_Accession_PHB:main?tab=info) | </div> @@ -98,13 +98,14 @@ title: Workflows by Type | **Name** | **Description** | **Applicable Kingdom** | **Workflow Level** | **Command-line Compatibility**[^1] | **Last Known Changes** | **Dockstore** | |---|---|---|---|---|---|---| | [**Cauris_CladeTyper**](../workflows/standalone/cauris_cladetyper.md)| C. 
auris clade assignment | Mycotics | Sample-level | Yes | v1.0.0 | [Cauris_CladeTyper_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Cauris_CladeTyper_PHB:main?tab=info) | +| [**Concatenate_Illumina_Lanes**](../workflows/standalone/concatenate_illumina_lanes.md)| Concatenate Illumina lanes for a single sample | Any taxa | Sample-level | Yes | v2.3.0 | [Concatenate_Illumina_Lanes_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Concatenate_Illumina_Lanes_PHB:main?tab=info) | | [**GAMBIT_Query**](../workflows/standalone/gambit_query.md)| Taxon identification of genome assembly using GAMBIT | Bacteria, Mycotics | Sample-level | Yes | v2.0.0 | [Gambit_Query_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Gambit_Query_PHB:main?tab=info) | -| [**Kraken2**](../workflows/standalone/kraken2.md) | Taxa identification from reads | Any taxa | Sample-level | Yes | v2.0.0 | [Kraken2_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_PE_PHB:main?tab=info), [Kraken2_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_SE_PHB:main?tab=info) | +| [**Kraken2**](../workflows/standalone/kraken2.md) | Taxa identification from reads | Any taxa | Sample-level | Yes | v2.3.0 | [Kraken2_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_PE_PHB:main?tab=info), [Kraken2_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Kraken2_SE_PHB:main?tab=info) | | [**NCBI-AMRFinderPlus**](../workflows/standalone/ncbi_amrfinderplus.md)| Runs NCBI's AMRFinderPlus on genome assemblies (bacterial and fungal) | Bacteria, Mycotics | Sample-level | Yes | v2.0.0 | [NCBI-AMRFinderPlus_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI-AMRFinderPlus_PHB:main?tab=info) | | 
[**NCBI_Scrub**](../workflows/standalone/ncbi_scrub.md)| Runs NCBI's HRRT on Illumina FASTQs | Any taxa | Sample-level | Yes | v2.2.1 | [NCBI_Scrub_PE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI_Scrub_PE_PHB:main?tab=info), [NCBI_Scrub_SE_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/NCBI_Scrub_SE_PHB:main?tab=info) | | [**RASUSA**](../workflows/standalone/rasusa.md)| Randomly subsample sequencing reads to a specified coverage | Any taxa | Sample-level | Yes | v2.0.0 | [RASUSA_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/RASUSA_PHB:main?tab=info) | | [**Rename_FASTQ**](../workflows/standalone/rename_fastq.md)| Rename paired-end or single-end read files in a Terra data table in a non-destructive way | Any taxa | Sample-level | Yes | v2.1.0 | [Rename_FASTQ_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/Rename_FASTQ_PHB:im-utilities-rename-files?tab=info) | -| [**TBProfiler_tNGS**](../workflows/standalone/tbprofiler_tngs.md)| Performs in silico antimicrobial susceptibility testing on Mycobacterium tuberculosis targeted-NGS samples with TBProfiler and tbp-parser | Bacteria, TB | Sample-level | Yes | v2.0.0 | [TBProfiler_tNGS_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TBProfiler_tNGS_PHB:smw-tngs-tbprofiler-dev?tab=info) | +| [**TBProfiler_tNGS**](../workflows/standalone/tbprofiler_tngs.md)| Performs in silico antimicrobial susceptibility testing on Mycobacterium tuberculosis targeted-NGS samples with TBProfiler and tbp-parser | Bacteria, TB | Sample-level | Yes | v2.3.0 | [TBProfiler_tNGS_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TBProfiler_tNGS_PHB:smw-tngs-tbprofiler-dev?tab=info) | | [**TheiaValidate**](../workflows/standalone/theiavalidate.md)| This workflow performs basic comparisons between user-designated columns 
in two separate tables. | Any taxa | | No | v2.0.0 | [TheiaValidate_PHB](https://dockstore.org/workflows/github.com/theiagen/public_health_bioinformatics/TheiaValidate_PHB:main?tab=info) | </div> diff --git a/mkdocs.yml b/mkdocs.yml index 613f81b15..0bb1d3f4f 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -53,6 +53,7 @@ nav: - Zip_Column_Content: workflows/data_export/zip_column_content.md - Standalone: - Cauris_CladeTyper: workflows/standalone/cauris_cladetyper.md + - Concatenate_Illumina_Lanes: workflows/standalone/concatenate_illumina_lanes.md - GAMBIT_Query: workflows/standalone/gambit_query.md - Kraken2: workflows/standalone/kraken2.md - NCBI-AMRFinderPlus: workflows/standalone/ncbi_amrfinderplus.md @@ -66,7 +67,8 @@ nav: - Any Taxa: - Assembly_Fetch: workflows/data_import/assembly_fetch.md - BaseSpace_Fetch: workflows/data_import/basespace_fetch.md - - Concatenate_Column_Content: workflows/data_export/concatenate_column_content.md + - Concatenate_Column_Content: workflows/data_export/concatenate_column_content.md + - Concatenate_Illumina_Lanes: workflows/standalone/concatenate_illumina_lanes.md - Create_Terra_Table: workflows/data_import/create_terra_table.md - Kraken2: workflows/standalone/kraken2.md - NCBI-Scrub: workflows/standalone/ncbi_scrub.md @@ -124,6 +126,7 @@ nav: - BaseSpace_Fetch: workflows/data_import/basespace_fetch.md - Cauris_CladeTyper: workflows/standalone/cauris_cladetyper.md - Concatenate_Column_Content: workflows/data_export/concatenate_column_content.md + - Concatenate_Illumina_Lanes: workflows/standalone/concatenate_illumina_lanes.md - Core_Gene_SNP: workflows/phylogenetic_construction/core_gene_snp.md - Create_Terra_Table: workflows/data_import/create_terra_table.md - CZGenEpi_Prep: workflows/phylogenetic_construction/czgenepi_prep.md diff --git a/tasks/quality_control/read_filtering/task_trimmomatic.wdl b/tasks/quality_control/read_filtering/task_trimmomatic.wdl index e8a246497..42f62559d 100644 --- 
a/tasks/quality_control/read_filtering/task_trimmomatic.wdl +++ b/tasks/quality_control/read_filtering/task_trimmomatic.wdl @@ -40,9 +40,9 @@ task trimmomatic_pe { -threads ~{cpu} \ ~{read1} ~{read2} \ -baseout ~{samplename}.fastq.gz \ + "${CROPPING_VAR}" \ SLIDINGWINDOW:~{trimmomatic_window_size}:~{trimmomatic_quality_trim_score} \ - MINLEN:~{trimmomatic_min_length} &> ~{samplename}.trim.stats.txt \ - "${CROPPING_VAR}" + MINLEN:~{trimmomatic_min_length} &> ~{samplename}.trim.stats.txt >>> output { diff --git a/tasks/species_typing/mycobacterium/task_tbp_parser.wdl b/tasks/species_typing/mycobacterium/task_tbp_parser.wdl index 310ee61fd..60baa0b99 100644 --- a/tasks/species_typing/mycobacterium/task_tbp_parser.wdl +++ b/tasks/species_typing/mycobacterium/task_tbp_parser.wdl @@ -9,17 +9,18 @@ task tbp_parser { String? sequencing_method String? operator + Int? min_depth # default 10 - Int? coverage_threshold # default 100 (--min_percent_coverage) - File? coverage_regions_bed Float? min_frequency # default 0.1 Int? min_read_support # default 10 + + Int? coverage_threshold # default 100 (--min_percent_coverage) + File? coverage_regions_bed - Boolean tbp_parser_debug = false - Boolean add_cycloserine_lims = false - + Boolean tbp_parser_debug = true Boolean tngs_data = false + Float? rrs_frequency # default 0.1 Int? rrs_read_support # default 10 Float? rrl_frequency # default 0.1 @@ -27,11 +28,11 @@ task tbp_parser { Float? rpob449_frequency # default 0.1 Float? etha237_frequency # default 0.1 File? 
expert_rule_regions_bed - - String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.6.0" - Int disk_size = 100 - Int memory = 4 + Int cpu = 1 + Int disk_size = 100 + String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:2.2.1" + Int memory = 4 } command <<< # get version @@ -42,10 +43,10 @@ task tbp_parser { ~{"--sequencing_method " + sequencing_method} \ ~{"--operator " + operator} \ ~{"--min_depth " + min_depth} \ - ~{"--min_percent_coverage " + coverage_threshold} \ - ~{"--coverage_regions " + coverage_regions_bed} \ ~{"--min_frequency " + min_frequency} \ ~{"--min_read_support " + min_read_support} \ + ~{"--min_percent_coverage " + coverage_threshold} \ + ~{"--coverage_regions " + coverage_regions_bed} \ ~{"--tngs_expert_regions " + expert_rule_regions_bed} \ ~{"--rrs_frequency " + rrs_frequency} \ ~{"--rrs_read_support " + rrs_read_support} \ @@ -63,7 +64,7 @@ task tbp_parser { echo 0.0 > AVG_DEPTH # get genome percent coverage for the entire reference genome length over min_depth - genome=$(samtools depth -J ~{tbprofiler_bam} | awk -F "\t" '{if ($3 >= ~{min_depth}) print;}' | wc -l ) + genome=$(samtools depth -J ~{tbprofiler_bam} | awk -F "\t" -v min_depth=~{min_depth} '{if ($3 >= min_depth) print;}' | wc -l ) python3 -c "print ( ($genome / 4411532 ) * 100 )" | tee GENOME_PC # get genome average depth diff --git a/tasks/species_typing/mycobacterium/task_tbprofiler.wdl b/tasks/species_typing/mycobacterium/task_tbprofiler.wdl index 21c4f4b15..bc81ce419 100644 --- a/tasks/species_typing/mycobacterium/task_tbprofiler.wdl +++ b/tasks/species_typing/mycobacterium/task_tbprofiler.wdl @@ -5,84 +5,74 @@ task tbprofiler { File read1 File? read2 String samplename - - # logic Boolean ont_data = false - Boolean tbprofiler_run_custom_db = false - File? 
tbprofiler_custom_db - # minimum thresholds - Int cov_frac_threshold = 1 - Float min_af = 0.1 - Float min_af_pred = 0.1 - Int min_depth = 10 - # tool options within tbprofiler + String mapper = "bwa" - String variant_caller = "freebayes" + String variant_caller = "gatk" String? variant_calling_params - # runtime + + String? additional_parameters # for tbprofiler + + Int min_depth = 10 + Float min_af = 0.1 + + File? tbprofiler_custom_db + Boolean tbprofiler_run_cdph_db = false + Boolean tbprofiler_run_custom_db = false + Int cpu = 8 Int disk_size = 100 - String docker = "us-docker.pkg.dev/general-theiagen/staphb/tbprofiler:4.4.2" + String docker = "us-docker.pkg.dev/general-theiagen/staphb/tbprofiler:6.4.1" Int memory = 16 } command <<< - # Print and save date - date | tee DATE - # Print and save version tb-profiler version > VERSION && sed -i -e 's/TBProfiler version //' VERSION && sed -n -i '$p' VERSION # check if file is non existant or non empty - if [ -z "~{read2}" ] || [ ! -s "~{read2}" ] ; then + if [ -z "~{read2}" ] || [ ! -s "~{read2}" ]; then INPUT_READS="-1 ~{read1}" else INPUT_READS="-1 ~{read1} -2 ~{read2}" fi - - if [ "~{ont_data}" = true ]; then - mode="--platform nanopore" - export ont_data="true" - else - export ont_data="false" - fi # check if new database file is provided and not empty - if [ "~{tbprofiler_run_custom_db}" = true ] ; then - echo "Found new database file ~{tbprofiler_custom_db}" - prefix=$(basename "~{tbprofiler_custom_db}" | sed 's/\.tar\.gz$//') - echo "New database will be created with prefix $prefix" - - echo "Inflating the new database..." - tar xfv ~{tbprofiler_custom_db} + if ~{tbprofiler_run_custom_db}; then + if [ ! 
-s ~{tbprofiler_custom_db} ]; then + echo "Custom database file is empty" + TBDB="" + else + echo "Found new database file ~{tbprofiler_custom_db}" + prefix=$(basename "~{tbprofiler_custom_db}" | sed 's/\.tar\.gz$//') + tar xfv ~{tbprofiler_custom_db} + + tb-profiler load_library ./"$prefix"/"$prefix" - tb-profiler load_library ./"$prefix"/"$prefix" - - TBDB="--db $prefix" - else - TBDB="" + TBDB="--db $prefix" + fi + elif ~{tbprofiler_run_cdph_db}; then + tb-profiler update_tbdb --branch CaliforniaDPH + TBDB="--db CaliforniaDPH" fi # Run tb-profiler on the input reads with samplename prefix tb-profiler profile \ - ${mode} \ ${INPUT_READS} \ --prefix ~{samplename} \ --mapper ~{mapper} \ --caller ~{variant_caller} \ --calling_params "~{variant_calling_params}" \ - --min_depth ~{min_depth} \ + --depth ~{min_depth} \ --af ~{min_af} \ - --reporting_af ~{min_af_pred} \ - --coverage_fraction_threshold ~{cov_frac_threshold} \ + --threads ~{cpu} \ --csv --txt \ - $TBDB + ~{true="--platform nanopore" false="" ont_data} \ + ~{additional_parameters} \ + ${TBDB} # Collate results tb-profiler collate --prefix ~{samplename} - # touch optional output files because wdl - touch GENE_NAME LOCUS_TAG VARIANT_SUBSTITUTIONS OUTPUT_SEQ_METHOD_TYPE - # merge all vcf files if multiple are present bcftools index ./vcf/*bcf bcftools index ./vcf/*gz @@ -97,35 +87,32 @@ task tbprofiler { tsv_reader=csv.reader(tsv_file, delimiter="\t") tsv_data=list(tsv_reader) tsv_dict=dict(zip(tsv_data[0], tsv_data[1])) - with open ("MAIN_LINEAGE", 'wt') as Main_Lineage: - main_lin=tsv_dict['main_lineage'] - Main_Lineage.write(main_lin) - with open ("SUB_LINEAGE", 'wt') as Sub_Lineage: - sub_lin=tsv_dict['sub_lineage'] - Sub_Lineage.write(sub_lin) - with open ("DR_TYPE", 'wt') as DR_Type: - dr_type=tsv_dict['DR_type'] - DR_Type.write(dr_type) - with open ("NUM_DR_VARIANTS", 'wt') as Num_DR_Variants: - num_dr_vars=tsv_dict['num_dr_variants'] - Num_DR_Variants.write(num_dr_vars) - with open 
("NUM_OTHER_VARIANTS", 'wt') as Num_Other_Variants: - num_other_vars=tsv_dict['num_other_variants'] - Num_Other_Variants.write(num_other_vars) - with open ("RESISTANCE_GENES", 'wt') as Resistance_Genes: - res_genes_list=['rifampicin', 'isoniazid', 'pyrazinamide', 'ethambutol', 'streptomycin', 'fluoroquinolones', 'moxifloxacin', 'ofloxacin', 'levofloxacin', 'ciprofloxacin', 'aminoglycosides', 'amikacin', 'kanamycin', 'capreomycin', 'ethionamide', 'para-aminosalicylic_acid', 'cycloserine', 'linezolid', 'bedaquiline', 'clofazimine', 'delamanid'] + + with open ("MAIN_LINEAGE", 'wt') as main_lineage: + main_lineage.write(tsv_dict['main_lineage']) + with open ("SUB_LINEAGE", 'wt') as sublineage: + sublineage.write(tsv_dict['sub_lineage']) + + with open ("DR_TYPE", 'wt') as dr_type: + dr_type.write(tsv_dict['drtype']) + with open ("NUM_DR_VARIANTS", 'wt') as num_dr_variants: + num_dr_variants.write(tsv_dict['num_dr_variants']) + with open ("NUM_OTHER_VARIANTS", 'wt') as num_other_variants: + num_other_variants.write(tsv_dict['num_other_variants']) + + with open ("RESISTANCE_GENES", 'wt') as resistance_genes: + res_genes_list=['rifampicin', 'isoniazid', 'ethambutol', 'pyrazinamide', 'moxifloxacin', 'levofloxacin', 'bedaquiline', 'delamanid', 'pretomanid', 'linezolid', 'streptomycin', 'amikacin', 'kanamycin', 'capreomycin', 'clofazimine', 'ethionamide', 'para-aminosalicylic_acid', 'cycloserine'] res_genes=[] for i in res_genes_list: if tsv_dict[i] != '-': res_genes.append(tsv_dict[i]) res_genes_string=';'.join(res_genes) - Resistance_Genes.write(res_genes_string) - with open ("MEDIAN_COVERAGE", 'wt') as Median_Coverage: - median_coverage=tsv_dict['median_coverage'] - Median_Coverage.write(median_coverage) - with open ("PCT_READS_MAPPED", 'wt') as Pct_Reads_Mapped: - pct_reads_mapped=tsv_dict['pct_reads_mapped'] - Pct_Reads_Mapped.write(pct_reads_mapped) + resistance_genes.write(res_genes_string) + + with open ("MEDIAN_DEPTH", 'wt') as median_depth: + 
median_depth.write(tsv_dict['target_median_depth']) + with open ("PCT_READS_MAPPED", 'wt') as pct_reads_mapped: + pct_reads_mapped.write(tsv_dict['pct_reads_mapped']) CODE >>> output { @@ -134,7 +121,7 @@ task tbprofiler { File tbprofiler_output_json = "./results/~{samplename}.results.json" File tbprofiler_output_bam = "./bam/~{samplename}.bam" File tbprofiler_output_bai = "./bam/~{samplename}.bam.bai" - File tbprofiler_output_vcf = "./vcf/~{samplename}.targets.csq.merged.vcf" + File? tbprofiler_output_vcf = "./vcf/~{samplename}.targets.csq.merged.vcf" String version = read_string("VERSION") String tbprofiler_main_lineage = read_string("MAIN_LINEAGE") String tbprofiler_sub_lineage = read_string("SUB_LINEAGE") @@ -142,7 +129,7 @@ task tbprofiler { String tbprofiler_num_dr_variants = read_string("NUM_DR_VARIANTS") String tbprofiler_num_other_variants = read_string("NUM_OTHER_VARIANTS") String tbprofiler_resistance_genes = read_string("RESISTANCE_GENES") - Int tbprofiler_median_coverage = read_int("MEDIAN_COVERAGE") + Float tbprofiler_median_depth = read_float("MEDIAN_DEPTH") Float tbprofiler_pct_reads_mapped = read_float("PCT_READS_MAPPED") } runtime { diff --git a/tasks/utilities/file_handling/task_cat_lanes.wdl b/tasks/utilities/file_handling/task_cat_lanes.wdl new file mode 100644 index 000000000..60a2b7d28 --- /dev/null +++ b/tasks/utilities/file_handling/task_cat_lanes.wdl @@ -0,0 +1,54 @@ +version 1.0 + +task cat_lanes { + input { + String samplename + + File read1_lane1 + File read1_lane2 + File? read1_lane3 + File? read1_lane4 + + File? read2_lane1 + File? read2_lane2 + File? read2_lane3 + File? 
read2_lane4 + + Int cpu = 2 + Int disk_size = 50 + String docker = "us-docker.pkg.dev/general-theiagen/theiagen/utility:1.2" + Int memory = 4 + } + meta { + volatile: true + } + command <<< + # exit task if anything throws an error (important for proper gzip format) + set -euo pipefail + + exists() { [[ -f $1 ]]; } + + set -euo pipefail + + cat ~{read1_lane1} ~{read1_lane2} ~{read1_lane3} ~{read1_lane4} > "~{samplename}_merged_R1.fastq.gz" + + if exists ~{read2_lane1} ; then + cat ~{read2_lane1} ~{read2_lane2} ~{read2_lane3} ~{read2_lane4} > "~{samplename}_merged_R2.fastq.gz" + fi + + # ensure newly merged FASTQs are valid gzipped format + gzip -t *merged*.gz + >>> + output { + File read1_concatenated = "~{samplename}_merged_R1.fastq.gz" + File? read2_concatenated = "~{samplename}_merged_R2.fastq.gz" + } + runtime { + docker: "~{docker}" + memory: memory + " GB" + cpu: cpu + disks: "local-disk " + disk_size + " SSD" + disk: disk_size + " GB" + preemptible: 1 + } +} \ No newline at end of file diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml index 38f979119..58f0b6fb4 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml @@ -70,81 +70,6 @@ md5sum: 3cfdda0096f0689c9829ed27bdef6b1a - path: miniwdl_run/call-busco/work/_miniwdl_inputs/0/test_contigs.fasta md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: miniwdl_run/call-busco/work/busco_downloads/file_versions.tsv - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/ancestral - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/ancestral_variants - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/dataset.cfg - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/101957at2157.hmm - - path: 
miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/102178at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/102360at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/98221at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/98657at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/99236at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/99734at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/99842at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/info/ogs.id.info - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/info/species.info - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/lengths_cutoff - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/links_to_ODB10.txt - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/scores_cutoff - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/ancestral - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/ancestral_variants - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/dataset.cfg - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/hmms/1009041at2.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/hmms/1024388at2.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/hmms/1036075at2.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/hmms/1043239at2.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/hmms/961486at2.hmm - - path: 
miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/hmms/981870at2.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/hmms/984717at2.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/info/ogs.id.info - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/info/species.info - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/lengths_cutoff - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/links_to_ODB10.txt - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/scores_cutoff - - path: miniwdl_run/call-busco/work/busco_downloads/placement_files/list_of_reference_markers.bacteria_odb10.2019-12-16.txt - - path: miniwdl_run/call-busco/work/busco_downloads/placement_files/mapping_taxid-lineage.bacteria_odb10.2019-12-16.txt - - path: miniwdl_run/call-busco/work/busco_downloads/placement_files/mapping_taxids-busco_dataset_name.bacteria_odb10.2019-12-16.txt - - path: miniwdl_run/call-busco/work/busco_downloads/placement_files/supermatrix.aln.bacteria_odb10.2019-12-16.faa - - path: miniwdl_run/call-busco/work/busco_downloads/placement_files/tree.bacteria_odb10.2019-12-16.nwk - - path: miniwdl_run/call-busco/work/busco_downloads/placement_files/tree_metadata.bacteria_odb10.2019-12-16.txt - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/busco_sequences/fragmented_busco_sequences/108145at2157.faa - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/busco_sequences/fragmented_busco_sequences/108145at2157.fna - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/busco_sequences/single_copy_busco_sequences/84219at2157.faa - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/busco_sequences/single_copy_busco_sequences/84219at2157.fna - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/full_table.tsv 
- - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/hmmer_output/101957at2157.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/hmmer_output/102178at2157.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/hmmer_output/102360at2157.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/hmmer_output/99734at2157.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/hmmer_output/99842at2157.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/missing_busco_list.tsv - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/short_summary.json - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/short_summary.txt - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences/1540940at2.faa - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences/1540940at2.fna - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences/1827334at2.faa - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences/1132353at2.faa - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences/1132353at2.fna - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences/1211060at2.faa - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/full_table.tsv - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/hmmer_output/1009041at2.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/hmmer_output/1024388at2.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/hmmer_output/1036075at2.out 
- - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/hmmer_output/961486at2.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/hmmer_output/981870at2.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/hmmer_output/984717at2.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/missing_busco_list.tsv - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/placement_files/marker_genes.fasta - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/short_summary.json - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/short_summary.txt - - path: miniwdl_run/call-busco/work/test/logs/busco.log - - path: miniwdl_run/call-busco/work/test/logs/hmmsearch_err.log - - path: miniwdl_run/call-busco/work/test/logs/hmmsearch_out.log - - path: miniwdl_run/call-busco/work/test/logs/prodigal_err.log - - path: miniwdl_run/call-busco/work/test/logs/prodigal_out.log - - path: miniwdl_run/call-busco/work/test/logs/sepp_err.log - - path: miniwdl_run/call-busco/work/test/logs/sepp_out.log - path: miniwdl_run/call-busco/work/test/prodigal_output/predicted_genes/predicted.faa - path: miniwdl_run/call-busco/work/test/prodigal_output/predicted_genes/predicted.fna - path: miniwdl_run/call-busco/work/test/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa @@ -464,7 +389,7 @@ - path: miniwdl_run/call-read_QC_trim/call-fastq_scan_raw/work/_miniwdl_inputs/0/SRR2838702_R1.fastq.gz - path: miniwdl_run/call-read_QC_trim/call-fastq_scan_raw/work/_miniwdl_inputs/0/SRR2838702_R2.fastq.gz - path: miniwdl_run/call-read_QC_trim/call-trimmomatic_pe/command - md5sum: cc137a029d5143592b40edf01d53735f + md5sum: cc961dbda52c70200555ffb34e5ba62d - path: miniwdl_run/call-read_QC_trim/call-trimmomatic_pe/inputs.json contains: ["read", "fastq", "test", "trimmomatic_min_length"] - path: 
miniwdl_run/call-read_QC_trim/call-trimmomatic_pe/outputs.json @@ -615,7 +540,7 @@ - path: miniwdl_run/wdl/tasks/species_typing/escherichia_shigella/task_sonneityping.wdl md5sum: 3357a36f11992a0ca00c61d7bfccb44b - path: miniwdl_run/wdl/tasks/species_typing/mycobacterium/task_tbprofiler.wdl - md5sum: 3b37e6bf7f4773e12afe1fa15920acd9 + md5sum: a4d6d24a04a453227b4fa320ff79e45f - path: miniwdl_run/wdl/tasks/species_typing/multi/task_ts_mlst.wdl md5sum: ff8070a06eca94264ad6a7d91cb03bf0 - path: miniwdl_run/wdl/tasks/task_versioning.wdl @@ -629,9 +554,9 @@ - path: miniwdl_run/wdl/tasks/utilities/data_export/task_broad_terra_tools.wdl md5sum: 8c97c5bd65e2787239f12ef425d479ae - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl - md5sum: d8db687487a45536d4837a540ed2a135 + md5sum: ac49217c129add7c000eedf38acee8f3 - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl - md5sum: ea5cff6eff8c2c42046cf2eae6f16b6f + md5sum: f3b18a0b4c2bdeb0896176e8f9c8247d - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_pe.wdl contains: ["version", "QC", "output"] - path: miniwdl_run/workflow.log diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml index e12ec8ec0..06caae04d 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml @@ -70,76 +70,6 @@ md5sum: 3cfdda0096f0689c9829ed27bdef6b1a - path: miniwdl_run/call-busco/work/_miniwdl_inputs/0/test_contigs.fasta md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: miniwdl_run/call-busco/work/busco_downloads/file_versions.tsv - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/ancestral - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/ancestral_variants - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/dataset.cfg - - path: 
miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/101957at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/102178at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/102360at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/98221at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/98657at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/99236at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/99734at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/hmms/99842at2157.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/info/ogs.id.info - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/info/species.info - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/lengths_cutoff - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/links_to_ODB10.txt - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/archaea_odb10/scores_cutoff - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/ancestral - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/ancestral_variants - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/dataset.cfg - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/hmms/1009041at2.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/hmms/1024388at2.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/hmms/1036075at2.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/hmms/1043239at2.hmm - - path: 
miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/hmms/961486at2.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/hmms/981870at2.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/hmms/984717at2.hmm - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/info/ogs.id.info - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/info/species.info - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/lengths_cutoff - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/links_to_ODB10.txt - - path: miniwdl_run/call-busco/work/busco_downloads/lineages/bacteria_odb10/scores_cutoff - - path: miniwdl_run/call-busco/work/busco_downloads/placement_files/list_of_reference_markers.bacteria_odb10.2019-12-16.txt - - path: miniwdl_run/call-busco/work/busco_downloads/placement_files/mapping_taxid-lineage.bacteria_odb10.2019-12-16.txt - - path: miniwdl_run/call-busco/work/busco_downloads/placement_files/mapping_taxids-busco_dataset_name.bacteria_odb10.2019-12-16.txt - - path: miniwdl_run/call-busco/work/busco_downloads/placement_files/supermatrix.aln.bacteria_odb10.2019-12-16.faa - - path: miniwdl_run/call-busco/work/busco_downloads/placement_files/tree.bacteria_odb10.2019-12-16.nwk - - path: miniwdl_run/call-busco/work/busco_downloads/placement_files/tree_metadata.bacteria_odb10.2019-12-16.txt - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/full_table.tsv - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/hmmer_output/101957at2157.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/hmmer_output/102178at2157.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/hmmer_output/102360at2157.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/hmmer_output/99734at2157.out - - path: 
miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/hmmer_output/99842at2157.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/missing_busco_list.tsv - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/short_summary.json - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_archaea_odb10/short_summary.txt - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences/1540940at2.faa - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences/1540940at2.fna - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences/1132353at2.faa - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences/1132353at2.fna - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences/1505038at2.faa - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/full_table.tsv - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/hmmer_output/1009041at2.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/hmmer_output/1024388at2.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/hmmer_output/1036075at2.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/hmmer_output/961486at2.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/hmmer_output/981870at2.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/hmmer_output/984717at2.out - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/missing_busco_list.tsv - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/placement_files/marker_genes.fasta - - path: 
miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/short_summary.json - - path: miniwdl_run/call-busco/work/test/auto_lineage/run_bacteria_odb10/short_summary.txt - - path: miniwdl_run/call-busco/work/test/logs/busco.log - - path: miniwdl_run/call-busco/work/test/logs/hmmsearch_err.log - - path: miniwdl_run/call-busco/work/test/logs/hmmsearch_out.log - - path: miniwdl_run/call-busco/work/test/logs/prodigal_err.log - - path: miniwdl_run/call-busco/work/test/logs/prodigal_out.log - - path: miniwdl_run/call-busco/work/test/logs/sepp_err.log - - path: miniwdl_run/call-busco/work/test/logs/sepp_out.log - path: miniwdl_run/call-busco/work/test/prodigal_output/predicted_genes/predicted.faa - path: miniwdl_run/call-busco/work/test/prodigal_output/predicted_genes/predicted.fna - path: miniwdl_run/call-busco/work/test/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa @@ -578,7 +508,7 @@ - path: miniwdl_run/wdl/tasks/species_typing/escherichia_shigella/task_sonneityping.wdl md5sum: 3357a36f11992a0ca00c61d7bfccb44b - path: miniwdl_run/wdl/tasks/species_typing/mycobacterium/task_tbprofiler.wdl - md5sum: 3b37e6bf7f4773e12afe1fa15920acd9 + md5sum: a4d6d24a04a453227b4fa320ff79e45f - path: miniwdl_run/wdl/tasks/species_typing/multi/task_ts_mlst.wdl md5sum: ff8070a06eca94264ad6a7d91cb03bf0 - path: miniwdl_run/wdl/tasks/task_versioning.wdl @@ -592,9 +522,9 @@ - path: miniwdl_run/wdl/tasks/utilities/data_export/task_broad_terra_tools.wdl md5sum: 8c97c5bd65e2787239f12ef425d479ae - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_se.wdl - md5sum: 4111a758490174325ae8ea52a95319e9 + md5sum: 5e735ae6cb60f86ec7983274f3baf9f8 - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl - md5sum: ea5cff6eff8c2c42046cf2eae6f16b6f + md5sum: f3b18a0b4c2bdeb0896176e8f9c8247d - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_se.wdl md5sum: 09d9f68b9ca8bf94b6145ff9bed2edd1 - path: miniwdl_run/workflow.log diff --git 
a/workflows/standalone_modules/wf_tbprofiler_tngs.wdl b/workflows/standalone_modules/wf_tbprofiler_tngs.wdl index 85f75a665..2d84d91ed 100644 --- a/workflows/standalone_modules/wf_tbprofiler_tngs.wdl +++ b/workflows/standalone_modules/wf_tbprofiler_tngs.wdl @@ -71,7 +71,7 @@ workflow tbprofiler_tngs { String tbprofiler_num_dr_variants = tbprofiler.tbprofiler_num_dr_variants String tbprofiler_num_other_variants = tbprofiler.tbprofiler_num_other_variants String tbprofiler_resistance_genes = tbprofiler.tbprofiler_resistance_genes - Int tbprofiler_median_coverage = tbprofiler.tbprofiler_median_coverage + Float tbprofiler_median_depth = tbprofiler.tbprofiler_median_depth Float tbprofiler_pct_reads_mapped = tbprofiler.tbprofiler_pct_reads_mapped # tbp_parser outputs File tbp_parser_looker_report_csv = tbp_parser.tbp_parser_looker_report_csv diff --git a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl index 32271224e..88f0ef066 100644 --- a/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl +++ b/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl @@ -18,6 +18,7 @@ import "../../tasks/task_versioning.wdl" as versioning import "../../tasks/taxon_id/contamination/task_kmerfinder.wdl" as kmerfinder_task import "../../tasks/taxon_id/task_gambit.wdl" as gambit_task import "../../tasks/utilities/data_export/task_broad_terra_tools.wdl" as terra_tools +import "../utilities/file_handling/wf_concatenate_illumina_lanes.wdl" as concatenate_lanes_workflow import "../utilities/wf_merlin_magic.wdl" as merlin_magic_workflow import "../utilities/wf_read_QC_trim_pe.wdl" as read_qc @@ -30,6 +31,15 @@ workflow theiaprok_illumina_pe { String seq_method = "ILLUMINA" File read1 File read2 + + # optional additional lanes + File? read1_lane2 + File? read1_lane3 + File? read1_lane4 + File? read2_lane2 + File? read2_lane3 + File? read2_lane4 + Int? genome_length # export taxon table parameters String? 
run_id @@ -68,10 +78,24 @@ workflow theiaprok_illumina_pe { call versioning.version_capture { input: } + if (defined(read1_lane2)) { + call concatenate_lanes_workflow.concatenate_illumina_lanes { + input: + samplename = samplename, + read1_lane1 = read1, + read1_lane2 = select_first([read1_lane2]), + read1_lane3 = read1_lane3, + read1_lane4 = read1_lane4, + read2_lane1 = read2, + read2_lane2 = read2_lane2, + read2_lane3 = read2_lane3, + read2_lane4 = read2_lane4 + } + } call screen.check_reads as raw_check_reads { input: - read1 = read1, - read2 = read2, + read1 = select_first([concatenate_illumina_lanes.read1_concatenated, read1]), + read2 = select_first([concatenate_illumina_lanes.read2_concatenated, read2]), min_reads = min_reads, min_basepairs = min_basepairs, min_genome_length = min_genome_length, @@ -85,8 +109,8 @@ workflow theiaprok_illumina_pe { call read_qc.read_QC_trim_pe as read_QC_trim { input: samplename = samplename, - read1 = read1, - read2 = read2, + read1 = select_first([concatenate_illumina_lanes.read1_concatenated, read1]), + read2 = select_first([concatenate_illumina_lanes.read2_concatenated, read2]), trim_min_length = trim_min_length, trim_quality_min_score = trim_quality_min_score, trim_window_size = trim_window_size, @@ -121,8 +145,8 @@ workflow theiaprok_illumina_pe { } call cg_pipeline.cg_pipeline as cg_pipeline_raw { input: - read1 = read1, - read2 = read2, + read1 = select_first([concatenate_illumina_lanes.read1_concatenated, read1]), + read2 = select_first([concatenate_illumina_lanes.read2_concatenated, read2]), samplename = samplename, genome_length = select_first([genome_length, quast.genome_length]) } @@ -257,8 +281,8 @@ workflow theiaprok_illumina_pe { sample_taxon = gambit.gambit_predicted_taxon, taxon_tables = taxon_tables, samplename = samplename, - read1 = read1, - read2 = read2, + read1 = select_first([concatenate_illumina_lanes.read1_concatenated, read1]), + read2 = select_first([concatenate_illumina_lanes.read2_concatenated, 
read2]), read1_clean = read_QC_trim.read1_clean, read2_clean = read_QC_trim.read2_clean, run_id = run_id, @@ -608,6 +632,9 @@ workflow theiaprok_illumina_pe { String theiaprok_illumina_pe_analysis_date = version_capture.date # Read Metadata String seq_platform = seq_method + # Concatenated Illumina Reads + File? read1_concatenated = concatenate_illumina_lanes.read1_concatenated + File? read2_concatenated = concatenate_illumina_lanes.read2_concatenated # Sample Screening String read_screen_raw = raw_check_reads.read_screen String? read_screen_clean = clean_check_reads.read_screen @@ -945,7 +972,7 @@ workflow theiaprok_illumina_pe { String? tbprofiler_sub_lineage = merlin_magic.tbprofiler_sub_lineage String? tbprofiler_dr_type = merlin_magic.tbprofiler_dr_type String? tbprofiler_resistance_genes = merlin_magic.tbprofiler_resistance_genes - Int? tbprofiler_median_coverage = merlin_magic.tbprofiler_median_coverage + Float? tbprofiler_median_depth = merlin_magic.tbprofiler_median_depth Float? tbprofiler_pct_reads_mapped = merlin_magic.tbprofiler_pct_reads_mapped String? tbp_parser_version = merlin_magic.tbp_parser_version String? 
tbp_parser_docker = merlin_magic.tbp_parser_docker diff --git a/workflows/theiaprok/wf_theiaprok_illumina_se.wdl b/workflows/theiaprok/wf_theiaprok_illumina_se.wdl index e743ecbce..9005766a4 100644 --- a/workflows/theiaprok/wf_theiaprok_illumina_se.wdl +++ b/workflows/theiaprok/wf_theiaprok_illumina_se.wdl @@ -18,6 +18,7 @@ import "../../tasks/task_versioning.wdl" as versioning import "../../tasks/taxon_id/contamination/task_kmerfinder.wdl" as kmerfinder_task import "../../tasks/taxon_id/task_gambit.wdl" as gambit_task import "../../tasks/utilities/data_export/task_broad_terra_tools.wdl" as terra_tools +import "../utilities/file_handling/wf_concatenate_illumina_lanes.wdl" as concatenate_lanes_workflow import "../utilities/wf_merlin_magic.wdl" as merlin_magic_workflow import "../utilities/wf_read_QC_trim_se.wdl" as read_qc @@ -29,6 +30,12 @@ workflow theiaprok_illumina_se { String samplename String seq_method = "ILLUMINA" File read1 + + # optional additional lanes + File? read1_lane2 + File? read1_lane3 + File? read1_lane4 + Int? genome_length # export taxon table parameters String? 
run_id @@ -68,9 +75,19 @@ workflow theiaprok_illumina_se { call versioning.version_capture { input: } + if (defined(read1_lane2)) { + call concatenate_lanes_workflow.concatenate_illumina_lanes { + input: + samplename = samplename, + read1_lane1 = read1, + read1_lane2 = select_first([read1_lane2]), + read1_lane3 = read1_lane3, + read1_lane4 = read1_lane4 + } + } call screen.check_reads_se as raw_check_reads { input: - read1 = read1, + read1 = select_first([concatenate_illumina_lanes.read1_concatenated, read1]), min_reads = min_reads, min_basepairs = min_basepairs, min_genome_length = min_genome_length, @@ -84,7 +101,7 @@ workflow theiaprok_illumina_se { call read_qc.read_QC_trim_se as read_QC_trim { input: samplename = samplename, - read1 = read1, + read1 = select_first([concatenate_illumina_lanes.read1_concatenated, read1]), trim_min_length = trim_min_length, trim_quality_min_score = trim_quality_min_score, trim_window_size = trim_window_size, @@ -116,7 +133,7 @@ workflow theiaprok_illumina_se { } call cg_pipeline.cg_pipeline as cg_pipeline_raw { input: - read1 = read1, + read1 = select_first([concatenate_illumina_lanes.read1_concatenated, read1]), samplename = samplename, genome_length = select_first([genome_length, quast.genome_length]) } @@ -240,7 +257,7 @@ workflow theiaprok_illumina_se { sample_taxon = gambit.gambit_predicted_taxon, taxon_tables = taxon_tables, samplename = samplename, - read1 = read1, + read1 = select_first([concatenate_illumina_lanes.read1_concatenated, read1]), read1_clean = read_QC_trim.read1_clean, run_id = run_id, collection_date = collection_date, @@ -566,6 +583,8 @@ workflow theiaprok_illumina_se { String theiaprok_illumina_se_analysis_date = version_capture.date # Read Metadata String seq_platform = seq_method + # Concatenated Illumina Reads + File? read1_concatenated = concatenate_illumina_lanes.read1_concatenated # Sample Screening String read_screen_raw = raw_check_reads.read_screen String? 
read_screen_clean = clean_check_reads.read_screen diff --git a/workflows/theiaprok/wf_theiaprok_ont.wdl b/workflows/theiaprok/wf_theiaprok_ont.wdl index a7eb9143e..867e67e0b 100644 --- a/workflows/theiaprok/wf_theiaprok_ont.wdl +++ b/workflows/theiaprok/wf_theiaprok_ont.wdl @@ -848,7 +848,7 @@ workflow theiaprok_ont { String? tbprofiler_sub_lineage = merlin_magic.tbprofiler_sub_lineage String? tbprofiler_dr_type = merlin_magic.tbprofiler_dr_type String? tbprofiler_resistance_genes = merlin_magic.tbprofiler_resistance_genes - Int? tbprofiler_median_coverage = merlin_magic.tbprofiler_median_coverage + Float? tbprofiler_median_depth = merlin_magic.tbprofiler_median_depth Float? tbprofiler_pct_reads_mapped = merlin_magic.tbprofiler_pct_reads_mapped String? tbp_parser_version = merlin_magic.tbp_parser_version String? tbp_parser_docker = merlin_magic.tbp_parser_docker diff --git a/workflows/utilities/file_handling/wf_concatenate_illumina_lanes.wdl b/workflows/utilities/file_handling/wf_concatenate_illumina_lanes.wdl new file mode 100644 index 000000000..f2a5a9ad9 --- /dev/null +++ b/workflows/utilities/file_handling/wf_concatenate_illumina_lanes.wdl @@ -0,0 +1,42 @@ +version 1.0 + +import "../../../tasks/utilities/file_handling/task_cat_lanes.wdl" as concatenate_lanes +import "../../../tasks/task_versioning.wdl" as versioning + +workflow concatenate_illumina_lanes { + input { + String samplename + + File read1_lane1 + File read1_lane2 + File? read1_lane3 + File? read1_lane4 + + File? read2_lane1 + File? read2_lane2 + File? read2_lane3 + File? 
read2_lane4 + } + call concatenate_lanes.cat_lanes { + input: + samplename = samplename, + read1_lane1 = read1_lane1, + read2_lane1 = read2_lane1, + read1_lane2 = read1_lane2, + read2_lane2 = read2_lane2, + read1_lane3 = read1_lane3, + read2_lane3 = read2_lane3, + read1_lane4 = read1_lane4, + read2_lane4 = read2_lane4 + } + call versioning.version_capture { + input: + } + output { + String concatenate_illumina_lanes_version = version_capture.phb_version + String concatenate_illumina_lanes_analysis_date = version_capture.date + + File read1_concatenated = cat_lanes.read1_concatenated + File? read2_concatenated = cat_lanes.read2_concatenated + } +} \ No newline at end of file diff --git a/workflows/utilities/wf_merlin_magic.wdl b/workflows/utilities/wf_merlin_magic.wdl index f10060851..1e19184bd 100644 --- a/workflows/utilities/wf_merlin_magic.wdl +++ b/workflows/utilities/wf_merlin_magic.wdl @@ -57,7 +57,7 @@ workflow merlin_magic { # activating tool logic Boolean call_poppunk = true Boolean call_shigeifinder_reads_input = false - Boolean tbprofiler_additional_outputs = false # set to true to run tbp-parser + Boolean call_tbp_parser = false # docker options String? abricate_abaum_docker_image String? abricate_vibrio_docker_image @@ -197,14 +197,14 @@ workflow merlin_magic { Int srst2_gene_max_mismatch = 2000 # tbprofiler options Boolean tbprofiler_run_custom_db = false + Boolean tbprofiler_run_cdph_db = false File? tbprofiler_custom_db - Int? tbprofiler_cov_frac_threshold Float? tbprofiler_min_af - Float? tbprofiler_min_af_pred Int? tbprofiler_min_depth String? tbprofiler_mapper String? tbprofiler_variant_caller String? tbprofiler_variant_calling_params + String? tbprofiler_additional_parameters # tbp-parser options String tbp_parser_output_seq_method_type = "WGS" String? tbp_parser_operator @@ -215,6 +215,14 @@ workflow merlin_magic { File? tbp_parser_coverage_regions_bed Boolean? tbp_parser_debug Boolean? tbp_parser_add_cs_lims + Boolean? 
tbp_parser_tngs_data + Float? tbp_parser_rrs_frequency + Int? tbp_parser_rrs_read_support + Float? tbp_parser_rrl_frequency + Int? tbp_parser_rrl_read_support + Float? tbp_parser_rpob449_frequency + Float? tbp_parser_etha237_frequency + File? tbp_parser_expert_rule_regions_bed # virulencefinder options Float? virulencefinder_coverage_threshold Float? virulencefinder_identity_threshold @@ -448,18 +456,18 @@ workflow merlin_magic { read2 = select_first([clockwork_decon_reads.clockwork_cleaned_read2, read2, "gs://theiagen-public-files/terra/theiaprok-files/no-read2.txt"]), samplename = samplename, ont_data = ont_data, - tbprofiler_run_custom_db = tbprofiler_run_custom_db, - tbprofiler_custom_db = tbprofiler_custom_db, - cov_frac_threshold = tbprofiler_cov_frac_threshold, - min_af = tbprofiler_min_af, - min_af_pred = tbprofiler_min_af_pred, - min_depth = tbprofiler_min_depth, mapper = tbprofiler_mapper, variant_caller = tbprofiler_variant_caller, variant_calling_params = tbprofiler_variant_calling_params, + additional_parameters = tbprofiler_additional_parameters, + min_depth = tbprofiler_min_depth, + min_af = tbprofiler_min_af, + tbprofiler_custom_db = tbprofiler_custom_db, + tbprofiler_run_custom_db = tbprofiler_run_custom_db, + tbprofiler_run_cdph_db = tbprofiler_run_cdph_db, docker = tbprofiler_docker_image } - if (tbprofiler_additional_outputs) { + if (call_tbp_parser) { call tbp_parser_task.tbp_parser { input: tbprofiler_json = tbprofiler.tbprofiler_output_json, @@ -468,13 +476,21 @@ workflow merlin_magic { samplename = samplename, sequencing_method = tbp_parser_output_seq_method_type, operator = tbp_parser_operator, - coverage_threshold = tbp_parser_coverage_threshold, - coverage_regions_bed = tbp_parser_coverage_regions_bed, min_depth = tbp_parser_min_depth, min_frequency = tbp_parser_min_frequency, min_read_support = tbp_parser_min_read_support, - tbp_parser_debug = tbp_parser_debug, + coverage_threshold = tbp_parser_coverage_threshold, + coverage_regions_bed 
= tbp_parser_coverage_regions_bed, add_cycloserine_lims = tbp_parser_add_cs_lims, + tbp_parser_debug = tbp_parser_debug, + tngs_data = tbp_parser_tngs_data, + rrs_frequency = tbp_parser_rrs_frequency, + rrs_read_support = tbp_parser_rrs_read_support, + rrl_frequency = tbp_parser_rrl_frequency, + rrl_read_support = tbp_parser_rrl_read_support, + rpob449_frequency = tbp_parser_rpob449_frequency, + etha237_frequency = tbp_parser_etha237_frequency, + expert_rule_regions_bed = tbp_parser_expert_rule_regions_bed, docker = tbp_parser_docker_image } } @@ -896,7 +912,7 @@ workflow merlin_magic { String? tbprofiler_sub_lineage = tbprofiler.tbprofiler_sub_lineage String? tbprofiler_dr_type = tbprofiler.tbprofiler_dr_type String? tbprofiler_resistance_genes = tbprofiler.tbprofiler_resistance_genes - Int? tbprofiler_median_coverage = tbprofiler.tbprofiler_median_coverage + Float? tbprofiler_median_depth = tbprofiler.tbprofiler_median_depth Float? tbprofiler_pct_reads_mapped = tbprofiler.tbprofiler_pct_reads_mapped String? tbp_parser_version = tbp_parser.tbp_parser_version String? tbp_parser_docker = tbp_parser.tbp_parser_docker