-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #118 from mthang/panaroo
add panaroo tool
- Loading branch information
Showing
18 changed files
with
90,924 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
name: panaroo | ||
owner: galaxy-australia | ||
categories: | ||
- Pangenome | ||
description: A Bacterial Pangenome Analysis Pipeline | ||
homepage_url: https://gthlab.au/panaroo/#/ | ||
long_description: | | ||
a graph-based pangenome clustering tool that is able to account for many of the sources of error introduced during the annotation of prokaryotic genome assemblies. | ||
remote_repository_url: https://github.com/usegalaxy-au/tools-au/tree/master/tools/panaroo | ||
type: unrestricted |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
<macros> | ||
<token name="@TOOL_VERSION@">1.5.0</token> | ||
<token name="@VERSION_SUFFIX@">0</token> | ||
<token name="@PROFILE@">22.05</token> | ||
<xml name="edam_ontology"> | ||
<edam_topics> | ||
<edam_topic>topic_0194</edam_topic> | ||
</edam_topics> | ||
</xml> | ||
<xml name="biotools"> | ||
<xrefs> | ||
<xref type="bio.tools">panaroo</xref> | ||
</xrefs> | ||
</xml> | ||
<xml name="requirements"> | ||
<requirements> | ||
<requirement type="package" version="@TOOL_VERSION@">panaroo</requirement> | ||
<requirement type="package" version="170427">prank</requirement> | ||
</requirements> | ||
</xml> | ||
<xml name="clean_mode"> | ||
<option value="strict">strict</option> | ||
<option value="moderate">moderate</option> | ||
<option value="sensitive">sensitive</option> | ||
</xml> | ||
<xml name="genetic_code"> | ||
<option value="1">1. Standard</option> | ||
<option value="2">2. Vertebrate Mitochondrial</option> | ||
<option value="3">3. Yeast Mitochondrial</option> | ||
<option value="4">4. Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option> | ||
<option value="5">5. Invertebrate Mitochondrial</option> | ||
<option value="6">6. Ciliate, Dasycladacean and Hexamita Nuclear Code</option> | ||
<option value="9">9. Echinoderm Mitochondrial</option> | ||
<option value="10">10. Euplotid Nuclear</option> | ||
<option value="11" selected="True">11. Bacteria and Archaea</option> | ||
<option value="12">12. Alternative Yeast Nuclear</option> | ||
<option value="13">13. Ascidian Mitochondrial</option> | ||
<option value="14">14. Flatworm Mitochondrial</option> | ||
<option value="15">15. Blepharisma Macronuclear</option> | ||
<option value="16">16. Chlorophycean Mitochondrial</option> | ||
<option value="21">21. Trematode Mitochondrial</option> | ||
<option value="22">22. Scenedesmus obliquus mitochondrial</option> | ||
<option value="23">23. Thraustochytrium Mitochondrial</option> | ||
<option value="24">24. Pterobranchia mitochondrial</option> | ||
<option value="25">25. Candidate Division SR1 and Gracilibacteria Code</option> | ||
<option value="26">26. Pachysolen tannophilus Nuclear Code</option> | ||
<option value="27">27. Karyorelict Nuclear Code</option> | ||
<option value="28">28. Condylostoma Nuclear Code</option> | ||
<option value="29">29. Mesodinium Nuclear Code</option> | ||
<option value="30">30. Peritrich Nuclear Code</option> | ||
<option value="31">31. Blastocrithidia Nuclear Code</option> | ||
<option value="33">33. Cephalodiscidae Mitochondrial UAA-Tyr Code</option> | ||
</xml> | ||
<xml name="refind_mode_option"> | ||
<option value="default" selected="True">default</option> | ||
<option value="strict">strict</option> | ||
<option value="off">off</option> | ||
</xml> | ||
<xml name="gene_alignment"> | ||
<option value="None" selected="True">None</option> | ||
<option value="core">core</option> | ||
<option value="pan">pan</option> | ||
</xml> | ||
<xml name="gene_aligner"> | ||
<option value="mafft" selected="True">mafft</option> | ||
<option value="prank">prank</option> | ||
<option value="clustal">clustal</option> | ||
</xml> | ||
</macros> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,235 @@ | ||
<tool id="panaroo" name="Panaroo" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | ||
<description>A Bacterial Pangenome Analysis Pipeline</description> | ||
<macros> | ||
<import>macros.xml</import> | ||
</macros> | ||
<expand macro="edam_ontology"/> | ||
<expand macro="biotools"/> | ||
<expand macro="requirements"/> | ||
<stdio> | ||
<exit_code range="1:" /> | ||
<regex match="System..*Exception" | ||
source="both" | ||
level="fatal" | ||
description="Error encountered" /> | ||
</stdio> | ||
<command><![CDATA[ | ||
mkdir outdir && | ||
#import re | ||
#set input_directory = 'input_directory' | ||
mkdir $input_directory && | ||
#for $gff in $gff_input_collection: | ||
#set identifier = re.sub('[^\s\w\-\\.]','_',str($gff.element_identifier)) | ||
ln -fs '$gff' '$input_directory/$identifier' && | ||
#end for | ||
panaroo | ||
-t \${GALAXY_SLOTS:-2} | ||
#if str($gen_code) != 'None': | ||
--codon-table $gen_code | ||
#end if | ||
#if str($advanced.adv_options_selector) == "set": | ||
#if $advanced.remove_invalid_gene | ||
$advanced.remove_invalid_gene | ||
#end if | ||
-c '$advanced.matching_option.seq_threshold' | ||
-f '$advanced.matching_option.peptide_threshold' | ||
--len_dif_percent '$advanced.matching_option.length_diff_cutoff' | ||
$advanced.matching_option.merge_paralogs | ||
--search_radius '$advanced.refind_option.search_radius' | ||
--refind_prop_match '$advanced.refind_option.refind_prop_match' | ||
--refind-mode '$advanced.refind_option.refind_mode' | ||
--min_trailing_support '$advanced.graph_correction_option.min_trailing_support' | ||
--trailing_recursive '$advanced.graph_correction_option.trailing_recursive' | ||
--edge_support_threshold '$advanced.graph_correction_option.edge_support_threshold' | ||
--remove_by_consensus '$advanced.graph_correction_option.remove_by_consensus' | ||
--high_var_flag '$advanced.graph_correction_option.high_var_flag' | ||
--min_edge_support_sv '$advanced.graph_correction_option.min_edge_support_sv' | ||
$advanced.graph_correction_option.all_seq_in_graph | ||
$advanced.graph_correction_option.no_clean_edges | ||
#if $advanced.gene_alignment_option.a != 'None' | ||
-a '$advanced.gene_alignment_option.a' | ||
#end if | ||
#if '$advanced.gene_alignment_option.aligner' == 'mafft' | ||
--aligner mafft | ||
#else | ||
--aligner '$advanced.gene_alignment_option.aligner' | ||
#end if | ||
#if $advanced.gene_alignment_option.core_subset != '' | ||
--core_subset $advanced.gene_alignment_option.core_subset | ||
#end if | ||
#end if | ||
-i $input_directory/*.gff | ||
-o outdir | ||
--clean-mode $mode | ||
> '$log' && | ||
mv outdir/gene_presence_absence.Rtab outdir/gene_presence_absence_rtab.Rtab && | ||
2>&1 | ||
]]></command> | ||
<inputs> | ||
<param name="gff_input_collection" type="data_collection" format="gff" collection_type="list" label="GFF Input Collection" help="A list of gff files (i.e prokka)"/> | ||
<param name="mode" type="select" label="The stringency mode at which to run panaroo" help="--clean-mode"> | ||
<expand macro="clean_mode"/> | ||
</param> | ||
<param name="gen_code" type="select" label="the codon table user for translation" help="default: 11"> | ||
<expand macro="genetic_code"/> | ||
</param> | ||
<conditional name="advanced"> | ||
<param name="adv_options_selector" type="select" label="Set advanced options?" help="Provides additional controls"> | ||
<option value="set">Set</option> | ||
<option value="do_not_set" selected="True">Do not set</option> | ||
</param> | ||
<when value="set"> | ||
<param name="remove_invalid_gene" argument="--remove-invalid-genes" type="boolean" truevalue="--remove-invalid-genes" falsevalue="" label="removes annotations that do not conform to the expected Prokka format such as those including premature stop codons" help="--remove-invalid-genes"/> | ||
|
||
<section name="matching_option" title="Matching" expanded="false"> | ||
<param name="seq_threshold" argument="--threshold" type="float" value="0.98" label="sequence identity threshold" help="default: 0.98"/> | ||
<param name="peptide_threshold" argument="--family_threshold" type="float" value="0.7" label="protein family sequence identity threshold" help="default: 0.7"/> | ||
<param name="length_diff_cutoff" argument="--len_dif_percent" type="float" value="0.98" label="length difference cutoff" help="default: 0.98"/> | ||
<param name="merge_paralogs" type="boolean" truevalue="--merge_paralogs" falsevalue="" checked="false" label="do not split paralogs" help="--merge_paralogs"/> | ||
</section> | ||
|
||
<section name="refind_option" title="Refind" expanded="false"> | ||
<param argument="--search_radius" type="integer" value="5000" label="Search radius" help="--search_radius (default: 5000)"/> | ||
<param argument="--refind_prop_match" type="float" value="0.75" label="Gene proportion match" help="default: 0.75"/> | ||
<param argument="--refind_mode" type="select" label="The stringency mode at which to re-find genes" help="default: default"> | ||
<expand macro="refind_mode_option"/> | ||
</param> | ||
</section> | ||
|
||
<section name="graph_correction_option" title="Graph Correction" expanded="false"> | ||
<param argument="--min_trailing_support" type="integer" value="2" label="Minimum cluster size to keep a gene called at the end of a contig" help="--min_traiiing_support [relexed mode : 2 is used]"/> | ||
<param argument="--trailing_recursive" type="integer" value="1" label="Number of times to perform recursive trimming of low support nodes near the end of contigs" help="--trailing_recursive [relaxed mode: 1 is used]"/> | ||
<param name="edge_support_threshold" type="integer" value="1" label="Edge support threshold" help="--edge_support_threshold [ Minimal edge 1 is used ]"/> | ||
<param name="len_outlier_proportion" type="float" value="0.01" label="Length outlier support proportion" help="--length_outlier_support_proportion [default: 0.01]"/> | ||
<param name="remove_by_consensus" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Remove consensus" help="--remove_by_consensus [default: False]"/> | ||
<param name="high_var_flag" type="integer" value="5" label="Highly variable gene region" help="--high_var_flag [default: 5]"/> | ||
<param name="min_edge_support_sv" type="integer" value="2" label="Minimum edge support structural variants" help="--min_edge_support_sv [relaxed mode: 2 is used]"/> | ||
<param argument="--all_seq_in_graph" type="boolean" truevalue="--all_seq_in_graph" falsevalue="" label="Retains all DNA sequence" help="--all_seq_in_graph [default: off]"/> | ||
<param argument="--no_clean_edges" type="boolean" truevalue="--no_clean_edges" falsevalue="" label="Edge filtering in the final output graph" help="--no_clean_edges [default: off]"/> | ||
</section> | ||
|
||
<section name="gene_alignment_option" title="Gene Alignment" expanded="false"> | ||
<param argument="-a" type="select" label="Output alignments of core genes or all genes." help="-a [optional: core or pan; default: None"> | ||
<expand macro="gene_alignment"/> | ||
</param> | ||
<param argument="--aligner" type="select" label="Specify an aligner" help="--aligner [mafft|prank|clustal][default: mafft]"> | ||
<expand macro="gene_aligner"/> | ||
</param> | ||
<param name="codons" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Generate codon alignments" help="--codons"/> | ||
<param name="core_threshold" type="float" value="0.95" label="Core-genome sample threshold" help="--core_threshold [default: 0.95]"/> | ||
<param argument="--core_subset" type="integer" value="" optional="true" label="Subset of the core genome to these many genes" help="--core_subset [default: all]"/> | ||
<param name="core_entropy" type="float" value="0.1" label="Set the Block Mapping and Gathering with Entropy" help="--core_entropy_filter (threshold can be between 0.0 and 1.0) [default: Tukey outlier method]"/> | ||
</section> | ||
</when> | ||
<when value="do_not_set"/> | ||
</conditional> | ||
</inputs> | ||
<outputs> | ||
<collection name="output" type="list" label="${tool.name} on ${on_string}: Pangenome output"> | ||
<discover_datasets pattern="(?P<designation>.+)\.(?P<ext>clstr)" directory="outdir" format="txt" visible="false" /> | ||
<discover_datasets pattern="(?P<designation>.+)\.(?P<ext>txt)" directory="outdir" format="txt" visible="false" /> | ||
<discover_datasets pattern="(?P<designation>.+)\.(?P<ext>gml)" directory="outdir" format="txt" visible="false" /> | ||
<discover_datasets pattern="(?P<designation>.+)\.(?P<ext>Rtab)" directory="outdir" format="tabular" visible="false" /> | ||
<discover_datasets pattern="(?P<designation>.+)\.(?P<ext>csv)" directory="outdir" format="csv" visible="false" /> | ||
<discover_datasets pattern="(?P<designation>.+)\.(?P<ext>fasta)" directory="outdir" format="fasta" visible="false" /> | ||
<discover_datasets pattern="(?P<designation>.+)\.(?P<ext>fa)" directory="outdir" format="fasta" visible="false" /> | ||
<filter>advanced['gene_alignment_option']['a'] == 'None' </filter> | ||
</collection> | ||
<collection name="output_pangenome" type="list" label="${tool.name} on ${on_string}: Pangenome alignment output"> | ||
<discover_datasets pattern="(?P<designation>.+)\.(?P<ext>clstr)" directory="outdir" format="txt" visible="false" /> | ||
<discover_datasets pattern="(?P<designation>.+)\.(?P<ext>txt)" directory="outdir" format="txt" visible="false" /> | ||
<discover_datasets pattern="(?P<designation>.+)\.(?P<ext>gml)" directory="outdir" format="txt" visible="false" /> | ||
<discover_datasets pattern="(?P<designation>.+)\.(?P<ext>Rtab)" directory="outdir" format="tabular" visible="false" /> | ||
<discover_datasets pattern="(?P<designation>.+)\.(?P<ext>csv)" directory="outdir" format="csv" visible="false" /> | ||
<discover_datasets pattern="(?P<designation>.+)\.(?P<ext>fasta)" directory="outdir" format="fasta" visible="false" /> | ||
<discover_datasets pattern="(?P<designation>.+)\.(?P<ext>fa)" directory="outdir" format="fasta" visible="false" /> | ||
<discover_datasets pattern="(?P<designation>.+)\.(?P<ext>aln)" directory="outdir" format="aln" visible="false" /> | ||
<discover_datasets pattern="(?P<designation>.+)\.(?P<ext>embl)" directory="outdir" format="embl" visible="false" /> | ||
<filter>advanced['gene_alignment_option']['a'] != 'None' </filter> | ||
</collection> | ||
<collection name="output_pangenome_fasta" type="list" label="${tool.name} on ${on_string}: Pangenom alignment fasta"> | ||
<discover_datasets pattern="(?P<designation>.+)\.(?P<ext>fas)" directory="outdir/aligned_gene_sequences" format="fasta" visible="false" /> | ||
<filter>advanced['gene_alignment_option']['a'] != 'None' </filter> | ||
</collection> | ||
|
||
<data name="log" format="txt" label="${tool.name} on ${on_string}: log"/> | ||
</outputs> | ||
<tests> | ||
<!-- run panaroo with default parameters (i.e panaroo -t 2 -i *.gff -o default \-\-clean-mode strict \-\-remove-invalid-genes) --> | ||
<test expect_num_outputs="2"> | ||
<param name="gen_code" value="11"/> | ||
<param name="mode" value="strict"/> | ||
<param name="adv_options_selector" value="set"/> | ||
<param name="a" value="None"/> | ||
<param name="gff_input_collection"> | ||
<collection type="list"> | ||
<element name="gff10.gff" value="10_small.gff"/> | ||
<element name="gff11.gff" value="11_small.gff"/> | ||
</collection> | ||
</param> | ||
<output_collection name="output" count="13"/> | ||
<output name="log"> | ||
<assert_contents> | ||
<has_text text="pre-processing gff3 files..."/> | ||
</assert_contents> | ||
</output> | ||
</test> | ||
<test expect_num_outputs="3"> | ||
<param name="gen_code" value="11"/> | ||
<param name="mode" value="strict"/> | ||
<param name="adv_options_selector" value="set"/> | ||
<param name="a" value="core"/> | ||
<param name="gff_input_collection"> | ||
<collection type="list"> | ||
<element name="gff10.gff" value="10_small.gff"/> | ||
<element name="gff11.gff" value="11_small.gff"/> | ||
</collection> | ||
</param> | ||
<output_collection name="output_pangenome" count="18"/> | ||
<output_collection name="output_pangenome_fasta" count="251"/> | ||
<output name="log"> | ||
<assert_contents> | ||
<has_text text="pre-processing gff3 files..."/> | ||
</assert_contents> | ||
</output> | ||
</test> | ||
</tests> | ||
<help><![CDATA[ | ||
Panaroo_ is A Bacterial Pangenome Analysis Pipeline. | ||
**INPUTS** | ||
Panaroo now supports multiple input formats. To use non-standard GFF3 files you must profile the input file as a list in a text file (one per line). Separate GFF and FASTA files can be provided per isolate by providing each file delimited by a space or a tab. Genbank file formats are also supported with extensions '.gbk', '.gb' or '.gbff'. These must compliant with Genbank/ENA/DDJB. This can be forced in Prokka by specifying the --compliance parameter. | ||
- data file in gff format | ||
**OUTPUTS** | ||
- combined_protein_cdhit_out.txt | ||
- combined_protein_cdhit_out.txt.clstr | ||
- pre_filt_graph.gml | ||
- gene_data.csv | ||
- combined_protein_CDS.fasta | ||
- combined_DNA_CDS.fasta | ||
- gene_presence_absence.Rtab | ||
- gene_presence_absence_roary.csv | ||
- gene_presence_absence.csv | ||
- summary_statistics.txt | ||
- pan_genome_reference.fa | ||
- struct_presence_absence.Rtab | ||
- final_graph.gml | ||
.. _Panaroo: https://gthlab.au/panaroo/#/gettingstarted/quickstart | ||
]]></help> | ||
<citations> | ||
<citation type="doi">10.1186/s13059-020-02090-4</citation> | ||
</citations> | ||
</tool> | ||
|
Oops, something went wrong.