Skip to content

Commit

Permalink
Add kleborate task and workflow (#16)
Browse files Browse the repository at this point in the history
* Initial commit. Individual Kleborate task and workflow. --all flag invoked. No parsing of output file included.

* Second Commit.

* add kleborate workflow to dockstore

* remove default samplename

* Added python code to parse outputs

* Added outputs for parsed files and strings

* added .to_string()

* fixed python block indent

* removed accidental indent

* Reverted back to initial commit

* reverted to initial commit

* Added python logic to parse tsv (without pandas)

* Added tsv parsing python block (without pandas)

* added output prefixes and removed date output

* removed date output

* Added kleborate task

* capture kleborate version and analysis date

* Clean up validation files dir

* remove miniwdl file

* remove fja notes file

* Utilize wf tag for version and analysis date only

* add whitespace to end of file

* remove redundant kleborate task file

Co-authored-by: Frank_Ambrosio <[email protected]>
Co-authored-by: frankambrosio3 <[email protected]>
  • Loading branch information
3 people authored Jul 27, 2021
1 parent 8632cf7 commit 24e06dc
Show file tree
Hide file tree
Showing 8 changed files with 71,180 additions and 25 deletions.
5 changes: 5 additions & 0 deletions .dockstore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ workflows:
primaryDescriptorPath: /workflows/wf_apollo_illumina_pe.wdl
testParameterFiles:
- empty.json
- name: Kleborate
subclass: WDL
primaryDescriptorPath: /workflows/wf_kleborate.wdl
testParameterFiles:
- empty.json
- name: SerotypeFinder
subclass: WDL
primaryDescriptorPath: /workflows/wf_serotypefinder.wdl
Expand Down
18 changes: 0 additions & 18 deletions notes.wdl

This file was deleted.

92 changes: 91 additions & 1 deletion tasks/task_taxon_id.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ task gambit {
date | tee DATE

midas query ~{assembly} | tail -n2 > ~{samplename}_gambit.csv

python3 <<CODE
import csv
#grab output genome length and number contigs by column header
Expand Down Expand Up @@ -58,6 +58,96 @@ task gambit {
preemptible: 0
}
}
task kleborate_one_sample {
  # Runs Kleborate with the --all flag on a single assembly, then parses the
  # one-sample TSV report into small text files that are exposed as String
  # outputs (sequence type, virulence/resistance scores, resistance genes and
  # resistance mutations).
  #
  # Inputs
  #   assembly               - assembly file passed to kleborate -a
  #   samplename             - prefix used for the Kleborate TSV report
  #   kleborate_docker_image - container the task runs in
  input {
    File assembly
    String samplename
    String kleborate_docker_image = "staphb/kleborate:2.0.4"
  }
  command <<<
    # capture date and version
    # Print and save date (DATE is written but is not a task output)
    date | tee DATE
    # Print and save version, prefixed with the tool name
    kleborate --version > VERSION && sed -i -e 's/^/Kleborate /' VERSION
    # Run Kleborate on the input assembly with the --all flag and output with samplename prefix
    kleborate -a ~{assembly} --all -o ~{samplename}_kleborate_output_file.tsv

    python3 <<CODE
    import csv

    # Load the report: row 0 is the header, row 1 is the single sample.
    with open("./~{samplename}_kleborate_output_file.tsv", 'r') as tsv_file:
      tsv_reader = csv.reader(tsv_file, delimiter="\t")
      tsv_data = list(tsv_reader)
    tsv_dict = dict(zip(tsv_data[0], tsv_data[1]))

    def write_column(out_path, column):
      # Write the value of one report column to out_path.
      with open(out_path, 'wt') as out_file:
        out_file.write(tsv_dict[column])

    def write_joined_columns(out_path, columns):
      # Write the values of the given columns joined by ';', skipping
      # columns whose value is the '-' placeholder.
      hits = [tsv_dict[column] for column in columns if tsv_dict[column] != '-']
      with open(out_path, 'wt') as out_file:
        out_file.write(';'.join(hits))

    # Single-column values. SPECIES is written but is not a task output.
    write_column("SPECIES", 'species')
    write_column("MLST_SEQUENCE_TYPE", 'ST')
    write_column("VIRULENCE_SCORE", 'virulence_score')
    write_column("RESISTANCE_SCORE", 'resistance_score')
    write_column("NUM_RESISTANCE_GENES", 'num_resistance_genes')

    # Acquired beta-lactamase columns.
    bla_res_genes_list = ['Bla_acquired', 'Bla_inhR_acquired', 'Bla_ESBL_acquired', 'Bla_ESBL_inhR_acquired', 'Bla_Carb_acquired']
    write_joined_columns("BLA_RESISTANCE_GENES", bla_res_genes_list)

    # ESBL columns. Previously the matches were appended to the bla list by
    # mistake, so this file was always empty.
    esbl_res_genes_list = ['Bla_ESBL_acquired', 'Bla_ESBL_inhR_acquired']
    write_joined_columns("ESBL_RESISTANCE_GENES", esbl_res_genes_list)

    # Key acquired resistance columns.
    key_res_genes_list = ['Col_acquired', 'Fcyn_acquired', 'Flq_acquired', 'Rif_acquired', 'Bla_acquired', 'Bla_inhR_acquired', 'Bla_ESBL_acquired', 'Bla_ESBL_inhR_acquired', 'Bla_Carb_acquired']
    write_joined_columns("KEY_RESISTANCE_GENES", key_res_genes_list)

    # Resistance mutation columns.
    res_mutations_list = ['Bla_chr', 'SHV_mutations', 'Omp_mutations', 'Col_mutations', 'Flq_mutations']
    write_joined_columns("GENOMIC_RESISTANCE_MUTATIONS", res_mutations_list)
    CODE
  >>>
  output {
    File kleborate_output_file = "~{samplename}_kleborate_output_file.tsv"
    String version = read_string("VERSION")
    String mlst_sequence_type = read_string("MLST_SEQUENCE_TYPE")
    String virulence_score = read_string("VIRULENCE_SCORE")
    String resistance_score = read_string("RESISTANCE_SCORE")
    String num_resistance_genes = read_string("NUM_RESISTANCE_GENES")
    String bla_resistance_genes = read_string("BLA_RESISTANCE_GENES")
    String esbl_resistance_genes = read_string("ESBL_RESISTANCE_GENES")
    String key_resistance_genes = read_string("KEY_RESISTANCE_GENES")
    String resistance_mutations = read_string("GENOMIC_RESISTANCE_MUTATIONS")
  }
  runtime {
    docker: "~{kleborate_docker_image}"
    memory: "16 GB"
    cpu: 8
    disks: "local-disk 100 SSD"
  }
}
task serotypefinder_one_sample {
input {
File ecoli_assembly
Expand Down
Loading

0 comments on commit 24e06dc

Please sign in to comment.