Add kleborate task and workflow (#16)

* Initial commit. Individual Kleborate task and workflow. --all flag invoked. No parsing of output file included. * Second Commit. * add kleborate workflow to dockstore * remove default samplename * Added python code to parse outputs * Added outputs for parsed files and strings * added .to_string() * fixed python block indent * removed accidental indent * Reverted back to initial commit * reverted to initial commit * Added python logic to parse tsv (without pandas) * Added tsv parsing python block (without pandas) * added output prefixes and removed date output * removed date output * Added kleborate task * capture kleborate version and analysis date * Clean up validation files dir * remove miniwdl file * remove fja notes file * Utilize wf tag for version and analysis date only * add whitespace to end of file * remove redundant kleborate task file Co-authored-by: Frank_Ambrosio <[email protected]> Co-authored-by: frankambrosio3 <[email protected]>
theiagen · Jul 27, 2021 · 24e06dc · 24e06dc
1 parent 8632cf7
commit 24e06dc
Show file tree

Hide file tree

Showing 8 changed files with 71,180 additions and 25 deletions.
diff --git a/.dockstore.yml b/.dockstore.yml
@@ -10,6 +10,11 @@ workflows:
    primaryDescriptorPath: /workflows/wf_apollo_illumina_pe.wdl
    testParameterFiles:
     - empty.json
+ - name: Kleborate
+   subclass: WDL
+   primaryDescriptorPath: /workflows/wf_kleborate.wdl
+   testParameterFiles:
+    - empty.json
  - name: SerotypeFinder
    subclass: WDL
    primaryDescriptorPath: /workflows/wf_serotypefinder.wdl

diff --git a/notes.wdl b/notes.wdl
diff --git a/tasks/task_taxon_id.wdl b/tasks/task_taxon_id.wdl
@@ -11,7 +11,7 @@ task gambit {
     date | tee DATE
 
     midas query ~{assembly} | tail -n2 > ~{samplename}_gambit.csv
-    
+
     python3 <<CODE
     import csv
     #grab output genome length and number contigs by column header
@@ -58,6 +58,96 @@ task gambit {
     preemptible:  0
   }
 }
+task kleborate_one_sample {
+  # Runs Kleborate with the --all flag on a single assembly, then parses the
+  # resulting TSV (header row + one sample row) into per-field text files that
+  # are exposed as String outputs.
+  #
+  # Inputs:
+  #   assembly               - assembly file passed to "kleborate -a"
+  #   samplename             - prefix of the Kleborate TSV output file name
+  #   kleborate_docker_image - container image the task runs in
+  #
+  # Outputs:
+  #   kleborate_output_file  - the unmodified Kleborate TSV
+  #   version                - "Kleborate " followed by the output of "kleborate --version"
+  #   mlst_sequence_type     - value of the ST column
+  #   virulence_score        - value of the virulence_score column
+  #   resistance_score       - value of the resistance_score column
+  #   num_resistance_genes   - value of the num_resistance_genes column
+  #   bla_resistance_genes   - ';'-joined values of the Bla_*_acquired columns that are not '-'
+  #   esbl_resistance_genes  - ';'-joined values of the Bla_ESBL*_acquired columns that are not '-'
+  #   key_resistance_genes   - ';'-joined values of the Col/Fcyn/Flq/Rif/Bla acquired columns that are not '-'
+  #   resistance_mutations   - ';'-joined values of the chromosomal/mutation columns that are not '-'
+  #
+  # The DATE and SPECIES files are written by the command but are not exposed
+  # as task outputs.
+  input {
+    File assembly
+    String samplename
+    String kleborate_docker_image = "staphb/kleborate:2.0.4"
+  }
+  command <<<
+    # capture date and version
+    # Print and save date
+    date | tee DATE
+    # Print and save version, prefixed with the tool name
+    kleborate --version > VERSION && sed -i -e 's/^/Kleborate /' VERSION
+    # Run Kleborate on the input assembly with the --all flag and output with samplename prefix
+    kleborate -a ~{assembly} --all -o ~{samplename}_kleborate_output_file.tsv
+
+    python3 <<CODE
+    import csv
+
+    # Map each column header (first row) to its value in the sample row (second row)
+    with open("./~{samplename}_kleborate_output_file.tsv",'r') as tsv_file:
+      tsv_data=list(csv.reader(tsv_file, delimiter="\t"))
+    tsv_dict=dict(zip(tsv_data[0], tsv_data[1]))
+
+    def write_column(out_path, column):
+      # Write the value of a single TSV column to out_path
+      with open(out_path, 'wt') as out_file:
+        out_file.write(tsv_dict[column])
+
+    def write_joined(out_path, columns):
+      # Write the values of the given columns, skipping values equal to '-', joined by ';'
+      hits=[tsv_dict[column] for column in columns if tsv_dict[column] != '-']
+      with open(out_path, 'wt') as out_file:
+        out_file.write(';'.join(hits))
+
+    # Single-column fields
+    write_column("SPECIES", 'species')
+    write_column("MLST_SEQUENCE_TYPE", 'ST')
+    write_column("VIRULENCE_SCORE", 'virulence_score')
+    write_column("RESISTANCE_SCORE", 'resistance_score')
+    write_column("NUM_RESISTANCE_GENES", 'num_resistance_genes')
+
+    # Multi-column summaries
+    bla_res_genes_list=['Bla_acquired', 'Bla_inhR_acquired', 'Bla_ESBL_acquired', 'Bla_ESBL_inhR_acquired', 'Bla_Carb_acquired']
+    esbl_res_genes_list=['Bla_ESBL_acquired', 'Bla_ESBL_inhR_acquired']
+    key_res_genes_list=['Col_acquired', 'Fcyn_acquired', 'Flq_acquired', 'Rif_acquired', 'Bla_acquired', 'Bla_inhR_acquired', 'Bla_ESBL_acquired', 'Bla_ESBL_inhR_acquired', 'Bla_Carb_acquired']
+    res_mutations_list=['Bla_chr', 'SHV_mutations', 'Omp_mutations', 'Col_mutations', 'Flq_mutations']
+
+    write_joined("BLA_RESISTANCE_GENES", bla_res_genes_list)
+    # ESBL hits are collected into their own list; previously they were appended
+    # to the BLA list, which left this file empty for every sample.
+    write_joined("ESBL_RESISTANCE_GENES", esbl_res_genes_list)
+    write_joined("KEY_RESISTANCE_GENES", key_res_genes_list)
+    write_joined("GENOMIC_RESISTANCE_MUTATIONS", res_mutations_list)
+    CODE
+  >>>
+  output {
+    File kleborate_output_file = "~{samplename}_kleborate_output_file.tsv"
+    String version = read_string("VERSION")
+    String mlst_sequence_type = read_string("MLST_SEQUENCE_TYPE")
+    String virulence_score = read_string("VIRULENCE_SCORE")
+    String resistance_score = read_string("RESISTANCE_SCORE")
+    String num_resistance_genes = read_string("NUM_RESISTANCE_GENES")
+    String bla_resistance_genes = read_string("BLA_RESISTANCE_GENES")
+    String esbl_resistance_genes = read_string("ESBL_RESISTANCE_GENES")
+    String key_resistance_genes = read_string("KEY_RESISTANCE_GENES")
+    String resistance_mutations = read_string("GENOMIC_RESISTANCE_MUTATIONS")
+  }
+  runtime {
+    docker:       "~{kleborate_docker_image}"
+    memory:       "16 GB"
+    cpu:          8
+    disks:        "local-disk 100 SSD"
+  }
+}
 task serotypefinder_one_sample {
   input {
     File ecoli_assembly