Skip to content

Commit

Permalink
TheiaProk TB: new VCF output and modification to the coverage report (#245)
Browse files Browse the repository at this point in the history

* output tbprofiler vcf

* update default docker

* fix path

* add sample id to the beginning of the coverage report

* update default docker

* Enable TBProfiler parameter changes (#246)

* updated VCF output file renaming in kSNP3 task (#207)

* updated VCF output file renaming in kSNP3 task; also added 1 new File output and change the output names to be more descriptive

* ksnp3 task: changed VCF file names to be predictable; split 2 ksnp3 options onto 2 lines for readability; added new string output "ksnp3_vcf_ref_samplename" to capture the sample within the cluster to use for SNP calling

* added new string output to ksnp3 workflow "ksnp3_vcf_ref_samplename"

* reduce unnecessary logging in MIDAS task (#210)

* made untar/decompression of midas database quiet since it produces 41k lines of output. also made the 2 mv commands verbose (but it's only 2 lines!)

* update CI

* expose tbprofiler parameters as inputs in merlin

* input spelling

---------

Co-authored-by: Curtis Kapsak <[email protected]>

* update md5sums

* caller_options tbprofiler

* caller_options merlin magic

* --calling_params tbprofiler

* calling_params tbprofiler

* quotes around params tbprofiler

* added quotes around calling params tbprofiler

* "-C 1 -F 0.0" tbprof

* removed caller options

* hardcoded tbprofiler freebayes params

* re-optionalize

* update md5sums

* Add branch name to versioning task

* version reversion for merge

* update checksums

---------

Co-authored-by: frankambrosio3 <[email protected]>
Co-authored-by: Curtis Kapsak <[email protected]>
Co-authored-by: frankambrosio3 <[email protected]>
Co-authored-by: kevinlibuit <[email protected]>
  • Loading branch information
5 people authored Dec 29, 2023
1 parent 494f076 commit 7177fbb
Show file tree
Hide file tree
Showing 8 changed files with 40 additions and 16 deletions.
5 changes: 4 additions & 1 deletion tasks/species_typing/task_tbp_parser.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ task tbp_parser {
Int coverage_threshold = 100
Boolean tbp_parser_debug = false

String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.1.1"
String docker = "us-docker.pkg.dev/general-theiagen/theiagen/tbp-parser:1.1.4"
Int disk_size = 100
Int memory = 4
Int cpu = 1
Expand Down Expand Up @@ -41,6 +41,9 @@ task tbp_parser {

# get genome average depth
samtools depth -J ~{tbprofiler_bam} | awk -F "\t" '{sum+=$3} END { print sum/NR }' | tee AVG_DEPTH

# add sample id to the beginning of the coverage report
awk '{print "~{samplename},"$0}' ~{samplename}.percent_gene_coverage.csv > tmp.csv && mv -f tmp.csv ~{samplename}.percent_gene_coverage.csv
>>>
output {
File tbp_parser_looker_report_csv = "~{samplename}.looker_report.csv"
Expand Down
19 changes: 11 additions & 8 deletions tasks/species_typing/task_tbprofiler.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ task tbprofiler {
String tbprofiler_docker_image = "us-docker.pkg.dev/general-theiagen/staphb/tbprofiler:4.4.2"
Int disk_size = 100
String mapper = "bwa"
String caller = "freebayes"
String variant_caller = "freebayes"
String? variant_calling_params
Int min_depth = 10
Float min_af = 0.1
Float min_af_pred = 0.1
Expand Down Expand Up @@ -42,7 +43,6 @@ task tbprofiler {

# check if new database file is provided and not empty
if [ "~{tbprofiler_run_custom_db}" = true ] ; then

echo "Found new database file ~{tbprofiler_custom_db}"
prefix=$(basename "~{tbprofiler_custom_db}" | sed 's/\.tar\.gz$//')
echo "New database will be created with prefix $prefix"
Expand All @@ -53,11 +53,8 @@ task tbprofiler {
tb-profiler load_library ./"$prefix"/"$prefix"

TBDB="--db $prefix"

else

TBDB=""

fi

# Run tb-profiler on the input reads with samplename prefix
Expand All @@ -66,11 +63,11 @@ task tbprofiler {
${INPUT_READS} \
--prefix ~{samplename} \
--mapper ~{mapper} \
--caller ~{caller} \
--caller ~{variant_caller} \
--calling_params "~{variant_calling_params}" \
--min_depth ~{min_depth} \
--af ~{min_af} \
--reporting_af \
~{min_af_pred} \
--reporting_af ~{min_af_pred} \
--coverage_fraction_threshold ~{cov_frac_threshold} \
--csv --txt \
$TBDB
Expand All @@ -81,6 +78,11 @@ task tbprofiler {
# touch optional output files because wdl
touch GENE_NAME LOCUS_TAG VARIANT_SUBSTITUTIONS OUTPUT_SEQ_METHOD_TYPE

# merge all vcf files if multiple are present
bcftools index ./vcf/*bcf
bcftools index ./vcf/*gz
bcftools merge --force-samples ./vcf/*bcf ./vcf/*gz > ./vcf/~{samplename}.targets.csq.merged.vcf

python3 <<CODE
import csv
import json
Expand Down Expand Up @@ -127,6 +129,7 @@ task tbprofiler {
File tbprofiler_output_json = "./results/~{samplename}.results.json"
File tbprofiler_output_bam = "./bam/~{samplename}.bam"
File tbprofiler_output_bai = "./bam/~{samplename}.bam.bai"
File tbprofiler_output_vcf = "./vcf/~{samplename}.targets.csq.merged.vcf"
String version = read_string("VERSION")
String tbprofiler_main_lineage = read_string("MAIN_LINEAGE")
String tbprofiler_sub_lineage = read_string("SUB_LINEAGE")
Expand Down
6 changes: 3 additions & 3 deletions tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,7 @@
- path: miniwdl_run/wdl/tasks/species_typing/task_sonneityping.wdl
md5sum: aeb12cf9a0db4e34f2aecbaba51c30fb
- path: miniwdl_run/wdl/tasks/species_typing/task_tbprofiler.wdl
md5sum: c2a8c0978cc1bd65656584e1bad9dca7
md5sum: a90fc52112a8333361f96e50b316d03b
- path: miniwdl_run/wdl/tasks/species_typing/task_ts_mlst.wdl
md5sum: d49ae0b02e798af0636eb2721bb434b4
- path: miniwdl_run/wdl/tasks/task_versioning.wdl
Expand All @@ -634,9 +634,9 @@
- path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl
md5sum: 4106837e51f6445e02776e0a74606ed5
- path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl
md5sum: 3acf4dcddbb44d547b69f597761cc048
md5sum: 6da70123ba3fd1a3ec5434ef21a4c0cb
- path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl
md5sum: 00bd2489b2a7aa5b88340a940961a857
md5sum: 90eb6ac7463058a81da77120aa45138b
- path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_pe.wdl
contains: ["version", "QC", "output"]
- path: miniwdl_run/workflow.log
Expand Down
6 changes: 3 additions & 3 deletions tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml
Original file line number Diff line number Diff line change
Expand Up @@ -586,7 +586,7 @@
- path: miniwdl_run/wdl/tasks/species_typing/task_sonneityping.wdl
md5sum: aeb12cf9a0db4e34f2aecbaba51c30fb
- path: miniwdl_run/wdl/tasks/species_typing/task_tbprofiler.wdl
md5sum: c2a8c0978cc1bd65656584e1bad9dca7
md5sum: a90fc52112a8333361f96e50b316d03b
- path: miniwdl_run/wdl/tasks/species_typing/task_ts_mlst.wdl
md5sum: d49ae0b02e798af0636eb2721bb434b4
- path: miniwdl_run/wdl/tasks/task_versioning.wdl
Expand All @@ -600,9 +600,9 @@
- path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl
md5sum: 4106837e51f6445e02776e0a74606ed5
- path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_se.wdl
md5sum: 3e19938fc8a624c7948b57867865561a
md5sum: e2fbc89ced1fd8e44106e8c12dd9c129
- path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl
md5sum: 00bd2489b2a7aa5b88340a940961a857
md5sum: 90eb6ac7463058a81da77120aa45138b
- path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_se.wdl
md5sum: 53d322d895837c0bcb049786572e944d
- path: miniwdl_run/workflow.log
Expand Down
1 change: 1 addition & 0 deletions workflows/theiaprok/wf_theiaprok_illumina_pe.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -836,6 +836,7 @@ workflow theiaprok_illumina_pe {
File? tbprofiler_output_file = merlin_magic.tbprofiler_output_file
File? tbprofiler_output_bam = merlin_magic.tbprofiler_output_bam
File? tbprofiler_output_bai = merlin_magic.tbprofiler_output_bai
File? tbprofiler_output_vcf = merlin_magic.tbprofiler_output_vcf
String? tbprofiler_version = merlin_magic.tbprofiler_version
String? tbprofiler_main_lineage = merlin_magic.tbprofiler_main_lineage
String? tbprofiler_sub_lineage = merlin_magic.tbprofiler_sub_lineage
Expand Down
1 change: 1 addition & 0 deletions workflows/theiaprok/wf_theiaprok_illumina_se.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -788,6 +788,7 @@ workflow theiaprok_illumina_se {
File? tbprofiler_output_file = merlin_magic.tbprofiler_output_file
File? tbprofiler_output_bam = merlin_magic.tbprofiler_output_bam
File? tbprofiler_output_bai = merlin_magic.tbprofiler_output_bai
File? tbprofiler_output_vcf = merlin_magic.tbprofiler_output_vcf
String? tbprofiler_version = merlin_magic.tbprofiler_version
String? tbprofiler_main_lineage = merlin_magic.tbprofiler_main_lineage
String? tbprofiler_sub_lineage = merlin_magic.tbprofiler_sub_lineage
Expand Down
1 change: 1 addition & 0 deletions workflows/theiaprok/wf_theiaprok_ont.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -750,6 +750,7 @@ workflow theiaprok_ont {
File? tbprofiler_output_file = merlin_magic.tbprofiler_output_file
File? tbprofiler_output_bam = merlin_magic.tbprofiler_output_bam
File? tbprofiler_output_bai = merlin_magic.tbprofiler_output_bai
File? tbprofiler_output_vcf = merlin_magic.tbprofiler_output_vcf
String? tbprofiler_version = merlin_magic.tbprofiler_version
String? tbprofiler_main_lineage = merlin_magic.tbprofiler_main_lineage
String? tbprofiler_sub_lineage = merlin_magic.tbprofiler_sub_lineage
Expand Down
17 changes: 16 additions & 1 deletion workflows/utilities/wf_merlin_magic.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,13 @@ workflow merlin_magic {
Boolean call_shigeifinder_reads_input = false
Boolean assembly_only = false
Boolean theiaeuk = false
String? tbprofiler_mapper
Int? tbprofiler_min_depth
Float? tbprofiler_min_af
Float? tbprofiler_min_af_pred
Int? tbprofiler_cov_frac_threshold
String? tbprofiler_variant_caller
String? tbprofiler_variant_calling_params
Boolean tbprofiler_run_custom_db = false
File tbprofiler_custom_db = "gs://theiagen-public-files/terra/theiaprok-files/tbdb_varpipe_combined_nodups.tar.gz"
Boolean tbprofiler_additional_outputs = false
Expand Down Expand Up @@ -252,7 +259,14 @@ workflow merlin_magic {
samplename = samplename,
tbprofiler_run_custom_db = tbprofiler_run_custom_db,
tbprofiler_custom_db = tbprofiler_custom_db,
ont_data = ont_data
ont_data = ont_data,
mapper = tbprofiler_mapper,
variant_caller = tbprofiler_variant_caller,
variant_calling_params = tbprofiler_variant_calling_params,
min_depth = tbprofiler_min_depth,
min_af = tbprofiler_min_af,
min_af_pred = tbprofiler_min_af_pred,
cov_frac_threshold = tbprofiler_cov_frac_threshold
}
if (tbprofiler_additional_outputs) {
call tbp_parser_task.tbp_parser {
Expand Down Expand Up @@ -580,6 +594,7 @@ workflow merlin_magic {
File? tbprofiler_output_file = tbprofiler.tbprofiler_output_csv
File? tbprofiler_output_bam = tbprofiler.tbprofiler_output_bam
File? tbprofiler_output_bai = tbprofiler.tbprofiler_output_bai
File? tbprofiler_output_vcf = tbprofiler.tbprofiler_output_vcf
String? tbprofiler_version = tbprofiler.version
String? tbprofiler_main_lineage = tbprofiler.tbprofiler_main_lineage
String? tbprofiler_sub_lineage = tbprofiler.tbprofiler_sub_lineage
Expand Down

0 comments on commit 7177fbb

Please sign in to comment.