Skip to content

Commit

Permalink
Enable TBProfiler parameter changes (#246)
Browse files Browse the repository at this point in the history
* updated VCF output file renaming in kSNP3 task (#207)

* updated VCF output file renaming in kSNP3 task; also added 1 new File output and changed the output names to be more descriptive

* ksnp3 task: changed VCF file names to be predictable; split 2 ksnp3 options onto 2 lines for readability; added new string output "ksnp3_vcf_ref_samplename" to capture the sample within the cluster to use for SNP calling

* added new string output to ksnp3 workflow "ksnp3_vcf_ref_samplename"

* reduce unnecessary logging in MIDAS task (#210)

* made untar/decompression of the MIDAS database quiet since it produces 41k lines of output. Also made the 2 mv commands verbose (this adds only 2 lines of output)

* update CI

* expose tbprofiler parameters as inputs in merlin

* fix input spelling

---------

Co-authored-by: Curtis Kapsak <[email protected]>
  • Loading branch information
frankambrosio3 and kapsakcj authored Nov 13, 2023
1 parent c6fac1e commit efafe25
Show file tree
Hide file tree
Showing 6 changed files with 36 additions and 12 deletions.
19 changes: 14 additions & 5 deletions tasks/phylogenetic_inference/task_ksnp3.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ task ksnp3 {
-in ksnp3_input.tsv \
-outdir ksnp3 \
-k ~{kmer_size} \
-core -vcf \
-core \
-vcf \
~{'-SNPs_all ' + previous_ksnp3_snps} \
~{ksnp3_args}

Expand All @@ -71,7 +72,13 @@ task ksnp3 {
echo "The core SNP matrix could not be produced" | tee SKIP_SNP_DIST # otherwise, skip
fi

mv -v ksnp3/VCF.*.vcf ksnp3/~{cluster_name}_core.vcf
# capture sample name of genome used as reference
ls ksnp3/*.vcf | cut -d '.' -f 2 | tee KSNP3_VCF_REF_SAMPLENAME.txt

# rename the 2 VCF.* output files by prefixing them with ~{cluster_name} and removing the ref genome name to make final filenames predictable
mv -v ksnp3/VCF.*.vcf ksnp3/~{cluster_name}_VCF.reference_genome.vcf
mv -v ksnp3/VCF.SNPsNotinRef.* ksnp3/~{cluster_name}_VCF_.SNPsNotinRef.tsv

mv -v ksnp3/SNPs_all_matrix.fasta ksnp3/~{cluster_name}_pan_SNPs_matrix.fasta
mv -v ksnp3/tree.parsimony.tre ksnp3/~{cluster_name}_pan_parsimony.nwk

Expand All @@ -84,9 +91,11 @@ task ksnp3 {

>>>
output {
File ksnp3_core_matrix = "ksnp3/${cluster_name}_core_SNPs_matrix.fasta"
File ksnp3_core_tree = "ksnp3/${cluster_name}_core.nwk"
File ksnp3_core_vcf = "ksnp3/${cluster_name}_core.vcf"
File ksnp3_core_matrix = "ksnp3/~{cluster_name}_core_SNPs_matrix.fasta"
File ksnp3_core_tree = "ksnp3/~{cluster_name}_core.nwk"
File ksnp3_vcf_ref_genome = "ksnp3/~{cluster_name}_VCF.reference_genome.vcf"
File ksnp3_vcf_snps_not_in_ref = "ksnp3/~{cluster_name}_VCF_.SNPsNotinRef.tsv"
String ksnp3_vcf_ref_samplename = read_string("KSNP3_VCF_REF_SAMPLENAME.txt")
File ksnp3_pan_matrix = "ksnp3/~{cluster_name}_pan_SNPs_matrix.fasta"
File ksnp3_pan_parsimony_tree = "ksnp3/~{cluster_name}_pan_parsimony.nwk"
File? ksnp3_ml_tree = "ksnp3/~{cluster_name}_ML.nwk"
Expand Down
7 changes: 4 additions & 3 deletions tasks/taxon_id/task_midas.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,15 @@ task midas {

# Decompress the Midas database
mkdir db
tar -C ./db/ -xzvf ~{midas_db}
echo "Decompressing Midas database. Please be patient, this may take a few minutes."
tar -C ./db/ -xzf ~{midas_db}

# Run Midas
run_midas.py species ~{samplename} -1 ~{read1} ~{'-2 ' + read2} -d db/midas_db_v1.2/ -t ~{cpu}

# rename output files
mv ~{samplename}/species/species_profile.txt ~{samplename}/species/~{samplename}_species_profile.tsv
mv ~{samplename}/species/log.txt ~{samplename}/species/~{samplename}_log.txt
mv -v ~{samplename}/species/species_profile.txt ~{samplename}/species/~{samplename}_species_profile.tsv
mv -v ~{samplename}/species/log.txt ~{samplename}/species/~{samplename}_log.txt

# Run a python block to parse output file for terra data tables
# pandas is available in default docker image for python2 but not python3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -630,7 +630,7 @@
- path: miniwdl_run/wdl/tasks/taxon_id/task_kraken2.wdl
md5sum: a1f287e6e6feaf2d7d3c74a70e3b5a28
- path: miniwdl_run/wdl/tasks/taxon_id/task_midas.wdl
md5sum: 024971d1439dff7d59c0a26a824bd2c6
md5sum: faacd87946ee3fbdf70f3a15b79ce547
- path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl
md5sum: 43ef050bde1fb8755f38e697a1794918
- path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -598,7 +598,7 @@
- path: miniwdl_run/wdl/tasks/taxon_id/task_kraken2.wdl
md5sum: a1f287e6e6feaf2d7d3c74a70e3b5a28
- path: miniwdl_run/wdl/tasks/taxon_id/task_midas.wdl
md5sum: 024971d1439dff7d59c0a26a824bd2c6
md5sum: faacd87946ee3fbdf70f3a15b79ce547
- path: miniwdl_run/wdl/tasks/utilities/task_broad_terra_tools.wdl
md5sum: 43ef050bde1fb8755f38e697a1794918
- path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_se.wdl
Expand Down
4 changes: 3 additions & 1 deletion workflows/phylogenetics/wf_ksnp3.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,9 @@ workflow ksnp3_workflow {
String ksnp3_docker = ksnp3_task.ksnp3_docker_image
# ksnp3_outputs
String ksnp3_snp_dists_version = pan_snp_dists.snp_dists_version
File ksnp3_core_vcf = ksnp3_task.ksnp3_core_vcf
File ksnp3_vcf_ref_genome = ksnp3_task.ksnp3_vcf_ref_genome
File ksnp3_vcf_snps_not_in_ref = ksnp3_task.ksnp3_vcf_snps_not_in_ref
String ksnp3_vcf_ref_samplename = ksnp3_task.ksnp3_vcf_ref_samplename
String ksnp3_core_snp_matrix_status = ksnp3_task.skip_core_snp_dists
File ksnp3_snps = ksnp3_task.ksnp3_snps_all
# ordered matrixes and reordered trees
Expand Down
14 changes: 13 additions & 1 deletion workflows/utilities/wf_merlin_magic.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ workflow merlin_magic {
Boolean call_shigeifinder_reads_input = false
Boolean assembly_only = false
Boolean theiaeuk = false
String? tbp_mapper
String? tbp_caller
Int? tbp_min_depth
Float? tbp_min_af
Float? tbp_min_af_pred
Int? tbp_cov_frac_threshold
Boolean tbprofiler_run_custom_db = false
File tbprofiler_custom_db = "gs://theiagen-public-files/terra/theiaprok-files/tbdb_varpipe_combined_nodups.tar.gz"
Boolean tbprofiler_additional_outputs = false
Expand Down Expand Up @@ -252,7 +258,13 @@ workflow merlin_magic {
samplename = samplename,
tbprofiler_run_custom_db = tbprofiler_run_custom_db,
tbprofiler_custom_db = tbprofiler_custom_db,
ont_data = ont_data
ont_data = ont_data,
mapper = tbp_mapper,
caller = tbp_caller,
min_depth = tbp_min_depth,
min_af = tbp_min_af,
min_af_pred = tbp_min_af_pred,
cov_frac_threshold = tbp_cov_frac_threshold
}
if (tbprofiler_additional_outputs) {
call tbp_parser_task.tbp_parser {
Expand Down

0 comments on commit efafe25

Please sign in to comment.