Skip to content

Commit

Permalink
mummer_ani task: added percent_base_aligned_threshold with 70 as defa…
Browse files Browse the repository at this point in the history
…ult. default ani_threshold is now 85. Also added logic to only output ani_top_species_match if both thresholds are surpassed.
  • Loading branch information
kapsakcj committed Oct 6, 2023
1 parent 3cc9766 commit a556675
Showing 1 changed file with 13 additions and 4 deletions.
17 changes: 13 additions & 4 deletions tasks/quality_control/task_mummer_ani.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ task animummer {
String samplename
File? ref_genome
Float mash_filter = 0.9
Float ani_threshold = 92.0
# these 2 thresholds were set as they are used by CDC enterics lab/PulseNet for ANI thresholds
Float ani_threshold = 85.0
Float percent_bases_aligned_threshold = 70.0
String docker= "us-docker.pkg.dev/general-theiagen/staphb/mummer:4.0.0-rgdv2"
Int cpus = 4
Int memory = 8
Expand Down Expand Up @@ -53,6 +55,7 @@ task animummer {
## parse out highest percentBases aligned
cut -f 5 ~{samplename}.ani-mummer.out.tsv | sort -nr | head -n 1 | tee ANI_HIGHEST_PERCENT_BASES_ALIGNED.txt
echo "highest percent bases aligned is: $(cat ANI_HIGHEST_PERCENT_BASES_ALIGNED.txt)"
ANI_HIGHEST_PERCENT_BASES_ALIGNED=$(cat ANI_HIGHEST_PERCENT_BASES_ALIGNED.txt)

## parse out ANI value using highest percentBases aligned value
grep "$(cat ANI_HIGHEST_PERCENT_BASES_ALIGNED.txt)" ~{samplename}.ani-mummer.out.tsv | cut -f 3 | tee ANI_HIGHEST_PERCENT.txt
Expand All @@ -72,15 +75,21 @@ task animummer {
grep "${ANI_HIGHEST_PERCENT}" ~{samplename}.ani-mummer.out.tsv | cut -f 1,2 | sed "s|${assembly_file_basename}||g" | xargs | cut -d '.' -f 3 | tee ANI_TOP_SPECIES_MATCH.txt
echo "ANI top species match is: $(cat ANI_TOP_SPECIES_MATCH.txt)"

# if ANI threshold is defined by user, compare to highest ANI value and only output if threshold is surpassed
if [[ -n "~{ani_threshold}" ]]; then
# if ANI threshold or percent_bases_aligned_threshold is defined by user (they both are by default), compare to highest ANI value and corresponding percent_bases_aligned value and only output ANI_top_species_match if both thresholds are surpassed
if [[ -n "~{ani_threshold}" || -n "~{percent_bases_aligned_threshold}" ]]; then
echo "Comparing user-defined ANI threshold to highest ANI value..."
# compare ANI_HIGHEST_PERCENT to ani_threshold using awk
if ! awk "BEGIN{ exit ($ANI_HIGHEST_PERCENT < ~{ani_threshold} )}"; then
echo "The highest ANI value $ANI_HIGHEST_PERCENT is less than the user-defined threshold of ~{ani_threshold}"
echo "ANI top species match did not surpass the user-defined threshold of ~{ani_threshold}" > ANI_TOP_SPECIES_MATCH.txt
# else if: compare percent_bases_aligned_threshold to ANI_HIGHEST_PERCENT_BASES_ALIGNED using awk
elif ! awk "BEGIN{ exit (${ANI_HIGHEST_PERCENT_BASES_ALIGNED} < ~{percent_bases_aligned_threshold} )}"; then
echo "The highest ANI percent bases aligned value ${ANI_HIGHEST_PERCENT_BASES_ALIGNED} is less than the user-defined threshold of ~{percent_bases_aligned_threshold}"
# overwrite ANI_TOP_SPECIES_MATCH.txt when percent_bases_aligned threshold is not surpassed
echo "ANI percent bases aligned did not surpass the user-defined threshold of ~{percent_bases_aligned_threshold}" > ANI_TOP_SPECIES_MATCH.txt
else
echo "The highest ANI value $ANI_HIGHEST_PERCENT is greater than the user-defined threshold ~{ani_threshold}"
echo "The highest ANI value ${ANI_HIGHEST_PERCENT} is greater than the user-defined threshold ~{ani_threshold}"
echo "The highest percent bases aligned value ${ANI_HIGHEST_PERCENT_BASES_ALIGNED} is greater than the user-defined threshold ~{percent_bases_aligned_threshold}"
fi
fi
else
Expand Down

0 comments on commit a556675

Please sign in to comment.