theiagen · jrotieno · Dec 13, 2023 · Sep 14, 2023 · Sep 15, 2023 · Sep 15, 2023
@@ -60,13 +60,13 @@ task abricate_flu {
     File assembly
     String samplename
     String database = "insaflu"
-    String nextclade_flu_h1n1_ha_tag
-    String nextclade_flu_h1n1_na_tag
-    String nextclade_flu_h3n2_ha_tag
-    String nextclade_flu_h3n2_na_tag
-    String nextclade_flu_vic_ha_tag
-    String nextclade_flu_vic_na_tag
-    String nextclade_flu_yam_tag
+    String? nextclade_flu_h1n1_ha_tag
+    String? nextclade_flu_h1n1_na_tag
+    String? nextclade_flu_h3n2_ha_tag
+    String? nextclade_flu_h3n2_na_tag
+    String? nextclade_flu_vic_ha_tag
+    String? nextclade_flu_vic_na_tag
+    String? nextclade_flu_yam_tag
     Int minid = 70
     Int mincov =60
     Int cpu = 2
@@ -91,7 +91,17 @@ task abricate_flu {
     cat ~{samplename}_abricate_hits.tsv | awk -F '\t' '{if ($6=="M1") print $15}' > FLU_TYPE
     HA_hit=$(cat ~{samplename}_abricate_hits.tsv | awk -F '\t' '{if ($6=="HA") print $15 }')
     NA_hit=$(cat ~{samplename}_abricate_hits.tsv | awk -F '\t' '{if ($6=="NA") print $15 }')
-    flu_subtype="${HA_hit}${NA_hit}" && echo "$flu_subtype" >  FLU_SUBTYPE
+    if [[ ! (-z "${HA_hit}")  &&  ! (-z "${NA_hit}") ]]; then
+      flu_subtype="${HA_hit}${NA_hit}" && echo "$flu_subtype" >  FLU_SUBTYPE
+    fi
+    if [[ -z "${HA_hit}" ]]; then
+      flu_subtype="${NA_hit}" && echo "$flu_subtype" >  FLU_SUBTYPE
+    elif [[ -z "${NA_hit}" ]]; then
+      flu_subtype="${HA_hit}" && echo "$flu_subtype" >  FLU_SUBTYPE
+    else
+      flu_subtype="${HA_hit}${NA_hit}" && echo "$flu_subtype" >  FLU_SUBTYPE
+    fi
+    #flu_subtype="${HA_hit}${NA_hit}" && echo "$flu_subtype" >  FLU_SUBTYPE
 
     # set nextclade variables based on subptype
     run_nextclade=true

@@ -8,7 +8,7 @@ task consensus_qc {
     Int disk_size = 100
   }
   command <<<
-    if [ ~{reference_genome} ] ; then
+    if [ -s "~{reference_genome}" ] ; then
       GENOME_LEN=$(grep -v ">" ~{reference_genome} | tr --delete '\n' | wc -c)
     elif [ ~{genome_length} ] ; then
       GENOME_LEN=~{genome_length}
@@ -27,7 +27,7 @@ task consensus_qc {
     num_ACTG=$( grep -v ">" ~{assembly_fasta} | grep -o -E "C|A|T|G" | wc -l )
     if [ -z "$num_ACTG" ] ; then num_ACTG="0" ; fi
     echo $num_ACTG | tee NUM_ACTG
-
+  
     # calculate percent coverage (Wu Han-1 genome length: 29903bp)
     python3 -c "print ( round( ($num_ACTG / $GENOME_LEN ) * 100, 2 ) )" | tee PERCENT_REF_COVERAGE
 

@@ -9,7 +9,7 @@ task vadr {
     String vadr_opts = "--noseqnamemax --glsearch -s -r --nomisc --mkey sarscov2 --lowsim5seq 6 --lowsim3seq 6 --alt_fail lowscore,insertnn,deletinn --out_allfasta"
     Int assembly_length_unambiguous
     Int skip_length = 10000
-    String docker = "us-docker.pkg.dev/general-theiagen/staphb/vadr:1.5"
+    String docker = "us-docker.pkg.dev/general-theiagen/staphb/vadr:1.5.1"
     Int minlen = 50
     Int maxlen = 30000
     Int cpu = 2

@@ -54,6 +54,7 @@ task nextclade {
       File auspice_json = "~{basename}.nextclade.auspice.json"
       File nextclade_tsv = "~{basename}.nextclade.tsv"
       String nextclade_docker = docker
+      String nextclade_dataset_tag = "~{dataset_tag}"
     }
 }
 
@@ -168,76 +169,76 @@ task nextclade_output_parser {
 }
 
 task nextclade_add_ref {
-    meta {
-      description: "Nextclade task to add samples to either a user specified or a nextclade reference tree."
-    }
-    input {
-      File genome_fasta
-      File? root_sequence
-      File? reference_tree_json
-      File? qc_config_json
-      File? gene_annotations_gff
-      File? pcr_primers_csv
-      File? virus_properties
-      String docker = "us-docker.pkg.dev/general-theiagen/nextstrain/nextclade:2.14.0"
-      String dataset_name
-      String? dataset_reference
-      String? dataset_tag
-      Int disk_size = 50
-    }
-    String basename = basename(genome_fasta, ".fasta")
-    command <<<
-        NEXTCLADE_VERSION="$(nextclade --version)"
-        echo $NEXTCLADE_VERSION > NEXTCLADE_VERSION
-
-        nextclade dataset get \
-          --name="~{dataset_name}" \
-          ~{"--reference " + dataset_reference} \
-          ~{"--tag " + dataset_tag} \
-          -o nextclade_dataset_dir \
-          --verbose
-
-        # If no referece sequence is provided, use the reference tree from the dataset
-        if [ -z "~{reference_tree_json}" ]; then
-          echo "Default dataset reference tree JSON will be used"
-          cp nextclade_dataset_dir/tree.json reference_tree.json
-        else
-          echo "User reference tree JSON will be used"
-          cp ~{reference_tree_json} reference_tree.json
-        fi
-
-        tree_json="reference_tree.json"
-
-        set -e
-        nextclade run \
-            --input-dataset=nextclade_dataset_dir/ \
-            ~{"--input-root-seq " + root_sequence} \
-            --input-tree ${tree_json} \
-            ~{"--input-qc-config " + qc_config_json} \
-            ~{"--input-gene-map " + gene_annotations_gff} \
-            ~{"--input-pcr-primers " + pcr_primers_csv} \
-            ~{"--input-virus-properties " + virus_properties}  \
-            --output-json "~{basename}".nextclade.json \
-            --output-tsv  "~{basename}".nextclade.tsv \
-            --output-tree "~{basename}".nextclade.auspice.json \
-            --output-all=. \
-            "~{genome_fasta}"
-    >>>
-    runtime {
-      docker: "~{docker}"
-      memory: "8 GB"
-      cpu: 2
-      disks:  "local-disk " + disk_size + " SSD"
-      disk: disk_size + " GB" # TES
-      dx_instance_type: "mem1_ssd1_v2_x2"
-      maxRetries: 3
-    }
-    output {
-      String nextclade_version = read_string("NEXTCLADE_VERSION")
-      File nextclade_json = "~{basename}.nextclade.json"
-      File auspice_json = "~{basename}.nextclade.auspice.json"
-      File nextclade_tsv = "~{basename}.nextclade.tsv"
-      String nextclade_docker = docker
-      File netclade_ref_tree = "reference_tree.json"
-    }
+  meta {
+    description: "Nextclade task to add samples to either a user specified or a nextclade reference tree."
+  }
+  input {
+    File genome_fasta
+    File? root_sequence
+    File? reference_tree_json
+    File? qc_config_json
+    File? gene_annotations_gff
+    File? pcr_primers_csv
+    File? virus_properties
+    String docker = "us-docker.pkg.dev/general-theiagen/nextstrain/nextclade:2.14.0"
+    String dataset_name
+    String? dataset_reference
+    String? dataset_tag
+    Int disk_size = 50
+  }
+  String basename = basename(genome_fasta, ".fasta")
+  command <<<
+    NEXTCLADE_VERSION="$(nextclade --version)"
+    echo $NEXTCLADE_VERSION > NEXTCLADE_VERSION
+
+    nextclade dataset get \
+      --name="~{dataset_name}" \
+      ~{"--reference " + dataset_reference} \
+      ~{"--tag " + dataset_tag} \
+      -o nextclade_dataset_dir \
+      --verbose
+
+    # If no reference tree JSON is provided, use the reference tree from the dataset
+    if [ -z "~{reference_tree_json}" ]; then
+      echo "Default dataset reference tree JSON will be used"
+      cp nextclade_dataset_dir/tree.json reference_tree.json
+    else
+      echo "User reference tree JSON will be used"
+      cp ~{reference_tree_json} reference_tree.json
+    fi
+
+    tree_json="reference_tree.json"
+
+    set -e
+    nextclade run \
+      --input-dataset=nextclade_dataset_dir/ \
+      ~{"--input-root-seq " + root_sequence} \
+      --input-tree ${tree_json} \
+      ~{"--input-qc-config " + qc_config_json} \
+      ~{"--input-gene-map " + gene_annotations_gff} \
+      ~{"--input-pcr-primers " + pcr_primers_csv} \
+      ~{"--input-virus-properties " + virus_properties}  \
+      --output-json "~{basename}".nextclade.json \
+      --output-tsv  "~{basename}".nextclade.tsv \
+      --output-tree "~{basename}".nextclade.auspice.json \
+      --output-all=. \
+      "~{genome_fasta}"
+  >>>
+  runtime {
+    docker: "~{docker}"
+    memory: "8 GB"
+    cpu: 2
+    disks:  "local-disk " + disk_size + " SSD"
+    disk: disk_size + " GB"
+    dx_instance_type: "mem1_ssd1_v2_x2"
+    maxRetries: 3
+  }
+  output {
+    String nextclade_version = read_string("NEXTCLADE_VERSION")
+    File nextclade_json = "~{basename}.nextclade.json"
+    File auspice_json = "~{basename}.nextclade.auspice.json"
+    File nextclade_tsv = "~{basename}.nextclade.tsv"
+    String nextclade_docker = docker
+    File netclade_ref_tree = "reference_tree.json"
+  }
 }
@@ -2,5 +2,6 @@
     "theiacov_fasta.samplename": "fasta",
     "theiacov_fasta.assembly_fasta": "tests/data/theiacov/fasta/clearlabs.fasta.gz",
     "theiacov_fasta.seq_method": "clearlabs",
-    "theiacov_fasta.input_assembly_method": "clearlabs"
+    "theiacov_fasta.input_assembly_method": "clearlabs",
+    "theiacov_fasta.reference_genome": "tests/inputs/completely-empty-for-test.txt"
 }
@@ -88,7 +88,7 @@
     - path: miniwdl_run/call-consensus/work/primer-schemes/SARS-CoV-2/Vuser/SARS-CoV-2.scheme.bed
       md5sum: d5ad850f8c610dc45162957ab84530d6
     - path: miniwdl_run/call-consensus_qc/command
-      md5sum: 1736bbc2b16e75dbeb37076bacedc129
+      md5sum: 3ded305519281d6609fda355bf1c060b
     - path: miniwdl_run/call-consensus_qc/inputs.json
       contains: ["assembly_fasta", "medaka"]
     - path: miniwdl_run/call-consensus_qc/outputs.json

@@ -18,7 +18,7 @@
     - wf_theiacov_fasta_miniwdl
   files:
     - path: miniwdl_run/call-consensus_qc/command
-      md5sum: b89c8a9a0b9e27b26454ba7d668d68f4
+      md5sum: 57cce4e7c41e1ff0f9a9883605d84695
     - path: miniwdl_run/call-consensus_qc/inputs.json
     - path: miniwdl_run/call-consensus_qc/outputs.json
     - path: miniwdl_run/call-consensus_qc/stderr.txt
@@ -38,7 +38,7 @@
       md5sum: 6808ca805661622ad65ae014a4b2a094
     - path: miniwdl_run/call-consensus_qc/work/_miniwdl_inputs/0/clearlabs.fasta.gz
     - path: miniwdl_run/call-nextclade/command
-      md5sum: ed29cde6f430eff4c408d9ea214ebe85
+      md5sum: b5ecaad831316b3bd8f066f1e71cc0a5
     - path: miniwdl_run/call-nextclade/inputs.json
     - path: miniwdl_run/call-nextclade/outputs.json
     - path: miniwdl_run/call-nextclade/stderr.txt
@@ -69,11 +69,11 @@
     - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/reference.fasta
       md5sum: c7ce05f28e4ec0322c96f24e064ef55c
     - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/sequences.fasta
-      md5sum: ea475ab0a62a0a68fc3b1108fdff8a20
+      md5sum: bb6b4e9e91304a396724bcb6344b8a5d
     - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/tag.json
-      md5sum: 6a17b1ee5449279af7bdd0922545d7b8
+      md5sum: 97e1309e683fbaaa839198d88cd4e2d9
     - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/tree.json
-      md5sum: 13eb330629b6ef17a070fcb6283bea2f
+      md5sum: 6892e6019bf88ec571b4560d66d3acb0
     - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/virus_properties.json
     - path: miniwdl_run/call-nextclade/work/nextclade_gene_E.translation.fasta
       md5sum: dc43b1e98245a25c142aec52b29a07df
@@ -149,7 +149,7 @@
       md5sum: f4ad614b7ad39f28a8145cec280a93c0
     - path: miniwdl_run/call-vadr/inputs.json
     - path: miniwdl_run/call-vadr/outputs.json
-      md5sum: f58a2654f9ba9d49617f643b59ae739f
+      md5sum: e35217438ca21b347ef68e157c480c2e
     - path: miniwdl_run/call-vadr/stderr.txt
     - path: miniwdl_run/call-vadr/stderr.txt.offset
     - path: miniwdl_run/call-vadr/stdout.txt

@@ -94,7 +94,7 @@
     - path: miniwdl_run/call-consensus/work/primer-schemes/SARS-CoV-2/Vuser/SARS-CoV-2.scheme.bed
       md5sum: d5ad850f8c610dc45162957ab84530d6
     - path: miniwdl_run/call-consensus_qc/command
-      md5sum: 770764cd13027f258bf2a871c720c80d
+      md5sum: 2b043e77f5254e0a8002aa32693edeb8
     - path: miniwdl_run/call-consensus_qc/inputs.json
       contains: ["assembly_fasta", "medaka"]
     - path: miniwdl_run/call-consensus_qc/outputs.json

@@ -550,7 +550,7 @@
     - path: miniwdl_run/wdl/tasks/assembly/task_shovill.wdl
       md5sum: ca45f97152cb9536f2bb0603382021bd
     - path: miniwdl_run/wdl/tasks/gene_typing/task_abricate.wdl
-      md5sum: 8ea4befaa7a09b0def8d033cb9b806d1
+      md5sum: 49018b0dc2b173bc9e0f3893b8be8e7c
     - path: miniwdl_run/wdl/tasks/gene_typing/task_amrfinderplus.wdl
       md5sum: 249db321d15832002c4945394ae9af76
     - path: miniwdl_run/wdl/tasks/gene_typing/task_bakta.wdl

@@ -518,7 +518,7 @@
     - path: miniwdl_run/wdl/tasks/assembly/task_shovill.wdl
       md5sum: ca45f97152cb9536f2bb0603382021bd
     - path: miniwdl_run/wdl/tasks/gene_typing/task_abricate.wdl
-      md5sum: 8ea4befaa7a09b0def8d033cb9b806d1
+      md5sum: 49018b0dc2b173bc9e0f3893b8be8e7c
     - path: miniwdl_run/wdl/tasks/gene_typing/task_amrfinderplus.wdl
       md5sum: 249db321d15832002c4945394ae9af76
     - path: miniwdl_run/wdl/tasks/gene_typing/task_bakta.wdl