Skip to content

Commit

Permalink
Patch to modified pangolin updates in versions >=2.4.1 (#9)
Browse files Browse the repository at this point in the history
* fix to ensure pangolin 2.4 outputs are properly parsed

* add local dev workflows

* update to grab output by column headers

* ensure static nextclade tag

* update default pangolin docker tag

* set pangolin_conflicts output to String to capture all possible values
  • Loading branch information
kevinlibuit authored May 6, 2021
1 parent cd49970 commit f09aa09
Show file tree
Hide file tree
Showing 10 changed files with 66 additions and 167 deletions.
44 changes: 29 additions & 15 deletions tasks/task_taxonID.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -98,37 +98,50 @@ task pangolin2 {
input {
File fasta
String samplename
Int min_length=10000
Float max_ambig=0.5
String docker


}

command{
command <<<
# date and version control
date | tee DATE
echo "$(pangolin -v); $(pangolin -pv)" | tee VERSION
set -e

pangolin "~{fasta}" \
--outfile "~{samplename}.pangolin_report.csv" \
--min-length ~{min_length} \
--max-ambig ~{max_ambig} \
--verbose

pangolin_lineage=$(tail -n 1 ${samplename}.pangolin_report.csv | cut -f 2 -d "," | grep -v "lineage")

pangolin_probability=$(tail -n 1 ${samplename}.pangolin_report.csv | cut -f 3 -d "," )
mv ${samplename}.pangolin_report.csv ${samplename}_pango2_lineage.csv

echo $pangolin_lineage | tee PANGOLIN_LINEAGE
echo $pangolin_probability | tee PANGOLIN_PROBABILITY
}
python3 <<CODE
import csv
#grab output values by column header
with open("~{samplename}.pangolin_report.csv",'r') as csv_file:
csv_reader = list(csv.DictReader(csv_file, delimiter=","))
for line in csv_reader:
with open("VERSION", 'wt') as lineage:
pangolin_version=line["pangolin_version"]
pangoLEARN_version=line["pangoLEARN_version"]
lineage.write(f"pangolin {pangolin_version}; pangoLEARN {pangoLEARN_version}")
with open("PANGOLIN_LINEAGE", 'wt') as lineage:
lineage.write(line["lineage"])
with open("PANGOLIN_CONFLICTS", 'wt') as lineage:
lineage.write(line["conflict"])
with open("PANGOLIN_NOTES", 'wt') as lineage:
lineage.write(line["note"])
CODE
>>>
output {
String date = read_string("DATE")
String version = read_string("VERSION")
String pangolin_lineage = read_string("PANGOLIN_LINEAGE")
String pangolin_aLRT = read_string("PANGOLIN_PROBABILITY")
String pangolin_conflicts = read_string("PANGOLIN_CONFLICTS")
String pangolin_notes = read_string("PANGOLIN_NOTES")
String pangolin_docker = docker
File pango_lineage_report = "${samplename}_pango2_lineage.csv"
File pango_lineage_report = "${samplename}.pangolin_report.csv"
}
runtime {
Expand All @@ -151,6 +164,7 @@ task nextclade_one_sample {
File? qc_config_json
File? gene_annotations_json
File? pcr_primers_csv
String docker = "neherlab/nextclade:0.14.2"
}
String basename = basename(genome_fasta, ".fasta")
command {
Expand Down Expand Up @@ -179,7 +193,7 @@ task nextclade_one_sample {
grep ^aaDeletions transposed.tsv | cut -f 2 | grep -v aaDeletions | sed 's/,/|/g' > NEXTCLADE_AADELS
}
runtime {
docker: "neherlab/nextclade:latest"
docker: "~{docker}"
memory: "3 GB"
cpu: 2
disks: "local-disk 50 HDD"
Expand Down
11 changes: 6 additions & 5 deletions workflows/wf_pangolin_update.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,11 @@ workflow pangolin_update {
}

output {
String pango_lineage = pangolin2.pangolin_lineage
Float pangolin_aLRT = pangolin2.pangolin_aLRT
File pango_lineage_report = pangolin2.pango_lineage_report
String pangolin_version = pangolin2.version
String pangolin_docker = pangolin2.pangolin_docker
String pango_lineage = pangolin2.pangolin_lineage
String pangolin_conflicts = pangolin2.pangolin_conflicts
String pangolin_notes = pangolin2.pangolin_notes
String pangolin_version = pangolin2.version
File pango_lineage_report = pangolin2.pango_lineage_report
String pangolin_docker = pangolin2.pangolin_docker
}
}
7 changes: 4 additions & 3 deletions workflows/wf_titan_clearlabs.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ workflow titan_clearlabs {
File clear_lab_fastq
String seq_method = "ONT via Clear Labs WGS"
String? artic_primer_version = "V3"
String pangolin_docker_image = "staphb/pangolin:2.3.8-pangolearn-2021-04-14"
String pangolin_docker_image = "staphb/pangolin:2.4.2-pangolearn-2021-04-28"
Int? normalise = 20000
}
call qc_utils.fastqc_se as fastqc_se_raw {
Expand Down Expand Up @@ -97,8 +97,9 @@ workflow titan_clearlabs {
Float assembly_mean_coverage = stats_n_coverage_primtrim.depth
String samtools_version = stats_n_coverage.samtools_version

String pango_lineage = pangolin2.pangolin_lineage
Float pangolin_aLRT = pangolin2.pangolin_aLRT
String pango_lineage = pangolin2.pangolin_lineage
String pangolin_conflicts = pangolin2.pangolin_conflicts
String pangolin_notes = pangolin2.pangolin_notes
String pangolin_version = pangolin2.version
File pango_lineage_report = pangolin2.pango_lineage_report
String pangolin_docker = pangolin2.pangolin_docker
Expand Down
52 changes: 0 additions & 52 deletions workflows/wf_titan_clearlabs_local-dev.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -13,56 +13,4 @@ workflow nCoV19_pipeline {
clear_lab_fastq = sample.right
}
}
output {
Array[String] seq_platform = titan_clearlabs.seq_platform

Array[File] dehosted_reads = titan_clearlabs.dehosted_reads

Array[String] kraken_version = titan_clearlabs.kraken_version
Array[Float] kraken_human = titan_clearlabs.kraken_human
Array[Float] kraken_sc2 = titan_clearlabs.kraken_sc2
Array[String] kraken_report = titan_clearlabs.kraken_report
Array[Float] kraken_human_dehosted = titan_clearlabs.kraken_human_dehosted
Array[Float] kraken_sc2_dehosted = titan_clearlabs.kraken_sc2_dehosted
Array[String] kraken_report_dehosted = titan_clearlabs.kraken_report_dehosted

Array[File] aligned_bam = titan_clearlabs.aligned_bam
Array[File] aligned_bai = titan_clearlabs.aligned_bai
Array[File] variants_from_ref_vcf = titan_clearlabs.variants_from_ref_vcf
Array[String] artic_version = titan_clearlabs.artic_version
Array[File] assembly_fasta = titan_clearlabs.assembly_fasta
Array[Int] number_N = titan_clearlabs.number_N
Array[Int] assembly_length_unambiguous = titan_clearlabs.assembly_length_unambiguous
Array[Int] number_Degenerate = titan_clearlabs.number_Degenerate
Array[Int] number_Total = titan_clearlabs.number_Total
Array[Float] pool1_percent = titan_clearlabs.pool1_percent
Array[Float] pool2_percent = titan_clearlabs.pool2_percent
Array[Float] percent_reference_coverage = titan_clearlabs.percent_reference_coverage
Array[String] assembly_method = titan_clearlabs.assembly_method

Array[File] consensus_stats = titan_clearlabs.consensus_stats
Array[File] consensus_flagstat = titan_clearlabs.consensus_flagstat
Array[Float] meanbaseq_trim = titan_clearlabs.meanbaseq_trim
Array[Float] meanmapq_trim = titan_clearlabs.meanmapq_trim
Array[Float] assembly_mean_coverage = titan_clearlabs.assembly_mean_coverage
Array[String] samtools_version = titan_clearlabs.samtools_version

Array[String] pango_lineage = titan_clearlabs.pango_lineage
Array[Float] pangolin_aLRT = titan_clearlabs.pangolin_aLRT
Array[String] pangolin_version = titan_clearlabs.pangolin_version
Array[File] pango_lineage_report = titan_clearlabs.pango_lineage_report
Array[String] pangolin_docker = titan_clearlabs.pangolin_docker

Array[File] nextclade_json = titan_clearlabs.nextclade_json
Array[File] auspice_json = titan_clearlabs.auspice_json
Array[File] nextclade_tsv = titan_clearlabs.nextclade_tsv
Array[String] nextclade_clade = titan_clearlabs.nextclade_clade
Array[String] nextclade_aa_subs = titan_clearlabs.nextclade_aa_subs
Array[String] nextclade_aa_dels = titan_clearlabs.nextclade_aa_dels
Array[String] nextclade_version = titan_clearlabs.nextclade_version

Array[File] vadr_alerts_list = titan_clearlabs.vadr_alerts_list
Array[Int] vadr_num_alerts = titan_clearlabs.vadr_num_alerts
Array[String] vadr_docker = titan_clearlabs.vadr_docker
}
}
5 changes: 3 additions & 2 deletions workflows/wf_titan_illumina_pe.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ workflow titan_illumina_pe {
File read1_raw
File read2_raw
File primer_bed
String pangolin_docker_image = "staphb/pangolin:2.3.8-pangolearn-2021-04-14"
String pangolin_docker_image = "staphb/pangolin:2.4.2-pangolearn-2021-04-28"

}

Expand Down Expand Up @@ -130,7 +130,8 @@ workflow titan_illumina_pe {
String samtools_version_stats = stats_n_coverage.samtools_version

String pango_lineage = pangolin2.pangolin_lineage
Float pangolin_aLRT = pangolin2.pangolin_aLRT
String pangolin_conflicts = pangolin2.pangolin_conflicts
String pangolin_notes = pangolin2.pangolin_notes
String pangolin_version = pangolin2.version
File pango_lineage_report = pangolin2.pango_lineage_report
String pangolin_docker = pangolin2.pangolin_docker
Expand Down
42 changes: 0 additions & 42 deletions workflows/wf_titan_illumina_pe_local-dev.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -19,47 +19,5 @@ workflow nCoV19_pipeline {
read1_raw = sample.right.left,
read2_raw = sample.right.right
}

call summary.sample_metrics {
input:
samplename = sample.left[0],
submission_id = sample.left[1],
collection_date = sample.left[2],
pangolin_lineage = titan_illumina_pe.pango_lineage,
pangolin_aLRT = titan_illumina_pe.pangolin_aLRT,
nextclade_clade = titan_illumina_pe.nextclade_clade,
nextclade_aa_subs = titan_illumina_pe.nextclade_aa_subs,
nextclade_aa_dels = titan_illumina_pe.nextclade_aa_dels,
fastqc_raw_pairs = titan_illumina_pe.fastqc_raw_pairs,
kraken_human = titan_illumina_pe.kraken_human,
kraken_sc2 = titan_illumina_pe.kraken_sc2,
number_N = titan_illumina_pe.number_N,
number_ATCG = titan_illumina_pe.assembly_length_unambiguous,
number_Degenerate = titan_illumina_pe.number_Degenerate,
number_Total = titan_illumina_pe.number_Total,
meanbaseq_trim = titan_illumina_pe.meanbaseq_trim,
meanmapq_trim = titan_illumina_pe.meanmapq_trim,
coverage_trim = titan_illumina_pe.percent_reference_coverage,
depth_trim = titan_illumina_pe.assembly_mean_coverage,
}

}

call summary.merge_metrics {
input:
all_metrics = sample_metrics.single_metrics
}

output {
Array[File] read1_clean = titan_illumina_pe.read1_clean
Array[File] read2_clean = titan_illumina_pe.read2_clean
Array[File] kraken_report = titan_illumina_pe.kraken_report
Array[File] trim_sorted_bam = titan_illumina_pe.aligned_bam
Array[File] trim_sorted_bai = titan_illumina_pe.aligned_bai
Array[File] consensus_seq = titan_illumina_pe.assembly_fasta
Array[File] samtools_stats = titan_illumina_pe.consensus_stats
Array[File] samtools_flagstat = titan_illumina_pe.consensus_flagstat
File merged_metrics = merge_metrics.run_results

}
}
7 changes: 4 additions & 3 deletions workflows/wf_titan_illumina_se.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ workflow titan_illumina_se {
String seq_method="Illumina paired-end"
File read1_raw
File primer_bed
String pangolin_docker_image = "staphb/pangolin:2.3.8-pangolearn-2021-04-14"
String pangolin_docker_image = "staphb/pangolin:2.4.2-pangolearn-2021-04-28"
}

call read_qc.read_QC_trim {
Expand Down Expand Up @@ -123,8 +123,9 @@ workflow titan_illumina_se {
Float assembly_mean_coverage = stats_n_coverage_primtrim.depth
String samtools_version_stats = stats_n_coverage.samtools_version

String pango_lineage = pangolin2.pangolin_lineage
Float pangolin_aLRT = pangolin2.pangolin_aLRT
String pango_lineage = pangolin2.pangolin_lineage
String pangolin_conflicts = pangolin2.pangolin_conflicts
String pangolin_notes = pangolin2.pangolin_notes
String pangolin_version = pangolin2.version
File pango_lineage_report = pangolin2.pango_lineage_report
String pangolin_docker = pangolin2.pangolin_docker
Expand Down
42 changes: 0 additions & 42 deletions workflows/wf_titan_illumina_se_local-dev.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -10,54 +10,12 @@ input {
Array[Pair[Array[String], File]] inputSamples
File primer_bed
}

scatter (sample in inputSamples) {
call assembly.titan_illumina_se {
input:
samplename = sample.left[0],
primer_bed = primer_bed,
read1_raw = sample.right
}

call summary.sample_metrics {
input:
samplename = sample.left[0],
submission_id = sample.left[1],
collection_date = sample.left[2],
pangolin_lineage = titan_illumina_se.pango_lineage,
pangolin_aLRT = titan_illumina_se.pangolin_aLRT,
nextclade_clade = titan_illumina_se.nextclade_clade,
nextclade_aa_subs = titan_illumina_se.nextclade_aa_subs,
nextclade_aa_dels = titan_illumina_se.nextclade_aa_dels,
fastqc_raw_pairs = titan_illumina_se.fastqc_number_reads,
kraken_human = titan_illumina_se.kraken_human,
kraken_sc2 = titan_illumina_se.kraken_sc2,
number_N = titan_illumina_se.number_N,
number_ATCG = titan_illumina_se.assembly_length_unambiguous,
number_Degenerate = titan_illumina_se.number_Degenerate,
number_Total = titan_illumina_se.number_Total,
meanbaseq_trim = titan_illumina_se.meanbaseq_trim,
meanmapq_trim = titan_illumina_se.meanmapq_trim,
coverage_trim = titan_illumina_se.percent_reference_coverage,
depth_trim = titan_illumina_se.assembly_mean_coverage,
}

}

call summary.merge_metrics {
input:
all_metrics = sample_metrics.single_metrics
}

output {
Array[File] read1_clean = titan_illumina_se.read1_clean
Array[File] kraken_report = titan_illumina_se.kraken_report
Array[File] trim_sorted_bam = titan_illumina_se.aligned_bam
Array[File] trim_sorted_bai = titan_illumina_se.aligned_bai
Array[File] consensus_seq = titan_illumina_se.assembly_fasta
Array[File] samtools_stats = titan_illumina_se.consensus_stats
Array[File] samtools_flagstat = titan_illumina_se.consensus_flagstat
File merged_metrics = merge_metrics.run_results

}
}
7 changes: 4 additions & 3 deletions workflows/wf_titan_ont.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ workflow titan_ont {
String? artic_primer_version = "V3"
File demultiplexed_reads
Int? normalise = 200
String pangolin_docker_image = "staphb/pangolin:2.3.8-pangolearn-2021-04-14"
String pangolin_docker_image = "staphb/pangolin:2.4.2-pangolearn-2021-04-28"
}
call qc_utils.fastqc_se as fastqc_se_raw {
input:
Expand Down Expand Up @@ -109,9 +109,10 @@ workflow titan_ont {
String samtools_version = stats_n_coverage.samtools_version

String pango_lineage = pangolin2.pangolin_lineage
Float pangolin_aLRT = pangolin2.pangolin_aLRT
File pango_lineage_report = pangolin2.pango_lineage_report
String pangolin_conflicts = pangolin2.pangolin_conflicts
String pangolin_notes = pangolin2.pangolin_notes
String pangolin_version = pangolin2.version
File pango_lineage_report = pangolin2.pango_lineage_report
String pangolin_docker = pangolin2.pangolin_docker

File nextclade_json = nextclade_one_sample.nextclade_json
Expand Down
16 changes: 16 additions & 0 deletions workflows/wf_titan_ont_local-dev.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
version 1.0

# Local-development wrapper for the Titan ONT workflow.
# Scatters over a list of samples and runs `titan_ont` once per sample.
# The wrapped workflow is imported under the alias `assembly`.
import "wf_titan_ont.wdl" as assembly

workflow nCoV19_pipeline {
  input {
    # One entry per sample:
    #   left  - array of strings; element 0 is used as the sample name
    #   right - the reads file handed to the wrapped workflow
    Array[Pair[Array[String], File]] inputSamples
  }
  scatter (sample in inputSamples) {
    # This file previously called `titan_clearlabs` (with `clear_lab_fastq`),
    # so the ONT wrapper never exercised the ONT workflow.
    call assembly.titan_ont {
      input:
        # NOTE(review): assumes titan_ont declares a `samplename` input like
        # the sibling Titan workflows - confirm against wf_titan_ont.wdl.
        samplename = sample.left[0],
        demultiplexed_reads = sample.right
    }
  }
}

0 comments on commit f09aa09

Please sign in to comment.