## DenovoGearPipeline.wdl
import "DenovoGearPostProcessing.wdl" as DenovoGear_post
workflow DenovoGearPipeline {
  ## ---- Workflow inputs ----

  ## Trio alignment files and their indexes.
  File child_bam
  File child_bam_bai
  File father_bam
  File father_bam_bai
  File mother_bam
  File mother_bam_bai

  ## Reference files.
  ## NOTE(review): reference_dict is declared but not passed to any call in this workflow.
  File reference
  File reference_fai
  File reference_dict

  ## Pedigree file handed to the DenovoGear caller, and the script handed to NumericGenotype.
  File ped_file
  File python_file

  ## Chromosome ids; each id is turned into a chromosome name by prepending "chr".
  Array[Int] chromosome_ids
  Array[String] chromosomes = prefix("chr", chromosome_ids)

  ## ---- Per-chromosome stage ----
  ## Every chromosome name gets its own SamtoolsMpileup + DenovoGearCaller pair,
  ## so the chromosomes are processed as independent scatter shards.
  scatter (chromosome in chromosomes) {

    ## Build the bcf mpileup of the trio for this chromosome.
    call SamtoolsMpileup {
      input:
        chromosome = chromosome,
        reference = reference,
        reference_fai = reference_fai,
        child_bam = child_bam,
        child_bam_bai = child_bam_bai,
        father_bam = father_bam,
        father_bam_bai = father_bam_bai,
        mother_bam = mother_bam,
        mother_bam_bai = mother_bam_bai
    }

    ## Run DenovoGear on the mpileup produced just above.
    call DenovoGearCaller {
      input:
        chromosome = chromosome,
        ped_file = ped_file,
        mpileup_file = SamtoolsMpileup.mpileup_file
    }
  }

  ## ---- Post-processing stage (tasks imported from DenovoGearPostProcessing.wdl) ----

  ## Merge the per-chromosome DenovoGear output files gathered from the scatter.
  call DenovoGear_post.CombineDenovoGearOutput as CombineDenovoGearOutput {
    input:
      DNGOutputFiles = DenovoGearCaller.dng_out
  }

  ## Binarize the genotype values of the variants in the combined file.
  call DenovoGear_post.NumericGenotype as NumericGenotype {
    input:
      python_file = python_file,
      DNG_file = CombineDenovoGearOutput.CombinedDNGOutput
  }

  ## Keep only the variants that carry a de novo mutation genotype.
  call DenovoGear_post.SelectDNMGenotype as SelectDNMGenotype {
    input:
      Numeric_Genotype_input = NumericGenotype.DenovoGear_NumericGenotype_output
  }

  ## Separate the selected variants into a SNP file and an INDEL file.
  call DenovoGear_post.SplitSnpIndel as SplitSnpIndel {
    input:
      Combined_DNG_Numeric_Genotype_file = SelectDNMGenotype.DenovoGear_DNM_Genotype_output
  }

  ## Produce the lists of de novo mutations from the selected, SNP and INDEL files.
  call DenovoGear_post.ListOfDNMs as ListOfDNMs {
    input:
      DenovoGear_DNMs_file = SelectDNMGenotype.DenovoGear_DNM_Genotype_output,
      DenovoGear_snp_file = SplitSnpIndel.DenovoGear_snp_file,
      DenovoGear_indel_file = SplitSnpIndel.DenovoGear_indel_file
  }

  ## ---- Workflow outputs: one file from each post-processing step ----
  output {
    File CombinedDNGOutput = CombineDenovoGearOutput.CombinedDNGOutput
    File DenovoGear_NumericGenotype_output = NumericGenotype.DenovoGear_NumericGenotype_output
    File DenovoGear_DNM_Genotype_output = SelectDNMGenotype.DenovoGear_DNM_Genotype_output
    File DenovoGear_snp_file = SplitSnpIndel.DenovoGear_snp_file
    File DenovoGear_indel_file = SplitSnpIndel.DenovoGear_indel_file
    File DenovoGear_DNMs_file_output = ListOfDNMs.DenovoGear_DNMs_file_output
    File DenovoGear_list_of_snps_output = ListOfDNMs.DenovoGear_list_of_snps_output
    File DenovoGear_list_of_indels_output = ListOfDNMs.DenovoGear_list_of_indels_output
  }
}
## Pre-processing step required by the DenovoGear caller.
## Runs samtools mpileup once per chromosome on the trio BAM files.
## Requires the trio BAM files (with their indexes), the reference files and the chromosome name.
## Produces one BCF mpileup file per chromosome.
task SamtoolsMpileup {
  ## Region to pile up; also used as the stem of the output file name.
  String chromosome

  ## Reference passed to samtools with -f.
  ## NOTE(review): reference_fai is not referenced in the command; presumably it is
  ## declared so the index is localized next to the reference -- confirm.
  File reference
  File reference_fai

  ## Trio alignments. The command lists them in the order child, father, mother.
  ## NOTE(review): the *_bai inputs are not referenced in the command; presumably they
  ## are declared so each index is localized next to its BAM -- confirm.
  File child_bam
  File child_bam_bai
  File father_bam
  File father_bam_bai
  File mother_bam
  File mother_bam_bai

  ## Restrict the pileup to ${chromosome} and write it to ${chromosome}.mpileup.bcf.
  command {
    samtools mpileup \
    -r ${chromosome} \
    -t DP \
    -gf ${reference} \
    ${child_bam} ${father_bam} ${mother_bam} \
    -o ${chromosome}.mpileup.bcf
  }

  ## The file written by -o in the command above.
  output {
    File mpileup_file = "${chromosome}.mpileup.bcf"
  }

  ## Execution settings: samtools container, resources and retry count.
  runtime {
    docker: "biocontainers/samtools:v1.3.1_cv4"
    cpu: 2
    memory: "8GB"
    disks: "local-disk"
    maxRetries: 3
  }
}
## This task runs the DenovoGear caller once per chromosome.
## It takes the BCF mpileup file generated by the SamtoolsMpileup task, the pedigree (PED) file and the chromosome name.
## For each chromosome it writes a VCF file and captures the caller's standard output in a separate .out file.
task DenovoGearCaller {
  ## Chromosome name; used only to build the two output file names.
  String chromosome

  ## Pedigree file passed to dng via --ped.
  File ped_file

  ## BCF mpileup passed to dng via --bcf.
  File mpileup_file

  ## Run "dng dnm auto": the VCF is written through --write, while everything the
  ## caller prints on standard output is redirected into ${chromosome}_dng.out.
  command {
    /usr/local/denovo/dng/bin/dng dnm auto --ped ${ped_file} --bcf ${mpileup_file} --write ${chromosome}_dng.vcf > ${chromosome}_dng.out
  }

  ## Both files created by the command above.
  output {
    File dng_vcf = "${chromosome}_dng.vcf"
    File dng_out = "${chromosome}_dng.out"
  }

  ## Execution settings: DenovoGear container, resources and retry count.
  runtime {
    docker: "mictro/vccri-denovogear:1.1.1-290-gce84763"
    cpu: 2
    memory: "32GB"
    disks: "local-disk"
    maxRetries: 3
  }
}