forked from porchard/2021-03-sn-muscle
-
Notifications
You must be signed in to change notification settings - Fork 0
/
gkmexplain-and-fimo-new-plots.nf
150 lines (102 loc) · 3.06 KB
/
gkmexplain-and-fimo-new-plots.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/usr/bin/env nextflow
// ---------------------------------------------------------------------------
// Pipeline inputs. Everything here is read from `params`.
// ---------------------------------------------------------------------------

// Path/glob settings used further down by fimo_scan and plot_fimo.
PLAIN_MOTIF_GLOB = params.plain_motif_glob
MEME_GLOB = params.meme_glob
FIMO_BACKGROUND = params.fimo_background

// Model files matched by params.model_glob; consumed by the explain process.
model_channel = Channel.fromPath(params.model_glob)

// The SNP file has two consumers (make_fastas and plot_fimo), so the path is
// forked into two separately named channels.
Channel
    .fromPath(params.snp_file)
    .into { explain_channel; explain_channel_2 }
/*
 * make_fastas
 *
 * Runs make_ref_alt_flanking_fastas.py on the SNP file, passing the hg19
 * entry of params.fasta and a flank size of 50.
 *
 * Input : the SNP file from explain_channel.
 * Output: the pair (ref.fa, alt.fa) on `fastas`, and every *.fa file found in
 *         the task directory on `fastas_for_fimo`.
 */
process make_fastas {

    container "${params.containers.mkfasta}"

    input:
    file(snp_table) from explain_channel

    output:
    set file("ref.fa"), file("alt.fa") into fastas
    file("*.fa") into fastas_for_fimo

    script:
    def reference_fasta = params.fasta['hg19']
    """
    make_ref_alt_flanking_fastas.py $snp_table $reference_fasta --flank_size 50
    """
}
/*
 * explain
 *
 * Runs gkmexplain once for every (model, fasta) combination, where the fasta
 * files are the individual members of the tuples emitted on `fastas`.
 *
 * Input : [model, fasta] pairs built with combine().
 * Output: <cluster>.<ref_or_alt>.explained.txt on `explain_out`; also
 *         published under ${params.results}/explained.
 */
process explain {

    container "${params.containers.gkmsvm}"
    publishDir "${params.results}/explained"

    input:
    set file(model), file(fasta) from model_channel.combine(fastas.flatten())

    output:
    file("${cluster}.${ref_or_alt}.explained.txt") into explain_out

    script:
    // replace() matches its argument literally. replaceAll() would interpret
    // it as a regex, in which each '.' matches ANY character and could strip
    // unintended parts of a file name.
    // These two variables are deliberately not declared with `def`: the
    // output declaration above refers to them.
    cluster = model.getName().replace('.model.txt', '')
    ref_or_alt = fasta.getName().replace('.fa', '')
    """
    gkmexplain $fasta $model ${cluster}.${ref_or_alt}.explained.txt
    """
}
/*
 * reformat
 *
 * Pipes each gkmexplain output file through reformat-importance-scores.py and
 * captures stdout in a sibling file whose name has '.explained.txt' swapped
 * for '.explained.reformatted.txt'.
 *
 * Input : one file at a time from `explain_out`.
 * Output: the reformatted file, emitted on both `reformat_out` (read by plot)
 *         and `reformat_out_2` (read by plot_fimo); also published under
 *         ${params.results}/explained.
 */
process reformat {

    publishDir "${params.results}/explained"
    container "${params.containers.general}"
    executor 'local'

    input:
    file(explained) from explain_out

    output:
    file(out) into reformat_out
    file(out) into reformat_out_2

    script:
    // Literal replace(): with replaceAll() the pattern is a regex and the
    // dots in '.explained.txt' would match any character.
    // `out` is not declared with `def` because the output block refers to it.
    out = explained.getName().replace('.explained.txt', '.explained.reformatted.txt')
    """
    reformat-importance-scores.py $explained > $out
    """
}
/*
 * plot
 *
 * Single task that receives every reformatted score file at once (collected
 * into one sorted list) and passes them all, space-separated, to
 * plot-gkmexplain.R.
 *
 * Output: any *.pdf files left in the task directory, published under
 *         ${params.results}/plot.
 */
process plot {

    container "${params.containers.rplot}"
    executor 'local'
    publishDir "${params.results}/plot"

    input:
    file(score_tables) from reformat_out.toSortedList()

    output:
    file("*.pdf")

    script:
    """
    plot-gkmexplain.R ${score_tables.join(' ')}
    """
}
/*
 * fimo_scan
 *
 * Runs `fimo --text` for every combination of fasta file (from
 * `fastas_for_fimo`, flattened) and motif file (matched by MEME_GLOB), using
 * the background file given by FIMO_BACKGROUND.
 *
 * Output: tuples of (ref_or_alt, <ref_or_alt>.<motif_name>.fimo.txt) on
 *         `concat_in`; files are also published under
 *         ${params.results}/fimo/scan.
 *
 * At most 50 instances of this process run in parallel (maxForks).
 */
process fimo_scan {

    publishDir "${params.results}/fimo/scan"
    maxForks 50

    input:
    file(bg) from Channel.fromPath(FIMO_BACKGROUND)
    each file(fasta) from fastas_for_fimo.flatten()
    each file(motif) from Channel.fromPath(MEME_GLOB)

    output:
    set val("${ref_or_alt}"), file("${ref_or_alt}.${motif_name}.fimo.txt") into concat_in

    script:
    // Literal replace() instead of regex replaceAll(): as a regex, '.meme'
    // also matches e.g. '_meme' and '.fa' matches any character followed by
    // 'fa', which can silently truncate a name.
    // Not declared with `def` because the output block refers to both names.
    ref_or_alt = fasta.getName().replace('.fa', '')
    // '::' in motif names is rewritten to '_' for the output file name.
    motif_name = motif.getName().replace('.meme', '').replace('::', '_')
    """
    fimo --text --bgfile $bg $motif $fasta > ${ref_or_alt}.${motif_name}.fimo.txt
    """
}
/*
 * fimo_concat
 *
 * Groups the fimo_scan results by their first tuple element and concatenates
 * each group's files with `cat` into <key>.fimo.txt.
 *
 * Output: the concatenated file on `fimo_out`; also published under
 *         ${params.results}/concat.
 */
process fimo_concat {

    executor 'local'
    publishDir "${params.results}/concat"

    input:
    set val(allele), file(scan_tables) from concat_in.groupTuple()

    output:
    file("${allele}.fimo.txt") into fimo_out

    script:
    """
    cat ${scan_tables.join(' ')} > ${allele}.fimo.txt
    """
}
/*
 * plot_fimo
 *
 * Runs plot-fimo-general.py once per SNP value. The SNP value is the third
 * space-separated token of each line of the SNP file; tasks whose token is
 * exactly 'SNP' are skipped by the `when:` guard (presumably the header row;
 * confirm against the file format).
 *
 * The concatenated FIMO tables, the reformatted gkmexplain tables and the
 * plain motif files are staged into the task directory but never named on
 * the command line; the script is given '.' instead.
 * NOTE(review): presumably the script discovers those files in the current
 * directory; verify against plot-fimo-general.py.
 *
 * Failed tasks are ignored (errorStrategy 'ignore').
 *
 * Output: any *.pdf files, published under ${params.results}/fimo/plot.
 */
process plot_fimo {

    errorStrategy 'ignore'
    publishDir "${params.results}/fimo/plot"

    input:
    file(fimo_tables) from fimo_out.toSortedList()
    file(gkm_tables) from reformat_out_2.toSortedList()
    file(motif_files) from Channel.fromPath(PLAIN_MOTIF_GLOB).toSortedList()
    each snp_id from explain_channel_2.splitText().map { line -> line.tokenize(' ')[2] }

    output:
    file("*.pdf")

    when:
    snp_id != 'SNP'

    script:
    """
    plot-fimo-general.py . . $snp_id .
    """
}