-
Notifications
You must be signed in to change notification settings - Fork 17
/
poreCov.nf
executable file
·676 lines (582 loc) · 33.3 KB
/
poreCov.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
#!/usr/bin/env nextflow
nextflow.enable.dsl=2
/*
* Nextflow -- SARS-CoV-2 Analysis Pipeline
* Author: [email protected]
*/
/**************************
* HELP messages & checks
**************************/
header()

/*
Minimum Nextflow version check
Format is this: XX.YY.ZZ (e.g. 20.07.1)
change below
Only the major (XX) and minor (YY) parts are compared; ZZ is only used in the message.
*/
XX = "21"
YY = "04"
ZZ = "0"

// Tokenize the running version once instead of re-parsing it for every comparison.
def nf_version_parts = nextflow.version.toString().tokenize('.')
def nf_major = nf_version_parts[0].toInteger()
def nf_minor = nf_version_parts[1].toInteger()

// The running version is too old when its major part is lower than XX, or when the
// major part equals XX and the minor part is lower than YY. The major token (index 0)
// must be the one compared against XX for the minor check to be reachable.
if ( nf_major < XX.toInteger() || ( nf_major == XX.toInteger() && nf_minor < YY.toInteger() ) ) {
    println "\033[0;33mporeCov requires at least Nextflow version " + XX + "." + YY + "." + ZZ + " -- You are using version $nextflow.version\u001B[0m"
    exit 1
}
/*
try to check for poreCov releases
*/
/**
 * Probes the GitHub "latest release" endpoint of poreCov.
 *
 * @return true when a connection could be opened and its input stream obtained,
 *         false when any IOException (including MalformedURLException) occurred.
 */
static boolean netIsAvailable() {
    boolean reachable
    try {
        final URLConnection probe = new URL("https://api.github.com/repos/replikation/poreCov/releases/latest").openConnection()
        probe.connect()
        // obtaining the stream forces the request; close it right away
        probe.getInputStream().close()
        reachable = true
    } catch (IOException ignored) {
        // MalformedURLException is a subclass of IOException, so one handler covers both
        reachable = false
    }
    return reachable
}
// Look up the tag name of the latest poreCov release (best effort, never fatal).
def gitcheck = netIsAvailable()

// Fallback text, used when offline or when the response cannot be read/parsed.
porecovrelease = 'Could not get version info'
if ( gitcheck ) {
    try {
        def release_json = 'https://api.github.com/repos/replikation/poreCov/releases/latest'.toURL().text
        // tolerate optional whitespace around the colon (compact and pretty-printed JSON)
        def tag_matcher = ( release_json =~ /"tag_name"\s*:\s*"([^"]+)"/ )
        if ( tag_matcher.find() ) { porecovrelease = tag_matcher.group(1) }
    } catch (Exception e) {
        // a failed request (e.g. rate limit, connection dropped after the probe) must not
        // abort the pipeline; the fallback text stays in place
    }
}

println " "
println "  Latest available poreCov release: " + porecovrelease
println "  If neccessary update via: nextflow pull replikation/poreCov"
println "________________________________________________________________________________"
// Print the help text and stop when --help is given
if ( params.help ) { exit 0, helpMSG() }

// Profile sanity checks: -profile must name an executer and, optionally, an engine
def active_profile = workflow.profile

if ( active_profile == 'standard' ) { exit 1, "NO EXECUTION PROFILE SELECTED, use e.g. [-profile local,docker]" }
if ( params.profile ) { exit 1, "--profile is WRONG use -profile" }

// profile name fragments that count as a container engine
def engine_selected = ['singularity', 'nanozoo', 'ukj_cloud', 'stub', 'docker'].any { active_profile.contains(it) }
if ( !engine_selected ) {
    println "No engine selected:  -profile EXECUTER,ENGINE"
    println "using native installations"
}

// profile name fragments that count as an executer; a missing executer is fatal
def executer_selected = ['nanozoo', 'ukj_cloud', 'local', 'stub', 'slurm'].any { active_profile.contains(it) }
if ( !executer_selected ) { exit 1, "No executer selected:  -profile EXECUTER,ENGINE" }

// Informational output for local runs
if ( active_profile.contains('local') ) {
    println "\033[2m Using $params.cores/$params.max_cores CPU threads per process for a local run [--max_cores]\u001B[0m"
    println " "
}

// Singularity specific warning
if ( active_profile.contains('singularity') ) {
    println ""
    println "\033[0;33mWARNING: Singularity image building sometimes fails!"
    println "Multiple resumes (-resume) and --max_cores 1 --cores 1 for local execution might help.\033[0m\n"
}
// Input combination checks (skipped entirely when one of the test-data profiles is active)
def test_data_profile_active = ['test_fastq', 'test_fast5', 'test_fasta'].any { workflow.profile.contains(it) }

if ( !test_data_profile_active ) {
    def has_fastq_input = params.fastq || params.fastq_pass

    // at least one input type is required
    if ( !( params.fasta || params.fast5 || has_fastq_input ) ) {
        exit 1, "input missing, use [--fasta] [--fastq] [--fastq_pass] or [--fast5]"
    }
    // the two fastq input styles exclude each other
    if ( params.fastq && params.fastq_pass ) {
        exit 1, "Please use either: [--fastq] or [--fastq_pass]"
    }
    // fasta input cannot be combined with any read input
    if ( params.fasta && ( has_fastq_input || params.fast5 ) ) {
        exit 1, "Please use [--fasta] without inputs like: [--fastq], [--fastq_pass], [--fast5]"
    }
    // reads plus fast5 is only meaningful for the nanopolish route
    if ( has_fastq_input && params.fast5 && !params.nanopolish ) {
        exit 1, "Simultaneous fastq and fast5 input is only supported with [--nanopolish]"
    }
}

// For local runs, --cores may not exceed --max_cores
if ( workflow.profile.contains('local') && ( params.cores.toInteger() > params.max_cores.toInteger() ) ) {
    exit 1, "More cores (--cores $params.cores) specified than available (--max_cores $params.max_cores)"
}

// --samples cannot be combined with --single
if ( params.single && params.samples ) {
    exit 1, "Sample input [--samples] not supported for [--single]"
}
// Input flags given without a value arrive as boolean true; each of them needs a path instead.
// The map keeps the original check order.
[
    fasta      : "Please provide a fasta file via [--fasta]",
    fastq      : "Please provide a fastq files (one per sample) via [--fastq]",
    fastq_pass : "Please provide a fastq_pass dir via [--fastq_pass]",
    fast5      : "Please provide a fast5 dir via [--fast5]"
].each { input_flag, hint ->
    if ( params.get(input_flag) == true ) { exit 5, hint }
}

// Read length limits must consist of digits only
def digits_only = "[0-9]+"
if ( params.minLength && !params.minLength.toString().matches(digits_only) ) {
    exit 5, "Please provide an integer number (e.g. 300) as minimal read length via [--minLength]"
}
if ( params.maxLength && !params.maxLength.toString().matches(digits_only) ) {
    exit 5, "Please provide an integer number (e.g. 300) as maximum read length via [--maxLength]"
}

// --nanopolish given as a bare flag together with fastq input: a sequencing summary file is expected
if ( params.nanopolish == true && ( params.fastq || params.fastq_pass ) ) {
    exit 5, "Please provide sequencing_summary.txt via [--nanopolish]"
}

// nanopolish needs fast5 data (the test_fast5 profile supplies its own)
if ( !workflow.profile.contains('test_fast5') && params.nanopolish && !params.fast5 ) {
    exit 5, "Please provide a fast5 dir for nanopolish [--fast5]"
}
// Check correct usage of parameter flags

// --extended reads additional columns from the sample sheet, so --samples is mandatory
if (params.extended && !params.samples ) { exit 5, "When using --extended you need to specify also a sample.csv via [--samples]" }

// --freyja and --lcs are pure on/off flags: anything that is not a Boolean means the user
// passed a value to them. (Comparing the negated value against both true and false can
// never succeed, so the type is checked directly.)
if (params.freyja != null && !(params.freyja instanceof Boolean)) { exit 5, "Please provide no input to [--freyja]" }
if (params.lcs != null && !(params.lcs instanceof Boolean)) { exit 5, "Please provide no input to [--lcs]" }

// --screen_reads and the two screening tools depend on each other
if (params.screen_reads && !params.lcs && !params.freyja) { exit 5, "When using [--screen_reads] you also need to use at least one: [--freyja] or [--lcs]" }
if (!params.screen_reads && params.lcs) { exit 5, "[--lcs] requires [--screen_reads] to work" }
if (!params.screen_reads && params.freyja) { exit 5, "[--freyja] requires [--screen_reads] to work" }
// Validate the sample table given via --samples
if (params.samples) {

    // 1) the first row (the header) has to contain the columns '_id' and 'Status'
    def header_check_ch = Channel
        .fromPath( params.samples, checkIfExists: true )
        .splitCsv( header: false, sep: ',' )
        .take( 1 )
        .map { columns ->
            if ( !columns.contains("_id") ) { exit 6, "The column '_id' was not found in $params.samples, hidden symbols? Use a editor to generate the csv file" }
            if ( !columns.contains("Status") ) { exit 6, "The column 'Status' was not found in $params.samples" }
        }

    // 2) no row may have an empty 'Status' or '_id' field
    def field_check_ch = Channel
        .fromPath( params.samples, checkIfExists: true )
        .splitCsv( header: true, sep: ',' )
        .map { entry ->
            if ( !entry.'Status' ) { exit 6, "A Status field appears to be empty in the file $params.samples" }
            if ( !entry.'_id' ) { exit 6, "A _id field appears to be empty in the file $params.samples"}
        }

    header_check_ch.mix( field_check_ch )
}
/**************************
* INPUTs
**************************/
// fasta input: either the file(s) directly, or (with --list) a csv whose second column holds the paths.
// Not set up here when the test_fasta profile is active.
if ( params.fasta && !workflow.profile.contains('test_fasta') ) {
    if ( params.list ) {
        fasta_input_raw_ch = Channel
            .fromPath( params.fasta, checkIfExists: true )
            .splitCsv()
            .map { row -> file("${row[1]}", checkIfExists: true) }
    }
    else {
        fasta_input_raw_ch = Channel
            .fromPath( params.fasta, checkIfExists: true)
    }
}

// consensus qc reference input: user supplied file, otherwise the reference shipped in the project dir
if ( params.reference_for_qc ) {
    reference_for_qc_input_ch = Channel
        .fromPath( params.reference_for_qc, checkIfExists: true)
}
else {
    reference_for_qc_input_ch = Channel
        .fromPath(workflow.projectDir + "/data/reference_nCov19/NC_045512.2.fasta")
}
// fastq file input: directly (sample name = simpleName of the file) or, with --list,
// via a csv with the name in column 1 and the path in column 2
if ( params.fastq && !workflow.profile.contains('test_fastq') ) {
    if ( params.list ) {
        fastq_file_ch = Channel
            .fromPath( params.fastq, checkIfExists: true )
            .splitCsv()
            .map { row -> ["${row[0]}", file("${row[1]}", checkIfExists: true)] }
    }
    else {
        fastq_file_ch = Channel
            .fromPath( params.fastq, checkIfExists: true)
            .map { reads_file -> tuple(reads_file.simpleName, reads_file) }
    }
}

// fastq_pass directory input (raw basecaller output): same two variants, but paths must be directories
if ( params.fastq_pass && !workflow.profile.contains('test_fastq') ) {
    if ( params.list ) {
        fastq_dir_ch = Channel
            .fromPath( params.fastq_pass, checkIfExists: true )
            .splitCsv()
            .map { row -> ["${row[0]}", file("${row[1]}", checkIfExists: true, type: 'dir')] }
    }
    else {
        fastq_dir_ch = Channel
            .fromPath( params.fastq_pass, checkIfExists: true, type: 'dir')
            .map { reads_dir -> tuple(reads_dir.simpleName, reads_dir) }
    }
}

// fast5 directory input, keyed by the directory name
if ( params.fast5 && !workflow.profile.contains('test_fast5') ) {
    dir_input_ch = Channel
        .fromPath( params.fast5, checkIfExists: true, type: 'dir')
        .map { fast5_dir -> tuple(fast5_dir.name, fast5_dir) }
}
// samples input
if (params.samples) {
    // (barcodeNN, sample name) pairs: NN are the last two characters of the Status column,
    // the name is the _id column with blanks removed
    samples_input_ch = Channel
        .fromPath( params.samples, checkIfExists: true)
        .splitCsv(header: true, sep: ',')
        .map { entry ->
            tuple("barcode${entry.Status[-2..-1]}", "${entry._id.replace( " ", "")}")
        }

    // cleaned two-column copy of the sample sheet, written to input.csv
    samples_file_ch = Channel
        .fromPath( params.samples, checkIfExists: true)
        .splitCsv(header: true, sep: ',')
        .collectFile(seed: '_id,Status\n') { entry ->
            def csv_line = entry.'_id'.replace( " ", "") + ',' + entry.'Status'.replace( " ", "") + '\n'
            [ "input.csv", csv_line ]
        }
}
else {
    // placeholder value when no sample sheet is given
    samples_file_ch = Channel.from( ['deactivated'] )
}

// extended input: additional sample sheet columns collected into extended.csv
if (params.samples && params.extended) {
    extended_input_ch = Channel
        .fromPath( params.samples, checkIfExists: true)
        .splitCsv(header: true, sep: ',')
        .collectFile() { entry ->
            def csv_line = entry.'_id'.replace( " ", "") + ',' + entry.'Submitting_Lab' + ',' + entry.'Isolation_Date' + ',' +
                entry.'Seq_Reason' + ',' + entry.'Sample_Type'.replace( " ", "") + '\n'
            [ "extended.csv", csv_line ]
        }
}
else {
    // placeholder values when the extended sheet is not used
    extended_input_ch = Channel.from( ['deactivated', 'deactivated'] )
}
/**************************
* Automatic Pangolin/Nextclade version updates, with fail-safe fallback to the defaults
**************************/
/**
 * Probes the Docker Hub tag listing of the nanozoo/pangolin-v4 repository.
 *
 * @return true when a connection could be opened and its input stream obtained,
 *         false when any IOException (including MalformedURLException) occurred.
 */
static boolean DockernetIsAvailable() {
    boolean reachable
    try {
        final URLConnection probe = new URL("https://registry.hub.docker.com/v2/repositories/nanozoo/pangolin-v4/tags/").openConnection()
        probe.connect()
        // obtaining the stream forces the request; close it right away
        probe.getInputStream().close()
        reachable = true
    } catch (IOException ignored) {
        // MalformedURLException is a subclass of IOException, so one handler covers both
        reachable = false
    }
    return reachable
}
// Resolve which pangolin / nextclade containers and which UCSC tree version to use.
// Every online lookup is best effort: any failure falls back to the configured defaults.
def internetcheck = DockernetIsAvailable()

// Returns the first tag name found in the Docker Hub tag listing of `repository`,
// or null when the listing cannot be fetched or does not have the expected layout.
def latest_docker_tag = { String repository ->
    try {
        def tags_json = ('https://registry.hub.docker.com/v2/repositories/' + repository + '/tags/').toURL().text
        return tags_json.split(',"name":"')[1].split('","')[0]
    } catch (Exception e) {
        return null
    }
}

if (params.update) {
    println "\033[0;33mWarning: Running --update might not be poreCov compatible!\033[0m"

    // the lookups are only attempted when the connectivity probe succeeded
    def pangolin_tag = internetcheck ? latest_docker_tag('nanozoo/pangolin-v4') : null
    if ( pangolin_tag ) {
        params.pangolindocker = "nanozoo/pangolin-v4:" + pangolin_tag
        println "\033[0;32mFound latest pangolin container, using: " + params.pangolindocker + " \033[0m"
    }
    else {
        println "\033[0;33mCould not find the latest pangolin container, trying: " + params.defaultpangolin + "\033[0m"
        params.pangolindocker = params.defaultpangolin
    }

    def nextclade_tag = internetcheck ? latest_docker_tag('nanozoo/nextclade3') : null
    if ( nextclade_tag ) {
        params.nextcladedocker = "nanozoo/nextclade3:" + nextclade_tag
        println "\033[0;32mFound latest nextclade3 container, using: " + params.nextcladedocker + " \033[0m"
    }
    else {
        println "\033[0;33mCould not find the latest nextclade3 container, trying: " + params.defaultnextclade + "\033[0m"
        params.nextcladedocker = params.defaultnextclade
    }
}
else { params.pangolindocker = params.defaultpangolin ; params.nextcladedocker = params.defaultnextclade }

if ( params.screen_reads && params.lcs_ucsc_update ){
    // the version is the text between the first pair of parentheses of the version file
    def latest_ucsc_version = null
    if ( internetcheck ) {
        try {
            latest_ucsc_version = 'https://hgdownload.soe.ucsc.edu/goldenPath/wuhCor1/UShER_SARS-CoV-2/public-latest.version.txt'.toURL().text.split('\\(')[1].split('\\)')[0]
        } catch (Exception e) {
            latest_ucsc_version = null
        }
    }
    if ( latest_ucsc_version ) {
        log.info "\033[0;32mFound latest UCSC version, using: " + latest_ucsc_version + " \033[0m"
        params.lcs_ucsc = latest_ucsc_version
    }
    else {
        log.info "\033[0;33mCould not find the latest UCSC version, trying: " + params.lcs_ucsc_version + "\033[0m"
        params.lcs_ucsc = params.lcs_ucsc_version
    }
} else { params.lcs_ucsc = params.lcs_ucsc_version}
/**************************
* Log-infos
**************************/
// general run information, always shown
defaultMSG()

// basecalling options are only relevant when fast5 data is processed
if ( workflow.profile.contains('test_fast5') || params.fast5 ) {
    basecalling()
}

// read length settings are not relevant for fasta-only runs
if ( !( params.fasta || workflow.profile.contains('test_fasta') ) ) {
    read_length()
}

// RKI report settings, always shown
rki()
/**************************
* MODULES
**************************/
include { get_fast5 } from './modules/get_fast5_test_data.nf'
include { get_nanopore_fastq } from './modules/get_fastq_test_data.nf'
include { get_fasta } from './modules/get_fasta_test_data.nf'
include { align_to_reference } from './modules/align_to_reference.nf'
include { split_fasta } from './modules/split_fasta.nf'
include { filter_fastq_by_length } from './modules/filter_fastq_by_length.nf'
include { add_alt_allele_ratio_vcf } from './modules/add_alt_allele_ratio_vcf.nf'
/**************************
* Workflows
**************************/
include { artic_ncov_wf; artic_ncov_np_wf } from './workflows/artic_nanopore_nCov19.nf'
include { basecalling_wf } from './workflows/basecalling.nf'
include { collect_fastq_wf } from './workflows/collect_fastq.nf'
include { create_json_entries_wf } from './workflows/create_json_entries.nf'
include { create_summary_report_wf } from './workflows/create_summary_report.nf'
include { determine_lineage_wf } from './workflows/determine_lineage.nf'
include { determine_mutations_wf } from './workflows/determine_mutations.nf'
include { genome_quality_wf } from './workflows/genome_quality.nf'
include { read_classification_wf; read_screening_freyja_wf; read_screening_lsc_wf} from './workflows/read_classification'
include { read_qc_wf } from './workflows/read_qc.nf'
include { rki_report_wf } from './workflows/provide_rki.nf'
/**************************
* MAIN WORKFLOW
**************************/
/*
 * Main entry workflow.
 *   0. fetch test data when a test_* profile is active
 *   1. reconstruct genomes from fast5 (basecalling first) or fastq input via ARTIC (medaka or nanopolish)
 *   2. genome quality, lineage and mutation calling on the consensus (or directly on --fasta input)
 *   3. RKI report and, with --samples, json entries
 *   4. summary report
 */
workflow {
    // 0. Test profile data
    if ( workflow.profile.contains('test_fast5')) { dir_input_ch = get_fast5().map {it -> ['SARSCoV2', it] } }
    if ( workflow.profile.contains('test_fastq')) { fastq_input_raw_ch = get_nanopore_fastq().map {it -> ['SARSCoV2', it] } }
    if ( workflow.profile.contains('test_fasta')) { fasta_input_raw_ch = get_fasta() }

    // 1. Reconstruct genomes
    // fast5
    if ( (params.fast5 && !params.fastq && !params.fastq_pass) || workflow.profile.contains('test_fast5')) {
        basecalling_wf(dir_input_ch)

        // rename barcodes
        if (params.samples) {
            fastq_from5_ch = basecalling_wf.out[0].join(samples_input_ch).map { it -> tuple(it[2],it[1]) }
            // abort when no barcode of the sample sheet matches any read file
            reporterrorfast5 = basecalling_wf.out[0].join(samples_input_ch).ifEmpty{ exit 2, "Could not match barcode numbers from $params.samples to the read files, some typo?"}
        }
        else if (!params.samples) { fastq_from5_ch = basecalling_wf.out[0] }

        filtered_reads_ch = filter_fastq_by_length(fastq_from5_ch)
        noreadsatall = filtered_reads_ch.ifEmpty{ log.info "\033[0;33mNot enough reads in all samples, please investigate $params.output/$params.readqcdir\033[0m" }
        read_classification_wf(filtered_reads_ch)

        // use medaka or nanopolish artic reconstruction
        if (params.nanopolish) {
            // the 4th argument is the normalisation setting, as for artic_ncov_wf below
            // (the workflow object itself must not be passed as its own input)
            artic_ncov_np_wf(filtered_reads_ch, dir_input_ch, basecalling_wf.out[1], params.artic_normalize)
            fasta_input_ch = artic_ncov_np_wf.out.assembly
        }
        else if (!params.nanopolish) {
            artic_ncov_wf(filtered_reads_ch, params.artic_normalize)
            fasta_input_ch = artic_ncov_wf.out.assembly

            // add alternative allele ratio to the VCF
            if (params.primerV.toString().contains(".bed")) {
                external_primer_schemes = artic_ncov_wf.out.primer_dir
            }
            else {
                external_primer_schemes = file(workflow.projectDir + "/data/external_primer_schemes", checkIfExists: true, type: 'dir' )
            }
            add_alt_allele_ratio_vcf(artic_ncov_wf.out.trimmed_bam.join(artic_ncov_wf.out.vcf).join(artic_ncov_wf.out.failed_vcf), external_primer_schemes)
        }
    }

    // fastq input via dir and or files
    if ( (params.fastq || params.fastq_pass) || workflow.profile.contains('test_fastq')) {
        if (params.fastq_pass && !params.fastq) { fastq_input_raw_ch = collect_fastq_wf(fastq_dir_ch) }
        if (!params.fastq_pass && params.fastq) { fastq_input_raw_ch = fastq_file_ch }

        // rename barcodes based on --samples input.csv
        if (params.samples) {
            fastq_input_ch = fastq_input_raw_ch.join(samples_input_ch).map { it -> tuple(it[2],it[1])}
            // abort when no barcode of the sample sheet matches any read file
            reporterrorfastq = fastq_input_raw_ch.join(samples_input_ch).ifEmpty{ exit 2, "Could not match barcode numbers from $params.samples to the read files, some typo?"}
        }
        else if (!params.samples) { fastq_input_ch = fastq_input_raw_ch }

        read_qc_wf(fastq_input_ch)
        filtered_reads_ch = filter_fastq_by_length(fastq_input_ch)
        noreadsatall = filtered_reads_ch.ifEmpty{ log.info "\033[0;33mNot enough reads in all samples, please investigate $params.output/$params.readqcdir\033[0m" }
        read_classification_wf(filtered_reads_ch)

        // use medaka or nanopolish artic reconstruction
        if (params.nanopolish && !params.fast5 ) { exit 3, "Please provide fast5 data for nanopolish via [--fast5]" }
        else if (params.nanopolish && params.fast5 && (params.fastq_pass || params.fastq ) ) {
            // get sequence summary from nanopolish
            sequence_summary_ch = Channel.fromPath( params.nanopolish, checkIfExists: true ).map { file -> tuple(file.name, file) }
            external_primer_schemes = Channel.fromPath(workflow.projectDir + "/data/external_primer_schemes", checkIfExists: true, type: 'dir' )
            // same call shape as in the fast5 branch: normalisation setting as 4th argument
            artic_ncov_np_wf(filtered_reads_ch, dir_input_ch, sequence_summary_ch, params.artic_normalize)
            // use the named assembly output, consistent with the fast5 branch
            fasta_input_ch = artic_ncov_np_wf.out.assembly
        }
        else if (!params.nanopolish) {
            artic_ncov_wf(filtered_reads_ch, params.artic_normalize)
            fasta_input_ch = artic_ncov_wf.out.assembly

            // add alternative allele ratio to the VCF
            if (params.primerV.toString().contains(".bed")) {
                external_primer_schemes = artic_ncov_wf.out.primer_dir
            }
            else {
                external_primer_schemes = file(workflow.projectDir + "/data/external_primer_schemes", checkIfExists: true, type: 'dir' )
            }
            add_alt_allele_ratio_vcf(artic_ncov_wf.out.trimmed_bam.join(artic_ncov_wf.out.vcf).join(artic_ncov_wf.out.failed_vcf), external_primer_schemes)
        }
    }

    // 2. Genome quality, lineages, clades and mutations
    // fasta input
    if ( params.fasta || workflow.profile.contains('test_fasta' ) ) {
        fasta_input_ch = split_fasta(fasta_input_raw_ch).flatten().map { it -> tuple(it.simpleName, it) }
    }

    determine_lineage_wf(fasta_input_ch)
    determine_mutations_wf(fasta_input_ch)
    genome_quality_wf(fasta_input_ch, reference_for_qc_input_ch)

    // 3. Specialised outputs (rki, json)
    rki_report_wf(genome_quality_wf.out[0], genome_quality_wf.out[1], extended_input_ch)

    if (params.samples) {
        create_json_entries_wf(determine_lineage_wf.out, genome_quality_wf.out[0], determine_mutations_wf.out)
    }

    // 4. Summary output
    if (params.fasta || workflow.profile.contains('test_fasta')) {
        // no reads available: feed placeholders to the report
        taxonomic_read_classification_ch = Channel.from( ['deactivated', 'deactivated', 'deactivated'] ).collect()
        alignments_ch = Channel.from( ['deactivated'] )
    } else {
        taxonomic_read_classification_ch = read_classification_wf.out.kraken
        if (params.screen_reads) {
            if (params.lcs) {
                read_screening_lsc_wf(filtered_reads_ch)
            }
            if (params.freyja) {
                // NOTE(review): this reads the medaka workflow output; with --nanopolish
                // artic_ncov_wf is never invoked, so --freyja presumably fails there -- confirm
                read_screening_freyja_wf(artic_ncov_wf.out.trimmed_bam.map{it -> [it[0], it[1]]}.combine(reference_for_qc_input_ch))
            }
        }
        alignments_ch = align_to_reference(filtered_reads_ch.combine(reference_for_qc_input_ch))
    }

    // the alt allele ratio is only computed in the medaka (non-nanopolish) read branches
    if (params.fasta || workflow.profile.contains('test_fasta') || params.nanopolish ) {
        alt_allele_ratio_ch = Channel.from( ['deactivated'] )
    } else {
        alt_allele_ratio_ch = add_alt_allele_ratio_vcf.out.stats
    }

    /*
    if (params.samples) {
    samples_table_ch = Channel.fromPath( params.samples, checkIfExists: true)
    }
    else { samples_table_ch = Channel.from( ['deactivated'] ) }
    */
    create_summary_report_wf(determine_lineage_wf.out, genome_quality_wf.out[0], determine_mutations_wf.out,
        taxonomic_read_classification_ch, alt_allele_ratio_ch, alignments_ch, samples_file_ch)
}
/*************
* --help
*************/
/*
 * Prints the --help text via log.info.
 * The colour codes are local variables so they do not leak into the script binding.
 * NOTE(review): the text states [default: 100] for --minLength while read_length()
 * reports 200 (or 400 for the 1200 bp schemes) -- confirm which value is authoritative.
 */
def helpMSG() {
    def c_green = "\033[0;32m";
    def c_reset = "\033[0m";
    def c_yellow = "\033[0;33m";
    def c_blue = "\033[0;34m";
    def c_dim = "\033[2m";
    log.info """
    .
    \033[0;33mUsage examples:${c_reset}
    nextflow run replikation/poreCov --update --fastq '*.fastq.gz' -r 1.3.0 -profile local,singularity
    nextflow run replikation/poreCov --fastq '*.fastq.gz' --fast5 dir/ --nanopolish sequencing_summary.txt -profile local,docker
    ${c_yellow}Inputs (choose one):${c_reset}
    --fast5 one fast5 dir of a nanopore run containing multiple samples (barcoded);
    to skip demultiplexing (no barcodes) add the flag [--single]
    ${c_dim}[Basecalling + Genome reconstruction + Lineage + Reports]${c_reset}
    --fastq one fastq or fastq.gz file per sample or
    multiple file-samples: --fastq 'sample_*.fastq.gz'
    ${c_dim}[Genome reconstruction + Lineage + Reports]${c_reset}
    --fastq_pass the fastq_pass dir from the (guppy) bascalling
    --fastq_pass 'fastq_pass/'
    to skip demultiplexing (no barcodes) add the flag [--single]
    ${c_dim}[Genome reconstruction + Lineage + Reports]${c_reset}
    --fasta direct input of genomes - supports multi-fasta file(s) - can be gzip compressed (.gz)
    ${c_dim}[Lineage + Reports]${c_reset}
    ${c_yellow}Workflow control (optional)${c_reset}
    --artic_normalize Normalise down to moderate coverage to save runtime [default: $params.artic_normalize]
    ${c_dim}(after mapping and before variant calling in the ARTIC bioinformatics pipeline)
    Use `--artic_normalize False` to turn off this normalisation.${c_reset}
    --update Always try to use latest pangolin & nextclade release [default: $params.update]
    --samples .csv input (header: Status,_id), renames barcodes (Status) by name (_id), e.g.:
    Status,_id
    barcode01,sample2011XY
    BC02,thirdsample_run
    --extended poreCov utilizes from --samples these additional headers:
    Submitting_Lab,Isolation_Date,Seq_Reason,Sample_Type
    --nanopolish use nanopolish instead of medaka for ARTIC (needs --fast5)
    to skip basecalling use --fastq or --fastq_pass and provide a sequencing_summary.txt in addition to --fast5
    e.g --nanopolish sequencing_summary.txt
    --screen_reads Determines the Pangolineage of each individual read (takes time, needs --freyja and/or --lcs)
    --scorpio Skip Scorpio in pangolin run [default: $params.scorpio]
    ${c_dim}From pangolin version 4, Scorpio overwrites Usher results which leads to many unassigned samples
    Can be turned on with --scorpio${c_reset}
    ${c_yellow}Parameters - Lineage detection on reads (see screen_reads, optional)${c_reset}
    --freyja activate read-screening via freyja
    --freyja_update update freyja's barcode-db prior to running
    --lcs activate read-screening via lcs
    --lcs_ucsc_version Create marker table based on a specific UCSC SARS-CoV-2 tree (e.g. '2022-05-01'). Use 'predefined'
    to use the marker table from the repo (most probably not up-to-date) [default: $params.lcs_ucsc_version]
    ${c_dim}See https://hgdownload.soe.ucsc.edu/goldenPath/wuhCor1/UShER_SARS-CoV-2 for available trees.${c_reset}
    --lcs_ucsc_update Use latest UCSC SARS-CoV-2 tree for marker table update. Overwrites --lcs_ucsc_version [default: $params.lcs_ucsc_update]
    ${c_dim}Automatically checks https://hgdownload.soe.ucsc.edu/goldenPath/wuhCor1/UShER_SARS-CoV-2/public-latest.version.txt${c_reset}
    --lcs_ucsc_downsampling Downsample sequences when updating marker table to save resources. Use 'None' to turn off [default: $params.lcs_ucsc_downsampling]
    ${c_dim}Attention! Updating without downsampling needs a lot of resources in terms of memory and might fail.
    Consider downsampling or increase the memory for this process.${c_reset}
    --lcs_variant_groups Provide path to custom variant groups table (TSV) for marker table update (requires --lcs_ucsc_update). Use 'default'
    for predefined groups from repo (https://github.com/rki-mf1/LCS/blob/master/data/variant_groups.tsv) [default: $params.lcs_variant_groups]
    --lcs_cutoff Plot linages above this threshold [default: $params.lcs_cutoff]
    ${c_yellow}Parameters - Basecalling (optional)${c_reset}
    --localguppy use a native installation of guppy instead of a gpu-docker or gpu_singularity
    --guppy_cpu use cpus instead of gpus for basecalling
    --one_end removes the recommended "--require_barcodes_both_ends" from guppy demultiplexing
    try this if to many barcodes are unclassified (beware - results might not be trustworthy)
    --guppy_model guppy basecalling model [default: ${params.guppy_model}]
    e.g. "dna_r9.4.1_450bps_hac.cfg" or "dna_r9.4.1_450bps_sup.cfg"
    ${c_yellow}Parameters - SARS-CoV-2 genome reconstruction (optional)${c_reset}
    --primerV Supported primer variants or primer bed files - choose one [default: ${params.primerV}]
    ${c_dim}ARTIC:${c_reset} V1, V2, V3, V4, V4.1, V.5, V.5.1, V.5.3.2_400
    ${c_dim}NEB:${c_reset} VarSkipV1a, VarSkipV2, VarSkipV2b
    ${c_dim}Other:${c_reset} V1200, V5.2.0_1200 ${c_dim}(also known as midnight)${c_reset}
    ${c_dim}Primer bed file:${c_reset} e.g. primers.bed ${c_dim}See Readme for more help${c_reset}
    --rapid rapid-barcoding-kit was used [default: ${params.rapid}]
    --minLength min length filter raw reads [default: 100]
    --maxLength max length filter raw reads [default: 700 (primer-scheme: V1-4, rapid); 1500 (primer-scheme: V1200, V5.2.0_1200)]
    --min_depth nucleotides below min depth will be masked to "N" [default ${params.min_depth}]
    --medaka_model medaka model for the artic workflow [default: ${params.medaka_model}]
    e.g. "r941_min_hac_g507" or "r941_min_sup_g507"
    ${c_yellow}Parameters - Genome quality control (optional)${c_reset}
    --reference_for_qc reference FASTA for consensus qc (optional, wuhan is provided by default)
    --seq_threshold global pairwise ACGT sequence identity threshold [default: ${params.seq_threshold}]
    --n_threshold consensus sequence N threshold [default: ${params.n_threshold}]
    ${c_yellow}Options (optional)${c_reset}
    --cores amount of cores for a process (local use) [default: $params.cores]
    --max_cores max amount of cores for poreCov to use (local use) [default: $params.max_cores]
    --memory available memory [default: $params.memory]
    --output name of the result folder [default: $params.output]
    --cachedir defines the path where singularity images are cached
    [default: $params.cachedir]
    --krakendb provide a .tar.gz kraken database [default: auto downloads one]
    ${c_yellow}Execution/Engine profiles (choose executer and engine)${c_reset}
    poreCov supports profiles to run via different ${c_green}Executers${c_reset} and ${c_blue}Engines${c_reset}
    examples:
    -profile ${c_green}local${c_reset},${c_blue}docker${c_reset}
    -profile ${c_yellow}test_fastq${c_reset},${c_green}slurm${c_reset},${c_blue}singularity${c_reset}
    ${c_green}Executer${c_reset} (choose one):
    local
    slurm
    ${c_blue}Engines${c_reset} (choose one):
    docker
    singularity
    ${c_yellow}Input test data${c_reset} (choose one):
    test_fasta
    test_fastq
    test_fast5
    Note: The singularity profile automatically passes the following environment variables to the container.
    to ensure execution on HPCs: HTTPS_PROXY, HTTP_PROXY, http_proxy, https_proxy, FTP_PROXY, ftp_proxy
    """.stripIndent()
}
// Logs the poreCov banner: a separator line followed by the coloured tool name and tagline.
def header(){
    def green = "\033[0;32m"
    def reset = "\033[0m"
    // the banner is not stripIndent()-ed, so its lines stay at column 0
    def banner = """
________________________________________________________________________________
${green}poreCov${reset} | A Nextflow SARS-CoV-2 workflow for nanopore data
"""
    log.info banner
}
// Logs the general run overview: profile, user, versions, paths and core parameters.
def defaultMSG(){
    def run_overview = """
    .
    \u001B[32mProfile: $workflow.profile\033[0m
    \033[2mCurrent User: $workflow.userName
    Nextflow-version: $nextflow.version
    poreCov-version: $workflow.revision
    \u001B[0m
    Pathing:
    \033[2mWorkdir location [-work-Dir]:
    $workflow.workDir
    Output dir [--output]:
    $params.output
    Databases location [--databases]:
    $params.databases
    Singularity cache dir [--cachedir]:
    $params.cachedir
    \u001B[1;30m______________________________________\033[0m
    Parameters:
    \033[2mMedaka model: $params.medaka_model [--medaka_model]
    Min depth nucleotide: $params.min_depth [--min_depth]
    Latest Pangolin/Nextclade?: $params.update [--update]
    CPUs to use: $params.cores [--cores]
    Memory in GB: $params.memory [--memory]\u001B[0m
    \u001B[1;30m______________________________________\033[0m
    """.stripIndent()
    log.info run_overview
}
// Logs the basecalling related settings (guppy installation, demultiplexing, model, kit).
def basecalling() {
    def basecalling_overview = """
    Basecalling options:
    \033[2mUse local guppy? $params.localguppy [--localguppy]
    One end demultiplexing? $params.one_end [--one_end]
    Basecalling via CPUs? $params.guppy_cpu [--guppy_cpu]
    Basecalling modell: $params.guppy_model [--guppy_model]
    Rapid-barcode-kit?: $params.rapid [--rapid]\u001B[0m
    \u001B[1;30m______________________________________\033[0m
    """.stripIndent()
    log.info basecalling_overview
}
// Logs the settings relevant for the RKI (DESH) report: output location and thresholds.
def rki() {
    def rki_overview = """
    RKI output for german DESH upload:
    \033[2mOutput stored at: $params.output/$params.rkidir
    Min Identity to NC_045512.2: $params.seq_threshold [--seq_threshold]
    Min Depth used: $params.min_depth [--min_depth]
    Min Depth should be 20 or more for RKI upload
    Proportion cutoff N: $params.n_threshold [--n_threshold]\u001B[0m
    \u001B[1;30m______________________________________\033[0m
    """.stripIndent()
    log.info rki_overview
}
/*
 * Determines the read length limits to report (user values, otherwise defaults that
 * depend on the primer scheme), aborts when max < min and logs the resulting settings.
 * Defaults: 400/1500 for the schemes V1200 and V5.2.0_1200, 200/700 otherwise.
 */
def read_length() {
    // exact name comparison; a regex match would treat the dots in the name as wildcards
    def long_amplicon_scheme = params.primerV.toString() in ['V1200', 'V5.2.0_1200']

    // user supplied values win, otherwise fall back to the scheme specific defaults
    def log_msg_read_min_length = params.minLength ?: ( long_amplicon_scheme ? 400 : 200 )
    def log_msg_read_max_length = params.maxLength ?: ( long_amplicon_scheme ? 1500 : 700 )

    // compare numerically: the values may arrive as String or as Integer depending on
    // where they were set, and a mixed or textual comparison would be wrong
    if ( log_msg_read_max_length.toString().toLong() < log_msg_read_min_length.toString().toLong() ) {
        exit 5, "--maxLength ${log_msg_read_max_length} needs to be greater than --minLength ${log_msg_read_min_length}."
    }

    log.info """
    Primerscheme: $params.primerV [--primerV]
    \033[2mMin read-length set to: $log_msg_read_min_length [--minLength]
    Max read-length set to: $log_msg_read_max_length [--maxLength]\u001B[0m
    \u001B[1;30m______________________________________\033[0m
    """.stripIndent()
}