diff --git a/tools/dorado/dorado_trimming.xml b/tools/dorado/dorado_trimming.xml new file mode 100644 index 00000000..d4c1c778 --- /dev/null +++ b/tools/dorado/dorado_trimming.xml @@ -0,0 +1,102 @@ + + for Oxford Nanopore (ONT) DNA reads + + macros.xml + + + + trimmed.bam + +&& + +dorado summary +trimmed.bam +> summary.tsv + + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + `__ basecaller. + +This tool scans existing, basecalled datasets for adapter and/or primer +sequences at either end, and trims any such found sequences. + +**If you have raw (un-basecalled) data, you can trim them during +basecalling with the Dorado tool on Galaxy**. + +Note that if you intend to demultiplex the reads later, trimming +adapters and primers may result in some portions of the flanking regions +of the barcodes being removed, which could interfere with correct +demultiplexing. + +The **Don't trim primers** option can be used to prevent the trimming of +primer sequences. In this case only adapter sequences will be trimmed. + +The output of this tool will always be unaligned records, regardless of whether +the input is aligned/sorted or not. + +Custom primer trimming +---------------------- + +The software automatically searches for primer sequences used in Oxford +Nanopore kits. However, you can specify an alternative set of primer +sequences to search by adding a FASTA file of primer sequences in the +**Custom primer sequences** option. The record names of the sequences do +not matter. Note that if you use this option the normal primer sequences +built into the dorado software will not be searched for. + +RNA adapter trimming +-------------------- + +Adapters for RNA002 and RNA004 kits are automatically trimmed during +basecalling. However, unlike in DNA, the RNA adapter cannot be trimmed +post-basecalling. 
+ ]]> + + diff --git a/tools/dorado/test-data/FAL00375_473bf0ed_0.ten_reads.bam b/tools/dorado/test-data/FAL00375_473bf0ed_0.ten_reads.bam new file mode 100644 index 00000000..b1b14a14 Binary files /dev/null and b/tools/dorado/test-data/FAL00375_473bf0ed_0.ten_reads.bam differ diff --git a/tools/dorado/test-data/custom_primers.fasta.gz b/tools/dorado/test-data/custom_primers.fasta.gz new file mode 100644 index 00000000..65368652 Binary files /dev/null and b/tools/dorado/test-data/custom_primers.fasta.gz differ diff --git a/tools/dorado/test-data/dorado_trimming_test1.bam b/tools/dorado/test-data/dorado_trimming_test1.bam new file mode 100644 index 00000000..59090042 Binary files /dev/null and b/tools/dorado/test-data/dorado_trimming_test1.bam differ diff --git a/tools/dorado/test-data/dorado_trimming_test1.tsv b/tools/dorado/test-data/dorado_trimming_test1.tsv new file mode 100644 index 00000000..66bf3e1b --- /dev/null +++ b/tools/dorado/test-data/dorado_trimming_test1.tsv @@ -0,0 +1,11 @@ +filename read_id run_id channel mux start_time duration template_start template_duration sequence_length_template mean_qscore_template barcode +reads.pod5 005cf7ae-4d74-42dd-ab96-9befed842822 473bf0edfc2f8f756173de35db5da9b6f6db4959 473 2 258.872 29.436 259.165 29.1435 14587 5.17895 unclassified +reads.pod5 0066800d-d191-4833-a495-cfe8b925aca0 473bf0edfc2f8f756173de35db5da9b6f6db4959 56 4 359.11 14.0378 359.11 14.0378 5111 9.1072 unclassified +reads.pod5 00777c4b-cbd6-4a79-8647-bbe5f5f3f3bf 473bf0edfc2f8f756173de35db5da9b6f6db4959 300 3 329.733 2.5165 329.766 2.484 1055 12.9149 unclassified +reads.pod5 002f231b-5d37-437f-a027-a2e8b872e73b 473bf0edfc2f8f756173de35db5da9b6f6db4959 118 3 534.745 19.5847 534.745 19.5847 8387 9.88254 unclassified +reads.pod5 000a9728-0a7c-4b64-9791-76bb30b63796 473bf0edfc2f8f756173de35db5da9b6f6db4959 105 4 331.319 3.27625 331.319 3.27625 1175 12.0328 unclassified +reads.pod5 0067486b-9f92-4849-8456-671463e64412 
473bf0edfc2f8f756173de35db5da9b6f6db4959 84 4 164.018 2.453 164.018 2.453 885 12.399 unclassified +reads.pod5 009f5efd-de5d-4a7e-9d17-969c3996cbc8 473bf0edfc2f8f756173de35db5da9b6f6db4959 230 2 171.634 3.27475 171.634 3.27475 1232 13.6258 unclassified +reads.pod5 00aeb4ba-e404-49d2-97c5-8fcf22547f81 473bf0edfc2f8f756173de35db5da9b6f6db4959 51 4 297.891 50.1888 297.984 50.0962 19142 8.45178 unclassified +reads.pod5 0072b26f-f37c-4517-afa7-621543ac2187 473bf0edfc2f8f756173de35db5da9b6f6db4959 317 3 111.275 2.31875 111.275 2.31875 929 9.69614 unclassified +reads.pod5 00ad521b-b916-404f-a31d-a657f9aa7756 473bf0edfc2f8f756173de35db5da9b6f6db4959 365 2 566.263 23.6745 566.263 23.6745 10934 12.074 unclassified diff --git a/tools/dorado/test-data/dorado_trimming_test2.bam b/tools/dorado/test-data/dorado_trimming_test2.bam new file mode 100644 index 00000000..ec2e60ab Binary files /dev/null and b/tools/dorado/test-data/dorado_trimming_test2.bam differ diff --git a/tools/dorado/test-data/dorado_trimming_test2.tsv b/tools/dorado/test-data/dorado_trimming_test2.tsv new file mode 100644 index 00000000..52047057 --- /dev/null +++ b/tools/dorado/test-data/dorado_trimming_test2.tsv @@ -0,0 +1,11 @@ +filename read_id run_id channel mux start_time duration template_start template_duration sequence_length_template mean_qscore_template barcode +reads.pod5 00777c4b-cbd6-4a79-8647-bbe5f5f3f3bf 473bf0edfc2f8f756173de35db5da9b6f6db4959 300 3 329.733 2.5165 329.766 2.484 1055 12.9149 unclassified +reads.pod5 0072b26f-f37c-4517-afa7-621543ac2187 473bf0edfc2f8f756173de35db5da9b6f6db4959 317 3 111.275 2.31875 111.275 2.31875 929 9.69614 unclassified +reads.pod5 0067486b-9f92-4849-8456-671463e64412 473bf0edfc2f8f756173de35db5da9b6f6db4959 84 4 164.018 2.453 164.018 2.453 885 12.399 unclassified +reads.pod5 002f231b-5d37-437f-a027-a2e8b872e73b 473bf0edfc2f8f756173de35db5da9b6f6db4959 118 3 534.745 19.5847 534.745 19.5847 8387 9.88254 unclassified +reads.pod5 0066800d-d191-4833-a495-cfe8b925aca0 
473bf0edfc2f8f756173de35db5da9b6f6db4959 56 4 359.11 14.0378 359.11 14.0378 5111 9.1072 unclassified +reads.pod5 009f5efd-de5d-4a7e-9d17-969c3996cbc8 473bf0edfc2f8f756173de35db5da9b6f6db4959 230 2 171.634 3.27475 171.634 3.27475 1232 13.6258 unclassified +reads.pod5 005cf7ae-4d74-42dd-ab96-9befed842822 473bf0edfc2f8f756173de35db5da9b6f6db4959 473 2 258.872 29.436 259.165 29.1435 14587 5.17895 unclassified +reads.pod5 000a9728-0a7c-4b64-9791-76bb30b63796 473bf0edfc2f8f756173de35db5da9b6f6db4959 105 4 331.319 3.27625 331.319 3.27625 1175 12.0328 unclassified +reads.pod5 00ad521b-b916-404f-a31d-a657f9aa7756 473bf0edfc2f8f756173de35db5da9b6f6db4959 365 2 566.263 23.6745 566.263 23.6745 10934 12.074 unclassified +reads.pod5 00aeb4ba-e404-49d2-97c5-8fcf22547f81 473bf0edfc2f8f756173de35db5da9b6f6db4959 51 4 297.891 50.1888 297.984 50.0962 19142 8.45178 unclassified diff --git a/tools/dorado/test-data/dorado_trimming_test3.bam b/tools/dorado/test-data/dorado_trimming_test3.bam new file mode 100644 index 00000000..32bf1d09 Binary files /dev/null and b/tools/dorado/test-data/dorado_trimming_test3.bam differ diff --git a/tools/dorado/test-data/dorado_trimming_test3.tsv b/tools/dorado/test-data/dorado_trimming_test3.tsv new file mode 100644 index 00000000..68559e07 --- /dev/null +++ b/tools/dorado/test-data/dorado_trimming_test3.tsv @@ -0,0 +1,2 @@ +filename read_id run_id channel mux start_time duration template_start template_duration sequence_length_template mean_qscore_template barcode + 2f707b6e-0060-4f33-9c92-a1230d26cb21 unknown 0 0 0 0 0 0 421 0 unclassified diff --git a/tools/dorado/test-data/lsk109_single_read.fastqsanger.gz b/tools/dorado/test-data/lsk109_single_read.fastqsanger.gz new file mode 100644 index 00000000..79ae44f1 Binary files /dev/null and b/tools/dorado/test-data/lsk109_single_read.fastqsanger.gz differ