diff --git a/tools/tn93/.shed.yml b/tools/tn93/.shed.yml new file mode 100644 index 00000000000..029becaf122 --- /dev/null +++ b/tools/tn93/.shed.yml @@ -0,0 +1,19 @@ +name: tn93 +owner: iuc +description: Compute distances between sequences +long_description: | + This is a simple program meant to compute pairwise distances between aligned + nucleotide sequences in sequential FASTA format using the Tamura Nei 93 distance +remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ +homepage_url: https://github.com/veg/tn93/ +type: unrestricted +categories: + - Sequence Analysis +auto_tool_repositories: + name_template: "{{ tool_id }}" + description_template: "Wrapper for the TN-93 tool: {{ tool_name }}" +suite: + name: "suite_tn93" + description: "TN-93 – Transmission Analysis by Distance Clustering" + long_description: | + TN-93 uses the 1993 Tamura-Nei model to deduce pathogen transmission by genetic distance estimation. diff --git a/tools/tn93/macros.xml b/tools/tn93/macros.xml new file mode 100644 index 00000000000..050c77a80a0 --- /dev/null +++ b/tools/tn93/macros.xml @@ -0,0 +1,17 @@ + + + 1.0.4 + + + + @UNPUBLISHED{spond, + author = "Sergei Kosakovsky Pond", + title = "HyPhy: Hypothesis Testing using Phylogenies", + year = "2000", + note = "http://hyphy.org/", + url = "http://hyphy.org/"} + + + + + \ No newline at end of file diff --git a/tools/tn93/readreduce.xml b/tools/tn93/readreduce.xml new file mode 100644 index 00000000000..3da05d5b6f8 --- /dev/null +++ b/tools/tn93/readreduce.xml @@ -0,0 +1,69 @@ + + into clusters with TN-93 + + macros.xml + + + tn93 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/tn93/test-data/readreduce-in1.fa b/tools/tn93/test-data/readreduce-in1.fa new file mode 100644 index 00000000000..093f666672b --- /dev/null +++ b/tools/tn93/test-data/readreduce-in1.fa @@ -0,0 +1,224 @@ +>B_FR_83_HXB2_ACC_K03455_5 +CCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGA +TGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCAT +TAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATT +GGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGA +CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA +CTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTA +AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTC +AGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTA +GTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCA +CAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAAT +CTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACA +TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACA +AAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACC +AGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAAC +TCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAC +AGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGC +AAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTA +GAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAG +CTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGT +GTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGC +AAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAA +ACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACA +ATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGG +GAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACA +TGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +TAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCA +TAGTAGGAGCAGAAACCTTC +>B_US_83_RF_ACC_M17451 +CCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA +TGGCCCAAAAGTTAAACAATGGCCATTGACAGAGGAAAAAATAAAAGCAT +TGGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCCAAAATT +GGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGA +CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA +CTCAAGACTTCTGGGAAGTTCAGTTAGGAATACCACATCCTGCAGGGTTA +AAAAAGAAGAAATCAGTAACAGTATTGGATGTGGGTGATGCATATTTTTC +AGTTCCCTTAGATAAAGAGTTCAGGAAGTATACTGCATTTACCATACCTA +GTATAAACAATGAAACACCACGGATTAGATATCAGTACAATGTGCTTCCA +CAAGGGTGGAAAGGATCACCAGCAATATTCCAAAGTAGTATGACAAAAAT +CTTAGAGCCTTTTAAAAAACAAAATCCAGAAATAGTTATCTATCAATACA +TGGATGATTTGTATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAATA +AAAATAGAGGAACTGAGAGAACATCTGTTAAAGTGGGGGTTTACCACACC +GGACAAGAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAAC +TCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAC +AGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGC +AAGTCAGATTTATGCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTA +GGGGAACCAAAGCACTAACAGAAGTAGTACAACTAACAAAAGAAGCAGAG +CTAGAACTGGCAGAAAATAGGGAGATTCTAAAAGAACCAGTACATGGAGT +GTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGC +AAGGCCAATGGACATACCAAATTTATCAAGAGCCATTTAAAAACCTGAAA +ACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACA +ATTAACAGAGGCAGTACAAAAAGTAGCCACAGAAAGCATAGTAATATGGG +GAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAGGCA +TGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTAAAATTGTGGTACCAGTTAGAAAAAGAACCCA +TAATAGGAGCAGAAACTTTC +>B_US_86_JRFL_ACC_U63632 +CCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA +TGGCCCAAAAGTCAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCAT +TAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATT +GGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAGGA +CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAAAA +CTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTA +AAAAAGAGAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTC +AGTTCCCTTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCTA +GTATAAACAATGAGACACCAGGGATTAGGTATCAGTACAATGTGCTTCCG +CAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAAT +CTTAGAGCCTTTTAGAAAACAAAATCCAGACATAATTATCTATCAATACA +TGGATGATTTGTATGTAGGATCTGACTTAGAGATAGGGCAGCATAGAGCA +AAAATAGAGGAATTGAGACAACATCTGTTGAGGTGGGGGTTTACCACACC +AGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAAC +TCCATCCTGACAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAC +AGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGC +AAGTCAGATTTACGCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTA +GGGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAG +CTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAGCCAGTACATGGAGT +GTATTATGACCCATCAAAAGACTTAATAGCAGAACTACAGAAGCAGGGGC +AAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAATTCTGAAA +ACAGGAAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACA +ATTAACAGAGGCAGTGCAAAAAATAGCCAATGAAAGCATAGTAATATGGG +GAAAGATTCCTAAATTTAAATTACCCATACAAAAAGAAACATGGGAAACA +TGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCA +TAGTAGGAGCAGAAACTTTC +>B_US_90_WEAU160_ACC_U21135 +CCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA +TGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAGAAAATAAAAGCAT +TAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATT +GGGCCTGAAAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGA +CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA +CTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCTTCAGGGTTA +AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTC +AGTACCCTTAGATGAAGACTTCAGGAAGTACACTGCATTTACCATACCTA +GTATAAACAATGAAACACCAGGGATTAGATATCAGTACAATGTGCTTCCA +CAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAAT +ATTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACA +TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACA +AAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGATTTACCACACC +AGACAAAAAACATCAAAAAGACCCTCCATTCCTTTGGATGGGTTATGAAC +TCCATCCTGATAAATGGACAGTACAGCCTATAAAGCTGCCAGAAAAAGAA +AGTTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGC +AAGTCAGATTTACGCAGGGATTAAAGTAAAGCAACTATGTAAACTCCTTA +GGGGGACCAAAGCACTAACAGAAATAATACCAATAACAGAAGAAGCAGAG +CTAGAGCTGGCAGAAAACAGGGAAATTCTAAAAGAACCGGTACATGGAGT +GTATTATGACCCATCAAAAGACTTAATAGCAGAGCTACAGAAGCAGGGGC +AAGGCCAATGGACATATCAGATTTATCAAGAGCCATTTAAAAATCTGAAA +ACAGGAAAGTATGCAAGAGTGAGGGGTGCCCACACTAATGATGTAAAACA +ATTAACAGAGGCAGTGCAGAAAATAACCACAGAAAGCATAGTAATATGGG +GAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAACA +TGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTGAAATTATGGTATCAGTTAGAGAAAGAACCCA +TAGTAGGAGCAGAAACTTTC +>D_CD_83_ELI_ACC_K03454_7 +CCAATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA +TGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCAT +TAACAGAAATTTGTACAGATATGGAAAAGGAAGGAAAAATTTCAAGAATT +GGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGA +CAGTACCAAGTGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA +CTCAAGATTTCTGGGAAGTTCAATTAGGAATACCGCATCCTGCAGGGCTG +AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTC +AGTTCCCTTAGATGAAGATTTTAGGAAATATACCGCCTTTACCATATCTA +GTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCA +CAGGGATGGAAAGGATCACCGGCAATATTCCAAAGTAGCATGACAAAAAT +CTTAGAGCCCTTTAGAAAACAAAATCCAGAAATGGTTATCTATCAATACA +TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGGACA +AAAATAGAGAAATTAAGAGAACATCTATTGAGGTGGGGATTTACCAGACC +AGATAAAAAACATCAGAAAGAACCCCCATTTCTTTGGATGGGTTATGAAC +TCCATCCTGATAAATGGACAGTACAGTCTATAAAACTGCCAGAAAAGGAG +AGCTGGACTGTCAATGATATACAGAACTTAGTGGAGAGATTAAACTGGGC +AAGCCAGATTTATCCAGGAATTAAAGTAAGACAATTATGTAAACTCCTTA +GGGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAA +TTAGAACTGGCAGAAAACAGGGAAATTTTAAAAGAACCAGTACATGGAGT +GTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAACAAGGGC +ACGGCCAATGGACATACCAAATTTATCAAGAACCATTTAAAAATCTGAAA +ACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAGCA +ATTAGCAGAGGCAGTGCAAAGAATATCCACAGAAAGCATAGTGATATGGG +GAAGGACTCCTAAATTTAGACTACCCATACAAAAGGAAACATGGGAAACA +TGGTGGGCAGAGTATTGGCAAGCCACTTGGATTCCTGAGTGGGAATTTGT +CAATACCCCTCCTTTAGTAAAATTATGGTACCAGTTAGAGAAGGAACCCA +TAATAGGAGCAGAAACTTTC +>D_CD_83_NDK_ACC_M27323 +CCAATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA +TGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCAT +TAACAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAGAATT +GGGCCTGAAAATCCATATAATACTCCAATATTTGCCATAAAGAAAAAAGA +CAGTACCAAGTGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA +CTCAAGATTTCTGGGAGGTTCAATTAGGAATACCGCATCCTGCAGGGCTG +AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTCTC +AGTTCCCTTAGATGAAGATTTTAGGAAATATACCGCATTTACCATACCTA +GTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTCCCA +CAGGGATGGAAAGGATCACCGGCAATATTCCAAAGTAGCATGACAAAAAT +CTTAGAGCCCTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACA +TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACA +AAAATAGAGGAATTAAGAGAACATCTATTGAGGTGGGGATTTACCACACC +AGATAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAAC +TCCATCCTGATAAATGGACAGTACAGCCTATAAACCTGCCAGAAAAAGAA +AGCTGGACTGTCAATGATATACAGAAGTTAGTGGGGAAATTAAACTGGGC +AAGCCAGATTTATGCAGGAATTAAAGTAAAGCAATTATGTAAACTCCTTA +GGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAA +TTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGT +GTATTATGACCCATCAAAAGACTTAATAGCAGAACTACAGAAACAAGGGG +ACGGCCAATGGACATACCAAATTTATCAAGAACCATTTAAAAATCTAAAA +ACAGGAAAGTATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACA +ATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTGATATGGG +GAAAGACTCCTAAATTTAAACTACCCATACAAAAGGAAACATGGGAAACA +TGGTGGATAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGT +CAATACCCCTCCTTTAGTAAAATTATGGTACCAGTTAGAGAAGGAACCCA +TAATAGGAGCAGAAACTTTC +>D_CD_84_84ZR085_ACC_U88822 +CCAATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA +TGGCCCAAAAGTTAAACAATGGCCGTTGACAGAAGAAAAAATAAAAGCAT +TAACAGAAATTTGTACAGATATGGAAAAGGAAGGAAAAATTTCAAGAATT +GGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGA +CAGTACTAAGTGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA +CTCAAGACTTCTGGGAAGTTCAATTAGGGATACCACATCCTGCAGGATTA +AAGAAGAAAAAGTCAATAACAGTACTGGATGTGGGCGATGCATATTTTTC +AATTCCCTTATGTGAAGACTTTAGGAAGTACACTGCATTTACCATACCTA +GTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTACTTCCA +CAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGATAAAAAT +CTTAGAGCCCTTTAGAAAACAAAATCCAGAAGTAGTTATCTATCAATACA +TGGATGATTTGTATGTAGGATCTGATTTAGAAATAGGACAGCATAGAGCA +AAAATAGAGAAATTAAGAGAACATCTGTTGAGGTGGGGGCTTACCACACC +AGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAAC +TCCATCCTGATAAGTGGACAGTACAGTCTATAACACTGCCAGAGAAAGAA +AGCTGGACTGTCAATGATATACAGAAGTTAGTGGGAAAATTAAATTGGGC +AAGCCAGATTTATCCAGGAATTAAAGTAAAGCAATTATGTAAACTCCTTA +GGGGAACCAAGGCACTAACAGAGGTAATACCACTAACAGAAGAAGCAGAA +TTAGAACTGGCAGAAAACAGGGAGATTCTAAAGGAACCAATGCATGGAGT +GTATTATGACCCATCAAAAGACTTAATAGCAGAATTACAGAAACAAGGGC +AAGGTCAATGGACATATCAAATTTATCAAGAACCATTTAAAAATCTGAAA +ACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACA +GTTAACAGAGGCAGTGCAAAAAATAGCCATAGAAAGCATAGTGATATGGG +GAAAGACTCCTAAATTTAGACTACCCATACAAAAGGAAACATGGGAAACA +TGGTGGATAGACTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGT +CAATACCCCTCCTTTAGTAAAATTATGGTACCAGTTAGAGAAGGAACCCA +TAATAGGAGCAGAAACTTTC +>D_UG_94_94UG114_ACC_U88824 +CCAATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGGATGGA +TGGCCCAAAAGTTAAACAATGGCCGTTGACAGAAGAAAAAATAAAAGCAC +TAATAGAAATTTGTTCAGAACTAGAAAAGGAAGGAAAAATTTCAAAAATT +GGGCCTGAAAACCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGA +CAGTACTAAGTGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA +CTCAAGACTTTTGGGAAGTTCAACTAGGAATACCACATCCTGCAGGGCTA +AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGACGCATATTTTTC +AGTTCCCTTACATGAAGACTTTAGAAAATATACCGCATTCACCATACCTA +GTACAAACAATGAGACACCAGGAATTAGATATCAGTACAATGTGCTTCCA +CAAGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAAT +CTTAGAACCTTTTAGAAAACAAAATCCAGAAATGATTATCTATCAATACA +TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAATA +AAAATAGAGGAATTAAGGGGACACCTCTTGAAGTGGGGATTTACCACACC +AGACAAAAAGTATCAGAAAGAACCCCCATTTCTTTGGATGGGTTATGAAC +TCCATCCTGATAAGTGGACAGTACAGCCTATACATCTGCCAGAAAAGGAA +AGCTGGACTGTCAATGATATACAGAAGTTAGTGGGAAAATTAAATTGGGC +AAGCCAGATTTATCCAGGAATTAAAGTAAGACAATTATGCAAATGCCTTA +GGGGAGCCAAAGCACTGACAGAAGTAATACCACTGACAGCAGAAGCAGAA +TTAGAACTGGCAGAAAACAGGGAAATACTAAAAGAACCAGTACATGGAGC +GTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAACAAGGGC +AAGATCAATGGACATATCAAATATATCAAGAACAATATAAAAATCTGAAA +ACAGGAAAGTATGCGAAAATGAGGGGTACCCACACTAATGATGTAAAACA +ATTAACAGAGGCAGTGCAGAAAATAGCCCAAGAATGTATAGTAATATGGG +GAAAGACTCCTAAATTTAGACTACCCATACAAAAGGAAACATGGGAAACA +TGGTGGACAGAGTATTGGCAGGCCACCTGGATTCCTGAGTGGGAGTATGT +CAACACCCCTCCTTTAGTTAAATTATGGTATCAGTTAGAGAAGGAACCCA +TAGTAGGAGCAGAAACTTTC diff --git a/tools/tn93/test-data/readreduce-in2.fa b/tools/tn93/test-data/readreduce-in2.fa new file mode 100644 index 00000000000..e8252b6c779 --- /dev/null +++ b/tools/tn93/test-data/readreduce-in2.fa @@ -0,0 +1,112 @@ +>B_FR_83_HXB2_ACC_K03455_5 +CCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGA +TGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCAT +TAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATT +GGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGA +CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA +CTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTA +AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTC +AGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTA +GTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCA +CAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAAT +CTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACA +TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACA +AAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACC +AGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAAC +TCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAC +AGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGC +AAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTA +GAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAG +CTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGT +GTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGC +AAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAA +ACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACA +ATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGG +GAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACA +TGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +TAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCA +TAGTAGGAGCAGAAACCTTC +>B_US_83_RF_ACC_M17451 +CCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA +TGGCCCAAAAGTTAAACAATGGCCATTGACAGAGGAAAAAATAAAAGCAT +TGGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCCAAAATT +GGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGA +CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA +CTCAAGACTTCTGGGAAGTTCAGTTAGGAATACCACATCCTGCAGGGTTA +AAAAAGAAGAAATCAGTAACAGTATTGGATGTGGGTGATGCATATTTTTC +AGTTCCCTTAGATAAAGAGTTCAGGAAGTATACTGCATTTACCATACCTA +GTATAAACAATGAAACACCACGGATTAGATATCAGTACAATGTGCTTCCA +CAAGGGTGGAAAGGATCACCAGCAATATTCCAAAGTAGTATGACAAAAAT +CTTAGAGCCTTTTAAAAAACAAAATCCAGAAATAGTTATCTATCAATACA +TGGATGATTTGTATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAATA +AAAATAGAGGAACTGAGAGAACATCTGTTAAAGTGGGGGTTTACCACACC +GGACAAGAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAAC +TCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAC +AGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGC +AAGTCAGATTTATGCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTA +GGGGAACCAAAGCACTAACAGAAGTAGTACAACTAACAAAAGAAGCAGAG +CTAGAACTGGCAGAAAATAGGGAGATTCTAAAAGAACCAGTACATGGAGT +GTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGC +AAGGCCAATGGACATACCAAATTTATCAAGAGCCATTTAAAAACCTGAAA +ACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACA +ATTAACAGAGGCAGTACAAAAAGTAGCCACAGAAAGCATAGTAATATGGG +GAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAGGCA +TGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTAAAATTGTGGTACCAGTTAGAAAAAGAACCCA +TAATAGGAGCAGAAACTTTC +>B_US_86_JRFL_ACC_U63632 +CCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA +TGGCCCAAAAGTCAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCAT +TAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATT +GGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAGGA +CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAAAA +CTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTA +AAAAAGAGAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTC +AGTTCCCTTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCTA +GTATAAACAATGAGACACCAGGGATTAGGTATCAGTACAATGTGCTTCCG +CAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAAT +CTTAGAGCCTTTTAGAAAACAAAATCCAGACATAATTATCTATCAATACA +TGGATGATTTGTATGTAGGATCTGACTTAGAGATAGGGCAGCATAGAGCA +AAAATAGAGGAATTGAGACAACATCTGTTGAGGTGGGGGTTTACCACACC +AGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAAC +TCCATCCTGACAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAC +AGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGC +AAGTCAGATTTACGCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTA +GGGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAG +CTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAGCCAGTACATGGAGT +GTATTATGACCCATCAAAAGACTTAATAGCAGAACTACAGAAGCAGGGGC +AAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAATTCTGAAA +ACAGGAAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACA +ATTAACAGAGGCAGTGCAAAAAATAGCCAATGAAAGCATAGTAATATGGG +GAAAGATTCCTAAATTTAAATTACCCATACAAAAAGAAACATGGGAAACA +TGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCA +TAGTAGGAGCAGAAACTTTC +>B_US_90_WEAU160_ACC_U21135 +CCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA +TGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAGAAAATAAAAGCAT +TAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATT +GGGCCTGAAAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGA +CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA +CTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCTTCAGGGTTA +AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTC +AGTACCCTTAGATGAAGACTTCAGGAAGTACACTGCATTTACCATACCTA +GTATAAACAATGAAACACCAGGGATTAGATATCAGTACAATGTGCTTCCA +CAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAAT +ATTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACA +TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACA +AAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGATTTACCACACC +AGACAAAAAACATCAAAAAGACCCTCCATTCCTTTGGATGGGTTATGAAC +TCCATCCTGATAAATGGACAGTACAGCCTATAAAGCTGCCAGAAAAAGAA +AGTTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGC +AAGTCAGATTTACGCAGGGATTAAAGTAAAGCAACTATGTAAACTCCTTA +GGGGGACCAAAGCACTAACAGAAATAATACCAATAACAGAAGAAGCAGAG +CTAGAGCTGGCAGAAAACAGGGAAATTCTAAAAGAACCGGTACATGGAGT +GTATTATGACCCATCAAAAGACTTAATAGCAGAGCTACAGAAGCAGGGGC +AAGGCCAATGGACATATCAGATTTATCAAGAGCCATTTAAAAATCTGAAA +ACAGGAAAGTATGCAAGAGTGAGGGGTGCCCACACTAATGATGTAAAACA +ATTAACAGAGGCAGTGCAGAAAATAACCACAGAAAGCATAGTAATATGGG +GAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAACA +TGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTGAAATTATGGTATCAGTTAGAGAAAGAACCCA +TAGTAGGAGCAGAAACTTTC \ No newline at end of file diff --git a/tools/tn93/test-data/readreduce-out1.fa b/tools/tn93/test-data/readreduce-out1.fa new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tools/tn93/test-data/readreduce-out2.fa b/tools/tn93/test-data/readreduce-out2.fa new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tools/tn93/test-data/tn93-in1.fa b/tools/tn93/test-data/tn93-in1.fa new file mode 100644 index 00000000000..093f666672b --- /dev/null +++ b/tools/tn93/test-data/tn93-in1.fa @@ -0,0 +1,224 @@ +>B_FR_83_HXB2_ACC_K03455_5 +CCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGA +TGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCAT +TAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATT +GGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGA +CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA +CTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTA +AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTC +AGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTA +GTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCA +CAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAAT +CTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACA +TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACA +AAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACC +AGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAAC +TCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAC +AGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGC +AAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTA +GAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAG +CTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGT +GTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGC +AAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAA +ACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACA +ATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGG +GAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACA +TGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +TAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCA +TAGTAGGAGCAGAAACCTTC +>B_US_83_RF_ACC_M17451 +CCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA +TGGCCCAAAAGTTAAACAATGGCCATTGACAGAGGAAAAAATAAAAGCAT +TGGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCCAAAATT +GGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGA +CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA +CTCAAGACTTCTGGGAAGTTCAGTTAGGAATACCACATCCTGCAGGGTTA +AAAAAGAAGAAATCAGTAACAGTATTGGATGTGGGTGATGCATATTTTTC +AGTTCCCTTAGATAAAGAGTTCAGGAAGTATACTGCATTTACCATACCTA +GTATAAACAATGAAACACCACGGATTAGATATCAGTACAATGTGCTTCCA +CAAGGGTGGAAAGGATCACCAGCAATATTCCAAAGTAGTATGACAAAAAT +CTTAGAGCCTTTTAAAAAACAAAATCCAGAAATAGTTATCTATCAATACA +TGGATGATTTGTATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAATA +AAAATAGAGGAACTGAGAGAACATCTGTTAAAGTGGGGGTTTACCACACC +GGACAAGAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAAC +TCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAC +AGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGC +AAGTCAGATTTATGCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTA +GGGGAACCAAAGCACTAACAGAAGTAGTACAACTAACAAAAGAAGCAGAG +CTAGAACTGGCAGAAAATAGGGAGATTCTAAAAGAACCAGTACATGGAGT +GTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGC +AAGGCCAATGGACATACCAAATTTATCAAGAGCCATTTAAAAACCTGAAA +ACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACA +ATTAACAGAGGCAGTACAAAAAGTAGCCACAGAAAGCATAGTAATATGGG +GAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAGGCA +TGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTAAAATTGTGGTACCAGTTAGAAAAAGAACCCA +TAATAGGAGCAGAAACTTTC +>B_US_86_JRFL_ACC_U63632 +CCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA +TGGCCCAAAAGTCAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCAT +TAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATT +GGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAGGA +CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAAAA +CTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTA +AAAAAGAGAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTC +AGTTCCCTTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCTA +GTATAAACAATGAGACACCAGGGATTAGGTATCAGTACAATGTGCTTCCG +CAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAAT +CTTAGAGCCTTTTAGAAAACAAAATCCAGACATAATTATCTATCAATACA +TGGATGATTTGTATGTAGGATCTGACTTAGAGATAGGGCAGCATAGAGCA +AAAATAGAGGAATTGAGACAACATCTGTTGAGGTGGGGGTTTACCACACC +AGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAAC +TCCATCCTGACAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAC +AGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGC +AAGTCAGATTTACGCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTA +GGGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAG +CTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAGCCAGTACATGGAGT +GTATTATGACCCATCAAAAGACTTAATAGCAGAACTACAGAAGCAGGGGC +AAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAATTCTGAAA +ACAGGAAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACA +ATTAACAGAGGCAGTGCAAAAAATAGCCAATGAAAGCATAGTAATATGGG +GAAAGATTCCTAAATTTAAATTACCCATACAAAAAGAAACATGGGAAACA +TGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCA +TAGTAGGAGCAGAAACTTTC +>B_US_90_WEAU160_ACC_U21135 +CCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA +TGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAGAAAATAAAAGCAT +TAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATT +GGGCCTGAAAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGA +CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA +CTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCTTCAGGGTTA +AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTC +AGTACCCTTAGATGAAGACTTCAGGAAGTACACTGCATTTACCATACCTA +GTATAAACAATGAAACACCAGGGATTAGATATCAGTACAATGTGCTTCCA +CAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAAT +ATTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACA +TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACA +AAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGATTTACCACACC +AGACAAAAAACATCAAAAAGACCCTCCATTCCTTTGGATGGGTTATGAAC +TCCATCCTGATAAATGGACAGTACAGCCTATAAAGCTGCCAGAAAAAGAA +AGTTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGC +AAGTCAGATTTACGCAGGGATTAAAGTAAAGCAACTATGTAAACTCCTTA +GGGGGACCAAAGCACTAACAGAAATAATACCAATAACAGAAGAAGCAGAG +CTAGAGCTGGCAGAAAACAGGGAAATTCTAAAAGAACCGGTACATGGAGT +GTATTATGACCCATCAAAAGACTTAATAGCAGAGCTACAGAAGCAGGGGC +AAGGCCAATGGACATATCAGATTTATCAAGAGCCATTTAAAAATCTGAAA +ACAGGAAAGTATGCAAGAGTGAGGGGTGCCCACACTAATGATGTAAAACA +ATTAACAGAGGCAGTGCAGAAAATAACCACAGAAAGCATAGTAATATGGG +GAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAACA +TGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTGAAATTATGGTATCAGTTAGAGAAAGAACCCA +TAGTAGGAGCAGAAACTTTC +>D_CD_83_ELI_ACC_K03454_7 +CCAATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA +TGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCAT +TAACAGAAATTTGTACAGATATGGAAAAGGAAGGAAAAATTTCAAGAATT +GGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGA +CAGTACCAAGTGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA +CTCAAGATTTCTGGGAAGTTCAATTAGGAATACCGCATCCTGCAGGGCTG +AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTC +AGTTCCCTTAGATGAAGATTTTAGGAAATATACCGCCTTTACCATATCTA +GTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCA +CAGGGATGGAAAGGATCACCGGCAATATTCCAAAGTAGCATGACAAAAAT +CTTAGAGCCCTTTAGAAAACAAAATCCAGAAATGGTTATCTATCAATACA +TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGGACA +AAAATAGAGAAATTAAGAGAACATCTATTGAGGTGGGGATTTACCAGACC +AGATAAAAAACATCAGAAAGAACCCCCATTTCTTTGGATGGGTTATGAAC +TCCATCCTGATAAATGGACAGTACAGTCTATAAAACTGCCAGAAAAGGAG +AGCTGGACTGTCAATGATATACAGAACTTAGTGGAGAGATTAAACTGGGC +AAGCCAGATTTATCCAGGAATTAAAGTAAGACAATTATGTAAACTCCTTA +GGGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAA +TTAGAACTGGCAGAAAACAGGGAAATTTTAAAAGAACCAGTACATGGAGT +GTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAACAAGGGC +ACGGCCAATGGACATACCAAATTTATCAAGAACCATTTAAAAATCTGAAA +ACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAGCA +ATTAGCAGAGGCAGTGCAAAGAATATCCACAGAAAGCATAGTGATATGGG +GAAGGACTCCTAAATTTAGACTACCCATACAAAAGGAAACATGGGAAACA +TGGTGGGCAGAGTATTGGCAAGCCACTTGGATTCCTGAGTGGGAATTTGT +CAATACCCCTCCTTTAGTAAAATTATGGTACCAGTTAGAGAAGGAACCCA +TAATAGGAGCAGAAACTTTC +>D_CD_83_NDK_ACC_M27323 +CCAATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA +TGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCAT +TAACAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAGAATT +GGGCCTGAAAATCCATATAATACTCCAATATTTGCCATAAAGAAAAAAGA +CAGTACCAAGTGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA +CTCAAGATTTCTGGGAGGTTCAATTAGGAATACCGCATCCTGCAGGGCTG +AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTCTC +AGTTCCCTTAGATGAAGATTTTAGGAAATATACCGCATTTACCATACCTA +GTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTCCCA +CAGGGATGGAAAGGATCACCGGCAATATTCCAAAGTAGCATGACAAAAAT +CTTAGAGCCCTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACA +TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACA +AAAATAGAGGAATTAAGAGAACATCTATTGAGGTGGGGATTTACCACACC +AGATAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAAC +TCCATCCTGATAAATGGACAGTACAGCCTATAAACCTGCCAGAAAAAGAA +AGCTGGACTGTCAATGATATACAGAAGTTAGTGGGGAAATTAAACTGGGC +AAGCCAGATTTATGCAGGAATTAAAGTAAAGCAATTATGTAAACTCCTTA +GGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAA +TTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGT +GTATTATGACCCATCAAAAGACTTAATAGCAGAACTACAGAAACAAGGGG +ACGGCCAATGGACATACCAAATTTATCAAGAACCATTTAAAAATCTAAAA +ACAGGAAAGTATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACA +ATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTGATATGGG +GAAAGACTCCTAAATTTAAACTACCCATACAAAAGGAAACATGGGAAACA +TGGTGGATAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGT +CAATACCCCTCCTTTAGTAAAATTATGGTACCAGTTAGAGAAGGAACCCA +TAATAGGAGCAGAAACTTTC +>D_CD_84_84ZR085_ACC_U88822 +CCAATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA +TGGCCCAAAAGTTAAACAATGGCCGTTGACAGAAGAAAAAATAAAAGCAT +TAACAGAAATTTGTACAGATATGGAAAAGGAAGGAAAAATTTCAAGAATT +GGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGA +CAGTACTAAGTGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA +CTCAAGACTTCTGGGAAGTTCAATTAGGGATACCACATCCTGCAGGATTA +AAGAAGAAAAAGTCAATAACAGTACTGGATGTGGGCGATGCATATTTTTC +AATTCCCTTATGTGAAGACTTTAGGAAGTACACTGCATTTACCATACCTA +GTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTACTTCCA +CAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGATAAAAAT +CTTAGAGCCCTTTAGAAAACAAAATCCAGAAGTAGTTATCTATCAATACA +TGGATGATTTGTATGTAGGATCTGATTTAGAAATAGGACAGCATAGAGCA +AAAATAGAGAAATTAAGAGAACATCTGTTGAGGTGGGGGCTTACCACACC +AGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAAC +TCCATCCTGATAAGTGGACAGTACAGTCTATAACACTGCCAGAGAAAGAA +AGCTGGACTGTCAATGATATACAGAAGTTAGTGGGAAAATTAAATTGGGC +AAGCCAGATTTATCCAGGAATTAAAGTAAAGCAATTATGTAAACTCCTTA +GGGGAACCAAGGCACTAACAGAGGTAATACCACTAACAGAAGAAGCAGAA +TTAGAACTGGCAGAAAACAGGGAGATTCTAAAGGAACCAATGCATGGAGT +GTATTATGACCCATCAAAAGACTTAATAGCAGAATTACAGAAACAAGGGC +AAGGTCAATGGACATATCAAATTTATCAAGAACCATTTAAAAATCTGAAA +ACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACA +GTTAACAGAGGCAGTGCAAAAAATAGCCATAGAAAGCATAGTGATATGGG +GAAAGACTCCTAAATTTAGACTACCCATACAAAAGGAAACATGGGAAACA +TGGTGGATAGACTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGT +CAATACCCCTCCTTTAGTAAAATTATGGTACCAGTTAGAGAAGGAACCCA +TAATAGGAGCAGAAACTTTC +>D_UG_94_94UG114_ACC_U88824 +CCAATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGGATGGA +TGGCCCAAAAGTTAAACAATGGCCGTTGACAGAAGAAAAAATAAAAGCAC +TAATAGAAATTTGTTCAGAACTAGAAAAGGAAGGAAAAATTTCAAAAATT +GGGCCTGAAAACCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGA +CAGTACTAAGTGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA +CTCAAGACTTTTGGGAAGTTCAACTAGGAATACCACATCCTGCAGGGCTA +AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGACGCATATTTTTC +AGTTCCCTTACATGAAGACTTTAGAAAATATACCGCATTCACCATACCTA +GTACAAACAATGAGACACCAGGAATTAGATATCAGTACAATGTGCTTCCA +CAAGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAAT +CTTAGAACCTTTTAGAAAACAAAATCCAGAAATGATTATCTATCAATACA +TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAATA +AAAATAGAGGAATTAAGGGGACACCTCTTGAAGTGGGGATTTACCACACC +AGACAAAAAGTATCAGAAAGAACCCCCATTTCTTTGGATGGGTTATGAAC +TCCATCCTGATAAGTGGACAGTACAGCCTATACATCTGCCAGAAAAGGAA +AGCTGGACTGTCAATGATATACAGAAGTTAGTGGGAAAATTAAATTGGGC +AAGCCAGATTTATCCAGGAATTAAAGTAAGACAATTATGCAAATGCCTTA +GGGGAGCCAAAGCACTGACAGAAGTAATACCACTGACAGCAGAAGCAGAA +TTAGAACTGGCAGAAAACAGGGAAATACTAAAAGAACCAGTACATGGAGC +GTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAACAAGGGC +AAGATCAATGGACATATCAAATATATCAAGAACAATATAAAAATCTGAAA +ACAGGAAAGTATGCGAAAATGAGGGGTACCCACACTAATGATGTAAAACA +ATTAACAGAGGCAGTGCAGAAAATAGCCCAAGAATGTATAGTAATATGGG +GAAAGACTCCTAAATTTAGACTACCCATACAAAAGGAAACATGGGAAACA +TGGTGGACAGAGTATTGGCAGGCCACCTGGATTCCTGAGTGGGAGTATGT +CAACACCCCTCCTTTAGTTAAATTATGGTATCAGTTAGAGAAGGAACCCA +TAGTAGGAGCAGAAACTTTC diff --git a/tools/tn93/test-data/tn93-in2-alpha.fa b/tools/tn93/test-data/tn93-in2-alpha.fa new file mode 100644 index 00000000000..e8252b6c779 --- /dev/null +++ b/tools/tn93/test-data/tn93-in2-alpha.fa @@ -0,0 +1,112 @@ +>B_FR_83_HXB2_ACC_K03455_5 +CCCATTAGCCCTATTGAGACTGTACCAGTAAAATTAAAGCCAGGAATGGA +TGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCAT +TAGTAGAAATTTGTACAGAGATGGAAAAGGAAGGGAAAATTTCAAAAATT +GGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGA +CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA +CTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTA +AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTC +AGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTA +GTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCA +CAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAAT +CTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACA +TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACA +AAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACC +AGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAAC +TCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAC +AGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGC +AAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTA +GAGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAG +CTAGAACTGGCAGAAAACAGAGAGATTCTAAAAGAACCAGTACATGGAGT +GTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGC +AAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCTGAAA +ACAGGAAAATATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACA +ATTAACAGAGGCAGTGCAAAAAATAACCACAGAAAGCATAGTAATATGGG +GAAAGACTCCTAAATTTAAACTGCCCATACAAAAGGAAACATGGGAAACA +TGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +TAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCA +TAGTAGGAGCAGAAACCTTC +>B_US_83_RF_ACC_M17451 +CCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA +TGGCCCAAAAGTTAAACAATGGCCATTGACAGAGGAAAAAATAAAAGCAT +TGGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCCAAAATT +GGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGA +CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA +CTCAAGACTTCTGGGAAGTTCAGTTAGGAATACCACATCCTGCAGGGTTA +AAAAAGAAGAAATCAGTAACAGTATTGGATGTGGGTGATGCATATTTTTC +AGTTCCCTTAGATAAAGAGTTCAGGAAGTATACTGCATTTACCATACCTA +GTATAAACAATGAAACACCACGGATTAGATATCAGTACAATGTGCTTCCA +CAAGGGTGGAAAGGATCACCAGCAATATTCCAAAGTAGTATGACAAAAAT +CTTAGAGCCTTTTAAAAAACAAAATCCAGAAATAGTTATCTATCAATACA +TGGATGATTTGTATGTAGGATCTGATTTAGAAATAGGGCAGCATAGAATA +AAAATAGAGGAACTGAGAGAACATCTGTTAAAGTGGGGGTTTACCACACC +GGACAAGAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAAC +TCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAC +AGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGC +AAGTCAGATTTATGCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTA +GGGGAACCAAAGCACTAACAGAAGTAGTACAACTAACAAAAGAAGCAGAG +CTAGAACTGGCAGAAAATAGGGAGATTCTAAAAGAACCAGTACATGGAGT +GTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAGCAGGGGC +AAGGCCAATGGACATACCAAATTTATCAAGAGCCATTTAAAAACCTGAAA +ACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACA +ATTAACAGAGGCAGTACAAAAAGTAGCCACAGAAAGCATAGTAATATGGG +GAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAGGCA +TGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTAAAATTGTGGTACCAGTTAGAAAAAGAACCCA +TAATAGGAGCAGAAACTTTC +>B_US_86_JRFL_ACC_U63632 +CCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA +TGGCCCAAAAGTCAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCAT +TAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGGAAAATTTCAAAAATT +GGGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAGGA +CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAAAA +CTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTA +AAAAAGAGAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTC +AGTTCCCTTAGATAAAGACTTCAGGAAATATACTGCATTTACCATACCTA +GTATAAACAATGAGACACCAGGGATTAGGTATCAGTACAATGTGCTTCCG +CAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAAT +CTTAGAGCCTTTTAGAAAACAAAATCCAGACATAATTATCTATCAATACA +TGGATGATTTGTATGTAGGATCTGACTTAGAGATAGGGCAGCATAGAGCA +AAAATAGAGGAATTGAGACAACATCTGTTGAGGTGGGGGTTTACCACACC +AGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAAC +TCCATCCTGACAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGAC +AGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGC +AAGTCAGATTTACGCAGGGATTAAAGTAAAGCAATTATGTAAACTCCTTA +GGGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAG +CTAGAACTGGCAGAAAACAGGGAGATTCTAAAAGAGCCAGTACATGGAGT +GTATTATGACCCATCAAAAGACTTAATAGCAGAACTACAGAAGCAGGGGC +AAGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAATTCTGAAA +ACAGGAAAATATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACA +ATTAACAGAGGCAGTGCAAAAAATAGCCAATGAAAGCATAGTAATATGGG +GAAAGATTCCTAAATTTAAATTACCCATACAAAAAGAAACATGGGAAACA +TGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTGAAATTATGGTACCAGTTAGAGAAAGAACCCA +TAGTAGGAGCAGAAACTTTC +>B_US_90_WEAU160_ACC_U21135 +CCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA +TGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAGAAAATAAAAGCAT +TAGTAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAAAATT +GGGCCTGAAAATCCATATAATACTCCAGTATTTGCCATAAAGAAAAAAGA +CAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA +CTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCTTCAGGGTTA +AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTC +AGTACCCTTAGATGAAGACTTCAGGAAGTACACTGCATTTACCATACCTA +GTATAAACAATGAAACACCAGGGATTAGATATCAGTACAATGTGCTTCCA +CAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAAT +ATTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACA +TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACA +AAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGATTTACCACACC +AGACAAAAAACATCAAAAAGACCCTCCATTCCTTTGGATGGGTTATGAAC +TCCATCCTGATAAATGGACAGTACAGCCTATAAAGCTGCCAGAAAAAGAA +AGTTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTGAATTGGGC +AAGTCAGATTTACGCAGGGATTAAAGTAAAGCAACTATGTAAACTCCTTA +GGGGGACCAAAGCACTAACAGAAATAATACCAATAACAGAAGAAGCAGAG +CTAGAGCTGGCAGAAAACAGGGAAATTCTAAAAGAACCGGTACATGGAGT +GTATTATGACCCATCAAAAGACTTAATAGCAGAGCTACAGAAGCAGGGGC +AAGGCCAATGGACATATCAGATTTATCAAGAGCCATTTAAAAATCTGAAA +ACAGGAAAGTATGCAAGAGTGAGGGGTGCCCACACTAATGATGTAAAACA +ATTAACAGAGGCAGTGCAGAAAATAACCACAGAAAGCATAGTAATATGGG +GAAAGACTCCTAAATTTAAACTACCCATACAAAAAGAAACATGGGAAACA +TGGTGGACAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAGTTTGT +CAATACCCCTCCCTTAGTGAAATTATGGTATCAGTTAGAGAAAGAACCCA +TAGTAGGAGCAGAAACTTTC \ No newline at end of file diff --git a/tools/tn93/test-data/tn93-in2-beta.fa b/tools/tn93/test-data/tn93-in2-beta.fa new file mode 100644 index 00000000000..70c28bf5c0b --- /dev/null +++ b/tools/tn93/test-data/tn93-in2-beta.fa @@ -0,0 +1,112 @@ +>D_CD_83_ELI_ACC_K03454_7 +CCAATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA +TGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCAT +TAACAGAAATTTGTACAGATATGGAAAAGGAAGGAAAAATTTCAAGAATT +GGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGA +CAGTACCAAGTGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA +CTCAAGATTTCTGGGAAGTTCAATTAGGAATACCGCATCCTGCAGGGCTG +AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTC +AGTTCCCTTAGATGAAGATTTTAGGAAATATACCGCCTTTACCATATCTA +GTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCA +CAGGGATGGAAAGGATCACCGGCAATATTCCAAAGTAGCATGACAAAAAT +CTTAGAGCCCTTTAGAAAACAAAATCCAGAAATGGTTATCTATCAATACA +TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGGACA +AAAATAGAGAAATTAAGAGAACATCTATTGAGGTGGGGATTTACCAGACC +AGATAAAAAACATCAGAAAGAACCCCCATTTCTTTGGATGGGTTATGAAC +TCCATCCTGATAAATGGACAGTACAGTCTATAAAACTGCCAGAAAAGGAG +AGCTGGACTGTCAATGATATACAGAACTTAGTGGAGAGATTAAACTGGGC +AAGCCAGATTTATCCAGGAATTAAAGTAAGACAATTATGTAAACTCCTTA +GGGGAACCAAAGCACTAACAGAAGTAATACCACTAACAGAAGAAGCAGAA +TTAGAACTGGCAGAAAACAGGGAAATTTTAAAAGAACCAGTACATGGAGT +GTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAACAAGGGC +ACGGCCAATGGACATACCAAATTTATCAAGAACCATTTAAAAATCTGAAA +ACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAGCA +ATTAGCAGAGGCAGTGCAAAGAATATCCACAGAAAGCATAGTGATATGGG +GAAGGACTCCTAAATTTAGACTACCCATACAAAAGGAAACATGGGAAACA +TGGTGGGCAGAGTATTGGCAAGCCACTTGGATTCCTGAGTGGGAATTTGT +CAATACCCCTCCTTTAGTAAAATTATGGTACCAGTTAGAGAAGGAACCCA +TAATAGGAGCAGAAACTTTC +>D_CD_83_NDK_ACC_M27323 +CCAATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA +TGGCCCAAAAGTTAAACAATGGCCATTGACAGAAGAAAAAATAAAAGCAT +TAACAGAAATTTGTACAGAAATGGAAAAGGAAGGAAAAATTTCAAGAATT +GGGCCTGAAAATCCATATAATACTCCAATATTTGCCATAAAGAAAAAAGA +CAGTACCAAGTGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA +CTCAAGATTTCTGGGAGGTTCAATTAGGAATACCGCATCCTGCAGGGCTG +AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTCTC +AGTTCCCTTAGATGAAGATTTTAGGAAATATACCGCATTTACCATACCTA +GTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTCCCA +CAGGGATGGAAAGGATCACCGGCAATATTCCAAAGTAGCATGACAAAAAT +CTTAGAGCCCTTTAGAAAACAAAATCCAGAAATAGTTATCTATCAATACA +TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACA +AAAATAGAGGAATTAAGAGAACATCTATTGAGGTGGGGATTTACCACACC +AGATAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAAC +TCCATCCTGATAAATGGACAGTACAGCCTATAAACCTGCCAGAAAAAGAA +AGCTGGACTGTCAATGATATACAGAAGTTAGTGGGGAAATTAAACTGGGC +AAGCCAGATTTATGCAGGAATTAAAGTAAAGCAATTATGTAAACTCCTTA +GGGGAACCAAAGCACTAACAGAAGTAGTACCACTAACAGAAGAAGCAGAA +TTAGAACTGGCAGAAAACAGGGAAATTCTAAAAGAACCAGTACATGGAGT +GTATTATGACCCATCAAAAGACTTAATAGCAGAACTACAGAAACAAGGGG +ACGGCCAATGGACATACCAAATTTATCAAGAACCATTTAAAAATCTAAAA +ACAGGAAAGTATGCAAGAACGAGGGGTGCCCACACTAATGATGTAAAACA +ATTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTGATATGGG +GAAAGACTCCTAAATTTAAACTACCCATACAAAAGGAAACATGGGAAACA +TGGTGGATAGAGTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGT +CAATACCCCTCCTTTAGTAAAATTATGGTACCAGTTAGAGAAGGAACCCA +TAATAGGAGCAGAAACTTTC +>D_CD_84_84ZR085_ACC_U88822 +CCAATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAATGGA +TGGCCCAAAAGTTAAACAATGGCCGTTGACAGAAGAAAAAATAAAAGCAT +TAACAGAAATTTGTACAGATATGGAAAAGGAAGGAAAAATTTCAAGAATT +GGGCCTGAAAATCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGA +CAGTACTAAGTGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA +CTCAAGACTTCTGGGAAGTTCAATTAGGGATACCACATCCTGCAGGATTA +AAGAAGAAAAAGTCAATAACAGTACTGGATGTGGGCGATGCATATTTTTC +AATTCCCTTATGTGAAGACTTTAGGAAGTACACTGCATTTACCATACCTA +GTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTACTTCCA +CAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGATAAAAAT +CTTAGAGCCCTTTAGAAAACAAAATCCAGAAGTAGTTATCTATCAATACA +TGGATGATTTGTATGTAGGATCTGATTTAGAAATAGGACAGCATAGAGCA +AAAATAGAGAAATTAAGAGAACATCTGTTGAGGTGGGGGCTTACCACACC +AGACAAAAAACATCAGAAAGAACCTCCATTTCTTTGGATGGGTTATGAAC +TCCATCCTGATAAGTGGACAGTACAGTCTATAACACTGCCAGAGAAAGAA +AGCTGGACTGTCAATGATATACAGAAGTTAGTGGGAAAATTAAATTGGGC +AAGCCAGATTTATCCAGGAATTAAAGTAAAGCAATTATGTAAACTCCTTA +GGGGAACCAAGGCACTAACAGAGGTAATACCACTAACAGAAGAAGCAGAA +TTAGAACTGGCAGAAAACAGGGAGATTCTAAAGGAACCAATGCATGGAGT +GTATTATGACCCATCAAAAGACTTAATAGCAGAATTACAGAAACAAGGGC +AAGGTCAATGGACATATCAAATTTATCAAGAACCATTTAAAAATCTGAAA +ACAGGAAAGTATGCAAGAATGAGGGGTGCCCACACTAATGATGTAAAACA +GTTAACAGAGGCAGTGCAAAAAATAGCCATAGAAAGCATAGTGATATGGG +GAAAGACTCCTAAATTTAGACTACCCATACAAAAGGAAACATGGGAAACA +TGGTGGATAGACTATTGGCAAGCCACCTGGATTCCTGAGTGGGAATTTGT +CAATACCCCTCCTTTAGTAAAATTATGGTACCAGTTAGAGAAGGAACCCA +TAATAGGAGCAGAAACTTTC +>D_UG_94_94UG114_ACC_U88824 +CCAATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGGATGGA +TGGCCCAAAAGTTAAACAATGGCCGTTGACAGAAGAAAAAATAAAAGCAC +TAATAGAAATTTGTTCAGAACTAGAAAAGGAAGGAAAAATTTCAAAAATT +GGGCCTGAAAACCCATACAATACTCCAATATTTGCCATAAAGAAAAAAGA +CAGTACTAAGTGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAA +CTCAAGACTTTTGGGAAGTTCAACTAGGAATACCACATCCTGCAGGGCTA +AAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGACGCATATTTTTC +AGTTCCCTTACATGAAGACTTTAGAAAATATACCGCATTCACCATACCTA +GTACAAACAATGAGACACCAGGAATTAGATATCAGTACAATGTGCTTCCA +CAAGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAAT +CTTAGAACCTTTTAGAAAACAAAATCCAGAAATGATTATCTATCAATACA +TGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAATA +AAAATAGAGGAATTAAGGGGACACCTCTTGAAGTGGGGATTTACCACACC +AGACAAAAAGTATCAGAAAGAACCCCCATTTCTTTGGATGGGTTATGAAC +TCCATCCTGATAAGTGGACAGTACAGCCTATACATCTGCCAGAAAAGGAA +AGCTGGACTGTCAATGATATACAGAAGTTAGTGGGAAAATTAAATTGGGC +AAGCCAGATTTATCCAGGAATTAAAGTAAGACAATTATGCAAATGCCTTA +GGGGAGCCAAAGCACTGACAGAAGTAATACCACTGACAGCAGAAGCAGAA +TTAGAACTGGCAGAAAACAGGGAAATACTAAAAGAACCAGTACATGGAGC +GTATTATGACCCATCAAAAGACTTAATAGCAGAAATACAGAAACAAGGGC +AAGATCAATGGACATATCAAATATATCAAGAACAATATAAAAATCTGAAA +ACAGGAAAGTATGCGAAAATGAGGGGTACCCACACTAATGATGTAAAACA +ATTAACAGAGGCAGTGCAGAAAATAGCCCAAGAATGTATAGTAATATGGG +GAAAGACTCCTAAATTTAGACTACCCATACAAAAGGAAACATGGGAAACA +TGGTGGACAGAGTATTGGCAGGCCACCTGGATTCCTGAGTGGGAGTATGT +CAACACCCCTCCTTTAGTTAAATTATGGTATCAGTTAGAGAAGGAACCCA +TAGTAGGAGCAGAAACTTTC diff --git a/tools/tn93/test-data/tn93-out1.csv b/tools/tn93/test-data/tn93-out1.csv new file mode 100644 index 00000000000..9981b245286 --- /dev/null +++ b/tools/tn93/test-data/tn93-out1.csv @@ -0,0 +1,29 @@ +B_FR_83_HXB2_ACC_K03455_5,B_US_83_RF_ACC_M17451,0.045156 +B_FR_83_HXB2_ACC_K03455_5,B_US_86_JRFL_ACC_U63632,0.0296218 +B_FR_83_HXB2_ACC_K03455_5,B_US_90_WEAU160_ACC_U21135,0.0327566 +B_FR_83_HXB2_ACC_K03455_5,D_CD_83_ELI_ACC_K03454_7,0.0669206 +B_FR_83_HXB2_ACC_K03455_5,D_CD_83_NDK_ACC_M27323,0.0592586 +B_FR_83_HXB2_ACC_K03455_5,D_CD_84_84ZR085_ACC_U88822,0.0663619 +B_FR_83_HXB2_ACC_K03455_5,D_UG_94_94UG114_ACC_U88824,0.0847988 +B_US_83_RF_ACC_M17451,B_US_86_JRFL_ACC_U63632,0.048328 +B_US_83_RF_ACC_M17451,B_US_90_WEAU160_ACC_U21135,0.0515908 +B_US_83_RF_ACC_M17451,D_CD_83_ELI_ACC_K03454_7,0.0810759 +B_US_83_RF_ACC_M17451,D_CD_83_NDK_ACC_M27323,0.0661066 +B_US_83_RF_ACC_M17451,D_CD_84_84ZR085_ACC_U88822,0.0769146 +B_US_83_RF_ACC_M17451,D_UG_94_94UG114_ACC_U88824,0.0955213 +B_US_86_JRFL_ACC_U63632,B_US_90_WEAU160_ACC_U21135,0.0408994 +B_US_86_JRFL_ACC_U63632,D_CD_83_ELI_ACC_K03454_7,0.0771797 +B_US_86_JRFL_ACC_U63632,D_CD_83_NDK_ACC_M27323,0.0609044 +B_US_86_JRFL_ACC_U63632,D_CD_84_84ZR085_ACC_U88822,0.0705011 +B_US_86_JRFL_ACC_U63632,D_UG_94_94UG114_ACC_U88824,0.0882054 +B_US_90_WEAU160_ACC_U21135,D_CD_83_ELI_ACC_K03454_7,0.0771856 +B_US_90_WEAU160_ACC_U21135,D_CD_83_NDK_ACC_M27323,0.0609097 +B_US_90_WEAU160_ACC_U21135,D_CD_84_84ZR085_ACC_U88822,0.0740203 +B_US_90_WEAU160_ACC_U21135,D_UG_94_94UG114_ACC_U88824,0.0890019 +D_CD_83_ELI_ACC_K03454_7,D_CD_83_NDK_ACC_M27323,0.0287246 +D_CD_83_ELI_ACC_K03454_7,D_CD_84_84ZR085_ACC_U88822,0.055948 +D_CD_83_ELI_ACC_K03454_7,D_UG_94_94UG114_ACC_U88824,0.0742033 +D_CD_83_NDK_ACC_M27323,D_CD_84_84ZR085_ACC_U88822,0.0491974 +D_CD_83_NDK_ACC_M27323,D_UG_94_94UG114_ACC_U88824,0.0726626 +D_CD_84_84ZR085_ACC_U88822,D_UG_94_94UG114_ACC_U88824,0.0805088 +ID1,ID2,Distance diff --git a/tools/tn93/test-data/tn93-out2.csv b/tools/tn93/test-data/tn93-out2.csv new file mode 100644 index 00000000000..3459c62c219 --- /dev/null +++ b/tools/tn93/test-data/tn93-out2.csv @@ -0,0 +1,17 @@ +B_FR_83_HXB2_ACC_K03455_5,D_CD_83_ELI_ACC_K03454_7,0.0669206 +B_FR_83_HXB2_ACC_K03455_5,D_CD_83_NDK_ACC_M27323,0.0592586 +B_FR_83_HXB2_ACC_K03455_5,D_CD_84_84ZR085_ACC_U88822,0.0663619 +B_FR_83_HXB2_ACC_K03455_5,D_UG_94_94UG114_ACC_U88824,0.0847988 +B_US_83_RF_ACC_M17451,D_CD_83_ELI_ACC_K03454_7,0.0810759 +B_US_83_RF_ACC_M17451,D_CD_83_NDK_ACC_M27323,0.0661066 +B_US_83_RF_ACC_M17451,D_CD_84_84ZR085_ACC_U88822,0.0769146 +B_US_83_RF_ACC_M17451,D_UG_94_94UG114_ACC_U88824,0.0955213 +B_US_86_JRFL_ACC_U63632,D_CD_83_ELI_ACC_K03454_7,0.0771797 +B_US_86_JRFL_ACC_U63632,D_CD_83_NDK_ACC_M27323,0.0609044 +B_US_86_JRFL_ACC_U63632,D_CD_84_84ZR085_ACC_U88822,0.0705011 +B_US_86_JRFL_ACC_U63632,D_UG_94_94UG114_ACC_U88824,0.0882054 +B_US_90_WEAU160_ACC_U21135,D_CD_83_ELI_ACC_K03454_7,0.0771856 +B_US_90_WEAU160_ACC_U21135,D_CD_83_NDK_ACC_M27323,0.0609097 +B_US_90_WEAU160_ACC_U21135,D_CD_84_84ZR085_ACC_U88822,0.0740203 +B_US_90_WEAU160_ACC_U21135,D_UG_94_94UG114_ACC_U88824,0.0890019 +ID1,ID2,Distance diff --git a/tools/tn93/tn93.xml b/tools/tn93/tn93.xml new file mode 100644 index 00000000000..3fe93a854c0 --- /dev/null +++ b/tools/tn93/tn93.xml @@ -0,0 +1,95 @@ + + compute distances between aligned sequences + + macros.xml + + + tn93 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + seqname:10 + +':' can be replaced with another character using `-d`, and sequences that have no explicit copy number are assumed to be a single copy. Copy numbers +only affect histogram and mean calculations. +]]> + + 10.1093/oxfordjournals.molbev.a040023 + +