diff --git a/config/auspice_config.json b/config/auspice_config.json index c349466..82cba3f 100644 --- a/config/auspice_config.json +++ b/config/auspice_config.json @@ -14,11 +14,6 @@ "build_url": "https://github.com/nextstrain/rsv", "colorings": [ - { - "key": "genome_clade", - "title": "Genome Clade", - "type": "categorical" - }, { "key": "gt", "title": "Genotype", diff --git a/ingest/bin/extend-metadata.py b/ingest/bin/extend-metadata.py new file mode 100644 index 0000000..05c4414 --- /dev/null +++ b/ingest/bin/extend-metadata.py @@ -0,0 +1,76 @@ +from Bio import SeqIO +import numpy as np +import pandas as pd +from Bio import SeqIO +from collections import defaultdict + +NEXTCLADE_JOIN_COLUMN_NAME = 'seqName' +VALUE_MISSING_DATA = '?' + +column_map = { + "clade": "clade", + "lineage": "lineage", + "coverage": "genome_coverage", + "totalMissing": "missing_data", + "totalSubstitutions": "divergence", + "totalNonACGTNs": "nonACGTN" +} + +coordinates = {'a':{'G':[4652, 5617], 'F':[5697,7421]}, + 'b':{'G':[4646, 5578], 'F':[5676,7400]}} + +def coverage(target, total): + if total[0]>target[1] or total[1]=target[1]: + # total overlap + return 1 + elif total[0]>target[0] and total[1]target[0] and total[1]>target[1]: + # overlap with total to the right of target + return (target[1]-total[0])/(target[1]-target[0]) + else: + # overlap with total to the left of target + return (total[1]-target[0])/(target[1]-target[0]) + + +if __name__=="__main__": + import argparse, sys + parser = argparse.ArgumentParser() + parser.add_argument("--metadata") + parser.add_argument("--nextclade") + parser.add_argument("--id-field") + parser.add_argument("--virus-type") + parser.add_argument("--output", default=sys.stdout) + args = parser.parse_args() + + metadata = pd.read_csv(args.metadata, index_col=args.id_field, + sep='\t', low_memory=False, na_filter = False) + + # Read and rename clade column to be more descriptive + clades = pd.read_csv(args.nextclade, index_col=NEXTCLADE_JOIN_COLUMN_NAME, + sep='\t', low_memory=False, na_filter = False) \ + .rename(columns=column_map) + + # Concatenate on columns + result = pd.merge( + metadata, clades, + left_index=True, + right_index=True, + how='left' + ) + + for gene in coordinates[args.virus_type]: + def get_coverage(d): + try: + return coverage(coordinates[args.virus_type][gene], [int(d.alignmentStart), int(d.alignmentEnd)]) + except: + print('missing alignment for ',d.name) + return np.nan + + result[f"{gene}_coverage"] = result.apply(get_coverage, axis=1) + + result.to_csv(args.output, index_label=args.id_field, sep='\t') diff --git a/ingest/bin/gene-coverage.py b/ingest/bin/gene-coverage.py deleted file mode 100644 index c7e77d8..0000000 --- a/ingest/bin/gene-coverage.py +++ /dev/null @@ -1,71 +0,0 @@ -from Bio import SeqIO -import numpy as np -import pandas as pd -from Bio import SeqIO -from collections import defaultdict -from sort import sequence_to_int_array - - -#this function finds the coverage of the F and G gene for each sequence in each alignment. The genome annotations used are those of the respective alignment reference. - -def G_and_F_coverage(alignment, indices_G, indices_F): - gap_symbol = 45 - missing_symbol = 110 - G_coverage, F_coverage, genome_coverage = {}, {}, {} - for seq in SeqIO.parse(alignment, "fasta"): - seq_array = sequence_to_int_array(seq.seq, fill_gaps=False) - seq_array_G = seq_array[indices_G[0]:indices_G[1]] - seq_array_F = seq_array[indices_F[0]:indices_F[1]] - - F_coverage[seq.id] = np.mean((seq_array_F!=gap_symbol)&(seq_array_F!=missing_symbol)) - G_coverage[seq.id] = np.mean((seq_array_G!=gap_symbol)&(seq_array_G!=missing_symbol)) - genome_coverage[seq.id] = np.mean((seq_array!=gap_symbol)&(seq_array!=missing_symbol)) - - return(G_coverage, F_coverage, genome_coverage) - - -if __name__=="__main__": - alignments = ["data/a/1_sequences.aligned.fasta", "data/a/2_sequences.aligned.fasta", "data/a/3_sequences.aligned.fasta", - "data/b/1_sequences.aligned.fasta", "data/b/2_sequences.aligned.fasta", "data/b/3_sequences.aligned.fasta"] - - start_and_end_G = [ [4682, 5653], [4637,5602], [4633, 5229], [4713, 5645],[4688, 5641], [4615, 5578]] - - start_and_end_F = [[5733, 7457],[5682,7401], [5606, 7349], [5747, 7467], [5718, 7442], [5628, 7529]] - - column = ['a_1', 'a_2', 'a_3', 'b_1', 'b_2', 'b_3'] - - accessions = [pd.read_csv("data/a/1_metadata.tsv", sep='\t').accession, - pd.read_csv("data/a/2_metadata.tsv", sep='\t').accession, - pd.read_csv("data/a/3_metadata.tsv", sep='\t').accession, - pd.read_csv("data/b/1_metadata.tsv", sep='\t').accession, - pd.read_csv("data/b/2_metadata.tsv", sep='\t').accession, - pd.read_csv("data/b/3_metadata.tsv", sep='\t').accession] - - everything = set().union(*accessions) - - # add the G and F coverage into the dataframe - G_coverage, F_coverage, genome_coverage = (defaultdict(list) for i in range(3)) - for filename, G_s, F_s in zip(alignments, start_and_end_G, start_and_end_F): - coverages_G, coverages_F, coverage_genome = G_and_F_coverage(filename, G_s, F_s) - - for accession in everything: - G_coverage[accession].append(coverages_G.get(accession, 0.00)) - F_coverage[accession].append(coverages_F.get(accession, 0.00)) - genome_coverage[accession].append(coverage_genome.get(accession, 0.00)) - - metadata_files = ["data/b/metadata_no_covg.tsv", "data/a/metadata_no_covg.tsv"] - outputs = ["data/b/metadata.tsv", "data/a/metadata.tsv"] - - # this part of the script reads the already separated a and b metadata and adds the F and G covg values to the correct row based on accession - for metadata_fname, output in zip(metadata_files, outputs): - metadata = pd.read_csv(metadata_fname, sep='\t') - F_covg, G_covg, genome_covg = [], [], [] - - for acc in metadata['accession']: - F_covg.append(max(F_coverage[acc])) - G_covg.append(max(G_coverage[acc])) - genome_covg.append(max(genome_coverage[acc])) - - coverages = pd.DataFrame({'F_coverage':F_covg, 'G_coverage': G_covg, 'genome_coverage': genome_covg}) - m = pd.DataFrame(metadata.join(coverages)) - m.to_csv(output, sep='\t', index=False, float_format='%.3f') diff --git a/ingest/bin/metadata_dedup.py b/ingest/bin/metadata_dedup.py deleted file mode 100644 index 5d56985..0000000 --- a/ingest/bin/metadata_dedup.py +++ /dev/null @@ -1,19 +0,0 @@ -import pandas as pd -import argparse - - -def metadata_deduplication(old, new): - df = pd.read_csv(old, sep='\t') - df1 = df.drop_duplicates(subset=['strain'], keep='first') - df1.to_csv(new,sep='\t', index=False) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description="make new reference depending on whether the entire genome or only part is to be used for the tree", - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - parser.add_argument("--metadata-original", required=True, help="original metadata file, tsv format") - parser.add_argument("--metadata-output", required=True, help="deduplicated metadata file, tsv format") - args = parser.parse_args() - metadata_deduplication(args.metadata_original, args.metadata_output) diff --git a/ingest/bin/sequencesandmetadata.py b/ingest/bin/sequencesandmetadata.py deleted file mode 100644 index 8d6855e..0000000 --- a/ingest/bin/sequencesandmetadata.py +++ /dev/null @@ -1,31 +0,0 @@ -from Bio import SeqIO -import pandas as pd -import argparse - -def sequencesandmetadata(sortedalignment,allmetadata, allsequences,newmetadata, newsequences): - seq_new, listofid =([] for i in range(2)) - alignment = SeqIO.parse(sortedalignment, "fasta") - - for record in alignment: listofid.append(record.id) - - tsv_file = pd.read_csv(allmetadata, sep="\t") - metadata = pd.DataFrame(data =tsv_file.loc[tsv_file['accession'].isin(listofid)], columns=tsv_file.columns) - metadata.to_csv(newmetadata, sep="\t") - sequences = SeqIO.parse(allsequences, "fasta") - for record in sequences: - if record.id in listofid: seq_new.append(record) - SeqIO.write(seq_new, newsequences,"fasta") - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description="write separate files for sequences and metadata for a and b", - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - parser.add_argument("--sortedalignment", required=True, help="FASTA file of aligned and sorted sequences") - parser.add_argument("--allmetadata", required=True, help="all metadata input file") - parser.add_argument("--allsequences", required=True, help="all sequences input file") - parser.add_argument("--metadata", required=True, help="output metadata file for a or b, tsv") - parser.add_argument("--sequences", required=True, help="output sequences file for a or b, FASTA") - args = parser.parse_args() - - sequencesandmetadata(args.sortedalignment, args.allmetadata, args.allsequences, args.metadata, args.sequences) diff --git a/ingest/bin/sort.py b/ingest/bin/sort.py deleted file mode 100644 index 33e9c30..0000000 --- a/ingest/bin/sort.py +++ /dev/null @@ -1,75 +0,0 @@ -import numpy as np -import pandas as pd -from Bio import SeqIO -from collections import defaultdict - -def sequence_to_int_array(s, fill_value=110, fill_gaps=True): - seq = np.frombuffer(str(s).lower().encode('utf-8'), dtype=np.int8).copy() - if fill_gaps: - seq[(seq!=97) & (seq!=99) & (seq!=103) & (seq!=116)] = fill_value - else: - seq[(seq!=97) & (seq!=99) & (seq!=103) & (seq!=116) & (seq!=45)] = fill_value - return seq - -def get_similarity(alignment, reference): - ref_array = sequence_to_int_array(SeqIO.read(reference,"fasta").seq, fill_gaps=False) - similarity = {} - for seq in SeqIO.parse(alignment,"fasta"): - seq_array = sequence_to_int_array(seq.seq, fill_gaps=False) - good_indices = seq_array!=45 - similarity[seq.id] = np.mean(seq_array[good_indices]==ref_array[good_indices]) - - return similarity - -if __name__=="__main__": - dfs, a_list, b_list, a_sequences, b_sequences, listofdictionaries, a_metadata, b_metadata = ([] for i in range(8)) - to_a = ['a_1', 'a_2', 'a_3'] - to_b = ['b_1', 'b_2', 'b_3'] - seq = ["data/a/1_sequences.aligned.fasta", "data/a/2_sequences.aligned.fasta", "data/a/3_sequences.aligned.fasta", - "data/b/1_sequences.aligned.fasta", "data/b/2_sequences.aligned.fasta", "data/b/3_sequences.aligned.fasta"] - references = ["config/a_1_reference.fasta", "config/a_2_reference.fasta", "config/a_3_reference.fasta", - "config/b_1_reference.fasta", "config/b_2_reference.fasta", "config/b_3_reference.fasta"] - accessions = [pd.read_csv("data/a/1_metadata.tsv", sep='\t').accession, - pd.read_csv("data/a/2_metadata.tsv", sep='\t').accession, - pd.read_csv("data/a/3_metadata.tsv", sep='\t').accession, - pd.read_csv("data/b/1_metadata.tsv", sep='\t').accession, - pd.read_csv("data/b/2_metadata.tsv", sep='\t').accession, - pd.read_csv("data/b/3_metadata.tsv", sep='\t').accession] - everything = set().union(*accessions) - - all_similarities = defaultdict(list) - for filename, reference in zip(seq, references): - similarity = get_similarity(filename, reference) - - for accession in everything: - all_similarities[accession].append(similarity.get(accession, 0.001)) - - all_similarities_df = pd.DataFrame(all_similarities, index = to_a + to_b).T - - max_similarity = all_similarities_df.max(axis=1) - - all_similarities_df = all_similarities_df.loc[max_similarity>0.70,:] - - a_or_b = all_similarities_df.idxmax(axis=1).apply(lambda x: 'A' if x[0]=='a' else 'B') - - for record in SeqIO.parse("data/sequences.fasta", "fasta"): - if record.id in a_or_b: - if a_or_b[record.id]=='A': - a_sequences.append(record) - elif a_or_b[record.id]=='B': - b_sequences.append(record) - - SeqIO.write(a_sequences, "data/a/sequences_notdedup.fasta","fasta") - SeqIO.write(b_sequences, "data/b/sequences_notdedup.fasta", "fasta") - - metadata = pd.read_csv("data/metadata.tsv", sep="\t", index_col='accession') - original_columns = metadata.columns - metadata.drop_duplicates(keep='first', inplace=True) - metadata['type'] = a_or_b - - a_metadata = pd.DataFrame(data=metadata.loc[metadata['type']=='A'], - columns=original_columns) - b_metadata = pd.DataFrame(data=metadata.loc[metadata['type']=='B'], - columns=original_columns) - a_metadata.to_csv('data/a/metadata_notdedup.tsv', sep="\t") - b_metadata.to_csv('data/b/metadata_notdedup.tsv', sep="\t") diff --git a/ingest/config/a_1_reference.fasta b/ingest/config/a_1_reference.fasta deleted file mode 100644 index 7203bdb..0000000 --- a/ingest/config/a_1_reference.fasta +++ /dev/null @@ -1,221 +0,0 @@ ->MZ515804.1 Human respiratory syncytial virus A isolate SE02-0021-D02, complete genome -ACGCGAAAAAATGCGTACAACAAACTTGCGTAAACCAAAAAAATGGGGCAAATAAGAATTTGATAAGTAC -CACTTAAATTTAACTCCTTTGGTTTAAGATGGACAGCAACTCATTGAGCATGATAAAAGTTAGATTGCAA -AATCTGTTTGACAATGATGAAGTAGCATTGTTAAAAATAACATGTTATACTGACAAATTAATACAGTTAA -CCAATGCTTTGGCTAAGGCAGTTATACATACAATCAAATTGAATGGCATTGTATTTGTGCATGTTATTAC -AAGTAGTGATATTTGCCCTAATAATAATATTGTAGTGAAATCCAATTTCACAACAATGCCAGTATTACAA -AATGGAGGTTATATATGGGAAATGATGGAATTAACACACTGCTCTCAACCTAATGGCCTAATAGATGACA -ATTGTGAAATTAAATTCTCCAAAAAACTAAGTGATTCAACAATGACCAATTATATGAATCAATTATCTGA -ATTACTTGGATTTGACCTCAATCCATAAATCATAATAAATATCAACTAGCAAATCAATGTCACTAACACC -ATTAGTTAATATAAAACTTGACAGAAGATAAAAATGGGGCAAATAAATCAATTCAGCCGACCCAACCATG -GACACAACACACAATGATACCACACCACAAAGACTGATGATCACAGACATGAGACCACTATCGCTTGAGA -CCATAATAACATCTCTAACCAGAGATATCATAACACATAAATTTATATACTTGATAAATCATGAATGCAT -AGTGAGAAAACTTGATGAAAGACAGGCCACATTTACATTTCTGGTCAACTATGAAATGAAACTATTGCAC -AAAGTGGGAAGCACTAAATATAAAAAATATACTGAATACAACACAAAATATGGCACTTTCCCTATGCCAA -TATTTATCAATCATGATGGGTTCTTAGAATGCATTGGCATTAAGCCTACCAAGCACACACCCATAATATA -CAAGTATGATCTCAATCCATGAATATCAAACCAAGATTCAAACAATCCGAAATAACAACTCTATGCATAA -TCACACTCCATAGTCCAAATGGAGCCTGAAAATTATAGTTATTTAAAATTAAGGAGAGACATAAGATGAA -AGATGGGGCAAATACAAAAATGGCTCTTAGCAAAGTCAAGTTGAATGATACACTCAACAAAGATCAACTT -CTATCATCCAGCAAATATACCATCCAACGGAGCACAGGAGACAGCATTGACACTCCTAATTATGATGTGC -AGAAACACATTAATAAGTTATGTGGCATGTTATTAATCACAGAAGATGCTAATCATAAATTCACTGGGTT -AATAGGTATGTTATATGCTATGTCTAGATTAGGAAGAGAAGACACCATAAAAATACTCAAAGATGCGGGA -TATCATGTTAAGGCAAATGGAGTGGATGTAACAACACATCGTCAAGACATTAATGGGAAAGAAATGAAAT -TTGAAGTGTTAACATTAGCAAGCTTAACAACTGAAATTCAAATTAACATTGAGATAGAATCTAGAAAATC -CTACAAAAAAATGCTAAAAGAAATGGGAGAGGTGGCTCCAGAATACAGGCATGACTCTCCTGATTGTGGG -ATGATAATATTATGTATAGCAGCATTAGTAATAACCAAATTAGCAGCAGGAGATAGATCAGGTCTTACAG -CTGTGATTAGGAGAGCTAACAATGTCCTAAAAAATGAAATGAAACGTTATAAAGGTTTATTACCCAAGGA -TATAGCCAACAGCTTCTATGAAGTGTTTGAAAAATATCCTCACTTTATAGATGTTTTTGTTCATTTCGGT -ATAGCACAATCTTCCACCAGAGGTGGCAGTAGAGTTGAAGGGATTTTTGCAGGATTGTTTATGAATGCCT -ATGGTGCAGGGCAAGTGATGTTACGGTGGGGGGTCTTAGCAAAATCAGTTAAAAACATTATGTTAGGACA -CGCTAGTGTACAAGCAGAAATGGAACAAGTTGTGGAGGTGTATGAGTATGCTCAGAAATTGGGTGGAGAA -GCAGGATTCTACCATATATTGAACAACCCAAAAGCATCACTATTATCTTTGACTCAATTTCCTCACTTCT -CTAGTGTAGTATTGGGCAATGCTGCTGGCCTAGGCATAATGGGAGAATACAGAGGTACACCAAGGAATCA -AGATTTATATGATGCTGCAAAAGCATATGCTGAACAACTCAAAGAAAATGGTGTGATTAACTACAGTGTA -TTAGATTTGACAGCAGAAGAACTAGAGGCTATCAAACATCAGCTTAATCCAAAAGATAATGATGTAGAGC -TTTGAGTTAATAAAAAAGTGGGGCAAATAAATCATCATGGAAAAGTTTGCTCCTGAATTCCATGGAGAAG -ATGCAAACAACAGAGCCACCAAATTCCTAGAATCAATAAAAGGCAAATTCACATCACCCAAAGATCCCAA -GAAAAAAGATAGTATCATATCTGTCAACTCAATAGATATAGAAGTAACCAAAGAAAGCCCTATAACATCA -AATTCAACCATTATAAACCCAATAAATGAGACAGATGATACTGTAGGGAACAAGCCCAATTATCAAAGAA -AGCCTCTAGTAAGTTTCAAAGAAGACCCTACGCCAAATGATAATCCTTTTTCAAAACTATACAAAGAAAC -CATAGAAACATTTGATAACAATGAAGAAGAATCTAGCTATTCATATGAAGAAATAAATGATCAAACAAAC -GATAATATAACAGTAAGATTAGATAGGATTGATGAGAAATTAAGTGAAATACTAGGAATGCTTCACACAT -TAGTAGTAGCGAGTGCAGGACCCACATCTGCTCGGGATGGTATAAGAGATGCCATGGTTGGTTTAAGAGA -AGAAATGATAGAAAAAATCAGAACTGAAGCATTAATGACCAATGACAGACTAGAAGCTATGGCAAGACTC -AGGAATGAAGAAAGTGAAAAGATGGCAAAAGACACATCAGATGAAGTGTCTCTCAATCCAACATCAGAGA -AACTGAACAACCTGTTGGAAGGGAATGATAGTGACAATGATCTATCACTTGAAGATTTCTGATTAGCTAC -CAAACTGTACATCAAAACACAACACCAATAGAAAGCCAACAAACAAACCAATTCACCTATCCAACCAAAC -ATCCATCTGCTGATTAGCCAATCAGCCAAAAAACAACCAGCTAATCCAAAACTAGCTACTCGGAAAAAAT -CGATACTATAGTTACAAAAAAAGATGGGGCAAATATGGAAACATACGTGAATAAACTTCACGAGGGCTCC -ACATACACAGCTGCTGTTCAATACAATGTCCTAGAAAAAGACGATGATCCTGCATCACTTACAATATGGG -TGCCCATGTTCCAATCATCCATGCCAGCAGATCTACTCATAAAAGAACTAGCCAATGTCAATATACTAGT -GAAACAAATATCCACACCCAAGGGACCCTCATTAAGAGTCATGATAAACTCAAGAAGTGCAGTGCTAGCA -CAAATGCCCAGTAAATTTACCATATGTGCCAATGTGTCCTTGGATGAAAGAAGCAAGCTGGCATATGACG -TAACCACACCCTGTGAAATTAAGGCATGCAGTCTAACATGCCTAAAATCAAAAAATATGTTAACTACAGT -TAAAGATCTCACCATGAAAACACTCAACCCAACACATGACATCATTGCTTTATGTGAATTTGAAAATATA -GTAACATCAAAAAAAGTCATAATACCAACATACCTAAGATCTATCAGCGTCAGAAATAAAGATCTGAACA -CACTTGAAAATATAACAACCACTGAATTCAAAAATGCCATTACAAATGCAAAAATTATCCCTTACTCAGG -ATTACTGTTAGTCATCACAGTGACTGACAACAAAGGAGCATTCAAATACATAAAGCCACAAAGTCAATTC -ATAGTAGATCTTGGAGCTTACCTAGAAAAAGAAAGTATATATTATGTTACAACAAATTGGAAGCACACAG -CTACACGATTTGCAATCAAACCCATGGAAGATTAACCTTTTTCCTCTACATCAATGAGTAGATTCATACA -AACTTTCTAACTACATTCTTCACTTCACAATCATAATCACCAACCCTCTGTGGTTCAATAAATCAAACAA -AACTCATCAGGAGTTCCAGATCATCCCAAGTCATTGTTCATCAGATCCAGTACTCAAATAAGTTAATAAA -AAAATCCACATGGGGCAAATAATCATTTAGGGAAATCCAACTAATCACAACATCTGTCAACATAGACAAG -TCAACACGCTAGATAAAATCAACCAATGGAAAATACATCCATAACTATAGAATTCTCAAGCAAATTCTGG -CCTTACTTTACACTAATACACATGATAACAACAATAATCTCTTTGATAATCATAATCTCCATCATGATTG -CAATACTAAACAAACTCTGCGAATATAATGTATTCCATAACAAAACCTTTGAGCTACCACGAGCTCGAGT -CAATACATAGCATTCACCAATCTGATAGCTCAAAACAGTAACCTAGCATTTGTAAATGAACTACCCTCAC -TTCTTCACAAAACCACATCAACATCTCACCATGCAAGCCATCATCTATACCATAAAGTAGTTAATTAAAA -AATAGTCATAACAATGAACTAGGATATTAAGACCAAAAACAACGCTGGGGCAAATGCAAACATGTCCAAA -ACCAAGGACCAACGCACCGCCAAGACACTAGAAAGGACCTGGGACACTCTCAATCATCTATTATTCATAT -CATCGTGCTTATACAAGTTAAATCTTAAATCTATAGCACAAATCACATTATCTATTTTGGCAATGATAAT -CTCAACCTCACTTATAATTGCAGCCATCATATTCATAGCCTCGGCAAACCACAAAGTCACACTAACAACT -GCAATCATACAAGATGCAACGAACCAGATCAAGAACACAACCCCAACATACCTCACCCAGAATCCCCAGC -TTGGAATCAGCTTCTCCAATCTGTCCGGAACTACATCACAATCCACTACCATACTAGCTTCAACAACACC -AAGTGCTGAGTCAACCCCACAATCCACAACAGTCAAGATCATAAACACAACAACAACCCAAATATTACCT -AGCAAACCCACCACAAAACAACGCCAAAATGAACCACAAAACAAACCCAACAATGACTTTCACTTTGAAG -TGTTCAATTTCGTACCCTGCAGCATATGCAGCAACAATCCAACCTGCTGGGCCATCTGCAAGAGAATACC -AAACAAAAACAAAAAACCTGGAAAGAAAACCACCACCAAGCCCACAAAAAAACCAACCCTCAAGACAACC -AAAAAAGATCCCAAACCTCAAACCACAAAACCAAAGGGAGTACCCACTACCAAGCCTACAGGAAAGCCAA -TCATTAACACCACTAAAACAAACAGCAGAACTACACTGCTCACCTCCAACACCAAAGGAAATCCAGAACA -CACAAGTCAAAAGGAAACCATCCACTCAACCACCTCCGAAGGCTATCCAAGCCCATCACAAGTCTATACA -ACATCCGGTCAAGAGGAAACCCTCCACTCAACCACCTCCAAAGGCTATCCAAGCCCATCACAAGTCTATA -CAACATCCGAGTACCCATCACAATCTCCATCTTCATCCAACACAACAAAATGATAGTCATTAAAAAGCGT -ATTGTTGCAAAAGGCCATGACCAAATCAAACAGAATCAAAATCAACTTTGGGGCAAATAACAATGGAGTT -GCCAATCCTCAAAATAAATGCTATTACCACAATCCTTGCTGCAGTCACACTCTGTTTTGCTTCCAGTCAA -AACATCACTGAAGAATTTTATCAATCAACATGCAGTGCAGTTAGCAAAGGCTATCTTAGTGCTCTAAGAA -CTGGTTGGTATACTAGTGTTATAACTATAGAATTGAGTAATATCAAGGAAAATAAGTGTAATGGTACAGA -CGCTAAGGTAAAATTAATAAAACAAGAATTAGATAAATATAAAAATGCTGTAACAGAATTGCAGTTGCTC -ATGCAAAGCACACCAGCAGCCAACAGTCGAGCCAGAAGAGAACTACCAAGATTTATGAATTATACACTCA -ACAACACCAAAAACACCAATGTAACATTAAGTAAGAAAAGGAAAAGAAGATTTCTTGGATTTTTGTTAGG -TGTTGGATCTGCAATCGCCAGTGGCATTGCCGTATCCAAGGTCCTGCACCTAGAAGGGGAAGTGAACAAA -ATCAAAAGTGCTCTACTATCCACAAACAAGGCTGTAGTCAGCTTATCCAATGGAGTCAGTGTCTTAACCA -GCAAGGTGTTAGACCTCAAAAACTATATAGATAAACAGTTGTTACCTATTGTTAACAAGCAAAGCTGCAG -CATATCAAACATTGAAACTGTGATAGAGTTCCAACAAAAGAACAACAGACTACTAGAGATTACCAGAGAA -TTTAGTGTTAATGCAGGTGTAACCACACCTGTAAGCACTTATATGTTAACTAATAGTGAGTTATTATCAT -TAATCAATGATATGCCTATAACAAATGATCAGAAAAAGTTAATGTCCAGCAATGTTCAAATAGTTAGACA -GCAAAGTTACTCTATCATGTCAATAATAAAAGAGGAAGTCTTAGCATATGTAGTACAATTACCACTATAT -GGTGTAATAGATACTCCTTGTTGGAAACTACACACATCCCCTCTATGTACAACCAACACAAAGGAAGGAT -CCAACATCTGCTTAACAAGAACCGACAGAGGATGGTACTGTGACAATGCAGGATCTGTATCCTTTTTCCC -ACAAGCTGAAACATGTAAAGTTCAATCGAATCGGGTGTTTTGTGACACAATGAACAGTTTAACATTACCA -AGTGAGGTAAATCTCTGCAACATTGACATATTCAACCCCAAATATGATTGCAAAATTATGACTTCAAAAA -CAGATGTAAGCAGCTCCGTTATCACATCTCTAGGAGCCATTGTGTCATGCTATGGCAAAACCAAATGTAC -AGCATCCAATAAAAATCGTGGGATCATAAAGACATTCTCTAATGGGTGTGATTATGTATCAAATAAGGGG -GTGGATACTGTGTCTGTAGGTAATACATTATATTATGTAAATAAGCAAGAAGGCAAAAGTCTCTATGTAA -AAGGTGAACCAATAATAAATTTCTATGATCCATTAGTGTTCCCCTCTGATGAATTTGATGCATCAATATC -TCAAGTCAATGAGAAAATTAATCAGAGTCTAGCATTTATCCGTAAATCAGATGAATTATTACATAATGTA -AATGCTGGTAAATCCACCACAAATATCATGATAACTACCATAATTATAGTAATTATAGTAATATTGTTAG -CATTAATTGCAGTTGGACTGCTTCTATACTGCAAGGCCAGAAGCACACCAGTCACATTAAGTAAGGATCA -ACTGAGTGGTATAAATAACATTGCATTTAGTAACTGAATAAAAATAGCACCTAATCATATTCTTACAATG -GTTCGCTATTTGACCATAGATAACCCATCTATCATTAGATTATCCTAAAATTTGAACTTCATCACAACTT -TCATTTATAAACCATCTTACTTACACTTTTTAAGTAGATTCCTATTTTATAGTTATATAAAACAATTGAA -TACCAAATTAACTTACTATTTGTAAAAATGAGAACTGGGGCAAATATGTCACGAAGGAATCCTTGCAAAT -TCGAAATTCGAGGTCATTGCTTGAATGGTAAAAGGTGTCATTTTAGTCATAATTATTTTGAATGGCCACC -CCATGCACTGCTTGTAAGACAAAACTTTATGTTAAACAGAATACTTAAGTCTATGGATAAAAGCATAGAT -ACATTGTCAGAAATAAGTGGAGCTGCAGAGTTGGACAGAACAGAAGAGTATGCCCTCGGTGTAGTAGGAG -TGCTAGAGAGTTATATAGGATCAATAAATAATATAACTAAACAATCAGCATGTGTTGCCATGAGCAAACT -CCTTACTGAACTCAACAGCGATGACATCAAAAAACTAAGGGACAATGAAGAGCCAAACTCACCCAAAGTA -AGAGTGTACAATACTGTCATATCATATATTGAAAGCAACAGGAAGAACAATAAACAAACTATCCATTTGT -TAAAAAGATTGCCAGCAGACGTATTGAAGAAAACCATCAAAAACACATTGGATATCCACAAGAGCATAAC -CATCAATAACCCAAAAGAATCAACTGTTAGTGATACGAACGACCATGCCAAAAATAATGATACTACCTGA -CAAATATCCTTGTAGTATAAATTCCATACTAATAACAAGTAATTGTAGAGTCACTATGTATAATCAAAAA -AACACACTATATATCAATCAAAACAACCAAAATGGCCATATATACCCACCAGATCAACCATTCAATGAAA -TCCATTGGACCTCTCAAGACTTGATTGATGCAACTCAAAATTTTCTACAACATCTAGGTATTACTGATGA -TATATACACAATATATATATTAGTGTCATAATACTCAATCCTAATACTTACCACGTCATCAAATTATTAA -CTCAAACAATTCAAGCTATGGGACAAAATGGATCCCATTATTAGTGGAAATTCTGCTAATGTTTATCTAA -CTGATAGTTATTTAAAAGGTGTTATTTCTTTCTCAGAATGTAACGCTTTAGGAAGTTACATATTCAATGG -TCCTTATCTCAAAAATGATTATACCAACTTAATTAGTAGACAAAATCCATTAATAGAACACATAAATCTA -AAGAAACTAAATATAACACAGTCCTTAATATCTAAGTATCATAAAGGTGAAATAAAAATAGAAGAACCTA -CTTACTTTCAGTCATTACTTATGACATACAAGAGTATGACCTCTTCAGAACAGACTACTACTACTAATTT -ACTTAAAAAGATAATAAGAAGAGCTATAGAAATCAGTGATGTCAAAGTCTATGCTATATTGAATAAACTG -GGGCTCAAAGAAAAAGACAAGATTAAATCCAATAATGGACAAGATGAAGACAACTCAGTCATTACTACCA -TAATCAAAGATGATATACTTCTAGCTGTCAAGGATAATCAATCTCATCCTAAAGCAGACAAAAATCAATC -CACGAAACAAAAAGATACAATCAAAACAACACTTTTGAAGAAATTAATGTGTTCAATGCAACATCCTCCA -TCATGGTTAATACATTGGTTTAATTTATACACAAAATTAAACAGCATATTAACACAATATCGATCTAGTG -AGGTAAAAAACCATGGTTTTATATTGATAGATAATCATACTCTTAGTGGATTCCAATTTATTTTGAATCA -ATATGGTTGTATAGTTTATCATAGGGAACTCAAAAGAATTACTGTGACTACTTATAATCAATTCTTGACA -TGGAAAGATATTAGCCTTAGTAGATTAAATGTTTGTTTGATTACATGGATTAGTAACTGTTTGAACACAT -TAAACAAGAGCTTAGGCTTAAGATGTGGATTCAATAATGTTATCTTGACACAATTATTCCTTTATGGAGA -TTGTATACTAAAACTATTCCACAATGAGGGGTTCTACATAATAAAAGAGGTAGAGGGATTTATTATGTCT -CTAATTTTAAATATAACAGAAGAAGATCAATTCAGAAAACGGTTTTATAATAGTATGCTCAACAACATCA -CAGATGCCGCCAACAAAGCTCAAAAAAATCTGTTATCAAGAGTATGTCATACATTATTAGATAAGACAAT -ATCAGATAATATAATAAATGGCAGATGGATAATTCTATTGAGTAAGTTCCTAAAATTAATTAAGCTTGCA -GGTGACAATAACCTCAACAATCTGAGTGAATTATATTTTTTGTTCAGAATATTTGGACACCCAATGGTAG -ATGAAAGACAAGCCATGGATGCTGTTAAAGTTAATTGCAACGAGACCAAATTTTACTTGTTAAGTAGTTT -GAGTATGTTAAGAGGAGCTTTTATATATAGAATTATAAAAGGGTTTGTAAATAATTACAACAGATGGCCT -ACTTTAAGAAATGCCATTGTCTTACCCTTAAGATGGTTAACTTACTATAAACTAAACACTTATCCTTCCT -TGTTGGAACTTACAGAAAGAGATTTGATTGTTCTATCAGGACTACGTTTCTATCGAGAGTTTCGGTTGCC -TAAAAAAGTGGATCTTGAAATGATCATAAATGATAAGGCTATATCACCTCCTAAAAATTTAATATGGACT -AGTTTCCCTAGAAATTATATGCCATCACACATACAAAATTATATAGAACATGAAAAATTAAAATTCTCTG -ATAGTGATAAATCAAGAAGAGTATTAGAGTATTATTTAAGAGATAACAAATTCAATGAATGTGATTTATA -CAACTGTGTAGTTAATCAAAGTTATCTTAACAACCCGAATCATGTGGTATCATTGACAGGCAAAGAAAGA -GAACTCAGTGTAGGTAGAATGTTTGCAATGCAACCAGGAGTGTTCAGACAAGTTCAAATATTAGCAGAGA -AAATGATAGCAGAAAACATATTACAATTTTTCCCTGAAAGTCTTACAAGATATGGTGATCTAGAACTACA -GAAAATATTAGAATTGAAAGCAGGAATAAGTAACAAATCAAATCGTTACAATGATAATTACAACAATTAC -ATTAGTAAGTGCTCTATCATCACAGATCTCAGCAAATTCAATCAAGCATTTCGATATGAAACATCATGTA -TTTGTAGTGATGTACTGGATGAACTGCATGGTGTACAATCTCTATTTTCCTGGTTACATTTAACTATTCC -TCATGTCACAATAATATGCACATATAGGCATGCACCCCCCTATATAAAGGATCATATTGTAGATCTTAAC -AATGTAGATGAGCAAAGTGGACTATATAGATATCATATGGGTGGTATCGAAGGGTGGTGTCAAAAACTAT -GGACCATAGAAGCTATATCACTATTAGATCTAATATCTCTCAAAGGGAAATTCTCAATTACTGCTTTAAT -TAATGGTGACAATCAATCAATAGATATAAGTAAACCAGTCAGACTCATGGAAGGTCAAACTCATGCTCAA -GCAGATTATTTGCTAGCATTAAATAGTCTCAAATTACTGTATAAAGAGTATGCAGGAATAGGCCACAAAT -TAAAAGGAACTGAGACTTATATATCGAGGGATATGCAATTTATGAGTAAAACGATCCAACATAACGGCGT -ATATTACCCAGCTAGTATAAAGAAAGTCCTAAGAGTGGGACCGTGGATAAACACTATACTTGATGACTTC -AAAGTGAGTCTAGAATCTATAGGTAGTTTGACACAAGAATTAGAATATAGAGGTGAAAGTCTATTATGCA -GTTTAATATTTAGAAATGTATGGTTATATAATCAAATTGCATTACAACTTAAAAATCATGCATTATGTAA -CAACAAATTATATTTGGATATATTAAAAGTTCTAAAACACTTAAAAACCTTTTTTAATCTTGATAACATT -GATACAGCATTAACATTGTATATGAATTTGCCCATGTTATTTGGTGGTGGTGATCCCAACTTGTTATATC -GAAGTTTCTATCGAAGAACTCCTGATTTCCTCACAGAGGCTATAGTTCACTCTGTGTTCATACTTAGTTA -TTATACAAACCATGATTTAAAAGATAAACTTCAAGATCTGTCAGATGATAGATTGAATAAGTTCTTAACA -TGCATAATTACGTTTGACAAAAACCCCAATGCTGAATTCGTTACATTGATGAGAGATCCTCAAGCTTTAG -GATCTGAGAGGCAAGCTAAAATTACTAGCGAAATCAATAGACTGGCAGTTACCGAGGTTTTGAGCACAGC -TCCAAACAAAATATTTTCCAAAAGTGCACAACACTATACCACTACAGAGATAGATCTTAATGATATTATG -CAAAATATAGAACCTACATATCCTCACGGGCTAAGAGTTGTTTATGAAAGTTTACCCTTTTATAAAGCAG -AGAAAATAGTAAATCTTATATCCGGTACAAAATCTATAACTAACATACTGGAAAAGACTTCTGCCATAGA -CTTAACAGATATTGATAGAGCCACTGAGATGATGAGGAAAAACATAACTTTGCTTATAAGGATATTACCA -TTAGATTGTAACAGAGATAAAAGAGAAATATTGAGTATGGAAAACCTAAGTATTACTGAATTAAGCAAAT -ACGTTAGAGAAAGATCCTGGTCTTTATCCAATATAGTTGGTGTTACATCACCTAGTATCATGTATACAAT -GGACATAAAATATACAACAAGCACTATAGCTAGTGGCATAATCATAGAGAAATATAATGTCAACAGTTTA -ACACGTGGTGAGAGAGGACCCACTAAACCATGGGTTGGTTCATCTACACAAGAGAAAAAGACAATGCCAG -TTTATAATAGACAAGTTTTAACCAAAAAACAGAGAGATCAAATAGATCTATTAGCAAAATTAGATTGGGT -GTATGCATCTATAGATAACAAGGATGAATTTATGGAGGAACTTAGCATAGGAACTCTTGGGTTAACATAT -GAGAAGGCCAAAAAATTATTCCCACAATATTTAAGTGTTAACTATTTGCATCGTCTTACAGTCAGTAGTA -GACCATGTGAATTCCCTGCATCTATACCAGCTTATAGAACTACAAATTATCACTTTGATACTAGCCCTAT -TAATCGCATATTAACAGAAAAGTATGGTGATGAAGATATTGATATAGTATTCCAAAACTGTATAAGCTTT -GGCCTCAGCTTAATGTCTGTAGTAGAACAATTTACTAATGTATGTCCTAACAGAATTATTCTCATACCCA -AGCTTAATGAGATACATTTGATGAAACCTCCCATATTCACAGGTGATGTTGATATTCACAAGTTAAAACA -AGTGATACAAAAACAACATATGTTTTTACCAGACAAAATAAGTTTGACTCAATATGTGGAATTATTCTTA -AGTAATAAAACACTCAAATCTGGATCTAATGTTAATTCTAATTTAATATTGGCGCATAAGATATCTGACT -ATTTTCATAATACTTACATTTTAAGTACTAATTTAGCTGGACATTGGATTCTTATTATACAACTTATGAA -AGATTCTAAGGGTATTTTTGAAAAAGATTGGGGAGAGGGATATATAACTGATCACATGTTCATTAATTTG -AAAGTTTTCTTCAATGCTTATAAGACATATCTCTTGTGTTTTCACAAAGGTTACGGCAGAGCAAAGCTAG -AGTGTGATATGAATACTTCAGATCTCCTATGTGTATTGGAATTAATAGACAGTAGTTATTGGAAGTCTAT -GTCCAAGGTGTTTTTAGAACAAAAAGTTATCAAATACATTCTTAGCCAAGATGCAAGTTTACATAGAGTA -AAAGGATGTCATAGCTTCAAACTATGGTTTCTTAAACGTCTTAATGTAGCAGAATTCACAGTTTGCCCTT -GGGTTGTTAACATAGATTATCATCCAACACATATGAAAGCAATATTAACTTATATTGATCTTGTTAGAAT -GGGATTGATAAATATAGATAGAATATACATTAAAAATAAACACAAGTTCAATGATGAATTTTATACTTCT -AATCTGTTTTACATTAATTATAACTTCTCAGATAATACTCATCTATTAACTAAACATATAAGGATTGCTA -ATTCCGAATTAGAAAGTAATTACAACAAATTATATCATCCCACACCAGAAACCCTAGAAAATATACTAAC -CAATCCGGTTAAAAGTAATGAAAAAAAGACACTGAGTGGCTATTGTATAGGTAAAAATGTTGACTCAATA -ATGTTACCATCGTTATCTAATAAGAAGCTTATTAAATCGTCTACTATGATTAGAACCAATTACAGCAGAC -AAGATTTGTATAATTTATTTCCTACAGTTGTGATTGATAAAATTATAGATCATTCAGGTAATACAGCCAA -ATCTAACCAACTTTACACTACTACTTCTCATCAAATATCCTTAGTGCACAATAGCACATCACTTTATTGC -ATGCTTCCTTGGCATCATATTAATAGATTCAATTTTGTATTTAGTTCTACAGGTTGTAAAATTAGTATAG -AGTATATTTTAAAAGATCTTAAAATTAAGGATCCTAATTGTATAGCATTCATAGGTGAAGGAGCAGGGAA -TTTATTATTGCGTACAGTAGTGGAACTTCATCCTGATATAAGATATATTTACAGAAGTCTGAAAGATTGC -AATGATCATAGTTTACCAATTGAGTTTTTAAGGCTGTACAATGGACATATCAACATTGATTATGGTGAAA -ATTTGACCATTCCTGCTACAGATGCAACCAACAACATTCATTGGTCTTATTTACATATAAAGTTTGCTGA -ACCTATCAGTCTTTTTGTCTGTGATGCTGAATTGCCTGTAACAGTCAACTGGAGTAAGATTATAATAGAG -TGGAGCAAGCATGTAAGAAAATGCAAATACTGCTCTTCAGTTAATAAATGTACATTAATAGTAAAATATC -ATGCTCAAGATGATATCGATTTCAAATTAGACAACATAACTATATTAAAAACTTATGTATGCTTAGGTAG -TAAGTTAAAGGGATCTGAAGTTTACTTAGTCCTTACAATAGGTCCTGCAAATGTGTTCCCAGTATTTAAT -GTAGTACAAAATGCTAAATTGATACTATCAAGAACCAAAAATTTCATCATGCCTAAAAAAGCTGATAAAG -AGTCTATTGATGCAAATATTAAGAGTTTGATACCCTTTCTTTGTTACCCCATAACAAAAAAAGGAATTAA -TACTGCATTGTCTAAATTAAAGAGTGTTGTTAGTGGAGATATACTATCATATTCTATAGCTGGACGTAAT -GAAGTTTTCAGCAATAAACTTATAAATCATAAGCATATGAACATCTTAAAGTGGTTCAATCATGTTTTGA -ATTTCAGATCAACAGAATTAAACTATAATCATTTATATATGGTAGAATCTACTTATCCTCATCTAAGTGA -ATTGTTAAACAGCTTGACAACCAATGAACTTAAAAAACTGATTAAAATCACAGGTAGTTTGTTATACAAC -TTTTATAATGAATAATGAGCAAAAATCTTATAACAAAAATAGCTACACACTAACATTGTATTCAATTGTA -GTTATTTAAAATTAATAATTATATATATTTTTTAATAACTTCTAGTGAACTAATCCTAAAATTATCATTT -TGATCTAGGAAGAATAAGTTTAAATCCAAATCTAATTGGTTTATATGTATATTAACTAAATTACGAGATA -TTAGTTTTTGACACTTTTTTTCTCGT - diff --git a/ingest/config/a_2_reference.fasta b/ingest/config/a_2_reference.fasta deleted file mode 100644 index da03e00..0000000 --- a/ingest/config/a_2_reference.fasta +++ /dev/null @@ -1,221 +0,0 @@ ->MH181982.1 Human respiratory syncytial virus A isolate KEN/KILIFI/WGS/1132_11/01/2013, partial genome -GGGGCAAATAAGAATTTGATAAGTACCACTTAAATTTAACTCCTTTGGTTAGAGATGGGCAGCAACTCAT -TGAGTATGATAAAAGTTAGATTGCAAAATCTGTTTGACAATGATGAAGTAGCATTGTTAAAAATAACATG -CTATACTGACAAATTAATACAGTTAACTAATGCTTTGGCTAAGGCAGTTATACATACAATCAAATTGAAT -GGCATTGTATTTGTGCATGTTATTACAAGTAGTGATATTTGCCCTAATAATAATATTGTAGTGAAATCCA -ATTTCACAACAATGCCAGTATTACAAAATGGAGGTTATATATGGGAAATGATGGAATTAACACACTGCTC -TCAACCTAATGGCCTAATAGATGACAATTGTGAAATTAAATTCTCCAAAAAACTAAGTGATTCAACAATG -ACCAATTATATGAATCAATTATCTGAATTACTTGGATTTGACCTCAATCCATAAATCATAATAAATATCA -ACTAGCAAATCAATGTCACTAACACCATTAGTTAATATAAAACTTGACAGAAGATAAAAATGGGGCAAAT -AAATCAATTCAGCCGACCCAACCATGGACACAACACACAATGATACCACACCACAAAGACTGATGATCAC -AGACATGAGACCATTATCGCTTGAGACTATAATAACATCTCTAACCAGAGATATCATAACACATAAATTT -ATATACTTGATAAATCATGAATGCATAGTGAGAAAACTTGATGAAAGACAGGCCACATTTACATTTCTGG -TCAACTATGAAATGAAACTATTGCACAAAGTGGGAAGCACTAAATATAAAAAATATACTGAATACAACAC -AAAATATGGCACTTTCCCTATGCCAATATTTATCAATCATGATGGGTTCCTAGAATGCATTGGCATTAAG -CCTACCAAGCACACACCCATAATATACAAGTATGATCTCAATCCATGAATATCAAACCAAGATTCAAACA -ATCCGAAATAACAGCTTTATGCATAATCACACTCCATAGTCCAAATGGAGCCTGAAAATTATAGTTATTT -AAAATTAAGGAGAGACATAAGATGAAAGATGGGGCAAATACAAAAATGGCTCTTAGCAAAGTCAAGTTGA -ATGATACACTCAACAAAGATCAACTTCTATCATCCAGCAAATATACCATCCAACGGAGCACAGGAGACAG -CATTGACACTCCTAATTATGATGTGCAGAAACACATTAATAAGTTATGTGGCATGCTATTAATCACAGAA -GATGCTAATCATAAATTCACTGGGTTAATAGGTATGTTATATGCTATGTCTAGATTAGGAAGAGAAGACA -CCATAAAAATACTCAAAGATGCGGGATATCATGTTAAGGCAAATGGAGTGGATGTAACAACACATCGTCA -AGACATTAATGGGAAAGAAATGAAATTTGAAGTGTTAACATTAGCAAGCTTAACAACTGAAATTCAAATC -AACATTGAGATAGAATCTAGAAAATCCTACAAAAAAATGCTAAAAGAAATGGGAGAGGTGGCTCCAGAAT -ACAGGCATGACTCTCCTGATTGTGGGATGATAATATTATGTATAGCAGCATTAGTAATAACCAAATTAGC -AGCAGGAGATAGATCAGGTCTTACAGCTGTGATTAGGAGAGCTAATAATGTCCTAAAAAATGAAATGAAA -CGTTATAAAGGTTTATTACCCAAGGATATAGCCAACAGCTTCTATGAAGTGTTTGAAAAATATCCTCACT -TTATAGATGTTTTTGTTCATTTTGGTATAGCACAATCTTCTACCAGAGGTGGCAGTAGAGTTGAAGGAAT -TTTTGCAGGATTGTTTATGAATGCCTATGGTGCAGGGCAAGTGATGTTACGGTGGGGGGTCTTAGCAAAA -TCAGTTAAAAACATTATGTTAGGACACGCTAGTGTACAAGCTGAAATGGAACAAGTTGTGGAGGTGTATG -AGTATGCTCAGAAATTGGGTGGAGAAGCAGGATTCTACCATATATTGAACAACCCAAAAGCATCACTATT -ATCTTTGACTCAATTTCCTCACTTCTCTAGTGTAGTATTGGGCAATGCTGCTGGCCTAGGCATAATGGGA -GAATACAGAGGTACACCAAGGAATCAAGATTTATATGATGCTGCAAAAGCATATGCTGAACAACTCAAAG -AAAATGGTGTGATTAACTACAGTGTATTAGATTTGACAGCAGAAGAACTAGAGGCTATCAAACATCAGCT -TAATCCAAAAGATAATGATGTAGAGCTTTGAGTTAATAAAAAGGTGGGGCAAATAAATCATCATGGAAAA -GTTTGCTCCTGAATTCCATGGAGAAGATGCAAACAACAGAGCCACCAAATTCCTAGAATCAATAAAGGGC -AAATTCACATCACCCAAAGATCCCAAGAAAAAAGATAGTATCATATCTGTCAACTCAATAGATATAGAAG -TAACCAAAGAAAGCCCTATAACATCAAATTCAACCATTATAAACCCAATAAATGAGACAGATGATACTGT -AGGGAACAAGCCCAATTATCAAAGAAAGCCTCTAGTAAGTTTCAAAGAAGACCCTACGCCAAGTGATAAT -CCTTTTTCAAAACTATACAAAGAAACCATAGAAACATTTGATAACAATGAAGAAGAATCTAGCTATTCAT -ATGAAGAAATAAATGATCAGACAAACGATAATATAACAGCAAGATTAGATAGGATTGATGAGAAATTAAG -TGAAATACTAGGAATGCTTCACACATTAGTAGTAGCGAGTGCAGGACCCACATCTGCTCGGGATGGTATA -AGAGATGCCATGGTTGGTTTAAGAGAAGAAATGATAGAAAAAATCAGAACTGAAGCATTAATGACCAATG -ACAGACTAGAAGCTATGGCAAGACTCAGGAATGAAGAAAGTGAAAAGATGGCAAAAGACACATCAGATGA -AGTGTCTCTCAATCCAACATCAGAGAAACTGAACAACCTGTTGGAAGGGAATGATAGTGACAATGATCTA -TCACTTGAAGATTTCTGATTAGCTACCAAACTGTACATCAAAACACAACACCAATAGAAAACCAACAAAC -AAACCAACTCACCCATCCAACCAAACATCTATCTGCTGATTAGCCAACCAGCCAAAAAACAACCAGCCAA -TCTAAAACTAGCCACCCGGAAAAAATCGATACTATAGTTACAAAAAAAGATGGGGCAAATATGGAAACAT -ACGTGAATAAACTTCACGAGGGCTCCACATACACAGCTGCTGTTCAATACAATGTCCTAGAAAAAGACGA -TGATCCTGCATCACTTACAATATGGGTGCCCATGTTCCAATCATCCATGCCAGCAGATCTACTCATAAAA -GAACTAGCCAATGTCAATATACTAGTGAAACAAATATCCACACCCAAGGGACCCTCATTAAGAGTCATGA -TAAACTCAAGAAGTGCAGTGCTAGCACAAATGCCCAGCAAATTTACCATATGTGCCAATGTGTCCTTGGA -TGAAAGAAGCAAGCTGGCATATGATGTAACCACACCCTGTGAAATTAAGGCATGCAGTCTAACATGCCTA -AAATCAAAAAATATGTTAACTACAGTTAAAGATCTCACTATGAAAACACTCAACCCAACACATGACATCA -TTGCTTTATGTGAATTTGAAAATATAGTAACATCAAAAAAAGTCATAATACCAACATACCTAAGATCTAT -CAGCGTCAGAAATAAAGATCTGAACACACTTGAAAATATAACAACCACTGAATTCAAAAATGCCATTACA -AATGCAAAAATCATCCCTTACTCAGGATTACTGTTAGTCATCACAGTGACTGACAACAAAGGAGCATTCA -AATACATAAAGCCACAAAGTCAATTCATAGTAGATCTTGGAGCTTACCTAGAAAAAGAAAGTATATATTA -TGTTACAACAAATTGGAAGCACACAGCTACACGATTTGCAATCAAACCCATGGAAGATTAACCTTTTTCC -TCTACATCAATGAGCAGATTCACACAAACTTTCCAACCACATTCTTCACTCCACAATCACAATCACCAAC -CCTCCGTGGTTCAACCAATCAAACAAAACTCATCAGGAGTTCCAGATCATCCCAAGTCATTGTTCATCAG -ATCCAGTACTCAAATAAGTTAATAAAAAATCCACATGGGGCAAATAATCATTGAGGGAAATCCAACTAAT -CACAACATCTGTCAACATAGACAAGTCAACACGCTAGATAAAATCAACCAATGGAAAATACATCCATAAC -TATAGAATTCTCAAGCAAATTCTGGCCTTACTTTACACTAATACACATGATAACAACAATAATCTCTTTG -ATAATCATAATCTCCATCATGATTGCAATACTAAACAAACTCTGCGAATATAATGTATTCCATAACAAAA -CCTTTGAGCTACCAAGAGCTCGAGTCAATACATAGCATTCACCAATCTGATAGCTCAAAACAGTAACCTT -GCATTTGTAAATGAACTACCCTCACTTCTTCACAAAACCACATCAACATCTCACCATGCAAGCCATCATC -TATACCATAAAGTAGTTAATTAAAAAATAGTCATAACAATGAACTAGGATATTAAGACCAAAAACAACGC -TGGGGCAAATGCAAACATGTCCAAAACCAAGGACCAACGCACCGCCAAGACACTAGAAAGGACCTGGGAC -ACTCTCAATCATCTGTTATTCATATCATCGTGCTTATACAAGTTAAATCTTAAATCTATAGCACAAATCA -CATTATCTATTTTGGCAATGATAATCTCAACCTCACTTATAATTGCAGCCATCATATTCATAGCCTCGGC -AAACCACAAAGTCACACTAACAACTGCAATCATACAAGATGCAACGAACCAGATCAAGAACACAACCCCA -ACATACCTCACCCAGAATCCCCAGCTTGGAATCAGCTTCTCCAATCTGTCCGGAACTACATCACAATCCA -CCACCATACTAGCTTCAACAACACCAAGTGCTGAGTCAACCCCACAATCCACAACAGTCAAGATCAAAAA -CACAACAACAACCCAAATATTACCTAGCAAACCCACCACAAAACAACGCCAAAATAAACCACAAAACAAA -CCCAACAATGATTTTCACTTTGAAGTGTTCAATTTTGTACCCTGCAGCATATGCAGCAACAATCCAACCT -GCTGGGCCATCTGCAAGAGAATACCAAACAAAAAACCTGGAAAGAAAACCACCACCAAGCCCACAAAAAA -ACCAACCCTCAAGACAACCAAAAAAGATCCCAAACCTCAAACCACAAAACCAAAGGAAGTACTCACTACC -AAGCCTACAGGAAAGCCAACCATCAACACTACTAAAACAAACATCAGAACTACACTGCTCACCTCCAACA -CCAAAGGAAATCCAGAACACACAAGTCAAGAGAAAACCCTCCACTCAACCACCTCCGAAGGCTATCCAAG -CCCATCACAAGTCTACACAACATCCGGTCAAGAGGAAACCCTCCACTCAACCACCTCCGAAGGCTATCCA -AGCCCATCACAAGTCCATACAACATCCGAGTACCTATCACAATCTCTATCTTCATCCAACACAACAAAAT -GATAGTCATTAAAAAGCGTATTGTTGCAAAAAGCCATGACCAAATCAAACAGAATCAAAATCAACTCTGG -GGCAAATAACAATGGAGTTGCCAATCCTCAAAACAAATGCTATTACCACAATCCTTGCTGCAGTCACACT -CTGTTTCGCTTCCAGTCAAAACATCACTGAAGAATTTTATCAATCAACATGCAGTGCAGTTAGCAAAGGC -TATCTTAGTGCTCTAAGAACTGGTTGGTATACTAGTGTTATAACTATAGAATTAAGTAATATCAAGGAAA -ATAAGTGTAATGGTACAGACGCTAAGGTAAAATTAATAAAACAAGAATTAGATAAATATAAAAATGCTGT -AACAGAATTGCAGTTGCTCATGCAAAGCACACCAGCAGCCAACAGTCGAGCCAGAAGAGAACTACCAAGA -TTTATGAATTATACACTCAACAATGCCAAAAACACCAATGTAACATCAAGTAAGAAAAGGAAAAGAAGAT -TTCTTGGATTTTTGTTAGGTGTTGGATCTGCAATCGCCAGTGGCATTGCCGTATCCAAGGTCCTGCACCT -AGAAGGGGAAGTGAACAAAATCAAAAGTGCTCTATTATCCACAAACAAGGCTGTAGTCAGCTTATCTAAT -GGAGTCAGTGTCTTAACCAGCAAGGTGTTAGACCTCAAGAACTATATAGATAAACAGTTGTTACCTATTG -TTAACAAGCAAAGCTGCAGCATATCAAACATTGAAACTGTGATAGAGTTCCAACAAAAGAACAACAGACT -ACTAGAGATTACCAGAGAATTTAGTGTTAATGCAGGTGTAACTACACCTGTAAGCACTTATATGTTAACT -AATAGTGAGTTATTATCATTAATCAATGATATGCCTATAACAAATGATCAGAAAAAGTTAATGTCCAGCA -ATGTTCAAATAGTTAGACAGCAAAGTTACTCTATCATGTCAATAATAAAAGAGGAAGTCTTAGCATATGT -AGTACAATTACCACTATATGGTGTAATAGATACTCCTTGTTGGAAACTACACACATCCCCTCTATGTACA -ACCAACACAAAGGAAGGATCCAACATCTGCTTAACAAGAACCGACAGAGGATGGTACTGTGACAATGCAG -GATCTGTATCCTTTTTCCCACAAGCTGAAACATGTAAAGTTCAATCGAATCGGGTGTTTTGTGACACAAT -GAACAGTTTAACATTACCAAGTGAGGTAAATCTCTGCAACATTGACATATTCAACCCCAAATATGATTGC -AAAATTATGACTTCAAAAACAGATGTAAGCAGCTCCGTTATCACATCTCTAGGAGCCATTGTGTCATGCT -ATGGCAAAACCAAATGTACAGCATCCAATAAAAATCGTGGGATCATAAAGACATTCTCTAACGGGTGTGA -TTATGTATCAAATAAGGGGGTGGATACTGTGTCTGTAGGTAATACATTATATTATGTAAATAAGCAAGAA -GGCAAAAGTCTCTATGTAAAAGGTGAACCAATAATAAATTTCTATGATCCATTAGTGTTCCCCTCTGATG -AATTTGATGCATCAATATCTCAAGTCAATGAGAAAATTAATCAGAGTCTAGCATTTATCCGTAAATCAGA -TGAATTATTACATAATGTAAATGCTGGTAAATCCACCACAAATATCATGATAACTACCATAATTATAGTA -ATTATAGTAATATTGTTAGCATTAATTGCAGTTGGACTGCTTCTATACTGCAAGGCCAGAAGCACACCAG -TCACATTAAGTAAGGATCAACTGAGTGGTATAAATAATATTGCATTTAGTAACTGAATAAAAATAGCACC -TAATCATATTCTTACAATGGTTCGCTATTTGACCATAGATAACCCATCTATCATTAGATTATCCTAAAAT -TTGAACTTCATCACAACTTTCATCTATAAACCATCTCACTTACACTTTTTAAGTGGATTCCTATTTTATA -GTTATATAAAACAATTGAATATCAAATTAACTTACTATTTGTAAAAATGAGAACTGGGGCAAATATGTCA -CGAAGGAATCCTTGCAAATTCGAAATTCGAGGTCATTGCTTGAATGGTAAAAGGTGTCATTTTAGTCATA -ATTATTTTGAATGGCCACCCCATGCACTGCTTGTAAGACAAAACTTTATGTTAAACAGAATACTTAAGTC -TATGGATAAAAGCATAGATACTTTGTCAGAAATAAGTGGAGCTGCAGAGTTGGACAGAACAGAAGAGTAT -GCCCTCGGTGTAGTTGGAGTGCTAGAGAGTTATATAGGATCAATAAATAATATAACTAAACAATCAGCAT -GTGTTGCCATGAGCAAACTCCTTACTGAACTCAACAGCGATGACATCAAAAAACTAAGGGACAATGAAGA -GCCAAACTCACCCAAAGTAAGAGTGTACAATACTGTCATATCATATATTGAAAGCAACAGGAAGAACAAT -AAACAAACTATCCATCTGTTAAAAAGATTGCCAGCAGACGTATTGAAGAAAACCATCAAAAACACATTGG -ATATCCACAAGAGCATAACCATCAATAACCCAAAAGAATCAACTGTTAGTGATACGAACGACCATGCCAA -AAATAATGATACTACCTGACAAATATCCTTGTAGTATAAATTCCATACTAATAACAAGTAATTGTAGAGT -CACTATGTATAATCAAAAAAACACACTATATATCAATCAAAACAACCAAAATAACCATATATACCCACCG -GATCAACCATTCAATGAAATCCATTGGACCTCTCAAGACTTGATTGATGCAACTCAAAATTTTCTACAAC -ATCTAGGTATTACTGATGATATATACACAATATATATATTAGTGTCATAATACTCAATCCTAATACTTAC -CACATCATCAAATTATTAACTCAAACAATTCAAGCTATGGGACAAAATGGATCCCATTATTAGTGGAAAT -TCTGCTAATGTTTATCTAACTGATAGTTATTTAAAAGGTGTTATTTCTTTCTCAGAATGTAACGCTTTAG -GAAGTTACATATTCAATGGTCCTTATCTCAAAAATGATTATACCAACTTAATTAGTAGACAAAATCCATT -AATAGAACACATAAATCTAAAGAAACTAAATATAACACAGTCCTTAATATCTAAGTATCATAAAGGTGAA -ATAAAAATAGAAGAACCTACTTACTTTCAGTCATTACTTATGACATACAAGAGTATGACCTCGTCAGAAC -AGACTACTACTACTAATTTACTTAAAAAGATAATAAGAAGAGCTATAGAAATCAGTGATGTCAAAGTCTA -TGCTATATTGAATAAACTGGGGCTCAAAGAAAAAGACAAGATTAAATCCAATAATGGACAAGATGAAGAC -AACTCAGTCATTACTACCATAATCAAAGATGATATACTTTTAGCTGTCAAGGATAATCAATCTCATCTTA -AAGCAGACAAAAATCAATCCACAAAACAAAAAGATACAATCAAAACAACACTTTTGAAGAAATTAATGTG -TTCGATGCAACATCCTCCATCATGGTTAATACATTGGTTTAATTTATACACAAAATTAAACAGCATATTA -ACACAATATCGATCTAGTGAGGTAAAAAACCATGGTTTTATATTGATAGATAATCATACTCTTAGTGGAT -TCCAATTTATTTTGAATCAATATGGTTGTATAGTTTATCATAAGGAACTCAAAAGAATTACTGTGACAAC -TTATAATCAATTCTTGACATGGAAAGATATTAGCCTTAGTAGATTGAATGTTTGTTTGATTACATGGATT -AGTAACTGTTTGAATACATTAAACAAAAGCTTAGGCTTAAGATGTGGATTCAATAATGTTATCTTGACAC -AATTATTCCTTTATGGAGATTGTATACTAAAACTATTCCACAATGAGGGGTTCTACATAATAAAAGAGGT -AGAGGGATTTATTATGTCTCTAATTTTAAATATAACAGAAGAAGATCAATTCAGAAAACGGTTTTATAAT -AGTATGCTCAACAACATCACAGATGCCGCCAACAAAGCTCAAAAAAATCTGCTATCAAGAGTATGTCATA -CATTATTAGATAAGACAATATCAGATAATATAATAAATGGCAGATGGATAATTCTATTGAGTAAGTTCCT -AAAATTAATTAAGCTTGCAGGTGACAATAACCTCAACAATCTGAGTGAATTATATTTTTTGTTCAGAATA -TTTGGACACCCAATGGTAGATGAAAGACAAGCCATGGATGCTGTTAAAGTTAATTGCAACGAGACCAAAT -TTTACTTGTTAAGTAGTTTGAGTATGTTAAGAGGAGCTTTTATATATAGAATTATAAAAGGGTTTGTAAA -TAATTACAACAGATGGCCTACTTTAAGAAATGCCATTGTCTTACCCTTAAGATGGTTAACTTACTATAAA -CTAAACACTTATCCTTCCTTGTTGGAACTTACAGAAAGAGATTTGATTGTTCTATCAGGACTACGTTTCT -ATCGAGAGTTTCGGTTGCCTAAAAAAGTGGATCTCGAAATGATCATAAATGATAAGGCTATATCACCTCC -TAAAAATTTAATATGGACTAGTTTCCCTAGAAATTATATGCCGTCACACATACAAAATTATATAGAACAT -GAAAAATTAAAATTCTCTGATAGTGATAAATCAAGAAGAGTATTAGAGTATTATTTAAGAGATAACAAAT -TCAATGAATGTGATTTACACAACTGTGTAGTTAATCAAAGTTATCTTAACAACCCGAATCATGTGGTATC -ATTGACAGGCAAAGAAAGAGAACTCAGTGTAGGTAGAATGTTTGCAATGCAACCAGGAATGTTCAGACAA -GTTCAAATATTAGCAGAGAAAATGATAGCAGAAAACATATTACAATTTTTCCCTGAAAGTCTTACAAGAT -ATGGTGATCTAGAACTACAGAAAATATTAGAATTGAAAGCAGGAATAAGTAACAAATCAAATCGTTACAA -TGATAATTACAACAATTACATTAGTAAGTGCTCTATCATCACAGATCTCAGCAAATTCAATCAAGCATTT -CGATATGAAACATCATGTATTTGTAGTGATGTACTGGATGAACTGCATGGTGTACAATCTCTATTTTCCT -GGTTACATTTAACTATTCCTCATGTCACAATAATATGCACATATAGGCATGCACCCCCCTATATAAAGGA -TCATATTGTAGATCTTAACAATGTAGATGAGCAAAGTGGACTATATAGATATCATATGGGTGGTATCGAA -GGGTGGTGTCAAAAACTATGGACCATAGAAGCTATATCACTATTAGATCTAATATCTCTCAAAGGGAAAT -TCTCAATTACTGCTTTAATTAATGGTGACAATCAATCAATAGATATAAGTAAACCAGTCAGACTCATGGA -AGGTCAAACTCATGCTCAAGCAGATTATTTGCTAGCATTAAATAGTCTCAAATTACTGTATAAAGAGTAT -GCAGGAATAGGCCACAAATTAAAAGGAACTGAGACTTATATATCGAGAGATATGCAATTTATGAGTAAAA -CGATCCAACATAACGGTGTATATTACCCAGCTAGTATAAAGAAAGTCCTAAGAGTGGGACCGTGGATAAA -CACTATACTTGATGACTTCAAAGTGAGTCTAGAATCTATAGGTAGTTTGACACAAGAATTAGAATATAGA -GGTGAAAGTCTATTATGCAGTTTAATATTTAGAAATGTATGGTTATATAATCAAATTGCATTACAACTTA -AAAATCATGCATTATGTAACAACAAATTATATTTGGATATATTAAAAGTTCTAAAACACTTAAAAACCTT -TTTTAATCTTGATAACATTGATACAGCATTAACATTGTATATGAATTTGCCCATGTTATTTGGTGGTGGT -GATCCCAACTTGTTATATCGAAGTTTCTATAGAAGAACTCCTGATTTCCTCACAGAGGCTATAGTTCACT -CTGTGTTCATACTTAGTTATTATACAAACCATGATTTAAAAGATAAACTTCAAGATCTGTCAGATGATAG -ATTGAATAAGTTCTTAACATGCATAATCACGTTTGACAAAAATCCCAATGCTGAATTCGTTACATTGATG -AGAGATCCTCAAGCTTTAGGATCTGAGAGGCAAGCTAAAATTACTAGCGAAATCAATAGACTGGCAGTTA -CCGAGGTTTTGAGCACAGCTCCAAACAAAATATTTTCCAAAAGTGCACAACACTATACCACTACAGAGAT -AGATCTTAATGATATTATGCAAAATATAGAACCTACATATCCTCACGGGCTAAGAGTTGTTTATGAAAGT -TTACCCTTTTATAAAGCAGAGAAAATAGTAAATCTTATATCCGGTACAAAATCTATAACTAACATACTGG -AAAAGACTTCTGCCATAGACTTAACAGATATTGATAGAGCCACTGAGATGATGAGGAAAAACATAACTTT -GCTTATAAGGATATTACCATTAGATTGTAACAGAGATAAAAGAGAAATATTGAGTATGGAAAACCTAAGT -ATTACTGAATTAAGCAAATACGTTAGAGAAAGATCTTGGTCTTTATCCAATATAGTTGGTGTTACATCAC -CCAGTATCATGTATACAATGGACATAAAATATACAACAAGCACTATAGCTAGTGGCATAATCATAGAGAA -ATATAATGTCAACAGTTTAACACGTGGTGAGAGAGGACCCACTAAACCATGGGTTGGTTCATCTACACAA -GAGAAAAAGACAATGCCAGTTTATAATAGACAAGTTTTAACCAAAAAACAGAGAGATCAAATAGATCTAT -TAGCAAAATTGGATTGGGTGTATGCATCTATAGATAACAAGGATGAATTTATGGAGGAACTTAGCATAGG -AACTCTTGGGTTAACATATGAGAAGGCCAAAAAATTATTCCCACAATATTTAAGTGTTAACTATTTGCAT -CGTCTTACAGTCAGTAGTAGACCATGTGAATTCCCTGCATCTATACCAGCTTATAGAACTACAAATTATC -ACTTTGATACTAGCCCTATTAATCGCATATTAACAGAGAAGTATGGTGATGAAGATATTGATATAGTATT -CCAAAACTGTATAAGCTTTGGCCTTAGCTTAATGTCTGTAGTAGAACAATTTACTAATGTATGTCCTAAC -AGAATTATTCTCATACCCAAGCTTAATGAGATACATTTGATGAAACCTCCCATATTCACAGGTGATGTTG -ATATTCACAAGTTAAAACAAGTGATACAAAAACAACATATGTTTTTACCAGACAAAATAAGTTTGACTCA -ATATGTGGAATTATTCTTAAGTAATAAAACACTCAAATCTGGATCTAATGTTAATTCTAATTTAATATTG -GCGCATAAGATATCTGACTATTTTCATAATACTTACATTTTAAGTACTAATTTAGCTGGACATTGGATTC -TTATTATACAACTTATGAAAGATTCTAAGGGTATTTTTGAAAAAGATTGGGGAGAGGGATATATAACTGA -TCATATGTTCATTAATTTGAAAGTTTTCTTCAATGCTTATAAGACATATCTCTTGTGTTTCCATAAAGGT -TACGGCAGAGCAAAGCTGGAGTGTGATATGAATACTTCAGATCTCCTATGTGTATTGGAATTAATAGACA -GTAGTTATTGGAAGTCTATGTCTAAGGTGTTTTTAGAACAAAAAGTTATCAAATACATTCTTAGTCAGGA -TGCAAGTTTACATAGAGTAAAAGGATGTCATAGCTTCAAACTATGGTTTCTTAAACGTCTTAATGTAGCA -GAATTCACAGTTTGCCCTTGGGTTGTTAACATAGATTATCATCCAACACATATGAAAGCAATATTAACTT -ATATTGATCTTGTTAGAATGGGATTGATAAATATAGATAGAATATACATTAAAAATAAACACAAGTTCAA -TGATGAGTTTTATACTTCTAATCTGTTTTACATTAATTATAACTTCTCAGATAATACTCATCTATTAACT -AAACATATAAGGATTGCTAATTCCGAATTAGAAAGTAATTACAACAAATTATATCATCCCACACCAGAAA -CCCTAGAAAATATACTAACCAATCCGGTTAAAAGTAATGAAAAAAAGACACTGAGTGACTATTGTATAGG -TAAAAATGTTGACTCAATAATGTTACCATCGTTATCTAATAAGAAGCTTATTAAATCGTCTACAATGATT -AGAACCAATTACAGCAGACAAGATTTGTATAATTTATTTCCTACGGTTGTGATTGATAAAATTATAGATC -ATTCAGGTAATACAGCCAAATCTAACCAACTTTACACTACTACTTCTCATCAGATATCCTTAGTGCACAA -TAGCACATCACTTTATTGCATGCTTCCTTGGCATCATATTAATAGATTCAATTTTGTATTTAGTTCTACA -GGTTGTAAAATTAGTATAGAGTATATTTTAAAAGATCTTAAAATTAAGGATCCTAATTGTATAGCATTCA -TAGGTGAAGGAGCAGGGAATTTATTATTGCGTACAGTAGTGGAACTTCATCCTGATATAAGATATATTTA -CAGAAGTCTGAAAGATTGCAATGATCATAGTTTACCAATTGAGTTTTTAAGGCTGTACAATGGACATATC -AACATTGATTATGGTGAAAATTTGACCATTCCTGCTACAGATGCAACCAACAACATTCATTGGTCTTATT -TACATATAAAGTTTGCTGAACCTATCAGTCTTTTTGTCTGTGATGCTGAATTGCCTGTAACAGTCAACTG -GAGTAAGATTATAATAGAGTGGAGCAAGCATGTAAGAAAATGCAAGTACTGTTCTTCAGTTAATAAATGT -ACATTAATAGTAAAATATCATGCTCAAGATGATATCGATTTCAAATTAGACAACATAACTATATTAAAAA -CTTATGTATGCTTAGGCAGTAAGTTAAAGGGATCTGAAGTTTACTTAGTCCTTACAATAGGTCCTGCAAA -TGTGTTCCCAGTATTTAATGTAGTACAAAATGCTAAATTGATACTATCAAGAACCAAAAATTTCATCATG -CCTAAAAAAGCTGATAAAGAGTCTATTGATGCAAATATTAAGAGTTTGATACCCTTTCTTTGTTACCCTA -TAACAAAAAAAGGAATTAATACTGCATTGTCTAAATTAAAGAGTGTTGTTAGTGGAGATATACTATCATA -TTCTATAGCTGGACGTAATGAAGTTTTCAGCAATAAACTTATAAATCATAAGCATATGAACATCTTAAAG -TGGTTCAATCATGTTTTAAATTTCAGATCAACAGAATTAAACTATAATCATTTATATATGGTAGAATCTA -CTTATCCTCATCTAAGTGAATTGTTAAACAGCTTGACAACCAATGAACTTAAAAAACTGATTAAAATCAC -AGGTAGTTTGTTATACAACTTTTATAATGAATAATGAGCAAAAATCTTATAACAAAAATAGCTACACACT -AACATTATATTCAATTATAGTTATTTAAAATTAATAATTATATAATTTTTAATAACTTCTAGTGAACTAA -TCCTAAAATTATCATTTTGATCTAGGAAGAATAAGTTTAAATCCAAATCTAATTGGTTTATATGTATATT -AACTAAATTACGAGATATTAGTTTTTGACACTTTTTTTCTCGTAATTTAGTTAATATACATATAAACCAA -TTAGATTTGGATTTAAACTTATTCTTCCTAGATCAAAATGATAATTTTGATCTAGGAGGAAT - diff --git a/ingest/config/a_3_reference.fasta b/ingest/config/a_3_reference.fasta deleted file mode 100644 index dafe892..0000000 --- a/ingest/config/a_3_reference.fasta +++ /dev/null @@ -1,219 +0,0 @@ ->KJ627695.1 Respiratory syncytial virus type A isolate RSV-A/US/BID-V8469/2001, complete genome -CAAATAAGAATTTGATAAGTACCACTTAAATTCAACTCCTTTGGTTAGAGATGGGCAGCAATTCATTGAG -TATGATAAAAGTTAGATTACAAAATTTATTTGACAATGATGAAGTAGCATTGTTAAAAATAACCTGCTAT -ACTGACAAATTGATACATTTAACTAATGCTTTGGCTAAGGCAGTGATACATACAATCAAATTGAATGGCA -TTGTATTTGTGCATGTTATTACAAGTAGTGATATTTGCCCTAATAATAATATTGTAGTGAAATCCAACTT -CACAACAATGCCAGTGTTACAAAATGGAGGTTATATATGGGAAATGATGGAATTAACACACTGCTCTCAA -CCCAATGGCCTAATAGATGACAATTGTGAAATCAAATTCTCCAAAAAACTAAGCGATTCAACAATGACCA -ACTATATGAATCAATTATCTGAATTACTTGGATTTGATCTTAATCCATAAATTATAATAAATATCAACTA -GCAAATCAGTGTCACTAACACCATTAGTTAATATAAAACTTGACAGAAGATAAAAATGGGGCAAATAAAT -AAACTCAGCCGACCCAACCATGGACACAACACACAATGATACCACACCACAAAGACTGATGATCACAGAC -ATGAGACCATTGTCACTTGAGACTATAATAACATCACTAACCAGAGACATCATAACACACAGATTTATAT -ACTTGATAAATCATGAATGTATAGTGAGAAAACTTGATGAAAGACAGGCCACATTTACATTCCTGGTCAA -CTATGAAATGAAACTATTGCACAAAGTGGGAAGCACTAAATACAAAAAATACACTGAATACAACACAAAA -TATGGCACTTTCCCTATGCCAATATTCATCAATCATGATGGGTTCTTAGAATGCATTGGCATTAAGCCTA -CAAAGCACACTCCCATAATATACAAGTATGATCTCAATCCATGAATTTCAACATAAGATTCACACTATCT -GAAATAACAACTTCATGCATAACTACACTCCATAGTCCAAATGGAGCCTGAAAATTATAGTAATTTAAAA -TTAAGGAGAGACATAAGATGAAAGATGGGGCAAATACAAAGATGGCTCTTAGCAAAGTCAAGTTGAACGA -TACACTCAACAAAGATCAACTTCTGTCATCCAGCAAATACACCATCCAACGGAGCACAGGAGACAGCATT -GATACTCCTAATTATGATGTGCAGAAACACATCAATAAGTTATGTGGCATGTTATTAATCACAGAAGATG -CCAATCATAAATTCACTGGGGTAATAGGCATGCTATATGCTATGTCTAGATTAGGAAGAGAAGACACCAT -AAAAATACTCAGAGATGCAGGATATCATGTAAAAGCAAATGGAGTGGATGTAACAACACATCGTCAAGAC -ATTAATGGGAAAGAAATGAAATTTGAAGTTTTAACATTGGCAAGCTTAACAACTGAAATTCAAATCAACA -TTGAGATAGAATCTAGAAAATCCTACAAAAAAATGCTAAAAGAAATGGGAGAGGTAGCTCCAGAATACAG -GCATGACTCTCCTGATTGTGGGATGATAATATTATGTATAGCAGCATTAGTAATAACTAAATTAGCAGCA -GGGGATAGATCTGGTCTTACAGCTGTAATTAGGAGAGCTAATAATGTTCTAAAAAATGAAATGAAACGTT -ATAAAGGCTTACTACCAAAGGATATAGCCAACAGCTTCTATGAAGTGTTTGAAAAATATCCTCACTTTAT -AGATGTTTTTGTTCATTTTGGTATAGCACAATCTTCTACCAGAGGTGGCAGTAGAGTTGAAGGGATTTTT -GCAGGATTGTTTATGAATGCCTATGGTGCAGGACAAGTGATGTTACGGTGGGGGGTCTTAGCAAAATCAG -TTAAAAATATTATGCTAGGACACGCTAGTGTGCAAGCAGAAATGGAACAAGTTGTGGAGGTTTATGAATA -TGCCCAAAAATTGGGTGGAGAAGCAGGATTCTACCATATATTGAACAACCCAAAAGCATCATTATTATCT -TTGACTCAATTCCCCCACTTCTCCAGTGTAGTATTAGGCAATGCTGCTGGCCTAGGCATAATGGGAGAAT -ACAGAGGTACACCAAGGAATCAAGATCTATATGATGCTGCAAAGGCATATGCTGAACAACTCAAAGAAAA -TGGTGTGATTAACTACAGTGTATTAGACTTGACAGCAGAAGAACTAGAGGCTATCAAACATCAGCTTAAT -CCAAAAGATAATGATGTAGAGCTTTGAGTTAATAAAAAAGTGGGGCAAATAAATCATCATGGAAAAGTTT -GCTCCTGAATTCCATGGAGAAGACGCAAACAACAGAGCTACTAAATTCCTAGAATCAATAAAGGGCAAAT -TCACATCACCTAAAGATCCCAAGAAAAAAGATAGTATCATATCTGTCAACTCAATAGATATAGAAGTAAC -CAAAGAAAGCCCTATAACATCAAATTCAACCATTATAAACCCTACAAATGAGACAGATGATACTGTAGGG -AACAAGCCCAATTATCAAAGGAAACCTCTAGTAAGTTTCAAAGAAGACTCTACGCCGAGTGATAATCCCT -TTTCAAAACTATACAAAGAAACCATAGAAACATTTGATAACAATGAAGAAGAATCTAGCTATTCATATGA -AGAAATAAATGATCAGACAAACGATAATATAACAGCAAGATTAGATAGGATTGATGAAAAATTAAGTGAA -ATACTAGGAATGCTTCACACACTAGTAGTAGCGAGTGCAGGACCTACATCTGCTCGGGATGGTATAAGAG -ATGCCATGGTTGGTTTAAGAGAAGAAATGATAGAAAAAATCAGAACTGAAGCATTAATGACTAATGATAG -ATTAGAAGCTATGGCAAGACTCAGGAATGAGGAAAGTGAAAAGATGGCAAAAGATACATCAGATGAAGTG -TCTCTCAATCCAACATCAGAGAAATTGAACAACCTGTTGGAGGGGAATGATAGTGACAATGATCTATCAC -TTGAAGATTTCTGATCAGTTACCAGTCTGCACATCAACACACAACACCAACAGAAGACCAACAAACAAAA -CAACTCACCTATCCAACCAAACATCTATCTGCCAATCAGCCAACCAGCCAAAAAAACACCCAGCCAATCC -AAAACCAGTCACCGGAAAAAATCGATACTATAGTTACAAAAAAAGATGGGGCAAATATGGAAACATACGT -GAACAAACTTCACGAAGGCTCCACATACACAGCTGCTGTTCAATACAATGTCCTAGAAAAAGACGATGAC -CCTGCATCACTTACAATATGGGTGCCCATGTTCCAATCATCCATGCCAGCAGATTCACTTATAAAAGAAC -TAGCTAATGTCAACATACTAGTGAAACAAATATCCACACCCAAAGGACCTTCATTAAGAGTCATGATAAA -CTCAAGAAGTGCAGTGCTAGCACAAATGCCCAGCAAATTCACTATATGTGCCAATGTGTCCTTGGATGAA -AGAAGCAAGCTGGCATATGATGTAACCACACCCTGCGAAATCAAGGCATGTAGTCTAACATGCCTAAAAT -CAAAAAATATGTTAACTACAGTTAAAGATCTCACTATGAAAACACTCAACCCAACACATGACATCATTGC -TTTATGTGAATTTGAAAATATAGTAACATCAAAAAAAGTCACAATACCAACATACCTAAGATCCATCAGT -GTCAGAAATAAAGATCTGAACACACTTGAAAATATAACAACCACCGAATTCAAAAATGCCATCACAAATG -CAAAAATCATCCCTTACTCAGGATTACTGTTAGTCATCACAGTGACTGACAACAAAGGAGCATTCAAATA -CATAAAGCCACAAAGTCAATTCATAGTAGATCTTGGAGCTTACCTAGAAAAAGAAAGTATATATTATGTT -ACAACGAATTGGAAGCACACAGCTACACGATTTGCAATCAAACCCATGGAAGATTAACCTTTTTCCTCTA -CATTAGCTAGCTGATCCATACACACCCTCTACCTACCTTCTCCACCTCACAGACACAATCACCAACCCTC -TGTGGTTCAACCAATCAAACAAAACTCATCTGGAGTCTCAGATCATCCCAAGTCATTGTTCATCAGATCT -AGTACTCAAATAAGTTAATAAAAATACCCACATGGGGCAAATAATCATCGGAGGAAATCCAACTAATCAC -AATATCTGTCAACATAGACAAGTCAACACGCCAAACAAAATAAACCAATGGAAAATACATCCATAACAAT -AGAATTCTCAAGCAAATTCTGGCCTTACTTTACATTAATACACATGATCACAACAATAATCTCTTTGCTA -ATCATAATCTCCATTATGATTGCAATACTAAACAAACTCTGTGAATATAACGTAGTCCATAACAAAACCT -TTGAGCTACCAAGAGCTCGAGTCAATACATAGCATTCCCCAATCTGATGGCTCAAAACAGTAACCTTGCA -TTTGTAAGTGAACAACCTTCACCTTTTTACAAAACCACATCAACATCTCACCAGGCAAGCCATCATCCAT -ACTATAAAGTAGTTAATTAAAAATAGTCATAGCAATGAACTAAGATATTAAGACTAACAACAACATTGGG -GCAAATGCAAACATGTCCAAAACCAAGGACCAACGCGCCGCCAAGTCACTAGAAAAGACCTGGGACACTC -TCAATCATCTATTATTCATATCATCGTGCTTATACAAGTTAAATCTTAAATCTATAGCACAAATCACATT -ATCCATTCTGGCAATGATAATCTCAACTTCACTTATAATTGCAGCCATCATATTCATAGCCTCAGCAAAC -AACAAAGTCACACTAACAACTGCAATCATACAAGATGCAACAAGCCAGATCAAGAACACAACTCCAACAT -ACCTGACCCAGAATCCTCAGCTTGGAATCAGCTTCTTTAATCTGTCTGGAACTACAACACAAACCACCGC -CATACTAGCTTTAACAACACCAAGTGTCGAGTCAATCCTGCAATCTACAACAGTCAAGACCAAAAACACA -ACAACAACCCAAATACAACCCAGCAAGCCCACCACAAAACAACGCCAAAACAAACCACCAAACAAACCCA -ATAATGATTTTCACTTTGAAGTGTTCAACTTTGTACCCTGCAGCATATGCAGCAACAATCCAACTTGCTG -GGCCATCTGCAAAAGAATACCAAGCAAAAAACCTGGAAAGAAAACCACCACCAAGCCCACAAAAAAACCA -ACCATCAAGACAACCAAAAAAGATCTCAAACCTCAAACCACAAAACCAAAGGAAACACCTACCACCAAGC -CCACAGAAAAGCCAACCATCAACATCACCAAATCAAACATCAGAACTACACTGCTCACCGACAGTACCAC -AGGAGATCTAGAACACACAAGTCAAGAGGAAATCCTCCATTCAACCTCCTCCGAAGGCAATACAAGCCCT -TCACAAGTCTATACAACATCCGAATACCTATCACAACCTCCATCTCCATCCAACATAACAAACCAGTAGT -CATTAAAAAGCGTATTATTGCAAAGAAACATGACTAAATCAAACAAAATAAAAATAAGCTTTGGGGCAAA -TAACAATGGAGTTGCCAATCCTCAAAACAAATGCAATTACCACAATCCTTGCTGCAGTCACACTCTGTTT -CGCTTCCAGTCAAAACATCACTGAAGAATTTTATCAATCAACATGCAGTGCAGTTAGCAAAGGCTATCTT -AGTGCTTTAAGAACTGGTTGGTATACTAGTGTTATAACTATAGAATTAAGTAATATCAAGGAAAATAAGT -GTAATGGAACAGACGCTAAGGTAAAATTGATAAAACAAGAATTAGATAAATATAAAAATGCTGTAACAGA -ATTGCAGTTGCTCATGCAAAGCACACCAGCAGCCAACAATCGAGCCAGAAGAGAACTACCAAGGTTTATG -AATTATACACTCAACAATACCAAAAATACCAATGTAACATTAAGCAAGAAAAGGAAAAGAAGATTTCTTG -GCTTTTTGTTAGGTGTTGGATCTGCAATCGCCAGTGGTATTGCTGTGTCTAAAGTCCTGCACCTAGAAGG -GGAAGTGAACAAACTCAAAAGTGCTTTACTATCCACAAACAAGGCTGTAGTCAGCTTATCAAATGGAGTT -AGTGTCTTAACCAGCAAAGTGTTAGACCTCAAAAACTATATAGATAAACAGTTGTTACCTATTGTGAACA -AGCAAAGCTGCAGCATATCAAACATTGAAACTGTGATAGAATTCCAACAAAAGAACAACAGACTACTAGA -GATTACCAGGGAATTTAGTGTCAATGCAGGTGTAACTACACCTGTAAGCACTTATATGTTAACAAATAGT -GAATTATTATCATTAATCAATGATATGCCTATAACAAATGATCAGAAAAAGTTAATGTCCAACAATGTTC -AAATAGTTAGGCAGCAAAGTTACTCTATCATGTCCATAATTAAGGAGGAAGTCTTAGCATATGTAGTACA -ATTACCACTATATGGTGTAATAGATACACCTTGTTGGAAACTACACACATCCCCTCTATGTACAACCAAC -ACAAAGGAAGGGTCCAACATCTGTTTAACAAGAACCGACAGAGGATGGTACTGTGACAATGCAGGATCAG -TGTCTTTCTTCCCACAAGCTGAAACGTGTAAAGTTCAATCGAATCGAGTATTTTGTGACACAATGAACAG -TTTAACATTACCAAGTGAAGTAAATCTCTGCAACATTGACATATTCAACCCCAAATATGATTGCAAAATT -ATGACTTCAAAAACAGATGTAAGCAGCTCCGTTATCACATCTCTAGGAGCCATTGTATCATGCTATGGCA -AAACTAAATGTACAGCATCCAATAAAAATCGTGGAATCATAAAGACATTTTCTAACGGGTGTGATTATGT -ATCAAATAAGGGGGTGGACACTGTATCTGTAGGTAATACATTATATTATGTAAATAAGCAAGAAGGAAAA -AGTCTCTATGTAAAAGGTGAACCAATAATAAATTTCTATGACCCATTAGTGTTCCCTTCTGATGAATTTG -ATGCATCAATATCTCAAGTCAATGAGAAGATTAACCAGAGCCTAGCATTTATTCGTAAATCCGATGAATT -ATTACATAATGTAAATGTTGGTAAATCCACCACAAATATCATGATAACTACTATAATTATAGTGATTATA -GTAATATTGTTATTATTAATTGCAGTTGGACTGTTCCTATACTGCAAGGCCAGAAGCACACCAGTCACAC -TAAGCAAGGATCAACTGAGTGGTATAAATAATATTGCATTTAGTAACTGAACAAAAATAGTACCCAATCA -TGTTCTTACAATGGTTCACTATCTGACCATAGACAACCCATCTATCATTGGATTTTCTTAAAGTCTGAAC -TTCATCGCAACTCTCATCTATAAACCATCTCACTTACACTAATTAAGTAGATTCCCATTTTATAGTTATA -TAAAAACCTACTGAGCACCAGATTAACTCACTATTTGTAAAAATTAGAAATGGGGCAAATATGTCACGAA -GGAATCCTTGCAAATTTGAAATTCGAGGTCATTGCTTGAATGGTAAGAGGTGTCATTTTAGTCATAATTA -TTTTGAATGGCCACCCCATGCACTGCTTGTAAGACAAAACTTTATGTTAAACAGAATACTTAAGTCTATG -GATAAAAGCATAGATACTTTATCAGAAATAAGTGGAGCTGCAGAGTTGGACAGAACTGAAGAGTACGCCC -TTGGTGTAGTTGGAGTGCTAGAGAGTTATATAGGATCAATAAATAATATAACTAAACAATCAGCATGTGT -TGCCATGAGCAAACTCCTCACTGAACTCAACAGTGATGACATCAAAAAACTAAGGGACAATGAAGAGCCA -AATTCACCCAAGATAAGAGTGTACAACACTGTCACATCATATATTGAAAGCAACAGGAAAAACAATAAAC -AAACTATCCATCTGTTAAAAAGATTGCCAGCAGACGTATTGAAGAAAACCATCAAAAACACATTGGATAT -CCACAAGAGCATAACCATCAATAACCCAAAAGAATCAACTGTTAATGATACAAATGACCATGCCAAAAAT -AATGATACTACCTGACAAATATCCTTGTAGTATAAATTCCATACTAATAACAAGTAGTTGTAGAGTTACT -ATGTATAATCAAAAGAACACACTATATTTAAATCAAAACAACCAAAATAACCATATATACTCACCAAATC -AACCATTCAATGAAATCCATTGGACCTCTCAAGACTTGATTGATGCAATTCAAAATTTTCTACAACATCT -AGGTATTACTGATGATATATACACAATATATATATTAGTGTCATAACACTCAATACCAATACTTACCACA -TCATCAAACTATTAACTCAAACAATTCATACCATGGGACAAAATGGATCCCATTATTAATGGAAATTCTG -CTAATGTTTATCTAACCGATAGTTATTTAAAAGGTGTTATTTCTTTCTCAGAATGTAATGCTTTGGGAAG -TTACATATTCAATGGTCCTTATCTCAAAAATGATTACACTAACTTAATTAGTAGACAAAATCCATTAATA -GAACACATAAATCTAAAGAAATTAAATATAACACAGTCCTTAATATCTAAGTATCATAAAGGTGAAATAA -AAATAGAAGAACCTACTTATTTTCAGTCATTACTTATGACATACAAGAGTATGACCTCGTCAGAACAGAT -TACTACCACTAATTTACTTAAAAAGATAATAAGAAGAGCTATAGAAATTAGTGATGTCAAAGTCTATGCT -ATATTGAATAAACTGGGGCTTAAAGAAAAAGACAAGATTAAATCCAACAATGGACAAGATGAAGACAACT -CAGTTATTACAACCATAATCAAAGATGATATTCTTTTAGCTGTTAAGGATAATCAATCTCATCTTAAAGC -AGGCAAAAATCACTCTACAAAACAAAAAGATACTATCAAAACAACACTCTTGAAAAAATTAATGTGTTCG -ATGCAACATCCTCCATCATGGTTAATACATTGGTTTAATTTATACACAAAATTAAACAACATATTAACAC -AGTATCGATCAAATGAGGTAAAAAACCATGGTTTTATATTGATAGATAATCATACTCTCAATGGTTTCCA -ATTTATTTTGAATCAATATGGTTGTATAGTTTATCATAAGGAACTCAAAAGAATTACTGTGACAACCTAT -AATCAATTCTTGACATGGAAAGATATTAGCCTTAGTAGATTAAATGTTTGTTTAATTACATGGATTAGTA -ACTGTTTGAACACATTAAACAAAAGCTTAGGCTTAAGATGTGGATTCAATAATGTTATCTTGACACAACT -ATTCCTTTATGGAGATTGTATATTAAAACTATTCCACAATGAAGGGTTCTACATAATAAAAGAGGTAGAG -GGATTTATTATGTCTCTAATTTTAAATATAACAGAAGAAGATCAATTCAGAAAACGGTTTTATAATAGTA -TGCTCAACAACATCACAGATGCTGCTAATAAAGCTCAGAAAAATCTGCTATCAAGAGTATGTCATACATT -ATTAGATAAGACAGTATCCGATAATATAATAAATGGCAGATGGATAATTCTATTAAGTAAGTTCCTTAAA -TTAATTAAGCTTGCAGGTGACAATAACCTTAACAATCTGAGTGAATTATATTTTTTATTCAGAATATTTG -GACACCCAATGGTAGATGAAAGACAAGCCATGGATGCTGTTAAAGTTAATTGCAATGAGACCAAATTTTA -CTTGTTAAGCAGTTTGAGTATGTTAAGAGGTGCATTTATATATAGAATTATAAAAGGGTTTGTAAATAAT -TACAACAGATGGCCTACTTTAAGGAATGCTATTGTTTTACCCTTAAGATGGTTAACTTACTACAAACTAA -ACACTTATCCCTCCTTGTTGGAACTTACAGAAAGAGATTTGATTGTTTTATCAGGACTACGTTTCTATCG -TGAGTTTCGGTTGCCTAAAAAAGTGGATCTTGAAATGATCATAAATGATAAGGCTATATCACCTCCTAAA -AATTTGATATGGACTAGTTTCCCTAGAAATTATATGCCGTCACACATACAAAATTATATAGAACATGAAA -AATTAAAATTTTCCGAGAGTGATAAATCAAGAAGAGTATTAGAGTACTATTTAAGAGATAACAAATTTAA -TGAATGTGATTTATATAACTGTGTAGTTAATCAAAGCTATCTTAACAACCCTAATCATGTGGTATCATTG -ACTGGCAAAGAAAGAGAACTCAGTGTAGGTAGAATGTTTGCAATGCAACCAGGAATGTTCAGACAAGTTC -AAATATTAGCAGAGAAAATGATAGCTGAAAACATTTTACAATTCTTTCCTGAAAGTCTTACAAGATATGG -TGATCTAGAATTACAGAAGATATTAGAATTGAAAGCGGGAATAAGTAACAAATCAAATCGTTACAATGAC -AATTACAACAATTACATCAGTAAGTGCTCTATCATCACAGATCTCAGCAAATTCAATCAAGCATTTCGGT -ATGAAACATCATGTATTTGTAGTGATGTACTAGATGAACTGCATGGTGTACAATCTCTATTTTCTTGGTT -ACATTTAACTATTCCTCATGTCACAATAATATGCACATATAGGCATGCACCCCCTTATATAAGAGATCAT -ATTGTGGATCTTAACAATGTAGATGAACAAAGTGGATTATATAGATATCATATGGGTGGTATCGAAGGGT -GGTGTCAAAAACTATGGACCATAGAAGCTATATCACTATTGGATCTAATATCTCTCAAAGGGAAATTCTC -TATTACTGCCTTAATTAATGGTGACAATCAATCAATAGATATAAGCAAACCAGTCAGACTCATGGAAGGT -CAAACTCATGCTCAAGCAGATTATTTGCTAGCATTAAATAGTCTTAAATTGCTGTATAAAGAGTATGCAG -GTATAGGTCACAAATTAAAAGGAACTGAGACTTATATATCAAGAGATATGCAATTTATGAGTAAAACAAT -TCAACATAACGGTGTATATTACCCAGCTAGTATAAAGAAAGTCCTAAGAGTGGGACCGTGGATAAACACT -ATACTTGATGATTTCAAAGTGAGTCTAGAATCTATAGGTAGTTTGACACAAGAATTAGAATATAGAGGTG -AAAGTCTATTGTGCAGTTTAATATTTAGAAATGTGTGGTTATATAATCAAATTGCTTTACAACTAAAAAA -TCATGCATTATGTAACAATAAATTATATTTGGACATATTAAAGGTTCTGAAACACTTAAAAACCTTTTTT -AATCTTGATAATATTGATACAGCATTAACATTGTACATGAATTTGCCCATGTTATTTGGTGGTGGTGATC -CCAACTTGTTATATCGAAGTTTCTATAGAAGAACTCCTGATTTCCTCACAGAGGCTATAGTTCACTCTGT -ATTCATACTTAGTTATTATACAAACCATGATTTAAAAGATAAACTTCAAGATCTGTCAGACGATAGATTG -AATAAGTTCTTAACATGTATAATCACGTTTGACAAAAACCCTAATGCTGAATTCGTAACATTGATGAGAG -ATCCTCAAGCTTTAGGGTCTGAGAGACAAGCTAAAATTACTAGCGAAATCAATAGACTGGCAGTTACTGA -GGTTTTGAGCACAGCTCCAAACAAAATATTTTCCAAAAGTGCACAACACTATACCACTACAGAGATAGAT -ATAAATGATATTATGCAAAATATAGAACCTACATATCCTCATGGGCTAAGAGTTGTTTATGAAAGTTTAC -CCTTTTATAAAGCAGAGAAAATAGTAAATCTTATATCCGGTACAAAATCTATAACTAACATACTGGAAAA -GACTTCTGCCATAGACTTAACAGATATTGATAGAGCCACTGATATGATGAGGAAAAACATAACTTTGCTT -ATAAGGATATTCCCATTAGATTGTAACAGAGATAAAAGAGAAATATTGAGTATGGAAAACCTAAGTATTA -CTGAATTAAGCAAATATGTTAGAGAAAGATCTTGGTCTTTGTCCAATATAGTTGGTGTTACATCACCTAG -TATCATGTATACAATGGACATCAAATATACAACAAGCACTATAGCTAGTGGCATAATCATAGAGAAATAT -AATGTCAACAGTTTAACACGTGGTGAGAGAGGACCCACTAAACCATGGGTTGGTTCATCTACACAAGAGA -AAAAAACAATGCCAGTTTATAATAGACAAGTTTTAACCAAAAAACAGAGAGATCAAATTGATCTATTAGC -AAAATTGGATTGGGTGTATGCATCTATAGATAACAAGGATGAATTCATGGAAGAACTTAGCATAGGAACT -CTTGGGTTAACATATGAGAAAGCCAAAAAATTATTTCCACAATATTTAAGTGTTAACTATTTGCACCGCC -TTACAGTCAGTAGTAGACCATGTGAATTCCCTGCATCAATACCAGCTTATAGAACTACAAATTATCACTT -TGATACTAGCCCTATTAATCGTATATTAACAGAAAAGTATGGTGATGAAGATATTGATATAGTATTCCAA -AACTGTATAAGCTTTGGCCTTAGCTTAATGTCAGTAGTAGAGCAATTTACCAATGTGTGTCCTAACAGAA -TTATTCTCATACCCAAGCTTAATGAGATACACTTGATGAAACCTCCCATATTCACAGGTGATGTTGATAT -TCACAAGTTAAAACAAGTGATCCAAAAACAGCATATGTTTTTACCAGACAAAATAAGTTTGACTCAATAT -GTGGAATTATTCTTAAGTAATAAAACACTCAAATCTGGGTCTCATGTTAATTCTAATTTAATATTGGCAC -ATAAGATATCTGACTATTTTCATAATACTTACATTTTAAGTACTAATTTAGCTGGACATTGGATTCTGAT -TATACAACTTATGAAAGATTCTAAAGGTATTTTTGAAAAAGATTGGGGAGAGGGATATATAACTGATCAT -ATGTTCATTAATTTGAAAGTTTTCTTCAATGCTTATAAGACCTATCTCTTGTGTTTTCATAAAGGTTACG -GCAGAGCAAAGCTGGAGTGTGATATGAATACTTCAGATCTCCTATGTGTATTGGAATTAATAGATAGTAG -TTATTGGAAGTCTATGTCTAAGGTATTTTTAGAACAAAAAGTTATCAAATACATTCTCAGCCAGGATGCA -AGTCTACACAGAGTCAAAGGATGTCATAGCTTCAAACTATGGTTTCTTAAACGTCTTAATGTAGCAGAAT -TCACAGTTTGCCCTTGGGTTGTTAACATAGATTATCATCCAACACATATGAAAGCAATATTAACTTATAT -AGATCTTGTTAGAATGGGATTGATAAATATAGATAGAATATACATTAAAAATAAACACAAATTCAATGAT -GAATTTTATACATCTAATCTCTTTTACATTAATTATAACTTCTCAGATAATACTCATCTATTGACTAAAC -ATATAAGGATTGCTAATTCTGAATTAGAAAATAATTACAACAAATTATATCATCCTACACCTGAAACTCT -AGAAAATATACTAACCAATCCGGTTAAATGTGATGAAAAAAAGACACTGAATGACTATTGTGTAGGTAAA -AATGTTGATTCAATAATGTTACCATTGTTATCTAATAAGAAGCTTATTAAATCGTCTACAATGATTAGAA -CCAATTACAGCAAACAAGATTTGTATAATTTATTTCCTACGGTTGTGATTGATAAAATTATAGATCATTC -GGGTAATACAGCCAAATCTAACCAACTTTACACTACTACTTCTCATCAAGTATCTTTAGTACACAATAGC -ACATCACTTTATTGCATGCTTCCTTGGCATCATATTAATAGATTCAATTTTGTGTTTAGTTCTACAGGTT -GTAAAATTAGTATAGAGTATATTTTAAAAGACCTTAAAATTAAAGATCCTAATTGTATAGCATTCATAGG -TGAAGGAGCAGGGAATTTATTGTTGCGTACAGTAGTGGAACTTCATCCTGATATAAGATATATTTACAGA -AGTCTGAAAGATTGCAATGATCATAGTTTACCTATTGAGTTTTTAAGGCTGTACAATGGACATATCAACA -TTGATTATGGAGAAAATTTGACCATTCCCGCTACAGATGCAACCAACAACATTCATTGGTCTTATTTGCA -TATAAAGTTTGCTGAACCTATCAGTCTTTTTGTTTGTGATGCTGAATTGCCTGTAACAGTCAACTGGAGT -AAAATTATAATAGAGTGGAGCAAGCATGTAAGAAAATGCAAGTACTGTTCCTCAGTTAATAAATGTACGT -TAATAGTAAAATATCATGCTCAAGATGATATCGATTTCAAATTAGACAACATAACTATATTAAAAACTTA -TGTATGCTTAGGCAGTAAGTTAAAGGGGTCTGAAGTTTACTTAGTCCTTACAATAGGTCCTGCAAATGTG -TTCCCAGTATTTAATGTAGTACAAAATGCTAAATTGATACTATCAAGAACCAAAAATTTCATCATGCCTA -AGAAGGCTGATAAAGAGTCTATTGATGCTAATATTAAAAGTTTGATACCCTTTCTTTGTTACCCTATAAC -AAAAAAAGGAATTAATACTGCATTGTCAAAACTAAAGAGTGTTGTTAGTGGAGATATACTATCATATTCT -ATAGCTGGACGTAATGAAGTTTTCAGCAATAAACTTATAAATCATAAGCATATGAACATCTTAAAGTGGT -TCAATCATGTTTTAAATTTCAGATCAACAGAACTTAACTATAATCATTTATATATGGTAGAATCTACATA -TCCTTATCTAAGTGAATTGTTAAATAGCTTGACAACTAATGAACTTAAAAAACTGATTAAAATCACAGGT -AGTTTGTTATACAACTTTCATAATGAATAATGAATAAAAATCTTATATTAAAAATTCCCACAGCTACACA -CTAACACTGTATTCAATTATAGTTATTTAAAATTAAAAATTATAAATTATATAATTTTTAATAACTTTTA -GTGGATTAATCCTAAAAATATCATTTTGATCTAGGAGGAATAAATTTAAATCCAAATCTAATTGGTTTAT -ATGTATGTTAACTAAACTACGAGATATTAGTTTTTGACAC - diff --git a/ingest/config/b_1_reference.fasta b/ingest/config/b_1_reference.fasta deleted file mode 100644 index e3a25a2..0000000 --- a/ingest/config/b_1_reference.fasta +++ /dev/null @@ -1,221 +0,0 @@ ->MT107528.1 Human respiratory syncytial virus B isolate HRSV/B/Bern/2019, complete genome -TGACGAAACGGAGTCTAGACTCCGTCACGCGAAAAAATGCGTACTACAAACTTGCACACTCGAAAAAAAT -GGGGCAAATAAGAATTTGATAAGTGCTATTTAAGTCTAACCTTTTTAATCAGAAATGGGGTGCAATTCAC -TGAGCATGATAAAGGTTAGATTACAAAATTTGTTTGATAATGACGAAGTAGCATTGTTAAAAATAACATG -TTATACTGACAAATTAATTCTTCTGACTAATGCATTAGCCAAAGCAACAATACATACAATTAAATTAAAC -GGCATAGTTTTTATACATGTTATAACAAGCAGTGAAGTGTGCCCTGATAACAATATTGTAGTGAAATCTA -ACTTTACAACAATGCCAATATTACAAAATGGAGGATACATATGGGAATTGATTGAATTGACACACTGCTC -TCAATTAAATGGTCTAATAGATGATAATTGTGAAATCAAATTTTCTAAAAGACTAAGTGACTCAGTAATG -ACTGATTATATGAATCAAATATCTGATTTACTTGGGCTTGATCTCCATTCATGAATTATGTTTAGTCTAA -TTCAATAGACATGTGTTTATTACCATTTTAGTTAATATAAAACCTCATCAAAGGGAGATGGGGCAAATAA -ACTCACCCAATCAATCAAACCATGAGCACTACAAACGACAACACCACCATGCAAAGATTGATGATCACAG -ACATGAGACCCCTGTCGATGGATTCAATAATAACATCTCTCACCAAAGAAATCATCACACACAAATTCAT -ATACTTGATAAACAATGAATGTATTGTAAGAAAACTCGATGAAAGACAAGCTACATTTACATTCCTAGTC -AATTATGAGATGAAGCTATTGCACAAAGTAGGGAGTACCAAATACAAGAAATACACCGAATATAATACAA -AATATGGCACATTCCCTATGCCTATATTTATCAATCATGGCGGGTTTCTAGAATGTATTGGCATTAAGCC -TACAAAACATACTCCTATAATATACAAATATGACCTCAACCCGTAACTTCCAACAAAAAAACCAACTCAT -CCAAACCAAGCCATTCTCCAAACAACAATGCTCAACAGTTAAGAAGGAGCTAATCCATTTTAGTAATTAA -AAATAAGGGTGAAACCAGTAACATAAATTGGGGCAAATACAAAGATGGCTCTTAGCAAAGTCAAGTTGAA -TGATACATTAAATAAGGATCAGCTGCTGTCATCCAGCAAATACACTATTCAACGTAGTACAGGAGATAAT -ATTGACACTCCCAATTATGATGTGCAAAAACACCTAAACAAACTATGTGGTATGCTATTAATCACTGAAG -ATGCAAATCATAAATTCACAGGATTAATAGGTATGCTATATGCTATGTCCAGATTAGGAAGGGAAGACAC -TATAAAGATACTTAAAGATGCTGGATATCATGTTAAAGCTAATGGAGTAGATATAACAACATATCGTCAA -GATATAAATGGAAAGGAAATGAAATTCGAAGTATTAACATTATCAAGCTTGACATCAGAAATACAAGTCA -ATATTGAGATAGAATCTAGAAAGTCCTACAAAAAAATGCTAAAAGAGATGGGAGAAGTGGCTCCAGAATA -TAGGCATGATTCTCCAGACTGTGGGATGATAATACTGTGTATAGCTGCCCTTGTAATAACCAAATTAGCA -GCAGGAGATAGATCAGGTCTTACAGCAGTAATTAGGAGGGCAAACAATGTCTTAAAAAACGAAATAAAAC -GCTACAAGGGCCTAATACCAAAAGACATAGCCAACAGTTTTTATGAAGTGTTTGAAAAATACCCTCATCT -TATAGATGTTTTTGTGCACTTTGGCATAGCACAATCATCCACAAGAGGGGGCAGTAGAGTTGAAGGAATC -TTTGCAGGATTGTTTATGAATGCCTATGGTTCAGGACAAGTAATGCTAAGATGGGGAGTTTTAGCCAAAT -CTGTAAAAAATATCATGCTAGGACATGCTAGTGTCCAAGCAGAAATGGAGCAAGTTGTGGAAGTCTATGA -GTATGCACAGAAGTTGGGAGGAGAAGCTGGTTTCTACCATATATTGAACAATCCAAAAGCATCATTGCTG -TCATTAACTCAATTTCCTAACTTCTCAAGTGTGGTCCTAGGCAATGCAGCAGGTCTAGGCATAATGGGAG -AGTATAGAGGTACACCAAGAAACCAAGATCTCTATGATGCAGCCAAAGCATATGCAGAGCAACTCAAAGA -AAATGGAGTAATAAACTACAGTGTATTAGACTTAACAACAGAAGAATTGGAAGCCATAAAGCATCAACTC -AACCCCAAAGAAGATGACGTAGAGCTTTAAGTTAACAAAAAATACGGGGCAAATAAGTCAACATGGAGAA -GTTTGCACCTGAATTTCATGGAGAAGACGCAAATAACAAAGCTACCAAATTCCTAGAATCAATAAAAGGC -AAGTTCGCATCATCCAAAGATCCTAAGAAGAAAGATAGCATAATATCTGTCAACTCAATAGACATAGAAG -TCACTAAAGAGAGCCCGATAACATCTGGCACCAACATTATCAATCCAACAAGTGAAGCCGACAGTACCCC -AGAAACTAAAGCCAACTACCCAAGAAAACCTCTAGTAAGCTTCAAAGAAGATCTCACCCCAAGTGATAAC -CCTTTCTCTAAGTTGTACAAAGAAACCATAGAAACATTTGATAACAATGAAGAAGAATCTAGCTACTCAT -ATGAGGAGATCAATGACCAAACAAATGACAACATTACAGCAAGACTAGATAGAATTGATGAAAAATTAAG -TGAAATATTAGGAATGCTCCATACATTAGTAGTTGCAAGTGCAGGACCTACTTCGGCTCGTGACGGAATA -AGAGATGCTATGGTTGGTCTAAGAGAAGAAATGATAGAAAAAATAAGAGCAGAAGCATTAATGACCAATG -ATAGGTTAGAGGCCATGGCAAGACTTAGGAATGAGGAAAGTGAAAAAATGGCAAAAGACACCTCAGATGA -AGTGTCTCTCAATCCAACCTCTAAAAAATTGAGTGACTTGTTGGAAGACAACGATAGCGACAATGATCTA -TCACTTGATGATTTTTGATCAGTGATCAACTCACTCAGCAATCAACAACATCAATAAAACAGACACCAAT -CCATTGAATCAATTGCCAGACTGAAAAAACAAACATCCATCAGCAGAACCACCAACCAATCAATCAACCA -ATTGATCAATCAGCACCCTGACAAAATTAACAATATAGTAACAAAAAAAGAACAAGATGGGGCAAATATG -GAAACATACGTGAACAAGCTTCACGAAGGCTCCACATACACAGCAGCTGTCCAGTACAATGTTCTAGAAA -AAGATGATGATCCCGCATCACTAACAATATGGGTGCCTATGTTCCAGTCATCTGTGCCAGCAGACTTGCT -CATAAAAGAACTTGCAAGCATCAACATACTAGTAAAGCAGATCTCTACGCCCAAAGGACCTTCACTACGA -GTCACGATCAACTCAAGAAGTGCTGTGCTGGCTCAAATGCCTAGTAATTTCACCATAAGTGCAAATGTAT -CATTAGATGAAAGAAGCAAATTAGCATATGATGTAACTACACCTTGTGAAATCAAAGCATGCAGTTTAAC -ATGCTTAAAAGTAAAAAGTATGTTAACTACAGTCAAAGATCTAACCATGAAGACATTCAACCCCACTCAT -GAGATCATTGCTCTATGTGAATTTGAAAATATTATGACATCAAAAAGAGTAATAATACCAACCTATCTAA -GATCAATCAGTGTCAAAAACAAAGATCTGAACTCACTGGAAAATATAGCAACCACCGAATTCAAAAATGC -TATCACCAATGCTAAAATTATTCCCTATGCAGGATTAGTGTTAGTTATCACGGTTACTGATAATAAAGGA -GCATTCAAGTATATCAAGCCACAGAGTCAATTTATAGTGGATCTTGGTGCCTACCTAGAAAAAGAGAGCA -TATATTATGTGACTACTAATTGGAAGCATACAGCTACACGTTTTTCAATCAAACCACTAGAGGATTAAAC -TCAATTATCAACATTGAATGACAGGTTCACATATATCCTCAACTGCACACTATATCTAAACATCATAAAC -ATCTACACTACACACTTCATCACACAAACCAATCCCACTCAAAATCTAAAATCACTTCCAGCCATTGTCT -GCCAGACCTAGAGTGCGAATAGGTAAATAAAACAAGAATATGGGGTAAATAGATATCAGTTAGAGTTCAA -CCAATCTCAACAACCATCTATACCGCCAATCCAATACATACATTGCAAATCTTAAAATGGGAAACACATC -CATCACAATAGAATTCACAAGCAAATTTTGGCCCTATTTTACACTAATACATATGATCTTAACTCTAATC -TCTTTACTAATTATAATCACTATTATGATTGCAATACTAAATAAGCTAAGTGAACATAAAATATTCTGCA -ACAAAACTCTTGAACAAGGACAGATGTATCAAATCGACACATAGTGTTCTCCCATTATGCTGTATCAAAT -CACAATCCTGTGTATATAAATAAACAAATCCAATCTTCTCACAGAGTCATGGCATCACAAAACCATGCCA -ACCATCATGGTAGCATAGAGTAGTTATTAAAAATTAACATAATGATGAATTATTAGTATGGGATCAAAAA -CAACATTGGGGCAAATGCAACAATGTCCAAAAACAAGAATCAACGCACTGCCAGGACTCTAGAAAAGACC -TGGGATACTCTTAATCATCTAATTGTAATATCCTCTTGTTTATACAAATTAAATTTAAAATCTATAGCAC -AAATAGCACTATCAGTTTTGGCAATGATAATCTCAACCTCTCTTATAATTGCAGCCATAATATTCATCAT -CTCTGCCAATCACAAAGTTACACTAACAACTGTCACAGTTCAAACAATAAAAAACCACACTGAGAAAAAC -ATAACCACTTACCTCACTCAAGTCTCACCAGAAAGGGTTAGCCCATCCAAACAACCCACAGCCACACCAC -CAATCCACACAAACTCAGCCACAATATCACCCAATACAAAATCAGAAACACACCATACAACAACACAAAC -CAAAGGCACAATCTCTACTCCAACACAGAACAACAAGCCAAGCACAAAACCACGTCCAAAAAATCCACCA -AAAAAAGATGATTACCATTTTGAAGTGTTCAACTTTGTTCCCTGTAGTATATGTGGCAACAATCAACTCT -GCAAATCCATTTGCAAAACAATACCAAGCAATAAACCAAAGAAAAAACCAACTACAAAACCCACAAACAA -ACCACCTACCAAAACCACAAACAAAAGAGACCCCAAAACACTAGCCAAAACACCGAAAAAAGAAACCACC -ATTAACCCAACAAAAAAACCAACCCCCAAGACTACAGAAAGAGACACCAGCACCCCACAATCCACTGTGC -TCGACATAACCACATCAAAACACACAGAAAGGGACACCAGCACCTCACAATCCATTGCGCTTGACACAAC -CACATCAAAACACACAACCCAACAGCAATCTCTCTACTCAACCATCCCCGAAAACACACCCAACTCCACA -CAAACACCCACAGCATCCGAGCCCTCCACATCAAATTCTATCTAAAGACTCCAGTCATATGCTTAGTTAT -TTAAAAACTACATCTTAGCAAAGAACCGTGATCCCTCAAGCAAGAACGAAATTAAATCTGGGGCAAATAA -CCATGGAGTTGCTGATCCATAGATCAAGTGCAATCTTCCTAACTCTTGCTATTAATGCATTGTACCTTAC -CTCAAGTCAGAACATAACTGAGGAGTTTTACCAATCAACATGTAGTGCAGTTAGCAGAGGTTACTTGAGT -GCTTTAAGAACAGGTTGGTATACCAGTGTCATAACAATAGAATTAAGTAATATAAAAGAAACCAAATGCA -ATGGAACTGACACTAAAGTTAAACTTATAAAACAAGAATTAGATAAGTATAAGAATGCAGTAACTGAATT -ACAGTTACTTATGCAAAACACACCAGCTGTCAACAACCGGGCCAGAAGAGAAGCACCACAGTATATGAAC -TACACAATCAATACCACTAAAAACCTAAATGTATCAATAAGCAAGAAGAGGAAACGAAGATTTCTGGGCT -TCTTGTTAGGTGTAGGATCTGCAATAGCAAGTGGTATAGCTGTATCCAAAGTTCTACACCTTGAAGGAGA -AGTGAACAAGATCAAAAATGCTTTGCAGCTTACAAACAAAGCTGTAGTCAGTCTATCAAATGGGGTCAGT -GTTTTAACCAGCAGAGTGTTAGATCTCAAGAATTATATAAACAACCAATTATTACCTATGGTAAATCGAC -AGAGTTGTCGCATATCCAACATTGAGACAGTTATAGAATTCCAGCAGAAGAACAGCAGATTGTTGGAAAT -CACCAGAGAATTTAGTGTCAATGCAGGTGTAACGACACCTTTAAGCACTTACATGTTAACAAACAGTGAG -TTACTATCATTAATCAATGATATGCCTATAACAAATGATCAGAAAAAATTAATGTCAAGCAATGTTCAGA -TAGTAAGGCAACAAAGTTATTCTATCATGTCTATAATAAAGGAAGAAGTTCTTGCATATGTTGTACAGCT -ACCTATCTATGGTGTAATTGATACACCTTGCTGGAAATTACACACATCACCTCTGTGCACCACCAACATC -AAAGAAGGATCAAATATTTGTTTAACAAGGACTGATAGAGGATGGTATTGTGATAATGCAGGATCAGTAT -CCTTCTTTCCACAAGCTGACACTTGTAAAGTACAGTCCAATCGAGTATTTTGTGACACTATGAACAGTTT -GACATTACCAAGTGAAGTCAGCCTTTGTAACACTGACATATTCAATTCCAAGTATGACTGCAAAATTATG -ACATCAAAAACAGACATAAGCAGCTCAGTAATTACTTCTCTAGGAGCTATAGTGTCATGCTATGGTAAGA -CTAAATGTACTGCATCCAACAAAAATCGTGGAATTATAAAGACATTTTCTAATGGTTGTGATTATGTGTC -AAACAAAGGAGTAGATACTGTATCAGTGGGCAACACTCTATATTATGTCAACAAGCTGGAAGGCAAAAAC -CTTTATGTAAAAGGGGAACCTATAATAAATTACTATGACCCTCTAGTGTTTCCTTCTGATGAGTTTGATG -CATCAATATCTCAAGTCAATGAAAAAATTAATCAAAGTTTAGCTTTTATTCGTAGATCTGATGAATTATT -ACATAATGTAAATACTGGAAAATCTACTACAAATATTATGATAACTGCAATTATTATAGTAATCATTGTA -GTATTGTTATCATTAATAGCTATTGGTTTACTGTTGTATTGCAAAGCCAAAAACACACCAGTTACATTAA -GCAAAGACCAACTAAGTGGAATCAATAATATTGCATTCAGCAAATAGACAAAAAACCACCTGATCATGTT -CCAACAACAATCTGCTGACCATCAATCCCAAATCAACTTACAACAGATACTTCAACATCACAGCACAGGC -TGAATCATCTCCTCGCATCATGCTACCCACACAACTAAGCTAGATCCTTAATTCATAGTTACATAAAAGC -CTCAAATATCGCAATCAACACTAAATCAACACATCATTCACAAAACTAACAGCTGGGGCAAATATGTCGC -GAAGAAATCCCTGCAAATTTGAGATTAGAGGTCATTGCTTGAATGGTAGAAGATGCCACTACAGTCATAA -TTACTTTGAATGGCCTCCTCATGCATTGCTAGTGAGGCAAAACTTCATGTTAAACAAGATACTCAAGTCA -ATGGACAAGAGCATAGACACTTTGTCTGAAATAAGTGGAGCTGCTGAACTTGATAGAACAGAAGAATATG -CTCTTGGTATAGTTGGAGTGCTAGAGAGTTACATAGGATCTATAAACAACATAACAAAACAATCAGCATG -TGTTGCTATGAGTAAACTTCTTATTGAGATCAATAGTGATGACATTAAAAAGCTGAGAGATAATGAAGAA -CCCAATTCACCTAAGATAAGAGTGTACAATACTGTTATATCATACATCGAGAGCAATAGAAAAAACAGCA -AGCAAACCATCCATCTGCTTAAACGATTACCAGCAGACGTGCTGAAGAAGACAATAAAGAACACATTAGA -TATCCACAAAAGCATAACCATAAGCAACCCAAAAGAGTCAACCATAAGTGATCAAAATGACCAAACCAAA -AATAATGATATTACCGGATAAATATCCTTGTAGTATATCATCCATATTGATCTCAAGTGAAAGCATGATT -GCTACATTCAATCATAAAGACATATTACAATTTAACCACAACCATTTAGATAACCACCAGTGTTTATTAA -ATCATATATTTGATGAAATTCATTGGACACCTAAAAACTTATTAGATGCCACTCAACAATTTCTCCAACA -TCTTAACATCCCTGAAGATATATATACAGTATATATATTAGTGTCATAATGCTTGATCATAGCGACTCTA -TATCATCCAACCATAAATTAGTCTTAATAAAGTTATGGGACAAAATGGATCCCATTATTAATGGAAGCTC -CGCTAATGTATATCTAACTGATAGTTATCTAAAAGGTGTTATCTCTTTTTCAGAATGTAATGCTTTAGGG -AGTTACCTTTTTAACGGCCCTTATCTTAAAAATGATTATACCAACTTAATTAGTAGACAAAGCCCACTAA -TAGAGCATATGAATCTAAAAAAACTAACTATAACACAGTCATTAATATCTAGATACCATAAAGGCGAACT -GAAATTAGAAGAACCAACTTATTTCCAGTCATTACTTATGACATATAAAAGCATGTCCTCGTCTGAACAA -ATTGCTACAACTAACTTACTTAAAAAAATAATACGAAGAGCTATAGAAATAAGTGATGTAAAGGTGTACG -CCATCTTGAATAAACTAGGACTAAAGGAAAAGGACAGAGTTAAGCCCAACAACAATTCAGGTGATGAAAA -CTCAGTACTTACAACCATAATTAAAGATGATATACTCTCAGCTGTGGAAAACAATCAATCATATACAAAT -TCAGATAAAAATTACTCAGTAAATCAAAATATCAATATCAAAACAACACTCTTAAAAAAGTTGATGTGTT -CAATGCAACATCCTCCATCATGGTTAATACACTGGTTCAATTTATATACAAAATTAAATAACATATTAAC -ACAATATCGATCAAATGAGGTAAAAAGTCATGGGTTTATATTAATAGATAATCAAACTTTGAGTGGTTTT -CAGTTTATTTTAAATCAATATGGTTGCATTGTTTATCATAAAGGGCTCAAAAAAATTACAACTACTACAT -ACAATCAATTTTTGACATGGAAAGACATCAGCCTCAGCAGATTAAATGTTTGCTTAATTACTTGGATAAG -TAATTGTTTAAATACATTAAATAAAAGCTTAGGGTTGAGATGCGGATTCAATAATGTTGTGCTATCACAA -TTATTCCTTTACGGAGATTGTATACTGAAATTATTTCATAATGAAGGATTTTACATAATAAAAGAAGTAG -AAGGATTTATTATGTCTTTAATTCTAAACATAACAGAAGAAGATCAATTTAGGAAACGATTTTATAATAG -CATGCTAAATAACATCACAGATGCAGCTATTAAGGCTCAAAAGGATCTACTATCAAGAGTATGTCACACT -TTATTAGACAAGACAGTGTCTGATAATATCATAAATGGTAAATGGATAATTCTATTAAGTAAATTTCTTA -AATTGATTAAGCTTGCAGGTGATAATAATCTCAATAACTTGAGTGAGCTATATTTTCTCTTCAGAATCTT -TGGACATCCAATGGTTGATGAAAGACAAGCAATGGATGCTGTAAGAATTAACTGCAATGAAACTAAGTTC -TATTTATTAAGTAGCCTAAGTACGTTGAGAGGTGCTTTCATTTATAGAATCATAAAAGGGTTTGTAAATA -CCTACAATAGATGGCCCACTTTAAGGAATGCTATTGTCCTACCTCTAAGATGGTTAAACTATTATAAACT -TAATACTTATCCATCTCTACTTGAAATCACAGAAAATGATTTGATTATTTTATCAGGATTGCGGTTCTAT -CGTGAGTTTCATCTGCCTAAAAAAGTGGATCTTGAAATGATAATAAATGACAAAGCTATTTCTCCTCCAA -AAGATCTAATATGGACTAGTTTTCCTAGAAATTACATGCCATCACATATACAAAATTATATAGAACATGA -AAAGTTGAAGTTCTCTGAAAGCGACAGATCAAGAAGAGTACTAGAGTATTACTTGAGAGATAATAAATTC -AATGAATGTGATCTATACAATTGTGTAGTTAATCAAAGCTATCTCAACAACTCTAATCATGTGGTATCAC -TAACTGGTAAAGAAAGAGAGCTCAGTGTGGGTAGAATGTTTGCTATGCAACCAGGTATGTTTAGGCAAAT -CCAAATCTTAGCAGAAAAAATGATAGCTGAAAATATTTTACAATTCTTCCCTGAGAGTTTGACAAGATAT -GGTGATCTAGAGCTTCAAAAGATATTAGAATTAAAAGCAGGAATAAGCAACAAGTCAAATCGTTATAATG -ATAACTACAACAATTATATCAGTAAATGTTCTATAATAACAGATCTTAGCAAATTTAATCAAGCATTTAG -ATATGAAACATCATGTGTCTGCAGTGATGTATTAGATGAACTGCATGGGGTACAATCTCTATTCTCTTGG -TTGCATTTAACAATACCTCTTGTCACAATAATATGTACATATAGACATGCACCTCCTTTTATAAAGGATC -ATGTTGTCAATCTTAATGAAGTTGATGAACAAAGTGGGTTATACAGATATCATATGGGTGGTATTGAGGG -CTGGTGTCAAAAACTGTGGACCATTGAAGCCATATCATTATTAGATCTAATATCTCTTAAAGGTAAATTC -TCCATCACAGCTCTGATAAATGGTGATAATCAGTCAATTGATATAAGTAAACCAGTTAGACTTATAGAGG -GTCAGACCCATGCTCAAGCAGATTATTTGTTAGCATTAAATAGCCTTAAGTTGCTATATAAAGAGTATGC -AGGCATAGGCCATAAGCTTAAGGGAACCGAGACCTATATATCTCGAGATATGCAGTTCATGAGCAAAACA -ATCCAGCACAATGGAGTGTACTATCCAGCCAGTATCAAAAAAGTCCTGAGAGTAGGTCCATGGATAAATA -CAATACTTGATGATTTTAAAGTTAGTTTAGAATCTATAGGTAGCTTAACACAGGAGTTAGAATACAGAGG -AGAAAGCTTATTATGCAGTTTAATATTTAGGAATATTTGGTTATACAATCAAATTGCTCTGCAACTCCGG -AATCATGCTTTATGTAATAATAAGCTATATTTAGATATATTGAAAGTATTAAAACACTTAAAAACCTTTT -TTAATCTTGATAGTATCGATACGGCATTATCATTGTATATGAACTTGCCTATGCTGTTTGGTGGTGGTGA -TCCTAATTTGTTATATCGAAGCTTTTATAGGAGAACTCCAGACTTCCTTACAGAAGCTATAGTACATTCA -GTGTTTGTGTTGAGCTATTATACTGGTCACGATCTACAAGATAAGCTCCAGGATCTTCCAGATGATAGAC -TGAACAAATTCTTGACTTGTGTCATCACATTTGATAAAAATCCAAATGCCGAGTTTGTAACATTGATGAG -GGATCCACAGGCTTTAGGGTCTGAAAGGCAAGCTAAAATTACTAGTGAGATTAATAGATTAGCAGTAACA -GAAGTCTTAAGTATAGCTCCAAACAAAATATTTTCTAAAAGTGCGCAACACTATACTACCACTGAGATTG -ATCTAAATGACATTATGCAAAATATAGAACCAACTTACCCTCATGGATTAAGAGTTGTTTATGAAAGTTT -ACCTTTTTATAAAGCAGAAAAAATAGTTAATCTTATATCAGGAACAAAATCCATAACTAATATACTTGAA -AAAACATCAGCGATAGATACAACTGATATTAATAGGGCTACTGATATGATGAGGAAAAATATAACCTTAC -TTATAAGGATACTTCCACTAGATTGTAACAAAGACAAAAGAGAGTTATTAAGTTTAGAAAATCTTAGCAT -AACTGAATTAAGCAAGTATGTAAGAGAAAGATCTTGGTCATTATCCAATATAGTAGGAGTAACATCACCA -AGTATTATGTTCACAATGGACATTAAATATACAACTAGCACTATAGCCAGTGGTATAATTATAGAAAAAT -ATAATGTTAATGGTTTAACTCGTGGTGAAAGAGGACCTACTAAGCCATGGGTAGGTTCATCTACACAAGA -GAAAAAAACAATGCCAGTGTACAATAGACAAGTTTTAACCAAAAAGCAAAGAGACCAAATAGATTTATTA -GCAAAATTAGACTGGGTATATGCATCCATAGACAACAAAGATGAATTCATGGAAGAACTGAGTACTGGAA -CACTTGGACTGTCATATGAAAAGGCCAAAAAGTTGTTTCCACAATATCTAAGTGTCAATTATTTACACCG -GTTAACAGTCAGTAGTAGACCATGCGAATTCCCTGCCTCAATACCAGCTTATAGAACAACAAATTATCAT -TTTGATACTAGTCCTATCAATCATGTATTAACAGAAAAGTATGGAGATGAAGATATCGACATTGTGTTTC -AAAATTGCATAAGTTTTGGTCTTAGCTTGATGTCAGTTGTGGAACAATTCACAAACATATGTCCTAATAG -AATTATTCTCATACCGAAGCTGAATGAGATACATTTGATGAAACCTCCTATATTTACAGGAGATGTTGAT -ATCATCAAGTTGAAGCAAGTGATACAAAAACAGCATATGTTCCTACCAGATAAAATAAGTTTAACCCAAT -ATGTAGAATTATTCTTAAGTAACAAAGCACTTAAATCTGGATCTCACATCAACTCTAATTTAATATTAGC -ACATAAAATGTCTGATTATTTTCATAATGCGTATATTTTAAGTACTAATTTAGCTGGACATTGGATTCTG -ATTATTCAACTTATGAAAGATTCAAAAGGTATTTTTGAAAAAGATTGGGGAGAGGGGTATATAACTGATC -ATATGTTCATTAATTTGAATGTTTTCTTTAATGCTTATAAGACTTATTTGCTATGTTTTCATAGAGGTTA -TGGTAAAGCAAAATTAGAATGTGATATGAACACTTCAGATCTTCTTTGTGTTTTGGAGTTAATAGACTGT -AGCTACTGGAAATCTATGTCTAAAGTTTTCCTAGAACAAAAAGTCATAAAATACATAGTCAATCAAGACA -CAAGTTTGCATAGAATAAAAGGTTGTCACAGTTTTAAGTTGTGGTTTTTAAAACGCCTTAATAATGCTAA -ATTTACCGTATGCCCTTGGGTTGTTAACATAGATTATCACCCAACACACATGAAAGCTATATTATCTTAC -ATAGATTTAGTTAGAATGGGGTTAATAAATGTAGATAAATTAACCATTAAAAATAAAAACAAATTCAATG -ATGAATTTTACACATCAAATCTCTTTTACATTAGTTACAACTTTTCAGACAACACTCATCTGCTAACAAA -ACAAATAAGGATTGCTAATTCAGAATTAGAAGATAATTATAACAAACTATATCACCCAACCCCAGAAGCT -TTAGAAAATGTATCATTAATCCCTGTTAAAAGTAATAATAGAAACAAACCTAAATTTTGTATAAGTGGAA -GTACTGAATCTATGATGACGTCAACATTCTCTAATAAAATGCATATTAAATCTTCCACTGTTACCACAAG -ATTCAATTATAGCAGACAAGACTTGTACAATTTATTTCCAATTGTTGTGATAGACAGGATTATAGATCAT -TCAGGTAATACAGAAAAATCTAACCAACTTTACACCACCACTTCACATCAGACATCTTTAGTAAGGAACA -GTGCATCACTTTATTGCATGCTTCCTTGGCATCATGTCAATAGATTTAACTTTGTATTTAGTTCCACAGG -ATGCAAGATCAGTATAGAATATATTTTAAAAGATCTTAAGATTAAAGATCCCAGTTGCATAGCATTCATA -GGTGAAGGAGCTGGTAACTTATTATTACGTACGGTAGTAGAACTTCATCCTGACATAAGATATATTTACA -GAAGTTTAAAAGATTGCAATGATCATAGTTTACCTATTGAATTTCTAAGGTTATACAACGGGCATATAAA -CATAGATTATGGTGAGAATTTAACCATTCCTGCTACAGATGCAACTAACAACATACATTGGTCTTATTTA -CACATAAAATTTGCAGAACCTATTAGTATCTTTGTCTGTGATGCTGAATTACCTGTCACAGCCAATTGGA -GTAAGATCATAATTGAATGGAGTAAGCATGTAAGAAAGTGCAAATACTGTTCTTCTGTAAATAGATGCAT -TTTAATTGCAAAATACCATGCTCAAGATGATATTGATTTCAAATTAGATAACATTACTATATTAAAAACT -TATGTGTGCCTAGGTAGCAAGTTAAAAGGATCTGAAGTTTACTTAGTCCTTACAATAGGCCCTGCAAATA -TACTTCCTGTTTTTGATGTTGTGCAAAATGCTAAATTGATTCTTTCAAGAACTAAAAATTTCATTATGCC -TAAAAAGATTGACAAGGAATCTATCGATGCAAATATTAAAAGCTTAATACCTTTCCTTTGTTACCCTATA -ACAAAAAATGGAATTAAGACTTCATTGTCAAAATTGAAGAGTGTAGTTAATGGAGATATATTATCATATT -CTATAGCTGGACGTAATGAAGTATTCAGCAACAAGCTTATAAACCACAAGCATATGAATATCTTAAAATG -GCTGGATCATGTTTTAAACTTTAGATCAGCTGAACTTAATTACAATCATTTATACATGATAGAGTCCACA -TATCCTTACTTGAGTGAATTATTAAATAGTTTAACAACCAATGAGCTCAAGAAGCTGATTAAAATAACAG -GTAGTGTACTATACAACCTTCCTAATGAACAGTAACTTAAAATATCATTAACAAGTTTGGTCAAATTTAG -ATGCTAACACATTATTATATTATAGTTATTAAAAAATATGCAAACTTTTCAATAATTTAGCATATTGATT -CCAAAATTATCATTTTAGTCTTAAGGGATTAAATAAAAGTCTAAAACTAACAATCACACATGTGCATTTA -CAACACAACGAGACATTAGTTTTTGACACTTTTTTTCTCGTGGCCGGCATG - diff --git a/ingest/config/b_2_reference.fasta b/ingest/config/b_2_reference.fasta deleted file mode 100644 index 2b8d1c6..0000000 --- a/ingest/config/b_2_reference.fasta +++ /dev/null @@ -1,221 +0,0 @@ ->KY249656.1 Human respiratory syncytial virus B isolate RSVB/England136/2014, complete genome -ACGCGAAAAAATGCGTACTACAAACTTGCACACTCGAAAAAAATGGGGCAAATAAGAATTTGATAAGTGC -TATTTAAGTCTGACCCTTTTAATCAGAAATGGGGTGCAATTCACTGAGCATGATAAAGGTTAGATTACAA -AATTTGTTTGATAATGACGAAGTAGCATTGTTAAAAATAACATGTTATACTGACAAATTAATTCTTCTGA -CTAATGCATTAGCCAAAGCAACAATACATACAATTAAATTAAACGGCATAGTTTTTATACATGTTATAAC -AAGCAGTGAAGTGTGCCCTGATAACAATATTGTAGTGAAATCTAACTTTACAACAATGCCAATATTACAA -AATGGAGGATACATATGGGAATTGATTGAATTGACACACTGCTCTCAATTAAATGGTCTAATAGATGATA -ATTGTGAAATCAAATTTTCTAAAAGACTAAGTGACTCAGTAATGACTGATTATATGAATCAAATATCTGA -TTTACTTGGGCTTGATCTCCATTCATGAATTATGTTTAGTCTAATTCAATAGACATGTGTTTATTACCAT -TTTAGTTAATATAAAACCTCATCAAAGGGAAATGGGGCAAATAAACTCACCCAATCAATCAAACCATGAG -CACTACAAACGACAACACCACCATGCAAAGATTGATGATCACAGACATGAGACCCCTGTCGATGGATTCA -ATAATAACATCTCTCACCAAAGAAATCATCACACACAAATTCATATACTTGATAAACAATGAATGTATTG -TAAGAAAACTCGATGAAAGACAAGCTACATTTACATTCCTAGTCAATTATGAGATGAAGCTATTGCACAA -AGTAGGGAGTACCAAATACAAGAAATACTCTGAATATAATACAAAATATGGCACATTCCCCATGCCTATA -TTTATCAATCATGGCGGGTTTCTAGAATGTATTGGCATTAAGCCTACAAAACATACTCCTATAATATACA -AATATGACCTCAACCCGTAACTTCCAACAAAAGAACCAACTCATCCAAACCAAGCTATTCTCTAAACAAC -AATGCTCAACAGTTAAGAAGGAGCTAATCCATTTTAGTAATTAAAAATAAGGGTGAAACCAGTAACATAA -ATTGGGGCAAATACAAAGATGGCTCTTAGCAAAGTCAAGTTGAATGATACATTAAATAAGGATCAGCTGC -TGTCATCCAGCAAATACACTATTCAACGTAGTACAGGAGATAATATTGACACTCCCAATTATGATGTGCA -AAAACACCTAAACAAACTATGTGGTATGCTATTAATCACTGAAGATGCAAATCATAAATTCACAGGATTA -ATAGGTATGCTATATGCTATGTCCAGATTAGGAAGGGAAGACACTATAAAGATACTTAAAGATGCTGGAT -ATCATGTTAAAGCTAATGGAGTAGATATAACAACATATCGTCAAGATATAAATGGAAAGGAAATGAAATT -CGAAGTGTTAACATTATCAAGCTTGACATCAGAAATACAAGTCAATATTGAGATAGAATCTAGAAAGTCC -TACAAAAAAATGCTAAAAGAGATGGGAGAAGTGGCTCCAGAATATAGGCATGATTCTCCAGACTGTGGGA -TGATAATACTGTGTATAGCTGCCCTTGTAATAACCAAATTAGCAGCAGGAGATAGATCAGGTCTTACAGC -AGTAATTAGGAGGGCAAACAATGTCTTAAAAAACGAAATAAAACGCTACAAGGGCCTAATACCAAAAGAC -ATAGCCAACAGTTTTTATGAAGTGTTTGAAAAATACCCTCATCTTATAGATGTTTTTGTGCACTTTGGCA -TAGCACAATCATCCACAAGAGGGGGCAGTAGAGTTGAAGGAATCTTTGCAGGATTGTTTATGAATGCCTA -TGGTTCAGGACAAGTAATGCTAAGATGGGGAGTTTTAGCCAAATCTGTAAAAAATATCATGCTAGGACAT -GCTAGTGTCCAAGCAGAAATGGAGCAAGTTGTGGAAGTCTATGAGTATGCACAGAAGTTGGGAGGAGAAG -CTGGTTTCTACCATATATTGAACAATCCAAAAGCATCATTGCTGTCATTAACTCAATTTCCTAACTTCTC -AAGTGTGGTCCTAGGCAATGCAGCAGGTCTAGGCATAATGGGAGAGTATAGAGGTACACCAAGAAACCAA -GATCTCTATGATGCAGCCAAAGCATATGCAGAGCAACTCAAAGAAAATGGAGTAATAAACTACAGTGTAT -TAGACTTAACAACAGAAGAATTGGAAGCCATAAAGCATCAACTCAACCCCAAAGAAGATGACGTAGAGCT -TTAAGTTAACAAAAAATACGGGGCAAATAAGTCAACATGGAGAAGTTTGCACCTGAATTTCATGGAGAAG -ATGCAAATAACAAAGCTACCAAATTCCTAGAATCAATAAAAGGCAAGTTTGCATCATCCAAAGATCCTAA -GAAGAAAGATAGCATAATATCTGTCAACTCAATAGACATAGAAGTCACTAAAGAGAGCCCGATAACATCT -GGCACCAACATTATCAATCCAACAAGTGAAGCCGACAGTACCCCAGAAACCAAAGCCAACTACCCAAGAA -AACCCCTAGTAAGCTTCAAAGAAGATCTCACCCCAAGTGATAACCCTTTCTCTAAGTTGTACAAAGAAAC -CATAGAAACATTTGATAACAATGAAGAAGAATCTAGCTACTCATATGAGGAGATCAATGATCAAACAAAT -GACAACATTACAGCAAGACTAGATAGAATTGATGAAAAATTAAGTGAAATATTAGGAATGCTCCATACAT -TAGTAGTTGCAAGTGCAGGACCTACTTCGGCTCGTGATGGAATAAGAGATGCTATGGTTGGCCTAAGAGA -AGAAATGATAGAAAAAATAAGAGCAGAAGCATTAATGACCAATGATAGGTTAGAGGCTATGGCAAGACTT -AGGAATGAGGAAAGCGAAAAAATGGCAAAAGACACCTCAGATGAAGTGTCTCTCAATCCAACCTCCAAAA -AATTGAGTGACTTGTTGGAAGACAACGATAGTGACAATGATCTATCACTTGATGACTTTTGATCAGTGAT -CAACTCACTCAGCAATCAACAACATCAATAAAACAGACACCAATCCATTGAATCAATTGCCAGACTGAAA -AAACAAACATCCATCAGCAGAACCACCAACCAATCAATCAACCAATTGATCAATCAGCACCCTGACAAAA -TTAACAATATAGTAACAAAAAAAGAACAAGATGGGGCAAATATGGAAACATACGTGAACAAGCTTCACGA -AGGCTCCACATACACAGCAGCTGTTCAGTACAATGTTCTAGAAAAAGATGATGATCCCGCATCACTAACA -ATATGGGTGCCTATGTTCCAGTCATCTGTGCCAGCAGACTTGCTCATAAAAGAACTTGCAAGCATCAACA -TACTAGTAAAGCAGATCTCTACGCCCAAAGGACCTTCACTACGAGTCACGATCAACTCAAGAAGTGCTGT -GCTGGCTCAAATGCCTAGCAATTTCACCATAAGTGCAAATGTATCATTAGATGAAAGAAGCAAATTAGCA -TATGATGTAACTACACCTTGTGAAATCAAAGCATGCAGTCTAACATGCTTAAAAGTAAAAAGTATGTTAA -CTACAGTCAAAGATCTAACCATGAAGACATTCAACCCCACTCATGAGATCATTGCTCTATGTGAATTTGA -AAATATTATGACATCAAAAAGAGTAATAATACCAACCTATCTAAGATCAATTAGTGTCAAAAACAAGGAC -CTGAACTCACTAGAAAATATAGCAACCACCGAATTCAAAAATGCTATCACCAATGCCAAAATTATTCCCT -ATGCAGGATTAGTGTTAGTCATCACGGTTACTGATAATAAAGGAGCATTCAAATATATCAAGCCACAGAG -TCAATTTATAGTGGATCTTGGTGCCTACCTAGAAAAAGAGAGCATATATTATGTGACTACTAATTGGAAG -CATACAGCTACACGTTTTTCAATCAAACCACTAGAGGATTAAACTCAATTATCAACATTGAATGACAGGT -TCACATATATCCTCAACTGCACACTATATCTAAACATCATAAACATCTACACTACACACTTCATCACACA -AACCAATCCCACTCAAAATCCAAAATCACTTCCAGCCATTGTCTGCCAGACCTAGAGTGTGAATAGGTAA -ATAAAATAAGAATATGGGGTAAATAGATATCAGTTAGAGTTCAATCAATCTCAACAACCATCTATACCGC -CAATTCAATACATATATTGCAAATTTCAAAATGGGAAACACATCCATCACAATAGAATTCACAAGCAAAT -TTTGGCCCTATTTTACACTAATACATATGATCTTAACTCTAATCTCTTTACTAATTATAATCACTATTAT -GATTGCAATACTAAATAAGCTAAGTGAACATAAAATATTCTGCAACAAAACTCTTGAACAAGGACAGATG -TATCAAATCAACACATAGTGTTCTCCCATTATGCTGTATCAAATTATAATCTTGTATATATAAATAAACA -AATCCAATCTTCTCACAGAGTCATGGCATCACAAAACCATGCCAACCATCATGGTAGCATAGAGTAGTTA -TTTAAAAATTAACATAATGATGAATTATTAGTATGGGATCAAAAACAACATTGGGGCAAATGCAACCATG -TCCAAAAACAAGAATCAACGCACTGCCAGGACTCTAGAGAAGACCTGGGATACTCTTAATCATCTAATTG -TAATATCCTCTTGTTTATACAAATTAAATTTAAAATCTATAGCACAAATAGCACTATCGGTTTTGGCAAT -GATAATCTCAACCTCTCTCATAATTGCAGCCATAATATTCATCATCTCTGCCAATCACAAAGTTACACTA -ACAACTGTCACAGTTCAAACAATAAAAAACCACACTGAGAAAAACATAACCACTTACCTTACTCAAGTCT -CACCAGAAAGGGTTAGCCCATCCAAACAACCCACAACCACACCACCAATCCACACAAACTCAGCCACAAT -ATCACCTAATACAAAATCAGAAACACACCATACAACAGCACAAACCAAAGGCAGAACCTCTACTCCAACA -CAGAACAACAAGCCAAGCACAAAACCACGTCCAAAAAAACCACCAAAAAAAGATGATTACCATTTTGAAG -TGTTCAACTTCGTTCCCTGCAGTATATGTGGCAACAATCAACTCTGCAAATCCATTTGCAAAACAATACC -AAGCAATAAACCAAAGAAAAAACCAACTACAAAACCCACAAACAAACCACCCACCAAAACCACAAACAAA -AGAGACCCTAAAACACTAGCCAAAACACCGAAAAAAGAAACTACCATTAACTCAACAAAAAAACCAACCC -CCAAGACCACAGAAAGAGACACCAGCACCCCACAATCCACTGTGCTCGACACAACCACATCAAAACACAC -AGAAAGAGACACCAGCACCTCACAATCCACTGCGCTTGACACAACCACATCAAAACACACAACCCAACAG -CAATCTCTCTACTCAACCACCCCCGAAAACACACCCAACTCCACACAAACACCCACAGCATCCGAGCCCT -CCACATCAAATTCCACCCAAAAACTCCAGTCATATGCTTAGTTATTTAAAACCTACATCTTAGCAGAGAA -CTGTGATCCTTCAAGCAAGAACGAAATTAAATCTGGGGCAAATAACCATGGAGTTGCTGATCCATAGATC -AAGTGCAATCTTCCTAACTCTTGCTATTAATGCATTGTACCTCACCTCAAGTCAGAACATAACTGAGGAG -TTTTACCAATCGACATGTAGTGCAGTTAGCAGAGGTTACTTGAGTGCTTTAAGAACAGGTTGGTATACCA -GTGTCATAACAATAGAATTAAGTAATATAAAAGAAACCAAATGCAATGGAACTGACACTAAAGTAAAACT -TATAAAACAAGAATTAGATAAGTATAAGAATGCAGTAACAGAATTACAGTTACTTATGCAAAACACACCA -GCTGTCAACAACCGGGCCAGAAGAGAAGCACCACAGTATATGAACTACACAATCAATACCACTAAAAACC -TAAATGTATCAATAAGCAAGAAGAGGAAACGAAGATTTCTGGGCTTCTTGTTAGGTGTGGGATCTGCAAT -AGCAAGTGGTATAGCTGTATCTAAAGTTCTACTCCTTGAAGGAGAAGTGAACAAGATCAAAAATGCTTTG -CAATCTACAAACAAAGCTGTAGTCAGTCTATCAAATGGGGTCAGTGTTTTAACCAGCAAAGTGTTAGATC -TCAAGAATTATATAAACAACCAATTATTACCTATAGTAAATCAACAGAGTTGTCGCATATCCAACATTGA -AACAGTTATAGAATTCCAGCAGAAGAACAGCAGATTGTTGGAAATCACCAGAGAATTTAGTGTCAATGCA -GGTGTAACGACACCTTTAAGCACTTACATGTTAACAAACAGTGAGTTACTATCATTAATCAATGATATGC -CTATAACAAATGATCAGAAAAAATTAATGTCAAGCAATGTTCAGATAGTAAGGCAACAAAGTTATTCTAT -CATGTCTATAATAAAGGAAGAAGTCCTTGCATATGTTGTACAGCTACCTATCTATGGTGTAATTGATACA -CCTTGCTGGAAATTACACACATCACCTCTGTGCACCACCAACATCAAAGAAGGATCAAATATTTGTTTAA -CAAGGACTGATAGAGGATGGTACTGTGATAATGCAGGATCAGTATCCTTCTTTCCACAGGCTGACACTTG -TAAAGTACAGTCCAATCGAGTATTTTGTGACACTATGAACAGTTTGACATTACCAAGTGAAGTCAGCCTT -TGTAACACTGACATATTCAACTCCAAGTATGACTGCAAAATTATGACATCAAAAACAGACATAAGCAGCT -CAGTAATTACTTCTCTAGGAGCTATAGTGTCATGCTATGGTAAGACTAAATGTACTGCATCCAACAAAAA -TCGTGGAATTATAAAGACATTTTCTAATGGTTGTGATTATGTGTCAAACAAAGGAGTAGATACTGTATCA -GTGGGCAACACTTTATATTATGTCAACAAGCTGGAAGGCAAAAACCTTTATGTAAAAGGGGAACCTATAA -TAAATTACTATGACCCTCTAGTGTTTCCTTCTGATGAGTTTGATGCATCAATATCTCAAGTCAATGAAAA -AATTAATCAAAGTTTAGCTTTTATTCGTAGATCCGATGAATTATTACATAATGTAAATACTGGAAAATCT -ACTACAAATATTATGATAACTGCAATTATTATAGTAATCATTGTAGTATTGTTATCATTAATAGCTATTG -GTTTACTGTTGTATTGCAAAGCCAAAAACACACCAGTTACATTAAGCAAAGACCAACTAAGTGGAATCAA -TAATATTGCATTCAGCAAATAGACAAAAAACCACCTGATCATGTTTCAACAACAATCTGCTGACCATCAA -TCCCAAATCAACTTACAACAGATACTTCAACATCACAGCACAAGCTGAATCATTTCCTCGCATCATGCTA -CCCACACAACTAAGCTAGATCCTTAACTCATAGTTACATAAAAGCCTCAAATATCGCAATCAACACTAAA -TCAACACATCATTCACAAAACTAACAGCTGGGGCAAATATGTCGCGAAGAAATCCCTGCAAATTTGAGAT -TAGAGGTCATTGCTTGAATGGTAGAAGATGCCACTACAGTCATAATTACTTTGAATGGCCTCCTCATGCA -TTGCTAGTGAGGCAAAACTTCATGTTAAACAAGATACTCAAGTCAATGGACAAGAGCATAGACACTTTGT -CTGAAATAAGTGGAGCTGCTGAACTTGATAGAACAGAAGAATATGCTCTTGGTATAGTTGGAGTGCTAGA -GAGTTACATAGGATCTATAAACAACATAACAAAACAATCAGCATGTGTTGCTATGAGTAAACTTCTTATT -GAGATCAATAGTGATGACATTAAAAAGCTGAGAGACAATGAAGAACCCAATTCACCTAAGATAAGAGTGT -ACAATACTGTTATATCATACATCGAGAGCAATAGAAAAAACAGCAAGCAAACCATCCATCTGCTCAAACG -ATTACCAGCAGACGTGCTGAAGAAGACAATAAAGAACACATTAGATATCCACAAAAGCATAACCATAAGC -AACCCAAAAGAGTCAACCATAAGTGATCAAAATGACCAAACCAAAAATAATGATATTACCGGATAAATAT -CCTTGTAGTATATCATCCATATTGATCTCAAGTGAAAGCATGATTGCTACATTCAATCATAAAGACATAT -TACAATTTAACCACAACCATTTAGATAACCACCAGTGTTTATTAAATCATATATTTGATGAAATTCATTG -GACACCTAAAAACTTATTAGATGCCACTCAACAATTTCTCCAACATCTTAACATCCCTGAAGATATATAT -ACAGTATATATATTAGTGTCATAATGCTTGATCATAACGATTCTATATCATCCAACCATAAAACGGTCTT -AATAAAGTTATGGGACAAAATGGATCCCATTATTAATGGAAGCTCTGCTAATGTATATCTAACTGATAGT -TATCTAAAAGGTGTTATCTCTTTTTCAGAATGTAATGCTTTAGGGAGTTACCTTTTTAACGGCCCTTATC -TTAAAAATGATTATACCAACTTAATTAGTAGACAAAGCCCACTAATAGAGCATATGAATCTAAAAAAACT -AACTATAACACAGTCATTAATATCTAGATACCATAAAGGTGAACTGAAATTAGAAGAACCAACTTATTTC -CAGTCATTACTTATGACATATAAAAGCATGTCCTCGTCTGAACAAATTGCTACAACTAACTTACTTAAAA -AAATAATACGAAGAGCTATAGAAATAAGTGATGTAAAGGTGTACGCCATCTTGAATAAACTAGGACTAAA -GGAAAAGGACAGAGTTAAGCCCAACAACAATTCAGGTGATGAAAACTCAGTACTTACAACCATAATTAAA -GATGATATACTCTCAGCTGTGGAAAACAATCAATCATATACAAATTCAGACAAAAATTACTCAGTAAATC -AAAATATCAATATCAAAACAACACTCTTAAAAAAATTGATGTGTTCAATGCAACATCCTCCATCATGGTT -AATACACTGGTTCAATTTATATACAAAATTAAATAACATATTAACACAATATCGATCAAATGAGGTAAAA -AGTCATGGGTTTATATTAATAGATAATCAAACTTTGAGTGGTTTTCAGTTTATTTTAAATCAATATGGTT -GCATTGTTTATCATAAAGGGCTCAAAAAAATCACAACTACTACATACAATCAATTTTTGACATGGAAAGA -CATCAGCCTCAGCAGATTAAATGTTTGCTTAATTACTTGGATAAGTAATTGTTTAAATACATTAAATAAA -AGCTTAGGGTTGAGATGCGGATTCAATAATGTTGTGCTATCACAATTATTCCTTTACGGAGATTGTATAC -TGAAATTATTTCATAATGAAGGCTTTTACATAATAAAAGAAGTAGAAGGATTTATTATGTCTTTAATTCT -AAACATAACAGAAGAAGATCAATTTAGGAAACGATTTTATAATAGCATGCTAAATAACATCACAGATGCA -GCTATTAAGGCTCAAAAGGACCTACTATCAAGAGTATGTCACACTTTATTAGACAAGACAGTGTCTGATA -ATATCATAAATGGTAAATGGATAATTCTATTAAGTAAATTTCTTAAATTGATTAAGCTTGCAGGTGATAA -TAATCTCAATAACTTGAGTGAGCTATATTTTCTCTTCAGAATCTTTGGACATCCAATGGTTGATGAAAGA -CAAGCAATGGATGCTGTAAGAATTAACTGCAATGAAACTAAGTTCTATTTATTAAGTAGCCTAAGTACAT -TGAGAGGTGCTTTCATTTATAGAATCATAAAAGGGTTTGTAAATACCTACAACAGATGGCCCACTTTAAG -GAATGCTATTGTCCTACCTCTAAGATGGTTAAACTATTATAAACTTAATACTTATCCATCTCTACTTGAA -ATCACAGAAAATGATTTGATTATTTTATCAGGATTGCGGTTCTATCGTGAGTTTCATCTGCCTAAAAAAG -TGGATCTTGAAATGATAATAAATGACAAAGCTATTTCTCCTCCAAAAGATCTAATATGGACTAGTTTTCC -TAGAAATTACATGCCATCACATATACAAAATTATATAGAACATGAAAAGTTGAAGTTCTCTGAAAGCGAC -AGATCAAGAAGAGTACTAGAGTATTACTTGAGAGATAATAAATTCAATGAATGTGATCTATACAATTGTG -TAGTTAATCAAAGCTATCTCAACAACTCTAATCATGTGGTATCACTAACTGGTAAAGAAAGAGAGCTCAG -TGTGGGTAGAATGTTTGCTATGCAACCAGGTATGTTTAGGCAAATCCAAATCTTAGCAGAGAAAATGATA -GCCGAAAATATTTTACAATTCTTCCCTGAGAGTTTGACAAGATATGGTGATCTAGAGCTTCAAAAGATAT -TAGAATTAAAAGCAGGAATAAGCAACAAGTCAAATCGTTATAATGATAACTACAACAATTATATCAGTAA -ATGTTCTATAATAACAGATCTTAGCAAATTTAATCAAGCATTTAGATATGAAACATCATGTATCTGCAGT -GATGTATTAGATGAACTGCATGGGGTACAATCTCTATTCTCCTGGTTGCATTTAACAATACCTCTTGTCA -CAATAATATGTACATATAGACATGCACCTCCTTTTATAAAGGATCATGTTGTCAATCTTAATGAAGTTGA -TGAACAAAGTGGGTTATACAGATATCATATGGGTGGTATTGAGGGCTGGTGTCAAAAACTGTGGACCATT -GAAGCCATATCATTATTAGATCTAATATCTCTTAAAGGTAAATTCTCCATCACAGCTCTGATAAATGGTG -ATAATCAGTCAATTGATATAAGTAAACCAGTTAGACTTATAGAGGGTCAGACCCATGCTCAAGCAGATTA -TTTGTTAGCATTAAATAGCCTTAAATTGCTATATAAAGAGTATGCAGGCATAGGCCATAAGCTTAAGGGA -ACTGAGACCTATATATCTCGAGATATGCAGTTCATGAGCAAAACAATCCAGCACAATGGAGTGTACTATC -CAGCCAGTATCAAAAAAGTCCTGAGAGTAGGTCCATGGATAAATACAATACTTGATGATTTTAAAGTTAG -TTTAGAATCTATAGGTAGCTTAACACAGGAGTTAGAATACAGAGGAGAAAGCTTATTATGCAGTTTAATA -TTTAGGAACATTTGGTTATACAATCAAATTGCTCTGCAACTCCGAAATCATGCTTTATGTAATAATAAGC -TATATTTAGATATATTGAAAGTATTAAAACACTTAAAAACCTTTTTTAATCTTGATAGTATCGATACGGC -GTTATCATTGTATATGAACTTGCCTATGCTGTTTGGTGGTGGTGATCCTAATTTGTTATATCGAAGCTTT -TATAGGAGAACCCCAGACTTCCTTACAGAAGCTATAGTACATTCAGTGTTCGTGTTGAGCTATTATACTG -GTCACGATCTACAAGATAAGCTCCAGGATCTTCCAGATGATAGACTGAACAAATTCTTGACTTGTGTCAT -CACATTTGATAAAAATCCAAATGCCGAGTTTGTAACATTGATGAGGGATCCACAGGCTTTAGGGTCTGAA -AGGCAAGCTAAAATTACTAGTGAGATTAATAGATTAGCAGTAACAGAAGTCTTAAGTATAGCTCCAAACA -AAATATTTTCTAAAAGTGCGCAACACTATACTACCACTGAGATTGATCTAAATGACATTATGCAAAATAT -AGAACCAACTTACCCTCACGGATTAAGAGTTGTTTATGAAAGTTTACCTTTTTATAAAGCAGAAAAAATA -GTTAATCTTATATCAGGAACAAAATCCATAACTAATATACTTGAAAAAACATCAGCGATAGATACAACTG -ATATTAATAGGGCTACTGATATGATGAGGAAAAATATAACCTTACTTATAAGAATACTTCCACTAGATTG -TAACAAAGACAAAAGAGAGTTATTAAGTTTAGAAAATCTTAGCATAACTGAATTAAGCAAGTACGTAAGA -GAAAGATCTTGGTCATTATCCAATATAGTAGGAGTAACATCGCCAAGTATTATGTTCACAATGGACATTA -AATATACAACTAGCACTATAGCCAGTGGTATAATTATAGAAAAATATAATGTTAATGGTTTAACTCGTGG -TGAAAGAGGACCCACTAAGCCATGGGTAGGTTCATCCACACAGGAGAAAAAAACAATGCCAGTGTACAAT -AGACAAGTTTTAACCAAAAAGCAAAGAGACCAAATAGATTTACTAGCAAAATTAGACTGGGTATATGCAT -CCATAGACAACAAAGATGAATTCATGGAAGAACTGAGTACTGGAACACTTGGACTGTCATATGAAAAAGC -CAAAAAGTTGTTTCCACAATATCTAAGTGTCAATTATTTACACCGGTTAACAGTCAGTAGTAGACCATGC -GAATTCCCTGCCTCAATACCAGCTTATAGAACAACAAATTATCATTTTGATACTAGTCCTATCAATCATG -TATTAACAGAAAAGTATGGAGATGAAGATATCGACATTGTGTTTCAAAATTGCATAAGTTTCGGTCTTAG -CTTGATGTCAGTTGTGGAACAATTCACAAACATATGTCCTAATAGAATTATTCTCATACCGAAGCTGAAT -GAGATACATTTGATGAAACCTCCTATATTTACAGGAGATGTTGATATCATCAAGTTGAAGCAAGTGATAC -AAAAACAGCATATGTTCCTACCAGATAAAATAAGTTTAACCCAATATGTAGAATTATTCTTAAGTAACAA -AACACTTAAATCTGGATCCCACATCAACTCTAATTTAATATTAGTACATAAAATGTCTGATTATTTTCAT -AATGCGTATATTTTAAGTACTAATTTAGCTGGACATTGGATTCTGATTATTCAACTTATGAAAGATTCAA -AAGGTATTTTTGAAAAAGATTGGGGAGAGGGGTATATAACTGATCATATGTTCATTAATTTGAATGTTTT -CTTTAATGCTTATAAGACTTATTTGCTATGTTTTCATAGAGGTTATGGTAAAGCAAAATTAGAATGTGAT -ATGAACACTTCAGATCTTCTTTGTGTTTTGGAGTTAATAGACAGTAGCTACTGGAAATCTATGTCTAAAG -TTTTCCTAGAACAAAAAGTCATAAAATACATAGTCAATCAAGACACAAATTTGCATAGAATAAAAGGCTG -TCACAGTTTTAAGTTGTGGTTTTTAAAACGCCTTAATAATGCTAAATTTACCGTATGCCCTTGGGTTGTT -AACATAGATTATCACCCAACACACATGAAAGCTATATTATCTTACATAGATTTAGTTAGAATGGGGTTAA -TAAATGTAGATAAATTAACCATTAAAAATAAAAGCAAATTCAATGATGAATTTTACACATCAAATCTCTT -TTACATTAGTTATAACTTTTCAGACAACACTCATCTGCTAACAAAACAAATAAGAATTGCTAATTCAGAA -TTAGAAGATAATTATAACAAACTATATCACCCAACCCCAGAAACTTTAGAAAATATATCATTAATCCCTG -TTAAAAGTAATAATAGAAACAAACCTAAATTTTGTATAAGTGGAAGTACTGAATCTATGATGACGTCAAC -ATTCTCTAATAAAATGCATATTAAATCTTCCACTGTTACCACAAGATTCAATTATAGCAGACAAGACTTG -TACAATTTATTTCCAATTGTTGTGATAGACAGGATTATAGATCATTCAGGTAATACAGAAAAATCTAACC -AACTTTACACCACCACTTCACATCAAACATCTTTAGTAAGGAATAGTGCATCACTTTATTGCATGCTTCC -TTGGCATCATGTCAATAGATTTAACTTTGTATTTAGTTCCACAGGATGCAAGATCAGTATAGAGTATATT -TTAAAAGATCTTAAGATTAAAGATCCCAGTTGTATAGCATTCATAGGTGAAGGAGCTGGTAACTTATTAT -TACGTACGGTAGTAGAACTTCATCCTGACATAAGATATATTTACAGAAGTTTAAAAGATTGCAATGATCA -TAGTTTACCTATTGAATTTCTAAGGTTATACAACGGGCATATAAACATAGATTATGGTGAGAATTTAACC -ATTCCTGCTACAGATGCAACTAACAACATACATTGGTCTTATTTACATATAAAATTTGCAGAACCTATTA -GTATCTTTGTCTGCGATGCTGAATTACCTGTCACAGCCAATTGGAGTAAAATTATAATTGAATGGAGTAA -GCATGTAAGAAAGTGCAAATACTGTTCTTCTGTAAATAGATGCATTTTAATTGCAAAATACCATGCTCAA -GATGATATTGATTTCAAATTAGATAACATTACTATATTAAAAACTTATGTGTGCCTAGGTAGCAAGTTAA -AAGGATCTGAAGTTTACTTAGTCCTTACAATAGGCCCTGCAAATATACTTCCTGTTTTTGATGTTGTGCA -AAATGCTAAATTGATTCTTTCAAGAACTAAAAATTTCATTATGCCTAAAAAGATTGACAAGGAATCTATC -GATGCAAATATTAAAAGCTTAATACCTTTCCTTTGTTACCCTATAACAAAAAATGGAATTAAGACTTCAT -TGTCAAAATTGAAGAGTGTAGTTAATGGAGATATATTATCATACTCTATAGCTGGACGTAATGAAGTATT -CAGCAACAAGCTTATAAACCACAAGCATATGAATATCTTAAAATGGCTGGATCATGTTTTAAACTTTAGA -TCAGCTGAACTTAATTACAATCATTTATACATGATAGAGTCCACATATCCTTACTTGAGTGAATTATTAA -ATAGTTTAACAACCAATGAGCTCAAGAAGCTGATTAAAATAACAGGTAGTGTACTATACAACCTTCCTAA -TGAACAGTAACTTAAAATATCATTAACAAGTTTGGTCAAATTTAGATGCTAACACATCATTATATTATAG -TTATTAAAAAATATGCAAACTTTTCAATAATTTAGCATATTGATTCCAAAATTATCATTTTAGTCTTAAG -GGATTAAATAAAAGTCTAAAACTAACAATCACACATGTGCATTTACAACACAACGAGACATTAGTTTTTG -ACACTTTTTTTCTCGT - diff --git a/ingest/config/b_3_reference.fasta b/ingest/config/b_3_reference.fasta deleted file mode 100644 index 2be1f0f..0000000 --- a/ingest/config/b_3_reference.fasta +++ /dev/null @@ -1,219 +0,0 @@ ->MG642037.1 Human respiratory syncytial virus B strain RSVB/Homo sapiens/USA/MCRSV_208/1980, complete genome -ACTTGCACATTCGGAAAAAATGGGGCAAATAAGAATTTGATAAGTGTTATTTAAGTCTAACCTTTTCAAT -CAGAAATGGGGTGCAATTCACTGAGCATGATAAAGGTTAGATTACAAAATTTATTTGACAATGATGAAGT -AGCATTGTTAAAAATAACATGTTATACTGACAAATTAATTCTTCTGACTAATGCATTAGCCAAAGCAGCA -ATACATACAATTAAATTAAACGGCATAGTTTTTATACATGTTATAACAAGCAGTGAAGTGTGCCCTGATA -ACAATATTGTAGTGAAATCTAACTTTACAACAATGCCAATATTACAAAATGGAGGATACATATGGGAATT -GATTGAGTTGACACACTGCTCTCAATTGAATGGTCTAATGGATGATAATTGTGAAATCAAATTTTCTAAA -AGACTAAGTGACTCAGTAATGACTGATTATATGAATCAAATATCTGATTTACTTGGGCTTGATCTCAATT -CATGAATTATGTTTAGTCTAATTCAATAGACATATGTTTATTACCATTTTAGTTAATATAAAAACTCATC -AAAGGGAGATGGGGCAAATAAACTCACCTAATCAGTCAAACCATGAGCACTACAAATGACAACACTACTA -TGCAAAGATTGATGATCACAGACATGAGACCCCTGTCGATGGAATCAATAATAACATCTCTCACCAAAGA -AATCATAACACACAAATTCATATACTTGATAAACAATGAATGTATTGTAAGAAAACTTGATGAAAGACAA -GCTACATTTACATTCTTAGTCAATTATGAGATGAAGCTACTGCACAAAGTAGGGAGTACCAAATACAAAA -AATACACTGAATATAATACAAAATATGGCACTTTCCCCATGCCTATATTTATCAATCATGGCGGGTTTCT -AGAATGTATTGGCATTAAGCCTACAAAACACACTCCTATAATATACAAATATGACCTCAACCCGTAAATT -CCAACAAAAAAACTAACCAATCCAAACTAAGCTATTCCTTAAACAACAGTGATCAACAGTTAAGAAGGAG -CTAATCCATTTTAGTAATTAAAAATAAAGGTAAAGCCAATAACATAAATTGGGGCAAATACAAAGATGGC -TCTTAGCAAAGTCAAGTTAAATGATACATTAAATAAGGATCAGCTGCTGTCATCCAGCAAATACACTATT -CAACGTAGTACAGGAGATAATATTGACACTCCCAATTATGATGTGCAAAAACACCTAAACAAACTATGTG -GTATGCTATTAATCACTGAAGATGCAAATCATAAATTCACAGGATTAATAGGTATGTTATATGCTATGTC -CAGGTTAGGAAGGGAAGACACTATAAAGATACTTAAAGATGCTGGATATCATGTTAAAGCTAATGGAGTA -GATATAACAACATATCGTCAAGATATAAATGGAAAGGAAATGAAATTCGAAGTATTAACATTATCAAGCT -TGACATCAGAAATACAAGCCAATATTGAGATAGAATCTAGAAAGTCCTACAAAAAAATGCTAAAAGAGAT -GGGAGAAGTGGCTCCAGAATATAGGCATGATTCTCCAGACTGTGGGATGATAATACTGTGTATAGCTGCA -CTTGTAATAACCAAATTAGCAGCAGGAGATAGATCAGGTCTTACAGCAGTAATTAGGAGGGCAAACAATG -TCTTAAAAAACGAAATAAAACGCTACAAGGGCCTCATACCAAAGGATATAGCTAACAGTTTTTATGAAGT -GTTTGAAAAACACCCTCATCTTATAGATGTTTTTGTGCACTTTGGCATTGCACAATCATCCACAAGAGGG -GGTAGTAGAGTTGAAGGAATCTTTGCAGGATTATTTATGAATGCCTATGGTTCAGGGCAAGTAATGCTAA -GATGGGGAGTTTTAGCCAAATCTGTAAAAAATATCATGCTAGGACATGCTAGTGTCCAGGCAGAAATGGA -GCAAGTTGTGGAAGTCTATGAATATGCACAGAAGTTGGGAGGAGAAGCTGGATTCTACCATATATTGAAC -AATCCAAAAGCATCATTGCTGTCATTAACTCAATTTCCTAACTTCTCAAGTGTGGTCCTAGGCAATGCAG -CAGGTCTAGGCATAATGGGAGAGTATAGAGGTACACCAAGAAACCAGGATCTTTATGATGCAGCCAAAGC -ATATGCAGAGCAACTCAAAGAAAATGGAGTAATAAACTACAGTGTATTAGACTTAACAGCAGAAGAATTG -GAAGCCATAAAGCATCAACTCAACCCCAAAGAAGATGATGTAGAGCTCTAAGTTAACAAAAAATACGGGG -CAAATAAGTCAACATGGAGAAGTTTGCACCTGAATTTCATGGAGAAGATGCAAATAACAAAGCTACCAAA -TTCCTAGAATCAATAAAGGGCAAGTTCGCATCATCCAAAGATCCTAAGAAGAAAGATAGCATAATATCTG -TTAACTCAATAGATATAGAAGTAACTAAAGAGAGCCCGATAACATCTGGCACCAACATCATCAATCCAAT -AAGTGAAGCTGATAGTACCCCAGAAGCCAAAGCCAACTACCCAAGAAAACCCCTAGTAAGCTTCAAAGAA -GATCTCACCCCAAGTGACAACCCCTTTTCTAAATTGTACAAAGAAACAATAGAAACATTTGATAACAATG -AAGAAGAATCTAGCTACTCATATGAAGAAATAAATGATCAAACAAATGACAACATTACAGCAAGACTAGA -TAGAATTGATGAAAAATTAAGTGAAATATTAGGAATGCTCCATACATTAGTAGTTGCAAGTGCAGGACCC -ACTTCAGCTCGCGATGGAATAAGAGATGCTATGGTTGGTCTAAGAGAAGAAATGATAGAAAAAATAAGAG -CGGAAGCATTAATGACCAATGATAGGTTAGAGGCTATGGCAAGACTTAGGAATGAGGAAAGCGAAAAAAT -GGCAAAAGACACCTCAGATGAAGTGTCTCTTAATCCAACTTCCAAAAAATTGAGTGACTTGTTGGAAGAC -AACGATAGTGACAATGATCTATCACTTGATGATTTTTGATCAGTGATCAACTCACTCAGCAATCAACAAC -ATCAATAAAACAGACATCAATCCATTGAATCAACTGCCAGACCAAACAAACAAACGTCCATCAGCAGAAC -CACCAACCAATCAATCAACCAATTGATCAATCAGCAACCTAACAAAATTAACAATATAGTAACAAAAAAA -GAACAAGATGGGGCAAATATGGAAACATACGTGAACAAGCTTCACGAAGGCTCCACATACACAGCAGCTG -TTCAATACAATGTTCTAGAAAAAGATGATGATCCTGCATCACTAACAATATGGGTGCCTATGTTCCAGTC -ATCTGTGCCAGCAGACTTGCTCATAAAAGAACTTGCAAGCATCAACATACTAGTGAAGCAGATCTCTACG -CCCAAAGGACCTTCACTACGAGTCACGATTAACTCAAGAAGTGCTGTGCTGGCACAAATGCCTAGTAATT -TTATCATCAGCGCAAATGTATCATTAGATGAAAGAAGCAAATTAGCATATGATGTAACTACACCTTGTGA -AATCAAAGCATGCAGTCTAACATGCTTAAAAGTAAAAAGTATGTTAACTACAGTCAAAGATCTTACCATG -AAAACATTCAACCCCACTCATGAGATTATTGCTCTATGTGAATTTGAAAATATTATGACATCAAAAAGAG -TAATAATACCAACCTATCTAAGATCAATTAGTGTCAAAAACAAGGACCTGAACTCACTAGAAAATATAGC -AACCACCGAATTCAAAAATGCTATCACCAATGCGAAAATTATTCCTTATGCAGGATTAGTATTAGTTATC -ACAGTTACTGACAATAAAGGAGCATTCAAATATATCAAGCCACAGAGTCAATTTATAGTAGATCTTGGAG -CCTACCTGGAAAAAGAGAGCATATATTATGTGACTACAAATTGGAAGCATACAGCTACACGTTTTTCAAT -CAAACCGCTAGAAGATTAAACTTAATTATCAACACTAAATGACAGGTCCACATATATCCTCAAACTACAC -ACTATATCCAAACATCATGAACATATACACTACACACTTCATCACACAAACCAATCCCACTCAAAATCCA -AAATCACTTCCAGCCACTATCTGCTAGACCTAGAGTGCGAATAGGTAAATAAAACCAAAATATGGGGTAA -ATAGACATTAGTTAGAGTTCAATCAATCTCAACAACCATTTATACTGCTAATTCAATACATATACTATAA -ATTTCAAAATGGGAAATACATCCATCACAATAGAATTCACAAGCAAATTTTGGCCTTATTTTACACTAAT -ACATATGATCTTAACTCTAATCTCTTTACTAATTATAATCACTATTATGATTGCAATACTAAATAAGCTA -AGTGAACATAAAACATTCTGTAACAAAACTCTTGAACTAGGACAGATGTATCAAATCAACACATAGTGTT -CTACCATTATGCTGTGTCAAATTATAATCCTGTATATATAAACAAACAAATCTAATCTTCTCACGGAGTC -ATGGTGGTGCAAAACCATGCCAACTATCATGGTAGCATAGAGTAGTTATTTAAAAATTAACATAATGATG -AATTATTAGTATGGGGTCAAAAACAAAATTGGGGCAAATGCAACCATGTCCAAACACAAGAATCAACGCA -CTGCCAGGACTCTAGAAAAGACCTGGGATACTCTTAATCATCTAATTGTAATATCCTCTTGTTTATACAG -ATTAAACTTAAAATCTATAGCACAAATAGCACTATCAGTTTTGGCAATGATAATCTCAACCTCTCTCATA -ATTGCAGCCATAATATTCATCATCTCTGCCAATCACAAAGTTACACTAACAACAGTTACAGTTCAAACAA -TAAAAAACCACACTGAAAAAAACATCACCACCTACCTTACTCAAGTCTCACCGGAAAGGGTTAGCTCATC -CATACAACCTACAACCACATCACCAATCCACACAAATTCAGCTACAATATCACCAAATACAAAATCAGAA -ACACACCATACAACAGCACAACCCAAAGGCAGAATCACCACTTCAACACAGACCAACAAGCCAAGCACAA -AATCACGTTCAAAAAATCCACCAAAAAAACCAAAAGATGATTACCATTTTGAAGTGTTCAATTTTGTTCC -ATGTAGTATATGTGGCAACAATCAACTTTGCAAATCCATCTGCAAAACAATACCAAGCAACAAACCAAAG -AAAAAACCAACCATCAAACCCACAAACAAACCAACCATCAAAACCACAAACAAAAGAGACCCAAAAACAC -CAGCCAAAATGCCGGATAAAGAAACCACCACCAACCCAACAAAAAAACCAACCCTCAAGACCACAGAAAG -AGACACCAGCACCTCACAATCTACTGTGCTCGACACAACCACATCAAAACACACAATCCAACAGCAATAC -CTCCACTCAACCACCTCTGAAAACACACCCAACTCCACACAAATACCCACAGCATCCGAGCCCTCCACAT -CAAATTCTACTTAAAAAACCTAGTCACATACTTAGTTATTCAAAAACTACATCTTAGCAGAGAACCGTGA -TCTATCAAGCAAGAACGAAATTAAACCTGGGGCAAATAACCATGGAGTTGCTGATCCACAGGTCAAGTGC -AATCTTCCTAACTCTTGCTATTAATGCATTGTACCTCACCTCAAGTCAGAACATAACTGAGGAGTTTTAC -CAATCGACATGTAGTGCAGTTAGCAGAGGTTATTTTAGTGCTTTAAGGACAGGTTGGTATACCAGTGTCA -TAACAATAGAATTAAGTAATATAAAAGAAACCAAATGCAATGGAACTGACACTAAAGTAAAACTTATAAA -ACAAGAATTAGATAAGTATAAGAATGCAGTAACAGAATTACAGCTACTTATGCAAAACACGCCAGCTGTC -AACAACCGGGCCAGAAGAGAAGCACCACAGCACATGAACTACACAATCAATACCACTAAGAACCTAAATG -TATCAATAAGCAAGAAGAGGAAACGAAGATTTCTGGGCTTCTTGTTAGGTGTAGGATCTGCAATAGCAAG -TGGTATAGCTGTATCCAAAGTTCTACACCTTGAAGGAGAAGTGAACAAAATCAAAAATGCTTTGTTGTCT -ACAAACAAAGCTGTAGTCAGTCTATCAAATGGGGTTAGTGTTTTAACCAGCAAAGTGTTAGATCTCAAGA -ATTACATAAATAACCAATTATTACCCATAGTAAATAAACAGAGCTGTCGCATCTCCAACATTGAAACAGT -TATAGAATTCCAACAGAAGAATAGCAGATTGTTGGAAATCACCAGAGAATTTAGTGTCAATGCAGGTGTA -ACAACACCTTTAAGCACTTACATGTTAACAAACAGTGAGTTACTATCATTGATCAATGATATGCCTATAA -CAAATGATCAGAAAAAATTAATGTCAAGCAATGTTCAGATAGTAAGGCAACAAAGTTATTCCATCATGTC -TATAATAAAGGAAGAAGTCCTTGCATATGTTGTACAGCTACCTATCTATGGTGTAATAGATACACATTGC -TGGAAATTACACACATCACCTCTATGCACCACCAACATCAAAGAAGGATCAAATATTTGTTTAACAAGGA -CTGATAGAGGATGGTATTGTGATAATGCAGGATCAGTATCCTTCTTTCCACAGGCTGACACTTGCAAAGT -ACAGTCCAATCGAGTATTTTGTGACACTATGAACAGTTTGACATTACCAAGTGAAGTCAGCCTTTGTAAC -ACTGACATATTCAATTCCAAGTATGACTGCAAAATTATGACATCAAAAACAGACATAAGTAGCTCAGTAA -TTACTTCTCTTGGAGCTATAGTGTCATGTTATGGTAAAACTAAATGCACTGCATCCAATAAAAATCGTGG -GATTATAAAGACATTTTCTAATGGTTGTGACTATGTGTCAAACAAAGGAGTAGATACTGTGTCAGTGGGC -AACACTTTATACTATGTAAACAAGCTGGAAGGCAAGAACCTTTATGTAAAAGGGGAACCTATAATAAATT -ACTATGATCCTCTAGTGTTTCCTTCTGATGAGTTTGATGCATCAATATCTCAAGTCAATGAAAAAATCAA -TCAAAGTTTAGCTTTTATTCGTAGATCTGATGAATTACTACATAATGTAAATACTGGCAAATCTACTACA -AATATTATGATAACTACAATTATTATAGTAATCATTGTAGTATTGTTATCATTAATAGCTATTGGTTTAC -TGTTGTATTGCAAAGCCAAAAACACACCAGTTACACTAAGCAAAGACCAACTAAGTGGAATCAATAATAT -TGCATTCAGCAAATAGAAAAAAACTACTTGATCATGTTTCAACAACAATCTGCTGACCACCAATCCTTAA -TCAACTTAACAATAAATATTTCAACATCATAGCACAGGCTGAATCATTTCCTCACATCATGCTACCTACA -CAACTAAGCTAGATCCCCAACTCATAGTTACATAAAAACCTCAAGTATCACAATCAACCACTAAATCGAC -ACATCACTCACAAAATTAACAACTGGGGCAAATATGTCGCGAAGAAACCCTTGTAAATTTGAGATTAGAG -GTCATTGCTTGAATGGTAGAAGATGTCACTACAGTCATAATTATTTTGAATGGCCTCCTCATGCATTACT -AGTGAGGCAAAACTTCATGTTAAACAAGATACTTAAGTCAATGGACAAAAGCATAGACACTTTGTCAGAA -ATAAGTGGAGCTGCTGAACTGGATAGAACAGAAGAATATGCTCTTGGTATAGTTGGAGTGCTAGAGAGTT -ACATAGGATCTATAAACAACATAACAAAACAATCAGCATGTGTTGCTATGAGTAAACTTCTTATTGAGAT -TAACAGTGATGACATTAAAAAACTGAGAGATAATGAAGAACCCAATTCACCTAAGATAAGAGTGTACAAT -ACTGTTATATCATACATTGAGAGCAATAGAAAAAACACCAAGCAAACCATCCATCTGCTTAAAAGACTGC -CAGCAGACGTGCTGAAGAAGACAATAAAGAACACATTAGATATCCACAAAAGCATAACCATAAGCAACCC -AAAAGAGTCAACTGTGAATGATCAAAATGACCAAACCAAAAATAATGATATTACCGGATAAGTATCCTTG -TAGTATATCATCCATATTGATTTCAAGTGAAAGCATGGTTGCTACATTCAATCATAAAAACATATTACAA -TTTAACTATAACCATTTGGATAACCGCCAGTGTTTATTAAATCATATATTTGATGAAATTCATTGGACAC -CTAAAAACTTATTAGATACCACTCAACAATTTCTCCAACATCTTAACATCCCTGAAGATATATATACAGT -ATATATATTAGTGTCATAATGCTTGACCATAACGATTTTATATCATCCAACCATAAAACTATCATAATAA -GGTTATGGGACAAAATGGATCCCATTATTAATGGAAACTCTGCTAATGTGTATCTAACTGATAGTTATCT -AAAAGGTGTTATCTCTTTTTCAGAATGTAATGCTTTAGGGAGTTACCTTTTTAACGGCCCTTATCTTAAA -AATGATTACACCAACTTAATTAGTAGACAAAGTCCACTACTAGAGCATATGAATCTAAAAAAACTAACTA -TAACACAGTCATTAATATCTAGATATCATAAAGGTGAACTGAAATTAGAAGAACCAACTTATTTCCAGTC -ATTACTTATGACATATAAAAGTATGTCCTCGTCTGAACAAATTGCTACAACTAACTTACTTAAAAAAATA -ATACGAAGAGCTATAGAAATAAGTGATGTAAAGGTATACGCCATCTTGAACAAACTAGGACTAAAGGAAA -AGGACAGAGTTAAGCCCAACAATAATTCAGGTGATGATAACTCAGTTCTTACAACCATAATTAAAGATGA -TATACTTTCGGCTGTGGAAAACAATCAATCATATACAAATTCAGACAAAAATCACTCAGTGAATCAAAAT -ATCACTATCAAAACAACACTCTTGAAAAAATTGATGTGTTCAATGCAACATCCTCCATCATGGTTAATAC -ACTGGTTCAATTTATATACAAAATTAAATAACATATTAACACAATACCGATCAAATGAGGTAAAAAGTCA -TGGGTTTATATTAATAGATAATCAAACTTTAAGTGGTTTTCAGTTTATTTTAAATCAATATGGTTGTATC -GTTTATCATAAAGGACTCAAAAAAATCACAACTACTACTTACAATCAATTTTTGACATGGAAAGACATCA -GCCTTAGCAGATTAAATGTTTGCTTAATTACTTGGATAAGTAATTGTTTAAATACATTAAATAAAAGCTT -AGGGCTGAGATGTGGATTCAATAATATTGTGTTATCACAATTATTTCTTTATGGAGATTGTATACTGAAA -TTATTTCATAATGAAGGCTTCTACATAATAAAAGAAGTAGAGGGATTTATTATGTCTTTAATTCTAAACA -TAACAGAAGAAGATCAATTTAGGAAACGATTTTATAATAGCATGCTAAATAACATAACAGATGCAGCTAT -TAAGGCTCAAAAAAACCTACTATCAAGAGTATGTCACACTTTATTAGACAAGACAGTGTCTGATAATATC -ATAAATGGTAAATGGATAATCCTATTAAGTAAATTTCTTAAATTGATTAAGCTTGCAGGTGATAATAATC -TCAATAACTTGAGTGAGCTATATTTTCTCTTCAGAATCTTTGGACATCCAATGGTCGATGAAAGACAAGC -AATGGATGCTGTAAGAATTAACTGCAATGAAACTAAGTTCTACTTATTAAGTAGTCTAAGTACGTTAAGA -GGTGCTTTTATTTATAGAATCATAAAAGGGTTTGTAAATACCTATAACAGATGGCCCACTTTAAGGAATG -CTATTGTTCTACCTCTAAGATGGTTGAACTATTATAAACTTAATACCTATCCATCTCTACTTGAAATCAC -AGAAAATGATTTGATTATTTTATCAGGATTGCGGTTCTATCGTGAGTTTCATCTGCCTAAAAAAGTGGAT -CTTGAAATGATAATAAATGACAAAGCCATTTCGCCTCCAAAAGATCTAATATGGACTAGTTTTCCTAGAA -ATTACATGCCATCACATATACAAAATTATATAGAACATGAAAAGTTGAAGTTCTCTGAAAGCGACAGATC -AAGAAGAGTACTAGAGTATTACTTGAGAGATAATAAGTTCAATGAATGCGATCTATACAATTGTGTGGTC -AATCAAAGCTATCTCAACAACTCTAACCACGTGGTATCACTAACTGGTAAAGAAAGAGAGCTTAGTGTAG -GTAGAATGTTTGCTATGCAACCAGGTATGTTTAGGCAAATTCAAATCTTAGCAGAGAAAATGATAGCCGA -AAATATTTTACAATTCTTCCCTGAGAGTTTGACAAGATATGGTGATCTAGAGCTTCAAAAGATATTAGAA -TTAAAAGCAGGAATAAGCAACAAGTCAAATCGTTATAATGATAACTACAACAATTATATCAGTAAATGTT -CTATCATTACAGACCTTAGCAAATTCAATCAAGCATTTAGATATGAAACATCATGTATCTGCAGTGATGT -ATTAGATGAACTGCATGGAGTACAATCTCTGTTCTCCTGGTTGCATTTAACAATACCTCTTGTCACAATA -ATATGTACATATAGACATGCACCTCCTTTCATAAAGGATCATGTTGTTAATCTTAATGAAGTTGATGAAC -AAAGTGGATTATACAGATATCATATGGGTGGTATTGAGGGCTGGTGTCAAAAACTGTGGACCATTGAAGC -TATATCATTATTAGATCTAATATCTCTTAAAGGGAAATTCTCTATCACTGCTCTAATAAATGGTGATAAT -CAGTCAATTGATATAAGTAAACCAGTTAGACTTATAGAGGGTCAGACTCATGCTCAAGCAGATTATTTGT -TAGCATTAAATAGCCTTAAATTGCTATATAAAGAGTATGCAGGCATAGGCCATAAGCTCAAGGGAACAGA -GACCTATATATCCCGAGATATGCAATTCATGAGCAAAACAATCCAGCACAATGGAGTGTATTATCCAGCC -AGTATCAAAAAAGTCCTGAGAGTAGGTCCATGGATAAATACAATACTTGATGATTTTAAAGTTAGTTTAG -AATCTATAGGCAGCTTAACACAGGAGTTAGAATACAGAGGAGAAAGCTTATTATGCAGTTTAATATTTAG -GAACATTTGGTTATACAATCAAATTGCTTTGCAACTTCGAAATCATGCATTATGTCACAATAAGCTATAT -TTAGATATATTGAAAGTATTAAAACACTTAAAAACTTTTTTTAATCTTGATAGTATTGATATGGCGTTAT -CATTATATATGAATTTGCCTATGCTGTTTGGTGGTGGTGATCCTAATTTGCTATATCGAAGCTTTTATAG -GAGAACTCCAGACTTCCTTACAGAAGCTATAGTACATTCAGTGTTTGTGTTGAGCTATTATACTGGTCAC -GATTTACAAGATAAGCTCCAGGATCTTCCAGATGATAGACTGAACAAATTCTTGACATGTATCATCACAT -TTGATAAAAATCCCAATGCCGAGTTTGTAACATTGATGAGGGATCCACAGGCTTTAGGGTCTGAAAGGCA -AGCTAAAATTACTAGTGAGATTAATAGATTAGCAGTAACAGAAGTCTTAAGTATAGCTCCAAACAAAATA -TTTTCTAAAAGTGCACAACATTATACTACCACTGAGATTGATCTAAATGATATTATGCAAAATATAGAAC -CAACTTACCCTCACGGATTAAGAGTTGTTTATGAAAGTTTACCTTTTTATAAAGCAGAAAAAATAGTTAA -TCTTATATCAGGAACAAAATCCATAACTAATATACTTGAAAAAACATCAGCAATAGATACAACTGATATT -AATAGGGCTACTGATATGATGAGGAAAAATATAACTTTACTTATGAGGATACTTCCACTAGATTGTAACA -AAGACAAAAGAGAGTTATTAAGTTTAGAAAATCTTAGTATAACTGAATTAAGCAAGTATGTAAGAGAAAG -ATCTTGGTCGTTATCCAATATAGTAGGAGTAACATCGCCAAGTATTATGTTCACAATGGACATTAAATAT -ACAACTAGCACTATAGCCAGTGGTATAATTATAGAAAAATATAATGTTAATAGTTTAACTCGTGGTGAAA -GAGGACCTACTAAGCCATGGGTAGGTTCATCTACGCAAGAGAAAAAAACAATGCCAGTGTACAATAGACA -AGTTTTAACCAAAAAACAAAGAGACCAAATAGATTTATTAGCAAAATTAGACTGGGTATATGCATCCATA -GACAACAAAGATGAATTCATGGAAGAACTGAGTACTGGAACACTTGGACTGTCATATGAAAAAGCCAAAA -AATTGTTTCCACAATATCTAAGTGTCAATTATTTACACCGCTTAACAGTCAGTAGTAGACCATGTGAATT -CCCTGCATCAATACCAGCTTATAGAACAACAAATTATCATTTTGATACTAGTCCTATCAATCATGTATTA -ACAGAAAAGTATGGAGATGAAGATATCGACATTGTGTTTCAAAATTGCATAAGTTTTGGTCTTAGCCTAA -TGTCGGTTGTGGAACAATTCACAAACATATGTCCTAATAGAATTATTCTCATACCGAAGCTGAATGAGAT -ACATTTGATGAAACCTCCTATATTTACAGGAGATGTTGATATCATCAAGTTGAAGCAAGTGATACAAAAA -CAGCACATGTTCCTACCTGATAAAATAAGTTTAACCCAATATGTAGAATTATTCCTAAGTAACAAAGCAC -TTAAATCTGGATCTCACATCAACTCTAATTTAATATTAGTACATAAAATGTCTGATTATTTTCATAATGC -TTATATTTTAAGTACTAATTTAGCTGGACATTGGATTCTGATTATTCAACTTATGAAAGATTCAAAAGGT -ATTTTTGAAAAAGATTGGGGAGAGGGGTATATAACTGATCATATGTTCATTAATTTGAATGTTTTCTTTA -ATGCTTATAAGACTTATTTGCTATGTTTTCATAAAGGTTATGGCAAAGCAAAATTAGAATGTGATATGAA -CACTTCAGATCTTCTTTGTGTTTTGGAGTTAATAGACAGTAGCTACTGGAAATCTATGTCTAAAGTTTTC -CTAGAACAAAAAGTCATAAAATACATAGTCAATCAAGACACAAGTTTGCATAGAATAAAAGGTTGTCATA -GTTTTAAGTTGTGGTTTTTAAACCGCCTTAATAATGCTAAATTTACCGTATGCCCTTGGGTTGTTAACAT -AGATTATCACCCAACACACATGAAAGCTATATTATCTTACATAGATTTAGTTAGAATGGGGTTAATAAAT -GTAGATAAATTAACCATTAAAAATAAAAACAAATTCAATGATGAATTTTACACATCAAATCTCTTTTATA -TTAGTTATAACTTTTCAGACAACACTCATTTGCTAACAAAACAAATAAGAATTGCTAATTCAGAATTAGA -AAATAATTATAACAAACTATATCACCCAACCCCAGAAACTTTAGAAAATATGTCATTAATTCCTGTTAAA -AGTAATAATAGTAACAAACCTAAATTTTGTATAAGTGGAAATACCGAATCTATGATGACGTCAACATTCT -CCAATAAAATGCATATTAAATCTTCCACTGTTACCACAAGATTCAACTATAGCAAACAAGACTTGTACAA -TTTATTTCCAATTGTTGTGATAGACAGGATTATAGATCATTCAGGCAATACAGCAAAATCCAACCAACTT -TACACTACCACTTCACATCAGACATCTTTAGTAAGGAATAGTGCATCACTTTATTGCATGCTTCCTTGGC -ATCATGTCAATAGATTCAACTTTGTATTTAGTTCCACAGGATGCAAGATTAGCATAGAGTATATTTTAAA -AGATCTTAAGATTAAGGACCCCAGTTGTATAGCATTCATAGGTGAAGGAGCTGGTAACTTATTATTACGT -ACGGTAGTAGAACTTCATCCAGACATAAGATACATTTACAGAAGTTTAAAAGATTGCAATGATCATAGTT -TACCTATTGAATTTCTAAGGTTATACAACGGGCATATAAACATAGATTATGGTGAGAATTTAACCATTCC -TGCTACAGATGCAACTAATAACATTCATTGGTCTTATTTACATATAAAATTTGCAGAACCTATTAGCATC -TTTGTCTGTGATGCTGAATTACCTGTTACAGCCAATTGGAGTAAAATTATAATTGAATGGAGTAAGCATG -TAAGAAAGTGTAAGTACTGTTCCTCTGTAAATAGATGCATTTTAATTGCAAAATATCATGCTCAAGATGA -TATTGATTTCAAATTAGATAACATTACTATATTAAAAACTTACGTGTGCCTAGGTAGCAAGTTAAAAGGA -TCTGAAGTTTACCTAGTCCTTACAATAGGCCCTTCAAATATACTTCCTGTTTTTAATGTTGTGCAAAATG -CTAAATTGATTCTTTCAAGAACTAAAAACTTCATTATGCCTAAAAAAACTGACAAAGAATCTATCGATGC -AAATATTAAAAGCTTAATACCTTTCCTTTGTTACCCTATAACAAAAAAAGGAATTAAGACTTCATTGTCA -AAATTGAAGAGTGTAGTTAACGGAGATATATTATCATATTCTATAGCTGGACGTAATGAAGTATTCAGCA -ACAAGCTTATAAACCACAAGCATATGAATATCCTAAAATGGCTAGATCATGTTTTAAACTTTAGATCAGC -TGAACTTAATTACAATCATTTATATATGATAGAGTCCACATATCCTTACTTAAGTGAATTGTTAAATAGT -TTAACAACCAATGAGCTCAAGAAGCTGATTAAAATAACAGGTAGTGTACTATACAACCTTCCCAACGAAC -AGTAACTTAAAGTATCATTAACAAGTTTGGTCAAATTTAGATGCTAACACATCATTATATTATAGTTATT -AAAAAATATATATGCAACCTTTTCAATAATTTAGCATATTGATTCCAAAATTATCATGTTGGTCTTCAGG -GGTTGAATAAAAATCTAAAACTAACAATTATACATGTGCATTTACAACACAACGAGACATTA - diff --git a/ingest/workflow/envs/nextstrain.yaml b/ingest/workflow/envs/nextstrain.yaml index 6f8b609..45375fc 100644 --- a/ingest/workflow/envs/nextstrain.yaml +++ b/ingest/workflow/envs/nextstrain.yaml @@ -8,7 +8,7 @@ dependencies: - fasttree - iqtree - mafft=7.471 - - nextalign=0.1.6 + - nextclade>=3 - pandas - psutil - python=3.7* diff --git a/ingest/workflow/snakemake_rules/sort.smk b/ingest/workflow/snakemake_rules/sort.smk index b5dd4a5..1eaf6a3 100644 --- a/ingest/workflow/snakemake_rules/sort.smk +++ b/ingest/workflow/snakemake_rules/sort.smk @@ -10,99 +10,76 @@ It produces output files as """ -TIME = ['1', '2','3'] - -rule align: +rule sort: input: - sequences = rules.transform.output.sequences, - reference = "config/{type}_{time}_reference.fasta" + sequences = rules.transform.output.sequences output: - alignment = "data/{type}/{time}_sequences.aligned.fasta" - threads: 2 + "data/a/sequences.fasta", + "data/b/sequences.fasta" shell: - """ - nextalign run -j {threads} --silent \ - --reference {input.reference} \ - --output-fasta {output.alignment} \ - {input.sequences} - """ + ''' + nextclade3 sort {input.sequences} --output-dir tmp + seqkit rmdup tmp/nextstrain/rsv/b/sequences.fasta > data/b/sequences.fasta + seqkit rmdup tmp/nextstrain/rsv/a/sequences.fasta > data/a/sequences.fasta + rm -r tmp + ''' -rule metadataandsequences: +rule metadata: input: - alignment = rules.align.output.alignment, metadata = rules.transform.output.metadata, - sequences = rules.transform.output.sequences + sequences = "data/{type}/sequences.fasta" output: - metadata = "data/{type}/{time}_metadata.tsv", - sequences = "data/{type}/{time}_sequences.fasta" + metadata = "data/{type}/metadata_raw.tsv" + run: + import pandas as pd + from Bio import SeqIO - shell: - """ - python bin/sequencesandmetadata.py \ - --sortedalignment {input.alignment} \ - --allmetadata {input.metadata} \ - --allsequences {input.sequences} \ - --metadata {output.metadata} \ - --sequences {output.sequences} - """ + strains = [s.id for s in SeqIO.parse(input.sequences, 'fasta')] + d = pd.read_csv(input.metadata, sep='\t', index_col='accession').loc[strains].drop_duplicates() + d.to_csv(output.metadata, sep='\t') -rule sort: - input: - allsequences = "data/sequences.fasta", - metadata = "data/metadata.tsv", - alignment_a = expand("data/a/{time}_sequences.aligned.fasta", time=TIME), - alignment_b = expand("data/b/{time}_sequences.aligned.fasta", time=TIME), - reference_a = expand("config/a_{time}_reference.fasta", time=TIME), - reference_b = expand("config/b_{time}_reference.fasta", time=TIME), - metadata_b = expand("data/b/{time}_metadata.tsv", time=TIME), - metadata_a = expand("data/a/{time}_metadata.tsv", time=TIME) +rule nextclade_dataset: output: - sequences_a = "data/a/sequences_notdedup.fasta", - metadata_a = "data/a/metadata_notdedup.tsv", - sequences_b = "data/b/sequences_notdedup.fasta", - metadata_b = "data/b/metadata_notdedup.tsv" + ref_a = "rsv-a_nextclade/reference.fasta", + ref_b = "rsv-b_nextclade/reference.fasta" + params: + dataset_a = "nextstrain/rsv/a/EPI_ISL_412866", + dataset_b = "nextstrain/rsv/b/EPI_ISL_1653999" shell: """ - python bin/sort.py + nextclade3 dataset get -n {params.dataset_a} --output-dir rsv-a_nextclade + nextclade3 dataset get -n {params.dataset_b} --output-dir rsv-b_nextclade """ -rule deduplication: +rule nextclade: input: - sequences_a = rules.sort.output.sequences_a, - metadata_a = rules.sort.output.metadata_a, - sequences_b = rules.sort.output.sequences_b, - metadata_b = rules.sort.output.metadata_b + sequences = "data/{type}/sequences.fasta", + ref = "rsv-a_nextclade/reference.fasta" output: - dedup_seq_a = "data/a/sequences.fasta", - dedup_metadata_a = "data/a/metadata_no_covg.tsv", - dedup_seq_b = "data/b/sequences.fasta", - dedup_metadata_b = "data/b/metadata_no_covg.tsv" + nextclade = "data/{type}/nextclade.tsv" + params: + dataset = "rsv-{type}_nextclade", + output_columns = "clade qc.overallScore qc.overallStatus alignmentScore alignmentStart alignmentEnd coverage dynamic" shell: """ - seqkit rmdup < {input.sequences_a} > {output.dedup_seq_a} - seqkit rmdup < {input.sequences_b} > {output.dedup_seq_b} - - python bin/metadata_dedup.py \ - --metadata-original {input.metadata_a} \ - --metadata-output {output.dedup_metadata_a} - - python bin/metadata_dedup.py \ - --metadata-original {input.metadata_b} \ - --metadata-output {output.dedup_metadata_b} + nextclade3 run -D {params.dataset} \ + --output-columns-selection {params.output_columns} \ + --output-tsv {output.nextclade} \ + {input.sequences} """ -rule coverage: +rule extend_metadata: input: - alignment_a = expand("data/a/{time}_sequences.aligned.fasta", time=TIME), - alignment_b = expand("data/b/{time}_sequences.aligned.fasta", time=TIME), - metadata_b = expand("data/b/{time}_metadata.tsv", time=TIME), - metadata_a = expand("data/a/{time}_metadata.tsv", time=TIME), - dedup_metadata_a = rules.deduplication.output.dedup_metadata_a, - dedup_metadata_b = rules.deduplication.output.dedup_metadata_b + nextclade = "data/{type}/nextclade.tsv", + metadata = "data/{type}/metadata_raw.tsv" output: - metadata_a = "data/a/metadata.tsv", - metadata_b = "data/b/metadata.tsv" + metadata = "data/{type}/metadata.tsv" shell: """ - python bin/gene-coverage.py - """ \ No newline at end of file + python3 bin/extend-metadata.py --metadata {input.metadata} \ + --id-field accession \ + --virus-type {wildcards.type} \ + --nextclade {input.nextclade} \ + --output {output.metadata} + """ + diff --git a/nextclade/Snakefile b/nextclade/Snakefile index 0586f6e..7626acf 100644 --- a/nextclade/Snakefile +++ b/nextclade/Snakefile @@ -104,11 +104,10 @@ rule align: alignment="build/{a_or_b}/{reference}/aligned.fasta" params: outdir=lambda w: f"build/{w.a_or_b}/{w.reference}/aligned.{{cds}}.fasta", - nextclade_bin = "./nextclade_v3" threads: 3 shell: """ - {params.nextclade_bin} run \ + nextclade3 run \ --jobs={threads} \ --input-ref {input.reference} \ --input-annotation {input.annotation} \ diff --git a/workflow/envs/nextstrain.yaml b/workflow/envs/nextstrain.yaml index a820606..8f23643 100644 --- a/workflow/envs/nextstrain.yaml +++ b/workflow/envs/nextstrain.yaml @@ -8,7 +8,7 @@ dependencies: - fasttree - iqtree - mafft=7.471 - - nextalign=0.1.6 + - nextclade>=3 - pandas - psutil - python=3.7* diff --git a/workflow/snakemake_rules/core.smk b/workflow/snakemake_rules/core.smk index 83c52a1..e87b63e 100644 --- a/workflow/snakemake_rules/core.smk +++ b/workflow/snakemake_rules/core.smk @@ -68,13 +68,14 @@ rule filter: --metadata {input.metadata} \ --metadata-id-columns {params.strain_id} \ --exclude {input.exclude} \ + --exclude-where 'qc.overallStatus=bad' \ --output {output.sequences} \ --group-by {params.group_by} \ --subsample-max-sequences {params.subsample_max_sequences} \ --query '{params.min_coverage}' """ -rule nextalign: +rule genome_align: message: """ Aligning sequences to {input.reference} @@ -87,15 +88,16 @@ rule nextalign: threads: 4 shell: """ - nextalign run -j {threads}\ - --reference {input.reference} \ + nextclade3 run -j {threads}\ + --input-ref {input.reference} \ --output-fasta {output.alignment} \ {input.sequences} """ +# cut out the G-Gene for alignment refinement rule cut: input: - oldalignment = rules.nextalign.output.alignment, + oldalignment = rules.genome_align.output.alignment, reference = "config/{a_or_b}reference.gbk" output: slicedalignment = build_dir + "/{a_or_b}/{build_name}/{gene}_slicedalignment.fasta" @@ -110,6 +112,7 @@ rule cut: --gene {params.gene} """ +# align the G gene with mafft rule realign: input: slicedalignment = rules.cut.output.slicedalignment, @@ -128,7 +131,7 @@ rule realign: rule hybrid_align: input: - original = rules.nextalign.output.alignment, + original = rules.genome_align.output.alignment, G_alignment = build_dir + "/{a_or_b}/{build_name}/G_aligned.fasta", reference = "config/{a_or_b}reference.gbk" output: diff --git a/workflow/snakemake_rules/export.smk b/workflow/snakemake_rules/export.smk index b1b713b..6d8e9ec 100644 --- a/workflow/snakemake_rules/export.smk +++ b/workflow/snakemake_rules/export.smk @@ -6,10 +6,8 @@ def get_node_data(w): if w.build_name in config["genesforglycosylation"]: node_data.append(rules.glycosylation.output.glycosylations) if w.build_name == "genome": - node_data.append(rules.clades_genome.output.node_data) node_data.append(rules.clades_consortium.output.node_data) - if w.build_name in ["genome", "G"]: - node_data.append(rules.clades_Goya.output.node_data) + return node_data rule colors: @@ -43,11 +41,12 @@ rule export: params: title = lambda w: f"RSV-{w.a_or_b.upper()} phylogeny", strain_id=config["strain_id_field"], + metadata_colors = lambda w: '' if w.build_name=='genome' else f"--color-by-metadata clade" shell: """ augur export v2 \ --tree {input.tree} \ - --metadata {input.metadata} \ + --metadata {input.metadata} {params.metadata_colors} \ --metadata-id-columns {params.strain_id} \ --node-data {input.node_data} \ --title {params.title:q} \