diff --git a/.gitignore b/.gitignore index bab39a3..b6e4761 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,3 @@ -test.py - # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/README.md b/README.md index df34e2a..0937e96 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# pairwise-alignment +# pairwise-sequence-alignment (psa) This is a Python module to calculate a pairwise alignment between biological sequences (protein or nucleic acid). This module uses the [needle](https://www.ebi.ac.uk/Tools/psa/emboss_needle/) and [water](https://www.ebi.ac.uk/Tools/psa/emboss_water/) tools from the EMBOSS package to calculate an optimal, global/local pairwise alignment. @@ -8,6 +8,7 @@ I wrote this module for two reasons. First, the needle and water tools are faste * [Introduction](#introduction) * [Requirements](#requirements) +* [Installation](#installation) * [Quick Start](#quick-start) * [Alignment object](#alignment-object) * [Attributes](#attributes) @@ -41,18 +42,35 @@ Pairwise sequence alignment is used to identify regions of similarity that may i > Check with `needle -version` or `water -version`. +## Installation + +You can install the module from [PyPI](https://pypi.org/project/pairwise-sequence-alignment/): + +``` +pip install pairwise-sequence-alignment +``` + +or directly from GitHub: + +``` +pip install "git+https://github.com/aziele/pairwise-sequence-alignment.git" +``` + +or you can use the module without installation. Simply clone or download this repository and you're ready to use it. 
+ + ## Quick Start ```python -import pairwise_alignment as pa +import psa # Global alignment -aln = pa.needle(moltype='nucl', qseq='ATGCTAGTA', sseq='ATGCTAGTAGATGATGA') -aln = pa.needle(moltype='prot', qseq='MKSTVWSG', sseq='MKSSVLW') +aln = psa.needle(moltype='nucl', qseq='ATGCTAGTA', sseq='ATGCTAGTAGATGATGA') +aln = psa.needle(moltype='prot', qseq='MKSTVWSG', sseq='MKSSVLW') # Local alignment -aln = pa.water(moltype='nucl', qseq='ATGCTAGTA', sseq='ATGCTAGTAGATGATGAT') -aln = pa.water(moltype='prot', qseq='MKSTVWSG', sseq='MKSSVLW') +aln = psa.water(moltype='nucl', qseq='ATGCTAGTA', sseq='ATGCTAGTAGATGATGAT') +aln = psa.water(moltype='prot', qseq='MKSTVWSG', sseq='MKSSVLW') print(aln.score) # 20.0 print(aln.pidentity) # 71.4 @@ -109,9 +127,9 @@ print(aln.sseq) # MKSSVLW ### Alignment information ```python -import pairwise_alignment as pa +import psa -aln = pa.needle( +aln = psa.needle( moltype='prot', qseq='MTSPSTKNSDDKGRPNLSSTEYFANTNVLTCRLKWVNPDTFIMDPRKPQLHSRT', sseq='MTTPSRENSDDKGRPIEEASNLSSTEYFANTNVLTCKLKYVNPDTFIMDPRKP', @@ -261,9 +279,9 @@ Output: Query coverage describes how much of the query sequence is covered in the alignment by the subject sequence. Specifically, query coverage is the percentage of the query sequence length that is included in the alignment. In global alignments, query coverage is always 100% because both the sequences, query and subject, are aligned from end to end. It is thus more useful to calculate query coverage from local alignments. ```python -import pairwise_alignment as pa +import psa -aln = pa.water( +aln = psa.water( moltype='prot', qseq='MTSPSTKNSDDKGRPNLSSTEYFANTNVLTCRLKWVNPDTFIMDPRKPQLHSRT', sseq='NSDDKGRPIEEASNLSSTEYFANTNVLTCKLKYVNPDTFIMDPRKP', @@ -285,9 +303,9 @@ print(aln.subject_coverage()) You can change a scoring matrix and penalties for the gap open and extension to calculate the alignment. 
```python -import pairwise_alignment as pa +import psa -aln = pa.water( +aln = psa.water( moltype='prot', qseq='MKSTWYERNST', sseq='MKSTGYWTRESA', @@ -335,9 +353,9 @@ The Needleman-Wunsch and Smith-Waterman algorithms will always find an optimal a The `.pvalue()` method calculates the *P*-value of the alignment between query and subject sequences. The method shuffles a subject sequence many times (100 by default) and calculates the alignment score between the query and each shuffled subject sequence. It then counts how many times the alignment score was greater than or equal to the alignment score of the original query and subject sequences. For example, if 100 such shuffles all produce alignment scores that are lower than the observed alignment score, then one can say that the *P*-value is likely to be less than 0.01. ```python -import pairwise_alignment as pa +import psa -aln = pa.needle(moltype='prot', qseq='MKSTVILK', sseq='MKSRSLK') +aln = psa.needle(moltype='prot', qseq='MKSTVILK', sseq='MKSRSLK') print(aln.pvalue()) # 0.16 ``` @@ -349,7 +367,7 @@ For more than two sequences, you can calculate alignments between every pair of ```python import itertools -import pairwise_alignment as pa +import psa # Input sequences sequences = { @@ -364,7 +382,7 @@ sequences = { for qid, sid in itertools.combinations(sequences, r=2): qseq = sequences[qid] sseq = sequences[sid] - aln = pa.needle(moltype='nucl', qseq=qseq, sseq=sseq) + aln = psa.needle(moltype='nucl', qseq=qseq, sseq=sseq) print(f'{qid} {sid} {aln.pidentity:.1f}% {aln.score}') ``` @@ -389,7 +407,7 @@ If you have multiple sequences in a FASTA file, you can use [Biopython](https:// ```python import itertools -import pairwise_alignment as pa +import psa from Bio import SeqIO @@ -402,7 +420,7 @@ for seq_record in SeqIO.parse('sequences.fasta', 'fasta'): for qid, sid in itertools.combinations(sequences, r=2): qseq = sequences[qid] sseq = sequences[sid] - aln = pa.needle(moltype='nucl', qseq=qseq, sseq=sseq) + aln = 
psa.needle(moltype='nucl', qseq=qseq, sseq=sseq) print(f'{qid} {sid} {aln.pidentity:.1f}% {aln.score}') ``` @@ -423,10 +441,10 @@ dna4 dna5 40.9% 14.0 ## Tests -This module contains automated tests. If you want to check that everything works as intended, just run: +If you want to check that everything works as intended, just run: ``` -python3 pairwise_alignment.py +./test.py ``` ## License diff --git a/pairwise_alignment.py b/psa.py similarity index 58% rename from pairwise_alignment.py rename to psa.py index 65fe242..163bfac 100755 --- a/pairwise_alignment.py +++ b/psa.py @@ -1,20 +1,24 @@ -""" -Global and local pairwise alignments between nucleotide/protein sequences. +"""Global and local pairwise alignments between nucleotide/protein sequences. The module uses needle/water from the EMBOSS package to compute an optimal global/local alignment between a pair of sequences (query and subject). Copyright 2022 Andrzej Zielezinski (a.zielezinski@gmail.com) -https://github.com/aziele/pairwise-alignment +https://github.com/aziele/pairwise-sequence-alignment """ from __future__ import annotations from collections import namedtuple from typing import Optional import subprocess +import shutil import random -__version__ = '0.1.0' +__version__ = '1.0.0' + +if not shutil.which('needle'): # explicit raise: `assert` is stripped under -O + raise ImportError("needle not found (is emboss installed?)") + + class PairwiseAlignment(): """Object representing a pairwise ailgnment. 
@@ -126,13 +130,12 @@ def fasta(self, wrap=70) -> str: """Returns pairwise alignment in FASTA/Pearson format.""" lst = [ f'>{self.qid} {self.qstart}-{self.qend}', - *(self.qaln[i:i+wrap] for i in range(0, self.length, wrap)), + *(self.qaln[i:i + wrap] for i in range(0, self.length, wrap)), f'>{self.sid} {self.sstart}-{self.send}', - *(self.saln[i:i+wrap] for i in range(0, self.length, wrap)), + *(self.saln[i:i + wrap] for i in range(0, self.length, wrap)), ] return "\n".join(lst) - def pvalue(self, n:int = 100) -> float: """Returns p-value of the alignment. @@ -188,7 +191,6 @@ def __getitem__(self, index): 'nidentity', 'nsimilarity', 'ngaps', 'output'] ) - def align( program: Literal['needle', 'water'], moltype: Literal['prot', 'nucl'], @@ -203,32 +205,21 @@ def align( """Aligns two sequences, parses the output, and returns an alignment object. Args: - program: - An EMBOSS tool to run. - moltype: - A molecule type of query and subject sequences - qseq: - Query sequence - sseq: - Subject sequence - qid: - Query sequence identifier - sid: - Subject sequence identifier - gapopen: - Gap open penalty - gapextend: - Gap extension penalty - matrix: - Name of scoring matrix + program An EMBOSS tool to run. + moltype A molecule type of query and subject sequences + qseq Query sequence + sseq Subject sequence + qid Query sequence identifier + sid Subject sequence identifier + gapopen Gap open penalty + gapextend Gap extension penalty + matrix Name of scoring matrix Returns: A list of lines from EMBOSS output - """ if not matrix: matrix = 'EBLOSUM62' if moltype == 'prot' else 'EDNAFULL' - handle = emboss_run( program=program, moltype=moltype, @@ -279,24 +270,15 @@ def emboss_run( """Aligns two sequences using EMBOSS and returns its output. Args: - program: - An EMBOSS tool to run. 
- moltype: - A molecule type of query and subject sequences - qseq: - Query sequence - sseq: - Subject sequence - qid: - Query sequence identifier - sid: - Subject sequence identifier - gapopen: - Gap open penalty - gapextend: - Gap extension penalty - matrix: - Name of scoring matrix + program An EMBOSS tool to run. + moltype A molecule type of query and subject sequences + qseq Query sequence + sseq Subject sequence + qid Query sequence identifier + sid Subject sequence identifier + gapopen Gap open penalty + gapextend Gap extension penalty + matrix Name of scoring matrix Returns: A list of lines from EMBOSS output @@ -307,7 +289,6 @@ def emboss_run( qualifiers = '-snucleotide1 -snucleotide2' if moltype == 'prot': qualifiers = '-sprotein1 -sprotein2' - cmd = [ f"{program} -stdout -auto", f"{qualifiers}", @@ -332,8 +313,7 @@ def emboss_parse(handle: Iterable[str]) -> collections.namedtuple: """Parses EMBOSS output. Args: - handle: - A list of lines in EMBOSS output + handle: A list of lines in EMBOSS output Returns: A namedtuple containing alignment data. 
@@ -347,7 +327,6 @@ def emboss_parse(handle: Iterable[str]) -> collections.namedtuple: for line in handle: if line.startswith('#====='): is_output = True - if is_output: output.append(line) @@ -408,175 +387,3 @@ def water(*args, **kwargs): def needle(*args, **kwargs): """Aligns two sequences using EMBOSS needle.""" return align('needle', *args, **kwargs) - - -if __name__ == '__main__': - - import unittest - - class Test(unittest.TestCase): - - def test_needle_dna(self): - aln = needle(moltype='nucl', qseq='ATGCTAGATA', sseq='ATGCTAGTTA') - self.assertEqual(len(aln.qaln), len(aln.saln)) - self.assertEqual(len(aln.qaln), aln.length) - - def test_water_dna(self): - aln = water(moltype='nucl', qseq='ATGCTAGTTA', sseq='ATCCT') - self.assertEqual(len(aln.qaln), len(aln.saln)) - self.assertEqual(len(aln.qaln), aln.length) - - def test_needle_protein(self): - aln = needle(moltype='prot', qseq='MERILIIMTGG', sseq='MEKILILM') - self.assertEqual(len(aln.qaln), len(aln.saln)) - self.assertEqual(len(aln.qaln), aln.length) - - def test_water_protein(self): - aln = needle(moltype='prot', qseq='MERI', sseq='MEKILILM') - self.assertEqual(len(aln.qaln), len(aln.saln)) - self.assertEqual(len(aln.qaln), aln.length) - - def test_needle_dna_qid_sid(self): - qid = 'dna1' - sid = 'dna2' - qseq = 'ATGCTAGATA' - sseq = 'ATGCTAGTTA' - aln = needle(moltype='nucl', qseq=qseq, sseq=sseq, qid=qid, sid=sid) - self.assertEqual(len(aln.qaln), len(aln.saln)) - self.assertEqual(len(aln.qaln), aln.length) - self.assertEqual(aln.qid, qid) - self.assertEqual(aln.sid, sid) - - - class TestPairwiseAlignment(unittest.TestCase): - - def setUp(self): - self.aln1 = PairwiseAlignment( - qid='query', - sid='subject', - qseq='MERILIIMTGGTITSIRDDEVTLELLIDYRKRFGDTQRFDIVKLMNIIS', - sseq='MERILIIMTGGTISSIKKENILNVDDEVTLELLIDYRKRFGDSQKFDIVILIIS', - qaln='MERILIIMTGGTITSIR-------DDEVTLELLIDYRKRFGDTQRFDIVKLMNIIS', - saln='MERILIIMTGGTISSIKKENILNVDDEVTLELLIDYRKRFGDSQKFDIVIL--IIS', - qstart=1, - qend=49, - sstart=1, - 
send=54, - length=56, - score=183.5, - nidentity=42, - nsimilarity=46, - ngaps=9, - moltype='prot', - program='needle', - gapopen=10, - gapextend=0.5, - matrix='EBLOSUM62', - raw=[] # Skipped for code readability. - ) - - self.aln2 = PairwiseAlignment( - qid='oddly_long_sequence_identifier1', - sid='oddly_long_sequence_identifier2', - qseq='ATGCTAGTAGTTGATTTTTT', - sseq='ATGCTAGTAGATGAT', - qaln='ATGCTAGTAGTTGAT', - saln='ATGCTAGTAGATGAT', - qstart=1, - qend=15, - sstart=1, - send=15, - length=15, - score=66.0, - nidentity=14, - nsimilarity=14, - ngaps=0, - moltype='nucl', - program='water', - gapopen=10, - gapextend=0.5, - matrix='EDNAFULL', - raw=[] # Skipped for code readability. - ) - - def test_pidentity(self): - self.assertEqual(self.aln1.pidentity, 75.0) - self.assertAlmostEqual(self.aln2.pidentity, 93.3333333) - - def test_psimilarity(self): - self.assertAlmostEqual(self.aln1.psimilarity, 82.1428571) - self.assertAlmostEqual(self.aln2.psimilarity, 93.3333333) - - def test_pgaps(self): - self.assertAlmostEqual(self.aln1.pgaps, 16.0714286) - self.assertAlmostEqual(self.aln2.pgaps, 0) - - def test_query_coverage(self): - self.assertAlmostEqual(self.aln1.query_coverage(), 100.0) - self.assertAlmostEqual(self.aln2.query_coverage(), 75.0) - - def test_subject_coverage(self): - self.assertAlmostEqual(self.aln1.subject_coverage(), 100.0) - self.assertAlmostEqual(self.aln2.subject_coverage(), 100.0) - - def test_len(self): - self.assertEqual(len(self.aln1), 56) - self.assertEqual(len(self.aln2), 15) - - def test_getitem(self): - self.assertEqual(self.aln1[13], ('T', 'S')) - self.assertEqual(self.aln2[1], ('T', 'T')) - - def test_iter(self): - self.assertEqual(list(iter(self.aln1))[0], ('M', 'M')) - self.assertEqual(list(iter(self.aln2))[0], ('A', 'A')) - - def test_fasta_aln1(self): - fasta = [ - '>query 1-49', - 'MERILIIMTGGTITSIR-------DDEVTLELLIDYRKRFGDTQRFDIVKLMNIIS', - '>subject 1-54', - 'MERILIIMTGGTISSIKKENILNVDDEVTLELLIDYRKRFGDSQKFDIVIL--IIS', - ] - 
self.assertEqual(self.aln1.fasta(), "\n".join(fasta)) - - def test_fasta_aln2(self): - fasta = [ - '>oddly_long_sequence_identifier1 1-15', - 'ATGCTAGTAGTTGAT', - '>oddly_long_sequence_identifier2 1-15', - 'ATGCTAGTAGATGAT', - ] - self.assertEqual(self.aln2.fasta(), "\n".join(fasta)) - - def test_fasta_aln1_wrap10(self): - fasta = [ - '>query 1-49', - 'MERILIIMTG', - 'GTITSIR---', - '----DDEVTL', - 'ELLIDYRKRF', - 'GDTQRFDIVK', - 'LMNIIS', - '>subject 1-54', - 'MERILIIMTG', - 'GTISSIKKEN', - 'ILNVDDEVTL', - 'ELLIDYRKRF', - 'GDSQKFDIVI', - 'L--IIS', - ] - self.assertEqual(self.aln1.fasta(wrap=10), "\n".join(fasta)) - - def test_fasta_aln2_wrap10(self): - fasta = [ - '>oddly_long_sequence_identifier1 1-15', - 'ATGCTAGTAG', - 'TTGAT', - '>oddly_long_sequence_identifier2 1-15', - 'ATGCTAGTAG', - 'ATGAT', - ] - self.assertEqual(self.aln2.fasta(wrap=10), "\n".join(fasta)) - - unittest.main() \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..de72236 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,37 @@ +[build-system] +requires = ["flit_core >=3.2,<4"] +build-backend = "flit_core.buildapi" + +[project] +name = "pairwise-sequence-alignment" +authors = [ + {name = "Andrzej Zielezinski", email = "a.zielezinski@gmail.com"} +] +readme = "README.md" +license = {file = "LICENSE"} +classifiers = [ + "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Operating System :: POSIX :: Linux", + "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Bio-Informatics", +] +keywords = [ + "sequence alignment", + "nucleotide", + "protein", + "pairwise alignment", + "needle", + "water", + "emboss", +] +requires-python = ">=3.8" +dynamic = ["version", "description"] + +[tool.flit.module] +name = "psa" + +[project.urls] +Documentation = "https://github.com/aziele/pairwise-sequence-alignment" +Source = 
"https://github.com/aziele/pairwise-sequence-alignment" \ No newline at end of file diff --git a/test.py b/test.py new file mode 100755 index 0000000..a96978b --- /dev/null +++ b/test.py @@ -0,0 +1,177 @@ +#!/usr/bin/env python3 + +import shutil +import unittest + +import psa + +class Test(unittest.TestCase): + + def test_is_emboss_installed(self): + self.assertIsNotNone(shutil.which('needle')) + self.assertIsNotNone(shutil.which('water')) + + def test_needle_dna(self): + aln = psa.needle(moltype='nucl', qseq='ATGCTAGATA', sseq='ATGCTAGTTA') + self.assertEqual(len(aln.qaln), len(aln.saln)) + self.assertEqual(len(aln.qaln), aln.length) + + def test_water_dna(self): + aln = psa.water(moltype='nucl', qseq='ATGCTAGTTA', sseq='ATCCT') + self.assertEqual(len(aln.qaln), len(aln.saln)) + self.assertEqual(len(aln.qaln), aln.length) + + def test_needle_protein(self): + aln = psa.needle(moltype='prot', qseq='MERILIIMTGG', sseq='MEKILILM') + self.assertEqual(len(aln.qaln), len(aln.saln)) + self.assertEqual(len(aln.qaln), aln.length) + + def test_water_protein(self): + aln = psa.needle(moltype='prot', qseq='MERI', sseq='MEKILILM') + self.assertEqual(len(aln.qaln), len(aln.saln)) + self.assertEqual(len(aln.qaln), aln.length) + + def test_needle_dna_qid_sid(self): + qid = 'dna1' + sid = 'dna2' + qseq = 'ATGCTAGATA' + sseq = 'ATGCTAGTTA' + aln = psa.needle(moltype='nucl', qseq=qseq, sseq=sseq, qid=qid, sid=sid) + self.assertEqual(len(aln.qaln), len(aln.saln)) + self.assertEqual(len(aln.qaln), aln.length) + self.assertEqual(aln.qid, qid) + self.assertEqual(aln.sid, sid) + + +class TestPairwiseAlignment(unittest.TestCase): + + def setUp(self): + self.aln1 = psa.PairwiseAlignment( + qid='query', + sid='subject', + qseq='MERILIIMTGGTITSIRDDEVTLELLIDYRKRFGDTQRFDIVKLMNIIS', + sseq='MERILIIMTGGTISSIKKENILNVDDEVTLELLIDYRKRFGDSQKFDIVILIIS', + qaln='MERILIIMTGGTITSIR-------DDEVTLELLIDYRKRFGDTQRFDIVKLMNIIS', + saln='MERILIIMTGGTISSIKKENILNVDDEVTLELLIDYRKRFGDSQKFDIVIL--IIS', + qstart=1, + 
qend=49, + sstart=1, + send=54, + length=56, + score=183.5, + nidentity=42, + nsimilarity=46, + ngaps=9, + moltype='prot', + program='needle', + gapopen=10, + gapextend=0.5, + matrix='EBLOSUM62', + raw=[] # Skipped for code readability. + ) + + self.aln2 = psa.PairwiseAlignment( + qid='oddly_long_sequence_identifier1', + sid='oddly_long_sequence_identifier2', + qseq='ATGCTAGTAGTTGATTTTTT', + sseq='ATGCTAGTAGATGAT', + qaln='ATGCTAGTAGTTGAT', + saln='ATGCTAGTAGATGAT', + qstart=1, + qend=15, + sstart=1, + send=15, + length=15, + score=66.0, + nidentity=14, + nsimilarity=14, + ngaps=0, + moltype='nucl', + program='water', + gapopen=10, + gapextend=0.5, + matrix='EDNAFULL', + raw=[] # Skipped for code readability. + ) + + def test_pidentity(self): + self.assertEqual(self.aln1.pidentity, 75.0) + self.assertAlmostEqual(self.aln2.pidentity, 93.3333333) + + def test_psimilarity(self): + self.assertAlmostEqual(self.aln1.psimilarity, 82.1428571) + self.assertAlmostEqual(self.aln2.psimilarity, 93.3333333) + + def test_pgaps(self): + self.assertAlmostEqual(self.aln1.pgaps, 16.0714286) + self.assertAlmostEqual(self.aln2.pgaps, 0) + + def test_query_coverage(self): + self.assertAlmostEqual(self.aln1.query_coverage(), 100.0) + self.assertAlmostEqual(self.aln2.query_coverage(), 75.0) + + def test_subject_coverage(self): + self.assertAlmostEqual(self.aln1.subject_coverage(), 100.0) + self.assertAlmostEqual(self.aln2.subject_coverage(), 100.0) + + def test_len(self): + self.assertEqual(len(self.aln1), 56) + self.assertEqual(len(self.aln2), 15) + + def test_getitem(self): + self.assertEqual(self.aln1[13], ('T', 'S')) + self.assertEqual(self.aln2[1], ('T', 'T')) + + def test_iter(self): + self.assertEqual(list(iter(self.aln1))[0], ('M', 'M')) + self.assertEqual(list(iter(self.aln2))[0], ('A', 'A')) + + def test_fasta_aln1(self): + fasta = [ + '>query 1-49', + 'MERILIIMTGGTITSIR-------DDEVTLELLIDYRKRFGDTQRFDIVKLMNIIS', + '>subject 1-54', + 
'MERILIIMTGGTISSIKKENILNVDDEVTLELLIDYRKRFGDSQKFDIVIL--IIS', + ] + self.assertEqual(self.aln1.fasta(), "\n".join(fasta)) + + def test_fasta_aln2(self): + fasta = [ + '>oddly_long_sequence_identifier1 1-15', + 'ATGCTAGTAGTTGAT', + '>oddly_long_sequence_identifier2 1-15', + 'ATGCTAGTAGATGAT', + ] + self.assertEqual(self.aln2.fasta(), "\n".join(fasta)) + + def test_fasta_aln1_wrap10(self): + fasta = [ + '>query 1-49', + 'MERILIIMTG', + 'GTITSIR---', + '----DDEVTL', + 'ELLIDYRKRF', + 'GDTQRFDIVK', + 'LMNIIS', + '>subject 1-54', + 'MERILIIMTG', + 'GTISSIKKEN', + 'ILNVDDEVTL', + 'ELLIDYRKRF', + 'GDSQKFDIVI', + 'L--IIS', + ] + self.assertEqual(self.aln1.fasta(wrap=10), "\n".join(fasta)) + + def test_fasta_aln2_wrap10(self): + fasta = [ + '>oddly_long_sequence_identifier1 1-15', + 'ATGCTAGTAG', + 'TTGAT', + '>oddly_long_sequence_identifier2 1-15', + 'ATGCTAGTAG', + 'ATGAT', + ] + self.assertEqual(self.aln2.fasta(wrap=10), "\n".join(fasta)) + +unittest.main() \ No newline at end of file