diff --git a/.flake8 b/.flake8 index e585c9e..7d0a64f 100644 --- a/.flake8 +++ b/.flake8 @@ -1,2 +1,3 @@ [flake8] ignore = E211,E221,E226,E501 +exclude = .git, env, .tox \ No newline at end of file diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml new file mode 100644 index 0000000..dbf4a85 --- /dev/null +++ b/.github/workflows/python-package.yml @@ -0,0 +1,40 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + +name: Python package + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11"] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install tox pytest flake8 + pip install -r requirements.txt + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . 
--count --exit-zero --max-complexity=12 --max-line-length=127 --statistics + - name: Test with pytest + run: | + python -m pytest tests diff --git a/.gitignore b/.gitignore index c23fba5..78f7d7c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ *.pyc *~ *.html +env +.tox diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index e779089..0000000 --- a/.travis.yml +++ /dev/null @@ -1,16 +0,0 @@ -# Config file for automatic testing at travis-ci.org - -language: python -python: - - 3.6 - - 3.5 - - 3.4 - - 2.7 - -# Command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors -install: - - pip install -r requirements.txt - - pip install -U tox-travis - -# Command to run tests, e.g. python setup.py test -script: tox diff --git a/README.md b/README.md index e3d25e2..36c6879 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,11 @@ # A Simple Partial Order Alignment implementation -[![Build Status](https://travis-ci.org/ljdursi/poapy.svg?branch=master)](https://travis-ci.org/ljdursi/poapy) +![Build Status](https://github.com/ljdursi/poapy/actions/workflows/python-package.yml/badge.svg) This is a simple python implementation of a Partial Order Alignment for MSA, based on -[Multiple sequence alignment using partial order graphs](http://bioinformatics.oxfordjournals.org/content/18/3/452.short) (2002) by Lee, Grasso, and Sharlow +[Multiple sequence alignment using partial order graphs](http://bioinformatics.oxfordjournals.org/content/18/3/452.short) (2002) by Lee, Grasso, and Sharlow and [Generating consensus sequences from partial order ...](http://bioinformatics.oxfordjournals.org/content/19/8/999.short) (2003) by Lee diff --git a/poagraph.py b/poagraph.py index 7eb4abd..8018698 100644 --- a/poagraph.py +++ b/poagraph.py @@ -75,7 +75,7 @@ def __init__(self, inNodeID=-1, outNodeID=-1, label=None): self.outNodeID = outNodeID if label is None: self.labels = [] - elif type(label) == list: + elif type(label) is list: self.labels = label else: 
self.labels = [label] @@ -180,8 +180,8 @@ def nEdges(self): return self._nedges def _simplified_graph_rep(self): - ## TODO: The need for this suggests that the way the graph is currently represented - ## isn't really right and needs some rethinking. + # TODO: The need for this suggests that the way the graph is currently represented + # isn't really right and needs some rethinking. node_to_pn = {} pn_to_nodes = {} @@ -217,18 +217,18 @@ def toposort(self): sortedlist = [] completed = set([]) - ## - ## The topological sort of this graph is complicated by the alignedTo edges; - ## we want to nodes connected by such edges to remain near each other in the - ## topological sort. - ## - ## Here we'll create a simple version of the graph that merges nodes that - ## are alignedTo each other, performs the sort, and then decomposes the - ## 'pseudonodes'. - ## - ## The need for this suggests that the way the graph is currently represented - ## isn't quite right and needs some rethinking. - ## + # + # The topological sort of this graph is complicated by the alignedTo edges; + # we want nodes connected by such edges to remain near each other in the + # topological sort. + # + # Here we'll create a simple version of the graph that merges nodes that + # are alignedTo each other, performs the sort, and then decomposes the + # 'pseudonodes'. + # + # The need for this suggests that the way the graph is currently represented + # isn't quite right and needs some rethinking. 
+ # pseudonodes = self._simplified_graph_rep() @@ -383,7 +383,7 @@ def consensus(self, excludeLabels=None): for neighbourID in self.nodedict[nodeID].outEdges: e = self.nodedict[nodeID].outEdges[neighbourID] - weight = len([l for l in e.labels if l not in excludeLabels]) + weight = len([label for label in e.labels if label not in excludeLabels]) weightScoreEdge = (weight, scores[neighbourID], neighbourID) if weightScoreEdge > bestWeightScoreEdge: @@ -423,8 +423,8 @@ def allConsenses(self, maxfraction=0.5): labelcounts = collections.defaultdict(int) for ll in labellists: - for l in ll: - labelcounts[l] += 1 + for label in ll: + labelcounts[label] += 1 for label, seq in zip(self.__labels, self.__seqs): if label in labelcounts and labelcounts[ diff --git a/seqgraphalignment.py b/seqgraphalignment.py index b4461bd..8e7f615 100644 --- a/seqgraphalignment.py +++ b/seqgraphalignment.py @@ -50,7 +50,7 @@ def matchscoreVec(self, c, v): def alignStringToGraphSimple(self): """Align string to graph, following same approach as smith waterman example""" - if type(self.sequence) != str: + if type(self.sequence) is not str: raise TypeError("Invalid Type") nodeIDtoIndex, nodeIndexToID, scores, backStrIdx, backGrphIdx = self.initializeDynamicProgrammingData() @@ -79,14 +79,14 @@ def alignStringToGraphSimple(self): def alignStringToGraphFast(self): """Align string to graph - using numpy to vectorize across the string at each iteration.""" - if not type(self.sequence) == str: + if type(self.sequence) is not str: raise TypeError("Invalid Type") l2 = len(self.sequence) seqvec = numpy.array(list(self.sequence)) nodeIDtoIndex, nodeIndexToID, scores, backStrIdx, backGrphIdx = self.initializeDynamicProgrammingData() - inserted = numpy.zeros((l2), dtype=numpy.bool) + inserted = numpy.zeros((l2), dtype=bool) # having the inner loop as a function improves performance # can use Cython, etc on this for significant further improvements @@ -111,11 +111,11 @@ def insertions(i, l2, scores, inserted): 
# First calculate for the first predecessor, over all string posns: deletescore = scores[predecessors[0]+1, 1:] + self._gap - bestdelete = numpy.zeros((l2), dtype=numpy.int)+predecessors[0]+1 + bestdelete = numpy.zeros((l2), dtype=numpy.int32)+predecessors[0]+1 matchpoints = self.matchscoreVec(gbase, seqvec) matchscore = scores[predecessors[0]+1, 0:-1] + matchpoints - bestmatch = numpy.zeros((l2), dtype=numpy.int)+predecessors[0]+1 + bestmatch = numpy.zeros((l2), dtype=numpy.int32)+predecessors[0]+1 # then, the remaining for predecessor in predecessors[1:]: @@ -176,7 +176,7 @@ def initializeDynamicProgrammingData(self): # Dynamic Programming data structures; scores matrix and backtracking # matrix - scores = numpy.zeros((l1+1, l2+1), dtype=numpy.int) + scores = numpy.zeros((l1+1, l2+1), dtype=numpy.int32) # initialize insertion score # if global align, penalty for starting at head != 0 @@ -192,8 +192,8 @@ def initializeDynamicProgrammingData(self): scores[index+1, 0] = best + self._gap # backtracking matrices - backStrIdx = numpy.zeros((l1+1, l2+1), dtype=numpy.int) - backGrphIdx = numpy.zeros((l1+1, l2+1), dtype=numpy.int) + backStrIdx = numpy.zeros((l1+1, l2+1), dtype=numpy.int32) + backGrphIdx = numpy.zeros((l1+1, l2+1), dtype=numpy.int32) return nodeIDtoIndex, nodeIndexToID, scores, backStrIdx, backGrphIdx diff --git a/simplefasta.py b/simplefasta.py index 99783bb..02ac7c9 100644 --- a/simplefasta.py +++ b/simplefasta.py @@ -14,7 +14,7 @@ def readfasta(infile): cursequence = "" def updatelists(): - if len(cursequence) is not 0: + if len(cursequence) != 0: sequences.append(cursequence) if curlabel is not None: labels.append(curlabel) diff --git a/tests/test_alignment_column_order.py b/tests/test_alignment_column_order.py index 4ff41ad..13b4cb5 100644 --- a/tests/test_alignment_column_order.py +++ b/tests/test_alignment_column_order.py @@ -1,7 +1,6 @@ """ Tests for Issue #6, provided by @rlorigro """ -import pytest import poagraph import seqgraphalignment diff 
--git a/tox.ini b/tox.ini index 2077fcb..7c55908 100644 --- a/tox.ini +++ b/tox.ini @@ -2,13 +2,6 @@ envlist = py27, py34, py35, py36, flake8 skipsdist=True -[travis] -python = - 3.6: py36 - 3.5: py35 - 3.4: py34 - 2.7: py27 - [testenv:flake8] basepython = python deps = flake8