Skip to content

Commit

Permalink
Organizing project structure to prepare for conda/pypi upload
Browse files Browse the repository at this point in the history
  • Loading branch information
dfornika committed May 14, 2018
1 parent 236c24a commit 59e5008
Show file tree
Hide file tree
Showing 24 changed files with 190 additions and 24 deletions.
104 changes: 104 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
20 changes: 20 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
The MIT License

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

14 changes: 7 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ git clone https://github.com/WGS-TB/PythonPRINCE.git
To check that PRINCE is installed properly, run:

```
python Prince.py -to test_output.txt -tf sample_targets.txt
prince -to test_output.txt -tf sample_targets.txt
```
The output file should contain two rows with 24 random real numbers.

Expand All @@ -48,7 +48,7 @@ Once you have your target file you can run PRINCE.
Specify a target output file (e.g. output.txt) with -to. If the file doesn't exist, PRINCE will create one.

```
python Prince.py -tf samples.txt -to output.txt
prince -tf samples.txt -to output.txt
```

Each line in output.txt will correspond to the predicted VNTR copy numbers for the corresponding sample in your target file.
Expand All @@ -67,14 +67,14 @@ Once you have your altered genomes you can create simulated reads using your pre
Create a separate training file for each copy number with the paths to all your genomes with that many copies at each VNTR region.
Specify your training output file.
```
python Prince.py -bf training_samples_cn_1.txt -bo training_output.txt -cn 1
python Prince.py -bf training_samples_cn_2.txt -bo training_output.txt -cn 2
python Prince.py -bf training_samples_cn_3.txt -bo training_output.txt -cn 3
python Prince.py -bf training_samples_cn_4.txt -bo training_output.txt -cn 4
prince -bf training_samples_cn_1.txt -bo training_output.txt -cn 1
prince -bf training_samples_cn_2.txt -bo training_output.txt -cn 2
prince -bf training_samples_cn_3.txt -bo training_output.txt -cn 3
prince -bf training_samples_cn_4.txt -bo training_output.txt -cn 4
```
To use your new training data on your queries, specify the training output file.
```
python Prince.py -tf samples.txt -to output.txt -bo training_output.txt
prince -tf samples.txt -to output.txt -bo training_output.txt
```

## Built With
Expand Down
20 changes: 11 additions & 9 deletions Prince.py → bin/prince
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
from Bio import SeqIO
from Kmer_Generator import kmerGenerator
from boost import run_boosts
from query_sample import test_target
#!/usr/bin/env python

import argparse
import warnings

from Bio import SeqIO
from prince.kmer_generator import kmerGenerator
from prince.boost import run_boosts
from prince.query_sample import test_target

DEFAULT_K = 9
DEFAULT_BOOST_OUTPUT = "training_data.txt"

def main():

parser = argparse.ArgumentParser(description='Prince Options.')

parser.add_argument('-bo', '--boost_output', default=DEFAULT_BOOST_OUTPUT,
help="output file for training data / training data used to predict copy numbers for queries")
help="output file for training data / training data used to predict copy numbers for queries")
parser.add_argument('-to', '--target_output', default="results/predictions.csv",
help="output file for query copy number predictions")
help="output file for query copy number predictions")
parser.add_argument('-tmp','--templates', default="templates.fasta",
help="VNTR templates. Default is for M.TB")
parser.add_argument('-tf', '--target_file', default=None,
Expand All @@ -31,7 +33,7 @@ def main():

#Safety check:
if prince_options.k != DEFAULT_K and prince_options.boost_output == DEFAULT_BOOST_OUTPUT:
warnings.warn("Warning: Target kmer size does not equal training settings. May lead to inaccurate predictions.")
warnings.warn("Warning: Target kmer size does not equal training settings. May lead to inaccurate predictions.")


#Template data initialized
Expand All @@ -49,4 +51,4 @@ def main():
test_target(prince_options, templates, templateNames,templateKmers)

if __name__ == '__main__':
main()
main()
File renamed without changes.
File renamed without changes.
1 change: 1 addition & 0 deletions prince/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__version__ = '0.0.0'
2 changes: 1 addition & 1 deletion boost.py → prince/boost.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from match_score import compute_match_score
from prince.match_score import compute_match_score

def run_boosts(opts,templates,templateNames,templateKmers):
with open(opts.boosting_file) as file:
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
6 changes: 3 additions & 3 deletions match_score.py → prince/match_score.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from Bio import SeqIO
from COARSE_filtering import coarse_filtering
from FINE_filtering import fine_filtering
from prince.coarse_filtering import coarse_filtering
from prince.fine_filtering import fine_filtering
from itertools import chain

def check_file_exists(itr8tr):
Expand Down Expand Up @@ -39,4 +39,4 @@ def compute_match_score(genome, templates, templateKmers, kmerLength):

#Normalize score by adjusting for coverage
matchScore = [t/coverage for t in matchScore]
return matchScore
return matchScore
File renamed without changes.
4 changes: 2 additions & 2 deletions query_sample.py → prince/query_sample.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from math import sqrt
from predict import get_data, get_equations, get_copy_number
from match_score import compute_match_score
from prince.predict import get_data, get_equations, get_copy_number
from prince.match_score import compute_match_score
import time

def test_target(opts, templates,templateNames, templateKmers):
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
biopython
numpy
scipy
4 changes: 2 additions & 2 deletions sample_targets.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
small_test
medium_test
prince/tests/data/small_test
prince/tests/data/medium_test
File renamed without changes.
File renamed without changes.
File renamed without changes.
36 changes: 36 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""Packaging script for PRINCE.

Declares the distribution metadata, the runtime dependencies and the
``bin/prince`` command-line script so the project can be built and
installed with setuptools.
"""

# ``setup`` must come from setuptools, not ``distutils.core``: the keywords
# ``install_requires``, ``tests_require``, ``test_suite`` and
# ``include_package_data`` used below are setuptools extensions, and
# distutils itself is removed from the standard library in Python 3.12.
from setuptools import find_packages, setup

# NOTE: this imports the ``prince`` package at build time, so
# ``prince/__init__.py`` must stay importable without the dependencies
# listed in ``install_requires``.
from prince import __version__

# Trove classifiers describing the project on the package index.
classifiers = [
    'Development Status :: 4 - Beta',
    'Environment :: Console',
    'License :: OSI Approved :: MIT License',
    'Intended Audience :: Science/Research',
    'Topic :: Scientific/Engineering',
    'Topic :: Scientific/Engineering :: Bio-Informatics',
    'Programming Language :: Python :: 2.7',
    'Operating System :: POSIX :: Linux',
]

setup(
    name='prince',
    version=__version__,
    description='PRINCE estimates Variable Number Tandem Repeats (VNTR) copy number from raw next generation sequencing (NGS) data.',
    author='Julius Booth, Margaryta Vityaz, Merhdad Mansouri, Leonid Chindelevitch',
    author_email='',
    url='https://github.com/WGS-TB/PythonPRINCE',
    license='MIT',
    classifiers=classifiers,
    # Runtime dependencies; kept in sync with requirements.txt.
    install_requires=[
        'biopython',
        'scipy',
        'numpy',
    ],
    test_suite='nose.collector',
    tests_require=['nose'],
    packages=find_packages(),
    include_package_data=True,
    # Installs the command-line entry point as an executable script.
    scripts=['bin/prince'],
)

0 comments on commit 59e5008

Please sign in to comment.