Organizing project structure to prepare for conda/pypi upload

WGS-TB · May 14, 2018 · 59e5008 · 59e5008
1 parent 236c24a
commit 59e5008
Show file tree

Hide file tree

Showing 24 changed files with 190 additions and 24 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,104 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,20 @@
+The MIT License
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
diff --git a/README.md b/README.md
@@ -23,7 +23,7 @@ git clone https://github.com/WGS-TB/PythonPRINCE.git
 To check PRINCE is installed properly run 
 
 ```
-python Prince.py -to test_output.txt -tf sample_targets.txt
+prince -to test_output.txt -tf sample_targets.txt
 ```
 The output file should contain two rows with 24 random real numbers.
 
@@ -48,7 +48,7 @@ Once you have your target file you can run PRINCE.
 Specify a target output file (e.g., output.txt) with -to. If the file doesn't exist, PRINCE will create one. 
 
 ```
-python Prince.py -tf samples.txt -to output.txt
+prince -tf samples.txt -to output.txt
 ``` 
 
 Each line in output.txt will correspond to the predicted VNTR copy numbers for the corresponding sample in your target file.
@@ -67,14 +67,14 @@ Once you have your altered genomes you can create simulated reads using your pre
 Create a separate training file for each copy number with the paths to all your genomes with that many copies at each VNTR region.
 Specify your training output file.
 ```
-python Prince.py -bf training_samples_cn_1.txt -bo training_output.txt -cn 1
-python Prince.py -bf training_samples_cn_2.txt -bo training_output.txt -cn 2
-python Prince.py -bf training_samples_cn_3.txt -bo training_output.txt -cn 3
-python Prince.py -bf training_samples_cn_4.txt -bo training_output.txt -cn 4
+prince -bf training_samples_cn_1.txt -bo training_output.txt -cn 1
+prince -bf training_samples_cn_2.txt -bo training_output.txt -cn 2
+prince -bf training_samples_cn_3.txt -bo training_output.txt -cn 3
+prince -bf training_samples_cn_4.txt -bo training_output.txt -cn 4
 ```
 To use your new training data on your queries, specify the training output file.
 ```
-python Prince.py -tf samples.txt -to output.txt -bo training_output.txt
+prince -tf samples.txt -to output.txt -bo training_output.txt
 ```
 
 ## Built With

diff --git a/Prince.py → bin/prince b/Prince.py → bin/prince
@@ -1,21 +1,23 @@
-from Bio import SeqIO
-from Kmer_Generator import kmerGenerator
-from boost import run_boosts 
-from query_sample import test_target
+#!/usr/bin/env python
+
 import argparse
 import warnings
 
+from Bio import SeqIO
+from prince.kmer_generator import kmerGenerator
+from prince.boost import run_boosts 
+from prince.query_sample import test_target
+
 DEFAULT_K = 9
 DEFAULT_BOOST_OUTPUT = "training_data.txt"
 
 def main():
-
     parser = argparse.ArgumentParser(description='Prince Options.')
 
     parser.add_argument('-bo', '--boost_output', default=DEFAULT_BOOST_OUTPUT,
-                help="output file for training data / training data used to predict copy numbers for queries")
+                        help="output file for training data / training data used to predict copy numbers for queries")
     parser.add_argument('-to', '--target_output', default="results/predictions.csv",
-                help="output file for query copy number predictions")
+                        help="output file for query copy number predictions")
     parser.add_argument('-tmp','--templates', default="templates.fasta",
                 help="VNTR templates. Default is for M.TB")
     parser.add_argument('-tf', '--target_file', default=None,
@@ -31,7 +33,7 @@ def main():
 
     #Safety check:
     if prince_options.k != DEFAULT_K and prince_options.boost_output == DEFAULT_BOOST_OUTPUT:
-	warnings.warn("Warning: Target kmer size does not equal training settings. May lead to inaccurate predictions.")
+        warnings.warn("Warning: Target kmer size does not equal training settings. May lead to inaccurate predictions.")
 
 
     #Template data initialized
@@ -49,4 +51,4 @@ def main():
         test_target(prince_options, templates, templateNames,templateKmers)
 
 if __name__ == '__main__':
-    main()
+    main()
diff --git a/Plots.ipynb → docs/Plots.ipynb b/Plots.ipynb → docs/Plots.ipynb
diff --git a/Testing_of_related_methods.ipynb → docs/Testing_of_related_methods.ipynb b/Testing_of_related_methods.ipynb → docs/Testing_of_related_methods.ipynb
diff --git a/prince/__init__.py b/prince/__init__.py
@@ -0,0 +1 @@
+__version__ = '0.0.0'
diff --git a/boost.py → prince/boost.py b/boost.py → prince/boost.py
@@ -1,4 +1,4 @@
-from match_score import compute_match_score
+from prince.match_score import compute_match_score
 
 def run_boosts(opts,templates,templateNames,templateKmers):
     with open(opts.boosting_file) as file:

diff --git a/COARSE_filtering.py → prince/coarse_filtering.py b/COARSE_filtering.py → prince/coarse_filtering.py
diff --git a/FINE_filtering.py → prince/fine_filtering.py b/FINE_filtering.py → prince/fine_filtering.py
diff --git a/Kmer_Generator.py → prince/kmer_generator.py b/Kmer_Generator.py → prince/kmer_generator.py
diff --git a/match_score.py → prince/match_score.py b/match_score.py → prince/match_score.py
@@ -1,6 +1,6 @@
 from Bio import SeqIO
-from COARSE_filtering import coarse_filtering
-from FINE_filtering import fine_filtering
+from prince.coarse_filtering import coarse_filtering
+from prince.fine_filtering import fine_filtering
 from itertools import chain
 
 def check_file_exists(itr8tr):
@@ -39,4 +39,4 @@ def compute_match_score(genome, templates, templateKmers, kmerLength):
 
     #Normalize score by adjusting for coverage
     matchScore = [t/coverage for t in matchScore]    
-    return matchScore
+    return matchScore
diff --git a/predict.py → prince/predict.py b/predict.py → prince/predict.py
diff --git a/query_sample.py → prince/query_sample.py b/query_sample.py → prince/query_sample.py
@@ -1,6 +1,6 @@
 from math import sqrt
-from predict import get_data, get_equations, get_copy_number
-from match_score import compute_match_score
+from prince.predict import get_data, get_equations, get_copy_number
+from prince.match_score import compute_match_score
 import time
 
 def test_target(opts, templates,templateNames, templateKmers):

diff --git a/medium_test1.fq → prince/tests/data/medium_test1.fq b/medium_test1.fq → prince/tests/data/medium_test1.fq
diff --git a/medium_test2.fq → prince/tests/data/medium_test2.fq b/medium_test2.fq → prince/tests/data/medium_test2.fq
diff --git a/small_test1.fastq → prince/tests/data/small_test1.fastq b/small_test1.fastq → prince/tests/data/small_test1.fastq
diff --git a/small_test2.fastq → prince/tests/data/small_test2.fastq b/small_test2.fastq → prince/tests/data/small_test2.fastq
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,3 @@
+biopython
+numpy
+scipy
diff --git a/sample_targets.txt b/sample_targets.txt
@@ -1,2 +1,2 @@
-small_test
-medium_test
+prince/tests/data/small_test
+prince/tests/data/medium_test
diff --git a/Inject_repeats.py → scripts/Inject_repeats.py b/Inject_repeats.py → scripts/Inject_repeats.py
diff --git a/incomplete_repeats.py → scripts/incomplete_repeats.py b/incomplete_repeats.py → scripts/incomplete_repeats.py
diff --git a/plots.py → scripts/plots.py b/plots.py → scripts/plots.py
diff --git a/setup.py b/setup.py
@@ -0,0 +1,36 @@
+from distutils.core import setup
+
+from setuptools import find_packages
+
+from prince import __version__
+
+classifiers = """
+Development Status :: 4 - Beta
+Environment :: Console
+License :: OSI Approved :: MIT License
+Intended Audience :: Science/Research
+Topic :: Scientific/Engineering
+Topic :: Scientific/Engineering :: Bio-Informatics
+Programming Language :: Python :: 2.7
+Operating System :: POSIX :: Linux
+""".strip().split('\n')
+
+setup(name='prince',
+      version=__version__,
+      description='PRINCE estimates Variable Number Tandem Repeats (VNTR) copy number from raw next generation sequencing (NGS) data.',
+      author='Julius Booth, Margaryta Vityaz, Merhdad Mansouri, Leonid Chindelevitch',
+      author_email='',
+      url='https://github.com/WGS-TB/PythonPRINCE',
+      license='MIT',
+      classifiers=classifiers,
+      install_requires=[
+          'biopython',
+          'scipy',
+          'numpy'
+      ],
+      test_suite='nose.collector',
+      tests_require=['nose'],
+      packages=find_packages(),
+      include_package_data=True,
+      scripts=['bin/prince']
+)