diff --git a/README.md b/README.md index f4fe714..c957f55 100644 --- a/README.md +++ b/README.md @@ -92,9 +92,6 @@ or ```George Forman, "An Extensive Empirical Study of Feature Selection Metrics for Text Classification",Journal of Machine Learning Research 3 (2003) 1289-1305``` - - - # Requirement @@ -107,6 +104,21 @@ or `python setup.py install` +### Note + +You might see an error message while running this command, such as + +``` +We failed to install numpy automatically. Try installing numpy manually or Try anaconda distribution. +``` + +This happens when `setup.py` tries to install numpy and scipy with `pip` but the installation fails. +numpy and scipy must be installed before `scikit-learn` can be installed. + +In this case, choose one of the following options: + +* Install `numpy` and `scipy` manually. +* Use the `anaconda` Python distribution. Please visit [their site](https://www.continuum.io/downloads). # Examples @@ -157,4 +169,6 @@ Removed a bug when calling n_gram method of DataConverter * Resolved bottleneck poins in pre-processing * Introduced dict-vectorising in ScikitLearn * Introduced Cython in calculating PMI \& SOA. You can call them with `use_cython=True` flag. See `examples/example_python3.py` - +* Performance + * Cython PMI takes 11.87 sec. + * Python multiprocessing PMI takes 513.541 sec. (8.55 min.) 
\ No newline at end of file diff --git a/setup.py b/setup.py index 2e6fef7..b3e21f9 100644 --- a/setup.py +++ b/setup.py @@ -6,15 +6,16 @@ __version__ = '1.3' import sys +import pip from setuptools import setup, find_packages +from distutils.extension import Extension + -# Flags to compile Cython code or use already compiled code # -------------------------------------------------------------------------------------------------------- +# Flags to compile Cython code or use already compiled code try: from Cython.Build import cythonize - from distutils.extension import Extension from Cython.Distutils import build_ext - import numpy except ImportError: use_cython = False else: @@ -32,14 +33,34 @@ ext_modules += [ Extension("DocumentFeatureSelection.pmi.pmi_cython", [ "DocumentFeatureSelection/pmi/pmi_cython.c" ]), ] -# -------------------------------------------------------------------------------------------------------- +# -------------------------------------------------------------------------------------------------------- +# try to install numpy automatically because sklearn requires the status where numpy is already installed +try: + import numpy +except ImportError: + use_numpy_include_dirs = False + try: + pip.main(['install', 'numpy']) + except: + raise Exception('We failed to install numpy automatically. Try installing numpy manually or Try anaconda distribution.') +# -------------------------------------------------------------------------------------------------------- +# try to install scipy automatically because sklearn requires the status where scipy is already installed +try: + import scipy +except ImportError: + use_numpy_include_dirs = False + try: + pip.main(['install', 'scipy']) + except: + raise Exception('We failed to install scipy automatically. 
Try installing scipy manually or Try anaconda distribution.') +# -------------------------------------------------------------------------------------------------------- python_version = sys.version_info if python_version >= (3, 0, 0): - install_requires = ['six', 'setuptools>=1.0', 'joblib', - 'scipy', 'nltk', 'scikit-learn', 'numpy', 'pypandoc', 'cython', 'scikit-learn'] + install_requires = ['six', 'setuptools>=1.0', 'joblib', 'numpy', + 'scipy', 'nltk', 'scikit-learn', 'pypandoc', 'cython'] else: raise Exception('This package does NOT support Python2.x') @@ -75,7 +96,8 @@ zip_safe=False, test_suite='tests.all_tests.suite', install_requires=install_requires, - setup_requires=['six', 'setuptools>=1.0'], + tests_require=install_requires, + setup_requires=['six', 'setuptools>=1.0', 'pip'], classifiers=[], cmdclass=cmdclass, ext_modules=ext_modules,