diff --git a/.gitignore b/.gitignore index eda3c5d..98450f3 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ *.so # Packages +*.eggs *.egg *.egg-info dist @@ -50,3 +51,4 @@ docs/_build # Dev environment env/ .idea/ +*.bak \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index 496d264..040454d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,6 +3,7 @@ language: python python: + - "2.7" - "3.4" # command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors diff --git a/HISTORY.rst b/HISTORY.rst index 3eb0db6..6842f3c 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -7,3 +7,10 @@ History ------------------ * First release on PyPI. + +0.1.1 (2015-02-10) +------------------ + +* Fixed some package issues. +* Added Python 2.7 support +* Added extra parameter to search_param method. diff --git a/MANIFEST.in b/MANIFEST.in index ed592db..1b59795 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,6 +3,7 @@ include CONTRIBUTING.rst include HISTORY.rst include LICENSE include README.rst +include requirements.txt recursive-include tests * recursive-exclude * __pycache__ diff --git a/elm/__init__.py b/elm/__init__.py index 4e63baa..65a97d8 100755 --- a/elm/__init__.py +++ b/elm/__init__.py @@ -2,7 +2,7 @@ __author__ = 'Augusto Almeida' __email__ = 'acba@cin.ufpe.br' -__version__ = '0.1.0' +__version__ = '0.1.1' from .elmk import ELMKernel from .elmr import ELMRandom diff --git a/elm/elmk.py b/elm/elmk.py index 54b0793..fd1bbe0 100644 --- a/elm/elmk.py +++ b/elm/elmk.py @@ -4,16 +4,28 @@ This file contains ELMKernel classes and all developed methods. """ +# Python2 support +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + +from .mltools import * + import numpy as np import optunity import ast -from .mltools import * - -try: - import configparser -except ImportError: +import sys +if sys.version_info < (3, 0): import ConfigParser as configparser +else: + import configparser + + +# Find configuration file +from pkg_resources import Requirement, resource_filename +_ELMK_CONFIG = resource_filename(Requirement.parse("elm"), "elm/elmk.cfg") class ELMKernel(MLTools): @@ -91,7 +103,7 @@ def __init__(self, params=[]): >>> elmk = elm.ELMKernel(params) """ - super().__init__() + super(self.__class__, self).__init__() self.regressor_name = "elmk" @@ -268,7 +280,7 @@ def print_parameters(self): print() def search_param(self, database, dataprocess=None, path_filename=("", ""), - save=False, cv="ts", min_f="rmse", kf=None): + save=False, cv="ts", of="rmse", kf=None, eval=50): """ Search best hyperparameters for classifier/regressor based on optunity algorithms. @@ -282,11 +294,12 @@ def search_param(self, database, dataprocess=None, path_filename=("", ""), path_filename (tuple): *TODO*. save (bool): *TODO*. cv (str): Cross-validation method. Defaults to "ts". - min_f (str): Objective function to be minimized at + of (str): Objective function to be minimized at optunity.minimize. Defaults to "rmse". kf (list of str): a list of kernel functions to be used by the search. Defaults to None, this set all available functions. + eval (int): Number of steps (evaluations) to optunity algorithm. Each set of hyperparameters will perform a cross-validation @@ -300,7 +313,7 @@ def search_param(self, database, dataprocess=None, path_filename=("", ""), - "kfold" :func:`mltools.kfold_cross_validation()` Perform a k-fold cross-validation. - Available *min_f* function: + Available *of* function: - "accuracy", "rmse", "mape", "me". @@ -319,7 +332,11 @@ def search_param(self, database, dataprocess=None, path_filename=("", ""), print("##### Start search #####") config = configparser.ConfigParser() - config.read("elm/elmk.cfg") + + if sys.version_info < (3, 0): + config.readfp(open(_ELMK_CONFIG)) + else: + config.read_file(open(_ELMK_CONFIG)) best_function_error = 99999.9 temp_error = best_function_error @@ -328,15 +345,28 @@ def search_param(self, database, dataprocess=None, path_filename=("", ""), best_param_kernel_param = [] for kernel_function in search_kernel_functions: - kernel_config = config[kernel_function] - n_parameters = int(kernel_config["kernel_n_param"]) + if sys.version_info < (3, 0): + elmk_c_range = ast.literal_eval(config.get("DEFAULT", + "elmk_c_range")) + + n_parameters = config.getint(kernel_function, "kernel_n_param") + kernel_p_range = \ + ast.literal_eval(config.get(kernel_function, + "kernel_params_range")) + + else: + kernel_config = config[kernel_function] + + elmk_c_range = ast.literal_eval(kernel_config["elmk_c_range"]) - _kc = ast.literal_eval(kernel_config["elmk_c_range"]) + n_parameters = int(kernel_config["kernel_n_param"]) + kernel_p_range = \ + ast.literal_eval(kernel_config["kernel_params_range"]) - param_ranges = [[_kc[0][0], _kc[0][1]]] - _kc = ast.literal_eval(kernel_config["kernel_params_range"]) + param_ranges = [[elmk_c_range[0][0], elmk_c_range[0][1]]] for param in range(n_parameters): - param_ranges.append([_kc[param][0], _kc[param][1]]) + param_ranges.append([kernel_p_range[param][0], + kernel_p_range[param][1]]) def wrapper_0param(param_c): """ @@ -364,10 +394,10 @@ def wrapper_0param(param_c): else: raise Exception("Invalid type of cross-validation.") - if min_f == "accuracy": + if of == "accuracy": util = 1 / cv_te_error.get_accuracy() else: - util = cv_te_error.get(min_f) + util = cv_te_error.get(of) # print("c:", param_c, "util: ", util) return util @@ -398,10 +428,10 @@ def wrapper_1param(param_c, param_kernel): else: raise Exception("Invalid type of cross-validation.") - if min_f == "accuracy": + if of == "accuracy": util = 1 / cv_te_error.get_accuracy() else: - util = cv_te_error.get(min_f) + util = cv_te_error.get(of) # print("c:", param_c, " gamma:", param_kernel, "util: ", util) return util @@ -434,10 +464,10 @@ def wrapper_2param(param_c, param_kernel1, param_kernel2): else: raise Exception("Invalid type of cross-validation.") - if min_f == "accuracy": + if of == "accuracy": util = 1 / cv_te_error.get_accuracy() else: - util = cv_te_error.get(min_f) + util = cv_te_error.get(of) # print("c:", param_c, " param1:", param_kernel1, # " param2:", param_kernel2, "util: ", util) @@ -447,14 +477,14 @@ def wrapper_2param(param_c, param_kernel1, param_kernel2): optimal_parameters, details, _ = \ optunity.minimize(wrapper_0param, solver_name="cma-es", - num_evals=25, + num_evals=eval, param_c=param_ranges[0]) elif kernel_function == "rbf": optimal_parameters, details, _ = \ optunity.minimize(wrapper_1param, solver_name="cma-es", - num_evals=50, + num_evals=eval, param_c=param_ranges[0], param_kernel=param_ranges[1]) @@ -462,7 +492,7 @@ def wrapper_2param(param_c, param_kernel1, param_kernel2): optimal_parameters, details, _ = \ optunity.minimize(wrapper_2param, solver_name="cma-es", - num_evals=50, + num_evals=eval, param_c=param_ranges[0], param_kernel1=param_ranges[1], param_kernel2=param_ranges[2]) @@ -473,7 +503,7 @@ def wrapper_2param(param_c, param_kernel1, param_kernel2): if details[0] < temp_error: temp_error = details[0] - if min_f == "accuracy": + if of == "accuracy": best_function_error = 1 / temp_error else: best_function_error = temp_error @@ -495,7 +525,7 @@ def wrapper_2param(param_c, param_kernel1, param_kernel2): # print("best: ", best_param_kernel_function, # best_function_error, best_param_c, best_param_kernel_param) - if min_f == "accuracy": + if of == "accuracy": print("Kernel function: ", kernel_function, " best cv value: ", 1/details[0]) else: diff --git a/elm/elmr.py b/elm/elmr.py index dfacf1a..472b0a2 100644 --- a/elm/elmr.py +++ b/elm/elmr.py @@ -4,18 +4,24 @@ This file contains ELMKernel classes and all developed methods. """ -import numpy as np -import optunity +# Python2 support +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function from .mltools import * -try: - import configparser -except ImportError: - import ConfigParser as configparser - +import numpy as np +import optunity import ast +import sys +if sys.version_info < (3, 0): + import ConfigParser as configparser +else: + import configparser + try: from scipy.special import expit except ImportError: @@ -23,6 +29,10 @@ else: _SCIPY = 1 +# Find configuration file +from pkg_resources import Requirement, resource_filename +_ELMR_CONFIG = resource_filename(Requirement.parse("elm"), "elm/elmr.cfg") + class ELMRandom(MLTools): """ @@ -102,7 +112,7 @@ def __init__(self, params=[]): >>> elmr = elm.ELMRandom(params) """ - super().__init__() + super(self.__class__, self).__init__() self.available_functions = ["sigmoid", "multiquadric"] @@ -277,7 +287,7 @@ def _local_test(self, testing_patterns, testing_expected_targets, # ######################## def search_param(self, database, dataprocess=None, path_filename=("", ""), - save=False, cv="ts", min_f="rmse", f=None): + save=False, cv="ts", of="rmse", f=None, eval=50): """ Search best hyperparameters for classifier/regressor based on optunity algorithms. @@ -291,11 +301,12 @@ def search_param(self, database, dataprocess=None, path_filename=("", ""), path_filename (tuple): *TODO*. save (bool): *TODO*. cv (str): Cross-validation method. Defaults to "ts". - min_f (str): Objective function to be minimized at + of (str): Objective function to be minimized at optunity.minimize. Defaults to "rmse". f (list of str): a list of functions to be used by the search. Defaults to None, this set all available functions. + eval (int): Number of steps (evaluations) to optunity algorithm. Each set of hyperparameters will perform a cross-validation method chosen by param cv. @@ -307,7 +318,7 @@ def search_param(self, database, dataprocess=None, path_filename=("", ""), - "kfold" :func:`mltools.kfold_cross_validation()` Perform a k-fold cross-validation. - Available *min_f* function: + Available *of* function: - "accuracy", "rmse", "mape", "me". @@ -326,7 +337,10 @@ def search_param(self, database, dataprocess=None, path_filename=("", ""), print("##### Start search #####") config = configparser.ConfigParser() - config.read("elm/elmr.cfg") + if sys.version_info < (3, 0): + config.readfp(open(_ELMR_CONFIG)) + else: + config.read_file(open(_ELMR_CONFIG)) best_function_error = 99999.9 temp_error = best_function_error @@ -335,11 +349,18 @@ def search_param(self, database, dataprocess=None, path_filename=("", ""), best_param_l = 0 for function in search_functions: - function_config = config["DEFAULT"] - c_range = ast.literal_eval(function_config["elmr_c_range"]) - neurons = ast.literal_eval(function_config["elmr_neurons"]) + if sys.version_info < (3, 0): + elmr_c_range = ast.literal_eval(config.get("DEFAULT", + "elmr_c_range")) + + neurons = config.getint("DEFAULT", "elmr_neurons") + + else: + function_config = config["DEFAULT"] + elmr_c_range = ast.literal_eval(function_config["elmr_c_range"]) + neurons = ast.literal_eval(function_config["elmr_neurons"]) - param_ranges = [[c_range[0][0], c_range[0][1]]] + param_ranges = [[elmr_c_range[0][0], elmr_c_range[0][1]]] def wrapper_opt(param_c): """ @@ -369,10 +390,10 @@ def wrapper_opt(param_c): else: raise Exception("Invalid type of cross-validation.") - if min_f == "accuracy": + if of == "accuracy": util = 1 / cv_te_error.get_accuracy() else: - util = cv_te_error.get(min_f) + util = cv_te_error.get(of) # print("c:", param_c, "util: ", util) return util @@ -380,14 +401,14 @@ def wrapper_opt(param_c): optimal_pars, details, _ = \ optunity.minimize(wrapper_opt, solver_name="cma-es", - num_evals=30, + num_evals=eval, param_c=param_ranges[0]) # Save best function result if details[0] < temp_error: temp_error = details[0] - if min_f == "accuracy": + if of == "accuracy": best_function_error = 1 / temp_error else: best_function_error = temp_error @@ -396,7 +417,7 @@ def wrapper_opt(param_c): best_param_c = optimal_pars["param_c"] best_param_l = neurons - if min_f == "accuracy": + if of == "accuracy": print("Function: ", function, " best cv value: ", 1/details[0]) else: diff --git a/elm/mltools.py b/elm/mltools.py index a758501..b0f0d26 100644 --- a/elm/mltools.py +++ b/elm/mltools.py @@ -1,14 +1,21 @@ +# -*- coding: utf-8 -*- + """ This file contains MLTools class and all developed methods. """ -__author__ = 'acba' +# Python2 support +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function + import numpy as np import pickle -class MLTools: +class MLTools(object): """ A Python implementation of several methods needed for machine learning classification/regression. @@ -221,7 +228,7 @@ def load_regressor(self, file_name): return self -class Error: +class Error(object): """ Error is a class that saves expected and predicted values to calculate error metrics. @@ -529,7 +536,7 @@ def get_shapiro(self): return sw_statistic, sw_p_value -class CVError: +class CVError(object): """ CVError is a class that saves :class:`Error` objects from all folds of a cross-validation method. diff --git a/requirements.txt b/requirements.txt index 7bc4ab6..cb66e6b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ numpy==1.9.1 deap==1.0.1 --e git+https://github.com/claesenm/optunity.git@842e5b951caf5b37b8a612156208fb12bb705c88#egg=optunity-master +-e git+https://github.com/claesenm/optunity.git#egg=optunity-master sphinx_rtd_theme==0.1.6 sphinxcontrib-napoleon==0.2.9 diff --git a/setup.py b/setup.py index 839f693..d187449 100755 --- a/setup.py +++ b/setup.py @@ -1,15 +1,33 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -try: - from setuptools import setup -except ImportError: - from distutils.core import setup - import sys +import io +import os +import re + +from setuptools import setup +from setuptools import find_packages from setuptools.command.test import test as TestCommand +def read(*names, **kwargs): + with io.open( + os.path.join(os.path.dirname(__file__), *names), + encoding=kwargs.get("encoding", "utf8") + ) as fp: + return fp.read() + + +def find_version(*file_paths): + version_file = read(*file_paths) + version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", + version_file, re.M) + if version_match: + return version_match.group(1) + raise RuntimeError("Unable to find version string.") + + class PyTest(TestCommand): def finalize_options(self): TestCommand.finalize_options(self) @@ -36,19 +54,18 @@ def run_tests(self): setup( name='elm', - version="0.1.0", + version=find_version("elm/__init__.py"), description="Python Extreme Learning Machine (ELM) is a machine learning " "technique used for classification/regression tasks.", long_description=readme + '\n\n' + history, author="Augusto Almeida", author_email='acba@cin.ufpe.br', url='https://github.com/acba/elm', - packages=['elm'], + packages=find_packages(exclude=['contrib', 'docs', 'tests*']), package_dir={'elm': 'elm'}, include_package_data=True, install_requires=requirements, - dependency_links=['https://github.com/claesenm/optunity/archive/' - 'master.zip#egg=optunity-1.0.2'], + dependency_links=['https://github.com/claesenm/optunity/archive/master.zip#egg=optunity-1.0.2'], license="BSD", zip_safe=False, keywords='elm, machine learning, artificial intelligence, ai, regression, \ @@ -58,6 +75,7 @@ def run_tests(self): 'Intended Audience :: Developers', 'License :: OSI Approved :: BSD License', 'Natural Language :: English', + 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.4', 'Topic :: Software Development', diff --git a/tests/test_classification.py b/tests/test_classification.py index 4f9bc01..0d2900e 100755 --- a/tests/test_classification.py +++ b/tests/test_classification.py @@ -30,7 +30,7 @@ def test_elmk_iris(): try: # search for best parameter for this dataset - elmk.search_param(data, cv="kfold", min_f="accuracy") + elmk.search_param(data, cv="kfold", of="accuracy", eval=10) # split data in training and testing sets tr_set, te_set = elm.split_sets(data, training_percent=.8, perm=True) @@ -59,7 +59,7 @@ def test_elmr_iris(): try: # search for best parameter for this dataset - elmr.search_param(data, cv="kfold", min_f="accuracy") + elmr.search_param(data, cv="kfold", of="accuracy", eval=10) # split data in training and testing sets tr_set, te_set = elm.split_sets(data, training_percent=.8, perm=True) diff --git a/tests/test_regression.py b/tests/test_regression.py index 11b45ef..b82f68b 100755 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -33,7 +33,7 @@ def test_elmk_boston(): try: # search for best parameter for this dataset - # elmk.search_param(data, cv="kfold", min_f="rmse") + # elmk.search_param(data, cv="kfold", of="rmse") # split data in training and testing sets tr_set, te_set = elm.split_sets(data, training_percent=.8, perm=True) @@ -61,7 +61,7 @@ def test_elmk_diabetes(): try: # search for best parameter for this dataset - # elmk.search_param(data, cv="kfold", min_f="rmse") + # elmk.search_param(data, cv="kfold", of="rmse") # split data in training and testing sets tr_set, te_set = elm.split_sets(data, training_percent=.8, perm=True) @@ -89,7 +89,7 @@ def test_elmr_boston(): try: # search for best parameter for this dataset - # elmr.search_param(data, cv="kfold", min_f="rmse") + # elmr.search_param(data, cv="kfold", of="rmse") # split data in training and testing sets tr_set, te_set = elm.split_sets(data, training_percent=.8, perm=True) @@ -117,7 +117,7 @@ def test_elmr_diabetes(): try: # search for best parameter for this dataset - # elmr.search_param(data, cv="kfold", min_f="rmse") + # elmr.search_param(data, cv="kfold", of="rmse") # split data in training and testing sets tr_set, te_set = elm.split_sets(data, training_percent=.8, perm=True) diff --git a/tox.ini b/tox.ini index e990e03..4128c9d 100644 --- a/tox.ini +++ b/tox.ini @@ -5,8 +5,7 @@ envlist = py27, py34 commands = {envpython} setup.py test deps = pytest - numpy - optunity + -rrequirements.txt setenv= PYTHONWARNINGS=all @@ -19,18 +18,8 @@ norecursedirs=.tox .git env [testenv:py27] commands= - py.test --doctest-module + py.test tests/ --doctest-module [testenv:py34] commands= - py.test --doctest-module - -[testenv:py27verbose] -basepython=python2.7 -commands= - py.test --doctest-module --cov=. --cov-report term - -[testenv:py34verbose] -basepython=python3.4 -commands= - py.test --doctest-module --cov=. --cov-report term + py.test tests/ --doctest-module