From 2bd87f6ed8d36f89cab73552665e4e59e892262b Mon Sep 17 00:00:00 2001 From: Adrin Jalali Date: Fri, 14 Dec 2018 10:53:12 +0100 Subject: [PATCH] Remove python < 3.5 from CI (#12746) --- .circleci/config.yml | 36 +- .travis.yml | 25 +- appveyor.yml | 5 +- build_tools/travis/install.sh | 9 +- conftest.py | 7 +- .../model_selection/plot_precision_recall.py | 2 +- setup.py | 13 +- sklearn/base.py | 2 +- sklearn/calibration.py | 2 +- sklearn/cluster/tests/test_k_means.py | 4 - sklearn/datasets/covtype.py | 3 +- sklearn/datasets/kddcup99.py | 9 +- sklearn/datasets/mldata.py | 13 +- sklearn/datasets/openml.py | 25 +- sklearn/datasets/rcv1.py | 3 +- sklearn/datasets/species_distributions.py | 8 +- .../datasets/tests/test_svmlight_format.py | 13 +- sklearn/externals/funcsigs.py | 815 ------------------ sklearn/feature_extraction/tests/test_text.py | 5 +- sklearn/gaussian_process/kernels.py | 2 +- .../gaussian_process/tests/test_kernels.py | 2 +- sklearn/model_selection/_search.py | 5 +- sklearn/model_selection/_split.py | 3 +- sklearn/model_selection/tests/test_search.py | 6 +- sklearn/preprocessing/_discretization.py | 2 - sklearn/preprocessing/data.py | 7 - sklearn/preprocessing/tests/test_data.py | 34 +- sklearn/svm/tests/test_svm.py | 11 - sklearn/tests/test_docstring_parameters.py | 3 +- sklearn/utils/estimator_checks.py | 28 +- sklearn/utils/extmath.py | 4 - sklearn/utils/fixes.py | 154 +--- sklearn/utils/testing.py | 12 +- sklearn/utils/tests/test_deprecation.py | 2 - sklearn/utils/tests/test_estimator_checks.py | 9 +- sklearn/utils/tests/test_extmath.py | 2 - sklearn/utils/tests/test_fixes.py | 5 - sklearn/utils/tests/test_validation.py | 27 +- sklearn/utils/validation.py | 12 +- 39 files changed, 108 insertions(+), 1221 deletions(-) delete mode 100644 sklearn/externals/funcsigs.py diff --git a/.circleci/config.yml b/.circleci/config.yml index 2c6fbae7530cf..feae152555368 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,13 +1,18 @@ version: 2 jobs: - python3: + doc-min-dependencies: docker: - image: circleci/python:3.6.1 environment: - MINICONDA_PATH: ~/miniconda - CONDA_ENV_NAME: testenv - - PYTHON_VERSION: 3 + - PYTHON_VERSION: 3.5 + - NUMPY_VERSION: 1.11.0 + - SCIPY_VERSION: 0.17.0 + - PANDAS_VERSION: 0.18.0 + - MATPLOTLIB_VERSION: 1.5.1 + - SCIKIT_IMAGE_VERSION: 0.12.3 steps: - checkout - run: ./build_tools/circle/checkout_merge_commit.sh @@ -30,31 +35,21 @@ jobs: root: doc/_build/html paths: . - - python2: + doc: docker: - # We use the python 3 docker image for simplicity. Python is installed - # through conda and the python version actually used is set via the - # PYTHON_VERSION environment variable. - image: circleci/python:3.6.1 environment: - # Test examples run with minimal dependencies - MINICONDA_PATH: ~/miniconda - CONDA_ENV_NAME: testenv - - PYTHON_VERSION: "2" - - NUMPY_VERSION: "1.10" - - SCIPY_VERSION: "0.16" - - MATPLOTLIB_VERSION: "1.4" - - SCIKIT_IMAGE_VERSION: "0.11" - - PANDAS_VERSION: "0.17.1" + - PYTHON_VERSION: 3 steps: - checkout - run: ./build_tools/circle/checkout_merge_commit.sh - restore_cache: - key: v1-datasets-{{ .Branch }}-python2 + key: v1-datasets-{{ .Branch }} - run: ./build_tools/circle/build_doc.sh - save_cache: - key: v1-datasets-{{ .Branch }}-python2 + key: v1-datasets-{{ .Branch }} paths: - ~/scikit_learn_data - store_artifacts: @@ -63,6 +58,11 @@ jobs: - store_artifacts: path: ~/log.txt destination: log.txt + # Persists generated documentation so that it can be attached and deployed + # in the 'deploy' step. 
+ - persist_to_workspace: + root: doc/_build/html + paths: . lint: docker: @@ -114,8 +114,8 @@ workflows: version: 2 build-doc-and-deploy: jobs: - - python3 - - python2 + - doc + - doc-min-dependencies - lint - pypy3: filters: diff --git a/.travis.yml b/.travis.yml index e976cacd06c96..cf9f27c5fc614 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,7 @@ cache: - $HOME/.cache/pip - $HOME/.ccache -dist: trusty +dist: xenial env: global: @@ -21,26 +21,29 @@ env: matrix: include: # This environment tests that scikit-learn can be built against - # versions of numpy, scipy with ATLAS that comes with Ubuntu Trusty 14.04 - # i.e. numpy 1.8.2 and scipy 0.13.3 - - env: DISTRIB="ubuntu" PYTHON_VERSION="2.7" CYTHON_VERSION="0.23.5" - COVERAGE=true + # versions of numpy, scipy with ATLAS that comes with Ubuntu Xenial 16.04 + # i.e. numpy 1.11 and scipy 0.17 + - env: DISTRIB="ubuntu" PYTHON_VERSION="3.5" CYTHON_VERSION="0.28.6" + NUMPY_VERSION="1.11.0" SCIPY_VERSION="0.17.0" + PILLOW_VERSION="4.0.0" COVERAGE=true + SKLEARN_SITE_JOBLIB=1 JOBLIB_VERSION="0.11" if: type != cron addons: apt: packages: # these only required by the DISTRIB="ubuntu" builds: - - python-scipy + - python3-scipy - libatlas3-base + - libatlas-base-dev - libatlas-dev - # Python 3.4 build - - env: DISTRIB="conda" PYTHON_VERSION="3.4" INSTALL_MKL="false" - NUMPY_VERSION="1.10.4" SCIPY_VERSION="0.16.1" CYTHON_VERSION="0.25.2" + # Python 3.5 build without SITE_JOBLIB + - env: DISTRIB="conda" PYTHON_VERSION="3.5" INSTALL_MKL="false" + NUMPY_VERSION="1.11.0" SCIPY_VERSION="0.17.0" CYTHON_VERSION="0.25.2" PILLOW_VERSION="4.0.0" COVERAGE=true if: type != cron # Python 3.5 build - env: DISTRIB="conda" PYTHON_VERSION="3.5" INSTALL_MKL="false" - NUMPY_VERSION="1.10.4" SCIPY_VERSION="0.16.1" CYTHON_VERSION="0.25.2" + NUMPY_VERSION="1.11.0" SCIPY_VERSION="0.17.0" CYTHON_VERSION="0.25.2" PILLOW_VERSION="4.0.0" COVERAGE=true SKLEARN_SITE_JOBLIB=1 JOBLIB_VERSION="0.11" if: type != cron @@ -57,7 +60,7 @@ matrix: # This environment tests scikit-learn against numpy and scipy master # installed from their CI wheels in a virtualenv with the Python # interpreter provided by travis. - - python: 3.6 + - python: 3.7 env: DISTRIB="scipy-dev" CHECK_WARNINGS="true" if: type = cron OR commit_message =~ /\[scipy-dev\]/ diff --git a/appveyor.yml b/appveyor.yml index e26a02c90cd39..e5c4362451e97 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -22,11 +22,10 @@ environment: PYTHON_ARCH: "64" CHECK_WARNINGS: "true" - - PYTHON: "C:\\Python27" - PYTHON_VERSION: "2.7.8" + - PYTHON: "C:\\Python35" + PYTHON_VERSION: "3.5.6" PYTHON_ARCH: "32" - # Because we only have a single worker, we don't want to waste precious # appveyor CI time and make other PRs wait for repeated failures in a failing # PR. 
The following option cancels pending jobs in a given PR after the first diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh index a2940bb7551ea..5ce4f6a9e603b 100755 --- a/build_tools/travis/install.sh +++ b/build_tools/travis/install.sh @@ -70,11 +70,6 @@ if [[ "$DISTRIB" == "conda" ]]; then fi make_conda $TO_INSTALL - # for python 3.4, conda does not have recent pytest packages - if [[ "$PYTHON_VERSION" == "3.4" ]]; then - pip install pytest==3.5 - fi - elif [[ "$DISTRIB" == "ubuntu" ]]; then # At the time of writing numpy 1.9.1 is included in the travis # virtualenv but we want to use the numpy installed through apt-get @@ -82,9 +77,9 @@ elif [[ "$DISTRIB" == "ubuntu" ]]; then deactivate # Create a new virtualenv using system site packages for python, numpy # and scipy - virtualenv --system-site-packages testvenv + virtualenv --system-site-packages --python=python3 testvenv source testvenv/bin/activate - pip install pytest pytest-cov cython==$CYTHON_VERSION + pip install pytest pytest-cov cython==$CYTHON_VERSION joblib==$JOBLIB_VERSION elif [[ "$DISTRIB" == "scipy-dev" ]]; then make_conda python=3.7 diff --git a/conftest.py b/conftest.py index 50a3d3470a47a..45a5a8af29d20 100644 --- a/conftest.py +++ b/conftest.py @@ -11,8 +11,6 @@ import pytest from _pytest.doctest import DoctestItem -from sklearn.utils.fixes import PY3_OR_LATER - PYTEST_MIN_VERSION = '3.3.0' if LooseVersion(pytest.__version__) < PYTEST_MIN_VERSION: @@ -47,11 +45,8 @@ def pytest_collection_modifyitems(config, items): item.add_marker(skip_network) # numpy changed the str/repr formatting of numpy arrays in 1.14. We want to - # run doctests only for numpy >= 1.14. We want to skip the doctest for - # python 2 due to unicode. + # run doctests only for numpy >= 1.14. skip_doctests = False - if not PY3_OR_LATER: - skip_doctests = True try: import numpy as np if LooseVersion(np.__version__) < LooseVersion('1.14'): diff --git a/examples/model_selection/plot_precision_recall.py b/examples/model_selection/plot_precision_recall.py index 7010e96737c6b..936e56921d27c 100644 --- a/examples/model_selection/plot_precision_recall.py +++ b/examples/model_selection/plot_precision_recall.py @@ -137,7 +137,7 @@ # ................................ from sklearn.metrics import precision_recall_curve import matplotlib.pyplot as plt -from sklearn.utils.fixes import signature +from inspect import signature precision, recall, _ = precision_recall_curve(y_test, y_score) diff --git a/setup.py b/setup.py index e25c50a114a33..cce21f5883c5a 100755 --- a/setup.py +++ b/setup.py @@ -11,11 +11,7 @@ from distutils.command.clean import clean as Clean from pkg_resources import parse_version import traceback - -if sys.version_info[0] < 3: - import __builtin__ as builtins -else: - import builtins +import builtins # This is a bit (!) 
hackish: we are setting a global variable so that the main # sklearn __init__ can detect if it is being loaded by the setup routine, to @@ -45,8 +41,8 @@ SCIPY_MIN_VERSION = '1.1.0' NUMPY_MIN_VERSION = '1.14.0' else: - SCIPY_MIN_VERSION = '0.13.3' - NUMPY_MIN_VERSION = '1.8.2' + SCIPY_MIN_VERSION = '0.17.0' + NUMPY_MIN_VERSION = '1.11.0' # Optional setuptools features @@ -183,10 +179,7 @@ def setup_package(): 'Operating System :: POSIX', 'Operating System :: Unix', 'Operating System :: MacOS', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', diff --git a/sklearn/base.py b/sklearn/base.py index 34998270cea88..2dea6509fd927 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -6,11 +6,11 @@ import copy import warnings from collections import defaultdict +from inspect import signature import numpy as np from scipy import sparse from .externals import six -from .utils.fixes import signature from . import __version__ diff --git a/sklearn/calibration.py b/sklearn/calibration.py index d29145a5eb3df..e1ec2bd3cf596 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -9,6 +9,7 @@ from __future__ import division import warnings +from inspect import signature from math import log import numpy as np @@ -20,7 +21,6 @@ from .preprocessing import label_binarize, LabelBinarizer from .utils import check_X_y, check_array, indexable, column_or_1d from .utils.validation import check_is_fitted, check_consistent_length -from .utils.fixes import signature from .isotonic import IsotonicRegression from .svm import LinearSVC from .model_selection import check_cv diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py index cec0fa2897546..e17f154a23c94 100644 --- a/sklearn/cluster/tests/test_k_means.py +++ b/sklearn/cluster/tests/test_k_means.py @@ -237,10 +237,6 @@ def test_k_means_new_centers(): @if_safe_multiprocessing_with_blas def test_k_means_plus_plus_init_2_jobs(): - if sys.version_info[:2] < (3, 4): - raise SkipTest( - "Possible multi-process bug with some BLAS under Python < 3.4") - km = KMeans(init="k-means++", n_clusters=n_clusters, n_jobs=2, random_state=42).fit(X) _check_fitted_model(km) diff --git a/sklearn/datasets/covtype.py b/sklearn/datasets/covtype.py index 9188a6dd585a9..ca8da7ddbd6ad 100644 --- a/sklearn/datasets/covtype.py +++ b/sklearn/datasets/covtype.py @@ -17,7 +17,7 @@ from gzip import GzipFile import logging from os.path import dirname, exists, join -from os import remove +from os import remove, makedirs import numpy as np @@ -26,7 +26,6 @@ from .base import RemoteFileMetadata from ..utils import Bunch from .base import _pkl_filepath -from ..utils.fixes import makedirs from ..utils import _joblib from ..utils import check_random_state diff --git a/sklearn/datasets/kddcup99.py b/sklearn/datasets/kddcup99.py index 4fac89d7a65df..e335587270cef 100644 --- a/sklearn/datasets/kddcup99.py +++ b/sklearn/datasets/kddcup99.py @@ -205,14 +205,7 @@ def _fetch_brute_kddcup99(data_home=None, """ data_home = get_data_home(data_home=data_home) - if sys.version_info[0] == 3: - # The zlib compression format use by joblib is not compatible when - # switching from Python 2 to Python 3, let us use a separate folder - # under Python 3: - dir_suffix = "-py3" - else: - # Backward compat for Python 2 users - dir_suffix = "" + 
dir_suffix = "-py3" if percent10: kddcup_dir = join(data_home, "kddcup99_10" + dir_suffix) diff --git a/sklearn/datasets/mldata.py b/sklearn/datasets/mldata.py index 5948d04a8be80..897c003283fc8 100644 --- a/sklearn/datasets/mldata.py +++ b/sklearn/datasets/mldata.py @@ -7,16 +7,9 @@ from os.path import join, exists import re import numbers -try: - # Python 2 - from urllib2 import HTTPError - from urllib2 import quote - from urllib2 import urlopen -except ImportError: - # Python 3+ - from urllib.error import HTTPError - from urllib.parse import quote - from urllib.request import urlopen +from urllib.error import HTTPError +from urllib.parse import quote +from urllib.request import urlopen import numpy as np import scipy as sp diff --git a/sklearn/datasets/openml.py b/sklearn/datasets/openml.py index dd8da4f3bdf25..81c208b045a5f 100644 --- a/sklearn/datasets/openml.py +++ b/sklearn/datasets/openml.py @@ -7,20 +7,14 @@ from contextlib import closing from functools import wraps -try: - # Python 3+ - from urllib.request import urlopen, Request -except ImportError: - # Python 2 - from urllib2 import urlopen, Request - +from urllib.request import urlopen, Request import numpy as np import scipy.sparse from sklearn.externals import _arff from .base import get_data_home -from ..externals.six import string_types, PY2, BytesIO +from ..externals.six import string_types from ..externals.six.moves.urllib.error import HTTPError from ..utils import Bunch @@ -89,8 +83,6 @@ def is_gzip(_fsrc): if data_home is None: fsrc = urlopen(req) if is_gzip(fsrc): - if PY2: - fsrc = BytesIO(fsrc.read()) return gzip.GzipFile(fileobj=fsrc, mode='rb') return fsrc @@ -357,16 +349,9 @@ def _arff_load(): else: return_type = _arff.DENSE - if PY2: - arff_file = _arff.load( - response.read(), - encode_nominal=encode_nominal, - return_type=return_type, - ) - else: - arff_file = _arff.loads(response.read().decode('utf-8'), - encode_nominal=encode_nominal, - return_type=return_type) + arff_file = _arff.loads(response.read().decode('utf-8'), + encode_nominal=encode_nominal, + return_type=return_type) return arff_file return _arff_load() diff --git a/sklearn/datasets/rcv1.py b/sklearn/datasets/rcv1.py index ec562d334eae1..66566c389baf3 100644 --- a/sklearn/datasets/rcv1.py +++ b/sklearn/datasets/rcv1.py @@ -10,7 +10,7 @@ import logging -from os import remove +from os import remove, makedirs from os.path import dirname, exists, join from gzip import GzipFile @@ -21,7 +21,6 @@ from .base import _pkl_filepath from .base import _fetch_remote from .base import RemoteFileMetadata -from ..utils.fixes import makedirs from ..utils import _joblib from .svmlight_format import load_svmlight_files from ..utils import shuffle as shuffle_ diff --git a/sklearn/datasets/species_distributions.py b/sklearn/datasets/species_distributions.py index da158eb24aa33..a5b0597aeedfd 100644 --- a/sklearn/datasets/species_distributions.py +++ b/sklearn/datasets/species_distributions.py @@ -53,7 +53,6 @@ from sklearn.datasets.base import _pkl_filepath from sklearn.utils import _joblib -PY3_OR_LATER = sys.version_info[0] >= 3 # The original data can be found at: # https://biodiversityinformatics.amnh.org/open_source/maxent/samples.zip @@ -106,12 +105,7 @@ def _load_csv(F): rec : np.ndarray record array representing the data """ - if PY3_OR_LATER: - # Numpy recarray wants Python 3 str but not bytes... 
- names = F.readline().decode('ascii').strip().split(',') - else: - # Numpy recarray wants Python 2 str but not unicode - names = F.readline().strip().split(',') + names = F.readline().decode('ascii').strip().split(',') rec = np.loadtxt(F, skiprows=0, delimiter=',', dtype='a22,f4,f4') rec.dtype.names = names diff --git a/sklearn/datasets/tests/test_svmlight_format.py b/sklearn/datasets/tests/test_svmlight_format.py index ca1f7ddae8ecd..909c1942106e3 100644 --- a/sklearn/datasets/tests/test_svmlight_format.py +++ b/sklearn/datasets/tests/test_svmlight_format.py @@ -233,18 +233,12 @@ def test_dump(): f.seek(0) comment = f.readline() - try: - comment = str(comment, "utf-8") - except TypeError: # fails in Python 2.x - pass + comment = str(comment, "utf-8") assert_in("scikit-learn %s" % sklearn.__version__, comment) comment = f.readline() - try: - comment = str(comment, "utf-8") - except TypeError: # fails in Python 2.x - pass + comment = str(comment, "utf-8") assert_in(["one", "zero"][zero_based] + "-based", comment) @@ -484,9 +478,6 @@ def test_load_offset_exhaustive_splits(): # load the same data in 2 parts with all the possible byte offsets to # locate the split so has to test for particular boundary cases for mark in range(size): - if sp_version < (0, 14) and (mark == 0 or mark > size - 100): - # old scipy does not support sparse matrices with 0 rows. - continue f.seek(0) X_0, y_0, q_0 = load_svmlight_file(f, n_features=n_features, query_id=True, offset=0, diff --git a/sklearn/externals/funcsigs.py b/sklearn/externals/funcsigs.py deleted file mode 100644 index 4e684690b309c..0000000000000 --- a/sklearn/externals/funcsigs.py +++ /dev/null @@ -1,815 +0,0 @@ -# Copyright 2001-2013 Python Software Foundation; All Rights Reserved -"""Function signature objects for callables - -Back port of Python 3.3's function signature tools from the inspect module, -modified to be compatible with Python 2.7 and 3.2+. 
-""" -from __future__ import absolute_import, division, print_function -import itertools -import functools -import re -import types - -from collections import OrderedDict - -__version__ = "0.4" - -__all__ = ['BoundArguments', 'Parameter', 'Signature', 'signature'] - - -_WrapperDescriptor = type(type.__call__) -_MethodWrapper = type(all.__call__) - -_NonUserDefinedCallables = (_WrapperDescriptor, - _MethodWrapper, - types.BuiltinFunctionType) - - -def formatannotation(annotation, base_module=None): - if isinstance(annotation, type): - if annotation.__module__ in ('builtins', '__builtin__', base_module): - return annotation.__name__ - return annotation.__module__+'.'+annotation.__name__ - return repr(annotation) - - -def _get_user_defined_method(cls, method_name, *nested): - try: - if cls is type: - return - meth = getattr(cls, method_name) - for name in nested: - meth = getattr(meth, name, meth) - except AttributeError: - return - else: - if not isinstance(meth, _NonUserDefinedCallables): - # Once '__signature__' will be added to 'C'-level - # callables, this check won't be necessary - return meth - - -def signature(obj): - '''Get a signature object for the passed callable.''' - - if not callable(obj): - raise TypeError('{0!r} is not a callable object'.format(obj)) - - if isinstance(obj, types.MethodType): - sig = signature(obj.__func__) - if obj.__self__ is None: - # Unbound method: the first parameter becomes positional-only - if sig.parameters: - first = sig.parameters.values()[0].replace( - kind=_POSITIONAL_ONLY) - return sig.replace( - parameters=(first,) + tuple(sig.parameters.values())[1:]) - else: - return sig - else: - # In this case we skip the first parameter of the underlying - # function (usually `self` or `cls`). - return sig.replace(parameters=tuple(sig.parameters.values())[1:]) - - try: - sig = obj.__signature__ - except AttributeError: - pass - else: - if sig is not None: - return sig - - try: - # Was this function wrapped by a decorator? - wrapped = obj.__wrapped__ - except AttributeError: - pass - else: - return signature(wrapped) - - if isinstance(obj, types.FunctionType): - return Signature.from_function(obj) - - if isinstance(obj, functools.partial): - sig = signature(obj.func) - - new_params = OrderedDict(sig.parameters.items()) - - partial_args = obj.args or () - partial_keywords = obj.keywords or {} - try: - ba = sig.bind_partial(*partial_args, **partial_keywords) - except TypeError as ex: - msg = 'partial object {0!r} has incorrect arguments'.format(obj) - raise ValueError(msg) - - for arg_name, arg_value in ba.arguments.items(): - param = new_params[arg_name] - if arg_name in partial_keywords: - # We set a new default value, because the following code - # is correct: - # - # >>> def foo(a): print(a) - # >>> print(partial(partial(foo, a=10), a=20)()) - # 20 - # >>> print(partial(partial(foo, a=10), a=20)(a=30)) - # 30 - # - # So, with 'partial' objects, passing a keyword argument is - # like setting a new default value for the corresponding - # parameter - # - # We also mark this parameter with '_partial_kwarg' - # flag. Later, in '_bind', the 'default' value of this - # parameter will be added to 'kwargs', to simulate - # the 'functools.partial' real call. 
- new_params[arg_name] = param.replace(default=arg_value, - _partial_kwarg=True) - - elif (param.kind not in (_VAR_KEYWORD, _VAR_POSITIONAL) and - not param._partial_kwarg): - new_params.pop(arg_name) - - return sig.replace(parameters=new_params.values()) - - sig = None - if isinstance(obj, type): - # obj is a class or a metaclass - - # First, let's see if it has an overloaded __call__ defined - # in its metaclass - call = _get_user_defined_method(type(obj), '__call__') - if call is not None: - sig = signature(call) - else: - # Now we check if the 'obj' class has a '__new__' method - new = _get_user_defined_method(obj, '__new__') - if new is not None: - sig = signature(new) - else: - # Finally, we should have at least __init__ implemented - init = _get_user_defined_method(obj, '__init__') - if init is not None: - sig = signature(init) - elif not isinstance(obj, _NonUserDefinedCallables): - # An object with __call__ - # We also check that the 'obj' is not an instance of - # _WrapperDescriptor or _MethodWrapper to avoid - # infinite recursion (and even potential segfault) - call = _get_user_defined_method(type(obj), '__call__', 'im_func') - if call is not None: - sig = signature(call) - - if sig is not None: - # For classes and objects we skip the first parameter of their - # __call__, __new__, or __init__ methods - return sig.replace(parameters=tuple(sig.parameters.values())[1:]) - - if isinstance(obj, types.BuiltinFunctionType): - # Raise a nicer error message for builtins - msg = 'no signature found for builtin function {0!r}'.format(obj) - raise ValueError(msg) - - raise ValueError('callable {0!r} is not supported by signature'.format(obj)) - - -class _void(object): - '''A private marker - used in Parameter & Signature''' - - -class _empty(object): - pass - - -class _ParameterKind(int): - def __new__(self, *args, **kwargs): - obj = int.__new__(self, *args) - obj._name = kwargs['name'] - return obj - - def __str__(self): - return self._name - - def __repr__(self): - return '<_ParameterKind: {0!r}>'.format(self._name) - - -_POSITIONAL_ONLY = _ParameterKind(0, name='POSITIONAL_ONLY') -_POSITIONAL_OR_KEYWORD = _ParameterKind(1, name='POSITIONAL_OR_KEYWORD') -_VAR_POSITIONAL = _ParameterKind(2, name='VAR_POSITIONAL') -_KEYWORD_ONLY = _ParameterKind(3, name='KEYWORD_ONLY') -_VAR_KEYWORD = _ParameterKind(4, name='VAR_KEYWORD') - - -class Parameter(object): - '''Represents a parameter in a function signature. - - Has the following public attributes: - - * name : str - The name of the parameter as a string. - * default : object - The default value for the parameter if specified. If the - parameter has no default value, this attribute is not set. - * annotation - The annotation for the parameter if specified. If the - parameter has no annotation, this attribute is not set. - * kind : str - Describes how argument values are bound to the parameter. - Possible values: `Parameter.POSITIONAL_ONLY`, - `Parameter.POSITIONAL_OR_KEYWORD`, `Parameter.VAR_POSITIONAL`, - `Parameter.KEYWORD_ONLY`, `Parameter.VAR_KEYWORD`. 
- ''' - - __slots__ = ('_name', '_kind', '_default', '_annotation', '_partial_kwarg') - - POSITIONAL_ONLY = _POSITIONAL_ONLY - POSITIONAL_OR_KEYWORD = _POSITIONAL_OR_KEYWORD - VAR_POSITIONAL = _VAR_POSITIONAL - KEYWORD_ONLY = _KEYWORD_ONLY - VAR_KEYWORD = _VAR_KEYWORD - - empty = _empty - - def __init__(self, name, kind, default=_empty, annotation=_empty, - _partial_kwarg=False): - - if kind not in (_POSITIONAL_ONLY, _POSITIONAL_OR_KEYWORD, - _VAR_POSITIONAL, _KEYWORD_ONLY, _VAR_KEYWORD): - raise ValueError("invalid value for 'Parameter.kind' attribute") - self._kind = kind - - if default is not _empty: - if kind in (_VAR_POSITIONAL, _VAR_KEYWORD): - msg = '{0} parameters cannot have default values'.format(kind) - raise ValueError(msg) - self._default = default - self._annotation = annotation - - if name is None: - if kind != _POSITIONAL_ONLY: - raise ValueError("None is not a valid name for a " - "non-positional-only parameter") - self._name = name - else: - name = str(name) - if kind != _POSITIONAL_ONLY and not re.match(r'[a-z_]\w*$', name, re.I): - msg = '{0!r} is not a valid parameter name'.format(name) - raise ValueError(msg) - self._name = name - - self._partial_kwarg = _partial_kwarg - - @property - def name(self): - return self._name - - @property - def default(self): - return self._default - - @property - def annotation(self): - return self._annotation - - @property - def kind(self): - return self._kind - - def replace(self, name=_void, kind=_void, annotation=_void, - default=_void, _partial_kwarg=_void): - '''Creates a customized copy of the Parameter.''' - - if name is _void: - name = self._name - - if kind is _void: - kind = self._kind - - if annotation is _void: - annotation = self._annotation - - if default is _void: - default = self._default - - if _partial_kwarg is _void: - _partial_kwarg = self._partial_kwarg - - return type(self)(name, kind, default=default, annotation=annotation, - _partial_kwarg=_partial_kwarg) - - def __str__(self): - kind = self.kind - - formatted = self._name - if kind == _POSITIONAL_ONLY: - if formatted is None: - formatted = '' - formatted = '<{0}>'.format(formatted) - - # Add annotation and default value - if self._annotation is not _empty: - formatted = '{0}:{1}'.format(formatted, - formatannotation(self._annotation)) - - if self._default is not _empty: - formatted = '{0}={1}'.format(formatted, repr(self._default)) - - if kind == _VAR_POSITIONAL: - formatted = '*' + formatted - elif kind == _VAR_KEYWORD: - formatted = '**' + formatted - - return formatted - - def __repr__(self): - return '<{0} at {1:#x} {2!r}>'.format(self.__class__.__name__, - id(self), self.name) - - def __hash__(self): - msg = "unhashable type: '{0}'".format(self.__class__.__name__) - raise TypeError(msg) - - def __eq__(self, other): - return (issubclass(other.__class__, Parameter) and - self._name == other._name and - self._kind == other._kind and - self._default == other._default and - self._annotation == other._annotation) - - def __ne__(self, other): - return not self.__eq__(other) - - -class BoundArguments(object): - '''Result of `Signature.bind` call. Holds the mapping of arguments - to the function's parameters. - - Has the following public attributes: - - * arguments : OrderedDict - An ordered mutable mapping of parameters' names to arguments' values. - Does not contain arguments' default values. - * signature : Signature - The Signature object that created this instance. - * args : tuple - Tuple of positional arguments values. 
- * kwargs : dict - Dict of keyword arguments values. - ''' - - def __init__(self, signature, arguments): - self.arguments = arguments - self._signature = signature - - @property - def signature(self): - return self._signature - - @property - def args(self): - args = [] - for param_name, param in self._signature.parameters.items(): - if (param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY) or - param._partial_kwarg): - # Keyword arguments mapped by 'functools.partial' - # (Parameter._partial_kwarg is True) are mapped - # in 'BoundArguments.kwargs', along with VAR_KEYWORD & - # KEYWORD_ONLY - break - - try: - arg = self.arguments[param_name] - except KeyError: - # We're done here. Other arguments - # will be mapped in 'BoundArguments.kwargs' - break - else: - if param.kind == _VAR_POSITIONAL: - # *args - args.extend(arg) - else: - # plain argument - args.append(arg) - - return tuple(args) - - @property - def kwargs(self): - kwargs = {} - kwargs_started = False - for param_name, param in self._signature.parameters.items(): - if not kwargs_started: - if (param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY) or - param._partial_kwarg): - kwargs_started = True - else: - if param_name not in self.arguments: - kwargs_started = True - continue - - if not kwargs_started: - continue - - try: - arg = self.arguments[param_name] - except KeyError: - pass - else: - if param.kind == _VAR_KEYWORD: - # **kwargs - kwargs.update(arg) - else: - # plain keyword argument - kwargs[param_name] = arg - - return kwargs - - def __hash__(self): - msg = "unhashable type: '{0}'".format(self.__class__.__name__) - raise TypeError(msg) - - def __eq__(self, other): - return (issubclass(other.__class__, BoundArguments) and - self.signature == other.signature and - self.arguments == other.arguments) - - def __ne__(self, other): - return not self.__eq__(other) - - -class Signature(object): - '''A Signature object represents the overall signature of a function. - It stores a Parameter object for each parameter accepted by the - function, as well as information specific to the function itself. - - A Signature object has the following public attributes and methods: - - * parameters : OrderedDict - An ordered mapping of parameters' names to the corresponding - Parameter objects (keyword-only arguments are in the same order - as listed in `code.co_varnames`). - * return_annotation : object - The annotation for the return type of the function if specified. - If the function has no annotation for its return type, this - attribute is not set. - * bind(*args, **kwargs) -> BoundArguments - Creates a mapping from positional and keyword arguments to - parameters. - * bind_partial(*args, **kwargs) -> BoundArguments - Creates a partial mapping from positional and keyword arguments - to parameters (simulating 'functools.partial' behavior.) - ''' - - __slots__ = ('_return_annotation', '_parameters') - - _parameter_cls = Parameter - _bound_arguments_cls = BoundArguments - - empty = _empty - - def __init__(self, parameters=None, return_annotation=_empty, - __validate_parameters__=True): - '''Constructs Signature from the given list of Parameter - objects and 'return_annotation'. All arguments are optional. 
- ''' - - if parameters is None: - params = OrderedDict() - else: - if __validate_parameters__: - params = OrderedDict() - top_kind = _POSITIONAL_ONLY - - for idx, param in enumerate(parameters): - kind = param.kind - if kind < top_kind: - msg = 'wrong parameter order: {0} before {1}' - msg = msg.format(top_kind, param.kind) - raise ValueError(msg) - else: - top_kind = kind - - name = param.name - if name is None: - name = str(idx) - param = param.replace(name=name) - - if name in params: - msg = 'duplicate parameter name: {0!r}'.format(name) - raise ValueError(msg) - params[name] = param - else: - params = OrderedDict(((param.name, param) - for param in parameters)) - - self._parameters = params - self._return_annotation = return_annotation - - @classmethod - def from_function(cls, func): - '''Constructs Signature for the given python function''' - - if not isinstance(func, types.FunctionType): - raise TypeError('{0!r} is not a Python function'.format(func)) - - Parameter = cls._parameter_cls - - # Parameter information. - func_code = func.__code__ - pos_count = func_code.co_argcount - arg_names = func_code.co_varnames - positional = tuple(arg_names[:pos_count]) - keyword_only_count = getattr(func_code, 'co_kwonlyargcount', 0) - keyword_only = arg_names[pos_count:(pos_count + keyword_only_count)] - annotations = getattr(func, '__annotations__', {}) - defaults = func.__defaults__ - kwdefaults = getattr(func, '__kwdefaults__', None) - - if defaults: - pos_default_count = len(defaults) - else: - pos_default_count = 0 - - parameters = [] - - # Non-keyword-only parameters w/o defaults. - non_default_count = pos_count - pos_default_count - for name in positional[:non_default_count]: - annotation = annotations.get(name, _empty) - parameters.append(Parameter(name, annotation=annotation, - kind=_POSITIONAL_OR_KEYWORD)) - - # ... w/ defaults. - for offset, name in enumerate(positional[non_default_count:]): - annotation = annotations.get(name, _empty) - parameters.append(Parameter(name, annotation=annotation, - kind=_POSITIONAL_OR_KEYWORD, - default=defaults[offset])) - - # *args - if func_code.co_flags & 0x04: - name = arg_names[pos_count + keyword_only_count] - annotation = annotations.get(name, _empty) - parameters.append(Parameter(name, annotation=annotation, - kind=_VAR_POSITIONAL)) - - # Keyword-only parameters. - for name in keyword_only: - default = _empty - if kwdefaults is not None: - default = kwdefaults.get(name, _empty) - - annotation = annotations.get(name, _empty) - parameters.append(Parameter(name, annotation=annotation, - kind=_KEYWORD_ONLY, - default=default)) - # **kwargs - if func_code.co_flags & 0x08: - index = pos_count + keyword_only_count - if func_code.co_flags & 0x04: - index += 1 - - name = arg_names[index] - annotation = annotations.get(name, _empty) - parameters.append(Parameter(name, annotation=annotation, - kind=_VAR_KEYWORD)) - - return cls(parameters, - return_annotation=annotations.get('return', _empty), - __validate_parameters__=False) - - @property - def parameters(self): - try: - return types.MappingProxyType(self._parameters) - except AttributeError: - return OrderedDict(self._parameters.items()) - - @property - def return_annotation(self): - return self._return_annotation - - def replace(self, parameters=_void, return_annotation=_void): - '''Creates a customized copy of the Signature. - Pass 'parameters' and/or 'return_annotation' arguments - to override them in the new copy. 
- ''' - - if parameters is _void: - parameters = self.parameters.values() - - if return_annotation is _void: - return_annotation = self._return_annotation - - return type(self)(parameters, - return_annotation=return_annotation) - - def __hash__(self): - msg = "unhashable type: '{0}'".format(self.__class__.__name__) - raise TypeError(msg) - - def __eq__(self, other): - if (not issubclass(type(other), Signature) or - self.return_annotation != other.return_annotation or - len(self.parameters) != len(other.parameters)): - return False - - other_positions = dict((param, idx) - for idx, param in enumerate(other.parameters.keys())) - - for idx, (param_name, param) in enumerate(self.parameters.items()): - if param.kind == _KEYWORD_ONLY: - try: - other_param = other.parameters[param_name] - except KeyError: - return False - else: - if param != other_param: - return False - else: - try: - other_idx = other_positions[param_name] - except KeyError: - return False - else: - if (idx != other_idx or - param != other.parameters[param_name]): - return False - - return True - - def __ne__(self, other): - return not self.__eq__(other) - - def _bind(self, args, kwargs, partial=False): - '''Private method. Don't use directly.''' - - arguments = OrderedDict() - - parameters = iter(self.parameters.values()) - parameters_ex = () - arg_vals = iter(args) - - if partial: - # Support for binding arguments to 'functools.partial' objects. - # See 'functools.partial' case in 'signature()' implementation - # for details. - for param_name, param in self.parameters.items(): - if (param._partial_kwarg and param_name not in kwargs): - # Simulating 'functools.partial' behavior - kwargs[param_name] = param.default - - while True: - # Let's iterate through the positional arguments and corresponding - # parameters - try: - arg_val = next(arg_vals) - except StopIteration: - # No more positional arguments - try: - param = next(parameters) - except StopIteration: - # No more parameters. That's it. Just need to check that - # we have no `kwargs` after this while loop - break - else: - if param.kind == _VAR_POSITIONAL: - # That's OK, just empty *args. Let's start parsing - # kwargs - break - elif param.name in kwargs: - if param.kind == _POSITIONAL_ONLY: - msg = '{arg!r} parameter is positional only, ' \ - 'but was passed as a keyword' - msg = msg.format(arg=param.name) - raise TypeError(msg) - parameters_ex = (param,) - break - elif (param.kind == _VAR_KEYWORD or - param.default is not _empty): - # That's fine too - we have a default value for this - # parameter. 
So, lets start parsing `kwargs`, starting - # with the current parameter - parameters_ex = (param,) - break - else: - if partial: - parameters_ex = (param,) - break - else: - msg = '{arg!r} parameter lacking default value' - msg = msg.format(arg=param.name) - raise TypeError(msg) - else: - # We have a positional argument to process - try: - param = next(parameters) - except StopIteration: - raise TypeError('too many positional arguments') - else: - if param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY): - # Looks like we have no parameter for this positional - # argument - raise TypeError('too many positional arguments') - - if param.kind == _VAR_POSITIONAL: - # We have an '*args'-like argument, let's fill it with - # all positional arguments we have left and move on to - # the next phase - values = [arg_val] - values.extend(arg_vals) - arguments[param.name] = tuple(values) - break - - if param.name in kwargs: - raise TypeError('multiple values for argument ' - '{arg!r}'.format(arg=param.name)) - - arguments[param.name] = arg_val - - # Now, we iterate through the remaining parameters to process - # keyword arguments - kwargs_param = None - for param in itertools.chain(parameters_ex, parameters): - if param.kind == _POSITIONAL_ONLY: - # This should never happen in case of a properly built - # Signature object (but let's have this check here - # to ensure correct behaviour just in case) - raise TypeError('{arg!r} parameter is positional only, ' - 'but was passed as a keyword'. \ - format(arg=param.name)) - - if param.kind == _VAR_KEYWORD: - # Memorize that we have a '**kwargs'-like parameter - kwargs_param = param - continue - - param_name = param.name - try: - arg_val = kwargs.pop(param_name) - except KeyError: - # We have no value for this parameter. It's fine though, - # if it has a default value, or it is an '*args'-like - # parameter, left alone by the processing of positional - # arguments. - if (not partial and param.kind != _VAR_POSITIONAL and - param.default is _empty): - raise TypeError('{arg!r} parameter lacking default value'. \ - format(arg=param_name)) - - else: - arguments[param_name] = arg_val - - if kwargs: - if kwargs_param is not None: - # Process our '**kwargs'-like parameter - arguments[kwargs_param.name] = kwargs - else: - raise TypeError('too many keyword arguments') - - return self._bound_arguments_cls(self, arguments) - - def bind(self, *args, **kwargs): - '''Get a BoundArguments object, that maps the passed `args` - and `kwargs` to the function's signature. Raises `TypeError` - if the passed arguments can not be bound. - ''' - return self._bind(args, kwargs) - - def bind_partial(self, *args, **kwargs): - '''Get a BoundArguments object, that partially maps the - passed `args` and `kwargs` to the function's signature. - Raises `TypeError` if the passed arguments can not be bound. 
- ''' - return self._bind(args, kwargs, partial=True) - - def __str__(self): - result = [] - render_kw_only_separator = True - for idx, param in enumerate(self.parameters.values()): - formatted = str(param) - - kind = param.kind - if kind == _VAR_POSITIONAL: - # OK, we have an '*args'-like parameter, so we won't need - # a '*' to separate keyword-only arguments - render_kw_only_separator = False - elif kind == _KEYWORD_ONLY and render_kw_only_separator: - # We have a keyword-only parameter to render and we haven't - # rendered an '*args'-like parameter before, so add a '*' - # separator to the parameters list ("foo(arg1, *, arg2)" case) - result.append('*') - # This condition should be only triggered once, so - # reset the flag - render_kw_only_separator = False - - result.append(formatted) - - rendered = '({0})'.format(', '.join(result)) - - if self.return_annotation is not _empty: - anno = formatannotation(self.return_annotation) - rendered += ' -> {0}'.format(anno) - - return rendered diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py index 54c87d433d49c..5fad25fe887af 100644 --- a/sklearn/feature_extraction/tests/test_text.py +++ b/sklearn/feature_extraction/tests/test_text.py @@ -1122,10 +1122,7 @@ def _check_stop_words_consistency(estimator): @fails_if_pypy def test_vectorizer_stop_words_inconsistent(): - if PY2: - lstr = "[u'and', u'll', u've']" - else: - lstr = "['and', 'll', 've']" + lstr = "['and', 'll', 've']" message = ('Your stop_words may be inconsistent with your ' 'preprocessing. Tokenizing the stop words generated ' 'tokens %s not in stop_words.' % lstr) diff --git a/sklearn/gaussian_process/kernels.py b/sklearn/gaussian_process/kernels.py index 79d913bca1cb5..1d716fed67dc4 100644 --- a/sklearn/gaussian_process/kernels.py +++ b/sklearn/gaussian_process/kernels.py @@ -22,6 +22,7 @@ from abc import ABCMeta, abstractmethod from collections import namedtuple import math +from inspect import signature import numpy as np from scipy.special import kv, gamma @@ -30,7 +31,6 @@ from ..metrics.pairwise import pairwise_kernels from ..externals import six from ..base import clone -from ..utils.fixes import signature def _check_length_scale(X, length_scale): diff --git a/sklearn/gaussian_process/tests/test_kernels.py b/sklearn/gaussian_process/tests/test_kernels.py index d5949f60ff6f9..33a769b852c59 100644 --- a/sklearn/gaussian_process/tests/test_kernels.py +++ b/sklearn/gaussian_process/tests/test_kernels.py @@ -5,8 +5,8 @@ import pytest import numpy as np +from inspect import signature -from sklearn.utils.fixes import signature from sklearn.gaussian_process.kernels import _approx_fprime from sklearn.metrics.pairwise \ diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 9e1c09e37e44a..e007bd73e907b 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -280,10 +280,7 @@ def __iter__(self): params = dict() for k, v in items: if hasattr(v, "rvs"): - if sp_version < (0, 16): - params[k] = v.rvs() - else: - params[k] = v.rvs(random_state=rnd) + params[k] = v.rvs(random_state=rnd) else: params[k] = v[rnd.randint(len(v))] yield params diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index 2eccb50fcc976..62654cc646533 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -18,6 +18,7 @@ from math import ceil, floor import numbers from abc import ABCMeta, abstractmethod +from inspect import 
signature import numpy as np @@ -27,7 +28,7 @@ from ..utils.multiclass import type_of_target from ..externals.six import with_metaclass from ..externals.six.moves import zip -from ..utils.fixes import signature, comb +from ..utils.fixes import comb from ..utils.fixes import _Iterable as Iterable from ..base import _pprint diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index 324ea1f02d4aa..f70e38589f982 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -13,7 +13,6 @@ import pytest from sklearn.utils.fixes import sp_version -from sklearn.utils.fixes import PY3_OR_LATER from sklearn.utils.fixes import _Iterable as Iterable, _Sized as Sized from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_not_equal @@ -428,10 +427,7 @@ def test_grid_search_when_param_grid_includes_range(): # Test that the best estimator contains the right value for foo_param clf = MockClassifier() grid_search = None - if PY3_OR_LATER: - grid_search = GridSearchCV(clf, {'foo_param': range(1, 4)}) - else: - grid_search = GridSearchCV(clf, {'foo_param': xrange(1, 4)}) + grid_search = GridSearchCV(clf, {'foo_param': range(1, 4)}) grid_search.fit(X, y) assert_equal(grid_search.best_estimator_.foo_param, 2) diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py index da6a8308abe21..2ba1b019fc12e 100644 --- a/sklearn/preprocessing/_discretization.py +++ b/sklearn/preprocessing/_discretization.py @@ -168,8 +168,6 @@ def fit(self, X, y=None): elif self.strategy == 'quantile': quantiles = np.linspace(0, 100, n_bins[jj] + 1) - if np_version < (1, 9): - quantiles = list(quantiles) bin_edges[jj] = np.asarray(np.percentile(column, quantiles)) elif self.strategy == 'kmeans': diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py index 31de6597617c6..4318d98443913 100644 --- a/sklearn/preprocessing/data.py +++ b/sklearn/preprocessing/data.py @@ -12,7 +12,6 @@ from itertools import chain, combinations import warnings from itertools import combinations_with_replacement as combinations_w_r -from distutils.version import LooseVersion import numpy as np from scipy import sparse @@ -2099,9 +2098,6 @@ def _dense_fit(self, X, random_state): n_samples, n_features = X.shape references = self.references_ * 100 - # numpy < 1.9 bug: np.percentile 2nd argument needs to be a list - if LooseVersion(np.__version__) < '1.9': - references = references.tolist() self.quantiles_ = [] for col in X.T: @@ -2124,9 +2120,6 @@ def _sparse_fit(self, X, random_state): """ n_samples, n_features = X.shape references = self.references_ * 100 - # numpy < 1.9 bug: np.percentile 2nd argument needs to be a list - if LooseVersion(np.__version__) < '1.9': - references = references.tolist() self.quantiles_ = [] for feature_idx in range(n_features): diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index 7c4fe7e1509aa..1a5ad20d32ef4 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -11,27 +11,8 @@ import numpy as np import numpy.linalg as la from scipy import sparse, stats +from scipy.sparse import random as sparse_random -try: - from scipy.sparse import random as sparse_random -except ImportError: - from sklearn.utils.validation import check_random_state - - def sparse_random(num_rows, num_cols, density, random_state=None): - # Helper function to create sparse 
random matrices. - # TODO: remove once scipy < 0.17 is no longer supported and just use - # scipy.sparse.random - # Note that this is not strictly equivalent to what scipy.sparse.random - # does as in our case the density is only correct in expectation but - # this is enough for our tests. - rng = check_random_state(random_state) - X = rng.uniform(size=(num_rows, num_cols)) - zero_mask = rng.uniform(size=(num_rows, num_cols)) > density - X[zero_mask] = 0 - return sparse.csr_matrix(X) - - -from distutils.version import LooseVersion import pytest from sklearn.utils import gen_batches @@ -389,15 +370,10 @@ def test_standard_scaler_numerical_stability(): # was empirically found to cause numerical problems with np.mean & np.std. x = np.full(8, np.log(1e-5), dtype=np.float64) - if LooseVersion(np.__version__) >= LooseVersion('1.9'): - # This does not raise a warning as the number of samples is too low - # to trigger the problem in recent numpy - x_scaled = assert_no_warnings(scale, x) - assert_array_almost_equal(scale(x), np.zeros(8)) - else: - w = "standard deviation of the data is probably very close to 0" - x_scaled = assert_warns_message(UserWarning, w, scale, x) - assert_array_almost_equal(x_scaled, np.zeros(8)) + # This does not raise a warning as the number of samples is too low + # to trigger the problem in recent numpy + x_scaled = assert_no_warnings(scale, x) + assert_array_almost_equal(scale(x), np.zeros(8)) # with 2 more samples, the std computation run into numerical issues: x = np.full(10, np.log(1e-5), dtype=np.float64) diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py index ee435673c8256..626705186b59f 100644 --- a/sklearn/svm/tests/test_svm.py +++ b/sklearn/svm/tests/test_svm.py @@ -522,17 +522,6 @@ def test_bad_input(): def test_unicode_kernel(): - # Test that a unicode kernel name does not cause a TypeError - if six.PY2: - # Test unicode (same as str on python3) - clf = svm.SVC(kernel=u'linear', probability=True) - clf.fit(X, Y) - clf.predict_proba(T) - svm.libsvm.cross_validation(iris.data, - iris.target.astype(np.float64), 5, - kernel=u'linear', - random_seed=0) - # Test default behavior on both versions clf = svm.SVC(gamma='scale', kernel='linear', probability=True) clf.fit(X, Y) diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py index 74b51a0cee586..3bd417d0229a0 100644 --- a/sklearn/tests/test_docstring_parameters.py +++ b/sklearn/tests/test_docstring_parameters.py @@ -8,10 +8,9 @@ import importlib from pkgutil import walk_packages -from inspect import getsource, isabstract +from inspect import getsource, isabstract, signature import sklearn -from sklearn.base import signature from sklearn.utils import IS_PYPY from sklearn.utils.testing import SkipTest from sklearn.utils.testing import check_docstring_parameters diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 3cdaf6a5bc450..a69a17147778b 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -7,6 +7,7 @@ import pickle from copy import deepcopy from functools import partial +from inspect import signature import numpy as np from scipy import sparse @@ -58,9 +59,7 @@ pairwise_distances) from sklearn.utils import shuffle -from sklearn.utils.fixes import signature -from sklearn.utils.validation import (has_fit_parameter, _num_samples, - LARGE_SPARSE_SUPPORTED) +from sklearn.utils.validation import has_fit_parameter, _num_samples from sklearn.preprocessing import StandardScaler 
from sklearn.datasets import load_iris, load_boston, make_blobs @@ -468,18 +467,17 @@ def _generate_sparse_matrix(X_csr): for sparse_format in ['dok', 'lil', 'dia', 'bsr', 'csc', 'coo']: yield sparse_format, X_csr.asformat(sparse_format) - if LARGE_SPARSE_SUPPORTED: - # Generate large indices matrix only if its supported by scipy - X_coo = X_csr.asformat('coo') - X_coo.row = X_coo.row.astype('int64') - X_coo.col = X_coo.col.astype('int64') - yield "coo_64", X_coo - - for sparse_format in ['csc', 'csr']: - X = X_csr.asformat(sparse_format) - X.indices = X.indices.astype('int64') - X.indptr = X.indptr.astype('int64') - yield sparse_format + "_64", X + # Generate large indices matrix only if its supported by scipy + X_coo = X_csr.asformat('coo') + X_coo.row = X_coo.row.astype('int64') + X_coo.col = X_coo.col.astype('int64') + yield "coo_64", X_coo + + for sparse_format in ['csc', 'csr']: + X = X_csr.asformat(sparse_format) + X.indices = X.indices.astype('int64') + X.indptr = X.indptr.astype('int64') + yield sparse_format + "_64", X def check_estimator_sparse_data(name, estimator_orig): diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index 6cdca1bda1d1f..19df5b16163d3 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -779,10 +779,6 @@ def stable_cumsum(arr, axis=None, rtol=1e-05, atol=1e-08): atol : float Absolute tolerance, see ``np.allclose`` """ - # sum is as unstable as cumsum for numpy < 1.9 - if np_version < (1, 9): - return np.cumsum(arr, axis=axis, dtype=np.float64) - out = np.cumsum(arr, axis=axis, dtype=np.float64) expected = np.sum(arr, axis=axis, dtype=np.float64) if not np.all(np.isclose(out.take(-1, axis=axis), expected, rtol=rtol, diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index c92a91ad0a0d1..3f89e80ffae6e 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -10,20 +10,20 @@ # # License: BSD 3 clause -import os -import errno -import sys - from distutils.version import LooseVersion +from collections.abc import Sequence as _Sequence # noqa +from collections.abc import Iterable as _Iterable # noqa +from collections.abc import Mapping as _Mapping # noqa +from collections.abc import Sized as _Sized # noqa + import numpy as np import scipy.sparse as sp import scipy - -try: - from inspect import signature -except ImportError: - from ..externals.funcsigs import signature +from scipy.special import boxcox # noqa +from scipy.sparse.linalg import lsqr as sparse_lsqr # noqa +from numpy import nanpercentile # noqa +from numpy import nanmedian # noqa def _parse_version(version_string): @@ -37,59 +37,12 @@ def _parse_version(version_string): return tuple(version) +# < numpy 1.8.0 euler_gamma = getattr(np, 'euler_gamma', 0.577215664901532860606512090082402431) np_version = _parse_version(np.__version__) sp_version = _parse_version(scipy.__version__) -PY3_OR_LATER = sys.version_info[0] >= 3 - - -# Remove when minimum required NumPy >= 1.10 -try: - if (not np.allclose(np.divide(.4, 1, casting="unsafe"), - np.divide(.4, 1, casting="unsafe", dtype=np.float64)) - or not np.allclose(np.divide(.4, 1), .4)): - raise TypeError('Divide not working with dtype: ' - 'https://github.com/numpy/numpy/issues/3484') - divide = np.divide - -except TypeError: - # Compat for old versions of np.divide that do not provide support for - # the dtype args - def divide(x1, x2, out=None, dtype=None): - out_orig = out - if out is None: - out = np.asarray(x1, dtype=dtype) - if out is x1: - out = x1.copy() - else: - if out is not x1: - out[:] = x1 - if 
dtype is not None and out.dtype != dtype: - out = out.astype(dtype) - out /= x2 - if out_orig is None and np.isscalar(x1): - out = np.asscalar(out) - return out - - -# boxcox ignore NaN in scipy.special.boxcox after 0.14 -if sp_version < (0, 14): - from scipy import stats - - def boxcox(x, lmbda): - with np.errstate(invalid='ignore'): - return stats.boxcox(x, lmbda) -else: - from scipy.special import boxcox # noqa - - -if sp_version < (0, 15): - # Backport fix for scikit-learn/scikit-learn#2986 / scipy/scipy#4142 - from ._scipy_sparse_lsqr_backport import lsqr as sparse_lsqr -else: - from scipy.sparse.linalg import lsqr as sparse_lsqr # noqa try: # SciPy >= 0.19 @@ -214,28 +167,6 @@ def parallel_helper(obj, methodname, *args, **kwargs): return getattr(obj, methodname)(*args, **kwargs) -if 'exist_ok' in signature(os.makedirs).parameters: - makedirs = os.makedirs -else: - def makedirs(name, mode=0o777, exist_ok=False): - """makedirs(name [, mode=0o777][, exist_ok=False]) - - Super-mkdir; create a leaf directory and all intermediate ones. Works - like mkdir, except that any intermediate path segment (not just the - rightmost) will be created if it does not exist. If the target - directory already exists, raise an OSError if exist_ok is False. - Otherwise no exception is raised. This is recursive. - - """ - - try: - os.makedirs(name, mode=mode) - except OSError as e: - if (not exist_ok or e.errno != errno.EEXIST - or not os.path.isdir(name)): - raise - - if np_version < (1, 12): class MaskedArray(np.ma.MaskedArray): # Before numpy 1.12, np.ma.MaskedArray object is not picklable @@ -254,58 +185,6 @@ def __getstate__(self): from numpy.ma import MaskedArray # noqa -if np_version < (1, 11): - def nanpercentile(a, q): - """ - Compute the qth percentile of the data along the specified axis, - while ignoring nan values. - - Returns the qth percentile(s) of the array elements. - - Parameters - ---------- - a : array_like - Input array or object that can be converted to an array. - q : float in range of [0,100] (or sequence of floats) - Percentile to compute, which must be between 0 and 100 - inclusive. - - Returns - ------- - percentile : scalar or ndarray - If `q` is a single percentile and `axis=None`, then the result - is a scalar. If multiple percentiles are given, first axis of - the result corresponds to the percentiles. The other axes are - the axes that remain after the reduction of `a`. If the input - contains integers or floats smaller than ``float64``, the output - data-type is ``float64``. Otherwise, the output data-type is the - same as that of the input. If `out` is specified, that array is - returned instead. - - """ - data = np.compress(~np.isnan(a), a) - if data.size: - return np.percentile(data, q) - else: - size_q = 1 if np.isscalar(q) else len(q) - return np.array([np.nan] * size_q) -else: - from numpy import nanpercentile # noqa - - -if np_version < (1, 9): - def nanmedian(a, axis=None): - if axis is None: - data = a.reshape(-1) - return np.median(np.compress(~np.isnan(data), data)) - else: - data = a.T if not axis else a - return np.array([np.median(np.compress(~np.isnan(row), row)) - for row in data]) -else: - from numpy import nanmedian # noqa - - # Fix for behavior inconsistency on numpy.equal for object dtypes. # For numpy versions < 1.13, numpy.equal tests element-wise identity of objects # instead of equality. 
diff --git a/sklearn/utils/testing.py b/sklearn/utils/testing.py
index f0b691c0af261..9f3af941daf2b 100644
--- a/sklearn/utils/testing.py
+++ b/sklearn/utils/testing.py
@@ -21,14 +21,9 @@
 import scipy.io
 from functools import wraps
 from operator import itemgetter
-try:
-    # Python 2
-    from urllib2 import urlopen
-    from urllib2 import HTTPError
-except ImportError:
-    # Python 3+
-    from urllib.request import urlopen
-    from urllib.error import HTTPError
+from inspect import signature
+from urllib.request import urlopen
+from urllib.error import HTTPError
 
 import tempfile
 import shutil
@@ -56,7 +51,6 @@
 from sklearn.utils import deprecated, IS_PYPY, _IS_32BIT
 from sklearn.utils._joblib import joblib
 from sklearn.utils._unittest_backport import TestCase
-from sklearn.utils.fixes import signature
 
 additional_names_in_all = []
 try:
diff --git a/sklearn/utils/tests/test_deprecation.py b/sklearn/utils/tests/test_deprecation.py
index e5a1f021cda7e..f6049debeb20a 100644
--- a/sklearn/utils/tests/test_deprecation.py
+++ b/sklearn/utils/tests/test_deprecation.py
@@ -47,8 +47,6 @@ def test_deprecated():
 
 
 def test_is_deprecated():
-    if sys.version_info < (3, 5):
-        raise SkipTest("This test will run only on python3.5 and above")
     # Test if _is_deprecated helper identifies wrapping via deprecated
     # NOTE it works only for class methods and functions
     assert _is_deprecated(MockClass1.__init__)
diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py
index 49c9c4b1604fc..a4563c61eeeff 100644
--- a/sklearn/utils/tests/test_estimator_checks.py
+++ b/sklearn/utils/tests/test_estimator_checks.py
@@ -25,8 +25,7 @@
 from sklearn.linear_model import MultiTaskElasticNet
 from sklearn.svm import SVC
 from sklearn.neighbors import KNeighborsRegressor
-from sklearn.utils.validation import (check_X_y, check_array,
-                                      LARGE_SPARSE_SUPPORTED)
+from sklearn.utils.validation import check_X_y, check_array
 
 
 class CorrectNotFittedError(ValueError):
@@ -350,10 +349,8 @@ def test_check_estimator():
     # Large indices test on bad estimator
     msg = ('Estimator LargeSparseNotSupportedClassifier doesn\'t seem to '
            r'support \S{3}_64 matrix, and is not failing gracefully.*')
-    # only supported by scipy version more than 0.14.0
-    if LARGE_SPARSE_SUPPORTED:
-        assert_raises_regex(AssertionError, msg, check_estimator,
-                            LargeSparseNotSupportedClassifier)
+    assert_raises_regex(AssertionError, msg, check_estimator,
+                        LargeSparseNotSupportedClassifier)
 
     # non-regression test for estimators transforming to sparse data
     check_estimator(SparseTransformer())
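
The testing-module hunk above swaps the vendored `funcsigs` backport for the standard library's `inspect.signature`, which is always available now that Python 2 is dropped. A brief usage sketch with a hypothetical function:

    from inspect import signature

    def fit(X, y=None, sample_weight=None):
        pass

    params = signature(fit).parameters
    print(list(params))                 # ['X', 'y', 'sample_weight']
    print(params['y'].default is None)  # True
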
diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py
index ee2344d52fd0e..81b0044c804f0 100644
--- a/sklearn/utils/tests/test_extmath.py
+++ b/sklearn/utils/tests/test_extmath.py
@@ -634,8 +634,6 @@ def test_softmax():
 
 
 def test_stable_cumsum():
-    if np_version < (1, 9):
-        raise SkipTest("Sum is as unstable as cumsum for numpy < 1.9")
     assert_array_equal(stable_cumsum([1, 2, 3]), np.cumsum([1, 2, 3]))
     r = np.random.RandomState(0).rand(100000)
     assert_warns(RuntimeWarning, stable_cumsum, r, rtol=0, atol=0)
diff --git a/sklearn/utils/tests/test_fixes.py b/sklearn/utils/tests/test_fixes.py
index 1fee7224675f5..0dd97c03cb032 100644
--- a/sklearn/utils/tests/test_fixes.py
+++ b/sklearn/utils/tests/test_fixes.py
@@ -12,7 +12,6 @@
 from sklearn.utils.testing import assert_array_equal
 from sklearn.utils.testing import assert_allclose
 
-from sklearn.utils.fixes import divide
 from sklearn.utils.fixes import MaskedArray
 from sklearn.utils.fixes import nanmedian
 from sklearn.utils.fixes import nanpercentile
@@ -20,10 +19,6 @@
 from sklearn.utils.fixes import _object_dtype_isnan
 
 
-def test_divide():
-    assert_equal(divide(.6, 1), .600000000000)
-
-
 def test_masked_array_obj_dtype_pickleable():
     marr = MaskedArray([1, None, 'a'], dtype=object)
 
diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index d2c0ca2921d58..ec8b10ce2b54e 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -40,7 +40,6 @@
     assert_all_finite,
     check_memory,
     check_non_negative,
-    LARGE_SPARSE_SUPPORTED,
     _num_samples
 )
 import sklearn
@@ -459,29 +458,17 @@ def X_64bit(request):
 
 def test_check_array_accept_large_sparse_no_exception(X_64bit):
     # When large sparse are allowed
-    if LARGE_SPARSE_SUPPORTED:
-        check_array(X_64bit, accept_large_sparse=True, accept_sparse=True)
+    check_array(X_64bit, accept_large_sparse=True, accept_sparse=True)
 
 
 def test_check_array_accept_large_sparse_raise_exception(X_64bit):
     # When large sparse are not allowed
-    if LARGE_SPARSE_SUPPORTED:
-        msg = ("Only sparse matrices with 32-bit integer indices "
-               "are accepted. Got int64 indices.")
-        assert_raise_message(ValueError, msg,
-                             check_array, X_64bit,
-                             accept_sparse=True,
-                             accept_large_sparse=False)
-
-
-def test_check_array_large_indices_non_supported_scipy_version(X_64bit):
-    # Large indices should not be allowed for scipy<0.14.0
-    if not LARGE_SPARSE_SUPPORTED:
-        msg = ("Scipy version %s does not support large"
-               " indices, please upgrade your scipy"
-               " to 0.14.0 or above" % scipy_version)
-        assert_raise_message(ValueError, msg, check_array,
-                             X_64bit, accept_sparse='csc')
+    msg = ("Only sparse matrices with 32-bit integer indices "
+           "are accepted. Got int64 indices.")
+    assert_raise_message(ValueError, msg,
+                         check_array, X_64bit,
+                         accept_sparse=True,
+                         accept_large_sparse=False)
 
 
 def test_check_array_min_samples_and_features_messages():
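
Note that `test_fixes.py` above still imports `nanmedian` and `nanpercentile` from `sklearn.utils.fixes`, but after this change those names are plain re-exports of the numpy functions, which exist in every numpy version scikit-learn now supports. Their NaN-ignoring behavior, for reference:

    import numpy as np

    a = np.array([1.0, np.nan, 3.0, 4.0])
    print(np.nanmedian(a))          # 3.0 -- the NaN is dropped, not propagated
    print(np.nanpercentile(a, 50))  # 3.0 -- 50th percentile of [1.0, 3.0, 4.0]
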
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 72e64d3214000..da91af7b30124 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -13,13 +13,12 @@
 
 import numpy as np
 import scipy.sparse as sp
-from scipy import __version__ as scipy_version
 from distutils.version import LooseVersion
+from inspect import signature
 
 from numpy.core.numeric import ComplexWarning
 
 from ..externals import six
-from .fixes import signature
 from .. import get_config as _get_config
 from ..exceptions import NonBLASDotWarning
 from ..exceptions import NotFittedError
@@ -33,9 +32,6 @@
 # performance profiling.
 warnings.simplefilter('ignore', NonBLASDotWarning)
 
-# checking whether large sparse are supported by scipy or not
-LARGE_SPARSE_SUPPORTED = LooseVersion(scipy_version) >= '0.14.0'
-
 def _assert_all_finite(X, allow_nan=False):
     """Like assert_all_finite, but only for ndarray."""
@@ -613,7 +609,7 @@ def check_array(array, accept_sparse=False, accept_large_sparse=True,
 def _check_large_sparse(X, accept_large_sparse=False):
     """Raise a ValueError if X has 64bit indices and accept_large_sparse=False
     """
-    if not (accept_large_sparse and LARGE_SPARSE_SUPPORTED):
+    if not accept_large_sparse:
         supported_indices = ["int32"]
         if X.getformat() == "coo":
             index_keys = ['col', 'row']
@@ -624,10 +620,6 @@ def _check_large_sparse(X, accept_large_sparse=False):
         for key in index_keys:
             indices_datatype = getattr(X, key).dtype
             if (indices_datatype not in supported_indices):
-                if not LARGE_SPARSE_SUPPORTED:
-                    raise ValueError("Scipy version %s does not support large"
-                                     " indices, please upgrade your scipy"
-                                     " to 0.14.0 or above" % scipy_version)
                 raise ValueError("Only sparse matrices with 32-bit integer"
                                  " indices are accepted. Got %s indices."
                                  % indices_datatype)
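
With the scipy-version gate removed, `_check_large_sparse` enforces only the caller's `accept_large_sparse` flag. A sketch of the surviving error path, building a 64-bit-index matrix the same way `_generate_sparse_matrix` does above (the shape and density passed to `sp.random` are arbitrary):

    import scipy.sparse as sp
    from sklearn.utils import check_array

    X = sp.random(10, 10, density=0.1, format='csr')
    X.indices = X.indices.astype('int64')  # upcast the index arrays to 64-bit
    X.indptr = X.indptr.astype('int64')

    check_array(X, accept_sparse=True, accept_large_sparse=True)  # accepted
    try:
        check_array(X, accept_sparse=True, accept_large_sparse=False)
    except ValueError as exc:
        print(exc)  # "Only sparse matrices with 32-bit integer indices are accepted. ..."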