MAINT Unvendor joblib (scikit-learn#13531)
rth authored and NicolasHug committed Apr 17, 2019
1 parent 7243cc3 commit fc33d30
Showing 67 changed files with 61 additions and 14,071 deletions.
1 change: 1 addition & 0 deletions README.rst
@@ -50,6 +50,7 @@ scikit-learn requires:
 - Python (>= 3.5)
 - NumPy (>= 1.11.0)
 - SciPy (>= 0.17.0)
+- joblib (>= 0.11)
 
 **Scikit-learn 0.20 was the last version to support Python2.7.**
 Scikit-learn 0.21 and later require Python 3.5 or newer.
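
Since joblib now ships as a regular dependency instead of a vendored copy, a
quick import is enough to check that an environment meets the new floor (a
minimal sketch; the version bound mirrors the README entry above):

    # Verify the unvendored joblib dependency is importable and recent enough.
    import joblib
    print(joblib.__version__)  # should report >= 0.11
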
5 changes: 2 additions & 3 deletions azure-pipelines.yml
@@ -11,7 +11,6 @@ jobs:
   py35_np_atlas:
     DISTRIB: 'ubuntu'
     PYTHON_VERSION: '3.5'
-    SKLEARN_SITE_JOBLIB: '1'
     JOBLIB_VERSION: '0.11'
     SKLEARN_NO_OPENMP: 'True'
   # Linux + Python 3.5 build with OpenBLAS and without SITE_JOBLIB
@@ -23,10 +22,11 @@ jobs:
     SCIPY_VERSION: '0.17.0'
     CYTHON_VERSION: '*'
     PILLOW_VERSION: '4.0.0'
+    # later versions of joblib are not packaged in conda for Python 3.5
+    JOBLIB_VERSION: '0.12.3'
     COVERAGE: 'true'
   # Linux environment to test the latest available dependencies and MKL.
   # It runs tests requiring pandas and PyAMG.
-  # It also runs with the site joblib instead of the vendored copy of joblib.
   pylatest_conda:
     DISTRIB: 'conda'
     PYTHON_VERSION: '*'
@@ -41,7 +41,6 @@ jobs:
     COVERAGE: 'true'
     CHECK_PYTEST_SOFT_DEPENDENCY: 'true'
     TEST_DOCSTRINGS: 'true'
-    SKLEARN_SITE_JOBLIB: '1'
     CHECK_WARNINGS: 'true'
 
 - template: build_tools/azure/posix.yml
4 changes: 2 additions & 2 deletions build_tools/azure/install.cmd
@@ -11,11 +11,11 @@ IF "%PYTHON_ARCH%"=="64" (
     call deactivate
     @rem Clean up any left-over from a previous build
     conda remove --all -q -y -n %VIRTUALENV%
-    conda create -n %VIRTUALENV% -q -y python=%PYTHON_VERSION% numpy scipy cython pytest wheel pillow
+    conda create -n %VIRTUALENV% -q -y python=%PYTHON_VERSION% numpy scipy cython pytest wheel pillow joblib
 
     call activate %VIRTUALENV%
 ) else (
-    pip install numpy scipy cython pytest wheel pillow
+    pip install numpy scipy cython pytest wheel pillow joblib
 )
 if "%COVERAGE%" == "true" (
     pip install coverage codecov pytest-cov
6 changes: 1 addition & 5 deletions build_tools/azure/install.sh
@@ -27,7 +27,7 @@ make_conda() {
 if [[ "$DISTRIB" == "conda" ]]; then
     TO_INSTALL="python=$PYTHON_VERSION pip pytest pytest-cov \
                 numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION \
-                cython=$CYTHON_VERSION"
+                cython=$CYTHON_VERSION joblib=$JOBLIB_VERSION"
 
     if [[ "$INSTALL_MKL" == "true" ]]; then
         TO_INSTALL="$TO_INSTALL mkl"
@@ -47,10 +47,6 @@ if [[ "$DISTRIB" == "conda" ]]; then
         TO_INSTALL="$TO_INSTALL pillow=$PILLOW_VERSION"
     fi
 
-    if [[ -n "$JOBLIB_VERSION" ]]; then
-        TO_INSTALL="$TO_INSTALL joblib=$JOBLIB_VERSION"
-    fi
-
     make_conda $TO_INSTALL
 
 elif [[ "$DISTRIB" == "ubuntu" ]]; then
1 change: 0 additions & 1 deletion build_tools/travis/install.sh
@@ -113,7 +113,6 @@ elif [[ "$DISTRIB" == "scipy-dev" ]]; then
     pip install --pre --upgrade --timeout=60 -f $dev_url numpy scipy pandas cython
     echo "Installing joblib master"
     pip install https://github.com/joblib/joblib/archive/master.zip
-    export SKLEARN_SITE_JOBLIB=1
     echo "Installing pillow master"
     pip install https://github.com/python-pillow/Pillow/archive/master.zip
     pip install pytest pytest-cov
10 changes: 10 additions & 0 deletions conftest.py
@@ -61,3 +61,13 @@ def pytest_collection_modifyitems(config, items):
     for item in items:
         if isinstance(item, DoctestItem):
             item.add_marker(skip_marker)
+
+
+def pytest_configure(config):
+    import sys
+    sys._is_pytest_session = True
+
+
+def pytest_unconfigure(config):
+    import sys
+    del sys._is_pytest_session
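
The flag installed here is what the rewritten sklearn/externals/joblib/__init__.py
later in this diff checks before emitting its DeprecationWarning, so the test
suite can exercise the legacy import path without drowning in warnings. A
standalone sketch of the same pattern (the function name is illustrative):

    import sys
    import warnings

    def warn_unless_under_pytest():
        # conftest.py sets sys._is_pytest_session for the duration of the
        # test session; outside pytest the attribute does not exist, so
        # regular users still see the deprecation warning.
        if not hasattr(sys, "_is_pytest_session"):
            warnings.warn("deprecated import path", DeprecationWarning)
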
5 changes: 5 additions & 0 deletions doc/modules/computing.rst
@@ -553,6 +553,11 @@ These environment variables should be set before importing scikit-learn.
    is supported. In addition, dumps from joblib.Memory might be incompatible,
    and you might lose some caches and have to redownload some datasets.
 
+   .. deprecated:: 0.21
+
+      As of version 0.21 this parameter has no effect: the vendored joblib
+      was removed and the site joblib is always used.
+
 :SKLEARN_ASSUME_FINITE:
 
     Sets the default value for the `assume_finite` argument of
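
With the vendored copy gone, the legacy path is a thin re-export of the
installed joblib, which is easy to confirm interactively (a sketch; the legacy
import emits a DeprecationWarning outside of pytest):

    import joblib
    from sklearn.externals import joblib as legacy_joblib

    # Both names now resolve to the same installed package.
    print(legacy_joblib.Parallel is joblib.Parallel)  # True
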
10 changes: 10 additions & 0 deletions doc/whats_new/v0.21.rst
@@ -634,6 +634,16 @@ Multiple modules
    :issue:`13422` by :user:`Madhura Parikh <jdnc>` and
    :user:`Clément Doumouro <ClemDoum>`.
 
+
+Dependencies
+............
+
+- |Enhancement| Joblib is no longer vendored in scikit-learn and becomes a
+  dependency. The minimal supported version is joblib 0.11; however, using
+  version >= 0.13 is strongly recommended.
+  :issue:`13531` by :user:`Roman Yurchak <rth>`.
+
+
 Changes to estimator checks
 ---------------------------
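
For downstream code, the practical change is a one-line import migration (a
sketch; the legacy path keeps working until 0.23, but with a warning):

    # Before (deprecated in 0.21, scheduled for removal in 0.23):
    # from sklearn.externals.joblib import Parallel, delayed

    # After (requires: pip install "joblib>=0.11"):
    from joblib import Parallel, delayed

    results = Parallel(n_jobs=2)(delayed(abs)(-i) for i in range(5))
    print(results)  # [0, 1, 2, 3, 4]
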
4 changes: 3 additions & 1 deletion setup.py
@@ -57,6 +57,7 @@
 SCIPY_MIN_VERSION = '0.17.0'
 NUMPY_MIN_VERSION = '1.11.0'
 
+JOBLIB_MIN_VERSION = '0.11'
 
 # Optional setuptools features
 # We need to import setuptools early, if we want setuptools features,
@@ -226,7 +227,8 @@ def setup_package():
         cmdclass=cmdclass,
         install_requires=[
             'numpy>={}'.format(NUMPY_MIN_VERSION),
-            'scipy>={}'.format(SCIPY_MIN_VERSION)
+            'scipy>={}'.format(SCIPY_MIN_VERSION),
+            'joblib>={}'.format(JOBLIB_MIN_VERSION)
         ],
         **extra_setuptools_args)
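
Once a build carrying this metadata is installed, the declared floor is
visible at runtime; a sketch using pkg_resources (the usual tool on the
Python versions scikit-learn supported in 2019):

    # List the runtime requirements declared by the installed package.
    import pkg_resources

    dist = pkg_resources.get_distribution("scikit-learn")
    print([str(req) for req in dist.requires()])
    # expected to include 'joblib>=0.11' alongside the numpy and scipy pins
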
3 changes: 0 additions & 3 deletions sklearn/externals/README
@@ -1,9 +1,6 @@
 This directory contains bundled external dependencies that are updated
 every once in a while.
 
-Note to developers and advanced users: setting the SKLEARN_SITE_JOBLIB to
-a non null value will force scikit-learn to use the site joblib.
-
 Note for distribution packagers: if you want to remove the duplicated
 code and depend on a packaged version, we suggest that you simply do a
 symbolic link in this directory.
26 changes: 0 additions & 26 deletions sklearn/externals/copy_joblib.sh

This file was deleted.

140 changes: 11 additions & 129 deletions sklearn/externals/joblib/__init__.py
@@ -1,133 +1,15 @@
-"""Joblib is a set of tools to provide **lightweight pipelining in
-Python**. In particular:
-
-1. transparent disk-caching of functions and lazy re-evaluation
-   (memoize pattern)
-
-2. easy simple parallel computing
-
-Joblib is optimized to be **fast** and **robust** in particular on large
-data and has specific optimizations for `numpy` arrays. It is
-**BSD-licensed**.
-
-    ==================== ===============================================
-    **Documentation:**   https://joblib.readthedocs.io
-    **Download:**        http://pypi.python.org/pypi/joblib#downloads
-    **Source code:**     http://github.com/joblib/joblib
-    **Report issues:**   http://github.com/joblib/joblib/issues
-    ==================== ===============================================
-
-Vision
---------
-
-The vision is to provide tools to easily achieve better performance and
-reproducibility when working with long running jobs.
-
-* **Avoid computing twice the same thing**: code is rerun over and over,
-  for instance when prototyping computational-heavy jobs (as in
-  scientific development), but hand-crafted solutions to alleviate this
-  issue are error-prone and often lead to unreproducible results
-
-* **Persist to disk transparently**: persisting in an efficient way
-  arbitrary objects containing large data is hard. Using
-  joblib's caching mechanism avoids hand-written persistence and
-  implicitly links the file on disk to the execution context of
-  the original Python object. As a result, joblib's persistence is
-  good for resuming an application status or computational job, eg
-  after a crash.
-
-Joblib addresses these problems while **leaving your code and your flow
-control as unmodified as possible** (no framework, no new paradigms).
-
-Main features
-------------------
-
-1) **Transparent and fast disk-caching of output value:** a memoize or
-   make-like functionality for Python functions that works well for
-   arbitrary Python objects, including very large numpy arrays. Separate
-   persistence and flow-execution logic from domain logic or algorithmic
-   code by writing the operations as a set of steps with well-defined
-   inputs and outputs: Python functions. Joblib can save their
-   computation to disk and rerun it only if necessary::
-
-      >>> from sklearn.externals.joblib import Memory
-      >>> cachedir = 'your_cache_dir_goes_here'
-      >>> mem = Memory(cachedir)
-      >>> import numpy as np
-      >>> a = np.vander(np.arange(3)).astype(np.float)
-      >>> square = mem.cache(np.square)
-      >>> b = square(a)  # doctest: +ELLIPSIS
-      ________________________________________________________________________________
-      [Memory] Calling square...
-      square(array([[0., 0., 1.],
-             [1., 1., 1.],
-             [4., 2., 1.]]))
-      ___________________________________________________________square - 0...s, 0.0min
-
-      >>> c = square(a)
-      >>> # The above call did not trigger an evaluation
-
-2) **Embarrassingly parallel helper:** to make it easy to write readable
-   parallel code and debug it quickly::
-
-      >>> from sklearn.externals.joblib import Parallel, delayed
-      >>> from math import sqrt
-      >>> Parallel(n_jobs=1)(delayed(sqrt)(i**2) for i in range(10))
-      [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]
-
-3) **Fast compressed Persistence**: a replacement for pickle to work
-   efficiently on Python objects containing large data (
-   *joblib.dump* & *joblib.load* ).
-
-..
-    >>> import shutil ; shutil.rmtree(cachedir)
-
-"""
-
-# PEP0440 compatible formatted version, see:
-# https://www.python.org/dev/peps/pep-0440/
-#
-# Generic release markers:
-#   X.Y
-#   X.Y.Z   # For bugfix releases
-#
-# Admissible pre-release markers:
-#   X.YaN   # Alpha release
-#   X.YbN   # Beta release
-#   X.YrcN  # Release Candidate
-#   X.Y     # Final release
-#
-# Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
-# 'X.Y.dev0' is the canonical version of 'X.Y.dev'
-#
-__version__ = '0.13.0'
-
-
-from .memory import Memory, MemorizedResult, register_store_backend
-from .logger import PrintTime
-from .logger import Logger
-from .hashing import hash
-from .numpy_pickle import dump
-from .numpy_pickle import load
-from .compressor import register_compressor
-from .parallel import Parallel
-from .parallel import delayed
-from .parallel import cpu_count
-from .parallel import register_parallel_backend
-from .parallel import parallel_backend
-from .parallel import effective_n_jobs
-
-from .externals.loky import wrap_non_picklable_objects
-
-
-__all__ = ['Memory', 'MemorizedResult', 'PrintTime', 'Logger', 'hash', 'dump',
-           'load', 'Parallel', 'delayed', 'cpu_count', 'effective_n_jobs',
-           'register_parallel_backend', 'parallel_backend',
-           'register_store_backend', 'register_compressor',
-           'wrap_non_picklable_objects']
+# Import necessary to preserve backward compatibility of pickles
+import sys
+import warnings
+
+from joblib import *
+
+
+msg = ("sklearn.externals.joblib is deprecated in 0.21 and will be removed "
+       "in 0.23. Please import this functionality directly from joblib, "
+       "which can be installed with: pip install joblib. If this warning is "
+       "raised when loading pickled models, you may need to re-serialize "
+       "those models with scikit-learn 0.21+.")
+
+if not hasattr(sys, "_is_pytest_session"):
+    warnings.warn(msg, category=DeprecationWarning)
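
The tail of the warning message matters for persisted models: pickles written
with scikit-learn <= 0.20 can reference the sklearn.externals.joblib module
path, which disappears in 0.23. The fix the message suggests is a load/dump
round-trip (a sketch; the file names are illustrative):

    import joblib

    # Loading a pre-0.21 pickle may emit the DeprecationWarning above;
    # re-dumping makes the pickle reference joblib's real module path.
    model = joblib.load("model_pre021.pkl")
    joblib.dump(model, "model_021.pkl")
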
19 changes: 0 additions & 19 deletions sklearn/externals/joblib/_compat.py

This file was deleted.
