Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow n_jobs to receive negative values. #993

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion tsfresh/convenience/relevant_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,11 @@ def extract_relevant_features(
smaller chunksize.
:type chunksize: None or int

:param n_jobs: The number of processes to use for parallelization. If zero, no parallelization is used.
:param n_jobs: The number of processes to use for parallelization.
If zero, no parallelization is used.
``-1`` means using all processors. See scikit-learns'
`Glossary <https://scikit-learn.org/stable/glossary.html#term-n-jobs>`__
for more details.
:type n_jobs: int

:param distributor: Advanced parameter: set this to a class name that you want to use as a
Expand Down
14 changes: 12 additions & 2 deletions tsfresh/feature_extraction/extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
DistributorBaseClass,
MapDistributor,
MultiprocessingDistributor,
effective_n_jobs,
)
from tsfresh.utilities.string_manipulation import convert_to_output_format

Expand Down Expand Up @@ -100,7 +101,11 @@ def extract_features(
:param column_value: The name for the column keeping the value itself. Please see :ref:`data-formats-label`.
:type column_value: str

:param n_jobs: The number of processes to use for parallelization. If zero, no parallelization is used.
:param n_jobs: The number of processes to use for parallelization.
If zero, no parallelization is used.
``-1`` means using all processors. See scikit-learns'
`Glossary <https://scikit-learn.org/stable/glossary.html#term-n-jobs>`__
for more details.
:type n_jobs: int

:param chunksize: The size of one chunk that is submitted to the worker
Expand Down Expand Up @@ -240,7 +245,11 @@ def _do_extraction(
:param chunk_size: The size of one chunk for the parallelization
:type chunk_size: None or int

:param n_jobs: The number of processes to use for parallelization. If zero, no parallelization is used.
:param n_jobs: The number of processes to use for parallelization.
If zero, no parallelization is used.
``-1`` means using all processors. See scikit-learns'
`Glossary <https://scikit-learn.org/stable/glossary.html#term-n-jobs>`__
for more details.
:type n_jobs: int

:param disable_progressbar: Do not show a progressbar while doing the calculation.
Expand All @@ -258,6 +267,7 @@ def _do_extraction(
"""

data = to_tsdata(df, column_id, column_kind, column_value, column_sort)
n_jobs = effective_n_jobs(n_jobs)

if distributor is None:
if isinstance(data, Iterable):
Expand Down
13 changes: 11 additions & 2 deletions tsfresh/feature_selection/relevance.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@
target_real_feature_binary_test,
target_real_feature_real_test,
)
from tsfresh.utilities.distribution import initialize_warnings_in_workers
from tsfresh.utilities.distribution import (
initialize_warnings_in_workers,
effective_n_jobs
)


def calculate_relevance_table(
Expand Down Expand Up @@ -128,7 +131,11 @@ def calculate_relevance_table(
independent (e.g. mean and median)
:type hypotheses_independent: bool

:param n_jobs: Number of processes to use during the p-value calculation
:param n_jobs: Number of processes to use during the p-value calculation.
If zero, no parallelization is used.
``-1`` means using all processors. See scikit-learns'
`Glossary <https://scikit-learn.org/stable/glossary.html#term-n-jobs>`__
for more details.
:type n_jobs: int

:param show_warnings: Show warnings during the p-value calculation (needed for debugging of calculators).
Expand Down Expand Up @@ -192,6 +199,8 @@ def calculate_relevance_table(
else:
warnings.simplefilter("default")

n_jobs = effective_n_jobs(n_jobs)

if n_jobs == 0 or n_jobs == 1:
map_function = map
else:
Expand Down
6 changes: 5 additions & 1 deletion tsfresh/feature_selection/selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,11 @@ def select_features(
independent (e.g. mean and median)
:type hypotheses_independent: bool

:param n_jobs: Number of processes to use during the p-value calculation
:param n_jobs: Number of processes to use during the p-value calculation.
If zero, no parallelization is used.
``-1`` means using all processors. See scikit-learns'
`Glossary <https://scikit-learn.org/stable/glossary.html#term-n-jobs>`__
for more details.
:type n_jobs: int

:param show_warnings: Show warnings during the p-value calculation (needed for debugging of calculators).
Expand Down
6 changes: 5 additions & 1 deletion tsfresh/transformers/feature_augmenter.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,11 @@ def __init__(
:param column_value: The column with the values. See :mod:`~tsfresh.feature_extraction.extraction`.
:type column_value: basestring

:param n_jobs: The number of processes to use for parallelization. If zero, no parallelization is used.
:param n_jobs: The number of processes to use for parallelization.
If zero, no parallelization is used.
``-1`` means using all processors. See scikit-learns'
`Glossary <https://scikit-learn.org/stable/glossary.html#term-n-jobs>`__
for more details.
:type n_jobs: int

:param chunksize: The size of one chunk that is submitted to the worker
Expand Down
6 changes: 5 additions & 1 deletion tsfresh/transformers/feature_selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,11 @@ def __init__(
independent (e.g. mean and median)
:type hypotheses_independent: bool

:param n_jobs: Number of processes to use during the p-value calculation
:param n_jobs: Number of processes to use during the p-value calculation.
If zero, no parallelization is used.
``-1`` means using all processors. See scikit-learns'
`Glossary <https://scikit-learn.org/stable/glossary.html#term-n-jobs>`__
for more details.
:type n_jobs: int

:param chunksize: Size of the chunks submitted to the worker processes
Expand Down
6 changes: 5 additions & 1 deletion tsfresh/transformers/relevant_feature_augmenter.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,11 @@ def __init__(
smaller chunksize.
:type chunksize: None or int

:param n_jobs: The number of processes to use for parallelization. If zero, no parallelization is used.
:param n_jobs: The number of processes to use for parallelization.
If zero, no parallelization is used.
``-1`` means using all processors. See scikit-learns'
`Glossary <https://scikit-learn.org/stable/glossary.html#term-n-jobs>`__
for more details.
:type n_jobs: int

:param show_warnings: Show warnings during the feature extraction (needed for debugging of calculators).
Expand Down
8 changes: 7 additions & 1 deletion tsfresh/utilities/dataframe_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
DistributorBaseClass,
MapDistributor,
MultiprocessingDistributor,
effective_n_jobs,
)


Expand Down Expand Up @@ -420,7 +421,11 @@ def roll_time_series(
than or equal 0.
:type min_timeshift: int

:param n_jobs: The number of processes to use for parallelization. If zero, no parallelization is used.
:param n_jobs: The number of processes to use for parallelization.
If zero, no parallelization is used.
``-1`` means using all processors. See scikit-learns'
`Glossary <https://scikit-learn.org/stable/glossary.html#term-n-jobs>`__
for more details.
:type n_jobs: int

:param chunksize: How many shifts per job should be calculated.
Expand Down Expand Up @@ -541,6 +546,7 @@ def roll_time_series(
range_of_shifts = range(1, prediction_steps + 1, rolling_amount)

if distributor is None:
n_jobs = effective_n_jobs(n_jobs)
if n_jobs == 0 or n_jobs == 1:
distributor = MapDistributor(
disable_progressbar=disable_progressbar, progressbar_title="Rolling"
Expand Down
9 changes: 8 additions & 1 deletion tsfresh/utilities/distribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from collections.abc import Generator, Iterable
from functools import partial
from itertools import islice, repeat, takewhile
from multiprocessing import Pool
from multiprocessing import Pool, cpu_count

from tqdm import tqdm

Expand Down Expand Up @@ -61,6 +61,13 @@ def initialize_warnings_in_workers(show_warnings):
warnings.simplefilter("default")


def effective_n_jobs(n_jobs):
if n_jobs < 0:
n_jobs = max(cpu_count() + 1 + n_jobs, 1)

return n_jobs


class DistributorBaseClass:
"""
The distributor abstract base class.
Expand Down