From 3cf057a1886ff4fb353c22cd71d13ad5d717b504 Mon Sep 17 00:00:00 2001 From: stergion <35434161+stergion@users.noreply.github.com> Date: Thu, 22 Dec 2022 13:59:42 +0200 Subject: [PATCH 1/2] `n_jobs`: Add support for negative values, similar to sklearn, as explained https://scikit-learn.org/stable/glossary.html#term-n_jobs --- tsfresh/feature_extraction/extraction.py | 2 ++ tsfresh/feature_selection/relevance.py | 7 ++++++- tsfresh/utilities/dataframe_functions.py | 2 ++ tsfresh/utilities/distribution.py | 9 ++++++++- 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/tsfresh/feature_extraction/extraction.py b/tsfresh/feature_extraction/extraction.py index 7a87eadf..cd799102 100644 --- a/tsfresh/feature_extraction/extraction.py +++ b/tsfresh/feature_extraction/extraction.py @@ -21,6 +21,7 @@ DistributorBaseClass, MapDistributor, MultiprocessingDistributor, + effective_n_jobs, ) from tsfresh.utilities.string_manipulation import convert_to_output_format @@ -258,6 +259,7 @@ def _do_extraction( """ data = to_tsdata(df, column_id, column_kind, column_value, column_sort) + n_jobs = effective_n_jobs(n_jobs) if distributor is None: if isinstance(data, Iterable): diff --git a/tsfresh/feature_selection/relevance.py b/tsfresh/feature_selection/relevance.py index bce4cd8b..1f8ca01b 100644 --- a/tsfresh/feature_selection/relevance.py +++ b/tsfresh/feature_selection/relevance.py @@ -25,7 +25,10 @@ target_real_feature_binary_test, target_real_feature_real_test, ) -from tsfresh.utilities.distribution import initialize_warnings_in_workers +from tsfresh.utilities.distribution import ( + initialize_warnings_in_workers, + effective_n_jobs +) def calculate_relevance_table( @@ -192,6 +195,8 @@ def calculate_relevance_table( else: warnings.simplefilter("default") + n_jobs = effective_n_jobs(n_jobs) + if n_jobs == 0 or n_jobs == 1: map_function = map else: diff --git a/tsfresh/utilities/dataframe_functions.py b/tsfresh/utilities/dataframe_functions.py index d27f4931..b8ba3271 
100644 --- a/tsfresh/utilities/dataframe_functions.py +++ b/tsfresh/utilities/dataframe_functions.py @@ -15,6 +15,7 @@ DistributorBaseClass, MapDistributor, MultiprocessingDistributor, + effective_n_jobs, ) @@ -541,6 +542,7 @@ def roll_time_series( range_of_shifts = range(1, prediction_steps + 1, rolling_amount) if distributor is None: + n_jobs = effective_n_jobs(n_jobs) if n_jobs == 0 or n_jobs == 1: distributor = MapDistributor( disable_progressbar=disable_progressbar, progressbar_title="Rolling" diff --git a/tsfresh/utilities/distribution.py b/tsfresh/utilities/distribution.py index 66ac692b..5587406c 100644 --- a/tsfresh/utilities/distribution.py +++ b/tsfresh/utilities/distribution.py @@ -14,7 +14,7 @@ from collections.abc import Generator, Iterable from functools import partial from itertools import islice, repeat, takewhile -from multiprocessing import Pool +from multiprocessing import Pool, cpu_count from tqdm import tqdm @@ -61,6 +61,13 @@ def initialize_warnings_in_workers(show_warnings): warnings.simplefilter("default") +def effective_n_jobs(n_jobs): + if n_jobs < 0: + n_jobs = max(cpu_count() + 1 + n_jobs, 1) + + return n_jobs + + class DistributorBaseClass: """ The distributor abstract base class. From b0272de712b60f7036e77b788e6ba5aef906c9a6 Mon Sep 17 00:00:00 2001 From: stergion <35434161+stergion@users.noreply.github.com> Date: Thu, 22 Dec 2022 14:19:19 +0200 Subject: [PATCH 2/2] Update documentation for `n_jobs` to reflect previous change. `n_jobs` can now receive negative values. 
--- tsfresh/convenience/relevant_extraction.py | 6 +++++- tsfresh/feature_extraction/extraction.py | 12 ++++++++++-- tsfresh/feature_selection/relevance.py | 6 +++++- tsfresh/feature_selection/selection.py | 6 +++++- tsfresh/transformers/feature_augmenter.py | 6 +++++- tsfresh/transformers/feature_selector.py | 6 +++++- tsfresh/transformers/relevant_feature_augmenter.py | 6 +++++- tsfresh/utilities/dataframe_functions.py | 6 +++++- 8 files changed, 45 insertions(+), 9 deletions(-) diff --git a/tsfresh/convenience/relevant_extraction.py b/tsfresh/convenience/relevant_extraction.py index 62a735eb..cbd611d2 100644 --- a/tsfresh/convenience/relevant_extraction.py +++ b/tsfresh/convenience/relevant_extraction.py @@ -98,7 +98,11 @@ def extract_relevant_features( smaller chunksize. :type chunksize: None or int - :param n_jobs: The number of processes to use for parallelization. If zero, no parallelization is used. + :param n_jobs: The number of processes to use for parallelization. + If zero, no parallelization is used. + ``-1`` means using all processors. See scikit-learn's + `Glossary <https://scikit-learn.org/stable/glossary.html#term-n_jobs>`__ + for more details. :type n_jobs: int :param distributor: Advanced parameter: set this to a class name that you want to use as a diff --git a/tsfresh/feature_extraction/extraction.py b/tsfresh/feature_extraction/extraction.py index cd799102..d122e5b0 100644 --- a/tsfresh/feature_extraction/extraction.py +++ b/tsfresh/feature_extraction/extraction.py @@ -101,7 +101,11 @@ def extract_features( :param column_value: The name for the column keeping the value itself. Please see :ref:`data-formats-label`. :type column_value: str - :param n_jobs: The number of processes to use for parallelization. If zero, no parallelization is used. + :param n_jobs: The number of processes to use for parallelization. + If zero, no parallelization is used. + ``-1`` means using all processors. See scikit-learn's + `Glossary <https://scikit-learn.org/stable/glossary.html#term-n_jobs>`__ + for more details. 
 :type n_jobs: int :param chunksize: The size of one chunk that is submitted to the worker @@ -241,7 +245,11 @@ def _do_extraction( :param chunk_size: The size of one chunk for the parallelization :type chunk_size: None or int - :param n_jobs: The number of processes to use for parallelization. If zero, no parallelization is used. + :param n_jobs: The number of processes to use for parallelization. + If zero, no parallelization is used. + ``-1`` means using all processors. See scikit-learn's + `Glossary <https://scikit-learn.org/stable/glossary.html#term-n_jobs>`__ + for more details. :type n_jobs: int :param disable_progressbar: Do not show a progressbar while doing the calculation. diff --git a/tsfresh/feature_selection/relevance.py b/tsfresh/feature_selection/relevance.py index 1f8ca01b..b2175883 100644 --- a/tsfresh/feature_selection/relevance.py +++ b/tsfresh/feature_selection/relevance.py @@ -131,7 +131,11 @@ def calculate_relevance_table( independent (e.g. mean and median) :type hypotheses_independent: bool - :param n_jobs: Number of processes to use during the p-value calculation + :param n_jobs: Number of processes to use during the p-value calculation. + If zero, no parallelization is used. + ``-1`` means using all processors. See scikit-learn's + `Glossary <https://scikit-learn.org/stable/glossary.html#term-n_jobs>`__ + for more details. :type n_jobs: int :param show_warnings: Show warnings during the p-value calculation (needed for debugging of calculators). diff --git a/tsfresh/feature_selection/selection.py b/tsfresh/feature_selection/selection.py index 7359d347..5c731a36 100644 --- a/tsfresh/feature_selection/selection.py +++ b/tsfresh/feature_selection/selection.py @@ -108,7 +108,11 @@ def select_features( independent (e.g. mean and median) :type hypotheses_independent: bool - :param n_jobs: Number of processes to use during the p-value calculation + :param n_jobs: Number of processes to use during the p-value calculation. + If zero, no parallelization is used. + ``-1`` means using all processors. See scikit-learn's + `Glossary <https://scikit-learn.org/stable/glossary.html#term-n_jobs>`__ + for more details. 
 :type n_jobs: int :param show_warnings: Show warnings during the p-value calculation (needed for debugging of calculators). diff --git a/tsfresh/transformers/feature_augmenter.py b/tsfresh/transformers/feature_augmenter.py index 421177d1..7ada4fff 100644 --- a/tsfresh/transformers/feature_augmenter.py +++ b/tsfresh/transformers/feature_augmenter.py @@ -101,7 +101,11 @@ def __init__( :param column_value: The column with the values. See :mod:`~tsfresh.feature_extraction.extraction`. :type column_value: basestring - :param n_jobs: The number of processes to use for parallelization. If zero, no parallelization is used. + :param n_jobs: The number of processes to use for parallelization. + If zero, no parallelization is used. + ``-1`` means using all processors. See scikit-learn's + `Glossary <https://scikit-learn.org/stable/glossary.html#term-n_jobs>`__ + for more details. :type n_jobs: int :param chunksize: The size of one chunk that is submitted to the worker diff --git a/tsfresh/transformers/feature_selector.py b/tsfresh/transformers/feature_selector.py index 3b9dd746..c344c5c2 100644 --- a/tsfresh/transformers/feature_selector.py +++ b/tsfresh/transformers/feature_selector.py @@ -99,7 +99,11 @@ def __init__( independent (e.g. mean and median) :type hypotheses_independent: bool - :param n_jobs: Number of processes to use during the p-value calculation + :param n_jobs: Number of processes to use during the p-value calculation. + If zero, no parallelization is used. + ``-1`` means using all processors. See scikit-learn's + `Glossary <https://scikit-learn.org/stable/glossary.html#term-n_jobs>`__ + for more details. :type n_jobs: int :param chunksize: Size of the chunks submitted to the worker processes diff --git a/tsfresh/transformers/relevant_feature_augmenter.py b/tsfresh/transformers/relevant_feature_augmenter.py index 90b85d69..5638270f 100644 --- a/tsfresh/transformers/relevant_feature_augmenter.py +++ b/tsfresh/transformers/relevant_feature_augmenter.py @@ -154,7 +154,11 @@ def __init__( smaller chunksize. 
 :type chunksize: None or int - :param n_jobs: The number of processes to use for parallelization. If zero, no parallelization is used. + :param n_jobs: The number of processes to use for parallelization. + If zero, no parallelization is used. + ``-1`` means using all processors. See scikit-learn's + `Glossary <https://scikit-learn.org/stable/glossary.html#term-n_jobs>`__ + for more details. :type n_jobs: int :param show_warnings: Show warnings during the feature extraction (needed for debugging of calculators). diff --git a/tsfresh/utilities/dataframe_functions.py b/tsfresh/utilities/dataframe_functions.py index b8ba3271..2a321844 100644 --- a/tsfresh/utilities/dataframe_functions.py +++ b/tsfresh/utilities/dataframe_functions.py @@ -421,7 +421,11 @@ def roll_time_series( than or equal 0. :type min_timeshift: int - :param n_jobs: The number of processes to use for parallelization. If zero, no parallelization is used. + :param n_jobs: The number of processes to use for parallelization. + If zero, no parallelization is used. + ``-1`` means using all processors. See scikit-learn's + `Glossary <https://scikit-learn.org/stable/glossary.html#term-n_jobs>`__ + for more details. :type n_jobs: int :param chunksize: How many shifts per job should be calculated.