From 30fdbbce14e2b8685c2f83fa7eff542a07a4cced Mon Sep 17 00:00:00 2001 From: Brian Raf <92820864+nv-braf@users.noreply.github.com> Date: Mon, 3 Jun 2024 14:26:32 -0700 Subject: [PATCH] Optuna Early Exit (#890) * Add logic to enable early exit along with CLI hooks. * Changes based on PR --- .../generate/optuna_run_config_generator.py | 98 +++++++++++++++++-- .../config/input/config_command_profile.py | 10 ++ .../config/input/config_defaults.py | 1 + tests/test_cli.py | 1 + tests/test_optuna_run_config_generator.py | 50 +++++++++- 5 files changed, 153 insertions(+), 7 deletions(-) diff --git a/model_analyzer/config/generate/optuna_run_config_generator.py b/model_analyzer/config/generate/optuna_run_config_generator.py index 53af27742..77a6abd34 100755 --- a/model_analyzer/config/generate/optuna_run_config_generator.py +++ b/model_analyzer/config/generate/optuna_run_config_generator.py @@ -108,6 +108,7 @@ def __init__( self._last_measurement: Optional[RunConfigMeasurement] = None self._best_config_name = "" self._best_config_score: Optional[float] = None + self._best_trial_number: Optional[int] = None self._c_api_mode = config.triton_launch_mode == "c_api" @@ -155,12 +156,13 @@ def get_configs(self) -> Generator[RunConfig, None, None]: if logging.DEBUG: self._print_debug_search_space_info() + min_configs_to_search = self._determine_minimum_number_of_configs_to_search() max_configs_to_search = self._determine_maximum_number_of_configs_to_search() # TODO: TMA-1885: Need an early exit strategy - for trial_count in range(max_configs_to_search): + for trial_number in range(1, max_configs_to_search + 1): trial = self._study.ask() trial_objectives = self._create_trial_objectives(trial) - logger.debug(f"Trial {trial_count+1} of {max_configs_to_search}:") + logger.debug(f"Trial {trial_number} of {max_configs_to_search}:") run_config = self._create_objective_based_run_config(trial_objectives) yield run_config @@ -170,6 +172,9 @@ def get_configs(self) -> Generator[RunConfig, None, None]: if logging.DEBUG: self._print_debug_score_info(run_config, score) + if self._should_terminate_early(min_configs_to_search, trial_number): + logger.debug("Early termination threshold reached") + break self._study.tell(trial, score) def _capture_default_measurement(self, default_run_config: RunConfig) -> None: @@ -180,17 +185,20 @@ def _capture_default_measurement(self, default_run_config: RunConfig) -> None: self._default_measurement = self._last_measurement - def _set_best_measurement(self, run_config: RunConfig, score: float = 0) -> None: + def _set_best_measurement( + self, run_config: RunConfig, score: float = 0, trial_number: int = 0 + ) -> None: if self._best_config_score is None or score > self._best_config_score: self._best_config_name = run_config.model_variants_name() self._best_config_score = score + self._best_trial_number = trial_number def _determine_maximum_number_of_configs_to_search(self) -> int: max_trials_based_on_percentage_of_search_space = ( self._determine_trials_based_on_max_percentage_of_search_space() ) - max_configs_to_search = self._decide_between_percentage_and_trial_count( + max_configs_to_search = self._decide_max_between_percentage_and_trial_count( max_trials_based_on_percentage_of_search_space ) @@ -208,7 +216,7 @@ def _determine_trials_based_on_max_percentage_of_search_space(self) -> int: return max_trials_based_on_percentage_of_search_space - def _decide_between_percentage_and_trial_count( + def _decide_max_between_percentage_and_trial_count( self, max_trials_based_on_percentage_of_search_space: int ) -> int: # By default we will search based on percentage of search space @@ -238,7 +246,7 @@ def _decide_between_percentage_and_trial_count( max_configs_to_search = max_trials_based_on_percentage_of_search_space elif max_trials_set_by_user: logger.debug( - f"Maximum number of trials: {self._config.optuna_max_trials} (set by max. trials)" + f"Maximum number of trials: {self._config.optuna_max_trials} (optuna_max_trials)" ) max_configs_to_search = self._config.optuna_max_trials else: @@ -252,6 +260,71 @@ def _decide_between_percentage_and_trial_count( logger.info("") return max_configs_to_search + def _determine_minimum_number_of_configs_to_search(self) -> int: + min_trials_based_on_percentage_of_search_space = ( + self._determine_trials_based_on_min_percentage_of_search_space() + ) + + min_configs_to_search = self._decide_min_between_percentage_and_trial_count( + min_trials_based_on_percentage_of_search_space + ) + + return min_configs_to_search + + def _determine_trials_based_on_min_percentage_of_search_space(self) -> int: + total_num_of_possible_configs = ( + self._search_parameters.number_of_total_possible_configurations() + ) + min_trials_based_on_percentage_of_search_space = int( + total_num_of_possible_configs + * self._config.min_percentage_of_search_space + / 100 + ) + + return min_trials_based_on_percentage_of_search_space + + def _decide_min_between_percentage_and_trial_count( + self, min_trials_based_on_percentage_of_search_space: int + ) -> int: + # By default we will search based on percentage of search space + # If the user specifies a number of trials we will use that instead + # If both are specified we will use the larger number + min_trials_set_by_user = self._config.get_config()[ + "optuna_min_trials" + ].is_set_by_user() + min_percentage_set_by_user = self._config.get_config()[ + "min_percentage_of_search_space" + ].is_set_by_user() + + if min_trials_set_by_user and min_percentage_set_by_user: + if ( + self._config.optuna_min_trials + > min_trials_based_on_percentage_of_search_space + ): + logger.debug( + f"Minimum number of trials: {self._config.optuna_min_trials} (optuna_min_trials)" + ) + min_configs_to_search = self._config.optuna_min_trials + else: + logger.debug( + f"Minimum number of trials: {min_trials_based_on_percentage_of_search_space} " + f"({self._config.min_percentage_of_search_space}% of search space)" + ) + min_configs_to_search = min_trials_based_on_percentage_of_search_space + elif min_trials_set_by_user: + logger.debug( + f"Minimum number of trials: {self._config.optuna_min_trials} (optuna_min_trials)" + ) + min_configs_to_search = self._config.optuna_min_trials + else: + logger.debug( + f"Minimum number of trials: {min_trials_based_on_percentage_of_search_space} " + f"({self._config.min_percentage_of_search_space}% of search space)" + ) + min_configs_to_search = min_trials_based_on_percentage_of_search_space + + return min_configs_to_search + def _create_trial_objectives(self, trial: optuna.Trial) -> TrialObjectives: trial_objectives: TrialObjectives = {} for parameter_name in OptunaRunConfigGenerator.optuna_parameter_list: @@ -464,6 +537,19 @@ def _create_perf_analyzer_config( perf_analyzer_config.update_config(model.perf_analyzer_flags()) return perf_analyzer_config + def _should_terminate_early( + self, min_configs_to_search: int, trial_number: int + ) -> bool: + number_of_trials_since_best = trial_number - self._best_trial_number # type: ignore + if trial_number < min_configs_to_search: + should_terminate_early = False + elif number_of_trials_since_best >= self._config.optuna_early_exit_threshold: + should_terminate_early = True + else: + should_terminate_early = False + + return should_terminate_early + def _print_debug_search_space_info(self) -> None: logger.info("") logger.debug( diff --git a/model_analyzer/config/input/config_command_profile.py b/model_analyzer/config/input/config_command_profile.py index 47fcc403d..9da6ab328 100755 --- a/model_analyzer/config/input/config_command_profile.py +++ b/model_analyzer/config/input/config_command_profile.py @@ -62,6 +62,7 @@ DEFAULT_OFFLINE_PLOTS, DEFAULT_ONLINE_OBJECTIVES, DEFAULT_ONLINE_PLOTS, + DEFAULT_OPTUNA_EARLY_EXIT_THRESHOLD, DEFAULT_OPTUNA_MAX_PERCENTAGE_OF_SEARCH_SPACE, DEFAULT_OPTUNA_MAX_TRIALS, DEFAULT_OPTUNA_MIN_PERCENTAGE_OF_SEARCH_SPACE, @@ -957,6 +958,15 @@ def _add_run_search_configs(self): description="Maximum number of trials to profile when using Optuna", ) ) + self._add_config( + ConfigField( + "optuna_early_exit_threshold", + flags=["--optuna_early_exit_threshold"], + field_type=ConfigPrimitive(int), + default_value=DEFAULT_OPTUNA_EARLY_EXIT_THRESHOLD, + description="Number of trials without improvement before triggering early exit when using Optuna", + ) + ) self._add_config( ConfigField( "use_concurrency_formula", diff --git a/model_analyzer/config/input/config_defaults.py b/model_analyzer/config/input/config_defaults.py index a6ccf2a87..f8efa7130 100755 --- a/model_analyzer/config/input/config_defaults.py +++ b/model_analyzer/config/input/config_defaults.py @@ -58,6 +58,7 @@ DEFAULT_OPTUNA_MAX_PERCENTAGE_OF_SEARCH_SPACE = 10 DEFAULT_OPTUNA_MIN_TRIALS = 20 DEFAULT_OPTUNA_MAX_TRIALS = 200 +DEFAULT_OPTUNA_EARLY_EXIT_THRESHOLD = 10 DEFAULT_USE_CONCURRENCY_FORMULA = False DEFAULT_REQUEST_RATE_SEARCH_ENABLE = False DEFAULT_TRITON_LAUNCH_MODE = "local" diff --git a/tests/test_cli.py b/tests/test_cli.py index d4bfde6f6..1c91148d2 100755 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -90,6 +90,7 @@ def get_test_options(): OptionStruct("int", "profile", "--max_percentage_of_search_space", None, "5", "10"), OptionStruct("int", "profile", "--optuna_min_trials", None, "10", "20"), OptionStruct("int", "profile", "--optuna_max_trials", None, "5", "200"), + OptionStruct("int", "profile", "--optuna_early_exit_threshold", None, "5", "10"), OptionStruct("float", "profile", "--monitoring-interval", "-i", "10.0", "1.0"), OptionStruct("float", "profile", "--perf-analyzer-cpu-util", None, "10.0", str(psutil.cpu_count() * 80.0)), OptionStruct("int", "profile", "--num-configs-per-model", None, "10", "3"), diff --git a/tests/test_optuna_run_config_generator.py b/tests/test_optuna_run_config_generator.py index 81743b53f..ad026bbcd 100755 --- a/tests/test_optuna_run_config_generator.py +++ b/tests/test_optuna_run_config_generator.py @@ -100,7 +100,7 @@ def test_max_number_of_configs_to_search_count(self): def test_max_number_of_configs_to_search_both(self): """ - Test count based on specify both a count and percentage + Test max count based on specify both a count and percentage """ config = self._create_config( additional_args=[ @@ -120,6 +120,54 @@ def test_max_number_of_configs_to_search_both(self): # Since both are specified we will use the smaller of the two (3% of 120 = 3) self.assertEquals(max_configs_to_search, 3) + def test_min_number_of_configs_to_search_percentage(self): + """ + Test percentage based min num of configs to search + """ + min_configs_to_search = ( + self._rcg._determine_minimum_number_of_configs_to_search() + ) + + # Batch sizes (8) * Instance groups (5) * queue delays (3) = 120 + # 5% of search space (120) = 6 + self.assertEquals(min_configs_to_search, 6) + + def test_min_number_of_configs_to_search_count(self): + """ + Test count based min num of configs to search + """ + config = self._create_config(additional_args=["--optuna_min_trials", "12"]) + + self._rcg._config = config + + min_configs_to_search = ( + self._rcg._determine_minimum_number_of_configs_to_search() + ) + + self.assertEquals(min_configs_to_search, 12) + + def test_min_number_of_configs_to_search_both(self): + """ + Test min count based on specify both a count and percentage + """ + config = self._create_config( + additional_args=[ + "--optuna_min_trials", + "6", + "--min_percentage_of_search_space", + "3", + ] + ) + + self._rcg._config = config + + min_configs_to_search = ( + self._rcg._determine_minimum_number_of_configs_to_search() + ) + + # Since both are specified we will use the larger of the two (trials=6) + self.assertEquals(min_configs_to_search, 6) + def test_create_default_run_config(self): """ Test that a default run config is properly created