Skip to content

Commit

Permalink
Optuna Early Exit (#890)
Browse files Browse the repository at this point in the history
* Add logic to enable early exit along with CLI hooks.

* Changes based on PR
  • Loading branch information
nv-braf committed Jun 6, 2024
1 parent b399d10 commit 30fdbbc
Show file tree
Hide file tree
Showing 5 changed files with 153 additions and 7 deletions.
98 changes: 92 additions & 6 deletions model_analyzer/config/generate/optuna_run_config_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ def __init__(
self._last_measurement: Optional[RunConfigMeasurement] = None
self._best_config_name = ""
self._best_config_score: Optional[float] = None
self._best_trial_number: Optional[int] = None

self._c_api_mode = config.triton_launch_mode == "c_api"

Expand Down Expand Up @@ -155,12 +156,13 @@ def get_configs(self) -> Generator[RunConfig, None, None]:
if logging.DEBUG:
self._print_debug_search_space_info()

min_configs_to_search = self._determine_minimum_number_of_configs_to_search()
max_configs_to_search = self._determine_maximum_number_of_configs_to_search()
# TODO: TMA-1885: Need an early exit strategy
for trial_count in range(max_configs_to_search):
for trial_number in range(1, max_configs_to_search + 1):
trial = self._study.ask()
trial_objectives = self._create_trial_objectives(trial)
logger.debug(f"Trial {trial_count+1} of {max_configs_to_search}:")
logger.debug(f"Trial {trial_number} of {max_configs_to_search}:")
run_config = self._create_objective_based_run_config(trial_objectives)
yield run_config

Expand All @@ -170,6 +172,9 @@ def get_configs(self) -> Generator[RunConfig, None, None]:
if logging.DEBUG:
self._print_debug_score_info(run_config, score)

if self._should_terminate_early(min_configs_to_search, trial_number):
logger.debug("Early termination threshold reached")
break
self._study.tell(trial, score)

def _capture_default_measurement(self, default_run_config: RunConfig) -> None:
Expand All @@ -180,17 +185,20 @@ def _capture_default_measurement(self, default_run_config: RunConfig) -> None:

self._default_measurement = self._last_measurement

def _set_best_measurement(self, run_config: RunConfig, score: float = 0) -> None:
def _set_best_measurement(
    self, run_config: RunConfig, score: float = 0, trial_number: int = 0
) -> None:
    """Track the best-scoring run config seen so far.

    Updates the stored best name/score/trial-number whenever `score`
    beats the current best (or when no best has been recorded yet).
    """
    no_best_recorded = self._best_config_score is None
    if no_best_recorded or score > self._best_config_score:
        self._best_config_name = run_config.model_variants_name()
        self._best_config_score = score
        self._best_trial_number = trial_number

def _determine_maximum_number_of_configs_to_search(self) -> int:
max_trials_based_on_percentage_of_search_space = (
self._determine_trials_based_on_max_percentage_of_search_space()
)

max_configs_to_search = self._decide_between_percentage_and_trial_count(
max_configs_to_search = self._decide_max_between_percentage_and_trial_count(
max_trials_based_on_percentage_of_search_space
)

Expand All @@ -208,7 +216,7 @@ def _determine_trials_based_on_max_percentage_of_search_space(self) -> int:

return max_trials_based_on_percentage_of_search_space

def _decide_between_percentage_and_trial_count(
def _decide_max_between_percentage_and_trial_count(
self, max_trials_based_on_percentage_of_search_space: int
) -> int:
# By default we will search based on percentage of search space
Expand Down Expand Up @@ -238,7 +246,7 @@ def _decide_between_percentage_and_trial_count(
max_configs_to_search = max_trials_based_on_percentage_of_search_space
elif max_trials_set_by_user:
logger.debug(
f"Maximum number of trials: {self._config.optuna_max_trials} (set by max. trials)"
f"Maximum number of trials: {self._config.optuna_max_trials} (optuna_max_trials)"
)
max_configs_to_search = self._config.optuna_max_trials
else:
Expand All @@ -252,6 +260,71 @@ def _decide_between_percentage_and_trial_count(
logger.info("")
return max_configs_to_search

def _determine_minimum_number_of_configs_to_search(self) -> int:
min_trials_based_on_percentage_of_search_space = (
self._determine_trials_based_on_min_percentage_of_search_space()
)

min_configs_to_search = self._decide_min_between_percentage_and_trial_count(
min_trials_based_on_percentage_of_search_space
)

return min_configs_to_search

def _determine_trials_based_on_min_percentage_of_search_space(self) -> int:
total_num_of_possible_configs = (
self._search_parameters.number_of_total_possible_configurations()
)
min_trials_based_on_percentage_of_search_space = int(
total_num_of_possible_configs
* self._config.min_percentage_of_search_space
/ 100
)

return min_trials_based_on_percentage_of_search_space

def _decide_min_between_percentage_and_trial_count(
    self, min_trials_based_on_percentage_of_search_space: int
) -> int:
    """Choose the effective minimum trial count.

    Defaults to the percentage-of-search-space value. An explicitly set
    optuna_min_trials takes precedence, except when both options were
    set by the user — then the larger of the two wins.
    """
    config_fields = self._config.get_config()
    trials_specified = config_fields["optuna_min_trials"].is_set_by_user()
    percentage_specified = config_fields[
        "min_percentage_of_search_space"
    ].is_set_by_user()

    # Explicit trial count wins outright when the percentage was left at
    # its default; when both were given, it wins only if it is larger.
    use_trial_count = trials_specified and (
        not percentage_specified
        or self._config.optuna_min_trials
        > min_trials_based_on_percentage_of_search_space
    )

    if use_trial_count:
        logger.debug(
            f"Minimum number of trials: {self._config.optuna_min_trials} (optuna_min_trials)"
        )
        return self._config.optuna_min_trials

    logger.debug(
        f"Minimum number of trials: {min_trials_based_on_percentage_of_search_space} "
        f"({self._config.min_percentage_of_search_space}% of search space)"
    )
    return min_trials_based_on_percentage_of_search_space

def _create_trial_objectives(self, trial: optuna.Trial) -> TrialObjectives:
trial_objectives: TrialObjectives = {}
for parameter_name in OptunaRunConfigGenerator.optuna_parameter_list:
Expand Down Expand Up @@ -464,6 +537,19 @@ def _create_perf_analyzer_config(
perf_analyzer_config.update_config(model.perf_analyzer_flags())
return perf_analyzer_config

def _should_terminate_early(
self, min_configs_to_search: int, trial_number: int
) -> bool:
number_of_trials_since_best = trial_number - self._best_trial_number # type: ignore
if trial_number < min_configs_to_search:
should_terminate_early = False
elif number_of_trials_since_best >= self._config.optuna_early_exit_threshold:
should_terminate_early = True
else:
should_terminate_early = False

return should_terminate_early

def _print_debug_search_space_info(self) -> None:
logger.info("")
logger.debug(
Expand Down
10 changes: 10 additions & 0 deletions model_analyzer/config/input/config_command_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
DEFAULT_OFFLINE_PLOTS,
DEFAULT_ONLINE_OBJECTIVES,
DEFAULT_ONLINE_PLOTS,
DEFAULT_OPTUNA_EARLY_EXIT_THRESHOLD,
DEFAULT_OPTUNA_MAX_PERCENTAGE_OF_SEARCH_SPACE,
DEFAULT_OPTUNA_MAX_TRIALS,
DEFAULT_OPTUNA_MIN_PERCENTAGE_OF_SEARCH_SPACE,
Expand Down Expand Up @@ -957,6 +958,15 @@ def _add_run_search_configs(self):
description="Maximum number of trials to profile when using Optuna",
)
)
self._add_config(
ConfigField(
"optuna_early_exit_threshold",
flags=["--optuna_early_exit_threshold"],
field_type=ConfigPrimitive(int),
default_value=DEFAULT_OPTUNA_EARLY_EXIT_THRESHOLD,
description="Number of trials without improvement before triggering early exit when using Optuna",
)
)
self._add_config(
ConfigField(
"use_concurrency_formula",
Expand Down
1 change: 1 addition & 0 deletions model_analyzer/config/input/config_defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
DEFAULT_OPTUNA_MAX_PERCENTAGE_OF_SEARCH_SPACE = 10
DEFAULT_OPTUNA_MIN_TRIALS = 20
DEFAULT_OPTUNA_MAX_TRIALS = 200
DEFAULT_OPTUNA_EARLY_EXIT_THRESHOLD = 10
DEFAULT_USE_CONCURRENCY_FORMULA = False
DEFAULT_REQUEST_RATE_SEARCH_ENABLE = False
DEFAULT_TRITON_LAUNCH_MODE = "local"
Expand Down
1 change: 1 addition & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ def get_test_options():
OptionStruct("int", "profile", "--max_percentage_of_search_space", None, "5", "10"),
OptionStruct("int", "profile", "--optuna_min_trials", None, "10", "20"),
OptionStruct("int", "profile", "--optuna_max_trials", None, "5", "200"),
OptionStruct("int", "profile", "--optuna_early_exit_threshold", None, "5", "10"),
OptionStruct("float", "profile", "--monitoring-interval", "-i", "10.0", "1.0"),
OptionStruct("float", "profile", "--perf-analyzer-cpu-util", None, "10.0", str(psutil.cpu_count() * 80.0)),
OptionStruct("int", "profile", "--num-configs-per-model", None, "10", "3"),
Expand Down
50 changes: 49 additions & 1 deletion tests/test_optuna_run_config_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def test_max_number_of_configs_to_search_count(self):

def test_max_number_of_configs_to_search_both(self):
"""
Test count based on specify both a count and percentage
Test max count based on specify both a count and percentage
"""
config = self._create_config(
additional_args=[
Expand All @@ -120,6 +120,54 @@ def test_max_number_of_configs_to_search_both(self):
# Since both are specified we will use the smaller of the two (3% of 120 = 3)
self.assertEquals(max_configs_to_search, 3)

def test_min_number_of_configs_to_search_percentage(self):
    """
    Test percentage based min num of configs to search
    """
    min_configs_to_search = (
        self._rcg._determine_minimum_number_of_configs_to_search()
    )

    # Batch sizes (8) * Instance groups (5) * queue delays (3) = 120
    # 5% of search space (120) = 6
    # NOTE: assertEquals is a deprecated alias removed in Python 3.12;
    # use assertEqual instead.
    self.assertEqual(min_configs_to_search, 6)

def test_min_number_of_configs_to_search_count(self):
    """
    Test count based min num of configs to search
    """
    config = self._create_config(additional_args=["--optuna_min_trials", "12"])

    self._rcg._config = config

    min_configs_to_search = (
        self._rcg._determine_minimum_number_of_configs_to_search()
    )

    # assertEquals is a deprecated alias removed in Python 3.12.
    self.assertEqual(min_configs_to_search, 12)

def test_min_number_of_configs_to_search_both(self):
    """
    Test min count when both a count and a percentage are specified
    """
    config = self._create_config(
        additional_args=[
            "--optuna_min_trials",
            "6",
            "--min_percentage_of_search_space",
            "3",
        ]
    )

    self._rcg._config = config

    min_configs_to_search = (
        self._rcg._determine_minimum_number_of_configs_to_search()
    )

    # Since both are specified we will use the larger of the two (trials=6)
    # assertEquals is a deprecated alias removed in Python 3.12.
    self.assertEqual(min_configs_to_search, 6)

def test_create_default_run_config(self):
"""
Test that a default run config is properly created
Expand Down

0 comments on commit 30fdbbc

Please sign in to comment.