diff --git a/model_analyzer/config/generate/optuna_run_config_generator.py b/model_analyzer/config/generate/optuna_run_config_generator.py
index e842e3774..53af27742 100755
--- a/model_analyzer/config/generate/optuna_run_config_generator.py
+++ b/model_analyzer/config/generate/optuna_run_config_generator.py
@@ -42,6 +42,7 @@
 from model_analyzer.config.run.model_run_config import ModelRunConfig
 from model_analyzer.config.run.run_config import RunConfig
 from model_analyzer.constants import LOGGER_NAME
+from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
 from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
 from model_analyzer.result.run_config_measurement import RunConfigMeasurement
 from model_analyzer.triton.model.model_config import ModelConfig
@@ -69,6 +70,9 @@ class OptunaRunConfigGenerator(ConfigGeneratorInterface):
         "max_queue_delay_microseconds",
     ]
 
+    # TODO: TMA-1927: Figure out the correct value for this
+    NO_MEASUREMENT_SCORE = -1
+
     def __init__(
         self,
         config: ConfigCommandProfile,
@@ -102,6 +106,8 @@ def __init__(
 
         self._num_models = len(models)
         self._last_measurement: Optional[RunConfigMeasurement] = None
+        self._best_config_name = ""
+        self._best_config_score: Optional[float] = None
 
         self._c_api_mode = config.triton_launch_mode == "c_api"
 
@@ -137,21 +143,48 @@ def get_configs(self) -> Generator[RunConfig, None, None]:
         RunConfig
             The next RunConfig generated by this class
         """
+        logger.info(
+            "Measuring default configuration to establish a baseline measurement"
+        )
         default_run_config = self._create_default_run_config()
         yield default_run_config
 
-        self._default_measurement = self._last_measurement
-        max_configs_to_search = self._determine_maximum_number_of_configs_to_search()
+        self._capture_default_measurement(default_run_config)
+        self._set_best_measurement(default_run_config)
+        if logging.DEBUG:
+            self._print_debug_search_space_info()
+
+        max_configs_to_search = self._determine_maximum_number_of_configs_to_search()
         # TODO: TMA-1885: Need an early exit strategy
-        for _ in range(max_configs_to_search):
+        for trial_count in range(max_configs_to_search):
             trial = self._study.ask()
             trial_objectives = self._create_trial_objectives(trial)
+            logger.debug(f"Trial {trial_count+1} of {max_configs_to_search}:")
             run_config = self._create_objective_based_run_config(trial_objectives)
             yield run_config
+
             score = self._calculate_score()
+            self._set_best_measurement(run_config, score)
+
+            if logging.DEBUG:
+                self._print_debug_score_info(run_config, score)
+
             self._study.tell(trial, score)
 
+    def _capture_default_measurement(self, default_run_config: RunConfig) -> None:
+        if not self._last_measurement:
+            raise TritonModelAnalyzerException(
+                "Default configuration did not return a measurement. Please check PA/Tritonserver log files."
+            )
+
+        self._default_measurement = self._last_measurement
+
+    def _set_best_measurement(self, run_config: RunConfig, score: float = 0) -> None:
+        if self._best_config_score is None or score > self._best_config_score:
+            self._best_config_name = run_config.model_variants_name()
+            self._best_config_score = score
+
     def _determine_maximum_number_of_configs_to_search(self) -> int:
         max_trials_based_on_percentage_of_search_space = (
             self._determine_trials_based_on_max_percentage_of_search_space()
         )
@@ -193,14 +226,30 @@ def _decide_between_percentage_and_trial_count(
                 self._config.optuna_max_trials
                 < max_trials_based_on_percentage_of_search_space
             ):
+                logger.debug(
+                    f"Maximum number of trials: {self._config.optuna_max_trials} (optuna_max_trials)"
+                )
                 max_configs_to_search = self._config.optuna_max_trials
             else:
+                logger.debug(
+                    f"Maximum number of trials: {max_trials_based_on_percentage_of_search_space} "
+                    f"({self._config.max_percentage_of_search_space}% of search space)"
+                )
                 max_configs_to_search = max_trials_based_on_percentage_of_search_space
         elif max_trials_set_by_user:
+            logger.debug(
+                f"Maximum number of trials: {self._config.optuna_max_trials} (set by max. trials)"
+            )
             max_configs_to_search = self._config.optuna_max_trials
         else:
+            logger.debug(
+                f"Maximum number of trials: {max_trials_based_on_percentage_of_search_space} "
+                f"({self._config.max_percentage_of_search_space}% of search space)"
+            )
             max_configs_to_search = max_trials_based_on_percentage_of_search_space
 
+        if logging.DEBUG:
+            logger.info("")
         return max_configs_to_search
 
     def _create_trial_objectives(self, trial: optuna.Trial) -> TrialObjectives:
@@ -239,15 +288,15 @@ def _create_trial_objective(
         return objective
 
     def _get_objective_concurrency(self, trial_objectives: TrialObjectives) -> int:
-        concurrency = (
+        concurrency_formula = (
             2
             * int(trial_objectives["instance_group"])
             * int(trial_objectives["batch_sizes"])
         )
         concurrency = (
-            DEFAULT_RUN_CONFIG_MAX_CONCURRENCY
-            if concurrency > DEFAULT_RUN_CONFIG_MAX_CONCURRENCY
-            else concurrency
+            self._config.run_config_search_max_concurrency
+            if concurrency_formula > self._config.run_config_search_max_concurrency
+            else concurrency_formula
         )
 
         return concurrency
@@ -286,7 +335,7 @@ def _create_parameter_combo(
             param_combo["dynamic_batching"] = []
 
         # TODO: TMA-1927: Add support for multi-model
-        if trial_objectives["instance_group"]:
+        if "instance_group" in trial_objectives:
             kind = "KIND_CPU" if self._models[0].cpu_only() else "KIND_GPU"
             param_combo["instance_group"] = [
                 {
@@ -295,10 +344,10 @@ def _create_parameter_combo(
                 }
             ]
 
-        if trial_objectives["batch_sizes"]:
+        if "batch_sizes" in trial_objectives:
             param_combo["max_batch_size"] = trial_objectives["batch_sizes"]
 
-        if trial_objectives["max_queue_delay_microseconds"]:
+        if "max_queue_delay_microseconds" in trial_objectives:
             param_combo["dynamic_batching"] = {
                 "max_queue_delay_microseconds": trial_objectives[
                     "max_queue_delay_microseconds"
@@ -313,8 +362,7 @@ def _calculate_score(self) -> float:
                 self._last_measurement
             )
         else:
-            # TODO: TMA-1927: Figure out the correct value for this (and make it a constant)
-            score = -1
+            score = OptunaRunConfigGenerator.NO_MEASUREMENT_SCORE
 
         return score
 
@@ -416,8 +464,26 @@ def _create_perf_analyzer_config(
         perf_analyzer_config.update_config(model.perf_analyzer_flags())
         return perf_analyzer_config
 
-    def _print_debug_logs(
-        self, measurements: List[Union[RunConfigMeasurement, None]]
+    def _print_debug_search_space_info(self) -> None:
+        logger.info("")
+        logger.debug(
+            f"Number of configs in search space: {self._search_parameters.number_of_total_possible_configurations()}"
+        )
+
+        for name in self._search_parameters.get_search_parameters():
+            logger.debug(self._search_parameters.print_info(name))
+
+        logger.info("")
+
+    def _print_debug_score_info(
+        self,
+        run_config: RunConfig,
+        score: float,
     ) -> None:
-        # TODO: TMA-1928
-        NotImplemented
+        if score != OptunaRunConfigGenerator.NO_MEASUREMENT_SCORE:
+            logger.debug(
+                f"Objective score for {run_config.model_variants_name()}: {int(score * 100)} --- "  # type: ignore
+                f"Best: {self._best_config_name} ({int(self._best_config_score * 100)})"  # type: ignore
+            )
+
+        logger.info("")
diff --git a/model_analyzer/config/generate/search_parameters.py b/model_analyzer/config/generate/search_parameters.py
index 3254572c5..0ae0d4e2b 100755
--- a/model_analyzer/config/generate/search_parameters.py
+++ b/model_analyzer/config/generate/search_parameters.py
@@ -49,6 +49,9 @@ def __init__(
 
         self._populate_search_parameters()
 
+    def get_search_parameters(self) -> Dict[str, SearchParameter]:
+        return self._search_parameters
+
     def get_parameter(self, name: str) -> Optional[SearchParameter]:
         return self._search_parameters.get(name)
 
@@ -76,6 +79,24 @@ def number_of_total_possible_configurations(self) -> int:
 
         return total_number_of_configs
 
+    def print_info(self, name: str) -> str:
+        info_string = f" {name}: "
+
+        parameter = self._search_parameters[name]
+        if parameter.category is ParameterCategory.INTEGER:
+            info_string += f"{parameter.min_range} to {parameter.max_range}"
+        elif parameter.category is ParameterCategory.EXPONENTIAL:
+            info_string += f"{2**parameter.min_range} to {2**parameter.max_range}"  # type: ignore
+        elif (
+            parameter.category is ParameterCategory.INT_LIST
+            or parameter.category is ParameterCategory.STR_LIST
+        ):
+            info_string += f"{parameter.enumerated_list}"
+
+        info_string += f" ({self._number_of_configurations_for_parameter(parameter)})"
+
+        return info_string
+
     def _number_of_configurations_for_parameter(
         self, parameter: SearchParameter
     ) -> int:
diff --git a/model_analyzer/record/metrics_manager.py b/model_analyzer/record/metrics_manager.py
index 849731935..09ca74b7e 100755
--- a/model_analyzer/record/metrics_manager.py
+++ b/model_analyzer/record/metrics_manager.py
@@ -783,13 +783,23 @@ def _print_run_config_info(self, run_config):
         for model_run_config in run_config.model_run_configs():
             perf_config = model_run_config.perf_config()
             if perf_config["request-rate-range"]:
-                logger.info(
-                    f"Profiling {model_run_config.model_variant_name()}: client batch size={perf_config['batch-size']}, request-rate-range={perf_config['request-rate-range']}"
-                )
+                if perf_config["batch-size"] != 1:
+                    logger.info(
+                        f"Profiling {model_run_config.model_variant_name()}: client batch size={perf_config['batch-size']}, request-rate-range={perf_config['request-rate-range']}"
+                    )
+                else:
+                    logger.info(
+                        f"Profiling {model_run_config.model_variant_name()}: request-rate-range={perf_config['request-rate-range']}"
+                    )
             else:
-                logger.info(
-                    f"Profiling {model_run_config.model_variant_name()}: client batch size={perf_config['batch-size']}, concurrency={perf_config['concurrency-range']}"
-                )
+                if perf_config["batch-size"] != 1:
+                    logger.info(
+                        f"Profiling {model_run_config.model_variant_name()}: client batch size={perf_config['batch-size']}, concurrency={perf_config['concurrency-range']}"
+                    )
+                else:
+                    logger.info(
+                        f"Profiling {model_run_config.model_variant_name()}: concurrency={perf_config['concurrency-range']}"
+                    )
 
         # Vertical spacing when running multiple models at a time
         if len(run_config.model_run_configs()) > 1:
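
Note for reviewers unfamiliar with Optuna's ask/tell (define-and-run) interface that the new get_configs() loop is built around: the sketch below shows the bare pattern. The parameter names, ranges, and the placeholder score are illustrative assumptions only, not Model Analyzer's actual search space, _create_trial_objectives() logic, or measurement-based scoring.

# Minimal sketch of the Optuna ask/tell pattern used by get_configs().
# Search space and score are placeholders for illustration only.
import optuna

study = optuna.create_study(direction="maximize")

for trial_count in range(10):
    # Ask the sampler for the next candidate configuration.
    trial = study.ask()

    # Roughly analogous to _create_trial_objectives(): one suggestion per parameter.
    instance_count = trial.suggest_int("instance_group", 1, 5)
    max_batch_size = trial.suggest_categorical("batch_sizes", [1, 2, 4, 8, 16])

    # Stand-in for _calculate_score(), which in Model Analyzer compares a real
    # measurement against the default (baseline) measurement.
    score = float(instance_count * max_batch_size)

    # Report the result so the sampler can steer the next trial.
    study.tell(trial, score)

print(study.best_trial.params)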