From 0bb381e33167077b22a9877f53cbb788f6d8e3b1 Mon Sep 17 00:00:00 2001 From: Brian Raf <92820864+nv-braf@users.noreply.github.com> Date: Thu, 11 Jul 2024 07:04:37 -0700 Subject: [PATCH] Add support for multi-model profiling to the Optuna RCG (#911) * Initial code for multi-model. Live run working * Adding unit test * Fixing codeql issue * Changing L0 test to explicitly choose quick mode --- .../generate/optuna_run_config_generator.py | 170 ++++++++++-------- .../config/input/config_command_profile.py | 4 - qa/L0_quick_search_multi_model/test.sh | 2 +- tests/test_optuna_run_config_generator.py | 104 ++++++++++- 4 files changed, 197 insertions(+), 83 deletions(-) diff --git a/model_analyzer/config/generate/optuna_run_config_generator.py b/model_analyzer/config/generate/optuna_run_config_generator.py index b08b15d1c..c53192f16 100755 --- a/model_analyzer/config/generate/optuna_run_config_generator.py +++ b/model_analyzer/config/generate/optuna_run_config_generator.py @@ -50,9 +50,15 @@ logger = logging.getLogger(LOGGER_NAME) +ModelName: TypeAlias = str +ParameterName: TypeAlias = str +ObjectiveName: TypeAlias = str + TrialObjective: TypeAlias = Union[str | int] -TrialObjectives: TypeAlias = Dict[str, TrialObjective] -ComposingTrialObjectives: TypeAlias = Dict[str, Dict[str, TrialObjective]] +ModelTrialObjectives: TypeAlias = Dict[ParameterName, TrialObjective] +AllTrialObjectives: TypeAlias = Dict[ModelName, ModelTrialObjectives] +ComposingTrialObjectives: TypeAlias = AllTrialObjectives + ParameterCombo: TypeAlias = Dict[str, Any] @@ -104,9 +110,7 @@ def __init__( self._gpu_count = gpu_count self._models = models self._composing_models = composing_models - - # TODO: TMA-1927: Add support for multi-model - self._search_parameters = search_parameters[models[0].model_name()] + self._search_parameters = search_parameters self._composing_search_parameters = {} for composing_model in composing_models: @@ -133,8 +137,10 @@ def __init__( else: self._sampler = optuna.samplers.TPESampler() + self._study_name = ",".join([model.model_name() for model in self._models]) + self._study = optuna.create_study( - study_name=self._models[0].model_name(), + study_name=self._study_name, direction="maximize", sampler=self._sampler, ) @@ -172,7 +178,7 @@ def get_configs(self) -> Generator[RunConfig, None, None]: min_configs_to_search = self._determine_minimum_number_of_configs_to_search() max_configs_to_search = self._determine_maximum_number_of_configs_to_search() - # TODO: TMA-1885: Need an early exit strategy + for trial_number in range(1, max_configs_to_search + 1): trial = self._study.ask() trial_objectives = self._create_trial_objectives(trial) @@ -338,25 +344,31 @@ def _decide_min_between_percentage_and_trial_count( return min_configs_to_search - def _create_trial_objectives(self, trial: optuna.Trial) -> TrialObjectives: - trial_objectives: TrialObjectives = {} - for parameter_name in OptunaRunConfigGenerator.optuna_parameter_list: - parameter = self._search_parameters.get_parameter(parameter_name) + def _create_trial_objectives(self, trial: optuna.Trial) -> AllTrialObjectives: + trial_objectives: AllTrialObjectives = {} - if parameter: - # TODO: TMA-1927: Add support for multi-model - objective_name = self._create_trial_objective_name( - model_name=self._models[0].model_name(), - objective_name=parameter_name, - ) - trial_objectives[parameter_name] = self._create_trial_objective( - trial, objective_name, parameter + for model in self._models: + model_name = model.model_name() + trial_objectives[model_name] = {} + + for parameter_name in OptunaRunConfigGenerator.optuna_parameter_list: + parameter = self._search_parameters[model_name].get_parameter( + parameter_name ) - if self._config.use_concurrency_formula: - trial_objectives["concurrency"] = self._get_objective_concurrency( - trial_objectives - ) + if parameter: + objective_name = self._create_trial_objective_name( + model_name=model_name, parameter_name=parameter_name + ) + + trial_objectives[model_name][ + parameter_name + ] = self._create_trial_objective(trial, objective_name, parameter) + + if self._config.use_concurrency_formula: + trial_objectives[model_name][ + "concurrency" + ] = self._get_objective_concurrency(model_name, trial_objectives) return trial_objectives @@ -374,7 +386,7 @@ def _create_composing_trial_objectives( if parameter: objective_name = self._create_trial_objective_name( model_name=composing_model.model_name(), - objective_name=parameter_name, + parameter_name=parameter_name, ) composing_trial_objectives[composing_model.model_name()][ parameter_name @@ -382,13 +394,17 @@ def _create_composing_trial_objectives( return composing_trial_objectives - def _create_trial_objective_name(self, model_name: str, objective_name: str) -> str: - objective_name = f"{model_name}::{objective_name}" + def _create_trial_objective_name( + self, model_name: ModelName, parameter_name: ParameterName + ) -> ObjectiveName: + # This ensures that Optuna has a unique name + # for each objective we are searching + objective_name = f"{model_name}::{parameter_name}" return objective_name def _create_trial_objective( - self, trial: optuna.Trial, name: str, parameter: SearchParameter + self, trial: optuna.Trial, name: ObjectiveName, parameter: SearchParameter ) -> TrialObjective: if parameter.category is ParameterCategory.INTEGER: objective = trial.suggest_int( @@ -405,10 +421,12 @@ def _create_trial_objective( return objective - def _get_objective_concurrency(self, trial_objectives: TrialObjectives) -> int: - max_batch_size = trial_objectives.get("max_batch_size", 1) + def _get_objective_concurrency( + self, model_name: str, trial_objectives: AllTrialObjectives + ) -> int: + max_batch_size = trial_objectives[model_name].get("max_batch_size", 1) concurrency_formula = ( - 2 * int(trial_objectives["instance_group"]) * max_batch_size + 2 * int(trial_objectives[model_name]["instance_group"]) * max_batch_size ) concurrency = ( self._config.run_config_search_max_concurrency @@ -425,7 +443,7 @@ def _get_objective_concurrency(self, trial_objectives: TrialObjectives) -> int: def _create_objective_based_run_config( self, - trial_objectives: TrialObjectives, + trial_objectives: AllTrialObjectives, composing_trial_objectives: ComposingTrialObjectives, ) -> RunConfig: run_config = RunConfig(self._triton_env) @@ -434,30 +452,29 @@ def _create_objective_based_run_config( composing_trial_objectives ) - # TODO: TMA-1927: Add support for multi-model - model_config_variant = self._create_model_config_variant( - model=self._models[0], - trial_objectives=trial_objectives, - composing_trial_objectives=composing_trial_objectives, - composing_model_config_variants=composing_model_config_variants, - ) + for model in self._models: + model_config_variant = self._create_model_config_variant( + model=model, + trial_objectives=trial_objectives[model.model_name()], + composing_trial_objectives=composing_trial_objectives, + composing_model_config_variants=composing_model_config_variants, + ) - # TODO: TMA-1927: Add support for multi-model - model_run_config = self._create_model_run_config( - model=self._models[0], - model_config_variant=model_config_variant, - composing_model_config_variants=composing_model_config_variants, - trial_objectives=trial_objectives, - ) + model_run_config = self._create_model_run_config( + model=model, + model_config_variant=model_config_variant, + composing_model_config_variants=composing_model_config_variants, + trial_objectives=trial_objectives[model.model_name()], + ) - run_config.add_model_run_config(model_run_config=model_run_config) + run_config.add_model_run_config(model_run_config=model_run_config) return run_config def _create_parameter_combo( self, model: ModelProfileSpec, - trial_objectives: TrialObjectives, + trial_objectives: ModelTrialObjectives, composing_trial_objectives: ComposingTrialObjectives, ) -> ParameterCombo: if model.is_ensemble(): @@ -465,7 +482,9 @@ def _create_parameter_combo( composing_trial_objectives ) else: - param_combo = self._create_non_ensemble_parameter_combo(trial_objectives) + param_combo = self._create_non_ensemble_parameter_combo( + model, trial_objectives + ) return param_combo @@ -493,16 +512,15 @@ def _create_ensemble_parameter_combo( return param_combo def _create_non_ensemble_parameter_combo( - self, trial_objectives: TrialObjectives + self, model: ModelProfileSpec, trial_objectives: ModelTrialObjectives ) -> ParameterCombo: param_combo: ParameterCombo = {} - if self._models[0].supports_dynamic_batching(): + if model.supports_dynamic_batching(): param_combo["dynamic_batching"] = [] - # TODO: TMA-1927: Add support for multi-model if "instance_group" in trial_objectives: - kind = "KIND_CPU" if self._models[0].cpu_only() else "KIND_GPU" + kind = "KIND_CPU" if model.cpu_only() else "KIND_GPU" param_combo["instance_group"] = [ { "count": trial_objectives["instance_group"], @@ -525,7 +543,7 @@ def _create_non_ensemble_parameter_combo( def _create_model_config_variant( self, model: ModelProfileSpec, - trial_objectives: TrialObjectives, + trial_objectives: ModelTrialObjectives, composing_trial_objectives: ComposingTrialObjectives = {}, composing_model_config_variants: List[ModelConfigVariant] = [], ) -> ModelConfigVariant: @@ -578,11 +596,9 @@ def _calculate_score(self) -> float: def _create_default_run_config(self) -> RunConfig: default_run_config = RunConfig(self._triton_env) - # TODO: TMA-1927: Add support for multi-model - default_model_run_config = self._create_default_model_run_config( - self._models[0] - ) - default_run_config.add_model_run_config(default_model_run_config) + for model in self._models: + default_model_run_config = self._create_default_model_run_config(model) + default_run_config.add_model_run_config(default_model_run_config) return default_run_config @@ -669,7 +685,7 @@ def _create_model_run_config( model: ModelProfileSpec, model_config_variant: ModelConfigVariant, composing_model_config_variants: List[ModelConfigVariant], - trial_objectives: TrialObjectives, + trial_objectives: ModelTrialObjectives, ) -> ModelRunConfig: trial_batch_sizes = ( int(trial_objectives["batch_sizes"]) @@ -738,9 +754,11 @@ def _print_debug_search_space_info(self) -> None: logger.info("") def _calculate_num_of_configs_in_search_space(self) -> int: - num_of_configs_in_search_space = ( - self._search_parameters.number_of_total_possible_configurations() - ) + num_of_configs_in_search_space = 1 + for search_parameter in self._search_parameters.values(): + num_of_configs_in_search_space *= ( + search_parameter.number_of_total_possible_configurations() + ) for composing_search_parameter in self._composing_search_parameters.values(): num_of_configs_in_search_space *= ( @@ -750,21 +768,19 @@ def _calculate_num_of_configs_in_search_space(self) -> int: return num_of_configs_in_search_space def _print_debug_model_search_space_info(self) -> None: - if self._composing_models is None: - for name in self._search_parameters.get_search_parameters(): - logger.debug(self._search_parameters.print_info(name)) - else: - logger.debug(f"Model - {self._models[0].model_name()}:") - for name in self._search_parameters.get_search_parameters(): - logger.debug(self._search_parameters.print_info(name)) - - for ( - composing_model_name, - composing_search_parameters, - ) in self._composing_search_parameters.items(): - logger.debug(f"Composing model - {composing_model_name}:") - for name in composing_search_parameters.get_search_parameters(): - logger.debug(composing_search_parameters.print_info(name)) + for model in self._models: + model_name = model.model_name() + logger.debug(f"Model - {model_name}:") + for name in self._search_parameters[model_name].get_search_parameters(): + logger.debug(self._search_parameters[model_name].print_info(name)) + + for ( + composing_model_name, + composing_search_parameters, + ) in self._composing_search_parameters.items(): + logger.debug(f"Composing model - {composing_model_name}:") + for name in composing_search_parameters.get_search_parameters(): + logger.debug(composing_search_parameters.print_info(name)) def _print_debug_score_info( self, diff --git a/model_analyzer/config/input/config_command_profile.py b/model_analyzer/config/input/config_command_profile.py index 4b9d851a2..aabb2b1f5 100755 --- a/model_analyzer/config/input/config_command_profile.py +++ b/model_analyzer/config/input/config_command_profile.py @@ -1469,10 +1469,6 @@ def _preprocess_and_verify_arguments(self): if len(self.concurrency) == 0 and len(self.request_rate) == 0: self.concurrency = [1] - # Change default RCS mode to quick for multi-model concurrent profiling - if self.run_config_profile_models_concurrently_enable: - self.run_config_search_mode = "quick" - if not self.export_path: logger.warning( f"--export-path not specified. Using {self._fields['export_path'].default_value()}" diff --git a/qa/L0_quick_search_multi_model/test.sh b/qa/L0_quick_search_multi_model/test.sh index 158d052ad..fbdc7d009 100755 --- a/qa/L0_quick_search_multi_model/test.sh +++ b/qa/L0_quick_search_multi_model/test.sh @@ -50,7 +50,7 @@ MODEL_ANALYZER_ARGS="$MODEL_ANALYZER_ARGS --triton-metrics-url http://localhost: MODEL_ANALYZER_ARGS="$MODEL_ANALYZER_ARGS --output-model-repository-path $OUTPUT_MODEL_REPOSITORY --override-output-model-repository" MODEL_ANALYZER_ARGS="$MODEL_ANALYZER_ARGS -e $EXPORT_PATH --checkpoint-directory $CHECKPOINT_DIRECTORY --filename-server-only=$FILENAME_SERVER_ONLY" MODEL_ANALYZER_ARGS="$MODEL_ANALYZER_ARGS --filename-model-inference=$FILENAME_INFERENCE_MODEL --filename-model-gpu=$FILENAME_GPU_MODEL" -MODEL_ANALYZER_ARGS="$MODEL_ANALYZER_ARGS --skip-summary-reports" +MODEL_ANALYZER_ARGS="$MODEL_ANALYZER_ARGS --run-config-search-mode quick --skip-summary-reports" MODEL_ANALYZER_SUBCOMMAND="profile" run_analyzer if [ $? -ne 0 ]; then diff --git a/tests/test_optuna_run_config_generator.py b/tests/test_optuna_run_config_generator.py index 44b9fd804..01953d1fe 100755 --- a/tests/test_optuna_run_config_generator.py +++ b/tests/test_optuna_run_config_generator.py @@ -262,7 +262,7 @@ def test_create_run_config_with_concurrency_formula(self): def test_create_run_bls_config(self): """ - Tests that the concurrency formula option is used correctly + Tests that a BLS run config is created correctly """ config = self._create_bls_config() mock_model_config = MockModelConfig() @@ -340,6 +340,77 @@ def test_create_run_bls_config(self): "500", ) + def test_create_run_multi_model_config(self): + """ + Tests that a multi-model run config is created correctly + """ + config = self._create_multi_model_config() + mock_model_config = MockModelConfig() + mock_model_config.start() + add_model = ModelProfileSpec( + config.profile_models[0], config, MagicMock(), MagicMock() + ) + vgg_model = ModelProfileSpec( + config.profile_models[1], config, MagicMock(), MagicMock() + ) + mock_model_config.stop() + add_search_parameters = SearchParameters( + model=add_model, + config=config, + ) + vgg_search_parameters = SearchParameters( + model=vgg_model, + config=config, + ) + rcg = OptunaRunConfigGenerator( + config=config, + gpu_count=1, + models=[add_model, vgg_model], + composing_models=[], + model_variant_name_manager=ModelVariantNameManager(), + search_parameters={ + "add_sub": add_search_parameters, + "vgg19_libtorch": vgg_search_parameters, + }, + composing_search_parameters={}, + seed=100, + ) + + trial = rcg._study.ask() + trial_objectives = rcg._create_trial_objectives(trial) + composing_trial_objectives = rcg._create_composing_trial_objectives(trial) + run_config = rcg._create_objective_based_run_config( + trial_objectives, composing_trial_objectives + ) + + add_model_config = run_config.model_run_configs()[0].model_config() + vgg_model_config = run_config.model_run_configs()[1].model_config() + add_perf_config = run_config.model_run_configs()[0].perf_config() + vgg_perf_config = run_config.model_run_configs()[0].perf_config() + + # ADD_SUB + PA Config (Seed=100) + # ===================================================================== + self.assertEqual(add_model_config.to_dict()["name"], "add_sub") + self.assertEqual(add_model_config.to_dict()["maxBatchSize"], 16) + self.assertEqual(add_model_config.to_dict()["instanceGroup"][0]["count"], 2) + self.assertEqual( + add_model_config.to_dict()["dynamicBatching"]["maxQueueDelayMicroseconds"], + "100", + ) + self.assertEqual(add_perf_config["batch-size"], DEFAULT_BATCH_SIZES) + self.assertEqual(add_perf_config["concurrency-range"], 16) + + # VGG19_LIBTORCH + PA Config (Seed=100) + # ===================================================================== + self.assertEqual(vgg_model_config.to_dict()["name"], "vgg19_libtorch") + self.assertEqual(vgg_model_config.to_dict()["instanceGroup"][0]["count"], 4) + self.assertEqual( + vgg_model_config.to_dict()["dynamicBatching"]["maxQueueDelayMicroseconds"], + "600", + ) + self.assertEqual(vgg_perf_config["batch-size"], DEFAULT_BATCH_SIZES) + self.assertEqual(vgg_perf_config["concurrency-range"], 16) + def _create_config(self, additional_args=[]): args = [ "model-analyzer", @@ -401,6 +472,37 @@ def _create_bls_config(self, additional_args=[]): return config + def _create_multi_model_config(self, additional_args=[]): + args = [ + "model-analyzer", + "profile", + "--model-repository", + "/tmp", + "--config-file", + "/tmp/my_config.yml", + ] + + for arg in additional_args: + args.append(arg) + + # yapf: disable + yaml_str = (""" + profile_models: + add_sub: + model_config_parameters: + dynamic_batching: + max_queue_delay_microseconds: [100, 200, 300] + vgg19_libtorch: + model_config_parameters: + dynamic_batching: + max_queue_delay_microseconds: [400, 500, 600] + """) + # yapf: enable + + config = TestConfig()._evaluate_config(args, yaml_str) + + return config + def tearDown(self): patch.stopall()