diff --git a/model_analyzer/analyzer.py b/model_analyzer/analyzer.py
index 1fd0ecf29..fa7a9c49c 100755
--- a/model_analyzer/analyzer.py
+++ b/model_analyzer/analyzer.py
@@ -84,6 +84,7 @@ def __init__(
         )

         self._search_parameters: Dict[str, SearchParameters] = {}
+        self._composing_search_parameters: Dict[str, SearchParameters] = {}

     def profile(
         self, client: TritonClient, gpus: List[GPUDevice], mode: str, verbose: bool
@@ -119,6 +120,7 @@ def profile(
         self._create_metrics_manager(client, gpus)
         self._create_model_manager(client, gpus)
         self._populate_search_parameters()
+        self._populate_composing_search_parameters()

         if self._config.triton_launch_mode == "remote":
             self._warn_if_other_models_loaded_on_remote_server(client)
@@ -423,5 +425,17 @@ def _warn_if_other_models_loaded_on_remote_server(self, client):
     def _populate_search_parameters(self):
         for model in self._config.profile_models:
             self._search_parameters[model.model_name()] = SearchParameters(
-                self._config, model.parameters(), model.model_config_parameters()
+                self._config,
+                model.parameters(),
+                model.model_config_parameters(),
+                is_bls_model=bool(self._config.bls_composing_models),
+            )
+
+    def _populate_composing_search_parameters(self):
+        for model in self._config.bls_composing_models:
+            self._composing_search_parameters[model.model_name()] = SearchParameters(
+                self._config,
+                model.parameters(),
+                model.model_config_parameters(),
+                is_composing_model=True,
             )
diff --git a/model_analyzer/config/generate/optuna_run_config_generator.py b/model_analyzer/config/generate/optuna_run_config_generator.py
index 7ec0ea08b..9ad526c36 100755
--- a/model_analyzer/config/generate/optuna_run_config_generator.py
+++ b/model_analyzer/config/generate/optuna_run_config_generator.py
@@ -98,6 +98,13 @@ def __init__(
         # TODO: TMA-1927: Add support for multi-model
         self._search_parameters = search_parameters[models[0].model_name()]

+        # TODO: need to add in ensemble support
+        self._composing_search_parameters = {}
+        for composing_model in self._config.bls_composing_models:
+            self._composing_search_parameters[
+                composing_model.model_name()
+            ] = search_parameters[composing_model.model_name()]
+
         self._model_variant_name_manager = model_variant_name_manager

         self._triton_env = BruteRunConfigGenerator.determine_triton_server_env(models)
diff --git a/model_analyzer/config/generate/search_parameters.py b/model_analyzer/config/generate/search_parameters.py
index f59104142..9c2acab3d 100755
--- a/model_analyzer/config/generate/search_parameters.py
+++ b/model_analyzer/config/generate/search_parameters.py
@@ -45,11 +45,15 @@ def __init__(
         config: ConfigCommandProfile = ConfigCommandProfile(),
         parameters: Dict[str, Any] = {},
         model_config_parameters: Dict[str, Any] = {},
+        is_bls_model: bool = False,
+        is_composing_model: bool = False,
     ):
         self._config = config
         self._parameters = parameters
         self._model_config_parameters = model_config_parameters
         self._search_parameters: Dict[str, SearchParameter] = {}
+        self._is_bls_model = is_bls_model
+        self._is_composing_model = is_composing_model

         self._populate_search_parameters()
@@ -120,11 +124,15 @@ def _populate_search_parameters(self) -> None:

     def _populate_parameters(self) -> None:
         self._populate_batch_sizes()
-        self._populate_concurrency()
-        # TODO: Populate request rate - TMA-1903
+
+        if not self._is_composing_model:
+            self._populate_concurrency()
+            # TODO: Populate request rate - TMA-1903

     def _populate_model_config_parameters(self) -> None:
-        self._populate_max_batch_size()
+        if not self._is_bls_model:
+            self._populate_max_batch_size()
+
         self._populate_instance_group()
         self._populate_max_queue_delay_microseconds()
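The two new flags partition the Optuna search space by model role: the top-level model of a BLS keeps its `max_batch_size` fixed, while composing models receive no direct client load and therefore skip the concurrency search. A minimal sketch of the gating, using illustrative names rather than the real `SearchParameters` internals:

```python
# Sketch of the flag gating above; the real class builds SearchParameter
# objects, this only shows which knobs each model kind searches.
def searched_parameters(is_bls_model: bool = False, is_composing_model: bool = False) -> set:
    params = {"batch_sizes"}
    if not is_composing_model:
        # Only models that receive client load search concurrency
        params.add("concurrency")
    if not is_bls_model:
        # The top-level model of a BLS keeps max_batch_size fixed
        params.add("max_batch_size")
    params.update({"instance_group", "max_queue_delay_microseconds"})
    return params

assert "max_batch_size" not in searched_parameters(is_bls_model=True)
assert "concurrency" not in searched_parameters(is_composing_model=True)
```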
diff --git a/model_analyzer/config/input/config_command_profile.py b/model_analyzer/config/input/config_command_profile.py
index a00c1e0a2..4b9d851a2 100755
--- a/model_analyzer/config/input/config_command_profile.py
+++ b/model_analyzer/config/input/config_command_profile.py
@@ -17,6 +17,7 @@
 import argparse
 import logging
 import os
+from copy import deepcopy

 import numba.cuda
 import psutil
@@ -1691,6 +1692,11 @@ def _autofill_values(self):
             new_model["model_config_parameters"] = model.model_config_parameters()

             new_profile_models[model.model_name()] = new_model
+
+        # deepcopy is necessary, else it gets overwritten when updating profile_models
+        self._fields["bls_composing_models"] = deepcopy(
+            self._fields["bls_composing_models"]
+        )
         self._fields["profile_models"].set_value(new_profile_models)

     def _using_request_rate(self) -> bool:
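The `deepcopy` guards against aliasing: `_autofill_values()` rewrites `profile_models`, and without an independent copy the `bls_composing_models` field can point at the same underlying objects and be overwritten along the way. A simplified illustration with plain dicts standing in for the real config `_fields` machinery (hypothetical structure, not the actual classes):

```python
from copy import deepcopy

# Simplified stand-in for the config _fields dict; not the real classes.
fields = {"profile_models": {"add_sub": {}}, "bls_composing_models": {"add": {}}}

alias = fields["bls_composing_models"]           # shares the underlying dict
safe = deepcopy(fields["bls_composing_models"])  # independent copy

# A later update mutates the shared object in place
fields["bls_composing_models"]["add"]["parameters"] = "overwritten"

assert alias["add"] == {"parameters": "overwritten"}  # the alias sees the mutation
assert safe["add"] == {}                              # the deepcopy does not
```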
diff --git a/tests/test_search_parameters.py b/tests/test_search_parameters.py
index 2c7b73cb0..0ff4375b1 100755
--- a/tests/test_search_parameters.py
+++ b/tests/test_search_parameters.py
@@ -457,6 +457,107 @@ def test_total_possible_configurations(self):
         # batch_sizes (8) * instance group (8) * concurrency (11) * size (3)
         self.assertEqual(8 * 8 * 11 * 3, total_num_of_possible_configurations)

+    def test_search_parameter_creation_bls_default(self):
+        """
+        Test that search parameters are correctly created in default BLS optuna case
+        """
+
+        args = [
+            "model-analyzer",
+            "profile",
+            "--model-repository",
+            "cli-repository",
+            "-f",
+            "path-to-config-file",
+            "--run-config-search-mode",
+            "optuna",
+        ]
+
+        yaml_content = """
+        profile_models: add_sub
+        bls_composing_models: add,sub
+        """
+
+        config = TestConfig()._evaluate_config(args=args, yaml_content=yaml_content)
+
+        analyzer = Analyzer(config, MagicMock(), MagicMock(), MagicMock())
+        analyzer._populate_search_parameters()
+        analyzer._populate_composing_search_parameters()
+
+        # ADD_SUB
+        # =====================================================================
+        # The top level model of a BLS does not search max batch size (always 1)
+
+        # max_batch_size
+        max_batch_size = analyzer._search_parameters["add_sub"].get_parameter(
+            "max_batch_size"
+        )
+        self.assertIsNone(max_batch_size)
+
+        # concurrency
+        concurrency = analyzer._search_parameters["add_sub"].get_parameter(
+            "concurrency"
+        )
+        self.assertEqual(ParameterUsage.RUNTIME, concurrency.usage)
+        self.assertEqual(ParameterCategory.EXPONENTIAL, concurrency.category)
+        self.assertEqual(
+            log2(default.DEFAULT_RUN_CONFIG_MIN_CONCURRENCY), concurrency.min_range
+        )
+        self.assertEqual(
+            log2(default.DEFAULT_RUN_CONFIG_MAX_CONCURRENCY), concurrency.max_range
+        )
+
+        # instance_group
+        instance_group = analyzer._search_parameters["add_sub"].get_parameter(
+            "instance_group"
+        )
+        self.assertEqual(ParameterUsage.MODEL, instance_group.usage)
+        self.assertEqual(ParameterCategory.INTEGER, instance_group.category)
+        self.assertEqual(
+            default.DEFAULT_RUN_CONFIG_MIN_INSTANCE_COUNT, instance_group.min_range
+        )
+        self.assertEqual(
+            default.DEFAULT_RUN_CONFIG_MAX_INSTANCE_COUNT, instance_group.max_range
+        )
+
+        # ADD/SUB (composing models)
+        # =====================================================================
+        # Composing models do not search concurrency
+
+        # max_batch_size
+        max_batch_size = analyzer._composing_search_parameters["add"].get_parameter(
+            "max_batch_size"
+        )
+        self.assertEqual(ParameterUsage.MODEL, max_batch_size.usage)
+        self.assertEqual(ParameterCategory.EXPONENTIAL, max_batch_size.category)
+        self.assertEqual(
+            log2(default.DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE),
+            max_batch_size.min_range,
+        )
+        self.assertEqual(
+            log2(default.DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE),
+            max_batch_size.max_range,
+        )
+
+        # concurrency
+        concurrency = analyzer._composing_search_parameters["sub"].get_parameter(
+            "concurrency"
+        )
+        self.assertIsNone(concurrency)
+
+        # instance_group
+        instance_group = analyzer._composing_search_parameters["sub"].get_parameter(
+            "instance_group"
+        )
+        self.assertEqual(ParameterUsage.MODEL, instance_group.usage)
+        self.assertEqual(ParameterCategory.INTEGER, instance_group.category)
+        self.assertEqual(
+            default.DEFAULT_RUN_CONFIG_MIN_INSTANCE_COUNT, instance_group.min_range
+        )
+        self.assertEqual(
+            default.DEFAULT_RUN_CONFIG_MAX_INSTANCE_COUNT, instance_group.max_range
+        )
+

 if __name__ == "__main__":
     unittest.main()
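The `log2(...)` assertions in the test reflect how `EXPONENTIAL` parameters are searched: min/max bounds are held in exponent space, and a sampled integer `n` maps back to a concrete value of `2**n`. A quick worked example, assuming defaults of 1 and 128 for the model batch size range (the real values come from the `default` module):

```python
from math import log2

# Assumed default bounds for illustration only; the actual values live in
# model_analyzer's default config module.
DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE = 1
DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE = 128

# EXPONENTIAL parameters store log2 bounds, so min_range/max_range are exponents
min_range = int(log2(DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE))  # 0
max_range = int(log2(DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE))  # 7

# Each sampled exponent n corresponds to a candidate max_batch_size of 2**n
print([2**n for n in range(min_range, max_range + 1)])
# [1, 2, 4, 8, 16, 32, 64, 128]
```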