diff --git a/model_analyzer/config/generate/optuna_run_config_generator.py b/model_analyzer/config/generate/optuna_run_config_generator.py
index c1c090033..c9f284c92 100755
--- a/model_analyzer/config/generate/optuna_run_config_generator.py
+++ b/model_analyzer/config/generate/optuna_run_config_generator.py
@@ -169,10 +169,10 @@ def _create_trial_objectives(self, trial: optuna.Trial) -> TrialObjectives:
                 trial, parameter_name, parameter
             )

-        # TODO: TMA-1884: Need an option to choose btw. concurrency formula and optuna searching
-        trial_objectives["concurrency"] = self._get_objective_concurrency(
-            trial_objectives
-        )
+        if self._config.use_concurrency_formula:
+            trial_objectives["concurrency"] = self._get_objective_concurrency(
+                trial_objectives
+            )

         return trial_objectives

diff --git a/model_analyzer/config/generate/search_parameters.py b/model_analyzer/config/generate/search_parameters.py
index 28428b370..3254572c5 100755
--- a/model_analyzer/config/generate/search_parameters.py
+++ b/model_analyzer/config/generate/search_parameters.py
@@ -90,11 +90,7 @@ def _number_of_configurations_for_parameter(
         return number_of_parameter_configs

     def _populate_search_parameters(self) -> None:
-        if self._parameters:
-            self._populate_parameters()
-        else:
-            self._populate_default_parameters()
-
+        self._populate_parameters()
         self._populate_model_config_parameters()

     def _populate_parameters(self) -> None:
@@ -102,11 +98,6 @@ def _populate_parameters(self) -> None:
         self._populate_concurrency()
         # TODO: Populate request rate - TMA-1903

-    def _populate_default_parameters(self) -> None:
-        # Always populate batch sizes if nothing is specified
-        # TODO: TMA-1884: Will need to add concurrency if the user wants this searched
-        self._populate_batch_sizes()
-
     def _populate_model_config_parameters(self) -> None:
         self._populate_instance_group()
         self._populate_max_queue_delay_microseconds()
@@ -126,12 +117,14 @@ def _populate_batch_sizes(self) -> None:
         )

     def _populate_concurrency(self) -> None:
-        if self._parameters["concurrency"]:
+        if self._parameters and self._parameters["concurrency"]:
             self._populate_list_parameter(
                 parameter_name="concurrency",
                 parameter_list=self._parameters["concurrency"],
                 parameter_category=ParameterCategory.INT_LIST,
             )
+        elif self._config.use_concurrency_formula:
+            return
         else:
             self._populate_rcs_parameter(
                 parameter_name="concurrency",
diff --git a/model_analyzer/config/input/config_command_profile.py b/model_analyzer/config/input/config_command_profile.py
index 98b5c09c8..5427aa9bf 100755
--- a/model_analyzer/config/input/config_command_profile.py
+++ b/model_analyzer/config/input/config_command_profile.py
@@ -96,6 +96,7 @@
     DEFAULT_TRITON_LAUNCH_MODE,
     DEFAULT_TRITON_METRICS_URL,
     DEFAULT_TRITON_SERVER_PATH,
+    DEFAULT_USE_CONCURRENCY_FORMULA,
 )
 from .config_enum import ConfigEnum
 from .config_field import ConfigField
@@ -936,6 +937,16 @@ def _add_run_search_configs(self):
                 description="Maximum percentage of the search space to profile when using Optuna",
             )
         )
+        self._add_config(
+            ConfigField(
+                "use_concurrency_formula",
+                flags=["--use-concurrency-formula"],
+                field_type=ConfigPrimitive(bool),
+                parser_args={"action": "store_true"},
+                default_value=DEFAULT_USE_CONCURRENCY_FORMULA,
+                description="Use the concurrency formula instead of searching the concurrency space in Optuna search mode",
+            )
+        )
         self._add_config(
             ConfigField(
                 "run_config_search_mode",
diff --git a/model_analyzer/config/input/config_defaults.py b/model_analyzer/config/input/config_defaults.py
index 7771acd91..da8cfae72 100755
--- a/model_analyzer/config/input/config_defaults.py
+++ b/model_analyzer/config/input/config_defaults.py
@@ -56,6 +56,7 @@
 DEFAULT_RUN_CONFIG_PROFILE_MODELS_CONCURRENTLY_ENABLE = False
 DEFAULT_OPTUNA_MIN_PERCENTAGE_OF_SEARCH_SPACE = 5
 DEFAULT_OPTUNA_MAX_PERCENTAGE_OF_SEARCH_SPACE = 10
+DEFAULT_USE_CONCURRENCY_FORMULA = False
 DEFAULT_REQUEST_RATE_SEARCH_ENABLE = False
 DEFAULT_TRITON_LAUNCH_MODE = "local"
 DEFAULT_TRITON_DOCKER_IMAGE = "nvcr.io/nvidia/tritonserver:24.04-py3"
diff --git a/tests/test_cli.py b/tests/test_cli.py
index b4a1651cf..e3a4ea402 100755
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -65,6 +65,7 @@ def get_test_options():
         OptionStruct("bool", "profile","--skip-summary-reports"),
         OptionStruct("bool", "profile","--skip-detailed-reports"),
         OptionStruct("bool", "profile","--always-report-gpu-metrics"),
+        OptionStruct("bool", "profile","--use-concurrency-formula"),

         #Int/Float options
         # Options format:
@@ -383,6 +384,7 @@ def _test_boolean_option(self, option_struct):
         cli = option_struct.cli_subcommand()
         _, config = cli.parse()
         option_value = config.get_config().get(option_with_underscores).value()
+        # Boolean values must always default to False
         self.assertEqual(option_value, False)

         # Test boolean option
diff --git a/tests/test_optuna_run_config_generator.py b/tests/test_optuna_run_config_generator.py
index 34670f3a2..0438b7bf7 100755
--- a/tests/test_optuna_run_config_generator.py
+++ b/tests/test_optuna_run_config_generator.py
@@ -101,6 +101,43 @@ def test_create_objective_based_run_config(self):

         self.assertEqual(model_config.to_dict()["name"], self._test_config_dict["name"])

+        # These values are the result of using a fixed seed of 100
+        self.assertEqual(model_config.to_dict()["maxBatchSize"], 16)
+        self.assertEqual(model_config.to_dict()["instanceGroup"][0]["count"], 2)
+        self.assertEqual(
+            model_config.to_dict()["dynamicBatching"]["maxQueueDelayMicroseconds"],
+            "100",
+        )
+        self.assertEqual(perf_config["batch-size"], DEFAULT_BATCH_SIZES)
+        self.assertEqual(perf_config["concurrency-range"], 16)
+
+    def test_create_run_config_with_concurrency_formula(self):
+        config = self._create_config(["--use-concurrency-formula"])
+        model = config.profile_models[0]
+        search_parameters = SearchParameters(
+            config=config,
+            parameters={},
+            model_config_parameters=model.model_config_parameters(),
+        )
+
+        rcg = OptunaRunConfigGenerator(
+            config=config,
+            gpu_count=1,
+            models=self._mock_models,
+            model_variant_name_manager=ModelVariantNameManager(),
+            search_parameters={"add_sub": search_parameters},
+            seed=100,
+        )
+
+        trial = rcg._study.ask()
+        trial_objectives = rcg._create_trial_objectives(trial)
+        run_config = rcg._create_objective_based_run_config(trial_objectives)
+
+        model_config = run_config.model_run_configs()[0].model_config()
+        perf_config = run_config.model_run_configs()[0].perf_config()
+
+        self.assertEqual(model_config.to_dict()["name"], self._test_config_dict["name"])
+
         # These values are the result of using a fixed seed of 100
         self.assertEqual(model_config.to_dict()["maxBatchSize"], 16)
         self.assertEqual(model_config.to_dict()["instanceGroup"][0]["count"], 2)
diff --git a/tests/test_search_parameters.py b/tests/test_search_parameters.py
index f718119be..ad3f4ede7 100755
--- a/tests/test_search_parameters.py
+++ b/tests/test_search_parameters.py
@@ -248,6 +248,38 @@ def test_search_parameter_creation_default(self):
             default.DEFAULT_RUN_CONFIG_MAX_INSTANCE_COUNT, instance_group.max_range
         )

+    def test_search_parameter_concurrency_formula(self):
+        """
+        Test that when concurrency formula is specified it is
+        not added as a search parameter
+        """
+
+        args = [
+            "model-analyzer",
+            "profile",
+            "--model-repository",
+            "cli-repository",
+            "-f",
+            "path-to-config-file",
+            "--run-config-search-mode",
+            "optuna",
+            "--use-concurrency-formula",
+        ]
+
+        yaml_content = """
+        profile_models: add_sub
+        """
+        config = TestConfig()._evaluate_config(args=args, yaml_content=yaml_content)
+
+        analyzer = Analyzer(config, MagicMock(), MagicMock(), MagicMock())
+        analyzer._populate_search_parameters()
+
+        concurrency = analyzer._search_parameters["add_sub"].get_parameter(
+            "concurrency"
+        )
+
+        self.assertEqual(concurrency, None)
+
     def test_search_parameter_creation_multi_model_non_default(self):
         """
         Test that search parameters are correctly created in
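For context, here is a hedged, standalone sketch of the concurrency behaviour this patch introduces. It is not part of the patch and not Model Analyzer source code; the helper name resolve_concurrency_search is invented for illustration. It mirrors the three-way branch added to SearchParameters._populate_concurrency and the gating added in _create_trial_objectives: an explicit user-supplied concurrency list is still searched, the new --use-concurrency-formula flag skips the search and defers to the formula in _get_objective_concurrency, and otherwise Optuna searches the run-config-search concurrency range.

    # Illustrative sketch only -- not Model Analyzer source code.
    def resolve_concurrency_search(parameters: dict, use_concurrency_formula: bool) -> str:
        """Summarize how concurrency is determined for an Optuna trial under this patch."""
        if parameters and parameters.get("concurrency"):
            # User supplied explicit values: search over that list.
            return "search user-provided concurrency list"
        if use_concurrency_formula:
            # New flag: concurrency is not a search parameter; it is computed
            # from the trial's other objectives by the concurrency formula.
            return "compute concurrency from formula"
        # Default: let Optuna search the run-config-search concurrency range.
        return "search run-config-search concurrency range"

    if __name__ == "__main__":
        assert resolve_concurrency_search({}, use_concurrency_formula=True) == "compute concurrency from formula"
        assert resolve_concurrency_search({"concurrency": [4, 8]}, False) == "search user-provided concurrency list"
        assert resolve_concurrency_search({}, False) == "search run-config-search concurrency range"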