From 86fc9cde1e7a56cb643da9318ace458b039a5ca9 Mon Sep 17 00:00:00 2001 From: Brian Raf Date: Mon, 3 Jun 2024 18:43:43 +0000 Subject: [PATCH 1/3] Adding support for client batch size --- .../generate/optuna_run_config_generator.py | 21 ++++++--- .../config/generate/search_parameters.py | 42 +++++++++++++---- tests/test_search_parameters.py | 46 ++++++++++++------- 3 files changed, 78 insertions(+), 31 deletions(-) diff --git a/model_analyzer/config/generate/optuna_run_config_generator.py b/model_analyzer/config/generate/optuna_run_config_generator.py index 53af27742..639d933be 100755 --- a/model_analyzer/config/generate/optuna_run_config_generator.py +++ b/model_analyzer/config/generate/optuna_run_config_generator.py @@ -65,6 +65,7 @@ class OptunaRunConfigGenerator(ConfigGeneratorInterface): # This list represents all possible parameters Optuna can currently search for optuna_parameter_list = [ "batch_sizes", + "max_batch_size", "instance_group", "concurrency", "max_queue_delay_microseconds", @@ -291,7 +292,7 @@ def _get_objective_concurrency(self, trial_objectives: TrialObjectives) -> int: concurrency_formula = ( 2 * int(trial_objectives["instance_group"]) - * int(trial_objectives["batch_sizes"]) + * int(trial_objectives["max_batch_size"]) ) concurrency = ( self._config.run_config_search_max_concurrency @@ -344,8 +345,8 @@ def _create_parameter_combo( } ] - if "batch_sizes" in trial_objectives: - param_combo["max_batch_size"] = trial_objectives["batch_sizes"] + if "max_batch_size" in trial_objectives: + param_combo["max_batch_size"] = trial_objectives["max_batch_size"] if "max_queue_delay_microseconds" in trial_objectives: param_combo["dynamic_batching"] = { @@ -435,8 +436,16 @@ def _create_model_run_config( model_config_variant: ModelConfigVariant, trial_objectives: TrialObjectives, ) -> ModelRunConfig: + trial_batch_size = ( + int(trial_objectives["batch_sizes"]) + if "batch_sizes" in trial_objectives + else DEFAULT_BATCH_SIZES + ) perf_analyzer_config = self._create_perf_analyzer_config( - model.model_name(), model, int(trial_objectives["concurrency"]) + model.model_name(), + model, + int(trial_objectives["concurrency"]), + trial_batch_size, ) model_run_config = ModelRunConfig( model.model_name(), model_config_variant, perf_analyzer_config @@ -449,14 +458,14 @@ def _create_perf_analyzer_config( model_name: str, model: ModelProfileSpec, concurrency: int, + batch_sizes: int, ) -> PerfAnalyzerConfig: perf_analyzer_config = PerfAnalyzerConfig() perf_analyzer_config.update_config_from_profile_config(model_name, self._config) - # TODO: TMA-1934 add support for user specifying a range of client batch sizes perf_config_params = { - "batch-size": DEFAULT_BATCH_SIZES, + "batch-size": batch_sizes, "concurrency-range": concurrency, } perf_analyzer_config.update_config(perf_config_params) diff --git a/model_analyzer/config/generate/search_parameters.py b/model_analyzer/config/generate/search_parameters.py index 0ae0d4e2b..c7421d47f 100755 --- a/model_analyzer/config/generate/search_parameters.py +++ b/model_analyzer/config/generate/search_parameters.py @@ -30,11 +30,15 @@ class SearchParameters: # These map to the run-config-search fields # See github.com/triton-inference-server/model_analyzer/blob/main/docs/config.md - exponential_rcs_parameters = ["batch_sizes", "concurrency"] + exponential_rcs_parameters = ["max_batch_size", "batch_sizes", "concurrency"] linear_rcs_parameters = ["instance_group"] - model_parameters = ["batch_sizes", "instance_group", "max_queue_delay_microseconds"] - runtime_parameters = ["concurrency"] + model_parameters = [ + "max_batch_size", + "instance_group", + "max_queue_delay_microseconds", + ] + runtime_parameters = ["batch_sizes", "concurrency"] def __init__( self, @@ -120,6 +124,7 @@ def _populate_parameters(self) -> None: # TODO: Populate request rate - TMA-1903 def _populate_model_config_parameters(self) -> None: + self._populate_max_batch_size() self._populate_instance_group() self._populate_max_queue_delay_microseconds() @@ -130,12 +135,6 @@ def _populate_batch_sizes(self) -> None: parameter_list=self._parameters["batch_sizes"], parameter_category=ParameterCategory.INT_LIST, ) - else: - self._populate_rcs_parameter( - parameter_name="batch_sizes", - rcs_parameter_min_value=self._config.run_config_search_min_model_batch_size, - rcs_parameter_max_value=self._config.run_config_search_max_model_batch_size, - ) def _populate_concurrency(self) -> None: if self._parameters and self._parameters["concurrency"]: @@ -153,6 +152,31 @@ def _populate_concurrency(self) -> None: rcs_parameter_max_value=self._config.run_config_search_max_concurrency, ) + def _populate_max_batch_size(self) -> None: + # Example config format: + # model_config_parameters: + # max_batch_size: [1, 4, 16] + if not self._model_config_parameters: + self._populate_rcs_parameter( + parameter_name="max_batch_size", + rcs_parameter_min_value=self._config.run_config_search_min_model_batch_size, + rcs_parameter_max_value=self._config.run_config_search_max_model_batch_size, + ) + elif "max_batch_size" in self._model_config_parameters.keys(): + parameter_list = self._model_config_parameters["max_batch_size"] + + self._populate_list_parameter( + parameter_name="max_batch_size", + parameter_list=parameter_list, + parameter_category=ParameterCategory.INT_LIST, + ) + else: + self._populate_rcs_parameter( + parameter_name="max_batch_size", + rcs_parameter_min_value=self._config.run_config_search_min_model_batch_size, + rcs_parameter_max_value=self._config.run_config_search_max_model_batch_size, + ) + def _populate_instance_group(self) -> None: # Example config format: # diff --git a/tests/test_search_parameters.py b/tests/test_search_parameters.py index ad3f4ede7..2c7b73cb0 100755 --- a/tests/test_search_parameters.py +++ b/tests/test_search_parameters.py @@ -209,17 +209,19 @@ def test_search_parameter_creation_default(self): analyzer = Analyzer(config, MagicMock(), MagicMock(), MagicMock()) analyzer._populate_search_parameters() - # batch_sizes - batch_sizes = analyzer._search_parameters["add_sub"].get_parameter( - "batch_sizes" + # max_batch_size + max_batch_size = analyzer._search_parameters["add_sub"].get_parameter( + "max_batch_size" ) - self.assertEqual(ParameterUsage.MODEL, batch_sizes.usage) - self.assertEqual(ParameterCategory.EXPONENTIAL, batch_sizes.category) + self.assertEqual(ParameterUsage.MODEL, max_batch_size.usage) + self.assertEqual(ParameterCategory.EXPONENTIAL, max_batch_size.category) self.assertEqual( - log2(default.DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE), batch_sizes.min_range + log2(default.DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE), + max_batch_size.min_range, ) self.assertEqual( - log2(default.DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE), batch_sizes.max_range + log2(default.DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE), + max_batch_size.max_range, ) # concurrency @@ -304,6 +306,7 @@ def test_search_parameter_creation_multi_model_non_default(self): parameters: batch_sizes: [16, 32, 64] model_config_parameters: + max_batch_size: [1, 2, 4, 8] dynamic_batching: max_queue_delay_microseconds: [100, 200, 300] instance_group: @@ -323,12 +326,21 @@ def test_search_parameter_creation_multi_model_non_default(self): # ADD_SUB # =================================================================== - # batch_sizes + # max batch size + # =================================================================== + max_batch_size = analyzer._search_parameters["add_sub"].get_parameter( + "max_batch_size" + ) + self.assertEqual(ParameterUsage.MODEL, max_batch_size.usage) + self.assertEqual(ParameterCategory.INT_LIST, max_batch_size.category) + self.assertEqual([1, 2, 4, 8], max_batch_size.enumerated_list) + + # batch sizes # =================================================================== batch_sizes = analyzer._search_parameters["add_sub"].get_parameter( "batch_sizes" ) - self.assertEqual(ParameterUsage.MODEL, batch_sizes.usage) + self.assertEqual(ParameterUsage.RUNTIME, batch_sizes.usage) self.assertEqual(ParameterCategory.INT_LIST, batch_sizes.category) self.assertEqual([16, 32, 64], batch_sizes.enumerated_list) @@ -366,18 +378,20 @@ def test_search_parameter_creation_multi_model_non_default(self): # MULT_DIV # =================================================================== - # batch_sizes + # max batch size # =================================================================== - batch_sizes = analyzer._search_parameters["mult_div"].get_parameter( - "batch_sizes" + max_batch_size = analyzer._search_parameters["mult_div"].get_parameter( + "max_batch_size" ) - self.assertEqual(ParameterUsage.MODEL, batch_sizes.usage) - self.assertEqual(ParameterCategory.EXPONENTIAL, batch_sizes.category) + self.assertEqual(ParameterUsage.MODEL, max_batch_size.usage) + self.assertEqual(ParameterCategory.EXPONENTIAL, max_batch_size.category) self.assertEqual( - log2(default.DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE), batch_sizes.min_range + log2(default.DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE), + max_batch_size.min_range, ) self.assertEqual( - log2(default.DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE), batch_sizes.max_range + log2(default.DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE), + max_batch_size.max_range, ) # concurrency From 870f850d0ecdbfb37f19dd66a891b0fd7a0240ef Mon Sep 17 00:00:00 2001 From: Brian Raf Date: Wed, 5 Jun 2024 14:24:23 +0000 Subject: [PATCH 2/3] Fixes based on PR --- .../generate/optuna_run_config_generator.py | 10 +++--- .../config/generate/search_parameters.py | 32 ++++++++----------- 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/model_analyzer/config/generate/optuna_run_config_generator.py b/model_analyzer/config/generate/optuna_run_config_generator.py index 639d933be..056d575b6 100755 --- a/model_analyzer/config/generate/optuna_run_config_generator.py +++ b/model_analyzer/config/generate/optuna_run_config_generator.py @@ -436,16 +436,16 @@ def _create_model_run_config( model_config_variant: ModelConfigVariant, trial_objectives: TrialObjectives, ) -> ModelRunConfig: - trial_batch_size = ( + trial_batch_sizes = ( int(trial_objectives["batch_sizes"]) if "batch_sizes" in trial_objectives else DEFAULT_BATCH_SIZES ) perf_analyzer_config = self._create_perf_analyzer_config( - model.model_name(), - model, - int(trial_objectives["concurrency"]), - trial_batch_size, + model_name=model.model_name(), + model=model, + concurrency=int(trial_objectives["concurrency"]), + batch_sizes=trial_batch_sizes, ) model_run_config = ModelRunConfig( model.model_name(), model_config_variant, perf_analyzer_config diff --git a/model_analyzer/config/generate/search_parameters.py b/model_analyzer/config/generate/search_parameters.py index c7421d47f..1665f6d33 100755 --- a/model_analyzer/config/generate/search_parameters.py +++ b/model_analyzer/config/generate/search_parameters.py @@ -156,21 +156,16 @@ def _populate_max_batch_size(self) -> None: # Example config format: # model_config_parameters: # max_batch_size: [1, 4, 16] - if not self._model_config_parameters: - self._populate_rcs_parameter( - parameter_name="max_batch_size", - rcs_parameter_min_value=self._config.run_config_search_min_model_batch_size, - rcs_parameter_max_value=self._config.run_config_search_max_model_batch_size, - ) - elif "max_batch_size" in self._model_config_parameters.keys(): + if self._is_key_in_model_config_parameters("max_batch_size"): parameter_list = self._model_config_parameters["max_batch_size"] - self._populate_list_parameter( parameter_name="max_batch_size", parameter_list=parameter_list, parameter_category=ParameterCategory.INT_LIST, ) else: + # Need to populate max_batch_size based on RCS min/max values + # when no model config parameters are present self._populate_rcs_parameter( parameter_name="max_batch_size", rcs_parameter_min_value=self._config.run_config_search_min_model_batch_size, @@ -184,16 +179,7 @@ def _populate_instance_group(self) -> None: # instance_group: # - kind: KIND_GPU # count: [1, 2, 3, 4] - - # Need to populate instance_group based on RCS min/max values - # even if no model config parameters are present - if not self._model_config_parameters: - self._populate_rcs_parameter( - parameter_name="instance_group", - rcs_parameter_min_value=self._config.run_config_search_min_instance_count, - rcs_parameter_max_value=self._config.run_config_search_max_instance_count, - ) - elif "instance_group" in self._model_config_parameters.keys(): + if self._is_key_in_model_config_parameters("instance_group"): parameter_list = self._model_config_parameters["instance_group"][0][0][ "count" ] @@ -204,12 +190,22 @@ def _populate_instance_group(self) -> None: parameter_category=ParameterCategory.INT_LIST, ) else: + # Need to populate instance_group based on RCS min/max values + # when no model config parameters are present self._populate_rcs_parameter( parameter_name="instance_group", rcs_parameter_min_value=self._config.run_config_search_min_instance_count, rcs_parameter_max_value=self._config.run_config_search_max_instance_count, ) + def _is_key_in_model_config_parameters(self, key: str) -> bool: + key_found = bool( + self._model_config_parameters + and key in self._model_config_parameters.keys() + ) + + return key_found + def _populate_max_queue_delay_microseconds(self) -> None: # Example format # From 334b3a718ff9f3626c496e9c78dda56753ea7db0 Mon Sep 17 00:00:00 2001 From: Brian Raf Date: Wed, 5 Jun 2024 15:19:55 +0000 Subject: [PATCH 3/3] Removing redundant keys() --- model_analyzer/config/generate/search_parameters.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/model_analyzer/config/generate/search_parameters.py b/model_analyzer/config/generate/search_parameters.py index 1665f6d33..58f25e44c 100755 --- a/model_analyzer/config/generate/search_parameters.py +++ b/model_analyzer/config/generate/search_parameters.py @@ -200,8 +200,7 @@ def _populate_instance_group(self) -> None: def _is_key_in_model_config_parameters(self, key: str) -> bool: key_found = bool( - self._model_config_parameters - and key in self._model_config_parameters.keys() + self._model_config_parameters and key in self._model_config_parameters ) return key_found