From af43ed114b37677d3c3933e6b7f10dc89c6f23b2 Mon Sep 17 00:00:00 2001 From: Brian Raf <92820864+nv-braf@users.noreply.github.com> Date: Wed, 5 Jun 2024 08:32:45 -0700 Subject: [PATCH] Adding support for client batch size (#892) * Adding support for client batch size * Fixes based on PR * Removing redundant keys() --- .../generate/optuna_run_config_generator.py | 21 +++++-- .../config/generate/search_parameters.py | 57 ++++++++++++------- tests/test_search_parameters.py | 46 +++++++++------ 3 files changed, 83 insertions(+), 41 deletions(-) diff --git a/model_analyzer/config/generate/optuna_run_config_generator.py b/model_analyzer/config/generate/optuna_run_config_generator.py index f190cb027..e13596b8f 100755 --- a/model_analyzer/config/generate/optuna_run_config_generator.py +++ b/model_analyzer/config/generate/optuna_run_config_generator.py @@ -65,6 +65,7 @@ class OptunaRunConfigGenerator(ConfigGeneratorInterface): # This list represents all possible parameters Optuna can currently search for optuna_parameter_list = [ "batch_sizes", + "max_batch_size", "instance_group", "concurrency", "max_queue_delay_microseconds", @@ -364,7 +365,7 @@ def _get_objective_concurrency(self, trial_objectives: TrialObjectives) -> int: concurrency_formula = ( 2 * int(trial_objectives["instance_group"]) - * int(trial_objectives["batch_sizes"]) + * int(trial_objectives["max_batch_size"]) ) concurrency = ( self._config.run_config_search_max_concurrency @@ -418,8 +419,8 @@ def _create_parameter_combo( } ] - if "batch_sizes" in trial_objectives: - param_combo["max_batch_size"] = trial_objectives["batch_sizes"] + if "max_batch_size" in trial_objectives: + param_combo["max_batch_size"] = trial_objectives["max_batch_size"] if "max_queue_delay_microseconds" in trial_objectives: param_combo["dynamic_batching"] = { @@ -509,8 +510,16 @@ def _create_model_run_config( model_config_variant: ModelConfigVariant, trial_objectives: TrialObjectives, ) -> ModelRunConfig: + trial_batch_sizes = ( + int(trial_objectives["batch_sizes"]) + if "batch_sizes" in trial_objectives + else DEFAULT_BATCH_SIZES + ) perf_analyzer_config = self._create_perf_analyzer_config( - model.model_name(), model, int(trial_objectives["concurrency"]) + model_name=model.model_name(), + model=model, + concurrency=int(trial_objectives["concurrency"]), + batch_sizes=trial_batch_sizes, ) model_run_config = ModelRunConfig( model.model_name(), model_config_variant, perf_analyzer_config @@ -523,14 +532,14 @@ def _create_perf_analyzer_config( model_name: str, model: ModelProfileSpec, concurrency: int, + batch_sizes: int, ) -> PerfAnalyzerConfig: perf_analyzer_config = PerfAnalyzerConfig() perf_analyzer_config.update_config_from_profile_config(model_name, self._config) - # TODO: TMA-1934 add support for user specifying a range of client batch sizes perf_config_params = { - "batch-size": DEFAULT_BATCH_SIZES, + "batch-size": batch_sizes, "concurrency-range": concurrency, } perf_analyzer_config.update_config(perf_config_params) diff --git a/model_analyzer/config/generate/search_parameters.py b/model_analyzer/config/generate/search_parameters.py index 0ae0d4e2b..58f25e44c 100755 --- a/model_analyzer/config/generate/search_parameters.py +++ b/model_analyzer/config/generate/search_parameters.py @@ -30,11 +30,15 @@ class SearchParameters: # These map to the run-config-search fields # See github.com/triton-inference-server/model_analyzer/blob/main/docs/config.md - exponential_rcs_parameters = ["batch_sizes", "concurrency"] + exponential_rcs_parameters = ["max_batch_size", "batch_sizes", "concurrency"] linear_rcs_parameters = ["instance_group"] - model_parameters = ["batch_sizes", "instance_group", "max_queue_delay_microseconds"] - runtime_parameters = ["concurrency"] + model_parameters = [ + "max_batch_size", + "instance_group", + "max_queue_delay_microseconds", + ] + runtime_parameters = ["batch_sizes", "concurrency"] def __init__( self, @@ -120,6 +124,7 @@ def _populate_parameters(self) -> None: # TODO: Populate request rate - TMA-1903 def _populate_model_config_parameters(self) -> None: + self._populate_max_batch_size() self._populate_instance_group() self._populate_max_queue_delay_microseconds() @@ -130,12 +135,6 @@ def _populate_batch_sizes(self) -> None: parameter_list=self._parameters["batch_sizes"], parameter_category=ParameterCategory.INT_LIST, ) - else: - self._populate_rcs_parameter( - parameter_name="batch_sizes", - rcs_parameter_min_value=self._config.run_config_search_min_model_batch_size, - rcs_parameter_max_value=self._config.run_config_search_max_model_batch_size, - ) def _populate_concurrency(self) -> None: if self._parameters and self._parameters["concurrency"]: @@ -153,6 +152,26 @@ def _populate_concurrency(self) -> None: rcs_parameter_max_value=self._config.run_config_search_max_concurrency, ) + def _populate_max_batch_size(self) -> None: + # Example config format: + # model_config_parameters: + # max_batch_size: [1, 4, 16] + if self._is_key_in_model_config_parameters("max_batch_size"): + parameter_list = self._model_config_parameters["max_batch_size"] + self._populate_list_parameter( + parameter_name="max_batch_size", + parameter_list=parameter_list, + parameter_category=ParameterCategory.INT_LIST, + ) + else: + # Need to populate max_batch_size based on RCS min/max values + # when no model config parameters are present + self._populate_rcs_parameter( + parameter_name="max_batch_size", + rcs_parameter_min_value=self._config.run_config_search_min_model_batch_size, + rcs_parameter_max_value=self._config.run_config_search_max_model_batch_size, + ) + def _populate_instance_group(self) -> None: # Example config format: # @@ -160,16 +179,7 @@ def _populate_instance_group(self) -> None: # instance_group: # - kind: KIND_GPU # count: [1, 2, 3, 4] - - # Need to populate instance_group based on RCS min/max values - # even if no model config parameters are present - if not self._model_config_parameters: - self._populate_rcs_parameter( - parameter_name="instance_group", - rcs_parameter_min_value=self._config.run_config_search_min_instance_count, - rcs_parameter_max_value=self._config.run_config_search_max_instance_count, - ) - elif "instance_group" in self._model_config_parameters.keys(): + if self._is_key_in_model_config_parameters("instance_group"): parameter_list = self._model_config_parameters["instance_group"][0][0][ "count" ] @@ -180,12 +190,21 @@ def _populate_instance_group(self) -> None: parameter_category=ParameterCategory.INT_LIST, ) else: + # Need to populate instance_group based on RCS min/max values + # when no model config parameters are present self._populate_rcs_parameter( parameter_name="instance_group", rcs_parameter_min_value=self._config.run_config_search_min_instance_count, rcs_parameter_max_value=self._config.run_config_search_max_instance_count, ) + def _is_key_in_model_config_parameters(self, key: str) -> bool: + key_found = bool( + self._model_config_parameters and key in self._model_config_parameters + ) + + return key_found + def _populate_max_queue_delay_microseconds(self) -> None: # Example format # diff --git a/tests/test_search_parameters.py b/tests/test_search_parameters.py index ad3f4ede7..2c7b73cb0 100755 --- a/tests/test_search_parameters.py +++ b/tests/test_search_parameters.py @@ -209,17 +209,19 @@ def test_search_parameter_creation_default(self): analyzer = Analyzer(config, MagicMock(), MagicMock(), MagicMock()) analyzer._populate_search_parameters() - # batch_sizes - batch_sizes = analyzer._search_parameters["add_sub"].get_parameter( - "batch_sizes" + # max_batch_size + max_batch_size = analyzer._search_parameters["add_sub"].get_parameter( + "max_batch_size" ) - self.assertEqual(ParameterUsage.MODEL, batch_sizes.usage) - self.assertEqual(ParameterCategory.EXPONENTIAL, batch_sizes.category) + self.assertEqual(ParameterUsage.MODEL, max_batch_size.usage) + self.assertEqual(ParameterCategory.EXPONENTIAL, max_batch_size.category) self.assertEqual( - log2(default.DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE), batch_sizes.min_range + log2(default.DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE), + max_batch_size.min_range, ) self.assertEqual( - log2(default.DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE), batch_sizes.max_range + log2(default.DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE), + max_batch_size.max_range, ) # concurrency @@ -304,6 +306,7 @@ def test_search_parameter_creation_multi_model_non_default(self): parameters: batch_sizes: [16, 32, 64] model_config_parameters: + max_batch_size: [1, 2, 4, 8] dynamic_batching: max_queue_delay_microseconds: [100, 200, 300] instance_group: @@ -323,12 +326,21 @@ def test_search_parameter_creation_multi_model_non_default(self): # ADD_SUB # =================================================================== - # batch_sizes + # max batch size + # =================================================================== + max_batch_size = analyzer._search_parameters["add_sub"].get_parameter( + "max_batch_size" + ) + self.assertEqual(ParameterUsage.MODEL, max_batch_size.usage) + self.assertEqual(ParameterCategory.INT_LIST, max_batch_size.category) + self.assertEqual([1, 2, 4, 8], max_batch_size.enumerated_list) + + # batch sizes # =================================================================== batch_sizes = analyzer._search_parameters["add_sub"].get_parameter( "batch_sizes" ) - self.assertEqual(ParameterUsage.MODEL, batch_sizes.usage) + self.assertEqual(ParameterUsage.RUNTIME, batch_sizes.usage) self.assertEqual(ParameterCategory.INT_LIST, batch_sizes.category) self.assertEqual([16, 32, 64], batch_sizes.enumerated_list) @@ -366,18 +378,20 @@ def test_search_parameter_creation_multi_model_non_default(self): # MULT_DIV # =================================================================== - # batch_sizes + # max batch size # =================================================================== - batch_sizes = analyzer._search_parameters["mult_div"].get_parameter( - "batch_sizes" + max_batch_size = analyzer._search_parameters["mult_div"].get_parameter( + "max_batch_size" ) - self.assertEqual(ParameterUsage.MODEL, batch_sizes.usage) - self.assertEqual(ParameterCategory.EXPONENTIAL, batch_sizes.category) + self.assertEqual(ParameterUsage.MODEL, max_batch_size.usage) + self.assertEqual(ParameterCategory.EXPONENTIAL, max_batch_size.category) self.assertEqual( - log2(default.DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE), batch_sizes.min_range + log2(default.DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE), + max_batch_size.min_range, ) self.assertEqual( - log2(default.DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE), batch_sizes.max_range + log2(default.DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE), + max_batch_size.max_range, ) # concurrency