From 86fc9cde1e7a56cb643da9318ace458b039a5ca9 Mon Sep 17 00:00:00 2001
From: Brian Raf
Date: Mon, 3 Jun 2024 18:43:43 +0000
Subject: [PATCH] Adding support for client batch size

---
 .../generate/optuna_run_config_generator.py | 21 ++++++---
 .../config/generate/search_parameters.py    | 42 +++++++++++++----
 tests/test_search_parameters.py             | 46 ++++++++++++-------
 3 files changed, 78 insertions(+), 31 deletions(-)

diff --git a/model_analyzer/config/generate/optuna_run_config_generator.py b/model_analyzer/config/generate/optuna_run_config_generator.py
index 53af27742..639d933be 100755
--- a/model_analyzer/config/generate/optuna_run_config_generator.py
+++ b/model_analyzer/config/generate/optuna_run_config_generator.py
@@ -65,6 +65,7 @@ class OptunaRunConfigGenerator(ConfigGeneratorInterface):
     # This list represents all possible parameters Optuna can currently search for
     optuna_parameter_list = [
         "batch_sizes",
+        "max_batch_size",
         "instance_group",
         "concurrency",
         "max_queue_delay_microseconds",
@@ -291,7 +292,7 @@ def _get_objective_concurrency(self, trial_objectives: TrialObjectives) -> int:
         concurrency_formula = (
             2
             * int(trial_objectives["instance_group"])
-            * int(trial_objectives["batch_sizes"])
+            * int(trial_objectives["max_batch_size"])
         )
         concurrency = (
             self._config.run_config_search_max_concurrency
@@ -344,8 +345,8 @@ def _create_parameter_combo(
             }
         ]
 
-        if "batch_sizes" in trial_objectives:
-            param_combo["max_batch_size"] = trial_objectives["batch_sizes"]
+        if "max_batch_size" in trial_objectives:
+            param_combo["max_batch_size"] = trial_objectives["max_batch_size"]
 
         if "max_queue_delay_microseconds" in trial_objectives:
             param_combo["dynamic_batching"] = {
@@ -435,8 +436,16 @@ def _create_model_run_config(
         model_config_variant: ModelConfigVariant,
         trial_objectives: TrialObjectives,
     ) -> ModelRunConfig:
+        trial_batch_size = (
+            int(trial_objectives["batch_sizes"])
+            if "batch_sizes" in trial_objectives
+            else DEFAULT_BATCH_SIZES
+        )
         perf_analyzer_config = self._create_perf_analyzer_config(
-            model.model_name(), model, int(trial_objectives["concurrency"])
+            model.model_name(),
+            model,
+            int(trial_objectives["concurrency"]),
+            trial_batch_size,
         )
         model_run_config = ModelRunConfig(
             model.model_name(), model_config_variant, perf_analyzer_config
@@ -449,14 +458,14 @@ def _create_perf_analyzer_config(
         model_name: str,
         model: ModelProfileSpec,
         concurrency: int,
+        batch_sizes: int,
     ) -> PerfAnalyzerConfig:
         perf_analyzer_config = PerfAnalyzerConfig()
 
         perf_analyzer_config.update_config_from_profile_config(model_name, self._config)
 
-        # TODO: TMA-1934 add support for user specifying a range of client batch sizes
         perf_config_params = {
-            "batch-size": DEFAULT_BATCH_SIZES,
+            "batch-size": batch_sizes,
             "concurrency-range": concurrency,
         }
         perf_analyzer_config.update_config(perf_config_params)
diff --git a/model_analyzer/config/generate/search_parameters.py b/model_analyzer/config/generate/search_parameters.py
index 0ae0d4e2b..c7421d47f 100755
--- a/model_analyzer/config/generate/search_parameters.py
+++ b/model_analyzer/config/generate/search_parameters.py
@@ -30,11 +30,15 @@ class SearchParameters:
     # These map to the run-config-search fields
    # See github.com/triton-inference-server/model_analyzer/blob/main/docs/config.md
 
-    exponential_rcs_parameters = ["batch_sizes", "concurrency"]
+    exponential_rcs_parameters = ["max_batch_size", "batch_sizes", "concurrency"]
     linear_rcs_parameters = ["instance_group"]
 
-    model_parameters = ["batch_sizes", "instance_group", "max_queue_delay_microseconds"]
runtime_parameters = ["concurrency"] + model_parameters = [ + "max_batch_size", + "instance_group", + "max_queue_delay_microseconds", + ] + runtime_parameters = ["batch_sizes", "concurrency"] def __init__( self, @@ -120,6 +124,7 @@ def _populate_parameters(self) -> None: # TODO: Populate request rate - TMA-1903 def _populate_model_config_parameters(self) -> None: + self._populate_max_batch_size() self._populate_instance_group() self._populate_max_queue_delay_microseconds() @@ -130,12 +135,6 @@ def _populate_batch_sizes(self) -> None: parameter_list=self._parameters["batch_sizes"], parameter_category=ParameterCategory.INT_LIST, ) - else: - self._populate_rcs_parameter( - parameter_name="batch_sizes", - rcs_parameter_min_value=self._config.run_config_search_min_model_batch_size, - rcs_parameter_max_value=self._config.run_config_search_max_model_batch_size, - ) def _populate_concurrency(self) -> None: if self._parameters and self._parameters["concurrency"]: @@ -153,6 +152,31 @@ def _populate_concurrency(self) -> None: rcs_parameter_max_value=self._config.run_config_search_max_concurrency, ) + def _populate_max_batch_size(self) -> None: + # Example config format: + # model_config_parameters: + # max_batch_size: [1, 4, 16] + if not self._model_config_parameters: + self._populate_rcs_parameter( + parameter_name="max_batch_size", + rcs_parameter_min_value=self._config.run_config_search_min_model_batch_size, + rcs_parameter_max_value=self._config.run_config_search_max_model_batch_size, + ) + elif "max_batch_size" in self._model_config_parameters.keys(): + parameter_list = self._model_config_parameters["max_batch_size"] + + self._populate_list_parameter( + parameter_name="max_batch_size", + parameter_list=parameter_list, + parameter_category=ParameterCategory.INT_LIST, + ) + else: + self._populate_rcs_parameter( + parameter_name="max_batch_size", + rcs_parameter_min_value=self._config.run_config_search_min_model_batch_size, + rcs_parameter_max_value=self._config.run_config_search_max_model_batch_size, + ) + def _populate_instance_group(self) -> None: # Example config format: # diff --git a/tests/test_search_parameters.py b/tests/test_search_parameters.py index ad3f4ede7..2c7b73cb0 100755 --- a/tests/test_search_parameters.py +++ b/tests/test_search_parameters.py @@ -209,17 +209,19 @@ def test_search_parameter_creation_default(self): analyzer = Analyzer(config, MagicMock(), MagicMock(), MagicMock()) analyzer._populate_search_parameters() - # batch_sizes - batch_sizes = analyzer._search_parameters["add_sub"].get_parameter( - "batch_sizes" + # max_batch_size + max_batch_size = analyzer._search_parameters["add_sub"].get_parameter( + "max_batch_size" ) - self.assertEqual(ParameterUsage.MODEL, batch_sizes.usage) - self.assertEqual(ParameterCategory.EXPONENTIAL, batch_sizes.category) + self.assertEqual(ParameterUsage.MODEL, max_batch_size.usage) + self.assertEqual(ParameterCategory.EXPONENTIAL, max_batch_size.category) self.assertEqual( - log2(default.DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE), batch_sizes.min_range + log2(default.DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE), + max_batch_size.min_range, ) self.assertEqual( - log2(default.DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE), batch_sizes.max_range + log2(default.DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE), + max_batch_size.max_range, ) # concurrency @@ -304,6 +306,7 @@ def test_search_parameter_creation_multi_model_non_default(self): parameters: batch_sizes: [16, 32, 64] model_config_parameters: + max_batch_size: [1, 2, 4, 8] dynamic_batching: 
                    max_queue_delay_microseconds: [100, 200, 300]
                 instance_group:
@@ -323,12 +326,21 @@ def test_search_parameter_creation_multi_model_non_default(self):
 
         # ADD_SUB
         # ===================================================================
-        # batch_sizes
+        # max batch size
+        # ===================================================================
+        max_batch_size = analyzer._search_parameters["add_sub"].get_parameter(
+            "max_batch_size"
+        )
+        self.assertEqual(ParameterUsage.MODEL, max_batch_size.usage)
+        self.assertEqual(ParameterCategory.INT_LIST, max_batch_size.category)
+        self.assertEqual([1, 2, 4, 8], max_batch_size.enumerated_list)
+
+        # batch sizes
         # ===================================================================
         batch_sizes = analyzer._search_parameters["add_sub"].get_parameter(
             "batch_sizes"
         )
-        self.assertEqual(ParameterUsage.MODEL, batch_sizes.usage)
+        self.assertEqual(ParameterUsage.RUNTIME, batch_sizes.usage)
         self.assertEqual(ParameterCategory.INT_LIST, batch_sizes.category)
         self.assertEqual([16, 32, 64], batch_sizes.enumerated_list)
 
@@ -366,18 +378,20 @@ def test_search_parameter_creation_multi_model_non_default(self):
 
         # MULT_DIV
         # ===================================================================
-        # batch_sizes
+        # max batch size
         # ===================================================================
-        batch_sizes = analyzer._search_parameters["mult_div"].get_parameter(
-            "batch_sizes"
+        max_batch_size = analyzer._search_parameters["mult_div"].get_parameter(
+            "max_batch_size"
         )
-        self.assertEqual(ParameterUsage.MODEL, batch_sizes.usage)
-        self.assertEqual(ParameterCategory.EXPONENTIAL, batch_sizes.category)
+        self.assertEqual(ParameterUsage.MODEL, max_batch_size.usage)
+        self.assertEqual(ParameterCategory.EXPONENTIAL, max_batch_size.category)
         self.assertEqual(
-            log2(default.DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE), batch_sizes.min_range
+            log2(default.DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE),
+            max_batch_size.min_range,
         )
         self.assertEqual(
-            log2(default.DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE), batch_sizes.max_range
+            log2(default.DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE),
+            max_batch_size.max_range,
         )
 
         # concurrency
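
Reviewer notes:

With this patch, the client-side batch size ("batch_sizes", a runtime
parameter forwarded to perf_analyzer) and the model-side "max_batch_size"
(a model config parameter searched by Optuna) are configured independently.
A minimal profile-config sketch in the same YAML shape as the updated test;
the model name and the values are illustrative, not taken from the patch:

    # Sketch only -- mirrors the yaml_str shape used in
    # test_search_parameter_creation_multi_model_non_default.
    # "my_model" and all values are illustrative.
    yaml_str = """
    profile_models:
        my_model:
            parameters:
                batch_sizes: [1, 4, 16]    # client batch size -> perf_analyzer --batch-size
            model_config_parameters:
                max_batch_size: [2, 4, 8]  # model config field searched by Optuna
    """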
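How a trial's objectives now reach perf_analyzer, condensed from
_create_model_run_config and _create_perf_analyzer_config above. A minimal
sketch, assuming DEFAULT_BATCH_SIZES is 1 (Model Analyzer's existing default
client batch size); this is not the Model Analyzer API itself:

    DEFAULT_BATCH_SIZES = 1  # assumed value of the existing default

    def perf_config_params(trial_objectives: dict) -> dict:
        # The client batch size stays runtime-only: it becomes perf_analyzer's
        # --batch-size flag and never touches the model config.
        batch_size = int(trial_objectives.get("batch_sizes", DEFAULT_BATCH_SIZES))
        return {
            "batch-size": batch_size,
            "concurrency-range": int(trial_objectives["concurrency"]),
        }

    # A trial that picked batch_sizes=4 and concurrency=16:
    assert perf_config_params({"batch_sizes": 4, "concurrency": 16}) == {
        "batch-size": 4,
        "concurrency-range": 16,
    }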
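The operand swap in _get_objective_concurrency means the concurrency
heuristic is now driven by the model-side batch limit rather than the client
batch size: 2 * instance_group * max_batch_size, capped at
run_config_search_max_concurrency. A worked sketch; treating the cap as a
simple upper bound, and the 1024 default, are assumptions not shown in this
diff:

    def objective_concurrency(
        instance_group: int, max_batch_size: int, rcs_max_concurrency: int = 1024
    ) -> int:
        # Heuristic target: two in-flight requests per instance per batch slot,
        # clamped to run-config-search-max-concurrency (assumed simple min).
        return min(2 * instance_group * max_batch_size, rcs_max_concurrency)

    # e.g. a trial with 2 instances and max_batch_size = 8 targets 2 * 2 * 8 = 32.
    assert objective_concurrency(2, 8) == 32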