diff --git a/model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py b/model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py
index 167aeee12..85c8cdb68 100755
--- a/model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py
+++ b/model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py
@@ -16,7 +16,7 @@
 import logging
 from copy import deepcopy
-from typing import Generator, List, Optional
+from typing import Dict, Generator, List, Optional
 
 from model_analyzer.config.generate.model_profile_spec import ModelProfileSpec
 from model_analyzer.config.generate.model_variant_name_manager import (
@@ -52,7 +52,7 @@ def __init__(
         models: List[ModelProfileSpec],
         result_manager: ResultManager,
         model_variant_name_manager: ModelVariantNameManager,
-        search_parameters: SearchParameters,
+        search_parameters: Dict[str, SearchParameters],
     ):
         """
         Parameters
diff --git a/model_analyzer/config/generate/optuna_run_config_generator.py b/model_analyzer/config/generate/optuna_run_config_generator.py
index d047ce758..9120af8c4 100755
--- a/model_analyzer/config/generate/optuna_run_config_generator.py
+++ b/model_analyzer/config/generate/optuna_run_config_generator.py
@@ -15,7 +15,7 @@
 # limitations under the License.
 
 import logging
-from typing import Any, Dict, Generator, List, Optional, Union
+from typing import Any, Dict, Generator, List, Optional, TypeAlias, Union
 
 import optuna
 
@@ -29,9 +29,16 @@ from model_analyzer.config.generate.model_variant_name_manager import (
     ModelVariantNameManager,
 )
+from model_analyzer.config.generate.search_parameter import (
+    ParameterCategory,
+    SearchParameter,
+)
 from model_analyzer.config.generate.search_parameters import SearchParameters
 from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
-from model_analyzer.config.input.config_defaults import DEFAULT_BATCH_SIZES
+from model_analyzer.config.input.config_defaults import (
+    DEFAULT_BATCH_SIZES,
+    DEFAULT_RUN_CONFIG_MAX_CONCURRENCY,
+)
 from model_analyzer.config.run.model_run_config import ModelRunConfig
 from model_analyzer.config.run.run_config import RunConfig
 from model_analyzer.constants import LOGGER_NAME
@@ -44,20 +51,32 @@
 logger = logging.getLogger(LOGGER_NAME)
 
+TrialObjective: TypeAlias = Union[str | int]
+TrialObjectives: TypeAlias = Dict[str, TrialObjective]
+ParameterCombo: TypeAlias = Dict[str, Any]
+
 
 class OptunaRunConfigGenerator(ConfigGeneratorInterface):
     """
     Use Optuna algorithm to create RunConfigs
     """
 
+    # This list represents all possible parameters Optuna can currently search for
+    optuna_parameter_list = [
+        "batch_sizes",
+        "instance_group",
+        "concurrency",
+        "max_queue_delay_microseconds",
+    ]
+
     def __init__(
         self,
         config: ConfigCommandProfile,
         gpu_count: int,
         models: List[ModelProfileSpec],
         model_variant_name_manager: ModelVariantNameManager,
-        search_parameters: SearchParameters,
-        seed: Optional[int] = 0,
+        search_parameters: Dict[str, SearchParameters],
+        seed: Optional[int] = None,
     ):
         """
         Parameters
@@ -74,7 +93,8 @@ def __init__(
         self._config = config
         self._gpu_count = gpu_count
         self._models = models
-        self._search_parameters = search_parameters
+        # TODO: TMA-1927: Add support for multi-model
+        self._search_parameters = search_parameters[models[0].model_name()]
 
         self._model_variant_name_manager = model_variant_name_manager
@@ -126,24 +146,65 @@ def get_configs(self) -> Generator[RunConfig, None, None]:
         # TODO: TMA-1885: Need an early exit strategy
         for _ in range(n_trials):
             trial = self._study.ask()
-            self._create_trial_objectives(trial)
-            run_config = self._create_objective_based_run_config()
+            trial_objectives = self._create_trial_objectives(trial)
+            run_config = self._create_objective_based_run_config(trial_objectives)
             yield run_config
 
             score = self._calculate_score()
             self._study.tell(trial, score)
 
-    def _create_trial_objectives(self, trial) -> None:
-        # TODO: TMA-1925: Use SearchParameters here
-        self._instance_count = trial.suggest_int("instance_count", 1, 8)
-        self._batch_size = int(2 ** trial.suggest_int("batch_size", 1, 10))
+    def _create_trial_objectives(self, trial: optuna.Trial) -> TrialObjectives:
+        trial_objectives: TrialObjectives = {}
+        for parameter_name in OptunaRunConfigGenerator.optuna_parameter_list:
+            parameter = self._search_parameters.get_parameter(parameter_name)
+
+            if parameter:
+                trial_objectives[parameter_name] = self._create_trial_objective(
+                    trial, parameter_name, parameter
+                )
 
         # TODO: TMA-1884: Need an option to choose btw. concurrency formula and optuna searching
-        self._concurrency = 2 * self._instance_count * self._batch_size
-        if self._concurrency > 1024:
-            self._concurrency = 1024
+        trial_objectives["concurrency"] = self._get_objective_concurrency(
+            trial_objectives
+        )
 
-    def _create_objective_based_run_config(self) -> RunConfig:
-        param_combo = self._create_parameter_combo()
+        return trial_objectives
+
+    def _create_trial_objective(
+        self, trial: optuna.Trial, name: str, parameter: SearchParameter
+    ) -> TrialObjective:
+        if parameter.category is ParameterCategory.INTEGER:
+            objective = trial.suggest_int(
+                name, parameter.min_range, parameter.max_range
+            )
+        elif parameter.category is ParameterCategory.EXPONENTIAL:
+            objective = int(
+                2 ** trial.suggest_int(name, parameter.min_range, parameter.max_range)
+            )
+        elif parameter.category is ParameterCategory.INT_LIST:
+            objective = int(trial.suggest_categorical(name, parameter.enumerated_list))
+        elif parameter.category is ParameterCategory.STR_LIST:
+            objective = trial.suggest_categorical(name, parameter.enumerated_list)
+
+        return objective
+
+    def _get_objective_concurrency(self, trial_objectives: TrialObjectives) -> int:
+        concurrency = (
+            2
+            * int(trial_objectives["instance_group"])
+            * int(trial_objectives["batch_sizes"])
+        )
+        concurrency = (
+            DEFAULT_RUN_CONFIG_MAX_CONCURRENCY
+            if concurrency > DEFAULT_RUN_CONFIG_MAX_CONCURRENCY
+            else concurrency
+        )
+
+        return concurrency
+
+    def _create_objective_based_run_config(
+        self, trial_objectives: TrialObjectives
+    ) -> RunConfig:
+        param_combo = self._create_parameter_combo(trial_objectives)
 
         # TODO: TMA-1927: Add support for multi-model
         run_config = RunConfig(self._triton_env)
@@ -159,27 +220,39 @@ def _create_objective_based_run_config(self) -> RunConfig:
         model_run_config = self._create_model_run_config(
             model=self._models[0],
             model_config_variant=model_config_variant,
+            trial_objectives=trial_objectives,
         )
 
         run_config.add_model_run_config(model_run_config=model_run_config)
 
         return run_config
 
-    def _create_parameter_combo(self) -> Dict[str, Any]:
-        # TODO: TMA-1925: Use SearchParameters here
-        param_combo: Dict["str", Any] = {}
-        param_combo["dynamic_batching"] = {}
+    def _create_parameter_combo(
+        self, trial_objectives: TrialObjectives
+    ) -> ParameterCombo:
+        param_combo: ParameterCombo = {}
+        # TODO: TMA-1938: Need to look at model in ParameterSearch and add this as a parameter
+        param_combo["dynamic_batching"] = []
 
         # TODO: TMA-1927: Add support for multi-model
-        kind = "KIND_CPU" if self._models[0].cpu_only() else "KIND_GPU"
-        param_combo["instance_group"] = [
-            {
-                "count": self._instance_count,
-                "kind": kind,
+        if trial_objectives["instance_group"]:
+            kind = "KIND_CPU" if self._models[0].cpu_only() else "KIND_GPU"
+            param_combo["instance_group"] = [
+                {
+                    "count": trial_objectives["instance_group"],
+                    "kind": kind,
+                }
+            ]
+
+        if trial_objectives["batch_sizes"]:
+            param_combo["max_batch_size"] = trial_objectives["batch_sizes"]
+
+        if trial_objectives["max_queue_delay_microseconds"]:
+            param_combo["dynamic_batching"] = {
+                "max_queue_delay_microseconds": trial_objectives[
+                    "max_queue_delay_microseconds"
+                ]
             }
-        ]
-
-        param_combo["max_batch_size"] = self._batch_size
 
         return param_combo
@@ -261,9 +334,10 @@ def _create_model_run_config(
         self,
         model: ModelProfileSpec,
         model_config_variant: ModelConfigVariant,
+        trial_objectives: TrialObjectives,
     ) -> ModelRunConfig:
         perf_analyzer_config = self._create_perf_analyzer_config(
-            model.model_name(), model, self._concurrency
+            model.model_name(), model, int(trial_objectives["concurrency"])
         )
         model_run_config = ModelRunConfig(
             model.model_name(), model_config_variant, perf_analyzer_config
diff --git a/model_analyzer/config/generate/run_config_generator_factory.py b/model_analyzer/config/generate/run_config_generator_factory.py
index 88579ab67..74adf1fe0 100755
--- a/model_analyzer/config/generate/run_config_generator_factory.py
+++ b/model_analyzer/config/generate/run_config_generator_factory.py
@@ -14,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import List
+from typing import Dict, List
 
 from model_analyzer.config.generate.model_profile_spec import ModelProfileSpec
 from model_analyzer.config.generate.model_variant_name_manager import (
@@ -60,7 +60,7 @@ def create_run_config_generator(
         client: TritonClient,
         result_manager: ResultManager,
         model_variant_name_manager: ModelVariantNameManager,
-        search_parameters: SearchParameters,
+        search_parameters: Dict[str, SearchParameters],
     ) -> ConfigGeneratorInterface:
         """
         Parameters
@@ -149,7 +149,7 @@ def _create_optuna_plus_concurrency_sweep_run_config_generator(
         models: List[ModelProfileSpec],
         result_manager: ResultManager,
         model_variant_name_manager: ModelVariantNameManager,
-        search_parameters: SearchParameters,
+        search_parameters: Dict[str, SearchParameters],
     ) -> ConfigGeneratorInterface:
         return OptunaPlusConcurrencySweepRunConfigGenerator(
             config=command_config,
diff --git a/model_analyzer/config/generate/search_parameter.py b/model_analyzer/config/generate/search_parameter.py
index 0c342cc56..7dfba1447 100755
--- a/model_analyzer/config/generate/search_parameter.py
+++ b/model_analyzer/config/generate/search_parameter.py
@@ -28,7 +28,8 @@ class ParameterUsage(Enum):
 class ParameterCategory(Enum):
     INTEGER = auto()
     EXPONENTIAL = auto()
-    LIST = auto()
+    STR_LIST = auto()
+    INT_LIST = auto()
 
 
 @dataclass
@@ -40,7 +41,7 @@ class SearchParameter:
     usage: ParameterUsage
     category: ParameterCategory
 
-    # This is only applicable to LIST category
+    # This is only applicable to the LIST categories
    enumerated_list: Optional[List[Any]] = None
 
     # These are only applicable to INTEGER and EXPONENTIAL categories
diff --git a/model_analyzer/config/generate/search_parameters.py b/model_analyzer/config/generate/search_parameters.py
index 015e9ac90..0ffa8ebed 100755
--- a/model_analyzer/config/generate/search_parameters.py
+++ b/model_analyzer/config/generate/search_parameters.py
@@ -49,11 +49,8 @@
 
         self._populate_search_parameters()
 
-    def get_parameters(self) -> List[SearchParameter]:
-        return [v for v in self._search_parameters.values()]
-
-    def get_parameter(self, name: str) -> SearchParameter:
-        return self._search_parameters[name]
+    def get_parameter(self, name: str) -> Optional[SearchParameter]:
+        return self._search_parameters.get(name)
 
     def get_type(self, name: str) -> ParameterUsage:
         return self._search_parameters[name].usage
@@ -73,6 +70,8 @@ def get_list(self, name: str) -> Optional[List[Any]]:
 
     def _populate_search_parameters(self) -> None:
         if self._parameters:
             self._populate_parameters()
+        else:
+            self._populate_default_parameters()
 
         self._populate_model_config_parameters()
@@ -81,15 +80,21 @@ def _populate_parameters(self) -> None:
         self._populate_concurrency()
         # TODO: Populate request rate - TMA-1903
 
+    def _populate_default_parameters(self) -> None:
+        # Always populate batch sizes if nothing is specified
+        # TODO: TMA-1884: Will need to add concurrency if the user wants this searched
+        self._populate_batch_sizes()
+
     def _populate_model_config_parameters(self) -> None:
         self._populate_instance_group()
         self._populate_max_queue_delay_microseconds()
 
     def _populate_batch_sizes(self) -> None:
-        if self._parameters["batch_sizes"]:
+        if self._parameters and self._parameters["batch_sizes"]:
             self._populate_list_parameter(
                 parameter_name="batch_sizes",
                 parameter_list=self._parameters["batch_sizes"],
+                parameter_category=ParameterCategory.INT_LIST,
             )
         else:
             self._populate_rcs_parameter(
@@ -103,6 +108,7 @@ def _populate_concurrency(self) -> None:
             self._populate_list_parameter(
                 parameter_name="concurrency",
                 parameter_list=self._parameters["concurrency"],
+                parameter_category=ParameterCategory.INT_LIST,
             )
         else:
             self._populate_rcs_parameter(
@@ -135,6 +141,7 @@ def _populate_instance_group(self) -> None:
             self._populate_list_parameter(
                 parameter_name="instance_group",
                 parameter_list=parameter_list,
+                parameter_category=ParameterCategory.INT_LIST,
             )
         else:
             self._populate_rcs_parameter(
@@ -157,6 +164,7 @@ def _populate_max_queue_delay_microseconds(self) -> None:
                 parameter_list=self._model_config_parameters["dynamic_batching"][0][
                     "max_queue_delay_microseconds"
                 ],
+                parameter_category=ParameterCategory.INT_LIST,
             )
 
     def _is_max_queue_delay_in_model_config_parameters(self) -> bool:
@@ -176,14 +184,15 @@ def _is_max_queue_delay_in_model_config_parameters(self) -> bool:
 
     def _populate_list_parameter(
         self,
         parameter_name: str,
-        parameter_list: List[int],
+        parameter_list: List[int | str],
+        parameter_category: ParameterCategory,
     ) -> None:
         usage = self._determine_parameter_usage(parameter_name)
 
         self._add_search_parameter(
             name=parameter_name,
             usage=usage,
-            category=ParameterCategory.LIST,
+            category=parameter_category,
             enumerated_list=parameter_list,
         )
@@ -257,7 +266,10 @@ def _check_for_illegal_input(
         max_range: Optional[int],
         enumerated_list: List[Any],
     ) -> None:
-        if category is ParameterCategory.LIST:
+        if (
+            category is ParameterCategory.INT_LIST
+            or category is ParameterCategory.STR_LIST
+        ):
             self._check_for_illegal_list_input(min_range, max_range, enumerated_list)
         else:
             if min_range is None or max_range is None:
diff --git a/model_analyzer/model_manager.py b/model_analyzer/model_manager.py
index 598965e00..9723f9ddb 100755
--- a/model_analyzer/model_manager.py
+++ b/model_analyzer/model_manager.py
@@ -15,7 +15,7 @@
 # limitations under the License.
 
 import logging
-from typing import List, Optional
+from typing import Dict, List, Optional
 
 from model_analyzer.config.generate.model_variant_name_manager import (
     ModelVariantNameManager,
 )
@@ -60,7 +60,7 @@ def __init__(
         result_manager: ResultManager,
         state_manager: AnalyzerStateManager,
         constraint_manager: ConstraintManager,
-        search_parameters: SearchParameters,
+        search_parameters: Dict[str, SearchParameters],
     ):
         """
         Parameters
diff --git a/tests/test_optuna_run_config_generator.py b/tests/test_optuna_run_config_generator.py
index 4609dfbde..34670f3a2 100755
--- a/tests/test_optuna_run_config_generator.py
+++ b/tests/test_optuna_run_config_generator.py
@@ -24,6 +24,7 @@
 from model_analyzer.config.generate.optuna_run_config_generator import (
     OptunaRunConfigGenerator,
 )
+from model_analyzer.config.generate.search_parameters import SearchParameters
 from model_analyzer.config.input.config_defaults import DEFAULT_BATCH_SIZES
 from model_analyzer.config.input.objects.config_model_profile_spec import (
     ConfigModelProfileSpec,
@@ -55,12 +56,19 @@ def setUp(self):
         ]
 
         config = self._create_config()
+        model = config.profile_models[0]
+        search_parameters = SearchParameters(
+            config=config,
+            parameters={},
+            model_config_parameters=model.model_config_parameters(),
+        )
+
         self._rcg = OptunaRunConfigGenerator(
             config=config,
             gpu_count=1,
             models=self._mock_models,
             model_variant_name_manager=ModelVariantNameManager(),
-            search_parameters=MagicMock(),
+            search_parameters={"add_sub": search_parameters},
             seed=100,
         )
 
@@ -85,8 +93,8 @@ def test_create_objective_based_run_config(self):
         Test that an objective based run config is properly created
         """
         trial = self._rcg._study.ask()
-        self._rcg._create_trial_objectives(trial)
-        run_config = self._rcg._create_objective_based_run_config()
+        trial_objectives = self._rcg._create_trial_objectives(trial)
+        run_config = self._rcg._create_objective_based_run_config(trial_objectives)
 
         model_config = run_config.model_run_configs()[0].model_config()
         perf_config = run_config.model_run_configs()[0].perf_config()
@@ -94,10 +102,14 @@
         self.assertEqual(model_config.to_dict()["name"], self._test_config_dict["name"])
 
         # These values are the result of using a fixed seed of 100
-        self.assertEqual(model_config.to_dict()["maxBatchSize"], 8)
-        self.assertEqual(model_config.to_dict()["instanceGroup"][0]["count"], 5)
+        self.assertEqual(model_config.to_dict()["maxBatchSize"], 16)
+        self.assertEqual(model_config.to_dict()["instanceGroup"][0]["count"], 2)
+        self.assertEqual(
+            model_config.to_dict()["dynamicBatching"]["maxQueueDelayMicroseconds"],
+            "200",
+        )
         self.assertEqual(perf_config["batch-size"], DEFAULT_BATCH_SIZES)
-        self.assertEqual(perf_config["concurrency-range"], 80)
+        self.assertEqual(perf_config["concurrency-range"], 64)
 
     def _create_config(self, additional_args=[]):
         args = [
@@ -115,7 +127,11 @@ def _create_config(self, additional_args=[]):
         # yapf: disable
         yaml_str = ("""
             profile_models:
-                - my-model
+                add_sub:
+                    model_config_parameters:
+                        dynamic_batching:
+                            max_queue_delay_microseconds: [100, 200, 300]
+
             """)
         # yapf: enable
diff --git a/tests/test_search_parameters.py b/tests/test_search_parameters.py
index 64c154912..6cc1ea142 100755
--- a/tests/test_search_parameters.py
+++ b/tests/test_search_parameters.py
@@ -78,7 +78,7 @@ def setUp(self):
         self.search_parameters._add_search_parameter(
             name="size",
             usage=ParameterUsage.BUILD,
-            category=ParameterCategory.LIST,
+            category=ParameterCategory.STR_LIST,
             enumerated_list=["FP8", "FP16", "FP32"],
         )
 
@@ -123,7 +123,7 @@ def test_list_parameter(self):
             self.search_parameters.get_type("size"),
         )
         self.assertEqual(
-            ParameterCategory.LIST,
+            ParameterCategory.STR_LIST,
             self.search_parameters.get_category("size"),
         )
         self.assertEqual(
@@ -163,14 +163,14 @@ def test_illegal_inputs(self):
             self.search_parameters._add_search_parameter(
                 name="size",
                 usage=ParameterUsage.BUILD,
-                category=ParameterCategory.LIST,
+                category=ParameterCategory.INT_LIST,
             )
 
         with self.assertRaises(TritonModelAnalyzerException):
             self.search_parameters._add_search_parameter(
                 name="size",
                 usage=ParameterUsage.BUILD,
-                category=ParameterCategory.LIST,
+                category=ParameterCategory.STR_LIST,
                 enumerated_list=["FP8", "FP16", "FP32"],
                 min_range=0,
             )
@@ -179,7 +179,7 @@ def test_illegal_inputs(self):
             self.search_parameters._add_search_parameter(
                 name="size",
                 usage=ParameterUsage.BUILD,
-                category=ParameterCategory.LIST,
+                category=ParameterCategory.STR_LIST,
                 enumerated_list=["FP8", "FP16", "FP32"],
                 max_range=10,
             )
@@ -297,7 +297,7 @@ def test_search_parameter_creation_multi_model_non_default(self):
             "batch_sizes"
         )
         self.assertEqual(ParameterUsage.MODEL, batch_sizes.usage)
-        self.assertEqual(ParameterCategory.LIST, batch_sizes.category)
+        self.assertEqual(ParameterCategory.INT_LIST, batch_sizes.category)
         self.assertEqual([16, 32, 64], batch_sizes.enumerated_list)
 
         # concurrency
@@ -320,14 +320,14 @@ def test_search_parameter_creation_multi_model_non_default(self):
             "instance_group"
         )
         self.assertEqual(ParameterUsage.MODEL, instance_group.usage)
-        self.assertEqual(ParameterCategory.LIST, instance_group.category)
+        self.assertEqual(ParameterCategory.INT_LIST, instance_group.category)
         self.assertEqual([1, 2, 3, 4], instance_group.enumerated_list)
 
         instance_group = analyzer._search_parameters["add_sub"].get_parameter(
             "max_queue_delay_microseconds"
         )
         self.assertEqual(ParameterUsage.MODEL, instance_group.usage)
-        self.assertEqual(ParameterCategory.LIST, instance_group.category)
+        self.assertEqual(ParameterCategory.INT_LIST, instance_group.category)
         self.assertEqual([100, 200, 300], instance_group.enumerated_list)
 
         # ===================================================================
@@ -354,7 +354,7 @@ def test_search_parameter_creation_multi_model_non_default(self):
             "concurrency"
         )
         self.assertEqual(ParameterUsage.RUNTIME, concurrency.usage)
-        self.assertEqual(ParameterCategory.LIST, concurrency.category)
+        self.assertEqual(ParameterCategory.INT_LIST, concurrency.category)
         self.assertEqual([1, 8, 64, 256], concurrency.enumerated_list)
 
         # instance_group