
Adding support for concurrency formula as an option in optuna search
nv-braf committed May 28, 2024
1 parent b572c56 commit c2fb047
Showing 7 changed files with 91 additions and 15 deletions.
8 changes: 4 additions & 4 deletions model_analyzer/config/generate/optuna_run_config_generator.py
@@ -169,10 +169,10 @@ def _create_trial_objectives(self, trial: optuna.Trial) -> TrialObjectives:
                 trial, parameter_name, parameter
             )

-        # TODO: TMA-1884: Need an option to choose btw. concurrency formula and optuna searching
-        trial_objectives["concurrency"] = self._get_objective_concurrency(
-            trial_objectives
-        )
+        if self._config.use_concurrency_formula:
+            trial_objectives["concurrency"] = self._get_objective_concurrency(
+                trial_objectives
+            )

         return trial_objectives

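Note on the change above: previously `_create_trial_objectives` always derived concurrency from `_get_objective_concurrency`; it now does so only when `--use-concurrency-formula` is set, and otherwise leaves concurrency to be suggested by Optuna like any other trial objective. The repository's actual formula lives in `_get_objective_concurrency`; purely as an illustration, a formula of this kind derives concurrency from objectives the trial has already chosen, for example:

    # Hypothetical sketch only -- not the implementation in this commit.
    # Derives a concurrency from already-chosen trial objectives instead
    # of letting Optuna search the concurrency space.
    def concurrency_from_formula(
        max_batch_size: int, instance_count: int, max_concurrency: int = 1024
    ) -> int:
        # Keep enough requests in flight to saturate every instance's batch,
        # capped so the derived value stays inside an allowed range.
        return min(2 * max_batch_size * instance_count, max_concurrency)
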
15 changes: 4 additions & 11 deletions model_analyzer/config/generate/search_parameters.py
@@ -90,23 +90,14 @@ def _number_of_configurations_for_parameter(
         return number_of_parameter_configs

     def _populate_search_parameters(self) -> None:
-        if self._parameters:
-            self._populate_parameters()
-        else:
-            self._populate_default_parameters()
-
+        self._populate_parameters()
         self._populate_model_config_parameters()

     def _populate_parameters(self) -> None:
         self._populate_batch_sizes()
         self._populate_concurrency()
         # TODO: Populate request rate - TMA-1903

-    def _populate_default_parameters(self) -> None:
-        # Always populate batch sizes if nothing is specified
-        # TODO: TMA-1884: Will need to add concurrency if the user wants this searched
-        self._populate_batch_sizes()
-
     def _populate_model_config_parameters(self) -> None:
         self._populate_instance_group()
         self._populate_max_queue_delay_microseconds()
@@ -126,12 +117,14 @@ def _populate_batch_sizes(self) -> None:
         )

     def _populate_concurrency(self) -> None:
-        if self._parameters["concurrency"]:
+        if self._parameters and self._parameters["concurrency"]:
             self._populate_list_parameter(
                 parameter_name="concurrency",
                 parameter_list=self._parameters["concurrency"],
                 parameter_category=ParameterCategory.INT_LIST,
             )
+        elif self._config.use_concurrency_formula:
+            return
         else:
             self._populate_rcs_parameter(
                 parameter_name="concurrency",
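
Taken together with the generator change above, `_populate_concurrency` now resolves the concurrency search space three ways: an explicit `concurrency` list in the model's parameters is searched as a categorical `INT_LIST`; with `--use-concurrency-formula` the parameter is never registered (so `get_parameter("concurrency")` returns `None`, which the new test in tests/test_search_parameters.py asserts); otherwise the default range search via `_populate_rcs_parameter` applies. A self-contained sketch of that resolution order (the names here are assumptions, not the module's API):

    from typing import List, Optional

    def resolve_concurrency_search_space(
        explicit: Optional[List[int]], use_formula: bool
    ) -> Optional[List[int]]:
        if explicit:       # user-supplied list -> ParameterCategory.INT_LIST
            return explicit
        if use_formula:    # --use-concurrency-formula -> not searched at all
            return None
        return [2**n for n in range(11)]  # default exponential range (bounds assumed)
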
11 changes: 11 additions & 0 deletions model_analyzer/config/input/config_command_profile.py
@@ -96,6 +96,7 @@
     DEFAULT_TRITON_LAUNCH_MODE,
     DEFAULT_TRITON_METRICS_URL,
     DEFAULT_TRITON_SERVER_PATH,
+    DEFAULT_USE_CONCURRENCY_FORMULA,
 )
 from .config_enum import ConfigEnum
 from .config_field import ConfigField
@@ -936,6 +937,16 @@ def _add_run_search_configs(self):
                 description="Maximum percentage of the search space to profile when using Optuna",
             )
         )
+        self._add_config(
+            ConfigField(
+                "use_concurrency_formula",
+                flags=["--use-concurrency-formula"],
+                field_type=ConfigPrimitive(bool),
+                parser_args={"action": "store_true"},
+                default_value=DEFAULT_USE_CONCURRENCY_FORMULA,
+                description="Use the concurrency formula instead of searching the concurrency space in Optuna search mode",
+            )
+        )
         self._add_config(
             ConfigField(
                 "run_config_search_mode",
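
The new field is a `store_true` boolean, so the flag takes no value and defaults to `DEFAULT_USE_CONCURRENCY_FORMULA` (False). Mirroring the arguments used by the tests in this commit, an invocation enabling it might look like:

    model-analyzer profile \
        --model-repository cli-repository \
        -f path-to-config-file \
        --run-config-search-mode optuna \
        --use-concurrency-formula

where the `-f` config file contains `profile_models: add_sub`.
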
1 change: 1 addition & 0 deletions model_analyzer/config/input/config_defaults.py
@@ -56,6 +56,7 @@
 DEFAULT_RUN_CONFIG_PROFILE_MODELS_CONCURRENTLY_ENABLE = False
 DEFAULT_OPTUNA_MIN_PERCENTAGE_OF_SEARCH_SPACE = 5
 DEFAULT_OPTUNA_MAX_PERCENTAGE_OF_SEARCH_SPACE = 10
+DEFAULT_USE_CONCURRENCY_FORMULA = False
 DEFAULT_REQUEST_RATE_SEARCH_ENABLE = False
 DEFAULT_TRITON_LAUNCH_MODE = "local"
 DEFAULT_TRITON_DOCKER_IMAGE = "nvcr.io/nvidia/tritonserver:24.04-py3"
2 changes: 2 additions & 0 deletions tests/test_cli.py
@@ -65,6 +65,7 @@ def get_test_options():
         OptionStruct("bool", "profile","--skip-summary-reports"),
         OptionStruct("bool", "profile","--skip-detailed-reports"),
         OptionStruct("bool", "profile","--always-report-gpu-metrics"),
+        OptionStruct("bool", "profile","--use-concurrency-formula"),

         #Int/Float options
         # Options format:
@@ -383,6 +384,7 @@ def _test_boolean_option(self, option_struct):
         cli = option_struct.cli_subcommand()
         _, config = cli.parse()
         option_value = config.get_config().get(option_with_underscores).value()
+        # Boolean values must always default to False
         self.assertEqual(option_value, False)

         # Test boolean option
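
For reference, the boolean check this hunk touches behaves like argparse's `store_true` (the real parsing goes through `ConfigField`; this is only an equivalent sketch):

    import argparse

    # Equivalent argparse behavior for the new flag (sketch, not the real parser).
    parser = argparse.ArgumentParser()
    parser.add_argument("--use-concurrency-formula", action="store_true")  # default False

    assert parser.parse_args([]).use_concurrency_formula is False
    assert parser.parse_args(["--use-concurrency-formula"]).use_concurrency_formula is True
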
37 changes: 37 additions & 0 deletions tests/test_optuna_run_config_generator.py
@@ -101,6 +101,43 @@ def test_create_objective_based_run_config(self):

         self.assertEqual(model_config.to_dict()["name"], self._test_config_dict["name"])

+        # These values are the result of using a fixed seed of 100
+        self.assertEqual(model_config.to_dict()["maxBatchSize"], 16)
+        self.assertEqual(model_config.to_dict()["instanceGroup"][0]["count"], 2)
+        self.assertEqual(
+            model_config.to_dict()["dynamicBatching"]["maxQueueDelayMicroseconds"],
+            "100",
+        )
+        self.assertEqual(perf_config["batch-size"], DEFAULT_BATCH_SIZES)
+        self.assertEqual(perf_config["concurrency-range"], 16)
+
+    def test_create_run_config_with_concurrency_formula(self):
+        config = self._create_config(["--use-concurrency-formula"])
+        model = config.profile_models[0]
+        search_parameters = SearchParameters(
+            config=config,
+            parameters={},
+            model_config_parameters=model.model_config_parameters(),
+        )
+
+        rcg = OptunaRunConfigGenerator(
+            config=config,
+            gpu_count=1,
+            models=self._mock_models,
+            model_variant_name_manager=ModelVariantNameManager(),
+            search_parameters={"add_sub": search_parameters},
+            seed=100,
+        )
+
+        trial = rcg._study.ask()
+        trial_objectives = rcg._create_trial_objectives(trial)
+        run_config = rcg._create_objective_based_run_config(trial_objectives)
+
+        model_config = run_config.model_run_configs()[0].model_config()
+        perf_config = run_config.model_run_configs()[0].perf_config()
+
+        self.assertEqual(model_config.to_dict()["name"], self._test_config_dict["name"])
+
         # These values are the result of using a fixed seed of 100
         self.assertEqual(model_config.to_dict()["maxBatchSize"], 16)
         self.assertEqual(model_config.to_dict()["instanceGroup"][0]["count"], 2)
32 changes: 32 additions & 0 deletions tests/test_search_parameters.py
@@ -248,6 +248,38 @@ def test_search_parameter_creation_default(self):
             default.DEFAULT_RUN_CONFIG_MAX_INSTANCE_COUNT, instance_group.max_range
         )

+    def test_search_parameter_concurrency_formula(self):
+        """
+        Test that when concurrency formula is specified it is
+        not added as a search parameter
+        """
+
+        args = [
+            "model-analyzer",
+            "profile",
+            "--model-repository",
+            "cli-repository",
+            "-f",
+            "path-to-config-file",
+            "--run-config-search-mode",
+            "optuna",
+            "--use-concurrency-formula",
+        ]
+
+        yaml_content = """
+        profile_models: add_sub
+        """
+        config = TestConfig()._evaluate_config(args=args, yaml_content=yaml_content)
+
+        analyzer = Analyzer(config, MagicMock(), MagicMock(), MagicMock())
+        analyzer._populate_search_parameters()
+
+        concurrency = analyzer._search_parameters["add_sub"].get_parameter(
+            "concurrency"
+        )
+
+        self.assertEqual(concurrency, None)
+
     def test_search_parameter_creation_multi_model_non_default(self):
         """
         Test that search parameters are correctly created in
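
The flag removes only concurrency from the search space; batch sizes and model-config parameters are still populated via `_populate_parameters` and `_populate_model_config_parameters`. A follow-on assertion in the same style as the test above could confirm this (a sketch; that `instance_group` is the registered parameter name is an assumption based on the neighboring default-creation test):

    instance_group = analyzer._search_parameters["add_sub"].get_parameter(
        "instance_group"
    )
    self.assertNotEqual(instance_group, None)
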
