triton-inference-server · nv-braf · Oct 18, 2023 · Oct 16, 2023 · Oct 16, 2023 · Oct 16, 2023
diff --git a/model_analyzer/config/generate/perf_analyzer_config_generator.py b/model_analyzer/config/generate/perf_analyzer_config_generator.py
@@ -17,16 +17,16 @@
 import json
 import logging
 from itertools import repeat
-from typing import Dict, Generator, List, Optional, Tuple
+from typing import Any, Dict, Generator, List, Optional, Tuple
 
 from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
 from model_analyzer.config.input.config_defaults import (
     DEFAULT_INPUT_JSON_PATH,
     DEFAULT_RUN_CONFIG_MIN_CONCURRENCY,
     DEFAULT_RUN_CONFIG_MIN_MAX_TOKEN_COUNT,
-    DEFAULT_RUN_CONFIG_MIN_PERIODIC_CONCURRENCY,
     DEFAULT_RUN_CONFIG_MIN_REQUEST_RATE,
     DEFAULT_RUN_CONFIG_MIN_TEXT_INPUT_LENGTH,
+    DEFAULT_RUN_CONFIG_PERIODIC_CONCURRENCY,
 )
 from model_analyzer.constants import (
     LOGGER_NAME,
@@ -214,9 +214,10 @@ def _create_input_dict(self, model_perf_analyzer_flags: Dict) -> Dict:
         else:
             return {}
 
-    def _create_inference_load_list(self) -> List[int]:
-        # The two possible inference loads are request rate or concurrency
-        # Concurrency is the default and will be used unless the user specifies
+    def _create_inference_load_list(self) -> List[Any]:
+        # The three possible inference loads are request rate, concurrency or periodic concurrency
+        # For LLM models, periodic concurrency is used; for non-LLM models,
+        # concurrency is the default and will be used unless the user specifies
         # request rate, either as a model parameter or a config option
         if self._cli_config.is_llm_model():
             return self._create_periodic_concurrency_list()
@@ -247,16 +248,50 @@ def _create_concurrency_list(self) -> List[int]:
                 self._cli_config.run_config_search_max_concurrency,
             )
 
-    def _create_periodic_concurrency_list(self) -> List[int]:
+    def _create_periodic_concurrency_list(self) -> List[str]:
         if self._model_parameters["periodic_concurrency"]:
             return sorted(self._model_parameters["periodic_concurrency"])
         elif self._cli_config.run_config_search_disable:
-            return [DEFAULT_RUN_CONFIG_MIN_PERIODIC_CONCURRENCY]
+            return [DEFAULT_RUN_CONFIG_PERIODIC_CONCURRENCY]
+
+        periodic_concurrencies = self._generate_periodic_concurrencies()
+        return periodic_concurrencies
+
+    def _generate_periodic_concurrencies(self) -> List[str]:
+        periodic_concurrencies = []
+
+        periodic_concurrency_doubled_list = utils.generate_doubled_list(
+            self._cli_config.run_config_search_min_periodic_concurrency,
+            self._cli_config.run_config_search_max_periodic_concurrency,
+        )
+
+        step_doubled_list = utils.generate_doubled_list(
+            self._cli_config.run_config_search_min_periodic_concurrency_step,
+            self._cli_config.run_config_search_max_periodic_concurrency_step,
+        )
+
+        for start in periodic_concurrency_doubled_list:
+            for end in periodic_concurrency_doubled_list:
+                for step in step_doubled_list:
+                    if self._is_illegal_periodic_concurrency_combination(
+                        start, end, step
+                    ):
+                        continue
+
+                    periodic_concurrencies.append(f"{start}:{end}:{step}")
+        return periodic_concurrencies
+
+    def _is_illegal_periodic_concurrency_combination(
+        self, start: int, end: int, step: int
+    ) -> bool:
+        if start > end:
+            return True
+        elif start == end and step != 1:
+            return True
+        elif (end - start) % step:
+            return True
         else:
-            return utils.generate_doubled_list(
-                self._cli_config.run_config_search_min_periodic_concurrency,
-                self._cli_config.run_config_search_max_periodic_concurrency,
-            )
+            return False
 
     def _create_text_input_length_list(self) -> List[int]:
         if not self._cli_config.is_llm_model():

diff --git a/model_analyzer/config/input/config_command_profile.py b/model_analyzer/config/input/config_command_profile.py
@@ -498,7 +498,7 @@ def _add_profile_models_configs(self):
                             schema={
                                 "batch_sizes": ConfigListNumeric(type_=int),
                                 "concurrency": ConfigListNumeric(type_=int),
-                                "periodic_concurrency": ConfigListNumeric(type_=int),
+                                "periodic_concurrency": ConfigListString(),
                                 "request_rate": ConfigListNumeric(type_=int),
                                 "request_period": ConfigListNumeric(type_=int),
                                 "text_input_length": ConfigListNumeric(type_=int),
@@ -569,9 +569,8 @@ def _add_profile_models_configs(self):
             ConfigField(
                 "periodic_concurrency",
                 flags=["--periodic-concurrency"],
-                field_type=ConfigListNumeric(int),
-                description="Comma-delimited list of periodic concurrency values or ranges <start:end:step>"
-                " to be used during profiling",
+                field_type=ConfigListString(),
+                description="A list of ranges <start:end:step> to be used during profiling",
             )
         )
         self._add_config(

diff --git a/model_analyzer/config/input/config_defaults.py b/model_analyzer/config/input/config_defaults.py
@@ -45,6 +45,7 @@
 DEFAULT_CLIENT_PROTOCOL = "grpc"
 DEFAULT_RUN_CONFIG_MAX_CONCURRENCY = 1024
 DEFAULT_RUN_CONFIG_MIN_CONCURRENCY = 1
+DEFAULT_RUN_CONFIG_PERIODIC_CONCURRENCY = "1:1:1"
 DEFAULT_RUN_CONFIG_MAX_PERIODIC_CONCURRENCY = 1024
 DEFAULT_RUN_CONFIG_MIN_PERIODIC_CONCURRENCY = 16
 DEFAULT_RUN_CONFIG_MAX_PERIODIC_CONCURRENCY_STEP = 128

diff --git a/model_analyzer/config/input/config_list_numeric.py b/model_analyzer/config/input/config_list_numeric.py
@@ -103,7 +103,14 @@ def set_value(self, value):
         try:
             if self._is_string(value):
                 self._value = []
-                value = value.split(",")
+                if "," in value:
+                    value = value.split(",")
+                elif ":" in value:
+                    value = value.split(":")
+                    if len(value) == 2:
+                        value = {"start": value[0], "stop": value[1], "step": 1}
+                    else:
+                        value = {"start": value[0], "stop": value[1], "step": value[2]}
 
             if self._is_list(value):
                 new_value = self._process_list(value)

diff --git a/tests/common/test_utils.py b/tests/common/test_utils.py
@@ -29,7 +29,7 @@
     DEFAULT_OUTPUT_MODEL_REPOSITORY,
     DEFAULT_RUN_CONFIG_MIN_CONCURRENCY,
     DEFAULT_RUN_CONFIG_MIN_MAX_TOKEN_COUNT,
-    DEFAULT_RUN_CONFIG_MIN_PERIODIC_CONCURRENCY,
+    DEFAULT_RUN_CONFIG_PERIODIC_CONCURRENCY,
     DEFAULT_TRITON_GRPC_ENDPOINT,
     DEFAULT_TRITON_HTTP_ENDPOINT,
     DEFAULT_TRITON_INSTALL_PATH,
@@ -241,7 +241,7 @@ def construct_perf_analyzer_config(
     export_file_name="my-model-results.json",
     batch_size=DEFAULT_BATCH_SIZES,
     concurrency=DEFAULT_RUN_CONFIG_MIN_CONCURRENCY,
-    periodic_concurrency=DEFAULT_RUN_CONFIG_MIN_PERIODIC_CONCURRENCY,
+    periodic_concurrency=DEFAULT_RUN_CONFIG_PERIODIC_CONCURRENCY,
     request_rate=None,
     max_token_count=DEFAULT_RUN_CONFIG_MIN_MAX_TOKEN_COUNT,
     launch_mode=DEFAULT_TRITON_LAUNCH_MODE,
@@ -264,7 +264,7 @@ def construct_perf_analyzer_config(
         The batch size for this PA configuration
     concurrency: int
         The concurrency value for this PA configuration
-    periodic_concurrency:
+    periodic_concurrency: str
         The periodic concurrency value for this PA configuration
     request_rate: int
         The request rate value for this PA configuration

diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -145,7 +145,7 @@ def get_test_options():
         #   expected_default_value
         OptionStruct("intlist", "profile", "--batch-sizes", "-b", "2, 4, 6", "1"),
         OptionStruct("intlist", "profile", "--concurrency", "-c", "1, 2, 3", None),
-        OptionStruct("intlist", "profile", "--periodic-concurrency", None, "1, 2, 3", None),
+        OptionStruct("stringlist", "profile", "--periodic-concurrency", None, '"5:50:5", "10:100:10"', None, None),
         OptionStruct("intlist", "profile", "--request-rate", None, "1, 2, 3", None),
         OptionStruct("intlist", "profile", "--request-period", None, "1, 2, 3", None),
         OptionStruct("intlist", "profile", "--text-input-length", None, "1, 2, 3", None),
@@ -603,9 +603,15 @@ def _convert_string_to_numeric(self, number):
         return float(number) if "." in number else int(number)
 
     def _convert_string_to_int_list(self, list_values):
-        ret_val = [int(x) for x in list_values.split(",")]
+        if ":" in list_values:
+            ret_val = [int(x) for x in list_values.split(":")]
+            ret_val = list(range(ret_val[0], ret_val[1] + 1, ret_val[2]))
+        else:
+            ret_val = [int(x) for x in list_values.split(",")]
+
         if len(ret_val) == 1:
             return ret_val[0]
+
         return ret_val
 
     def _convert_string_to_string_list(self, list_values):

diff --git a/tests/test_perf_analyzer_config_generator.py b/tests/test_perf_analyzer_config_generator.py
@@ -577,15 +577,23 @@ def test_llm_search_max_token_count(self):
         # yapf: enable
 
         max_token_counts = utils.generate_doubled_list(1, 256)
-        expected_configs = [
-            construct_perf_analyzer_config(max_token_count=mtc, llm_search_mode=True)
-            for mtc in max_token_counts
-        ]
+        periodic_concurrencies = ["16:32:4", "16:32:8", "16:32:16"]
+
+        expected_configs = []
+        for mtc in max_token_counts:
+            for pc in periodic_concurrencies:
+                expected_configs.append(
+                    construct_perf_analyzer_config(
+                        max_token_count=mtc,
+                        llm_search_mode=True,
+                        periodic_concurrency=pc,
+                    )
+                )
 
         pa_cli_args = [
             "--llm-search-enable",
             "--run-config-search-max-periodic-concurrency",
-            "16",
+            "32",
             "--run-config-search-max-text-input-length",
             "1",
         ]
@@ -611,17 +619,109 @@ def test_llm_search_text_input_length(self):
         # yapf: enable
 
         text_input_lengths = utils.generate_doubled_list(1, 1024)
+        periodic_concurrencies = ["16:32:4", "16:32:8", "16:32:16"]
+
+        expected_configs = []
+        for _ in text_input_lengths:
+            for pc in periodic_concurrencies:
+                expected_configs.append(
+                    construct_perf_analyzer_config(
+                        llm_search_mode=True, periodic_concurrency=pc
+                    )
+                )
+
+        pa_cli_args = [
+            "--llm-search-enable",
+            "--run-config-search-max-periodic-concurrency",
+            "32",
+            "--run-config-search-max-max-token-count",
+            "1",
+        ]
+        self._run_and_test_perf_analyzer_config_generator(
+            yaml_str, expected_configs, pa_cli_args
+        )
+
+    def test_periodic_concurrency_parameter(self):
+        """
+        Test LLM Search:
+            - periodic-concurrency: 10:100:10
+
+        Max token set to 1
+        Text input set to 1
+        """
+
+        # yapf: disable
+        yaml_str = ("""
+            perf_analyzer_flags:
+                input-data: input-data.json
+            profile_models:
+                - my-model
+            """)
+        # yapf: enable
+
         expected_configs = [
-            construct_perf_analyzer_config(llm_search_mode=True)
-            for pl in text_input_lengths
+            construct_perf_analyzer_config(
+                llm_search_mode=True, periodic_concurrency="10:100:10"
+            )
         ]
 
         pa_cli_args = [
             "--llm-search-enable",
-            "--run-config-search-max-periodic-concurrency",
-            "16",
+            "--periodic-concurrency",
+            "10:100:10",
             "--run-config-search-max-max-token-count",
             "1",
+            "--run-config-search-max-text-input-length",
+            "1",
+        ]
+        self._run_and_test_perf_analyzer_config_generator(
+            yaml_str, expected_configs, pa_cli_args
+        )
+
+    def test_periodic_concurrency_search(self):
+        """
+        Test LLM Search:
+            - Periodic Concurrency using RCS values
+
+        Max token set to 1
+        Text input set to 1
+        """
+
+        # yapf: disable
+        yaml_str = ("""
+            perf_analyzer_flags:
+                input-data: input-data.json
+            profile_models:
+                - my-model
+            """)
+        # yapf: enable
+
+        periodic_concurrencies = [
+            "16:32:8",
+            "16:32:16",
+            "16:64:8",
+            "16:64:16",
+            "32:64:8",
+            "32:64:16",
+            "32:64:32",
+        ]
+        expected_configs = [
+            construct_perf_analyzer_config(
+                llm_search_mode=True, periodic_concurrency=pc
+            )
+            for pc in periodic_concurrencies
+        ]
+
+        pa_cli_args = [
+            "--llm-search-enable",
+            "--run-config-search-max-max-token-count",
+            "1",
+            "--run-config-search-max-text-input-length",
+            "1",
+            "--run-config-search-max-periodic-concurrency",
+            "64",
+            "--run-config-search-min-periodic-concurrency-step",
+            "8",
         ]
         self._run_and_test_perf_analyzer_config_generator(
             yaml_str, expected_configs, pa_cli_args