diff --git a/model_analyzer/config/generate/perf_analyzer_config_generator.py b/model_analyzer/config/generate/perf_analyzer_config_generator.py
index 771e895f1..104ed79e6 100755
--- a/model_analyzer/config/generate/perf_analyzer_config_generator.py
+++ b/model_analyzer/config/generate/perf_analyzer_config_generator.py
@@ -17,16 +17,16 @@
 import json
 import logging
 from itertools import repeat
-from typing import Dict, Generator, List, Optional, Tuple
+from typing import Any, Dict, Generator, List, Optional, Tuple
 
 from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
 from model_analyzer.config.input.config_defaults import (
     DEFAULT_INPUT_JSON_PATH,
     DEFAULT_RUN_CONFIG_MIN_CONCURRENCY,
     DEFAULT_RUN_CONFIG_MIN_MAX_TOKEN_COUNT,
-    DEFAULT_RUN_CONFIG_MIN_PERIODIC_CONCURRENCY,
     DEFAULT_RUN_CONFIG_MIN_REQUEST_RATE,
     DEFAULT_RUN_CONFIG_MIN_TEXT_INPUT_LENGTH,
+    DEFAULT_RUN_CONFIG_PERIODIC_CONCURRENCY,
 )
 from model_analyzer.constants import (
     LOGGER_NAME,
@@ -214,9 +214,10 @@ def _create_input_dict(self, model_perf_analyzer_flags: Dict) -> Dict:
         else:
             return {}
 
-    def _create_inference_load_list(self) -> List[int]:
-        # The two possible inference loads are request rate or concurrency
-        # Concurrency is the default and will be used unless the user specifies
+    def _create_inference_load_list(self) -> List[Any]:
+        # The three possible inference loads are request rate, concurrency, and periodic concurrency.
+        # Periodic concurrency is used for LLM models; for non-LLM models
+        # concurrency is the default and will be used unless the user specifies
         # request rate, either as a model parameter or a config option
         if self._cli_config.is_llm_model():
             return self._create_periodic_concurrency_list()
@@ -247,16 +248,50 @@ def _create_concurrency_list(self) -> List[int]:
             self._cli_config.run_config_search_max_concurrency,
         )
 
-    def _create_periodic_concurrency_list(self) -> List[int]:
+    def _create_periodic_concurrency_list(self) -> List[str]:
         if
self._model_parameters["periodic_concurrency"]: return sorted(self._model_parameters["periodic_concurrency"]) elif self._cli_config.run_config_search_disable: - return [DEFAULT_RUN_CONFIG_MIN_PERIODIC_CONCURRENCY] + return [DEFAULT_RUN_CONFIG_PERIODIC_CONCURRENCY] + + periodic_concurrencies = self._generate_periodic_concurrencies() + return periodic_concurrencies + + def _generate_periodic_concurrencies(self) -> List[str]: + periodic_concurrencies = [] + + periodic_concurrency_doubled_list = utils.generate_doubled_list( + self._cli_config.run_config_search_min_periodic_concurrency, + self._cli_config.run_config_search_max_periodic_concurrency, + ) + + step_doubled_list = utils.generate_doubled_list( + self._cli_config.run_config_search_min_periodic_concurrency_step, + self._cli_config.run_config_search_max_periodic_concurrency_step, + ) + + for start in periodic_concurrency_doubled_list: + for end in periodic_concurrency_doubled_list: + for step in step_doubled_list: + if self._is_illegal_periodic_concurrency_combination( + start, end, step + ): + continue + + periodic_concurrencies.append(f"{start}:{end}:{step}") + return periodic_concurrencies + + def _is_illegal_periodic_concurrency_combination( + self, start: int, end: int, step: int + ) -> bool: + if start > end: + return True + elif start == end and step != 1: + return True + elif (end - start) % step: + return True else: - return utils.generate_doubled_list( - self._cli_config.run_config_search_min_periodic_concurrency, - self._cli_config.run_config_search_max_periodic_concurrency, - ) + return False def _create_text_input_length_list(self) -> List[int]: if not self._cli_config.is_llm_model(): diff --git a/model_analyzer/config/input/config_command_profile.py b/model_analyzer/config/input/config_command_profile.py index bdce45027..9da3e7d31 100755 --- a/model_analyzer/config/input/config_command_profile.py +++ b/model_analyzer/config/input/config_command_profile.py @@ -498,7 +498,7 @@ def 
_add_profile_models_configs(self):
             schema={
                 "batch_sizes": ConfigListNumeric(type_=int),
                 "concurrency": ConfigListNumeric(type_=int),
-                "periodic_concurrency": ConfigListNumeric(type_=int),
+                "periodic_concurrency": ConfigListString(),
                 "request_rate": ConfigListNumeric(type_=int),
                 "request_period": ConfigListNumeric(type_=int),
                 "text_input_length": ConfigListNumeric(type_=int),
@@ -569,9 +569,8 @@ def _add_profile_models_configs(self):
             ConfigField(
                 "periodic_concurrency",
                 flags=["--periodic-concurrency"],
-                field_type=ConfigListNumeric(int),
-                description="Comma-delimited list of periodic concurrency values or ranges "
-                " to be used during profiling",
+                field_type=ConfigListString(),
+                description="Comma-delimited list of periodic concurrency ranges <start>:<end>:<step> to be used during profiling",
             )
         )
         self._add_config(
diff --git a/model_analyzer/config/input/config_defaults.py b/model_analyzer/config/input/config_defaults.py
index bab62a4fd..aad674838 100755
--- a/model_analyzer/config/input/config_defaults.py
+++ b/model_analyzer/config/input/config_defaults.py
@@ -45,6 +45,7 @@
 DEFAULT_CLIENT_PROTOCOL = "grpc"
 DEFAULT_RUN_CONFIG_MAX_CONCURRENCY = 1024
 DEFAULT_RUN_CONFIG_MIN_CONCURRENCY = 1
+DEFAULT_RUN_CONFIG_PERIODIC_CONCURRENCY = "1:1:1"
 DEFAULT_RUN_CONFIG_MAX_PERIODIC_CONCURRENCY = 1024
 DEFAULT_RUN_CONFIG_MIN_PERIODIC_CONCURRENCY = 16
 DEFAULT_RUN_CONFIG_MAX_PERIODIC_CONCURRENCY_STEP = 128
diff --git a/model_analyzer/config/input/config_list_numeric.py b/model_analyzer/config/input/config_list_numeric.py
index 799cbdf9e..b677bcdab 100755
--- a/model_analyzer/config/input/config_list_numeric.py
+++ b/model_analyzer/config/input/config_list_numeric.py
@@ -103,7 +103,14 @@ def set_value(self, value):
         try:
             if self._is_string(value):
                 self._value = []
-                value = value.split(",")
+                if ":" not in value:
+                    value = value.split(",")
+                else:
+                    value = value.split(":")
+                    if len(value) == 2:
+                        value = {"start": value[0], "stop": value[1], "step": 1}
+                    else:
+                        value = {"start": value[0], "stop": value[1], "step":
value[2]} if self._is_list(value): new_value = self._process_list(value) diff --git a/tests/common/test_utils.py b/tests/common/test_utils.py index 380a5d404..e8448ae98 100755 --- a/tests/common/test_utils.py +++ b/tests/common/test_utils.py @@ -29,7 +29,7 @@ DEFAULT_OUTPUT_MODEL_REPOSITORY, DEFAULT_RUN_CONFIG_MIN_CONCURRENCY, DEFAULT_RUN_CONFIG_MIN_MAX_TOKEN_COUNT, - DEFAULT_RUN_CONFIG_MIN_PERIODIC_CONCURRENCY, + DEFAULT_RUN_CONFIG_PERIODIC_CONCURRENCY, DEFAULT_TRITON_GRPC_ENDPOINT, DEFAULT_TRITON_HTTP_ENDPOINT, DEFAULT_TRITON_INSTALL_PATH, @@ -241,7 +241,7 @@ def construct_perf_analyzer_config( export_file_name="my-model-results.json", batch_size=DEFAULT_BATCH_SIZES, concurrency=DEFAULT_RUN_CONFIG_MIN_CONCURRENCY, - periodic_concurrency=DEFAULT_RUN_CONFIG_MIN_PERIODIC_CONCURRENCY, + periodic_concurrency=DEFAULT_RUN_CONFIG_PERIODIC_CONCURRENCY, request_rate=None, max_token_count=DEFAULT_RUN_CONFIG_MIN_MAX_TOKEN_COUNT, launch_mode=DEFAULT_TRITON_LAUNCH_MODE, @@ -264,7 +264,7 @@ def construct_perf_analyzer_config( The batch size for this PA configuration concurrency: int The concurrency value for this PA configuration - periodic_concurrency: + periodic_concurrency: list The periodic concurrency value for this PA configuration request_rate: int The request rate value for this PA configuration diff --git a/tests/test_cli.py b/tests/test_cli.py index c6669b2c2..1a2fb84a2 100755 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -145,7 +145,7 @@ def get_test_options(): # expected_default_value OptionStruct("intlist", "profile", "--batch-sizes", "-b", "2, 4, 6", "1"), OptionStruct("intlist", "profile", "--concurrency", "-c", "1, 2, 3", None), - OptionStruct("intlist", "profile", "--periodic-concurrency", None, "1, 2, 3", None), + OptionStruct("stringlist", "profile", "--periodic-concurrency", None, '"5:50:5", "10:100:10"', None, None), OptionStruct("intlist", "profile", "--request-rate", None, "1, 2, 3", None), OptionStruct("intlist", "profile", "--request-period", None, 
"1, 2, 3", None), OptionStruct("intlist", "profile", "--text-input-length", None, "1, 2, 3", None), @@ -603,9 +603,15 @@ def _convert_string_to_numeric(self, number): return float(number) if "." in number else int(number) def _convert_string_to_int_list(self, list_values): - ret_val = [int(x) for x in list_values.split(",")] + if ":" in list_values: + ret_val = [int(x) for x in list_values.split(":")] + ret_val = list(range(ret_val[0], ret_val[1] + 1, ret_val[2])) + else: + ret_val = [int(x) for x in list_values.split(",")] + if len(ret_val) == 1: return ret_val[0] + return ret_val def _convert_string_to_string_list(self, list_values): diff --git a/tests/test_perf_analyzer_config_generator.py b/tests/test_perf_analyzer_config_generator.py index f00084335..a405e2df6 100755 --- a/tests/test_perf_analyzer_config_generator.py +++ b/tests/test_perf_analyzer_config_generator.py @@ -577,15 +577,23 @@ def test_llm_search_max_token_count(self): # yapf: enable max_token_counts = utils.generate_doubled_list(1, 256) - expected_configs = [ - construct_perf_analyzer_config(max_token_count=mtc, llm_search_mode=True) - for mtc in max_token_counts - ] + periodic_concurrencies = ["16:32:4", "16:32:8", "16:32:16"] + + expected_configs = [] + for mtc in max_token_counts: + for pc in periodic_concurrencies: + expected_configs.append( + construct_perf_analyzer_config( + max_token_count=mtc, + llm_search_mode=True, + periodic_concurrency=pc, + ) + ) pa_cli_args = [ "--llm-search-enable", "--run-config-search-max-periodic-concurrency", - "16", + "32", "--run-config-search-max-text-input-length", "1", ] @@ -611,17 +619,109 @@ def test_llm_search_text_input_length(self): # yapf: enable text_input_lengths = utils.generate_doubled_list(1, 1024) + periodic_concurrencies = ["16:32:4", "16:32:8", "16:32:16"] + + expected_configs = [] + for _ in text_input_lengths: + for pc in periodic_concurrencies: + expected_configs.append( + construct_perf_analyzer_config( + llm_search_mode=True, 
periodic_concurrency=pc + ) + ) + + pa_cli_args = [ + "--llm-search-enable", + "--run-config-search-max-periodic-concurrency", + "32", + "--run-config-search-max-max-token-count", + "1", + ] + self._run_and_test_perf_analyzer_config_generator( + yaml_str, expected_configs, pa_cli_args + ) + + def test_periodic_concurrency_parameter(self): + """ + Test LLM Search: + - periodic-concurrency: 10:100:10 + + Max token set to 1 + Text input set to 1 + """ + + # yapf: disable + yaml_str = (""" + perf_analyzer_flags: + input-data: input-data.json + profile_models: + - my-model + """) + # yapf: enable + expected_configs = [ - construct_perf_analyzer_config(llm_search_mode=True) - for pl in text_input_lengths + construct_perf_analyzer_config( + llm_search_mode=True, periodic_concurrency="10:100:10" + ) ] pa_cli_args = [ "--llm-search-enable", - "--run-config-search-max-periodic-concurrency", - "16", + "--periodic-concurrency", + "10:100:10", "--run-config-search-max-max-token-count", "1", + "--run-config-search-max-text-input-length", + "1", + ] + self._run_and_test_perf_analyzer_config_generator( + yaml_str, expected_configs, pa_cli_args + ) + + def test_periodic_concurrency_search(self): + """ + Test LLM Search: + - Period Concurrency using RCS values + + Max token set to 1 + Text input set to 1 + """ + + # yapf: disable + yaml_str = (""" + perf_analyzer_flags: + input-data: input-data.json + profile_models: + - my-model + """) + # yapf: enable + + periodic_concurrencies = [ + "16:32:8", + "16:32:16", + "16:64:8", + "16:64:16", + "32:64:8", + "32:64:16", + "32:64:32", + ] + expected_configs = [ + construct_perf_analyzer_config( + llm_search_mode=True, periodic_concurrency=pc + ) + for pc in periodic_concurrencies + ] + + pa_cli_args = [ + "--llm-search-enable", + "--run-config-search-max-max-token-count", + "1", + "--run-config-search-max-text-input-length", + "1", + "--run-config-search-max-periodic-concurrency", + "64", + 
"--run-config-search-min-periodic-concurrency-step", + "8", ] self._run_and_test_perf_analyzer_config_generator( yaml_str, expected_configs, pa_cli_args