Skip to content

Commit

Permalink
Correct how periodic concurrency works in PACG (#777)
Browse files Browse the repository at this point in the history
* Created a new class ConfigRangeNumeric and using it for periodic-concurrency

* Fixes and defaults for periodic concurrency

* First unit test passing

* PACG changes complete. Unit tests updated and passing

* Removing unneeded class

* Fixing codeQL and hwoo's review suggestions

* Adding missing else
  • Loading branch information
nv-braf authored Oct 18, 2023
1 parent e81a369 commit efea104
Show file tree
Hide file tree
Showing 7 changed files with 178 additions and 30 deletions.
57 changes: 46 additions & 11 deletions model_analyzer/config/generate/perf_analyzer_config_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,16 @@
import json
import logging
from itertools import repeat
from typing import Dict, Generator, List, Optional, Tuple
from typing import Any, Dict, Generator, List, Optional, Tuple

from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
from model_analyzer.config.input.config_defaults import (
DEFAULT_INPUT_JSON_PATH,
DEFAULT_RUN_CONFIG_MIN_CONCURRENCY,
DEFAULT_RUN_CONFIG_MIN_MAX_TOKEN_COUNT,
DEFAULT_RUN_CONFIG_MIN_PERIODIC_CONCURRENCY,
DEFAULT_RUN_CONFIG_MIN_REQUEST_RATE,
DEFAULT_RUN_CONFIG_MIN_TEXT_INPUT_LENGTH,
DEFAULT_RUN_CONFIG_PERIODIC_CONCURRENCY,
)
from model_analyzer.constants import (
LOGGER_NAME,
Expand Down Expand Up @@ -214,9 +214,10 @@ def _create_input_dict(self, model_perf_analyzer_flags: Dict) -> Dict:
else:
return {}

def _create_inference_load_list(self) -> List[int]:
# The two possible inference loads are request rate or concurrency
# Concurrency is the default and will be used unless the user specifies
def _create_inference_load_list(self) -> List[Any]:
# The three possible inference loads are request rate, concurrency or periodic concurrency
# For LLM models periodic concurrency is used; for non-LLM models
# concurrency is the default and will be used unless the user specifies
# request rate, either as a model parameter or a config option
if self._cli_config.is_llm_model():
return self._create_periodic_concurrency_list()
Expand Down Expand Up @@ -247,16 +248,50 @@ def _create_concurrency_list(self) -> List[int]:
self._cli_config.run_config_search_max_concurrency,
)

def _create_periodic_concurrency_list(self) -> List[int]:
def _create_periodic_concurrency_list(self) -> List[str]:
if self._model_parameters["periodic_concurrency"]:
return sorted(self._model_parameters["periodic_concurrency"])
elif self._cli_config.run_config_search_disable:
return [DEFAULT_RUN_CONFIG_MIN_PERIODIC_CONCURRENCY]
return [DEFAULT_RUN_CONFIG_PERIODIC_CONCURRENCY]

periodic_concurrencies = self._generate_periodic_concurrencies()
return periodic_concurrencies

def _generate_periodic_concurrencies(self) -> List[str]:
    """
    Build the periodic concurrency search space from the CLI config.

    The candidate start/end values come from utils.generate_doubled_list
    called with the configured min/max periodic concurrency, and the
    candidate steps come from the same helper called with the configured
    min/max periodic concurrency step. Every (start, end, step) triple is
    considered, and the ones rejected by
    _is_illegal_periodic_concurrency_combination are dropped.

    Returns
    -------
    List[str]
        "<start>:<end>:<step>" strings, ordered with start varying
        slowest, then end, then step.
    """
    concurrency_values = utils.generate_doubled_list(
        self._cli_config.run_config_search_min_periodic_concurrency,
        self._cli_config.run_config_search_max_periodic_concurrency,
    )
    step_values = utils.generate_doubled_list(
        self._cli_config.run_config_search_min_periodic_concurrency_step,
        self._cli_config.run_config_search_max_periodic_concurrency_step,
    )

    # The same list supplies both ends of the range; the legality check
    # filters out the combinations that do not form a valid range.
    return [
        f"{start}:{end}:{step}"
        for start in concurrency_values
        for end in concurrency_values
        for step in step_values
        if not self._is_illegal_periodic_concurrency_combination(start, end, step)
    ]

def _is_illegal_periodic_concurrency_combination(
self, start: int, end: int, step: int
) -> bool:
if start > end:
return True
elif start == end and step != 1:
return True
elif (end - start) % step:
return True
else:
return utils.generate_doubled_list(
self._cli_config.run_config_search_min_periodic_concurrency,
self._cli_config.run_config_search_max_periodic_concurrency,
)
return False

def _create_text_input_length_list(self) -> List[int]:
if not self._cli_config.is_llm_model():
Expand Down
7 changes: 3 additions & 4 deletions model_analyzer/config/input/config_command_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,7 @@ def _add_profile_models_configs(self):
schema={
"batch_sizes": ConfigListNumeric(type_=int),
"concurrency": ConfigListNumeric(type_=int),
"periodic_concurrency": ConfigListNumeric(type_=int),
"periodic_concurrency": ConfigListString(),
"request_rate": ConfigListNumeric(type_=int),
"request_period": ConfigListNumeric(type_=int),
"text_input_length": ConfigListNumeric(type_=int),
Expand Down Expand Up @@ -569,9 +569,8 @@ def _add_profile_models_configs(self):
ConfigField(
"periodic_concurrency",
flags=["--periodic-concurrency"],
field_type=ConfigListNumeric(int),
description="Comma-delimited list of periodic concurrency values or ranges <start:end:step>"
" to be used during profiling",
field_type=ConfigListString(),
description="A list of ranges <start:end:step> to be used during profiling",
)
)
self._add_config(
Expand Down
1 change: 1 addition & 0 deletions model_analyzer/config/input/config_defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
DEFAULT_CLIENT_PROTOCOL = "grpc"
DEFAULT_RUN_CONFIG_MAX_CONCURRENCY = 1024
DEFAULT_RUN_CONFIG_MIN_CONCURRENCY = 1
DEFAULT_RUN_CONFIG_PERIODIC_CONCURRENCY = "1:1:1"
DEFAULT_RUN_CONFIG_MAX_PERIODIC_CONCURRENCY = 1024
DEFAULT_RUN_CONFIG_MIN_PERIODIC_CONCURRENCY = 16
DEFAULT_RUN_CONFIG_MAX_PERIODIC_CONCURRENCY_STEP = 128
Expand Down
9 changes: 8 additions & 1 deletion model_analyzer/config/input/config_list_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,14 @@ def set_value(self, value):
try:
if self._is_string(value):
self._value = []
value = value.split(",")
if "," in value:
value = value.split(",")
elif ":" in value:
value = value.split(":")
if len(value) == 2:
value = {"start": value[0], "stop": value[1], "step": 1}
else:
value = {"start": value[0], "stop": value[1], "step": value[2]}

if self._is_list(value):
new_value = self._process_list(value)
Expand Down
6 changes: 3 additions & 3 deletions tests/common/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
DEFAULT_OUTPUT_MODEL_REPOSITORY,
DEFAULT_RUN_CONFIG_MIN_CONCURRENCY,
DEFAULT_RUN_CONFIG_MIN_MAX_TOKEN_COUNT,
DEFAULT_RUN_CONFIG_MIN_PERIODIC_CONCURRENCY,
DEFAULT_RUN_CONFIG_PERIODIC_CONCURRENCY,
DEFAULT_TRITON_GRPC_ENDPOINT,
DEFAULT_TRITON_HTTP_ENDPOINT,
DEFAULT_TRITON_INSTALL_PATH,
Expand Down Expand Up @@ -241,7 +241,7 @@ def construct_perf_analyzer_config(
export_file_name="my-model-results.json",
batch_size=DEFAULT_BATCH_SIZES,
concurrency=DEFAULT_RUN_CONFIG_MIN_CONCURRENCY,
periodic_concurrency=DEFAULT_RUN_CONFIG_MIN_PERIODIC_CONCURRENCY,
periodic_concurrency=DEFAULT_RUN_CONFIG_PERIODIC_CONCURRENCY,
request_rate=None,
max_token_count=DEFAULT_RUN_CONFIG_MIN_MAX_TOKEN_COUNT,
launch_mode=DEFAULT_TRITON_LAUNCH_MODE,
Expand All @@ -264,7 +264,7 @@ def construct_perf_analyzer_config(
The batch size for this PA configuration
concurrency: int
The concurrency value for this PA configuration
periodic_concurrency:
periodic_concurrency: str
The periodic concurrency value for this PA configuration
request_rate: int
The request rate value for this PA configuration
Expand Down
10 changes: 8 additions & 2 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def get_test_options():
# expected_default_value
OptionStruct("intlist", "profile", "--batch-sizes", "-b", "2, 4, 6", "1"),
OptionStruct("intlist", "profile", "--concurrency", "-c", "1, 2, 3", None),
OptionStruct("intlist", "profile", "--periodic-concurrency", None, "1, 2, 3", None),
OptionStruct("stringlist", "profile", "--periodic-concurrency", None, '"5:50:5", "10:100:10"', None, None),
OptionStruct("intlist", "profile", "--request-rate", None, "1, 2, 3", None),
OptionStruct("intlist", "profile", "--request-period", None, "1, 2, 3", None),
OptionStruct("intlist", "profile", "--text-input-length", None, "1, 2, 3", None),
Expand Down Expand Up @@ -603,9 +603,15 @@ def _convert_string_to_numeric(self, number):
return float(number) if "." in number else int(number)

def _convert_string_to_int_list(self, list_values):
ret_val = [int(x) for x in list_values.split(",")]
if ":" in list_values:
ret_val = [int(x) for x in list_values.split(":")]
ret_val = list(range(ret_val[0], ret_val[1] + 1, ret_val[2]))
else:
ret_val = [int(x) for x in list_values.split(",")]

if len(ret_val) == 1:
return ret_val[0]

return ret_val

def _convert_string_to_string_list(self, list_values):
Expand Down
118 changes: 109 additions & 9 deletions tests/test_perf_analyzer_config_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -577,15 +577,23 @@ def test_llm_search_max_token_count(self):
# yapf: enable

max_token_counts = utils.generate_doubled_list(1, 256)
expected_configs = [
construct_perf_analyzer_config(max_token_count=mtc, llm_search_mode=True)
for mtc in max_token_counts
]
periodic_concurrencies = ["16:32:4", "16:32:8", "16:32:16"]

expected_configs = []
for mtc in max_token_counts:
for pc in periodic_concurrencies:
expected_configs.append(
construct_perf_analyzer_config(
max_token_count=mtc,
llm_search_mode=True,
periodic_concurrency=pc,
)
)

pa_cli_args = [
"--llm-search-enable",
"--run-config-search-max-periodic-concurrency",
"16",
"32",
"--run-config-search-max-text-input-length",
"1",
]
Expand All @@ -611,17 +619,109 @@ def test_llm_search_text_input_length(self):
# yapf: enable

text_input_lengths = utils.generate_doubled_list(1, 1024)
periodic_concurrencies = ["16:32:4", "16:32:8", "16:32:16"]

expected_configs = []
for _ in text_input_lengths:
for pc in periodic_concurrencies:
expected_configs.append(
construct_perf_analyzer_config(
llm_search_mode=True, periodic_concurrency=pc
)
)

pa_cli_args = [
"--llm-search-enable",
"--run-config-search-max-periodic-concurrency",
"32",
"--run-config-search-max-max-token-count",
"1",
]
self._run_and_test_perf_analyzer_config_generator(
yaml_str, expected_configs, pa_cli_args
)

def test_periodic_concurrency_parameter(self):
"""
Test LLM Search:
- periodic-concurrency: 10:100:10
Max token set to 1
Text input set to 1
"""

# yapf: disable
yaml_str = ("""
perf_analyzer_flags:
input-data: input-data.json
profile_models:
- my-model
""")
# yapf: enable

expected_configs = [
construct_perf_analyzer_config(llm_search_mode=True)
for pl in text_input_lengths
construct_perf_analyzer_config(
llm_search_mode=True, periodic_concurrency="10:100:10"
)
]

pa_cli_args = [
"--llm-search-enable",
"--run-config-search-max-periodic-concurrency",
"16",
"--periodic-concurrency",
"10:100:10",
"--run-config-search-max-max-token-count",
"1",
"--run-config-search-max-text-input-length",
"1",
]
self._run_and_test_perf_analyzer_config_generator(
yaml_str, expected_configs, pa_cli_args
)

def test_periodic_concurrency_search(self):
"""
Test LLM Search:
- Periodic Concurrency using RCS values
Max token set to 1
Text input set to 1
"""

# yapf: disable
yaml_str = ("""
perf_analyzer_flags:
input-data: input-data.json
profile_models:
- my-model
""")
# yapf: enable

periodic_concurrencies = [
"16:32:8",
"16:32:16",
"16:64:8",
"16:64:16",
"32:64:8",
"32:64:16",
"32:64:32",
]
expected_configs = [
construct_perf_analyzer_config(
llm_search_mode=True, periodic_concurrency=pc
)
for pc in periodic_concurrencies
]

pa_cli_args = [
"--llm-search-enable",
"--run-config-search-max-max-token-count",
"1",
"--run-config-search-max-text-input-length",
"1",
"--run-config-search-max-periodic-concurrency",
"64",
"--run-config-search-min-periodic-concurrency-step",
"8",
]
self._run_and_test_perf_analyzer_config_generator(
yaml_str, expected_configs, pa_cli_args
Expand Down

0 comments on commit efea104

Please sign in to comment.