Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correct how periodic concurrency works in PACG #777

Merged
merged 7 commits into from
Oct 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 46 additions & 11 deletions model_analyzer/config/generate/perf_analyzer_config_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,16 @@
import json
import logging
from itertools import repeat
from typing import Dict, Generator, List, Optional, Tuple
from typing import Any, Dict, Generator, List, Optional, Tuple

from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
from model_analyzer.config.input.config_defaults import (
DEFAULT_INPUT_JSON_PATH,
DEFAULT_RUN_CONFIG_MIN_CONCURRENCY,
DEFAULT_RUN_CONFIG_MIN_MAX_TOKEN_COUNT,
DEFAULT_RUN_CONFIG_MIN_PERIODIC_CONCURRENCY,
DEFAULT_RUN_CONFIG_MIN_REQUEST_RATE,
DEFAULT_RUN_CONFIG_MIN_TEXT_INPUT_LENGTH,
DEFAULT_RUN_CONFIG_PERIODIC_CONCURRENCY,
)
from model_analyzer.constants import (
LOGGER_NAME,
Expand Down Expand Up @@ -214,9 +214,10 @@ def _create_input_dict(self, model_perf_analyzer_flags: Dict) -> Dict:
else:
return {}

def _create_inference_load_list(self) -> List[int]:
# The two possible inference loads are request rate or concurrency
# Concurrency is the default and will be used unless the user specifies
def _create_inference_load_list(self) -> List[Any]:
# The three possible inference loads are request rate, concurrency, or periodic concurrency.
# LLM models always use periodic concurrency. For non-LLM models,
# concurrency is the default and will be used unless the user specifies
# request rate, either as a model parameter or a config option
if self._cli_config.is_llm_model():
return self._create_periodic_concurrency_list()
Expand Down Expand Up @@ -247,16 +248,50 @@ def _create_concurrency_list(self) -> List[int]:
self._cli_config.run_config_search_max_concurrency,
)

def _create_periodic_concurrency_list(self) -> List[str]:
    """
    Build the list of periodic concurrency ranges to profile.

    Returns
    -------
    List[str]
        Ranges formatted as "<start>:<end>:<step>". User-specified ranges
        (model parameter) win; otherwise a single default range is used
        when run-config search is disabled; otherwise the ranges are
        generated from the run-config-search min/max settings.
    """

    def _numeric_range_key(periodic_concurrency):
        # A plain sorted() on range strings is lexicographic, which puts
        # "10:100:10" ahead of "5:50:5". Order by the integer fields instead.
        # Entries that do not parse are kept (best effort) and sorted last.
        text = str(periodic_concurrency)
        try:
            return (0, tuple(int(part) for part in text.split(":")), text)
        except ValueError:
            return (1, (), text)

    user_ranges = self._model_parameters["periodic_concurrency"]
    if user_ranges:
        return sorted(user_ranges, key=_numeric_range_key)
    elif self._cli_config.run_config_search_disable:
        return [DEFAULT_RUN_CONFIG_PERIODIC_CONCURRENCY]

    return self._generate_periodic_concurrencies()

def _generate_periodic_concurrencies(self) -> List[str]:
    """
    Enumerate every legal "<start>:<end>:<step>" periodic concurrency range.

    Start and end values are both drawn from the doubled list spanning the
    run-config-search min/max periodic concurrency; step values are drawn
    from the doubled list spanning the min/max periodic concurrency step.
    Combinations rejected by _is_illegal_periodic_concurrency_combination
    are left out.

    Returns
    -------
    List[str]
        The legal ranges, ordered by start, then end, then step
    """
    config = self._cli_config

    concurrency_values = utils.generate_doubled_list(
        config.run_config_search_min_periodic_concurrency,
        config.run_config_search_max_periodic_concurrency,
    )
    step_values = utils.generate_doubled_list(
        config.run_config_search_min_periodic_concurrency_step,
        config.run_config_search_max_periodic_concurrency_step,
    )

    return [
        f"{begin}:{finish}:{stride}"
        for begin in concurrency_values
        for finish in concurrency_values
        for stride in step_values
        if not self._is_illegal_periodic_concurrency_combination(
            begin, finish, stride
        )
    ]

def _is_illegal_periodic_concurrency_combination(
    self, start: int, end: int, step: int
) -> bool:
    """
    Decide whether a <start:end:step> periodic concurrency range is unusable.

    Parameters
    ----------
    start: int
        First concurrency value of the range
    end: int
        Last concurrency value of the range
    step: int
        Increment applied between start and end

    Returns
    -------
    bool
        True if the combination must be skipped, False if it is legal
    """
    # The range can only ramp upwards
    if start > end:
        return True

    # A step below 1 never advances (and 0 would divide by zero below)
    if step < 1:
        return True

    # A single-point range is only emitted once, using the canonical step of 1
    if start == end:
        return step != 1

    # The step must land exactly on the end value
    return (end - start) % step != 0

def _create_text_input_length_list(self) -> List[int]:
if not self._cli_config.is_llm_model():
Expand Down
7 changes: 3 additions & 4 deletions model_analyzer/config/input/config_command_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,7 @@ def _add_profile_models_configs(self):
schema={
"batch_sizes": ConfigListNumeric(type_=int),
"concurrency": ConfigListNumeric(type_=int),
"periodic_concurrency": ConfigListNumeric(type_=int),
"periodic_concurrency": ConfigListString(),
"request_rate": ConfigListNumeric(type_=int),
"request_period": ConfigListNumeric(type_=int),
"text_input_length": ConfigListNumeric(type_=int),
Expand Down Expand Up @@ -569,9 +569,8 @@ def _add_profile_models_configs(self):
ConfigField(
"periodic_concurrency",
flags=["--periodic-concurrency"],
field_type=ConfigListNumeric(int),
description="Comma-delimited list of periodic concurrency values or ranges <start:end:step>"
" to be used during profiling",
field_type=ConfigListString(),
description="A list of ranges <start:end:step> to be used during profiling",
)
)
self._add_config(
Expand Down
1 change: 1 addition & 0 deletions model_analyzer/config/input/config_defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
DEFAULT_CLIENT_PROTOCOL = "grpc"
DEFAULT_RUN_CONFIG_MAX_CONCURRENCY = 1024
DEFAULT_RUN_CONFIG_MIN_CONCURRENCY = 1
DEFAULT_RUN_CONFIG_PERIODIC_CONCURRENCY = "1:1:1"
DEFAULT_RUN_CONFIG_MAX_PERIODIC_CONCURRENCY = 1024
DEFAULT_RUN_CONFIG_MIN_PERIODIC_CONCURRENCY = 16
DEFAULT_RUN_CONFIG_MAX_PERIODIC_CONCURRENCY_STEP = 128
Expand Down
9 changes: 8 additions & 1 deletion model_analyzer/config/input/config_list_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,14 @@ def set_value(self, value):
try:
if self._is_string(value):
self._value = []
value = value.split(",")
if "," in value:
value = value.split(",")
elif ":" in value:
nv-hwoo marked this conversation as resolved.
Show resolved Hide resolved
value = value.split(":")
if len(value) == 2:
value = {"start": value[0], "stop": value[1], "step": 1}
else:
value = {"start": value[0], "stop": value[1], "step": value[2]}

if self._is_list(value):
new_value = self._process_list(value)
Expand Down
6 changes: 3 additions & 3 deletions tests/common/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
DEFAULT_OUTPUT_MODEL_REPOSITORY,
DEFAULT_RUN_CONFIG_MIN_CONCURRENCY,
DEFAULT_RUN_CONFIG_MIN_MAX_TOKEN_COUNT,
DEFAULT_RUN_CONFIG_MIN_PERIODIC_CONCURRENCY,
DEFAULT_RUN_CONFIG_PERIODIC_CONCURRENCY,
DEFAULT_TRITON_GRPC_ENDPOINT,
DEFAULT_TRITON_HTTP_ENDPOINT,
DEFAULT_TRITON_INSTALL_PATH,
Expand Down Expand Up @@ -241,7 +241,7 @@ def construct_perf_analyzer_config(
export_file_name="my-model-results.json",
batch_size=DEFAULT_BATCH_SIZES,
concurrency=DEFAULT_RUN_CONFIG_MIN_CONCURRENCY,
periodic_concurrency=DEFAULT_RUN_CONFIG_MIN_PERIODIC_CONCURRENCY,
periodic_concurrency=DEFAULT_RUN_CONFIG_PERIODIC_CONCURRENCY,
request_rate=None,
max_token_count=DEFAULT_RUN_CONFIG_MIN_MAX_TOKEN_COUNT,
launch_mode=DEFAULT_TRITON_LAUNCH_MODE,
Expand All @@ -264,7 +264,7 @@ def construct_perf_analyzer_config(
The batch size for this PA configuration
concurrency: int
The concurrency value for this PA configuration
periodic_concurrency:
periodic_concurrency: str
The periodic concurrency value for this PA configuration
request_rate: int
The request rate value for this PA configuration
Expand Down
10 changes: 8 additions & 2 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def get_test_options():
# expected_default_value
OptionStruct("intlist", "profile", "--batch-sizes", "-b", "2, 4, 6", "1"),
OptionStruct("intlist", "profile", "--concurrency", "-c", "1, 2, 3", None),
OptionStruct("intlist", "profile", "--periodic-concurrency", None, "1, 2, 3", None),
OptionStruct("stringlist", "profile", "--periodic-concurrency", None, '"5:50:5", "10:100:10"', None, None),
OptionStruct("intlist", "profile", "--request-rate", None, "1, 2, 3", None),
OptionStruct("intlist", "profile", "--request-period", None, "1, 2, 3", None),
OptionStruct("intlist", "profile", "--text-input-length", None, "1, 2, 3", None),
Expand Down Expand Up @@ -603,9 +603,15 @@ def _convert_string_to_numeric(self, number):
return float(number) if "." in number else int(number)

def _convert_string_to_int_list(self, list_values):
    """
    Convert a CLI option string into the int value(s) it describes.

    Parameters
    ----------
    list_values: str
        Either a comma-delimited list ("1, 2, 3") or a range written as
        "start:stop:step" or "start:stop" (a missing step defaults to 1).
        The stop value of a range is inclusive.

    Returns
    -------
    int or List[int]
        A bare int when exactly one value results, otherwise the list
    """
    if ":" in list_values:
        # Check for a range first: int() cannot parse "start:stop:step", so
        # running the comma parse beforehand would raise ValueError
        bounds = [int(x) for x in list_values.split(":")]
        start, stop = bounds[0], bounds[1]
        step = bounds[2] if len(bounds) > 2 else 1
        ret_val = list(range(start, stop + 1, step))
    else:
        ret_val = [int(x) for x in list_values.split(",")]

    if len(ret_val) == 1:
        return ret_val[0]

    return ret_val

def _convert_string_to_string_list(self, list_values):
Expand Down
118 changes: 109 additions & 9 deletions tests/test_perf_analyzer_config_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -577,15 +577,23 @@ def test_llm_search_max_token_count(self):
# yapf: enable

max_token_counts = utils.generate_doubled_list(1, 256)
expected_configs = [
construct_perf_analyzer_config(max_token_count=mtc, llm_search_mode=True)
for mtc in max_token_counts
]
periodic_concurrencies = ["16:32:4", "16:32:8", "16:32:16"]

expected_configs = []
for mtc in max_token_counts:
for pc in periodic_concurrencies:
expected_configs.append(
construct_perf_analyzer_config(
max_token_count=mtc,
llm_search_mode=True,
periodic_concurrency=pc,
)
)

pa_cli_args = [
"--llm-search-enable",
"--run-config-search-max-periodic-concurrency",
"16",
"32",
"--run-config-search-max-text-input-length",
"1",
]
Expand All @@ -611,17 +619,109 @@ def test_llm_search_text_input_length(self):
# yapf: enable

text_input_lengths = utils.generate_doubled_list(1, 1024)
periodic_concurrencies = ["16:32:4", "16:32:8", "16:32:16"]

expected_configs = []
for _ in text_input_lengths:
for pc in periodic_concurrencies:
expected_configs.append(
construct_perf_analyzer_config(
llm_search_mode=True, periodic_concurrency=pc
)
)

pa_cli_args = [
"--llm-search-enable",
"--run-config-search-max-periodic-concurrency",
"32",
"--run-config-search-max-max-token-count",
"1",
]
self._run_and_test_perf_analyzer_config_generator(
yaml_str, expected_configs, pa_cli_args
)

def test_periodic_concurrency_parameter(self):
"""
Test LLM Search:
- periodic-concurrency: 10:100:10

The range passed with --periodic-concurrency is expected to show up
unchanged as the periodic concurrency of the generated config.

Max token set to 1
Text input set to 1
"""

# yapf: disable
yaml_str = ("""
perf_analyzer_flags:
input-data: input-data.json
profile_models:
- my-model
""")
# yapf: enable

expected_configs = [
# NOTE(review): the next two lines look like removed-side diff residue
# (text_input_lengths is not defined in this test) - confirm
construct_perf_analyzer_config(llm_search_mode=True)
for pl in text_input_lengths
construct_perf_analyzer_config(
llm_search_mode=True, periodic_concurrency="10:100:10"
)
]

pa_cli_args = [
"--llm-search-enable",
# NOTE(review): the next two entries look like removed-side diff residue;
# presumably superseded by --periodic-concurrency below - confirm
"--run-config-search-max-periodic-concurrency",
"16",
"--periodic-concurrency",
"10:100:10",
"--run-config-search-max-max-token-count",
"1",
"--run-config-search-max-text-input-length",
"1",
]
self._run_and_test_perf_analyzer_config_generator(
yaml_str, expected_configs, pa_cli_args
)

def test_periodic_concurrency_search(self):
"""
Test LLM Search:
- Periodic Concurrency using RCS values

Max token set to 1
Text input set to 1
"""

# yapf: disable
yaml_str = ("""
perf_analyzer_flags:
input-data: input-data.json
profile_models:
- my-model
""")
# yapf: enable

periodic_concurrencies = [
"16:32:8",
"16:32:16",
"16:64:8",
"16:64:16",
"32:64:8",
"32:64:16",
"32:64:32",
]
expected_configs = [
construct_perf_analyzer_config(
llm_search_mode=True, periodic_concurrency=pc
)
for pc in periodic_concurrencies
]

pa_cli_args = [
"--llm-search-enable",
"--run-config-search-max-max-token-count",
"1",
"--run-config-search-max-text-input-length",
"1",
"--run-config-search-max-periodic-concurrency",
"64",
"--run-config-search-min-periodic-concurrency-step",
"8",
]
self._run_and_test_perf_analyzer_config_generator(
yaml_str, expected_configs, pa_cli_args
Expand Down
Loading