Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding support for client batch size #892

Merged
merged 3 commits into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions model_analyzer/config/generate/optuna_run_config_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class OptunaRunConfigGenerator(ConfigGeneratorInterface):
# This list represents all possible parameters Optuna can currently search for
optuna_parameter_list = [
"batch_sizes",
"max_batch_size",
"instance_group",
"concurrency",
"max_queue_delay_microseconds",
Expand Down Expand Up @@ -291,7 +292,7 @@ def _get_objective_concurrency(self, trial_objectives: TrialObjectives) -> int:
concurrency_formula = (
2
* int(trial_objectives["instance_group"])
* int(trial_objectives["batch_sizes"])
* int(trial_objectives["max_batch_size"])
)
concurrency = (
self._config.run_config_search_max_concurrency
Expand Down Expand Up @@ -344,8 +345,8 @@ def _create_parameter_combo(
}
]

if "batch_sizes" in trial_objectives:
param_combo["max_batch_size"] = trial_objectives["batch_sizes"]
if "max_batch_size" in trial_objectives:
param_combo["max_batch_size"] = trial_objectives["max_batch_size"]

if "max_queue_delay_microseconds" in trial_objectives:
param_combo["dynamic_batching"] = {
Expand Down Expand Up @@ -435,8 +436,16 @@ def _create_model_run_config(
model_config_variant: ModelConfigVariant,
trial_objectives: TrialObjectives,
) -> ModelRunConfig:
trial_batch_sizes = (
int(trial_objectives["batch_sizes"])
tgerdesnv marked this conversation as resolved.
Show resolved Hide resolved
if "batch_sizes" in trial_objectives
else DEFAULT_BATCH_SIZES
)
perf_analyzer_config = self._create_perf_analyzer_config(
model.model_name(), model, int(trial_objectives["concurrency"])
model_name=model.model_name(),
model=model,
concurrency=int(trial_objectives["concurrency"]),
batch_sizes=trial_batch_sizes,
)
model_run_config = ModelRunConfig(
model.model_name(), model_config_variant, perf_analyzer_config
Expand All @@ -449,14 +458,14 @@ def _create_perf_analyzer_config(
model_name: str,
model: ModelProfileSpec,
concurrency: int,
batch_sizes: int,
) -> PerfAnalyzerConfig:
perf_analyzer_config = PerfAnalyzerConfig()

perf_analyzer_config.update_config_from_profile_config(model_name, self._config)

# TODO: TMA-1934 add support for user specifying a range of client batch sizes
perf_config_params = {
"batch-size": DEFAULT_BATCH_SIZES,
"batch-size": batch_sizes,
"concurrency-range": concurrency,
}
perf_analyzer_config.update_config(perf_config_params)
Expand Down
57 changes: 38 additions & 19 deletions model_analyzer/config/generate/search_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,15 @@ class SearchParameters:

# These map to the run-config-search fields
# See github.com/triton-inference-server/model_analyzer/blob/main/docs/config.md
exponential_rcs_parameters = ["batch_sizes", "concurrency"]
exponential_rcs_parameters = ["max_batch_size", "batch_sizes", "concurrency"]
linear_rcs_parameters = ["instance_group"]

model_parameters = ["batch_sizes", "instance_group", "max_queue_delay_microseconds"]
runtime_parameters = ["concurrency"]
model_parameters = [
"max_batch_size",
"instance_group",
"max_queue_delay_microseconds",
]
runtime_parameters = ["batch_sizes", "concurrency"]

def __init__(
self,
Expand Down Expand Up @@ -120,6 +124,7 @@ def _populate_parameters(self) -> None:
# TODO: Populate request rate - TMA-1903

def _populate_model_config_parameters(self) -> None:
self._populate_max_batch_size()
self._populate_instance_group()
self._populate_max_queue_delay_microseconds()

Expand All @@ -130,12 +135,6 @@ def _populate_batch_sizes(self) -> None:
parameter_list=self._parameters["batch_sizes"],
parameter_category=ParameterCategory.INT_LIST,
)
else:
self._populate_rcs_parameter(
parameter_name="batch_sizes",
rcs_parameter_min_value=self._config.run_config_search_min_model_batch_size,
rcs_parameter_max_value=self._config.run_config_search_max_model_batch_size,
)

def _populate_concurrency(self) -> None:
if self._parameters and self._parameters["concurrency"]:
Expand All @@ -153,23 +152,34 @@ def _populate_concurrency(self) -> None:
rcs_parameter_max_value=self._config.run_config_search_max_concurrency,
)

def _populate_max_batch_size(self) -> None:
# Example config format:
# model_config_parameters:
# max_batch_size: [1, 4, 16]
if self._is_key_in_model_config_parameters("max_batch_size"):
parameter_list = self._model_config_parameters["max_batch_size"]
self._populate_list_parameter(
parameter_name="max_batch_size",
parameter_list=parameter_list,
parameter_category=ParameterCategory.INT_LIST,
)
else:
tgerdesnv marked this conversation as resolved.
Show resolved Hide resolved
# Need to populate max_batch_size based on RCS min/max values
# when no model config parameters are present
self._populate_rcs_parameter(
parameter_name="max_batch_size",
rcs_parameter_min_value=self._config.run_config_search_min_model_batch_size,
rcs_parameter_max_value=self._config.run_config_search_max_model_batch_size,
)

def _populate_instance_group(self) -> None:
# Example config format:
#
# model_config_parameters:
# instance_group:
# - kind: KIND_GPU
# count: [1, 2, 3, 4]

# Need to populate instance_group based on RCS min/max values
# even if no model config parameters are present
if not self._model_config_parameters:
self._populate_rcs_parameter(
parameter_name="instance_group",
rcs_parameter_min_value=self._config.run_config_search_min_instance_count,
rcs_parameter_max_value=self._config.run_config_search_max_instance_count,
)
elif "instance_group" in self._model_config_parameters.keys():
if self._is_key_in_model_config_parameters("instance_group"):
parameter_list = self._model_config_parameters["instance_group"][0][0][
"count"
]
Expand All @@ -180,12 +190,21 @@ def _populate_instance_group(self) -> None:
parameter_category=ParameterCategory.INT_LIST,
)
else:
# Need to populate instance_group based on RCS min/max values
# when no model config parameters are present
self._populate_rcs_parameter(
parameter_name="instance_group",
rcs_parameter_min_value=self._config.run_config_search_min_instance_count,
rcs_parameter_max_value=self._config.run_config_search_max_instance_count,
)

def _is_key_in_model_config_parameters(self, key: str) -> bool:
key_found = bool(
self._model_config_parameters and key in self._model_config_parameters
)

return key_found

def _populate_max_queue_delay_microseconds(self) -> None:
# Example format
#
Expand Down
46 changes: 30 additions & 16 deletions tests/test_search_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,17 +209,19 @@ def test_search_parameter_creation_default(self):
analyzer = Analyzer(config, MagicMock(), MagicMock(), MagicMock())
analyzer._populate_search_parameters()

# batch_sizes
batch_sizes = analyzer._search_parameters["add_sub"].get_parameter(
"batch_sizes"
# max_batch_size
max_batch_size = analyzer._search_parameters["add_sub"].get_parameter(
"max_batch_size"
)
self.assertEqual(ParameterUsage.MODEL, batch_sizes.usage)
self.assertEqual(ParameterCategory.EXPONENTIAL, batch_sizes.category)
self.assertEqual(ParameterUsage.MODEL, max_batch_size.usage)
self.assertEqual(ParameterCategory.EXPONENTIAL, max_batch_size.category)
self.assertEqual(
log2(default.DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE), batch_sizes.min_range
log2(default.DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE),
max_batch_size.min_range,
)
self.assertEqual(
log2(default.DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE), batch_sizes.max_range
log2(default.DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE),
max_batch_size.max_range,
)

# concurrency
Expand Down Expand Up @@ -304,6 +306,7 @@ def test_search_parameter_creation_multi_model_non_default(self):
parameters:
batch_sizes: [16, 32, 64]
model_config_parameters:
max_batch_size: [1, 2, 4, 8]
dynamic_batching:
max_queue_delay_microseconds: [100, 200, 300]
instance_group:
Expand All @@ -323,12 +326,21 @@ def test_search_parameter_creation_multi_model_non_default(self):
# ADD_SUB
# ===================================================================

# batch_sizes
# max batch size
# ===================================================================
max_batch_size = analyzer._search_parameters["add_sub"].get_parameter(
"max_batch_size"
)
self.assertEqual(ParameterUsage.MODEL, max_batch_size.usage)
self.assertEqual(ParameterCategory.INT_LIST, max_batch_size.category)
self.assertEqual([1, 2, 4, 8], max_batch_size.enumerated_list)

# batch sizes
# ===================================================================
batch_sizes = analyzer._search_parameters["add_sub"].get_parameter(
"batch_sizes"
)
self.assertEqual(ParameterUsage.MODEL, batch_sizes.usage)
self.assertEqual(ParameterUsage.RUNTIME, batch_sizes.usage)
self.assertEqual(ParameterCategory.INT_LIST, batch_sizes.category)
self.assertEqual([16, 32, 64], batch_sizes.enumerated_list)

Expand Down Expand Up @@ -366,18 +378,20 @@ def test_search_parameter_creation_multi_model_non_default(self):
# MULT_DIV
# ===================================================================

# batch_sizes
# max batch size
# ===================================================================
batch_sizes = analyzer._search_parameters["mult_div"].get_parameter(
"batch_sizes"
max_batch_size = analyzer._search_parameters["mult_div"].get_parameter(
"max_batch_size"
)
self.assertEqual(ParameterUsage.MODEL, batch_sizes.usage)
self.assertEqual(ParameterCategory.EXPONENTIAL, batch_sizes.category)
self.assertEqual(ParameterUsage.MODEL, max_batch_size.usage)
self.assertEqual(ParameterCategory.EXPONENTIAL, max_batch_size.category)
self.assertEqual(
log2(default.DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE), batch_sizes.min_range
log2(default.DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE),
max_batch_size.min_range,
)
self.assertEqual(
log2(default.DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE), batch_sizes.max_range
log2(default.DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE),
max_batch_size.max_range,
)

# concurrency
Expand Down
Loading