Skip to content

Commit

Permalink
Adding support for client batch size
Browse files: browse the repository at this point in the history
  • Loading branch information
nv-braf committed Jun 3, 2024
1 parent 496de44 commit 86fc9cd
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 31 deletions.
21 changes: 15 additions & 6 deletions model_analyzer/config/generate/optuna_run_config_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class OptunaRunConfigGenerator(ConfigGeneratorInterface):
# This list represents all possible parameters Optuna can currently search for
optuna_parameter_list = [
"batch_sizes",
"max_batch_size",
"instance_group",
"concurrency",
"max_queue_delay_microseconds",
Expand Down Expand Up @@ -291,7 +292,7 @@ def _get_objective_concurrency(self, trial_objectives: TrialObjectives) -> int:
concurrency_formula = (
2
* int(trial_objectives["instance_group"])
* int(trial_objectives["batch_sizes"])
* int(trial_objectives["max_batch_size"])
)
concurrency = (
self._config.run_config_search_max_concurrency
Expand Down Expand Up @@ -344,8 +345,8 @@ def _create_parameter_combo(
}
]

if "batch_sizes" in trial_objectives:
param_combo["max_batch_size"] = trial_objectives["batch_sizes"]
if "max_batch_size" in trial_objectives:
param_combo["max_batch_size"] = trial_objectives["max_batch_size"]

if "max_queue_delay_microseconds" in trial_objectives:
param_combo["dynamic_batching"] = {
Expand Down Expand Up @@ -435,8 +436,16 @@ def _create_model_run_config(
model_config_variant: ModelConfigVariant,
trial_objectives: TrialObjectives,
) -> ModelRunConfig:
trial_batch_size = (
int(trial_objectives["batch_sizes"])
if "batch_sizes" in trial_objectives
else DEFAULT_BATCH_SIZES
)
perf_analyzer_config = self._create_perf_analyzer_config(
model.model_name(), model, int(trial_objectives["concurrency"])
model.model_name(),
model,
int(trial_objectives["concurrency"]),
trial_batch_size,
)
model_run_config = ModelRunConfig(
model.model_name(), model_config_variant, perf_analyzer_config
Expand All @@ -449,14 +458,14 @@ def _create_perf_analyzer_config(
model_name: str,
model: ModelProfileSpec,
concurrency: int,
batch_sizes: int,
) -> PerfAnalyzerConfig:
perf_analyzer_config = PerfAnalyzerConfig()

perf_analyzer_config.update_config_from_profile_config(model_name, self._config)

# TODO: TMA-1934 add support for user specifying a range of client batch sizes
perf_config_params = {
"batch-size": DEFAULT_BATCH_SIZES,
"batch-size": batch_sizes,
"concurrency-range": concurrency,
}
perf_analyzer_config.update_config(perf_config_params)
Expand Down
42 changes: 33 additions & 9 deletions model_analyzer/config/generate/search_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,15 @@ class SearchParameters:

# These map to the run-config-search fields
# See github.com/triton-inference-server/model_analyzer/blob/main/docs/config.md
exponential_rcs_parameters = ["batch_sizes", "concurrency"]
exponential_rcs_parameters = ["max_batch_size", "batch_sizes", "concurrency"]
linear_rcs_parameters = ["instance_group"]

model_parameters = ["batch_sizes", "instance_group", "max_queue_delay_microseconds"]
runtime_parameters = ["concurrency"]
model_parameters = [
"max_batch_size",
"instance_group",
"max_queue_delay_microseconds",
]
runtime_parameters = ["batch_sizes", "concurrency"]

def __init__(
self,
Expand Down Expand Up @@ -120,6 +124,7 @@ def _populate_parameters(self) -> None:
# TODO: Populate request rate - TMA-1903

def _populate_model_config_parameters(self) -> None:
self._populate_max_batch_size()
self._populate_instance_group()
self._populate_max_queue_delay_microseconds()

Expand All @@ -130,12 +135,6 @@ def _populate_batch_sizes(self) -> None:
parameter_list=self._parameters["batch_sizes"],
parameter_category=ParameterCategory.INT_LIST,
)
else:
self._populate_rcs_parameter(
parameter_name="batch_sizes",
rcs_parameter_min_value=self._config.run_config_search_min_model_batch_size,
rcs_parameter_max_value=self._config.run_config_search_max_model_batch_size,
)

def _populate_concurrency(self) -> None:
if self._parameters and self._parameters["concurrency"]:
Expand All @@ -153,6 +152,31 @@ def _populate_concurrency(self) -> None:
rcs_parameter_max_value=self._config.run_config_search_max_concurrency,
)

def _populate_max_batch_size(self) -> None:
    """Register the ``max_batch_size`` search parameter.

    When ``self._model_config_parameters`` is non-empty and contains a
    ``max_batch_size`` entry, that entry is passed to
    ``_populate_list_parameter`` as an ``INT_LIST`` parameter.

    In every other case (no model config parameters at all, or none for
    ``max_batch_size``) the parameter is passed to
    ``_populate_rcs_parameter`` using the run-config-search min/max model
    batch size values read from ``self._config``.
    """
    # Example config format:
    # model_config_parameters:
    #     max_batch_size: [1, 4, 16]
    user_parameters = self._model_config_parameters

    if user_parameters and "max_batch_size" in user_parameters:
        # An explicit list was supplied for this model.
        self._populate_list_parameter(
            parameter_name="max_batch_size",
            parameter_list=user_parameters["max_batch_size"],
            parameter_category=ParameterCategory.INT_LIST,
        )
        return

    # Nothing was specified for max_batch_size: fall back to the
    # run-config-search range taken from the profile config.
    search_config = self._config
    self._populate_rcs_parameter(
        parameter_name="max_batch_size",
        rcs_parameter_min_value=search_config.run_config_search_min_model_batch_size,
        rcs_parameter_max_value=search_config.run_config_search_max_model_batch_size,
    )

def _populate_instance_group(self) -> None:
# Example config format:
#
Expand Down
46 changes: 30 additions & 16 deletions tests/test_search_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,17 +209,19 @@ def test_search_parameter_creation_default(self):
analyzer = Analyzer(config, MagicMock(), MagicMock(), MagicMock())
analyzer._populate_search_parameters()

# batch_sizes
batch_sizes = analyzer._search_parameters["add_sub"].get_parameter(
"batch_sizes"
# max_batch_size
max_batch_size = analyzer._search_parameters["add_sub"].get_parameter(
"max_batch_size"
)
self.assertEqual(ParameterUsage.MODEL, batch_sizes.usage)
self.assertEqual(ParameterCategory.EXPONENTIAL, batch_sizes.category)
self.assertEqual(ParameterUsage.MODEL, max_batch_size.usage)
self.assertEqual(ParameterCategory.EXPONENTIAL, max_batch_size.category)
self.assertEqual(
log2(default.DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE), batch_sizes.min_range
log2(default.DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE),
max_batch_size.min_range,
)
self.assertEqual(
log2(default.DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE), batch_sizes.max_range
log2(default.DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE),
max_batch_size.max_range,
)

# concurrency
Expand Down Expand Up @@ -304,6 +306,7 @@ def test_search_parameter_creation_multi_model_non_default(self):
parameters:
batch_sizes: [16, 32, 64]
model_config_parameters:
max_batch_size: [1, 2, 4, 8]
dynamic_batching:
max_queue_delay_microseconds: [100, 200, 300]
instance_group:
Expand All @@ -323,12 +326,21 @@ def test_search_parameter_creation_multi_model_non_default(self):
# ADD_SUB
# ===================================================================

# batch_sizes
# max batch size
# ===================================================================
max_batch_size = analyzer._search_parameters["add_sub"].get_parameter(
"max_batch_size"
)
self.assertEqual(ParameterUsage.MODEL, max_batch_size.usage)
self.assertEqual(ParameterCategory.INT_LIST, max_batch_size.category)
self.assertEqual([1, 2, 4, 8], max_batch_size.enumerated_list)

# batch sizes
# ===================================================================
batch_sizes = analyzer._search_parameters["add_sub"].get_parameter(
"batch_sizes"
)
self.assertEqual(ParameterUsage.MODEL, batch_sizes.usage)
self.assertEqual(ParameterUsage.RUNTIME, batch_sizes.usage)
self.assertEqual(ParameterCategory.INT_LIST, batch_sizes.category)
self.assertEqual([16, 32, 64], batch_sizes.enumerated_list)

Expand Down Expand Up @@ -366,18 +378,20 @@ def test_search_parameter_creation_multi_model_non_default(self):
# MULT_DIV
# ===================================================================

# batch_sizes
# max batch size
# ===================================================================
batch_sizes = analyzer._search_parameters["mult_div"].get_parameter(
"batch_sizes"
max_batch_size = analyzer._search_parameters["mult_div"].get_parameter(
"max_batch_size"
)
self.assertEqual(ParameterUsage.MODEL, batch_sizes.usage)
self.assertEqual(ParameterCategory.EXPONENTIAL, batch_sizes.category)
self.assertEqual(ParameterUsage.MODEL, max_batch_size.usage)
self.assertEqual(ParameterCategory.EXPONENTIAL, max_batch_size.category)
self.assertEqual(
log2(default.DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE), batch_sizes.min_range
log2(default.DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE),
max_batch_size.min_range,
)
self.assertEqual(
log2(default.DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE), batch_sizes.max_range
log2(default.DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE),
max_batch_size.max_range,
)

# concurrency
Expand Down

0 comments on commit 86fc9cd

Please sign in to comment.