Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding BLS composing models to SearchParameters #898

Merged
merged 2 commits into from
Jun 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion model_analyzer/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ def __init__(
)

self._search_parameters: Dict[str, SearchParameters] = {}
self._composing_search_parameters: Dict[str, SearchParameters] = {}

def profile(
self, client: TritonClient, gpus: List[GPUDevice], mode: str, verbose: bool
Expand Down Expand Up @@ -119,6 +120,7 @@ def profile(
self._create_metrics_manager(client, gpus)
self._create_model_manager(client, gpus)
self._populate_search_parameters()
self._populate_composing_search_parameters()

if self._config.triton_launch_mode == "remote":
self._warn_if_other_models_loaded_on_remote_server(client)
Expand Down Expand Up @@ -423,5 +425,17 @@ def _warn_if_other_models_loaded_on_remote_server(self, client):
def _populate_search_parameters(self):
for model in self._config.profile_models:
self._search_parameters[model.model_name()] = SearchParameters(
self._config, model.parameters(), model.model_config_parameters()
self._config,
model.parameters(),
model.model_config_parameters(),
is_bls_model=bool(self._config.bls_composing_models),
)

def _populate_composing_search_parameters(self):
for model in self._config.bls_composing_models:
self._composing_search_parameters[model.model_name()] = SearchParameters(
self._config,
model.parameters(),
model.model_config_parameters(),
is_composing_model=True,
)
7 changes: 7 additions & 0 deletions model_analyzer/config/generate/optuna_run_config_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,13 @@ def __init__(
# TODO: TMA-1927: Add support for multi-model
self._search_parameters = search_parameters[models[0].model_name()]

# TODO: need to add in ensemble support
self._composing_search_parameters = {}
for composing_model in self._config.bls_composing_models:
self._composing_search_parameters[
composing_model.model_name()
] = search_parameters[composing_model.model_name()]

self._model_variant_name_manager = model_variant_name_manager

self._triton_env = BruteRunConfigGenerator.determine_triton_server_env(models)
Expand Down
14 changes: 11 additions & 3 deletions model_analyzer/config/generate/search_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,15 @@ def __init__(
config: ConfigCommandProfile = ConfigCommandProfile(),
parameters: Dict[str, Any] = {},
model_config_parameters: Dict[str, Any] = {},
is_bls_model: bool = False,
is_composing_model: bool = False,
):
self._config = config
self._parameters = parameters
self._model_config_parameters = model_config_parameters
self._search_parameters: Dict[str, SearchParameter] = {}
self._is_bls_model = is_bls_model
self._is_composing_model = is_composing_model

self._populate_search_parameters()

Expand Down Expand Up @@ -120,11 +124,15 @@ def _populate_search_parameters(self) -> None:

def _populate_parameters(self) -> None:
self._populate_batch_sizes()
self._populate_concurrency()
# TODO: Populate request rate - TMA-1903

if not self._is_composing_model:
self._populate_concurrency()
# TODO: Populate request rate - TMA-1903

def _populate_model_config_parameters(self) -> None:
self._populate_max_batch_size()
if not self._is_bls_model:
self._populate_max_batch_size()

self._populate_instance_group()
self._populate_max_queue_delay_microseconds()

Expand Down
6 changes: 6 additions & 0 deletions model_analyzer/config/input/config_command_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import argparse
import logging
import os
from copy import deepcopy

import numba.cuda
import psutil
Expand Down Expand Up @@ -1691,6 +1692,11 @@ def _autofill_values(self):
new_model["model_config_parameters"] = model.model_config_parameters()

new_profile_models[model.model_name()] = new_model

# deepcopy is necessary, else it gets overwritten when updating profile_models
self._fields["bls_composing_models"] = deepcopy(
self._fields["bls_composing_models"]
)
self._fields["profile_models"].set_value(new_profile_models)

def _using_request_rate(self) -> bool:
Expand Down
101 changes: 101 additions & 0 deletions tests/test_search_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,107 @@ def test_total_possible_configurations(self):
# batch_sizes (8) * instance group (8) * concurrency (11) * size (3)
self.assertEqual(8 * 8 * 11 * 3, total_num_of_possible_configurations)

def test_search_parameter_creation_bls_default(self):
"""
Test that search parameters are correctly created in default BLS optuna case
"""

args = [
"model-analyzer",
"profile",
"--model-repository",
"cli-repository",
"-f",
"path-to-config-file",
"--run-config-search-mode",
"optuna",
]

yaml_content = """
profile_models: add_sub
bls_composing_models: add,sub
"""

config = TestConfig()._evaluate_config(args=args, yaml_content=yaml_content)

analyzer = Analyzer(config, MagicMock(), MagicMock(), MagicMock())
analyzer._populate_search_parameters()
analyzer._populate_composing_search_parameters()

# ADD_SUB
# =====================================================================
# The top level model of a BLS does not search max batch size (always 1)

# max_batch_size
max_batch_size = analyzer._search_parameters["add_sub"].get_parameter(
"max_batch_size"
)
self.assertIsNone(max_batch_size)

# concurrency
concurrency = analyzer._search_parameters["add_sub"].get_parameter(
"concurrency"
)
self.assertEqual(ParameterUsage.RUNTIME, concurrency.usage)
self.assertEqual(ParameterCategory.EXPONENTIAL, concurrency.category)
self.assertEqual(
log2(default.DEFAULT_RUN_CONFIG_MIN_CONCURRENCY), concurrency.min_range
)
self.assertEqual(
log2(default.DEFAULT_RUN_CONFIG_MAX_CONCURRENCY), concurrency.max_range
)

# instance_group
instance_group = analyzer._search_parameters["add_sub"].get_parameter(
"instance_group"
)
self.assertEqual(ParameterUsage.MODEL, instance_group.usage)
self.assertEqual(ParameterCategory.INTEGER, instance_group.category)
self.assertEqual(
default.DEFAULT_RUN_CONFIG_MIN_INSTANCE_COUNT, instance_group.min_range
)
self.assertEqual(
default.DEFAULT_RUN_CONFIG_MAX_INSTANCE_COUNT, instance_group.max_range
)

# ADD/SUB (composing models)
# =====================================================================
# Composing models do not search concurrency

# max_batch_size
max_batch_size = analyzer._composing_search_parameters["add"].get_parameter(
"max_batch_size"
)
self.assertEqual(ParameterUsage.MODEL, max_batch_size.usage)
self.assertEqual(ParameterCategory.EXPONENTIAL, max_batch_size.category)
self.assertEqual(
log2(default.DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE),
max_batch_size.min_range,
)
self.assertEqual(
log2(default.DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE),
max_batch_size.max_range,
)

# concurrency
concurrency = analyzer._composing_search_parameters["sub"].get_parameter(
"concurrency"
)
self.assertIsNone(concurrency)

# instance_group
instance_group = analyzer._composing_search_parameters["sub"].get_parameter(
"instance_group"
)
self.assertEqual(ParameterUsage.MODEL, instance_group.usage)
self.assertEqual(ParameterCategory.INTEGER, instance_group.category)
self.assertEqual(
default.DEFAULT_RUN_CONFIG_MIN_INSTANCE_COUNT, instance_group.min_range
)
self.assertEqual(
default.DEFAULT_RUN_CONFIG_MAX_INSTANCE_COUNT, instance_group.max_range
)


if __name__ == "__main__":
unittest.main()
Loading