Connect up parameter description class #869

Merged · 9 commits · May 6, 2024
Changes from 6 commits
12 changes: 11 additions & 1 deletion model_analyzer/analyzer.py
@@ -17,12 +17,13 @@
import logging
import sys
from copy import deepcopy
from typing import List, Optional, Union
from typing import Dict, List, Optional, Union

from model_analyzer.cli.cli import CLI
from model_analyzer.config.generate.base_model_config_generator import (
BaseModelConfigGenerator,
)
from model_analyzer.config.generate.config_parameters import ConfigParameters
from model_analyzer.constants import LOGGER_NAME, PA_ERROR_LOG_FILENAME
from model_analyzer.state.analyzer_state_manager import AnalyzerStateManager
from model_analyzer.triton.server.server import TritonServer
@@ -82,6 +83,8 @@ def __init__(
constraint_manager=self._constraint_manager,
)

self._search_parameters: Dict[str, ConfigParameters] = {}

def profile(
self, client: TritonClient, gpus: List[GPUDevice], mode: str, verbose: bool
) -> None:
@@ -115,6 +118,7 @@ def profile(

self._create_metrics_manager(client, gpus)
self._create_model_manager(client, gpus)
self._populate_search_parameters()

if self._config.triton_launch_mode == "remote":
self._warn_if_other_models_loaded_on_remote_server(client)
@@ -414,3 +418,9 @@ def _warn_if_other_models_loaded_on_remote_server(self, client):
f"A model not being profiled ({model_name}) is loaded on the remote Tritonserver. "
"This could impact the profile results."
)

def _populate_search_parameters(self):
for model in self._config.profile_models:
self._search_parameters[model.model_name()] = ConfigParameters(
self._config, model.parameters(), model.model_config_parameters()
)
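The new `_populate_search_parameters` hook builds one `ConfigParameters` object per profiled model, keyed by model name, so later stages can look up a model's search space directly. A minimal standalone sketch of that shape; the `FakeModel` class and the model name are hypothetical stand-ins, not Model Analyzer classes:

# Standalone sketch (hypothetical stand-ins): one search-parameter
# description is stored per model, keyed by model name.
class FakeModel:
    def __init__(self, name, params, model_config_params):
        self._name = name
        self._params = params
        self._mcp = model_config_params

    def model_name(self):
        return self._name

    def parameters(self):
        return self._params

    def model_config_parameters(self):
        return self._mcp

profile_models = [FakeModel("add_sub", {"batch_sizes": [1, 4]}, {})]
search_parameters = {
    m.model_name(): (m.parameters(), m.model_config_parameters())
    for m in profile_models
}
assert "add_sub" in search_parameters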
2 changes: 1 addition & 1 deletion model_analyzer/config/generate/config_parameter.py
@@ -45,4 +45,4 @@ class ConfigParameter:
max_range: Optional[int] = None

# This is only applicable to LIST category
enumerated_list: List[Any] = []
enumerated_list: Optional[List[Any]] = None
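Switching the default from `[]` to `Optional[List[Any]] = None` sidesteps Python's shared-mutable-default pitfall: a literal `[]` annotated at class level is one list shared by every instance (and `@dataclass` rejects it outright with a ValueError). A standalone sketch of the class-attribute variant of the problem, unrelated to the Model Analyzer classes:

from typing import Any, List, Optional

class Shared:
    enumerated_list: List[Any] = []  # a single list shared by every instance

a, b = Shared(), Shared()
a.enumerated_list.append(1)
assert b.enumerated_list == [1]  # b observes a's mutation

class Safe:
    enumerated_list: Optional[List[Any]] = None  # nothing shared to mutate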
213 changes: 196 additions & 17 deletions model_analyzer/config/generate/config_parameters.py
@@ -14,8 +14,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Any, List, Optional, Tuple
from math import log2
from typing import Any, Dict, List, Optional, Tuple

from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException

from .config_parameter import ConfigParameter, ParameterCategory, ParameterType
@@ -26,23 +28,26 @@
Contains information about all configuration parameters the user wants to search
"""

def __init__(self):
self._parameters: Dict[str, ConfigParameter] = {}
# These map to the run-config-search fields
# See github.com/triton-inference-server/model_analyzer/blob/main/docs/config.md
exponential_rcs_parameters = ["batch_sizes", "concurrency"]
linear_rcs_parameters = ["instance_group"]

def add_parameter(
model_parameters = ["batch_sizes", "instance_group", "max_queue_delay_microseconds"]
runtime_parameters = ["concurrency"]

def __init__(
self,
name: str,
ptype: ParameterType,
category: ParameterCategory,
min_range: Optional[int] = None,
max_range: Optional[int] = None,
enumerated_list: List[Any] = [],
) -> None:
self._check_for_illegal_input(category, min_range, max_range, enumerated_list)
config: ConfigCommandProfile,
parameters: Dict[str, Any] = {},
model_config_parameters: Dict[str, Any] = {},
):
self._config = config
self._parameters = parameters
self._model_config_parameters = model_config_parameters
self._search_parameters: Dict[str, ConfigParameter] = {}

self._parameters[name] = ConfigParameter(
ptype, category, min_range, max_range, enumerated_list
)
self._populate_search_parameters()

def get_parameter(self, name: str) -> ConfigParameter:
return self._parameters[name]
@@ -53,12 +58,186 @@
def get_category(self, name: str) -> ParameterCategory:
return self._parameters[name].category

def get_range(self, name: str) -> Tuple[int, int]:
def get_range(self, name: str) -> Tuple[Optional[int], Optional[int]]:
return (self._parameters[name].min_range, self._parameters[name].max_range)

def get_list(self, name: str) -> List[Any]:
def get_list(self, name: str) -> Optional[List[Any]]:
return self._parameters[name].enumerated_list
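The widened return types reflect that each parameter is described one of two ways, a min/max range or an enumerated list, never both, so the unused fields stay None. A standalone illustration with a stand-in enum (not the repo's `ParameterCategory`); the values are hypothetical:

from enum import Enum, auto

class Category(Enum):  # stand-in for the repo's ParameterCategory
    INTEGER = auto()
    EXPONENTIAL = auto()
    LIST = auto()

def describe(category, min_range=None, max_range=None, enumerated_list=None):
    # LIST parameters carry values; the range fields stay None (and
    # vice versa), which is why both getters now return Optionals.
    if category is Category.LIST:
        return f"choose from {enumerated_list}"
    return f"sweep {min_range}..{max_range}"

print(describe(Category.LIST, enumerated_list=[16, 32, 64]))      # choose from [16, 32, 64]
print(describe(Category.EXPONENTIAL, min_range=0, max_range=10))  # sweep 0..10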

def _populate_search_parameters(self) -> None:
self._populate_parameters()
self._populate_model_config_parameters()

def _populate_parameters(self) -> None:
self._populate_batch_sizes()
self._populate_concurrency()
# TODO: Populate request rate - TMA-1903

def _populate_model_config_parameters(self) -> None:
self._populate_instance_group()
self._populate_max_queue_delay_microseconds()

def _populate_batch_sizes(self) -> None:
if self._parameters["batch_sizes"]:
self._populate_list_parameter(
parameter_name="batch_sizes",
parameter_list=self._parameters["batch_sizes"],
)
else:
self._populate_rcs_parameter(
parameter_name="batch_sizes",
rcs_parameter_min_value=self._config.run_config_search_min_model_batch_size,
rcs_parameter_max_value=self._config.run_config_search_max_model_batch_size,
)
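The branch above gives explicit user-supplied batch sizes priority and otherwise falls back to the run-config-search (RCS) bounds. A condensed standalone sketch of that decision; the helper name and default bounds are hypothetical, not the tool's real defaults:

# Standalone sketch: explicit user values win; otherwise fall back to
# the run-config-search (RCS) bounds. Defaults here are hypothetical.
def batch_size_search_space(user_batch_sizes, rcs_min=1, rcs_max=128):
    if user_batch_sizes:            # e.g. parameters: batch_sizes: [1, 4, 16]
        return ("LIST", user_batch_sizes)
    return ("RANGE", (rcs_min, rcs_max))

assert batch_size_search_space([1, 4, 16]) == ("LIST", [1, 4, 16])
assert batch_size_search_space([]) == ("RANGE", (1, 128))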

def _populate_concurrency(self) -> None:
if self._parameters["concurrency"]:
self._populate_list_parameter(
parameter_name="concurrency",
parameter_list=self._parameters["concurrency"],
)
else:
self._populate_rcs_parameter(
parameter_name="concurrency",
rcs_parameter_min_value=self._config.run_config_search_min_concurrency,
rcs_parameter_max_value=self._config.run_config_search_max_concurrency,
)

def _populate_instance_group(self) -> None:
# Example config format:
#
# model_config_parameters:
# instance_group:
# - kind: KIND_GPU
# count: [1, 2, 3, 4]

# Need to populate instance_group based on RCS min/max values
# even if no model config parameters are present
if not self._model_config_parameters:
self._populate_rcs_parameter(
parameter_name="instance_group",
rcs_parameter_min_value=self._config.run_config_search_min_instance_count,
rcs_parameter_max_value=self._config.run_config_search_max_instance_count,
)
elif "instance_group" in self._model_config_parameters.keys():
parameter_list = self._model_config_parameters["instance_group"][0][0][
"count"
]

self._populate_list_parameter(
parameter_name="instance_group",
parameter_list=parameter_list,
)
else:
self._populate_rcs_parameter(
parameter_name="instance_group",
rcs_parameter_min_value=self._config.run_config_search_min_instance_count,
rcs_parameter_max_value=self._config.run_config_search_max_instance_count,
)
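The double index `[0][0]["count"]` matches the parsed shape of the YAML in the comment above: an outer list of sweep alternatives wrapping the list of instance_group entries. A standalone sketch of that structure, assuming this parsing (values taken from the comment's example):

# Standalone sketch of the parsed shape the lookup above assumes:
# outer list = sweep alternatives, inner list = instance_group entries.
model_config_parameters = {
    "instance_group": [
        [{"kind": "KIND_GPU", "count": [1, 2, 3, 4]}],
    ],
}
counts = model_config_parameters["instance_group"][0][0]["count"]
assert counts == [1, 2, 3, 4]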

def _populate_max_queue_delay_microseconds(self) -> None:
# Example format
#
# model_config_parameters:
# dynamic_batching:
# max_queue_delay_microseconds: [100, 200, 300]

# There is no RCS field for max_queue_delay_microseconds
if self._is_max_queue_delay_in_model_config_parameters():
self._populate_list_parameter(
parameter_name="max_queue_delay_microseconds",
parameter_list=self._model_config_parameters["dynamic_batching"][0][
"max_queue_delay_microseconds"
],
)

def _is_max_queue_delay_in_model_config_parameters(self) -> bool:
if self._model_config_parameters:
max_queue_delay_present = (
"dynamic_batching" in self._model_config_parameters.keys()
and (
"max_queue_delay_microseconds"
in self._model_config_parameters["dynamic_batching"][0]
)
)
else:
max_queue_delay_present = False

return max_queue_delay_present
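The presence check mirrors the parsed shape of the `dynamic_batching` block: a single-element list wrapping the dict of fields. A standalone sketch under that assumption, with values from the code comment's example:

# Standalone sketch of the shape the presence check expects.
model_config_parameters = {
    "dynamic_batching": [{"max_queue_delay_microseconds": [100, 200, 300]}],
}
present = "dynamic_batching" in model_config_parameters and (
    "max_queue_delay_microseconds" in model_config_parameters["dynamic_batching"][0]
)
assert present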

def _populate_list_parameter(
self,
parameter_name: str,
parameter_list: List[int],
) -> None:
ptype = self._determine_parameter_type(parameter_name)

self._add_parameter(
name=parameter_name,
ptype=ptype,
category=ParameterCategory.LIST,
enumerated_list=parameter_list,
)

def _populate_rcs_parameter(
self,
parameter_name: str,
rcs_parameter_min_value: int,
rcs_parameter_max_value: int,
) -> None:
ptype = self._determine_parameter_type(parameter_name)
category = self._determine_parameter_category(parameter_name)

if category == ParameterCategory.EXPONENTIAL:
min_range = int(log2(rcs_parameter_min_value)) # type: ignore
max_range = int(log2(rcs_parameter_max_value)) # type: ignore
else:
min_range = rcs_parameter_min_value # type: ignore
max_range = rcs_parameter_max_value # type: ignore

self._add_parameter(
name=parameter_name,
ptype=ptype,
category=category,
min_range=min_range,
max_range=max_range,
)
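For EXPONENTIAL parameters the stored range holds log2 exponents rather than raw values, so a concurrency sweep from 1 to 1024 is stored as (0, 10) and each step is recovered as a power of two. A worked standalone sketch; the bounds are hypothetical:

from math import log2

# EXPONENTIAL parameters store log2 exponents: 1..1024 becomes the
# exponent range 0..10, and each swept value is recovered as 2**i.
rcs_min, rcs_max = 1, 1024
min_range, max_range = int(log2(rcs_min)), int(log2(rcs_max))  # 0, 10
swept = [2**i for i in range(min_range, max_range + 1)]
assert swept[0] == 1 and swept[-1] == 1024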

def _determine_parameter_category(self, name: str) -> ParameterCategory:
if name in ConfigParameters.exponential_rcs_parameters:
category = ParameterCategory.EXPONENTIAL
elif name in ConfigParameters.linear_rcs_parameters:
category = ParameterCategory.INTEGER
else:
TritonModelAnalyzerException(f"ParameterCategory not found for {name}")

return category

def _determine_parameter_type(self, name: str) -> ParameterType:
if name in ConfigParameters.model_parameters:
ptype = ParameterType.MODEL
elif name in ConfigParameters.runtime_parameters:
ptype = ParameterType.RUNTIME
else:
TritonModelAnalyzerException(f"ParameterType not found for {name}")

return ptype

def _add_parameter(
self,
name: str,
ptype: ParameterType,
category: ParameterCategory,
min_range: Optional[int] = None,
max_range: Optional[int] = None,
enumerated_list: List[Any] = [],
) -> None:
self._check_for_illegal_input(category, min_range, max_range, enumerated_list)

self._parameters[name] = ConfigParameter(
ptype, category, min_range, max_range, enumerated_list
)

def _check_for_illegal_input(
self,
category: ParameterCategory,
…
4 changes: 2 additions & 2 deletions model_analyzer/config/input/config_command.py
@@ -289,10 +289,10 @@ def _check_quick_search_model_config_parameters_combinations(self) -> None:
if not "profile_models" in config:
return

if config["run_config_search_mode"] != "quick":
if config["run_config_search_mode"].value() != "quick":
return

profile_models = config()["profile_models"].value()
profile_models = config["profile_models"].value()
for model in profile_models:
model_config_params = deepcopy(model.model_config_parameters())
if model_config_params:
…
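The corrected comparison calls `.value()` because entries in `config` are field wrapper objects rather than raw values, so comparing the wrapper itself against a string always fails. A minimal standalone sketch of that pattern; the `ConfigField` class here is a hypothetical stand-in, not the repo's real field class:

# Hypothetical stand-in for the wrapper pattern: the mapping stores
# wrapper objects, so comparisons against raw strings need .value().
class ConfigField:
    def __init__(self, value):
        self._value = value

    def value(self):
        return self._value

config = {"run_config_search_mode": ConfigField("quick")}
assert config["run_config_search_mode"] != "quick"          # the old buggy comparison
assert config["run_config_search_mode"].value() == "quick"  # the corrected one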
27 changes: 21 additions & 6 deletions model_analyzer/config/input/config_command_profile.py
@@ -1529,19 +1529,34 @@ def _autofill_values(self):

# Run parameters
if not model.parameters():
new_model["parameters"] = {
"batch_sizes": self.batch_sizes,
"concurrency": self.concurrency,
"request_rate": self.request_rate,
}
if self.run_config_search_mode != "optuna":
new_model["parameters"] = {
"batch_sizes": self.batch_sizes,
"concurrency": self.concurrency,
"request_rate": self.request_rate,
}
else:
if self._fields["batch_sizes"].is_set_by_user():
new_model["parameters"] = {"batch_sizes": self.batch_sizes}
else:
new_model["parameters"] = {"batch_sizes": []}

new_model["parameters"]["concurrency"] = self.concurrency
new_model["parameters"]["request_rate"] = self.request_rate

else:
new_model["parameters"] = {}
if "batch_sizes" in model.parameters():
new_model["parameters"].update(
{"batch_sizes": model.parameters()["batch_sizes"]}
)
else:
new_model["parameters"].update({"batch_sizes": self.batch_sizes})
if self.run_config_search_mode != "optuna":
new_model["parameters"].update(
{"batch_sizes": self.batch_sizes}
)
else:
new_model["parameters"].update({"batch_sizes": []})

if "concurrency" in model.parameters():
new_model["parameters"].update(
…
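The new branch leaves `batch_sizes` empty under optuna search unless the user set it explicitly, so the downstream `ConfigParameters` falls back to the run-config-search bounds for that parameter. A condensed standalone sketch of the rule; the helper is hypothetical, since the real method mutates `new_model["parameters"]` using `self._fields`:

# Standalone condensation of the autofill rule above.
def autofill_batch_sizes(search_mode, set_by_user, default_batch_sizes):
    if search_mode != "optuna" or set_by_user:
        return default_batch_sizes
    return []  # empty list -> ConfigParameters falls back to RCS bounds

assert autofill_batch_sizes("optuna", False, [1]) == []
assert autofill_batch_sizes("optuna", True, [1, 8]) == [1, 8]
assert autofill_batch_sizes("brute", False, [1]) == [1]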