Connect up parameter description class #869

Merged 9 commits on May 6, 2024
Changes from 3 commits
12 changes: 11 additions & 1 deletion model_analyzer/analyzer.py
@@ -17,12 +17,13 @@
import logging
import sys
from copy import deepcopy
from typing import List, Optional, Union
from typing import Dict, List, Optional, Union

from model_analyzer.cli.cli import CLI
from model_analyzer.config.generate.base_model_config_generator import (
BaseModelConfigGenerator,
)
from model_analyzer.config.generate.config_parameters import ConfigParameters
from model_analyzer.constants import LOGGER_NAME, PA_ERROR_LOG_FILENAME
from model_analyzer.state.analyzer_state_manager import AnalyzerStateManager
from model_analyzer.triton.server.server import TritonServer
@@ -82,6 +83,8 @@ def __init__(
constraint_manager=self._constraint_manager,
)

self._search_parameters: Dict[str, ConfigParameters] = {}

def profile(
self, client: TritonClient, gpus: List[GPUDevice], mode: str, verbose: bool
) -> None:
@@ -115,6 +118,7 @@ def profile(

self._create_metrics_manager(client, gpus)
self._create_model_manager(client, gpus)
self._populate_search_parameters()

if self._config.triton_launch_mode == "remote":
self._warn_if_other_models_loaded_on_remote_server(client)
@@ -414,3 +418,9 @@ def _warn_if_other_models_loaded_on_remote_server(self, client):
f"A model not being profiled ({model_name}) is loaded on the remote Tritonserver. "
"This could impact the profile results."
)

def _populate_search_parameters(self):
for model in self._config.profile_models:
self._search_parameters[model.model_name()] = ConfigParameters(
self._config, model.parameters(), model.model_config_parameters()
)
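
For a sense of what each entry holds, a minimal sketch using the new `ConfigParameters` API from this PR (the model name and queries are hypothetical illustrations, not code from the diff):

```python
from model_analyzer.config.generate.config_parameters import ConfigParameters

# Each profiled model gets its own ConfigParameters; built with no config,
# it starts empty and is filled via populate_search_parameters() as shown
# in the next file.
search_parameters = {"add_sub": ConfigParameters()}

# Later search code can then ask, per model:
params = search_parameters["add_sub"]
# params.get_range("concurrency") -> (min, max) bounds once populated
# params.get_list("batch_sizes")  -> an explicit list, if the user supplied one
```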
2 changes: 1 addition & 1 deletion model_analyzer/config/generate/config_parameter.py
@@ -45,4 +45,4 @@ class ConfigParameter:
max_range: Optional[int] = None

# This is only applicable to LIST category
enumerated_list: List[Any] = []
enumerated_list: Optional[List[Any]] = None
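
The switch from a bare `[]` default to `Optional[...] = None` also dodges Python's shared-mutable-default pitfall. Assuming `ConfigParameter` is a dataclass (its decorator sits outside this hunk), a bare list default is rejected outright; a minimal illustration:

```python
from dataclasses import dataclass
from typing import Any, List, Optional

# A mutable default is rejected by dataclasses at class-creation time:
#
#   @dataclass
#   class Broken:
#       items: List[Any] = []
#
# raises ValueError: mutable default <class 'list'> ... is not allowed

@dataclass
class Works:
    items: Optional[List[Any]] = None   # the pattern this diff adopts

print(Works().items)  # None -> callers check for None instead of sharing one list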
169 changes: 152 additions & 17 deletions model_analyzer/config/generate/config_parameters.py
@@ -14,8 +14,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Any, List, Optional, Tuple
from math import log2
from typing import Any, Dict, List, Optional, Tuple

from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException

from .config_parameter import ConfigParameter, ParameterCategory, ParameterType
@@ -26,23 +28,22 @@ class ConfigParameters:
Contains information about all configuration parameters the user wants to search
"""

def __init__(self):
self._parameters: Dict[str, ConfigParameter] = {}
exponential_parameters = ["batch_sizes", "concurrency"]
linear_parameters = ["instance_group"]

model_parameters = ["batch_sizes", "instance_group", "max_queue_delay_microseconds"]
runtime_parameters = ["concurrency"]

def add_parameter(
def __init__(
self,
name: str,
ptype: ParameterType,
category: ParameterCategory,
min_range: Optional[int] = None,
max_range: Optional[int] = None,
enumerated_list: List[Any] = [],
) -> None:
self._check_for_illegal_input(category, min_range, max_range, enumerated_list)
config: Optional[ConfigCommandProfile] = None,
parameters: Dict[str, Any] = {},
model_config_parameters: Dict[str, Any] = {},
):
self._parameters: Dict[str, ConfigParameter] = {}

self._parameters[name] = ConfigParameter(
ptype, category, min_range, max_range, enumerated_list
)
if config:
self.populate_search_parameters(config, parameters, model_config_parameters)

def get_parameter(self, name: str) -> ConfigParameter:
return self._parameters[name]
@@ -53,12 +54,146 @@ def get_type(self, name: str) -> ParameterType:
def get_category(self, name: str) -> ParameterCategory:
return self._parameters[name].category

def get_range(self, name: str) -> Tuple[int, int]:
def get_range(self, name: str) -> Tuple[Optional[int], Optional[int]]:
return (self._parameters[name].min_range, self._parameters[name].max_range)

def get_list(self, name: str) -> List[Any]:
def get_list(self, name: str) -> Optional[List[Any]]:
return self._parameters[name].enumerated_list

def populate_search_parameters(
self,
config: ConfigCommandProfile,
parameters: Dict[str, Any],
model_config_parameters: Dict[str, Any],
) -> None:
self._populate_parameters(config, parameters)
self._populate_model_config_parameters(config, model_config_parameters)

def _populate_parameters(
self,
config: ConfigCommandProfile,
parameters: Dict[str, Any],
) -> None:
self._populate_parameter(
parameter_name="batch_sizes",
rcs_min_value=config.run_config_search_min_model_batch_size,
rcs_max_value=config.run_config_search_max_model_batch_size,
parameter_list=parameters["batch_sizes"],
)
# TODO: Figure out how to use request rate
self._populate_parameter(
parameter_name="concurrency",
rcs_min_value=config.run_config_search_min_concurrency,
rcs_max_value=config.run_config_search_max_concurrency,
parameter_list=parameters["concurrency"],
)

def _populate_parameter(
self,
parameter_name: str,
rcs_min_value: Optional[int] = None,
rcs_max_value: Optional[int] = None,
parameter_list: Optional[List[int]] = None,
) -> None:
ptype = self._determine_parameter_type(parameter_name)

if parameter_list:
self._add_parameter(
name=parameter_name,
ptype=ptype,
category=ParameterCategory.LIST,
enumerated_list=parameter_list,
)
else:
category = self._determine_parameter_category(parameter_name)

if category == ParameterCategory.EXPONENTIAL:
Contributor:

This logic appears to be very trusting that things are done right elsewhere. Do we want to add safeguards?

If a parameter list is passed, do the RCS values get silently ignored? I thought all four parameters (including parameter_list) are passed elsewhere, which means the RCS values would be ignored. If a parameter list is not passed, the error feels a bit misleading, since the parameter might not be passed and yet not be meant to be exponential or linear. I think some refactoring needs to be done here; this can stay one function, but with more documentation to explain what is happening and safeguards to prevent logic errors.

Contributor Author:

The rule is that if the user specifies a list then we use that; otherwise we use the RCS values to create a min/max search space.

I've added a comment to reflect this.
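
A minimal sketch of that rule, using the `_populate_parameter` signature from this diff (the concrete values are hypothetical):

```python
cp = ConfigParameters()

# Case 1: the user supplied an explicit list -> LIST category, RCS bounds ignored.
cp._populate_parameter(
    parameter_name="concurrency",
    rcs_min_value=1,
    rcs_max_value=1024,
    parameter_list=[4, 8, 16],
)
assert cp.get_category("concurrency") == ParameterCategory.LIST
assert cp.get_list("concurrency") == [4, 8, 16]

# Case 2: no list -> a min/max range built from the RCS values, stored as
# log2 exponents because concurrency is an exponential parameter.
cp._populate_parameter(
    parameter_name="concurrency",
    rcs_min_value=1,
    rcs_max_value=1024,
)
assert cp.get_range("concurrency") == (0, 10)  # log2(1)=0, log2(1024)=10
```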

min_range = int(log2(rcs_min_value)) # type: ignore
max_range = int(log2(rcs_max_value)) # type: ignore
else:
min_range = rcs_min_value # type: ignore
max_range = rcs_max_value # type: ignore

self._add_parameter(
name=parameter_name,
ptype=ptype,
category=category,
min_range=min_range,
max_range=max_range,
)

def _determine_parameter_category(self, name: str) -> ParameterCategory:
if name in ConfigParameters.exponential_parameters:
category = ParameterCategory.EXPONENTIAL
elif name in ConfigParameters.linear_parameters:
category = ParameterCategory.INTEGER
else:
raise TritonModelAnalyzerException(f"ParameterCategory not found for {name}")

return category

def _determine_parameter_type(self, name: str) -> ParameterType:
if name in ConfigParameters.model_parameters:
ptype = ParameterType.MODEL
elif name in ConfigParameters.runtime_parameters:
ptype = ParameterType.RUNTIME
else:
raise TritonModelAnalyzerException(f"ParameterType not found for {name}")

return ptype

def _populate_model_config_parameters(
self, config: ConfigCommandProfile, model_config_parameters: Dict[str, Any]
) -> None:
# Need to populate instance_group based on RCS min/max values
# even if no model config parameters are present
if not model_config_parameters:
self._populate_parameter(
parameter_name="instance_group",
rcs_min_value=config.run_config_search_min_instance_count,
rcs_max_value=config.run_config_search_max_instance_count,
)
return

if "instance_group" in model_config_parameters.keys():
parameter_list = model_config_parameters["instance_group"][0][0]["count"]
else:
parameter_list = None

self._populate_parameter(
parameter_name="instance_group",
rcs_min_value=config.run_config_search_min_instance_count,
rcs_max_value=config.run_config_search_max_instance_count,
parameter_list=parameter_list,
)

if "dynamic_batching" in model_config_parameters.keys():
if (
"max_queue_delay_microseconds"
in model_config_parameters["dynamic_batching"][0]
):
self._populate_parameter(
parameter_name="max_queue_delay_microseconds",
parameter_list=model_config_parameters["dynamic_batching"][0][
"max_queue_delay_microseconds"
],
)

def _add_parameter(
self,
name: str,
ptype: ParameterType,
category: ParameterCategory,
min_range: Optional[int] = None,
max_range: Optional[int] = None,
enumerated_list: List[Any] = [],
) -> None:
self._check_for_illegal_input(category, min_range, max_range, enumerated_list)

self._parameters[name] = ConfigParameter(
ptype, category, min_range, max_range, enumerated_list
)

def _check_for_illegal_input(
self,
category: ParameterCategory,
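
The nested indexing in `_populate_model_config_parameters` implies a particular parsed shape for `model_config_parameters`; a hypothetical example consistent with it (the field values are made up):

```python
model_config_parameters = {
    # instance_group parses to a list of lists of dicts, hence [0][0]["count"]
    "instance_group": [[{"kind": "KIND_GPU", "count": [1, 2, 4]}]],
    # dynamic_batching parses to a list of dicts, hence [0][...]
    "dynamic_batching": [{"max_queue_delay_microseconds": [100, 200, 300]}],
}

# With this input, instance_group becomes a LIST parameter over [1, 2, 4] and
# max_queue_delay_microseconds a LIST parameter over [100, 200, 300]; absent
# an explicit count list, instance_group instead gets an RCS min/max range.
```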
4 changes: 2 additions & 2 deletions model_analyzer/config/input/config_command.py
@@ -289,10 +289,10 @@ def _check_quick_search_model_config_parameters_combinations(self) -> None:
if not "profile_models" in config:
return

if config["run_config_search_mode"] != "quick":
if config["run_config_search_mode"].value() != "quick":
return

profile_models = config()["profile_models"].value()
profile_models = config["profile_models"].value()
for model in profile_models:
model_config_params = deepcopy(model.model_config_parameters())
if model_config_params:
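
Both corrections in this hunk touch the same access pattern: `config[...]` appears to return a field object wrapping its payload, so the stored value must be unwrapped with `.value()` before comparing it to the string "quick", and the stray call in `config()["profile_models"]` is dropped for the same reason. A condensed sketch of the fix (assuming that field API):

```python
# Comparing the field object itself to a string never matches, so the early
# return always fired and the combination check below was silently skipped:
#   if config["run_config_search_mode"] != "quick":  # always True
#       return
# Unwrapping first restores the intended behavior:
if config["run_config_search_mode"].value() != "quick":
    return
profile_models = config["profile_models"].value()
```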
27 changes: 21 additions & 6 deletions model_analyzer/config/input/config_command_profile.py
@@ -1529,19 +1529,34 @@ def _autofill_values(self):

# Run parameters
if not model.parameters():
new_model["parameters"] = {
"batch_sizes": self.batch_sizes,
"concurrency": self.concurrency,
"request_rate": self.request_rate,
}
if self.run_config_search_mode != "optuna":
new_model["parameters"] = {
"batch_sizes": self.batch_sizes,
"concurrency": self.concurrency,
"request_rate": self.request_rate,
}
else:
if self._fields["batch_sizes"].is_set_by_user():
new_model["parameters"] = {"batch_sizes": self.batch_sizes}
else:
new_model["parameters"] = {"batch_sizes": []}

new_model["parameters"]["concurrency"] = self.concurrency
new_model["parameters"]["request_rate"] = self.request_rate

else:
new_model["parameters"] = {}
if "batch_sizes" in model.parameters():
new_model["parameters"].update(
{"batch_sizes": model.parameters()["batch_sizes"]}
)
else:
new_model["parameters"].update({"batch_sizes": self.batch_sizes})
if self.run_config_search_mode != "optuna":
new_model["parameters"].update(
{"batch_sizes": self.batch_sizes}
)
else:
new_model["parameters"].update({"batch_sizes": []})

if "concurrency" in model.parameters():
new_model["parameters"].update(
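
Condensed, the batch_sizes autofill rule above is: non-Optuna modes always get a concrete default; Optuna mode keeps an explicit user value and otherwise leaves an empty list, presumably so the Optuna search can choose batch sizes itself. A sketch with simplified, hypothetical names:

```python
def autofill_batch_sizes(search_mode, set_by_user, user_value, default_value):
    """Sketch of the batch_sizes autofill branching above (names simplified)."""
    if search_mode != "optuna":
        return default_value      # non-Optuna: always fill in the default
    if set_by_user:
        return user_value         # Optuna: honor an explicit setting
    return []                     # Optuna: empty list -> let the search decide
```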