Adding option to disable DCGM when in remote mode #952

Merged (2 commits) on Dec 20, 2024
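This PR adds a `--dcgm-disable` boolean flag; the config validation below requires it to be paired with `--triton-launch-mode remote`. A minimal invocation sketch assembled from the test arguments in this diff (the model name and repository path are placeholders):

    model-analyzer profile --profile-models modelA --model-repository cli-repository --triton-launch-mode remote --dcgm-disable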
6 changes: 5 additions & 1 deletion model_analyzer/analyzer.py
@@ -214,7 +214,11 @@ def _create_model_manager(self, client, gpus):
     def _get_server_only_metrics(self, client, gpus):
         if self._config.triton_launch_mode != "c_api":
             if not self._state_manager._starting_fresh_run:
-                if self._do_checkpoint_gpus_match(gpus):
+                if self._config.dcgm_disable:
+                    logger.info(
+                        "DCGM is disabled - cannot verify that GPU devices match checkpoint"
+                    )
+                elif self._do_checkpoint_gpus_match(gpus):
                     logger.info(
                         "GPU devices match checkpoint - skipping server metric acquisition"
                     )
14 changes: 14 additions & 0 deletions model_analyzer/config/input/config_command.py
@@ -129,6 +129,7 @@ def _check_for_illegal_config_settings(
         self._check_for_bls_incompatibility(args, yaml_config)
         self._check_for_concurrency_rate_request_conflicts(args, yaml_config)
         self._check_for_config_search_rate_request_conflicts(args, yaml_config)
+        self._check_for_dcgm_disable_launch_mode_conflict(args, yaml_config)

     def _set_field_values(
         self, args: Namespace, yaml_config: Optional[Dict[str, List]]
@@ -398,6 +399,19 @@ def _check_for_config_search_rate_request_conflicts(
f"\nCannot have both `run-config-search-max-request-rate` and `run-config-search-min/max-concurrency` specified in the config/CLI."
)

def _check_for_dcgm_disable_launch_mode_conflict(
self, args: Namespace, yaml_config: Optional[Dict[str, List]]
) -> None:
if self._get_config_value("dcgm_disable", args, yaml_config):
launch_mode = self._get_config_value(
"triton_launch_mode", args, yaml_config
)

if launch_mode != "remote":
raise TritonModelAnalyzerException(
f"\nIf `dcgm-disable` then `triton-launch-mode` must be set to remote"
)

def _preprocess_and_verify_arguments(self):
"""
Enforces some rules on the config.
Expand Down
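Stripped of Model Analyzer's config plumbing, the new guard reduces to the following sketch (a simplified illustration: `config` here is a plain dict and ValueError stands in for TritonModelAnalyzerException; the real code reads values through `_get_config_value`):

    # Simplified sketch of the dcgm-disable / launch-mode conflict check.
    def check_dcgm_disable_conflict(config: dict) -> None:
        if config.get("dcgm_disable") and config.get("triton_launch_mode") != "remote":
            raise ValueError("`dcgm-disable` requires `triton-launch-mode` to be remote")

    check_dcgm_disable_conflict({"dcgm_disable": True, "triton_launch_mode": "remote"})  # passes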
11 changes: 11 additions & 0 deletions model_analyzer/config/input/config_command_profile.py
@@ -45,6 +45,7 @@
     DEFAULT_CLIENT_PROTOCOL,
     DEFAULT_COLLECT_CPU_METRICS,
     DEFAULT_CONCURRENCY_SWEEP_DISABLE,
+    DEFAULT_DCGM_DISABLE,
     DEFAULT_DURATION_SECONDS,
     DEFAULT_EXPORT_PATH,
     DEFAULT_FILENAME_MODEL_GPU,
@@ -288,6 +289,16 @@ def _fill_config(self):
                 description="Report GPU metrics, even when the model is `cpu_only`.",
             )
         )
+        self._add_config(
+            ConfigField(
+                "dcgm_disable",
+                field_type=ConfigPrimitive(bool),
+                flags=["--dcgm-disable"],
+                parser_args={"action": "store_true"},
+                default_value=DEFAULT_DCGM_DISABLE,
+                description="Disables DCGM, which prevents obtaining information about GPUs.",
+            )
+        )
         self._add_config(
             ConfigField(
                 "skip_summary_reports",
1 change: 1 addition & 0 deletions model_analyzer/config/input/config_defaults.py
@@ -62,6 +62,7 @@
 DEFAULT_USE_CONCURRENCY_FORMULA = False
 DEFAULT_REQUEST_RATE_SEARCH_ENABLE = False
 DEFAULT_CONCURRENCY_SWEEP_DISABLE = False
+DEFAULT_DCGM_DISABLE = False
 DEFAULT_TRITON_LAUNCH_MODE = "local"
 DEFAULT_TRITON_DOCKER_IMAGE = "nvcr.io/nvidia/tritonserver:24.11-py3"
 DEFAULT_TRITON_HTTP_ENDPOINT = "localhost:8000"
6 changes: 5 additions & 1 deletion model_analyzer/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,11 @@
)

# Set up devices
gpus = GPUDeviceFactory().verify_requested_gpus(config.gpus)
foo = config.dcgm_disable
Fixed Show fixed Hide fixed
if config.dcgm_disable:
gpus = []
else:
gpus = GPUDeviceFactory().verify_requested_gpus(config.gpus)

# Check/create output model repository
create_output_model_repository(config)
Expand Down
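In isolation, the device-setup branch behaves like this sketch (the config object is a hypothetical stand-in; `GPUDeviceFactory` is stubbed out because the real class needs DCGM):

    from types import SimpleNamespace

    def verify_requested_gpus_stub(requested):
        # Stand-in for GPUDeviceFactory().verify_requested_gpus(...),
        # which would query DCGM for the physical devices.
        raise RuntimeError("DCGM required")

    config = SimpleNamespace(dcgm_disable=True, gpus=["all"])

    if config.dcgm_disable:
        gpus = []  # no DCGM: skip GPU discovery and verification entirely
    else:
        gpus = verify_requested_gpus_stub(config.gpus)

    assert gpus == []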
23 changes: 13 additions & 10 deletions model_analyzer/triton/server/server_factory.py
@@ -126,7 +126,9 @@ def get_server_handle(config, gpus, use_model_repository=False):
         """

         if config.triton_launch_mode == "remote":
-            server = TritonServerFactory._get_remote_server_handle(config)
+            server = TritonServerFactory._get_remote_server_handle(
+                config, print_warning_message=use_model_repository
+            )
         elif config.triton_launch_mode == "local":
             server = TritonServerFactory._get_local_server_handle(
                 config, gpus, use_model_repository=True
@@ -147,22 +149,23 @@ def get_server_handle(config, gpus, use_model_repository=False):
         return server

     @staticmethod
-    def _get_remote_server_handle(config):
+    def _get_remote_server_handle(config, print_warning_message=True):
         triton_config = TritonServerConfig()
         triton_config.update_config(config.triton_server_flags)
         triton_config["model-repository"] = "remote-model-repository"
         logger.info("Using remote Triton Server")
         server = TritonServerFactory.create_server_local(
             path=None, config=triton_config, gpus=[], log_path=""
         )
-        logger.warning(
-            "GPU memory metrics reported in the remote mode are not"
-            " accurate. Model Analyzer uses Triton explicit model control to"
-            " load/unload models. Some frameworks do not release the GPU"
-            " memory even when the memory is not being used. Consider"
-            ' using the "local" or "docker" mode if you want to accurately'
-            " monitor the GPU memory usage for different models."
-        )
+        if print_warning_message:
+            logger.warning(
+                "GPU memory metrics reported in the remote mode are not"
+                " accurate. Model Analyzer uses Triton explicit model control to"
+                " load/unload models. Some frameworks do not release the GPU"
+                " memory even when the memory is not being used. Consider"
+                ' using the "local" or "docker" mode if you want to accurately'
+                " monitor the GPU memory usage for different models."
+            )

         return server
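The net effect: the remote-mode GPU-memory warning is now emitted only when the handle is created with `use_model_repository=True`, presumably so it appears once where it matters rather than on every handle fetch. The gating pattern in miniature (a simplified sketch without the Triton server plumbing):

    import logging

    logger = logging.getLogger("server_factory_sketch")

    def get_remote_handle_sketch(print_warning_message: bool = True) -> None:
        # Mirrors _get_remote_server_handle: callers fetching a lightweight
        # handle can now suppress the repeated warning.
        if print_warning_message:
            logger.warning("GPU memory metrics reported in remote mode are not accurate.")

    get_remote_handle_sketch(print_warning_message=False)  # silent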
1 change: 1 addition & 0 deletions tests/test_cli.py
@@ -67,6 +67,7 @@ def get_test_options():
         OptionStruct("bool", "profile","--always-report-gpu-metrics"),
         OptionStruct("bool", "profile","--use-concurrency-formula"),
         OptionStruct("bool", "profile","--concurrency-sweep-disable"),
+        OptionStruct("bool", "profile","--dcgm-disable"),


         #Int/Float options
76 changes: 76 additions & 0 deletions tests/test_config.py
@@ -2388,6 +2388,82 @@ def test_model_type_llm(self):
             config.inference_output_fields, DEFAULT_LLM_INFERENCE_OUTPUT_FIELDS
         )

+    def test_dcgm_disable_and_launch_mode(self):
+        """
+        Test that an exception is raised when DCGM is disabled unless launch mode is remote
+        """
+
+        # Should raise an exception for docker, local, and c_api launch modes
+        args = [
+            "model-analyzer",
+            "profile",
+            "--profile-models",
+            "modelA",
+            "--model-repository",
+            "cli-repository",
+            "-f",
+            "path-to-config-file",
+            "--dcgm-disable",
+            "--triton-launch-mode",
+            "docker",
+        ]
+
+        yaml_content = ""
+
+        with self.assertRaises(TritonModelAnalyzerException):
+            self._evaluate_config(args, yaml_content, subcommand="profile")
+
+        args = [
+            "model-analyzer",
+            "profile",
+            "--profile-models",
+            "modelA",
+            "--model-repository",
+            "cli-repository",
+            "-f",
+            "path-to-config-file",
+            "--dcgm-disable",
+            "--triton-launch-mode",
+            "local",
+        ]
+
+        with self.assertRaises(TritonModelAnalyzerException):
+            self._evaluate_config(args, yaml_content, subcommand="profile")
+
+        args = [
+            "model-analyzer",
+            "profile",
+            "--profile-models",
+            "modelA",
+            "--model-repository",
+            "cli-repository",
+            "-f",
+            "path-to-config-file",
+            "--dcgm-disable",
+            "--triton-launch-mode",
+            "c_api",
+        ]
+
+        with self.assertRaises(TritonModelAnalyzerException):
+            self._evaluate_config(args, yaml_content, subcommand="profile")
+
+        # Should not raise an exception for remote mode
+        args = [
+            "model-analyzer",
+            "profile",
+            "--profile-models",
+            "modelA",
+            "--model-repository",
+            "cli-repository",
+            "-f",
+            "path-to-config-file",
+            "--dcgm-disable",
+            "--triton-launch-mode",
+            "remote",
+        ]
+
+        _ = self._evaluate_config(args, yaml_content, subcommand="profile")
+
     def _test_request_rate_config_conflicts(
         self, base_args: List[Any], yaml_content: str
     ) -> None: