Skip to content

Commit

Permalink
Adding option to disable DCGM when in remote mode (#952)
Browse files Browse the repository at this point in the history
* Adding option to disable DCGM when in remote mode.

* Updating documentation
  • Loading branch information
nv-braf authored Dec 20, 2024
1 parent a66700f commit 92c8386
Show file tree
Hide file tree
Showing 9 changed files with 128 additions and 12 deletions.
3 changes: 3 additions & 0 deletions docs/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,9 @@ cpu_only_composing_models: <comma-delimited-string-list>
# List of GPU UUIDs to be used for the profiling. Use 'all' to profile all the GPUs visible by CUDA
[ gpus: <string|comma-delimited-list-string> | default: 'all' ]
# Disables DCGM (used to verify info about GPUs). Only allowed when triton_launch_mode is 'remote'
[ dcgm_disable: <bool> | default: false ]
# Search mode. Options are "brute", "quick", and "optuna"
[ run_config_search_mode: <string> | default: brute ]
Expand Down
6 changes: 5 additions & 1 deletion model_analyzer/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,11 @@ def _create_model_manager(self, client, gpus):
def _get_server_only_metrics(self, client, gpus):
if self._config.triton_launch_mode != "c_api":
if not self._state_manager._starting_fresh_run:
if self._do_checkpoint_gpus_match(gpus):
if self._config.dcgm_disable:
logger.info(
"DCGM is disabled - cannot verify that GPU devices match checkpoint"
)
elif self._do_checkpoint_gpus_match(gpus):
logger.info(
"GPU devices match checkpoint - skipping server metric acquisition"
)
Expand Down
14 changes: 14 additions & 0 deletions model_analyzer/config/input/config_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ def _check_for_illegal_config_settings(
self._check_for_bls_incompatibility(args, yaml_config)
self._check_for_concurrency_rate_request_conflicts(args, yaml_config)
self._check_for_config_search_rate_request_conflicts(args, yaml_config)
self._check_for_dcgm_disable_launch_mode_conflict(args, yaml_config)

def _set_field_values(
self, args: Namespace, yaml_config: Optional[Dict[str, List]]
Expand Down Expand Up @@ -398,6 +399,19 @@ def _check_for_config_search_rate_request_conflicts(
f"\nCannot have both `run-config-search-max-request-rate` and `run-config-search-min/max-concurrency` specified in the config/CLI."
)

def _check_for_dcgm_disable_launch_mode_conflict(
self, args: Namespace, yaml_config: Optional[Dict[str, List]]
) -> None:
if self._get_config_value("dcgm_disable", args, yaml_config):
launch_mode = self._get_config_value(
"triton_launch_mode", args, yaml_config
)

if launch_mode != "remote":
raise TritonModelAnalyzerException(
f"\nIf `dcgm-disable` then `triton-launch-mode` must be set to remote"
)

def _preprocess_and_verify_arguments(self):
"""
Enforces some rules on the config.
Expand Down
11 changes: 11 additions & 0 deletions model_analyzer/config/input/config_command_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
DEFAULT_CLIENT_PROTOCOL,
DEFAULT_COLLECT_CPU_METRICS,
DEFAULT_CONCURRENCY_SWEEP_DISABLE,
DEFAULT_DCGM_DISABLE,
DEFAULT_DURATION_SECONDS,
DEFAULT_EXPORT_PATH,
DEFAULT_FILENAME_MODEL_GPU,
Expand Down Expand Up @@ -288,6 +289,16 @@ def _fill_config(self):
description="Report GPU metrics, even when the model is `cpu_only`.",
)
)
self._add_config(
ConfigField(
"dcgm_disable",
field_type=ConfigPrimitive(bool),
flags=["--dcgm-disable"],
parser_args={"action": "store_true"},
default_value=DEFAULT_DCGM_DISABLE,
description="Disables DCGM, which prevents obtaining information about GPUs",
)
)
self._add_config(
ConfigField(
"skip_summary_reports",
Expand Down
1 change: 1 addition & 0 deletions model_analyzer/config/input/config_defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
DEFAULT_USE_CONCURRENCY_FORMULA = False
DEFAULT_REQUEST_RATE_SEARCH_ENABLE = False
DEFAULT_CONCURRENCY_SWEEP_DISABLE = False
DEFAULT_DCGM_DISABLE = False
DEFAULT_TRITON_LAUNCH_MODE = "local"
DEFAULT_TRITON_DOCKER_IMAGE = "nvcr.io/nvidia/tritonserver:24.11-py3"
DEFAULT_TRITON_HTTP_ENDPOINT = "localhost:8000"
Expand Down
5 changes: 4 additions & 1 deletion model_analyzer/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,10 @@ def main():
)

# Set up devices
gpus = GPUDeviceFactory().verify_requested_gpus(config.gpus)
if config.dcgm_disable:
gpus = []
else:
gpus = GPUDeviceFactory().verify_requested_gpus(config.gpus)

# Check/create output model repository
create_output_model_repository(config)
Expand Down
23 changes: 13 additions & 10 deletions model_analyzer/triton/server/server_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,9 @@ def get_server_handle(config, gpus, use_model_repository=False):
"""

if config.triton_launch_mode == "remote":
server = TritonServerFactory._get_remote_server_handle(config)
server = TritonServerFactory._get_remote_server_handle(
config, print_warning_message=use_model_repository
)
elif config.triton_launch_mode == "local":
server = TritonServerFactory._get_local_server_handle(
config, gpus, use_model_repository=True
Expand All @@ -147,22 +149,23 @@ def get_server_handle(config, gpus, use_model_repository=False):
return server

@staticmethod
def _get_remote_server_handle(config):
def _get_remote_server_handle(config, print_warning_message=True):
triton_config = TritonServerConfig()
triton_config.update_config(config.triton_server_flags)
triton_config["model-repository"] = "remote-model-repository"
logger.info("Using remote Triton Server")
server = TritonServerFactory.create_server_local(
path=None, config=triton_config, gpus=[], log_path=""
)
logger.warning(
"GPU memory metrics reported in the remote mode are not"
" accurate. Model Analyzer uses Triton explicit model control to"
" load/unload models. Some frameworks do not release the GPU"
" memory even when the memory is not being used. Consider"
' using the "local" or "docker" mode if you want to accurately'
" monitor the GPU memory usage for different models."
)
if print_warning_message:
logger.warning(
"GPU memory metrics reported in the remote mode are not"
" accurate. Model Analyzer uses Triton explicit model control to"
" load/unload models. Some frameworks do not release the GPU"
" memory even when the memory is not being used. Consider"
' using the "local" or "docker" mode if you want to accurately'
" monitor the GPU memory usage for different models."
)

return server

Expand Down
1 change: 1 addition & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def get_test_options():
OptionStruct("bool", "profile","--always-report-gpu-metrics"),
OptionStruct("bool", "profile","--use-concurrency-formula"),
OptionStruct("bool", "profile","--concurrency-sweep-disable"),
OptionStruct("bool", "profile","--dcgm-disable"),


#Int/Float options
Expand Down
76 changes: 76 additions & 0 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2388,6 +2388,82 @@ def test_model_type_llm(self):
config.inference_output_fields, DEFAULT_LLM_INFERENCE_OUTPUT_FIELDS
)

def test_dcgm_disable_and_launch_mode(self):
    """
    Test that `--dcgm-disable` is only accepted when the launch mode is remote
    """

    # Everything except the launch mode value is shared by all cases; the
    # value is appended per case so each call gets a fresh argument list.
    base_args = [
        "model-analyzer",
        "profile",
        "--profile-models",
        "modelA",
        "--model-repository",
        "cli-repository",
        "-f",
        "path-to-config-file",
        "--dcgm-disable",
        "--triton-launch-mode",
    ]

    yaml_content = ""

    # Should raise an exception for docker, local, and c_api launch modes
    for launch_mode in ("docker", "local", "c_api"):
        # subTest reports every failing launch mode instead of stopping at
        # the first one.
        with self.subTest(launch_mode=launch_mode):
            with self.assertRaises(TritonModelAnalyzerException):
                self._evaluate_config(
                    base_args + [launch_mode], yaml_content, subcommand="profile"
                )

    # Should not raise an exception for remote mode
    _ = self._evaluate_config(
        base_args + ["remote"], yaml_content, subcommand="profile"
    )

def _test_request_rate_config_conflicts(
self, base_args: List[Any], yaml_content: str
) -> None:
Expand Down

0 comments on commit 92c8386

Please sign in to comment.