diff --git a/docs/config.md b/docs/config.md index 132ace31e..350b3f215 100644 --- a/docs/config.md +++ b/docs/config.md @@ -188,6 +188,9 @@ cpu_only_composing_models: # List of GPU UUIDs to be used for the profiling. Use 'all' to profile all the GPUs visible by CUDA [ gpus: | default: 'all' ] +# Disables DCGM (used to verify information about GPUs). Requires triton_launch_mode to be "remote" +[ dcgm_disable: <bool> | default: false ] + # Search mode. Options are "brute", "quick", and "optuna" [ run_config_search_mode: | default: brute] diff --git a/model_analyzer/analyzer.py b/model_analyzer/analyzer.py index 39fc10f34..dfdcfb6d0 100755 --- a/model_analyzer/analyzer.py +++ b/model_analyzer/analyzer.py @@ -214,7 +214,11 @@ def _create_model_manager(self, client, gpus): def _get_server_only_metrics(self, client, gpus): if self._config.triton_launch_mode != "c_api": if not self._state_manager._starting_fresh_run: - if self._do_checkpoint_gpus_match(gpus): + if self._config.dcgm_disable: + logger.info( + "DCGM is disabled - cannot verify that GPU devices match checkpoint" + ) + elif self._do_checkpoint_gpus_match(gpus): logger.info( "GPU devices match checkpoint - skipping server metric acquisition" ) diff --git a/model_analyzer/config/input/config_command.py b/model_analyzer/config/input/config_command.py index 97990901c..866225d79 100755 --- a/model_analyzer/config/input/config_command.py +++ b/model_analyzer/config/input/config_command.py @@ -129,6 +129,7 @@ def _check_for_illegal_config_settings( self._check_for_bls_incompatibility(args, yaml_config) self._check_for_concurrency_rate_request_conflicts(args, yaml_config) self._check_for_config_search_rate_request_conflicts(args, yaml_config) + self._check_for_dcgm_disable_launch_mode_conflict(args, yaml_config) def _set_field_values( self, args: Namespace, yaml_config: Optional[Dict[str, List]] @@ -398,6 +399,19 @@ def _check_for_config_search_rate_request_conflicts( f"\nCannot have both `run-config-search-max-request-rate` and `run-config-search-min/max-concurrency` 
specified in the config/CLI." ) + def _check_for_dcgm_disable_launch_mode_conflict( + self, args: Namespace, yaml_config: Optional[Dict[str, List]] + ) -> None: + if self._get_config_value("dcgm_disable", args, yaml_config): + launch_mode = self._get_config_value( + "triton_launch_mode", args, yaml_config + ) + + if launch_mode != "remote": + raise TritonModelAnalyzerException( + f"\nIf `dcgm-disable` then `triton-launch-mode` must be set to remote" + ) + def _preprocess_and_verify_arguments(self): """ Enforces some rules on the config. diff --git a/model_analyzer/config/input/config_command_profile.py b/model_analyzer/config/input/config_command_profile.py index 7b2b557b6..a4a416aa3 100755 --- a/model_analyzer/config/input/config_command_profile.py +++ b/model_analyzer/config/input/config_command_profile.py @@ -45,6 +45,7 @@ DEFAULT_CLIENT_PROTOCOL, DEFAULT_COLLECT_CPU_METRICS, DEFAULT_CONCURRENCY_SWEEP_DISABLE, + DEFAULT_DCGM_DISABLE, DEFAULT_DURATION_SECONDS, DEFAULT_EXPORT_PATH, DEFAULT_FILENAME_MODEL_GPU, @@ -288,6 +289,16 @@ def _fill_config(self): description="Report GPU metrics, even when the model is `cpu_only`.", ) ) + self._add_config( + ConfigField( + "dcgm_disable", + field_type=ConfigPrimitive(bool), + flags=["--dcgm-disable"], + parser_args={"action": "store_true"}, + default_value=DEFAULT_DCGM_DISABLE, + description="Disables DCGM, which prevents obtaining information about GPUs", + ) + ) self._add_config( ConfigField( "skip_summary_reports", diff --git a/model_analyzer/config/input/config_defaults.py b/model_analyzer/config/input/config_defaults.py index 68d31fda4..f5b0272cf 100755 --- a/model_analyzer/config/input/config_defaults.py +++ b/model_analyzer/config/input/config_defaults.py @@ -62,6 +62,7 @@ DEFAULT_USE_CONCURRENCY_FORMULA = False DEFAULT_REQUEST_RATE_SEARCH_ENABLE = False DEFAULT_CONCURRENCY_SWEEP_DISABLE = False +DEFAULT_DCGM_DISABLE = False DEFAULT_TRITON_LAUNCH_MODE = "local" DEFAULT_TRITON_DOCKER_IMAGE = 
"nvcr.io/nvidia/tritonserver:24.11-py3" DEFAULT_TRITON_HTTP_ENDPOINT = "localhost:8000" diff --git a/model_analyzer/entrypoint.py b/model_analyzer/entrypoint.py index 284101889..67254e978 100755 --- a/model_analyzer/entrypoint.py +++ b/model_analyzer/entrypoint.py @@ -260,7 +260,10 @@ def main(): ) # Set up devices - gpus = GPUDeviceFactory().verify_requested_gpus(config.gpus) + if config.dcgm_disable: + gpus = [] + else: + gpus = GPUDeviceFactory().verify_requested_gpus(config.gpus) # Check/create output model repository create_output_model_repository(config) diff --git a/model_analyzer/triton/server/server_factory.py b/model_analyzer/triton/server/server_factory.py index 241461701..a2231e6e3 100755 --- a/model_analyzer/triton/server/server_factory.py +++ b/model_analyzer/triton/server/server_factory.py @@ -126,7 +126,9 @@ def get_server_handle(config, gpus, use_model_repository=False): """ if config.triton_launch_mode == "remote": - server = TritonServerFactory._get_remote_server_handle(config) + server = TritonServerFactory._get_remote_server_handle( + config, print_warning_message=use_model_repository + ) elif config.triton_launch_mode == "local": server = TritonServerFactory._get_local_server_handle( config, gpus, use_model_repository=True @@ -147,7 +149,7 @@ def get_server_handle(config, gpus, use_model_repository=False): return server @staticmethod - def _get_remote_server_handle(config): + def _get_remote_server_handle(config, print_warning_message=True): triton_config = TritonServerConfig() triton_config.update_config(config.triton_server_flags) triton_config["model-repository"] = "remote-model-repository" @@ -155,14 +157,15 @@ def _get_remote_server_handle(config): server = TritonServerFactory.create_server_local( path=None, config=triton_config, gpus=[], log_path="" ) - logger.warning( - "GPU memory metrics reported in the remote mode are not" - " accurate. Model Analyzer uses Triton explicit model control to" - " load/unload models. 
Some frameworks do not release the GPU" - " memory even when the memory is not being used. Consider" - ' using the "local" or "docker" mode if you want to accurately' - " monitor the GPU memory usage for different models." - ) + if print_warning_message: + logger.warning( + "GPU memory metrics reported in the remote mode are not" + " accurate. Model Analyzer uses Triton explicit model control to" + " load/unload models. Some frameworks do not release the GPU" + " memory even when the memory is not being used. Consider" + ' using the "local" or "docker" mode if you want to accurately' + " monitor the GPU memory usage for different models." + ) return server diff --git a/tests/test_cli.py b/tests/test_cli.py index b0460d88c..612afed5f 100755 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -67,6 +67,7 @@ def get_test_options(): OptionStruct("bool", "profile","--always-report-gpu-metrics"), OptionStruct("bool", "profile","--use-concurrency-formula"), OptionStruct("bool", "profile","--concurrency-sweep-disable"), + OptionStruct("bool", "profile","--dcgm-disable"), #Int/Float options diff --git a/tests/test_config.py b/tests/test_config.py index 3b42b8b05..fef50748d 100755 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -2388,6 +2388,82 @@ def test_model_type_llm(self): config.inference_output_fields, DEFAULT_LLM_INFERENCE_OUTPUT_FIELDS ) + def test_dcgm_disable_and_launch_mode(self): + """ + Test that disabling DCGM is rejected unless the launch mode is remote + """ + + # Should raise an exception for docker, local, and c_api launch modes + args = [ + "model-analyzer", + "profile", + "--profile-models", + 
"modelA", + "--model-repository", + "cli-repository", + "-f", + "path-to-config-file", + "--dcgm-disable", + "--triton-launch-mode", + "local", + ] + + with self.assertRaises(TritonModelAnalyzerException): + self._evaluate_config(args, yaml_content, subcommand="profile") + + args = [ + "model-analyzer", + "profile", + "--profile-models", + "modelA", + "--model-repository", + "cli-repository", + "-f", + "path-to-config-file", + "--dcgm-disable", + "--triton-launch-mode", + "c_api", + ] + + with self.assertRaises(TritonModelAnalyzerException): + self._evaluate_config(args, yaml_content, subcommand="profile") + + # Should not raise an exception for remote mode + args = [ + "model-analyzer", + "profile", + "--profile-models", + "modelA", + "--model-repository", + "cli-repository", + "-f", + "path-to-config-file", + "--dcgm-disable", + "--triton-launch-mode", + "remote", + ] + + _ = self._evaluate_config(args, yaml_content, subcommand="profile") + def _test_request_rate_config_conflicts( self, base_args: List[Any], yaml_content: str ) -> None: