Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Option to disable concurrency sweeping #893

Merged
merged 1 commit into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -95,13 +95,16 @@ def get_configs(self) -> Generator[RunConfig, None, None]:
logger.info("")
yield from self._execute_optuna_search()
logger.info("")
logger.info(
"Done with Optuna mode search. Gathering concurrency sweep measurements for reports"
)
logger.info("")
yield from self._sweep_concurrency_over_top_results()
logger.info("")
logger.info("Done gathering concurrency sweep measurements for reports")
if self._config.concurrency_sweep_disable:
logger.info("Done with Optuna mode search.")
else:
logger.info(
"Done with Optuna mode search. Gathering concurrency sweep measurements for reports"
)
logger.info("")
yield from self._sweep_concurrency_over_top_results()
logger.info("")
logger.info("Done gathering concurrency sweep measurements for reports")
logger.info("")

def _execute_optuna_search(self) -> Generator[RunConfig, None, None]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,16 @@ def get_configs(self) -> Generator[RunConfig, None, None]:
logger.info("")
yield from self._execute_quick_search()
logger.info("")
logger.info(
"Done with quick mode search. Gathering concurrency sweep measurements for reports"
)
logger.info("")
yield from self._sweep_concurrency_over_top_results()
logger.info("")
logger.info("Done gathering concurrency sweep measurements for reports")
if self._config.concurrency_sweep_disable:
logger.info("Done with quick mode search.")
else:
logger.info(
"Done with quick mode search. Gathering concurrency sweep measurements for reports"
)
logger.info("")
yield from self._sweep_concurrency_over_top_results()
logger.info("")
logger.info("Done gathering concurrency sweep measurements for reports")
logger.info("")

def _execute_quick_search(self) -> Generator[RunConfig, None, None]:
Expand Down
11 changes: 11 additions & 0 deletions model_analyzer/config/input/config_command_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
DEFAULT_CHECKPOINT_DIRECTORY,
DEFAULT_CLIENT_PROTOCOL,
DEFAULT_COLLECT_CPU_METRICS,
DEFAULT_CONCURRENCY_SWEEP_DISABLE,
DEFAULT_DURATION_SECONDS,
DEFAULT_EXPORT_PATH,
DEFAULT_FILENAME_MODEL_GPU,
Expand Down Expand Up @@ -1012,6 +1013,16 @@ def _add_run_search_configs(self):
description="Enables the searching of request rate (instead of concurrency).",
)
)
self._add_config(
ConfigField(
"concurrency_sweep_disable",
flags=["--concurrency-sweep-disable"],
field_type=ConfigPrimitive(bool),
parser_args={"action": "store_true"},
default_value=DEFAULT_CONCURRENCY_SWEEP_DISABLE,
description="Disables the sweeping of concurrencies for the top-N models after quick/optuna search completion.",
)
)

def _add_triton_configs(self):
"""
Expand Down
1 change: 1 addition & 0 deletions model_analyzer/config/input/config_defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
DEFAULT_OPTUNA_MAX_TRIALS = 200
DEFAULT_USE_CONCURRENCY_FORMULA = False
DEFAULT_REQUEST_RATE_SEARCH_ENABLE = False
DEFAULT_CONCURRENCY_SWEEP_DISABLE = False
DEFAULT_TRITON_LAUNCH_MODE = "local"
DEFAULT_TRITON_DOCKER_IMAGE = "nvcr.io/nvidia/tritonserver:24.04-py3"
DEFAULT_TRITON_HTTP_ENDPOINT = "localhost:8000"
Expand Down
2 changes: 2 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ def get_test_options():
OptionStruct("bool", "profile","--skip-detailed-reports"),
OptionStruct("bool", "profile","--always-report-gpu-metrics"),
OptionStruct("bool", "profile","--use-concurrency-formula"),
OptionStruct("bool", "profile","--concurrency-sweep-disable"),


#Int/Float options
# Options format:
Expand Down
Loading