Optuna Search Mode (Alpha) Release (#896)
* Adding cli option for optuna search (#867)

* Adding cli option for optuna search

* Changed RCS description

* Class to hold info about parameters (#868)

* Initial code for ConfigParameters class

* Fixing codeql issue

* Fixes based on review

* Connect up parameter description class (#869)

* Added hooks for creating search parameters with some basic unit testing

* Adding more unit testing

* Cleaning up codeql

* Adding story ref for TODO

* Changes based on review comments

* Refactored ConfigParameters

* Renaming to SearchParameter(s)

* Moving unit testing into SearchParameters test class

* Fix codeql issues

* Creating Optuna RCG factory (#878)

* Creating optuna RCG factory

* fixing codeql issues

* Removing metrics manager

* Fixing mypy failure

* Optuna Search Class (#877)

* Base Optuna class plus unit testing

* codeql fixes

* more codeql fixes

* Removing metrics manager

* Removing metrics manager from Optuna RCG unit test

* Removing client from quick/optuna RCGs

* Changing gpus to gpu_count in quick/optuna RCGs

* Removing magic number

* Fixing codeql issue

* Fixing optuna version

* Adding todo comment about client batch size support

* Using SearchParameters in OptunaRCG (#881)

* Using SearchParameters in OptunaRCG

* Fixing search parameter unit tests

* Removing debug line

* Changes based on PR

* Adding call for default parameters

* Added todo for dynamic batching

* Add Percentage Search Space to Optuna (#882)

* Added method for calculating total possible configurations

* Added min/max percentage of search space to CLI

* Connected up in optuna RCG

* Added in support to cap optuna search based on a strict number of trials (#884)

* Adding support for concurrency formula as an option in Optuna search (#885)

* Fixing merge conflict

* Adding --use-concurrency-formula to unit testing

* Add Debug info to Optuna (#889)

* Adding debug info + bug fixes

* Fixes based on PR

* Optuna Early Exit (#890)

* Add logic to enable early exit along with CLI hooks.

* Changes based on PR

* Check that model supports dynamic batching when creating param_combo (#891)

* Adding option to disable concurrency sweeping (#893)

* Adding support for client batch size (#892)

* Adding support for client batch size

* Fixes based on PR

* Removing redundant keys()

* Fixing codeQL issue

* Attempt to fix unittest issue

* Removing 3.8 testing
nv-braf authored Jun 7, 2024
1 parent ef12a85 commit acf085f
Showing 21 changed files with 2,114 additions and 81 deletions.
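For readers trying the alpha out, the commit log above implies the new mode is selected through the existing run-config-search CLI. A minimal invocation sketch follows; only --use-concurrency-formula is quoted verbatim in the log above, so treat the other flag spellings (--run-config-search-mode optuna and the search-space percentage options) as assumptions inferred from the commit messages, and the model name and repository path as placeholders:

# Sketch only: flag spellings other than --use-concurrency-formula are
# inferred from the commit messages, not quoted from the docs.
model-analyzer profile \
    --model-repository /path/to/model_repository \
    --profile-models my_model \
    --run-config-search-mode optuna \
    --min-percentage-of-search-space 5 \
    --max-percentage-of-search-space 10 \
    --use-concurrency-formula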
28 changes: 14 additions & 14 deletions .github/workflows/python-package.yaml
@@ -39,21 +39,21 @@ jobs:
       fail-fast: false
       matrix:
         os: ["ubuntu-22.04"]
-        python-version: ["3.8", "3.11"]
+        python-version: ["3.11"]
     env:
       SKIP_GPU_TESTS: 1

     steps:
-    - uses: actions/checkout@v3
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v3
-      with:
-        python-version: ${{ matrix.python-version }}
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        python -m pip install -e .
-    - name: Test with unittest
-      run: |
-        pip install unittest-parallel
-        python3 -m unittest_parallel -v -s ./tests -t .
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v3
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install -e .
+      - name: Test with unittest
+        run: |
+          pip install unittest-parallel
+          python3 -m unittest_parallel -v -s ./tests -t .
13 changes: 12 additions & 1 deletion model_analyzer/analyzer.py
@@ -17,12 +17,13 @@
 import logging
 import sys
 from copy import deepcopy
-from typing import List, Optional, Union
+from typing import Dict, List, Optional, Union

 from model_analyzer.cli.cli import CLI
 from model_analyzer.config.generate.base_model_config_generator import (
     BaseModelConfigGenerator,
 )
+from model_analyzer.config.generate.search_parameters import SearchParameters
 from model_analyzer.constants import LOGGER_NAME, PA_ERROR_LOG_FILENAME
 from model_analyzer.state.analyzer_state_manager import AnalyzerStateManager
 from model_analyzer.triton.server.server import TritonServer
@@ -82,6 +83,8 @@ def __init__(
             constraint_manager=self._constraint_manager,
         )

+        self._search_parameters: Dict[str, SearchParameters] = {}
+
     def profile(
         self, client: TritonClient, gpus: List[GPUDevice], mode: str, verbose: bool
     ) -> None:
@@ -115,6 +118,7 @@ def profile(

         self._create_metrics_manager(client, gpus)
         self._create_model_manager(client, gpus)
+        self._populate_search_parameters()

         if self._config.triton_launch_mode == "remote":
             self._warn_if_other_models_loaded_on_remote_server(client)
@@ -200,6 +204,7 @@ def _create_model_manager(self, client, gpus):
             metrics_manager=self._metrics_manager,
             state_manager=self._state_manager,
             constraint_manager=self._constraint_manager,
+            search_parameters=self._search_parameters,
         )

     def _get_server_only_metrics(self, client, gpus):
@@ -414,3 +419,9 @@ def _warn_if_other_models_loaded_on_remote_server(self, client):
                 f"A model not being profiled ({model_name}) is loaded on the remote Tritonserver. "
                 "This could impact the profile results."
             )
+
+    def _populate_search_parameters(self):
+        for model in self._config.profile_models:
+            self._search_parameters[model.model_name()] = SearchParameters(
+                self._config, model.parameters(), model.model_config_parameters()
+            )
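The _populate_search_parameters hook added above builds one SearchParameters object per profiled model, keyed by model name; the Optuna run-config generator later draws trial values from those parameter descriptions. As a standalone illustration of the underlying mechanism, the ask/tell loop below uses the real optuna API but hypothetical parameter names and a dummy score; it is a sketch of the pattern, not the actual OptunaRunConfigGenerator code:

import optuna

# A study drives the search; "maximize" matches a throughput objective.
study = optuna.create_study(direction="maximize")

for _ in range(10):  # trial count would be capped by the search-space percentage
    trial = study.ask()
    # Draw one candidate model configuration from the search space.
    # Parameter names and ranges here are hypothetical.
    instance_count = trial.suggest_int("instance_count", 1, 5)
    max_batch_size = trial.suggest_int("max_batch_size", 1, 128, log=True)
    # Model Analyzer would profile this config here; we fake a score instead.
    score = float(instance_count * max_batch_size)
    study.tell(trial, score)

print(study.best_trial.params)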
145 changes: 145 additions & 0 deletions model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py
@@ -0,0 +1,145 @@
#!/usr/bin/env python3

# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
from copy import deepcopy
from typing import Dict, Generator, List, Optional

from model_analyzer.config.generate.model_profile_spec import ModelProfileSpec
from model_analyzer.config.generate.model_variant_name_manager import (
    ModelVariantNameManager,
)
from model_analyzer.config.generate.optuna_run_config_generator import (
    OptunaRunConfigGenerator,
)
from model_analyzer.config.generate.search_parameters import SearchParameters
from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
from model_analyzer.config.run.run_config import RunConfig
from model_analyzer.constants import LOGGER_NAME
from model_analyzer.result.parameter_search import ParameterSearch
from model_analyzer.result.result_manager import ResultManager
from model_analyzer.result.run_config_measurement import RunConfigMeasurement

from .config_generator_interface import ConfigGeneratorInterface

logger = logging.getLogger(LOGGER_NAME)


class OptunaPlusConcurrencySweepRunConfigGenerator(ConfigGeneratorInterface):
    """
    First run OptunaRunConfigGenerator for an Optuna search, then use
    ParameterSearch for a concurrency sweep + binary search of the default
    and top N results
    """

    def __init__(
        self,
        config: ConfigCommandProfile,
        gpu_count: int,
        models: List[ModelProfileSpec],
        result_manager: ResultManager,
        model_variant_name_manager: ModelVariantNameManager,
        search_parameters: Dict[str, SearchParameters],
    ):
        """
        Parameters
        ----------
        config: ConfigCommandProfile
            Profile configuration information
        gpu_count: Number of GPUs in the system
        models: List of ModelProfileSpec
            List of models to profile
        result_manager: ResultManager
            The object that handles storing and sorting the results from the perf analyzer
        model_variant_name_manager: ModelVariantNameManager
            Maps model variants to config names
        search_parameters: Dict[str, SearchParameters]
            Per-model objects that hold the user's configured search parameters
        """
        self._config = config
        self._gpu_count = gpu_count
        self._models = models
        self._result_manager = result_manager
        self._model_variant_name_manager = model_variant_name_manager
        self._search_parameters = search_parameters

    def set_last_results(
        self, measurements: List[Optional[RunConfigMeasurement]]
    ) -> None:
        self._last_measurement = measurements[-1]
        self._rcg.set_last_results(measurements)

    def get_configs(self) -> Generator[RunConfig, None, None]:
        """
        Returns
        -------
        RunConfig
            The next RunConfig generated by this class
        """

        logger.info("")
        logger.info("Starting Optuna mode search to find optimal configs")
        logger.info("")
        yield from self._execute_optuna_search()
        logger.info("")
        if self._config.concurrency_sweep_disable:
            logger.info("Done with Optuna mode search.")
        else:
            logger.info(
                "Done with Optuna mode search. Gathering concurrency sweep measurements for reports"
            )
            logger.info("")
            yield from self._sweep_concurrency_over_top_results()
            logger.info("")
            logger.info("Done gathering concurrency sweep measurements for reports")
            logger.info("")

    def _execute_optuna_search(self) -> Generator[RunConfig, None, None]:
        self._rcg: ConfigGeneratorInterface = self._create_optuna_run_config_generator()

        yield from self._rcg.get_configs()

    def _create_optuna_run_config_generator(self) -> OptunaRunConfigGenerator:
        return OptunaRunConfigGenerator(
            config=self._config,
            gpu_count=self._gpu_count,
            models=self._models,
            model_variant_name_manager=self._model_variant_name_manager,
            search_parameters=self._search_parameters,
        )

    def _sweep_concurrency_over_top_results(self) -> Generator[RunConfig, None, None]:
        for model_name in self._result_manager.get_model_names():
            top_results = self._result_manager.top_n_results(
                model_name=model_name,
                n=self._config.num_configs_per_model,
                include_default=True,
            )

            for result in top_results:
                run_config = deepcopy(result.run_config())
                parameter_search = ParameterSearch(self._config)
                for concurrency in parameter_search.search_parameters():
                    run_config = self._set_concurrency(run_config, concurrency)
                    yield run_config
                    parameter_search.add_run_config_measurement(self._last_measurement)

    def _set_concurrency(self, run_config: RunConfig, concurrency: int) -> RunConfig:
        for model_run_config in run_config.model_run_configs():
            perf_config = model_run_config.perf_config()
            perf_config.update_config({"concurrency-range": concurrency})

        return run_config
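One thing worth noting in the class above is the coroutine-style contract between get_configs() and set_last_results(): the generator yields a RunConfig, the caller profiles it, then reports the measurement back before the generator resumes; ParameterSearch relies on this to steer its binary search over concurrency. A self-contained toy sketch of that contract, with plain ints standing in for RunConfig and floats for RunConfigMeasurement (both hypothetical simplifications):

from typing import Generator, List, Optional


class ToyGenerator:
    """Minimal stand-in showing the get_configs/set_last_results contract."""

    def __init__(self) -> None:
        self._last: Optional[float] = None

    def set_last_results(self, measurements: List[Optional[float]]) -> None:
        # Mirror the real class: remember only the latest measurement.
        self._last = measurements[-1]

    def get_configs(self) -> Generator[int, None, None]:
        concurrency = 1
        while concurrency <= 8:
            yield concurrency  # caller profiles this "config"...
            # ...and must call set_last_results() before we resume here.
            assert self._last is not None
            concurrency *= 2


gen = ToyGenerator()
for config in gen.get_configs():
    measurement = float(config * 100)  # hypothetical throughput number
    gen.set_last_results([measurement])
    print(config, measurement)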