Optuna Search Mode (Alpha) Release (#896)
* Adding cli option for optuna search (#867)

* Adding cli option for optuna search

* Changed RCS description

* Class to hold info about parameters (#868)

* Initial code for ConfigParameters class

* Fixing codeql issue

* Fixes based on review

* Connect up parameter description class (#869)

* Added hooks for creating search parameters with some basic unit testing

* Adding more unit testing

* Cleaning up codeql

* Adding story ref for TODO

* Changes based on review comments

* Refactored ConfigParameters

* Renaming to SearchParameter(s)

* Moving unit testing into SearchParameters test class

* Fix codeql issues

* Creating Optuna RCG factory (#878)

* Creating optuna RCG factory

* fixing codeql issues

* Removing metrics manager

* Fixing mypy failure

* Optuna Search Class (#877)

* Base Optuna class plus unit testing

* codeql fixes

* more codeql fixes

* Removing metrics manager

* Removing metrics manager from Optuna RCG unit test

* Removing client from quick/optuna RCGs

* Changing gpus to gpu_count in quick/optuna RCGs

* Removing magic number

* Fixing codeql issue

* Fixing optuna version

* Adding todo comment about client batch size support

* Using SearchParameters in OptunaRCG (#881)

* Using SearchParameters in OptunaRCG

* Fixing search parameter unit tests

* Removing debug line

* Changes based on PR

* Adding call for default parameters

* Added todo for dynamic batching

* Add Percentage Search Space to Optuna (#882)

* Added method for calculating total possible configurations

* Added min/max percentage of search space to CLI

* Connected up in optuna RCG

* Added in support to cap optuna search based on a strict number of trials (#884)

* Adding support for concurrency formula as an option in Optuna search (#885)

* Fixing merge conflict

* Adding --use-concurrency-formula to unit testing

* Add Debug info to Optuna (#889)

* Adding debug info + bug fixes

* Fixes based on PR

* Optuna Early Exit (#890)

* Add logic to enable early exit along with CLI hooks.

* Changes based on PR

* Check that model supports dynamic batching when creating param_combo (#891)

* Adding option to disable concurrency sweeping (#893)

* Adding support for client batch size (#892)

* Adding support for client batch size

* Fixes based on PR

* Removing redundant keys()

* Fixing codeQL issue

* Attempt to fix unittest issue

* Removing 3.8 testing
nv-braf authored Jun 7, 2024
1 parent ef12a85 commit acf085f
Showing 21 changed files with 2,114 additions and 81 deletions.
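For readers trying the alpha out, the commit log above implies the new mode is selected through the existing run-config-search CLI. A minimal invocation sketch follows; only --use-concurrency-formula is quoted verbatim in the log above, so treat the other flag spellings (--run-config-search-mode optuna and the search-space percentage options) as assumptions inferred from the commit messages, and the model name and repository path as placeholders:

# Sketch only: flag spellings other than --use-concurrency-formula are
# inferred from the commit messages, not quoted from the docs.
model-analyzer profile \
    --model-repository /path/to/model_repository \
    --profile-models my_model \
    --run-config-search-mode optuna \
    --min-percentage-of-search-space 5 \
    --max-percentage-of-search-space 10 \
    --use-concurrency-formula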
28 changes: 14 additions & 14 deletions .github/workflows/python-package.yaml
@@ -39,21 +39,21 @@ jobs:
       fail-fast: false
       matrix:
         os: ["ubuntu-22.04"]
-        python-version: ["3.8", "3.11"]
+        python-version: ["3.11"]
     env:
       SKIP_GPU_TESTS: 1

     steps:
-    - uses: actions/checkout@v3
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v3
-      with:
-        python-version: ${{ matrix.python-version }}
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        python -m pip install -e .
-    - name: Test with unittest
-      run: |
-        pip install unittest-parallel
-        python3 -m unittest_parallel -v -s ./tests -t .
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v3
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install -e .
+      - name: Test with unittest
+        run: |
+          pip install unittest-parallel
+          python3 -m unittest_parallel -v -s ./tests -t .
13 changes: 12 additions & 1 deletion model_analyzer/analyzer.py
@@ -17,12 +17,13 @@
 import logging
 import sys
 from copy import deepcopy
-from typing import List, Optional, Union
+from typing import Dict, List, Optional, Union

 from model_analyzer.cli.cli import CLI
 from model_analyzer.config.generate.base_model_config_generator import (
     BaseModelConfigGenerator,
 )
+from model_analyzer.config.generate.search_parameters import SearchParameters
 from model_analyzer.constants import LOGGER_NAME, PA_ERROR_LOG_FILENAME
 from model_analyzer.state.analyzer_state_manager import AnalyzerStateManager
 from model_analyzer.triton.server.server import TritonServer
@@ -82,6 +83,8 @@ def __init__(
             constraint_manager=self._constraint_manager,
         )

+        self._search_parameters: Dict[str, SearchParameters] = {}
+
     def profile(
         self, client: TritonClient, gpus: List[GPUDevice], mode: str, verbose: bool
     ) -> None:
@@ -115,6 +118,7 @@ def profile(

         self._create_metrics_manager(client, gpus)
         self._create_model_manager(client, gpus)
+        self._populate_search_parameters()

         if self._config.triton_launch_mode == "remote":
             self._warn_if_other_models_loaded_on_remote_server(client)
@@ -200,6 +204,7 @@ def _create_model_manager(self, client, gpus):
             metrics_manager=self._metrics_manager,
             state_manager=self._state_manager,
             constraint_manager=self._constraint_manager,
+            search_parameters=self._search_parameters,
         )

     def _get_server_only_metrics(self, client, gpus):
@@ -414,3 +419,9 @@ def _warn_if_other_models_loaded_on_remote_server(self, client):
                 f"A model not being profiled ({model_name}) is loaded on the remote Tritonserver. "
                 "This could impact the profile results."
             )
+
+    def _populate_search_parameters(self):
+        for model in self._config.profile_models:
+            self._search_parameters[model.model_name()] = SearchParameters(
+                self._config, model.parameters(), model.model_config_parameters()
+            )
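The _populate_search_parameters hook added above builds one SearchParameters object per profiled model, keyed by model name; the Optuna run-config generator later draws trial values from those parameter descriptions. As a standalone illustration of the underlying mechanism, the ask/tell loop below uses the real optuna API but hypothetical parameter names and a dummy score; it is a sketch of the pattern, not the actual OptunaRunConfigGenerator code:

import optuna

# A study drives the search; "maximize" matches a throughput objective.
study = optuna.create_study(direction="maximize")

for _ in range(10):  # trial count would be capped by the search-space percentage
    trial = study.ask()
    # Draw one candidate model configuration from the search space.
    # Parameter names and ranges here are hypothetical.
    instance_count = trial.suggest_int("instance_count", 1, 5)
    max_batch_size = trial.suggest_int("max_batch_size", 1, 128, log=True)
    # Model Analyzer would profile this config here; we fake a score instead.
    score = float(instance_count * max_batch_size)
    study.tell(trial, score)

print(study.best_trial.params)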
145 changes: 145 additions & 0 deletions model_analyzer/config/generate/optuna_plus_concurrency_sweep_run_config_generator.py
@@ -0,0 +1,145 @@
#!/usr/bin/env python3

# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
from copy import deepcopy
from typing import Dict, Generator, List, Optional

from model_analyzer.config.generate.model_profile_spec import ModelProfileSpec
from model_analyzer.config.generate.model_variant_name_manager import (
    ModelVariantNameManager,
)
from model_analyzer.config.generate.optuna_run_config_generator import (
    OptunaRunConfigGenerator,
)
from model_analyzer.config.generate.search_parameters import SearchParameters
from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
from model_analyzer.config.run.run_config import RunConfig
from model_analyzer.constants import LOGGER_NAME
from model_analyzer.result.parameter_search import ParameterSearch
from model_analyzer.result.result_manager import ResultManager
from model_analyzer.result.run_config_measurement import RunConfigMeasurement

from .config_generator_interface import ConfigGeneratorInterface

logger = logging.getLogger(LOGGER_NAME)


class OptunaPlusConcurrencySweepRunConfigGenerator(ConfigGeneratorInterface):
    """
    First run OptunaRunConfigGenerator for an Optuna search, then use
    ParameterSearch for a concurrency sweep + binary search of the default
    and top N results
    """

    def __init__(
        self,
        config: ConfigCommandProfile,
        gpu_count: int,
        models: List[ModelProfileSpec],
        result_manager: ResultManager,
        model_variant_name_manager: ModelVariantNameManager,
        search_parameters: Dict[str, SearchParameters],
    ):
        """
        Parameters
        ----------
        config: ConfigCommandProfile
            Profile configuration information
        gpu_count: Number of GPUs in the system
        models: List of ModelProfileSpec
            List of models to profile
        result_manager: ResultManager
            The object that handles storing and sorting the results from the perf analyzer
        model_variant_name_manager: ModelVariantNameManager
            Maps model variants to config names
        search_parameters: Dict[str, SearchParameters]
            Per-model objects that hold the user's configured search parameters
        """
        self._config = config
        self._gpu_count = gpu_count
        self._models = models
        self._result_manager = result_manager
        self._model_variant_name_manager = model_variant_name_manager
        self._search_parameters = search_parameters

    def set_last_results(
        self, measurements: List[Optional[RunConfigMeasurement]]
    ) -> None:
        self._last_measurement = measurements[-1]
        self._rcg.set_last_results(measurements)

    def get_configs(self) -> Generator[RunConfig, None, None]:
        """
        Returns
        -------
        RunConfig
            The next RunConfig generated by this class
        """

        logger.info("")
        logger.info("Starting Optuna mode search to find optimal configs")
        logger.info("")
        yield from self._execute_optuna_search()
        logger.info("")
        if self._config.concurrency_sweep_disable:
            logger.info("Done with Optuna mode search.")
        else:
            logger.info(
                "Done with Optuna mode search. Gathering concurrency sweep measurements for reports"
            )
            logger.info("")
            yield from self._sweep_concurrency_over_top_results()
            logger.info("")
            logger.info("Done gathering concurrency sweep measurements for reports")
            logger.info("")

    def _execute_optuna_search(self) -> Generator[RunConfig, None, None]:
        self._rcg: ConfigGeneratorInterface = self._create_optuna_run_config_generator()

        yield from self._rcg.get_configs()

    def _create_optuna_run_config_generator(self) -> OptunaRunConfigGenerator:
        return OptunaRunConfigGenerator(
            config=self._config,
            gpu_count=self._gpu_count,
            models=self._models,
            model_variant_name_manager=self._model_variant_name_manager,
            search_parameters=self._search_parameters,
        )

    def _sweep_concurrency_over_top_results(self) -> Generator[RunConfig, None, None]:
        for model_name in self._result_manager.get_model_names():
            top_results = self._result_manager.top_n_results(
                model_name=model_name,
                n=self._config.num_configs_per_model,
                include_default=True,
            )

            for result in top_results:
                run_config = deepcopy(result.run_config())
                parameter_search = ParameterSearch(self._config)
                for concurrency in parameter_search.search_parameters():
                    run_config = self._set_concurrency(run_config, concurrency)
                    yield run_config
                    parameter_search.add_run_config_measurement(self._last_measurement)

    def _set_concurrency(self, run_config: RunConfig, concurrency: int) -> RunConfig:
        for model_run_config in run_config.model_run_configs():
            perf_config = model_run_config.perf_config()
            perf_config.update_config({"concurrency-range": concurrency})

        return run_config
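One thing worth noting in the class above is the coroutine-style contract between get_configs() and set_last_results(): the generator yields a RunConfig, the caller profiles it, then reports the measurement back before the generator resumes; ParameterSearch relies on this to steer its binary search over concurrency. A self-contained toy sketch of that contract, with plain ints standing in for RunConfig and floats for RunConfigMeasurement (both hypothetical simplifications):

from typing import Generator, List, Optional


class ToyGenerator:
    """Minimal stand-in showing the get_configs/set_last_results contract."""

    def __init__(self) -> None:
        self._last: Optional[float] = None

    def set_last_results(self, measurements: List[Optional[float]]) -> None:
        # Mirror the real class: remember only the latest measurement.
        self._last = measurements[-1]

    def get_configs(self) -> Generator[int, None, None]:
        concurrency = 1
        while concurrency <= 8:
            yield concurrency  # caller profiles this "config"...
            # ...and must call set_last_results() before we resume here.
            assert self._last is not None
            concurrency *= 2


gen = ToyGenerator()
for config in gen.get_configs():
    measurement = float(config * 100)  # hypothetical throughput number
    gen.set_last_results([measurement])
    print(config, measurement)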