From 390907d81900abb60fb1dea4c0bd6403c3dd5fc1 Mon Sep 17 00:00:00 2001 From: Brian Raf <92820864+nv-braf@users.noreply.github.com> Date: Fri, 6 Sep 2024 08:59:05 -0700 Subject: [PATCH] Add SearchParameters class (#76) * Initial code done. Some unit testing in place * All unit tests passing + pre-commit changes * Fixing codeQL issue * Fixing pytest issue * Adding TypeAlias * Removing python 3.8 * Changes based on pre-review w/ Elias * Fixing codeQL issue * Removing type ignore * Fixing comment --- .../config/generate/search_parameter.py | 49 +++ .../config/generate/search_parameters.py | 385 +++++++++++++++++ .../genai_perf/config/input/config_command.py | 117 +++++ genai-perf/genai_perf/config/model_spec.py | 45 ++ genai-perf/tests/test_search_parameters.py | 398 ++++++++++++++++++ 5 files changed, 994 insertions(+) create mode 100644 genai-perf/genai_perf/config/generate/search_parameter.py create mode 100644 genai-perf/genai_perf/config/generate/search_parameters.py create mode 100644 genai-perf/genai_perf/config/input/config_command.py create mode 100644 genai-perf/genai_perf/config/model_spec.py create mode 100644 genai-perf/tests/test_search_parameters.py diff --git a/genai-perf/genai_perf/config/generate/search_parameter.py b/genai-perf/genai_perf/config/generate/search_parameter.py new file mode 100644 index 00000000..ba2f433a --- /dev/null +++ b/genai-perf/genai_perf/config/generate/search_parameter.py @@ -0,0 +1,49 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass +from enum import Enum, auto +from typing import Any, List, Optional, TypeAlias, Union + +ParameterList: TypeAlias = List[Union[int, str]] + + +class ParameterUsage(Enum): + MODEL = auto() + RUNTIME = auto() + BUILD = auto() + + +class ParameterCategory(Enum): + INTEGER = auto() + EXPONENTIAL = auto() + STR_LIST = auto() + INT_LIST = auto() + + +@dataclass +class SearchParameter: + """ + A dataclass that holds information about a configuration's search parameter + """ + + usage: ParameterUsage + category: ParameterCategory + + # This is only applicable to the LIST categories + enumerated_list: Optional[List[Any]] = None + + # These are only applicable to INTEGER and EXPONENTIAL categories + min_range: Optional[int] = None + max_range: Optional[int] = None diff --git a/genai-perf/genai_perf/config/generate/search_parameters.py b/genai-perf/genai_perf/config/generate/search_parameters.py new file mode 100644 index 00000000..8c171037 --- /dev/null +++ b/genai-perf/genai_perf/config/generate/search_parameters.py @@ -0,0 +1,385 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from math import log2 +from typing import Any, Dict, List, Optional + +from genai_perf.config.generate.search_parameter import ( + ParameterCategory, + ParameterList, + ParameterUsage, + SearchParameter, +) +from genai_perf.config.input.config_command import ConfigCommand, Range +from genai_perf.exceptions import GenAIPerfException + + +class SearchParameters: + """ + Contains information about all configuration parameters the user wants to search + """ + + # These map to the various fields that can be set for PA and model configs + # See github.com/triton-inference-server/model_analyzer/blob/main/docs/config.md + exponential_range_parameters = [ + "model_batch_size", + "runtime_batch_size", + "concurrency", + "request_rate", + ] + linear_range_parameters = ["instance_count"] + + model_parameters = [ + "model_batch_size", + "instance_count", + "max_queue_delay", + ] + runtime_parameters = ["runtime_batch_size", "concurrency", "request_rate"] + + def __init__( + self, + config: ConfigCommand = ConfigCommand(), + is_bls_model: bool = False, + is_ensemble_model: bool = False, + is_composing_model: bool = False, + ): + self._config = config.optimize + + # TODO: OPTIMIZE + # self._supports_model_batch_size = model.supports_batching() + self._supports_model_batch_size = True + + self._search_parameters: Dict[str, SearchParameter] = {} + self._is_ensemble_model = is_ensemble_model + self._is_bls_model = is_bls_model + self._is_composing_model = is_composing_model + + self._populate_search_parameters() + + ########################################################################### + # Accessor Methods + ########################################################################### + def get_parameter(self, name: str) -> Optional[SearchParameter]: + return self._search_parameters.get(name) + + def get_type(self, name: str) -> ParameterUsage: + return self._search_parameters[name].usage + + def get_category(self, name: str) -> ParameterCategory: + return self._search_parameters[name].category + + def get_range(self, name: str) -> Range: + min_range = int(self._search_parameters[name].min_range or 0) + max_range = int(self._search_parameters[name].max_range or 0) + + return Range( + min=min_range, + max=max_range, + ) + + def get_list(self, name: str) -> Optional[List[Any]]: + return self._search_parameters[name].enumerated_list + + ########################################################################### + # Search Parameters + ########################################################################### + def _populate_search_parameters(self) -> None: + self._populate_perf_analyzer_parameters() + self._populate_model_config_parameters() + + ########################################################################### + # Perf Analyzer Parameters + ########################################################################### + def _populate_perf_analyzer_parameters(self) -> None: + self._populate_perf_analyzer_batch_size() + + if not self._is_composing_model: + if self._config.is_request_rate_specified(): + self._populate_request_rate() + else: + self._populate_concurrency() + + def 
_populate_perf_analyzer_batch_size(self) -> None: + if isinstance(self._config.perf_analyzer.batch_size, list): + self._populate_list_parameter( + parameter_name="runtime_batch_size", + parameter_list=list(self._config.perf_analyzer.batch_size), + parameter_category=ParameterCategory.INT_LIST, + ) + elif isinstance(self._config.perf_analyzer.batch_size, Range): + self._populate_range_parameter( + parameter_name="runtime_batch_size", + parameter_min_value=self._config.perf_analyzer.batch_size.min, + parameter_max_value=self._config.perf_analyzer.batch_size.max, + ) + + def _populate_concurrency(self) -> None: + if self._config.perf_analyzer.use_concurrency_formula: + return + elif isinstance(self._config.perf_analyzer.concurrency, list): + self._populate_list_parameter( + parameter_name="concurrency", + parameter_list=list(self._config.perf_analyzer.concurrency), + parameter_category=ParameterCategory.INT_LIST, + ) + elif isinstance(self._config.perf_analyzer.concurrency, Range): + self._populate_range_parameter( + parameter_name="concurrency", + parameter_min_value=self._config.perf_analyzer.concurrency.min, + parameter_max_value=self._config.perf_analyzer.concurrency.max, + ) + + def _populate_request_rate(self) -> None: + if isinstance(self._config.perf_analyzer.request_rate, list): + self._populate_list_parameter( + parameter_name="request_rate", + parameter_list=list(self._config.perf_analyzer.request_rate), + parameter_category=ParameterCategory.INT_LIST, + ) + elif isinstance(self._config.perf_analyzer.request_rate, Range): + self._populate_range_parameter( + parameter_name="request_rate", + parameter_min_value=self._config.perf_analyzer.request_rate.min, + parameter_max_value=self._config.perf_analyzer.request_rate.max, + ) + + ########################################################################### + # Model Config Parameters + ########################################################################### + def _populate_model_config_parameters(self) -> None: + self._populate_model_batch_size() + self._populate_instance_count() + self._populate_max_queue_delay() + + def _populate_model_batch_size(self) -> None: + if isinstance(self._config.model_config.batch_size, list): + self._populate_list_parameter( + parameter_name="model_batch_size", + parameter_list=list(self._config.model_config.batch_size), + parameter_category=ParameterCategory.INT_LIST, + ) + elif ( + self._supports_model_batch_size + and not self._is_bls_model + and isinstance(self._config.model_config.batch_size, Range) + ): + # Need to populate max_batch_size based on range values + # when no model config parameters are present + self._populate_range_parameter( + parameter_name="model_batch_size", + parameter_min_value=self._config.model_config.batch_size.min, + parameter_max_value=self._config.model_config.batch_size.max, + ) + + def _populate_instance_count(self) -> None: + if isinstance(self._config.model_config.instance_count, list): + self._populate_list_parameter( + parameter_name="instance_count", + parameter_list=list(self._config.model_config.instance_count), + parameter_category=ParameterCategory.INT_LIST, + ) + elif not self._is_ensemble_model and isinstance( + self._config.model_config.instance_count, Range + ): + # Need to populate instance_count based on range values + # when no model config parameters are present + self._populate_range_parameter( + parameter_name="instance_count", + parameter_min_value=self._config.model_config.instance_count.min, + 
parameter_max_value=self._config.model_config.instance_count.max,
+            )
+
+    def _populate_max_queue_delay(self) -> None:
+        if isinstance(self._config.model_config.max_queue_delay, list):
+            self._populate_list_parameter(
+                parameter_name="max_queue_delay",
+                parameter_list=list(self._config.model_config.max_queue_delay),
+                parameter_category=ParameterCategory.INT_LIST,
+            )
+        elif isinstance(self._config.model_config.max_queue_delay, Range):
+            self._populate_range_parameter(
+                parameter_name="max_queue_delay",
+                parameter_min_value=self._config.model_config.max_queue_delay.min,
+                parameter_max_value=self._config.model_config.max_queue_delay.max,
+            )
+
+    ###########################################################################
+    # Populate Methods
+    ###########################################################################
+    def _populate_list_parameter(
+        self,
+        parameter_name: str,
+        parameter_list: ParameterList,
+        parameter_category: ParameterCategory,
+    ) -> None:
+        usage = self._determine_parameter_usage(parameter_name)
+
+        self._add_search_parameter(
+            name=parameter_name,
+            usage=usage,
+            category=parameter_category,
+            enumerated_list=parameter_list,
+        )
+
+    def _populate_range_parameter(
+        self,
+        parameter_name: str,
+        parameter_min_value: int,
+        parameter_max_value: int,
+    ) -> None:
+        usage = self._determine_parameter_usage(parameter_name)
+        category = self._determine_parameter_category(parameter_name)
+
+        if category == ParameterCategory.EXPONENTIAL:
+            min_range = int(log2(parameter_min_value))  # type: ignore
+            max_range = int(log2(parameter_max_value))  # type: ignore
+        else:
+            min_range = parameter_min_value  # type: ignore
+            max_range = parameter_max_value  # type: ignore
+
+        self._add_search_parameter(
+            name=parameter_name,
+            usage=usage,
+            category=category,
+            min_range=min_range,
+            max_range=max_range,
+        )
+
+    def _determine_parameter_category(self, name: str) -> ParameterCategory:
+        if name in SearchParameters.exponential_range_parameters:
+            category = ParameterCategory.EXPONENTIAL
+        elif name in SearchParameters.linear_range_parameters:
+            category = ParameterCategory.INTEGER
+        else:
+            raise GenAIPerfException(f"ParameterCategory not found for {name}")
+
+        return category
+
+    def _determine_parameter_usage(self, name: str) -> ParameterUsage:
+        if name in SearchParameters.model_parameters:
+            usage = ParameterUsage.MODEL
+        elif name in SearchParameters.runtime_parameters:
+            usage = ParameterUsage.RUNTIME
+        else:
+            raise GenAIPerfException(f"ParameterUsage not found for {name}")
+
+        return usage
+
+    def _add_search_parameter(
+        self,
+        name: str,
+        usage: ParameterUsage,
+        category: ParameterCategory,
+        min_range: Optional[int] = None,
+        max_range: Optional[int] = None,
+        enumerated_list: Optional[List[Any]] = None,
+    ) -> None:
+        self._check_for_illegal_input(category, min_range, max_range, enumerated_list)
+
+        self._search_parameters[name] = SearchParameter(
+            usage=usage,
+            category=category,
+            enumerated_list=enumerated_list,
+            min_range=min_range,
+            max_range=max_range,
+        )
+
+    ###########################################################################
+    # Info/Debug Methods
+    ###########################################################################
+    def number_of_total_possible_configurations(self) -> int:
+        total_number_of_configs = 1
+        for parameter in self._search_parameters.values():
+            total_number_of_configs *= self._number_of_configurations_for_parameter(
+                parameter
+            )
+
+        return total_number_of_configs
+
+    def print_info(self, name: str) -> str:
+        info_string = f"  {name}: "
+
+        parameter = self._search_parameters[name]
+        if parameter.category is ParameterCategory.INTEGER:
+            info_string += f"{parameter.min_range} to {parameter.max_range}"
+        elif parameter.category is ParameterCategory.EXPONENTIAL:
+            info_string += f"{2**parameter.min_range} to {2**parameter.max_range}"  # type: ignore
+        elif (
+            parameter.category is ParameterCategory.INT_LIST
+            or parameter.category is ParameterCategory.STR_LIST
+        ):
+            info_string += f"{parameter.enumerated_list}"
+
+        info_string += f" ({self._number_of_configurations_for_parameter(parameter)})"
+
+        return info_string
+
+    def _number_of_configurations_for_parameter(
+        self, parameter: SearchParameter
+    ) -> int:
+        if (
+            parameter.category is ParameterCategory.INTEGER
+            or parameter.category is ParameterCategory.EXPONENTIAL
+        ):
+            number_of_parameter_configs = parameter.max_range - parameter.min_range + 1  # type: ignore
+        else:
+            number_of_parameter_configs = len(parameter.enumerated_list)  # type: ignore
+
+        return number_of_parameter_configs
+
+    ###########################################################################
+    # Error Checking Methods
+    ###########################################################################
+    def _check_for_illegal_input(
+        self,
+        category: ParameterCategory,
+        min_range: Optional[int],
+        max_range: Optional[int],
+        enumerated_list: Optional[List[Any]],
+    ) -> None:
+        if (
+            category is ParameterCategory.INT_LIST
+            or category is ParameterCategory.STR_LIST
+        ):
+            self._check_for_illegal_list_input(min_range, max_range, enumerated_list)
+        else:
+            if min_range is None or max_range is None:
+                raise GenAIPerfException(
+                    "Both min_range and max_range must be specified"
+                )
+
+            if min_range > max_range:
+                raise GenAIPerfException(
+                    "min_range cannot be larger than max_range"
+                )
+
+    def _check_for_illegal_list_input(
+        self,
+        min_range: Optional[int],
+        max_range: Optional[int],
+        enumerated_list: Optional[List[Any]],
+    ) -> None:
+        if not enumerated_list:
+            raise GenAIPerfException(
+                "enumerated_list must be specified for a LIST category"
+            )
+        elif min_range is not None:
+            raise GenAIPerfException(
+                "min_range cannot be specified for a LIST category"
+            )
+        elif max_range is not None:
+            raise GenAIPerfException(
+                "max_range cannot be specified for a LIST category"
+            )
diff --git a/genai-perf/genai_perf/config/input/config_command.py b/genai-perf/genai_perf/config/input/config_command.py
new file mode 100644
index 00000000..611a2459
--- /dev/null
+++ b/genai-perf/genai_perf/config/input/config_command.py
@@ -0,0 +1,117 @@
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
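+
+# Illustrative usage (a sketch, assuming these placeholder dataclasses remain
+# the interface until the real Command Parser is written):
+#
+#   config = ConfigCommand()
+#   config.optimize.perf_analyzer.stimulus_type = "request_rate"
+#   config.optimize.model_config.instance_count = [1, 2, 4]
+#
+# Passed to SearchParameters, this config would search request_rate over an
+# exponential range and instance_count over the enumerated list.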
+
+from copy import copy
+from dataclasses import dataclass, field
+from typing import List, Optional, TypeAlias, Union
+
+
+def default_field(obj):
+    return field(default_factory=lambda: copy(obj))
+
+
+# TODO: OPTIMIZE
+# These will be moved to RunConfig once it's created
+@dataclass(frozen=True)
+class RunConfigDefaults:
+    # Model Defaults
+    MIN_MODEL_BATCH_SIZE = 1
+    MAX_MODEL_BATCH_SIZE = 128
+    MIN_INSTANCE_COUNT = 1
+    MAX_INSTANCE_COUNT = 5
+    MAX_QUEUE_DELAY = None
+    DYNAMIC_BATCHING = True
+    CPU_ONLY = False
+
+    # PA Defaults
+    STIMULUS_TYPE = "concurrency"
+    PA_BATCH_SIZE = [1]
+    MIN_CONCURRENCY = 1
+    MAX_CONCURRENCY = 1024
+    MIN_REQUEST_RATE = 16
+    MAX_REQUEST_RATE = 8192
+    USE_CONCURRENCY_FORMULA = True
+
+
+# TODO: OPTIMIZE
+# These are placeholder dataclasses until the real Command Parser is written
+
+
+@dataclass
+class Range:
+    min: int
+    max: int
+
+
+ConfigRangeOrList: TypeAlias = Optional[Union[Range, List[int]]]
+
+
+@dataclass
+class ConfigModelConfig:
+    batch_size: ConfigRangeOrList = default_field(
+        Range(
+            min=RunConfigDefaults.MIN_MODEL_BATCH_SIZE,
+            max=RunConfigDefaults.MAX_MODEL_BATCH_SIZE,
+        )
+    )
+    instance_count: ConfigRangeOrList = default_field(
+        Range(
+            min=RunConfigDefaults.MIN_INSTANCE_COUNT,
+            max=RunConfigDefaults.MAX_INSTANCE_COUNT,
+        )
+    )
+    max_queue_delay: ConfigRangeOrList = default_field(
+        RunConfigDefaults.MAX_QUEUE_DELAY
+    )
+    dynamic_batching: bool = default_field(RunConfigDefaults.DYNAMIC_BATCHING)
+    cpu_only: bool = default_field(RunConfigDefaults.CPU_ONLY)
+
+
+@dataclass
+class ConfigPerfAnalyzer:
+    stimulus_type: str = default_field(RunConfigDefaults.STIMULUS_TYPE)
+    batch_size: ConfigRangeOrList = default_field(RunConfigDefaults.PA_BATCH_SIZE)
+    concurrency: ConfigRangeOrList = default_field(
+        Range(
+            min=RunConfigDefaults.MIN_CONCURRENCY, max=RunConfigDefaults.MAX_CONCURRENCY
+        )
+    )
+    request_rate: ConfigRangeOrList = default_field(
+        Range(
+            min=RunConfigDefaults.MIN_REQUEST_RATE,
+            max=RunConfigDefaults.MAX_REQUEST_RATE,
+        )
+    )
+    use_concurrency_formula: bool = default_field(
+        RunConfigDefaults.USE_CONCURRENCY_FORMULA
+    )
+
+    def is_request_rate_specified(self) -> bool:
+        return self.stimulus_type == "request_rate"
+
+
+@dataclass
+class ConfigOptimize:
+    # Use default_factory so each config gets its own sub-config instances
+    model_config: ConfigModelConfig = field(default_factory=ConfigModelConfig)
+    perf_analyzer: ConfigPerfAnalyzer = field(default_factory=ConfigPerfAnalyzer)
+
+    def is_request_rate_specified(self) -> bool:
+        return self.perf_analyzer.is_request_rate_specified()
+
+
+@dataclass
+class ConfigCommand:
+    optimize: ConfigOptimize = field(default_factory=ConfigOptimize)
diff --git a/genai-perf/genai_perf/config/model_spec.py b/genai-perf/genai_perf/config/model_spec.py
new file mode 100644
index 00000000..6ff5b6ef
--- /dev/null
+++ b/genai-perf/genai_perf/config/model_spec.py
@@ -0,0 +1,45 @@
+# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
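+
+# Illustrative construction (a sketch; the field values below are
+# hypothetical examples, not defaults shipped by this PR):
+#
+#   spec = ModelSpec(
+#       model_name="add_sub",
+#       objectives=["perf_throughput"],
+#       perf_analyzer_flags={"percentile": 96},
+#   )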
+ +from dataclasses import dataclass +from typing import Dict, List, Optional + +# TODO: OPTIMIZE +# from genai_perf.result.model_constraints import ModelConstraints + + +@dataclass +class ModelSpec: + """ + A dataclass that specifies the various ways + a model is configured among PA/GAP/Triton + """ + + # Model information/parameters + model_name: str + cpu_only: bool = False + objectives: Optional[List] = None + # TODO: OPTIMIZE + # constraints: Optional[ModelConstraints] + model_config_parameters: Optional[Dict] = None + + # PA/GAP flags/parameters + perf_analyzer_parameters: Optional[Dict] = None + perf_analyzer_flags: Optional[Dict] = None + genai_perf_flags: Optional[Dict] = None + + # Triton flags/args + triton_server_flags: Optional[Dict] = None + triton_server_args: Optional[Dict] = None + triton_docker_args: Optional[Dict] = None diff --git a/genai-perf/tests/test_search_parameters.py b/genai-perf/tests/test_search_parameters.py new file mode 100644 index 00000000..31c380e9 --- /dev/null +++ b/genai-perf/tests/test_search_parameters.py @@ -0,0 +1,398 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +from copy import deepcopy +from math import log2 +from unittest.mock import patch + +from genai_perf.config.generate.search_parameters import ( + ParameterCategory, + ParameterUsage, + SearchParameters, +) +from genai_perf.config.input.config_command import ( + ConfigCommand, + Range, + RunConfigDefaults, +) +from genai_perf.exceptions import GenAIPerfException + + +class TestSearchParameters(unittest.TestCase): + def setUp(self): + self.config = deepcopy(ConfigCommand()) + + self.search_parameters = SearchParameters(config=self.config) + + self.search_parameters._add_search_parameter( + name="concurrency", + usage=ParameterUsage.RUNTIME, + category=ParameterCategory.EXPONENTIAL, + min_range=log2(RunConfigDefaults.MIN_CONCURRENCY), + max_range=log2(RunConfigDefaults.MAX_CONCURRENCY), + ) + + self.search_parameters._add_search_parameter( + name="size", + usage=ParameterUsage.BUILD, + category=ParameterCategory.STR_LIST, + enumerated_list=["FP8", "FP16", "FP32"], + ) + + def tearDown(self): + patch.stopall() + + def test_exponential_parameter(self): + """ + Test exponential parameter, accessing dataclass directly + """ + + parameter = self.search_parameters.get_parameter("concurrency") + + self.assertEqual(ParameterUsage.RUNTIME, parameter.usage) + self.assertEqual(ParameterCategory.EXPONENTIAL, parameter.category) + self.assertEqual(log2(RunConfigDefaults.MIN_CONCURRENCY), parameter.min_range) + self.assertEqual(log2(RunConfigDefaults.MAX_CONCURRENCY), parameter.max_range) + + def test_integer_parameter(self): + """ + Test integer parameter, using accessor methods + """ + + self.assertEqual( + ParameterUsage.MODEL, + self.search_parameters.get_type("instance_count"), + ) + self.assertEqual( + ParameterCategory.INTEGER, + self.search_parameters.get_category("instance_count"), + ) + 
self.assertEqual( + Range( + min=RunConfigDefaults.MIN_INSTANCE_COUNT, + max=RunConfigDefaults.MAX_INSTANCE_COUNT, + ), + self.search_parameters.get_range("instance_count"), + ) + + def test_list_parameter(self): + """ + Test list parameter, using accessor methods + """ + + self.assertEqual( + ParameterUsage.BUILD, + self.search_parameters.get_type("size"), + ) + self.assertEqual( + ParameterCategory.STR_LIST, + self.search_parameters.get_category("size"), + ) + self.assertEqual( + ["FP8", "FP16", "FP32"], self.search_parameters.get_list("size") + ) + + def test_illegal_inputs(self): + """ + Check that an exception is raised for illegal input combos + """ + with self.assertRaises(GenAIPerfException): + self.search_parameters._add_search_parameter( + name="concurrency", + usage=ParameterUsage.RUNTIME, + category=ParameterCategory.EXPONENTIAL, + max_range=10, + ) + + with self.assertRaises(GenAIPerfException): + self.search_parameters._add_search_parameter( + name="concurrency", + usage=ParameterUsage.RUNTIME, + category=ParameterCategory.EXPONENTIAL, + min_range=0, + ) + + with self.assertRaises(GenAIPerfException): + self.search_parameters._add_search_parameter( + name="concurrency", + usage=ParameterUsage.RUNTIME, + category=ParameterCategory.EXPONENTIAL, + min_range=10, + max_range=9, + ) + + with self.assertRaises(GenAIPerfException): + self.search_parameters._add_search_parameter( + name="size", + usage=ParameterUsage.BUILD, + category=ParameterCategory.INT_LIST, + ) + + with self.assertRaises(GenAIPerfException): + self.search_parameters._add_search_parameter( + name="size", + usage=ParameterUsage.BUILD, + category=ParameterCategory.STR_LIST, + enumerated_list=["FP8", "FP16", "FP32"], + min_range=0, + ) + + with self.assertRaises(GenAIPerfException): + self.search_parameters._add_search_parameter( + name="size", + usage=ParameterUsage.BUILD, + category=ParameterCategory.STR_LIST, + enumerated_list=["FP8", "FP16", "FP32"], + max_range=10, + ) + + def test_search_parameter_creation_optimize_default(self): + """ + Test that search parameters are correctly created in default optimize case + """ + + config = deepcopy(ConfigCommand()) + search_parameters = SearchParameters(config) + + ####################################################################### + # Model Config + ####################################################################### + + # Batch Size + # ===================================================================== + model_batch_size = search_parameters.get_parameter("model_batch_size") + self.assertEqual(ParameterUsage.MODEL, model_batch_size.usage) + self.assertEqual(ParameterCategory.EXPONENTIAL, model_batch_size.category) + self.assertEqual( + log2(RunConfigDefaults.MIN_MODEL_BATCH_SIZE), + model_batch_size.min_range, + ) + self.assertEqual( + log2(RunConfigDefaults.MAX_MODEL_BATCH_SIZE), + model_batch_size.max_range, + ) + + # Instance Count + # ===================================================================== + instance_count = search_parameters.get_parameter("instance_count") + self.assertEqual(ParameterUsage.MODEL, instance_count.usage) + self.assertEqual(ParameterCategory.INTEGER, instance_count.category) + self.assertEqual(RunConfigDefaults.MIN_INSTANCE_COUNT, instance_count.min_range) + self.assertEqual(RunConfigDefaults.MAX_INSTANCE_COUNT, instance_count.max_range) + + # Max Queue Delay + max_queue_delay = search_parameters.get_parameter("max_queue_delay") + self.assertIsNone(max_queue_delay) + + 
####################################################################### + # PA Config + ####################################################################### + + # Batch size + # ===================================================================== + runtime_batch_size = search_parameters.get_parameter("runtime_batch_size") + self.assertEqual(ParameterUsage.RUNTIME, runtime_batch_size.usage) + self.assertEqual(ParameterCategory.INT_LIST, runtime_batch_size.category) + self.assertEqual( + RunConfigDefaults.PA_BATCH_SIZE, runtime_batch_size.enumerated_list + ) + + # Concurrency - this is not set because use_concurrency_formula is True + # ===================================================================== + concurrency = search_parameters.get_parameter("concurrency") + + self.assertIsNone(concurrency) + + # Request Rate + # ===================================================================== + request_rate = search_parameters.get_parameter("request_rate") + self.assertIsNone(request_rate) + + def test_search_parameter_no_concurrency_formula(self): + """ + Test that search parameters are correctly created when concurrency formula is disabled + """ + config = deepcopy(ConfigCommand()) + config.optimize.perf_analyzer.use_concurrency_formula = False + + search_parameters = SearchParameters(config) + + concurrency = search_parameters.get_parameter("concurrency") + self.assertEqual(ParameterUsage.RUNTIME, concurrency.usage) + self.assertEqual(ParameterCategory.EXPONENTIAL, concurrency.category) + self.assertEqual(log2(RunConfigDefaults.MIN_CONCURRENCY), concurrency.min_range) + self.assertEqual(log2(RunConfigDefaults.MAX_CONCURRENCY), concurrency.max_range) + + def test_search_parameter_request_rate(self): + """ + Test that request rate is used when specified in config + """ + config = deepcopy(ConfigCommand()) + config.optimize.perf_analyzer.stimulus_type = "request_rate" + + search_parameters = SearchParameters(config) + + request_rate = search_parameters.get_parameter("request_rate") + self.assertEqual(ParameterUsage.RUNTIME, request_rate.usage) + self.assertEqual(ParameterCategory.EXPONENTIAL, request_rate.category) + self.assertEqual( + log2(RunConfigDefaults.MIN_REQUEST_RATE), request_rate.min_range + ) + self.assertEqual( + log2(RunConfigDefaults.MAX_REQUEST_RATE), request_rate.max_range + ) + + def test_number_of_configs_range(self): + """ + Test number of configs for a range (INTEGER/EXPONENTIAL) + """ + + # INTEGER + # ===================================================================== + num_of_configs = self.search_parameters._number_of_configurations_for_parameter( + self.search_parameters.get_parameter("instance_count") + ) + self.assertEqual(5, num_of_configs) + + # EXPONENTIAL + # ===================================================================== + num_of_configs = self.search_parameters._number_of_configurations_for_parameter( + self.search_parameters.get_parameter("concurrency") + ) + self.assertEqual(11, num_of_configs) + + def test_number_of_configs_list(self): + """ + Test number of configs for a list + """ + + num_of_configs = self.search_parameters._number_of_configurations_for_parameter( + self.search_parameters.get_parameter("size") + ) + self.assertEqual(3, num_of_configs) + + def test_total_possible_configurations(self): + """ + Test number of total possible configurations + """ + total_num_of_possible_configurations = ( + self.search_parameters.number_of_total_possible_configurations() + ) + + # model_batch_size (8) * instance count (5) * concurrency (11) * 
size (3) + self.assertEqual(8 * 5 * 11 * 3, total_num_of_possible_configurations) + + # TODO: OPTIMIZE: + # This will be enabled once BLS support is added + # + # def test_search_parameter_creation_bls_default(self): + # """ + # Test that search parameters are correctly created in default BLS optuna case + # """ + + # args = [ + # "model-analyzer", + # "profile", + # "--model-repository", + # "cli-repository", + # "-f", + # "path-to-config-file", + # "--run-config-search-mode", + # "optuna", + # ] + + # yaml_content = """ + # profile_models: add_sub + # bls_composing_models: add,sub + # """ + + # config = TestConfig()._evaluate_config(args=args, yaml_content=yaml_content) + + # analyzer = Analyzer(config, MagicMock(), MagicMock(), MagicMock()) + + # mock_model_config = MockModelConfig() + # mock_model_config.start() + # analyzer._populate_search_parameters(MagicMock(), MagicMock()) + # analyzer._populate_composing_search_parameters(MagicMock(), MagicMock()) + # mock_model_config.stop() + + # # ADD_SUB + # # ===================================================================== + # # The top level model of a BLS does not search max batch size (always 1) + + # # max_batch_size + # max_batch_size = analyzer._search_parameters["add_sub"].get_parameter( + # "max_batch_size" + # ) + # self.assertIsNone(max_batch_size) + + # # concurrency + # concurrency = analyzer._search_parameters["add_sub"].get_parameter( + # "concurrency" + # ) + # self.assertEqual(ParameterUsage.RUNTIME, concurrency.usage) + # self.assertEqual(ParameterCategory.EXPONENTIAL, concurrency.category) + # self.assertEqual( + # log2(default.DEFAULT_RUN_CONFIG_MIN_CONCURRENCY), concurrency.min_range + # ) + # self.assertEqual( + # log2(default.DEFAULT_RUN_CONFIG_MAX_CONCURRENCY), concurrency.max_range + # ) + + # # instance_group + # instance_group = analyzer._search_parameters["add_sub"].get_parameter( + # "instance_group" + # ) + # self.assertEqual(ParameterUsage.MODEL, instance_group.usage) + # self.assertEqual(ParameterCategory.INTEGER, instance_group.category) + # self.assertEqual( + # default.DEFAULT_RUN_CONFIG_MIN_INSTANCE_COUNT, instance_group.min_range + # ) + # self.assertEqual( + # default.DEFAULT_RUN_CONFIG_MAX_INSTANCE_COUNT, instance_group.max_range + # ) + + # # ADD/SUB (composing models) + # # ===================================================================== + # # Composing models do not search concurrency and has no max batch size + + # # max_batch_size + # max_batch_size = analyzer._composing_search_parameters["add"].get_parameter( + # "max_batch_size" + # ) + # self.assertIsNone(max_batch_size) + + # # concurrency + # concurrency = analyzer._composing_search_parameters["sub"].get_parameter( + # "concurrency" + # ) + # self.assertIsNone(concurrency) + + # # instance_group + # instance_group = analyzer._composing_search_parameters["sub"].get_parameter( + # "instance_group" + # ) + # self.assertEqual(ParameterUsage.MODEL, instance_group.usage) + # self.assertEqual(ParameterCategory.INTEGER, instance_group.category) + # self.assertEqual( + # default.DEFAULT_RUN_CONFIG_MIN_INSTANCE_COUNT, instance_group.min_range + # ) + # self.assertEqual( + # default.DEFAULT_RUN_CONFIG_MAX_INSTANCE_COUNT, instance_group.max_range + # ) + + +if __name__ == "__main__": + unittest.main()