Add LLM support to Brute Search #769

Merged · 23 commits · Oct 12, 2023
Commits
49b4e15
Initial coding complete
nv-braf Oct 5, 2023
43d5c1d
First unit test passing
nv-braf Oct 5, 2023
d765027
Adding test for prompt length
nv-braf Oct 5, 2023
c198e5a
Refactor PACG methods
nv-braf Oct 5, 2023
79aa02a
Further refactoring
nv-braf Oct 5, 2023
ac81a6b
Ensure early exit isn't enabled for LLM models
nv-braf Oct 5, 2023
015a2c2
Fix type checking errors
nv-braf Oct 6, 2023
2619b83
Attempt at fixing codeql issue
nv-braf Oct 7, 2023
9f2a065
Revert "Attempt at fixing codeql issue"
nv-braf Oct 10, 2023
c5b702e
Attempt at codeQL fix
nv-braf Oct 10, 2023
cbdc746
Adding deepcopy back in
nv-braf Oct 10, 2023
0c909ea
Removing deepcopy in an attempt to fix codeQL errors
nv-braf Oct 10, 2023
3f4450a
Update model_analyzer/config/input/config_command_profile.py
nv-braf Oct 11, 2023
c69b577
Update model_analyzer/config/generate/perf_analyzer_config_generator.py
nv-braf Oct 11, 2023
b1eed54
Update model_analyzer/config/generate/perf_analyzer_config_generator.py
nv-braf Oct 11, 2023
a2fa148
Update model_analyzer/config/generate/perf_analyzer_config_generator.py
nv-braf Oct 11, 2023
c96d897
Moving location of method
nv-braf Oct 11, 2023
daee4cc
Changing parameter to inference load
nv-braf Oct 11, 2023
3966c6c
Changing parameter to inference load
nv-braf Oct 11, 2023
fbe1abf
Changing prompt length to text input length
nv-braf Oct 11, 2023
abec25d
Changing max_tokens to use request-parameter (see the sketch after the commit list)
nv-braf Oct 11, 2023
f8729db
Fix input-data typo
nv-braf Oct 12, 2023
2cda3df
Changing non-parameter to parameter
nv-braf Oct 12, 2023
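
The abec25d commit above moves max_tokens onto perf_analyzer's --request-parameter mechanism. Below is a minimal sketch of what that mapping could look like; the "name:value:type" string format and the helper are assumptions for illustration, not code taken from the PR:

from typing import Dict


def max_tokens_request_parameter(max_tokens: int) -> Dict[str, str]:
    # Hypothetical helper: perf_analyzer accepts custom request parameters
    # as "name:value:type", so max_tokens is passed along as an int-typed
    # parameter instead of a dedicated flag.
    return {"request-parameter": f"max_tokens:{max_tokens}:int"}


# Usage sketch: perf_config.update_config(max_tokens_request_parameter(256))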
model_analyzer/config/generate/automatic_model_config_generator.py

@@ -79,10 +79,7 @@ def __init__(
             logger.info("")
             AutomaticModelConfigGenerator._log_first_run = True
 
-        self._max_instance_count = config.run_config_search_max_instance_count
-        self._min_instance_count = config.run_config_search_min_instance_count
-        self._max_model_batch_size = config.run_config_search_max_model_batch_size
-        self._min_model_batch_size = config.run_config_search_min_model_batch_size
+        self._set_min_max_search_values(config)
 
         self._instance_kind = "KIND_CPU" if self._cpu_only else "KIND_GPU"
 
@@ -162,3 +159,9 @@ def _get_curr_param_combo(self) -> Dict:
             config["dynamic_batching"] = {}
 
         return config
+
+    def _set_min_max_search_values(self, config: ConfigCommandProfile) -> None:
+        self._max_instance_count = config.run_config_search_max_instance_count
+        self._min_instance_count = config.run_config_search_min_instance_count
+        self._max_model_batch_size = config.run_config_search_max_model_batch_size
+        self._min_model_batch_size = config.run_config_search_min_model_batch_size
@@ -29,7 +29,7 @@
 from model_analyzer.config.run.run_config import RunConfig
 from model_analyzer.constants import LOGGER_NAME
 from model_analyzer.device.gpu_device import GPUDevice
-from model_analyzer.result.parameter_search import ParameterSearch
+from model_analyzer.result.inference_load_search import InferenceLoadSearch
 from model_analyzer.result.result_manager import ResultManager
 from model_analyzer.result.run_config_measurement import RunConfigMeasurement
 from model_analyzer.triton.client.client import TritonClient
@@ -39,10 +39,10 @@
 logger = logging.getLogger(LOGGER_NAME)
 
 
-class BrutePlusBinaryParameterSearchRunConfigGenerator(ConfigGeneratorInterface):
+class BrutePlusBinarySearchRunConfigGenerator(ConfigGeneratorInterface):
     """
     First run BruteRunConfigGenerator for a brute search, then for
-    automatic searches use ParameterSearch to perform a binary search
+    automatic searches use InferenceLoadSearch to perform a binary search
     """
 
     def __init__(
@@ -132,17 +132,19 @@ def _binary_search_over_top_results(self) -> Generator[RunConfig, None, None]:
         for result in top_results:
             run_config = deepcopy(result.run_config())
             model_parameters = self._get_model_parameters(model_name)
-            parameter_search = ParameterSearch(
+            inference_load_search = InferenceLoadSearch(
                 config=self._config,
                 model_parameters=model_parameters,
-                skip_parameter_sweep=True,
+                skip_inference_load_sweep=True,
             )
-            for parameter in parameter_search.search_parameters():
-                run_config = self._set_parameter(
-                    run_config, model_parameters, parameter
+            for inference_load in inference_load_search.search_inference_loads():
+                run_config = self._set_inference_load(
+                    run_config, model_parameters, inference_load
                 )
                 yield run_config
-                parameter_search.add_run_config_measurement(self._last_measurement)
+                inference_load_search.add_run_config_measurement(
+                    self._last_measurement
+                )
 
     def _get_model_parameters(self, model_name: str) -> Dict:
         for model in self._models:
@@ -151,14 +153,14 @@ def _get_model_parameters(self, model_name: str) -> Dict:
 
         return {}
 
-    def _set_parameter(
-        self, run_config: RunConfig, model_parameters: Dict, parameter: int
+    def _set_inference_load(
+        self, run_config: RunConfig, model_parameters: Dict, inference_load: int
     ) -> RunConfig:
         for model_run_config in run_config.model_run_configs():
             perf_config = model_run_config.perf_config()
             if self._config.is_request_rate_specified(model_parameters):
-                perf_config.update_config({"request-rate-range": parameter})
+                perf_config.update_config({"request-rate-range": inference_load})
             else:
-                perf_config.update_config({"concurrency-range": parameter})
+                perf_config.update_config({"concurrency-range": inference_load})
 
         return run_config
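
To make the renamed flow concrete, here is a minimal sketch of the sweep-then-binary-search pattern the class docstring describes. It is illustrative only: the real InferenceLoadSearch also distinguishes request rate from concurrency and checks measurements against constraints, and the class name, load cap, and pass/fail interface below are invented for the example.

class InferenceLoadSearchSketch:
    """Sketch: doubling sweep over inference loads, then a binary search."""

    def __init__(self, max_load: int = 1024) -> None:
        self._max_load = max_load
        self._passed = False

    def add_run_config_measurement(self, passed: bool) -> None:
        # The real class inspects a RunConfigMeasurement; here the caller
        # simply reports whether the last run met its constraints.
        self._passed = passed

    def search_inference_loads(self):
        load, last_good, first_bad = 1, 0, None
        # Sweep phase: double the load until a run fails or the cap is hit.
        while first_bad is None and load <= self._max_load:
            yield load
            if self._passed:
                last_good, load = load, load * 2
            else:
                first_bad = load
        # Binary search phase: narrow the window between the highest
        # passing load and the lowest failing one.
        while first_bad is not None and first_bad - last_good > 1:
            mid = (last_good + first_bad) // 2
            yield mid
            if self._passed:
                last_good = mid
            else:
                first_bad = mid

The yield-then-add_run_config_measurement loop in _binary_search_over_top_results above drives exactly this shape of generator.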
12 changes: 10 additions & 2 deletions model_analyzer/config/generate/model_run_config_generator.py
@@ -150,5 +150,13 @@ def _determine_early_exit_enables(
         concurrency_specified = model.parameters()["concurrency"]
         config_parameters_exist = model.model_config_parameters()
 
-        self._pacg_early_exit_enable = early_exit_enable or not concurrency_specified
-        self._mcg_early_exit_enable = early_exit_enable or not config_parameters_exist
+        if config.is_llm_model():
+            self._pacg_early_exit_enable = False
+            self._mcg_early_exit_enable = False
+        else:
+            self._pacg_early_exit_enable = (
+                early_exit_enable or not concurrency_specified
+            )
+            self._mcg_early_exit_enable = (
+                early_exit_enable or not config_parameters_exist
+            )
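
For context on why both flags are forced off: early exit lets the generators stop a sweep once extra load or batch size stops paying off in throughput. A hypothetical illustration of the kind of plateau check such a flag gates; the 5% threshold and the function below are assumptions, not model_analyzer code:

from typing import List

THROUGHPUT_GAIN = 0.05  # assumed: require at least a 5% gain to keep going


def should_exit_early(throughputs: List[float]) -> bool:
    """True once the latest sweep step improved throughput by under 5%."""
    if len(throughputs) < 2:
        return False
    prev, curr = throughputs[-2], throughputs[-1]
    return curr < prev * (1 + THROUGHPUT_GAIN)

For LLM models the PR skips this heuristic entirely, plausibly because per-request cost varies with text input and output lengths, which makes a throughput plateau a noisier exit signal.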