diff --git a/model_analyzer/config/generate/quick_run_config_generator.py b/model_analyzer/config/generate/quick_run_config_generator.py
index 5454765bf..e2b9b3756 100755
--- a/model_analyzer/config/generate/quick_run_config_generator.py
+++ b/model_analyzer/config/generate/quick_run_config_generator.py
@@ -33,6 +33,7 @@
 from model_analyzer.config.generate.neighborhood import Neighborhood
 from model_analyzer.config.generate.search_config import SearchConfig
 from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
+from model_analyzer.config.input.config_defaults import DEFAULT_BATCH_SIZES
 from model_analyzer.config.run.model_run_config import ModelRunConfig
 from model_analyzer.config.run.run_config import RunConfig
 from model_analyzer.constants import LOGGER_NAME
@@ -704,13 +705,31 @@ def _create_default_perf_analyzer_config(
             model_config.get_field("name"), self._config
         )
 
-        perf_config_params = {"batch-size": 1, "concurrency-range": 1}
+        default_concurrency = self._calculate_default_concurrency(model_config)
+
+        perf_config_params = {
+            "batch-size": DEFAULT_BATCH_SIZES,
+            "concurrency-range": default_concurrency,
+        }
 
         default_perf_analyzer_config.update_config(perf_config_params)
         default_perf_analyzer_config.update_config(model.perf_analyzer_flags())
 
         return default_perf_analyzer_config
 
+    def _calculate_default_concurrency(self, model_config: ModelConfig) -> int:
+        default_max_batch_size = model_config.max_batch_size()
+
+        # string format is: "<count>:GPU"
+        default_instance_count = int(
+            model_config.instance_group_string(system_gpu_count=len(self._gpus)).split(
+                ":"
+            )[0]
+        )
+        default_concurrency = 2 * default_max_batch_size * default_instance_count
+
+        return default_concurrency
+
     def _print_debug_logs(
         self, measurements: List[Union[RunConfigMeasurement, None]]
     ) -> None:
diff --git a/tests/test_quick_run_config_generator.py b/tests/test_quick_run_config_generator.py
index 544a7bd5a..7eef2514f 100755
--- a/tests/test_quick_run_config_generator.py
+++ b/tests/test_quick_run_config_generator.py
@@ -533,12 +533,16 @@ def test_default_config_generation(self):
         sc = SearchConfig(dimensions=dims, radius=5, min_initialized=2)
 
         qrcg = QuickRunConfigGenerator(
-            sc, config, MagicMock(), models, {}, MagicMock(), ModelVariantNameManager()
+            sc, config, ["GPU0"], models, {}, MagicMock(), ModelVariantNameManager()
         )
 
         default_run_config = qrcg._create_default_run_config()
 
         self.assertIn("--percentile=96", default_run_config.representation())
+        self.assertIn(
+            "--concurrency-range=8",
+            default_run_config.model_run_configs()[0].perf_config().representation(),
+        )
 
     def test_default_ensemble_config_generation(self):
         """