Optuna CI Testing #912

Merged (10 commits) on Jul 17, 2024
Changes from 7 commits
10 changes: 5 additions & 5 deletions model_analyzer/config/input/config_command_profile.py
@@ -927,7 +927,7 @@ def _add_run_search_configs(self):
self._add_config(
ConfigField(
"min_percentage_of_search_space",
flags=["--min_percentage_of_search_space"],
flags=["--min-percentage-of-search-space"],
field_type=ConfigPrimitive(int),
default_value=DEFAULT_OPTUNA_MIN_PERCENTAGE_OF_SEARCH_SPACE,
description="Minimum percentage of the search space to profile when using Optuna",
@@ -936,7 +936,7 @@ def _add_run_search_configs(self):
self._add_config(
ConfigField(
"max_percentage_of_search_space",
flags=["--max_percentage_of_search_space"],
flags=["--max-percentage-of-search-space"],
field_type=ConfigPrimitive(int),
default_value=DEFAULT_OPTUNA_MAX_PERCENTAGE_OF_SEARCH_SPACE,
description="Maximum percentage of the search space to profile when using Optuna",
@@ -945,7 +945,7 @@ def _add_run_search_configs(self):
self._add_config(
ConfigField(
"optuna_min_trials",
flags=["--optuna_min_trials"],
flags=["--optuna-min-trials"],
field_type=ConfigPrimitive(int),
default_value=DEFAULT_OPTUNA_MIN_TRIALS,
description="Minimum number of trials to profile when using Optuna",
@@ -954,7 +954,7 @@ def _add_run_search_configs(self):
self._add_config(
ConfigField(
"optuna_max_trials",
flags=["--optuna_max_trials"],
flags=["--optuna-max-trials"],
field_type=ConfigPrimitive(int),
default_value=DEFAULT_OPTUNA_MAX_TRIALS,
description="Maximum number of trials to profile when using Optuna",
@@ -963,7 +963,7 @@ def _add_run_search_configs(self):
self._add_config(
ConfigField(
"optuna_early_exit_threshold",
flags=["--optuna_early_exit_threshold"],
flags=["--optuna-early-exit-threshold"],
field_type=ConfigPrimitive(int),
default_value=DEFAULT_OPTUNA_EARLY_EXIT_THRESHOLD,
description="Number of trials without improvement before triggering early exit when using Optuna",
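For reference, after this rename the Optuna search options are passed on the command line with hyphens rather than underscores. A minimal sketch of a profile invocation exercising the renamed flags (the repository path and all numeric values here are illustrative placeholders, not defaults taken from this PR):

# Illustrative invocation only; the model repository path and values are placeholders.
model-analyzer profile \
    -m /path/to/model_repository \
    --profile-models bls \
    --run-config-search-mode optuna \
    --min-percentage-of-search-space 5 \
    --max-percentage-of-search-space 10 \
    --optuna-min-trials 20 \
    --optuna-max-trials 40 \
    --optuna-early-exit-threshold 5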
116 changes: 116 additions & 0 deletions qa/L0_optuna_bls_model/check_results.py
@@ -0,0 +1,116 @@
#!/usr/bin/env python3

# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import sys

import yaml


class TestOutputValidator:
"""
Functions that validate the output
of the test
"""

def __init__(self, config, test_name, analyzer_log):
self._config = config
self._models = config["profile_models"]
self._analyzer_log = analyzer_log

check_function = self.__getattribute__(f"check_{test_name}")

if check_function():
sys.exit(0)
else:
sys.exit(1)

def check_profile_logs(self):
"""
Check that each model was profiled a number of times
within the expected range for the Optuna search
(trial count bounds plus the follow-up concurrency sweep)
"""

with open(self._analyzer_log, "r") as f:
log_contents = f.read()

# Number of configs in search space: 275
# Model - bls:
# concurrency: 1 to 1024 (11)
# instance_group: 1 to 5 (5)
# Composing model - add:
# instance_group: 1 to 5 (5)
#
# Minimum number of trials: 14 (5% of search space)
# Maximum number of trials: 28 (10% of search space)
#
# Then you have 4 x (0-9) for the concurrency sweep on Top 3 + default
# 0 because all concurrencies could have been tested during the optuna run
expected_min_num_measurements = 14 + 0
expected_max_num_measurements = 28 + 36

for model in self._models:
token = f"Profiling {model}_config"
token_idx = 0
found_count = 0
while True:
token_idx = log_contents.find(token, token_idx + 1)
if token_idx == -1:
break
found_count += 1
if (
found_count < expected_min_num_measurements
or found_count > expected_max_num_measurements
):
print(
f"\n***\n*** Expected range of measurements for {model} : {expected_min_num_measurements} to {expected_max_num_measurements}. "
f"Found {found_count}. \n***"
)
return False
return True


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"-f",
"--config-file",
type=str,
required=True,
help="The path to the config yaml file.",
)
parser.add_argument(
"-l",
"--analyzer-log-file",
type=str,
required=True,
help="The full path to the analyzer log.",
)
parser.add_argument(
"-t",
"--test-name",
type=str,
required=True,
help="The name of the test to be run.",
)
args = parser.parse_args()

with open(args.config_file, "r") as f:
config = yaml.safe_load(f)

TestOutputValidator(config, args.test_name, args.analyzer_log_file)
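As a sanity check of the measurement bounds hard-coded in check_profile_logs above, the arithmetic from the comment block can be reproduced directly. A quick sketch (rounding the 5%/10% bounds up to the next integer is an assumption here; the script itself simply hard-codes 14 and 28):

# Back-of-the-envelope check of the bounds used in check_profile_logs (round-up assumed).
SEARCH_SPACE=$((11 * 5 * 5))        # 275 configs: 11 concurrencies x 5 x 5 instance counts
MIN_MEASUREMENTS=$((14 + 0))        # ~5% of 275 rounds up to 14; best case adds no sweep measurements
MAX_MEASUREMENTS=$((28 + 4 * 9))    # ~10% of 275 rounds up to 28, plus up to 9 sweep points each for Top 3 + default
echo "expected range: ${MIN_MEASUREMENTS}-${MAX_MEASUREMENTS}"   # prints 14-64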
21 changes: 21 additions & 0 deletions qa/L0_optuna_bls_model/input_data.json
@@ -0,0 +1,21 @@
{
"data": [
{
"MODEL_NAME": [
"add"
],
"INPUT0": [
0.74106514,
0.7371813,
0.5274665,
0.13930903
],
"INPUT1": [
0.7845891,
0.88089234,
0.8466405,
0.55024815
]
}
]
}
98 changes: 98 additions & 0 deletions qa/L0_optuna_bls_model/test.sh
@@ -0,0 +1,98 @@
#!/bin/bash
# Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

source ../common/util.sh
source ../common/check_analyzer_results.sh
create_logs_dir "L0_optuna_bls_model"

# Set test parameters
MODEL_ANALYZER="$(which model-analyzer)"
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
MODEL_REPOSITORY=${MODEL_REPOSITORY:="/opt/triton-model-analyzer/examples/quick-start"}
QA_MODELS="bls"
BLS_COMPOSING_MODELS="add"
INPUT_JSON="$(pwd)/input_data.json"
MODEL_NAMES="$(echo $QA_MODELS | sed 's/ /,/g')"
TRITON_LAUNCH_MODE=${TRITON_LAUNCH_MODE:="local"}
CLIENT_PROTOCOL="grpc"
PORTS=($(find_available_ports 3))
GPUS=($(get_all_gpus_uuids))
OUTPUT_MODEL_REPOSITORY=${OUTPUT_MODEL_REPOSITORY:=$(get_output_directory)}
CONFIG_FILE="config.yml"
FILENAME_SERVER_ONLY="server-metrics.csv"
FILENAME_INFERENCE_MODEL="model-metrics-inference.csv"
FILENAME_GPU_MODEL="model-metrics-gpu.csv"

rm -rf $OUTPUT_MODEL_REPOSITORY
create_result_paths
SERVER_LOG=$TEST_LOG_DIR/server.log

python3 test_config_generator.py --profile-models $MODEL_NAMES --bls-composing-models $BLS_COMPOSING_MODELS -i $INPUT_JSON

# Run the analyzer and check the results
RET=0

set +e

MODEL_ANALYZER_ARGS="-m $MODEL_REPOSITORY -f $CONFIG_FILE"
MODEL_ANALYZER_ARGS="$MODEL_ANALYZER_ARGS --client-protocol=$CLIENT_PROTOCOL --triton-launch-mode=$TRITON_LAUNCH_MODE"
MODEL_ANALYZER_ARGS="$MODEL_ANALYZER_ARGS --triton-http-endpoint localhost:${PORTS[0]} --triton-grpc-endpoint localhost:${PORTS[1]}"
MODEL_ANALYZER_ARGS="$MODEL_ANALYZER_ARGS --triton-metrics-url http://localhost:${PORTS[2]}/metrics"
MODEL_ANALYZER_ARGS="$MODEL_ANALYZER_ARGS --output-model-repository-path $OUTPUT_MODEL_REPOSITORY --override-output-model-repository"
MODEL_ANALYZER_ARGS="$MODEL_ANALYZER_ARGS -e $EXPORT_PATH --checkpoint-directory $CHECKPOINT_DIRECTORY --filename-server-only=$FILENAME_SERVER_ONLY"
MODEL_ANALYZER_ARGS="$MODEL_ANALYZER_ARGS --filename-model-inference=$FILENAME_INFERENCE_MODEL --filename-model-gpu=$FILENAME_GPU_MODEL"
MODEL_ANALYZER_ARGS="$MODEL_ANALYZER_ARGS --run-config-search-mode optuna"
MODEL_ANALYZER_ARGS="$MODEL_ANALYZER_ARGS --skip-detailed-reports --triton-output-path=$SERVER_LOG"
MODEL_ANALYZER_SUBCOMMAND="profile"
MODEL_ANALYZER_GLOBAL_OPTIONS="-v"

run_analyzer

if [ $? -ne 0 ]; then
echo -e "\n***\n*** Test Failed. model-analyzer $MODEL_ANALYZER_SUBCOMMAND exited with non-zero exit code. \n***"
cat $ANALYZER_LOG
RET=1
else
# Check the Analyzer log for correct output
TEST_NAME='profile_logs'
python3 check_results.py -f $CONFIG_FILE -t $TEST_NAME -l $ANALYZER_LOG
if [ $? -ne 0 ]; then
echo -e "\n***\n*** Test Output Verification Failed for $TEST_NAME test.\n***"
cat $ANALYZER_LOG
RET=1
fi

SERVER_METRICS_FILE=${EXPORT_PATH}/results/${FILENAME_SERVER_ONLY}
MODEL_METRICS_GPU_FILE=${EXPORT_PATH}/results/${FILENAME_GPU_MODEL}
MODEL_METRICS_INFERENCE_FILE=${EXPORT_PATH}/results/${FILENAME_INFERENCE_MODEL}

for file in $SERVER_METRICS_FILE $MODEL_METRICS_GPU_FILE $MODEL_METRICS_INFERENCE_FILE; do
check_no_csv_exists $file
if [ $? -ne 0 ]; then
echo -e "\n***\n*** Test Output Verification Failed.\n***"
cat $ANALYZER_LOG
RET=1
fi
done
fi
set -e

if [ $RET -eq 0 ]; then
echo -e "\n***\n*** Test PASSED\n***"
else
echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET
81 changes: 81 additions & 0 deletions qa/L0_optuna_bls_model/test_config_generator.py
@@ -0,0 +1,81 @@
#!/usr/bin/env python3

# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse

import yaml


class TestConfigGenerator:
"""
This class contains functions that
create configs for various test scenarios.

The `setup` function does the work common to all tests

TO ADD A TEST: Simply add a member function whose name starts
with 'generate'.
"""

def __init__(self):
test_functions = [
self.__getattribute__(name)
for name in dir(self)
if name.startswith("generate")
]

for test_function in test_functions:
self.setup()
test_function()

def setup(self):
parser = argparse.ArgumentParser()
parser.add_argument(
"-m",
"--profile-models",
type=str,
required=True,
help="Comma separated list of models to be profiled",
)
parser.add_argument(
"--bls-composing-models",
type=str,
required=True,
help="Comma separated list of BLS composing models",
)
parser.add_argument(
"-i",
"--input-json-file",
type=str,
required=True,
help="Input data JSON file path",
)

args = parser.parse_args()
self.config = {}
self.config["profile_models"] = sorted(args.profile_models.split(","))
self.config["bls_composing_models"] = sorted(
args.bls_composing_models.split(",")
)
self.config["perf_analyzer_flags"] = {"input-data": args.input_json_file}

def generate_config(self):
with open("config.yml", "w+") as f:
yaml.dump(self.config, f)


if __name__ == "__main__":
TestConfigGenerator()
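test.sh drives this generator with the arguments shown below, so running it by hand is a quick way to inspect the config.yml that model-analyzer then consumes via -f. The expected keys come straight from setup(); the exact YAML layout is whatever yaml.dump produces:

# Generate and inspect the test config the same way test.sh does.
python3 test_config_generator.py \
    --profile-models bls \
    --bls-composing-models add \
    -i "$(pwd)/input_data.json"
cat config.yml
# Expected keys: bls_composing_models, perf_analyzer_flags (input-data), profile_models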