Optuna CI Testing #912

Merged (10 commits) on Jul 17, 2024
Changes from 7 commits
10 changes: 5 additions & 5 deletions model_analyzer/config/input/config_command_profile.py
@@ -927,7 +927,7 @@ def _add_run_search_configs(self):
self._add_config(
ConfigField(
"min_percentage_of_search_space",
flags=["--min_percentage_of_search_space"],
flags=["--min-percentage-of-search-space"],
field_type=ConfigPrimitive(int),
default_value=DEFAULT_OPTUNA_MIN_PERCENTAGE_OF_SEARCH_SPACE,
description="Minimum percentage of the search space to profile when using Optuna",
@@ -936,7 +936,7 @@ def _add_run_search_configs(self):
self._add_config(
ConfigField(
"max_percentage_of_search_space",
flags=["--max_percentage_of_search_space"],
flags=["--max-percentage-of-search-space"],
field_type=ConfigPrimitive(int),
default_value=DEFAULT_OPTUNA_MAX_PERCENTAGE_OF_SEARCH_SPACE,
description="Maximum percentage of the search space to profile when using Optuna",
@@ -945,7 +945,7 @@ def _add_run_search_configs(self):
self._add_config(
ConfigField(
"optuna_min_trials",
flags=["--optuna_min_trials"],
flags=["--optuna-min-trials"],
field_type=ConfigPrimitive(int),
default_value=DEFAULT_OPTUNA_MIN_TRIALS,
description="Minimum number of trials to profile when using Optuna",
@@ -954,7 +954,7 @@ def _add_run_search_configs(self):
self._add_config(
ConfigField(
"optuna_max_trials",
flags=["--optuna_max_trials"],
flags=["--optuna-max-trials"],
field_type=ConfigPrimitive(int),
default_value=DEFAULT_OPTUNA_MAX_TRIALS,
description="Maximum number of trials to profile when using Optuna",
@@ -963,7 +963,7 @@ def _add_run_search_configs(self):
self._add_config(
ConfigField(
"optuna_early_exit_threshold",
flags=["--optuna_early_exit_threshold"],
flags=["--optuna-early-exit-threshold"],
field_type=ConfigPrimitive(int),
default_value=DEFAULT_OPTUNA_EARLY_EXIT_THRESHOLD,
description="Number of trials without improvement before triggering early exit when using Optuna",
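For reference, after this rename the Optuna search options are passed on the command line with hyphens rather than underscores. A minimal sketch of a profile invocation exercising the renamed flags (the repository path and all numeric values here are illustrative placeholders, not defaults taken from this PR):

# Illustrative invocation only; the model repository path and values are placeholders.
model-analyzer profile \
    -m /path/to/model_repository \
    --profile-models bls \
    --run-config-search-mode optuna \
    --min-percentage-of-search-space 5 \
    --max-percentage-of-search-space 10 \
    --optuna-min-trials 20 \
    --optuna-max-trials 40 \
    --optuna-early-exit-threshold 5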
116 changes: 116 additions & 0 deletions qa/L0_optuna_bls_model/check_results.py
@@ -0,0 +1,116 @@
#!/usr/bin/env python3

# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import sys

import yaml


class TestOutputValidator:
"""
Functions that validate the output
of the test
"""

def __init__(self, config, test_name, analyzer_log):
self._config = config
self._models = config["profile_models"]
self._analyzer_log = analyzer_log

check_function = self.__getattribute__(f"check_{test_name}")

if check_function():
sys.exit(0)
else:
sys.exit(1)

def check_profile_logs(self):
"""
Check that each model was profiled a number of times
within the expected range for the Optuna search
(trial count bounds plus the follow-up concurrency sweep)
"""

with open(self._analyzer_log, "r") as f:
log_contents = f.read()

# Number of configs in search space: 275
# Model - bls:
# concurrency: 1 to 1024 (11)
# instance_group: 1 to 5 (5)
# Composing model - add:
# instance_group: 1 to 5 (5)
#
# Minimum number of trials: 14 (5% of search space)
# Maximum number of trials: 28 (10% of search space)
#
# Then you have 4 x (0-9) for the concurrency sweep on Top 3 + default
# 0 because all concurrencies could have been tested during the optuna run
expected_min_num_measurements = 14 + 0
expected_max_num_measurements = 28 + 36

for model in self._models:
token = f"Profiling {model}_config"
token_idx = 0
found_count = 0
while True:
token_idx = log_contents.find(token, token_idx + 1)
if token_idx == -1:
break
found_count += 1
if (
found_count < expected_min_num_measurements
or found_count > expected_max_num_measurements
):
print(
f"\n***\n*** Expected range of measurements for {model} : {expected_min_num_measurements} to {expected_max_num_measurements}. "
f"Found {found_count}. \n***"
)
return False
return True


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"-f",
"--config-file",
type=str,
required=True,
help="The path to the config yaml file.",
)
parser.add_argument(
"-l",
"--analyzer-log-file",
type=str,
required=True,
help="The full path to the analyzer log.",
)
parser.add_argument(
"-t",
"--test-name",
type=str,
required=True,
help="The name of the test to be run.",
)
args = parser.parse_args()

with open(args.config_file, "r") as f:
config = yaml.safe_load(f)

TestOutputValidator(config, args.test_name, args.analyzer_log_file)
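As a sanity check of the measurement bounds hard-coded in check_profile_logs above, the arithmetic from the comment block can be reproduced directly. A quick sketch (rounding the 5%/10% bounds up to the next integer is an assumption here; the script itself simply hard-codes 14 and 28):

# Back-of-the-envelope check of the bounds used in check_profile_logs (round-up assumed).
SEARCH_SPACE=$((11 * 5 * 5))        # 275 configs: 11 concurrencies x 5 x 5 instance counts
MIN_MEASUREMENTS=$((14 + 0))        # ~5% of 275 rounds up to 14; best case adds no sweep measurements
MAX_MEASUREMENTS=$((28 + 4 * 9))    # ~10% of 275 rounds up to 28, plus up to 9 sweep points each for Top 3 + default
echo "expected range: ${MIN_MEASUREMENTS}-${MAX_MEASUREMENTS}"   # prints 14-64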
21 changes: 21 additions & 0 deletions qa/L0_optuna_bls_model/input_data.json
@@ -0,0 +1,21 @@
{
"data": [
{
"MODEL_NAME": [
"add"
],
"INPUT0": [
0.74106514,
0.7371813,
0.5274665,
0.13930903
],
"INPUT1": [
0.7845891,
0.88089234,
0.8466405,
0.55024815
]
}
]
}
98 changes: 98 additions & 0 deletions qa/L0_optuna_bls_model/test.sh
@@ -0,0 +1,98 @@
#!/bin/bash
# Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

source ../common/util.sh
source ../common/check_analyzer_results.sh
create_logs_dir "L0_optuna_bls_model"

# Set test parameters
MODEL_ANALYZER="$(which model-analyzer)"
REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
MODEL_REPOSITORY=${MODEL_REPOSITORY:="/opt/triton-model-analyzer/examples/quick-start"}
QA_MODELS="bls"
BLS_COMPOSING_MODELS="add"
INPUT_JSON="$(pwd)/input_data.json"
MODEL_NAMES="$(echo $QA_MODELS | sed 's/ /,/g')"
TRITON_LAUNCH_MODE=${TRITON_LAUNCH_MODE:="local"}
CLIENT_PROTOCOL="grpc"
PORTS=($(find_available_ports 3))
GPUS=($(get_all_gpus_uuids))
OUTPUT_MODEL_REPOSITORY=${OUTPUT_MODEL_REPOSITORY:=$(get_output_directory)}
CONFIG_FILE="config.yml"
FILENAME_SERVER_ONLY="server-metrics.csv"
FILENAME_INFERENCE_MODEL="model-metrics-inference.csv"
FILENAME_GPU_MODEL="model-metrics-gpu.csv"

rm -rf $OUTPUT_MODEL_REPOSITORY
create_result_paths
SERVER_LOG=$TEST_LOG_DIR/server.log

python3 test_config_generator.py --profile-models $MODEL_NAMES --bls-composing-models $BLS_COMPOSING_MODELS -i $INPUT_JSON

# Run the analyzer and check the results
RET=0

set +e

MODEL_ANALYZER_ARGS="-m $MODEL_REPOSITORY -f $CONFIG_FILE"
MODEL_ANALYZER_ARGS="$MODEL_ANALYZER_ARGS --client-protocol=$CLIENT_PROTOCOL --triton-launch-mode=$TRITON_LAUNCH_MODE"
MODEL_ANALYZER_ARGS="$MODEL_ANALYZER_ARGS --triton-http-endpoint localhost:${PORTS[0]} --triton-grpc-endpoint localhost:${PORTS[1]}"
MODEL_ANALYZER_ARGS="$MODEL_ANALYZER_ARGS --triton-metrics-url http://localhost:${PORTS[2]}/metrics"
MODEL_ANALYZER_ARGS="$MODEL_ANALYZER_ARGS --output-model-repository-path $OUTPUT_MODEL_REPOSITORY --override-output-model-repository"
MODEL_ANALYZER_ARGS="$MODEL_ANALYZER_ARGS -e $EXPORT_PATH --checkpoint-directory $CHECKPOINT_DIRECTORY --filename-server-only=$FILENAME_SERVER_ONLY"
MODEL_ANALYZER_ARGS="$MODEL_ANALYZER_ARGS --filename-model-inference=$FILENAME_INFERENCE_MODEL --filename-model-gpu=$FILENAME_GPU_MODEL"
MODEL_ANALYZER_ARGS="$MODEL_ANALYZER_ARGS --run-config-search-mode optuna"
MODEL_ANALYZER_ARGS="$MODEL_ANALYZER_ARGS --skip-detailed-reports --triton-output-path=$SERVER_LOG"
MODEL_ANALYZER_SUBCOMMAND="profile"
MODEL_ANALYZER_GLOBAL_OPTIONS="-v"

run_analyzer

if [ $? -ne 0 ]; then
echo -e "\n***\n*** Test Failed. model-analyzer $MODEL_ANALYZER_SUBCOMMAND exited with non-zero exit code. \n***"
cat $ANALYZER_LOG
RET=1
else
# Check the Analyzer log for correct output
TEST_NAME='profile_logs'
python3 check_results.py -f $CONFIG_FILE -t $TEST_NAME -l $ANALYZER_LOG
if [ $? -ne 0 ]; then
echo -e "\n***\n*** Test Output Verification Failed for $TEST_NAME test.\n***"
cat $ANALYZER_LOG
RET=1
fi

SERVER_METRICS_FILE=${EXPORT_PATH}/results/${FILENAME_SERVER_ONLY}
MODEL_METRICS_GPU_FILE=${EXPORT_PATH}/results/${FILENAME_GPU_MODEL}
MODEL_METRICS_INFERENCE_FILE=${EXPORT_PATH}/results/${FILENAME_INFERENCE_MODEL}

for file in $SERVER_METRICS_FILE $MODEL_METRICS_GPU_FILE $MODEL_METRICS_INFERENCE_FILE; do
check_no_csv_exists $file
if [ $? -ne 0 ]; then
echo -e "\n***\n*** Test Output Verification Failed.\n***"
cat $ANALYZER_LOG
RET=1
fi
done
fi
set -e

if [ $RET -eq 0 ]; then
echo -e "\n***\n*** Test PASSED\n***"
else
echo -e "\n***\n*** Test FAILED\n***"
fi

exit $RET
81 changes: 81 additions & 0 deletions qa/L0_optuna_bls_model/test_config_generator.py
@@ -0,0 +1,81 @@
#!/usr/bin/env python3

# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse

import yaml


class TestConfigGenerator:
"""
This class contains functions that
create configs for various test scenarios.

The `setup` function does the work common to all tests

TO ADD A TEST: Simply add a member function whose name starts
with 'generate'.
"""

def __init__(self):
test_functions = [
self.__getattribute__(name)
for name in dir(self)
if name.startswith("generate")
]

for test_function in test_functions:
self.setup()
test_function()

def setup(self):
parser = argparse.ArgumentParser()
parser.add_argument(
"-m",
"--profile-models",
type=str,
required=True,
help="Comma separated list of models to be profiled",
)
parser.add_argument(
"--bls-composing-models",
type=str,
required=True,
help="Comma separated list of BLS composing models",
)
parser.add_argument(
"-i",
"--input-json-file",
type=str,
required=True,
help="Input data JSON file path",
)

args = parser.parse_args()
self.config = {}
self.config["profile_models"] = sorted(args.profile_models.split(","))
self.config["bls_composing_models"] = sorted(
args.bls_composing_models.split(",")
)
self.config["perf_analyzer_flags"] = {"input-data": args.input_json_file}

def generate_config(self):
with open("config.yml", "w+") as f:
yaml.dump(self.config, f)


if __name__ == "__main__":
TestConfigGenerator()
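test.sh drives this generator with the arguments shown below, so running it by hand is a quick way to inspect the config.yml that model-analyzer then consumes via -f. The expected keys come straight from setup(); the exact YAML layout is whatever yaml.dump produces:

# Generate and inspect the test config the same way test.sh does.
python3 test_config_generator.py \
    --profile-models bls \
    --bls-composing-models add \
    -i "$(pwd)/input_data.json"
cat config.yml
# Expected keys: bls_composing_models, perf_analyzer_flags (input-data), profile_models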