Skip to content

Commit

Permalink
Add request-parameter option and refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
nv-hwoo committed Sep 21, 2023
1 parent d87d8e6 commit f7678d2
Show file tree
Hide file tree
Showing 4 changed files with 195 additions and 54 deletions.
139 changes: 102 additions & 37 deletions src/c++/perf_analyzer/command_line_parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,31 @@ CLParser::Parse(int argc, char** argv)
return params_;
}

// Splits a string into the substrings separated by a delimiter.
//
// \param str The string to split.
// \param delimiter The separator between substrings. Defaults to ":".
// \return The substrings in order of appearance. Consecutive delimiters and
// a leading or trailing delimiter yield empty substrings, so the result
// always holds at least one element. If the delimiter is empty there is
// nothing to split on and the whole string is returned as the only element.
std::vector<std::string>
SplitString(const std::string& str, const std::string& delimiter = ":")
{
  std::vector<std::string> substrs;

  // An empty delimiter matches at every position without consuming any
  // characters, which would never advance the search position.
  if (delimiter.empty()) {
    substrs.push_back(str);
    return substrs;
  }

  size_t pos = 0;
  while (pos != std::string::npos) {
    const size_t delim_pos = str.find(delimiter, pos);
    // When no further delimiter exists, the count wraps around to a huge
    // value and substr() clamps it to the remainder of the string.
    substrs.push_back(str.substr(pos, delim_pos - pos));
    pos = (delim_pos == std::string::npos) ? std::string::npos
                                           : delim_pos + delimiter.size();
  }
  return substrs;
}

// Converts every character of the given string to lower case, in place.
//
// \param s The string to convert; it is overwritten with the result.
void
ToLowerCase(std::string& s)
{
  for (char& ch : s) {
    // std::tolower expects a value representable as unsigned char, so the
    // character is converted before the call and narrowed back afterwards.
    const unsigned char as_unsigned = ch;
    ch = static_cast<char>(std::tolower(as_unsigned));
  }
}

// Used to format the usage message
std::string
CLParser::FormatMessage(std::string str, int offset) const
Expand Down Expand Up @@ -279,18 +304,21 @@ CLParser::Usage(const std::string& msg)
std::cerr
<< FormatMessage(
"--periodic-concurrency-range <start:end:step>: Determines the "
"range of concurrency levels in the similar manner as "
"--concurrency-range. Perf Analyzer will start from the "
"concurrency level of 'start' and go until it reaches 'end' with "
"a stride of 'step'. Unlike --concurrency-range, the user can "
"range of concurrency levels in the similar but slightly "
"different manner as the --concurrency-range. Perf Analyzer will "
"start from the concurrency level of 'start' and increase by "
"'step' each time. Unlike --concurrency-range, the 'end' "
"indicates the *total* number of concurrency since the 'start' "
"(including) and will stop increasing once the cumulative number "
"of concurrent requests has reached the 'end'. The user can "
"specify *when* to periodically increase the concurrency level "
"using the --request-period option. The concurrency level will "
"periodically increase for every n-th response specified by "
"--request-period. Since this disables stability check in "
"Perf Analyzer and reports response timestamps only, the user "
"must provide --profile-export-file to specify where to dump all "
"the measured timestamps. The default values of 'start', 'end', "
"and 'step' are 1.",
"--request-period. Since this disables stability check in Perf "
"Analyzer and reports response timestamps only, the user must "
"provide --profile-export-file to specify where to dump all the "
"measured timestamps. The default values of 'start', 'end', and "
"'step' are 1.",
18)
<< std::endl;
std::cerr
Expand All @@ -301,6 +329,17 @@ CLParser::Usage(const std::string& msg)
"value is 10.",
18)
<< std::endl;
std::cerr
<< FormatMessage(
"--request-parameter <name:value:type>: Specifies a custom "
"parameter that can be sent to a Triton backend as part of the "
"request. For example, providing '--request-parameter "
"max_tokens:256:uint' to the command line will set an additional "
"parameter 'max_tokens' of type 'uint' to 256 as part of the "
"request. The --request-parameter may be specified multiple times "
"for different custom parameters.",
18)
<< std::endl;
std::cerr
<< FormatMessage(
" --request-rate-range <start:end:step>: Determines the range of "
Expand Down Expand Up @@ -835,6 +874,7 @@ CLParser::ParseCommandLine(int argc, char** argv)
{"profile-export-file", required_argument, 0, 58},
{"periodic-concurrency-range", required_argument, 0, 59},
{"request-period", required_argument, 0, 60},
{"request-parameter", required_argument, 0, 61},
{0, 0, 0, 0}};

// Parse commandline...
Expand Down Expand Up @@ -1514,42 +1554,37 @@ CLParser::ParseCommandLine(int argc, char** argv)
case 59: {
params_->using_periodic_concurrency_range = true;
std::string arg = optarg;
size_t pos = 0;
int index = 0;
while (pos != std::string::npos) {
size_t colon_pos = arg.find(":", pos);
if (index > 2) {
Usage(
"Failed to parse --periodic-concurrency-range. The value "
"does not match <start:end:step>.");
}
int64_t val;
if (colon_pos == std::string::npos) {
val = std::stoull(arg.substr(pos, colon_pos));
pos = colon_pos;
} else {
val = std::stoull(arg.substr(pos, colon_pos - pos));
pos = colon_pos + 1;
}
switch (index) {
case 0:
params_->periodic_concurrency_range.start = val;
break;
case 1:
params_->periodic_concurrency_range.end = val;
break;
case 2:
params_->periodic_concurrency_range.step = val;
break;
std::vector<std::string> values{SplitString(arg)};
if (values.size() < 2) {
Usage(
"Failed to parse --periodic-concurrency-range. Both <start> "
"and <end> values must be provided.");
} else if (values.size() > 3) {
Usage(
"Failed to parse --periodic-concurrency-range. The value does "
"not match <start:end:step>.");
}

for (size_t i = 0; i < values.size(); ++i) {
uint64_t val = std::stoull(values[i]);
if (i == 0) {
params_->periodic_concurrency_range.start = val;
} else if (i == 1) {
params_->periodic_concurrency_range.end = val;
} else if (i == 2) {
params_->periodic_concurrency_range.step = val;
}
index++;
}

Range<uint64_t> range{params_->periodic_concurrency_range};
if (range.step == 0) {
Usage(
"Failed to parse --periodic-concurrency-range. The <step> "
"value must be > 0.");
} else if (range.start > range.end) {
Usage(
"Failed to parse --periodic-concurrency-range. The <start> "
"must be <= <end>.");
} else if ((range.end - range.start) % range.step != 0) {
Usage(
"Failed to parse --periodic-concurrency-range. The <step> "
Expand All @@ -1566,6 +1601,36 @@ CLParser::ParseCommandLine(int argc, char** argv)
}
break;
}
case 61: {
  // --request-parameter <name:value:type>: parses one custom request
  // parameter and records it in params_->request_parameters under its name.
  std::string arg = optarg;
  std::vector<std::string> values{SplitString(arg)};
  if (values.size() != 3) {
    Usage(
        "Failed to parse --request-parameter. The value does not match "
        "<name:value:type>.");
    // Usage() may return instead of exiting (the test parser ignores the
    // exit), so stop here rather than index past the end of `values`.
    break;
  }

  // NOTE(review): this lower-cases the name and the value as well as the
  // type, so string values lose their original casing -- confirm intended.
  std::for_each(values.begin(), values.end(), ToLowerCase);
  std::string name{values[0]};
  std::string value{values[1]};
  std::string type{values[2]};

  // Only the member that matches `type` is assigned below.
  // NOTE(review): std::stoull/std::stoll throw on non-numeric input and
  // nothing visible here catches that -- confirm how callers handle it.
  RequestParameter param;
  if (type == "bool") {
    param.bool_value = (value == "true");
  } else if (type == "uint") {
    param.uint_value = std::stoull(value);
  } else if (type == "int") {
    param.int_value = std::stoll(value);
  } else if (type == "string") {
    param.str_value = value;
  } else {
    Usage(
        "Failed to parse --request-parameter. Unsupported type: '" +
        type + "'.");
    // Do not record a parameter whose type could not be interpreted.
    break;
  }

  // Store the parsed parameter; without this the option has no effect.
  params_->request_parameters[name] = param;
  break;
}
case 'v':
params_->extra_verbose = params_->verbose;
params_->verbose = true;
Expand Down
1 change: 1 addition & 0 deletions src/c++/perf_analyzer/command_line_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ struct PerfAnalyzerParameters {
bool using_periodic_concurrency_range = false;
Range<uint64_t> periodic_concurrency_range{1, 1, 1};
uint64_t request_period = 10;
std::unordered_map<std::string, RequestParameter> request_parameters;
uint64_t latency_threshold_ms = NO_LIMIT;
double stability_threshold = 0.1;
size_t max_trials = 10;
Expand Down
7 changes: 7 additions & 0 deletions src/c++/perf_analyzer/perf_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,13 @@ class Range {
T step;
};

// Holds the value of a custom request parameter, with one member per
// supported value kind (string, signed integer, unsigned integer, boolean).
// Every member has a defined default so that an instance in which only one
// member has been assigned can still be copied and read safely.
struct RequestParameter {
  std::string str_value;
  int64_t int_value{0};
  uint64_t uint_value{0};
  bool bool_value{false};
};

// Converts the datatype from tensorflow to perf analyzer space
// \param tf_dtype The data type string returned from the model metadata.
// \param datatype Returns the datatype in perf_analyzer space.
Expand Down
102 changes: 85 additions & 17 deletions src/c++/perf_analyzer/test_command_line_parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -347,22 +347,6 @@ CheckValidRange(
std::vector<char*>& args, char* option_name, TestCLParser& parser,
PAParamsPtr& act, bool& using_range, Range<uint64_t>& range)
{
SUBCASE("start provided")
{
args.push_back(option_name);
args.push_back("100"); // start

int argc = args.size();
char* argv[argc];
std::copy(args.begin(), args.end(), argv);

REQUIRE_NOTHROW(act = parser.Parse(argc, argv));
CHECK(!parser.UsageCalled());

using_range = true;
range.start = 100;
}

SUBCASE("start:end provided")
{
args.push_back(option_name);
Expand Down Expand Up @@ -520,7 +504,7 @@ CheckInvalidRange(
std::copy(args.begin(), args.end(), argv);

REQUIRE_NOTHROW(act = parser.Parse(argc, argv));
CHECK(!parser.UsageCalled());
CHECK(parser.UsageCalled());

// BUG (TMA-1307): Should detect this and throw an error. User will
// enter this and have no clue why the end and step sizes are not used
Expand Down Expand Up @@ -1161,6 +1145,22 @@ TEST_CASE("Testing Command Line Parser")
{
char* option_name = "--concurrency-range";

SUBCASE("start provided")
{
args.push_back(option_name);
args.push_back("100"); // start

int argc = args.size();
char* argv[argc];
std::copy(args.begin(), args.end(), argv);

REQUIRE_NOTHROW(act = parser.Parse(argc, argv));
CHECK(!parser.UsageCalled());

exp->using_concurrency_range = true;
exp->concurrency_range.start = 100;
}

CheckValidRange(
args, option_name, parser, act, exp->using_concurrency_range,
exp->concurrency_range);
Expand Down Expand Up @@ -1202,6 +1202,26 @@ TEST_CASE("Testing Command Line Parser")
exp->streaming = true;
exp->url = "localhost:8001"; // gRPC url

SUBCASE("start provided")
{
args.push_back(option_name);
args.push_back("100"); // start

int argc = args.size();
char* argv[argc];
std::copy(args.begin(), args.end(), argv);

REQUIRE_NOTHROW(act = parser.Parse(argc, argv));
CHECK(parser.UsageCalled());

// FIXME (TMA-1307): Currently the expected error message does not match
// the actual error message since TestCLParser ignores the exit statement
// when the Usage() is called and proceeds executing the program when it
// should stop the program.

check_params = false;
}

CheckValidRange(
args, option_name, parser, act, exp->using_periodic_concurrency_range,
exp->periodic_concurrency_range);
Expand Down Expand Up @@ -1319,6 +1339,54 @@ TEST_CASE("Testing Command Line Parser")
}
}

SUBCASE("Option : --request-parameter")
{
char* option_name = "--request-parameter";

SUBCASE("missing type")
{
args.push_back(option_name);
args.push_back("max_tokens:256");

int argc = args.size();
char* argv[argc];
std::copy(args.begin(), args.end(), argv);

REQUIRE_NOTHROW(act = parser.Parse(argc, argv));
CHECK(parser.UsageCalled());

// FIXME (TMA-1307): Currently the expected error message does not match
// the actual error message since TestCLParser ignores the exit statement
// when the Usage() is called and proceeds executing the program when it
// should stop the program.
//
// expected_msg = CreateUsageMessage(
// option_name, "The value does not match <name:value:type>.");
// CHECK_STRING("Usage Message", parser.GetUsageMessage(), expected_msg);

check_params = false;
}

SUBCASE("unsupported type")
{
args.push_back(option_name);
args.push_back("max_tokens:256:hello");

int argc = args.size();
char* argv[argc];
std::copy(args.begin(), args.end(), argv);

REQUIRE_NOTHROW(act = parser.Parse(argc, argv));
CHECK(parser.UsageCalled());

expected_msg =
CreateUsageMessage(option_name, "Unsupported type: 'hello'.");
CHECK_STRING("Usage Message", parser.GetUsageMessage(), expected_msg);

check_params = false;
}
}

SUBCASE("Option : --latency-threshold")
{
expected_msg = CreateUsageMessage(
Expand Down

0 comments on commit f7678d2

Please sign in to comment.