From fd77ba8ac0be029aa44644b5107edd49cff4b340 Mon Sep 17 00:00:00 2001 From: Hyunjae Woo <107147848+nv-hwoo@users.noreply.github.com> Date: Wed, 27 Sep 2023 14:20:24 -0700 Subject: [PATCH] Add new command line options to enable periodic concurrency mode (#391) * Add macros to reuse it for checking range options * Add tests for periodic-concurrency-range option * Add periodic-concurrency-range and request-period options * Add doc for periodic-concurrency-range and request-period * Add test for request-period option * Revert macro and add reusable test function * Add more tests * Small refactor * Refactor a subcase * Require bi-directional gRPC streaming for periodic concurrency mode * Address feedback * Refine the error message * Add bi-directional gRPC streaming options for periodic concurrency mode * Add request-parameter option and refactor * Refactor * Add valid case for request-parameter option * Add --request-parameter doc and edit periodic concurrency description * Custom request parameter is currently only supported by gRPC * Parse and store the type of request parameter * Add checks between act vs. 
exp * Remove uint type and rebase * Change doc * Minor fix * Address feedback --- src/c++/perf_analyzer/command_line_parser.cc | 227 ++++++-- src/c++/perf_analyzer/command_line_parser.h | 4 +- src/c++/perf_analyzer/docs/cli.md | 42 +- src/c++/perf_analyzer/perf_analyzer.cc | 10 +- src/c++/perf_analyzer/perf_utils.h | 9 + .../perf_analyzer/test_command_line_parser.cc | 498 ++++++++++++++---- 6 files changed, 641 insertions(+), 149 deletions(-) diff --git a/src/c++/perf_analyzer/command_line_parser.cc b/src/c++/perf_analyzer/command_line_parser.cc index 7398455cb..5a73d5927 100644 --- a/src/c++/perf_analyzer/command_line_parser.cc +++ b/src/c++/perf_analyzer/command_line_parser.cc @@ -47,6 +47,31 @@ CLParser::Parse(int argc, char** argv) return params_; } +std::vector +SplitString(const std::string& str, const std::string& delimiter = ":") +{ + std::vector substrs; + size_t pos = 0; + while (pos != std::string::npos) { + size_t colon_pos = str.find(":", pos); + substrs.push_back(str.substr(pos, colon_pos - pos)); + if (colon_pos == std::string::npos) { + pos = colon_pos; + } else { + pos = colon_pos + 1; + } + } + return substrs; +} + +void +ToLowerCase(std::string& s) +{ + std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) { + return std::tolower(c); + }); +} + // Used to format the usage message std::string CLParser::FormatMessage(std::string str, int offset) const @@ -88,6 +113,8 @@ CLParser::Usage(const std::string& msg) std::cerr << "\t--measurement-interval (-p) " << std::endl; std::cerr << "\t--concurrency-range " << std::endl; + std::cerr << "\t--periodic-concurrency-range " << std::endl; + std::cerr << "\t--request-period " << std::endl; std::cerr << "\t--request-rate-range " << std::endl; std::cerr << "\t--request-distribution <\"poisson\"|\"constant\">" << std::endl; @@ -274,6 +301,45 @@ CLParser::Usage(const std::string& msg) "not be 0 for sequence models while using asynchronous mode.", 18) << std::endl; + std::cerr + << FormatMessage( + 
"--periodic-concurrency-range : Determines the " + "range of concurrency levels in the similar but slightly " + "different manner as the --concurrency-range. Perf Analyzer will " + "start from the concurrency level of 'start' and increase by " + "'step' each time. Unlike --concurrency-range, the 'end' " + "indicates the *total* number of concurrency since the 'start' " + "(including) and will stop increasing once the cumulative number " + "of concurrent requests has reached the 'end'. The user can " + "specify *when* to periodically increase the concurrency level " + "using the --request-period option. The concurrency level will " + "periodically increase for every n-th response specified by " + "--request-period. Since this disables stability check in Perf " + "Analyzer and reports response timestamps only, the user must " + "provide --profile-export-file to specify where to dump all the " + "measured timestamps. The default values of 'start', 'end', and " + "'step' are 1.", + 18) + << std::endl; + std::cerr + << FormatMessage( + "--request-period : Indicates the number of responses that " + "each request must receive before new, concurrent requests are " + "sent when --periodic-concurrency-range is specified. Default " + "value is 10.", + 18) + << std::endl; + std::cerr + << FormatMessage( + "--request-parameter : Specifies a custom " + "parameter that can be sent to a Triton backend as part of the " + "request. For example, providing '--request-parameter " + "max_tokens:256:int' to the command line will set an additional " + "parameter 'max_tokens' of type 'int' to 256 as part of the " + "request. 
The --request-parameter may be specified multiple times " + "for different custom parameters.", + 18) + << std::endl; std::cerr << FormatMessage( " --request-rate-range : Determines the range of " @@ -806,6 +872,9 @@ CLParser::ParseCommandLine(int argc, char** argv) {"output-tensor-format", required_argument, 0, 56}, {"version", no_argument, 0, 57}, {"profile-export-file", required_argument, 0, 58}, + {"periodic-concurrency-range", required_argument, 0, 59}, + {"request-period", required_argument, 0, 60}, + {"request-parameter", required_argument, 0, 61}, {0, 0, 0, 0}}; // Parse commandline... @@ -895,37 +964,23 @@ CLParser::ParseCommandLine(int argc, char** argv) case 7: { params_->using_concurrency_range = true; std::string arg = optarg; - size_t pos = 0; - int index = 0; - while (pos != std::string::npos) { - size_t colon_pos = arg.find(":", pos); - if (index > 2) { - Usage( - "Failed to parse --concurrency-range. The value does not " - "match ."); - } - int64_t val; - if (colon_pos == std::string::npos) { - val = std::stoull(arg.substr(pos, colon_pos)); - pos = colon_pos; - } else { - val = std::stoull(arg.substr(pos, colon_pos - pos)); - pos = colon_pos + 1; - } - switch (index) { - case 0: - params_->concurrency_range.start = val; - break; - case 1: - params_->concurrency_range.end = val; - break; - case 2: - params_->concurrency_range.step = val; - break; - } - index++; + std::vector values{SplitString(arg)}; + if (values.size() > 3) { + Usage( + "Failed to parse --concurrency-range. 
The value does not match " + "."); } + for (size_t i = 0; i < values.size(); ++i) { + uint64_t val = std::stoull(values[i]); + if (i == 0) { + params_->concurrency_range.start = val; + } else if (i == 1) { + params_->concurrency_range.end = val; + } else if (i == 2) { + params_->concurrency_range.step = val; + } + } break; } case 8: @@ -1482,6 +1537,88 @@ CLParser::ParseCommandLine(int argc, char** argv) params_->profile_export_file = profile_export_file; break; } + case 59: { + params_->is_using_periodic_concurrency_mode = true; + std::string arg = optarg; + std::vector values{SplitString(arg)}; + if (values.size() < 2) { + Usage( + "Failed to parse --periodic-concurrency-range. Both " + "and values must be provided."); + } else if (values.size() > 3) { + Usage( + "Failed to parse --periodic-concurrency-range. The value does " + "not match ."); + } + + for (size_t i = 0; i < values.size(); ++i) { + uint64_t val = std::stoull(values[i]); + if (i == 0) { + params_->periodic_concurrency_range.start = val; + } else if (i == 1) { + params_->periodic_concurrency_range.end = val; + } else if (i == 2) { + params_->periodic_concurrency_range.step = val; + } + } + + Range range{params_->periodic_concurrency_range}; + if (range.step == 0) { + Usage( + "Failed to parse --periodic-concurrency-range. The " + "value must be > 0."); + } else if (range.start > range.end) { + Usage( + "Failed to parse --periodic-concurrency-range. The " + "must be <= ."); + } else if ((range.end - range.start) % range.step != 0) { + Usage( + "Failed to parse --periodic-concurrency-range. The " + "value must be a factor of the range size ( - )."); + } + break; + } + case 60: { + std::string request_period{optarg}; + if (std::stoi(request_period) > 0) { + params_->request_period = std::stoull(request_period); + } else { + Usage("Failed to parse --request-period. 
The value must be > 0"); + } + break; + } + case 61: { + std::string arg = optarg; + std::vector values{SplitString(arg)}; + if (values.size() != 3) { + Usage( + "Failed to parse --request-parameter. The value does not match " + "."); + } + + std::for_each(values.begin(), values.end(), ToLowerCase); + std::string name{values[0]}; + std::string value{values[1]}; + std::string type{values[2]}; + + RequestParameter param; + if (type == "bool") { + param.type = RequestParameterType::BOOL; + param.bool_value = value == "true" ? true : false; + } else if (type == "int") { + param.type = RequestParameterType::INT; + param.int_value = std::stoll(value); + } else if (type == "string") { + param.type = RequestParameterType::STRING; + param.str_value = value; + } else { + Usage( + "Failed to parse --request-parameter. Unsupported type: '" + + type + "'."); + } + params_->request_parameters[name] = param; + break; + } case 'v': params_->extra_verbose = params_->verbose; params_->verbose = true; @@ -1639,10 +1776,36 @@ CLParser::VerifyOptions() Usage("Cannot use concurrency options with --request-rate-range."); } - if (params_->using_request_rate_range && params_->using_concurrency_range) { + std::vector load_modes{ + params_->is_using_periodic_concurrency_mode, + params_->using_concurrency_range, params_->using_request_rate_range, + params_->using_custom_intervals}; + if (std::count(load_modes.begin(), load_modes.end(), true) > 1) { + Usage( + "Cannot specify more then one inference load mode. 
Please choose only " + "one of the following modes: --concurrency-range, " + "--periodic-concurrency-range, --request-rate-range, or " + "--request-intervals."); + } + + if (params_->is_using_periodic_concurrency_mode && !params_->streaming) { + Usage( + "The --periodic-concurrency-range option requires bi-directional gRPC " + "streaming."); + } + + if (params_->is_using_periodic_concurrency_mode && + (params_->profile_export_file == "")) { + Usage( + "Must provide --profile-export-file when using the " + "--periodic-concurrency-range option."); + } + + if (params_->request_parameters.size() > 0 && + params_->protocol != cb::ProtocolType::GRPC) { Usage( - "Cannot specify --concurrency-range and --request-rate-range " - "simultaneously."); + "The --request-parameter option is currently only supported by gRPC " + "protocol."); } if (params_->using_request_rate_range && params_->mpi_driver->IsMPIRun() && diff --git a/src/c++/perf_analyzer/command_line_parser.h b/src/c++/perf_analyzer/command_line_parser.h index a0706525c..518e7b2cf 100644 --- a/src/c++/perf_analyzer/command_line_parser.h +++ b/src/c++/perf_analyzer/command_line_parser.h @@ -58,6 +58,7 @@ struct PerfAnalyzerParameters { uint64_t measurement_window_ms = 5000; bool using_concurrency_range = false; Range concurrency_range{1, 1, 1}; + std::unordered_map request_parameters; uint64_t latency_threshold_ms = NO_LIMIT; double stability_threshold = 0.1; size_t max_trials = 10; @@ -151,9 +152,8 @@ struct PerfAnalyzerParameters { std::string profile_export_file{""}; bool is_using_periodic_concurrency_mode{false}; - Range periodic_concurrency_range{1, 1, 1}; - uint64_t periodic_concurrency_request_period{10}; + uint64_t request_period{10}; }; using PAParamsPtr = std::shared_ptr; diff --git a/src/c++/perf_analyzer/docs/cli.md b/src/c++/perf_analyzer/docs/cli.md index 3fae93692..5961224c8 100644 --- a/src/c++/perf_analyzer/docs/cli.md +++ b/src/c++/perf_analyzer/docs/cli.md @@ -173,13 +173,51 @@ Specifies the range 
of concurrency levels covered by Perf Analyzer. Perf Analyzer will start from the concurrency level of 'start' and go until 'end' with a stride of 'step'. -Default of 'end' and 'step' are `1`. If 'end' is not specified then Perf -Analyzer will run for a single concurrency level determined by 'start'. If +The default values of 'start', 'end', and 'step' are `1`. If 'end' is not specified then +Perf Analyzer will run for a single concurrency level determined by 'start'. If 'end' is set as `0`, then the concurrency limit will be incremented by 'step' until the latency threshold is met. 'end' and `--latency-threshold` cannot both be `0`. 'end' cannot be `0` for sequence models while using asynchronous mode. +#### `--periodic-concurrency-range=` + +Specifies the range of concurrency levels in a manner similar to, but slightly +different from, `--concurrency-range`. Perf Analyzer will start from the +concurrency level of 'start' and increase by 'step' each time. Unlike +`--concurrency-range`, 'end' indicates the *total* concurrency, counted +from (and including) 'start'; the concurrency level stops increasing once the cumulative +number of concurrent requests has reached 'end'. The user can specify +*when* to periodically increase the concurrency level using the +`--request-period` option. The concurrency level will periodically increase for +every `n`-th response specified by `--request-period`. Since this mode disables +the stability check in Perf Analyzer and reports response timestamps only, the user +must provide `--profile-export-file` to specify where to dump all the measured +timestamps. + +The default values of 'start', 'end', and 'step' are `1`. + +#### `--request-period=` + +Specifies the number of responses that each request must receive before new, +concurrent requests are sent when `--periodic-concurrency-range` is specified. + +The default value is `10`. + +#### `--request-parameter=` + +Specifies a custom parameter that can be sent to a Triton backend as part of +the request. 
For example, providing '--request-parameter max_tokens:256:int' +to the command line will set an additional parameter 'max_tokens' of type +'int' to 256 as part of the request. The --request-parameter may be specified +multiple times for different custom parameters. + +Valid `type` values are: `bool`, `int`, and `string`. + +> **NOTE** +> +> The `--request-parameter` is currently only supported by gRPC protocol. + #### `--request-rate-range=` Specifies the range of request rates for load generated by Perf Analyzer. This diff --git a/src/c++/perf_analyzer/perf_analyzer.cc b/src/c++/perf_analyzer/perf_analyzer.cc index 44ec520f2..c3e5e5f90 100644 --- a/src/c++/perf_analyzer/perf_analyzer.cc +++ b/src/c++/perf_analyzer/perf_analyzer.cc @@ -160,13 +160,6 @@ PerfAnalyzer::CreateAnalyzerObjects() } std::unique_ptr manager; - params_->is_using_periodic_concurrency_mode = true; - params_->periodic_concurrency_range = { - std::stoi(std::getenv("MY_START")), std::stoi(std::getenv("MY_END")), - std::stoi(std::getenv("MY_STEP"))}; - params_->periodic_concurrency_request_period = - std::stoi(std::getenv("MY_REQUEST_PERIOD")); - if (params_->targeting_concurrency()) { if ((parser_->SchedulerType() == pa::ModelParser::SEQUENCE) || (parser_->SchedulerType() == pa::ModelParser::ENSEMBLE_SEQUENCE)) { @@ -221,8 +214,7 @@ PerfAnalyzer::CreateAnalyzerObjects() params_->async, params_->streaming, params_->batch_size, params_->max_threads, params_->max_concurrency, params_->shared_memory_type, params_->output_shm_size, parser_, factory, - params_->periodic_concurrency_range, - params_->periodic_concurrency_request_period); + params_->periodic_concurrency_range, params_->request_period); } else if (params_->using_request_rate_range) { if ((params_->sequence_id_range != 0) && (params_->sequence_id_range < params_->num_of_sequences)) { diff --git a/src/c++/perf_analyzer/perf_utils.h b/src/c++/perf_analyzer/perf_utils.h index 7166936a9..0871de42c 100644 --- 
a/src/c++/perf_analyzer/perf_utils.h +++ b/src/c++/perf_analyzer/perf_utils.h @@ -83,6 +83,15 @@ class Range { T step; }; +enum RequestParameterType { STRING = 0, INT = 1, BOOL = 2 }; + +struct RequestParameter { + std::string str_value; + int64_t int_value; + bool bool_value; + RequestParameterType type; +}; + // Converts the datatype from tensorflow to perf analyzer space // \param tf_dtype The data type string returned from the model metadata. // \param datatype Returns the datatype in perf_analyzer space. diff --git a/src/c++/perf_analyzer/test_command_line_parser.cc b/src/c++/perf_analyzer/test_command_line_parser.cc index 86a5a9175..bdf819ff8 100644 --- a/src/c++/perf_analyzer/test_command_line_parser.cc +++ b/src/c++/perf_analyzer/test_command_line_parser.cc @@ -175,6 +175,35 @@ CHECK_PARAMS(PAParamsPtr act, PAParamsPtr exp) CHECK_STRING(act->filename, act->filename); CHECK(act->mpi_driver != nullptr); CHECK_STRING(act->memory_type, exp->memory_type); + CHECK( + act->is_using_periodic_concurrency_mode == + exp->is_using_periodic_concurrency_mode); + CHECK( + act->periodic_concurrency_range.start == + exp->periodic_concurrency_range.start); + CHECK( + act->periodic_concurrency_range.end == + exp->periodic_concurrency_range.end); + CHECK( + act->periodic_concurrency_range.step == + exp->periodic_concurrency_range.step); + CHECK(act->request_period == exp->request_period); + CHECK(act->request_parameters.size() == exp->request_parameters.size()); + for (auto act_param : act->request_parameters) { + auto exp_param = exp->request_parameters.find(act_param.first); + REQUIRE_MESSAGE( + exp_param != exp->request_parameters.end(), + "Unexpected parameter: ", act_param.first); + + CHECK(act_param.second.type == exp_param->second.type); + if (act_param.second.type == RequestParameterType::STRING) { + CHECK(act_param.second.str_value == exp_param->second.str_value); + } else if (act_param.second.type == RequestParameterType::INT) { + CHECK(act_param.second.int_value == 
exp_param->second.int_value); + } else if (act_param.second.type == RequestParameterType::BOOL) { + CHECK(act_param.second.bool_value == exp_param->second.bool_value); + } + } } @@ -342,11 +371,167 @@ class TestCLParser : public CLParser { } }; +void +CheckValidRange( + std::vector& args, char* option_name, TestCLParser& parser, + PAParamsPtr& act, bool& using_range, Range& range) +{ + SUBCASE("start:end provided") + { + args.push_back(option_name); + args.push_back("100:400"); // start:end + + int argc = args.size(); + char* argv[argc]; + std::copy(args.begin(), args.end(), argv); + + REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); + CHECK(!parser.UsageCalled()); + + using_range = true; + range.start = 100; + range.end = 400; + } + + SUBCASE("start:end:step provided") + { + args.push_back(option_name); + args.push_back("100:400:10"); // start:end:step + + int argc = args.size(); + char* argv[argc]; + std::copy(args.begin(), args.end(), argv); + + REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); + CHECK(!parser.UsageCalled()); + + using_range = true; + range.start = 100; + range.end = 400; + range.step = 10; + } +} + +void +CheckInvalidRange( + std::vector& args, char* option_name, TestCLParser& parser, + PAParamsPtr& act, bool& check_params) +{ + std::string expected_msg; + + // FIXME (TMA-1307): Uncomment the subcase below when the issue is resolved. + // Currently the expected error message does not match the actual error + // message since TestCLParser ignores the exit statement when the Usage() is + // called and proceeds executing the program when it should stop the program. 
+ /* + SUBCASE("too many input values") + { + args.push_back(option_name); + args.push_back("200:100:25:10"); + + int argc = args.size(); + char* argv[argc]; + std::copy(args.begin(), args.end(), argv); + + REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); + CHECK(parser.UsageCalled()); + + expected_msg = CreateUsageMessage( + option_name, "The value does not match ."); + CHECK_STRING("Usage Message", parser.GetUsageMessage(), expected_msg); + + check_params = false; + } + */ + + SUBCASE("invalid start value") + { + args.push_back(option_name); + args.push_back("bad:400:10"); + + int argc = args.size(); + char* argv[argc]; + std::copy(args.begin(), args.end(), argv); + + REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); + CHECK(parser.UsageCalled()); + + expected_msg = + CreateUsageMessage(option_name, "Invalid value provided: bad:400:10"); + CHECK_STRING("Usage Message", parser.GetUsageMessage(), expected_msg); + + check_params = false; + } + + SUBCASE("invalid end value") + { + args.push_back(option_name); + args.push_back("100:bad:10"); + + int argc = args.size(); + char* argv[argc]; + std::copy(args.begin(), args.end(), argv); + + REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); + CHECK(parser.UsageCalled()); + + expected_msg = + CreateUsageMessage(option_name, "Invalid value provided: 100:bad:10"); + CHECK_STRING("Usage Message", parser.GetUsageMessage(), expected_msg); + + check_params = false; + } + + SUBCASE("invalid step value") + { + args.push_back(option_name); + args.push_back("100:400:bad"); + + int argc = args.size(); + char* argv[argc]; + std::copy(args.begin(), args.end(), argv); + + REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); + CHECK(parser.UsageCalled()); + + expected_msg = + CreateUsageMessage(option_name, "Invalid value provided: 100:400:bad"); + CHECK_STRING("Usage Message", parser.GetUsageMessage(), expected_msg); + + check_params = false; + } + + SUBCASE("no input values") + { + args.push_back(option_name); + + int argc = args.size(); + 
char* argv[argc]; + std::copy(args.begin(), args.end(), argv); + + opterr = 0; // Disable error output for GetOpt library for this case + + REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); + CHECK(parser.UsageCalled()); + + // BUG (TMA-1307): Usage message does not contain error. Error statement + // "option '--concurrency-range' requires an argument" written directly + // to std::out + // + CHECK_STRING("Usage Message", parser.GetUsageMessage(), ""); + + check_params = false; + } +} + + TEST_CASE("Testing Command Line Parser") { char* model_name = "my_model"; char* app_name = "test_perf_analyzer"; + std::string expected_msg; + std::vector args{app_name, "-m", model_name}; opterr = 1; // Enable error output for GetOpt library bool check_params = true; @@ -967,194 +1152,292 @@ TEST_CASE("Testing Command Line Parser") SUBCASE("Option : --concurrency-range") { - SUBCASE("expected use") + char* option_name = "--concurrency-range"; + + SUBCASE("start provided") { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--concurrency-range", "100:400:10"}; + args.push_back(option_name); + args.push_back("100"); // start + + int argc = args.size(); + char* argv[argc]; + std::copy(args.begin(), args.end(), argv); REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); CHECK(!parser.UsageCalled()); exp->using_concurrency_range = true; exp->concurrency_range.start = 100; - exp->concurrency_range.end = 400; - exp->concurrency_range.step = 10; } - SUBCASE("only two options") + CheckValidRange( + args, option_name, parser, act, exp->using_concurrency_range, + exp->concurrency_range); + + CheckInvalidRange(args, option_name, parser, act, check_params); + + SUBCASE("wrong separator") { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--concurrency-range", "100:400"}; + args.push_back(option_name); + args.push_back("100,400,10"); + + int argc = args.size(); + char* argv[argc]; + std::copy(args.begin(), args.end(), argv); REQUIRE_NOTHROW(act = 
parser.Parse(argc, argv)); CHECK(!parser.UsageCalled()); - exp->using_concurrency_range = true; - exp->concurrency_range.start = 100; - exp->concurrency_range.end = 400; + // BUG (TMA-1307): Should detect this and through an error. User will + // enter this and have no clue why the end and step sizes are not used + // correctly. + // + + check_params = false; } - SUBCASE("only one options") + SUBCASE("invalid condition - end and latency threshold are 0") { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--concurrency-range", "100"}; + args.push_back(option_name); + args.push_back("100:0:25"); + args.push_back("--latency-threshold"); + args.push_back("0"); + + int argc = args.size(); + char* argv[argc]; + std::copy(args.begin(), args.end(), argv); - // QUESTION: What does this mean? Why pass only one? - // REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); + CHECK(parser.UsageCalled()); + CHECK_STRING( + "Usage Message", parser.GetUsageMessage(), + "The end of the search range and the latency limit can not be both 0 " + "(or 0.0) simultaneously"); - exp->using_concurrency_range = true; - exp->concurrency_range.start = 100; + check_params = false; } + } - SUBCASE("no options") + SUBCASE("Option : --periodic-concurrency-range") + { + char* option_name = "--periodic-concurrency-range"; + + // Add required args that specifies where to dump profiled data + args.insert( + args.end(), {"-i", "grpc", "--async", "--streaming", + "--profile-export-file", "profile.json"}); + exp->protocol = cb::ProtocolType::GRPC; + exp->async = true; + exp->streaming = true; + exp->url = "localhost:8001"; // gRPC url + exp->max_threads = 4; // not targeting concurrency + + SUBCASE("start provided") { - int argc = 4; - char* argv[argc] = {app_name, "-m", model_name, "--concurrency-range"}; + args.push_back(option_name); + args.push_back("100"); // start - opterr = 0; // Disable error output for GetOpt library for this case + int argc = 
args.size(); + char* argv[argc]; + std::copy(args.begin(), args.end(), argv); REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); CHECK(parser.UsageCalled()); - // BUG: Usage message does not contain error. Error statement - // "option '--concurrency-range' requires an argument" written directly - // to std::out - // - CHECK_STRING("Usage Message", parser.GetUsageMessage(), ""); + // FIXME (TMA-1307): Currently the expected error message does not match + // the actual error message since TestCLParser ignores the exit statement + // when the Usage() is called and proceeds executing the program when it + // should stop the program. + + check_params = false; } - SUBCASE("too many options") + CheckValidRange( + args, option_name, parser, act, exp->is_using_periodic_concurrency_mode, + exp->periodic_concurrency_range); + + CheckInvalidRange(args, option_name, parser, act, check_params); + + SUBCASE("more than one load mode") { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--concurrency-range", "200:100:25:10"}; + args.push_back(option_name); + args.push_back("100:400"); + args.push_back("--concurrency-range"); + args.push_back("10:40"); + + int argc = args.size(); + char* argv[argc]; + std::copy(args.begin(), args.end(), argv); REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); CHECK(parser.UsageCalled()); - expected_msg = CreateUsageMessage( - "--concurrency-range", "The value does not match ."); + expected_msg = + "Cannot specify more then one inference load mode. 
Please choose " + "only one of the following modes: --concurrency-range, " + "--periodic-concurrency-range, --request-rate-range, or " + "--request-intervals."; CHECK_STRING("Usage Message", parser.GetUsageMessage(), expected_msg); - exp->using_concurrency_range = true; - exp->concurrency_range.start = 200; - exp->concurrency_range.end = 100; - exp->concurrency_range.step = 25; + check_params = false; } - SUBCASE("way too many options") + SUBCASE("no export file specified") { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--concurrency-range", - "200:100:25:10:20:30"}; + // Remove the export file args + args.pop_back(); + args.pop_back(); + + args.push_back(option_name); + args.push_back("100:400"); + + int argc = args.size(); + char* argv[argc]; + std::copy(args.begin(), args.end(), argv); REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); CHECK(parser.UsageCalled()); - expected_msg = CreateUsageMessage( - "--concurrency-range", "The value does not match ."); + expected_msg = + "Must provide --profile-export-file when using the " + "--periodic-concurrency-range option."; CHECK_STRING("Usage Message", parser.GetUsageMessage(), expected_msg); - exp->using_concurrency_range = true; - exp->concurrency_range.start = 200; - exp->concurrency_range.end = 100; - exp->concurrency_range.step = 25; + check_params = false; } - SUBCASE("wrong separator") + SUBCASE("step is not factor of range size") { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--concurrency-range", "100,400,10"}; + args.push_back(option_name); + args.push_back("100:400:7"); + + int argc = args.size(); + char* argv[argc]; + std::copy(args.begin(), args.end(), argv); REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(!parser.UsageCalled()); + CHECK(parser.UsageCalled()); - // BUG: Should detect this and through an error. User will enter this and - // have no clue why the end and step sizes are not used correctly. 
- // + expected_msg = CreateUsageMessage( + option_name, + "The value must be a factor of the range size ( - " + ")."); + CHECK_STRING("Usage Message", parser.GetUsageMessage(), expected_msg); - exp->using_concurrency_range = true; - exp->concurrency_range.start = 100; + check_params = false; } - SUBCASE("bad start value") + SUBCASE("step is zero") { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--concurrency-range", "bad:400:10"}; + args.push_back(option_name); + args.push_back("10:400:0"); + + int argc = args.size(); + char* argv[argc]; + std::copy(args.begin(), args.end(), argv); REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); CHECK(parser.UsageCalled()); - expected_msg = CreateUsageMessage( - "--concurrency-range", "Invalid value provided: bad:400:10"); + expected_msg = + CreateUsageMessage(option_name, "The value must be > 0."); CHECK_STRING("Usage Message", parser.GetUsageMessage(), expected_msg); - exp->using_concurrency_range = true; + check_params = false; } + } - SUBCASE("bad end value") + SUBCASE("Option : --request-period") + { + expected_msg = + CreateUsageMessage("--request-period", "The value must be > 0"); + CHECK_INT_OPTION("--request-period", exp->request_period, expected_msg); + + SUBCASE("set to 0") { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--concurrency-range", "100:bad:10"}; + args.push_back("--request-period"); + args.push_back("0"); + + int argc = args.size(); + char* argv[argc]; + std::copy(args.begin(), args.end(), argv); REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); CHECK(parser.UsageCalled()); - expected_msg = CreateUsageMessage( - "--concurrency-range", "Invalid value provided: 100:bad:10"); CHECK_STRING("Usage Message", parser.GetUsageMessage(), expected_msg); - - exp->using_concurrency_range = true; - exp->concurrency_range.start = 100; } + } + + SUBCASE("Option : --request-parameter") + { + char* option_name = "--request-parameter"; - SUBCASE("bad step value") + // Add 
required args that specifies where to dump profiled data + args.insert(args.end(), {"-i", "grpc", "--async", "--streaming"}); + exp->protocol = cb::ProtocolType::GRPC; + exp->async = true; + exp->streaming = true; + exp->url = "localhost:8001"; // gRPC url + + SUBCASE("valid parameter") { - int argc = 5; - char* argv[argc] = { - app_name, "-m", model_name, "--concurrency-range", "100:400:bad"}; + args.push_back(option_name); + args.push_back("max_tokens:256:int"); + + int argc = args.size(); + char* argv[argc]; + std::copy(args.begin(), args.end(), argv); REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); - CHECK(parser.UsageCalled()); + CHECK(!parser.UsageCalled()); - expected_msg = CreateUsageMessage( - "--concurrency-range", "Invalid value provided: 100:400:bad"); - CHECK_STRING("Usage Message", parser.GetUsageMessage(), expected_msg); + RequestParameter param; + param.int_value = 256; + param.type = RequestParameterType::INT; + exp->request_parameters["max_tokens"] = param; + } - exp->using_concurrency_range = true; - exp->concurrency_range.start = 100; - exp->concurrency_range.end = 400; + SUBCASE("missing type") + { + args.push_back(option_name); + args.push_back("max_tokens:256"); + + int argc = args.size(); + char* argv[argc]; + std::copy(args.begin(), args.end(), argv); + + // FIXME (TMA-1307): Currently the expected error message does not match + // the actual error message since TestCLParser ignores the exit statement + // when the Usage() is called and proceeds executing the program when it + // should stop the program. 
+ // + // REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); + // CHECK(parser.UsageCalled()); + // expected_msg = CreateUsageMessage( + // option_name, "The value does not match ."); + // CHECK_STRING("Usage Message", parser.GetUsageMessage(), expected_msg); + + check_params = false; } - SUBCASE("invalid condition - end and latency threshold are 0") + SUBCASE("unsupported type") { - int argc = 7; - char* argv[argc] = {app_name, "-m", - model_name, "--concurrency-range", - "100:0:25", "--latency-threshold", - "0"}; + args.push_back(option_name); + args.push_back("max_tokens:256:hello"); + + int argc = args.size(); + char* argv[argc]; + std::copy(args.begin(), args.end(), argv); REQUIRE_NOTHROW(act = parser.Parse(argc, argv)); CHECK(parser.UsageCalled()); - CHECK_STRING( - "Usage Message", parser.GetUsageMessage(), - "The end of the search range and the latency limit can not be both 0 " - "(or 0.0) simultaneously"); - exp->using_concurrency_range = true; - exp->concurrency_range.start = 100; - exp->concurrency_range.end = 0; - exp->concurrency_range.step = 25; - exp->latency_threshold_ms = 0; + expected_msg = + CreateUsageMessage(option_name, "Unsupported type: 'hello'."); + CHECK_STRING("Usage Message", parser.GetUsageMessage(), expected_msg); + + check_params = false; } } @@ -1457,6 +1740,13 @@ TEST_CASE("Testing Command Line Parser") } if (check_params) { + if (act == nullptr) { + std::cerr + << "Error: Attempting to access `act` but was not initialized. Check " + "if the test cases are missing `check_params = false` statement." + << std::endl; + exit(1); + } CHECK_PARAMS(act, exp); } optind = 1; // Reset GotOpt index, needed to parse the next command line