From ca1fec6774f2df695def1b05ce67e6b9c810589e Mon Sep 17 00:00:00 2001 From: Matthew Kotila Date: Wed, 31 Jan 2024 17:50:47 -0800 Subject: [PATCH] Remove periodic concurrency feature --- src/c++/perf_analyzer/CMakeLists.txt | 6 +- src/c++/perf_analyzer/command_line_parser.cc | 121 +------------ src/c++/perf_analyzer/command_line_parser.h | 9 +- src/c++/perf_analyzer/docs/cli.md | 26 +-- .../docs/inference_load_modes.md | 36 +--- src/c++/perf_analyzer/docs/llm.md | 79 +-------- src/c++/perf_analyzer/infer_context.cc | 9 +- src/c++/perf_analyzer/infer_context.h | 15 +- src/c++/perf_analyzer/inference_profiler.h | 15 +- src/c++/perf_analyzer/perf_analyzer.cc | 14 +- .../periodic_concurrency_manager.cc | 121 ------------- .../periodic_concurrency_manager.h | 93 ---------- .../periodic_concurrency_worker.cc | 71 -------- .../periodic_concurrency_worker.h | 80 --------- .../perf_analyzer/test_command_line_parser.cc | 162 +----------------- 15 files changed, 15 insertions(+), 842 deletions(-) delete mode 100644 src/c++/perf_analyzer/periodic_concurrency_manager.cc delete mode 100644 src/c++/perf_analyzer/periodic_concurrency_manager.h delete mode 100644 src/c++/perf_analyzer/periodic_concurrency_worker.cc delete mode 100644 src/c++/perf_analyzer/periodic_concurrency_worker.h diff --git a/src/c++/perf_analyzer/CMakeLists.txt b/src/c++/perf_analyzer/CMakeLists.txt index bebdba4d5..f6cd1cb0d 100644 --- a/src/c++/perf_analyzer/CMakeLists.txt +++ b/src/c++/perf_analyzer/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -68,8 +68,6 @@ set( sequence_manager.cc profile_data_collector.cc profile_data_exporter.cc - periodic_concurrency_manager.cc - periodic_concurrency_worker.cc ) set( @@ -110,8 +108,6 @@ set( request_record.h profile_data_collector.h profile_data_exporter.h - periodic_concurrency_manager.h - periodic_concurrency_worker.h ) add_executable( diff --git a/src/c++/perf_analyzer/command_line_parser.cc b/src/c++/perf_analyzer/command_line_parser.cc index 711f1714e..b1fe9a015 100644 --- a/src/c++/perf_analyzer/command_line_parser.cc +++ b/src/c++/perf_analyzer/command_line_parser.cc @@ -1,4 +1,4 @@ -// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -113,8 +113,6 @@ CLParser::Usage(const std::string& msg) std::cerr << "\t--measurement-interval (-p) " << std::endl; std::cerr << "\t--concurrency-range " << std::endl; - std::cerr << "\t--periodic-concurrency-range " << std::endl; - std::cerr << "\t--request-period " << std::endl; std::cerr << "\t--request-rate-range " << std::endl; std::cerr << "\t--request-distribution <\"poisson\"|\"constant\">" << std::endl; @@ -301,34 +299,6 @@ CLParser::Usage(const std::string& msg) "not be 0 for sequence models while using asynchronous mode.", 18) << std::endl; - std::cerr - << FormatMessage( - "--periodic-concurrency-range : Determines the " - "range of concurrency levels in the similar but slightly " - "different manner as the --concurrency-range. Perf Analyzer will " - "start from the concurrency level of 'start' and increase by " - "'step' each time. Unlike --concurrency-range, the 'end' " - "indicates the *total* number of concurrency since the 'start' " - "(including) and will stop increasing once the cumulative number " - "of concurrent requests has reached the 'end'. The user can " - "specify *when* to periodically increase the concurrency level " - "using the --request-period option. The concurrency level will " - "periodically increase for every n-th response specified by " - "--request-period. Since this disables stability check in Perf " - "Analyzer and reports response timestamps only, the user must " - "provide --profile-export-file to specify where to dump all the " - "measured timestamps. The default values of 'start', 'end', and " - "'step' are 1.", - 18) - << std::endl; - std::cerr - << FormatMessage( - "--request-period : Indicates the number of responses that " - "each request must receive before new, concurrent requests are " - "sent when --periodic-concurrency-range is specified. Default " - "value is 10.", - 18) - << std::endl; std::cerr << FormatMessage( "--request-parameter : Specifies a custom " @@ -872,9 +842,7 @@ CLParser::ParseCommandLine(int argc, char** argv) {"output-tensor-format", required_argument, 0, 56}, {"version", no_argument, 0, 57}, {"profile-export-file", required_argument, 0, 58}, - {"periodic-concurrency-range", required_argument, 0, 59}, - {"request-period", required_argument, 0, 60}, - {"request-parameter", required_argument, 0, 61}, + {"request-parameter", required_argument, 0, 59}, {0, 0, 0, 0}}; // Parse commandline... @@ -1538,56 +1506,6 @@ CLParser::ParseCommandLine(int argc, char** argv) break; } case 59: { - params_->is_using_periodic_concurrency_mode = true; - std::string arg = optarg; - std::vector values{SplitString(arg)}; - if (values.size() < 2) { - Usage( - "Failed to parse --periodic-concurrency-range. Both " - "and values must be provided."); - } else if (values.size() > 3) { - Usage( - "Failed to parse --periodic-concurrency-range. The value does " - "not match ."); - } - - for (size_t i = 0; i < values.size(); ++i) { - uint64_t val = std::stoull(values[i]); - if (i == 0) { - params_->periodic_concurrency_range.start = val; - } else if (i == 1) { - params_->periodic_concurrency_range.end = val; - } else if (i == 2) { - params_->periodic_concurrency_range.step = val; - } - } - - Range range{params_->periodic_concurrency_range}; - if (range.step == 0) { - Usage( - "Failed to parse --periodic-concurrency-range. The " - "value must be > 0."); - } else if (range.start > range.end) { - Usage( - "Failed to parse --periodic-concurrency-range. The " - "must be <= ."); - } else if ((range.end - range.start) % range.step != 0) { - Usage( - "Failed to parse --periodic-concurrency-range. The " - "value must be a factor of the range size ( - )."); - } - break; - } - case 60: { - std::string request_period{optarg}; - if (std::stoi(request_period) > 0) { - params_->request_period = std::stoull(request_period); - } else { - Usage("Failed to parse --request-period. The value must be > 0"); - } - break; - } - case 61: { std::string arg = optarg; std::vector values{SplitString(arg)}; if (values.size() != 3) { @@ -1766,46 +1684,13 @@ CLParser::VerifyOptions() } std::vector load_modes{ - params_->is_using_periodic_concurrency_mode, params_->using_concurrency_range, params_->using_request_rate_range, params_->using_custom_intervals}; if (std::count(load_modes.begin(), load_modes.end(), true) > 1) { Usage( "Cannot specify more then one inference load mode. Please choose only " "one of the following modes: --concurrency-range, " - "--periodic-concurrency-range, --request-rate-range, or " - "--request-intervals."); - } - - if (params_->is_using_periodic_concurrency_mode && !params_->streaming) { - Usage( - "The --periodic-concurrency-range option requires bi-directional gRPC " - "streaming."); - } - - if (params_->is_using_periodic_concurrency_mode && - (params_->profile_export_file == "")) { - Usage( - "Must provide --profile-export-file when using the " - "--periodic-concurrency-range option."); - } - - if (params_->is_using_periodic_concurrency_mode) { - if (params_->periodic_concurrency_range.end == pa::NO_LIMIT) { - std::cerr - << "WARNING: The maximum attainable concurrency will be limited by " - "max_threads specification." - << std::endl; - params_->periodic_concurrency_range.end = params_->max_threads; - } else { - if (params_->max_threads_specified) { - std::cerr << "WARNING: Overriding max_threads specification to ensure " - "requested concurrency range." - << std::endl; - } - params_->max_threads = std::max( - params_->max_threads, params_->periodic_concurrency_range.end); - } + "--request-rate-range, or --request-intervals."); } if (params_->request_parameters.size() > 0 && diff --git a/src/c++/perf_analyzer/command_line_parser.h b/src/c++/perf_analyzer/command_line_parser.h index 9ff4869ff..89c309a13 100644 --- a/src/c++/perf_analyzer/command_line_parser.h +++ b/src/c++/perf_analyzer/command_line_parser.h @@ -1,4 +1,4 @@ -// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -131,8 +131,7 @@ struct PerfAnalyzerParameters { { return ( using_concurrency_range || using_old_options || - !(using_request_rate_range || using_custom_intervals || - is_using_periodic_concurrency_mode)); + !(using_request_rate_range || using_custom_intervals)); } // Sets the threshold for PA client overhead. @@ -150,10 +149,6 @@ struct PerfAnalyzerParameters { // The profile export file path. std::string profile_export_file{""}; - - bool is_using_periodic_concurrency_mode{false}; - Range periodic_concurrency_range{1, 1, 1}; - uint64_t request_period{10}; }; using PAParamsPtr = std::shared_ptr; diff --git a/src/c++/perf_analyzer/docs/cli.md b/src/c++/perf_analyzer/docs/cli.md index 5961224c8..b60c02f84 100644 --- a/src/c++/perf_analyzer/docs/cli.md +++ b/src/c++/perf_analyzer/docs/cli.md @@ -1,5 +1,5 @@