diff --git a/onnxruntime/test/perftest/README.md b/onnxruntime/test/perftest/README.md index 59059cf6b62b7..4169d1bf54c65 100644 --- a/onnxruntime/test/perftest/README.md +++ b/onnxruntime/test/perftest/README.md @@ -35,6 +35,10 @@ Options: -x: [intra_op_num_threads]: Sets the number of threads used to parallelize the execution within nodes. A value of 0 means the test will auto-select a default. Must >=0. -y: [inter_op_num_threads]: Sets the number of threads used to parallelize the execution of the graph (across nodes), A value of 0 means the test will auto-select a default. Must >=0. + + -C: [session_config_entries]: Specify session configuration entries as key-value pairs: -C "<key1>|<value1> <key2>|<value2>" + Refer to onnxruntime_session_options_config_keys.h for valid keys and values. + [Example] -C "session.disable_cpu_ep_fallback|1 ep.context_enable|1" -h: help. diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 6c1d447c7b3a3..7cfbe0a84e3e6 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -6,6 +6,9 @@ #include #include +#include +#include +#include // Windows Specific #ifdef _WIN32 @@ -57,6 +60,9 @@ namespace perftest { "\t-d [CUDA only][cudnn_conv_algorithm]: Specify CUDNN convolution algorithms: 0(benchmark), 1(heuristic), 2(default). \n" "\t-q [CUDA only] use separate stream for copy. \n" "\t-z: Set denormal as zero. When turning on this option reduces latency dramatically, a model may have denormals.\n" + "\t-C: Specify session configuration entries as key-value pairs: -C \"| |\" \n" + "\t Refer to onnxruntime_session_options_config_keys.h for valid keys and values. \n" + "\t [Example] -C \"session.disable_cpu_ep_fallback|1 ep.context_enable|1\" \n" "\t-i: Specify EP specific runtime options as key value pairs. 
/// Parses a whitespace-separated list of "key|value" session configuration entries.
///
/// Each token is split at its first '|': the text before it is the key and the text
/// after it (including any further '|' characters) is the value.
///
/// @param configs_string  Raw option text, e.g. "session.disable_cpu_ep_fallback|1 ep.context_enable|1".
/// @param session_configs Receives the parsed entries. A key that is already present in the
///                        map counts as a duplicate. Entries inserted before a failure are
///                        left in the map.
/// @return true if every token was well formed and unique; false at the first token that
///         has no '|', has an empty key, has an empty value, or repeats an existing key.
static bool ParseSessionConfigs(const std::string& configs_string,
                                std::unordered_map<std::string, std::string>& session_configs) {
  std::istringstream ss(configs_string);
  std::string token;

  // operator>> skips whitespace and never produces an empty token, so an empty or
  // all-whitespace input simply yields zero iterations.
  while (ss >> token) {
    const std::string_view token_sv(token);
    const auto pos = token_sv.find('|');

    // find() only returns indices below length(), so an empty value shows up as the
    // separator sitting in the last position (pos + 1 == length), not pos == length.
    if (pos == std::string_view::npos || pos == 0 || pos + 1 == token_sv.length()) {
      // Error: must use a '|' to separate the key and value for session configuration entries.
      return false;
    }

    // try_emplace performs a single lookup and leaves the existing entry untouched
    // when the key is already present.
    const bool inserted = session_configs
                              .try_emplace(std::string(token_sv.substr(0, pos)),
                                           std::string(token_sv.substr(pos + 1)))
                              .second;
    if (!inserted) {
      // Error: specified duplicate session configuration entry: {key}
      return false;
    }
  }

  return true;
}
true; break; diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 6854a2649060a..87506c7240578 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -634,22 +634,41 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); session_options.DisableMemPattern(); session_options.SetExecutionMode(performance_test_config.run_config.execution_mode); + // Set any extra session configuration entries provided by the user via command-line arguments. + // + // Some session config entries can also be set via dedicated command-line options. + // If the user uses multiple command-line options to set the same session config entry, + // we'll print a warning. Note that the dedicated command-line options will take precedence. + const auto& user_session_configs = performance_test_config.run_config.session_config_entries; + for (auto& it : user_session_configs) { + session_options.AddConfigEntry(it.first.c_str(), it.second.c_str()); + } + + auto warn_dup_config_entry = [&user_session_configs](const char* key) -> void { + if (user_session_configs.find(key) != user_session_configs.end()) { + fprintf(stderr, "[WARNING]: Trying to set session config entry '%s' via multiple command-line options\n", key); + } + }; + if (performance_test_config.run_config.intra_op_num_threads > 0) { fprintf(stdout, "Setting intra_op_num_threads to %d\n", performance_test_config.run_config.intra_op_num_threads); session_options.SetIntraOpNumThreads(performance_test_config.run_config.intra_op_num_threads); } if (!performance_test_config.run_config.intra_op_thread_affinities.empty()) { + warn_dup_config_entry(kOrtSessionOptionsConfigIntraOpThreadAffinities); fprintf(stdout, "Setting intra op thread affinity as %s\n", performance_test_config.run_config.intra_op_thread_affinities.c_str()); session_options.AddConfigEntry(kOrtSessionOptionsConfigIntraOpThreadAffinities, 
performance_test_config.run_config.intra_op_thread_affinities.c_str()); } if (performance_test_config.run_config.disable_spinning) { + warn_dup_config_entry(kOrtSessionOptionsConfigAllowIntraOpSpinning); fprintf(stdout, "Disabling intra-op thread spinning entirely\n"); session_options.AddConfigEntry(kOrtSessionOptionsConfigAllowIntraOpSpinning, "0"); } if (performance_test_config.run_config.disable_spinning_between_run) { + warn_dup_config_entry(kOrtSessionOptionsConfigForceSpinningStop); fprintf(stdout, "Disabling intra-op thread spinning between runs\n"); session_options.AddConfigEntry(kOrtSessionOptionsConfigForceSpinningStop, "1"); } @@ -661,12 +680,16 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); // Set optimization level. session_options.SetGraphOptimizationLevel(performance_test_config.run_config.optimization_level); - if (!performance_test_config.run_config.profile_file.empty()) + if (!performance_test_config.run_config.profile_file.empty()) { session_options.EnableProfiling(performance_test_config.run_config.profile_file.c_str()); - if (!performance_test_config.run_config.optimized_model_path.empty()) + } + if (!performance_test_config.run_config.optimized_model_path.empty()) { session_options.SetOptimizedModelFilePath(performance_test_config.run_config.optimized_model_path.c_str()); - if (performance_test_config.run_config.set_denormal_as_zero) + } + if (performance_test_config.run_config.set_denormal_as_zero) { + warn_dup_config_entry(kOrtSessionOptionsConfigSetDenormalAsZero); session_options.AddConfigEntry(kOrtSessionOptionsConfigSetDenormalAsZero, "1"); + } if (!performance_test_config.run_config.free_dim_name_overrides.empty()) { for (auto const& dim_override : performance_test_config.run_config.free_dim_name_overrides) { if (g_ort->AddFreeDimensionOverrideByName(session_options, ToUTF8String(dim_override.first).c_str(), dim_override.second) != nullptr) { diff --git a/onnxruntime/test/perftest/test_configuration.h 
b/onnxruntime/test/perftest/test_configuration.h index 43ad556247f97..5a49414a49004 100644 --- a/onnxruntime/test/perftest/test_configuration.h +++ b/onnxruntime/test/perftest/test_configuration.h @@ -6,6 +6,7 @@ #include #include #include +#include #include "core/graph/constants.h" #include "core/framework/session_options.h" @@ -56,6 +57,7 @@ struct RunConfig { bool do_cuda_copy_in_separate_stream{false}; bool set_denormal_as_zero{false}; std::basic_string ep_runtime_config_string; + std::unordered_map session_config_entries; std::map, int64_t> free_dim_name_overrides; std::map, int64_t> free_dim_denotation_overrides; std::string intra_op_thread_affinities;