From dd21ba4cc8e579638c05a0f5664852d8069cb35e Mon Sep 17 00:00:00 2001 From: AlbertGuan9527 <87043564+AlbertGuan9527@users.noreply.github.com> Date: Fri, 16 Aug 2024 20:59:37 -0700 Subject: [PATCH 1/3] Add run option workload_type --- .../core/session/onnxruntime_run_options_config_keys.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h b/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h index c80b8c0c164b6..9942f8c656760 100644 --- a/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h +++ b/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h @@ -49,3 +49,8 @@ static const char* const kOrtRunOptionsConfigQnnRpcControlLatency = "qnn.rpc_con // If the value is set to -1, cuda graph capture/replay is disabled in that run. // User are not expected to set the value to 0 as it is reserved for internal use. static const char* const kOrtRunOptionsConfigCudaGraphAnnotation = "gpu_graph_id"; + +// Specify the type of workload for this run. +// "Default": OS determines the scheduling priority and processor performance to service this workload. [Default] +// "Efficient": OS treats this workload as efficiency-oriented, with low scheduling priority and efficient processor performance. 
+static const char* const kOrtRunOptionsWorkloadType = "run.workload_type"; From 787dd7824ced9dec345211f0e559c36fad4e9ae3 Mon Sep 17 00:00:00 2001 From: AlbertGuan9527 <87043564+AlbertGuan9527@users.noreply.github.com> Date: Fri, 16 Aug 2024 21:00:26 -0700 Subject: [PATCH 2/3] Add session option workload_type --- .../core/session/onnxruntime_session_options_config_keys.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h index 209fd4279cc99..ecd0c43cdaa64 100644 --- a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h +++ b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h @@ -279,3 +279,9 @@ static const char* const kOrtSessionOptionsMlasGemmFastMathArm64Bfloat16 = "mlas // Refer to MatMulNBits op schema for more details. // If not provided, default is 4. static const char* const kOrtSessionOptionsQDQMatMulNBitsAccuracyLevel = "session.qdq_matmulnbits_accuracy_level"; + +// Specify the type of workload for this session. +// "Default": OS determines the scheduling priority and processor performance to service this workload. [Default] +// "Efficient": OS treats this workload as efficiency-oriented, with low scheduling priority and efficient processor performance. 
+static const char* const kOrtSessionOptionsWorkloadType = "session.workload_type"; + From 6ea5bc3fca0f447ced4a4d5734668568b4d0ff7b Mon Sep 17 00:00:00 2001 From: AlbertGuan9527 <87043564+AlbertGuan9527@users.noreply.github.com> Date: Fri, 16 Aug 2024 21:25:39 -0700 Subject: [PATCH 3/3] Fix format --- .../core/session/onnxruntime_session_options_config_keys.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h index ecd0c43cdaa64..02dd622f42e88 100644 --- a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h +++ b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h @@ -284,4 +284,3 @@ static const char* const kOrtSessionOptionsQDQMatMulNBitsAccuracyLevel = "sessio // "Default": OS determines the scheduling priority and processor performance to service this workload. [Default] // "Efficient": OS treats this workload as efficiency-oriented, with low scheduling priority and efficient processor performance. static const char* const kOrtSessionOptionsWorkloadType = "session.workload_type"; -