microsoft · jywu-msft · Aug 28, 2024 · Aug 17, 2024 · Aug 17, 2024 · Aug 17, 2024
diff --git a/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h b/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h
@@ -49,3 +49,8 @@
 // If the value is set to -1, cuda graph capture/replay is disabled in that run.
 // User are not expected to set the value to 0 as it is reserved for internal use.
 static const char* const kOrtRunOptionsConfigCudaGraphAnnotation = "gpu_graph_id";
+
+// Specify the type of workload for this run.
+// “Default”: OS determines the scheduling priority and processor performance to service this workload. [Default]
+// “Efficient”: OS treats this workload is efficiency oriented with low scheduling priority and efficient processor performance.
+static const char* const kOrtRunOptionsWorkloadType = "run.workload_type";
diff --git a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h
@@ -279,3 +279,8 @@
 // Refer to MatMulNBits op schema for more details.
 // If not provided, default is 4.
 static const char* const kOrtSessionOptionsQDQMatMulNBitsAccuracyLevel = "session.qdq_matmulnbits_accuracy_level";
+
+// Specify the type of workload for this session.
+// “Default”: OS determines the scheduling priority and processor performance to service this workload. [Default]
+// “Efficient”: OS treats this workload is efficiency oriented with low scheduling priority and efficient processor performance.
+static const char* const kOrtSessionOptionsWorkloadType = "session.workload_type";