From 6fa0c00079b62198aef51b8226460e752d12cb31 Mon Sep 17 00:00:00 2001
From: Dmitri Smirnov <dmitrism@microsoft.com>
Date: Thu, 19 Dec 2024 13:47:13 -0800
Subject: [PATCH] Enable Ort::Value to be a resizable vector

---
 .../core/platform/EigenNonBlockingThreadPool.h       | 19 ++++++++++++++-----
 .../onnxruntime/core/session/onnxruntime_cxx_api.h   |  4 ++--
 onnxruntime/core/providers/cuda/cu_inc/common.cuh    |  2 +-
 onnxruntime/test/shared_lib/test_nontensor_types.cc  |  5 +++++
 4 files changed, 22 insertions(+), 8 deletions(-)
diff --git a/include/onnxruntime/core/platform/EigenNonBlockingThreadPool.h b/include/onnxruntime/core/platform/EigenNonBlockingThreadPool.h
index 7f0046d137a64..696628b872e8f 100644
--- a/include/onnxruntime/core/platform/EigenNonBlockingThreadPool.h
+++ b/include/onnxruntime/core/platform/EigenNonBlockingThreadPool.h
@@ -212,18 +212,18 @@ class ThreadPoolProfiler {
     WAIT_REVOKE,
     MAX_EVENT
   };
-  ThreadPoolProfiler(int, const CHAR_TYPE*){};
+  ThreadPoolProfiler(int, const CHAR_TYPE*) {};
   ~ThreadPoolProfiler() = default;
   ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(ThreadPoolProfiler);
-  void Start(){};
+  void Start() {};
   std::string Stop() { return "not available for minimal build"; }
-  void LogStart(){};
+  void LogStart() {};
   void LogEnd(ThreadPoolEvent){};
   void LogEndAndStart(ThreadPoolEvent){};
   void LogStartAndCoreAndBlock(std::ptrdiff_t){};
   void LogCoreAndBlock(std::ptrdiff_t){};
-  void LogThreadId(int){};
-  void LogRun(int){};
+  void LogThreadId(int) {};
+  void LogRun(int) {};
   std::string DumpChildThreadStat() { return {}; }
 };
 #else
@@ -634,9 +634,18 @@ class RunQueue {
   // position, these conditions would be indistinguishable); (2) obtain
   // consistent snapshot of front_/back_ for Size operation using the
   // modification counters.
+  // MSVC C4324 reports padding added because of an alignment specifier; the padding is
+  // presumably intentional for the ORT_ALIGN_TO_AVOID_FALSE_SHARING members below.
+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable : 4324)
+#endif
   ORT_ALIGN_TO_AVOID_FALSE_SHARING std::atomic<unsigned> front_;
   ORT_ALIGN_TO_AVOID_FALSE_SHARING std::atomic<unsigned> back_;
   ORT_ALIGN_TO_AVOID_FALSE_SHARING Elem array_[kSize];
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
 
   // SizeOrNotEmpty returns current queue size; if NeedSizeEstimate is false,
   // only whether the size is 0 is guaranteed to be correct.
diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h
index 2d5e1a9bddeec..e56133c43911e 100644
--- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h
@@ -1294,11 +1294,11 @@ using UnownedValue = detail::ValueImpl<detail::Unowned<OrtValue>>;
  */
 struct Value : detail::ValueImpl<OrtValue> {
   using Base = detail::ValueImpl<OrtValue>;
+  using Base::Base;
   using OrtSparseValuesParam = detail::OrtSparseValuesParam;
   using Shape = detail::Shape;
 
-  explicit Value(std::nullptr_t) {}         ///< Create an empty Value object, must be assigned a valid one to be used
-  explicit Value(OrtValue* p) : Base{p} {}  ///< Used for interop with the C API
+  explicit Value(std::nullptr_t) {}  ///< Create an empty Value object, must be assigned a valid one to be used
   Value(Value&&) = default;
   Value& operator=(Value&&) = default;
 
diff --git a/onnxruntime/core/providers/cuda/cu_inc/common.cuh b/onnxruntime/core/providers/cuda/cu_inc/common.cuh
index 1d3e43f386c1b..eff55e531d985 100644
--- a/onnxruntime/core/providers/cuda/cu_inc/common.cuh
+++ b/onnxruntime/core/providers/cuda/cu_inc/common.cuh
@@ -19,7 +19,7 @@ namespace onnxruntime {
 namespace cuda {
 
 // float16 arithmetic is supported after sm5.3 with intrinsics, and cuda does not provide fallback for lower versions
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 530
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 530) && ((__CUDACC_VER_MAJOR__ < 12) || ((__CUDACC_VER_MAJOR__ == 12) && (__CUDACC_VER_MINOR__ < 2)))
 __device__ __forceinline__ half operator+(const half& lh, const half& rh) { return half((float)lh + (float)rh); }
 __device__ __forceinline__ half operator-(const half& lh, const half& rh) { return half((float)lh - (float)rh); }
 __device__ __forceinline__ half operator*(const half& lh, const half& rh) { return half((float)lh * (float)rh); }
diff --git a/onnxruntime/test/shared_lib/test_nontensor_types.cc b/onnxruntime/test/shared_lib/test_nontensor_types.cc
index a431c42bb9294..8bba54494ac1b 100644
--- a/onnxruntime/test/shared_lib/test_nontensor_types.cc
+++ b/onnxruntime/test/shared_lib/test_nontensor_types.cc
@@ -678,6 +678,11 @@ TEST(CApiTest, SparseTensorFillSparseTensorFormatAPI) {
   }
 }
 
+TEST(CApiTest, TestResize) {
+  std::vector<Ort::Value> values;  // build check: Ort::Value must be usable as a resizable vector element
+  values.resize(10);               // resize(n) needs a default-insertable and move-insertable element type
+}
+
 TEST(CApiTest, SparseTensorFillSparseFormatStringsAPI) {
   auto allocator = Ort::AllocatorWithDefaultOptions();
   Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);