From df740d7d152dfd04d29f4310a02d41b68f010b37 Mon Sep 17 00:00:00 2001
From: Scott McKay <skottmckay@gmail.com>
Date: Wed, 3 Jan 2024 07:57:51 +1000
Subject: [PATCH] Throw if unique_ptr or array allocation fails due to SafeInt
 overflow (#18941)

### Description
<!-- Describe your changes. -->
If we fail to calculate the buffer size (due to overflow) we currently
return a nullptr. This is inconsistent, as an actual memory allocation
failure throws. An overflow is typically caused by bad input, so
throwing an exception makes more sense.

Change to throw so code using MakeUniquePtr* and AllocArray* doesn't
need to check for nullptr.

Add some extra info to the log message to help debugging.

### Motivation and Context
<!-- - Why is this change required? What problem does it solve?
- If it fixes an open issue, please link to the issue here. -->
Should help with #18905 by avoiding the attempted use of the invalid
nullptr returned from the allocation. The extra info _might_ help with
figuring out where the overflow is coming from, which is the real issue.
---
 .../onnxruntime/core/framework/allocator.h    | 99 ++++++++++++++-----
 .../cpu/quantization/matmul_nbits.cc          |  3 -
 onnxruntime/core/framework/allocator.cc       |  2 +-
 onnxruntime/core/framework/sparse_tensor.cc   |  3 +-
 4 files changed, 75 insertions(+), 32 deletions(-)
diff --git a/include/onnxruntime/core/framework/allocator.h b/include/onnxruntime/core/framework/allocator.h
index cbc2208b6bbd7..9015b23296e08 100644
--- a/include/onnxruntime/core/framework/allocator.h
+++ b/include/onnxruntime/core/framework/allocator.h
@@ -3,12 +3,14 @@
 
 #pragma once
 
+#include <map>
+
 #include "core/common/common.h"
 #include "core/framework/allocator_stats.h"
+// some enums are defined in session/onnxruntime_c_api.h but used in ortdevice.h/ortmemoryinfo.h
 #include "core/session/onnxruntime_c_api.h"
-#include "ortdevice.h"
-#include "ortmemoryinfo.h"
-#include <map>
+#include "core/framework/ortdevice.h"
+#include "core/framework/ortmemoryinfo.h"
 
 // This configures the arena based allocator used by ORT
 // See docs/C_API.md for details on what these mean and how to choose these values
@@ -68,8 +70,12 @@ class IAllocator {
   IAllocator(const OrtMemoryInfo& info) : memory_info_(info) {}
   virtual ~IAllocator() = default;
   /**
-  @remarks Use SafeInt when calculating the size of memory to allocate using Alloc.
-  */
+   * Allocate memory of the specified size.
+   * If size is 0, nullptr is returned.
+   * If allocation fails, an exception is thrown.
+   *
+   * @remarks Use SafeInt when calculating the size of memory to allocate using Alloc.
+   */
   virtual void* Alloc(size_t size) = 0;
 
   virtual void Free(void* p) = 0;
@@ -100,7 +106,8 @@ class IAllocator {
    * \param out Total size required after any alignment is applied
    * \return true, successful. false, overflow
    */
-  [[nodiscard]] static bool CalcMemSizeForArrayWithAlignment(size_t nmemb, size_t size, size_t alignment, size_t* out) noexcept;
+  [[nodiscard]] static bool CalcMemSizeForArrayWithAlignment(size_t nmemb, size_t size, size_t alignment,
+                                                             size_t* out) noexcept;
 
   /**
    * https://cwe.mitre.org/data/definitions/190.html
@@ -120,8 +127,10 @@ class IAllocator {
    */
   void* AllocArray(size_t nmemb, size_t size) {
     size_t len;
-    if (!CalcMemSizeForArray(nmemb, size, &len))
-      return nullptr;
+    if (!CalcMemSizeForArray(nmemb, size, &len)) {
+      ORT_THROW("Invalid size requested for allocation: ", nmemb, " * ", size);
+    }
+
     return Alloc(len);
   }
 
@@ -131,8 +140,10 @@ class IAllocator {
   template <size_t alignment>
   void* AllocArrayWithAlignment(size_t nmemb, size_t size) {
     size_t len;
-    if (!CalcMemSizeForArrayWithAlignment(nmemb, size, alignment, &len))
-      return nullptr;
+    if (!CalcMemSizeForArrayWithAlignment(nmemb, size, alignment, &len)) {
+      ORT_THROW("Invalid size requested for allocation: ", nmemb, " * ", size, " with alignment ", alignment);
+    }
+
     return Alloc(len);
   }
 
@@ -144,13 +155,14 @@ class IAllocator {
      @param stream Which stream instance allocated chunk will be used with.
      @param wait_fn If the allocator want to dynamic reuse a chunk from another stream, use this wait_fn to sync on
                     the target stream to make the reuse safe.
-     @returns std::unique_ptr with allocated memory and deleter.
+     @returns std::unique_ptr with allocated memory and deleter. Throws if it cannot allocate memory.
   */
   template <typename T>
   static IAllocatorUniquePtr<T> MakeUniquePtr(std::shared_ptr<IAllocator> allocator, size_t count_or_bytes,
                                               bool use_reserve = false,
                                               Stream* stream = nullptr, WaitNotificationFn wait_fn = nullptr) {
-    if (allocator == nullptr) return nullptr;
+    ValidateAllocator(allocator);
+
     // for now limit to fundamental types. we could support others, but to do so either we or the caller
     // needs to call the dtor for the objects, for buffers allocated on device we don't have destructor
     // static_assert(std::is_fundamental<T>::value, "Fundamental type required as no destructors are called.");
@@ -161,38 +173,73 @@ class IAllocator {
     if constexpr (!std::is_void<T>::value) {
       // sizeof(void) isn't valid, but the compiler isn't smart enough to ignore that this line isn't
       // reachable if T is void. use std::conditional to 'use' void* in the sizeof call
-      if (!CalcMemSizeForArray(
-              count_or_bytes, sizeof(typename std::conditional<std::is_void<T>::value, void*, T>::type), &alloc_size)) {
-        return nullptr;
-      }
+      constexpr auto size = sizeof(typename std::conditional<std::is_void<T>::value, void*, T>::type);
+      alloc_size = ValidatedCalcMemSizeForArray(count_or_bytes, size);
     }
 
     // allocate
     T* p = static_cast<T*>(AllocateBufferWithOptions(*allocator, alloc_size, use_reserve, stream, std::move(wait_fn)));
-    return IAllocatorUniquePtr<T>{
-        p,
-        [allocator = std::move(allocator)](T* p) { allocator->Free(p); }};
+    ValidateAllocation(p, alloc_size);
+
+    return IAllocatorUniquePtr<T>{p,
+                                  [allocator = std::move(allocator)](T* p) {
+                                    allocator->Free(p);
+                                  }};
   }
 
+  /**
+     Create a std::unique_ptr that is allocated and freed by the provided OrtAllocator.
+     @param ort_allocator The allocator.
+     @param count_or_bytes The exact bytes to allocate if T is void, otherwise the number of elements to allocate.
+     @returns std::unique_ptr with allocated memory and deleter. Throws if it cannot allocate memory.
+  */
   template <typename T>
   static IAllocatorUniquePtr<T> MakeUniquePtrFromOrtAllocator(OrtAllocator* ort_allocator, size_t count_or_bytes) {
-    if (!ort_allocator) return nullptr;
+    ValidateAllocator(ort_allocator);
 
     size_t alloc_size = count_or_bytes;
     // if T is not void, 'count_or_bytes' == number of items so allow for that
     if constexpr (!std::is_void<T>::value) {
       // sizeof(void) isn't valid, but the compiler isn't smart enough to ignore that this line isn't
       // reachable if T is void. use std::conditional to 'use' void* in the sizeof call
-      if (!CalcMemSizeForArray(
-              count_or_bytes, sizeof(typename std::conditional<std::is_void<T>::value, void*, T>::type), &alloc_size)) {
-        return nullptr;
-      }
+      constexpr auto size = sizeof(typename std::conditional<std::is_void<T>::value, void*, T>::type);
+      alloc_size = ValidatedCalcMemSizeForArray(count_or_bytes, size);
     }
-    T* p = static_cast<T*>(ort_allocator->Alloc(ort_allocator, count_or_bytes));
-    return IAllocatorUniquePtr<T>{p, [ort_allocator](T* p) { ort_allocator->Free(ort_allocator, p); }};
+
+    T* p = static_cast<T*>(ort_allocator->Alloc(ort_allocator, alloc_size));
+    ValidateAllocation(p, alloc_size);
+
+    return IAllocatorUniquePtr<T>{p,
+                                  [ort_allocator](T* p) {
+                                    ort_allocator->Free(ort_allocator, p);
+                                  }};
   }
 
  private:
+  //
+  // validation functions. split out from methods that are templatized on the data type to minimize binary size.
+  //
+
+  template <typename T>
+  static void ValidateAllocator(const T& allocator) {
+    ORT_ENFORCE(allocator != nullptr);
+  }
+
+  static size_t ValidatedCalcMemSizeForArray(size_t count, size_t size) {
+    size_t alloc_size = 0;
+    if (!CalcMemSizeForArray(count, size, &alloc_size)) {
+      ORT_THROW("Invalid size requested for allocation: ", count, " * ", size);
+    }
+
+    return alloc_size;
+  }
+
+  static void ValidateAllocation(void* p, size_t size) {
+    // allocator should throw directly but in case it didn't ensure we do here so that calling code doesn't
+    // need to check for nullptr when an actual allocation was expected.
+    ORT_ENFORCE(p != nullptr || size == 0, "Memory allocation failed. Size=", size);
+  };
+
   OrtMemoryInfo memory_info_;
 };
 
diff --git a/onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc b/onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc
index b060d500c6484..a9703dc68dd26 100644
--- a/onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc
+++ b/onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc
@@ -71,9 +71,6 @@ Status MatMulNBits::PrePack(const Tensor& tensor, int input_idx, /*out*/ Allocat
     if (packed_b_size_ == 0) return Status::OK();
     auto qptr = tensor.Data<uint8_t>();
     packed_b_ = IAllocator::MakeUniquePtr<void>(alloc, packed_b_size_, true);
-    if (packed_b_ == nullptr) {
-      return Status::OK();
-    }
     std::memset(packed_b_.get(), 0, packed_b_size_);
     MlasNBitsGemmPackB(packed_b_.get(), qptr, nullptr, nullptr, N_, K_, K_, block_size_, static_cast<int>(nbits_),
                        is_asym_, false, compt_type, pool);
diff --git a/onnxruntime/core/framework/allocator.cc b/onnxruntime/core/framework/allocator.cc
index 2499ead9effbd..c3e96e450c59b 100644
--- a/onnxruntime/core/framework/allocator.cc
+++ b/onnxruntime/core/framework/allocator.cc
@@ -33,7 +33,7 @@ bool IAllocator::CalcMemSizeForArrayWithAlignment(size_t nmemb, size_t size, siz
   ORT_CATCH(const OnnxRuntimeException& ex) {
     // overflow in calculating the size thrown by SafeInt.
     ORT_HANDLE_EXCEPTION([&]() {
-      LOGS_DEFAULT(ERROR) << ex.what();
+      LOGS_DEFAULT(ERROR) << ex.what() << " nmemb=" << nmemb << " size=" << size << " alignment=" << alignment;
       ok = false;
     });
   }
diff --git a/onnxruntime/core/framework/sparse_tensor.cc b/onnxruntime/core/framework/sparse_tensor.cc
index 5af2f4e4b543f..a3bcea4762d3e 100644
--- a/onnxruntime/core/framework/sparse_tensor.cc
+++ b/onnxruntime/core/framework/sparse_tensor.cc
@@ -220,7 +220,6 @@ Status SparseTensor::AllocateBuffer(int64_t buffer_size, size_t num_values) {
     ORT_RETURN_IF_NOT(buffer_size_t > values_bytes,
                       "Values size ", static_cast<size_t>(values_bytes), " must be less than total buffer size: ", buffer_size);
     auto data_ptr = IAllocator::MakeUniquePtr<void>(allocator_, buffer_size_t);
-    ORT_RETURN_IF(data_ptr == nullptr, "SparseTensor Allocation failed for size: ", buffer_size);
     if (IsDataTypeString()) {
       // We own the buffer, so we must properly construct strings. Neither of the Tensors
       // we construct on top of the buffer own it. We are constructing empty strings, hopefully
@@ -592,4 +591,4 @@ Status SparseTensor::Copy(const IDataTransfer& data_transfer, SparseTensor& dst_
 
 }  // namespace onnxruntime
 
-#endif  // !defined(DISABLE_SPARSE_TENSORS)
\ No newline at end of file
+#endif  // !defined(DISABLE_SPARSE_TENSORS)