Skip to content

Commit

Permalink
Rename to Model Builder API.
Browse files Browse the repository at this point in the history
Cleanups
All works.
  • Loading branch information
skottmckay committed Dec 23, 2024
1 parent 0f213ef commit 7ce7e8f
Show file tree
Hide file tree
Showing 14 changed files with 282 additions and 146 deletions.
28 changes: 19 additions & 9 deletions include/onnxruntime/core/graph/graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -763,6 +763,10 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
*/
bool GetInitializedTensor(const std::string& tensor_name, const ONNX_NAMESPACE::TensorProto*& value) const;

/** Populate `value` if an externally allocated OrtValue exists for an initializer with the given name.
@returns true if an OrtValue initializer with the given name exists and `value` was populated.
*/
bool GetOrtValueInitializer(const std::string& name, OrtValue& value) const;

/** Gets all the initializer tensors in this Graph. */
const InitializedTensorSet& GetAllInitializedTensors() const noexcept { return name_to_initial_tensor_; }

Expand Down Expand Up @@ -1449,15 +1453,15 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
const OrtFormatLoadOptions& load_options,
const logging::Logger& logger, std::unique_ptr<Graph>& graph);

static Status LoadFromGraphApiModel(const OrtGraph& api_graph,
const Model& owning_model,
const std::unordered_map<std::string, int>& domain_to_version,
IOnnxRuntimeOpSchemaCollectionPtr schema_registry,
bool strict_shape_type_inference,
const logging::Logger& logger,
std::unique_ptr<Graph>& graph);
static Status LoadFromModelBuilderApiModel(const OrtGraph& api_graph,
const Model& owning_model,
const std::unordered_map<std::string, int>& domain_to_version,
IOnnxRuntimeOpSchemaCollectionPtr schema_registry,
bool strict_shape_type_inference,
const logging::Logger& logger,
std::unique_ptr<Graph>& graph);

Status UpdateUsingGraphApiModel(const OrtModel& api_model);
Status UpdateUsingModelBuilderApiModel(const OrtModel& api_model);

#if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)
const RuntimeOptimizationRecordContainer& RuntimeOptimizations() const {
Expand Down Expand Up @@ -1699,7 +1703,7 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
return nodes_[node_index].get();
}

Status LoadFromGraphApiModel(const OrtGraph& api_graph, bool updating_existing_graph = false);
Status LoadFromModelBuilderApiModel(const OrtGraph& api_graph, bool updating_existing_graph = false);

const Model& owning_model_;

Expand All @@ -1715,6 +1719,11 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi

InitializedTensorSet name_to_initial_tensor_;

// Initializers that are external to the Graph, e.g. created via the Model Builder API from existing memory.
// The optimizers require initializers to be converted to TensorProto, but we must also preserve the deleter
// information, so we store these OrtValues in the Graph instance and retrieve them during session state
// finalization.
std::unordered_map<std::string, OrtValue> ortvalue_initializers_;

std::unordered_set<std::reference_wrapper<const std::string>,
std::hash<std::string>, std::equal_to<std::string>>
sparse_tensor_names_;
Expand All @@ -1736,6 +1745,7 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
// In some cases a fused sub-graph occurs multiple times in one model, so we use a map
// to store reusable schemas for lookup.
InlinedHashMap<std::string, std::reference_wrapper<ONNX_NAMESPACE::OpSchema>> reusable_fused_schema_map_;

#endif // !defined(ORT_MINIMAL_BUILD)

// Graph nodes.
Expand Down
6 changes: 6 additions & 0 deletions include/onnxruntime/core/graph/graph_viewer.h
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,12 @@ class GraphViewer {
IOnnxRuntimeOpSchemaCollectionPtr GetSchemaRegistry() const { return graph_->GetSchemaRegistry(); }
#endif

/** Populate `value` if an externally allocated OrtValue exists for an initializer with the given name.
    Delegates to Graph::GetOrtValueInitializer on the wrapped graph.
    @param name Name of the initializer to look up.
    @param[out] value Populated with the stored OrtValue when one exists.
    @returns true if `value` was populated — presumably mirroring Graph::GetOrtValueInitializer; confirm there.
*/
bool GetOrtValueInitializer(const std::string& name, OrtValue& value) const {
return graph_->GetOrtValueInitializer(name, value);
}

private:
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(GraphViewer);
GraphViewer(const Graph& graph, const IndexedSubGraph* filter_info);
Expand Down
9 changes: 5 additions & 4 deletions include/onnxruntime/core/session/onnxruntime_c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -5160,22 +5160,23 @@ struct OrtModelBuilderApi {
* Use CreateTensorWithDataAsOrtValue or CreateTensorWithDataAndDeleterAsOrtValue to create an OrtValue
* with a tensor that contains a pointer to the existing data.
* User must keep pointer valid for lifetime of the inference session.
* Set `data_is_external` to true.
*
* Allocated memory:
* Use CreateTensorAsOrtValue (allocates memory) and populate the tensor with the data.
* ORT will own the memory.
*
*
* Set `data_is_external` to false.
*
* \param[in] graph The OrtGraph instance to update.
* \param[in] name The value name for the initializer.
* \param[in] tensor The OrtValue instance containing the tensor data.
* \param[in] data_is_external Set to true if the data is external and should not be copied.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.21.
*/
ORT_API2_STATUS(AddInitializerToGraph, _In_ OrtGraph* graph, _In_ const char* name, _Inout_ OrtValue* tensor);
ORT_API2_STATUS(AddInitializerToGraph, _In_ OrtGraph* graph, _In_ const char* name, _Inout_ OrtValue* tensor,
bool data_is_external);

/** \brief Add an OrtNode to an OrtGraph
*
Expand Down
32 changes: 17 additions & 15 deletions include/onnxruntime/core/session/onnxruntime_cxx_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -1785,6 +1785,19 @@ struct Value : detail::ValueImpl<OrtValue> {
const int64_t* shape, size_t shape_len,
ONNXTensorElementDataType type);

/** \brief Creates a tensor with a user supplied buffer. Wraps OrtApi::CreateTensorWithDataAndDeleterAsOrtValue.
*
* \param deleter OrtAllocator that will be used to free the buffer when no longer required.
* \param p_data Pointer to the data buffer.
* \param p_data_byte_count The number of bytes in the data buffer.
* \param shape Pointer to the tensor shape dimensions.
* \param shape_len The number of tensor shape dimensions.
* \param type The data type.
*/
static Value CreateTensor(OrtAllocator* deleter, void* p_data, size_t p_data_byte_count,
const int64_t* shape, size_t shape_len,
ONNXTensorElementDataType type);

/** \brief Creates an OrtValue with a tensor using a supplied OrtAllocator. Wraps OrtApi::CreateTensorAsOrtValue.
* This overload will allocate the buffer for the tensor according to the supplied shape and data type.
* The allocated buffer will be owned by the returned OrtValue and will be freed when the OrtValue is released.
Expand All @@ -1810,7 +1823,8 @@ struct Value : detail::ValueImpl<OrtValue> {
* \param shape_len The number of tensor shape dimensions.
* \param type The data type.
*/
static Value CreateTensor(OrtAllocator* allocator, const int64_t* shape, size_t shape_len, ONNXTensorElementDataType type);
static Value CreateTensor(OrtAllocator* allocator, const int64_t* shape, size_t shape_len,
ONNXTensorElementDataType type);

/** \brief Creates an OrtValue with a Map Onnx type representation.
* The API would ref-count the supplied OrtValues and they will be released
Expand Down Expand Up @@ -2542,9 +2556,6 @@ struct ValueInfoImpl : Ort::detail::Base<T> {

std::string Name() const;
ConstTypeInfo TypeInfo() const;

template <typename U>
bool operator==(const ValueInfoImpl<U>& o) const;
};
} // namespace detail

Expand All @@ -2570,9 +2581,6 @@ template <typename T>
struct NodeImpl : Ort::detail::Base<T> {
using B = Ort::detail::Base<T>;
using B::B;

template <typename U>
bool operator==(const NodeImpl<U>& o) const;
};
} // namespace detail

Expand Down Expand Up @@ -2619,11 +2627,8 @@ struct GraphImpl : Ort::detail::Base<T> {

void SetInputs(std::vector<ValueInfo>& inputs);
void SetOutputs(std::vector<ValueInfo>& outputs);
void AddInitializer(const std::string& name, Value& initializer); // Graph takes ownership of Value
void AddNode(Node& node); // Graph takes ownership of Node

template <typename U>
bool operator==(const GraphImpl<U>& o) const;
void AddInitializer(const std::string& name, Value& initializer, bool data_is_external); // Graph takes ownership of Value
void AddNode(Node& node); // Graph takes ownership of Node
};
} // namespace detail

Expand All @@ -2648,9 +2653,6 @@ struct ModelImpl : Ort::detail::Base<T> {
using B::B;

void AddGraph(Graph& graph);

template <typename U>
bool operator==(const ModelImpl<U>& o) const;
};
} // namespace detail

Expand Down
25 changes: 19 additions & 6 deletions include/onnxruntime/core/session/onnxruntime_cxx_inline.h
Original file line number Diff line number Diff line change
Expand Up @@ -1707,23 +1707,35 @@ void ValueImpl<T>::FillSparseTensorBlockSparse(const OrtMemoryInfo* data_mem_inf
} // namespace detail

template <typename T>
inline Value Value::CreateTensor(const OrtMemoryInfo* info, T* p_data, size_t p_data_element_count, const int64_t* shape, size_t shape_len) {
inline Value Value::CreateTensor(const OrtMemoryInfo* info, T* p_data, size_t p_data_element_count,
const int64_t* shape, size_t shape_len) {
return CreateTensor(info, p_data, p_data_element_count * sizeof(T), shape, shape_len, TypeToTensorType<T>::type);
}

inline Value Value::CreateTensor(const OrtMemoryInfo* info, void* p_data, size_t p_data_byte_count, const int64_t* shape, size_t shape_len,
inline Value Value::CreateTensor(const OrtMemoryInfo* info, void* p_data, size_t p_data_byte_count,
const int64_t* shape, size_t shape_len,
ONNXTensorElementDataType type) {
OrtValue* out;
ThrowOnError(GetApi().CreateTensorWithDataAsOrtValue(info, p_data, p_data_byte_count, shape, shape_len, type, &out));
return Value{out};
}

// Wraps OrtApi::CreateTensorWithDataAndDeleterAsOrtValue: creates a tensor over the caller-supplied
// buffer `p_data` (no copy); `deleter` is the OrtAllocator used to free the buffer when the returned
// Value no longer needs it. Throws Ort::Exception on failure via ThrowOnError.
inline Value Value::CreateTensor(OrtAllocator* deleter, void* p_data, size_t p_data_byte_count,
const int64_t* shape, size_t shape_len,
ONNXTensorElementDataType type) {
OrtValue* out;
ThrowOnError(GetApi().CreateTensorWithDataAndDeleterAsOrtValue(deleter, p_data, p_data_byte_count,
shape, shape_len, type, &out));
return Value{out};
}

template <typename T>
inline Value Value::CreateTensor(OrtAllocator* allocator, const int64_t* shape, size_t shape_len) {
return CreateTensor(allocator, shape, shape_len, TypeToTensorType<T>::type);
}

inline Value Value::CreateTensor(OrtAllocator* allocator, const int64_t* shape, size_t shape_len, ONNXTensorElementDataType type) {
inline Value Value::CreateTensor(OrtAllocator* allocator, const int64_t* shape, size_t shape_len,
ONNXTensorElementDataType type) {
OrtValue* out;
ThrowOnError(GetApi().CreateTensorAsOrtValue(allocator, shape, shape_len, type, &out));
return Value{out};
Expand All @@ -1741,7 +1753,8 @@ inline Value Value::CreateSparseTensor(const OrtMemoryInfo* info, void* p_data,
const Shape& values_shape, ONNXTensorElementDataType type) {
OrtValue* out;
ThrowOnError(GetApi().CreateSparseTensorWithValuesAsOrtValue(info, p_data, dense_shape.shape, dense_shape.shape_len,
values_shape.shape, values_shape.shape_len, type, &out));
values_shape.shape, values_shape.shape_len, type,
&out));
return Value{out};
}

Expand Down Expand Up @@ -2425,9 +2438,9 @@ inline void GraphImpl<OrtGraph>::SetOutputs(std::vector<ValueInfo>& outputs) {
std::for_each(outputs.begin(), outputs.end(), [](ValueInfo& vi) { vi.release(); });
}

inline void GraphImpl<OrtGraph>::AddInitializer(const std::string& name, Value& initializer) {
inline void GraphImpl<OrtGraph>::AddInitializer(const std::string& name, Value& initializer, bool data_is_external) {
// Graph takes ownership of `initializer`
ThrowOnError(GetModelBuilderApi().AddInitializerToGraph(p_, name.c_str(), initializer.release()));
ThrowOnError(GetModelBuilderApi().AddInitializerToGraph(p_, name.c_str(), initializer.release(), data_is_external));
}

inline void GraphImpl<OrtGraph>::AddNode(Node& node) {
Expand Down
17 changes: 10 additions & 7 deletions onnxruntime/core/framework/session_state_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -200,13 +200,12 @@ static common::Status DeserializeTensorProto(const Env& env, const std::basic_st
}
}

common::Status AllocateTensor(
const onnxruntime::MemBuffer* m,
std::unique_ptr<onnxruntime::Tensor>& p_tensor,
const onnxruntime::DataTypeImpl* const& type,
onnxruntime::TensorShape& tensor_shape,
bool use_device_allocator_for_initializers,
const onnxruntime::AllocatorPtr& alloc) {
common::Status AllocateTensor(const onnxruntime::MemBuffer* m,
std::unique_ptr<onnxruntime::Tensor>& p_tensor,
const onnxruntime::DataTypeImpl* const& type,
onnxruntime::TensorShape& tensor_shape,
bool use_device_allocator_for_initializers,
const onnxruntime::AllocatorPtr& alloc) {
if (m != nullptr) {
p_tensor = std::make_unique<Tensor>(type, tensor_shape, m->GetBuffer(), m->GetAllocInfo());
if (m->GetLen() < p_tensor->SizeInBytes()) {
Expand Down Expand Up @@ -350,6 +349,7 @@ common::Status SaveInitializedTensors(
}
ORT_RETURN_IF_ERROR(planner.Trace(entry.first, entry.second));
}

// 2. allocate weight buffer on different locations
// planned_initializers_memory_size_in_byte is not actual physical size.
// It's the virtual size computed by planner.
Expand Down Expand Up @@ -382,6 +382,9 @@ common::Status SaveInitializedTensors(
if (user_supplied_initializer_ids.find(entry.first) != user_supplied_initializer_ids.end()) {
ort_value = *(session_options.initializers_to_share_map.at(name));
LOGS(logger, INFO) << "Using user supplied initializer with name (" << name << ").";

} else if (graph.GetOrtValueInitializer(name, ort_value)) {
// `ort_value` was already populated from the OrtValue stored in the Graph instance; nothing further to do.
} else {
const ONNX_NAMESPACE::TensorProto& tensor_proto = *(entry.second);

Expand Down
Loading

0 comments on commit 7ce7e8f

Please sign in to comment.