From 0584f09fec98766c54a53391866cfc2e5b0258a6 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 4 Dec 2024 16:49:04 -0800 Subject: [PATCH] Finished serialization. Certain kernels do not share their pre-packed weights, they simply keep them to themselves. TODO: Make them share. --- include/onnxruntime/core/graph/graph.h | 14 +- .../core/graph/model_saving_options.h | 2 +- .../framework/prepacked_weights_container.cc | 34 ++-- .../framework/prepacked_weights_container.h | 26 ++- onnxruntime/core/framework/session_state.cc | 27 ++- .../framework/tensor_external_data_info.cc | 53 ++++-- .../framework/tensor_external_data_info.h | 30 +++- .../core/framework/tensorprotoutils.cc | 4 +- onnxruntime/core/graph/graph.cc | 159 +++++------------- 9 files changed, 176 insertions(+), 173 deletions(-) diff --git a/include/onnxruntime/core/graph/graph.h b/include/onnxruntime/core/graph/graph.h index 7e0d74eb343d0..cdc2875660d7b 100644 --- a/include/onnxruntime/core/graph/graph.h +++ b/include/onnxruntime/core/graph/graph.h @@ -1489,12 +1489,14 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi Status AddConstantProtoAsInitializer(const ONNX_NAMESPACE::NodeProto& constant_node_proto, std::optional new_name); - ONNX_NAMESPACE::GraphProto ToGraphProtoWithExternalInitiallizersImpl(const std::filesystem::path& model_path, - const std::filesystem::path& external_file_path, - const ModelSavingOptions& model_saving_options, - ONNX_NAMESPACE::GraphProto& graph_proto, - std::ostream& external_stream, - int64_t& external_offset) const; + Status ToGraphProtoWithExternalInitiallizersImpl( + const std::filesystem::path& model_path, + const std::filesystem::path& external_file_path, + const std::filesystem::path& modified_external_file_path, + const ModelSavingOptions& model_saving_options, + ONNX_NAMESPACE::GraphProto& graph_proto, + std::ostream& external_stream, + int64_t& external_offset) const; #endif diff --git a/include/onnxruntime/core/graph/model_saving_options.h b/include/onnxruntime/core/graph/model_saving_options.h index d4ed2d0668f87..2df67e625a55f 100644 --- a/include/onnxruntime/core/graph/model_saving_options.h +++ b/include/onnxruntime/core/graph/model_saving_options.h @@ -41,4 +41,4 @@ struct ModelSavingOptions { const PrepackedForSerialization* prepacked_for_save = nullptr; }; -} +} // namespace onnxruntime diff --git a/onnxruntime/core/framework/prepacked_weights_container.cc b/onnxruntime/core/framework/prepacked_weights_container.cc index 3be398c9d54fc..bc9ae2a5873af 100644 --- a/onnxruntime/core/framework/prepacked_weights_container.cc +++ b/onnxruntime/core/framework/prepacked_weights_container.cc @@ -56,18 +56,21 @@ PrepackedForSerialization::PrepackedForSerialization() PrepackedForSerialization::~PrepackedForSerialization() = default; -void PrepackedForSerialization::Subgraph::Insert(std::string key, PrePackedWeights&& packed_weight) { +void PrepackedForSerialization::Subgraph::InsertFromDisk(std::string key, PrePackedWeights&& packed_weight) { auto result = key_to_blobs_.emplace(std::move(key), std::move(packed_weight)); ORT_ENFORCE(result.second, "Duplicate pre-packed weight from disk"); } -bool PrepackedForSerialization::Subgraph::CreateOrOverWrite(const std::string& weight_name, std::string key, - PrePackedWeights&& packed_weight) { - // We overwrite the existing key. This is necessary in case we already have a pre-packed weight - // mapped from disk, but we want to overwrite it with our most recent pre-packed version. - auto result = key_to_blobs_.insert_or_assign(std::move(key), std::move(packed_weight)); - weight_to_pre_packs_[weight_name].push_back(result.first); - return result.second; +bool PrepackedForSerialization::Subgraph::WritePackedForSaving(const std::string& weight_name, const std::string& key, + PrePackedWeights&& packed_weight) { + auto hit = key_to_blobs_.find(key); + if (hit == key_to_blobs_.end()) { + auto result = key_to_blobs_.insert({key, std::move(packed_weight)}); + sorted_by_weight_for_writing_[weight_name].push_back(result.first); + return true; + } + hit->second = std::move(packed_weight); + return false; } const PrePackedWeights* PrepackedForSerialization::Subgraph::GetPrepackedWeights(const std::string& key) const { @@ -96,12 +99,23 @@ std::optional PrepackedForSerialization::TakePrepackedWeights( return result; } -PrepackedForSerialization::Subgraph& PrepackedForSerialization::FindOrCreateSubgraph(const Graph& graph) { +PrepackedForSerialization::Subgraph& PrepackedForSerialization::FindOrCreatePrepackedGraph(const Graph& graph) { if (graph.ParentGraph() == nullptr) { return main_graph_; } - auto& parent = FindOrCreateSubgraph(*graph.ParentGraph()); + auto& parent = FindOrCreatePrepackedGraph(*graph.ParentGraph()); return parent.GetOrCreateSubgraph(graph); } +const PrepackedForSerialization::Subgraph* PrepackedForSerialization::FindPrepackedGraph(const Graph& graph) const { + if (graph.ParentGraph() == nullptr) { + return &main_graph_; + } + auto* parent = FindPrepackedGraph(*graph.ParentGraph()); + if (parent != nullptr) { + parent = parent->GetSubgraph(graph); + } + return parent; +} + } // namespace onnxruntime diff --git a/onnxruntime/core/framework/prepacked_weights_container.h b/onnxruntime/core/framework/prepacked_weights_container.h index c6fc9a209edb4..a072a0bdc04c5 100644 --- a/onnxruntime/core/framework/prepacked_weights_container.h +++ b/onnxruntime/core/framework/prepacked_weights_container.h @@ -91,8 +91,8 @@ class PrepackedForSerialization final { ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(PrepackedForSerialization); using KeyToBlobMap = std::unordered_map; - using KeyToBlobMapIterator = KeyToBlobMap::iterator; - using BlobsInderect = std::vector; + using KeyToBlobMapConstIterator = KeyToBlobMap::const_iterator; + using BlobsInderect = std::vector; using BlobsConstIterator = BlobsInderect::const_iterator; // Maps weight name to iterators in key_to_blobs_. It associates a weight name with its pre-packs. @@ -130,11 +130,10 @@ class PrepackedForSerialization final { return it == subgraph_prepacks_.end() ? nullptr : it->second.get(); } - // This does not populate per-initializer structures. - void Insert(std::string key, PrePackedWeights&& packed_weight); + void InsertFromDisk(std::string key, PrePackedWeights&& packed_weight); - bool CreateOrOverWrite(const std::string& weight_name, std::string key, - PrePackedWeights&& packed_weight); + bool WritePackedForSaving(const std::string& weight_name, const std::string& key, + PrePackedWeights&& packed_weight); const PrePackedWeights* GetPrepackedWeights(const std::string& key) const; @@ -148,11 +147,20 @@ class PrepackedForSerialization final { save_mode_on_ = value; } + // Returns iterators to key->blob pair for writing + const BlobsInderect* GetBlobsForWeight(const std::string& weight_name) const { + auto hit = sorted_by_weight_for_writing_.find(weight_name); + if (hit != sorted_by_weight_for_writing_.end()) { + return &hit->second; + } + return nullptr; + } + private: bool save_mode_on_; Subgraph* parent_ = nullptr; KeyToBlobMap& key_to_blobs_; - WeightToPrePacksMap weight_to_pre_packs_; + WeightToPrePacksMap sorted_by_weight_for_writing_; // Map Graph ptr to subgraphs std::unordered_map> subgraph_prepacks_; }; @@ -179,7 +187,9 @@ class PrepackedForSerialization final { std::optional TakePrepackedWeights(const std::string& key); - Subgraph& FindOrCreateSubgraph(const Graph& graph); + Subgraph& FindOrCreatePrepackedGraph(const Graph& graph); + + const Subgraph* FindPrepackedGraph(const Graph& graph) const; private: // Map of key to pre-packed blobs.This is common for all subgraphs diff --git a/onnxruntime/core/framework/session_state.cc b/onnxruntime/core/framework/session_state.cc index e581553e2208a..f1974c9576ad4 100644 --- a/onnxruntime/core/framework/session_state.cc +++ b/onnxruntime/core/framework/session_state.cc @@ -387,19 +387,18 @@ static Status KernelUseSharedPrePackedBuffers(OpKernel& kernel, int input_idx, return Status::OK(); } -// Here we use the data that is owned by somebody else -static void SavePrepackedDataForWriting(const std::string& weight_name, - const std::string& key, - const PrePackedWeights& prepacked_weights, - PrepackedForSerialization::Subgraph& prepacked_subgraph) { +static void WritePrepackedForSaving(const std::string& weight_name, + const std::string& key, + const PrePackedWeights& prepacked_weights, + PrepackedForSerialization::Subgraph& prepacked_subgraph) { PrePackedWeights weights_for_saving; for (const auto& prepacked_buffer : prepacked_weights.buffers_) { - // BufferDeleter is nullptr because we do not own the data + // BufferDeleter is nullptr because we do not own the data in this case weights_for_saving.buffers_.emplace_back(prepacked_buffer.get(), BufferDeleter(nullptr)); } weights_for_saving.buffer_sizes_ = prepacked_weights.buffer_sizes_; - prepacked_subgraph.CreateOrOverWrite(weight_name, key, std::move(weights_for_saving)); + prepacked_subgraph.WritePackedForSaving(weight_name, key, std::move(weights_for_saving)); } static std::string GenerateKeyForPrepackedWeightsMap(const std::string& op_type, @@ -417,7 +416,7 @@ Status SessionState::PrepackConstantInitializedTensors( const std::unordered_map& initializers_to_share_map) { auto prepacked_constant_weights = [this, &constant_initializers_use_count, &initializers_to_share_map]( bool should_cache_prepacked_weights_for_shared_initializers) -> Status { - auto& prepacked_subgraph = prepacked_weights_for_serialization_.FindOrCreateSubgraph(graph_); + auto& prepacked_subgraph = prepacked_weights_for_serialization_.FindOrCreatePrepackedGraph(graph_); for (auto& node : GetGraphViewer().Nodes()) { auto kernel = GetMutableKernel(node.Index()); @@ -492,8 +491,8 @@ Status SessionState::PrepackConstantInitializedTensors( if (prepacked_weights_for_serialization_.IsSaveModeOn()) { // Here we take references to the shared container owned data, so we unmap any entries // that we are mapping from disk - SavePrepackedDataForWriting(input_name, prepacked_weights_container_key, prepacked_shared, - prepacked_subgraph); + WritePrepackedForSaving(input_name, prepacked_weights_container_key, prepacked_shared, + prepacked_subgraph); } } else { // container doesn't contain the pre-packed weight - so write into it for sharing across kernel instances @@ -523,8 +522,8 @@ Status SessionState::PrepackConstantInitializedTensors( if (prepacked_weights_for_serialization_.IsSaveModeOn()) { // Here we take references to the shared container owned data, so we unmap any entries // that we are mapping from disk, so we write the most fresh data possible - SavePrepackedDataForWriting(input_name, prepacked_weights_container_key, shared_prepacked, - prepacked_subgraph); + WritePrepackedForSaving(input_name, prepacked_weights_container_key, shared_prepacked, + prepacked_subgraph); } } } @@ -554,8 +553,8 @@ Status SessionState::PrepackConstantInitializedTensors( if (prepacked_subgraph.IsSaveModeOn() || weights_to_use == nullptr) { // In this case pre-packed container owns the data - prepacked_subgraph.CreateOrOverWrite(input_name, prepacked_weights_container_key, - std::move(weights_to_be_filled_in)); + prepacked_subgraph.WritePackedForSaving(input_name, prepacked_weights_container_key, + std::move(weights_to_be_filled_in)); weights_to_use = prepacked_subgraph.GetPrepackedWeights(prepacked_weights_container_key); assert(weights_to_use != nullptr); } diff --git a/onnxruntime/core/framework/tensor_external_data_info.cc b/onnxruntime/core/framework/tensor_external_data_info.cc index 4e6e2a4a82a17..c4f09bdba6256 100644 --- a/onnxruntime/core/framework/tensor_external_data_info.cc +++ b/onnxruntime/core/framework/tensor_external_data_info.cc @@ -4,6 +4,7 @@ #include "tensor_external_data_info.h" #include "core/common/common.h" #include "core/common/narrow.h" +#include "core/common/safeint.h" #include "core/common/string_utils.h" #include "core/platform/path_lib.h" @@ -54,8 +55,9 @@ Status ExternalDataInfo::Create(const RepeatedPtrField& } else if (stringmap.key() == "checksum" && !stringmap.value().empty()) { out->checksum_ = stringmap.value(); } else if (stringmap.key().find("prepacked", 0) == 0) { - // Starts with 'prepacked'. Each prepacked entry may have multiple blobs with the same key - // we output them with the same key + // Starts with 'prepacked', each has its own key. + // Each prepacked entry may have multiple blobs with the same key + // we output them with the same key // format = key|offset;length;checksum[|offset;length;checksum] // We are ignoring invalid entries (should not be any), and rely // on in memory pre-packs regenerated in this case. @@ -114,17 +116,38 @@ void ExternalDataInfo::SetExternalLocationToProto(const std::filesystem::path& e length->set_value(std::to_string(tensor_bytes_size)); } -// void ExternalDataInfo::AddPrepackedEntriesToProto( -// const PrepackedForSerialization::BlobsInderect& prepacked_for_write, ::ONNX_NAMESPACE::TensorProto& proto) { -// size_t prepack_count = 0; -// std::stringstream os; -// for (auto iter : prepacked_for_write) { -// const auto& [key, prepacked_weights] = *iter; -// os << key << '|'; -// const size_t blob_num = prepacked_weights.buffers_.size(); -// for (size_t i = 0; blob_num; ++i) { -// //XXX: Need offset calculation -// // os << ed_weights.blobs_[i].offset << ';'; -// } -// } +std::ostream& ExternalDataInfo::AddPrepackedEntriesToProto( + const PrepackedForSerialization::BlobsInderect& prepacked_for_write, bool align, int64_t allocation_granularity, + std::ostream& os, int64_t& external_offset, ::ONNX_NAMESPACE::TensorProto& proto) { + for (const auto& iter : prepacked_for_write) { + size_t prepack_count = 0; + const auto& [key, prepacked_weights] = *iter; + std::stringstream prepacked_entry; + prepacked_entry << key << "|"; + for (size_t i = 0, size = prepacked_weights.buffers_.size(); i < size; ++i) { + if (align) { + // return early on error + if (!AlignAndPad(os, allocation_granularity, external_offset)) { + return os; + } + } + const auto size_in_bytes = prepacked_weights.buffer_sizes_[i]; + if (prepack_count++ > 0) { + prepacked_entry << "|"; + } + // Checksum is currently not validated + prepacked_entry << external_offset << ";" << size_in_bytes << ";0"; + if (!os.write(reinterpret_cast(prepacked_weights.buffers_[i].get()), size_in_bytes)) { + return os; + } + external_offset = SafeInt(external_offset) + size_in_bytes; + } + auto* prepacked = proto.add_external_data(); + std::string prepacked_key("prepacked_"); + prepacked_key.append(std::to_string(prepack_count)); + prepacked->set_key(std::move(prepacked_key)); + prepacked->set_value(prepacked_entry.str()); + } + return os; +} } // namespace onnxruntime \ No newline at end of file diff --git a/onnxruntime/core/framework/tensor_external_data_info.h b/onnxruntime/core/framework/tensor_external_data_info.h index 853c14338a2f8..3c65fd51815d7 100644 --- a/onnxruntime/core/framework/tensor_external_data_info.h +++ b/onnxruntime/core/framework/tensor_external_data_info.h @@ -3,6 +3,7 @@ #pragma once #include +#include #include #include @@ -39,8 +40,33 @@ class ExternalDataInfo { size_t tensor_bytes_size, ::ONNX_NAMESPACE::TensorProto& proto); - static void AddPrepackedEntriesToProto(const PrepackedForSerialization::BlobsInderect& prepacked_for_write, - ::ONNX_NAMESPACE::TensorProto& proto); + // Pads the output with zeros according to the specified allocation_granularity + // It updates external_offset for alignment. + // need to do padding before write actual tensor data as we do offset alignment at the begin of + // large tensors (offset need to be page aligned and allocation granularity aligned) like below: + // \242\2557\256\023.\031&0000000000000000\332)k+\253\246\342\246(&\006!\347\232\374\236\325\026\032+\36XXXX + // |<---smaller tensor---->|<---padding--->|<------------------large tensor----------------------------->| + static std::ostream& AlignAndPad(std::ostream& stream, int64_t allocation_granularity, int64_t& external_offset) { + // Align to the larger of the page size or the allocation granularity + int64_t alignment_factor = std::max(static_cast(4096), allocation_granularity); + // Align to the next page or alloc granularity boundary + int64_t new_external_offset = static_cast( + std::floor((external_offset + alignment_factor - 1) / alignment_factor)) * + alignment_factor; + + // padding tensor with zeros for alignment + for (int64_t index = external_offset; index != new_external_offset; ++index) { + stream << '\0'; + } + external_offset = new_external_offset; + return stream; + } + + static std::ostream& AddPrepackedEntriesToProto(const PrepackedForSerialization::BlobsInderect& prepacked_for_write, + bool align, int64_t allocation_granularity, + std::ostream& os, + int64_t& external_offset, + ::ONNX_NAMESPACE::TensorProto& proto); using PrepackedInfo = std::tuple; using PrepackedInfos = std::unordered_map>; diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc index 6d1f9d631d0aa..0ce6e4360db5f 100644 --- a/onnxruntime/core/framework/tensorprotoutils.cc +++ b/onnxruntime/core/framework/tensorprotoutils.cc @@ -1057,6 +1057,8 @@ Status GetExtDataFromTensorProto(const Env& env, const std::filesystem::path& mo if (prepacked_info != nullptr && !prepacked_infos->empty()) { for (const auto& [key, blobs] : *prepacked_infos) { PrePackedWeights prepacked_weights; + prepacked_weights.buffers_.reserve(blobs.size()); + prepacked_weights.buffer_sizes_.reserve(blobs.size()); for (const auto& blob : blobs) { const auto blob_offset = std::get<0>(blob); const auto blob_length = std::get<1>(blob); @@ -1074,7 +1076,7 @@ Status GetExtDataFromTensorProto(const Env& env, const std::filesystem::path& mo prepacked_weights.buffer_sizes_.push_back(blob_length); } if (!blobs.empty()) { - prepacked_info->Insert(key, std::move(prepacked_weights)); + prepacked_info->InsertFromDisk(key, std::move(prepacked_weights)); } } } diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc index b7353ca3875bf..a54c0b421b8f8 100644 --- a/onnxruntime/core/graph/graph.cc +++ b/onnxruntime/core/graph/graph.cc @@ -4086,59 +4086,51 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProto() const { return result; } -// Create a recursive function that does bottom up with subgraphs -ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitiallizersImpl( +// A recursive function that does bottom up with subgraphs +Status Graph::ToGraphProtoWithExternalInitiallizersImpl( const std::filesystem::path& model_path, const std::filesystem::path& external_file_path, + const std::filesystem::path& modified_external_file_path, const ModelSavingOptions& model_saving_options, ONNX_NAMESPACE::GraphProto& output_graph_proto, std::ostream& external_stream, int64_t& external_offset) const { - // update external_offset for alignment - // need to do padding before write actual tensor data as we do offset alignment at the begin of - // large tensors (offset need to be page aligned and allocation granularity aligned) like below: - // \242\2557\256\023.\031&0000000000000000\332)k+\253\246\342\246(&\006!\347\232\374\236\325\026\032+\36XXXX - // |<---small tensor---->|<---padding--->|<------------------large tensor----------------------------->| - auto compute_and_pad = [&external_stream](int64_t allocation_granularity, int64_t& external_offset) { - // Align to the larger of the page size or the allocation granularity - int64_t alignment_factor = std::max(static_cast(4096), allocation_granularity); - // Align to the next page or alloc granularity boundary - int64_t new_external_offset = static_cast( - std::floor((external_offset + alignment_factor - 1) / alignment_factor)) * - alignment_factor; - - // padding tensor with zeros for alignment - for (int64_t index = external_offset; index != new_external_offset; ++index) { - external_stream << '\0'; - } - external_offset = new_external_offset; - }; - // Process subgraphs for (const auto& node : Nodes()) { if (node.ContainsSubgraph()) { // Let find this node in the output_graph_proto - auto hit = std::find_if(output_graph_proto.node().begin(), - output_graph_proto.node().end(), + auto hit = std::find_if(output_graph_proto.mutable_node()->begin(), + output_graph_proto.mutable_node()->end(), [&node](const ONNX_NAMESPACE::NodeProto& proto) { return proto.name() == node.Name(); }); - ORT_ENFORCE(hit != output_graph_proto.node().end(), "Node ", node.Name(), - " not found in output_graph_proto"); + ORT_RETURN_IF_NOT(hit != output_graph_proto.mutable_node()->end(), "Node ", node.Name(), + " not found in output_graph_proto"); auto& result_node = *hit; for (const auto& [name, subgraph] : node.GetAttributeNameToSubgraphMap()) { // Lets find this subgraph in the result_node - auto sub_hit = std::find_if(result_node.attribute().begin(), - result_node.attribute().end(), + auto sub_hit = std::find_if(result_node.mutable_attribute()->begin(), + result_node.mutable_attribute()->end(), [&name](const ONNX_NAMESPACE::AttributeProto& proto) { return proto.name() == name; }); - ORT_ENFORCE(sub_hit != result_node.attribute().end(), "Subgraph ", name, - " not found in node ", node.Name()); + ORT_RETURN_IF_NOT(sub_hit != result_node.mutable_attribute()->end() && utils::HasGraph(*sub_hit), + "Subgraph ", name, " not found in node ", node.Name()); + auto& result_subgraph = *sub_hit->mutable_g(); + ORT_RETURN_IF_ERROR(subgraph->ToGraphProtoWithExternalInitiallizersImpl( + model_path, external_file_path, + modified_external_file_path, model_saving_options, + result_subgraph, external_stream, external_offset)); } } } + const PrepackedForSerialization::Subgraph* prepacked_parent_graph = nullptr; + if (model_saving_options.prepacked_for_save != nullptr) { + // Is there any pre-packed weights for this subgraph? + prepacked_parent_graph = model_saving_options.prepacked_for_save->FindPrepackedGraph(*this); + } + // Add the initializers to the result graph. for (const auto& initializer : graph_proto_->initializer()) { #if !defined(DISABLE_SPARSE_TENSORS) @@ -4146,14 +4138,14 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitiallizersImpl( // Sparse tensors are added to the ONNX file. auto& sparse_initializer = *output_graph_proto.add_sparse_initializer(); auto status = utils::DenseTensorToSparseTensorProto(initializer, model_path, sparse_initializer); - ORT_ENFORCE(status.IsOK(), "Failed to convert dense initializer to sparse"); + ORT_RETURN_IF_NOT(status.IsOK(), "Failed to convert dense initializer to sparse"); } else { #endif // Dense tensors larger than the threshold are added to the external file. TensorProto* output_proto = output_graph_proto.add_initializer(); std::vector raw_data; - ORT_THROW_IF_ERROR(utils::UnpackInitializerData(initializer, model_path, raw_data)); + ORT_RETURN_IF_ERROR(utils::UnpackInitializerData(initializer, model_path, raw_data)); size_t tensor_bytes_size = raw_data.size(); if (tensor_bytes_size < model_saving_options.initializer_size_threshold) { *output_proto = initializer; @@ -4164,15 +4156,16 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitiallizersImpl( // need to do padding before write actual tensor data as we do offset alignment at the begin of // large tensors (offset need to be page aligned and allocation granularity aligned) like below: // \242\2557\256\023.\031&0000000000000000\332)k+\253\246\342\246(&\006!\347\232\374\236\325\026\032+\36XXXX - // |<---small tensor---->|<---padding--->|<------------------large tensor----------------------------->| + // |<---smaller tensor---->|<---padding--->|<------------------large tensor----------------------------->| if (model_saving_options.align_offset && static_cast(tensor_bytes_size) > model_saving_options.align_threshold) { - compute_and_pad(model_saving_options.allocation_granularity, external_offset); + ORT_RETURN_IF_NOT(ExternalDataInfo::AlignAndPad(external_stream, model_saving_options.allocation_granularity, + external_offset), + "Failed writing external data to: ", modified_external_file_path); } - if (!external_stream.write(reinterpret_cast(raw_data.data()), tensor_bytes_size)) { - ORT_THROW("Failed to write external initializers to file: ", modified_external_file_path); - } + ORT_RETURN_IF_NOT(external_stream.write(reinterpret_cast(raw_data.data()), tensor_bytes_size), + "Failed to write external initializers to file: ", modified_external_file_path); ExternalDataInfo::SetExternalLocationToProto(external_file_path, external_offset, tensor_bytes_size, *output_proto); @@ -4186,15 +4179,21 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitiallizersImpl( external_offset += tensor_bytes_size; - const PrepackedForSerialization::Subgraph* prepacked_subgraph = nullptr; - if (model_saving_options.prepacked_for_save != nullptr) { - prepacked_subgraph = *model_saving_options.prepacked_for_save->FindOrCreateSubgraph(*this); + if (prepacked_parent_graph != nullptr) { + const auto* iters_to_blobs = prepacked_parent_graph->GetBlobsForWeight(initializer.name()); + if (iters_to_blobs != nullptr && !iters_to_blobs->empty()) { + ORT_RETURN_IF_NOT(ExternalDataInfo::AddPrepackedEntriesToProto( + *iters_to_blobs, model_saving_options.align_offset, + model_saving_options.allocation_granularity, + external_stream, external_offset, *output_proto)); + } } #if !defined(DISABLE_SPARSE_TENSORS) } #endif } + return Status::OK(); } ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers( @@ -4211,84 +4210,12 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers( // Create the external file. std::ofstream external_stream(modified_external_file_path, std::ofstream::out | std::ofstream::binary); - ORT_ENFORCE(external_stream.is_open()); + ORT_ENFORCE(external_stream.is_open(), "Failed to open for writing:", modified_external_file_path); int64_t external_offset = 0; - // update external_offset for alignment - // need to do padding before write actual tensor data as we do offset alignment at the begin of - // large tensors (offset need to be page aligned and allocation granularity aligned) like below: - // \242\2557\256\023.\031&0000000000000000\332)k+\253\246\342\246(&\006!\347\232\374\236\325\026\032+\36XXXX - // |<---small tensor---->|<---padding--->|<------------------large tensor----------------------------->| - auto compute_and_pad = [&external_stream](int64_t allocation_granularity, int64_t& external_offset) { - // Align to the larger of the page size or the allocation granularity - int64_t alignment_factor = std::max(static_cast(4096), allocation_granularity); - // Align to the next page or alloc granularity boundary - int64_t new_external_offset = static_cast( - std::floor((external_offset + alignment_factor - 1) / alignment_factor)) * - alignment_factor; - - // padding tensor with zeros for alignment - for (int64_t index = external_offset; index != new_external_offset; ++index) { - external_stream << '\0'; - } - external_offset = new_external_offset; - }; - - // Add the initializers to the result graph. -#if !defined(DISABLE_SPARSE_TENSORS) - const auto sparse_end = sparse_tensor_names_.end(); -#endif - - for (const auto& initializer : graph_proto_->initializer()) { -#if !defined(DISABLE_SPARSE_TENSORS) - if (sparse_end != sparse_tensor_names_.find(initializer.name())) { - // Sparse tensors are added to the ONNX file. - auto& sparse_initializer = *result.add_sparse_initializer(); - auto status = utils::DenseTensorToSparseTensorProto(initializer, model_path, sparse_initializer); - ORT_ENFORCE(status.IsOK(), "Failed to convert dense initializer to sparse"); - } else { -#endif - // Dense tensors larger than the threshold are added to the external file. - TensorProto* output_proto = result.add_initializer(); - - std::vector raw_data; - ORT_THROW_IF_ERROR(utils::UnpackInitializerData(initializer, model_path, raw_data)); - size_t tensor_bytes_size = raw_data.size(); - if (tensor_bytes_size < model_saving_options.initializer_size_threshold) { - *output_proto = initializer; - continue; - } - - // update external_offset for alignment - // need to do padding before write actual tensor data as we do offset alignment at the begin of - // large tensors (offset need to be page aligned and allocation granularity aligned) like below: - // \242\2557\256\023.\031&0000000000000000\332)k+\253\246\342\246(&\006!\347\232\374\236\325\026\032+\36XXXX - // |<---small tensor---->|<---padding--->|<------------------large tensor----------------------------->| - if (model_saving_options.align_offset && static_cast(tensor_bytes_size) > - model_saving_options.align_threshold) { - compute_and_pad(model_saving_options.allocation_granularity, external_offset); - } - - if (!external_stream.write(reinterpret_cast(raw_data.data()), tensor_bytes_size)) { - ORT_THROW("Failed to write external initializers to file: ", modified_external_file_path); - } - - ExternalDataInfo::SetExternalLocationToProto(external_file_path, external_offset, - tensor_bytes_size, *output_proto); - - output_proto->set_name(initializer.name()); - output_proto->set_data_type(initializer.data_type()); - for (int i = 0; i != initializer.dims_size(); ++i) { - output_proto->add_dims(initializer.dims(i)); - } - output_proto->set_doc_string(initializer.doc_string()); - - external_offset += tensor_bytes_size; - -#if !defined(DISABLE_SPARSE_TENSORS) - } -#endif - } + ORT_THROW_IF_ERROR(ToGraphProtoWithExternalInitiallizersImpl(model_path, external_file_path, + modified_external_file_path, model_saving_options, + result, external_stream, external_offset)); if (!external_stream.flush()) { ORT_THROW("Failed to flush file with external initializers: ", modified_external_file_path);