diff --git a/include/onnxruntime/core/graph/graph.h b/include/onnxruntime/core/graph/graph.h index c51f38553c3b4..994087d985235 100644 --- a/include/onnxruntime/core/graph/graph.h +++ b/include/onnxruntime/core/graph/graph.h @@ -1139,16 +1139,48 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi const ONNX_NAMESPACE::GraphProto& ToGraphProto(); ONNX_NAMESPACE::GraphProto ToGraphProto() const; + // Options to align external initializer offset. + // For models running on CPU, ORT will try to use mmap to load external initializers. + // To use mmap, external initializer need to be offset aligned. + // ORT saves external initializers into signle data file, each initializer is accessed with + // offset(start position of initializer) and length(byte length of initializer) of the data file. + // To use mmap, each offset need to be aligned which means offset need to divisible by + // allocation granularity(64KB for windows and 4K for other OSes). + // With align_offset to true, ORT will align offset for large initializer when + // save ONNX model with external data file. + struct OffsetAlignmentInfo { + // Offset will always be page aligned and allocation granularity aligned for mmap support. + // This is done by padding previous tensor data with zeros keeping same length. + bool align_offset = false; + // Alignment threshold for size of data. + // Having a low threshold will waste file space for small initializers. + // Only when tensor's data size is > the page_align_threshold it will be force aligned. + // Default to 1MB. + int64_t align_threshold = 1048576; + // The allocation Granularity for mmap() support. + // Typically 64KB for Windows & 4KB for other OSes. Default to 64KB. + int64_t allocation_granularity = 65536; + }; + /** Gets the GraphProto representation of this Graph @param external_file_path File path of the binary file to use for initializers. @param model_file_path path of the model file. @param initializer_size_threshold initializers larger or equal to this threshold (in bytes) are saved in the external file. Initializer smaller than this threshold are included in the onnx file. + @param align_info offset alignment info. @returns GraphProto serialization of the graph. */ ONNX_NAMESPACE::GraphProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path, const std::filesystem::path& model_file_path, - size_t initializer_size_threshold) const; + size_t initializer_size_threshold, + const OffsetAlignmentInfo& align_info) const; + + ONNX_NAMESPACE::GraphProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path, + const std::filesystem::path& model_file_path, + size_t initializer_size_threshold) const { + OffsetAlignmentInfo default_options; + return ToGraphProtoWithExternalInitializers(external_file_path, model_file_path, initializer_size_threshold, default_options); + } /** Gets the ISchemaRegistry instances being used with this Graph. */ IOnnxRuntimeOpSchemaCollectionPtr GetSchemaRegistry() const; diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc index e950d68947b91..7e82edabedbb3 100644 --- a/onnxruntime/core/graph/graph.cc +++ b/onnxruntime/core/graph/graph.cc @@ -4021,7 +4021,8 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProto() const { ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path, const std::filesystem::path& model_file_path, - size_t initializer_size_threshold) const { + size_t initializer_size_threshold, + const OffsetAlignmentInfo& align_info) const { GraphProto result; ToGraphProtoInternal(result); ORT_ENFORCE(external_file_path.is_relative()); @@ -4059,6 +4060,27 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std continue; } + // update external_offset for alignment + // need to do padding before write actual tensor data as we do offset alignment at the begin of + // large tensors (offset need to be page aligned and alloction granularity aligned) like below: + // \242\2557\256\023.\031&0000000000000000\332)k+\253\246\342\246(&\006!\347\232\374\236\325\026\032+\36XXXX + // |<---small tensor---->|<---padding--->|<------------------large tensor----------------------------->| + if (align_info.align_offset && static_cast(tensor_bytes_size) > align_info.align_threshold) { + // Align to the larger of the page size or the allocation granularity + int64_t alignment_factor = std::max(static_cast(4096), align_info.allocation_granularity); + // Align to the next page or alloc granularity boundary + int64_t new_external_offset = static_cast( + std::floor((external_offset + alignment_factor - 1) / alignment_factor)) * + alignment_factor; + + // padding tensor with zeros for alignment + for (int64_t index = external_offset; index != new_external_offset; ++index) { + external_stream << '0'; + } + + external_offset = new_external_offset; + } + for (size_t index = 0; index != tensor_bytes_size; ++index) { external_stream << raw_data[index]; } diff --git a/onnxruntime/core/graph/model.cc b/onnxruntime/core/graph/model.cc index d38c1ace7d7a8..e67884e3875d8 100644 --- a/onnxruntime/core/graph/model.cc +++ b/onnxruntime/core/graph/model.cc @@ -383,12 +383,14 @@ ModelProto Model::ToProto() const { ModelProto Model::ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_name, const std::filesystem::path& file_path, - size_t initializer_size_threshold) const { + size_t initializer_size_threshold, + const Graph::OffsetAlignmentInfo& align_info) const { ModelProto result(model_proto_); const auto& graph = *graph_; *(result.mutable_graph()) = graph.ToGraphProtoWithExternalInitializers(external_file_name, file_path, - initializer_size_threshold); + initializer_size_threshold, + align_info); return result; } @@ -605,14 +607,16 @@ template static Status SaveModelWithExternalInitializers(Model& model, const T& file_path, const std::filesystem::path& external_file_name, - size_t initializer_size_threshold) { + size_t initializer_size_threshold, + const Graph::OffsetAlignmentInfo& align_info) { int fd = 0; Status status = Env::Default().FileOpenWr(file_path, fd); ORT_RETURN_IF_ERROR(status); ORT_TRY { status = Model::SaveWithExternalInitializers(model, fd, file_path, external_file_name, - initializer_size_threshold); + initializer_size_threshold, + align_info); } ORT_CATCH(const std::exception& ex) { ORT_HANDLE_EXCEPTION([&]() { @@ -642,8 +646,10 @@ Status Model::Load(const PathString& file_path, std::shared_ptr& p_model, Status Model::SaveWithExternalInitializers(Model& model, const std::filesystem::path& file_path, const std::filesystem::path& external_file_name, - size_t initializer_size_threshold) { - return SaveModelWithExternalInitializers(model, file_path, external_file_name, initializer_size_threshold); + size_t initializer_size_threshold, + const Graph::OffsetAlignmentInfo& align_info) { + return SaveModelWithExternalInitializers(model, file_path, external_file_name, initializer_size_threshold, + align_info); } Status Model::LoadFromBytes(int count, const void* p_bytes, /*out*/ ONNX_NAMESPACE::ModelProto& model_proto) { @@ -759,7 +765,8 @@ Status Model::SaveWithExternalInitializers(Model& model, int fd, const std::filesystem::path& file_path, const std::filesystem::path& external_file_name, - size_t initializer_size_threshold) { + size_t initializer_size_threshold, + const Graph::OffsetAlignmentInfo& align_info) { if (fd < 0) { return Status(ONNXRUNTIME, INVALID_ARGUMENT, " is less than 0."); } @@ -767,7 +774,8 @@ Status Model::SaveWithExternalInitializers(Model& model, ORT_RETURN_IF_ERROR(model.MainGraph().Resolve()); auto model_proto = model.ToGraphProtoWithExternalInitializers(external_file_name, file_path, - initializer_size_threshold); + initializer_size_threshold, + align_info); google::protobuf::io::FileOutputStream output(fd); const bool result = model_proto.SerializeToZeroCopyStream(&output) && output.Flush(); if (result) { diff --git a/onnxruntime/core/graph/model.h b/onnxruntime/core/graph/model.h index ea34dba889277..9bcec6f78ca08 100644 --- a/onnxruntime/core/graph/model.h +++ b/onnxruntime/core/graph/model.h @@ -187,25 +187,54 @@ class Model { // Get model's serialization proto data. // Save initializer larger than the given threshold (in bytes) into an external binary file // with the given name. This function is useful to avoid hitting the size limit of protobuf files. + // initializer offset could be page aligned and allocation granularity aligned for mmap support. ONNX_NAMESPACE::ModelProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_name, const std::filesystem::path& file_path, - size_t initializer_size_threshold) const; + size_t initializer_size_threshold, + const Graph::OffsetAlignmentInfo& align_info) const; + + ONNX_NAMESPACE::ModelProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_name, + const std::filesystem::path& file_path, + size_t initializer_size_threshold) const { + Graph::OffsetAlignmentInfo default_align_info; + return ToGraphProtoWithExternalInitializers(external_file_name, file_path, initializer_size_threshold, default_align_info); + } static common::Status Save(Model& model, const PathString& file_path); static common::Status Save(Model& model, int fd); // Save the model to file using an external file for initializers larger than the given threshold (in bytes). + // Initializer offset could be page aligned and allocation granularity aligned for mmap support. + static common::Status SaveWithExternalInitializers(Model& model, + const std::filesystem::path& file_path, + const std::filesystem::path& external_file_path, + size_t initializer_size_threshold, + const Graph::OffsetAlignmentInfo& align_info); + + static common::Status SaveWithExternalInitializers(Model& model, + const std::filesystem::path& file_path, + const std::filesystem::path& external_file_path, + size_t initializer_size_threshold) { + Graph::OffsetAlignmentInfo default_align_info; + return SaveWithExternalInitializers(model, file_path, external_file_path, initializer_size_threshold, default_align_info); + } + static common::Status SaveWithExternalInitializers(Model& model, + int fd, const std::filesystem::path& file_path, const std::filesystem::path& external_file_path, - size_t initializer_size_threshold); + size_t initializer_size_threshold, + const Graph::OffsetAlignmentInfo& align_info); static common::Status SaveWithExternalInitializers(Model& model, int fd, const std::filesystem::path& file_path, const std::filesystem::path& external_file_path, - size_t initializer_size_threshold); + size_t initializer_size_threshold) { + Graph::OffsetAlignmentInfo default_align_info; + return SaveWithExternalInitializers(model, fd, file_path, external_file_path, initializer_size_threshold, default_align_info); + } static common::Status Load(std::istream& model_istream, ONNX_NAMESPACE::ModelProto* p_model_proto); diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index 5eed7c5c6f2b5..4b4136fd6ebd5 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -2054,10 +2054,13 @@ common::Status InferenceSession::Initialize() { const size_t optimized_model_external_initializers_min_size_in_bytes = ParseStringWithClassicLocale(session_options_.config_options.GetConfigOrDefault( kOrtSessionOptionsOptimizedModelExternalInitializersMinSizeInBytes, "1024")); + Graph::OffsetAlignmentInfo align_info; + align_info.align_offset = true; ORT_RETURN_IF_ERROR_SESSIONID_(Model::SaveWithExternalInitializers(*model_, session_options_.optimized_model_filepath, optimized_model_external_initializers_file_name, - optimized_model_external_initializers_min_size_in_bytes)); + optimized_model_external_initializers_min_size_in_bytes, + align_info)); } } } diff --git a/onnxruntime/test/framework/save_model_with_external_initializers.cc b/onnxruntime/test/framework/save_model_with_external_initializers.cc index 447b0edef879b..d0bc088175755 100644 --- a/onnxruntime/test/framework/save_model_with_external_initializers.cc +++ b/onnxruntime/test/framework/save_model_with_external_initializers.cc @@ -23,13 +23,15 @@ Status LoadSaveAndCompareModel(const std::filesystem::path& input_onnx, const std::filesystem::path& input_external_init_file, const std::filesystem::path& output_onnx, const std::filesystem::path& output_external_init_file, - size_t initializer_size_threshold) { + size_t initializer_size_threshold, + const Graph::OffsetAlignmentInfo& align_info) { auto logger = DefaultLoggingManager().CreateLogger("LoadSaveAndCompareModel"); std::shared_ptr model; ORT_RETURN_IF_ERROR(Model::Load(input_onnx, model, nullptr, *logger)); std::filesystem::remove(output_onnx); std::filesystem::remove(output_external_init_file); - ORT_RETURN_IF_ERROR(Model::SaveWithExternalInitializers(*model, output_onnx, output_external_init_file, initializer_size_threshold)); + ORT_RETURN_IF_ERROR(Model::SaveWithExternalInitializers(*model, output_onnx, output_external_init_file, initializer_size_threshold, + align_info)); std::shared_ptr model_from_external; ORT_RETURN_IF_ERROR(Model::Load(output_onnx.native(), model_from_external, nullptr, *logger)); @@ -75,6 +77,17 @@ Status LoadSaveAndCompareModel(const std::filesystem::path& input_onnx, ORT_RETURN_IF_NOT(tensor_proto_size == from_external_tensor_proto_size, "size mismatch"); ORT_RETURN_IF_NOT(memcmp(tensor_proto_data.data(), from_external_tensor_proto_data.data(), tensor_proto_size) == 0, "data mismatch"); + + if (align_info.align_offset) { + for (const StringStringEntryProto& entry : from_external_tensor_proto->external_data()) { + if (entry.has_key() && entry.has_value() && entry.key() == "offset") { + size_t tensor_offset; + std::stringstream stream(entry.value()); + stream >> tensor_offset; + ORT_RETURN_IF_NOT(tensor_offset % align_info.allocation_granularity == 0, "tensor offset not align"); + } + } + } } // Cleanup. ORT_RETURN_IF_NOT(std::filesystem::remove(output_onnx), "delete file failed"); @@ -84,12 +97,22 @@ Status LoadSaveAndCompareModel(const std::filesystem::path& input_onnx, // Original model does not have external initializers TEST(SaveWithExternalInitializers, Mnist) { - ASSERT_STATUS_OK(LoadSaveAndCompareModel(ORT_TSTR("testdata/mnist.onnx"), ORT_TSTR(""), ORT_TSTR("testdata/mnist_with_external_initializers.onnx"), ORT_TSTR("mnist_external_initializers.bin"), 100)); + Graph::OffsetAlignmentInfo align_info; + ASSERT_STATUS_OK(LoadSaveAndCompareModel(ORT_TSTR("testdata/mnist.onnx"), ORT_TSTR(""), ORT_TSTR("testdata/mnist_with_external_initializers.onnx"), ORT_TSTR("mnist_external_initializers.bin"), 100, align_info)); } // Original model has external initializers TEST(SaveWithExternalInitializers, ModelWithOriginalExternalData) { - ASSERT_STATUS_OK(LoadSaveAndCompareModel(ORT_TSTR("testdata/model_with_orig_ext_data.onnx"), ORT_TSTR("model_with_orig_ext_data.onnx.data"), ORT_TSTR("testdata/model_with_new_external_initializers.onnx"), ORT_TSTR("model_with_new_external_initializers.bin"), 0)); + Graph::OffsetAlignmentInfo align_info; + ASSERT_STATUS_OK(LoadSaveAndCompareModel(ORT_TSTR("testdata/model_with_orig_ext_data.onnx"), ORT_TSTR("model_with_orig_ext_data.onnx.data"), ORT_TSTR("testdata/model_with_new_external_initializers.onnx"), ORT_TSTR("model_with_new_external_initializers.bin"), 0, align_info)); +} + +// Original model has external initializers, align offset +TEST(SaveWithExternalInitializers, ModelWithOriginalExternalDataAlignOffset) { + Graph::OffsetAlignmentInfo align_info; + align_info.align_offset = true; + align_info.align_threshold = 0; + ASSERT_STATUS_OK(LoadSaveAndCompareModel(ORT_TSTR("testdata/model_with_orig_ext_data.onnx"), ORT_TSTR("model_with_orig_ext_data.onnx.data"), ORT_TSTR("testdata/model_with_new_external_initializers.onnx"), ORT_TSTR("model_with_new_external_initializers.bin"), 0, align_info)); } } // namespace test