diff --git a/include/onnxruntime/core/graph/graph.h b/include/onnxruntime/core/graph/graph.h
index c51f38553c3b4..ddbc7cc33be75 100644
--- a/include/onnxruntime/core/graph/graph.h
+++ b/include/onnxruntime/core/graph/graph.h
@@ -1144,11 +1144,23 @@ class Graph {  // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
   @param model_file_path path of the model file.
   @param initializer_size_threshold initializers larger or equal to this threshold (in bytes) are saved
   in the external file. Initializer smaller than this threshold are included in the onnx file.
+  @align_offset offset will always be page aligned and alloction granularity aligned for mmap support. 
+  This is done by padding previous tensor data with zeros keeping same length. 
+  Tensor data will be aligned if > align_threshold
+  @align_threshold alignment threshold for size of data.
+  Having a low threshold will waste file space for small initializers. 
+  Only when tensor's data is > the page_align_threshold it will be force aligned.
+  Default to 1MB.
+  @allocation_granularity the allocation Granularity for mmap() support.
+  Typically 64KB for Windows & 4KB for other OSes. Default to 64KB.
   @returns GraphProto serialization of the graph.
   */
   ONNX_NAMESPACE::GraphProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path,
                                                                   const std::filesystem::path& model_file_path,
-                                                                  size_t initializer_size_threshold) const;
+                                                                  size_t initializer_size_threshold,
+                                                                  bool align_offset = FALSE,
+                                                                  size_t align_threshold = 1048576,
+                                                                  size_t allocation_granularity = 65536) const;
 
   /** Gets the ISchemaRegistry instances being used with this Graph. */
   IOnnxRuntimeOpSchemaCollectionPtr GetSchemaRegistry() const;
diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc
index e950d68947b91..f3b7bad572d85 100644
--- a/onnxruntime/core/graph/graph.cc
+++ b/onnxruntime/core/graph/graph.cc
@@ -4021,7 +4021,10 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProto() const {
 
 ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path,
                                                                        const std::filesystem::path& model_file_path,
-                                                                       size_t initializer_size_threshold) const {
+                                                                       size_t initializer_size_threshold,
+                                                                       bool align_offset,
+                                                                       size_t align_threshold,
+                                                                       size_t allocation_granularity) const {
   GraphProto result;
   ToGraphProtoInternal(result);
   ORT_ENFORCE(external_file_path.is_relative());
@@ -4063,6 +4066,22 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std
         external_stream << raw_data[index];
       }
 
+      // update external_offset for alignment
+      if (align_offset && tensor_bytes_size > align_threshold) {
+        // Align to the larger of the page size or the allocation granularity
+        size_t alignment_factor = std::max(static_cast<size_t>(4096), allocation_granularity);
+        // Align to the next page or alloc granularity boundary
+        size_t new_external_offset = static_cast<size_t>(
+            std::floor((external_offset + alignment_factor - 1) / alignment_factor)) * alignment_factor;
+
+        // padding tensor with zeros for alignment
+        for (size_t index = external_offset; index != new_external_offset; ++index) {
+          external_stream << '0';
+        }
+
+        external_offset = new_external_offset;
+      }
+
       output_proto->set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation::TensorProto_DataLocation_EXTERNAL);
       ONNX_NAMESPACE::StringStringEntryProto* location = output_proto->add_external_data();
       location->set_key("location");
diff --git a/onnxruntime/core/graph/model.cc b/onnxruntime/core/graph/model.cc
index ee4d9f9154971..be086c167e611 100644
--- a/onnxruntime/core/graph/model.cc
+++ b/onnxruntime/core/graph/model.cc
@@ -383,12 +383,18 @@ ModelProto Model::ToProto() const {
 
 ModelProto Model::ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_name,
                                                        const std::filesystem::path& file_path,
-                                                       size_t initializer_size_threshold) const {
+                                                       size_t initializer_size_threshold,
+                                                       bool align_offset,
+                                                       size_t align_threshold,
+                                                       size_t allocation_granularity) const {
   ModelProto result(model_proto_);
   const auto& graph = *graph_;
   *(result.mutable_graph()) = graph.ToGraphProtoWithExternalInitializers(external_file_name,
                                                                          file_path,
-                                                                         initializer_size_threshold);
+                                                                         initializer_size_threshold,
+                                                                         align_offset,
+                                                                         align_threshold,
+                                                                         allocation_granularity);
   return result;
 }
 
@@ -605,14 +611,19 @@ template <typename T>
 static Status SaveModelWithExternalInitializers(Model& model,
                                                 const T& file_path,
                                                 const std::filesystem::path& external_file_name,
-                                                size_t initializer_size_threshold) {
+                                                size_t initializer_size_threshold,
+                                                bool align_offset = FALSE,
+                                                size_t align_threshold = 1048576,
+                                                size_t allocation_granularity = 65536) {
   int fd = 0;
   Status status = Env::Default().FileOpenWr(file_path, fd);
   ORT_RETURN_IF_ERROR(status);
 
   ORT_TRY {
     status = Model::SaveWithExternalInitializers(model, fd, file_path, external_file_name,
-                                                 initializer_size_threshold);
+                                                 initializer_size_threshold,
+                                                 align_offset, align_threshold,
+                                                 allocation_granularity);
   }
   ORT_CATCH(const std::exception& ex) {
     ORT_HANDLE_EXCEPTION([&]() {
@@ -642,8 +653,12 @@ Status Model::Load(const PathString& file_path, std::shared_ptr<Model>& p_model,
 
 Status Model::SaveWithExternalInitializers(Model& model, const std::filesystem::path& file_path,
                                            const std::filesystem::path& external_file_name,
-                                           size_t initializer_size_threshold) {
-  return SaveModelWithExternalInitializers(model, file_path, external_file_name, initializer_size_threshold);
+                                           size_t initializer_size_threshold,
+                                           bool align_offset,
+                                           size_t align_threshold,
+                                           size_t allocation_granularity) {
+  return SaveModelWithExternalInitializers(model, file_path, external_file_name, initializer_size_threshold,
+                                           align_offset, align_threshold, allocation_granularity);
 }
 
 Status Model::LoadFromBytes(int count, void* p_bytes, /*out*/ ONNX_NAMESPACE::ModelProto& model_proto) {
@@ -759,7 +774,10 @@ Status Model::SaveWithExternalInitializers(Model& model,
                                            int fd,
                                            const std::filesystem::path& file_path,
                                            const std::filesystem::path& external_file_name,
-                                           size_t initializer_size_threshold) {
+                                           size_t initializer_size_threshold,
+                                           bool align_offset,
+                                           size_t align_threshold,
+                                           size_t allocation_granularity) {
   if (fd < 0) {
     return Status(ONNXRUNTIME, INVALID_ARGUMENT, "<fd> is less than 0.");
   }
@@ -767,7 +785,9 @@ Status Model::SaveWithExternalInitializers(Model& model,
   ORT_RETURN_IF_ERROR(model.MainGraph().Resolve());
 
   auto model_proto = model.ToGraphProtoWithExternalInitializers(external_file_name, file_path,
-                                                                initializer_size_threshold);
+                                                                initializer_size_threshold,
+                                                                align_offset, align_threshold,
+                                                                allocation_granularity);
   google::protobuf::io::FileOutputStream output(fd);
   const bool result = model_proto.SerializeToZeroCopyStream(&output) && output.Flush();
   if (result) {
diff --git a/onnxruntime/core/graph/model.h b/onnxruntime/core/graph/model.h
index 728af727ac83b..49ab0882147f5 100644
--- a/onnxruntime/core/graph/model.h
+++ b/onnxruntime/core/graph/model.h
@@ -187,25 +187,36 @@ class Model {
   // Get model's serialization proto data.
   // Save initializer larger than the given threshold (in bytes) into an external binary file
   // with the given name. This function is useful to avoid hitting the size limit of protobuf files.
+  // initializer offset could be page aligned and allocation granularity aligned for mmap support. 
   ONNX_NAMESPACE::ModelProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_name,
                                                                   const std::filesystem::path& file_path,
-                                                                  size_t initializer_size_threshold) const;
+                                                                  size_t initializer_size_threshold,
+                                                                  bool align_offset = FALSE,
+                                                                  size_t align_threshold = 1048576,
+                                                                  size_t allocation_granularity = 65536) const;
 
   static common::Status Save(Model& model, const PathString& file_path);
 
   static common::Status Save(Model& model, int fd);
 
   // Save the model to file using an external file for initializers larger than the given threshold (in bytes).
+  // Initializer offset could be page aligned and allocation granularity aligned for mmap support.
   static common::Status SaveWithExternalInitializers(Model& model,
                                                      const std::filesystem::path& file_path,
                                                      const std::filesystem::path& external_file_path,
-                                                     size_t initializer_size_threshold);
+                                                     size_t initializer_size_threshold,
+                                                     bool align_offset = FALSE,
+                                                     size_t align_threshold = 1048576,
+                                                     size_t allocation_granularity = 65536);
 
   static common::Status SaveWithExternalInitializers(Model& model,
                                                      int fd,
                                                      const std::filesystem::path& file_path,
                                                      const std::filesystem::path& external_file_path,
-                                                     size_t initializer_size_threshold);
+                                                     size_t initializer_size_threshold,
+                                                     bool align_offset = FALSE,
+                                                     size_t align_threshold = 1048576,
+                                                     size_t allocation_granularity = 65536);
 
   static common::Status Load(std::istream& model_istream, ONNX_NAMESPACE::ModelProto* p_model_proto);
 
diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc
index 5ad2f08467792..e7af1b09da96c 100644
--- a/onnxruntime/core/session/inference_session.cc
+++ b/onnxruntime/core/session/inference_session.cc
@@ -2055,7 +2055,8 @@ common::Status InferenceSession::Initialize() {
           ORT_RETURN_IF_ERROR_SESSIONID_(Model::SaveWithExternalInitializers(*model_,
                                                                              session_options_.optimized_model_filepath,
                                                                              optimized_model_external_initializers_file_name,
-                                                                             optimized_model_external_initializers_min_size_in_bytes));
+                                                                             optimized_model_external_initializers_min_size_in_bytes,
+                                                                             TRUE));
         }
       }
     }
diff --git a/onnxruntime/test/framework/save_model_with_external_initializers.cc b/onnxruntime/test/framework/save_model_with_external_initializers.cc
index 447b0edef879b..3ea66e55dce84 100644
--- a/onnxruntime/test/framework/save_model_with_external_initializers.cc
+++ b/onnxruntime/test/framework/save_model_with_external_initializers.cc
@@ -23,13 +23,17 @@ Status LoadSaveAndCompareModel(const std::filesystem::path& input_onnx,
                                const std::filesystem::path& input_external_init_file,
                                const std::filesystem::path& output_onnx,
                                const std::filesystem::path& output_external_init_file,
-                               size_t initializer_size_threshold) {
+                               size_t initializer_size_threshold,
+                               bool align_offset = false,
+                               size_t align_threshold = 1,
+                               size_t allocation_granularity = 4096) {
   auto logger = DefaultLoggingManager().CreateLogger("LoadSaveAndCompareModel");
   std::shared_ptr<Model> model;
   ORT_RETURN_IF_ERROR(Model::Load(input_onnx, model, nullptr, *logger));
   std::filesystem::remove(output_onnx);
   std::filesystem::remove(output_external_init_file);
-  ORT_RETURN_IF_ERROR(Model::SaveWithExternalInitializers(*model, output_onnx, output_external_init_file, initializer_size_threshold));
+  ORT_RETURN_IF_ERROR(Model::SaveWithExternalInitializers(*model, output_onnx, output_external_init_file, initializer_size_threshold,
+                                                          align_offset, align_threshold, allocation_granularity));
 
   std::shared_ptr<Model> model_from_external;
   ORT_RETURN_IF_ERROR(Model::Load(output_onnx.native(), model_from_external, nullptr, *logger));
@@ -75,6 +79,17 @@ Status LoadSaveAndCompareModel(const std::filesystem::path& input_onnx,
 
     ORT_RETURN_IF_NOT(tensor_proto_size == from_external_tensor_proto_size, "size mismatch");
     ORT_RETURN_IF_NOT(memcmp(tensor_proto_data.data(), from_external_tensor_proto_data.data(), tensor_proto_size) == 0, "data mismatch");
+
+    if (align_offset) {
+      for (const StringStringEntryProto& entry : from_external_tensor_proto->external_data()) {
+        if (entry.has_key() && entry.has_value() && entry.key() == "offset") {
+          size_t tensor_offset;
+          std::stringstream stream(entry.value());
+          stream >> tensor_offset;
+          ORT_RETURN_IF_NOT(tensor_offset % allocation_granularity == 0, "tensor offset not align");
+        }
+      }
+    }
   }
   // Cleanup.
   ORT_RETURN_IF_NOT(std::filesystem::remove(output_onnx), "delete file failed");
@@ -92,5 +107,10 @@ TEST(SaveWithExternalInitializers, ModelWithOriginalExternalData) {
   ASSERT_STATUS_OK(LoadSaveAndCompareModel(ORT_TSTR("testdata/model_with_orig_ext_data.onnx"), ORT_TSTR("model_with_orig_ext_data.onnx.data"), ORT_TSTR("testdata/model_with_new_external_initializers.onnx"), ORT_TSTR("model_with_new_external_initializers.bin"), 0));
 }
 
+// Original model has external initializers, align offset
+TEST(SaveWithExternalInitializers, ModelWithOriginalExternalDataAlignOffset) {
+  ASSERT_STATUS_OK(LoadSaveAndCompareModel(ORT_TSTR("testdata/model_with_orig_ext_data.onnx"), ORT_TSTR("model_with_orig_ext_data.onnx.data"), ORT_TSTR("testdata/model_with_new_external_initializers.onnx"), ORT_TSTR("model_with_new_external_initializers.bin"), 0, true));
+}
+
 }  // namespace test
 }  // namespace onnxruntime