From abfc34461cadc761dfead6b07da0bfbd6056a3eb Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Tue, 4 Jun 2024 17:17:55 +0000 Subject: [PATCH 01/50] a --- include/onnxruntime/core/graph/graph.h | 20 +--- include/onnxruntime/core/graph/graph_viewer.h | 2 +- .../core/framework/graph_partitioner.cc | 9 +- .../framework/model_metadef_id_generator.cc | 2 +- onnxruntime/core/framework/node_unit.cc | 2 +- onnxruntime/core/framework/node_unit.h | 3 +- onnxruntime/core/framework/session_options.h | 2 +- .../core/framework/tensorprotoutils.cc | 109 ++++++++---------- onnxruntime/core/framework/tensorprotoutils.h | 21 ++-- onnxruntime/core/graph/graph.cc | 21 ++-- .../core/graph/graph_flatbuffers_utils.cc | 6 +- .../core/graph/graph_flatbuffers_utils.h | 7 +- onnxruntime/core/graph/model.cc | 8 +- onnxruntime/core/graph/model.h | 10 +- onnxruntime/core/optimizer/initializer.cc | 6 +- onnxruntime/core/optimizer/initializer.h | 4 +- .../core/optimizer/matmul_scale_fusion.cc | 2 +- .../optimizer/optimizer_execution_frame.cc | 9 +- .../optimizer/optimizer_execution_frame.h | 4 +- .../optimizer/qdq_transformer/qdq_util.cc | 4 +- .../core/optimizer/qdq_transformer/qdq_util.h | 5 +- .../ort_optimizer_api_impl.cc | 9 +- .../core/providers/cpu/ml/label_encoder.h | 4 +- .../openvino/openvino_execution_provider.cc | 10 +- .../provider_bridge_provider.cc | 2 +- .../shared_library/provider_interfaces.h | 9 +- .../shared_library/provider_wrappedtypes.h | 6 +- onnxruntime/core/session/custom_ops.cc | 3 +- onnxruntime/core/session/inference_session.cc | 4 +- onnxruntime/core/session/inference_session.h | 2 +- .../core/session/provider_bridge_ort.cc | 8 +- .../test/flatbuffers/flatbuffer_utils_test.cc | 2 +- .../save_model_with_external_initializers.cc | 51 ++++---- .../test/framework/sparse_kernels_test.cc | 92 +++++++-------- .../test/framework/tensorutils_test.cc | 16 +-- .../test/framework/test_tensor_loader.cc | 10 +- .../test/optimizer/initializer_test.cc | 60 +++++----- onnxruntime/test/util/test_utils.cc | 2 +- 38 files changed, 268 insertions(+), 278 deletions(-) diff --git a/include/onnxruntime/core/graph/graph.h b/include/onnxruntime/core/graph/graph.h index 4f3377f0aa0c0..ae57dd712fd68 100644 --- a/include/onnxruntime/core/graph/graph.h +++ b/include/onnxruntime/core/graph/graph.h @@ -10,16 +10,7 @@ #include #include #include - -#ifdef _WIN32 -#pragma warning(push) -// disable some warnings from protobuf to pass Windows build -#pragma warning(disable : 4244) -#endif - -#ifdef _WIN32 -#pragma warning(pop) -#endif +#include #include "core/common/flatbuffers.h" @@ -147,7 +138,7 @@ class Node { const std::string& Domain() const noexcept { return domain_; } /** Gets the path of the owning model if any. */ - const Path& ModelPath() const noexcept; + const std::filesystem::path& ModelPath() const noexcept; /** Gets the Node's execution priority. @remarks Lower value means higher priority */ @@ -693,7 +684,7 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi const std::string& Description() const noexcept; /** Gets the path of the owning model, if any. */ - const Path& ModelPath() const; + const std::filesystem::path& ModelPath() const; /** Returns true if this is a subgraph or false if it is a high-level graph. 
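  @remarks For example, the body graph of an If, Loop, or Scan node is a subgraph; for such a graph parent_graph_ is the enclosing graph.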
  */
  bool IsSubgraph() const { return parent_graph_ != nullptr; }
@@ -1149,13 +1140,14 @@ class Graph {  // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
   ONNX_NAMESPACE::GraphProto ToGraphProto() const;
 
   /** Gets the GraphProto representation of this Graph
-  @params external_file_name name of the binary file to use for initializers
+  @param external_file_name name of the binary file to use for initializers. Must be a UTF-8 string.
+  @param destination_file_path path of the model file.
   @param initializer_size_threshold initializers larger or equal to this threshold (in bytes) are saved
   in the external file. Initializer smaller than this threshold are included in the onnx file.
   @returns GraphProto serialization of the graph.
   */
   ONNX_NAMESPACE::GraphProto ToGraphProtoWithExternalInitializers(const std::string& external_file_name,
-                                                                  const PathString& file_path,
+                                                                  const std::filesystem::path& file_path,
                                                                   size_t initializer_size_threshold) const;
 
   /** Gets the ISchemaRegistry instances being used with this Graph. */
diff --git a/include/onnxruntime/core/graph/graph_viewer.h b/include/onnxruntime/core/graph/graph_viewer.h
index 1816099d3210f..e904c244c0d19 100644
--- a/include/onnxruntime/core/graph/graph_viewer.h
+++ b/include/onnxruntime/core/graph/graph_viewer.h
@@ -43,7 +43,7 @@ class GraphViewer {
   const std::string& Description() const noexcept;
 
   /** Gets the path of the owning model if any **/
-  const Path& ModelPath() const noexcept { return graph_->ModelPath(); }
+  const std::filesystem::path& ModelPath() const noexcept { return graph_->ModelPath(); }
 
   /**
   Gets a tensor created from an initializer.
diff --git a/onnxruntime/core/framework/graph_partitioner.cc b/onnxruntime/core/framework/graph_partitioner.cc
index 90ee8a46f66a9..ea4f71440d0f8 100644
--- a/onnxruntime/core/framework/graph_partitioner.cc
+++ b/onnxruntime/core/framework/graph_partitioner.cc
@@ -637,7 +637,7 @@ static Status InlineFunctionsAOTImpl(const ExecutionProviders& execution_provide
 
 static Status CreateEpContextModel(const ExecutionProviders& execution_providers,
                                    const Graph& graph,
-                                   const std::string& ep_context_path,
+                                   const std::filesystem::path& ep_context_path,
                                    const logging::Logger& logger) {
   InlinedVector<const Node*> all_ep_context_nodes;
   for (const auto& ep : execution_providers) {
@@ -658,13 +658,15 @@ static Status CreateEpContextModel(const ExecutionProviders& execution_providers
     return std::make_pair(false, static_cast<const Node*>(nullptr));
   };
 
-  onnxruntime::PathString context_cache_path;
-  PathString model_pathstring = graph.ModelPath().ToPathString();
+  std::filesystem::path context_cache_path;
+  const std::filesystem::path& model_pathstring = graph.ModelPath();
 
   if (!ep_context_path.empty()) {
-    context_cache_path = ToPathString(ep_context_path);
+    // ep_context_path is assumed to be UTF-8 and was already converted to the native encoding (UTF-16 on Windows) when the std::filesystem::path was constructed, so it can be assigned directly
+    context_cache_path = ep_context_path;
   } else if (!model_pathstring.empty()) {
-    context_cache_path = model_pathstring + ToPathString("_ctx.onnx");
+    // append the suffix to the file name via native() + concatenation; operator/ would add a new path component instead
+    context_cache_path = model_pathstring.native() + ORT_TSTR("_ctx.onnx");
   }
 
   {
diff --git a/onnxruntime/core/framework/model_metadef_id_generator.cc b/onnxruntime/core/framework/model_metadef_id_generator.cc
index e51c6ebc29975..c3f7382e6ef7e 100644
--- a/onnxruntime/core/framework/model_metadef_id_generator.cc
+++ b/onnxruntime/core/framework/model_metadef_id_generator.cc
@@ -40,7 +40,7 @@ int ModelMetadefIdGenerator::GenerateId(const onnxruntime::GraphViewer& graph_vi
   // prefer path the model was loaded from
   // this may not be available if the model was
loaded from a stream or in-memory bytes - const auto& model_path_str = main_graph.ModelPath().ToPathString(); + const auto& model_path_str = main_graph.ModelPath().string(); if (!model_path_str.empty()) { MurmurHash3::x86_128(model_path_str.data(), gsl::narrow_cast(model_path_str.size()), hash[0], &hash); } else { diff --git a/onnxruntime/core/framework/node_unit.cc b/onnxruntime/core/framework/node_unit.cc index 174942b9033d0..2bcacebfdec06 100644 --- a/onnxruntime/core/framework/node_unit.cc +++ b/onnxruntime/core/framework/node_unit.cc @@ -277,7 +277,7 @@ const std::string& NodeUnit::OpType() const noexcept { return target_node_.OpTyp const std::string& NodeUnit::Name() const noexcept { return target_node_.Name(); } int NodeUnit::SinceVersion() const noexcept { return target_node_.SinceVersion(); } NodeIndex NodeUnit::Index() const noexcept { return target_node_.Index(); } -const Path& NodeUnit::ModelPath() const noexcept { return target_node_.ModelPath(); } +const std::filesystem::path& NodeUnit::ModelPath() const noexcept { return target_node_.ModelPath(); } ProviderType NodeUnit::GetExecutionProviderType() const noexcept { return target_node_.GetExecutionProviderType(); } void NodeUnit::InitForSingleNode() { diff --git a/onnxruntime/core/framework/node_unit.h b/onnxruntime/core/framework/node_unit.h index a168495f12ebf..e84e62479162f 100644 --- a/onnxruntime/core/framework/node_unit.h +++ b/onnxruntime/core/framework/node_unit.h @@ -9,6 +9,7 @@ #include #include #include +#include #include "core/graph/basic_types.h" #include "core/graph/graph.h" @@ -78,7 +79,7 @@ class NodeUnit { const std::string& Name() const noexcept; int SinceVersion() const noexcept; NodeIndex Index() const noexcept; - const Path& ModelPath() const noexcept; + const std::filesystem::path& ModelPath() const noexcept; ProviderType GetExecutionProviderType() const noexcept; const Node& GetNode() const noexcept { return target_node_; } diff --git a/onnxruntime/core/framework/session_options.h b/onnxruntime/core/framework/session_options.h index 0453a7ecac81f..bc277ef24a9fd 100644 --- a/onnxruntime/core/framework/session_options.h +++ b/onnxruntime/core/framework/session_options.h @@ -89,7 +89,7 @@ struct SessionOptions { // // If session config value is not set, it will be assumed to be ONNX // unless the filepath ends in '.ort' (case insensitive). - std::basic_string optimized_model_filepath; + std::filesystem::path optimized_model_filepath; // enable the memory pattern optimization. 
// The idea is if the input shapes are the same, we could trace the internal memory allocation diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc index 6af78f18fb82f..4426cc788e9a9 100644 --- a/onnxruntime/core/framework/tensorprotoutils.cc +++ b/onnxruntime/core/framework/tensorprotoutils.cc @@ -6,7 +6,7 @@ #include #include #include - +#include #if defined(__wasm__) #include #endif @@ -165,7 +165,7 @@ DEFINE_INT4_UNPACK_TENSOR_WITH_RAW_DATA_IMPL(Int4x2) DEFINE_INT4_UNPACK_TENSOR_WITH_RAW_DATA_IMPL(UInt4x2) static Status GetExternalDataInfo(const ONNX_NAMESPACE::TensorProto& tensor_proto, - const ORTCHAR_T* tensor_proto_dir, + const std::filesystem::path& tensor_proto_dir, std::basic_string& external_file_path, onnxruntime::FileOffsetType& file_offset, SafeInt& tensor_byte_size) { @@ -180,16 +180,7 @@ static Status GetExternalDataInfo(const ONNX_NAMESPACE::TensorProto& tensor_prot const auto& location = external_data_info->GetRelPath(); - if (location == onnxruntime::utils::kTensorProtoMemoryAddressTag) { - external_file_path = location; - } else { - if (tensor_proto_dir != nullptr) { - external_file_path = onnxruntime::ConcatPathComponent(tensor_proto_dir, - external_data_info->GetRelPath()); - } else { - external_file_path = external_data_info->GetRelPath(); - } - } + external_file_path = location == onnxruntime::utils::kTensorProtoMemoryAddressTag ? std::filesystem::path(location) : (tensor_proto_dir / location); ORT_RETURN_IF_ERROR(onnxruntime::utils::GetSizeInBytesFromTensorProto<0>(tensor_proto, &tensor_byte_size)); const size_t external_data_length = external_data_info->GetLength(); @@ -207,7 +198,7 @@ static Status GetExternalDataInfo(const ONNX_NAMESPACE::TensorProto& tensor_prot // then uses the current directory instead. 
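 // For example (illustrative values):
 //   tensor_proto_dir = "/models", location = "weights.bin" resolves to "/models/weights.bin";
 //   an empty tensor_proto_dir leaves "weights.bin" relative to the current directory.
 // Note that std::filesystem::operator/ lets an absolute location replace tensor_proto_dir entirely.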
// This function does not unpack string_data of an initializer tensor Status ReadExternalDataForTensor(const ONNX_NAMESPACE::TensorProto& tensor_proto, - const ORTCHAR_T* tensor_proto_dir, + const std::filesystem::path& tensor_proto_dir, std::vector& unpacked_tensor) { std::basic_string external_file_path; onnxruntime::FileOffsetType file_offset; @@ -231,7 +222,7 @@ Status ReadExternalDataForTensor(const ONNX_NAMESPACE::TensorProto& tensor_proto // TODO(unknown): Change the current interface to take Path object for model path // so that validating and manipulating path for reading external data becomes easy -Status TensorProtoToOrtValueImpl(const Env& env, const ORTCHAR_T* model_path, +Status TensorProtoToOrtValueImpl(const Env& env, const std::filesystem::path& model_path, const ONNX_NAMESPACE::TensorProto& tensor_proto, const MemBuffer* m, AllocatorPtr alloc, OrtValue& value) { @@ -276,7 +267,7 @@ namespace utils { #if !defined(ORT_MINIMAL_BUILD) static Status UnpackTensorWithExternalDataImpl(const ONNX_NAMESPACE::TensorProto& tensor, - const ORTCHAR_T* tensor_proto_dir, + const std::filesystem::path& tensor_proto_dir, size_t expected_num_elements, size_t element_size, /*out*/ unsigned char* p_data) { ORT_RETURN_IF(nullptr == p_data, "nullptr == p_data"); @@ -292,7 +283,7 @@ static Status UnpackTensorWithExternalDataImpl(const ONNX_NAMESPACE::TensorProto template Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto& tensor, - const ORTCHAR_T* tensor_proto_dir, size_t expected_num_elements, + const std::filesystem::path& tensor_proto_dir, size_t expected_num_elements, /*out*/ T* p_data) { static_assert(std::is_trivially_copyable::value, "T must be trivially copyable"); @@ -300,34 +291,34 @@ Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto& tensor, reinterpret_cast(p_data)); } -#define DEFINE_INT4_UNPACK_TENSOR_WITH_EXT_DATA_IMPL(INT4_TYPE) \ - template <> \ - Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto& tensor, \ - const ORTCHAR_T* tensor_proto_dir, size_t expected_num_elements, \ - /*out*/ INT4_TYPE* p_data) { \ - static_assert(std::is_trivially_copyable::value, "T must be trivially copyable"); \ - \ - ORT_RETURN_IF(nullptr == p_data, "nullptr == p_data"); \ - std::vector unpacked_tensor; \ - ORT_RETURN_IF_ERROR(ReadExternalDataForTensor(tensor, tensor_proto_dir, unpacked_tensor)); \ - \ - size_t num_packed_pairs = INT4_TYPE::CalcNumInt4Pairs(expected_num_elements); \ - ORT_RETURN_IF_NOT(num_packed_pairs == unpacked_tensor.size(), "Unexpected number of packed int4 pairs"); \ - \ - gsl::span src_span = gsl::make_span(reinterpret_cast(unpacked_tensor.data()), \ - num_packed_pairs); \ - gsl::span dst_span = gsl::make_span(p_data, expected_num_elements); \ - \ - std::memcpy(dst_span.data(), src_span.data(), num_packed_pairs); \ - \ - return Status::OK(); \ +#define DEFINE_INT4_UNPACK_TENSOR_WITH_EXT_DATA_IMPL(INT4_TYPE) \ + template <> \ + Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto& tensor, \ + const std::filesystem::path& tensor_proto_dir, size_t expected_num_elements, \ + /*out*/ INT4_TYPE* p_data) { \ + static_assert(std::is_trivially_copyable::value, "T must be trivially copyable"); \ + \ + ORT_RETURN_IF(nullptr == p_data, "nullptr == p_data"); \ + std::vector unpacked_tensor; \ + ORT_RETURN_IF_ERROR(ReadExternalDataForTensor(tensor, tensor_proto_dir, unpacked_tensor)); \ + \ + size_t num_packed_pairs = INT4_TYPE::CalcNumInt4Pairs(expected_num_elements); \ + ORT_RETURN_IF_NOT(num_packed_pairs == 
unpacked_tensor.size(), "Unexpected number of packed int4 pairs"); \ + \ + gsl::span src_span = gsl::make_span(reinterpret_cast(unpacked_tensor.data()), \ + num_packed_pairs); \ + gsl::span dst_span = gsl::make_span(p_data, expected_num_elements); \ + \ + std::memcpy(dst_span.data(), src_span.data(), num_packed_pairs); \ + \ + return Status::OK(); \ } DEFINE_INT4_UNPACK_TENSOR_WITH_EXT_DATA_IMPL(Int4x2) DEFINE_INT4_UNPACK_TENSOR_WITH_EXT_DATA_IMPL(UInt4x2) #define INSTANTIATE_UNPACK_EXTERNAL_TENSOR(type) \ - template Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto&, const ORTCHAR_T*, size_t, type*); + template Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto&, const std::filesystem::path&, size_t, type*); INSTANTIATE_UNPACK_EXTERNAL_TENSOR(float) INSTANTIATE_UNPACK_EXTERNAL_TENSOR(double) @@ -352,7 +343,7 @@ INSTANTIATE_UNPACK_EXTERNAL_TENSOR(Float8E5M2FNUZ) template <> Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto& /*tensor*/, - const ORTCHAR_T* /*tensor_proto_dir*/, size_t /*expected_num_elements*/, + const std::filesystem::path& /*tensor_proto_dir*/, size_t /*expected_num_elements*/, /*out*/ std::string* /*p_data*/) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "External data type cannot be STRING."); } @@ -705,13 +696,13 @@ DEFINE_INT4_UNPACK_TENSOR_IMPL(UInt4x2, TensorProto_DataType_UINT4) // Uses the model path to construct the full path for loading external data. In case when model_path is empty // it uses current directory. template -Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const Path& model_path, +Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const std::filesystem::path& model_path, /*out*/ T* p_data, size_t expected_num_elements) { #if !defined(ORT_MINIMAL_BUILD) if (HasExternalData(tensor)) { return UnpackTensorWithExternalData( tensor, - model_path.IsEmpty() ? nullptr : model_path.ParentPath().ToPathString().c_str(), + model_path.empty() ? 
std::filesystem::path() : model_path.parent_path(), expected_num_elements, p_data); } @@ -727,7 +718,7 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const Path& model // instantiate the UnpackTensor variant that supports external data #define INSTANTIATE_UNPACK_TENSOR(type) \ - template Status UnpackTensor(const ONNX_NAMESPACE::TensorProto&, const Path&, type* p_data, size_t); + template Status UnpackTensor(const ONNX_NAMESPACE::TensorProto&, const std::filesystem::path&, type* p_data, size_t); INSTANTIATE_UNPACK_TENSOR(float) INSTANTIATE_UNPACK_TENSOR(double) @@ -885,17 +876,17 @@ static void DeleteCharArray(void* param) noexcept { #if !defined(__wasm__) static Status GetFileContent( - const Env& env, const ORTCHAR_T* file_path, FileOffsetType offset, size_t length, + const Env& env, const std::filesystem::path& file_path, FileOffsetType offset, size_t length, void*& raw_buffer, OrtCallback& deleter) { // query length if it is 0 if (length == 0) { - ORT_RETURN_IF_ERROR(env.GetFileLength(file_path, length)); + length = std::filesystem::file_size(file_path); } // first, try to map into memory { Env::MappedMemoryPtr mapped_memory{}; - auto status = env.MapFileIntoMemory(file_path, offset, length, mapped_memory); + auto status = env.MapFileIntoMemory(file_path.native().c_str(), offset, length, mapped_memory); if (status.IsOK()) { deleter = mapped_memory.get_deleter().callback; raw_buffer = mapped_memory.release(); @@ -906,7 +897,7 @@ static Status GetFileContent( // if that fails, try to copy auto buffer = std::make_unique(length); ORT_RETURN_IF_ERROR(env.ReadFileIntoBuffer( - file_path, offset, length, gsl::make_span(buffer.get(), length))); + file_path.native().c_str(), offset, length, gsl::make_span(buffer.get(), length))); deleter = OrtCallback{DeleteCharArray, buffer.get()}; raw_buffer = buffer.release(); @@ -914,12 +905,12 @@ static Status GetFileContent( } #endif -Status GetExtDataFromTensorProto(const Env& env, const ORTCHAR_T* model_path, +Status GetExtDataFromTensorProto(const Env& env, const std::filesystem::path& model_path, const ONNX_NAMESPACE::TensorProto& tensor_proto, void*& ext_data_buf, SafeInt& ext_data_len, OrtCallback& ext_data_deleter) { ORT_ENFORCE(utils::HasExternalData(tensor_proto)); std::basic_string tensor_proto_dir; - if (model_path != nullptr) { + if (!model_path.empty()) { ORT_RETURN_IF_ERROR(GetDirNameFromFilePath(model_path, tensor_proto_dir)); } const ORTCHAR_T* t_prot_dir_s = tensor_proto_dir.size() == 0 ? 
nullptr : tensor_proto_dir.c_str(); @@ -1036,7 +1027,7 @@ Status GetExtDataFromTensorProto(const Env& env, const ORTCHAR_T* model_path, * @param tensor pre-allocated tensor object, where we store the data * @return */ -Status TensorProtoToTensor(const Env& env, const ORTCHAR_T* model_path, +Status TensorProtoToTensor(const Env& env, const std::filesystem::path& model_path, const ONNX_NAMESPACE::TensorProto& tensor_proto, Tensor& tensor) { // Validate tensor compatibility @@ -1125,13 +1116,13 @@ Status TensorProtoToTensor(const Env& env, const ORTCHAR_T* model_path, return Status::OK(); } -Status TensorProtoToOrtValue(const Env& env, const ORTCHAR_T* model_path, +Status TensorProtoToOrtValue(const Env& env, const std::filesystem::path& model_path, const ONNX_NAMESPACE::TensorProto& tensor_proto, const MemBuffer& m, OrtValue& value) { return TensorProtoToOrtValueImpl(env, model_path, tensor_proto, &m, nullptr, value); } -Status TensorProtoToOrtValue(const Env& env, const ORTCHAR_T* model_path, +Status TensorProtoToOrtValue(const Env& env, const std::filesystem::path& model_path, const ONNX_NAMESPACE::TensorProto& tensor_proto, AllocatorPtr alloc, OrtValue& value) { return TensorProtoToOrtValueImpl(env, model_path, tensor_proto, nullptr, alloc, value); @@ -1207,7 +1198,7 @@ ONNX_NAMESPACE::TensorProto TensorToTensorProto(const Tensor& tensor, const std: } common::Status ConstantNodeProtoToTensorProto(const ONNX_NAMESPACE::NodeProto& node, - const Path& model_path, + const std::filesystem::path& model_path, ONNX_NAMESPACE::TensorProto& tensor, const std::string& tensor_name) { ORT_RETURN_IF_NOT(node.attribute_size() > 0, "Constant node: ", node.name(), " has no data attributes"); @@ -1266,7 +1257,7 @@ common::Status ConstantNodeProtoToTensorProto(const ONNX_NAMESPACE::NodeProto& n } common::Status ConstantNodeProtoToTensorProto(const ONNX_NAMESPACE::NodeProto& node, - const Path& model_path, + const std::filesystem::path& model_path, ONNX_NAMESPACE::TensorProto& tensor) { return ConstantNodeProtoToTensorProto(node, model_path, tensor, node.output(0)); } @@ -1274,7 +1265,7 @@ common::Status ConstantNodeProtoToTensorProto(const ONNX_NAMESPACE::NodeProto& n #if !defined(DISABLE_SPARSE_TENSORS) static Status CopySparseData(size_t n_sparse_elements, const ONNX_NAMESPACE::TensorProto& indices, - const Path& model_path, + const std::filesystem::path& model_path, gsl::span dims, std::function copier) { Status status = Status::OK(); @@ -1393,7 +1384,7 @@ static Status CopySparseData(size_t n_sparse_elements, } common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseTensorProto& sparse, - const Path& model_path, + const std::filesystem::path& model_path, ONNX_NAMESPACE::TensorProto& dense) { Status status = Status::OK(); @@ -1591,7 +1582,7 @@ static void SparsifyGeneric(const void* dense_raw_data, size_t n_dense_elements, } common::Status DenseTensorToSparseTensorProto(const ONNX_NAMESPACE::TensorProto& dense_proto, - const Path& model_path, + const std::filesystem::path& model_path, ONNX_NAMESPACE::SparseTensorProto& result) { ORT_ENFORCE(HasDataType(dense_proto), "Must have a valid data type"); @@ -1699,7 +1690,7 @@ template common::Status GetSizeInBytesFromTensorProto<0>(const ONNX_NAMESPACE::T } Status UnpackInitializerData(const onnx::TensorProto& initializer, - const Path& model_path, + const std::filesystem::path& model_path, std::vector& unpacked_tensor) { // TODO, if std::vector does not use a custom allocator, the default std::allocator will // allocation the memory 
aligned to std::max_align_t, need look into allocating @@ -1707,7 +1698,7 @@ Status UnpackInitializerData(const onnx::TensorProto& initializer, if (initializer.data_location() == TensorProto_DataLocation_EXTERNAL) { ORT_RETURN_IF_ERROR(ReadExternalDataForTensor( initializer, - (model_path.IsEmpty() || model_path.ParentPath().IsEmpty()) ? nullptr : model_path.ParentPath().ToPathString().c_str(), + (model_path.empty() || model_path.parent_path().empty()) ? std::filesystem::path() : model_path.parent_path(), unpacked_tensor)); return Status::OK(); } @@ -1746,7 +1737,7 @@ Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& initializer, std::vector& unpacked_tensor) { ORT_RETURN_IF(initializer.data_location() == TensorProto_DataLocation_EXTERNAL, "The given initializer contains external data"); - return UnpackInitializerData(initializer, Path(), unpacked_tensor); + return UnpackInitializerData(initializer, std::filesystem::path(), unpacked_tensor); } } // namespace utils diff --git a/onnxruntime/core/framework/tensorprotoutils.h b/onnxruntime/core/framework/tensorprotoutils.h index 000502ba47594..552dd465ef4b5 100644 --- a/onnxruntime/core/framework/tensorprotoutils.h +++ b/onnxruntime/core/framework/tensorprotoutils.h @@ -18,6 +18,7 @@ #include "core/framework/tensor_external_data_info.h" #include "core/graph/onnx_protobuf.h" #include "core/platform/env.h" +#include namespace ONNX_NAMESPACE { class TensorProto; @@ -45,7 +46,7 @@ TensorShape GetTensorShapeFromTensorProto(const ONNX_NAMESPACE::TensorProto& ten * the current working dir. This path could be either a relative path or an absolute path. * \return Status::OK on success with 'value' containing the Tensor in CPU based memory. */ -common::Status TensorProtoToOrtValue(const Env& env, const ORTCHAR_T* tensor_proto_path, +common::Status TensorProtoToOrtValue(const Env& env, const std::filesystem::path& tensor_proto_path, const ONNX_NAMESPACE::TensorProto& input, const MemBuffer& m, OrtValue& value); @@ -57,7 +58,7 @@ common::Status TensorProtoToOrtValue(const Env& env, const ORTCHAR_T* tensor_pro * \param alloc Allocator to use for allocating the buffer. Must allocate CPU based memory. * \return Status::OK on success with 'value' containing the Tensor in CPU based memory. */ -common::Status TensorProtoToOrtValue(const Env& env, const ORTCHAR_T* tensor_proto_path, +common::Status TensorProtoToOrtValue(const Env& env, const std::filesystem::path& tensor_proto_path, const ONNX_NAMESPACE::TensorProto& input, AllocatorPtr alloc, OrtValue& value); @@ -69,7 +70,7 @@ common::Status TensorProtoToOrtValue(const Env& env, const ORTCHAR_T* tensor_pro * @param tensorp destination empty tensor * @return */ -common::Status TensorProtoToTensor(const Env& env, const ORTCHAR_T* model_path, +common::Status TensorProtoToTensor(const Env& env, const std::filesystem::path& model_path, const ONNX_NAMESPACE::TensorProto& tensor_proto, Tensor& tensor); @@ -100,7 +101,7 @@ constexpr const ORTCHAR_T* kTensorProtoMemoryAddressTag = ORT_TSTR("*/_ORT_MEM_A // Given a tensor proto with external data obtain a pointer to the data and its length. // The ext_data_deleter argument is updated with a callback that owns/releases the data. 
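// Illustrative call sequence (a sketch; assumes the OrtCallback {f, param} layout from core/framework/callback.h):
//   void* buf = nullptr;
//   SafeInt<size_t> len = 0;
//   OrtCallback deleter{};
//   ORT_RETURN_IF_ERROR(utils::GetExtDataFromTensorProto(Env::Default(), model_path, tensor_proto, buf, len, deleter));
//   // ... read buf[0 .. len) ...
//   if (deleter.f != nullptr) deleter.f(deleter.param);  // unmaps or frees the buffer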
-common::Status GetExtDataFromTensorProto(const Env& env, const ORTCHAR_T* model_path, +common::Status GetExtDataFromTensorProto(const Env& env, const std::filesystem::path& model_path, const ONNX_NAMESPACE::TensorProto& tensor_proto, void*& ext_data_buf, SafeInt& ext_data_len, OrtCallback& ext_data_deleter); @@ -113,11 +114,11 @@ common::Status GetExtDataFromTensorProto(const Env& env, const ORTCHAR_T* model_ // model_path is used for contructing full path for external_data // tensor_name specifies the name for the new TensorProto TensorProto common::Status ConstantNodeProtoToTensorProto(const ONNX_NAMESPACE::NodeProto& node, - const Path& model_path, + const std::filesystem::path& model_path, ONNX_NAMESPACE::TensorProto& tensor, const std::string& tensor_name); common::Status ConstantNodeProtoToTensorProto(const ONNX_NAMESPACE::NodeProto& node, - const Path& model_path, + const std::filesystem::path& model_path, ONNX_NAMESPACE::TensorProto& tensor); #if !defined(DISABLE_SPARSE_TENSORS) @@ -126,7 +127,7 @@ common::Status ConstantNodeProtoToTensorProto(const ONNX_NAMESPACE::NodeProto& n // The resulting TensorProto will contain the data as raw data. // model_path is used for contructing full path for external_data common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseTensorProto& sparse, - const Path& model_path, + const std::filesystem::path& model_path, ONNX_NAMESPACE::TensorProto& dense); #if !defined(ORT_MINIMAL_BUILD) @@ -135,7 +136,7 @@ common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseT // The resulting SparseTensorProto will contain the data as raw data // model_path is used for contructing full path for external_data common::Status DenseTensorToSparseTensorProto(const ONNX_NAMESPACE::TensorProto& dense, - const Path& model_path, + const std::filesystem::path& model_path, ONNX_NAMESPACE::SparseTensorProto& sparse); #endif // !ORT_MINIMAL_BUILD #endif // !defined(DISABLE_SPARSE_TENSORS) @@ -446,7 +447,7 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_d // Uses the model path to construct the full path for loading external data. In case when model_path is empty // it uses current directory. 
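// Illustrative usage (a sketch; `proto` stands for a FLOAT TensorProto holding four elements):
//   float out[4];
//   ORT_RETURN_IF_ERROR(utils::UnpackTensor(proto, model_path, out, 4));
// An empty model_path resolves any external data against the current directory.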
 template <typename T>
-Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const Path& model_path,
+Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const std::filesystem::path& model_path,
                     /*out*/ T* p_data, size_t expected_size);
 
 /**
@@ -458,7 +459,7 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const Path& model
  * @returns Status::OK() if data is unpacked successfully
  */
 common::Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& initializer,
-                                     const Path& model_path,
+                                     const std::filesystem::path& model_path,
                                      std::vector<uint8_t>& unpacked_tensor);
 
 /**
diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc
index 0c1d79532f120..f96bdea71f3b9 100644
--- a/onnxruntime/core/graph/graph.cc
+++ b/onnxruntime/core/graph/graph.cc
@@ -560,7 +560,7 @@ void Node::SetPriority(int priority) noexcept {
   priority_ = priority;
 }
 
-const Path& Node::ModelPath() const noexcept {
+const std::filesystem::path& Node::ModelPath() const noexcept {
   return graph_->ModelPath();
 }
 
@@ -3025,7 +3025,8 @@ Status Graph::VerifyNodeAndOpMatch(const ResolveOptions& options) {
   ctx.set_opset_imports(DomainToVersionMap());
   ctx.set_schema_registry(schema_registry_.get());
   // Set the parent directory of model path to load external tensors if exist
-  ctx.set_model_dir(ToUTF8String(ModelPath().ParentPath().ToPathString()));
+  // TODO: avoid converting it to a multibyte string
+  ctx.set_model_dir(ModelPath().parent_path().string());
 
   LexicalScopeContext parent;
   if (parent_node_) {
@@ -3370,7 +3371,7 @@ const std::string& Graph::Description() const noexcept {
   return graph_proto_->doc_string();
 }
 
-const Path& Graph::ModelPath() const {
+const std::filesystem::path& Graph::ModelPath() const {
   return owning_model_.ModelPath();
 }
 
@@ -3972,20 +3973,18 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProto() const {
 }
 
 ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std::string& external_file_name,
-                                                                       const PathString& destination_file_path,
+                                                                       const std::filesystem::path& destination_file_path,
                                                                        size_t initializer_size_threshold) const {
   GraphProto result;
   ToGraphProtoInternal(result);
-
-  Path parent_path = Path::Parse(destination_file_path).ParentPath();
-  Path external_file_path = Path::Parse(ToPathString(external_file_name));
-  // Check if parent_path is relative path (length = 0)
-  if (parent_path.ToPathString().length()) {
+  std::filesystem::path external_file_path = ToPathString(external_file_name);
+  // If destination_file_path is just a file name without a path separator (for example, "model.onnx"), its parent path is empty.
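+  //   e.g. std::filesystem::path("model.onnx").has_parent_path() == false, while
+  //        std::filesystem::path("dir/model.onnx").parent_path() == "dir".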
+ if (destination_file_path.has_parent_path()) { // Save external data file in same directory as model - external_file_path = parent_path.Append(external_file_path); + external_file_path = destination_file_path.parent_path() / external_file_path; } - std::ofstream external_stream(external_file_path.ToPathString(), std::ofstream::out | std::ofstream::binary); + std::ofstream external_stream(external_file_path.native(), std::ofstream::out | std::ofstream::binary); ORT_ENFORCE(external_stream.is_open()); int64_t external_offset = 0; diff --git a/onnxruntime/core/graph/graph_flatbuffers_utils.cc b/onnxruntime/core/graph/graph_flatbuffers_utils.cc index 7dfdba687517f..922759b02e75f 100644 --- a/onnxruntime/core/graph/graph_flatbuffers_utils.cc +++ b/onnxruntime/core/graph/graph_flatbuffers_utils.cc @@ -28,7 +28,7 @@ SaveDims(flatbuffers::FlatBufferBuilder& builder, const DimsFieldType& dims) { Status SaveInitializerOrtFormat(flatbuffers::FlatBufferBuilder& builder, const TensorProto& initializer, - const Path& model_path, + const std::filesystem::path& model_path, flatbuffers::Offset& fbs_tensor, const ExternalDataWriter& external_writer) { auto name = SaveStringToOrtFormat(builder, initializer.has_name(), initializer.name()); @@ -85,7 +85,7 @@ Status SaveInitializerOrtFormat(flatbuffers::FlatBufferBuilder& builder, #if !defined(DISABLE_SPARSE_TENSORS) Status SaveSparseInitializerOrtFormat(flatbuffers::FlatBufferBuilder& builder, const ONNX_NAMESPACE::SparseTensorProto& initializer, - const Path& model_path, + const std::filesystem::path& model_path, flatbuffers::Offset& fbs_sparse_tensor) { // values const auto& values = initializer.values(); @@ -126,7 +126,7 @@ Status SaveSparseInitializerOrtFormat(flatbuffers::FlatBufferBuilder& builder, Status SaveAttributeOrtFormat(flatbuffers::FlatBufferBuilder& builder, const AttributeProto& attr_proto, flatbuffers::Offset& fbs_attr, - const Path& model_path, + const std::filesystem::path& model_path, const onnxruntime::Graph* subgraph) { auto name = SaveStringToOrtFormat(builder, attr_proto.has_name(), attr_proto.name()); auto doc_string = SaveStringToOrtFormat(builder, attr_proto.has_doc_string(), attr_proto.doc_string()); diff --git a/onnxruntime/core/graph/graph_flatbuffers_utils.h b/onnxruntime/core/graph/graph_flatbuffers_utils.h index 33eba34fbaff0..224d966500e18 100644 --- a/onnxruntime/core/graph/graph_flatbuffers_utils.h +++ b/onnxruntime/core/graph/graph_flatbuffers_utils.h @@ -4,6 +4,7 @@ #pragma once #include +#include #include "core/common/flatbuffers.h" @@ -71,13 +72,13 @@ constexpr uint32_t kMinimumSizeForExternalData = 64; /// if the initializer contains kMinimumSizeForExternalData bytes or more, and not string data. 
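/// For example (illustrative): a float32 initializer with 16 elements (64 bytes) meets the
/// threshold and can be routed to the external writer, while the same data with 15 elements
/// (60 bytes), or any string tensor, stays inline in the flatbuffer.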
Status SaveInitializerOrtFormat( flatbuffers::FlatBufferBuilder& builder, const ONNX_NAMESPACE::TensorProto& initializer, - const Path& model_path, flatbuffers::Offset& fbs_tensor, + const std::filesystem::path& model_path, flatbuffers::Offset& fbs_tensor, const ExternalDataWriter& external_writer = nullptr); #if !defined(DISABLE_SPARSE_TENSORS) Status SaveSparseInitializerOrtFormat( flatbuffers::FlatBufferBuilder& builder, const ONNX_NAMESPACE::SparseTensorProto& initializer, - const Path& model_path, flatbuffers::Offset& fbs_sparse_tensor); + const std::filesystem::path& model_path, flatbuffers::Offset& fbs_sparse_tensor); #endif // !defined(DISABLE_SPARSE_TENSORS) // Convert a given AttributeProto into fbs::Attribute @@ -86,7 +87,7 @@ Status SaveSparseInitializerOrtFormat( // instead of the GraphProto in attr_proto Status SaveAttributeOrtFormat( flatbuffers::FlatBufferBuilder& builder, const ONNX_NAMESPACE::AttributeProto& attr_proto, - flatbuffers::Offset& fbs_attr, const Path& model_path, + flatbuffers::Offset& fbs_attr, const std::filesystem::path& model_path, const onnxruntime::Graph* subgraph); /// diff --git a/onnxruntime/core/graph/model.cc b/onnxruntime/core/graph/model.cc index b3935e69ad7b1..d93a92d59dea5 100644 --- a/onnxruntime/core/graph/model.cc +++ b/onnxruntime/core/graph/model.cc @@ -81,7 +81,7 @@ Model::Model(const std::string& graph_name, const std::vector& model_local_functions, const logging::Logger& logger, const ModelOptions& options) - : model_path_(Path::Parse(model_path)) { + : model_path_(model_path) { model_proto_.set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION); model_proto_.mutable_graph()->set_name(graph_name); model_metadata_ = model_metadata; @@ -159,7 +159,7 @@ Model::Model(const ModelProto& model_proto, const PathString& model_path, Model::Model(ModelProto&& model_proto, const PathString& model_path, const IOnnxRuntimeOpSchemaRegistryList* local_registries, const logging::Logger& logger, const ModelOptions& options) - : model_path_(Path::Parse(model_path)) { + : model_path_(model_path) { if (!utils::HasGraph(model_proto)) { ORT_THROW("ModelProto does not have a graph."); } @@ -642,7 +642,7 @@ Status Model::Save(Model& model, const std::string& file_path) { return SaveModel(model, file_path); } -Status Model::SaveWithExternalInitializers(Model& model, const PathString& file_path, +Status Model::SaveWithExternalInitializers(Model& model, const std::filesystem::path& file_path, const std::string& external_file_name, size_t initializer_size_threshold) { return SaveModelWithExternalInitializers(model, file_path, external_file_name, initializer_size_threshold); @@ -759,7 +759,7 @@ Status Model::Save(Model& model, int p_fd) { Status Model::SaveWithExternalInitializers(Model& model, int fd, - const PathString& file_path, + const std::filesystem::path& file_path, const std::string& external_file_name, size_t initializer_size_threshold) { if (fd < 0) { diff --git a/onnxruntime/core/graph/model.h b/onnxruntime/core/graph/model.h index 6f4b7f4f9f00b..db18d56364679 100644 --- a/onnxruntime/core/graph/model.h +++ b/onnxruntime/core/graph/model.h @@ -174,7 +174,7 @@ class Model { const ModelMetaData& MetaData() const noexcept; // Gets the path from which the model was loaded, if any. - const Path& ModelPath() const noexcept { return model_path_; } + const std::filesystem::path& ModelPath() const noexcept { return model_path_; } // Get model's main graph. 
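  // Typical access pattern (illustrative):
  //   Graph& graph = model.MainGraph();
  //   ORT_THROW_IF_ERROR(graph.Resolve());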
Graph& MainGraph() noexcept; @@ -199,14 +199,16 @@ class Model { static common::Status Save(Model& model, int fd); // Save the model to file using an external file for initializers larger than the given threshold (in bytes). + // external_file_name must be a UTF-8 string static common::Status SaveWithExternalInitializers(Model& model, - const PathString& file_path, + const std::filesystem::path& file_path, const std::string& external_file_name, size_t initializer_size_threshold); + // external_file_name must be a UTF-8 string static common::Status SaveWithExternalInitializers(Model& model, int fd, - const PathString& file_path, + const std::filesystem::path& file_path, const std::string& external_file_name, size_t initializer_size_threshold); @@ -332,7 +334,7 @@ class Model { ModelMetaData model_metadata_; // Path to model file. May be empty. - const Path model_path_; + const std::filesystem::path model_path_; // Main graph of the model. std::unique_ptr graph_; diff --git a/onnxruntime/core/optimizer/initializer.cc b/onnxruntime/core/optimizer/initializer.cc index 3679a40d32eee..7d80e6e5d3a76 100644 --- a/onnxruntime/core/optimizer/initializer.cc +++ b/onnxruntime/core/optimizer/initializer.cc @@ -25,13 +25,13 @@ Initializer::Initializer(ONNX_NAMESPACE::TensorProto_DataType data_type, } } -Initializer::Initializer(const ONNX_NAMESPACE::TensorProto& tensor_proto, const Path& model_path) { +Initializer::Initializer(const ONNX_NAMESPACE::TensorProto& tensor_proto, const std::filesystem::path& model_path) { ORT_ENFORCE(utils::HasDataType(tensor_proto), "Initializer must have a datatype"); #if !defined(__wasm__) // using full filepath is required by utils::TensorProtoToTensor(). One exception is WebAssembly platform, where // external data is not loaded from real file system. if (utils::HasExternalData(tensor_proto)) { - ORT_ENFORCE(!model_path.IsEmpty(), + ORT_ENFORCE(!model_path.empty(), "model_path must not be empty. 
Ensure that a path is provided when the model is created or loaded."); } #endif @@ -46,7 +46,7 @@ Initializer::Initializer(const ONNX_NAMESPACE::TensorProto& tensor_proto, const // This must be pre-allocated Tensor w(DataTypeImpl::TensorTypeFromONNXEnum(proto_data_type)->GetElementType(), proto_shape, std::make_shared()); - ORT_THROW_IF_ERROR(utils::TensorProtoToTensor(Env::Default(), model_path.ToPathString().c_str(), tensor_proto, w)); + ORT_THROW_IF_ERROR(utils::TensorProtoToTensor(Env::Default(), model_path, tensor_proto, w)); data_ = std::move(w); } diff --git a/onnxruntime/core/optimizer/initializer.h b/onnxruntime/core/optimizer/initializer.h index 78e3fd6a3d24e..b8ae2188beb5d 100644 --- a/onnxruntime/core/optimizer/initializer.h +++ b/onnxruntime/core/optimizer/initializer.h @@ -7,7 +7,7 @@ #include #include #include - +#include #include "core/common/common.h" #include "core/common/narrow.h" #include "core/common/path.h" @@ -28,7 +28,7 @@ class Initializer final { gsl::span dims); Initializer(const ONNX_NAMESPACE::TensorProto& tensor_proto, - const Path& model_path = {}); + const std::filesystem::path& model_path = {}); ~Initializer() = default; diff --git a/onnxruntime/core/optimizer/matmul_scale_fusion.cc b/onnxruntime/core/optimizer/matmul_scale_fusion.cc index e4cdeadbf54d7..1f8b1a4c878e3 100644 --- a/onnxruntime/core/optimizer/matmul_scale_fusion.cc +++ b/onnxruntime/core/optimizer/matmul_scale_fusion.cc @@ -17,7 +17,7 @@ namespace onnxruntime { namespace { template struct ExtractScalarAsFloatDispatchTarget { - Status operator()(const ONNX_NAMESPACE::TensorProto& tensor_proto, const Path& model_path, float& scalar_float) { + Status operator()(const ONNX_NAMESPACE::TensorProto& tensor_proto, const std::filesystem::path& model_path, float& scalar_float) { T scalar; ORT_RETURN_IF_ERROR(utils::UnpackTensor(tensor_proto, model_path, &scalar, 1)); scalar_float = static_cast(scalar); diff --git a/onnxruntime/core/optimizer/optimizer_execution_frame.cc b/onnxruntime/core/optimizer/optimizer_execution_frame.cc index 1eabc079f3a20..ed7d5feb2beb3 100644 --- a/onnxruntime/core/optimizer/optimizer_execution_frame.cc +++ b/onnxruntime/core/optimizer/optimizer_execution_frame.cc @@ -30,7 +30,7 @@ static size_t EstimateInputsOutputs(gsl::span nodes) { OptimizerExecutionFrame::Info::Info(const std::vector& nodes, const InitializedTensorSet& initialized_tensor_set, - const Path& model_path, + const std::filesystem::path& model_path, const IExecutionProvider& execution_provider, const std::function& is_sparse_initializer_func) : execution_provider_(execution_provider), @@ -52,7 +52,7 @@ OptimizerExecutionFrame::Info::Info(const std::vector& nodes, OrtValue ort_value; ORT_RETURN_IF_ERROR( utils::TensorProtoToOrtValue(Env::Default(), - model_path.IsEmpty() ? 
nullptr : model_path.ToPathString().c_str(), + model_path, tensor_proto, allocator_ptr_, ort_value)); initializers_[idx] = std::move(ort_value); @@ -77,7 +77,7 @@ OptimizerExecutionFrame::Info::Info(const std::vector& nodes, OptimizerExecutionFrame::Info::Info(const std::vector& nodes, const std::unordered_map& initialized_tensor_set, - const Path& model_path, + const std::filesystem::path& /* model_path */, const IExecutionProvider& execution_provider, const std::function& is_sparse_initializer_func) : execution_provider_(execution_provider), @@ -88,8 +88,7 @@ OptimizerExecutionFrame::Info::Info(const std::vector& nodes, ORT_THROW_IF_ERROR(data_transfer_mgr_.RegisterDataTransfer(std::make_unique())); // Create MLValues related maps - auto initialize_maps = [this, &initialized_tensor_set, &model_path](const NodeArg& arg, size_t /*index*/) -> Status { - (void)model_path; + auto initialize_maps = [this, &initialized_tensor_set](const NodeArg& arg, size_t /*index*/) -> Status { int idx = ort_value_name_idx_map_.Add(arg.Name()); ort_value_idx_nodearg_map_.insert_or_assign(idx, &arg); diff --git a/onnxruntime/core/optimizer/optimizer_execution_frame.h b/onnxruntime/core/optimizer/optimizer_execution_frame.h index 3dbf6c1d97aa6..ffda05d349c39 100644 --- a/onnxruntime/core/optimizer/optimizer_execution_frame.h +++ b/onnxruntime/core/optimizer/optimizer_execution_frame.h @@ -24,13 +24,13 @@ class OptimizerExecutionFrame final : public IExecutionFrame { public: Info(const std::vector& nodes, const InitializedTensorSet& initialized_tensor_set, - const Path& model_path, + const std::filesystem::path& model_path, const IExecutionProvider& execution_provider, const std::function& is_sparse_initializer_func); Info(const std::vector& nodes, const std::unordered_map& initialized_tensor_set, - const Path& model_path, + const std::filesystem::path& model_path, const IExecutionProvider& execution_provider, const std::function& is_sparse_initializer_func); diff --git a/onnxruntime/core/optimizer/qdq_transformer/qdq_util.cc b/onnxruntime/core/optimizer/qdq_transformer/qdq_util.cc index e245636ce9a84..c13da669b2f46 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/qdq_util.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/qdq_util.cc @@ -17,7 +17,7 @@ namespace onnxruntime::QDQ { bool IsQDQPairSupported( const Node& q_node, const Node& dq_node, const GetConstantInitializerFn& get_const_initializer, - const Path& model_path) { + const std::filesystem::path& model_path) { ConstPointerContainer> dq_input_defs = dq_node.InputDefs(); ConstPointerContainer> q_input_defs = q_node.InputDefs(); @@ -79,7 +79,7 @@ bool IsQDQPairSupported( bool IsDQQConversion( const Node& dq_node, const Node& q_node, const GetConstantInitializerFn& get_const_initializer, - const Path& model_path) { + const std::filesystem::path& model_path) { ConstPointerContainer> dq_input_defs = dq_node.InputDefs(); ConstPointerContainer> q_input_defs = q_node.InputDefs(); diff --git a/onnxruntime/core/optimizer/qdq_transformer/qdq_util.h b/onnxruntime/core/optimizer/qdq_transformer/qdq_util.h index 8333168b0093f..ab93efa8e85b2 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/qdq_util.h +++ b/onnxruntime/core/optimizer/qdq_transformer/qdq_util.h @@ -5,6 +5,7 @@ #include #include +#include namespace ONNX_NAMESPACE { class TensorProto; @@ -36,7 +37,7 @@ using GetConstantInitializerFn = std::function Q sequence represents a conversion in quantization data type. 
// Example of uint8 to uint16: @@ -48,7 +49,7 @@ bool IsQDQPairSupported( bool IsDQQConversion( const Node& dq_node, const Node& q_node, const GetConstantInitializerFn& get_const_initializer, - const Path& model_path); + const std::filesystem::path& model_path); // Check if DQ is supported in extended level QDQ transformers. It requires: // 1. DQ doesn't have optional input. diff --git a/onnxruntime/core/optimizer/transpose_optimization/ort_optimizer_api_impl.cc b/onnxruntime/core/optimizer/transpose_optimization/ort_optimizer_api_impl.cc index c532f56b3d3d9..5ec51b6f30f18 100644 --- a/onnxruntime/core/optimizer/transpose_optimization/ort_optimizer_api_impl.cc +++ b/onnxruntime/core/optimizer/transpose_optimization/ort_optimizer_api_impl.cc @@ -46,11 +46,11 @@ class ApiValueInfo final : public api::ValueInfoRef { class ApiTensor final : public api::TensorRef { private: const onnx::TensorProto& tensor_proto_; - const Path& model_path_; + const std::filesystem::path& model_path_; AllocatorPtr cpu_allocator_; public: - explicit ApiTensor(const onnx::TensorProto& tensor_proto, const Path& model_path, AllocatorPtr cpu_allocator) + explicit ApiTensor(const onnx::TensorProto& tensor_proto, const std::filesystem::path& model_path, AllocatorPtr cpu_allocator) : tensor_proto_(tensor_proto), model_path_(model_path), cpu_allocator_(std::move(cpu_allocator)) {} const onnx::TensorProto& TensorProto() { @@ -289,10 +289,11 @@ std::vector ApiTensor::Data() const { auto tensor_shape_dims = utils::GetTensorShapeFromTensorProto(tensor_proto_); TensorShape tensor_shape{std::move(tensor_shape_dims)}; onnxruntime::Tensor tensor(tensor_dtype, tensor_shape, cpu_allocator_); - ORT_THROW_IF_ERROR(utils::TensorProtoToTensor(Env::Default(), model_path_.ToPathString().c_str(), + ORT_THROW_IF_ERROR(utils::TensorProtoToTensor(Env::Default(), model_path_, tensor_proto_, tensor)); size_t num_bytes = gsl::narrow_cast(tensor.SizeInBytes()); const uint8_t* data = static_cast(tensor.DataRaw()); + // TODO: the returned data is unaligned return std::vector(data, data + num_bytes); } // @@ -554,7 +555,7 @@ void ApiGraph::TransposeInitializer(std::string_view name, const std::vector GetAttribute(const OpKernelInfo& info, const std::string& name, c } const SafeInt tensor_size(element_count); std::vector out(tensor_size); - result = utils::UnpackTensor(attr_tensor_proto, Path(), out.data(), tensor_size); + result = utils::UnpackTensor(attr_tensor_proto, std::filesystem::path(), out.data(), tensor_size); ORT_ENFORCE(result.IsOK(), "LabelEncoder could not unpack tensor attribute ", name); return out; } @@ -134,7 +134,7 @@ T GetDefault(const OpKernelInfo& info, const std::string& attr_name, const T& ba auto result = info.GetAttr("default_tensor", &attr_tensor_proto); if (result.IsOK() && utils::HasDataType(attr_tensor_proto)) { T default_value; - result = utils::UnpackTensor(attr_tensor_proto, Path(), &default_value, 1); + result = utils::UnpackTensor(attr_tensor_proto, std::filesystem::path(), &default_value, 1); ORT_ENFORCE(result.IsOK(), "LabelEncoder could not unpack default tensor ", attr_name); return default_value; } else if constexpr (std::is_same_v || std::is_same_v || std::is_same_v) { diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc index 6f7e1fb607864..59d58ca0c458b 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc @@ -89,14 
+89,8 @@ OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer, if (!(GetEnvironmentVar("ORT_OPENVINO_ENABLE_CI_LOG").empty())) { std::cout << "In the OpenVINO EP" << std::endl; } -#ifdef _WIN32 - std::wstring onnx_path = graph_viewer.ModelPath().ToPathString(); - global_context_->onnx_model_path_name = - std::string(onnx_path.begin(), onnx_path.end()); -#else - global_context_->onnx_model_path_name = - graph_viewer.ModelPath().ToPathString(); -#endif + global_context_->onnx_model_path_name = graph_viewer.ModelPath().string(); + global_context_->onnx_opset_version = graph_viewer.DomainToVersionMap().at(kOnnxDomain); diff --git a/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc b/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc index 27d8a0f06f565..6e6a80f097c12 100644 --- a/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc +++ b/onnxruntime/core/providers/shared_library/provider_bridge_provider.cc @@ -503,7 +503,7 @@ template <> Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, /*out*/ int64_t* p_data, size_t expected_size) { return g_host->UnpackTensor(tensor, raw_data, raw_data_len, p_data, expected_size); } template <> Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, /*out*/ uint64_t* p_data, size_t expected_size) { return g_host->UnpackTensor(tensor, raw_data, raw_data_len, p_data, expected_size); } -Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& tensor, const Path& model_path, +Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& tensor, const std::filesystem::path& model_path, /*out*/ std::vector& unpacked_tensor) { return g_host->UnpackInitializerData(tensor, model_path, unpacked_tensor); } diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h index cc3b13f696a96..7dac474f011d4 100644 --- a/onnxruntime/core/providers/shared_library/provider_interfaces.h +++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h @@ -5,6 +5,7 @@ #include #include #include +#include // Public wrappers around internal ort interfaces (currently) #include "core/providers/shared_library/provider_host_api.h" @@ -209,7 +210,7 @@ struct ProviderHost { virtual Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, /*out*/ uint32_t* p_data, size_t expected_size) = 0; virtual Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, /*out*/ int64_t* p_data, size_t expected_size) = 0; virtual Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, /*out*/ uint64_t* p_data, size_t expected_size) = 0; - virtual Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& tensor, const Path& model_path, + virtual Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& tensor, const std::filesystem::path& model_path, /*out*/ std::vector& unpacked_tensor) = 0; virtual uint16_t math__floatToHalf(float f) = 0; @@ -784,7 +785,7 @@ struct ProviderHost { virtual const std::string& NodeUnit__Name(const NodeUnit* p) noexcept = 0; virtual int NodeUnit__SinceVersion(const NodeUnit* p) noexcept = 0; virtual NodeIndex NodeUnit__Index(const NodeUnit* p) noexcept = 0; - virtual const Path& NodeUnit__ModelPath(const NodeUnit* p) noexcept = 0; + virtual const 
std::filesystem::path& NodeUnit__ModelPath(const NodeUnit* p) noexcept = 0; virtual ProviderType NodeUnit__GetExecutionProviderType(const NodeUnit* p) noexcept = 0; virtual const Node& NodeUnit__GetNode(const NodeUnit* p) noexcept = 0; @@ -834,7 +835,7 @@ struct ProviderHost { virtual const Graph* Graph__ParentGraph(const Graph* p) const = 0; virtual Graph* Graph__MutableParentGraph(Graph* p) = 0; virtual const std::string& Graph__Name(const Graph* p) const noexcept = 0; - virtual const Path& Graph__ModelPath(const Graph* p) const = 0; + virtual const std::filesystem::path& Graph__ModelPath(const Graph* p) const = 0; virtual const std::vector& Graph__GetInputsIncludingInitializers(const Graph* p) const noexcept = 0; virtual bool Graph__IsSubgraph(const Graph* p) = 0; virtual const Node* Graph__GetProducerNode(const Graph* p, const std::string& node_arg_name) const = 0; @@ -868,7 +869,7 @@ struct ProviderHost { virtual std::unique_ptr GraphViewer__CreateModel(const GraphViewer* p, const logging::Logger& logger) = 0; virtual const std::string& GraphViewer__Name(const GraphViewer* p) noexcept = 0; - virtual const Path& GraphViewer__ModelPath(const GraphViewer* p) noexcept = 0; + virtual const std::filesystem::path& GraphViewer__ModelPath(const GraphViewer* p) noexcept = 0; virtual const Node* GraphViewer__GetNode(const GraphViewer* p, NodeIndex node_index) = 0; virtual const NodeArg* GraphViewer__GetNodeArg(const GraphViewer* p, const std::string& name) = 0; diff --git a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h index fd2540b42a3db..ac16029e86639 100644 --- a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h +++ b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h @@ -811,7 +811,7 @@ struct NodeUnit final { const std::string& Name() const noexcept { return g_host->NodeUnit__Name(this); } int SinceVersion() const noexcept { return g_host->NodeUnit__SinceVersion(this); } NodeIndex Index() const noexcept { return g_host->NodeUnit__Index(this); } - const Path& ModelPath() const noexcept { return g_host->NodeUnit__ModelPath(this); } + const std::filesystem::path& ModelPath() const noexcept { return g_host->NodeUnit__ModelPath(this); } ProviderType GetExecutionProviderType() const noexcept { return g_host->NodeUnit__GetExecutionProviderType(this); } const Node& GetNode() const noexcept { return g_host->NodeUnit__GetNode(this); } @@ -873,7 +873,7 @@ struct Graph final { const Graph* ParentGraph() const { return g_host->Graph__ParentGraph(this); } Graph* MutableParentGraph() { return g_host->Graph__MutableParentGraph(this); } const std::string& Name() const noexcept { return g_host->Graph__Name(this); } - const Path& ModelPath() const { return g_host->Graph__ModelPath(this); } + const std::filesystem::path& ModelPath() const { return g_host->Graph__ModelPath(this); } const std::vector& GetInputsIncludingInitializers() const noexcept { return g_host->Graph__GetInputsIncludingInitializers(this); } bool IsSubgraph() const { return g_host->Graph__IsSubgraph(this); } const Node* GetProducerNode(const std::string& node_arg_name) const { return g_host->Graph__GetProducerNode(this, node_arg_name); } @@ -923,7 +923,7 @@ class GraphViewer final { std::unique_ptr CreateModel(const logging::Logger& logger) const { return g_host->GraphViewer__CreateModel(this, logger); } const std::string& Name() const noexcept { return g_host->GraphViewer__Name(this); } - const Path& ModelPath() const 
noexcept { return g_host->GraphViewer__ModelPath(this); } + const std::filesystem::path& ModelPath() const noexcept { return g_host->GraphViewer__ModelPath(this); } const Node* GetNode(NodeIndex node_index) const { return g_host->GraphViewer__GetNode(this, node_index); } const NodeArg* GetNodeArg(const std::string& name) const { return g_host->GraphViewer__GetNodeArg(this, name); } diff --git a/onnxruntime/core/session/custom_ops.cc b/onnxruntime/core/session/custom_ops.cc index d0c46142ac060..59c36488d2fd9 100644 --- a/onnxruntime/core/session/custom_ops.cc +++ b/onnxruntime/core/session/custom_ops.cc @@ -584,7 +584,8 @@ ORT_API_STATUS_IMPL(OrtApis::KernelInfoGetAttribute_tensor, _In_ const OrtKernel auto tensorp = std::make_unique(type, tensor_shape, std::move(alloc_ptr)); // Deserialize TensorProto into pre-allocated, empty Tensor. - status = onnxruntime::utils::TensorProtoToTensor(onnxruntime::Env::Default(), nullptr, tensor_proto, *tensorp); + // TODO: here the TensorProto loses model path information, so it cannot be an external tensor. + status = onnxruntime::utils::TensorProtoToTensor(onnxruntime::Env::Default(), std::filesystem::path(), tensor_proto, *tensorp); if (!status.IsOK()) { return onnxruntime::ToOrtStatus(status); } diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index d1add79f0cb00..860f872ba6116 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -828,7 +828,7 @@ common::Status InferenceSession::RegisterGraphTransformer( return graph_transformer_mgr_.Register(std::move(p_graph_transformer), level); } -common::Status InferenceSession::SaveToOrtFormat(const PathString& filepath) const { +common::Status InferenceSession::SaveToOrtFormat(const std::filesystem::path& filepath) const { ORT_RETURN_IF_NOT(FLATBUFFERS_LITTLEENDIAN, "ort format only supports little-endian machines"); // Get the byte size of the ModelProto and round it to the next MB and use it as flatbuffers' init_size @@ -868,7 +868,7 @@ common::Status InferenceSession::SaveToOrtFormat(const PathString& filepath) con uint8_t* buf = builder.GetBufferPointer(); int size = builder.GetSize(); file.write(reinterpret_cast(buf), size); - ORT_RETURN_IF_NOT(file, "Failed to save ORT format model to file: ", ToUTF8String(filepath)); + ORT_RETURN_IF_NOT(file, "Failed to save ORT format model to file: ", ToUTF8String(filepath.native())); } return Status::OK(); diff --git a/onnxruntime/core/session/inference_session.h b/onnxruntime/core/session/inference_session.h index 48f6d73b077cb..a0a5fab2d2203 100644 --- a/onnxruntime/core/session/inference_session.h +++ b/onnxruntime/core/session/inference_session.h @@ -615,7 +615,7 @@ class InferenceSession { return !custom_schema_registries_.empty(); } - common::Status SaveToOrtFormat(const PathString& filepath) const; + common::Status SaveToOrtFormat(const std::filesystem::path& filepath) const; #endif /** diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index d18b3ac40d489..3bca36153ecb7 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -283,7 +283,7 @@ struct ProviderHostImpl : ProviderHost { Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, /*out*/ uint32_t* p_data, size_t expected_size) override { return utils::UnpackTensor(tensor, raw_data, raw_data_len, p_data, 
expected_size); } Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, /*out*/ int64_t* p_data, size_t expected_size) override { return utils::UnpackTensor(tensor, raw_data, raw_data_len, p_data, expected_size); } Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len, /*out*/ uint64_t* p_data, size_t expected_size) override { return utils::UnpackTensor(tensor, raw_data, raw_data_len, p_data, expected_size); } - Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& tensor, const Path& model_path, + Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& tensor, const std::filesystem::path& model_path, /*out*/ std::vector& unpacked_tensor) override { return utils::UnpackInitializerData(tensor, model_path, unpacked_tensor); } @@ -999,7 +999,7 @@ struct ProviderHostImpl : ProviderHost { const std::string& NodeUnit__Name(const NodeUnit* p) noexcept override { return p->Name(); } int NodeUnit__SinceVersion(const NodeUnit* p) noexcept override { return p->SinceVersion(); } NodeIndex NodeUnit__Index(const NodeUnit* p) noexcept override { return p->Index(); } - const Path& NodeUnit__ModelPath(const NodeUnit* p) noexcept override { return p->ModelPath(); } + const std::filesystem::path& NodeUnit__ModelPath(const NodeUnit* p) noexcept override { return p->ModelPath(); } ProviderType NodeUnit__GetExecutionProviderType(const NodeUnit* p) noexcept override { return p->GetExecutionProviderType(); } @@ -1076,7 +1076,7 @@ struct ProviderHostImpl : ProviderHost { const Graph* Graph__ParentGraph(const Graph* p) const override { return p->ParentGraph(); } Graph* Graph__MutableParentGraph(Graph* p) override { return p->MutableParentGraph(); } const std::string& Graph__Name(const Graph* p) const noexcept override { return p->Name(); } - const Path& Graph__ModelPath(const Graph* p) const override { return p->ModelPath(); } + const std::filesystem::path& Graph__ModelPath(const Graph* p) const override { return p->ModelPath(); } const std::vector& Graph__GetInputsIncludingInitializers(const Graph* p) const noexcept override { return p->GetInputsIncludingInitializers(); } bool Graph__IsSubgraph(const Graph* p) override { return p->IsSubgraph(); } const Node* Graph__GetProducerNode(const Graph* p, const std::string& node_arg_name) const override { return p->GetProducerNode(node_arg_name); } @@ -1132,7 +1132,7 @@ struct ProviderHostImpl : ProviderHost { } const std::string& GraphViewer__Name(const GraphViewer* p) noexcept override { return p->Name(); } - const Path& GraphViewer__ModelPath(const GraphViewer* p) noexcept override { return p->ModelPath(); } + const std::filesystem::path& GraphViewer__ModelPath(const GraphViewer* p) noexcept override { return p->ModelPath(); } const Node* GraphViewer__GetNode(const GraphViewer* p, NodeIndex node_index) override { return p->GetNode(node_index); } const NodeArg* GraphViewer__GetNodeArg(const GraphViewer* p, const std::string& name) override { return p->GetNodeArg(name); } diff --git a/onnxruntime/test/flatbuffers/flatbuffer_utils_test.cc b/onnxruntime/test/flatbuffers/flatbuffer_utils_test.cc index f36dbaf3d1aca..f54a74c78c801 100644 --- a/onnxruntime/test/flatbuffers/flatbuffer_utils_test.cc +++ b/onnxruntime/test/flatbuffers/flatbuffer_utils_test.cc @@ -230,7 +230,7 @@ TEST(FlatbufferUtilsTest, ExternalWriteReadWithLoadInitializers) { std::vector> fbs_tensors; for (const auto& initializer : initializers) { flatbuffers::Offset fbs_tensor; - 
ASSERT_STATUS_OK(SaveInitializerOrtFormat(builder, initializer, Path(), fbs_tensor, writer)); + ASSERT_STATUS_OK(SaveInitializerOrtFormat(builder, initializer, std::filesystem::path(), fbs_tensor, writer)); fbs_tensors.push_back(fbs_tensor); } diff --git a/onnxruntime/test/framework/save_model_with_external_initializers.cc b/onnxruntime/test/framework/save_model_with_external_initializers.cc index 19c7bf476e6e1..8bad22dc2c77b 100644 --- a/onnxruntime/test/framework/save_model_with_external_initializers.cc +++ b/onnxruntime/test/framework/save_model_with_external_initializers.cc @@ -1,6 +1,8 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +#include "core/common/common.h" +#include "core/common/status.h" #include "core/common/path_string.h" #include "core/framework/data_types.h" #include "core/graph/model.h" @@ -17,48 +19,48 @@ using namespace onnxruntime; namespace onnxruntime { namespace test { -void LoadSaveAndCompareModel(const std::string& input_onnx, - const std::string& input_external_init_file, - const std::string& output_onnx, - const std::string& output_external_init_file, - size_t initializer_size_threshold) { +Status LoadSaveAndCompareModel(const std::filesystem::path& input_onnx, + const std::filesystem::path& input_external_init_file, + const std::filesystem::path& output_onnx, + const std::filesystem::path& output_external_init_file, + size_t initializer_size_threshold) { std::shared_ptr model; - ASSERT_STATUS_OK(Model::Load(ToPathString(input_onnx), model, nullptr, DefaultLoggingManager().DefaultLogger())); - std::remove(output_onnx.c_str()); - std::remove(output_external_init_file.c_str()); - ASSERT_STATUS_OK(Model::SaveWithExternalInitializers(*model, ToPathString(output_onnx), output_external_init_file, initializer_size_threshold)); + ORT_RETURN_IF_ERROR(Model::Load(input_onnx, model, nullptr, DefaultLoggingManager().DefaultLogger())); + std::filesystem::remove(output_onnx); + std::filesystem::remove(output_external_init_file); + ORT_RETURN_IF_ERROR(Model::SaveWithExternalInitializers(*model, output_onnx, ToUTF8String(output_external_init_file.native()), initializer_size_threshold)); std::shared_ptr model_from_external; - ASSERT_STATUS_OK(Model::Load(ToPathString(output_onnx), model_from_external, nullptr, DefaultLoggingManager().DefaultLogger())); + ORT_RETURN_IF_ERROR(Model::Load(ToPathString(output_onnx), model_from_external, nullptr, DefaultLoggingManager().DefaultLogger())); Graph& graph = model->MainGraph(); // Perform shape inference on the graph, if this succeeds then it means that we could correctly read the // integer initializers used by reshape and transpose. - ASSERT_STATUS_OK(graph.Resolve()); + ORT_RETURN_IF_ERROR(graph.Resolve()); Graph& graph_from_external = model_from_external->MainGraph(); InitializedTensorSet initializers = graph.GetAllInitializedTensors(); InitializedTensorSet initializers_from_external = graph_from_external.GetAllInitializedTensors(); - ASSERT_EQ(initializers.size(), initializers_from_external.size()); + ORT_RETURN_IF_NOT(initializers.size() == initializers_from_external.size(), "size mismatch"); // Compare the initializers of the two versions. 
- Path model_path{}; - Path external_data_path{}; + std::filesystem::path model_path{}; + std::filesystem::path external_data_path{}; for (auto i : initializers) { const std::string kInitName = i.first; const ONNX_NAMESPACE::TensorProto* tensor_proto = i.second; const ONNX_NAMESPACE::TensorProto* from_external_tensor_proto = initializers_from_external[kInitName]; std::vector tensor_proto_data; - model_path = Path::Parse(ToPathString(input_onnx)); - external_data_path = (input_external_init_file.size()) ? model_path.ParentPath().Append(Path::Parse(ToPathString(input_external_init_file))) : Path(); + model_path = input_onnx; + external_data_path = (!input_external_init_file.empty()) ? (model_path.parent_path() / input_external_init_file) : std::filesystem::path(); ORT_THROW_IF_ERROR(utils::UnpackInitializerData(*tensor_proto, external_data_path, tensor_proto_data)); size_t tensor_proto_size = tensor_proto_data.size(); std::vector from_external_tensor_proto_data; - model_path = Path::Parse(ToPathString(output_onnx)); - external_data_path = model_path.ParentPath().Append(Path::Parse(ToPathString(output_external_init_file))); + model_path = output_onnx; + external_data_path = model_path.parent_path() / output_external_init_file; ORT_THROW_IF_ERROR(utils::UnpackInitializerData(*from_external_tensor_proto, model_path, from_external_tensor_proto_data)); size_t from_external_tensor_proto_size = from_external_tensor_proto_data.size(); @@ -70,22 +72,23 @@ void LoadSaveAndCompareModel(const std::string& input_onnx, EXPECT_EQ(from_external_tensor_proto->data_location(), ONNX_NAMESPACE::TensorProto_DataLocation::TensorProto_DataLocation_EXTERNAL); } - ASSERT_EQ(tensor_proto_size, from_external_tensor_proto_size); - EXPECT_EQ(memcmp(tensor_proto_data.data(), from_external_tensor_proto_data.data(), tensor_proto_size), 0); + ORT_RETURN_IF_NOT(tensor_proto_size == from_external_tensor_proto_size, "size mismatch"); + ORT_RETURN_IF_NOT(memcmp(tensor_proto_data.data(), from_external_tensor_proto_data.data(), tensor_proto_size) == 0, "data mismatch"); } // Cleanup. 
- ASSERT_EQ(std::remove(output_onnx.c_str()), 0); - ASSERT_EQ(std::remove(PathToUTF8String(external_data_path.ToPathString()).c_str()), 0); + ORT_RETURN_IF_NOT(std::filesystem::remove(output_onnx), "delete file failed"); + ORT_RETURN_IF_NOT(std::filesystem::remove(external_data_path), "delete file failed"); + return Status::OK(); } // Original model does not have external initializers TEST(SaveWithExternalInitializers, Mnist) { - LoadSaveAndCompareModel("testdata/mnist.onnx", "", "testdata/mnist_with_external_initializers.onnx", "mnist_external_initializers.bin", 100); + ASSERT_STATUS_OK(LoadSaveAndCompareModel(ORT_TSTR("testdata/mnist.onnx"), ORT_TSTR(""), ORT_TSTR("testdata/mnist_with_external_initializers.onnx"), ORT_TSTR("mnist_external_initializers.bin"), 100)); } // Original model has external initializers TEST(SaveWithExternalInitializers, ModelWithOriginalExternalData) { - LoadSaveAndCompareModel("testdata/model_with_orig_ext_data.onnx", "model_with_orig_ext_data.onnx.data", "testdata/model_with_new_external_initializers.onnx", "model_with_new_external_initializers.bin", 0); + ASSERT_STATUS_OK(LoadSaveAndCompareModel(ORT_TSTR("testdata/model_with_orig_ext_data.onnx"), ORT_TSTR("model_with_orig_ext_data.onnx.data"), ORT_TSTR("testdata/model_with_new_external_initializers.onnx"), ORT_TSTR("model_with_new_external_initializers.bin"), 0)); } } // namespace test diff --git a/onnxruntime/test/framework/sparse_kernels_test.cc b/onnxruntime/test/framework/sparse_kernels_test.cc index 80f23b054a4ad..fa42bb6e96cd5 100644 --- a/onnxruntime/test/framework/sparse_kernels_test.cc +++ b/onnxruntime/test/framework/sparse_kernels_test.cc @@ -795,7 +795,7 @@ static void TestConversion(bool use_1D_indices, int32_t indices_type, TensorProto dense; // Path is required for loading external data (if any) // When path is empty it will look for the data in current dir - ASSERT_STATUS_OK(utils::ConstantNodeProtoToTensorProto(node, Path(), dense)); + ASSERT_STATUS_OK(utils::ConstantNodeProtoToTensorProto(node, std::filesystem::path(), dense)); gsl::span expected_span = gsl::make_span(expected.data(), expected.size()); checker(expected_span, dense); @@ -810,7 +810,7 @@ static void TestConversionAllZeros(bool use_1D_indices, TensorProto dense; // Path is required for loading external data (if any) // When path is empty it will look for the data in current dir - ASSERT_STATUS_OK(utils::ConstantNodeProtoToTensorProto(node, Path(), dense)); + ASSERT_STATUS_OK(utils::ConstantNodeProtoToTensorProto(node, std::filesystem::path(), dense)); gsl::span expected_span = gsl::make_span(expected.data(), expected.size()); checker(expected_span, dense); @@ -1109,30 +1109,31 @@ void RawSparseDataChecker(gsl::span expected_bfloat, } template -static void TestDenseToSparseConversionValues(size_t indices_start, - std::function& values, TensorProto& tp)> inserter, - std::function expected, - gsl::span expected_indicies, - const SparseTensorProto& actual)> - checker) { +static Status TestDenseToSparseConversionValues(size_t indices_start, + std::function& values, TensorProto& tp)> inserter, + std::function expected, + gsl::span expected_indicies, + const SparseTensorProto& actual)> + checker) { std::vector expected_values; std::vector expected_indicies; // Path is required for loading external data // Using empty path here since the data is not external - Path model_path; + std::filesystem::path model_path; TensorProto dense_tensor = CreateDenseTensor(indices_start, inserter, expected_values, expected_indicies); SparseTensorProto 
sparse_tensor; - ASSERT_STATUS_OK(utils::DenseTensorToSparseTensorProto(dense_tensor, model_path, sparse_tensor)); + ORT_RETURN_IF_ERROR(utils::DenseTensorToSparseTensorProto(dense_tensor, model_path, sparse_tensor)); gsl::span expected_values_span = gsl::make_span(expected_values.data(), expected_values.size()); gsl::span expected_ind_span = gsl::make_span(expected_indicies.data(), expected_indicies.size()); checker(expected_values_span, expected_ind_span, sparse_tensor); + return Status::OK(); } template -static void TestDenseAllZerosToSparseConversion( +static Status TestDenseAllZerosToSparseConversion( std::function& values, TensorProto& tp)> inserter, std::function expected, gsl::span expected_indicies, @@ -1142,55 +1143,56 @@ static void TestDenseAllZerosToSparseConversion( std::vector expected_indicies; // Path is required for loading external data // Using empty path here since the data is not external - Path model_path; + std::filesystem::path model_path; TensorProto dense_tensor = CreateDenseTensorAllZeros(inserter); SparseTensorProto sparse_tensor; - ASSERT_STATUS_OK(utils::DenseTensorToSparseTensorProto(dense_tensor, model_path, sparse_tensor)); + ORT_RETURN_IF_ERROR(utils::DenseTensorToSparseTensorProto(dense_tensor, model_path, sparse_tensor)); gsl::span expected_values_span = gsl::make_span(expected_values.data(), expected_values.size()); gsl::span expected_ind_span = gsl::make_span(expected_indicies.data(), expected_indicies.size()); checker(expected_values_span, expected_ind_span, sparse_tensor); + return Status::OK(); } template -static void TestDenseToSparseConversion(size_t indices_start, - std::function& values, TensorProto& tp)> inserter, - std::function expected, - gsl::span expected_indicies, - const SparseTensorProto& actual)> - checker) { - TestDenseToSparseConversionValues(indices_start, inserter, checker); - TestDenseAllZerosToSparseConversion(inserter, checker); +static Status TestDenseToSparseConversion(size_t indices_start, + std::function& values, TensorProto& tp)> inserter, + std::function expected, + gsl::span expected_indicies, + const SparseTensorProto& actual)> + checker) { + ORT_RETURN_IF_ERROR(TestDenseToSparseConversionValues(indices_start, inserter, checker)); + return TestDenseAllZerosToSparseConversion(inserter, checker); } TEST(SparseTensorConversionTests, TestDenseToSparseConversion) { // This one will test indices that are less than max int8 value // which should result in int8 indices - TestDenseToSparseConversion( + ASSERT_STATUS_OK(TestDenseToSparseConversion( 20U, [](const std::vector& values, TensorProto& tp) { tp.set_data_type(TensorProto_DataType_FLOAT); tp.set_name("dense_float"); tp.mutable_float_data()->Add(values.cbegin(), values.cend()); }, - RawSparseDataChecker); + RawSparseDataChecker)); // This one will test indices that are max(int8) < ind < max(int16) value // which should result in int16 indices - TestDenseToSparseConversion( + ASSERT_STATUS_OK(TestDenseToSparseConversion( static_cast(std::numeric_limits::max()) + 20U, [](const std::vector& values, TensorProto& tp) { tp.set_data_type(TensorProto_DataType_DOUBLE); tp.set_name("dense_double"); tp.mutable_double_data()->Add(values.cbegin(), values.cend()); }, - RawSparseDataChecker); + RawSparseDataChecker)); // This one will test indices that are max(int16) < ind < max(int32) value // which should result in int32 indices - TestDenseToSparseConversion( + ASSERT_STATUS_OK(TestDenseToSparseConversion( static_cast(std::numeric_limits::max()) + 20U, [](const std::vector& values, 
TensorProto& tp) { tp.set_data_type(TensorProto_DataType_BFLOAT16); @@ -1199,12 +1201,12 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) { tp.mutable_int32_data()->Add(v.val); } }, - RawSparseDataChecker); + RawSparseDataChecker)); // Protobuf can not hold anything more than 2Gb and it overflows. Can't test 64-bit indices // on conversion unless explicitly created. // which should result in int32 indices - TestDenseToSparseConversion( + ASSERT_STATUS_OK(TestDenseToSparseConversion( 20U, [](const std::vector& values, TensorProto& tp) { tp.set_data_type(TensorProto_DataType_FLOAT16); @@ -1213,78 +1215,78 @@ TEST(SparseTensorConversionTests, TestDenseToSparseConversion) { tp.mutable_int32_data()->Add(v.val); } }, - RawSparseDataChecker); + RawSparseDataChecker)); - TestDenseToSparseConversion( + ASSERT_STATUS_OK(TestDenseToSparseConversion( 20U, [](const std::vector& values, TensorProto& tp) { tp.set_name("dense_int16"); tp.set_data_type(TensorProto_DataType_INT16); tp.mutable_int32_data()->Add(values.cbegin(), values.cend()); }, - RawSparseDataChecker); + RawSparseDataChecker)); - TestDenseToSparseConversion( + ASSERT_STATUS_OK(TestDenseToSparseConversion( 20U, [](const std::vector& values, TensorProto& tp) { tp.set_name("dense_uint16"); tp.set_data_type(TensorProto_DataType_UINT16); tp.mutable_int32_data()->Add(values.cbegin(), values.cend()); }, - RawSparseDataChecker); + RawSparseDataChecker)); - TestDenseToSparseConversion( + ASSERT_STATUS_OK(TestDenseToSparseConversion( 20U, [](const std::vector& values, TensorProto& tp) { tp.set_name("dense_int32"); tp.set_data_type(TensorProto_DataType_INT32); tp.mutable_int32_data()->Add(values.cbegin(), values.cend()); }, - RawSparseDataChecker); + RawSparseDataChecker)); - TestDenseToSparseConversion( + ASSERT_STATUS_OK(TestDenseToSparseConversion( 20U, [](const std::vector& values, TensorProto& tp) { tp.set_name("dense_uint32"); tp.set_data_type(TensorProto_DataType_UINT32); tp.mutable_uint64_data()->Add(values.cbegin(), values.cend()); }, - RawSparseDataChecker); + RawSparseDataChecker)); - TestDenseToSparseConversion( + ASSERT_STATUS_OK(TestDenseToSparseConversion( 20U, [](const std::vector& values, TensorProto& tp) { tp.set_name("dense_int64"); tp.set_data_type(TensorProto_DataType_INT64); tp.mutable_int64_data()->Add(values.cbegin(), values.cend()); }, - RawSparseDataChecker); + RawSparseDataChecker)); - TestDenseToSparseConversion( + ASSERT_STATUS_OK(TestDenseToSparseConversion( 20U, [](const std::vector& values, TensorProto& tp) { tp.set_name("dense_uint64"); tp.set_data_type(TensorProto_DataType_UINT64); tp.mutable_uint64_data()->Add(values.cbegin(), values.cend()); }, - RawSparseDataChecker); + RawSparseDataChecker)); - TestDenseToSparseConversion( + ASSERT_STATUS_OK(TestDenseToSparseConversion( 20U, [](const std::vector& values, TensorProto& tp) { tp.set_name("dense_int8"); tp.set_data_type(TensorProto_DataType_INT8); tp.mutable_int32_data()->Add(values.cbegin(), values.cend()); }, - RawSparseDataChecker); + RawSparseDataChecker)); - TestDenseToSparseConversion( + ASSERT_STATUS_OK(TestDenseToSparseConversion( 20U, [](const std::vector& values, TensorProto& tp) { tp.set_name("dense_int64"); RawDataWriter(values, tp, TensorProto_DataType_UINT8); }, - RawSparseDataChecker); + RawSparseDataChecker)); } TEST(SparseTensorConversionTests, CsrConversion) { diff --git a/onnxruntime/test/framework/tensorutils_test.cc b/onnxruntime/test/framework/tensorutils_test.cc index 42f82c374348e..05bdb3a9a033d 100644 --- 
a/onnxruntime/test/framework/tensorutils_test.cc +++ b/onnxruntime/test/framework/tensorutils_test.cc @@ -21,7 +21,7 @@ namespace test { // T must be float for double, and it must match with the 'type' argument template -void TestUnpackFloatTensor(TensorProto_DataType type, const Path& model_path) { +void TestUnpackFloatTensor(TensorProto_DataType type, const std::filesystem::path& model_path) { TensorProto float_tensor_proto; float_tensor_proto.set_data_type(type); T f[4] = {1.1f, 2.2f, 3.3f, 4.4f}; @@ -45,7 +45,7 @@ TEST(TensorProtoUtilsTest, UnpackTensor) { // Path is required for loading external data. // Using empty path here since this test does not test // external data utils - Path model_path; + std::filesystem::path model_path; bool_tensor_proto.set_data_type(TensorProto_DataType_BOOL); bool_tensor_proto.add_int32_data(1); @@ -142,7 +142,7 @@ void CreateTensorWithExternalData(TensorProto_DataType type, const std::vector -void UnpackAndValidate(const TensorProto& tensor_proto, const Path& model_path, const std::vector& test_data) { +void UnpackAndValidate(const TensorProto& tensor_proto, const std::filesystem::path& model_path, const std::vector& test_data) { // Unpack tensor with external data std::vector val(test_data.size()); auto st = utils::UnpackTensor(tensor_proto, model_path, val.data(), test_data.size()); @@ -155,7 +155,7 @@ void UnpackAndValidate(const TensorProto& tensor_proto, const Path& model_path, } template <> -void UnpackAndValidate(const TensorProto& tensor_proto, const Path& model_path, +void UnpackAndValidate(const TensorProto& tensor_proto, const std::filesystem::path& model_path, const std::vector& test_data) { // Unpack tensor with external data auto arr = std::make_unique(test_data.size()); @@ -169,7 +169,7 @@ void UnpackAndValidate(const TensorProto& tensor_proto, const Path& model_ } template -void TestUnpackExternalTensor(TensorProto_DataType type, const Path& model_path) { +void TestUnpackExternalTensor(TensorProto_DataType type, const std::filesystem::path& model_path) { // Create external data std::basic_string filename(ORT_TSTR("tensor_XXXXXX")); TensorProto tensor_proto; @@ -181,7 +181,7 @@ void TestUnpackExternalTensor(TensorProto_DataType type, const Path& model_path) } } // namespace TEST(TensorProtoUtilsTest, UnpackTensorWithExternalData) { - Path model_path; + std::filesystem::path model_path; TestUnpackExternalTensor(TensorProto_DataType_FLOAT, model_path); TestUnpackExternalTensor(TensorProto_DataType_DOUBLE, model_path); TestUnpackExternalTensor(TensorProto_DataType_INT32, model_path); @@ -225,7 +225,7 @@ static void TestConstantNodeConversion(const std::string& attrib_name, [&input, &add_data](AttributeProto& attrib) { add_data(attrib, input); }); TensorProto tp; - Path model_path; + std::filesystem::path model_path; EXPECT_STATUS_OK(utils::ConstantNodeProtoToTensorProto(c, model_path, tp)); EXPECT_THAT(get_data(tp), ::testing::ContainerEq(input)); @@ -311,7 +311,7 @@ template static void TestConstantNodeConversionWithExternalData(TensorProto_DataType type) { // Create a constant node with external data auto test_data = CreateValues(); - Path model_path; + std::filesystem::path model_path; PathString tensor_filename(ORT_TSTR("tensor_XXXXXX")); auto c = CreateConstantNodeWithExternalData(type, tensor_filename, test_data); std::unique_ptr file_deleter(const_cast(tensor_filename.c_str()), diff --git a/onnxruntime/test/framework/test_tensor_loader.cc b/onnxruntime/test/framework/test_tensor_loader.cc index 71d70abceb82e..17edad73085c9 100644 --- 
a/onnxruntime/test/framework/test_tensor_loader.cc +++ b/onnxruntime/test/framework/test_tensor_loader.cc @@ -34,7 +34,7 @@ TEST(CApiTensorTest, load_simple_float_tensor_not_enough_space) { OrtMemoryInfo cpu_memory_info(onnxruntime::CPU, OrtDeviceAllocator, OrtDevice(), 0, OrtMemTypeDefault); ASSERT_STATUS_NOT_OK( - utils::TensorProtoToOrtValue(Env::Default(), nullptr, p, + utils::TensorProtoToOrtValue(Env::Default(), std::filesystem::path(), p, MemBuffer(output.data(), output.size() * sizeof(float), cpu_memory_info), value)); } @@ -55,7 +55,7 @@ TEST(CApiTensorTest, load_simple_float_tensor_membuffer) { OrtValue value; OrtMemoryInfo cpu_memory_info(onnxruntime::CPU, OrtDeviceAllocator, OrtDevice(), 0, OrtMemTypeDefault); ASSERT_STATUS_OK( - utils::TensorProtoToOrtValue(Env::Default(), nullptr, p, + utils::TensorProtoToOrtValue(Env::Default(), std::filesystem::path(), p, MemBuffer(output.data(), output.size() * sizeof(float), cpu_memory_info), value)); float* real_output; @@ -83,7 +83,7 @@ TEST(CApiTensorTest, load_simple_float_tensor_allocator) { AllocatorPtr tmp_allocator = std::make_shared(); OrtValue value; - ASSERT_STATUS_OK(utils::TensorProtoToOrtValue(Env::Default(), nullptr, p, tmp_allocator, value)); + ASSERT_STATUS_OK(utils::TensorProtoToOrtValue(Env::Default(), std::filesystem::path(), p, tmp_allocator, value)); float* real_output; auto ort_st = g_ort->GetTensorMutableData(&value, (void**)&real_output); @@ -139,7 +139,7 @@ static void run_external_data_test() { OrtValue value; OrtMemoryInfo cpu_memory_info(onnxruntime::CPU, OrtDeviceAllocator, OrtDevice(), 0, OrtMemTypeDefault); ASSERT_STATUS_OK(utils::TensorProtoToOrtValue( - Env::Default(), nullptr, p, MemBuffer(output.data(), output.size() * sizeof(float), cpu_memory_info), value)); + Env::Default(), std::filesystem::path(), p, MemBuffer(output.data(), output.size() * sizeof(float), cpu_memory_info), value)); float* real_output; auto ort_st = g_ort->GetTensorMutableData(&value, (void**)&real_output); @@ -190,7 +190,7 @@ TEST(CApiTensorTest, load_huge_tensor_with_external_data) { OrtValue value; OrtMemoryInfo cpu_memory_info(onnxruntime::CPU, OrtDeviceAllocator, OrtDevice(), 0, OrtMemTypeDefault); ASSERT_STATUS_OK( - utils::TensorProtoToOrtValue(Env::Default(), nullptr, p, + utils::TensorProtoToOrtValue(Env::Default(), std::filesystem::path(), p, MemBuffer(output.data(), output.size() * sizeof(int), cpu_memory_info), value)); int* buffer; diff --git a/onnxruntime/test/optimizer/initializer_test.cc b/onnxruntime/test/optimizer/initializer_test.cc index ee93cfaa67e2a..902ea01a68f67 100644 --- a/onnxruntime/test/optimizer/initializer_test.cc +++ b/onnxruntime/test/optimizer/initializer_test.cc @@ -51,12 +51,12 @@ TEST(OptimizerInitializerTest, LoadExternalData) { return tensor_data; }(); const gsl::span tensor_data_span = gsl::make_span(tensor_data); - const auto tensor_data_dir_path = Path::Parse(ToPathString(".")); - const auto tensor_data_dir_relative_path = Path::Parse(ToPathString("OptimizerInitializerTest_LoadExternalData.bin")); + const std::filesystem::path tensor_data_dir_path = ORT_TSTR("."); + const std::filesystem::path tensor_data_dir_relative_path = ORT_TSTR("OptimizerInitializerTest_LoadExternalData.bin"); ScopedFileDeleter file_deleter{}; ASSERT_STATUS_OK(WriteExternalDataFile( - tensor_data_span, (tensor_data_dir_path / tensor_data_dir_relative_path).ToPathString(), file_deleter)); + tensor_data_span, tensor_data_dir_path / tensor_data_dir_relative_path, file_deleter)); const auto tensor_proto_base = [&]() { @@ 
-65,7 +65,7 @@ TEST(OptimizerInitializerTest, LoadExternalData) { tensor_proto.add_dims(tensor_data.size()); tensor_proto.set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT32); tensor_proto.set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL); - SetTensorProtoExternalData("location", ToUTF8String(tensor_data_dir_relative_path.ToPathString()), tensor_proto); + SetTensorProtoExternalData("location", ToUTF8String(tensor_data_dir_relative_path.native()), tensor_proto); SetTensorProtoExternalData("offset", "0", tensor_proto); SetTensorProtoExternalData("length", std::to_string(tensor_data.size() * sizeof(int32_t)), tensor_proto); return tensor_proto; @@ -95,8 +95,8 @@ TEST(OptimizerInitializerTest, LoadExternalData) { check_initializer_load(0, tensor_data.size() + 1); // bad model paths - EXPECT_THROW(Initializer i(tensor_proto_base, Path{}), OnnxRuntimeException); - EXPECT_THROW(Initializer i(tensor_proto_base, Path::Parse(ToPathString("invalid/directory"))), OnnxRuntimeException); + EXPECT_THROW(Initializer i(tensor_proto_base, std::filesystem::path()), OnnxRuntimeException); + EXPECT_THROW(Initializer i(tensor_proto_base, ORT_TSTR("invalid/directory")), OnnxRuntimeException); // bad length { @@ -165,7 +165,7 @@ void TestInitializerRawData() { tensor_proto.add_dims(4); tensor_proto.set_raw_data(data.data(), data.size() * sizeof(T)); - const Initializer init(tensor_proto, Path()); + const Initializer init(tensor_proto, std::filesystem::path()); for (size_t idx = 0; idx < data.size(); idx++) { EXPECT_EQ(data[idx], init.data()[idx]); @@ -220,35 +220,35 @@ void TestInitializerDataField() { AddData(data, idx, tensor_proto); } - const Initializer init(tensor_proto, Path()); + const Initializer init(tensor_proto, std::filesystem::path()); for (size_t idx = 0; idx < data.size(); idx++) { EXPECT_EQ(data[idx], init.data()[idx]); } } -#define TestInitializerDataFieldSpecialized(type) \ - template <> \ - void TestInitializerDataField() { \ - std::vector data{ \ - 0, 1, 2, 3, \ - 4, 5, 6, 7, \ - 8, 9, 10, 11}; \ - \ - ONNX_NAMESPACE::TensorProto tensor_proto; \ - tensor_proto.set_data_type(GetTensorProtoDataType()); \ - tensor_proto.set_name("OptimizerInitializerTest_DataField"); \ - tensor_proto.add_dims(3); \ - tensor_proto.add_dims(4); \ - for (size_t idx = 0; idx < data.size(); idx++) { \ - tensor_proto.add_##type##_data(data[idx]); \ - } \ - \ - const Initializer init(tensor_proto, Path()); \ - \ - for (size_t idx = 0; idx < data.size(); idx++) { \ - EXPECT_EQ(data[idx], init.data()[idx]); \ - } \ +#define TestInitializerDataFieldSpecialized(type) \ + template <> \ + void TestInitializerDataField() { \ + std::vector data{ \ + 0, 1, 2, 3, \ + 4, 5, 6, 7, \ + 8, 9, 10, 11}; \ + \ + ONNX_NAMESPACE::TensorProto tensor_proto; \ + tensor_proto.set_data_type(GetTensorProtoDataType()); \ + tensor_proto.set_name("OptimizerInitializerTest_DataField"); \ + tensor_proto.add_dims(3); \ + tensor_proto.add_dims(4); \ + for (size_t idx = 0; idx < data.size(); idx++) { \ + tensor_proto.add_##type##_data(data[idx]); \ + } \ + \ + const Initializer init(tensor_proto, std::filesystem::path()); \ + \ + for (size_t idx = 0; idx < data.size(); idx++) { \ + EXPECT_EQ(data[idx], init.data()[idx]); \ + } \ } typedef int64_t int64; diff --git a/onnxruntime/test/util/test_utils.cc b/onnxruntime/test/util/test_utils.cc index 598147b81dd89..6bc0f8d105495 100644 --- a/onnxruntime/test/util/test_utils.cc +++ b/onnxruntime/test/util/test_utils.cc @@ -233,7 +233,7 @@ void CheckShapeEquality(const 
ONNX_NAMESPACE::TensorShapeProto* shape1, #if !defined(DISABLE_SPARSE_TENSORS) void SparseIndicesChecker(const ONNX_NAMESPACE::TensorProto& indices_proto, gsl::span expected_indicies) { using namespace ONNX_NAMESPACE; - Path model_path; + std::filesystem::path model_path; std::vector unpack_buffer; gsl::span ind_span; std::vector converted_indices; From 35ed41db972b097197e32f82ea390b77d6a7e3ef Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Tue, 4 Jun 2024 17:32:02 +0000 Subject: [PATCH 02/50] update --- .../core/providers/qnn/builder/opbuilder/base_op_builder.cc | 4 +--- onnxruntime/test/flatbuffers/flatbuffer_utils_test.cc | 2 +- orttraining/orttraining/core/framework/checkpoint_common.cc | 4 ++-- orttraining/orttraining/models/runner/training_util.cc | 2 +- orttraining/orttraining/training_api/checkpoint.cc | 2 +- 5 files changed, 6 insertions(+), 8 deletions(-) diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc index ccedc28ae807e..2ee4d4b8f3b72 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc @@ -235,9 +235,7 @@ Status BaseOpBuilder::TransposeInitializer(const QnnModelWrapper& qnn_model_wrap TensorShape new_tensor_shape(new_tensor_shape_dims); Tensor out_tensor = Tensor(tensor_dtype, new_tensor_shape, cpu_allocator); - onnxruntime::PathString model_path = qnn_model_wrapper.GetGraphViewer().ModelPath().ToPathString(); - const ORTCHAR_T* model_path_str = model_path.empty() ? nullptr : model_path.c_str(); - ORT_RETURN_IF_ERROR(onnxruntime::utils::TensorProtoToTensor(Env::Default(), model_path_str, initializer, in_tensor)); + ORT_RETURN_IF_ERROR(onnxruntime::utils::TensorProtoToTensor(Env::Default(), qnn_model_wrapper.GetGraphViewer().ModelPath(), initializer, in_tensor)); ORT_RETURN_IF_ERROR(Transpose::DoTranspose(permutations, in_tensor, out_tensor)); onnx::TensorProto new_tensor_proto = onnxruntime::utils::TensorToTensorProto(out_tensor, "test"); ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(new_tensor_proto, transposed_data)); diff --git a/onnxruntime/test/flatbuffers/flatbuffer_utils_test.cc b/onnxruntime/test/flatbuffers/flatbuffer_utils_test.cc index f54a74c78c801..7289f92c65663 100644 --- a/onnxruntime/test/flatbuffers/flatbuffer_utils_test.cc +++ b/onnxruntime/test/flatbuffers/flatbuffer_utils_test.cc @@ -313,7 +313,7 @@ TEST(FlatbufferUtilsTest, ExternalWriteReadWithLoadOrtTensor) { std::vector> fbs_tensors; for (const auto& initializer : initializers) { flatbuffers::Offset fbs_tensor; - ASSERT_STATUS_OK(SaveInitializerOrtFormat(builder, initializer, Path(), fbs_tensor, writer)); + ASSERT_STATUS_OK(SaveInitializerOrtFormat(builder, initializer, std::filesystem::path(), fbs_tensor, writer)); fbs_tensors.push_back(fbs_tensor); } diff --git a/orttraining/orttraining/core/framework/checkpoint_common.cc b/orttraining/orttraining/core/framework/checkpoint_common.cc index 295f17b894095..2c36895de2ac5 100644 --- a/orttraining/orttraining/core/framework/checkpoint_common.cc +++ b/orttraining/orttraining/core/framework/checkpoint_common.cc @@ -16,7 +16,7 @@ namespace onnxruntime { namespace training { /** - * @brief Create OrtValues From TensorProto objects + * @brief Create OrtValues From TensorProto objects. Doesn't support external tensor. * * @param tensor_protos vector of TensorProto * @param name_to_ort_value saved results. 
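Throughout this series the sentinel for "no model directory" changes from a nullptr ORTCHAR_T* to a default-constructed std::filesystem::path, which is why the comment above now notes that external tensors are unsupported on this path. A minimal sketch of the resulting calling convention, assuming ONNX Runtime's tensorprotoutils and env headers (the helper name is hypothetical; the call mirrors the hunk that follows):

#include <filesystem>

#include "core/framework/tensor.h"
#include "core/framework/tensorprotoutils.h"
#include "core/platform/env.h"

// Sketch only: deserializes a TensorProto whose data is stored inline.
// The empty path replaces the old nullptr ORTCHAR_T* sentinel, so any
// relative external-data reference in tensor_proto cannot be resolved.
onnxruntime::common::Status LoadInlineTensor(const ONNX_NAMESPACE::TensorProto& tensor_proto,
                                             onnxruntime::Tensor& preallocated) {
  return onnxruntime::utils::TensorProtoToTensor(onnxruntime::Env::Default(),
                                                 std::filesystem::path(),
                                                 tensor_proto, preallocated);
}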
@@ -42,7 +42,7 @@ Status CreateOrtValuesFromTensorProtos( tensor_proto.data_type()) ->GetElementType(); auto p_tensor = std::make_unique(tensor_dtype, tensor_shape, cpu_allocator); - ORT_RETURN_IF_ERROR(utils::TensorProtoToTensor(Env::Default(), nullptr, tensor_proto, *p_tensor)); + ORT_RETURN_IF_ERROR(utils::TensorProtoToTensor(Env::Default(), std::filesystem::path(), tensor_proto, *p_tensor)); OrtValue ort_value; ort_value.Init(p_tensor.release(), diff --git a/orttraining/orttraining/models/runner/training_util.cc b/orttraining/orttraining/models/runner/training_util.cc index 7764508d9a091..6af3bf4410065 100644 --- a/orttraining/orttraining/models/runner/training_util.cc +++ b/orttraining/orttraining/models/runner/training_util.cc @@ -53,7 +53,7 @@ common::Status DataSet::AddData(const vector& featu OrtMemoryInfo info("Cpu", OrtDeviceAllocator, OrtDevice{}, 0, OrtMemTypeDefault); std::unique_ptr buffer = std::make_unique(cpu_tensor_length); ORT_RETURN_IF_ERROR(utils::TensorProtoToOrtValue( - Env::Default(), nullptr, tensor_proto, MemBuffer(buffer.get(), cpu_tensor_length, info), ort_value)); + Env::Default(), std::filesystem::path(), tensor_proto, MemBuffer(buffer.get(), cpu_tensor_length, info), ort_value)); sample->push_back(ort_value); ortvalue_buffers_.emplace_back(std::move(buffer)); diff --git a/orttraining/orttraining/training_api/checkpoint.cc b/orttraining/orttraining/training_api/checkpoint.cc index cb355ed04e907..56029b34c24d7 100644 --- a/orttraining/orttraining/training_api/checkpoint.cc +++ b/orttraining/orttraining/training_api/checkpoint.cc @@ -330,7 +330,7 @@ Status FromTensorProtos(gsl::span trainable_t for (const auto& tensor_proto : tensor_protos) { flatbuffers::Offset fbs_tensor; ORT_RETURN_IF_ERROR( - fbs::utils::SaveInitializerOrtFormat(builder, tensor_proto, Path(), fbs_tensor, external_data_writer)); + fbs::utils::SaveInitializerOrtFormat(builder, tensor_proto, std::filesystem::path(), fbs_tensor, external_data_writer)); fbs_tensors.push_back(fbs_tensor); } From a7fa25330458a04e505cc8191158faff861a6a70 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Tue, 4 Jun 2024 17:49:28 +0000 Subject: [PATCH 03/50] update --- .../core/providers/qnn/builder/opbuilder/slice_op_builder.cc | 4 +--- onnxruntime/core/providers/qnn/qnn_execution_provider.cc | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc index 88c94581a8887..410dc7629c4f8 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc @@ -92,9 +92,7 @@ static Status GetInitializerInputData(const NodeUnitIODef& input, const QnnModel Tensor tensor(dtype, shape, std::make_shared()); // Deserialize initializer into Tensor. - onnxruntime::PathString model_path = qnn_model_wrapper.GetGraphViewer().ModelPath().ToPathString(); - const ORTCHAR_T* model_path_str = model_path.empty() ? 
nullptr : model_path.c_str(); - ORT_RETURN_IF_ERROR(onnxruntime::utils::TensorProtoToTensor(onnxruntime::Env::Default(), model_path_str, + ORT_RETURN_IF_ERROR(onnxruntime::utils::TensorProtoToTensor(onnxruntime::Env::Default(), qnn_model_wrapper.GetGraphViewer().ModelPath(), *initializer_proto, tensor)); Status status; diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc index 26049fd9bdc4a..4f86f1257a434 100644 --- a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc +++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc @@ -715,7 +715,7 @@ Status QNNExecutionProvider::Compile(const std::vector& fused const onnxruntime::GraphViewer& graph_viewer_0(fused_nodes_and_graphs[0].filtered_graph); is_ctx_file_exist = qnn::ValidateContextCacheFilePath(is_qnn_ctx_model, context_cache_path_cfg_, - graph_viewer_0.ModelPath().ToPathString(), + graph_viewer_0.ModelPath().native(), context_cache_path); } From 70209c0664a42d497da1f03110bec64f9a1fc7f0 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Tue, 4 Jun 2024 17:52:01 +0000 Subject: [PATCH 04/50] update --- .../core/providers/nnapi/nnapi_builtin/builders/helper.cc | 2 +- .../core/providers/nnapi/nnapi_builtin/builders/helper.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc index 745504ca04941..c8ded32cbb783 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc @@ -185,7 +185,7 @@ bool HasValidBinaryOpQuantizedInputTypes(const NodeUnit& node_unit) { } common::Status GetQuantizationScaleAndZeroPoint(const GraphViewer& graph_viewer, const NodeUnitIODef& io_def, - const Path& model_path, float& scale, int32_t& zero_point) { + const std::filesystem::path& model_path, float& scale, int32_t& zero_point) { scale = 0.0f; zero_point = 0; diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h index a606b8aceb63d..1e3668160fbcf 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h @@ -132,7 +132,7 @@ bool IsQuantizedBinaryOp(QuantizedOpType quant_op_type); bool HasValidBinaryOpQuantizedInputTypes(const NodeUnit& node_unit); common::Status GetQuantizationScaleAndZeroPoint( - const GraphViewer& graph_viewer, const NodeUnitIODef& io_def, const Path& model_path, + const GraphViewer& graph_viewer, const NodeUnitIODef& io_def, const std::filesystem::path& model_path, float& scale, int32_t& zero_point); common::Status GetQuantizationScaleAndZeroPoint( From 751de2630b1cac3b1887219d83e249f75aea5bb4 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Tue, 4 Jun 2024 18:28:46 +0000 Subject: [PATCH 05/50] Fix bug --- onnxruntime/core/framework/tensorprotoutils.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc index 4426cc788e9a9..9395b0eb6d932 100644 --- a/onnxruntime/core/framework/tensorprotoutils.cc +++ b/onnxruntime/core/framework/tensorprotoutils.cc @@ -880,7 +880,8 @@ static Status GetFileContent( void*& raw_buffer, OrtCallback& deleter) { // query length if it is 0 if (length == 0) { - length = 
std::filesystem::file_size(file_path); + // The return type of std::filesystem::file_size is uintmax_t which could be bigger than size_t + length = narrow(std::filesystem::file_size(file_path)); } // first, try to map into memory @@ -913,11 +914,10 @@ Status GetExtDataFromTensorProto(const Env& env, const std::filesystem::path& mo if (!model_path.empty()) { ORT_RETURN_IF_ERROR(GetDirNameFromFilePath(model_path, tensor_proto_dir)); } - const ORTCHAR_T* t_prot_dir_s = tensor_proto_dir.size() == 0 ? nullptr : tensor_proto_dir.c_str(); std::basic_string external_data_file_path; FileOffsetType file_offset; SafeInt raw_data_safe_len = 0; - ORT_RETURN_IF_ERROR(GetExternalDataInfo(tensor_proto, t_prot_dir_s, external_data_file_path, file_offset, + ORT_RETURN_IF_ERROR(GetExternalDataInfo(tensor_proto, tensor_proto_dir, external_data_file_path, file_offset, raw_data_safe_len)); if (external_data_file_path == onnxruntime::utils::kTensorProtoMemoryAddressTag) { From 1168d5a4ac2d1dc0fd9dce171e641547f2dc315a Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Tue, 4 Jun 2024 19:39:38 +0000 Subject: [PATCH 06/50] update --- .../dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.cpp | 2 +- .../dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.h | 2 +- .../nnapi/nnapi_builtin/builders/op_builder_helpers.cc | 4 ++-- .../nnapi/nnapi_builtin/builders/op_builder_helpers.h | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.cpp index f29fbc7a1a65b..52b619c51ea1e 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.cpp @@ -3018,7 +3018,7 @@ namespace Windows::AI::MachineLearning::Adapter std::tuple, size_t> UnpackTensor( const onnx::TensorProto& initializer, - const onnxruntime::Path& modelPath) + const std::filesystem::path& modelPath) { std::unique_ptr unpackedTensor; size_t tensorByteSize = 0; diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.h index 59e253e88457a..401b996921f50 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.h +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.h @@ -681,5 +681,5 @@ bool TryGetStaticInputShapes(const onnxruntime::Node& node, EdgeShapes& inputSha bool TryGetStaticOutputShapes(const onnxruntime::Node& node, EdgeShapes& outputShapes); bool ContainsEmptyDimensions(const EdgeShapes& shapes, gsl::span ignoredShapeIndices = gsl::span()); -std::tuple, size_t> UnpackTensor(const onnx::TensorProto& initializer, const onnxruntime::Path& modelPath); +std::tuple, size_t> UnpackTensor(const onnx::TensorProto& initializer, const std::filesystem::path& modelPath); } // namespace Windows::AI::MachineLearning::Adapter diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_helpers.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_helpers.cc index dab7bccf43396..c1770e0119b25 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_helpers.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_helpers.cc @@ -1142,7 +1142,7 @@ bool IsQuantizationScaleSupported(const GraphViewer& graph_viewer, bool 
IsQuantizationZeroPointSupported(const GraphViewer& graph_viewer, const NodeUnitIODef& io_def, const std::string& op_type, - const Path& model_path, + const std::filesystem::path& model_path, bool is_quant_matmul, bool is_conv_matmul_u8s8_weight) { // zero point is optional here @@ -1282,7 +1282,7 @@ bool IsQuantizedIOSupported(const GraphViewer& graph_viewer, const NodeUnit& nod bool HasRequiredScaleAndZeroPoint(const GraphViewer& graph_viewer, const std::string& op_desc, const NodeUnitIODef& io_def, - const Path& path, + const std::filesystem::path& path, float required_scale, int32_t required_zp) { float scale = 0.0f; int32_t zp = 0; diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_helpers.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_helpers.h index 0844857a06d61..8f2fefd8deb7d 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_helpers.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_helpers.h @@ -200,7 +200,7 @@ bool IsQuantizationScaleSupported(const GraphViewer& graph_viewer, bool IsQuantizationZeroPointSupported(const GraphViewer& graph_viewer, const NodeUnitIODef& io_def, const std::string& op_type, - const Path& model_path, + const std::filesystem::path& model_path, bool is_quant_matmul, bool is_conv_matmul_u8s8_weight); @@ -214,7 +214,7 @@ bool IsQuantizedIOSupported(const GraphViewer& graph_viewer, const NodeUnit& nod bool HasRequiredScaleAndZeroPoint(const GraphViewer& graph_viewer, const std::string& op_desc, const NodeUnitIODef& io_def, - const Path& path, + const std::filesystem::path& path, float required_scale, int32_t required_zp); // performs broadcasting operation on two shapes to make them compatible From 39ccb7695893d0b5e5e6ade2418a461c88e24b91 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Tue, 4 Jun 2024 20:38:04 +0000 Subject: [PATCH 07/50] update --- .../dml/DmlExecutionProvider/src/Utility.h | 15 ++++----------- .../tensorrt/tensorrt_execution_provider.cc | 6 +----- .../tensorrt/tensorrt_execution_provider_utils.h | 4 ++-- 3 files changed, 7 insertions(+), 18 deletions(-) diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Utility.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Utility.h index 02166f992449e..f779cec19fd5b 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Utility.h +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Utility.h @@ -16,21 +16,14 @@ namespace Dml return g_converterToUtf16.from_bytes(str.data()); } - static inline std::wstring GetModelName(const onnxruntime::Path& modelPath) + static inline std::wstring GetModelName(const std::filesystem::path& modelPath) { - if (modelPath.GetComponents().empty()) + if (modelPath.empty() || !modelPath.has_filename() || !modelPath.has_extension()) { return L""; } - const onnxruntime::PathString& pathString = modelPath.GetComponents().back(); - size_t dotPosition = pathString.find_last_of('.'); - if (dotPosition == std::string::npos) - { - return L""; - } - - return pathString.substr(0, dotPosition); + return modelPath.stem().native(); } static inline std::wstring GetSanitizedFileName(std::wstring_view name) @@ -138,4 +131,4 @@ namespace StringUtil return {}; } -} \ No newline at end of file +} diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index dff74a404a456..68bdb2ef60ef5 100644 --- 
a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -2318,12 +2318,8 @@ TensorrtExecutionProvider::GetCapability(const GraphViewer& graph, // Construct subgraph capability from node list std::vector> result; // Get ModelPath - const auto& path_string = graph.ModelPath().ToPathString(); -#ifdef _WIN32 - wcstombs_s(nullptr, model_path_, sizeof(model_path_), path_string.c_str(), sizeof(model_path_)); -#else + const auto& path_string = graph.ModelPath().string(); strcpy(model_path_, path_string.c_str()); -#endif // If the model consists of only a single "EPContext" contrib op, it means TRT EP can fetch the precompiled engine info from the node and // load the engine directly without having to go through the processes of graph proto reconstruction, calling TRT parser and engine compilation. diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h index df12d90338782..eb47a12d13a40 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h @@ -539,8 +539,8 @@ HashValue TRTGenerateId(const GraphViewer& graph_viewer) { // Use the model's file name instead of the entire path to avoid cache regeneration if path changes const auto& model_path_components = main_graph.ModelPath().GetComponents(); - if (!model_path_components.empty()) { - std::string model_name = PathToUTF8String(model_path_components.back()); + if (!main_graph.ModelPath().empty()) { + std::string model_name = PathToUTF8String(main_graph.ModelPath().filename()); LOGS_DEFAULT(INFO) << "[TensorRT EP] Model name is " << model_name; // Ensure enough characters are hashed in case model names are too short From 6e4e440c6fad0865360e29ea9a22c5230b276844 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Tue, 4 Jun 2024 13:55:10 -0700 Subject: [PATCH 08/50] update --- cmake/external/onnx | 2 +- .../DmlExecutionProvider/src/DmlRuntimeFusedGraphKernel.cpp | 6 +++--- .../DmlExecutionProvider/src/DmlRuntimeFusedGraphKernel.h | 2 +- .../dml/DmlExecutionProvider/src/GraphDescBuilder.cpp | 2 +- .../dml/DmlExecutionProvider/src/GraphDescBuilder.h | 2 +- .../dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.cpp | 6 +++--- .../dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.h | 2 +- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/cmake/external/onnx b/cmake/external/onnx index 595228d99e397..990217f043af7 160000 --- a/cmake/external/onnx +++ b/cmake/external/onnx @@ -1 +1 @@ -Subproject commit 595228d99e3977ac27cb79d5963adda262af99ad +Subproject commit 990217f043af7222348ca8f0301e17fa7b841781 diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlRuntimeFusedGraphKernel.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlRuntimeFusedGraphKernel.cpp index 10b8b7fe42f86..2f110ba339beb 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlRuntimeFusedGraphKernel.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlRuntimeFusedGraphKernel.cpp @@ -20,7 +20,7 @@ namespace Dml DmlRuntimeFusedGraphKernel( const onnxruntime::OpKernelInfo& kernelInfo, std::shared_ptr indexedSubGraph, - const onnxruntime::Path& modelPath, + const std::filesystem::path& modelPath, std::vector>&& subgraphNodes, std::vector&& subgraphInputs, std::vector&& subgraphOutputs, @@ -314,7 +314,7 @@ namespace Dml 
mutable std::optional m_persistentResourceBinding; std::shared_ptr m_indexedSubGraph; - const onnxruntime::Path& m_modelPath; + const std::filesystem::path& m_modelPath; std::vector> m_subgraphNodes; std::vector m_subgraphInputs; @@ -341,7 +341,7 @@ namespace Dml onnxruntime::OpKernel* CreateRuntimeFusedGraphKernel( const onnxruntime::OpKernelInfo& info, std::shared_ptr indexedSubGraph, - const onnxruntime::Path& modelPath, + const std::filesystem::path& modelPath, std::vector>&& subgraphNodes, std::vector&& subgraphInputs, std::vector&& subgraphOutputs, diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlRuntimeFusedGraphKernel.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlRuntimeFusedGraphKernel.h index d679c5aa5667c..e83fa628d44a8 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlRuntimeFusedGraphKernel.h +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlRuntimeFusedGraphKernel.h @@ -10,7 +10,7 @@ namespace Dml onnxruntime::OpKernel* CreateRuntimeFusedGraphKernel( const onnxruntime::OpKernelInfo& info, std::shared_ptr indexedSubGraph, - const onnxruntime::Path& modelPath, + const std::filesystem::path& modelPath, std::vector>&& subgraphNodes, std::vector&& subgraphInputs, std::vector&& subgraphOutputs, diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.cpp index 2bd9377e4c2fa..387767f821b3e 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.cpp @@ -232,7 +232,7 @@ namespace Dml::GraphDescBuilder const std::unordered_map>& isInitializerTransferable, const std::unordered_map& graphNodePropertyMap, const ExecutionProviderImpl* executionHandle, - const onnxruntime::Path& modelPath, + const std::filesystem::path& modelPath, gsl::span subgraphNodes, gsl::span subgraphInputs, gsl::span subgraphOutputs, diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.h index 4055984b40405..9728dc47c8f3c 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.h +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.h @@ -41,7 +41,7 @@ namespace Dml const std::unordered_map>& isInitializerTransferable, const std::unordered_map& graphNodePropertyMap, const ExecutionProviderImpl* executionHandle, - const onnxruntime::Path& modelPath, + const std::filesystem::path& modelPath, gsl::span subgraphNodes, gsl::span subgraphInputs, gsl::span subgraphOutputs, diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.cpp index 52b619c51ea1e..0a2a5bbcbedaf 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.cpp @@ -842,7 +842,7 @@ namespace Windows::AI::MachineLearning::Adapter const onnx::TensorProto* tensorProto = &attributeProto->t(); // An empty path is used as external weights are not currently supported in this case - Microsoft::WRL::ComPtr tensorWrapper = wil::MakeOrThrow(const_cast(tensorProto), onnxruntime::Path()); + Microsoft::WRL::ComPtr tensorWrapper = wil::MakeOrThrow(const_cast(tensorProto), 
std::filesystem::path()); *tensor = tensorWrapper.Detach(); return S_OK; } @@ -1545,7 +1545,7 @@ namespace Windows::AI::MachineLearning::Adapter ORT_CATCH_RETURN } - OnnxTensorWrapper::OnnxTensorWrapper(onnx::TensorProto* impl, const onnxruntime::Path& modelPath) : m_impl(impl) + OnnxTensorWrapper::OnnxTensorWrapper(onnx::TensorProto* impl, const std::filesystem::path& modelPath) : m_impl(impl) { // The tensor may be stored as raw data or in typed fields. if (impl->data_location() == onnx::TensorProto_DataLocation_EXTERNAL) @@ -2826,7 +2826,7 @@ namespace Windows::AI::MachineLearning::Adapter { // An empty path is used as external weights are not currently supported in this case Microsoft::WRL::ComPtr tensorWrapper = wil::MakeOrThrow( - const_cast(ctx->getInputData(index)), onnxruntime::Path()); + const_cast(ctx->getInputData(index)), std::filesystem::path()); return tensorWrapper; } ); diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.h index 401b996921f50..4708de1651d39 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.h +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.h @@ -283,7 +283,7 @@ class OnnxTensorWrapper : public WRL::Base, public Closable public: OnnxTensorWrapper() = default; - OnnxTensorWrapper(onnx::TensorProto* impl, const onnxruntime::Path& modelPath); + OnnxTensorWrapper(onnx::TensorProto* impl, const std::filesystem::path& modelPath); uint32_t STDMETHODCALLTYPE GetDimensionCount() const noexcept override; From 042a90eab0cbd4917365aeedaf9f6b9ed766ae8d Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Tue, 4 Jun 2024 13:56:11 -0700 Subject: [PATCH 09/50] revert --- cmake/external/onnx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/external/onnx b/cmake/external/onnx index 990217f043af7..595228d99e397 160000 --- a/cmake/external/onnx +++ b/cmake/external/onnx @@ -1 +1 @@ -Subproject commit 990217f043af7222348ca8f0301e17fa7b841781 +Subproject commit 595228d99e3977ac27cb79d5963adda262af99ad From 80872cea110a092f22d3c7728a1d99184a951092 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Tue, 4 Jun 2024 22:55:18 +0000 Subject: [PATCH 10/50] update --- .../providers/tensorrt/tensorrt_execution_provider_utils.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h index eb47a12d13a40..95abcd1bad2b8 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h @@ -537,9 +537,7 @@ HashValue TRTGenerateId(const GraphViewer& graph_viewer) { }; // Use the model's file name instead of the entire path to avoid cache regeneration if path changes - const auto& model_path_components = main_graph.ModelPath().GetComponents(); - - if (!main_graph.ModelPath().empty()) { + if (main_graph.ModelPath().has_filename()) { std::string model_name = PathToUTF8String(main_graph.ModelPath().filename()); LOGS_DEFAULT(INFO) << "[TensorRT EP] Model name is " << model_name; From eff34807accd2b09e5127c98a73d62b54145f3e9 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Wed, 5 Jun 2024 00:28:00 +0000 Subject: [PATCH 11/50] update --- onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc | 2 +- 
 onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc
index 2171ce056e029..42788f2960197 100644
--- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc
+++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.cc
@@ -29,7 +29,7 @@ bool GraphHasCtxNode(const GraphViewer& graph_viewer) {
  return false;
 }

-const onnxruntime::Path& GetModelPath(const GraphViewer& graph_viewer) {
+const std::filesystem::path& GetModelPath(const GraphViewer& graph_viewer) {
  // find the top level graph
  const Graph* cur_graph = &graph_viewer.GetGraph();
  while (cur_graph->IsSubgraph()) {
diff --git a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h
index f8fefc12c3453..3be08d043da48 100644
--- a/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h
+++ b/onnxruntime/core/providers/tensorrt/onnx_ctx_model_helper.h
@@ -24,7 +24,7 @@ static const std::string EPCONTEXT_WARNING =
    for the best model loading time";

 bool GraphHasCtxNode(const GraphViewer& graph_viewer);
-const onnxruntime::Path& GetModelPath(const GraphViewer& graph_viewer);
+const std::filesystem::path& GetModelPath(const GraphViewer& graph_viewer);
 std::filesystem::path GetPathOrParentPathOfCtxModel(const std::string& ep_context_file_path);
 ONNX_NAMESPACE::ModelProto* CreateCtxModel(const GraphViewer& graph_viewer,
                                            const std::string engine_cache_path,

From 802a3b8ba986afa2310ace9dc78197036d16089f Mon Sep 17 00:00:00 2001
From: Changming Sun
Date: Wed, 5 Jun 2024 01:27:31 +0000
Subject: [PATCH 12/50] update

---
 .../core/providers/tensorrt/tensorrt_execution_provider.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
index 68bdb2ef60ef5..54e9c68fa0b86 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
@@ -2319,7 +2319,8 @@ TensorrtExecutionProvider::GetCapability(const GraphViewer& graph,
  std::vector> result;
  // Get ModelPath
  const auto& path_string = graph.ModelPath().string();
- strcpy(model_path_, path_string.c_str());
+ strncpy(model_path_, path_string.c_str(), sizeof(model_path_) - 1);
+ model_path_[sizeof(model_path_) - 1] = '\0';

  // If the model consists of only a single "EPContext" contrib op, it means TRT EP can fetch the precompiled engine info from the node and
  // load the engine directly without having to go through the processes of graph proto reconstruction, calling TRT parser and engine compilation.
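The hunk in PATCH 12 bounds the copy of graph.ModelPath() into the EP's fixed-size model_path_ buffer: strcpy becomes strncpy plus an explicit terminator, and PATCH 13 below additionally routes the Windows build through strncpy_s. A minimal sketch of the same pattern in isolation; CopyModelPath and kBufSize are illustrative names only, with kBufSize standing in for the real array size of model_path_, which is declared in the EP header and not shown in this series:

    #include <cstring>
    #include <filesystem>
    #include <string>

    // Bounded copy of a std::filesystem::path into a fixed char buffer.
    constexpr std::size_t kBufSize = 4096;  // hypothetical buffer size

    void CopyModelPath(const std::filesystem::path& model_path, char (&dst)[kBufSize]) {
      const std::string s = model_path.string();   // keep the temporary alive while copying
      std::strncpy(dst, s.c_str(), kBufSize - 1);  // copies at most kBufSize - 1 characters
      dst[kBufSize - 1] = '\0';                    // strncpy does not null-terminate on truncation
    }

    int main() {
      char model_path_[kBufSize];
      CopyModelPath(std::filesystem::path{"model.onnx"}, model_path_);
    }

The bounded copy only guarantees a valid, null-terminated C string; paths longer than the buffer are still silently truncated.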
From 9ed5dd603f0f5631b06594e24b4add769835b40d Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Wed, 5 Jun 2024 09:17:54 -0700 Subject: [PATCH 13/50] update --- cmake/onnxruntime_providers_tensorrt.cmake | 1 - .../tensorrt/tensorrt_execution_provider.cc | 18 +++++++++++------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/cmake/onnxruntime_providers_tensorrt.cmake b/cmake/onnxruntime_providers_tensorrt.cmake index e56de0c7124dc..90203216600fa 100644 --- a/cmake/onnxruntime_providers_tensorrt.cmake +++ b/cmake/onnxruntime_providers_tensorrt.cmake @@ -13,7 +13,6 @@ set(OLD_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) set(PROTOBUF_LIBRARY ${PROTOBUF_LIB}) if (WIN32) - add_definitions(-D_SILENCE_EXPERIMENTAL_FILESYSTEM_DEPRECATION_WARNING=1) set(OLD_CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS}) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4099 /wd4551 /wd4505 /wd4515 /wd4706 /wd4456 /wd4324 /wd4701 /wd4804 /wd4702 /wd4458 /wd4703") if (CMAKE_BUILD_TYPE STREQUAL "Debug") diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc index 54e9c68fa0b86..6cc2b489e5e2c 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc @@ -2319,7 +2319,11 @@ TensorrtExecutionProvider::GetCapability(const GraphViewer& graph, std::vector> result; // Get ModelPath const auto& path_string = graph.ModelPath().string(); +#ifdef _WIN32 + strncpy_s(model_path_, path_string.c_str(), sizeof(model_path_) - 1); +#else strncpy(model_path_, path_string.c_str(), sizeof(model_path_) - 1); +#endif model_path_[sizeof(model_path_) - 1] = '\0'; // If the model consists of only a single "EPContext" contrib op, it means TRT EP can fetch the precompiled engine info from the node and @@ -2507,13 +2511,13 @@ TensorrtExecutionProvider::GetCapability(const GraphViewer& graph, /** * Refit the weight-stripped engine */ -common::Status TensorrtExecutionProvider::RefitEngine(std::string onnx_model_filename, - std::string& onnx_model_folder_path, - std::string& weight_stripped_engine_cath_path, - bool path_check, - nvinfer1::ICudaEngine* trt_engine, - bool serialize_refitted_engine, - bool detailed_build_log) { +common::Status TensorrtExecutionProvider::RefitEngine([[maybe_unused]] std::string onnx_model_filename, + [[maybe_unused]] std::string& onnx_model_folder_path, + [[maybe_unused]] std::string& weight_stripped_engine_cath_path, + [[maybe_unused]] bool path_check, + [[maybe_unused]] nvinfer1::ICudaEngine* trt_engine, + [[maybe_unused]] bool serialize_refitted_engine, + [[maybe_unused]] bool detailed_build_log) { #if NV_TENSORRT_MAJOR >= 10 std::filesystem::path onnx_model_path{onnx_model_folder_path}; onnx_model_path.append(onnx_model_filename); From 7e2e9cb83d0adfeae650fde2725fb266aeab94c8 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Thu, 6 Jun 2024 02:41:26 +0000 Subject: [PATCH 14/50] update --- onnxruntime/core/framework/graph_partitioner.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/onnxruntime/core/framework/graph_partitioner.cc b/onnxruntime/core/framework/graph_partitioner.cc index ea4f71440d0f8..533b15c4e3a39 100644 --- a/onnxruntime/core/framework/graph_partitioner.cc +++ b/onnxruntime/core/framework/graph_partitioner.cc @@ -659,13 +659,13 @@ static Status CreateEpContextModel(const ExecutionProviders& execution_providers }; std::filesystem::path context_cache_path; - const std::filesystem::path& 
model_pathstring = graph.ModelPath(); + const std::filesystem::path& model_path = graph.ModelPath(); if (!ep_context_path.empty()) { // On Windows here we explicitly cast the ep_context_path string to UTF-16 because we assume ep_context_path is in UTF-8 context_cache_path = ToPathString(ep_context_path); - } else if (!model_pathstring.empty()) { - context_cache_path = model_pathstring / ORT_TSTR("_ctx.onnx"); + } else if (!model_path.empty()) { + context_cache_path = model_path / ORT_TSTR("_ctx.onnx"); } { From f40277214d9bc7a13626099bcfab794e786604db Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Thu, 6 Jun 2024 13:20:47 -0700 Subject: [PATCH 15/50] Update onnxruntime/core/framework/model_metadef_id_generator.cc Co-authored-by: Edward Chen <18449977+edgchen1@users.noreply.github.com> --- onnxruntime/core/framework/model_metadef_id_generator.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/framework/model_metadef_id_generator.cc b/onnxruntime/core/framework/model_metadef_id_generator.cc index c3f7382e6ef7e..8b1d1f4f304c9 100644 --- a/onnxruntime/core/framework/model_metadef_id_generator.cc +++ b/onnxruntime/core/framework/model_metadef_id_generator.cc @@ -40,7 +40,7 @@ int ModelMetadefIdGenerator::GenerateId(const onnxruntime::GraphViewer& graph_vi // prefer path the model was loaded from // this may not be available if the model was loaded from a stream or in-memory bytes - const auto& model_path_str = main_graph.ModelPath().string(); + const auto model_path_str = main_graph.ModelPath().string(); if (!model_path_str.empty()) { MurmurHash3::x86_128(model_path_str.data(), gsl::narrow_cast(model_path_str.size()), hash[0], &hash); } else { From 8f88a65780bc83dd0325e115bfa0358676925c07 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Thu, 6 Jun 2024 13:29:35 -0700 Subject: [PATCH 16/50] Update onnxruntime/core/graph/graph.cc Co-authored-by: Edward Chen <18449977+edgchen1@users.noreply.github.com> --- onnxruntime/core/graph/graph.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc index f96bdea71f3b9..fafc208629e7a 100644 --- a/onnxruntime/core/graph/graph.cc +++ b/onnxruntime/core/graph/graph.cc @@ -3984,7 +3984,7 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std external_file_path = destination_file_path.parent_path() / external_file_path; } - std::ofstream external_stream(external_file_path.native(), std::ofstream::out | std::ofstream::binary); + std::ofstream external_stream(external_file_path, std::ofstream::out | std::ofstream::binary); ORT_ENFORCE(external_stream.is_open()); int64_t external_offset = 0; From 6964a700492b96ae557edfd28ba98f1336e5b732 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Thu, 6 Jun 2024 20:42:57 +0000 Subject: [PATCH 17/50] update --- include/onnxruntime/core/graph/graph.h | 2 +- onnxruntime/core/framework/session_options.h | 1 + .../core/framework/tensorprotoutils.cc | 339 +++++++++--------- onnxruntime/core/framework/tensorprotoutils.h | 4 +- 4 files changed, 167 insertions(+), 179 deletions(-) diff --git a/include/onnxruntime/core/graph/graph.h b/include/onnxruntime/core/graph/graph.h index ae57dd712fd68..cfdd4fe96de7e 100644 --- a/include/onnxruntime/core/graph/graph.h +++ b/include/onnxruntime/core/graph/graph.h @@ -1141,7 +1141,7 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi /** Gets the GraphProto representation of this Graph @params external_file_name name of 
the binary file to use for initializers. Must be a UTF-8 string. - @params destination_file_path path of the model file. + @param file_path path of the model file. @param initializer_size_threshold initializers larger or equal to this threshold (in bytes) are saved in the external file. Initializer smaller than this threshold are included in the onnx file. @returns GraphProto serialization of the graph. diff --git a/onnxruntime/core/framework/session_options.h b/onnxruntime/core/framework/session_options.h index bc277ef24a9fd..13da26d5e6053 100644 --- a/onnxruntime/core/framework/session_options.h +++ b/onnxruntime/core/framework/session_options.h @@ -7,6 +7,7 @@ #include #include #include +#include #include "core/common/gsl.h" #include "core/common/inlined_containers.h" #include "core/framework/config_options.h" diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc index 9395b0eb6d932..cfe6c2e520b99 100644 --- a/onnxruntime/core/framework/tensorprotoutils.cc +++ b/onnxruntime/core/framework/tensorprotoutils.cc @@ -111,8 +111,8 @@ namespace onnxruntime { namespace { // This function doesn't support string tensors -static Status UnpackTensorWithRawDataImpl(const void* raw_data, size_t raw_data_len, - size_t expected_num_elements, size_t element_size, +static Status UnpackTensorWithRawDataImpl(const void* raw_data, size_t raw_data_len, size_t expected_num_elements, + size_t element_size, /*out*/ unsigned char* p_data) { auto src = gsl::make_span(static_cast(raw_data), raw_data_len); auto dst = gsl::make_span(p_data, expected_num_elements * element_size); @@ -152,8 +152,8 @@ Status UnpackTensorWithRawData(const void* raw_data, size_t raw_data_len, size_t size_t num_packed_pairs = INT4_TYPE::CalcNumInt4Pairs(expected_num_elements); \ ORT_RETURN_IF_NOT(num_packed_pairs == raw_data_len, "Unexpected number of packed int4 pairs"); \ \ - gsl::span src_span = gsl::make_span(reinterpret_cast(raw_data), \ - num_packed_pairs); \ + gsl::span src_span = \ + gsl::make_span(reinterpret_cast(raw_data), num_packed_pairs); \ gsl::span dst_span = gsl::make_span(p_data, num_packed_pairs); \ \ std::memcpy(dst_span.data(), src_span.data(), num_packed_pairs); \ @@ -180,13 +180,15 @@ static Status GetExternalDataInfo(const ONNX_NAMESPACE::TensorProto& tensor_prot const auto& location = external_data_info->GetRelPath(); - external_file_path = location == onnxruntime::utils::kTensorProtoMemoryAddressTag ? std::filesystem::path(location) : (tensor_proto_dir / location); + external_file_path = location == onnxruntime::utils::kTensorProtoMemoryAddressTag ? std::filesystem::path(location) + : (tensor_proto_dir / location); ORT_RETURN_IF_ERROR(onnxruntime::utils::GetSizeInBytesFromTensorProto<0>(tensor_proto, &tensor_byte_size)); const size_t external_data_length = external_data_info->GetLength(); ORT_RETURN_IF_NOT(external_data_length == 0 || external_data_length == tensor_byte_size, - "TensorProto: ", tensor_proto.name(), " external data size mismatch. Computed size: ", - *&tensor_byte_size, ", external_data.length: ", external_data_length); + "TensorProto: ", tensor_proto.name(), + " external data size mismatch. 
Computed size: ", *&tensor_byte_size, + ", external_data.length: ", external_data_length); file_offset = external_data_info->GetOffset(); @@ -203,12 +205,8 @@ Status ReadExternalDataForTensor(const ONNX_NAMESPACE::TensorProto& tensor_proto std::basic_string external_file_path; onnxruntime::FileOffsetType file_offset; SafeInt tensor_byte_size; - ORT_RETURN_IF_ERROR(GetExternalDataInfo( - tensor_proto, - tensor_proto_dir, - external_file_path, - file_offset, - tensor_byte_size)); + ORT_RETURN_IF_ERROR( + GetExternalDataInfo(tensor_proto, tensor_proto_dir, external_file_path, file_offset, tensor_byte_size)); unpacked_tensor.resize(tensor_byte_size); ORT_RETURN_IF_ERROR(onnxruntime::Env::Default().ReadFileIntoBuffer( @@ -223,9 +221,8 @@ Status ReadExternalDataForTensor(const ONNX_NAMESPACE::TensorProto& tensor_proto // TODO(unknown): Change the current interface to take Path object for model path // so that validating and manipulating path for reading external data becomes easy Status TensorProtoToOrtValueImpl(const Env& env, const std::filesystem::path& model_path, - const ONNX_NAMESPACE::TensorProto& tensor_proto, - const MemBuffer* m, AllocatorPtr alloc, - OrtValue& value) { + const ONNX_NAMESPACE::TensorProto& tensor_proto, const MemBuffer* m, + AllocatorPtr alloc, OrtValue& value) { if (m && m->GetBuffer() == nullptr) { return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "MemBuffer has not been allocated."); } @@ -291,34 +288,35 @@ Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto& tensor, reinterpret_cast(p_data)); } -#define DEFINE_INT4_UNPACK_TENSOR_WITH_EXT_DATA_IMPL(INT4_TYPE) \ - template <> \ - Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto& tensor, \ - const std::filesystem::path& tensor_proto_dir, size_t expected_num_elements, \ - /*out*/ INT4_TYPE* p_data) { \ - static_assert(std::is_trivially_copyable::value, "T must be trivially copyable"); \ - \ - ORT_RETURN_IF(nullptr == p_data, "nullptr == p_data"); \ - std::vector unpacked_tensor; \ - ORT_RETURN_IF_ERROR(ReadExternalDataForTensor(tensor, tensor_proto_dir, unpacked_tensor)); \ - \ - size_t num_packed_pairs = INT4_TYPE::CalcNumInt4Pairs(expected_num_elements); \ - ORT_RETURN_IF_NOT(num_packed_pairs == unpacked_tensor.size(), "Unexpected number of packed int4 pairs"); \ - \ - gsl::span src_span = gsl::make_span(reinterpret_cast(unpacked_tensor.data()), \ - num_packed_pairs); \ - gsl::span dst_span = gsl::make_span(p_data, expected_num_elements); \ - \ - std::memcpy(dst_span.data(), src_span.data(), num_packed_pairs); \ - \ - return Status::OK(); \ +#define DEFINE_INT4_UNPACK_TENSOR_WITH_EXT_DATA_IMPL(INT4_TYPE) \ + template <> \ + Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto& tensor, \ + const std::filesystem::path& tensor_proto_dir, \ + size_t expected_num_elements, /*out*/ INT4_TYPE* p_data) { \ + static_assert(std::is_trivially_copyable::value, "T must be trivially copyable"); \ + \ + ORT_RETURN_IF(nullptr == p_data, "nullptr == p_data"); \ + std::vector unpacked_tensor; \ + ORT_RETURN_IF_ERROR(ReadExternalDataForTensor(tensor, tensor_proto_dir, unpacked_tensor)); \ + \ + size_t num_packed_pairs = INT4_TYPE::CalcNumInt4Pairs(expected_num_elements); \ + ORT_RETURN_IF_NOT(num_packed_pairs == unpacked_tensor.size(), "Unexpected number of packed int4 pairs"); \ + \ + gsl::span src_span = \ + gsl::make_span(reinterpret_cast(unpacked_tensor.data()), num_packed_pairs); \ + gsl::span dst_span = gsl::make_span(p_data, expected_num_elements); \ + \ + 
std::memcpy(dst_span.data(), src_span.data(), num_packed_pairs); \ + \ + return Status::OK(); \ } DEFINE_INT4_UNPACK_TENSOR_WITH_EXT_DATA_IMPL(Int4x2) DEFINE_INT4_UNPACK_TENSOR_WITH_EXT_DATA_IMPL(UInt4x2) -#define INSTANTIATE_UNPACK_EXTERNAL_TENSOR(type) \ - template Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto&, const std::filesystem::path&, size_t, type*); +#define INSTANTIATE_UNPACK_EXTERNAL_TENSOR(type) \ + template Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto&, const std::filesystem::path&, \ + size_t, type*); INSTANTIATE_UNPACK_EXTERNAL_TENSOR(float) INSTANTIATE_UNPACK_EXTERNAL_TENSOR(double) @@ -360,7 +358,8 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_d /*out*/ T* p_data, size_t expected_num_elements) { \ if (nullptr == p_data) { \ const size_t size = raw_data != nullptr ? raw_data_len : tensor.field_size(); \ - if (size == 0) return Status::OK(); \ + if (size == 0) \ + return Status::OK(); \ return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT); \ } \ if (nullptr == p_data || Type != tensor.data_type()) { \ @@ -370,9 +369,9 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_d return UnpackTensorWithRawData(raw_data, raw_data_len, expected_num_elements, p_data); \ } \ if (static_cast(tensor.field_size()) != expected_num_elements) \ - return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, \ - "corrupted protobuf data: tensor shape size(", expected_num_elements, \ - ") does not match the data size(", tensor.field_size(), ") in proto"); \ + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "corrupted protobuf data: tensor shape size(", \ + expected_num_elements, ") does not match the data size(", tensor.field_size(), \ + ") in proto"); \ auto& data = tensor.field_name(); \ for (auto data_iter = data.cbegin(); data_iter != data.cend(); ++data_iter) \ *p_data++ = static_cast(*data_iter); \ @@ -400,7 +399,8 @@ template <> Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* /*raw_data*/, size_t /*raw_data_len*/, /*out*/ std::string* p_data, size_t expected_size) { if (nullptr == p_data) { - if (tensor.string_data_size() == 0) return Status::OK(); + if (tensor.string_data_size() == 0) + return Status::OK(); return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT); } if (ONNX_NAMESPACE::TensorProto_DataType_STRING != tensor.data_type()) { @@ -425,7 +425,8 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_d /*out*/ bool* p_data, size_t expected_size) { if (nullptr == p_data) { const size_t size = raw_data != nullptr ? raw_data_len : tensor.int32_data_size(); - if (size == 0) return Status::OK(); + if (size == 0) + return Status::OK(); return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT); } if (ONNX_NAMESPACE::TensorProto_DataType_BOOL != tensor.data_type()) { @@ -452,7 +453,8 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_d /*out*/ MLFloat16* p_data, size_t expected_size) { if (nullptr == p_data) { const size_t size = raw_data != nullptr ? 
raw_data_len : tensor.int32_data_size(); - if (size == 0) return Status::OK(); + if (size == 0) + return Status::OK(); return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT); } if (ONNX_NAMESPACE::TensorProto_DataType_FLOAT16 != tensor.data_type()) { @@ -700,11 +702,8 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const std::filesy /*out*/ T* p_data, size_t expected_num_elements) { #if !defined(ORT_MINIMAL_BUILD) if (HasExternalData(tensor)) { - return UnpackTensorWithExternalData( - tensor, - model_path.empty() ? std::filesystem::path() : model_path.parent_path(), - expected_num_elements, - p_data); + return UnpackTensorWithExternalData(tensor, model_path.empty() ? std::filesystem::path() : model_path.parent_path(), + expected_num_elements, p_data); } #else ORT_UNUSED_PARAMETER(model_path); @@ -803,8 +802,8 @@ TensorShape GetTensorShapeFromTensorShapeProto(const ONNX_NAMESPACE::TensorShape const auto& dims = tensor_shape_proto.dim(); std::vector tensor_shape_vec(static_cast(dims.size())); for (int i = 0; i < dims.size(); ++i) { - tensor_shape_vec[i] = HasDimValue(dims[i]) ? dims[i].dim_value() - : -1; /* symbolic dimensions are represented as -1 in onnxruntime*/ + tensor_shape_vec[i] = + HasDimValue(dims[i]) ? dims[i].dim_value() : -1; /* symbolic dimensions are represented as -1 in onnxruntime*/ } return TensorShape(std::move(tensor_shape_vec)); } @@ -829,7 +828,8 @@ ORT_API_STATUS_IMPL(OrtInitializeBufferForTensor, _In_opt_ void* input, size_t i enum ONNXTensorElementDataType type) { OrtStatus* status = nullptr; ORT_TRY { - if (type != ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING || input == nullptr) return nullptr; + if (type != ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING || input == nullptr) + return nullptr; size_t tensor_size = input_len / sizeof(std::string); std::string* ptr = reinterpret_cast(input); for (size_t i = 0, n = tensor_size; i < n; ++i) { @@ -837,16 +837,15 @@ ORT_API_STATUS_IMPL(OrtInitializeBufferForTensor, _In_opt_ void* input, size_t i } } ORT_CATCH(const std::exception& ex) { - ORT_HANDLE_EXCEPTION([&]() { - status = OrtApis::CreateStatus(ORT_RUNTIME_EXCEPTION, ex.what()); - }); + ORT_HANDLE_EXCEPTION([&]() { status = OrtApis::CreateStatus(ORT_RUNTIME_EXCEPTION, ex.what()); }); } return status; } ORT_API(void, OrtUninitializeBuffer, _In_opt_ void* input, size_t input_len, enum ONNXTensorElementDataType type) { - if (type != ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING || input == nullptr) return; + if (type != ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING || input == nullptr) + return; size_t tensor_size = input_len / sizeof(std::string); std::string* ptr = reinterpret_cast(input); using std::string; @@ -875,9 +874,8 @@ static void DeleteCharArray(void* param) noexcept { } #if !defined(__wasm__) -static Status GetFileContent( - const Env& env, const std::filesystem::path& file_path, FileOffsetType offset, size_t length, - void*& raw_buffer, OrtCallback& deleter) { +static Status GetFileContent(const Env& env, const std::filesystem::path& file_path, FileOffsetType offset, + size_t length, void*& raw_buffer, OrtCallback& deleter) { // query length if it is 0 if (length == 0) { // The return type of std::filesystem::file_size is uintmax_t which could be bigger than size_t @@ -897,8 +895,8 @@ static Status GetFileContent( // if that fails, try to copy auto buffer = std::make_unique(length); - ORT_RETURN_IF_ERROR(env.ReadFileIntoBuffer( - file_path.native().c_str(), offset, length, gsl::make_span(buffer.get(), length))); + ORT_RETURN_IF_ERROR( + 
env.ReadFileIntoBuffer(file_path.native().c_str(), offset, length, gsl::make_span(buffer.get(), length))); deleter = OrtCallback{DeleteCharArray, buffer.get()}; raw_buffer = buffer.release(); @@ -907,8 +905,8 @@ static Status GetFileContent( #endif Status GetExtDataFromTensorProto(const Env& env, const std::filesystem::path& model_path, - const ONNX_NAMESPACE::TensorProto& tensor_proto, - void*& ext_data_buf, SafeInt& ext_data_len, OrtCallback& ext_data_deleter) { + const ONNX_NAMESPACE::TensorProto& tensor_proto, void*& ext_data_buf, + SafeInt& ext_data_len, OrtCallback& ext_data_deleter) { ORT_ENFORCE(utils::HasExternalData(tensor_proto)); std::basic_string tensor_proto_dir; if (!model_path.empty()) { @@ -917,8 +915,8 @@ Status GetExtDataFromTensorProto(const Env& env, const std::filesystem::path& mo std::basic_string external_data_file_path; FileOffsetType file_offset; SafeInt raw_data_safe_len = 0; - ORT_RETURN_IF_ERROR(GetExternalDataInfo(tensor_proto, tensor_proto_dir, external_data_file_path, file_offset, - raw_data_safe_len)); + ORT_RETURN_IF_ERROR( + GetExternalDataInfo(tensor_proto, tensor_proto_dir, external_data_file_path, file_offset, raw_data_safe_len)); if (external_data_file_path == onnxruntime::utils::kTensorProtoMemoryAddressTag) { // the value in location is the memory address of the data @@ -928,8 +926,8 @@ Status GetExtDataFromTensorProto(const Env& env, const std::filesystem::path& mo } else { #if defined(__wasm__) ORT_RETURN_IF(file_offset < 0 || file_offset + raw_data_safe_len >= 4294967296, - "External initializer: ", tensor_proto.name(), - " offset: ", file_offset, " size to read: ", static_cast(raw_data_safe_len), + "External initializer: ", tensor_proto.name(), " offset: ", file_offset, + " size to read: ", static_cast(raw_data_safe_len), " are out of bounds or can not be read in full (>4GB)."); auto buffer = std::make_unique(raw_data_safe_len); @@ -960,7 +958,8 @@ Status GetExtDataFromTensorProto(const Env& env, const std::filesystem::path& mo } try { - // Copy the file data (fileData,offset,length) into WebAssembly memory (HEAPU8,buffer,length). + // Copy the file data (fileData,offset,length) into WebAssembly memory + // (HEAPU8,buffer,length). HEAPU8.set(fileData.subarray(offset, offset + length), buffer); return 0; } catch { @@ -987,7 +986,8 @@ Status GetExtDataFromTensorProto(const Env& env, const std::filesystem::path& mo default: err_msg = "Unknown error occurred in memory copy."; } - return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to load external data file \"", external_data_file_path, "\", error: ", err_msg); + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to load external data file \"", external_data_file_path, + "\", error: ", err_msg); #else size_t file_length; // error reporting is inconsistent across platforms. Make sure the full path we attempted to open is included. 
@@ -1000,9 +1000,9 @@ Status GetExtDataFromTensorProto(const Env& env, const std::filesystem::path& mo SafeInt end_of_read(file_offset); end_of_read += raw_data_safe_len; ORT_RETURN_IF(file_offset < 0 || end_of_read > narrow(file_length), - "External initializer: ", tensor_proto.name(), - " offset: ", file_offset, " size to read: ", static_cast(raw_data_safe_len), - " given file_length: ", file_length, " are out of bounds or can not be read in full."); + "External initializer: ", tensor_proto.name(), " offset: ", file_offset, + " size to read: ", static_cast(raw_data_safe_len), " given file_length: ", file_length, + " are out of bounds or can not be read in full."); ORT_RETURN_IF_ERROR(GetFileContent(env, external_data_file_path.c_str(), file_offset, raw_data_safe_len, ext_data_buf, ext_data_deleter)); ext_data_len = raw_data_safe_len; @@ -1012,11 +1012,10 @@ Status GetExtDataFromTensorProto(const Env& env, const std::filesystem::path& mo return Status::OK(); } -#define CASE_PROTO(X, Y) \ - case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##X: \ - ORT_RETURN_IF_ERROR( \ - UnpackTensor(tensor_proto, raw_data, raw_data_len, \ - (Y*)preallocated, static_cast(tensor_size))); \ +#define CASE_PROTO(X, Y) \ + case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##X: \ + ORT_RETURN_IF_ERROR( \ + UnpackTensor(tensor_proto, raw_data, raw_data_len, (Y*)preallocated, static_cast(tensor_size))); \ break; /** @@ -1028,14 +1027,14 @@ Status GetExtDataFromTensorProto(const Env& env, const std::filesystem::path& mo * @return */ Status TensorProtoToTensor(const Env& env, const std::filesystem::path& model_path, - const ONNX_NAMESPACE::TensorProto& tensor_proto, - Tensor& tensor) { + const ONNX_NAMESPACE::TensorProto& tensor_proto, Tensor& tensor) { // Validate tensor compatibility TensorShape tensor_shape = GetTensorShapeFromTensorProto(tensor_proto); if (tensor_shape != tensor.Shape()) { return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "TensorProtoToTensor() tensor shape mismatch!"); } - const DataTypeImpl* const source_type = DataTypeImpl::TensorTypeFromONNXEnum(tensor_proto.data_type())->GetElementType(); + const DataTypeImpl* const source_type = + DataTypeImpl::TensorTypeFromONNXEnum(tensor_proto.data_type())->GetElementType(); if (source_type->Size() > tensor.DataType()->Size()) { return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "TensorProto type ", DataTypeImpl::ToString(source_type), " can not be written into Tensor type ", DataTypeImpl::ToString(tensor.DataType())); @@ -1117,14 +1116,12 @@ Status TensorProtoToTensor(const Env& env, const std::filesystem::path& model_pa } Status TensorProtoToOrtValue(const Env& env, const std::filesystem::path& model_path, - const ONNX_NAMESPACE::TensorProto& tensor_proto, - const MemBuffer& m, OrtValue& value) { + const ONNX_NAMESPACE::TensorProto& tensor_proto, const MemBuffer& m, OrtValue& value) { return TensorProtoToOrtValueImpl(env, model_path, tensor_proto, &m, nullptr, value); } Status TensorProtoToOrtValue(const Env& env, const std::filesystem::path& model_path, - const ONNX_NAMESPACE::TensorProto& tensor_proto, - AllocatorPtr alloc, OrtValue& value) { + const ONNX_NAMESPACE::TensorProto& tensor_proto, AllocatorPtr alloc, OrtValue& value) { return TensorProtoToOrtValueImpl(env, model_path, tensor_proto, nullptr, alloc, value); } @@ -1246,8 +1243,8 @@ common::Status ConstantNodeProtoToTensorProto(const ONNX_NAMESPACE::NodeProto& n ORT_UNUSED_PARAMETER(model_path); #endif default: - ORT_THROW("Unsupported attribute 
value type of ", constant_attribute.type(), - " in 'Constant' node '", node.name(), "'"); + ORT_THROW("Unsupported attribute value type of ", constant_attribute.type(), " in 'Constant' node '", node.name(), + "'"); } // set name last in case attribute type was tensor (would copy over name) @@ -1266,8 +1263,10 @@ common::Status ConstantNodeProtoToTensorProto(const ONNX_NAMESPACE::NodeProto& n static Status CopySparseData(size_t n_sparse_elements, const ONNX_NAMESPACE::TensorProto& indices, const std::filesystem::path& model_path, - gsl::span dims, - std::function copier) { + gsl::span + dims, + std::function + copier) { Status status = Status::OK(); TensorShape indices_shape(indices.dims().data(), indices.dims().size()); const auto elements = narrow(indices_shape.Size()); @@ -1284,7 +1283,8 @@ static Status CopySparseData(size_t n_sparse_elements, ORT_RETURN_IF_ERROR(UnpackInitializerData(indices, model_path, unpack_buffer)); indices_data = ReinterpretAsSpan(gsl::make_span(unpack_buffer)); } else { - ORT_RETURN_IF_NOT(indices.int64_data_size() == static_cast(elements), "Sparse indices int64 data size does not match expected"); + ORT_RETURN_IF_NOT(indices.int64_data_size() == static_cast(elements), + "Sparse indices int64 data size does not match expected"); indices_data = gsl::make_span(indices.int64_data().data(), elements); } break; @@ -1298,7 +1298,8 @@ static Status CopySparseData(size_t n_sparse_elements, unpack_buffer.clear(); unpack_buffer.shrink_to_fit(); } else { - ORT_RETURN_IF_NOT(indices.int32_data_size() == static_cast(elements), "Sparse indices int32 data size does not match expected"); + ORT_RETURN_IF_NOT(indices.int32_data_size() == static_cast(elements), + "Sparse indices int32 data size does not match expected"); indices_values.insert(indices_values.cend(), indices.int32_data().cbegin(), indices.int32_data().cend()); } indices_data = gsl::make_span(indices_values); @@ -1337,8 +1338,9 @@ static Status CopySparseData(size_t n_sparse_elements, break; } default: - return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH, - "Invalid SparseTensor indices. Should one of the following types: int8, int16, int32 or int64"); + return ORT_MAKE_STATUS( + ONNXRUNTIME, INVALID_GRAPH, + "Invalid SparseTensor indices. Should one of the following types: int8, int16, int32 or int64"); } if (indices_shape.NumDimensions() == 1) { @@ -1376,8 +1378,8 @@ static Status CopySparseData(size_t n_sparse_elements, ORT_ENFORCE(cur_index == indices_data.end()); } else { - status = ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH, "Invalid SparseTensor indices. Should be rank 0 or 1. Got:", - indices_shape); + status = ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH, + "Invalid SparseTensor indices. Should be rank 0 or 1. 
Got:", indices_shape); } return status; @@ -1425,53 +1427,45 @@ common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseT switch (element_size) { case 1: { status = CopySparseData( - n_sparse_elements, - indices, model_path, dims, - [sparse_data, dense_data](size_t from_idx, size_t to_idx) { + n_sparse_elements, indices, model_path, dims, [sparse_data, dense_data](size_t from_idx, size_t to_idx) { static_cast(dense_data)[to_idx] = static_cast(sparse_data)[from_idx]; }); break; } case 2: { - status = CopySparseData( - n_sparse_elements, - indices, model_path, dims, - [sparse_data, dense_data](size_t from_idx, size_t to_idx) { - const auto* src = static_cast(sparse_data) + from_idx; - auto* dst = static_cast(dense_data) + to_idx; - memcpy(dst, src, sizeof(uint16_t)); - }); + status = CopySparseData(n_sparse_elements, indices, model_path, dims, + [sparse_data, dense_data](size_t from_idx, size_t to_idx) { + const auto* src = static_cast(sparse_data) + from_idx; + auto* dst = static_cast(dense_data) + to_idx; + memcpy(dst, src, sizeof(uint16_t)); + }); break; } case 4: { - status = CopySparseData( - n_sparse_elements, - indices, model_path, dims, - [sparse_data, dense_data](size_t from_idx, size_t to_idx) { - const auto* src = static_cast(sparse_data) + from_idx; - auto* dst = static_cast(dense_data) + to_idx; - memcpy(dst, src, sizeof(uint32_t)); - }); + status = CopySparseData(n_sparse_elements, indices, model_path, dims, + [sparse_data, dense_data](size_t from_idx, size_t to_idx) { + const auto* src = static_cast(sparse_data) + from_idx; + auto* dst = static_cast(dense_data) + to_idx; + memcpy(dst, src, sizeof(uint32_t)); + }); break; } case 8: { - status = CopySparseData( - n_sparse_elements, - indices, model_path, dims, - [sparse_data, dense_data](size_t from_idx, size_t to_idx) { - const auto* src = static_cast(sparse_data) + from_idx; - auto* dst = static_cast(dense_data) + to_idx; - memcpy(dst, src, sizeof(uint64_t)); - }); + status = CopySparseData(n_sparse_elements, indices, model_path, dims, + [sparse_data, dense_data](size_t from_idx, size_t to_idx) { + const auto* src = static_cast(sparse_data) + from_idx; + auto* dst = static_cast(dense_data) + to_idx; + memcpy(dst, src, sizeof(uint64_t)); + }); break; } default: - return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, - "Element_size of: ", element_size, " is not supported.", " type: ", type); + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Element_size of: ", element_size, " is not supported.", + " type: ", type); } ORT_RETURN_IF_ERROR(status); @@ -1512,9 +1506,7 @@ inline void CopyElement(void* dst, const void* src, int64_t dst_index, } template -static void SetIndices(gsl::span gathered_indices, - std::string& raw_indices, - TensorProto& indices) { +static void SetIndices(gsl::span gathered_indices, std::string& raw_indices, TensorProto& indices) { raw_indices.resize(gathered_indices.size() * sizeof(T)); auto* ind_dest = reinterpret_cast(raw_indices.data()); size_t dest_index = 0; @@ -1532,8 +1524,7 @@ static void SetIndices(gsl::span gathered_indices, } static void SparsifyGeneric(const void* dense_raw_data, size_t n_dense_elements, size_t element_size, - IsZeroFunc is_zero, CopyElementFunc copy, - TensorProto& values, TensorProto& indices, + IsZeroFunc is_zero, CopyElementFunc copy, TensorProto& values, TensorProto& indices, size_t& nnz) { auto advance = [element_size](const void* start, size_t elements) -> const void* { return (reinterpret_cast(start) + elements * element_size); @@ -1614,28 +1605,28 @@ 
common::Status DenseTensorToSparseTensorProto(const ONNX_NAMESPACE::TensorProto& void* dense_data = dense_raw_data.data(); switch (element_size) { case 1: { - SparsifyGeneric(dense_data, n_dense_elements, element_size, - IsZero, CopyElement, values, indices, nnz); + SparsifyGeneric(dense_data, n_dense_elements, element_size, IsZero, CopyElement, values, + indices, nnz); break; } case 2: { - SparsifyGeneric(dense_data, n_dense_elements, element_size, - IsZero, CopyElement, values, indices, nnz); + SparsifyGeneric(dense_data, n_dense_elements, element_size, IsZero, CopyElement, values, + indices, nnz); break; } case 4: { - SparsifyGeneric(dense_data, n_dense_elements, element_size, - IsZero, CopyElement, values, indices, nnz); + SparsifyGeneric(dense_data, n_dense_elements, element_size, IsZero, CopyElement, values, + indices, nnz); break; } case 8: { - SparsifyGeneric(dense_data, n_dense_elements, element_size, - IsZero, CopyElement, values, indices, nnz); + SparsifyGeneric(dense_data, n_dense_elements, element_size, IsZero, CopyElement, values, + indices, nnz); break; } default: - return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, - "Element_size of: ", element_size, " is not supported.", " data_type: ", data_type); + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Element_size of: ", element_size, " is not supported.", + " data_type: ", data_type); } // Fix up shapes @@ -1655,38 +1646,36 @@ template common::Status GetSizeInBytesFromTensorProto(const ONN size_t* out); template common::Status GetSizeInBytesFromTensorProto<0>(const ONNX_NAMESPACE::TensorProto& tensor_proto, size_t* out); -#define CASE_UNPACK(TYPE, ELEMENT_TYPE, DATA_SIZE) \ - case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##TYPE: { \ - SafeInt tensor_byte_size; \ - size_t element_count = 0; \ - if (initializer.has_raw_data()) { \ - tensor_byte_size = initializer.raw_data().size(); \ - element_count = tensor_byte_size / sizeof(ELEMENT_TYPE); \ - } else { \ - element_count = initializer.DATA_SIZE(); \ - tensor_byte_size = element_count * sizeof(ELEMENT_TYPE); \ - } \ - unpacked_tensor.resize(tensor_byte_size); \ - return onnxruntime::utils::UnpackTensor( \ - initializer, \ - initializer.has_raw_data() ? initializer.raw_data().data() : nullptr, \ - initializer.has_raw_data() ? initializer.raw_data().size() : 0, \ - reinterpret_cast(unpacked_tensor.data()), element_count); \ - break; \ - } - -#define CASE_UNPACK_INT4(TYPE, ELEMENT_TYPE, DATA_SIZE) \ - case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##TYPE: { \ - TensorShape tensor_shape = GetTensorShapeFromTensorProto(initializer); \ - size_t element_count = static_cast(tensor_shape.Size()); \ - size_t packed_element_count = ELEMENT_TYPE::CalcNumInt4Pairs(element_count); \ - unpacked_tensor.resize(packed_element_count * sizeof(ELEMENT_TYPE)); \ - return onnxruntime::utils::UnpackTensor( \ - initializer, \ - initializer.has_raw_data() ? initializer.raw_data().data() : nullptr, \ - initializer.has_raw_data() ? 
initializer.raw_data().size() : 0, \ - reinterpret_cast(unpacked_tensor.data()), element_count); \ - break; \ +#define CASE_UNPACK(TYPE, ELEMENT_TYPE, DATA_SIZE) \ + case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##TYPE: { \ + SafeInt tensor_byte_size; \ + size_t element_count = 0; \ + if (initializer.has_raw_data()) { \ + tensor_byte_size = initializer.raw_data().size(); \ + element_count = tensor_byte_size / sizeof(ELEMENT_TYPE); \ + } else { \ + element_count = initializer.DATA_SIZE(); \ + tensor_byte_size = element_count * sizeof(ELEMENT_TYPE); \ + } \ + unpacked_tensor.resize(tensor_byte_size); \ + return onnxruntime::utils::UnpackTensor(initializer, \ + initializer.has_raw_data() ? initializer.raw_data().data() : nullptr, \ + initializer.has_raw_data() ? initializer.raw_data().size() : 0, \ + reinterpret_cast(unpacked_tensor.data()), element_count); \ + break; \ + } + +#define CASE_UNPACK_INT4(TYPE, ELEMENT_TYPE, DATA_SIZE) \ + case ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_##TYPE: { \ + TensorShape tensor_shape = GetTensorShapeFromTensorProto(initializer); \ + size_t element_count = static_cast(tensor_shape.Size()); \ + size_t packed_element_count = ELEMENT_TYPE::CalcNumInt4Pairs(element_count); \ + unpacked_tensor.resize(packed_element_count * sizeof(ELEMENT_TYPE)); \ + return onnxruntime::utils::UnpackTensor(initializer, \ + initializer.has_raw_data() ? initializer.raw_data().data() : nullptr, \ + initializer.has_raw_data() ? initializer.raw_data().size() : 0, \ + reinterpret_cast(unpacked_tensor.data()), element_count); \ + break; \ } Status UnpackInitializerData(const onnx::TensorProto& initializer, @@ -1728,13 +1717,11 @@ Status UnpackInitializerData(const onnx::TensorProto& initializer, default: break; } - return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, - "Unsupported type: ", initializer.data_type()); + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Unsupported type: ", initializer.data_type()); } #undef CASE_UNPACK -Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& initializer, - std::vector& unpacked_tensor) { +Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& initializer, std::vector& unpacked_tensor) { ORT_RETURN_IF(initializer.data_location() == TensorProto_DataLocation_EXTERNAL, "The given initializer contains external data"); return UnpackInitializerData(initializer, std::filesystem::path(), unpacked_tensor); diff --git a/onnxruntime/core/framework/tensorprotoutils.h b/onnxruntime/core/framework/tensorprotoutils.h index 552dd465ef4b5..135f8063d9e28 100644 --- a/onnxruntime/core/framework/tensorprotoutils.h +++ b/onnxruntime/core/framework/tensorprotoutils.h @@ -42,7 +42,7 @@ TensorShape GetTensorShapeFromTensorProto(const ONNX_NAMESPACE::TensorProto& ten /** * deserialize a TensorProto into a preallocated memory buffer on CPU. * \param tensor_proto_path A local file path of where the 'input' was loaded from. - * Can be NULL if the tensor proto doesn't have external data or it was loaded from + * Can be empty if the tensor proto doesn't have external data or it was loaded from * the current working dir. This path could be either a relative path or an absolute path. * \return Status::OK on success with 'value' containing the Tensor in CPU based memory. */ @@ -53,7 +53,7 @@ common::Status TensorProtoToOrtValue(const Env& env, const std::filesystem::path /** * deserialize a TensorProto into a buffer on CPU allocated using 'alloc'. 
* \param tensor_proto_path A local file path of where the 'input' was loaded from. - * Can be NULL if the tensor proto doesn't have external data or it was loaded from + * Can be empty if the tensor proto doesn't have external data or it was loaded from * the current working dir. This path could be either a relative path or an absolute path. * \param alloc Allocator to use for allocating the buffer. Must allocate CPU based memory. * \return Status::OK on success with 'value' containing the Tensor in CPU based memory. From 5f3080eeb157724d7503b02d5fbf5b21937b99e6 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Thu, 6 Jun 2024 20:53:13 +0000 Subject: [PATCH 18/50] update --- onnxruntime/core/providers/vitisai/imp/graph.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/vitisai/imp/graph.cc b/onnxruntime/core/providers/vitisai/imp/graph.cc index 061bc414fcec7..a89f269eaf35b 100644 --- a/onnxruntime/core/providers/vitisai/imp/graph.cc +++ b/onnxruntime/core/providers/vitisai/imp/graph.cc @@ -109,7 +109,7 @@ void graph_save(const Graph& graph, const std::string& filename, const std::stri if (initializer_size_threshold == std::numeric_limits::max()) { model_proto = model.ToProto(); } else { - model_proto = model.ToGraphProtoWithExternalInitializers(filename_dat, graph.ModelPath().ToPathString(), initializer_size_threshold); + model_proto = model.ToGraphProtoWithExternalInitializers(filename_dat, graph.ModelPath(), initializer_size_threshold); } auto& metadata = model.MetaData(); if (!metadata.empty()) { From 4da522932828f222279fbb1b055aeb777d4369ad Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Thu, 6 Jun 2024 14:58:09 -0700 Subject: [PATCH 19/50] update --- include/onnxruntime/core/graph/graph.h | 4 ++-- onnxruntime/core/graph/graph.cc | 13 +++++-------- onnxruntime/core/graph/model.cc | 10 +++++----- onnxruntime/core/graph/model.h | 10 ++++------ .../providers/shared_library/provider_interfaces.h | 2 +- .../shared_library/provider_wrappedtypes.h | 2 +- onnxruntime/core/session/provider_bridge_ort.cc | 2 +- .../save_model_with_external_initializers.cc | 11 ++++++----- 8 files changed, 25 insertions(+), 29 deletions(-) diff --git a/include/onnxruntime/core/graph/graph.h b/include/onnxruntime/core/graph/graph.h index cfdd4fe96de7e..5f5a10428a158 100644 --- a/include/onnxruntime/core/graph/graph.h +++ b/include/onnxruntime/core/graph/graph.h @@ -1140,13 +1140,13 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi ONNX_NAMESPACE::GraphProto ToGraphProto() const; /** Gets the GraphProto representation of this Graph - @params external_file_name name of the binary file to use for initializers. Must be a UTF-8 string. + @param external_file_path File path of the binary file to use for initializers. @param file_path path of the model file. @param initializer_size_threshold initializers larger or equal to this threshold (in bytes) are saved in the external file. Initializer smaller than this threshold are included in the onnx file. @returns GraphProto serialization of the graph. 
*/ - ONNX_NAMESPACE::GraphProto ToGraphProtoWithExternalInitializers(const std::string& external_file_name, + ONNX_NAMESPACE::GraphProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path, const std::filesystem::path& file_path, size_t initializer_size_threshold) const; diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc index fafc208629e7a..e7795317016e5 100644 --- a/onnxruntime/core/graph/graph.cc +++ b/onnxruntime/core/graph/graph.cc @@ -3972,19 +3972,16 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProto() const { return result; } -ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std::string& external_file_name, +ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path, const std::filesystem::path& destination_file_path, size_t initializer_size_threshold) const { GraphProto result; ToGraphProtoInternal(result); - std::filesystem::path external_file_path = ToPathString(external_file_name); // If destination_file_path is just a file name without a path separator, for example: "model.onnx". Its parent path could be empty. - if (destination_file_path.has_parent_path()) { - // Save external data file in same directory as model - external_file_path = destination_file_path.parent_path() / external_file_path; - } + // Else, save external data file in same directory as model + const std::filesystem::path modified_external_file_path = destination_file_path.has_parent_path() ? destination_file_path.parent_path() / external_file_path : external_file_path; - std::ofstream external_stream(external_file_path, std::ofstream::out | std::ofstream::binary); + std::ofstream external_stream(modified_external_file_path, std::ofstream::out | std::ofstream::binary); ORT_ENFORCE(external_stream.is_open()); int64_t external_offset = 0; @@ -4021,7 +4018,7 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std output_proto->set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation::TensorProto_DataLocation_EXTERNAL); ONNX_NAMESPACE::StringStringEntryProto* location = output_proto->add_external_data(); location->set_key("location"); - location->set_value(external_file_name); + location->set_value(ToUTF8String(external_file_path.native())); ONNX_NAMESPACE::StringStringEntryProto* offset = output_proto->add_external_data(); offset->set_key("offset"); offset->set_value(std::to_string(external_offset)); diff --git a/onnxruntime/core/graph/model.cc b/onnxruntime/core/graph/model.cc index d93a92d59dea5..067526a7e6e7a 100644 --- a/onnxruntime/core/graph/model.cc +++ b/onnxruntime/core/graph/model.cc @@ -378,8 +378,8 @@ ModelProto Model::ToProto() const { return result; } -ModelProto Model::ToGraphProtoWithExternalInitializers(const std::string& external_file_name, - const PathString& file_path, +ModelProto Model::ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_name, + const std::filesystem::path& file_path, size_t initializer_size_threshold) const { ModelProto result(model_proto_); const auto& graph = *graph_; @@ -602,7 +602,7 @@ Status Model::Save(Model& model, const std::wstring& file_path) { template static Status SaveModelWithExternalInitializers(Model& model, const T& file_path, - const std::string& external_file_name, + const std::filesystem::path& external_file_name, size_t initializer_size_threshold) { int fd = 0; Status status = Env::Default().FileOpenWr(file_path, fd); @@ -643,7 +643,7 @@ Status 
Model::Save(Model& model, const std::string& file_path) { } Status Model::SaveWithExternalInitializers(Model& model, const std::filesystem::path& file_path, - const std::string& external_file_name, + const std::filesystem::path& external_file_name, size_t initializer_size_threshold) { return SaveModelWithExternalInitializers(model, file_path, external_file_name, initializer_size_threshold); } @@ -760,7 +760,7 @@ Status Model::Save(Model& model, int p_fd) { Status Model::SaveWithExternalInitializers(Model& model, int fd, const std::filesystem::path& file_path, - const std::string& external_file_name, + const std::filesystem::path& external_file_name, size_t initializer_size_threshold) { if (fd < 0) { return Status(ONNXRUNTIME, INVALID_ARGUMENT, " is less than 0."); diff --git a/onnxruntime/core/graph/model.h b/onnxruntime/core/graph/model.h index db18d56364679..6172db8262c89 100644 --- a/onnxruntime/core/graph/model.h +++ b/onnxruntime/core/graph/model.h @@ -187,8 +187,8 @@ class Model { // Get model's serialization proto data. // Save initializer larger than the given threshold (in bytes) into an external binary file // with the given name. This function is useful to avoid hitting the size limit of protobuf files. - ONNX_NAMESPACE::ModelProto ToGraphProtoWithExternalInitializers(const std::string& external_file_name, - const PathString& file_path, + ONNX_NAMESPACE::ModelProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_name, + const std::filesystem::path& file_path, size_t initializer_size_threshold) const; #ifdef _WIN32 @@ -199,17 +199,15 @@ class Model { static common::Status Save(Model& model, int fd); // Save the model to file using an external file for initializers larger than the given threshold (in bytes). 
- // external_file_name must be a UTF-8 string static common::Status SaveWithExternalInitializers(Model& model, const std::filesystem::path& file_path, - const std::string& external_file_name, + const std::filesystem::path& external_file_path, size_t initializer_size_threshold); - // external_file_name must be a UTF-8 string static common::Status SaveWithExternalInitializers(Model& model, int fd, const std::filesystem::path& file_path, - const std::string& external_file_name, + const std::filesystem::path& external_file_path, size_t initializer_size_threshold); static common::Status Load(std::istream& model_istream, ONNX_NAMESPACE::ModelProto* p_model_proto); diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h index 7dac474f011d4..7454b322a310c 100644 --- a/onnxruntime/core/providers/shared_library/provider_interfaces.h +++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h @@ -807,7 +807,7 @@ struct ProviderHost { virtual void Model__operator_delete(Model* p) = 0; virtual Graph& Model__MainGraph(Model* p) = 0; virtual std::unique_ptr Model__ToProto(Model* p) = 0; - virtual std::unique_ptr Model__ToGraphProtoWithExternalInitializers(Model* p, const std::string& external_file_name, const PathString& file_path, size_t initializer_size_threshold) = 0; + virtual std::unique_ptr Model__ToGraphProtoWithExternalInitializers(Model* p, const std::filesystem::path& external_file_name, const std::filesystem::path& file_path, size_t initializer_size_threshold) = 0; virtual const ModelMetaData& Model__MetaData(const Model* p) const noexcept = 0; virtual Status Model__Load(const PathString& file_path, /*out*/ ONNX_NAMESPACE::ModelProto& model_proto) = 0; diff --git a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h index ac16029e86639..2ccd05fe9df60 100644 --- a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h +++ b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h @@ -840,7 +840,7 @@ struct Model final { Graph& MainGraph() { return g_host->Model__MainGraph(this); } std::unique_ptr ToProto() { return g_host->Model__ToProto(this); } - std::unique_ptr ToGraphProtoWithExternalInitializers(const std::string& external_file_name, const PathString& file_path, size_t initializer_size_threshold) { return g_host->Model__ToGraphProtoWithExternalInitializers(this, external_file_name, file_path, initializer_size_threshold); } + std::unique_ptr ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_name, const std::filesystem::path& file_path, size_t initializer_size_threshold) { return g_host->Model__ToGraphProtoWithExternalInitializers(this, external_file_name, file_path, initializer_size_threshold); } const ModelMetaData& MetaData() const noexcept { return g_host->Model__MetaData(this); } Model() = delete; diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 5bcb2c040f9ad..9b2c897281cf7 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1039,7 +1039,7 @@ struct ProviderHostImpl : ProviderHost { void Model__operator_delete(Model* p) override { delete p; } Graph& Model__MainGraph(Model* p) override { return p->MainGraph(); } std::unique_ptr Model__ToProto(Model* p) override { return std::make_unique(p->ToProto()); } - std::unique_ptr 
Model__ToGraphProtoWithExternalInitializers(Model* p, const std::string& external_file_name, const PathString& file_path, size_t initializer_size_threshold) override { return std::make_unique(p->ToGraphProtoWithExternalInitializers(external_file_name, file_path, initializer_size_threshold)); }; + std::unique_ptr Model__ToGraphProtoWithExternalInitializers(Model* p, const std::filesystem::path& external_file_name, const std::filesystem::path& file_path, size_t initializer_size_threshold) override { return std::make_unique(p->ToGraphProtoWithExternalInitializers(external_file_name, file_path, initializer_size_threshold)); }; const ModelMetaData& Model__MetaData(const Model* p) const noexcept override { return p->MetaData(); }; Status Model__Load(const PathString& file_path, /*out*/ ONNX_NAMESPACE::ModelProto& model_proto) override { return Model::Load(file_path, model_proto); } diff --git a/onnxruntime/test/framework/save_model_with_external_initializers.cc b/onnxruntime/test/framework/save_model_with_external_initializers.cc index 8bad22dc2c77b..fe640b3fb8728 100644 --- a/onnxruntime/test/framework/save_model_with_external_initializers.cc +++ b/onnxruntime/test/framework/save_model_with_external_initializers.cc @@ -24,14 +24,15 @@ Status LoadSaveAndCompareModel(const std::filesystem::path& input_onnx, const std::filesystem::path& output_onnx, const std::filesystem::path& output_external_init_file, size_t initializer_size_threshold) { + auto logger = DefaultLoggingManager().CreateLogger("LoadSaveAndCompareModel"); std::shared_ptr model; - ORT_RETURN_IF_ERROR(Model::Load(input_onnx, model, nullptr, DefaultLoggingManager().DefaultLogger())); + ORT_RETURN_IF_ERROR(Model::Load(input_onnx, model, nullptr, *logger)); std::filesystem::remove(output_onnx); std::filesystem::remove(output_external_init_file); - ORT_RETURN_IF_ERROR(Model::SaveWithExternalInitializers(*model, output_onnx, ToUTF8String(output_external_init_file.native()), initializer_size_threshold)); + ORT_RETURN_IF_ERROR(Model::SaveWithExternalInitializers(*model, output_onnx, output_external_init_file, initializer_size_threshold)); std::shared_ptr model_from_external; - ORT_RETURN_IF_ERROR(Model::Load(ToPathString(output_onnx), model_from_external, nullptr, DefaultLoggingManager().DefaultLogger())); + ORT_RETURN_IF_ERROR(Model::Load(output_onnx.native(), model_from_external, nullptr, *logger)); Graph& graph = model->MainGraph(); // Perform shape inference on the graph, if this succeeds then it means that we could correctly read the @@ -55,13 +56,13 @@ Status LoadSaveAndCompareModel(const std::filesystem::path& input_onnx, std::vector tensor_proto_data; model_path = input_onnx; external_data_path = (!input_external_init_file.empty()) ? 
(model_path.parent_path() / input_external_init_file) : std::filesystem::path();
-    ORT_THROW_IF_ERROR(utils::UnpackInitializerData(*tensor_proto, external_data_path, tensor_proto_data));
+    ORT_RETURN_IF_ERROR(utils::UnpackInitializerData(*tensor_proto, external_data_path, tensor_proto_data));
     size_t tensor_proto_size = tensor_proto_data.size();

     std::vector<uint8_t> from_external_tensor_proto_data;
     model_path = output_onnx;
     external_data_path = model_path.parent_path() / output_external_init_file;
-    ORT_THROW_IF_ERROR(utils::UnpackInitializerData(*from_external_tensor_proto, model_path, from_external_tensor_proto_data));
+    ORT_RETURN_IF_ERROR(utils::UnpackInitializerData(*from_external_tensor_proto, model_path, from_external_tensor_proto_data));
     size_t from_external_tensor_proto_size = from_external_tensor_proto_data.size();

     if (from_external_tensor_proto_size < initializer_size_threshold) {

From e7871721b7ea271f7c6457535b26c6b63e5158e5 Mon Sep 17 00:00:00 2001
From: Changming Sun
Date: Thu, 6 Jun 2024 15:40:54 -0700
Subject: [PATCH 20/50] update

---
 .../test/framework/save_model_with_external_initializers.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/onnxruntime/test/framework/save_model_with_external_initializers.cc b/onnxruntime/test/framework/save_model_with_external_initializers.cc
index fe640b3fb8728..8af53ea356159 100644
--- a/onnxruntime/test/framework/save_model_with_external_initializers.cc
+++ b/onnxruntime/test/framework/save_model_with_external_initializers.cc
@@ -48,7 +48,7 @@ Status LoadSaveAndCompareModel(const std::filesystem::path& input_onnx,
   // Compare the initializers of the two versions.
   std::filesystem::path model_path{};
   std::filesystem::path external_data_path{};
-  for (auto i : initializers) {
+  for (const auto& i : initializers) {
     const std::string kInitName = i.first;
     const ONNX_NAMESPACE::TensorProto* tensor_proto = i.second;
     const ONNX_NAMESPACE::TensorProto* from_external_tensor_proto = initializers_from_external[kInitName];
@@ -67,10 +67,10 @@ Status LoadSaveAndCompareModel(const std::filesystem::path& input_onnx,

     if (from_external_tensor_proto_size < initializer_size_threshold) {
       // 'Small' tensors should be embedded in the onnx file.
-      EXPECT_EQ(from_external_tensor_proto->data_location(), ONNX_NAMESPACE::TensorProto_DataLocation::TensorProto_DataLocation_DEFAULT);
+      ORT_RETURN_IF_NOT(from_external_tensor_proto->data_location() == ONNX_NAMESPACE::TensorProto_DataLocation::TensorProto_DataLocation_DEFAULT);
     } else {
       // 'Large' tensors should be added to the external binary file.
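For reference, this is the placement rule the checks around here encode: when the model is saved with external initializers, any tensor whose payload is at least initializer_size_threshold bytes is moved to the side-car binary file and its TensorProto is rewritten to point there, while smaller tensors stay embedded. A minimal sketch of inspecting that outcome, assuming only the generated ONNX protobuf API (the helper name is illustrative, not from this patch series):

    #include <iostream>
    #include "core/graph/onnx_protobuf.h"

    // Prints where an initializer's payload ended up after saving.
    void PrintInitializerPlacement(const ONNX_NAMESPACE::TensorProto& t) {
      if (t.data_location() == ONNX_NAMESPACE::TensorProto_DataLocation::TensorProto_DataLocation_EXTERNAL) {
        // External tensors carry "location"/"offset"/"length" key-value entries
        // pointing into the binary file written next to the model.
        for (const auto& entry : t.external_data()) {
          std::cout << entry.key() << " = " << entry.value() << "\n";
        }
      } else {
        std::cout << t.name() << " is embedded in the .onnx file itself\n";
      }
    }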
-      EXPECT_EQ(from_external_tensor_proto->data_location(), ONNX_NAMESPACE::TensorProto_DataLocation::TensorProto_DataLocation_EXTERNAL);
+      ORT_RETURN_IF_NOT(from_external_tensor_proto->data_location() == ONNX_NAMESPACE::TensorProto_DataLocation::TensorProto_DataLocation_EXTERNAL);
     }
     ORT_RETURN_IF_NOT(tensor_proto_size == from_external_tensor_proto_size, "size mismatch");

From 72eb1e147c288c1da6117d53f484033e5888c16e Mon Sep 17 00:00:00 2001
From: Changming Sun
Date: Thu, 6 Jun 2024 15:59:41 -0700
Subject: [PATCH 21/50] update

---
 onnxruntime/core/framework/tensorprotoutils.cc | 14 --------------
 onnxruntime/core/session/custom_ops.cc         |  3 ++-
 2 files changed, 2 insertions(+), 15 deletions(-)

diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc
index cfe6c2e520b99..b4150ecc23db2 100644
--- a/onnxruntime/core/framework/tensorprotoutils.cc
+++ b/onnxruntime/core/framework/tensorprotoutils.cc
@@ -989,20 +989,6 @@ Status GetExtDataFromTensorProto(const Env& env, const std::filesystem::path& mo
     return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to load external data file \"", external_data_file_path,
                            "\", error: ", err_msg);
 #else
-  size_t file_length;
-  // error reporting is inconsistent across platforms. Make sure the full path we attempted to open is included.
-  auto status = env.GetFileLength(external_data_file_path.c_str(), file_length);
-  if (!status.IsOK()) {
-    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "GetFileLength for ", ToUTF8String(external_data_file_path),
-                           " failed:", status.ErrorMessage());
-  }
-
-  SafeInt<FileOffsetType> end_of_read(file_offset);
-  end_of_read += raw_data_safe_len;
-  ORT_RETURN_IF(file_offset < 0 || end_of_read > narrow<FileOffsetType>(file_length),
-                "External initializer: ", tensor_proto.name(), " offset: ", file_offset,
-                " size to read: ", static_cast<size_t>(raw_data_safe_len), " given file_length: ", file_length,
-                " are out of bounds or can not be read in full.");
   ORT_RETURN_IF_ERROR(GetFileContent(env, external_data_file_path.c_str(), file_offset, raw_data_safe_len,
                                      ext_data_buf, ext_data_deleter));
   ext_data_len = raw_data_safe_len;

diff --git a/onnxruntime/core/session/custom_ops.cc b/onnxruntime/core/session/custom_ops.cc
index 59c36488d2fd9..7102dbfc750ed 100644
--- a/onnxruntime/core/session/custom_ops.cc
+++ b/onnxruntime/core/session/custom_ops.cc
@@ -585,7 +585,8 @@ ORT_API_STATUS_IMPL(OrtApis::KernelInfoGetAttribute_tensor, _In_ const OrtKernel
   // Deserialize TensorProto into pre-allocated, empty Tensor.
   // TODO: here the TensorProto loses model path information, so it cannot be an external tensor.
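The TODO above means an initializer that points at external data has to be rejected at this call site. A hedged sketch of such a guard, reusing ORT's status macros (the helper itself is illustrative and not part of the patch):

    // Rejects an initializer whose payload lives outside the model file, since
    // no model path is available here to resolve it against.
    onnxruntime::common::Status RejectExternalData(const ONNX_NAMESPACE::TensorProto& tensor_proto) {
      if (tensor_proto.data_location() == ONNX_NAMESPACE::TensorProto_DataLocation::TensorProto_DataLocation_EXTERNAL) {
        return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "initializer '", tensor_proto.name(),
                               "' uses external data but no model path is available to resolve it");
      }
      return onnxruntime::common::Status::OK();
    }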
-  status = onnxruntime::utils::TensorProtoToTensor(onnxruntime::Env::Default(), std::filesystem::path(), tensor_proto, *tensorp);
+  status = onnxruntime::utils::TensorProtoToTensor(onnxruntime::Env::Default(), std::filesystem::path(),
+                                                   tensor_proto, *tensorp);
   if (!status.IsOK()) {
     return onnxruntime::ToOrtStatus(status);
   }

From f5b9abf2b1a47421e21f71b4f6fc2081630a0d9b Mon Sep 17 00:00:00 2001
From: Changming Sun
Date: Fri, 7 Jun 2024 02:35:40 +0000
Subject: [PATCH 22/50] update

---
 .../test/framework/save_model_with_external_initializers.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/onnxruntime/test/framework/save_model_with_external_initializers.cc b/onnxruntime/test/framework/save_model_with_external_initializers.cc
index 8af53ea356159..447b0edef879b 100644
--- a/onnxruntime/test/framework/save_model_with_external_initializers.cc
+++ b/onnxruntime/test/framework/save_model_with_external_initializers.cc
@@ -67,10 +67,10 @@ Status LoadSaveAndCompareModel(const std::filesystem::path& input_onnx,

     if (from_external_tensor_proto_size < initializer_size_threshold) {
       // 'Small' tensors should be embedded in the onnx file.
-      ORT_RETURN_IF_NOT(from_external_tensor_proto->data_location() == ONNX_NAMESPACE::TensorProto_DataLocation::TensorProto_DataLocation_DEFAULT);
+      ORT_RETURN_IF_NOT(from_external_tensor_proto->data_location() == ONNX_NAMESPACE::TensorProto_DataLocation::TensorProto_DataLocation_DEFAULT, "location mismatch");
     } else {
       // 'Large' tensors should be added to the external binary file.
-      ORT_RETURN_IF_NOT(from_external_tensor_proto->data_location() == ONNX_NAMESPACE::TensorProto_DataLocation::TensorProto_DataLocation_EXTERNAL);
+      ORT_RETURN_IF_NOT(from_external_tensor_proto->data_location() == ONNX_NAMESPACE::TensorProto_DataLocation::TensorProto_DataLocation_EXTERNAL, "location mismatch");
     }
     ORT_RETURN_IF_NOT(tensor_proto_size == from_external_tensor_proto_size, "size mismatch");

From 7e6c67b019ab1515304ca7a104ef9419970972c8 Mon Sep 17 00:00:00 2001
From: Changming Sun
Date: Fri, 7 Jun 2024 13:10:40 -0700
Subject: [PATCH 23/50] update

---
 onnxruntime/core/framework/tensorprotoutils.cc | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc
index b4150ecc23db2..bf2c946a73170 100644
--- a/onnxruntime/core/framework/tensorprotoutils.cc
+++ b/onnxruntime/core/framework/tensorprotoutils.cc
@@ -989,6 +989,16 @@ Status GetExtDataFromTensorProto(const Env& env, const std::filesystem::path& mo
     return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to load external data file \"", external_data_file_path,
                            "\", error: ", err_msg);
 #else
+  // The GetFileContent function doesn't report an error if the requested data range is invalid. Therefore we need to
+  // manually check the file size first.
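For comparison, the same bounds check rendered with only the standard library (the hunk below uses ORT's SafeInt and status macros instead; the exception choice here is illustrative):

    #include <cstdint>
    #include <filesystem>
    #include <stdexcept>

    // Throws unless [offset, offset + length) fits entirely inside the file.
    void CheckReadRange(const std::filesystem::path& file, std::int64_t offset, std::uint64_t length) {
      const std::uintmax_t file_size = std::filesystem::file_size(file);  // throws filesystem_error on failure
      if (offset < 0 || static_cast<std::uintmax_t>(offset) > file_size ||
          length > file_size - static_cast<std::uintmax_t>(offset)) {
        throw std::out_of_range("requested range is outside the external data file");
      }
    }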
+  std::uintmax_t file_length = std::filesystem::file_size(external_data_file_path);
+
+  SafeInt<FileOffsetType> end_of_read(file_offset);
+  end_of_read += raw_data_safe_len;
+  ORT_RETURN_IF(file_offset < 0 || static_cast<std::uintmax_t>(end_of_read) > file_length,
+                "External initializer: ", tensor_proto.name(), " offset: ", file_offset,
+                " size to read: ", static_cast<size_t>(raw_data_safe_len), " given file_length: ", file_length,
+                " are out of bounds or can not be read in full.");
   ORT_RETURN_IF_ERROR(GetFileContent(env, external_data_file_path.c_str(), file_offset, raw_data_safe_len,
                                      ext_data_buf, ext_data_deleter));
   ext_data_len = raw_data_safe_len;

From 0d77607d8d25472338c19a9b75bfba926de76663 Mon Sep 17 00:00:00 2001
From: Changming Sun
Date: Fri, 7 Jun 2024 13:11:32 -0700
Subject: [PATCH 24/50] update

---
 onnxruntime/core/framework/tensorprotoutils.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/onnxruntime/core/framework/tensorprotoutils.h b/onnxruntime/core/framework/tensorprotoutils.h
index 135f8063d9e28..b607cb488d831 100644
--- a/onnxruntime/core/framework/tensorprotoutils.h
+++ b/onnxruntime/core/framework/tensorprotoutils.h
@@ -5,6 +5,7 @@
 #include
 #include
+#include <filesystem>

 #ifndef SHARED_PROVIDER
 #include "core/common/common.h"
@@ -18,7 +19,7 @@
 #include "core/framework/tensor_external_data_info.h"
 #include "core/graph/onnx_protobuf.h"
 #include "core/platform/env.h"
-#include <filesystem>
+
 namespace ONNX_NAMESPACE {
 class TensorProto;

From 2039f95806852184fbea75ed96958c31d0f5eca2 Mon Sep 17 00:00:00 2001
From: Changming Sun
Date: Fri, 7 Jun 2024 21:10:49 +0000
Subject: [PATCH 25/50] update

---
 onnxruntime/test/optimizer/initializer_test.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnxruntime/test/optimizer/initializer_test.cc b/onnxruntime/test/optimizer/initializer_test.cc
index 902ea01a68f67..9e55d9b2ef921 100644
--- a/onnxruntime/test/optimizer/initializer_test.cc
+++ b/onnxruntime/test/optimizer/initializer_test.cc
@@ -96,7 +96,7 @@ TEST(OptimizerInitializerTest, LoadExternalData) {

   // bad model paths
   EXPECT_THROW(Initializer i(tensor_proto_base, std::filesystem::path()), OnnxRuntimeException);
-  EXPECT_THROW(Initializer i(tensor_proto_base, ORT_TSTR("invalid/directory")), OnnxRuntimeException);
+  EXPECT_THROW(Initializer i(tensor_proto_base, ORT_TSTR("invalid/directory")), std::filesystem::filesystem_error);

   // bad length
   {

From 86751d1dc8cd27f7bccca05c8532fcf1d0f2746a Mon Sep 17 00:00:00 2001
From: Changming Sun
Date: Mon, 10 Jun 2024 17:31:07 +0000
Subject: [PATCH 26/50] Add filesystem header

---
 include/onnxruntime/core/graph/graph_viewer.h                 | 3 ++-
 onnxruntime/core/framework/tensorprotoutils.h                 | 1 -
 onnxruntime/core/graph/model.h                                | 1 +
 onnxruntime/core/optimizer/optimizer_execution_frame.h        | 1 +
 onnxruntime/core/providers/cpu/ml/label_encoder.h             | 2 +-
 .../dml/DmlExecutionProvider/src/DmlRuntimeFusedGraphKernel.h | 2 +-
 .../providers/dml/DmlExecutionProvider/src/GraphDescBuilder.h | 1 +
 .../dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.h       | 2 ++
 .../core/providers/dml/DmlExecutionProvider/src/Utility.h     | 2 +-
 .../core/providers/nnapi/nnapi_builtin/builders/helper.h      | 1 +
 .../nnapi/nnapi_builtin/builders/op_builder_helpers.h         | 1 +
 onnxruntime/core/session/inference_session.h                  | 1 +
 12 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/include/onnxruntime/core/graph/graph_viewer.h b/include/onnxruntime/core/graph/graph_viewer.h
index e904c244c0d19..9385e2f092e58 100644
--- a/include/onnxruntime/core/graph/graph_viewer.h
+++
b/include/onnxruntime/core/graph/graph_viewer.h @@ -2,10 +2,11 @@ // Licensed under the MIT License. #pragma once +#include +#include #include "core/graph/graph.h" #include "core/framework/session_options.h" -#include namespace onnxruntime { class Function; diff --git a/onnxruntime/core/framework/tensorprotoutils.h b/onnxruntime/core/framework/tensorprotoutils.h index b607cb488d831..2f3f942e75578 100644 --- a/onnxruntime/core/framework/tensorprotoutils.h +++ b/onnxruntime/core/framework/tensorprotoutils.h @@ -20,7 +20,6 @@ #include "core/graph/onnx_protobuf.h" #include "core/platform/env.h" - namespace ONNX_NAMESPACE { class TensorProto; class TensorShapeProto; diff --git a/onnxruntime/core/graph/model.h b/onnxruntime/core/graph/model.h index 6172db8262c89..0c971d28b8ab4 100644 --- a/onnxruntime/core/graph/model.h +++ b/onnxruntime/core/graph/model.h @@ -7,6 +7,7 @@ #include #include #include +#include #include "core/common/flatbuffers.h" diff --git a/onnxruntime/core/optimizer/optimizer_execution_frame.h b/onnxruntime/core/optimizer/optimizer_execution_frame.h index ffda05d349c39..b0f7f461661b5 100644 --- a/onnxruntime/core/optimizer/optimizer_execution_frame.h +++ b/onnxruntime/core/optimizer/optimizer_execution_frame.h @@ -4,6 +4,7 @@ #pragma once #include +#include #include "core/common/inlined_containers.h" #include "core/graph/graph.h" diff --git a/onnxruntime/core/providers/cpu/ml/label_encoder.h b/onnxruntime/core/providers/cpu/ml/label_encoder.h index ee47b13016884..f7a454cf519e1 100644 --- a/onnxruntime/core/providers/cpu/ml/label_encoder.h +++ b/onnxruntime/core/providers/cpu/ml/label_encoder.h @@ -2,7 +2,7 @@ // Licensed under the MIT License. #pragma once - +#include #include "core/common/common.h" #include "core/framework/op_kernel.h" #include "core/providers/cpu/ml/ml_common.h" diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlRuntimeFusedGraphKernel.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlRuntimeFusedGraphKernel.h index e83fa628d44a8..e800175268557 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlRuntimeFusedGraphKernel.h +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlRuntimeFusedGraphKernel.h @@ -1,6 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. - +#include #include "core/framework/op_kernel.h" #include "GraphDescBuilder.h" #include "DmlRuntimeGraphFusionTransformer.h" diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.h index 9728dc47c8f3c..3f778b3a7feba 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.h +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.h @@ -2,6 +2,7 @@ // Licensed under the MIT License. #pragma once +#include #include "MLOperatorAuthorImpl.h" #include "ExecutionProvider.h" diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.h index 4708de1651d39..7e51ce026d365 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.h +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.h @@ -2,6 +2,8 @@ // Licensed under the MIT License. 
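These hunks add <filesystem> to every header whose interface now mentions std::filesystem::path. One reason the type works as the common path currency across this patch series: it converts implicitly from narrow and wide strings alike, so call sites on every platform can pass what they already have. A self-contained illustration:

    #include <filesystem>
    #include <string>

    void Consume(const std::filesystem::path&) {}

    int main() {
      Consume("model.onnx");                 // const char*
      Consume(std::string("model.onnx"));    // std::string
      Consume(std::wstring(L"model.onnx"));  // std::wstring converts too, on every platform
      return 0;
    }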
#pragma once +#include + #include "core/providers/dml/DmlExecutionProvider/inc/IWinmlExecutionProvider.h" #include "core/providers/dml/OperatorAuthorHelper/MLOperatorAuthorHelper.h" #include "core/providers/dml/DmlExecutionProvider/src/DmlEdgeShapes.h" diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Utility.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Utility.h index f779cec19fd5b..a3f2777a0c805 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Utility.h +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Utility.h @@ -6,7 +6,7 @@ #include #include #include - +#include namespace Dml { diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h index 1e3668160fbcf..d4967b6251824 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h @@ -5,6 +5,7 @@ #include #include +#include #include "core/common/inlined_containers.h" #include "core/graph/basic_types.h" #include "core/providers/nnapi/nnapi_builtin/nnapi_lib/NeuralNetworksTypes.h" diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_helpers.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_helpers.h index 8f2fefd8deb7d..94e511e04dff3 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_helpers.h +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder_helpers.h @@ -5,6 +5,7 @@ #include #include +#include #include "core/common/common.h" #include "core/framework/node_unit.h" diff --git a/onnxruntime/core/session/inference_session.h b/onnxruntime/core/session/inference_session.h index a0a5fab2d2203..8612ebe55d9af 100644 --- a/onnxruntime/core/session/inference_session.h +++ b/onnxruntime/core/session/inference_session.h @@ -7,6 +7,7 @@ #include #include #include +#include #include "core/common/common.h" #include "core/common/inlined_containers.h" From 9074a9bf8d4ac812a1b713963cb2da7ac1fae11e Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Mon, 10 Jun 2024 17:33:17 +0000 Subject: [PATCH 27/50] update --- .../core/providers/qnn/builder/opbuilder/slice_op_builder.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc index 410dc7629c4f8..b033c8723ea86 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/slice_op_builder.cc @@ -92,8 +92,8 @@ static Status GetInitializerInputData(const NodeUnitIODef& input, const QnnModel Tensor tensor(dtype, shape, std::make_shared()); // Deserialize initializer into Tensor. 
- ORT_RETURN_IF_ERROR(onnxruntime::utils::TensorProtoToTensor(onnxruntime::Env::Default(), qnn_model_wrapper.GetGraphViewer().ModelPath(), - *initializer_proto, tensor)); + ORT_RETURN_IF_ERROR(onnxruntime::utils::TensorProtoToTensor( + onnxruntime::Env::Default(), qnn_model_wrapper.GetGraphViewer().ModelPath(), *initializer_proto, tensor)); Status status; From 0d142abf49ec616d3874eef2ec8cbc7b95a346a2 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Mon, 10 Jun 2024 17:34:14 +0000 Subject: [PATCH 28/50] Add line wrapping --- .../core/providers/nnapi/nnapi_builtin/builders/helper.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc index c8ded32cbb783..5108f90fc763a 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc @@ -185,7 +185,8 @@ bool HasValidBinaryOpQuantizedInputTypes(const NodeUnit& node_unit) { } common::Status GetQuantizationScaleAndZeroPoint(const GraphViewer& graph_viewer, const NodeUnitIODef& io_def, - const std::filesystem::path& model_path, float& scale, int32_t& zero_point) { + const std::filesystem::path& model_path, float& scale, + int32_t& zero_point) { scale = 0.0f; zero_point = 0; From ac3c3a1299c1a9dd30e7549f22074c8ab1f0d199 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Mon, 10 Jun 2024 17:35:47 +0000 Subject: [PATCH 29/50] Line wrapping --- .../core/providers/qnn/builder/opbuilder/base_op_builder.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc index 67f6bf7d2ae92..2fbe59bf0d578 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc @@ -235,7 +235,8 @@ Status BaseOpBuilder::TransposeInitializer(const QnnModelWrapper& qnn_model_wrap TensorShape new_tensor_shape(new_tensor_shape_dims); Tensor out_tensor = Tensor(tensor_dtype, new_tensor_shape, cpu_allocator); - ORT_RETURN_IF_ERROR(onnxruntime::utils::TensorProtoToTensor(Env::Default(), qnn_model_wrapper.GetGraphViewer().ModelPath(), initializer, in_tensor)); + ORT_RETURN_IF_ERROR(onnxruntime::utils::TensorProtoToTensor( + Env::Default(), qnn_model_wrapper.GetGraphViewer().ModelPath(), initializer, in_tensor)); ORT_RETURN_IF_ERROR(Transpose::DoTranspose(permutations, in_tensor, out_tensor)); onnx::TensorProto new_tensor_proto = onnxruntime::utils::TensorToTensorProto(out_tensor, "test"); ORT_RETURN_IF_ERROR(qnn_model_wrapper.UnpackInitializerData(new_tensor_proto, transposed_data)); From b8a314cb94610f58109ae7ee85ebe58cfdedfbe4 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Mon, 10 Jun 2024 13:14:19 -0700 Subject: [PATCH 30/50] Update graph_partitioner.cc --- onnxruntime/core/framework/graph_partitioner.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/onnxruntime/core/framework/graph_partitioner.cc b/onnxruntime/core/framework/graph_partitioner.cc index 533b15c4e3a39..3a5a2cb06b267 100644 --- a/onnxruntime/core/framework/graph_partitioner.cc +++ b/onnxruntime/core/framework/graph_partitioner.cc @@ -662,8 +662,7 @@ static Status CreateEpContextModel(const ExecutionProviders& execution_providers const std::filesystem::path& model_path = graph.ModelPath(); if (!ep_context_path.empty()) { - 
// On Windows here we explicitly cast the ep_context_path string to UTF-16 because we assume ep_context_path is in UTF-8 - context_cache_path = ToPathString(ep_context_path); + context_cache_path = ep_context_path; } else if (!model_path.empty()) { context_cache_path = model_path / ORT_TSTR("_ctx.onnx"); } From 1ecc42d733c1ee264e7171a02e642bff856c00a1 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Mon, 10 Jun 2024 14:23:08 -0700 Subject: [PATCH 31/50] Update tensorprotoutils.cc --- onnxruntime/core/framework/tensorprotoutils.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc index bf2c946a73170..e60f5f3c56593 100644 --- a/onnxruntime/core/framework/tensorprotoutils.cc +++ b/onnxruntime/core/framework/tensorprotoutils.cc @@ -218,8 +218,6 @@ Status ReadExternalDataForTensor(const ONNX_NAMESPACE::TensorProto& tensor_proto return Status::OK(); } -// TODO(unknown): Change the current interface to take Path object for model path -// so that validating and manipulating path for reading external data becomes easy Status TensorProtoToOrtValueImpl(const Env& env, const std::filesystem::path& model_path, const ONNX_NAMESPACE::TensorProto& tensor_proto, const MemBuffer* m, AllocatorPtr alloc, OrtValue& value) { From 2c6c2c17366d735bbad4a5170def699dca018b98 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Tue, 11 Jun 2024 13:14:18 -0700 Subject: [PATCH 32/50] Update onnxruntime/core/framework/tensorprotoutils.cc Co-authored-by: Edward Chen <18449977+edgchen1@users.noreply.github.com> --- onnxruntime/core/framework/tensorprotoutils.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc index e60f5f3c56593..5b7d2c02eef4a 100644 --- a/onnxruntime/core/framework/tensorprotoutils.cc +++ b/onnxruntime/core/framework/tensorprotoutils.cc @@ -1681,7 +1681,7 @@ Status UnpackInitializerData(const onnx::TensorProto& initializer, if (initializer.data_location() == TensorProto_DataLocation_EXTERNAL) { ORT_RETURN_IF_ERROR(ReadExternalDataForTensor( initializer, - (model_path.empty() || model_path.parent_path().empty()) ? 
std::filesystem::path() : model_path.parent_path(),
+        model_path.parent_path(),
         unpacked_tensor));
     return Status::OK();
   }

From dce865da05a325fb37ad195d4e4c3b845e6923ad Mon Sep 17 00:00:00 2001
From: Changming Sun
Date: Tue, 11 Jun 2024 13:18:05 -0700
Subject: [PATCH 33/50] update

---
 include/onnxruntime/core/graph/graph.h          |  4 ++--
 onnxruntime/core/framework/graph_partitioner.cc | 13 +++++--------
 onnxruntime/core/graph/graph.cc                 |  8 ++++----
 onnxruntime/core/graph/model.cc                 |  4 ----
 onnxruntime/core/graph/model.h                  |  3 ++-
 .../ort_optimizer_api_impl.cc                   |  3 ++-
 .../tensorrt/tensorrt_execution_provider.cc     | 14 +++++++-------
 onnxruntime/core/session/inference_session.cc   |  5 +++--
 .../test/framework/allocation_planner_test.cc   |  2 +-
 .../test/framework/inference_session_test.cc    | 12 ++++++------
 onnxruntime/test/ir/graph_test.cc               |  6 +++---
 11 files changed, 35 insertions(+), 39 deletions(-)

diff --git a/include/onnxruntime/core/graph/graph.h b/include/onnxruntime/core/graph/graph.h
index 5f5a10428a158..538cbfdcefc47 100644
--- a/include/onnxruntime/core/graph/graph.h
+++ b/include/onnxruntime/core/graph/graph.h
@@ -1141,13 +1141,13 @@ class Graph {  // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
   ONNX_NAMESPACE::GraphProto ToGraphProto() const;

   /** Gets the GraphProto representation of this Graph
   @param external_file_path File path of the binary file to use for initializers.
-  @param file_path path of the model file.
+  @param model_file_path Path of the model file.
   @param initializer_size_threshold initializers larger or equal to this threshold (in bytes) are saved
   in the external file. Initializers smaller than this threshold are included in the onnx file.
   @returns GraphProto serialization of the graph.
   */
   ONNX_NAMESPACE::GraphProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path,
-                                                                  const std::filesystem::path& file_path,
+                                                                  const std::filesystem::path& model_file_path,
                                                                   size_t initializer_size_threshold) const;

   /** Gets the ISchemaRegistry instances being used with this Graph.
*/

diff --git a/onnxruntime/core/framework/graph_partitioner.cc b/onnxruntime/core/framework/graph_partitioner.cc
index 3a5a2cb06b267..6ae09301154ac 100644
--- a/onnxruntime/core/framework/graph_partitioner.cc
+++ b/onnxruntime/core/framework/graph_partitioner.cc
@@ -664,16 +664,13 @@ static Status CreateEpContextModel(const ExecutionProviders& execution_providers
   if (!ep_context_path.empty()) {
     context_cache_path = ep_context_path;
   } else if (!model_path.empty()) {
-    context_cache_path = model_path / ORT_TSTR("_ctx.onnx");
+    context_cache_path = model_path.native() + ORT_TSTR("_ctx.onnx");
+  } else {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Both ep_context_path and model_path are empty");
   }

-  {
-#ifdef _WIN32
-    std::wifstream fs(context_cache_path);
-#else
-    std::ifstream fs(context_cache_path);
-#endif
-    ORT_RETURN_IF(fs.good(), "Failed to generate EP context model since the file exist already.");
+  if (std::filesystem::exists(context_cache_path)) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to generate EP context model since the file '", context_cache_path, "' exists already.");
   }

   Model ep_context_model(graph.Name(), false, ModelMetaData(), PathString(), IOnnxRuntimeOpSchemaRegistryList(),

diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc
index e7795317016e5..2232e77e1ac8f 100644
--- a/onnxruntime/core/graph/graph.cc
+++ b/onnxruntime/core/graph/graph.cc
@@ -3973,13 +3973,13 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProto() const {
 }

 ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path,
-                                                                       const std::filesystem::path& destination_file_path,
+                                                                       const std::filesystem::path& model_file_path,
                                                                        size_t initializer_size_threshold) const {
   GraphProto result;
   ToGraphProtoInternal(result);
-  // If destination_file_path is just a file name without a path separator, for example: "model.onnx". Its parent path could be empty.
-  // Else, save external data file in same directory as model
-  const std::filesystem::path modified_external_file_path = destination_file_path.has_parent_path() ? destination_file_path.parent_path() / external_file_path : external_file_path;
+  // If model_file_path is just a file name without a path separator (for example, "model.onnx"), its parent path is empty.
+  // Otherwise, save the external data file in the same directory as the model.
+  const std::filesystem::path modified_external_file_path = model_file_path.parent_path() / external_file_path;
   std::ofstream external_stream(modified_external_file_path, std::ofstream::out | std::ofstream::binary);
   ORT_ENFORCE(external_stream.is_open());

diff --git a/onnxruntime/core/graph/model.cc b/onnxruntime/core/graph/model.cc
index 067526a7e6e7a..2936e5d92eb40 100644
--- a/onnxruntime/core/graph/model.cc
+++ b/onnxruntime/core/graph/model.cc
@@ -638,10 +638,6 @@ Status Model::Load(const PathString& file_path, std::shared_ptr<Model>& p_model,
   return LoadModel(file_path, p_model, local_registries, logger, options);
 }

-Status Model::Save(Model& model, const std::string& file_path) {
-  return SaveModel(model, file_path);
-}
-
 Status Model::SaveWithExternalInitializers(Model& model, const std::filesystem::path& file_path,
                                            const std::filesystem::path& external_file_name,
                                            size_t initializer_size_threshold) {

diff --git a/onnxruntime/core/graph/model.h b/onnxruntime/core/graph/model.h
index 0c971d28b8ab4..65f9fd4d03627 100644
--- a/onnxruntime/core/graph/model.h
+++ b/onnxruntime/core/graph/model.h
@@ -194,8 +194,9 @@ class Model {
 #ifdef _WIN32
   static common::Status Save(Model& model, const std::wstring& file_path);
-#endif
+#else
   static common::Status Save(Model& model, const std::string& file_path);
+#endif

   static common::Status Save(Model& model, int fd);

diff --git a/onnxruntime/core/optimizer/transpose_optimization/ort_optimizer_api_impl.cc b/onnxruntime/core/optimizer/transpose_optimization/ort_optimizer_api_impl.cc
index 5ec51b6f30f18..548895335b1ac 100644
--- a/onnxruntime/core/optimizer/transpose_optimization/ort_optimizer_api_impl.cc
+++ b/onnxruntime/core/optimizer/transpose_optimization/ort_optimizer_api_impl.cc
@@ -293,7 +293,8 @@ std::vector<uint8_t> ApiTensor::Data() const {
                                                      tensor_proto_, tensor));
   size_t num_bytes = gsl::narrow_cast<size_t>(tensor.SizeInBytes());
   const uint8_t* data = static_cast<const uint8_t*>(tensor.DataRaw());
-  // TODO: the returned data is unaligned
+  // TODO: the returned data is unaligned, so it does not meet the alignment requirement that mlas has, because
+  // the returned type is a vector rather than a Tensor or a tensor buffer allocated from a CPU allocator.
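One way the TODO above could be addressed, sketched with standard C++17 over-aligned allocation; the 64-byte figure is an assumption about what mlas-style kernels want, and none of these names come from ORT:

    #include <cstddef>
    #include <cstring>
    #include <memory>
    #include <new>

    // Deleter matching the over-aligned operator new[] below.
    struct AlignedDelete {
      void operator()(std::byte* p) const noexcept { ::operator delete[](p, std::align_val_t{64}); }
    };
    using AlignedBuffer = std::unique_ptr<std::byte[], AlignedDelete>;

    // Copies n bytes into a 64-byte aligned buffer; std::vector gives no such
    // guarantee for its backing storage.
    AlignedBuffer AlignedCopy(const std::byte* src, std::size_t n) {
      auto* p = static_cast<std::byte*>(::operator new[](n, std::align_val_t{64}));
      std::memcpy(p, src, n);
      return AlignedBuffer(p);
    }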
  return std::vector<uint8_t>(data, data + num_bytes);
 }

 //

diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
index 6cc2b489e5e2c..8cecf4cf98cd6 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
@@ -2511,13 +2511,13 @@ TensorrtExecutionProvider::GetCapability(const GraphViewer& graph,
 /**
  * Refit the weight-stripped engine
  */
-common::Status TensorrtExecutionProvider::RefitEngine([[maybe_unused]] std::string onnx_model_filename,
-                                                      [[maybe_unused]] std::string& onnx_model_folder_path,
-                                                      [[maybe_unused]] std::string& weight_stripped_engine_cath_path,
-                                                      [[maybe_unused]] bool path_check,
-                                                      [[maybe_unused]] nvinfer1::ICudaEngine* trt_engine,
-                                                      [[maybe_unused]] bool serialize_refitted_engine,
-                                                      [[maybe_unused]] bool detailed_build_log) {
+common::Status TensorrtExecutionProvider::RefitEngine(std::string onnx_model_filename,
+                                                      std::string& onnx_model_folder_path,
+                                                      std::string& weight_stripped_engine_cath_path,
+                                                      bool path_check,
+                                                      nvinfer1::ICudaEngine* trt_engine,
+                                                      bool serialize_refitted_engine,
+                                                      bool detailed_build_log) {
 #if NV_TENSORRT_MAJOR >= 10
   std::filesystem::path onnx_model_path{onnx_model_folder_path};
   onnx_model_path.append(onnx_model_filename);

diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc
index 274deae7368ac..2ef5cd22012f2 100644
--- a/onnxruntime/core/session/inference_session.cc
+++ b/onnxruntime/core/session/inference_session.cc
@@ -1267,8 +1267,9 @@ common::Status InferenceSession::TransformGraph(onnxruntime::Graph& graph, bool
       // for the result of the first step in layout transformation
       debug_graph_fn = [counter = 1, this](const Graph& graph) mutable {
         if (graph.GraphProtoSyncNeeded()) {
-          ORT_THROW_IF_ERROR(
-              Model::Save(*model_, "post_layout_transform_step_" + std::to_string(counter) + ".onnx"));
+          std::basic_ostringstream<ORTCHAR_T> modelpath;
+          modelpath << ORT_TSTR("post_layout_transform_step_") << counter << ORT_TSTR(".onnx");
+          ORT_THROW_IF_ERROR(Model::Save(*model_, modelpath.str()));
         }

         // counter is used to denote the step, so increment regardless of whether we wrote out the model in this step.
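Two std::filesystem details motivate the path hunks above: operator/ always starts a new path component, so appending a "_ctx.onnx" suffix to a file name needs string concatenation (operator+= or native() + ...), and a wide-character stream is what lets non-string values be spliced into a Windows path. A runnable illustration of the first point:

    #include <cassert>
    #include <filesystem>

    int main() {
      namespace fs = std::filesystem;
      fs::path model = "dir/model.onnx";
      fs::path child = model / "_ctx.onnx";  // "dir/model.onnx/_ctx.onnx": a new component
      fs::path suffixed = model;
      suffixed += "_ctx.onnx";               // "dir/model.onnx_ctx.onnx": the same component, extended
      assert(child != suffixed);
      return 0;
    }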
diff --git a/onnxruntime/test/framework/allocation_planner_test.cc b/onnxruntime/test/framework/allocation_planner_test.cc index 3a01f2c8d95ad..ab6e5f0dc4508 100644 --- a/onnxruntime/test/framework/allocation_planner_test.cc +++ b/onnxruntime/test/framework/allocation_planner_test.cc @@ -1805,7 +1805,7 @@ TEST_F(PlannerTest, ParaPlanCreation) { status = sess.RegisterExecutionProvider(DefaultCpuExecutionProvider()); ASSERT_TRUE(status.IsOK()); - ASSERT_TRUE(model.Save(model, "./simplified_ssd.onnx").IsOK()); + ASSERT_TRUE(model.Save(model, ORT_TSTR("./simplified_ssd.onnx")).IsOK()); std::string s1; const bool rc = model.ToProto().SerializeToString(&s1); diff --git a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc index d0520ebbcba5a..84389c1d9711c 100644 --- a/onnxruntime/test/framework/inference_session_test.cc +++ b/onnxruntime/test/framework/inference_session_test.cc @@ -1278,7 +1278,7 @@ TEST(InferenceSessionTests, TestOptionalInputs) { } } -static void CreateFuseOpModel(const std::string& model_file_name) { +static void CreateFuseOpModel(const PathString& model_file_name) { onnxruntime::Model model("graph_1", false, ModelMetaData(), PathString(), IOnnxRuntimeOpSchemaRegistryList(), {{kOnnxDomain, 12}}, {}, DefaultLoggingManager().DefaultLogger()); auto& graph = model.MainGraph(); @@ -1312,7 +1312,7 @@ static void CreateFuseOpModel(const std::string& model_file_name) { } TEST(ExecutionProviderTest, FunctionTest) { - std::string model_file_name = "execution_provider_test_graph.onnx"; + PathString model_file_name = ORT_TSTR("execution_provider_test_graph.onnx"); CreateFuseOpModel(model_file_name); SessionOptions so; @@ -1365,7 +1365,7 @@ TEST(ExecutionProviderTest, FunctionTest) { } TEST(ExecutionProviderTest, ShapeInferenceForFusedFunctionTest) { - std::string model_file_name = "fused_node_shape_inference_test_graph.onnx"; + PathString model_file_name = ORT_TSTR("fused_node_shape_inference_test_graph.onnx"); CreateFuseOpModel(model_file_name); @@ -1393,7 +1393,7 @@ TEST(ExecutionProviderTest, ShapeInferenceForFusedFunctionTest) { } TEST(ExecutionProviderTest, OpKernelInfoCanReadConfigOptions) { - std::string model_file_name = "OpKernelInfoCanReadConfigOptions.onnx"; + PathString model_file_name = ORT_TSTR("OpKernelInfoCanReadConfigOptions.onnx"); CreateFuseOpModel(model_file_name); SessionOptions so; @@ -1580,7 +1580,7 @@ TEST(InferenceSessionTests, Test3LayerNestedSubgraph) { auto status = graph.Resolve(); ASSERT_TRUE(status.IsOK()); - std::string model_file_name = "3-layer-nested-subgraph-test.onnx"; + PathString model_file_name = ORT_TSTR("3-layer-nested-subgraph-test.onnx"); status = onnxruntime::Model::Save(model, model_file_name); ASSERT_TRUE(status.IsOK()); @@ -1732,7 +1732,7 @@ TEST(InferenceSessionTests, Test2LayerNestedSubgraph) { auto status = graph.Resolve(); ASSERT_TRUE(status.IsOK()); - std::string model_file_name = "2-layer-nested-subgraph-test.onnx"; + PathString model_file_name = ORT_TSTR("2-layer-nested-subgraph-test.onnx"); status = onnxruntime::Model::Save(model, model_file_name); ASSERT_TRUE(status.IsOK()); diff --git a/onnxruntime/test/ir/graph_test.cc b/onnxruntime/test/ir/graph_test.cc index ff10765741bbe..4766ef6fbc621 100644 --- a/onnxruntime/test/ir/graph_test.cc +++ b/onnxruntime/test/ir/graph_test.cc @@ -388,7 +388,7 @@ TEST_F(GraphTest, UnusedValueInfoSerializes) { std::shared_ptr model; ASSERT_STATUS_OK(Model::Load(std::move(m), model, nullptr, *logger_)); 
model->MainGraph().SetGraphProtoSyncNeeded(); - EXPECT_TRUE(Model::Save(*model, "graph_with_unused_value_info.onnx").IsOK()); + EXPECT_TRUE(Model::Save(*model, ORT_TSTR("graph_with_unused_value_info.onnx")).IsOK()); } TEST_F(GraphTest, WrongOpset) { @@ -762,7 +762,7 @@ TEST_F(GraphTest, GraphConstruction_CheckIsAcyclic) { auto status = graph.Resolve(); EXPECT_TRUE(status.IsOK()) << status.ErrorMessage(); - EXPECT_TRUE(Model::Save(model, "graph_1.onnx").IsOK()); + EXPECT_TRUE(Model::Save(model, ORT_TSTR("graph_1.onnx")).IsOK()); std::shared_ptr model2; EXPECT_TRUE(Model::Load(ORT_TSTR("graph_1.onnx"), model2, nullptr, *logger_).IsOK()); @@ -1476,7 +1476,7 @@ TEST_F(GraphTest, GraphConstruction_TypeInference) { EXPECT_EQ("node_4_out_1", graph.GetOutputs()[0]->Name()); EXPECT_EQ(2u, graph.GetInputs().size()); - EXPECT_TRUE(Model::Save(model, "model_x.onnx").IsOK()); + EXPECT_TRUE(Model::Save(model, ORT_TSTR("model_x.onnx")).IsOK()); std::shared_ptr loaded_model; EXPECT_TRUE(Model::Load(ORT_TSTR("model_x.onnx"), loaded_model, nullptr, *logger_).IsOK()); EXPECT_EQ(2u, loaded_model->MainGraph().GetInputs().size()); From fb70965b5dff1ae039ed0a5420b548b08771f9e0 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Wed, 12 Jun 2024 18:35:28 +0000 Subject: [PATCH 34/50] revert --- onnxruntime/core/graph/model.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/onnxruntime/core/graph/model.cc b/onnxruntime/core/graph/model.cc index 2936e5d92eb40..3c5447d9a34e9 100644 --- a/onnxruntime/core/graph/model.cc +++ b/onnxruntime/core/graph/model.cc @@ -597,6 +597,10 @@ static Status SaveModel(Model& model, const T& file_path) { Status Model::Save(Model& model, const std::wstring& file_path) { return SaveModel(model, file_path); } +#else +Status Model::Save(Model& model, const std::string& file_path) { + return SaveModel(model, file_path); +} #endif template From ee284cba713efaad3c958d15ed8e6a16ef4919c3 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Wed, 12 Jun 2024 19:08:22 +0000 Subject: [PATCH 35/50] update --- orttraining/orttraining/training_api/module.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/orttraining/orttraining/training_api/module.cc b/orttraining/orttraining/training_api/module.cc index 347673628e106..dc724fbae48eb 100644 --- a/orttraining/orttraining/training_api/module.cc +++ b/orttraining/orttraining/training_api/module.cc @@ -685,7 +685,7 @@ Status Module::ExportModelForInferencing(const std::string& inference_model_path ORT_THROW_IF_ERROR( Model::SaveWithExternalInitializers(*inference_model, inference_model_pathstring, external_data_name, 64)); } else { - ORT_THROW_IF_ERROR(Model::Save(*inference_model, inference_model_path)); + ORT_THROW_IF_ERROR(Model::Save(*inference_model, ToPathString(inference_model_path))); } // Save the model at the desired location. 
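ToPathString, used above, converts a UTF-8 std::string into the platform's native path string (std::wstring on Windows). A sketch of equivalent behavior, under the assumption that C++17 u8path semantics match ORT's implementation:

    #include <filesystem>
    #include <string>

    // UTF-8 in, native path encoding out (UTF-16 std::wstring on Windows);
    // assumes u8path-style re-encoding, which is an approximation of ToPathString.
    std::filesystem::path::string_type ToNativePathString(const std::string& utf8) {
      return std::filesystem::u8path(utf8).native();
    }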
return Status::OK(); From 75d75efa7ec21cd5e1903239b287204c4f17feb4 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Wed, 12 Jun 2024 20:07:11 +0000 Subject: [PATCH 36/50] update --- .../core/framework/ortmodule_graph_builder.cc | 2 +- .../core/optimizer/graph_transformer_config.h | 1 + .../python/orttraining_pybind_state.cc | 2 +- .../test/gradient/allreduce_op_test.cc | 12 ++++---- .../test/optimizer/graph_transform_test.cc | 28 +++++++++---------- 5 files changed, 23 insertions(+), 22 deletions(-) diff --git a/orttraining/orttraining/core/framework/ortmodule_graph_builder.cc b/orttraining/orttraining/core/framework/ortmodule_graph_builder.cc index c5948e563fcf8..e01456ee3d769 100644 --- a/orttraining/orttraining/core/framework/ortmodule_graph_builder.cc +++ b/orttraining/orttraining/core/framework/ortmodule_graph_builder.cc @@ -183,7 +183,7 @@ Status OrtModuleGraphBuilder::OptimizeForwardGraph(const TrainingGraphTransforme } if (!config.optimized_pre_grad_filepath.empty()) { - ORT_RETURN_IF_ERROR(Model::Save(*forward_model_, config.optimized_pre_grad_filepath)); + ORT_RETURN_IF_ERROR(Model::Save(*forward_model_, ToPathString(config.optimized_pre_grad_filepath))); } return Status::OK(); diff --git a/orttraining/orttraining/core/optimizer/graph_transformer_config.h b/orttraining/orttraining/core/optimizer/graph_transformer_config.h index f72dbfa3fdfc3..c496e36689de1 100644 --- a/orttraining/orttraining/core/optimizer/graph_transformer_config.h +++ b/orttraining/orttraining/core/optimizer/graph_transformer_config.h @@ -28,6 +28,7 @@ struct TrainingGraphTransformerConfiguration : public GraphTransformerConfigurat bool print_input_density{false}; // Path for serialization of the transformed optimized model. If empty, serialization is disabled. + // A UTF-8 string. 
std::string optimized_pre_grad_filepath; }; diff --git a/orttraining/orttraining/python/orttraining_pybind_state.cc b/orttraining/orttraining/python/orttraining_pybind_state.cc index 5ea60102f3ef8..5697ce6f0c7a4 100644 --- a/orttraining/orttraining/python/orttraining_pybind_state.cc +++ b/orttraining/orttraining/python/orttraining_pybind_state.cc @@ -617,7 +617,7 @@ void addObjectMethodsForTraining(py::module& m) { ORT_THROW_IF_ERROR(gradient_graph_builder->builder_->Build()); }) .def("save", [](PyGradientGraphBuilderContext* gradient_graph_builder, const std::string& path) { - ORT_THROW_IF_ERROR(Model::Save(*(gradient_graph_builder->model_), path)); + ORT_THROW_IF_ERROR(Model::Save(*(gradient_graph_builder->model_), ToPathString(path))); }) .def("get_model", [](PyGradientGraphBuilderContext* gradient_graph_builder) { std::string model_str; diff --git a/orttraining/orttraining/test/gradient/allreduce_op_test.cc b/orttraining/orttraining/test/gradient/allreduce_op_test.cc index 82f01a3c43681..1b1bd680a1191 100644 --- a/orttraining/orttraining/test/gradient/allreduce_op_test.cc +++ b/orttraining/orttraining/test/gradient/allreduce_op_test.cc @@ -472,7 +472,7 @@ TEST(AllreduceTest, GPUHierarchicalAdasumAllreduceOptimizerTest) { build_allreduce_graph(graph, adasum_graph_configs, training::AdasumReductionType::GpuHierarchicalReduction, true /*build_optimizer*/, false /*half_precision*/); - std::string model_file_name = "GPUHierarchicalAdasumAllreduceOptimizerTest.onnx"; + PathString model_file_name = ORT_TSTR("GPUHierarchicalAdasumAllreduceOptimizerTest.onnx"); auto status = onnxruntime::Model::Save(model, model_file_name); SessionOptions so; @@ -649,7 +649,7 @@ TEST(AllreduceTest, GPUHierarchicalAdasumAllreduceOptimizerFP16Test) { build_allreduce_graph(graph, adasum_graph_configs, training::AdasumReductionType::GpuHierarchicalReduction, true /*build_optimizer*/, true /*half_precision*/); - std::string model_file_name = "GPUHierarchicalAdasumAllreduceOptimizerFP16Test.onnx"; + PathString model_file_name = ORT_TSTR("GPUHierarchicalAdasumAllreduceOptimizerFP16Test.onnx"); auto status = onnxruntime::Model::Save(model, model_file_name); SessionOptions so; @@ -791,7 +791,7 @@ TEST(AllreduceTest, GPUHierarchicalAdasumAllreduceTest) { adasum_graph_configs.push_back(adasum_graph_config); build_allreduce_graph(graph, adasum_graph_configs, training::AdasumReductionType::GpuHierarchicalReduction); - std::string model_file_name = "GPUHierarchicalAdasumAllreduceTest.onnx"; + PathString model_file_name = ORT_TSTR("GPUHierarchicalAdasumAllreduceTest.onnx"); auto status = onnxruntime::Model::Save(model, model_file_name); SessionOptions so; @@ -896,7 +896,7 @@ TEST(AllreduceTest, GPUHierarchicalAdasumFP16AllreduceTest) { false /*build_optimizer*/, true /*half_precision*/); - std::string model_file_name = "GPUHierarchicalAdasumFP16AllreduceTest.onnx"; + PathString model_file_name = ORT_TSTR("GPUHierarchicalAdasumFP16AllreduceTest.onnx"); auto status = onnxruntime::Model::Save(model, model_file_name); SessionOptions so; @@ -1003,7 +1003,7 @@ TEST(AllreduceTest, GPUAdasumAllreduceTest) { build_allreduce_graph(graph, adasum_graph_configs, training::AdasumReductionType::CpuReduction); - std::string model_file_name = "GPUAdasumAllreduceTest.onnx"; + PathString model_file_name = ORT_TSTR("GPUAdasumAllreduceTest.onnx"); auto status = onnxruntime::Model::Save(model, model_file_name); SessionOptions so; @@ -1110,7 +1110,7 @@ TEST(AllreduceTest, GPUAdasumFP16AllreduceTest) { build_allreduce_graph(graph, 
adasum_graph_configs, training::AdasumReductionType::CpuReduction, true /*half_precision*/); - std::string model_file_name = "GPUAdasumFP16AllreduceTest.onnx"; + PathString model_file_name = ORT_TSTR("GPUAdasumFP16AllreduceTest.onnx"); auto status = onnxruntime::Model::Save(model, model_file_name); SessionOptions so; diff --git a/orttraining/orttraining/test/optimizer/graph_transform_test.cc b/orttraining/orttraining/test/optimizer/graph_transform_test.cc index 4ab035a171430..b2ab4891f2e1e 100644 --- a/orttraining/orttraining/test/optimizer/graph_transform_test.cc +++ b/orttraining/orttraining/test/optimizer/graph_transform_test.cc @@ -627,7 +627,7 @@ TEST_F(GraphTransformationTests, MegatronMLPPartitionRank0) { TransformerLevel::Level1)); ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level1, *logger_)); - auto model_uri2 = "mlp_megatron_basic_test_partition_rank0.onnx"; + PathString model_uri2 = ORT_TSTR("mlp_megatron_basic_test_partition_rank0.onnx"); ASSERT_STATUS_OK(Model::Save(*p_model, model_uri2)); { @@ -705,7 +705,7 @@ TEST_F(GraphTransformationTests, MegatronMLPPartitionRank1) { TransformerLevel::Level1)); ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level1, *logger_)); - auto model_uri2 = "mlp_megatron_basic_test_partition_rank1.onnx"; + PathString model_uri2 = ORT_TSTR("mlp_megatron_basic_test_partition_rank1.onnx"); ASSERT_STATUS_OK(Model::Save(*p_model, model_uri2)); { @@ -765,7 +765,7 @@ TEST_F(GraphTransformationTests, MegatronMLPPartitionRank1) { } TEST_F(GraphTransformationTests, MegatronSelfAttentionPartitionRank0) { - auto model_uri = MODEL_FOLDER "model_parallel/self_attention_megatron_basic_test.onnx"; + PathString model_uri = MODEL_FOLDER "model_parallel/self_attention_megatron_basic_test.onnx"; std::shared_ptr p_model; ASSERT_STATUS_OK(Model::Load(model_uri, p_model, nullptr, *logger_)); Graph& graph = p_model->MainGraph(); @@ -781,7 +781,7 @@ TEST_F(GraphTransformationTests, MegatronSelfAttentionPartitionRank0) { TransformerLevel::Level1)); ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level1, *logger_)); - auto model_uri2 = "self_attention_megatron_basic_test_partition_rank0.onnx"; + PathString model_uri2 = ORT_TSTR("self_attention_megatron_basic_test_partition_rank0.onnx"); ASSERT_STATUS_OK(Model::Save(*p_model, model_uri2)); { @@ -838,7 +838,7 @@ TEST_F(GraphTransformationTests, MegatronSelfAttentionPartitionRank0) { } TEST_F(GraphTransformationTests, MegatronSelfAttentionPartitionRank1) { - auto model_uri = MODEL_FOLDER "model_parallel/self_attention_megatron_basic_test.onnx"; + PathString model_uri = MODEL_FOLDER "model_parallel/self_attention_megatron_basic_test.onnx"; std::shared_ptr p_model; ASSERT_STATUS_OK(Model::Load(model_uri, p_model, nullptr, *logger_)); Graph& graph = p_model->MainGraph(); @@ -856,7 +856,7 @@ TEST_F(GraphTransformationTests, MegatronSelfAttentionPartitionRank1) { TransformerLevel::Level1)); ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level1, *logger_)); - auto model_uri2 = "self_attention_megatron_basic_test_partition_rank1.onnx"; + PathString model_uri2 = ORT_TSTR("self_attention_megatron_basic_test_partition_rank1.onnx"); ASSERT_STATUS_OK(Model::Save(*p_model, model_uri2)); { @@ -913,7 +913,7 @@ TEST_F(GraphTransformationTests, MegatronSelfAttentionPartitionRank1) { } TEST_F(GraphTransformationTests, BiasGeluRecomputeTest) { - auto model_uri = MODEL_FOLDER 
"fusion/bias_gelu_fusion_recompute.onnx"; + PathString model_uri = MODEL_FOLDER "fusion/bias_gelu_fusion_recompute.onnx"; std::shared_ptr p_model; ASSERT_STATUS_OK(Model::Load(model_uri, p_model, nullptr, *logger_)); Graph& graph = p_model->MainGraph(); @@ -1397,7 +1397,7 @@ static void RunPartitionCorrectnessTest(std::string model_path, TransformerLevel::Level1)); ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level1, logger)); graphs.push_back(&graph); - auto model_uri2 = ToPathString(model_path) + ORT_TSTR("_partition_rank_") + ToPathString(std::to_string(i)) + ORT_TSTR(".onnx"); + PathString model_uri2 = ToPathString(model_path) + ORT_TSTR("_partition_rank_") + ToPathString(std::to_string(i)) + ORT_TSTR(".onnx"); ASSERT_STATUS_OK(Model::Save(*p_models[i], model_uri2)); } @@ -1405,7 +1405,7 @@ static void RunPartitionCorrectnessTest(std::string model_path, auto& combine_graph = combine_model.MainGraph(); auto ret = horizontal_parallel_test_utils::MergeGraphsOnAllWorkers(graphs, combine_graph); ORT_ENFORCE(ret.IsOK()); - auto model_uri2 = ToPathString(model_path) + ORT_TSTR("_partition_combine.onnx"); + PathString model_uri2 = ToPathString(model_path) + ORT_TSTR("_partition_combine.onnx"); ASSERT_STATUS_OK(Model::Save(combine_model, model_uri2)); float scale = 1.f; @@ -1790,7 +1790,7 @@ TEST_F(GraphTransformationTests, ScaledSumFusionTwoInputs) { #ifdef ENABLE_TRITON TEST_F(GraphTransformationTests, TritonFusion) { - auto model_uri = MODEL_FOLDER "bert_toy_opset14.onnx"; + PathString model_uri = MODEL_FOLDER "bert_toy_opset14.onnx"; std::shared_ptr model; ASSERT_STATUS_OK(Model::Load(model_uri, model, nullptr, *logger_)); Graph& graph = model->MainGraph(); @@ -1805,7 +1805,7 @@ TEST_F(GraphTransformationTests, TritonFusion) { ASSERT_TRUE(op_to_count["LayerNormalization"] == 4); { - auto model_uri = MODEL_FOLDER "bert_toy_opset14.onnx"; + PathString model_uri = MODEL_FOLDER "bert_toy_opset14.onnx"; std::shared_ptr model; ASSERT_STATUS_OK(Model::Load(model_uri, model, nullptr, *logger_)); Graph& graph = model->MainGraph(); @@ -1845,7 +1845,7 @@ TEST_F(GraphTransformationTests, TritonFusion) { // No Dropout. { - auto model_uri = MODEL_FOLDER "bert_toy_opset14.onnx"; + PathString model_uri = MODEL_FOLDER "bert_toy_opset14.onnx"; std::shared_ptr model; ASSERT_STATUS_OK(Model::Load(model_uri, model, nullptr, *logger_)); Graph& graph = model->MainGraph(); @@ -1884,7 +1884,7 @@ TEST_F(GraphTransformationTests, TritonFusion) { // Ignore min nodes. { - auto model_uri = MODEL_FOLDER "bert_toy_opset14.onnx"; + PathString model_uri = MODEL_FOLDER "bert_toy_opset14.onnx"; std::shared_ptr model; ASSERT_STATUS_OK(Model::Load(model_uri, model, nullptr, *logger_)); Graph& graph = model->MainGraph(); @@ -1924,7 +1924,7 @@ TEST_F(GraphTransformationTests, TritonFusion) { // Exclude Softmax using axis attribute. 
{ - auto model_uri = MODEL_FOLDER "bert_toy_opset14.onnx"; + PathString model_uri = MODEL_FOLDER "bert_toy_opset14.onnx"; std::shared_ptr model; ASSERT_STATUS_OK(Model::Load(model_uri, model, nullptr, *logger_)); Graph& graph = model->MainGraph(); From f5524d8213de676f1036e0dbe37628dd005cfa4c Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Wed, 12 Jun 2024 21:14:50 +0000 Subject: [PATCH 37/50] update --- .../providers/tensorrt/tensorrt_basic_test.cc | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index 4d2538c947dcc..51e7ab2778c40 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -66,7 +66,7 @@ void VerifyOutputs(const std::vector& fetches, const std::vector dims, bool add_non_zero_node = false) { @@ -165,7 +165,7 @@ void RunSession2(InferenceSession& session_object, VerifyOutputs(fetches, expected_dims, expected_values); } -void RunWithOneSessionSingleThreadInference(std::string model_name, std::string sess_log_id) { +void RunWithOneSessionSingleThreadInference(PathString model_name, std::string sess_log_id) { SessionOptions so; so.session_logid = sess_log_id; RunOptions run_options; @@ -222,7 +222,7 @@ void RunWithOneSessionSingleThreadInference(std::string model_name, std::string ASSERT_TRUE(HasCacheFileWithPrefix(params.trt_ep_context_file_path)); } -void RunWithOneSessionMultiThreadsInference(std::string model_name, std::string sess_log_id, bool has_non_zero_node = false) { +void RunWithOneSessionMultiThreadsInference(PathString model_name, std::string sess_log_id, bool has_non_zero_node = false) { SessionOptions so; so.session_logid = sess_log_id; RunOptions run_options; @@ -289,7 +289,7 @@ void RunWithOneSessionMultiThreadsInference(std::string model_name, std::string TEST(TensorrtExecutionProviderTest, SessionCreationWithMultiThreadsAndInferenceWithMultiThreads) { std::vector threads; - std::string model_name = "trt_execution_provider_multithreading_test.onnx"; + PathString model_name = ORT_TSTR("trt_execution_provider_multithreading_test.onnx"); std::string graph_name = "multithreading_test"; std::string sess_log_id = "TRTEPMultiThreadingTestWithOneSessionSingleThread"; std::vector dims = {1, 3, 2}; @@ -305,7 +305,7 @@ TEST(TensorrtExecutionProviderTest, SessionCreationWithMultiThreadsAndInferenceW } TEST(TensorrtExecutionProviderTest, SessionCreationWithSingleThreadAndInferenceWithMultiThreads) { - std::string model_name = "trt_execution_provider_multithreading_test.onnx"; + PathString model_name = ORT_TSTR("trt_execution_provider_multithreading_test.onnx"); std::string graph_name = "multithreading_test"; std::string sess_log_id = "TRTEPMultiThreadingTestWithOneSessionMultiThreads"; std::vector dims = {1, 3, 2}; @@ -360,7 +360,7 @@ TEST(TensorrtExecutionProviderTest, TRTModelIdGeneratorUsingModelHashing) { } TEST(TensorrtExecutionProviderTest, EPContextNode) { - std::string model_name = "EPContextNode_test.onnx"; + PathString model_name = ORT_TSTR("EPContextNode_test.onnx"); std::string graph_name = "EPContextNode_test"; std::string sess_log_id = "EPContextNode_test"; std::vector dims = {1, 3, 2}; @@ -546,7 +546,7 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) { } TEST(TensorrtExecutionProviderTest, TRTPluginsCustomOpTest) { - std::string model_name = "testdata/trt_plugin_custom_op_test.onnx"; + PathString model_name = 
"testdata/trt_plugin_custom_op_test.onnx"; SessionOptions so; so.session_logid = "TensorrtExecutionProviderTRTPluginsTest"; RunOptions run_options; @@ -593,7 +593,7 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { ASSERT_NE(pos, std::string::npos); std::string cache_type = ToUTF8String(param.substr(0, pos)); - std::string model_name = "trt_execution_provider_" + cache_type + "caching_test_" + input_type + ".onnx"; + PathString model_name = "trt_execution_provider_" + cache_type + "caching_test_" + input_type + ".onnx"; std::vector dims; if (input_type.compare("dynamic") == 0) { dims = {1, -1, -1}; // dynamic shape input @@ -917,7 +917,7 @@ TEST(TensorrtExecutionProviderTest, FunctionTest) { auto status = graph.Resolve(); ASSERT_TRUE(status.IsOK()); - std::string model_file_name = "trt_execution_provider_function_test.onnx"; + PathString model_file_name = ORT_TSTR("trt_execution_provider_function_test.onnx"); status = onnxruntime::Model::Save(model, model_file_name); SessionOptions so; @@ -1019,7 +1019,7 @@ TEST(TensorrtExecutionProviderTest, DISABLED_NodeIndexMappingTest) { // [W:onn auto status = graph.Resolve(); ASSERT_TRUE(status.IsOK()); - std::string model_file_name = "trt_execution_provider_nodeindexmapping_test.onnx"; + PathString model_file_name = ORT_TSTR("trt_execution_provider_nodeindexmapping_test.onnx"); status = onnxruntime::Model::Save(model, model_file_name); SessionOptions so; @@ -1131,7 +1131,7 @@ TEST(TensorrtExecutionProviderTest, RemoveCycleTest) { auto status = graph.Resolve(); ASSERT_TRUE(status.IsOK()); - std::string model_file_name = "trt_execution_provider_removecycle_test.onnx"; + PathString model_file_name = ORT_TSTR("trt_execution_provider_removecycle_test.onnx"); status = onnxruntime::Model::Save(model, model_file_name); std::vector dims_mul_x = {1, 3, 2}; From 191053f4dc6e11c36266a7df435db669ba129c8e Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Thu, 13 Jun 2024 03:05:59 +0000 Subject: [PATCH 38/50] update --- .../test/providers/tensorrt/tensorrt_basic_test.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index 51e7ab2778c40..86771e8a0275a 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -461,7 +461,7 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) { */ InferenceSession session_object3{so, GetEnvironment()}; OrtTensorRTProviderOptionsV2 params3; - model_name = params.trt_ep_context_file_path; + model_name = ToPathString(params.trt_ep_context_file_path); params3.trt_engine_cache_enable = 1; execution_provider = TensorrtExecutionProviderWithOptions(¶ms3); EXPECT_TRUE(session_object3.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); @@ -490,7 +490,7 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) { */ InferenceSession session_object4{so, GetEnvironment()}; OrtTensorRTProviderOptionsV2 params4; - model_name = "./context_model_folder/EPContextNode_test_ctx.onnx"; + model_name = ORT_TSTR("./context_model_folder/EPContextNode_test_ctx.onnx"); execution_provider = TensorrtExecutionProviderWithOptions(¶ms4); EXPECT_TRUE(session_object4.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); status = session_object4.Load(model_name); @@ -514,7 +514,7 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) { params5.trt_dump_ep_context_model = 1; params5.trt_ep_context_embed_mode = 1; 
params5.trt_ep_context_file_path = "EP_Context_model_2.onnx"; - model_name = "EPContextNode_test.onnx"; + model_name = ORT_TSTR("EPContextNode_test.onnx"); execution_provider = TensorrtExecutionProviderWithOptions(&params5); EXPECT_TRUE(session_object5.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); status = session_object5.Load(model_name); @@ -528,7 +528,7 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) { InferenceSession session_object6{so, GetEnvironment()}; OrtTensorRTProviderOptionsV2 params6; params6.trt_ep_context_embed_mode = 1; - model_name = params5.trt_ep_context_file_path; + model_name = ToPathString(params5.trt_ep_context_file_path); execution_provider = TensorrtExecutionProviderWithOptions(&params6); EXPECT_TRUE(session_object6.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); status = session_object6.Load(model_name); From 076d569cfc40484e6cf2c3c3e75614f8ff8c8837 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Thu, 13 Jun 2024 03:08:02 +0000 Subject: [PATCH 39/50] update --- onnxruntime/test/optimizer/resnet50_fusion_test.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/onnxruntime/test/optimizer/resnet50_fusion_test.cc b/onnxruntime/test/optimizer/resnet50_fusion_test.cc index 04b11b46e5002..5cb0206156a84 100644 --- a/onnxruntime/test/optimizer/resnet50_fusion_test.cc +++ b/onnxruntime/test/optimizer/resnet50_fusion_test.cc @@ -61,13 +61,13 @@ TEST_F(ResNet50FusionTests, FuseConvIntegrationTest) { ASSERT_STATUS_OK(graph_transformation_mgr_32.Register(std::make_unique(), TransformerLevel::Level3)); ASSERT_STATUS_OK(graph_transformation_mgr_32.Register(std::make_unique(), TransformerLevel::Level3)); ASSERT_STATUS_OK(graph_transformation_mgr_32.ApplyTransformers(fp32_graph, TransformerLevel::Level3, *logger)); - ASSERT_STATUS_OK(Model::Save(*fp32_model, "resnet50_fused.onnx")); + ASSERT_STATUS_OK(Model::Save(*fp32_model, ORT_TSTR("resnet50_fused.onnx"))); onnxruntime::GraphTransformerManager graph_transformation_mgr_16{5}; ASSERT_STATUS_OK(graph_transformation_mgr_16.Register(std::make_unique(), TransformerLevel::Level3)); ASSERT_STATUS_OK(graph_transformation_mgr_16.Register(std::make_unique(), TransformerLevel::Level3)); ASSERT_STATUS_OK(graph_transformation_mgr_16.ApplyTransformers(fp16_graph, TransformerLevel::Level3, *logger)); - ASSERT_STATUS_OK(Model::Save(*fp16_model, "resnet50_fp16_fused.onnx")); + ASSERT_STATUS_OK(Model::Save(*fp16_model, ORT_TSTR("resnet50_fp16_fused.onnx"))); // std::cout << "-------Op Counts After Fusion---------" << std::endl; fp32_op_count = CountOpsInGraph(fp32_graph); fp16_op_count = CountOpsInGraph(fp16_graph); @@ -91,7 +91,7 @@ TEST_F(ResNet50FusionTests, FuseConvAddReluUnitTest) { ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::make_unique(), TransformerLevel::Level3)); ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level3, *logger)); op_to_count = CountOpsInGraph(graph); - ASSERT_STATUS_OK(Model::Save(*p_model, "conv_add_relu_fp16_fused.onnx")); + ASSERT_STATUS_OK(Model::Save(*p_model, ORT_TSTR("conv_add_relu_fp16_fused.onnx"))); ASSERT_TRUE(op_to_count["Add"] == 0); // Add removed from graph ASSERT_TRUE(op_to_count["Relu"] == 0); // Relu removed from graph } @@ -109,7 +109,7 @@ TEST_F(ResNet50FusionTests, FuseConvAddUnitTest) { ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::make_unique(), TransformerLevel::Level3)); ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level3, 
*logger)); op_to_count = CountOpsInGraph(graph); - ASSERT_STATUS_OK(Model::Save(*p_model, "conv_add_fp16_fused.onnx")); + ASSERT_STATUS_OK(Model::Save(*p_model, ORT_TSTR("conv_add_fp16_fused.onnx"))); ASSERT_TRUE(op_to_count["Add"] == 0); // Add removed from graph } TEST_F(ResNet50FusionTests, FuseConvReluUnitTest) { @@ -126,9 +126,9 @@ TEST_F(ResNet50FusionTests, FuseConvReluUnitTest) { ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::make_unique(), TransformerLevel::Level3)); ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level3, *logger)); op_to_count = CountOpsInGraph(graph); - ASSERT_STATUS_OK(Model::Save(*p_model, "conv_relu_fp16_fused.onnx")); + ASSERT_STATUS_OK(Model::Save(*p_model, ORT_TSTR("conv_relu_fp16_fused.onnx"))); ASSERT_TRUE(op_to_count["Relu"] == 0); // Relu removed from graph } #endif // defined(MLAS_F16VEC_INTRINSICS_SUPPORTED) && !defined(DISABLE_CONTRIB_OPS) } // namespace test -} // namespace onnxruntime \ No newline at end of file +} // namespace onnxruntime From 69d748501b5122b367ce3b7051fd3ba54ed2c261 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Thu, 13 Jun 2024 17:13:08 +0000 Subject: [PATCH 40/50] fix --- onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index 86771e8a0275a..98f0fa4b85cdd 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -546,7 +546,7 @@ TEST(TensorrtExecutionProviderTest, EPContextNode) { } TEST(TensorrtExecutionProviderTest, TRTPluginsCustomOpTest) { - PathString model_name = "testdata/trt_plugin_custom_op_test.onnx"; + PathString model_name = ORT_TSTR("testdata/trt_plugin_custom_op_test.onnx"); SessionOptions so; so.session_logid = "TensorrtExecutionProviderTRTPluginsTest"; RunOptions run_options; @@ -575,7 +575,6 @@ TEST(TensorrtExecutionProviderTest, TRTPluginsCustomOpTest) { OrtTensorRTProviderOptionsV2 params; std::unique_ptr<IExecutionProvider> execution_provider = TensorrtExecutionProviderWithOptions(&params); EXPECT_TRUE(session_object.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); - std::cout << model_name << std::endl; auto status = session_object.Load(model_name); ASSERT_TRUE(status.IsOK()); status = session_object.Initialize(); @@ -593,7 +593,7 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { ASSERT_NE(pos, std::string::npos); std::string cache_type = ToUTF8String(param.substr(0, pos)); - PathString model_name = "trt_execution_provider_" + cache_type + "caching_test_" + input_type + ".onnx"; + PathString model_name = ORT_TSTR("trt_execution_provider_"); + model_name += cache_type + "caching_test_" + input_type + ".onnx"; std::vector<int> dims; if (input_type.compare("dynamic") == 0) { dims = {1, -1, -1}; // dynamic shape input From 8f68f88df8e5e51a67232a2929798297a827275e Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Thu, 13 Jun 2024 21:05:42 +0000 Subject: [PATCH 41/50] update --- .../providers/tensorrt/tensorrt_basic_test.cc | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index 98f0fa4b85cdd..69b3b89cd5e01 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ 
-590,10 +590,12 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { size_t pos = param.find("_"); std::string input_type = param.substr(pos + 1); ASSERT_NE(pos, std::string::npos); - std::string cache_type = ToUTF8String(param.substr(0, pos)); - - PathString model_name = ORT_TSTR("trt_execution_provider_"); - model_name += cache_type + "caching_test_" + input_type + ".onnx"; + std::string cache_type_mbs = param.substr(0, pos); + PathString cache_type = ToPathString(cache_type_mbs); + std::basic_ostringstream<ORTCHAR_T> oss; + oss << ORT_TSTR("trt_execution_provider_") << cache_type << ORT_TSTR("_caching_test_") << ToPathString(input_type) << ORT_TSTR(".onnx"); + PathString model_name = oss.str(); std::vector<int> dims; if (input_type.compare("dynamic") == 0) { dims = {1, -1, -1}; // dynamic shape input @@ -604,7 +606,7 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { CreateBaseModel(model_name, cache_type + "cachingtest", dims); SessionOptions so; - so.session_logid = "TensorrtExecutionProvider" + cache_type + "cacheTest"; + so.session_logid = "TensorrtExecutionProvider" + cache_type_mbs + "cacheTest"; RunOptions run_options; run_options.run_tag = so.session_logid; InferenceSession session_object{so, GetEnvironment()}; @@ -633,7 +635,7 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { std::vector<float> expected_values_mul_m = {3.0f, 6.0f, 9.0f, 12.0f, 15.0f, 18.0f}; OrtTensorRTProviderOptionsV2 params; - if (cache_type.compare("engine") == 0) { + if (cache_type_mbs.compare("engine") == 0) { /* Following code block tests the functionality of engine and optimization profile of ORT TRT, including: * - engine cache serialization/de-serialization * - profile cache serialization/de-serialization @@ -807,7 +809,7 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { } } - } else if (cache_type.compare("timing") == 0) { + } else if (cache_type_mbs.compare("timing") == 0) { /* Following code block tests the functionality of timing cache, including: * - timing cache serialization/de-serialization * - TODO: benefit of using a timing cache no matter if dynamic / static input From d2785d8e3b33f3eaa8398336dc0b85fad26b3273 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Fri, 14 Jun 2024 04:03:00 +0000 Subject: [PATCH 42/50] update --- onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index 69b3b89cd5e01..f65a2fa6c4e20 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -603,7 +603,7 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { dims = {1, 3, 2}; } - CreateBaseModel(model_name, cache_type + "cachingtest", dims); + CreateBaseModel(model_name, cache_type + ORT_TSTR("cachingtest"), dims); SessionOptions so; so.session_logid = "TensorrtExecutionProvider" + cache_type_mbs + "cacheTest"; From b2d57da1372b0fe4158caf0cef3b7d79a4217d72 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Fri, 14 Jun 2024 05:43:36 +0000 Subject: [PATCH 43/50] Revert "update" This reverts commit d2785d8e3b33f3eaa8398336dc0b85fad26b3273. 
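Why revert: the second argument of CreateBaseModel is the graph name, which apparently stays a plain std::string throughout this series (PATCH 44 passes cache_type_mbs, and session_logid is built from it the same way); only the first argument, the model file path, was widened to PathString. Since cache_type is now a PathString, cache_type + ORT_TSTR("cachingtest") yields a wide string on Windows that cannot convert to the narrow graph-name parameter. A sketch of the intended split follows; the CreateBaseModel signature is inferred from its call sites above, not quoted from the source:

    // hypothetical signature, inferred from the call sites in this series
    void CreateBaseModel(PathString model_name,   // file path: ORTCHAR_T-based
                         std::string graph_name,  // graph name: stays narrow
                         std::vector<int> dims,
                         bool add_non_zero_node = false);
    // the form that finally lands in PATCH 44:
    CreateBaseModel(model_name, cache_type_mbs + "cachingtest", dims);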
--- onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index f65a2fa6c4e20..69b3b89cd5e01 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -603,7 +603,7 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { dims = {1, 3, 2}; } - CreateBaseModel(model_name, cache_type + ORT_TSTR("cachingtest"), dims); + CreateBaseModel(model_name, cache_type + "cachingtest", dims); SessionOptions so; so.session_logid = "TensorrtExecutionProvider" + cache_type_mbs + "cacheTest"; From 148d94af0a89d4a780ab13ade2ef3238bf62fb40 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Fri, 14 Jun 2024 05:43:59 +0000 Subject: [PATCH 44/50] update --- onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index 69b3b89cd5e01..2b5b82d0fc16a 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -603,7 +603,7 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { dims = {1, 3, 2}; } - CreateBaseModel(model_name, cache_type + "cachingtest", dims); + CreateBaseModel(model_name, cache_type_mbs + "cachingtest", dims); SessionOptions so; so.session_logid = "TensorrtExecutionProvider" + cache_type_mbs + "cacheTest"; From 076d569cfc40484e6cf2c3c3e75614f8ff8c8837 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Mon, 24 Jun 2024 13:46:22 -0700 Subject: [PATCH 45/50] update --- onnxruntime/core/framework/graph_partitioner.cc | 3 ++- onnxruntime/core/graph/graph.cc | 8 ++++---- onnxruntime/core/optimizer/matmul_scale_fusion.cc | 3 ++- .../transpose_optimization/ort_optimizer_api_impl.cc | 3 ++- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/onnxruntime/core/framework/graph_partitioner.cc b/onnxruntime/core/framework/graph_partitioner.cc index 6ae09301154ac..4f745b74abce7 100644 --- a/onnxruntime/core/framework/graph_partitioner.cc +++ b/onnxruntime/core/framework/graph_partitioner.cc @@ -670,7 +670,8 @@ static Status CreateEpContextModel(const ExecutionProviders& execution_providers } if (std::filesystem::exists(context_cache_path)) { - return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to generate EP context model since the file '", context_cache_path, "' exist already."); + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to generate EP context model since the file '", + context_cache_path, "' exists already."); } Model ep_context_model(graph.Name(), false, ModelMetaData(), PathString(), IOnnxRuntimeOpSchemaRegistryList(), diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc index 2232e77e1ac8f..7d1172302f9de 100644 --- a/onnxruntime/core/graph/graph.cc +++ b/onnxruntime/core/graph/graph.cc @@ -3025,8 +3025,8 @@ Status Graph::VerifyNodeAndOpMatch(const ResolveOptions& options) { ctx.set_opset_imports(DomainToVersionMap()); ctx.set_schema_registry(schema_registry_.get()); // Set the parent directory of model path to load external tensors if exist - // TODO: avoid converting it to a multibyte string - ctx.set_model_dir(ModelPath().parent_path().string()); + // ONNX expectes a UTF-8 string here. 
+ ctx.set_model_dir(ToUTF8String(ModelPath().parent_path().native())); LexicalScopeContext parent; if (parent_node_) { @@ -3977,8 +3977,8 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std size_t initializer_size_threshold) const { GraphProto result; ToGraphProtoInternal(result); - // If model_file_path is just a file name without a path separator, for example: "model.onnx". Its parent path could be empty. - // Else, save external data file in same directory as the model. + // If model_file_path is just a file name without a path separator, for example: "model.onnx". Its parent path could + // be empty. Else, save external data file in same directory as the model. const std::filesystem::path modified_external_file_path = model_file_path.parent_path() / external_file_path; std::ofstream external_stream(modified_external_file_path, std::ofstream::out | std::ofstream::binary); diff --git a/onnxruntime/core/optimizer/matmul_scale_fusion.cc b/onnxruntime/core/optimizer/matmul_scale_fusion.cc index 1f8b1a4c878e3..338722fb00782 100644 --- a/onnxruntime/core/optimizer/matmul_scale_fusion.cc +++ b/onnxruntime/core/optimizer/matmul_scale_fusion.cc @@ -17,7 +17,8 @@ namespace onnxruntime { namespace { template <typename T> struct ExtractScalarAsFloatDispatchTarget { - Status operator()(const ONNX_NAMESPACE::TensorProto& tensor_proto, const std::filesystem::path& model_path, float& scalar_float) { + Status operator()(const ONNX_NAMESPACE::TensorProto& tensor_proto, const std::filesystem::path& model_path, + float& scalar_float) { T scalar; ORT_RETURN_IF_ERROR(utils::UnpackTensor(tensor_proto, model_path, &scalar, 1)); scalar_float = static_cast<float>(scalar); diff --git a/onnxruntime/core/optimizer/transpose_optimization/ort_optimizer_api_impl.cc b/onnxruntime/core/optimizer/transpose_optimization/ort_optimizer_api_impl.cc index 548895335b1ac..1f7e54cb807ea 100644 --- a/onnxruntime/core/optimizer/transpose_optimization/ort_optimizer_api_impl.cc +++ b/onnxruntime/core/optimizer/transpose_optimization/ort_optimizer_api_impl.cc @@ -50,7 +50,8 @@ class ApiTensor final : public api::TensorRef { AllocatorPtr cpu_allocator_; public: - explicit ApiTensor(const onnx::TensorProto& tensor_proto, const std::filesystem::path& model_path, AllocatorPtr cpu_allocator) + explicit ApiTensor(const onnx::TensorProto& tensor_proto, const std::filesystem::path& model_path, + AllocatorPtr cpu_allocator) : tensor_proto_(tensor_proto), model_path_(model_path), cpu_allocator_(std::move(cpu_allocator)) {} const onnx::TensorProto& TensorProto() { From 5827bdd5dd1bec7210af8bf31a30ab2090b9735b Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Tue, 25 Jun 2024 12:39:17 -0700 Subject: [PATCH 46/50] Update onnxruntime/core/framework/tensorprotoutils.cc Co-authored-by: Edward Chen <18449977+edgchen1@users.noreply.github.com> --- onnxruntime/core/framework/tensorprotoutils.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc index 5b7d2c02eef4a..77323f268a27d 100644 --- a/onnxruntime/core/framework/tensorprotoutils.cc +++ b/onnxruntime/core/framework/tensorprotoutils.cc @@ -700,7 +700,7 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const std::filesy /*out*/ T* p_data, size_t expected_num_elements) { #if !defined(ORT_MINIMAL_BUILD) if (HasExternalData(tensor)) { - return UnpackTensorWithExternalData(tensor, model_path.empty() ? 
std::filesystem::path() : model_path.parent_path(), + return UnpackTensorWithExternalData(tensor, model_path.parent_path(), expected_num_elements, p_data); } #else From e9fad36d418bda91ba97879506302518ec14eb94 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Tue, 25 Jun 2024 13:51:25 -0700 Subject: [PATCH 47/50] update --- onnxruntime/core/graph/model.cc | 8 +------- onnxruntime/core/graph/model.h | 6 +----- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/onnxruntime/core/graph/model.cc b/onnxruntime/core/graph/model.cc index 3c5447d9a34e9..e9d1b4e944edd 100644 --- a/onnxruntime/core/graph/model.cc +++ b/onnxruntime/core/graph/model.cc @@ -593,15 +593,9 @@ static Status SaveModel(Model& model, const T& file_path) { #endif } -#ifdef _WIN32 -Status Model::Save(Model& model, const std::wstring& file_path) { +Status Model::Save(Model& model, const PathString& file_path) { return SaveModel(model, file_path); } -#else -Status Model::Save(Model& model, const std::string& file_path) { - return SaveModel(model, file_path); -} -#endif template <typename T> static Status SaveModelWithExternalInitializers(Model& model, diff --git a/onnxruntime/core/graph/model.h b/onnxruntime/core/graph/model.h index 65f9fd4d03627..9c73ee16963bd 100644 --- a/onnxruntime/core/graph/model.h +++ b/onnxruntime/core/graph/model.h @@ -192,11 +192,7 @@ class Model { const std::filesystem::path& file_path, size_t initializer_size_threshold) const; -#ifdef _WIN32 - static common::Status Save(Model& model, const std::wstring& file_path); -#else - static common::Status Save(Model& model, const std::string& file_path); -#endif + static common::Status Save(Model& model, const PathString& file_path); static common::Status Save(Model& model, int fd); From 514ea8c83fa2130ee99c36675c8f11d3c2abba25 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Wed, 26 Jun 2024 09:44:30 -0700 Subject: [PATCH 48/50] update --- onnxruntime/core/graph/graph.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc index 7d1172302f9de..046ca4ccc9f71 100644 --- a/onnxruntime/core/graph/graph.cc +++ b/onnxruntime/core/graph/graph.cc @@ -3977,6 +3977,7 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std size_t initializer_size_threshold) const { GraphProto result; ToGraphProtoInternal(result); + ORT_ENFORCE(!model_file_path.has_parent_path() || external_file_path.is_relative()); // If model_file_path is just a file name without a path separator, for example: "model.onnx". Its parent path could // be empty. Else, save external data file in same directory as the model. 
const std::filesystem::path modified_external_file_path = model_file_path.parent_path() / external_file_path; From 0f7edb04c8cadb6b7ebc0feb956760f62b7aa4f8 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Thu, 27 Jun 2024 16:28:47 -0700 Subject: [PATCH 49/50] Update onnxruntime/core/graph/graph.cc Co-authored-by: Edward Chen <18449977+edgchen1@users.noreply.github.com> --- onnxruntime/core/graph/graph.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc index 046ca4ccc9f71..9ec8c019c8e0b 100644 --- a/onnxruntime/core/graph/graph.cc +++ b/onnxruntime/core/graph/graph.cc @@ -3025,7 +3025,7 @@ Status Graph::VerifyNodeAndOpMatch(const ResolveOptions& options) { ctx.set_opset_imports(DomainToVersionMap()); ctx.set_schema_registry(schema_registry_.get()); // Set the parent directory of model path to load external tensors if exist - // ONNX expectes a UTF-8 string here. + // ONNX expects a UTF-8 string here. ctx.set_model_dir(ToUTF8String(ModelPath().parent_path().native())); LexicalScopeContext parent; From 19009ce4b88d40f97e4ed59b33195c10d3cf05cd Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Thu, 27 Jun 2024 16:30:24 -0700 Subject: [PATCH 50/50] Update onnxruntime/core/graph/graph.cc Co-authored-by: Edward Chen <18449977+edgchen1@users.noreply.github.com> --- onnxruntime/core/graph/graph.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc index 9ec8c019c8e0b..67451301023e5 100644 --- a/onnxruntime/core/graph/graph.cc +++ b/onnxruntime/core/graph/graph.cc @@ -3977,7 +3977,7 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std size_t initializer_size_threshold) const { GraphProto result; ToGraphProtoInternal(result); - ORT_ENFORCE(!model_file_path.has_parent_path() || external_file_path.is_relative()); + ORT_ENFORCE(external_file_path.is_relative()); // If model_file_path is just a file name without a path separator, for example: "model.onnx". Its parent path could // be empty. Else, save external data file in same directory as the model. const std::filesystem::path modified_external_file_path = model_file_path.parent_path() / external_file_path;
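Taken together, patches 37-50 converge on three conventions: path literals are wrapped in ORT_TSTR(), anything touching the file system travels as PathString or std::filesystem::path with UTF-8 converted explicitly at the boundaries (ToPathString/ToUTF8String), and external-initializer files resolve against the model's parent directory under a relative-path invariant. The sketch below condenses these conventions; every definition in it is a simplified stand-in for the corresponding ORT type or helper, not the real implementation:

    // Sketch only -- simplified stand-ins for the ORT definitions used above.
    #include <cassert>
    #include <filesystem>
    #include <string>

    #ifdef _WIN32
    using ORTCHAR_T = wchar_t;                        // stand-in for ORT's typedef
    #define ORT_TSTR(s) L##s                          // stand-in for ORT's macro
    #else
    using ORTCHAR_T = char;
    #define ORT_TSTR(s) s
    #endif
    using PathString = std::basic_string<ORTCHAR_T>;  // stand-in

    // 1. Path literals are spelled once, portably (PATCH 37-40):
    const PathString kModelName = ORT_TSTR("EPContextNode_test.onnx");

    // 2. UTF-8 options cross into native paths through an explicit conversion,
    //    never by concatenating narrow and wide strings (PATCH 38/41/45);
    //    std::filesystem::u8path approximates ORT's ToPathString here.
    inline std::filesystem::path FromUtf8Option(const std::string& utf8_option) {
      return std::filesystem::u8path(utf8_option);
    }

    // 3. External initializer data resolves against the model's own directory,
    //    and the external file name must be relative (PATCH 48/50). A bare
    //    model file name has an empty parent_path(), i.e. the current directory.
    inline std::filesystem::path ExternalDataPath(const std::filesystem::path& model_file_path,
                                                  const std::filesystem::path& external_file_path) {
      assert(external_file_path.is_relative());  // mirrors the ORT_ENFORCE in PATCH 50
      return model_file_path.parent_path() / external_file_path;
    }

For instance, ExternalDataPath(ORT_TSTR("model.onnx"), ORT_TSTR("weights.bin")) resolves to "weights.bin" in the current directory, while ExternalDataPath(ORT_TSTR("models/model.onnx"), ORT_TSTR("weights.bin")) resolves to "models/weights.bin".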