diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake index c6c9d8f4894c5..7e7819ac31a19 100644 --- a/cmake/onnxruntime_providers.cmake +++ b/cmake/onnxruntime_providers.cmake @@ -66,11 +66,7 @@ if(onnxruntime_USE_CUDA) set(PROVIDERS_CUDA onnxruntime_providers_cuda) endif() if(onnxruntime_USE_COREML) - if (CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS") - set(PROVIDERS_COREML onnxruntime_providers_coreml coreml_proto) - else() - set(PROVIDERS_COREML onnxruntime_providers_coreml) - endif() + set(PROVIDERS_COREML onnxruntime_providers_coreml coreml_proto) endif() if(onnxruntime_USE_NNAPI_BUILTIN) set(PROVIDERS_NNAPI onnxruntime_providers_nnapi) diff --git a/cmake/onnxruntime_providers_coreml.cmake b/cmake/onnxruntime_providers_coreml.cmake index 2ca4a22aca7d2..c9f35e5337f9b 100644 --- a/cmake/onnxruntime_providers_coreml.cmake +++ b/cmake/onnxruntime_providers_coreml.cmake @@ -7,6 +7,27 @@ endif() add_compile_definitions(USE_COREML=1) +# Check if we can build the coremltools code for creating an mlpackage with an mlprogram. +# The coremltools source requires std::filesystem::path which is only available from iOS 13 on. +set(_enable_ML_PROGRAM ON) +if (IOS AND CMAKE_OSX_DEPLOYMENT_TARGET VERSION_LESS 13.0) + message(WARNING "CoreML ML Program is not supported on iOS < 13.0. Excluding ML Program support from build.") + set(_enable_ML_PROGRAM OFF) +elseif(LINUX) + # uuid-dev is required. we don't bother installing on CIs as it's really for manual developer testing. + find_library(LibUUID_LIBRARY NAMES uuid) + find_path(LibUUID_INCLUDE_DIR NAMES uuid/uuid.h) + if (NOT LibUUID_INCLUDE_DIR) + message(STATUS "uuid/uuid.h was not found as is required for ML Program support. " + "Run `sudo apt install uuid-dev` if you need to test ML Program related CoreML EP code. ") + set(_enable_ML_PROGRAM OFF) + endif() +endif() + +if (_enable_ML_PROGRAM) + add_compile_definitions(COREML_ENABLE_MLPROGRAM=1) +endif() + # Compile CoreML proto definition to ${CMAKE_CURRENT_BINARY_DIR}/coreml_proto set(COREML_PROTO_ROOT ${coremltools_SOURCE_DIR}/mlmodel/format) file(GLOB coreml_proto_srcs "${COREML_PROTO_ROOT}/*.proto") @@ -19,8 +40,8 @@ target_compile_definitions(coreml_proto PUBLIC $) set_target_properties(coreml_proto PROPERTIES COMPILE_FLAGS "-fvisibility=hidden") set_target_properties(coreml_proto PROPERTIES COMPILE_FLAGS "-fvisibility-inlines-hidden") -set(_src_sub_dir "coreml_proto/") +set(_src_sub_dir "coreml_proto/") onnxruntime_protobuf_generate( APPEND_PATH GEN_SRC_SUB_DIR ${_src_sub_dir} @@ -55,6 +76,10 @@ file(GLOB_RECURSE onnxruntime_providers_shared_utils_cc_srcs CONFIGURE_DEPENDS "${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.cc" ) +file(GLOB onnxruntime_providers_coreml_public_headers CONFIGURE_DEPENDS + "${ONNXRUNTIME_INCLUDE_DIR}/core/providers/coreml/*.h" +) + file(GLOB onnxruntime_providers_coreml_cc_srcs_top CONFIGURE_DEPENDS "${ONNXRUNTIME_ROOT}/core/providers/coreml/*.h" @@ -67,15 +92,38 @@ file(GLOB_RECURSE "${ONNXRUNTIME_ROOT}/core/providers/coreml/builders/*.h" "${ONNXRUNTIME_ROOT}/core/providers/coreml/builders/*.cc" ) -if (NOT CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND NOT CMAKE_SYSTEM_NAME STREQUAL "iOS") - list(REMOVE_ITEM onnxruntime_providers_coreml_cc_srcs_nested - "${ONNXRUNTIME_ROOT}/core/providers/coreml/builders/model_builder.h" - "${ONNXRUNTIME_ROOT}/core/providers/coreml/builders/model_builder.cc" + +if(_enable_ML_PROGRAM) + # Add helpers to create mlpackage weights. 
limit to just the files we need to minimize the changes to make them + # build on Windows and Linux. + file(GLOB + onnxruntime_providers_coreml_milblob_cc_srcs CONFIGURE_DEPENDS + "${coremltools_SOURCE_DIR}/mlmodel/src/MILBlob/*.hpp" + "${coremltools_SOURCE_DIR}/mlmodel/src/MILBlob/*.cpp" + "${coremltools_SOURCE_DIR}/mlmodel/src/MILBlob/Util/*.hpp" + "${coremltools_SOURCE_DIR}/mlmodel/src/MILBlob/Blob/BlobDataType.hpp" + "${coremltools_SOURCE_DIR}/mlmodel/src/MILBlob/Blob/StorageFormat.hpp" + "${coremltools_SOURCE_DIR}/mlmodel/src/MILBlob/Blob/FileWriter.?pp" + "${coremltools_SOURCE_DIR}/mlmodel/src/MILBlob/Blob/StorageWriter.?pp" + ) + + # Add helpers to create mlpackage + file(GLOB + onnxruntime_providers_coreml_modelpackage_cc_srcs CONFIGURE_DEPENDS + "${coremltools_SOURCE_DIR}/modelpackage/src/ModelPackage.?pp" + "${coremltools_SOURCE_DIR}/modelpackage/src/Utils/JsonMap.?pp" ) + + set(coremltools_srcs + ${onnxruntime_providers_coreml_milblob_cc_srcs} + ${onnxruntime_providers_coreml_modelpackage_cc_srcs} + ) + + source_group(TREE ${coremltools_SOURCE_DIR} PREFIX coremltools FILES ${coremltools_srcs}) endif() # Add CoreML objective c++ source code -if (CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS") +if (APPLE) file(GLOB onnxruntime_providers_coreml_objcc_srcs CONFIGURE_DEPENDS "${ONNXRUNTIME_ROOT}/core/providers/coreml/model/model.h" @@ -83,26 +131,79 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS") "${ONNXRUNTIME_ROOT}/core/providers/coreml/model/host_utils.h" "${ONNXRUNTIME_ROOT}/core/providers/coreml/model/host_utils.mm" ) +else() + # add the Model implementation that uses the protobuf types but excludes any actual CoreML dependencies + # by using stub implementations on non-Apple platforms. 
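For illustration, such a stub can be very small; the real signatures live in host_utils.h and are not spelled out in this diff, so the names below are inferred from the `util::HasRequiredBaseOS()` call removed from helper.cc later in this change:

    // host_utils_stub.cc -- illustrative sketch only
    namespace onnxruntime {
    namespace coreml {
    namespace util {

    // There is no CoreML runtime to query on Windows/Linux, so report that any
    // required OS version is available. This lets the EP's partitioning and
    // model-building code run for development and testing; no CoreML model is
    // compiled or executed on these platforms.
    bool HasRequiredBaseOS() { return true; }

    }  // namespace util
    }  // namespace coreml
    }  // namespace onnxruntime
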
+ file(GLOB + onnxruntime_providers_coreml_objcc_srcs CONFIGURE_DEPENDS + "${ONNXRUNTIME_ROOT}/core/providers/coreml/model/host_utils.h" + "${ONNXRUNTIME_ROOT}/core/providers/coreml/model/host_utils_stub.cc" + "${ONNXRUNTIME_ROOT}/core/providers/coreml/model/model.h" + "${ONNXRUNTIME_ROOT}/core/providers/coreml/model/model_stub.cc" + ) endif() set(onnxruntime_providers_coreml_cc_srcs ${onnxruntime_providers_coreml_cc_srcs_top} ${onnxruntime_providers_coreml_cc_srcs_nested} ${onnxruntime_providers_shared_utils_cc_srcs} + ${onnxruntime_providers_coreml_objcc_srcs} ) -source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_coreml_cc_srcs}) +source_group(TREE ${ONNXRUNTIME_ROOT} FILES ${onnxruntime_providers_coreml_cc_srcs}) +source_group(TREE ${ONNXRUNTIME_INCLUDE_DIR} FILES ${onnxruntime_providers_coreml_public_headers}) + onnxruntime_add_static_library(onnxruntime_providers_coreml - ${onnxruntime_providers_coreml_cc_srcs} ${onnxruntime_providers_coreml_objcc_srcs} + ${onnxruntime_providers_coreml_public_headers} + ${onnxruntime_providers_coreml_cc_srcs} + ${coremltools_srcs} ) + onnxruntime_add_include_to_target(onnxruntime_providers_coreml - onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface + onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 + safeint_interface ) -if (CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS") - onnxruntime_add_include_to_target(onnxruntime_providers_coreml coreml_proto) - target_link_libraries(onnxruntime_providers_coreml PRIVATE coreml_proto "-framework Foundation" "-framework CoreML") - add_dependencies(onnxruntime_providers_coreml coreml_proto) + +onnxruntime_add_include_to_target(onnxruntime_providers_coreml coreml_proto) +target_link_libraries(onnxruntime_providers_coreml PRIVATE coreml_proto) +add_dependencies(onnxruntime_providers_coreml coreml_proto) + +if (APPLE) + target_compile_definitions(onnxruntime_providers_coreml PRIVATE __APPLE__) endif() + +if (_enable_ML_PROGRAM) + # Setup coremltools fp16 and json dependencies for creating an mlpackage. + # + # These are also used by external/xnnpack.cmake. 
fp16 depends on psimd + FetchContent_Declare(psimd URL ${DEP_URL_psimd} URL_HASH SHA1=${DEP_SHA1_psimd}) + onnxruntime_fetchcontent_makeavailable(psimd) + set(PSIMD_SOURCE_DIR ${psimd_SOURCE_DIR}) + FetchContent_Declare(fp16 URL ${DEP_URL_fp16} URL_HASH SHA1=${DEP_SHA1_fp16}) + set(FP16_BUILD_TESTS OFF CACHE INTERNAL "") + set(FP16_BUILD_BENCHMARKS OFF CACHE INTERNAL "") + onnxruntime_fetchcontent_makeavailable(fp16) + + # need to tweak the include paths to match what the coreml source code expects + target_include_directories(onnxruntime_providers_coreml PRIVATE + ${fp16_SOURCE_DIR}/include + ${nlohmann_json_SOURCE_DIR}/single_include/nlohmann + ${coremltools_SOURCE_DIR} + ${coremltools_SOURCE_DIR}/mlmodel/src/ + ${coremltools_SOURCE_DIR}/modelpackage/src/ + ) + + add_dependencies(onnxruntime_providers_coreml nlohmann_json::nlohmann_json fp16) + + if (LINUX) + target_link_libraries(onnxruntime_providers_coreml PRIVATE uuid) + endif() +endif() + +if (APPLE) + target_link_libraries(onnxruntime_providers_coreml PRIVATE "-framework Foundation" "-framework CoreML") +endif() + add_dependencies(onnxruntime_providers_coreml ${onnxruntime_EXTERNAL_DEPENDENCIES}) set_target_properties(onnxruntime_providers_coreml PROPERTIES CXX_STANDARD_REQUIRED ON) diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index 308caad296831..3ed695327c183 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -567,11 +567,7 @@ if(onnxruntime_USE_ROCM) endif() if(onnxruntime_USE_COREML) - if (CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS") - list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_coreml coreml_proto) - else() - list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_coreml) - endif() + list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_coreml coreml_proto) endif() if(onnxruntime_USE_ACL) @@ -676,15 +672,9 @@ endif() if(onnxruntime_USE_COREML) list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/coreml/*) - if (CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS") - list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_coreml coreml_proto) - list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_coreml coreml_proto) - list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_coreml coreml_proto) - else() - list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_coreml) - list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_coreml) - list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_coreml) - endif() + list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_coreml coreml_proto) + list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_coreml coreml_proto) + list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_coreml coreml_proto) endif() if(onnxruntime_USE_XNNPACK) diff --git a/include/onnxruntime/core/providers/coreml/coreml_provider_factory.h b/include/onnxruntime/core/providers/coreml/coreml_provider_factory.h index 03715eb5b78b2..55abb90b981f5 100644 --- a/include/onnxruntime/core/providers/coreml/coreml_provider_factory.h +++ b/include/onnxruntime/core/providers/coreml/coreml_provider_factory.h @@ -28,9 +28,12 @@ enum COREMLFlags { // dynamic shapes. However, the performance may be negatively impacted if inputs have dynamic shapes. 
COREML_FLAG_ONLY_ALLOW_STATIC_INPUT_SHAPES = 0x008, + // Create an MLProgram. By default it will create a NeuralNetwork model. Requires Core ML 5 or later. + COREML_FLAG_CREATE_MLPROGRAM = 0x010, + // Keep COREML_FLAG_LAST at the end of the enum definition // And assign the last COREMLFlag to it - COREML_FLAG_LAST = COREML_FLAG_ONLY_ALLOW_STATIC_INPUT_SHAPES, + COREML_FLAG_LAST = COREML_FLAG_CREATE_MLPROGRAM, }; #ifdef __cplusplus diff --git a/objectivec/include/ort_coreml_execution_provider.h b/objectivec/include/ort_coreml_execution_provider.h index a015b6fd60c8f..6ff18176ebeb2 100644 --- a/objectivec/include/ort_coreml_execution_provider.h +++ b/objectivec/include/ort_coreml_execution_provider.h @@ -41,6 +41,17 @@ NS_ASSUME_NONNULL_BEGIN */ @property BOOL onlyEnableForDevicesWithANE; +/** + * Only allow CoreML EP to take nodes with inputs with static shapes. By default it will also allow inputs with + * dynamic shapes. However, the performance may be negatively impacted if inputs have dynamic shapes. + */ +@property BOOL onlyAllowStaticInputShapes; + +/** + * Create an MLProgram. By default it will create a NeuralNetwork model. Requires Core ML 5 or later. + */ +@property BOOL createMLProgram; + @end @interface ORTSessionOptions (ORTSessionOptionsCoreMLEP) diff --git a/objectivec/ort_coreml_execution_provider.mm b/objectivec/ort_coreml_execution_provider.mm index 6340fdea1c3a7..58b47d68eea63 100644 --- a/objectivec/ort_coreml_execution_provider.mm +++ b/objectivec/ort_coreml_execution_provider.mm @@ -26,7 +26,10 @@ - (BOOL)appendCoreMLExecutionProviderWithOptions:(ORTCoreMLExecutionProviderOpti const uint32_t flags = (options.useCPUOnly ? COREML_FLAG_USE_CPU_ONLY : 0) | (options.enableOnSubgraphs ? COREML_FLAG_ENABLE_ON_SUBGRAPH : 0) | - (options.onlyEnableForDevicesWithANE ? COREML_FLAG_ONLY_ENABLE_DEVICE_WITH_ANE : 0); + (options.onlyEnableForDevicesWithANE ? COREML_FLAG_ONLY_ENABLE_DEVICE_WITH_ANE : 0) | + (options.onlyAllowStaticInputShapes ? COREML_FLAG_ONLY_ALLOW_STATIC_INPUT_SHAPES : 0) | + (options.createMLProgram ? COREML_FLAG_CREATE_MLPROGRAM : 0); + Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CoreML( [self CXXAPIOrtSessionOptions], flags)); return YES; diff --git a/onnxruntime/core/providers/coreml/builders/coreml_spec.h b/onnxruntime/core/providers/coreml/builders/coreml_spec.h index e9cd4af94e5fd..c9adba9e579d0 100644 --- a/onnxruntime/core/providers/coreml/builders/coreml_spec.h +++ b/onnxruntime/core/providers/coreml/builders/coreml_spec.h @@ -3,12 +3,28 @@ #pragma once -// TODO come up with a more intuitive way of limiting this to Apple platform builds -// E.g., putting CoreML EP files that should be enabled iff `defined(__APPLE__)` in a separate directory. -#if !defined(__APPLE__) -#error "This file should only be included when building on Apple platforms." +#include "onnxruntime_config.h" + +#if defined(__GNUC__) +#pragma GCC diagnostic push + +// Disable warning from protobuf code. 
+// +// In file included from coreml_proto/Model.pb.h:30: +// In file included from _deps/protobuf-src/src/google/protobuf/extension_set.h:53: +// _deps/protobuf-src/src/google/protobuf/parse_context.h:328:47: +// error: implicit conversion loses integer precision: 'long' to 'int' [-Werror,-Wshorten-64-to-32] +#ifdef HAS_SHORTEN_64_TO_32 +#pragma GCC diagnostic ignored "-Wshorten-64-to-32" +#endif #endif +// Model.pb.h is generated in the build output directory from the CoreML protobuf files in +// onnxruntime/core/providers/coreml/coremltools/mlmodel/format #include "coreml_proto/Model.pb.h" +#if defined(__GNUC__) +#pragma GCC diagnostic pop +#endif + namespace COREML_SPEC = CoreML::Specification; diff --git a/onnxruntime/core/providers/coreml/builders/helper.cc b/onnxruntime/core/providers/coreml/builders/helper.cc index 897856256cc79..bc3ba4432e66d 100644 --- a/onnxruntime/core/providers/coreml/builders/helper.cc +++ b/onnxruntime/core/providers/coreml/builders/helper.cc @@ -22,22 +22,35 @@ namespace onnxruntime { namespace coreml { -OpBuilderInputParams MakeOpBuilderParams(const GraphViewer& graph_viewer, uint32_t coreml_flags) { +OpBuilderInputParams MakeOpBuilderParams(const GraphViewer& graph_viewer, + int32_t coreml_version, + uint32_t coreml_flags) { return OpBuilderInputParams{graph_viewer, - (coreml_flags & COREML_FLAG_ONLY_ALLOW_STATIC_INPUT_SHAPES) != 0}; + coreml_version, + (coreml_flags & COREML_FLAG_ONLY_ALLOW_STATIC_INPUT_SHAPES) != 0, + (coreml_flags & COREML_FLAG_CREATE_MLPROGRAM) != 0}; } -bool IsNodeSupported(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) { +const IOpBuilder* GetOpBuilder(const Node& node) { const auto& op_builders = GetOpBuilders(); - if (Contains(op_builders, node.OpType())) { - const auto* op_builder = op_builders.at(node.OpType()); + const auto it = op_builders.find(node.OpType()); + if (it != op_builders.cend()) { + return it->second; + } + + return nullptr; +} + +bool IsNodeSupported(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) { + const auto* op_builder = GetOpBuilder(node); + if (op_builder) { return op_builder->IsOpSupported(node, input_params, logger); } else { return false; } } -bool IsInputSupported(const NodeArg& input, const std::string& parent_name, +bool IsInputSupported(const Node& node, const NodeArg& input, const OpBuilderInputParams& input_params, const logging::Logger& logger) { if (!input.Exists()) { // optional input that is not provided @@ -48,8 +61,8 @@ bool IsInputSupported(const NodeArg& input, const std::string& parent_name, std::vector shape; // We do not support input with no shape if (!GetShape(input, shape, logger)) { - LOGS(logger, VERBOSE) << "Input [" << input_name << "] of [" << parent_name - << "] has no shape"; + LOGS(logger, VERBOSE) << MakeString("Input [", input_name, "] of Node [", node.Name(), "] type [", node.OpType(), + "] has no shape"); return false; } @@ -63,11 +76,19 @@ bool IsInputSupported(const NodeArg& input, const std::string& parent_name, // For some undocumented reason, Apple CoreML framework will fail loading the model if the model // input has dimension > 16384 // See this issue, https://github.com/apple/coremltools/issues/1003 + // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf has maximum texture widths which may be the + // root cause. if (dim > 16384) { LOGS(logger, WARNING) << "CoreML does not support input dim > 16384. 
Input:" << input_name << ", shape: " << Shape2String(shape); return false; } + + if (dim == 0) { + LOGS(logger, WARNING) << "CoreML does not support shapes with dimension values of 0. Input:" << input_name + << ", shape: " << Shape2String(shape); + return false; + } } // Limit input shape rank to 5. @@ -87,13 +108,6 @@ std::unordered_set GetSupportedNodes(const GraphViewer& graph_viewe const logging::Logger& logger) { std::unordered_set supported_nodes{}; -#ifdef __APPLE__ - if (!util::HasRequiredBaseOS()) { - LOGS(logger, WARNING) << "All ops will fallback to CPU EP, because we do not have supported OS"; - return supported_nodes; - } -#endif - for (const auto& node : graph_viewer.Nodes()) { const bool supported = IsNodeSupported(node, input_params, logger); LOGS(logger, VERBOSE) << "Operator type: [" << node.OpType() @@ -149,7 +163,9 @@ bool HasNeuralEngine(const logging::Logger& logger) { #else // In this case, we are running the EP on non-apple platform, which means we are running the model // conversion with CoreML EP enabled, for this we always assume the target system has Neural Engine - LOGS(logger, VERBOSE) << "HasNeuralEngine running on non-Apple hardware for model conversion only"; + LOGS(logger, INFO) << "HasNeuralEngine running on non-Apple hardware. " + "Returning true to enable model conversion and local testing of CoreML EP implementation. " + "No CoreML model will be compiled or run."; has_neural_engine = true; #endif // #ifdef __APPLE__ diff --git a/onnxruntime/core/providers/coreml/builders/helper.h b/onnxruntime/core/providers/coreml/builders/helper.h index d8b27ac76ae73..300de2dedd122 100644 --- a/onnxruntime/core/providers/coreml/builders/helper.h +++ b/onnxruntime/core/providers/coreml/builders/helper.h @@ -23,10 +23,14 @@ class Logger; namespace coreml { -OpBuilderInputParams MakeOpBuilderParams(const GraphViewer& graph_viewer, uint32_t coreml_flags); +OpBuilderInputParams MakeOpBuilderParams(const GraphViewer& graph_viewer, + int32_t coreml_version, + uint32_t coreml_flags); -bool IsInputSupported(const NodeArg& node_arg, const std::string& parent_name, - const OpBuilderInputParams& input_params, const logging::Logger& logger); +const IOpBuilder* GetOpBuilder(const Node& node); + +bool IsInputSupported(const Node& node, const NodeArg& node_arg, const OpBuilderInputParams& input_params, + const logging::Logger& logger); bool IsNodeSupported(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger); diff --git a/onnxruntime/core/providers/coreml/builders/impl/LRN_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/LRN_op_builder.cc index 53f18b205880c..e9e520156576e 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/LRN_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/LRN_op_builder.cc @@ -3,39 +3,26 @@ #include "core/providers/coreml/builders/helper.h" #include "core/providers/coreml/builders/impl/base_op_builder.h" +#include "core/providers/coreml/builders/model_builder.h" #include "core/providers/coreml/builders/op_builder_factory.h" #include "core/providers/coreml/shape_utils.h" #include "core/providers/shared/utils/utils.h" -#ifdef __APPLE__ -#include "core/providers/coreml/builders/model_builder.h" -#endif - namespace onnxruntime { namespace coreml { class LRNOpBuilder : public BaseOpBuilder { - // Add operator related -#ifdef __APPLE__ - private: Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const override; -#endif - // Operator 
support related - private: bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const override; }; -// Add operator related - -#ifdef __APPLE__ - Status LRNOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, - const logging::Logger& logger) const { - std::unique_ptr layer = CreateNNLayer(model_builder, node); + const logging::Logger& /*logger*/) const { + std::unique_ptr layer = model_builder.CreateNNLayer(node); auto* coreml_lrn = layer->mutable_lrn(); @@ -56,9 +43,6 @@ Status LRNOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, model_builder.AddLayer(std::move(layer)); return Status::OK(); } -#endif - -// Operator support related bool LRNOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& /*input_params*/, const logging::Logger& logger) const { diff --git a/onnxruntime/core/providers/coreml/builders/impl/activation_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/activation_op_builder.cc index 88d6616b4e097..dee87ce3632a8 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/activation_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/activation_op_builder.cc @@ -2,44 +2,32 @@ // Licensed under the MIT License. #include "core/common/narrow.h" +#include "core/framework/tensorprotoutils.h" #include "core/optimizer/initializer.h" #include "core/providers/common.h" #include "core/providers/coreml/builders/helper.h" #include "core/providers/coreml/builders/impl/base_op_builder.h" +#include "core/providers/coreml/builders/impl/builder_utils.h" +#include "core/providers/coreml/builders/model_builder.h" #include "core/providers/coreml/builders/op_builder_factory.h" #include "core/providers/coreml/shape_utils.h" #include "core/providers/shared/utils/utils.h" -#ifdef __APPLE__ -#include "core/framework/tensorprotoutils.h" -#include "core/providers/coreml/builders/impl/builder_utils.h" -#include "core/providers/coreml/builders/model_builder.h" -#endif - namespace onnxruntime { namespace coreml { class ActivationOpBuilder : public BaseOpBuilder { - // Add operator related -#ifdef __APPLE__ - public: void AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const override; - private: Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const override; -#endif - // Operator support related - private: bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const override; + int GetMinSupportedOpSet(const Node& node) const override; }; -// Add operator related - -#ifdef __APPLE__ void ActivationOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const { const auto& op_type = node.OpType(); const auto& input_defs = node.InputDefs(); @@ -86,7 +74,7 @@ Status AddPReluWeight(ModelBuilder& model_builder, const Node& node, Status ActivationOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const { - std::unique_ptr layer = CreateNNLayer(model_builder, node); + std::unique_ptr layer = model_builder.CreateNNLayer(node); const auto& op_type(node.OpType()); if (op_type == "Sigmoid") { @@ -115,14 +103,10 @@ Status ActivationOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, model_builder.AddLayer(std::move(layer)); return Status::OK(); } -#endif - -// Operator support related namespace { // assumes that node.OpType() == "PRelu" 
-bool IsPReluOpSupported(const Node& node, const OpBuilderInputParams& input_params, - const logging::Logger& logger) { +bool IsPReluOpSupported(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) { const auto& input_defs = node.InputDefs(); // X input rank must be 3 or 4 diff --git a/onnxruntime/core/providers/coreml/builders/impl/argmax_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/argmax_op_builder.cc index 7a5d4a5af673b..e9a8176c8349b 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/argmax_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/argmax_op_builder.cc @@ -1,37 +1,26 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include "core/providers/shared/utils/utils.h" -#ifdef __APPLE__ +#include "core/providers/coreml/builders/impl/base_op_builder.h" #include "core/providers/coreml/builders/model_builder.h" -#endif #include "core/providers/coreml/builders/op_builder_factory.h" - -#include "base_op_builder.h" +#include "core/providers/shared/utils/utils.h" namespace onnxruntime { namespace coreml { class ArgMaxOpBuilder : public BaseOpBuilder { - // Add operator related - private: -#ifdef __APPLE__ Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const override; -#endif - // Operator support related bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const override; }; -// Add operator related - -#ifdef __APPLE__ Status ArgMaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& /* logger */) const { - std::unique_ptr layer = CreateNNLayer(model_builder, node); + std::unique_ptr layer = model_builder.CreateNNLayer(node); const auto& graph_viewer = model_builder.GetGraphViewer(); NodeAttrHelper helper(node); @@ -67,9 +56,6 @@ Status ArgMaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, model_builder.AddLayer(std::move(layer)); return Status::OK(); } -#endif - -// Operator support related bool ArgMaxOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& /*input_params*/, const logging::Logger& logger) const { diff --git a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc index 25d5bad14ceb6..2570e6d88ae0d 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.cc @@ -1,21 +1,18 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
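The COREML_FLAG_CREATE_MLPROGRAM flag added above is passed to the EP like any other CoreML flag; a sketch using the public API, mirroring the call made in ort_coreml_execution_provider.mm:

    #include "onnxruntime_cxx_api.h"
    #include "coreml_provider_factory.h"  // include path may differ by install layout

    Ort::SessionOptions MakeMLProgramSessionOptions() {
      Ort::SessionOptions so;
      // Request an ML Program instead of the default NeuralNetwork model.
      // Per the flag's documentation this requires Core ML 5 or later.
      const uint32_t flags = COREML_FLAG_CREATE_MLPROGRAM;
      Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CoreML(so, flags));
      return so;
    }
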
-#include "core/providers/coreml/builders/impl/base_op_builder.h" - #include "core/providers/common.h" #include "core/providers/coreml/builders/helper.h" +#include "core/providers/coreml/builders/impl/base_op_builder.h" +#include "core/providers/coreml/builders/model_builder.h" #include "core/providers/shared/utils/utils.h" -#ifdef __APPLE__ -#include "core/providers/coreml/builders/model_builder.h" -#endif +using namespace CoreML::Specification; namespace onnxruntime { namespace coreml { -// Shared functions - +namespace { // TODO, move this to shared_library bool HasExternalInitializer(const InitializedTensorSet& initializers, const Node& node, const logging::Logger& logger) { @@ -37,93 +34,78 @@ bool HasExternalInitializer(const InitializedTensorSet& initializers, const Node return false; } +} // namespace -// Add operator related -#ifdef __APPLE__ Status BaseOpBuilder::AddToModelBuilder(ModelBuilder& model_builder, const Node& node, - const OpBuilderInputParams& input_params, const logging::Logger& logger) const { - ORT_RETURN_IF_NOT( - IsOpSupported(node, input_params, logger), - "Unsupported operator ", - node.OpType()); - - ORT_RETURN_IF_ERROR(AddToModelBuilderImpl(model_builder, node, logger)); - LOGS(logger, VERBOSE) << "Operator name: [" << node.Name() - << "] type: [" << node.OpType() << "] was added"; - return Status::OK(); -} + Status status = AddToModelBuilderImpl(model_builder, node, logger); -/* static */ std::unique_ptr -BaseOpBuilder::CreateNNLayer(ModelBuilder& model_builder, const Node& node) { - auto layer_name = node.Name(); - if (layer_name.empty()) { - // CoreML requires layer has a name, while the node name is optional in ONNX - // In this case, create a unique name for the layer - layer_name = model_builder.GetUniqueName(MakeString("Node_", node.Index(), "_type_", node.OpType())); + if (status.IsOK()) { + LOGS(logger, VERBOSE) << "Operator name: [" << node.Name() << "] type: [" << node.OpType() << "] was added"; } - return CreateNNLayer(layer_name); -} -/* static */ std::unique_ptr -BaseOpBuilder::CreateNNLayer(const std::string& layer_name) { - std::unique_ptr layer = std::make_unique(); - layer->set_name(layer_name); - return layer; + return status; } -#endif - -// Operator support related bool BaseOpBuilder::IsOpSupported(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const { - if (!HasSupportedInputs(node, input_params, logger)) + if (input_params.create_mlprogram && !SupportsMLProgram()) { + LOGS(logger, VERBOSE) << "Operator [" << node.OpType() << "] does not support MLProgram"; return false; + } - // We do not support external initializers for now - const auto& initializers = input_params.graph_viewer.GetAllInitializedTensors(); - if (HasExternalInitializer(initializers, node, logger)) + if (!HasSupportedOpSet(node, logger)) { + return false; + } + + if (!HasSupportedInputs(node, input_params, logger)) { return false; + } - if (!HasSupportedOpSet(node, logger)) + // We do not support external initializers for now + const auto& initializers = input_params.graph_viewer.GetAllInitializedTensors(); + if (HasExternalInitializer(initializers, node, logger)) { return false; + } return IsOpSupportedImpl(node, input_params, logger); } bool BaseOpBuilder::HasSupportedInputs(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const { - const auto node_name = MakeString("Node [", node.Name(), "] type [", node.OpType(), "]"); for (const auto* input : node.InputDefs()) { - if 
(!IsInputSupported(*input, node_name, input_params, logger)) { + if (!IsInputSupported(node, *input, input_params, logger)) { return false; } } - return HasSupportedInputsImpl(node, logger); + return HasSupportedInputsImpl(node, input_params, logger); } -bool BaseOpBuilder::HasSupportedInputsImpl(const Node& node, const logging::Logger& logger) const { - // We only check the type of input 0 by default - // specific op builder can override this +/* static */ +bool BaseOpBuilder::IsInput0Supported(const Node& node, const OpBuilderInputParams& /*input_params*/, + const logging::Logger& logger) { const auto& input = *node.InputDefs()[0]; - int32_t input_type; - if (!GetType(input, input_type, logger)) - return false; + int32_t input_type = ONNX_NAMESPACE::TensorProto_DataType_UNDEFINED; - if (input_type != ONNX_NAMESPACE::TensorProto_DataType_FLOAT) { - LOGS(logger, VERBOSE) << "[" << node.OpType() - << "] Input type: [" << input_type - << "] is not supported for now"; + // currently only float is supported + if (!GetType(input, input_type, logger) || input_type != ONNX_NAMESPACE::TensorProto_DataType_FLOAT) { + LOGS(logger, VERBOSE) << "[" << node.OpType() << "] Input type: [" << input_type << "] is not currently supported"; return false; } return true; } -bool BaseOpBuilder::HasSupportedOpSet(const Node& node, - const logging::Logger& logger) const { +bool BaseOpBuilder::HasSupportedInputsImpl(const Node& node, const OpBuilderInputParams& input_params, + const logging::Logger& logger) const { + // We only check the type of input 0 by default + // specific op builder can override this + return IsInput0Supported(node, input_params, logger); +} + +bool BaseOpBuilder::HasSupportedOpSet(const Node& node, const logging::Logger& logger) const { auto since_version = node.SinceVersion(); if (since_version < GetMinSupportedOpSet(node) || since_version > GetMaxSupportedOpSet(node)) { LOGS(logger, VERBOSE) << node.OpType() << "is only supported for opset [" diff --git a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h index b4132d3b770ec..06c4dd94ea30d 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h +++ b/onnxruntime/core/providers/coreml/builders/impl/base_op_builder.h @@ -3,11 +3,9 @@ #pragma once -#include "core/providers/coreml/builders/op_builder.h" - -#ifdef __APPLE__ +#include "core/common/span_utils.h" #include "core/providers/coreml/builders/coreml_spec.h" -#endif +#include "core/providers/coreml/builders/op_builder.h" namespace onnxruntime { namespace coreml { @@ -18,45 +16,40 @@ class BaseOpBuilder : public IOpBuilder { public: virtual ~BaseOpBuilder() = default; - // Add operator related + // does the operator implementation support creating an ML Program + bool SupportsMLProgram() const override { return false; } + + bool IsOpSupported(const Node& node, const OpBuilderInputParams& input_params, + const logging::Logger& logger) const override final; -#ifdef __APPLE__ - public: - virtual void AddInitializersToSkip(ModelBuilder& /* model_builder */, const Node& /* node */) const override {} Status AddToModelBuilder(ModelBuilder& model_builder, const Node& node, - const OpBuilderInputParams& input_params, const logging::Logger& logger) const override final; - protected: - virtual Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, - const logging::Logger& logger) const = 0; - - static std::unique_ptr - CreateNNLayer(ModelBuilder& model_builder, const Node& 
node); - - static std::unique_ptr CreateNNLayer(const std::string& layer_name); -#endif - - // Operator support related - public: - bool IsOpSupported(const Node& node, const OpBuilderInputParams& input_params, - const logging::Logger& logger) const override final; + void AddInitializersToSkip(ModelBuilder& /*model_builder*/, const Node& /*node*/) const override {} protected: - virtual bool IsOpSupportedImpl(const Node& /* node */, const OpBuilderInputParams& /* input_params */, - const logging::Logger& /* logger */) const { + // check if the first input's data type is supported. + static bool IsInput0Supported(const Node& node, const OpBuilderInputParams& input_params, + const logging::Logger& logger); + + private: + virtual bool IsOpSupportedImpl(const Node& /*node*/, const OpBuilderInputParams& /*input_params*/, + const logging::Logger& /*logger*/) const { return true; } - virtual bool HasSupportedInputsImpl(const Node& node, const logging::Logger& logger) const; + virtual bool HasSupportedInputsImpl(const Node& node, const OpBuilderInputParams& input_params, + const logging::Logger& logger) const; - virtual int GetMinSupportedOpSet(const Node& /* node */) const { return 1; } - virtual int GetMaxSupportedOpSet(const Node& /* node */) const { return 20; } + virtual int GetMinSupportedOpSet(const Node& /*node*/) const { return 1; } + virtual int GetMaxSupportedOpSet(const Node& /*node*/) const { return 20; } - private: bool HasSupportedOpSet(const Node& node, const logging::Logger& logger) const; bool HasSupportedInputs(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const; + + virtual Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, + const logging::Logger& logger) const = 0; }; } // namespace coreml diff --git a/onnxruntime/core/providers/coreml/builders/impl/batch_norm_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/batch_norm_op_builder.cc index 391b02eaec497..8da58f659acf1 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/batch_norm_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/batch_norm_op_builder.cc @@ -5,30 +5,20 @@ #include "core/providers/coreml/builders/helper.h" #include "core/providers/coreml/builders/impl/base_op_builder.h" #include "core/providers/coreml/builders/impl/builder_utils.h" +#include "core/providers/coreml/builders/model_builder.h" #include "core/providers/coreml/builders/op_builder_factory.h" #include "core/providers/coreml/shape_utils.h" #include "core/providers/shared/utils/utils.h" -#ifdef __APPLE__ -#include "core/providers/coreml/builders/model_builder.h" -#endif - namespace onnxruntime { namespace coreml { class BatchNormalizationOpBuilder : public BaseOpBuilder { - // Add operator related -#ifdef __APPLE__ - public: void AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const override; - private: Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const override; -#endif - // Operator support related - private: bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const override; @@ -36,9 +26,6 @@ class BatchNormalizationOpBuilder : public BaseOpBuilder { int GetMinSupportedOpSet(const Node& /* node */) const override { return 7; } }; -// Add operator related - -#ifdef __APPLE__ void BatchNormalizationOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const { // skip everything 
except input0 for BatchNormalization const auto& input_defs = node.InputDefs(); @@ -48,10 +35,9 @@ void BatchNormalizationOpBuilder::AddInitializersToSkip(ModelBuilder& model_buil model_builder.AddInitializerToSkip(input_defs[4]->Name()); // var } -Status BatchNormalizationOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, - const Node& node, +Status BatchNormalizationOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& /* logger */) const { - std::unique_ptr layer = CreateNNLayer(model_builder, node); + std::unique_ptr layer = model_builder.CreateNNLayer(node); const auto& input_defs = node.InputDefs(); const auto& initializers(model_builder.GetInitializerTensors()); @@ -81,9 +67,6 @@ Status BatchNormalizationOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_bu model_builder.AddLayer(std::move(layer)); return Status::OK(); } -#endif - -// Operator support related bool BatchNormalizationOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const { diff --git a/onnxruntime/core/providers/coreml/builders/impl/binary_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/binary_op_builder.cc index 10c9b32d03f37..6074fba1433d9 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/binary_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/binary_op_builder.cc @@ -1,35 +1,28 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +#include "core/framework/tensorprotoutils.h" #include "core/providers/common.h" #include "core/providers/coreml/builders/helper.h" +#include "core/providers/coreml/builders/impl/base_op_builder.h" +#include "core/providers/coreml/builders/model_builder.h" #include "core/providers/coreml/builders/op_builder_factory.h" #include "core/providers/shared/utils/utils.h" -#ifdef __APPLE__ -#include "core/framework/tensorprotoutils.h" -#include "core/providers/coreml/builders/model_builder.h" -#endif - -#include "base_op_builder.h" namespace onnxruntime { namespace coreml { - class BinaryOpBuilder : public BaseOpBuilder { - // Add operator related - private: -#ifdef __APPLE__ Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const override; -#endif - // Operator support related + int GetMinSupportedOpSet(const Node& node) const override; - bool HasSupportedInputsImpl(const Node& node, const logging::Logger& logger) const override; + bool HasSupportedInputsImpl(const Node& node, const OpBuilderInputParams& input_params, + const logging::Logger& logger) const override; }; -#ifdef __APPLE__ -static bool CheckIfBothInputShapesMatch(const Node& node, const logging::Logger& logger) { +namespace { +bool CheckIfBothInputShapesMatch(const Node& node, const logging::Logger& logger) { const auto& input_defs = node.InputDefs(); const auto* x_shape_proto = input_defs[0]->Shape(); @@ -57,15 +50,14 @@ static bool CheckIfBothInputShapesMatch(const Node& node, const logging::Logger& y_shape_proto->dim().begin(), y_shape_proto->dim().end(), dim_eq); } - -// Add operator related +} // namespace Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const { const auto& op_type(node.OpType()); const auto& input_defs(node.InputDefs()); - std::unique_ptr layer = CreateNNLayer(model_builder, node); + std::unique_ptr layer = model_builder.CreateNNLayer(node); if (op_type == 
"Add") { // original mutable_add() has limited broadcasting support @@ -99,31 +91,28 @@ Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const model_builder.AddLayer(std::move(layer)); return Status::OK(); } -#endif - -// Operator support related int BinaryOpBuilder::GetMinSupportedOpSet(const Node& /* node */) const { // Add/Sub/Mul/Div opset 6- has broadcast attributes we do not support now return 7; } -bool BinaryOpBuilder::HasSupportedInputsImpl(const Node& node, const logging::Logger& logger) const { - bool is_pow = node.OpType() == "Pow"; - if (!is_pow) { - return BaseOpBuilder::HasSupportedInputsImpl(node, logger); +bool BinaryOpBuilder::HasSupportedInputsImpl(const Node& node, const OpBuilderInputParams& input_params, + const logging::Logger& logger) const { + if (node.OpType() != "Pow") { + return IsInput0Supported(node, input_params, logger); } const auto& input_1 = *node.InputDefs()[0]; const auto& input_2 = *node.InputDefs()[1]; + // Pow we only support both inputs as fp32 for now int32_t input_type_1; - if (!GetType(input_1, input_type_1, logger)) - return false; - int32_t input_type_2; - if (!GetType(input_2, input_type_2, logger)) + if (!GetType(input_1, input_type_1, logger) || + !GetType(input_2, input_type_2, logger)) { return false; + } if (input_type_1 != ONNX_NAMESPACE::TensorProto_DataType_FLOAT || input_type_1 != input_type_2) { LOGS(logger, VERBOSE) << "Pow only supports fp32 inputs, actual input type" diff --git a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc index ef66e6b877a1f..710f596b2a562 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.cc @@ -1,17 +1,16 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#ifdef __APPLE__ - #include "core/providers/coreml/builders/impl/builder_utils.h" #include "core/common/narrow.h" #include "core/framework/tensorprotoutils.h" +#include "core/providers/coreml/builders/coreml_spec.h" #include "core/providers/coreml/builders/helper.h" #include "core/providers/shared/utils/utils.h" #include "core/optimizer/initializer.h" -#include "coreml_proto/NeuralNetwork.pb.h" +using namespace COREML_SPEC; namespace onnxruntime { namespace coreml { @@ -133,7 +132,182 @@ void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span> shape) { + tensor_type.set_datatype(data_type); + if (shape) { + tensor_type.set_rank(shape->size()); + for (const auto& dim : *shape) { + if (dim >= 0) { + tensor_type.add_dimensions()->mutable_constant()->set_size(narrow(dim)); + } else { + tensor_type.add_dimensions()->mutable_unknown()->set_variadic(false); + } + } + } +} + +void SetTensorTypeInfo(MILSpec::TensorType& tensor_type, MILSpec::DataType data_type, + const ONNX_NAMESPACE::TensorShapeProto* shape) { + tensor_type.set_datatype(data_type); + if (shape) { + tensor_type.set_rank(shape->dim_size()); + for (const auto& dim : shape->dim()) { + if (dim.has_dim_value()) { + tensor_type.add_dimensions()->mutable_constant()->set_size(narrow(dim.dim_value())); + } else { + tensor_type.add_dimensions()->mutable_unknown()->set_variadic(false); + } + } + } +} + +template +void CopyDataToTensorValue(MILSpec::TensorValue& tensor_value, gsl::span data) { + // need a 'false' that is dependent on the template types to make gcc happy and give a meaningful error message. 
+ static_assert(false_for_T && false_for_T, "Unsupported data type"); // add specializations below as needed +} + +template <> +void CopyDataToTensorValue(MILSpec::TensorValue& tensor_value, gsl::span data) { + tensor_value.mutable_floats()->mutable_values()->Add(data.begin(), data.end()); +} + +template <> +void CopyDataToTensorValue(MILSpec::TensorValue& tensor_value, gsl::span data) { + tensor_value.mutable_ints()->mutable_values()->Add(data.begin(), data.end()); +} + +template <> +void CopyDataToTensorValue(MILSpec::TensorValue& tensor_value, gsl::span data) { + tensor_value.mutable_strings()->mutable_values()->Add(data.begin(), data.end()); +} + +// copy int64_t (used by ONNX for strides/indexes/etc.) to int32_t (used by CoreML) +template <> +void CopyDataToTensorValue(MILSpec::TensorValue& tensor_value, gsl::span data) { + auto& int32_out = *tensor_value.mutable_ints()->mutable_values(); + int32_out.Reserve(narrow(data.size())); + for (const int64_t v : data) { + int32_out.AddAlreadyReserved(narrow(v)); + } +} + +template <> +void CopyDataToTensorValue(MILSpec::TensorValue& tensor_value, gsl::span data) { + tensor_value.mutable_bools()->mutable_values()->Add(data.begin(), data.end()); +} + +} // namespace + +MILSpec::DataType OnnxDataTypeToMILSpec(int onnx_type) { + switch (static_cast(onnx_type)) { + case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: + return MILSpec::DataType::FLOAT32; + case ONNX_NAMESPACE::TensorProto_DataType_DOUBLE: + return MILSpec::DataType::FLOAT64; + case ONNX_NAMESPACE::TensorProto_DataType_BFLOAT16: + return MILSpec::DataType::BFLOAT16; + case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16: + return MILSpec::DataType::FLOAT16; + + case ONNX_NAMESPACE::TensorProto_DataType_INT8: + return MILSpec::DataType::INT8; + case ONNX_NAMESPACE::TensorProto_DataType_INT16: + return MILSpec::DataType::INT16; + case ONNX_NAMESPACE::TensorProto_DataType_INT32: + return MILSpec::DataType::INT32; + case ONNX_NAMESPACE::TensorProto_DataType_INT64: + return MILSpec::DataType::INT64; + + case ONNX_NAMESPACE::TensorProto_DataType_UINT8: + return MILSpec::DataType::UINT8; + case ONNX_NAMESPACE::TensorProto_DataType_UINT16: + return MILSpec::DataType::UINT16; + case ONNX_NAMESPACE::TensorProto_DataType_UINT32: + return MILSpec::DataType::UINT32; + case ONNX_NAMESPACE::TensorProto_DataType_UINT64: + return MILSpec::DataType::UINT64; + + case ONNX_NAMESPACE::TensorProto_DataType_BOOL: + return MILSpec::DataType::BOOL; + case ONNX_NAMESPACE::TensorProto_DataType_STRING: + return MILSpec::DataType::STRING; + default: + ORT_THROW("Unsupported data type: ", onnx_type); + } +} + +template +MILSpec::Value CreateTensorValue(const gsl::span data, + std::optional> shape) { + MILSpec::Value value; + MILSpec::TensorType& tensor_type = *value.mutable_type()->mutable_tensortype(); + + if (shape) { + SetTensorTypeInfo(tensor_type, DataTypeToMILSpec(), *shape); + } else { + // infer as 1D shape + std::vector coreml_shape{narrow(data.size())}; + SetTensorTypeInfo(tensor_type, DataTypeToMILSpec(), coreml_shape); + } + + MILSpec::TensorValue& tensor_value = *value.mutable_immediatevalue()->mutable_tensor(); + CopyDataToTensorValue(tensor_value, data); + + return value; +} + +template +MILSpec::Value CreateScalarTensorValue(const T& data) { + gsl::span data_span{&data, 1}; + std::vector shape = {}; // empty for scalar + return CreateTensorValue(data_span, shape); +} + +// explicit specializations for types we handle so the implementation can be in the .cc file +template MILSpec::Value 
CreateTensorValue(gsl::span data, + std::optional> shape); + +template MILSpec::Value CreateScalarTensorValue(const float& data); +template MILSpec::Value CreateScalarTensorValue(const int32_t& data); +template MILSpec::Value CreateScalarTensorValue(const std::string& data); +template MILSpec::Value CreateScalarTensorValue(const bool& data); + +COREML_SPEC::MILSpec::NamedValueType CreateNamedTensorValueType(const NodeArg& node_arg) { + MILSpec::NamedValueType nvt; + nvt.set_name(node_arg.Name()); + MILSpec::TensorType& tensor_type = *nvt.mutable_type()->mutable_tensortype(); + + SetTensorTypeInfo(tensor_type, OnnxDataTypeToMILSpec(node_arg.TypeAsProto()->tensor_type().elem_type()), + node_arg.Shape()); + + return nvt; +} + +void AddOperationInput(MILSpec::Operation& op, std::string_view input_name, std::string_view value_name) { + MILSpec::Argument arg; + arg.mutable_arguments()->Add()->set_name(std::string(value_name)); + + (*op.mutable_inputs())[input_name] = std::move(arg); +} + +void AddOperationOutput(COREML_SPEC::MILSpec::Operation& op, const NodeArg& output) { + auto& outputs = *op.mutable_outputs(); + auto& output_arg = *outputs.Add(); + output_arg.set_name(output.Name()); + + MILSpec::ValueType& value = *output_arg.mutable_type(); + MILSpec::TensorType& tensor_type = *value.mutable_tensortype(); + + SetTensorTypeInfo(tensor_type, OnnxDataTypeToMILSpec(output.TypeAsProto()->tensor_type().elem_type()), + output.Shape()); +} + } // namespace coreml } // namespace onnxruntime - -#endif diff --git a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h index 23b11928f7dc2..8126f0c126914 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h +++ b/onnxruntime/core/providers/coreml/builders/impl/builder_utils.h @@ -5,22 +5,19 @@ #pragma once -#ifdef __APPLE__ +#include #include "core/common/gsl.h" #include "core/common/status.h" #include "core/graph/basic_types.h" #include "core/providers/common.h" -namespace CoreML { -namespace Specification { -class WeightParams; -} -} // namespace CoreML +#include "core/providers/coreml/builders/coreml_spec.h" namespace onnxruntime { -namespace coreml { +class NodeArg; +namespace coreml { // Try to see if we can map explicit padding to auto padding for Conv/Pool // Since usually use auto padding is more efficient Status HandleAutoPad(const std::vector input_shape, @@ -32,6 +29,10 @@ Status HandleAutoPad(const std::vector input_shape, AutoPadType auto_pad_type, AutoPadType& auto_pad_type_out); +// +// NeuralNetwork utils +// + // Copy an onnx initializer data to a coreml weight Status CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, const ONNX_NAMESPACE::TensorProto& tensor); @@ -44,7 +45,90 @@ void CreateCoreMLWeight(CoreML::Specification::WeightParams& weight, gsl::span data); +// +// MLProgram utils +// + +// helper for static_assert where the value needs to be dependent on a template parameter +template +constexpr bool false_for_T = false; + +template +COREML_SPEC::MILSpec::DataType DataTypeToMILSpec() { + if constexpr (std::is_same_v) { + return COREML_SPEC::MILSpec::DataType::FLOAT32; + } else if constexpr (std::is_same_v) { + return COREML_SPEC::MILSpec::DataType::FLOAT64; + } else if constexpr (std::is_same_v) { + return COREML_SPEC::MILSpec::DataType::BFLOAT16; + } else if constexpr (std::is_same_v) { + return COREML_SPEC::MILSpec::DataType::FLOAT16; + + } else if constexpr (std::is_same_v) { + return 
COREML_SPEC::MILSpec::DataType::INT8; + } else if constexpr (std::is_same_v) { + return COREML_SPEC::MILSpec::DataType::INT16; + } else if constexpr (std::is_same_v) { + return COREML_SPEC::MILSpec::DataType::INT32; + } else if constexpr (std::is_same_v) { + return COREML_SPEC::MILSpec::DataType::INT64; + + } else if constexpr (std::is_same_v) { + return COREML_SPEC::MILSpec::DataType::UINT8; + } else if constexpr (std::is_same_v) { + return COREML_SPEC::MILSpec::DataType::UINT16; + } else if constexpr (std::is_same_v) { + return COREML_SPEC::MILSpec::DataType::UINT32; + } else if constexpr (std::is_same_v) { + return COREML_SPEC::MILSpec::DataType::UINT64; + + } else if constexpr (std::is_same_v) { + return COREML_SPEC::MILSpec::DataType::BOOL; + } else if constexpr (std::is_same_v) { + return COREML_SPEC::MILSpec::DataType::STRING; + } else { + static_assert(false_for_T, "Unsupported type."); + } +} + +// The TensorProto.data_type field is an int, but must be a valid TensorProto_DataType value. +// Use int for the arg so the caller can pass TensorProto.data_type() value and do the cast to enum internally +COREML_SPEC::MILSpec::DataType OnnxDataTypeToMILSpec(int onnx_type); + +/// +/// Create a CoreML MILSpec::TensorValue for the given input data. +/// +/// Original C++ data type +/// CoreML C++ data type +/// ONNX data +/// ONNX data shape. Inferred to be a 1D shape of `{data.size()}` if not specified. +/// TensorValue containing data. +template +COREML_SPEC::MILSpec::Value CreateTensorValue(gsl::span data, + std::optional> shape = std::nullopt); + +template +COREML_SPEC::MILSpec::Value CreateScalarTensorValue(const T& data); + +/// Create a NamedValueType from an ONNX tensor NodeArg. +/// Used to create inputs for the 'main' function in an ML Program. +COREML_SPEC::MILSpec::NamedValueType CreateNamedTensorValueType(const NodeArg& node_arg); + +/// +/// Add an input argument to a MILSpec::Operation +/// +/// Operation to update. +/// The input name defined by the spec for the operation. +/// The name of the value that is providing the input. +/// "https://apple.github.io/coremltools/source/coremltools.converters.mil.mil.ops.defs.html" +void AddOperationInput(COREML_SPEC::MILSpec::Operation& op, + std::string_view input_name, std::string_view value_name); + +/// +/// Add an output to a MILSpec::Operation. Name, data type and shape are used from the NodeArg. +/// +/// Operation to update. +/// NodeArg with details of output to add. +void AddOperationOutput(COREML_SPEC::MILSpec::Operation& op, const NodeArg& output); } // namespace coreml } // namespace onnxruntime - -#endif diff --git a/onnxruntime/core/providers/coreml/builders/impl/cast_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/cast_op_builder.cc index 15ee1f0fc7284..70053c2c606a0 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/cast_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/cast_op_builder.cc @@ -1,34 +1,25 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
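The builder_utils.h helpers above combine as follows when an op builder emits an ML Program operation. A hypothetical sketch; the operation type "relu" and its input name "x" come from the MIL ops spec linked in the header, not from this diff:

    #include "core/providers/coreml/builders/impl/builder_utils.h"

    namespace onnxruntime::coreml {

    // Hypothetical: emit a MIL "relu" op for an ONNX node. "relu" takes a single
    // input named "x" per the MIL ops spec; AddOperationOutput copies the name,
    // data type and shape from the node's output NodeArg.
    void AddReluOperation(COREML_SPEC::MILSpec::Operation& op, const Node& node) {
      op.set_type("relu");
      AddOperationInput(op, "x", node.InputDefs()[0]->Name());
      AddOperationOutput(op, *node.OutputDefs()[0]);
    }

    }  // namespace onnxruntime::coreml

Scalar attributes travel the same way as constant tensors, e.g. CreateScalarTensorValue<float>(0.2f) produces a rank-0 FLOAT32 value.
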
-#include "core/providers/shared/utils/utils.h" #include "core/providers/coreml/builders/helper.h" -#ifdef __APPLE__ +#include "core/providers/coreml/builders/impl/base_op_builder.h" #include "core/providers/coreml/builders/model_builder.h" -#endif #include "core/providers/coreml/builders/op_builder_factory.h" - -#include "base_op_builder.h" +#include "core/providers/shared/utils/utils.h" namespace onnxruntime { namespace coreml { class CastOpBuilder : public BaseOpBuilder { - // Add operator related - private: -#ifdef __APPLE__ Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const override; -#endif - // Operator support related bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const override; - bool HasSupportedInputsImpl(const Node& node, const logging::Logger& logger) const override; -}; -// Add operator related + bool HasSupportedInputsImpl(const Node& node, const OpBuilderInputParams& input_params, + const logging::Logger& logger) const override; +}; -#ifdef __APPLE__ Status CastOpBuilder::AddToModelBuilderImpl(ModelBuilder& /* model_builder */, const Node& /* node */, const logging::Logger& /* logger */) const { @@ -37,9 +28,6 @@ Status CastOpBuilder::AddToModelBuilderImpl(ModelBuilder& /* model_builder */, // Cast node is not provided in CoreML model, so we're skipping adding the Cast node here. return Status::OK(); } -#endif - -// Operator support related bool CastOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const { @@ -84,7 +72,8 @@ bool CastOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputPara return true; } -bool CastOpBuilder::HasSupportedInputsImpl(const Node& node, const logging::Logger& logger) const { +bool CastOpBuilder::HasSupportedInputsImpl(const Node& node, const OpBuilderInputParams& /*input_params*/, + const logging::Logger& logger) const { // We only check the type of input 0 const auto& input = *node.InputDefs()[0]; diff --git a/onnxruntime/core/providers/coreml/builders/impl/clip_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/clip_op_builder.cc index a298a8d12c741..9aca172abec98 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/clip_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/clip_op_builder.cc @@ -1,37 +1,24 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-#ifdef __APPLE__ +#include "core/providers/coreml/builders/impl/base_op_builder.h" #include "core/providers/coreml/builders/model_builder.h" -#endif #include "core/providers/coreml/builders/op_builder_factory.h" #include "core/providers/shared/utils/utils.h" -#include "base_op_builder.h" - namespace onnxruntime { namespace coreml { class ClipOpBuilder : public BaseOpBuilder { - // Add operator related -#ifdef __APPLE__ - public: void AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const override; - private: Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const override; -#endif - // Operator support related - private: bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const override; }; -// Add operator related - -#ifdef __APPLE__ void ClipOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const { // Both min and max values will be injected into the layer, no need to add to the model if (node.SinceVersion() >= 11) { @@ -58,7 +45,7 @@ Status ClipOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, if (!has_min && !has_max) { // Clip without min/max is an identity node // In CoreML we don't have identity, use ActivationLinear instead - std::unique_ptr layer = CreateNNLayer(model_builder, node); + std::unique_ptr layer = model_builder.CreateNNLayer(node); layer->mutable_activation()->mutable_linear()->set_alpha(1.0f); *layer->mutable_input()->Add() = input_name; *layer->mutable_output()->Add() = output_name; @@ -83,8 +70,7 @@ Status ClipOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, // Handle clipping at min first if (has_min) { - const auto clip_min_layer_name = model_builder.GetUniqueName(MakeString(node_name, "_Clip_min")); - std::unique_ptr min_layer = CreateNNLayer(clip_min_layer_name); + std::unique_ptr min_layer = model_builder.CreateNNLayer(node, "_Clip_min"); if (min == 0.0f) { // If min is 0. 
then this min will be handled by relu min_layer->mutable_activation()->mutable_relu(); } else { // otherwise, min will be handled by unary->threshold @@ -101,9 +87,7 @@ Status ClipOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, if (has_max) { const auto threshold_output_name = model_builder.GetUniqueName(MakeString(node_name, "threshold_output")); { // Add threshold layer, which is actually max( -1 * min_output, -max) - const auto clip_max_threshold_layer_name = - model_builder.GetUniqueName(MakeString(node_name, "_Clip_max_threshold")); - auto threshold_layer = CreateNNLayer(clip_max_threshold_layer_name); + auto threshold_layer = model_builder.CreateNNLayer(node, "_Clip_max_threshold"); threshold_layer->mutable_unary()->set_alpha(-max); threshold_layer->mutable_unary()->set_scale(-1.0f); threshold_layer->mutable_unary()->set_type(COREML_SPEC::UnaryFunctionLayerParams::THRESHOLD); @@ -112,9 +96,7 @@ Status ClipOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, model_builder.AddLayer(std::move(threshold_layer)); } { // Add linear activation layer -1 * threshold_output - const auto clip_max_linear_layer_name = - model_builder.GetUniqueName(MakeString(node_name, "_Clip_max_linear")); - auto linear_layer = CreateNNLayer(clip_max_linear_layer_name); + auto linear_layer = model_builder.CreateNNLayer(node, "_Clip_max_linear"); linear_layer->mutable_activation()->mutable_linear()->set_alpha(-1.0f); *linear_layer->mutable_input()->Add() = threshold_output_name; *linear_layer->mutable_output()->Add() = output_name; @@ -125,9 +107,6 @@ Status ClipOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, return Status::OK(); } -#endif - -// Operator support related bool ClipOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const { diff --git a/onnxruntime/core/providers/coreml/builders/impl/concat_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/concat_op_builder.cc index b1e761024f5c9..34193318a0264 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/concat_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/concat_op_builder.cc @@ -4,37 +4,26 @@ #include "core/providers/common.h" #include "core/providers/coreml/builders/helper.h" #include "core/providers/coreml/builders/impl/base_op_builder.h" +#include "core/providers/coreml/builders/model_builder.h" #include "core/providers/coreml/builders/op_builder_factory.h" #include "core/providers/coreml/shape_utils.h" #include "core/providers/shared/utils/utils.h" -#ifdef __APPLE__ -#include "core/providers/coreml/builders/model_builder.h" -#endif - namespace onnxruntime { namespace coreml { class ConcatOpBuilder : public BaseOpBuilder { - // Add operator related - private: -#ifdef __APPLE__ Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const override; -#endif - // Operator support related bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const override; }; -// Add operator related - -#ifdef __APPLE__ Status ConcatOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const { - std::unique_ptr layer = CreateNNLayer(model_builder, node); + std::unique_ptr layer = model_builder.CreateNNLayer(node); layer->mutable_concat()->set_sequenceconcat(false); @@ -48,9 +37,7 @@ Status ConcatOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, 
 model_builder.AddLayer(std::move(layer));
   return Status::OK();
 }
-#endif

-// Operator support related
 bool ConcatOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& /* input_params */,
                                         const logging::Logger& logger) const {
   const auto& input_defs = node.InputDefs();
diff --git a/onnxruntime/core/providers/coreml/builders/impl/conv_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/conv_op_builder.cc
index ff9dcbd9f8874..05e43dbbd16af 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/conv_op_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/conv_op_builder.cc
@@ -4,39 +4,35 @@
 #include "core/providers/common.h"
 #include "core/providers/coreml/builders/helper.h"
 #include "core/providers/coreml/builders/impl/base_op_builder.h"
-#include "core/providers/coreml/builders/op_builder_factory.h"
-#include "core/providers/shared/utils/utils.h"
-
-#ifdef __APPLE__
 #include "core/providers/coreml/builders/impl/builder_utils.h"
 #include "core/providers/coreml/builders/model_builder.h"
+#include "core/providers/coreml/builders/op_builder_factory.h"
 #include "core/providers/coreml/shape_utils.h"
-#endif
+#include "core/providers/shared/utils/utils.h"
+
+using namespace CoreML::Specification;

 namespace onnxruntime {
 namespace coreml {

 class ConvOpBuilder : public BaseOpBuilder {
-  // Add operator related
-#ifdef __APPLE__
- public:
   void AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const override;

- private:
   Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
                                const logging::Logger& logger) const override;
-#endif

-  // Operator support related
- private:
   bool IsOpSupportedImpl(const Node& /* node */, const OpBuilderInputParams& /* input_params */,
                          const logging::Logger& /* logger */) const override;
-};

-// Add operator related
+  bool SupportsMLProgram() const override { return true; }
+};

-#ifdef __APPLE__
 void ConvOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const {
+  if (model_builder.CreateMLProgram()) {
+    // we add the initializers as 'const' operations via ModelBuilder::RegisterInitializers
+    return;
+  }
+
   const auto& input_defs = node.InputDefs();

   // skip the weight and bias (if has it) for conv as we will directly set those as part of the NN layer
@@ -49,136 +45,251 @@ void ConvOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Nod

 Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
                                             const logging::Logger& logger) const {
-  std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = CreateNNLayer(model_builder, node);
-
   const auto& input_defs = node.InputDefs();
   const auto& output_defs = node.OutputDefs();
   const auto& input_name = input_defs[0]->Name();
   const auto& output_name = output_defs[0]->Name();

-  const auto& weight_tensor = *model_builder.GetInitializerTensors().at(input_defs[1]->Name());
-  std::vector<int64_t> weight_shape = {weight_tensor.dims().cbegin(), weight_tensor.dims().cend()};
+  NodeAttrHelper helper(node);
+
+#if defined(COREML_ENABLE_MLPROGRAM)
+  if (model_builder.CreateMLProgram()) {
+    using namespace CoreML::Specification::MILSpec;

-  const bool is_1d_conv = (weight_shape.size() == 3);
+    // https://github.com/apple/coremltools/blob/7.1/coremltools/converters/mil/mil/ops/defs/iOS15/conv.py

-  if (is_1d_conv) {
-    // weight_shape needs to be expanded from MXCXH->MXCXHx1
-    weight_shape.push_back(1);
-  }
+    std::unique_ptr<Operation> conv_op = model_builder.CreateOperation(node, "conv");

-  NodeAttrHelper helper(node);
-  auto strides = helper.Get("strides", std::vector<int64_t>{1, 1});
-  auto dilations = helper.Get("dilations", std::vector<int64_t>{1, 1});
-  auto onnx_pads = helper.Get("pads", std::vector<int64_t>{0, 0, 0, 0});
-  // Strides/dilations for 1d conv is normally of length 1. Expand them by 1
-  // to meet the required length 2 (for 2d conv it's normally 2)
-  // Similarly 1d conv normally has a length 2 padding. Expand it to length 4 by adding additional zeros.
-  if (is_1d_conv) {
-    if (strides.size() < 2) {
-      ORT_RETURN_IF_NOT(strides.size() == 1, "strides size does not equal 1 for Conv 1d");
-      strides.push_back(1);
+    AddOperationInput(*conv_op, "x", input_name);
+    AddOperationInput(*conv_op, "weight", input_defs[1]->Name());
+
+    if (input_defs.size() > 2) {
+      AddOperationInput(*conv_op, "bias", input_defs[2]->Name());
     }
-    if (dilations.size() < 2) {
-      ORT_RETURN_IF_NOT(dilations.size() == 1, "dilations size does not equal 1 for Conv 1d");
-      dilations.push_back(1);
+
+    // ONNX attributes. Add as inputs if specified/required
+    auto strides = helper.GetInt64s("strides");
+    auto dilations = helper.GetInt64s("dilations");
+    auto groups = helper.GetInt64("group");
+
+    // we know this input has a valid shape due to the check in IsOpSupportedImpl. ignore N and C dims.
+    const auto num_spatial_dims = input_defs[1]->Shape()->dim_size() - 2;
+    const auto& op_type = conv_op->type();
+
+    if (strides) {
+      AddOperationInput(*conv_op, "strides", model_builder.AddConstant(op_type, "strides", *strides));
+    } else {
+      // spec says optional. testing suggests otherwise for at least the iOS15 target (CoreML5)
+      static const auto default_value = std::vector<int64_t>(num_spatial_dims, 1);
+      AddOperationInput(*conv_op, "strides", model_builder.AddConstant(op_type, "strides", default_value));
     }
-    if (onnx_pads.size() < 4) {
-      ORT_RETURN_IF_NOT(onnx_pads.size() == 2, "onnx_pads size does not equal 2 for Conv 1d");
-      onnx_pads.insert(onnx_pads.begin() + 1, 0);
-      onnx_pads.push_back(0);
+
+    if (dilations) {
+      AddOperationInput(*conv_op, "dilations", model_builder.AddConstant(op_type, "dilations", *dilations));
+    } else {
+      // spec says optional. testing suggests otherwise for at least the iOS15 target (CoreML5)
+      static const auto default_value = std::vector<int64_t>(num_spatial_dims, 1);
+      AddOperationInput(*conv_op, "dilations", model_builder.AddConstant(op_type, "dilations", default_value));
     }
-  }
-  const auto group = helper.Get("group", static_cast<int64_t>(1));
-
-  auto* coreml_conv = layer->mutable_convolution();
-
-  std::string expand_output_name = model_builder.GetUniqueName(node.Name() + "_expandDims");
-
-  if (is_1d_conv) {
-    const auto expand_layer_name = model_builder.GetUniqueName(MakeString(node.Name(), "_Conv_expand"));
-    std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> expand_layer = CreateNNLayer(expand_layer_name);
-    // Add an expanddims layer here. CoreML only supports 2d convolution, so for 1d Conv case
-    // we need to add an additional dimension here to the input to make it "2d Conv" like.
-    // NxCxH -> NxCxHx1
-    expand_layer->mutable_expanddims()->add_axes(-1);
-    *expand_layer->mutable_input()->Add() = input_name;
-    *expand_layer->mutable_output()->Add() = expand_output_name;
-    model_builder.AddLayer(std::move(expand_layer));
-  }
-  coreml_conv->set_outputchannels(weight_shape[0]);  // M
-  coreml_conv->set_kernelchannels(weight_shape[1]);  // C/Group
-  coreml_conv->add_kernelsize(weight_shape[2]);      // H
-  coreml_conv->add_kernelsize(weight_shape[3]);      // W
-  coreml_conv->set_ngroups(group);
-  *coreml_conv->mutable_stride() = {strides.cbegin(), strides.cend()};
-  *coreml_conv->mutable_dilationfactor() = {dilations.cbegin(), dilations.cend()};
-
-  coreml_conv->set_isdeconvolution(false);
-
-  // Add Padding
-  // Usually using autopadding is more efficient than using explicit padding
-  // Try to see if we can map explicit padding to auto padding
-  std::vector<int64_t> input_shape;
-  ORT_RETURN_IF_NOT(GetShape(*input_defs[0], input_shape, logger), "Cannot get shape");
-  AutoPadType auto_pad_type;
-  ORT_RETURN_IF_ERROR(HandleAutoPad(input_shape, weight_shape[2], weight_shape[3],
-                                    onnx_pads, strides, dilations,
-                                    StringToAutoPadType(helper.Get("auto_pad", "NOTSET")),
-                                    auto_pad_type));
-
-  if (AutoPadType::SAME_UPPER == auto_pad_type || AutoPadType::SAME_LOWER == auto_pad_type) {
-    auto* padding_type = coreml_conv->mutable_same();
-    if (AutoPadType::SAME_LOWER == auto_pad_type) {  // default is SAME_UPPER
-      padding_type->set_asymmetrymode(COREML_SPEC::SamePadding_SamePaddingMode_TOP_LEFT_HEAVY);
+
+    if (groups) {
+      AddOperationInput(*conv_op, "groups", model_builder.AddScalarConstant(op_type, "groups", *groups));
     }
-  } else {
-    auto* padding_type = coreml_conv->mutable_valid();
-    if (AutoPadType::NOTSET == auto_pad_type && onnx_pads != std::vector<int64_t>{0, 0, 0, 0}) {
-      // NOTSET is adding the explicit padding to the ValidPadding.paddingAmounts
-      auto* height_border = padding_type->mutable_paddingamounts()->add_borderamounts();
-      height_border->set_startedgesize(onnx_pads[0]);
-      height_border->set_endedgesize(onnx_pads[2]);
-      auto* width_border = padding_type->mutable_paddingamounts()->add_borderamounts();
-      width_border->set_startedgesize(onnx_pads[1]);
-      width_border->set_endedgesize(onnx_pads[3]);
+
+    AutoPadType auto_pad_type = StringToAutoPadType(helper.Get("auto_pad", "NOTSET"));
+
+    // pad type (string)
+    //   valid - no pads (ONNX auto_pad VALID)
+    //   custom - pads input (ONNX NOTSET)
+    //   same - inferred to be `d_out[i] = ceil(d_in[i] / strides[i])` (assuming == ONNX SAME_UPPER)
+    //   same_lower - as per same but any extra rows/cols are added at top/left if padding is odd (ONNX SAME_LOWER)
+    //
+    // TODO: See if we want to update HandleAutoPad to support 1D (and 3D) so we can infer if an autopad value
+    //       can be used. TBD if that provides any performance benefit with ML Program though as CoreML could
+    //       potentially do that for us.
+    switch (auto_pad_type) {
+      case AutoPadType::NOTSET: {
+        // use `pads` attribute.
+        auto onnx_pads = helper.GetInt64s("pads");  // 'pads' must be provided if auto_pad is NOTSET
+        if (onnx_pads) {
+          AddOperationInput(*conv_op, "pad_type",
+                            model_builder.AddScalarConstant(op_type, "pad_type", std::string("custom")));
+
+          // need to re-order from x1_start, x2_start..., x1_end, x2_end... to
+          // x1_start, x1_end, x2_start, x2_end,...
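The reordering implemented just below is easier to see with concrete values for a 2D conv (illustrative):

    // ONNX pads    {1, 2, 3, 4}  // {H_begin, W_begin, H_end, W_end}
    // CoreML 'pad' {1, 3, 2, 4}  // {H_begin, H_end, W_begin, W_end}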
+          size_t num_pads = onnx_pads->size();
+          size_t num_dims = num_pads / 2;
+          std::vector<int64_t> reordered_pads(num_pads, 0);
+          for (size_t i = 0; i < num_pads; ++i) {
+            auto cur_dim = i % num_dims;
+            if (i < num_dims) {  // start values
+              reordered_pads[cur_dim * 2] = (*onnx_pads)[i];
+            } else {  // end values
+              reordered_pads[cur_dim * 2 + 1] = (*onnx_pads)[i];
+            }
+          }
+
+          AddOperationInput(*conv_op, "pad", model_builder.AddConstant(op_type, "pad", reordered_pads));
+
+          break;
+        }
+
+        // in theory the pads may not be provided and in that case the default is no padding.
+        // as that is the same as 'valid', fall through
+        [[fallthrough]];
+      }
+      case AutoPadType::VALID:
+        AddOperationInput(*conv_op, "pad_type",
+                          model_builder.AddScalarConstant(op_type, "pad_type", std::string("valid")));
+
+        break;
+      case AutoPadType::SAME_UPPER:
+      case AutoPadType::SAME_LOWER: {
+        const auto pad_type = (auto_pad_type == AutoPadType::SAME_UPPER ? "same" : "same_lower");
+        AddOperationInput(*conv_op, "pad_type",
+                          model_builder.AddScalarConstant(op_type, "pad_type", std::string(pad_type)));
+
+        // despite what the spec says, a 'pad' input seems to be required.
+        // https://github.com/apple/coremltools/issues/2127
+        // provide the default value. passing in an empty vector also works. TBD what's better.
+        std::vector<int64_t> ignored_pads(num_spatial_dims * 2, 0);
+        AddOperationInput(*conv_op, "pad", model_builder.AddConstant(op_type, "pad", ignored_pads));
+
+        break;
+      }
     }
-  }
-  // Add weight
-  ORT_RETURN_IF_ERROR(CreateCoreMLWeight(*coreml_conv->mutable_weights(), weight_tensor));
+    // set output
+    AddOperationOutput(*conv_op, *node.OutputDefs()[0]);
+
+    model_builder.AddOperation(std::move(conv_op));
+  } else
+#endif  // defined(COREML_ENABLE_MLPROGRAM)
+  {
+    std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = model_builder.CreateNNLayer(node);
+
+    auto strides = helper.Get("strides", std::vector<int64_t>{1, 1});
+    auto dilations = helper.Get("dilations", std::vector<int64_t>{1, 1});
+    auto onnx_pads = helper.Get("pads", std::vector<int64_t>{0, 0, 0, 0});
+    const auto group = helper.Get("group", static_cast<int64_t>(1));
+
+    std::vector<int64_t> input_shape;
+    ORT_RETURN_IF_NOT(GetShape(*input_defs[0], input_shape, logger), "Cannot get shape");
+
+    const auto& weight_tensor = *model_builder.GetInitializerTensors().at(input_defs[1]->Name());
+    std::vector<int64_t> weight_shape = {weight_tensor.dims().cbegin(), weight_tensor.dims().cend()};
+
+    const bool is_1d_conv = (weight_shape.size() == 3);
+
+    // add dummy 'W' dim with value of 1 so we can use 2D conv.
+    if (is_1d_conv) {
+      input_shape.push_back(1);
+      weight_shape.push_back(1);
+
+      // Strides/dilations for 1d conv is normally of length 1. Expand them by 1
+      // to meet the required length 2 (for 2d conv it's normally 2)
+      if (strides.size() < 2) {
+        ORT_RETURN_IF_NOT(strides.size() == 1, "strides size does not equal 1 for Conv 1d");
+        strides.push_back(1);
+      }
+
+      if (dilations.size() < 2) {
+        ORT_RETURN_IF_NOT(dilations.size() == 1, "dilations size does not equal 1 for Conv 1d");
+        dilations.push_back(1);
+      }
+
+      // Similarly 1d conv normally has a length 2 padding. Expand it to length 4 by adding additional zeros.
+ if (onnx_pads.size() < 4) { + ORT_RETURN_IF_NOT(onnx_pads.size() == 2, "onnx_pads size does not equal 2 for Conv 1d"); + onnx_pads.insert(onnx_pads.begin() + 1, 0); + onnx_pads.push_back(0); + } + } - // Add bias if present - if (input_defs.size() > 2) { - coreml_conv->set_hasbias(true); - const auto& bias_tensor = *model_builder.GetInitializerTensors().at(input_defs[2]->Name()); - ORT_RETURN_IF_ERROR(CreateCoreMLWeight(*coreml_conv->mutable_bias(), bias_tensor)); - } + auto* coreml_conv = layer->mutable_convolution(); - if (is_1d_conv) { - std::string conv_output_name = model_builder.GetUniqueName(node.Name() + "_conv_output"); - *layer->mutable_input()->Add() = expand_output_name; - *layer->mutable_output()->Add() = conv_output_name; - model_builder.AddLayer(std::move(layer)); - - // Add a squeeze layer here. Since CoreML only supports 2d conv and we expanded the dimension by 1 before, - // we need to squeeze it back from NxCxHx1->NxCxH. - const auto squeeze_layer_name = model_builder.GetUniqueName(MakeString(node.Name(), "_Conv_squeeze")); - std::unique_ptr squeeze_layer = CreateNNLayer(squeeze_layer_name); - squeeze_layer->mutable_squeeze()->add_axes(-1); - *squeeze_layer->mutable_input()->Add() = conv_output_name; - *squeeze_layer->mutable_output()->Add() = output_name; - model_builder.AddLayer(std::move(squeeze_layer)); - } else { - *layer->mutable_input()->Add() = input_name; - *layer->mutable_output()->Add() = output_name; - model_builder.AddLayer(std::move(layer)); + std::string expand_output_name = model_builder.GetUniqueName(node.Name() + "_expandDims"); + + if (is_1d_conv) { + // Add an expanddims layer here. CoreML only supports 2d convolution, so for 1d Conv case + // we need to add an additional dimension here to the input to make it "2d Conv" like. 
+ // NxCxH -> NxCxHx1 + auto expand_layer = model_builder.CreateNNLayer(node, "_Conv_expand"); + expand_layer->mutable_expanddims()->add_axes(-1); + *expand_layer->mutable_input()->Add() = input_name; + *expand_layer->mutable_output()->Add() = expand_output_name; + model_builder.AddLayer(std::move(expand_layer)); + } + + coreml_conv->set_outputchannels(weight_shape[0]); // M + coreml_conv->set_kernelchannels(weight_shape[1]); // C/Group + coreml_conv->add_kernelsize(weight_shape[2]); // H + coreml_conv->add_kernelsize(weight_shape[3]); // W + coreml_conv->set_ngroups(group); + *coreml_conv->mutable_stride() = {strides.cbegin(), strides.cend()}; + *coreml_conv->mutable_dilationfactor() = {dilations.cbegin(), dilations.cend()}; + + coreml_conv->set_isdeconvolution(false); + + // Add Padding + // Usually using autopadding is more efficient than using explicit padding + // Try to see if we can map explicit padding to auto padding + AutoPadType auto_pad_type; + ORT_RETURN_IF_ERROR(HandleAutoPad(input_shape, weight_shape[2], weight_shape[3], + onnx_pads, strides, dilations, + StringToAutoPadType(helper.Get("auto_pad", "NOTSET")), + auto_pad_type)); + + if (AutoPadType::SAME_UPPER == auto_pad_type || AutoPadType::SAME_LOWER == auto_pad_type) { + auto* padding_type = coreml_conv->mutable_same(); + if (AutoPadType::SAME_LOWER == auto_pad_type) { // default is SAME_UPPER + padding_type->set_asymmetrymode(COREML_SPEC::SamePadding_SamePaddingMode_TOP_LEFT_HEAVY); + } + } else { + auto* padding_type = coreml_conv->mutable_valid(); + if (AutoPadType::NOTSET == auto_pad_type && onnx_pads != std::vector{0, 0, 0, 0}) { + // NOTSET is adding the explicit padding to the ValidPadding.paddingAmounts + auto* height_border = padding_type->mutable_paddingamounts()->add_borderamounts(); + height_border->set_startedgesize(onnx_pads[0]); + height_border->set_endedgesize(onnx_pads[2]); + auto* width_border = padding_type->mutable_paddingamounts()->add_borderamounts(); + width_border->set_startedgesize(onnx_pads[1]); + width_border->set_endedgesize(onnx_pads[3]); + } + } + + // Add weight + ORT_RETURN_IF_ERROR(CreateCoreMLWeight(*coreml_conv->mutable_weights(), weight_tensor)); + + // Add bias if present + if (input_defs.size() > 2) { + coreml_conv->set_hasbias(true); + const auto& bias_tensor = *model_builder.GetConstantInitializer(input_defs[2]->Name()); + ORT_RETURN_IF_ERROR(CreateCoreMLWeight(*coreml_conv->mutable_bias(), bias_tensor)); + } + + if (is_1d_conv) { + std::string conv_output_name = model_builder.GetUniqueName(node.Name() + "_conv_output"); + *layer->mutable_input()->Add() = expand_output_name; + *layer->mutable_output()->Add() = conv_output_name; + model_builder.AddLayer(std::move(layer)); + + // Add a squeeze layer here. Since CoreML only supports 2d conv and we expanded the dimension by 1 before, + // we need to squeeze it back from NxCxHx1->NxCxH. 
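A quick shape trace of this 1D-conv workaround with illustrative values: input {N, C, H} = {1, 3, 8} -> ExpandDims(axis=-1) -> {1, 3, 8, 1} -> 2D Conv with an M x C x kH x 1 kernel -> {1, M, H', 1} -> Squeeze(axis=-1) -> {1, M, H'}.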
+ auto squeeze_layer = model_builder.CreateNNLayer(node, "_Conv_squeeze"); + squeeze_layer->mutable_squeeze()->add_axes(-1); + *squeeze_layer->mutable_input()->Add() = conv_output_name; + *squeeze_layer->mutable_output()->Add() = output_name; + model_builder.AddLayer(std::move(squeeze_layer)); + } else { + *layer->mutable_input()->Add() = input_name; + *layer->mutable_output()->Add() = output_name; + model_builder.AddLayer(std::move(layer)); + } } return Status::OK(); } -#endif - -// Operator support related bool ConvOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const { @@ -186,23 +297,73 @@ bool ConvOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputPara const auto& input_defs = node.InputDefs(); const auto& weight_name = input_defs[1]->Name(); - const auto& initializers = input_params.graph_viewer.GetAllInitializedTensors(); - if (Contains(initializers, weight_name)) { - const auto& tensor = *initializers.at(weight_name); - if (tensor.dims().size() != 4 && tensor.dims().size() != 3) { - LOGS(logger, VERBOSE) << "Conv [" << name << "] dimension: " << tensor.dims().size() - << " Only conv 2d and conv 1d are supported."; + const auto* weight = input_params.graph_viewer.GetConstantInitializer(weight_name, true); + +#if defined(COREML_ENABLE_MLPROGRAM) + if (input_params.create_mlprogram) { + // ML Program supports non-const weight, 1D, 2D and 3D. + // keep to 1D and 2D for consistency with the NeuralNetwork implementation for now. + // add 3D support as/when needed. + } else +#endif // defined (COREML_ENABLE_MLPROGRAM) + { + if (!weight) { + LOGS(logger, VERBOSE) << "The weight of Conv [" << name << "] must be a constant initializer"; return false; } - } else { - LOGS(logger, VERBOSE) << "The weight of Conv [" << name << "] must be known"; + } + + // use the weight for the shape as it should always be known + const auto* weight_shape = input_defs[1]->Shape(); + int64_t num_dims = weight_shape ? weight_shape->dim_size() : -1; + + // ONNX spec requires N and C as first 2 dims + if (num_dims != 3 && num_dims != 4) { + LOGS(logger, VERBOSE) << "Conv [" << name << "] is " << num_dims - 2 << "D. " + << "Only 1D and 2D Conv are supported currently."; return false; } - if (input_defs.size() > 2) { - const auto& bias_name = input_defs[2]->Name(); - if (!Contains(initializers, bias_name)) { - LOGS(logger, VERBOSE) << "The bias of Conv [" << name << "] must be a constant initializer"; + if (input_defs.size() > 2 && !input_params.graph_viewer.GetConstantInitializer(input_defs[2]->Name(), true)) { + LOGS(logger, VERBOSE) << "The bias of Conv [" << name << "] must be a constant initializer"; + return false; + } + + NodeAttrHelper helper(node); + +#if defined(COREML_ENABLE_MLPROGRAM) + // spec says same_lower is supported in CoreML 5. it lies. CoreML 6 is required otherwise you get + // `Unexpected value for parameter pad_type[0] "same_lower" not in ("custom", "same", "valid").` + // We _could_ manually calculate the pads, but not implementing that until we have a real use case to justify + // the effort as it's not clear how common usage of same_lower is. + if (input_params.create_mlprogram && input_params.coreml_version < 6) { + if (StringToAutoPadType(helper.Get("auto_pad", "NOTSET")) == AutoPadType::SAME_LOWER) { + LOGS(logger, VERBOSE) << "Pad type of SAME_LOWER [" << name << "] is not supported until CoreML 6." 
+ << "Available version is CoreML " << input_params.coreml_version; + return false; + } + } +#endif + + // there's no equivalent to allow a manual kernel shape in CoreML. + // it's OK if a specified kernel_shape matches kH and kW dims of the weight input. + auto kernel_shape = helper.GetInt64s("kernel_shape"); + if (kernel_shape) { + bool valid = true; + if (static_cast(kernel_shape->size()) == num_dims - 2) { + for (int i = 0; i < num_dims - 2; ++i) { + // check the specified kernel shape matches the weight shape. skip the initial N and C dims in the latter. + if ((*kernel_shape)[i] != weight_shape->dim()[i + 2].dim_value()) { + valid = false; + break; + } + } + } else { + valid = false; + } + + if (!valid) { + LOGS(logger, VERBOSE) << "Conv [" << name << "] kernel_shape attribute does not match the weight shape"; return false; } } diff --git a/onnxruntime/core/providers/coreml/builders/impl/depthtospace_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/depthtospace_op_builder.cc index a4ad1c31b5027..1eba312b2577b 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/depthtospace_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/depthtospace_op_builder.cc @@ -4,37 +4,26 @@ #include "core/common/safeint.h" #include "core/providers/coreml/builders/helper.h" #include "core/providers/coreml/builders/impl/base_op_builder.h" +#include "core/providers/coreml/builders/model_builder.h" #include "core/providers/coreml/builders/op_builder_factory.h" #include "core/providers/coreml/shape_utils.h" #include "core/providers/shared/utils/utils.h" -#ifdef __APPLE__ -#include "core/providers/coreml/builders/model_builder.h" -#endif - namespace onnxruntime { namespace coreml { class DepthToSpaceOpBuilder : public BaseOpBuilder { - // Add operator related - private: -#ifdef __APPLE__ Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const override; -#endif - // Operator support related bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const override; }; -// Add operator related - -#ifdef __APPLE__ Status DepthToSpaceOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& /* logger */) const { - std::unique_ptr layer = CreateNNLayer(model_builder, node); + std::unique_ptr layer = model_builder.CreateNNLayer(node); const auto& input_defs = node.InputDefs(); const auto& output_defs = node.OutputDefs(); @@ -54,9 +43,6 @@ Status DepthToSpaceOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, model_builder.AddLayer(std::move(layer)); return Status::OK(); } -#endif - -// Operator support related bool DepthToSpaceOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& /*input_params*/, const logging::Logger& logger) const { diff --git a/onnxruntime/core/providers/coreml/builders/impl/flatten_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/flatten_op_builder.cc index b303fe7884cb1..f0adb70587bcf 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/flatten_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/flatten_op_builder.cc @@ -3,39 +3,26 @@ #include "core/providers/coreml/builders/helper.h" #include "core/providers/coreml/builders/impl/base_op_builder.h" +#include "core/providers/coreml/builders/model_builder.h" #include "core/providers/coreml/builders/op_builder_factory.h" #include "core/providers/coreml/shape_utils.h" #include 
"core/providers/shared/utils/utils.h" -#ifdef __APPLE__ -#include "core/providers/coreml/builders/model_builder.h" -#endif - namespace onnxruntime { namespace coreml { class FlattenOpBuilder : public BaseOpBuilder { - // Add operator related -#ifdef __APPLE__ - private: Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const override; -#endif - // Operator support related - private: bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const override; }; -// Add operator related - -#ifdef __APPLE__ - Status FlattenOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, - const logging::Logger& logger) const { - std::unique_ptr layer = CreateNNLayer(model_builder, node); + const logging::Logger& /*logger*/) const { + std::unique_ptr layer = model_builder.CreateNNLayer(node); // Note: ONNX Flatten corresponds to CoreML FlattenTo2DLayerParams auto* coreml_flatten = layer->mutable_flattento2d(); @@ -51,9 +38,6 @@ Status FlattenOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, return Status::OK(); } -#endif - -// Operator support related bool FlattenOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& /*input_params*/, const logging::Logger& logger) const { diff --git a/onnxruntime/core/providers/coreml/builders/impl/gather_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/gather_op_builder.cc index 9c7ec306ca093..7d32675e3e510 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/gather_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/gather_op_builder.cc @@ -2,34 +2,24 @@ // Licensed under the MIT License. #include "core/providers/coreml/builders/impl/base_op_builder.h" - #include "core/providers/coreml/builders/op_builder_factory.h" +#include "core/providers/coreml/builders/model_builder.h" #include "core/providers/coreml/shape_utils.h" #include "core/providers/shared/utils/utils.h" -#if defined(__APPLE__) -#include "core/providers/coreml/builders/model_builder.h" -#endif - namespace onnxruntime::coreml { class GatherOpBuilder : public BaseOpBuilder { - // Add operator related -#ifdef __APPLE__ - private: Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const override; -#endif - // Operator support related - private: - bool HasSupportedInputsImpl(const Node& node, const logging::Logger& logger) const override; + bool HasSupportedInputsImpl(const Node& node, const OpBuilderInputParams& input_params, + const logging::Logger& logger) const override; + bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const override; }; -// Add operator related -#if defined(__APPLE__) namespace { int64_t GetAxisAttribute(const Node& node) { NodeAttrHelper node_attr_helper{node}; @@ -38,8 +28,8 @@ int64_t GetAxisAttribute(const Node& node) { } // namespace Status GatherOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, - const logging::Logger& logger) const { - auto layer = CreateNNLayer(model_builder, node); + const logging::Logger& /*logger*/) const { + auto layer = model_builder.CreateNNLayer(node); layer->mutable_gather()->set_axis(GetAxisAttribute(node)); *layer->mutable_input()->Add() = node.InputDefs()[0]->Name(); // data *layer->mutable_input()->Add() = node.InputDefs()[1]->Name(); // indices @@ -47,10 +37,9 @@ Status 
GatherOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const model_builder.AddLayer(std::move(layer)); return Status::OK(); } -#endif // defined(__APPLE__) -// Operator support related -bool GatherOpBuilder::HasSupportedInputsImpl(const Node& node, const logging::Logger& logger) const { +bool GatherOpBuilder::HasSupportedInputsImpl(const Node& node, const OpBuilderInputParams& /*input_params*/, + const logging::Logger& logger) const { int32_t input_type; if (!GetType(*node.InputDefs()[0], input_type, logger)) return false; diff --git a/onnxruntime/core/providers/coreml/builders/impl/gemm_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/gemm_op_builder.cc index 71b08db6d44d8..48f77354d7c30 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/gemm_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/gemm_op_builder.cc @@ -7,38 +7,25 @@ #include "core/providers/common.h" #include "core/providers/coreml/builders/helper.h" #include "core/providers/coreml/builders/impl/base_op_builder.h" +#include "core/providers/coreml/builders/impl/builder_utils.h" +#include "core/providers/coreml/builders/model_builder.h" #include "core/providers/coreml/builders/op_builder_factory.h" #include "core/providers/coreml/shape_utils.h" #include "core/providers/shared/utils/utils.h" -#ifdef __APPLE__ -#include "core/providers/coreml/builders/impl/builder_utils.h" -#include "core/providers/coreml/builders/model_builder.h" -#endif - namespace onnxruntime { namespace coreml { class GemmOpBuilder : public BaseOpBuilder { - // Add operator related -#ifdef __APPLE__ - public: void AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const override; - private: Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const override; -#endif - // Operator support related - private: bool IsOpSupportedImpl(const Node& /* node */, const OpBuilderInputParams& /* input_params */, const logging::Logger& /* logger */) const override; }; -// Add operator related - -#ifdef __APPLE__ void GemmOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const { const auto& op = node.OpType(); const auto& input_defs(node.InputDefs()); @@ -71,7 +58,7 @@ static Status GetTensorFloatDataTransposed(const ONNX_NAMESPACE::TensorProto& te Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& /* logger */) const { - std::unique_ptr layer = CreateNNLayer(model_builder, node); + std::unique_ptr layer = model_builder.CreateNNLayer(node); const auto& op_type = node.OpType(); const auto& input_defs = node.InputDefs(); @@ -120,9 +107,6 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N model_builder.AddLayer(std::move(layer)); return Status::OK(); } -#endif - -// Operator support related bool GemmOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const { diff --git a/onnxruntime/core/providers/coreml/builders/impl/pad_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/pad_op_builder.cc index ba12600e8bc40..99d6f01cb8c5b 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/pad_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/pad_op_builder.cc @@ -7,30 +7,20 @@ #include "core/providers/common.h" #include "core/providers/coreml/builders/helper.h" #include "core/providers/coreml/builders/impl/base_op_builder.h" +#include 
"core/providers/coreml/builders/model_builder.h" #include "core/providers/coreml/builders/op_builder_factory.h" #include "core/providers/coreml/shape_utils.h" #include "core/providers/shared/utils/utils.h" -#ifdef __APPLE__ -#include "core/providers/coreml/builders/model_builder.h" -#endif - namespace onnxruntime { namespace coreml { class PadOpBuilder : public BaseOpBuilder { - // Add operator related -#ifdef __APPLE__ - public: void AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const override; - private: Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const override; -#endif - // Operator support related - private: bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const override; @@ -64,9 +54,6 @@ static InlinedVector GetPaddingAxesData(const InitializedTensorSet& ini return axes_tensor_data; } -// Add operator related - -#ifdef __APPLE__ void PadOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const { model_builder.AddInitializerToSkip(node.InputDefs()[1]->Name()); // pads model_builder.AddInitializerToSkip(node.InputDefs()[2]->Name()); // constant_value @@ -78,7 +65,7 @@ void PadOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node Status PadOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const { - std::unique_ptr layer = CreateNNLayer(model_builder, node); + std::unique_ptr layer = model_builder.CreateNNLayer(node); auto* coreml_pad = layer->mutable_padding(); auto* constant_padding_type = coreml_pad->mutable_constant(); // CoreML::Specification::PaddingLayerParams_PaddingConstant @@ -122,9 +109,6 @@ Status PadOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, return Status::OK(); } -#endif - -// Operator support related bool PadOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const { diff --git a/onnxruntime/core/providers/coreml/builders/impl/pool_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/pool_op_builder.cc index fd1c77c851e6f..01aced739b36d 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/pool_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/pool_op_builder.cc @@ -4,38 +4,27 @@ #include "core/providers/common.h" #include "core/providers/coreml/builders/helper.h" #include "core/providers/coreml/builders/impl/base_op_builder.h" +#include "core/providers/coreml/builders/impl/builder_utils.h" +#include "core/providers/coreml/builders/model_builder.h" #include "core/providers/coreml/builders/op_builder_factory.h" #include "core/providers/coreml/shape_utils.h" #include "core/providers/shared/utils/utils.h" -#ifdef __APPLE__ -#include "core/providers/coreml/builders/impl/builder_utils.h" -#include "core/providers/coreml/builders/model_builder.h" -#endif - namespace onnxruntime { namespace coreml { class PoolOpBuilder : public BaseOpBuilder { - // Add operator related - private: -#ifdef __APPLE__ Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const override; -#endif - // Operator support related bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const override; }; -// Add operator related - -#ifdef __APPLE__ Status PoolOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, 
const Node& node, const logging::Logger& logger) const { - std::unique_ptr layer = CreateNNLayer(model_builder, node); + std::unique_ptr layer = model_builder.CreateNNLayer(node); auto* coreml_pool = layer->mutable_pooling(); const auto& op_type = node.OpType(); @@ -108,9 +97,7 @@ Status PoolOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, model_builder.AddLayer(std::move(layer)); return Status::OK(); } -#endif -// Operator support related bool PoolOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& /* input_params */, const logging::Logger& logger) const { const auto& op_type = node.OpType(); diff --git a/onnxruntime/core/providers/coreml/builders/impl/reduction_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/reduction_op_builder.cc index 6a2014e7952a2..32378b1f654d8 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/reduction_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/reduction_op_builder.cc @@ -1,36 +1,27 @@ // Copyright (c) Shukant Pal. // Licensed under the MIT License. +#include "core/optimizer/initializer.h" #include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" - -#ifdef __APPLE__ -#include "core/providers/coreml/builders/model_builder.h" -#endif #include "core/providers/coreml/builders/helper.h" +#include "core/providers/coreml/builders/impl/base_op_builder.h" +#include "core/providers/coreml/builders/model_builder.h" #include "core/providers/coreml/builders/op_builder_factory.h" -#include "core/optimizer/initializer.h" - -#include "base_op_builder.h" +#include "core/providers/shared/utils/utils.h" namespace onnxruntime { namespace coreml { class ReductionOpBuilder : public BaseOpBuilder { -#ifdef __APPLE__ - public: void AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const override; - private: Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const override; -#endif - private: + bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const override; }; -#ifdef __APPLE__ namespace { template void AddReductionParams(T* params, const std::vector& axes, bool keepdims, bool noop_with_empty_axes) { @@ -76,7 +67,7 @@ Status ReductionOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, co const bool keepdims = helper.Get("keepdims", 1) != 0; const bool noop_with_empty_axes = helper.Get("noop_with_empty_axes", 0) != 0; - std::unique_ptr layer = CreateNNLayer(model_builder, node); + std::unique_ptr layer = model_builder.CreateNNLayer(node); if (op_type == "ReduceSum") { AddReductionParams(layer->mutable_reducesum(), axes, keepdims, noop_with_empty_axes); @@ -93,7 +84,6 @@ Status ReductionOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, co model_builder.AddLayer(std::move(layer)); return Status::OK(); } -#endif bool ReductionOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const { @@ -124,4 +114,4 @@ void CreateReductionOpBuilder(const std::string& op_type, OpBuilderRegistrations } } // namespace coreml -} // namespace onnxruntime \ No newline at end of file +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/coreml/builders/impl/reshape_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/reshape_op_builder.cc index 67aee73630cdb..7ae1746be3122 100644 --- 
a/onnxruntime/core/providers/coreml/builders/impl/reshape_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/reshape_op_builder.cc @@ -6,31 +6,21 @@ #include "core/providers/common.h" #include "core/providers/coreml/builders/helper.h" #include "core/providers/coreml/builders/impl/base_op_builder.h" +#include "core/providers/coreml/builders/model_builder.h" #include "core/providers/coreml/builders/op_builder_factory.h" #include "core/providers/coreml/shape_utils.h" #include "core/providers/cpu/tensor/reshape_helper.h" #include "core/providers/shared/utils/utils.h" -#ifdef __APPLE__ -#include "core/providers/coreml/builders/model_builder.h" -#endif - namespace onnxruntime { namespace coreml { class ReshapeOpBuilder : public BaseOpBuilder { - // Add operator related -#ifdef __APPLE__ - public: void AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const override; - private: Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const override; -#endif - // Operator support related - private: bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const override; @@ -38,9 +28,6 @@ class ReshapeOpBuilder : public BaseOpBuilder { int GetMinSupportedOpSet(const Node& /* node */) const override { return 5; } }; -// Add operator related - -#ifdef __APPLE__ void ReshapeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const { model_builder.AddInitializerToSkip(node.InputDefs()[1]->Name()); } @@ -48,7 +35,7 @@ void ReshapeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Status ReshapeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const { - std::unique_ptr layer = CreateNNLayer(model_builder, node); + std::unique_ptr layer = model_builder.CreateNNLayer(node); const auto& input_defs = node.InputDefs(); const auto& initializers(model_builder.GetInitializerTensors()); @@ -69,9 +56,6 @@ Status ReshapeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, model_builder.AddLayer(std::move(layer)); return Status::OK(); } -#endif - -// Operator support related bool ReshapeOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const { diff --git a/onnxruntime/core/providers/coreml/builders/impl/resize_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/resize_op_builder.cc index 5f963dc30dd8f..35dcde41a6bcf 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/resize_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/resize_op_builder.cc @@ -8,31 +8,21 @@ #include "core/providers/common.h" #include "core/providers/coreml/builders/helper.h" #include "core/providers/coreml/builders/impl/base_op_builder.h" +#include "core/providers/coreml/builders/model_builder.h" #include "core/providers/coreml/builders/op_builder_factory.h" #include "core/providers/coreml/shape_utils.h" #include "core/providers/cpu/tensor/reshape_helper.h" #include "core/providers/shared/utils/utils.h" -#ifdef __APPLE__ -#include "core/providers/coreml/builders/model_builder.h" -#endif - namespace onnxruntime { namespace coreml { class ResizeOpBuilder : public BaseOpBuilder { - // Add operator related -#ifdef __APPLE__ - public: void AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const override; - private: Status AddToModelBuilderImpl(ModelBuilder& model_builder, 
const Node& node, const logging::Logger& logger) const override; -#endif - // Operator support related - private: bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const override; @@ -41,7 +31,7 @@ class ResizeOpBuilder : public BaseOpBuilder { int GetMinSupportedOpSet(const Node& /* node */) const override { return 11; } }; -// Helper functions +namespace { bool GetResizeScales(const InitializedTensorSet& initializers, const Node& node, std::vector& scales, const logging::Logger&) { @@ -73,10 +63,8 @@ bool GetResizeOutputSizes(const InitializedTensorSet& initializers, sizes = std::vector(sizes_data.begin(), sizes_data.end()); return true; } +} // namespace -// Add operator related - -#ifdef __APPLE__ void ResizeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const { // We don't really use ROI here, so add it to skipped list if it's an initializer tensor model_builder.AddInitializerToSkip(node.InputDefs()[1]->Name()); // ROI @@ -96,7 +84,7 @@ void ResizeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const N Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const { - std::unique_ptr layer = CreateNNLayer(model_builder, node); + std::unique_ptr layer = model_builder.CreateNNLayer(node); auto* coreml_upsample = layer->mutable_upsample(); NodeAttrHelper helper(node); @@ -131,9 +119,6 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, model_builder.AddLayer(std::move(layer)); return Status::OK(); } -#endif - -// Operator support related bool ResizeOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const { diff --git a/onnxruntime/core/providers/coreml/builders/impl/shape_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/shape_op_builder.cc index fd64153ffd283..a86e3d9538d87 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/shape_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/shape_op_builder.cc @@ -2,44 +2,30 @@ // Licensed under the MIT License. 
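For context on the Resize hunk above: CoreML's upsample layer takes integer scaling factors, so the ONNX `scales` (or `sizes`) initializer has to be resolved to per-axis factors at build time. A condensed sketch of that step (assumes the GetResizeScales helper above and NCHW input; `add_scalingfactor` is the generated setter for the layer's repeated scalingFactor field, an assumption worth verifying against the CoreML protos):

    std::vector<float> scales;
    if (GetResizeScales(initializers, node, scales, logger)) {
      // scales is {1, 1, scale_h, scale_w}; only the spatial factors are used
      coreml_upsample->add_scalingfactor(static_cast<int64_t>(scales[2]));
      coreml_upsample->add_scalingfactor(static_cast<int64_t>(scales[3]));
    }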
#include "core/providers/coreml/builders/impl/base_op_builder.h" - +#include "core/providers/coreml/builders/model_builder.h" #include "core/providers/coreml/builders/op_builder_factory.h" #include "core/providers/shared/utils/utils.h" // for NodeAttrHelper -#if defined(__APPLE__) -#include "core/providers/coreml/builders/model_builder.h" -#endif - namespace onnxruntime::coreml { class ShapeOpBuilder : public BaseOpBuilder { - // Add operator related -#ifdef __APPLE__ - private: Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const override; -#endif - // Operator support related - private: bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const override; }; -// Add operator related -#if defined(__APPLE__) Status ShapeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, - const logging::Logger& logger) const { - auto layer = CreateNNLayer(model_builder, node); + const logging::Logger& /*logger*/) const { + auto layer = model_builder.CreateNNLayer(node); layer->mutable_getshape(); *layer->mutable_input()->Add() = node.InputDefs()[0]->Name(); *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name(); model_builder.AddLayer(std::move(layer)); return Status::OK(); } -#endif // defined(__APPLE__) -// Operator support related bool ShapeOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& /*input_params*/, const logging::Logger& logger) const { NodeAttrHelper node_attr_helper{node}; diff --git a/onnxruntime/core/providers/coreml/builders/impl/slice_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/slice_op_builder.cc index 2c250b3cc9f5a..b716af738e1b1 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/slice_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/slice_op_builder.cc @@ -1,39 +1,31 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include "core/providers/coreml/builders/impl/base_op_builder.h" - #include "core/optimizer/initializer.h" #include "core/providers/coreml/builders/helper.h" +#include "core/providers/coreml/builders/impl/base_op_builder.h" +#include "core/providers/coreml/builders/model_builder.h" #include "core/providers/coreml/builders/op_builder_factory.h" #include "core/providers/coreml/shape_utils.h" #include "core/providers/cpu/tensor/slice_helper.h" #include "core/providers/shared/utils/utils.h" -#if defined(__APPLE__) -#include "core/providers/coreml/builders/model_builder.h" -#endif - namespace onnxruntime::coreml { class SliceOpBuilder : public BaseOpBuilder { - // Add operator related -#ifdef __APPLE__ - private: void AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const override; Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const override; -#endif - // Operator support related - private: int GetMinSupportedOpSet(const Node& /* node */) const override { // Before Slice-10, some inputs were attributes instead. We don't support that for now. 
return 10; } - bool HasSupportedInputsImpl(const Node& node, const logging::Logger& logger) const override; + bool HasSupportedInputsImpl(const Node& node, const OpBuilderInputParams& input_params, + const logging::Logger& logger) const override; + bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& builder_params, const logging::Logger& logger) const override; }; @@ -107,9 +99,6 @@ bool ValidateSliceComputeMetadataForCoreML(const SliceOp::PrepareForComputeMetad } } // namespace -// Add operator related -#if defined(__APPLE__) - void SliceOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const { const auto& input_defs = node.InputDefs(); @@ -132,7 +121,7 @@ Status SliceOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const ORT_RETURN_IF_ERROR(PrepareSliceComputeMetadataFromConstantInitializers(node, model_builder.GetGraphViewer(), compute_metadata)); - auto layer = CreateNNLayer(model_builder, node); + auto layer = model_builder.CreateNNLayer(node); *layer->mutable_input()->Add() = node.InputDefs()[0]->Name(); *layer->mutable_output()->Add() = node.OutputDefs()[0]->Name(); auto* slice_static = layer->mutable_slicestatic(); @@ -163,10 +152,8 @@ Status SliceOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const return Status::OK(); } -#endif // defined(__APPLE__) - -// Operator support related -bool SliceOpBuilder::HasSupportedInputsImpl(const Node& node, const logging::Logger& logger) const { +bool SliceOpBuilder::HasSupportedInputsImpl(const Node& node, const OpBuilderInputParams& /*input_params*/, + const logging::Logger& logger) const { int32_t input_type; if (!GetType(*node.InputDefs()[0], input_type, logger)) return false; diff --git a/onnxruntime/core/providers/coreml/builders/impl/softmax_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/softmax_op_builder.cc index c454a2a779f6e..266396a0fe90e 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/softmax_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/softmax_op_builder.cc @@ -1,43 +1,29 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
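On the Slice hunk above: once PrepareSliceComputeMetadataFromConstantInitializers has resolved per-dimension starts/ends/steps, filling the SliceStaticLayerParams is mechanical. A condensed sketch (the starts_/ends_/steps_ member names follow the SliceOp::PrepareForComputeMetadata usage above; treat the exact field and member names as assumptions):

    for (size_t i = 0; i < compute_metadata.starts_.size(); ++i) {
      slice_static->add_beginids(compute_metadata.starts_[i]);
      slice_static->add_beginmasks(false);  // explicit ids, no open-ended ranges
      slice_static->add_endids(compute_metadata.ends_[i]);
      slice_static->add_endmasks(false);
      slice_static->add_strides(compute_metadata.steps_[i]);
    }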
-#include "core/providers/coreml/builders/impl/base_op_builder.h" - #include "core/framework/tensorprotoutils.h" #include "core/providers/common.h" -#include "core/providers/coreml/shape_utils.h" -#include "core/providers/shared/utils/utils.h" - -#ifdef __APPLE__ +#include "core/providers/coreml/builders/impl/base_op_builder.h" #include "core/providers/coreml/builders/model_builder.h" -#endif #include "core/providers/coreml/builders/op_builder_factory.h" +#include "core/providers/coreml/shape_utils.h" +#include "core/providers/shared/utils/utils.h" namespace onnxruntime { namespace coreml { class SoftmaxOpBuilder : public BaseOpBuilder { - // Add operator related -#ifdef __APPLE__ - private: Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const override; -#endif - // Operator support related - private: bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const override; }; -// Add operator related - -#ifdef __APPLE__ - Status SoftmaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const { - std::unique_ptr layer = CreateNNLayer(model_builder, node); + std::unique_ptr layer = model_builder.CreateNNLayer(node); const auto& input_name = node.InputDefs()[0]->Name(); const auto& output_name = node.OutputDefs()[0]->Name(); @@ -68,9 +54,7 @@ Status SoftmaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const auto reshape1_output_name = model_builder.GetUniqueName(MakeString(node.Name(), "reshape1_output")); { // Add reshape layer - const auto softmax_reshape1_layer_name = - model_builder.GetUniqueName(MakeString(node.Name(), "_Softmax_reshape1")); - auto reshape_layer = CreateNNLayer(softmax_reshape1_layer_name); + auto reshape_layer = model_builder.CreateNNLayer(node, "_Softmax_reshape1"); *reshape_layer->mutable_reshapestatic()->mutable_targetshape() = {target_shape.cbegin(), target_shape.cend()}; *reshape_layer->mutable_input()->Add() = input_name; *reshape_layer->mutable_output()->Add() = reshape1_output_name; @@ -86,9 +70,7 @@ Status SoftmaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, } { // Add reshape back layer - const auto softmax_reshape2_layer_name = - model_builder.GetUniqueName(MakeString(node.Name(), "_Softmax_reshape2")); - auto reshape_layer = CreateNNLayer(softmax_reshape2_layer_name); + auto reshape_layer = model_builder.CreateNNLayer(node, "_Softmax_reshape2"); *reshape_layer->mutable_reshapestatic()->mutable_targetshape() = {data_shape.cbegin(), data_shape.cend()}; *reshape_layer->mutable_input()->Add() = softmax_output_name; *reshape_layer->mutable_output()->Add() = output_name; @@ -99,10 +81,6 @@ Status SoftmaxOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, return Status::OK(); } -#endif - -// Operator support related - bool SoftmaxOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& /* input_params */, const logging::Logger& logger) const { const auto& input_defs = node.InputDefs(); diff --git a/onnxruntime/core/providers/coreml/builders/impl/split_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/split_op_builder.cc index 56c87c883156b..0497357c45c54 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/split_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/split_op_builder.cc @@ -1,35 +1,24 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
-#include "core/providers/coreml/builders/impl/base_op_builder.h" - #include "core/optimizer/initializer.h" #include "core/providers/common.h" #include "core/providers/coreml/builders/helper.h" +#include "core/providers/coreml/builders/impl/base_op_builder.h" +#include "core/providers/coreml/builders/model_builder.h" #include "core/providers/coreml/builders/op_builder_factory.h" #include "core/providers/coreml/shape_utils.h" #include "core/providers/shared/utils/utils.h" -#if defined(__APPLE__) -#include "core/providers/coreml/builders/model_builder.h" -#endif - namespace onnxruntime { namespace coreml { class SplitOpBuilder : public BaseOpBuilder { - // Add operator related -#ifdef __APPLE__ - private: void AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const override; - private: Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const override; -#endif - // Operator support related - private: bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const override; @@ -37,10 +26,6 @@ class SplitOpBuilder : public BaseOpBuilder { int GetMinSupportedOpSet(const Node& /* node */) const override { return 13; } }; -// Add operator related - -#ifdef __APPLE__ - void SplitOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const { const auto& input_defs = node.InputDefs(); @@ -63,7 +48,7 @@ Status SplitOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, // attribute introduced since opset 18 uint64_t num_outputs; - std::unique_ptr layer = CreateNNLayer(model_builder, node); + std::unique_ptr layer = model_builder.CreateNNLayer(node); auto* coreml_splitnd = layer->mutable_splitnd(); coreml_splitnd->set_axis(axis); @@ -82,7 +67,7 @@ Status SplitOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, coreml_splitnd->set_numsplits(num_outputs); } else { // note: for opset 18+ 'num_outputs' is a required attribute - num_outputs = narrow(helper.GetInt("num_outputs").value()); + num_outputs = narrow(helper.GetInt64("num_outputs").value()); // note: checked in IsOpSupportedImpl that ensures the dim value at splitting axis exists auto split_dim_size = data_shape[HandleNegativeAxis(axis, data_shape.size())]; uint64_t chunk_size = narrow((split_dim_size + num_outputs - 1) / num_outputs); @@ -111,10 +96,6 @@ Status SplitOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, return Status::OK(); } -#endif - -// Operator support related - bool SplitOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const { const auto& input_defs = node.InputDefs(); @@ -159,7 +140,7 @@ bool SplitOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputPar } } else { if (node.SinceVersion() >= 18) { - const auto num_outputs = helper.GetInt("num_outputs"); + const auto num_outputs = helper.GetInt64("num_outputs"); if (!num_outputs.has_value()) { LOGS(logger, VERBOSE) << "No 'num_outputs' provided. For split 18+, num_outputs is a required attribute."; return false; @@ -169,9 +150,10 @@ bool SplitOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputPar << "CoreML SplitND requires at least 2 outputs. num_outputs: " << num_outputs.value(); return false; } - if (num_outputs.value() != static_cast(node.OutputDefs().size()) || num_outputs.value() > split_dims_at_axis) { - LOGS(logger, VERBOSE) << "Invalid num_outputs provided.\n." 
- << "The value should be smaller or equal to the size of dimension being split. num_outputs: " + if (num_outputs.value() != static_cast(node.OutputDefs().size()) || + num_outputs.value() > split_dims_at_axis) { + LOGS(logger, VERBOSE) << "Invalid num_outputs provided.\n. The value should be smaller or equal to the size " + "of dimension being split. num_outputs: " << num_outputs.value(); return false; } diff --git a/onnxruntime/core/providers/coreml/builders/impl/squeeze_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/squeeze_op_builder.cc index 2e14c85ce69c1..e9cc1c2dbf638 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/squeeze_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/squeeze_op_builder.cc @@ -1,48 +1,30 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include + +#include "core/common/safeint.h" #include "core/framework/tensorprotoutils.h" #include "core/providers/common.h" -#include "core/providers/shared/utils/utils.h" -#include "core/optimizer/initializer.h" - -#ifdef __APPLE__ +#include "core/providers/coreml/builders/impl/base_op_builder.h" #include "core/providers/coreml/builders/model_builder.h" -#endif #include "core/providers/coreml/builders/op_builder_factory.h" - -#include "base_op_builder.h" +#include "core/providers/shared/utils/utils.h" +#include "core/optimizer/initializer.h" namespace onnxruntime { namespace coreml { class SqueezeOpBuilder : public BaseOpBuilder { - // Add operator related -#ifdef __APPLE__ - public: void AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const override; - private: Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const override; -#endif - // Operator support related - private: bool IsOpSupportedImpl(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const override; }; -// Add operator related - -#ifdef __APPLE__ -void SqueezeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const { - if (node.SinceVersion() > 12 && node.InputDefs().size() > 1) { - model_builder.AddInitializerToSkip(node.InputDefs()[1]->Name()); - } -} - -/* static */ Status GetAxes(ModelBuilder& model_builder, const Node& node, std::vector& axes) { +namespace { +Status GetAxes(ModelBuilder& model_builder, const Node& node, std::vector& axes) { // Squeeze opset 13 use input as axes if (node.SinceVersion() > 12) { // If axes is not provided, return an empty axes as default to squeeze all @@ -62,11 +44,18 @@ void SqueezeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const return Status::OK(); } +} // namespace + +void SqueezeOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const { + if (node.SinceVersion() > 12 && node.InputDefs().size() > 1) { + model_builder.AddInitializerToSkip(node.InputDefs()[1]->Name()); + } +} Status SqueezeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& /* logger */) const { - std::unique_ptr layer = CreateNNLayer(model_builder, node); + std::unique_ptr layer = model_builder.CreateNNLayer(node); auto* coreml_squeeze = layer->mutable_squeeze(); std::vector axes; @@ -84,9 +73,6 @@ Status SqueezeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, model_builder.AddLayer(std::move(layer)); return Status::OK(); } -#endif - -// Operator support related bool SqueezeOpBuilder::IsOpSupportedImpl(const 
Node& node, const OpBuilderInputParams& input_params, const logging::Logger& /*logger*/) const { diff --git a/onnxruntime/core/providers/coreml/builders/impl/transpose_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/transpose_op_builder.cc index 7d5018a19f74c..f6a61d55a3d63 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/transpose_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/transpose_op_builder.cc @@ -3,33 +3,23 @@ #include "core/providers/coreml/builders/helper.h" #include "core/providers/coreml/builders/impl/base_op_builder.h" +#include "core/providers/coreml/builders/model_builder.h" #include "core/providers/coreml/builders/op_builder_factory.h" #include "core/providers/coreml/shape_utils.h" #include "core/providers/shared/utils/utils.h" -#ifdef __APPLE__ -#include "core/providers/coreml/builders/model_builder.h" -#endif - namespace onnxruntime { namespace coreml { class TransposeOpBuilder : public BaseOpBuilder { - // Add operator related -#ifdef __APPLE__ - private: Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const override; -#endif }; -// Add operator related - -#ifdef __APPLE__ Status TransposeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const { - std::unique_ptr layer = CreateNNLayer(model_builder, node); + std::unique_ptr layer = model_builder.CreateNNLayer(node); NodeAttrHelper helper(node); std::vector perm = helper.Get("perm", std::vector()); @@ -51,7 +41,6 @@ Status TransposeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, model_builder.AddLayer(std::move(layer)); return Status::OK(); } -#endif void CreateTransposeOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) { op_registrations.builders.push_back(std::make_unique()); diff --git a/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc index 660755b43c043..3403378d59114 100644 --- a/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc +++ b/onnxruntime/core/providers/coreml/builders/impl/unary_op_builder.cc @@ -3,32 +3,25 @@ #include "core/providers/common.h" -#ifdef __APPLE__ -#include "core/providers/coreml/builders/model_builder.h" -#endif #include "core/providers/coreml/builders/helper.h" +#include "core/providers/coreml/builders/impl/base_op_builder.h" +#include "core/providers/coreml/builders/model_builder.h" #include "core/providers/coreml/builders/op_builder_factory.h" -#include "base_op_builder.h" - namespace onnxruntime { namespace coreml { class UnaryOpBuilder : public BaseOpBuilder { - private: -#ifdef __APPLE__ Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& logger) const override; -#endif }; -#ifdef __APPLE__ Status UnaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node, const logging::Logger& /* logger */) const { const auto& op_type(node.OpType()); const auto& input_defs(node.InputDefs()); - std::unique_ptr layer = CreateNNLayer(model_builder, node); + std::unique_ptr layer = model_builder.CreateNNLayer(node); if (op_type == "Sqrt") { layer->mutable_unary()->set_type(COREML_SPEC::UnaryFunctionLayerParams::SQRT); @@ -45,9 +38,6 @@ Status UnaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const model_builder.AddLayer(std::move(layer)); return Status::OK(); } -#endif - -// Operator support related 
 void CreateUnaryOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
   op_registrations.builders.push_back(std::make_unique<UnaryOpBuilder>());
@@ -55,4 +45,4 @@ void CreateUnaryOpBuilder(const std::string& op_type, OpBuilderRegistrations& op
 }
 
 }  // namespace coreml
-}  // namespace onnxruntime
\ No newline at end of file
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/coreml/builders/model_builder.cc b/onnxruntime/core/providers/coreml/builders/model_builder.cc
index 9c8b7bce507e4..daab36f7b933d 100644
--- a/onnxruntime/core/providers/coreml/builders/model_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/model_builder.cc
@@ -2,56 +2,555 @@
 // Licensed under the MIT License.
 
 #include
-#include
-
-#include "model_builder.h"
-#include "helper.h"
-#include "op_builder_factory.h"
+#include "core/common/safeint.h"
+#include "core/framework/tensorprotoutils.h"
+#include "core/platform/env.h"
 #include "core/providers/common.h"
+#include "core/providers/coreml/builders/model_builder.h"
+#include "core/providers/coreml/builders/helper.h"
+#include "core/providers/coreml/builders/op_builder_factory.h"
 #include "core/providers/coreml/builders/impl/builder_utils.h"
+#include "core/providers/coreml/coreml_provider_factory.h"
 #include "core/providers/coreml/model/host_utils.h"
-#include "core/providers/coreml/model/model.h"
 #include "core/providers/coreml/shape_utils.h"
 
+#if defined(COREML_ENABLE_MLPROGRAM)
+// includes from coremltools-src in _deps
+#include "modelpackage/src/ModelPackage.hpp"
+#include "mlmodel/src/MILBlob/Blob/StorageWriter.hpp"
+using MILBlob::Blob::StorageWriter;
+#endif
+
+using namespace CoreML::Specification;
+
 namespace onnxruntime {
 namespace coreml {
 
-ModelBuilder::ModelBuilder(const GraphViewer& graph_viewer, const logging::Logger& logger, uint32_t coreml_flags)
-    : graph_viewer_(graph_viewer),
-      logger_(logger),
-      coreml_flags_(coreml_flags) {
+namespace {
+#if defined(COREML_ENABLE_MLPROGRAM)
+// Should the initializer be written to file or kept as an immediate value
+bool ShouldWriteInitializerToWeightsFile(const ONNX_NAMESPACE::TensorProto& tensor_proto) {
+  // https://github.com/apple/coremltools/blob/dbb0094fd0cb936469e35320bf37e866ef7a1da4/coremltools/converters/mil/backend/mil/load.py#L51-L57
+
+  bool use_weight_file = false;
+
+  switch (tensor_proto.data_type()) {
+    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
+    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
+    case ONNX_NAMESPACE::TensorProto_DataType_UINT8:
+    case ONNX_NAMESPACE::TensorProto_DataType_INT8: {
+      auto num_elements = TensorShape(utils::GetTensorShapeFromTensorProto(tensor_proto)).Size();
+      use_weight_file = num_elements >= 10;
+      break;
+    }
+    default:
+      break;
+  }
+
+  return use_weight_file;
+}
+
+// copy from the ONNX TensorProto to a CoreML field.
+// T1 is the source type. T2 is the target type. If the types differ, T1 must be smaller than T2.
+// e.g. uint32_t data can be written to RepeatedField
+template <typename T1, typename T2 = T1>
+void CopyRawDataToRepeatedField(const ONNX_NAMESPACE::TensorProto& tensor_proto,
+                                google::protobuf::RepeatedField<T2>& repeated_field) {
+  const auto& raw_data = tensor_proto.raw_data();
+  const T1* data = reinterpret_cast<const T1*>(raw_data.data());
+  const T1* data_end = data + (raw_data.size() / sizeof(T1));
+  if constexpr (sizeof(T1) == sizeof(T2)) {
+    repeated_field.Add(data, data_end);
+  } else {
+    static_assert(sizeof(T1) < sizeof(T2));
+    // we need to iterate over the data and copy to the repeated field, converting to T2 as we go.
+    repeated_field.Resize(data_end - data, T2(0));
+    for (int i = 0; data != data_end; ++data, ++i) {
+      repeated_field[i] = static_cast<T2>(*data);
+    }
+  }
+}
+
+// copy T data from the TensorProto.int32_t field to TensorValue.bytes
+template <typename T>
+void CopyInt32DataToBytes(const ONNX_NAMESPACE::TensorProto& tensor_proto, MILSpec::TensorValue& tensor_value) {
+  const int num_entries = tensor_proto.int32_data_size();
+  std::string& bytes = *tensor_value.mutable_bytes()->mutable_values();
+  bytes.resize(num_entries * sizeof(T));
+  T* out = reinterpret_cast<T*>(bytes.data());
+
+  const int32_t* in = tensor_proto.int32_data().data();
+  for (int i = 0; i < num_entries; ++i) {
+    out[i] = static_cast<T>(in[i]);
+  }
+}
+
+// copy T data from the TensorProto.uint64_data field to TensorValue.bytes
+template <typename T>
+void CopyUInt64DataToBytes(const ONNX_NAMESPACE::TensorProto& tensor_proto, MILSpec::TensorValue& tensor_value) {
+  const int num_entries = tensor_proto.uint64_data_size();
+  std::string& bytes = *tensor_value.mutable_bytes()->mutable_values();
+  bytes.resize(num_entries * sizeof(T));
+  T* out = reinterpret_cast<T*>(bytes.data());
+
+  const uint64_t* in = tensor_proto.uint64_data().data();
+  for (int i = 0; i < num_entries; ++i) {
+    out[i] = static_cast<T>(in[i]);
+  }
+}
+
+// NOTE: This supports all the ONNX data types. Weights in CoreML may not need all these
+void CopyOnnxTensorToCoreMLTensor(const ONNX_NAMESPACE::TensorProto& tensor_proto,
+                                  MILSpec::TensorValue& tensor_value) {
+  bool has_raw_data = tensor_proto.has_raw_data();
+  auto data_type = tensor_proto.data_type();
+
+  // handling based on
+  // ONNX TensorProto field usage
+  // https://github.com/onnx/onnx/blob/b86cc54efce19530fb953e4b21f57e6b3888534c/onnx/onnx.proto#L544-L572
+  // CoreMLTools conversion implementation that maps data types to fields
+  // https://github.com/apple/coremltools/blob/dbb0094fd0cb936469e35320bf37e866ef7a1da4/coremltools/converters/mil/backend/mil/helper.py#L98
+  // along with some special cased types that are stored in bytes
+  // https://github.com/apple/coremltools/blob/dbb0094fd0cb936469e35320bf37e866ef7a1da4/coremltools/converters/mil/backend/mil/helper.py#L23
+  // IMMEDIATE_VALUE_TYPES_IN_BYTES = (types.fp16, types.int8, types.uint8, types.uint32)
+
+  switch (data_type) {
+    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: {
+      // from: float_data/raw, to: floats
+      if (has_raw_data) {
+        CopyRawDataToRepeatedField<float>(tensor_proto, *tensor_value.mutable_floats()->mutable_values());
+      } else {
+        tensor_value.mutable_floats()->mutable_values()->CopyFrom(tensor_proto.float_data());
+      }
+      break;
+    }
+    case ONNX_NAMESPACE::TensorProto_DataType_DOUBLE: {
+      // from: double_data/raw, to: doubles
+      if (has_raw_data) {
+        CopyRawDataToRepeatedField<double>(tensor_proto, *tensor_value.mutable_doubles()->mutable_values());
+      } else {
+        tensor_value.mutable_doubles()->mutable_values()->CopyFrom(tensor_proto.double_data());
+      }
+      break;
+    }
+    case ONNX_NAMESPACE::TensorProto_DataType_INT32: {
+      // from: int32_data/raw, to: ints
+      if (has_raw_data) {
+        CopyRawDataToRepeatedField<int32_t>(tensor_proto, *tensor_value.mutable_ints()->mutable_values());
+      } else {
+        tensor_value.mutable_ints()->mutable_values()->CopyFrom(tensor_proto.int32_data());
+      }
+      break;
+    }
+    case ONNX_NAMESPACE::TensorProto_DataType_INT64: {
+      // from: int64_data/raw, to: longints
+      if (has_raw_data) {
+        CopyRawDataToRepeatedField<int64_t>(tensor_proto, *tensor_value.mutable_longints()->mutable_values());
+
+      } else {
+        tensor_value.mutable_longints()->mutable_values()->CopyFrom(tensor_proto.int64_data());
+      }
+      break;
+    }
+    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16: {
+      // from: int32_data/raw, to: bytes
+      if (has_raw_data) {
+        *tensor_value.mutable_bytes()->mutable_values() = tensor_proto.raw_data();
+      } else {
+        // iterate the int32_data, taking the 16-bits from each entry, and copying to the bytes.
+        // we use uint16_t as only the size of the data type matters
+        CopyInt32DataToBytes<uint16_t>(tensor_proto, tensor_value);
+      }
+      break;
+    }
+    case ONNX_NAMESPACE::TensorProto_DataType_INT8:
+    case ONNX_NAMESPACE::TensorProto_DataType_UINT8: {
+      // from: int32_data/raw, to: bytes
+      if (has_raw_data) {
+        *tensor_value.mutable_bytes()->mutable_values() = tensor_proto.raw_data();
+      } else {
+        // copy from int32_data to bytes. uint8_t for both as only the size of the data type matters when copying
+        CopyInt32DataToBytes<uint8_t>(tensor_proto, tensor_value);
+      }
+      break;
+    }
+    case ONNX_NAMESPACE::TensorProto_DataType_UINT32: {
+      // from: uint64_data/raw, to: bytes
+      if (has_raw_data) {
+        *tensor_value.mutable_bytes()->mutable_values() = tensor_proto.raw_data();
+      } else {
+        // copy uint32_t values from TensorProto.uint64_data
+        CopyUInt64DataToBytes<uint32_t>(tensor_proto, tensor_value);
+      }
+      break;
+    }
+    case ONNX_NAMESPACE::TensorProto_DataType_UINT64: {
+      // from: uint64_data/raw, to: longints
+      if (has_raw_data) {
+        CopyRawDataToRepeatedField<uint64_t, int64_t>(tensor_proto, *tensor_value.mutable_longints()->mutable_values());
+      } else {
+        // TODO: Is this safe? Need to check the CopyFrom implementation. As it's a straight copy of bytes this
+        // hopefully can do it as one block instead of iterating and potentially doing a static_cast of each
+        // individual value.
+        tensor_value.mutable_longints()->mutable_values()->CopyFrom(
+            reinterpret_cast<const google::protobuf::RepeatedField<int64_t>&>(tensor_proto.uint64_data()));
+      }
+
+      break;
+    }
+    case ONNX_NAMESPACE::TensorProto_DataType_BOOL: {
+      // from: int32_data/raw, to: bools
+      if (has_raw_data) {
+        CopyRawDataToRepeatedField<bool>(tensor_proto, *tensor_value.mutable_bools()->mutable_values());
+      } else {
+        const auto& int32s = tensor_proto.int32_data();
+        auto& bools = *tensor_value.mutable_bools()->mutable_values();
+        const int num_entries = int32s.size();
+        bools.Reserve(num_entries);
+        const int32_t* in = int32s.data();
+        for (int i = 0; i < num_entries; ++i) {
+          *bools.AddAlreadyReserved() = *in++;
+        }
+      }
+
+      break;
+    }
+    case ONNX_NAMESPACE::TensorProto_DataType_STRING: {
+      // from: string_data (which is protobuf type bytes), to: strings (protobuf type string)
+      // due to the protobuf type mismatch we need to iterate and copy
+      auto& in = tensor_proto.string_data();
+      auto& out = *tensor_value.mutable_strings()->mutable_values();
+      out.Reserve(in.size());
+      for (const auto& iter : in) {
+        *out.Add() = iter;
+      }
+
+      break;
+    }
+    /* Not clear if there's an actual use-case for 16-bit int data currently, so leaving commented out
+    case ONNX_NAMESPACE::TensorProto_DataType_INT16:
+    case ONNX_NAMESPACE::TensorProto_DataType_UINT16: {
+      // from: int32_data/raw, to: ints
+      // WARNING: This may change to write to mutable_bytes
+      // https://github.com/apple/coremltools/blob/dbb0094fd0cb936469e35320bf37e866ef7a1da4/coremltools/converters/mil/backend/mil/helper.py#L113-L115
+      if (has_raw_data) {
+        CopyRawDataToRepeatedField(tensor_proto, *tensor_value.mutable_ints()->mutable_values());
+      } else {
+        tensor_value.mutable_ints()->mutable_values()->CopyFrom(tensor_proto.int32_data());
+      }
+      break;
+    } */
+    default:
ORT_THROW("AddTensorProtoDataToMILSpecTensorValue: Unsupported data type: ", data_type); + } +} + +template +uint64_t WriteRawDataUsingStorageWriter(const onnx::TensorProto& tensor_proto, + MILBlob::Blob::StorageWriter& writer) { + MILBlob::Util::Span data(reinterpret_cast(tensor_proto.raw_data().data()), + tensor_proto.raw_data().size() / sizeof(T)); + return writer.WriteData(data); +} + +// Write T1 data from the TensorProto.int32_data field using StorageWriter. +// Currently int32_data can have any of these data types: +// INT32, INT16, INT8, UINT16, UINT8, BOOL, FLOAT16, BFLOAT16, +// FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ +// T1 provides the size of the ONNX data type. T2 is the CoreML type. +// The sizes and layout of T1 and T2 must match as we simply cast the bytes to T2. +template +uint64_t WriteFromInt32DataUsingStorageWriter(const onnx::TensorProto& tensor_proto, + MILBlob::Blob::StorageWriter& writer) { + static_assert(sizeof(T1) == sizeof(T2), "Data sizes must match"); + + // need to copy to temporary data as we have to extract a subset of bytes from each int32_t entry. + // works better to extract the ONNX type first with static_cast, and reinterpret_cast to the CoreML type at the end. + std::vector values; + const int num_values = tensor_proto.int32_data_size(); + values.resize(num_values); // resize so we're not updating the length inside the copy loop + + const int32_t* in = tensor_proto.int32_data().data(); + for (int i = 0; i < num_values; ++i) { + values[i] = static_cast(in[i]); + } + + MILBlob::Util::Span data(reinterpret_cast(values.data()), + num_values); + return writer.WriteData(data); +} + +// write the initializer to weight.bin and return the offset +// StorageWriter is currently limited to fp32, fp16, bfloat16, uint8/int8, uint16/int16. 
+// write the initializer to weight.bin and return the offset
+// StorageWriter is currently limited to fp32, fp16, bfloat16, uint8/int8, uint16/int16.
+// AFAIK we don't use bfloat16/int16/uint16 for weights in ONNX, so limit handling to fp32, fp16, uint8/int8
+uint64_t CopyOnnxTensorToCoreMLWeightsFile(const onnx::TensorProto& tensor_proto,
+                                           MILBlob::Blob::StorageWriter& writer) {
+  bool has_raw_data = tensor_proto.has_raw_data();
+  auto data_type = tensor_proto.data_type();
+
+  uint64_t offset = 0;
+
+  // See CopyOnnxTensorToCoreMLTensor for links to sources for info on where the different typed data is
+  // stored for ONNX and CoreML
+
+  switch (data_type) {
+    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: {
+      // from: float_data/raw, to: floats
+      if (has_raw_data) {
+        offset = WriteRawDataUsingStorageWriter<float>(tensor_proto, writer);
+      } else {
+        MILBlob::Util::Span<const float> data(tensor_proto.float_data().data(), tensor_proto.float_data().size());
+        offset = writer.WriteData(data);
+      }
+      break;
+    }
+    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16: {
+      // from: int32_data/raw, to: bytes
+      if (has_raw_data) {
+        offset = WriteRawDataUsingStorageWriter<MILBlob::Fp16>(tensor_proto, writer);
+      } else {
+        offset = WriteFromInt32DataUsingStorageWriter<uint16_t, MILBlob::Fp16>(tensor_proto, writer);
+      }
+
+      break;
+    }
+
+    case ONNX_NAMESPACE::TensorProto_DataType_INT8: {
+      // from: int32_data/raw, to: bytes
+      if (has_raw_data) {
+        offset = WriteRawDataUsingStorageWriter<int8_t>(tensor_proto, writer);
+      } else {
+        offset = WriteFromInt32DataUsingStorageWriter<int8_t, int8_t>(tensor_proto, writer);
+      }
+      break;
+    }
+    case ONNX_NAMESPACE::TensorProto_DataType_UINT8: {
+      // from: int32_data/raw, to: bytes
+      if (has_raw_data) {
+        offset = WriteRawDataUsingStorageWriter<uint8_t>(tensor_proto, writer);
+
+      } else {
+        offset = WriteFromInt32DataUsingStorageWriter<uint8_t, uint8_t>(tensor_proto, writer);
+      }
+      break;
+    }
+    default:
+      ORT_THROW("CopyOnnxTensorToCoreMLWeightsFile: Unsupported data type: ", data_type);
+  }
+
+  return offset;
+}
+
+MILSpec::Value OnnxTensorToCoreMLTensor(const ONNX_NAMESPACE::TensorProto& tensor_proto,
+                                        MILBlob::Blob::StorageWriter& weights_file_writer) {
+  MILSpec::Value value;
+
+  // populate ValueType with tensor data type, dims and rank
+  MILSpec::ValueType& value_type = *value.mutable_type();
+  MILSpec::TensorType& tensor_type = *value_type.mutable_tensortype();
+  tensor_type.set_datatype(OnnxDataTypeToMILSpec(tensor_proto.data_type()));
+
+  tensor_type.set_rank(tensor_proto.dims().size());
+  for (const auto& dim : tensor_proto.dims()) {
+    tensor_type.add_dimensions()->mutable_constant()->set_size(dim);
+  }
+
+  // add data to either weights.bin or as an immediate value
+  if (ShouldWriteInitializerToWeightsFile(tensor_proto)) {
+    uint64_t offset = CopyOnnxTensorToCoreMLWeightsFile(tensor_proto, weights_file_writer);
+
+    auto* file_value = value.mutable_blobfilevalue();
+    // Filename copied from
+    // https://github.com/apple/coremltools/blob/dbb0094fd0cb936469e35320bf37e866ef7a1da4/coremltools/converters/mil/backend/mil/helper.py#L329
+    file_value->set_filename("@model_path/weights/weight.bin");
+    file_value->set_offset(offset);
+  } else {
+    MILSpec::TensorValue& tensor_value = *value.mutable_immediatevalue()->mutable_tensor();
+    CopyOnnxTensorToCoreMLTensor(tensor_proto, tensor_value);
+  }
+
+  return value;
+}
+
+void CreateEmptyFile(const std::string& filename) {
+  std::ofstream file(filename, std::ofstream::out | std::ofstream::binary);
+  ORT_ENFORCE(file.is_open(), "Failed to open file ", filename);
 }
 
-Status ModelBuilder::Initialize() {
-  coreml_model_ = std::make_unique<CoreML::Specification::Model>();
-  {  // initialize CoreML model
+#endif  // defined(COREML_ENABLE_MLPROGRAM)
+
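Aside (not part of the patch): the interplay of the two functions above is the core of the weights-file scheme. Large initializers are appended to `weights/weight.bin` through `StorageWriter`, which returns a byte offset, and the `MILSpec::Value` then references that offset rather than embedding the bytes. A simplified mock of that reference, with a stand-in struct instead of the real protobuf type:

```cpp
// Sketch: how a weight written to weight.bin is referenced from the ML Program.
#include <cstdint>
#include <string>

struct BlobFileValue {  // stand-in for the MILSpec blob-file value message
  std::string filename;
  uint64_t offset;
};

BlobFileValue ReferenceWeight(uint64_t offset_returned_by_writer) {
  // "@model_path" is expanded to the .mlpackage root when CoreML loads the model
  return {"@model_path/weights/weight.bin", offset_returned_by_writer};
}
```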
+std::string GetModelOutputPath(bool create_ml_program) {
+  // path is used to create the ML Package directory for ML Program, and for the model directly otherwise.
+  auto path = util::GetTemporaryFilePath();
+  if (!create_ml_program) {
+    path += ".model.mlmodel";
+  }
+
+  return path;
+}
+}  // namespace
+
+ModelBuilder::ModelBuilder(const GraphViewer& graph_viewer, const logging::Logger& logger,
+                           int32_t coreml_version, uint32_t coreml_flags)
+    : graph_viewer_(graph_viewer),
+      logger_(logger),
+      coreml_version_(coreml_version),
+      coreml_flags_(coreml_flags),
+      create_ml_program_((coreml_flags_ & COREML_FLAG_CREATE_MLPROGRAM) != 0),
+      model_output_path_(GetModelOutputPath(create_ml_program_)),
+      coreml_model_(std::make_unique<CoreML::Specification::Model>()) {
+  if (create_ml_program_) {
+#if defined(COREML_ENABLE_MLPROGRAM)
+    coreml_model_->set_specificationversion(CoreMLSpecVersion());
+    MILSpec::Program& mlprogram = *coreml_model_->mutable_mlprogram();
+    MILSpec::Function& main = (*mlprogram.mutable_functions())["main"];
+
+    const std::string coreml_opset = "CoreML" + std::to_string(CoreMLVersion());
+    *main.mutable_opset() = coreml_opset;
+    mlprogram_main_ = &(*main.mutable_block_specializations())[coreml_opset];
+
+    // create the ModelPackage. this creates the output directory.
+    mlpackage_ = std::make_unique<MPL::ModelPackage>(model_output_path_, /* create */ true);
+
+    // ModelPackage::addItem does a copy of the file. Due to this we 'add' an empty file first,
+    // and do the actual writes to the file created in the package.
+    // We can't use ModelPackage::createFile as we have to add a directory for the weights.
+    std::string tmp_dir = model_output_path_ + "/tmp";
+    ORT_THROW_IF_ERROR(Env::Default().CreateFolder(ToPathString(tmp_dir)));
+    CreateEmptyFile(tmp_dir + "/weight.bin");
+
+    std::string weights_id = mlpackage_->addItem(tmp_dir, "weights", "com.microsoft.OnnxRuntime",
+                                                 "CoreML Model Weights");
+    auto weights_info = mlpackage_->findItem(weights_id);
+    weights_file_writer_ = std::make_unique<StorageWriter>(weights_info->path() + "/weight.bin");
+#else
+    // should never happen due to handling in coreml_execution_provider.cc
+    ORT_THROW("ML Program is not enabled in this build");
+#endif
+  } else {
     // We support CoreML Specification Version 4 (Core ML 3)
     coreml_model_->set_specificationversion(4);
     auto* neural_network = coreml_model_->mutable_neuralnetwork();
-    neural_network->set_arrayinputshapemapping(::CoreML::Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING);
+    neural_network->set_arrayinputshapemapping(
+        CoreML::Specification::NeuralNetworkMultiArrayShapeMapping::EXACT_ARRAY_MAPPING);
   }
+}
 
-  PreprocessInitializers();
-  ORT_RETURN_IF_ERROR(RegisterInitializers());
-  ORT_RETURN_IF_ERROR(RegisterModelInputs());
-  ORT_RETURN_IF_ERROR(AddOperations());
-  ORT_RETURN_IF_ERROR(RegisterModelOutputs());
+ModelBuilder::~ModelBuilder() = default;
 
-  return Status::OK();
+/*
+ * NeuralNetwork related helpers
+ */
+std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> ModelBuilder::CreateNNLayer(const Node& node,
+                                                                             std::string_view suffix) {
+  auto layer_name = GetUniqueName(node, suffix);
+
+  std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer = std::make_unique<COREML_SPEC::NeuralNetworkLayer>();
+  layer->set_name(layer_name);
+  return layer;
+}
+
+void ModelBuilder::AddLayer(std::unique_ptr<COREML_SPEC::NeuralNetworkLayer> layer) {
+  auto* neural_network = coreml_model_->mutable_neuralnetwork();
+  neural_network->mutable_layers()->AddAllocated(layer.release());
 }
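Aside (not part of the patch): `AddLayer` above uses the standard protobuf ownership-handoff idiom. `release()` detaches the layer from the `unique_ptr` and `AddAllocated()` makes the repeated field take ownership, so the (potentially large) layer proto is never deep-copied; `AddOperation` below does the same for `MILSpec::Operation`. A mock showing the pattern in isolation:

```cpp
// Sketch: transferring unique_ptr ownership into a protobuf-style repeated field.
#include <memory>
#include <vector>

struct Layer { /* stand-in for a large generated proto message */ };

struct RepeatedPtrFieldMock {
  std::vector<std::unique_ptr<Layer>> items;
  void AddAllocated(Layer* l) { items.emplace_back(l); }  // takes ownership, like protobuf
};

void AddLayer(RepeatedPtrFieldMock& layers, std::unique_ptr<Layer> layer) {
  layers.AddAllocated(layer.release());  // no copy; the field now owns the layer
}
```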
-/* static */ const IOpBuilder* ModelBuilder::GetOpBuilder(const Node& node) {
-  const auto& op_builders = GetOpBuilders();
-  const auto it = op_builders.find(node.OpType());
-  if (it != op_builders.cend())
-    return it->second;
+#if defined(COREML_ENABLE_MLPROGRAM)
+
+/*
+ * ML Program related helpers
+ */
+std::unique_ptr<COREML_SPEC::MILSpec::Operation> ModelBuilder::CreateOperation(const Node& node,
+                                                                               std::string_view op_type,
+                                                                               std::string_view suffix) {
+  std::string operation_name = GetUniqueName(node, suffix);
+
+  std::unique_ptr<COREML_SPEC::MILSpec::Operation> op = std::make_unique<COREML_SPEC::MILSpec::Operation>();
+  op->set_type(std::string(op_type));
+  (*op->mutable_attributes())["name"] = CreateScalarTensorValue(operation_name);
+
+  return op;
+}
+
+void ModelBuilder::AddConstant(std::string_view name, const ONNX_NAMESPACE::TensorProto& initializer) {
+  MILSpec::Value coreml_tensor = OnnxTensorToCoreMLTensor(initializer, *weights_file_writer_);
+  AddConstantOperation(name, std::move(coreml_tensor));
+}
+
+void ModelBuilder::AddConstantOperation(std::string_view name, MILSpec::Value&& coreml_tensor) {
+  // Replicates coremltools/converters/mil/backend/mil/load.py translate_const logic
+  MILSpec::Operation& const_op = *mlprogram_main_->mutable_operations()->Add();
+  const_op.set_type("const");
+
+  MILSpec::NamedValueType& output = *const_op.mutable_outputs()->Add();
+  output.set_name(std::string(name));
+  *output.mutable_type() = coreml_tensor.type();
+
+  auto& attr_map = *const_op.mutable_attributes();
+  attr_map["name"] = CreateScalarTensorValue(std::string(name));
+  attr_map["val"] = std::move(coreml_tensor);
+}
+
+// Add operation to the Block for the main function in the ML Program
+void ModelBuilder::AddOperation(std::unique_ptr<COREML_SPEC::MILSpec::Operation> operation) {
+  mlprogram_main_->mutable_operations()->AddAllocated(operation.release());
+}
+
+std::string ModelBuilder::AddTensorValueAsConstantOperation(std::string_view op_type, std::string_view value_type,
+                                                            MILSpec::Value&& input_value) {
+  auto unique_value_name = GetUniqueName(MakeString(op_type, "_", value_type));
+  AddConstantOperation(unique_value_name, std::move(input_value));
+  return unique_value_name;
+}
+
+template <typename T>
+std::string ModelBuilder::AddConstantImpl(std::string_view op_type, std::string_view value_type,
+                                          gsl::span<const T> value,
+                                          std::optional<gsl::span<const int64_t>> shape) {
+  // add specialization below
+  static_assert(false_for_T<T>, "Missing specialization for value type");
+
+  return "";  // unreachable
+}
+
+template <>
+std::string ModelBuilder::AddConstantImpl(std::string_view op_type, std::string_view value_type,
+                                          gsl::span<const float> value,
+                                          std::optional<gsl::span<const int64_t>> shape) {
+  auto input_value = CreateTensorValue<float>(value, shape);
+  return AddTensorValueAsConstantOperation(op_type, value_type, std::move(input_value));
+}
+
+template <>
+std::string ModelBuilder::AddConstantImpl(std::string_view op_type, std::string_view value_type,
+                                          gsl::span<const int64_t> value,
+                                          std::optional<gsl::span<const int64_t>> shape) {
+  auto input_value = CreateTensorValue<int64_t, int32_t>(value, shape);  // CoreML uses int32
+  return AddTensorValueAsConstantOperation(op_type, value_type, std::move(input_value));
+}
+
+template <>
+std::string ModelBuilder::AddConstantImpl(std::string_view op_type, std::string_view value_type,
+                                          gsl::span<const bool> value,
+                                          std::optional<gsl::span<const int64_t>> shape) {
+  auto input_value = CreateTensorValue<bool>(value, shape);
+  return AddTensorValueAsConstantOperation(op_type, value_type, std::move(input_value));
+}
 
-  return nullptr;
+template <>
+std::string ModelBuilder::AddConstantImpl(std::string_view op_type, std::string_view value_type,
+                                          gsl::span<const std::string> value,
+                                          std::optional<gsl::span<const int64_t>> shape) {
+  auto input_value = CreateTensorValue<std::string>(value, shape);
+  return AddTensorValueAsConstantOperation(op_type, value_type, std::move(input_value));
 }
+#endif  // defined(COREML_ENABLE_MLPROGRAM)
+
+/*
+ * General implementation
+ */
 void ModelBuilder::PreprocessInitializers() {
-  // TODO: We should
be using GetConstantInitializer not GetAllInitializedTensors in all places + // TODO: We should be using GetConstantInitializer not GetAllInitializedTensors in all places. + // non-constant initializers need to be passed in as model inputs in case they're overridden at runtime. const auto& initializers = graph_viewer_.GetAllInitializedTensors(); const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder(); @@ -64,6 +563,7 @@ void ModelBuilder::PreprocessInitializers() { initializer_usage_[input->Name()]++; } } + if (const auto* op_builder = GetOpBuilder(node)) { op_builder->AddInitializersToSkip(*this, node); } @@ -77,27 +577,34 @@ Status ModelBuilder::RegisterInitializers() { // skip initializer if there is no remaining usage auto usage_count = initializer_usage_[name]; - if (usage_count == 0) + if (usage_count == 0) { continue; + } - std::unique_ptr layer = std::make_unique(); - layer->set_name(GetUniqueName("initializer_" + name)); - - // TODO,look at using LoadConstantLayer instead of LoadConstantNDLayer - auto* constant_tensor = layer->mutable_loadconstantnd(); - const auto& shape = tensor.dims(); - if (shape.empty()) { - // This is a scalar initializer, CoreML constant layer requires a shape, make this a {1} tensor - constant_tensor->mutable_shape()->Add(1); + if (create_ml_program_) { +#if defined(COREML_ENABLE_MLPROGRAM) + AddConstant(name, tensor); +#endif } else { - std::transform(shape.cbegin(), shape.cend(), - google::protobuf::RepeatedFieldBackInserter(constant_tensor->mutable_shape()), - [](int64_t dim) -> uint64_t { return SafeInt(dim); }); - } + std::unique_ptr layer = std::make_unique(); + layer->set_name(GetUniqueName("initializer_" + name)); + + // TODO,look at using LoadConstantLayer instead of LoadConstantNDLayer + auto* constant_tensor = layer->mutable_loadconstantnd(); + const auto& shape = tensor.dims(); + if (shape.empty()) { + // This is a scalar initializer, CoreML constant layer requires a shape, make this a {1} tensor + constant_tensor->mutable_shape()->Add(1); + } else { + std::transform(shape.cbegin(), shape.cend(), + google::protobuf::RepeatedFieldBackInserter(constant_tensor->mutable_shape()), + [](int64_t dim) -> uint64_t { return SafeInt(dim); }); + } - ORT_RETURN_IF_ERROR(CreateCoreMLWeight(*constant_tensor->mutable_data(), tensor)); - *layer->mutable_output()->Add() = name; - AddLayer(std::move(layer)); + ORT_RETURN_IF_ERROR(CreateCoreMLWeight(*constant_tensor->mutable_data(), tensor)); + *layer->mutable_output()->Add() = name; + AddLayer(std::move(layer)); + } } return Status::OK(); @@ -179,15 +686,15 @@ Status ModelBuilder::RegisterModelInputOutput(const NodeArg& node_arg, bool is_i data_type = type_proto->tensor_type().elem_type(); switch (data_type) { case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: - multi_array->set_datatype(COREML_SPEC::ArrayFeatureType::FLOAT32); + multi_array->set_datatype(ArrayFeatureType::FLOAT32); break; case ONNX_NAMESPACE::TensorProto_DataType_INT32: - multi_array->set_datatype(COREML_SPEC::ArrayFeatureType::INT32); + multi_array->set_datatype(ArrayFeatureType::INT32); break; case ONNX_NAMESPACE::TensorProto_DataType_INT64: // If we have an int64 input/output type, since COREML_SPEC:ArrayFeatureType does not support INT64 // we assign it to be INT32 here - multi_array->set_datatype(COREML_SPEC::ArrayFeatureType::INT32); + multi_array->set_datatype(ArrayFeatureType::INT32); if (!is_input) { // Record the output names and we need to change them back to Int64 when CoreML EP returns these values to ORT 
AddInt64Output(name); @@ -204,6 +711,19 @@ Status ModelBuilder::RegisterModelInputOutput(const NodeArg& node_arg, bool is_i input_output_info_.emplace(name, OnnxTensorInfo{data_type, shape}); +#if defined(COREML_ENABLE_MLPROGRAM) + if (create_ml_program_) { + MILSpec::Function& main = (*coreml_model_->mutable_mlprogram()->mutable_functions())["main"]; + if (is_input) { + // the model inputs need to be wired up as args to the 'main' function + main.mutable_inputs()->Add(CreateNamedTensorValueType(node_arg)); + } else { + // the model outputs need to be set as outputs of the Block for the 'main' function + *mlprogram_main_->mutable_outputs()->Add() = node_arg.Name(); + } + } +#endif // defined(COREML_ENABLE_MLPROGRAM) + return Status::OK(); } @@ -215,16 +735,16 @@ Status ModelBuilder::RegisterModelInputs() { return Status::OK(); } -Status ModelBuilder::AddOperations() { - const auto builder_params = MakeOpBuilderParams(graph_viewer_, coreml_flags_); - const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder(); - for (size_t i = 0; i < node_indices.size(); i++) { - const auto* node(graph_viewer_.GetNode(node_indices[i])); - if (const auto* op_builder = GetOpBuilder(*node)) { - ORT_RETURN_IF_ERROR(op_builder->AddToModelBuilder(*this, *node, builder_params, logger_)); +Status ModelBuilder::ProcessNodes() { + for (const auto node_idx : graph_viewer_.GetNodesInTopologicalOrder()) { + const auto& node = *graph_viewer_.GetNode(node_idx); + if (const auto* op_builder = GetOpBuilder(node)) { + ORT_RETURN_IF_ERROR(op_builder->AddToModelBuilder(*this, node, logger_)); } else { + // This shouldn't happen as this is called from CoreMLExecutionProvider::Compile and should only be processing + // nodes that we said were supported and were returned from CoreMLExecutionProvider::GetCapability. 
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, - "Node [", node->Name(), "], type [", node->OpType(), "] is not supported"); + "Node [", node.Name(), "], type [", node.OpType(), "] is not supported"); } } @@ -239,29 +759,72 @@ Status ModelBuilder::RegisterModelOutputs() { return Status::OK(); } -Status ModelBuilder::Compile(std::unique_ptr& model, const std::string& path) { - ORT_RETURN_IF_ERROR(SaveCoreMLModel(path)); - model.reset(new Model(path, logger_, coreml_flags_)); - model->SetScalarOutputs(std::move(scalar_outputs_)); - model->SetInt64Outputs(std::move(int64_outputs_)); - model->SetInputOutputInfo(std::move(input_output_info_)); - return model->LoadModel(); +Status ModelBuilder::CreateModel() { + PreprocessInitializers(); + + ORT_RETURN_IF_ERROR(RegisterInitializers()); + ORT_RETURN_IF_ERROR(RegisterModelInputs()); + ORT_RETURN_IF_ERROR(ProcessNodes()); + ORT_RETURN_IF_ERROR(RegisterModelOutputs()); + + return Status::OK(); } -Status ModelBuilder::SaveCoreMLModel(const std::string& path) { - ORT_RETURN_IF_ERROR(Initialize()); - std::ofstream stream(path, std::ofstream::out | std::ofstream::binary); - ORT_RETURN_IF_NOT(coreml_model_->SerializeToOstream(&stream), "Save the CoreML model failed"); +Status ModelBuilder::SaveModel() { + std::string output_path = model_output_path_; + +#if defined(COREML_ENABLE_MLPROGRAM) + if (create_ml_program_) { + std::string tmp_model_path = model_output_path_ + "/tmp/model.mlmodel"; + CreateEmptyFile(tmp_model_path); + + std::string model_id = mlpackage_->setRootModel(tmp_model_path, "model.mlmodel", "com.microsoft.OnnxRuntime", + "CoreML Model Specification"); + auto model_info = mlpackage_->findItem(model_id); + output_path = model_info->path(); + } +#endif - // TODO, Delete, debug only - if (const char* path = std::getenv("ORT_COREML_EP_CONVERTED_MODEL_PATH")) { - std::ofstream temp_stream(path, std::ofstream::out | std::ofstream::binary); - ORT_RETURN_IF_NOT(coreml_model_->SerializeToOstream(&temp_stream), "Save the CoreML model failed"); + // scope this so the stream is closed and flushed by the ofstream dtor + { + LOGS(logger_, INFO) << "Writing CoreML Model to " << output_path; + std::ofstream stream(output_path, std::ofstream::out | std::ofstream::binary); + ORT_RETURN_IF_NOT(coreml_model_->SerializeToOstream(&stream), "Saving the CoreML model failed. Path=", output_path); } +#if defined(COREML_ENABLE_MLPROGRAM) + // need to delete the ModelPackage instance for it to write out the manifest. clear out the other ML Program + // related types as well. 
+ mlprogram_main_ = nullptr; + mlpackage_.reset(); + weights_file_writer_.reset(); +#endif + return Status::OK(); } +Status ModelBuilder::LoadModel(std::unique_ptr& model) { + model = std::make_unique(model_output_path_, + std::move(input_output_info_), + std::move(scalar_outputs_), + std::move(int64_outputs_), + logger_, coreml_flags_); + + return model->LoadModel(); // load using CoreML API, including compilation +} + +// static +Status ModelBuilder::Build(const GraphViewer& graph_viewer, const logging::Logger& logger, + int32_t coreml_version, uint32_t coreml_flags, + std::unique_ptr& model) { + ModelBuilder builder(graph_viewer, logger, coreml_version, coreml_flags); + + ORT_RETURN_IF_ERROR(builder.CreateModel()); + ORT_RETURN_IF_ERROR(builder.SaveModel()); + + return builder.LoadModel(model); +} + void ModelBuilder::AddScalarOutput(const std::string& output_name) { scalar_outputs_.insert(output_name); } @@ -270,11 +833,6 @@ void ModelBuilder::AddInt64Output(const std::string& output_name) { int64_outputs_.insert(output_name); } -void ModelBuilder::AddLayer(std::unique_ptr layer) { - auto* neural_network = coreml_model_->mutable_neuralnetwork(); - neural_network->mutable_layers()->AddAllocated(layer.release()); -} - void ModelBuilder::AddInitializerToSkip(const std::string& tensor_name) { // decrement usage count if this is a known initializer. // For simplicity the OpBuilder::AddInitializersToSkip implementations may call this for arbitrary input names @@ -289,7 +847,7 @@ void ModelBuilder::AddInputToSkip(const std::string& input_name) { skipped_inputs_.insert(input_name); } -std::string ModelBuilder::GetUniqueName(const std::string& base_name) { +std::string ModelBuilder::GetUniqueName(std::string_view base_name) { std::string unique_name; do { std::ostringstream os; @@ -300,5 +858,12 @@ std::string ModelBuilder::GetUniqueName(const std::string& base_name) { return unique_name; } +std::string ModelBuilder::GetUniqueName(const Node& node, std::string_view suffix) { + if (node.Name().empty()) { + return GetUniqueName(MakeString("Node_", node.Index(), "_", node.OpType(), suffix)); + } else { + return GetUniqueName(node.Name() + std::string(suffix)); + } +} } // namespace coreml } // namespace onnxruntime diff --git a/onnxruntime/core/providers/coreml/builders/model_builder.h b/onnxruntime/core/providers/coreml/builders/model_builder.h index af2d5437be8d1..961ba647257b5 100644 --- a/onnxruntime/core/providers/coreml/builders/model_builder.h +++ b/onnxruntime/core/providers/coreml/builders/model_builder.h @@ -3,57 +3,171 @@ #pragma once +#include "core/common/span_utils.h" #include "core/graph/graph_viewer.h" #include "core/providers/coreml/builders/coreml_spec.h" +#include "core/providers/coreml/model/model.h" + +#if defined(COREML_ENABLE_MLPROGRAM) +// coremltools classes +namespace MPL { +class ModelPackage; +} + +namespace MILBlob { +namespace Blob { +class StorageWriter; +} +} // namespace MILBlob +#endif namespace onnxruntime { namespace coreml { class IOpBuilder; class Model; -struct OnnxTensorInfo; class ModelBuilder { + private: + ModelBuilder(const GraphViewer& graph_viewer, const logging::Logger& logger, + int32_t coreml_version, uint32_t coreml_flags); + public: - ModelBuilder(const GraphViewer& graph_viewer, const logging::Logger& logger, uint32_t coreml_flags); - ~ModelBuilder() = default; + // Create the CoreML model, serialize to disk, load and compile using the CoreML API and return in `model` + static Status Build(const GraphViewer& graph_viewer, const logging::Logger& 
logger, + int32_t coreml_version, uint32_t coreml_flags, + std::unique_ptr& model); - Status Compile(std::unique_ptr& model, const std::string& path); - Status SaveCoreMLModel(const std::string& path); + ~ModelBuilder(); - // Accessors for members const GraphViewer& GetGraphViewer() const { return graph_viewer_; } const InitializedTensorSet& GetInitializerTensors() const { return graph_viewer_.GetAllInitializedTensors(); } - + const ONNX_NAMESPACE::TensorProto* GetConstantInitializer(const std::string& name) const { + return graph_viewer_.GetConstantInitializer(name, true); + } + + // Since CoreML 2 the spec version is +1 as CoreML 1.1 was spec version 2. + // We only support CoreML 3 and later so the spec version is always version + 1. + int32_t CoreMLVersion() const { return coreml_version_; } + int32_t CoreMLSpecVersion() const { return coreml_version_ + 1; } + + // Returns true if we are creating an ML Program + bool CreateMLProgram() const { +#if defined(COREML_ENABLE_MLPROGRAM) + return create_ml_program_; +#else + return false; +#endif + } + + /* + * NeuralNetworkLayer helpers + */ + + // Create a NeuralNetwork layer using the node name and optional suffix for the name. + // If Node has no name a unique name will be generated from the node index and operator. + std::unique_ptr CreateNNLayer(const Node& node, std::string_view suffix = ""); + + // Add layer to the Core ML NeuralNetwork model void AddLayer(std::unique_ptr layer); - // The initializer will be processed separately, skip it as an initializer +#if defined(COREML_ENABLE_MLPROGRAM) + /* + * MLProgram helpers + */ + + // Create Operation, set type and the unique name attribute. + std::unique_ptr CreateOperation(const Node& node, std::string_view op_type, + std::string_view suffix = ""); + + // + // Helpers for adding attributes from ONNX nodes as inputs to an ML Program Operation + // + + /// + /// Add a value as a 'const' operation, generating a unique name for the value from op_type and value_type. + /// Use for values that were not initializers in the original ONNX model. e.g. attributes from ONNX nodes. + /// Add existing initializers using AddConstant with the TensorProto. + /// + /// e.g. adding the bias input of Gemm would have op_type='gemm' and value_type='bias'. + /// + /// Value type. + /// Typically MILSpec::Operation.type(). + /// Typically the input name of the operation that will consume the value. + /// Value to add. + /// Optional shape for the value. + /// If T is a primitive type `shape` is ignored and the value is treated as a scalar. + /// For a container type, if `shape` is not provided the shape is inferred to be 1-D of {value.size()}. + /// + /// Unique name generated for value. + template + std::string AddConstant(std::string_view op_type, std::string_view value_type, gsl::span value, + std::optional> shape = std::nullopt) { + static_assert(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v, + // add specialization in AddConstantImpl for new types if needed + "AddConstant currently supports float, int64_t, std::string and bool."); + return AddConstantImpl(op_type, value_type, value, shape); + } + + template + std::string AddConstant(std::string_view op_type, std::string_view value_type, const std::vector& value, + std::optional> shape = std::nullopt) { + return AddConstant(op_type, value_type, AsSpan(value), shape); + } + + /// + /// Add a scalar value as a 'const' operation. See AddConstant for details. 
+ /// + template + std::string AddScalarConstant(std::string_view op_type, std::string_view value_type, const T& value) { + return AddConstant(op_type, value_type, AsSpan({value}), AsSpan({})); + } + + /// + /// Add an existing a constant ONNX initializer to the ML Program as a 'const' operation + /// + /// Initializer name + /// Initializer data + void AddConstant(std::string_view name, const ONNX_NAMESPACE::TensorProto& initializer); + + // add the operation to the main function + void AddOperation(std::unique_ptr operation); +#endif + + /* + * General helpers + */ + + // The initializer is processed separately (e.g. layout is transformed) by the operator builder, + // so we don't do a copy of the original initializer into the model. void AddInitializerToSkip(const std::string& tensor_name); // There are some input which will not be used, add it to a list which will not // be added to CoreML model, since CoreML does not like input unused void AddInputToSkip(const std::string& input_name); - std::string GetUniqueName(const std::string& base_name); + std::string GetUniqueName(std::string_view base_name); + std::string GetUniqueName(const Node& node, std::string_view suffix); private: - const GraphViewer& graph_viewer_; - const logging::Logger& logger_; - uint32_t coreml_flags_; - - std::unique_ptr coreml_model_; - std::unordered_set scalar_outputs_; - std::unordered_set int64_outputs_; - std::unordered_map input_output_info_; - - std::unordered_map initializer_usage_; - std::unordered_set skipped_inputs_; - - uint32_t name_token_{0}; - std::unordered_set unique_names_; - - // Convert the onnx model to CoreML::Specification::Model - Status Initialize(); +#if defined(COREML_ENABLE_MLPROGRAM) + template + std::string AddConstantImpl(std::string_view op_type, std::string_view value_type, gsl::span value, + std::optional> shape = std::nullopt); + + void AddConstantOperation(std::string_view name, COREML_SPEC::MILSpec::Value&& initializer); + std::string AddTensorValueAsConstantOperation(std::string_view op_type, std::string_view value_type, + COREML_SPEC::MILSpec::Value&& input_value); +#endif + + // Convert the ONNX model in graph_viewer_ to a CoreML::Specification::Model and serialize to disk. + // We then load it using CoreML in order compile it. + Status CreateModel(); + Status SaveModel(); + Status LoadModel(std::unique_ptr& model); // If a CoreML operation will use initializers directly, we will add the initializers to the skip list void PreprocessInitializers(); @@ -61,7 +175,7 @@ class ModelBuilder { // Copy and process all the initializers to CoreML model Status RegisterInitializers(); - Status AddOperations(); + Status ProcessNodes(); Status RegisterModelInputs(); Status RegisterModelOutputs(); Status RegisterModelInputOutput(const NodeArg& node_arg, bool is_input); @@ -72,7 +186,32 @@ class ModelBuilder { // Record the onnx int64 type output names void AddInt64Output(const std::string& output_name); - static const IOpBuilder* GetOpBuilder(const Node& node); + const GraphViewer& graph_viewer_; + const logging::Logger& logger_; + const int32_t coreml_version_; + const uint32_t coreml_flags_; + const bool create_ml_program_; // ML Program (CoreML5, iOS 15+, macOS 12+) or NeuralNetwork (old) + const std::string model_output_path_; // create_ml_program_ ? 
dir for mlpackage : filename for mlmodel + + std::unique_ptr coreml_model_; + std::unordered_set scalar_outputs_; + std::unordered_set int64_outputs_; + std::unordered_map input_output_info_; + + std::unordered_map initializer_usage_; + std::unordered_set skipped_inputs_; + + uint32_t name_token_{0}; + std::unordered_set unique_names_; + +#if defined(COREML_ENABLE_MLPROGRAM) + // mlprogram_main_ is the main block of the CoreML ML Program. + // It is set in CreateModel to the CoreML Model.mlprogram.functions['main'].block_specializations['CoreML'] + // entry we create. + COREML_SPEC::MILSpec::Block* mlprogram_main_{nullptr}; + std::unique_ptr mlpackage_; + std::unique_ptr weights_file_writer_; +#endif }; } // namespace coreml diff --git a/onnxruntime/core/providers/coreml/builders/op_builder.h b/onnxruntime/core/providers/coreml/builders/op_builder.h index 79de6438c9700..0bb7f280c33e6 100644 --- a/onnxruntime/core/providers/coreml/builders/op_builder.h +++ b/onnxruntime/core/providers/coreml/builders/op_builder.h @@ -11,36 +11,39 @@ namespace coreml { class ModelBuilder; struct OpBuilderInputParams { - OpBuilderInputParams(const GraphViewer& graph_viewer, bool only_allow_static_input_shapes) + OpBuilderInputParams(const GraphViewer& graph_viewer, + int32_t coreml_version, + bool only_allow_static_input_shapes, + bool create_mlprogram) : graph_viewer(graph_viewer), - only_allow_static_input_shapes(only_allow_static_input_shapes) {} + coreml_version(coreml_version), + only_allow_static_input_shapes(only_allow_static_input_shapes), + create_mlprogram(create_mlprogram) {} const GraphViewer& graph_viewer; + const int32_t coreml_version; // required to determine which version of an operation can be used. const bool only_allow_static_input_shapes; + const bool create_mlprogram; // whether to create ML Program (Core ML 5+) or NeuralNetwork (Core ML 3+) }; class IOpBuilder { public: virtual ~IOpBuilder() = default; - // Add operator related -#ifdef __APPLE__ - public: // Check if the initializers of this operator need preprocess // which will not be copied virtual void AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const = 0; // Add the operator to CoreML model virtual Status AddToModelBuilder(ModelBuilder& model_builder, const Node& node, - const OpBuilderInputParams& input_params, const logging::Logger& logger) const = 0; -#endif - // Operator support related - public: // Check if an operator is supported virtual bool IsOpSupported(const Node& node, const OpBuilderInputParams& input_params, const logging::Logger& logger) const = 0; + + // Does the builder implementation support creating an ML Program? 
+ virtual bool SupportsMLProgram() const = 0; }; } // namespace coreml diff --git a/onnxruntime/core/providers/coreml/builders/op_builder_factory.h b/onnxruntime/core/providers/coreml/builders/op_builder_factory.h index d72420bcfff88..6469b4cefa5ea 100644 --- a/onnxruntime/core/providers/coreml/builders/op_builder_factory.h +++ b/onnxruntime/core/providers/coreml/builders/op_builder_factory.h @@ -3,7 +3,7 @@ #pragma once -#include "op_builder.h" +#include "core/providers/coreml/builders/op_builder.h" namespace onnxruntime { namespace coreml { diff --git a/onnxruntime/core/providers/coreml/coreml_execution_provider.cc b/onnxruntime/core/providers/coreml/coreml_execution_provider.cc index c133f7b82aba4..8e718da07703c 100644 --- a/onnxruntime/core/providers/coreml/coreml_execution_provider.cc +++ b/onnxruntime/core/providers/coreml/coreml_execution_provider.cc @@ -2,9 +2,11 @@ // Licensed under the MIT License. #include "core/providers/coreml/coreml_execution_provider.h" +#include "core/providers/coreml/coreml_provider_factory.h" // defines flags #include +#include "core/common/logging/logging.h" #include "core/framework/compute_capability.h" #include "core/framework/tensorprotoutils.h" #include "core/graph/graph_viewer.h" @@ -12,12 +14,10 @@ #include "core/providers/partitioning_utils.h" #include "core/session/onnxruntime_cxx_api.h" -#ifdef __APPLE__ #include "core/providers/coreml/builders/model_builder.h" #include "core/providers/coreml/model/host_utils.h" #include "core/providers/coreml/model/model.h" #include "core/providers/coreml/shape_utils.h" -#endif namespace onnxruntime { @@ -25,7 +25,24 @@ constexpr const char* COREML = "CoreML"; CoreMLExecutionProvider::CoreMLExecutionProvider(uint32_t coreml_flags) : IExecutionProvider{onnxruntime::kCoreMLExecutionProvider}, - coreml_flags_(coreml_flags) { + coreml_flags_(coreml_flags), + coreml_version_(coreml::util::CoreMLVersion()) { + if (coreml_version_ < MINIMUM_COREML_VERSION) { + LOGS_DEFAULT(ERROR) << "CoreML EP is not supported on this platform."; + } + +#if defined(COREML_ENABLE_MLPROGRAM) + if (coreml_version_ < MINIMUM_COREML_MLPROGRAM_VERSION && + (coreml_flags_ & COREML_FLAG_CREATE_MLPROGRAM) != 0) { + LOGS_DEFAULT(WARNING) << "ML Program is not supported on this OS version. Falling back to NeuralNetwork."; + coreml_flags_ ^= COREML_FLAG_CREATE_MLPROGRAM; + } +#else + if ((coreml_flags_ & COREML_FLAG_CREATE_MLPROGRAM) != 0) { + LOGS_DEFAULT(WARNING) << "ML Program is not supported in this build. Falling back to NeuralNetwork."; + coreml_flags_ ^= COREML_FLAG_CREATE_MLPROGRAM; + } +#endif } CoreMLExecutionProvider::~CoreMLExecutionProvider() {} @@ -35,28 +52,34 @@ CoreMLExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_vie const IKernelLookup& /*kernel_lookup*/) const { std::vector> result; - // We do not run CoreML EP on subgraph, instead we cover this in the control flow nodes - // TODO investigate whether we want to support subgraph using CoreML EP - if (graph_viewer.IsSubgraph() && !(coreml_flags_ & COREML_FLAG_ENABLE_ON_SUBGRAPH)) { + if (coreml_version_ < MINIMUM_COREML_VERSION) { return result; } const auto& logger = *GetLogger(); + // We do not run CoreML EP on subgraph, instead we cover this in the control flow nodes + // TODO investigate whether we want to support subgraph using CoreML EP. May simply require processing the + // implicit inputs of the control flow node that contains the subgraph as inputs to the CoreML model we generate. 
+ if (graph_viewer.IsSubgraph() && !(coreml_flags_ & COREML_FLAG_ENABLE_ON_SUBGRAPH)) { + return result; + } + const bool has_neural_engine = coreml::HasNeuralEngine(logger); if ((coreml_flags_ & COREML_FLAG_ONLY_ENABLE_DEVICE_WITH_ANE) && !has_neural_engine) { - LOGS(logger, VERBOSE) << "The current system does not have Apple Neural Engine"; + LOGS(logger, WARNING) << "The current system does not have Apple Neural Engine. CoreML EP will not be used."; return result; } - const auto builder_params = coreml::MakeOpBuilderParams(graph_viewer, coreml_flags_); + const auto builder_params = coreml::MakeOpBuilderParams(graph_viewer, coreml_version_, coreml_flags_); const auto supported_nodes = coreml::GetSupportedNodes(graph_viewer, builder_params, logger); - const auto gen_metadef_name = [&]() { - HashValue model_hash; - int metadef_id = metadef_id_generator_.GenerateId(graph_viewer, model_hash); - return MakeString(COREML, "_", model_hash, "_", metadef_id); - }; + const auto gen_metadef_name = + [&]() { + HashValue model_hash; + int metadef_id = metadef_id_generator_.GenerateId(graph_viewer, model_hash); + return MakeString(COREML, "_", model_hash, "_", metadef_id); + }; result = utils::CreateSupportedPartitions(graph_viewer, supported_nodes, {}, gen_metadef_name, COREML, kCoreMLExecutionProvider); @@ -86,17 +109,16 @@ CoreMLExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_vie return result; } -#ifdef __APPLE__ +#if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD) common::Status CoreMLExecutionProvider::Compile(const std::vector& fused_nodes_and_graphs, std::vector& node_compute_funcs) { for (const auto& fused_node_and_graph : fused_nodes_and_graphs) { Node& fused_node = fused_node_and_graph.fused_node; const onnxruntime::GraphViewer& graph_viewer(fused_node_and_graph.filtered_graph); - coreml::ModelBuilder builder(graph_viewer, *GetLogger(), coreml_flags_); std::unique_ptr coreml_model; - const std::string coreml_model_file_path = coreml::util::GetTemporaryFilePath(); - ORT_RETURN_IF_ERROR(builder.Compile(coreml_model, coreml_model_file_path)); + ORT_RETURN_IF_ERROR(coreml::ModelBuilder::Build(graph_viewer, *GetLogger(), coreml_version_, coreml_flags_, + coreml_model)); { const auto& input_defs = fused_node.InputDefs(); @@ -241,22 +263,6 @@ common::Status CoreMLExecutionProvider::Compile(const std::vector& fused_nodes_and_graphs, - std::vector& node_compute_funcs) { - for (const auto& fused_node_and_graph : fused_nodes_and_graphs) { - ORT_UNUSED_PARAMETER(fused_node_and_graph); - NodeComputeInfo compute_info; - compute_info.create_state_func = [](ComputeContext* /*context*/, FunctionState* /*state*/) { return 0; }; - compute_info.release_state_func = [](FunctionState /*state*/) {}; - compute_info.compute_func = [](FunctionState /* state */, const OrtApi* /* api */, - OrtKernelContext* /* context */) { - return ORT_MAKE_STATUS(ONNXRUNTIME, NOT_IMPLEMENTED, "Compute is not supported in this build."); - }; - node_compute_funcs.push_back(compute_info); - } - return Status::OK(); -} -#endif //__APPLE__ +#endif // !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD) } // namespace onnxruntime diff --git a/onnxruntime/core/providers/coreml/coreml_execution_provider.h b/onnxruntime/core/providers/coreml/coreml_execution_provider.h index 0201739547dd1..24a001280eef5 100644 --- a/onnxruntime/core/providers/coreml/coreml_execution_provider.h +++ b/onnxruntime/core/providers/coreml/coreml_execution_provider.h @@ -3,9 +3,9 @@ #pragma once +#include 
"core/common/inlined_containers.h" #include "core/framework/execution_provider.h" #include "core/framework/model_metadef_id_generator.h" -#include "core/providers/coreml/coreml_provider_factory.h" namespace onnxruntime { namespace coreml { @@ -26,15 +26,14 @@ class CoreMLExecutionProvider : public IExecutionProvider { std::vector& node_compute_funcs) override; #endif + private: // The bit flags which define bool options for COREML EP, bits are defined as // COREMLFlags in include/onnxruntime/core/providers/coreml/coreml_provider_factory.h - const uint32_t coreml_flags_; - - private: -// > -#ifdef __APPLE__ - std::unordered_map> coreml_models_; -#endif + uint32_t coreml_flags_; + const int32_t coreml_version_; ModelMetadefIdGenerator metadef_id_generator_; + + // map of fused_node_name to compiled_coreml_model + InlinedHashMap> coreml_models_; }; } // namespace onnxruntime diff --git a/onnxruntime/core/providers/coreml/model/host_utils.h b/onnxruntime/core/providers/coreml/model/host_utils.h index f7f45bce087bc..4f9a014c4d885 100644 --- a/onnxruntime/core/providers/coreml/model/host_utils.h +++ b/onnxruntime/core/providers/coreml/model/host_utils.h @@ -8,10 +8,50 @@ #include -#define API_AVAILABLE_OS_VERSIONS API_AVAILABLE(macos(10.15), ios(13)) +#if defined(__APPLE__) +// See https://apple.github.io/coremltools/mlmodel/Format/Model.html for the info on each CoreML specification version. +// See https://apple.github.io/coremltools/source/coremltools.converters.mil.mil.ops.defs.html for the list of ops +// in each CoreML specification version. -// Base requireed OS to run CoreML Specification Version 4 (Core ML 3) -#define HAS_VALID_BASE_OS_VERSION @available(macOS 10.15, iOS 13, *) +// Specification Versions : OS Availability(Core ML Version) +// +// 4 : iOS 13, macOS 10.15, tvOS 13, watchOS 6 (Core ML 3) +// - initial version of CoreML EP +// 5 : iOS 14, macOS 11, tvOS 14, watchOS 7 (Core ML 4) +// - additional layers in NeuralNetwork but currently none are implemented by the CoreML EP +// 6 : iOS 15, macOS 12, tvOS 15, watchOS 8 (Core ML 5) +// - adds MLProgram (MILSpec.Program) +// - iOS 15 ops +// 7 : iOS 16, macOS 13, tvOS 16, watchOS 9 (Core ML 6) +// - iOS 16 ops +// 8 : iOS 17, macOS 14, tvOS 17, watchOS 10 (Core ML 7) +// - iOS 17 ops +// +// **NOTE** We use the Core ML version not the spec version. +// +// e.g. iOS 13 has Core ML 3 (which is Core ML Specification version 4), and the related macros are +// API_AVAILABLE_COREML3, HAS_COREML3_OR_LATER and onnxruntime::coreml::util::CoreMLVersion() will return 3. 
+
+// https://developer.apple.com/documentation/swift/marking-api-availability-in-objective-c
+// API_AVAILABLE is used to decorate Objective-C APIs
+#define API_AVAILABLE_COREML3 API_AVAILABLE(macos(10.15), ios(13))
+#define API_AVAILABLE_COREML4 API_AVAILABLE(macos(11), ios(14))
+#define API_AVAILABLE_COREML5 API_AVAILABLE(macos(12), ios(15))
+#define API_AVAILABLE_COREML6 API_AVAILABLE(macos(13), ios(16))
+#define API_AVAILABLE_COREML7 API_AVAILABLE(macos(14), ios(17))
+
+// @available is used in implementation code
+// Base required OS to run CoreML Specification Version 4 (Core ML 3)
+#define HAS_COREML3_OR_LATER @available(macOS 10.15, iOS 13, *)
+#define HAS_COREML4_OR_LATER @available(macOS 11, iOS 14, *)
+#define HAS_COREML5_OR_LATER @available(macOS 12, iOS 15, *)
+#define HAS_COREML6_OR_LATER @available(macOS 13, iOS 16, *)
+#define HAS_COREML7_OR_LATER @available(macOS 14, iOS 17, *)
+
+#endif
+
+#define MINIMUM_COREML_VERSION 3            // first version we support
+#define MINIMUM_COREML_MLPROGRAM_VERSION 5  // first version where ML Program was available
 
 namespace onnxruntime {
 namespace coreml {
@@ -21,6 +61,9 @@ namespace util {
 // This corresponds to [CoreML Specification Version 4 (Core ML 3)]
 bool HasRequiredBaseOS();
 
+// Return the CoreML version if 3 or higher. Otherwise returns -1.
+int32_t CoreMLVersion();
+
 // Get a temporary macOS/iOS temp file path
 std::string GetTemporaryFilePath();
 
diff --git a/onnxruntime/core/providers/coreml/model/host_utils.mm b/onnxruntime/core/providers/coreml/model/host_utils.mm
index 4c394386cd37a..0ae0cf8f0d207 100644
--- a/onnxruntime/core/providers/coreml/model/host_utils.mm
+++ b/onnxruntime/core/providers/coreml/model/host_utils.mm
@@ -10,19 +10,33 @@ namespace util {
 
 bool HasRequiredBaseOS() {
-  // This may look strange, but it is required "@available(macOS ....)" to safe-guard some code
-  // otherwise the compiler will spit -Wunsupported-availability-guard
-  if (HAS_VALID_BASE_OS_VERSION)
-    return true;
-  else
-    return false;
+  return CoreMLVersion() >= 3;
+}
+
+int32_t CoreMLVersion() {
+  if (HAS_COREML7_OR_LATER)
+    return 7;
+  if (HAS_COREML6_OR_LATER)
+    return 6;
+  if (HAS_COREML5_OR_LATER)
+    return 5;
+  if (HAS_COREML4_OR_LATER)
+    return 4;
+  if (HAS_COREML3_OR_LATER)
+    return 3;
+
+  return -1;
 }
 
 std::string GetTemporaryFilePath() {
-  // Get temporary directory.
+  // Get temporary directory for user.
   NSURL* temporary_directory_url = [NSURL fileURLWithPath:NSTemporaryDirectory() isDirectory:YES];
   // Generate a Unique file name to use.
   NSString* temporary_filename = [[NSProcessInfo processInfo] globallyUniqueString];
+
+  // make it easy to see who generated it
+  temporary_filename = [@"onnxruntime-" stringByAppendingString:temporary_filename];
+
   // Create URL to that file.
   NSURL* temporary_file_url = [temporary_directory_url URLByAppendingPathComponent:temporary_filename];
 
diff --git a/onnxruntime/core/providers/coreml/model/host_utils_stub.cc b/onnxruntime/core/providers/coreml/model/host_utils_stub.cc
new file mode 100644
index 0000000000000..5c383b0274e8c
--- /dev/null
+++ b/onnxruntime/core/providers/coreml/model/host_utils_stub.cc
@@ -0,0 +1,40 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include <atomic>
+
+#include "core/platform/env.h"
+#include "core/providers/coreml/model/host_utils.h"
+
+namespace onnxruntime {
+namespace coreml {
+namespace util {
+
+bool HasRequiredBaseOS() {
+  return true;
+}
+
+int32_t CoreMLVersion() {
+  return 7;  // CoreML 7 is the latest we support.
+} + +std::string GetTemporaryFilePath() { + static std::atomic counter = 0; + + // we want to avoid creating endless directories/names whilst avoiding clashes if tests run in parallel so cycle + // through 20 potential output names. + auto dir_name = "coreml_ep_test_run." + std::to_string(counter++ % 20); + + // to replicate the iOS/macOS host_utils.mm behavior where the output is / + // we want to return the name of something that does not exist. this is required for ML Package creation. + auto& env = Env::Default(); + if (env.FolderExists(dir_name)) { + ORT_THROW_IF_ERROR(env.DeleteFolder(ToPathString(dir_name))); + } + + return dir_name; +} + +} // namespace util +} // namespace coreml +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/coreml/model/model.h b/onnxruntime/core/providers/coreml/model/model.h index 105b6a0333b15..b940c4b768aec 100644 --- a/onnxruntime/core/providers/coreml/model/model.h +++ b/onnxruntime/core/providers/coreml/model/model.h @@ -33,19 +33,29 @@ using GetOutputTensorMutableRawDataFn = std::function static_shape)>; class Model { - friend class ModelBuilder; - public: + Model(const std::string& path, + std::unordered_map&& input_output_info, + std::unordered_set&& scalar_outputs, + std::unordered_set&& int64_outputs, + const logging::Logger& logger, uint32_t coreml_flags); + ~Model(); ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Model); + Status LoadModel(); + Status Predict(const std::unordered_map& inputs, const std::unordered_map& outputs, const GetOutputTensorMutableRawDataFn& get_output_tensor_mutable_raw_data_fn); - bool IsScalarOutput(const std::string& output_name) const; + bool IsScalarOutput(const std::string& output_name) const { + return Contains(scalar_outputs_, output_name); + } - bool IsInt64Output(const std::string& output_name) const; + bool IsInt64Output(const std::string& output_name) const { + return Contains(int64_outputs_, output_name); + } // Mutex for exclusive lock to this model object OrtMutex& GetMutex() { return mutex_; } @@ -57,35 +67,27 @@ class Model { const std::vector& GetOnnxOutputs() const { return onnx_outputs_; } void SetOnnxOutputs(std::vector&& outputs) { onnx_outputs_ = std::move(outputs); } - const OnnxTensorInfo* TryGetInputOutputInfo(const std::string& name) const; - const OnnxTensorInfo& GetInputOutputInfo(const std::string& name) const; + const OnnxTensorInfo* TryGetInputOutputInfo(const std::string& name) const { + const auto info_it = input_output_info_.find(name); + return info_it != input_output_info_.end() ? 
&info_it->second : nullptr; + } + + const OnnxTensorInfo& GetInputOutputInfo(const std::string& name) const { + const auto* info = TryGetInputOutputInfo(name); + ORT_ENFORCE(info != nullptr, "Failed to get info for input/output: ", name); + return *info; + } private: std::unique_ptr execution_; + std::unordered_map input_output_info_; std::unordered_set scalar_outputs_; std::unordered_set int64_outputs_; std::vector onnx_inputs_; std::vector onnx_outputs_; - std::unordered_map input_output_info_; - OrtMutex mutex_; - - Model(const std::string& path, const logging::Logger& logger, uint32_t coreml_flags); - Status LoadModel(); - - void SetInputOutputInfo(std::unordered_map&& input_output_info) { - input_output_info_ = std::move(input_output_info); - } - - void SetScalarOutputs(std::unordered_set&& scalar_outputs) { - scalar_outputs_ = std::move(scalar_outputs); - } - - void SetInt64Outputs(std::unordered_set&& int64_outputs) { - int64_outputs_ = std::move(int64_outputs); - } }; } // namespace coreml diff --git a/onnxruntime/core/providers/coreml/model/model.mm b/onnxruntime/core/providers/coreml/model/model.mm index 155201ad4c39c..d5cd70bff9479 100644 --- a/onnxruntime/core/providers/coreml/model/model.mm +++ b/onnxruntime/core/providers/coreml/model/model.mm @@ -252,14 +252,14 @@ - (instancetype)initWithPath:(const std::string&)path coreml_flags:(uint32_t)coreml_flags; - (void)cleanup; - (void)dealloc; -- (Status)loadModel API_AVAILABLE_OS_VERSIONS; +- (Status)loadModel API_AVAILABLE_COREML3; - (Status)predict:(const std::unordered_map&)inputs outputs:(const std::unordered_map&)outputs getOutputTensorDataFn:(const GetOutputTensorMutableRawDataFn&) get_output_tensor_mutable_raw_data_fn - API_AVAILABLE_OS_VERSIONS; + API_AVAILABLE_COREML3; -@property(nullable) MLModel* model API_AVAILABLE_OS_VERSIONS; +@property(nullable) MLModel* model API_AVAILABLE_COREML3; @end @@ -308,6 +308,10 @@ - (Status)loadModel { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to create model URL from path"); } + // TODO: Update this to version with callback handler as the API used here is deprecated. + // https://developer.apple.com/documentation/coreml/mlmodel/3929553-compilemodelaturl + // As we call loadModel during EP Compile there shouldn't be an issue letting the actual compile run in the + // background. We will have to check for completion in `predict` and block until it is done. 
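+  // Editorial sketch, not part of this change: the callback-based replacement the TODO above
+  // refers to (available from macOS 13 / iOS 16). Names inside the block are illustrative.
+  //
+  //   [MLModel compileModelAtURL:modelUrl
+  //            completionHandler:^(NSURL* _Nullable compiledModelURL, NSError* _Nullable compileError) {
+  //              // stash compiledModelURL / compileError and signal the waiting Predict call
+  //            }];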
NSError* error = nil; NSURL* compileUrl = [MLModel compileModelAtURL:modelUrl error:&error]; @@ -454,7 +458,7 @@ Status Predict(const std::unordered_map& inputs, return Status::OK(); } - if (HAS_VALID_BASE_OS_VERSION) { + if (HAS_COREML3_OR_LATER) { Status status{}; @autoreleasepool { status = [execution_ loadModel]; @@ -471,7 +475,7 @@ Status Predict(const std::unordered_map& inputs, const GetOutputTensorMutableRawDataFn& get_output_tensor_mutable_raw_data_fn) { ORT_RETURN_IF_NOT(model_loaded, "Execution::Predict requires Execution::LoadModel"); - if (HAS_VALID_BASE_OS_VERSION) { + if (HAS_COREML3_OR_LATER) { @autoreleasepool { return [execution_ predict:inputs outputs:outputs @@ -482,8 +486,16 @@ Status Predict(const std::unordered_map& inputs, return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Execution::Predict requires macos 10.15+ or ios 13+"); } -Model::Model(const std::string& path, const logging::Logger& logger, uint32_t coreml_flags) - : execution_(std::make_unique(path, logger, coreml_flags)) { +Model::Model(const std::string& path, + std::unordered_map&& input_output_info, + std::unordered_set&& scalar_outputs, + std::unordered_set&& int64_outputs, + const logging::Logger& logger, + uint32_t coreml_flags) + : execution_(std::make_unique(path, logger, coreml_flags)), + input_output_info_(std::move(input_output_info)), + scalar_outputs_(std::move(scalar_outputs)), + int64_outputs_(std::move(int64_outputs)) { } Model::~Model() {} @@ -497,25 +509,5 @@ Status Predict(const std::unordered_map& inputs, const GetOutputTensorMutableRawDataFn& get_output_tensor_mutable_raw_data_fn) { return execution_->Predict(inputs, outputs, get_output_tensor_mutable_raw_data_fn); } - -bool Model::IsScalarOutput(const std::string& output_name) const { - return Contains(scalar_outputs_, output_name); -} - -bool Model::IsInt64Output(const std::string& output_name) const { - return Contains(int64_outputs_, output_name); -} - -const OnnxTensorInfo* Model::TryGetInputOutputInfo(const std::string& name) const { - const auto info_it = input_output_info_.find(name); - return info_it != input_output_info_.end() ? &info_it->second : nullptr; -} - -const OnnxTensorInfo& Model::GetInputOutputInfo(const std::string& name) const { - const auto* info = TryGetInputOutputInfo(name); - ORT_ENFORCE(info != nullptr, "Failed to get info for input/output: ", name); - return *info; -} - } // namespace coreml } // namespace onnxruntime diff --git a/onnxruntime/core/providers/coreml/model/model_stub.cc b/onnxruntime/core/providers/coreml/model/model_stub.cc new file mode 100644 index 0000000000000..087c9f8c05d5f --- /dev/null +++ b/onnxruntime/core/providers/coreml/model/model_stub.cc @@ -0,0 +1,38 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "core/providers/coreml/model/model.h" + +namespace onnxruntime { +namespace coreml { + +class Execution {}; + +Model::Model(const std::string& /*path*/, + std::unordered_map&& input_output_info, + std::unordered_set&& scalar_outputs, + std::unordered_set&& int64_outputs, + const logging::Logger& /*logger*/, + uint32_t /*coreml_flags*/) + : execution_(std::make_unique()), + input_output_info_(std::move(input_output_info)), + scalar_outputs_(std::move(scalar_outputs)), + int64_outputs_(std::move(int64_outputs)) { +} + +Model::~Model() { +} + +Status Model::LoadModel() { + // return OK so we hit more CoreML EP code. 
+ return Status::OK(); +} + +Status Model::Predict(const std::unordered_map& /*inputs*/, + const std::unordered_map& /*outputs*/, + const GetOutputTensorMutableRawDataFn& /*get_output_tensor_mutable_raw_data_fn*/) { + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Executing a CoreML model is not supported on this platform."); +} + +} // namespace coreml +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/split_op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/split_op_builder.cc index b2225643b788e..edee298ad1ccf 100644 --- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/split_op_builder.cc +++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/impl/split_op_builder.cc @@ -67,7 +67,7 @@ Status SplitOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const int32_t num_outputs; if (node_unit.SinceVersion() >= 18) { - num_outputs = SafeInt(*helper.GetInt("num_outputs")); + num_outputs = SafeInt(*helper.GetInt64("num_outputs")); } else { num_outputs = SafeInt(node_unit.Outputs().size()); } @@ -127,7 +127,7 @@ bool SplitOpBuilder::IsOpSupportedImpl(const GraphViewer& graph_viewer, const No } else { uint32_t num_outputs; if (node_unit.SinceVersion() >= 18) { - auto num_outputs_attr = helper.GetInt("num_outputs"); + auto num_outputs_attr = helper.GetInt64("num_outputs"); if (!num_outputs_attr.has_value()) { LOGS_DEFAULT(VERBOSE) << "No 'num_outputs' provided. For split 18+, num_outputs is a required attribute."; return false; diff --git a/onnxruntime/core/providers/shared/utils/utils.cc b/onnxruntime/core/providers/shared/utils/utils.cc index 37ad14ac2e9b1..c07a0929353b1 100644 --- a/onnxruntime/core/providers/shared/utils/utils.cc +++ b/onnxruntime/core/providers/shared/utils/utils.cc @@ -118,84 +118,134 @@ NodeAttrHelper::NodeAttrHelper(const NodeUnit& node_unit) : node_attributes_(node_unit.GetNode().GetAttributes()) {} float NodeAttrHelper::Get(const std::string& key, float def_val) const { - if (!HasAttr(key)) - return def_val; + if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { + return entry->second.f(); + } - return node_attributes_.at(key).f(); + return def_val; } int32_t NodeAttrHelper::Get(const std::string& key, int32_t def_val) const { - if (!HasAttr(key)) - return def_val; + if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { + return narrow(entry->second.i()); + } - return SafeInt(node_attributes_.at(key).i()); + return def_val; } uint32_t NodeAttrHelper::Get(const std::string& key, uint32_t def_val) const { - if (!HasAttr(key)) - return def_val; + if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { + return narrow(entry->second.i()); + } - return SafeInt(node_attributes_.at(key).i()); + return def_val; } int64_t NodeAttrHelper::Get(const std::string& key, int64_t def_val) const { - if (!HasAttr(key)) - return def_val; + if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { + return entry->second.i(); + } - return node_attributes_.at(key).i(); + return def_val; } const std::string& NodeAttrHelper::Get(const std::string& key, const std::string& def_val) const { - if (!HasAttr(key)) - return def_val; + if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { + return entry->second.s(); + } - return node_attributes_.at(key).s(); + return def_val; } std::vector NodeAttrHelper::Get(const std::string& key, const std::vector& def_val) const { - if 
(!HasAttr(key)) - return def_val; + if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { + const auto& attr = entry->second; + std::vector v; + v.reserve(static_cast(attr.ints_size())); + std::transform(attr.ints().cbegin(), attr.ints().cend(), std::back_inserter(v), + [](int64_t val) -> int32_t { return narrow(val); }); + return v; + } - const auto& attr(node_attributes_.at(key)); - std::vector v; - v.reserve(static_cast(attr.ints_size())); - std::transform(attr.ints().cbegin(), attr.ints().cend(), std::back_inserter(v), - [](int64_t val) -> int32_t { return SafeInt(val); }); - return v; + return def_val; } std::vector NodeAttrHelper::Get(const std::string& key, const std::vector& def_val) const { - if (!HasAttr(key)) - return def_val; + if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { + const auto& attr = entry->second; + std::vector v; + v.reserve(static_cast(attr.ints_size())); + std::transform(attr.ints().cbegin(), attr.ints().cend(), std::back_inserter(v), + [](int64_t val) -> uint32_t { return narrow(val); }); + return v; + } - const auto& attr(node_attributes_.at(key)); - std::vector v; - v.reserve(static_cast(attr.ints_size())); - std::transform(attr.ints().cbegin(), attr.ints().cend(), std::back_inserter(v), - [](int64_t val) -> uint32_t { return SafeInt(val); }); - return v; + return def_val; } std::vector NodeAttrHelper::Get(const std::string& key, const std::vector& def_val) const { - if (!HasAttr(key)) - return def_val; + if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { + const auto& values = entry->second.ints(); + return std::vector{values.cbegin(), values.cend()}; + } - const auto& source(node_attributes_.at(key).ints()); - return std::vector{source.cbegin(), source.cend()}; + return def_val; } std::vector NodeAttrHelper::Get(const std::string& key, const std::vector& def_val) const { - if (!HasAttr(key)) - return def_val; + if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { + const auto& values = entry->second.floats(); + return std::vector{values.cbegin(), values.cend()}; + } - const auto& source(node_attributes_.at(key).floats()); - return std::vector{source.cbegin(), source.cend()}; + return def_val; } -std::optional NodeAttrHelper::GetInt(const std::string& key) const { - if (!HasAttr(key)) - return std::nullopt; - return node_attributes_.at(key).i(); +std::optional NodeAttrHelper::GetFloat(const std::string& key) const { + std::optional result; + if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { + result = entry->second.f(); + } + + return result; +} + +std::optional NodeAttrHelper::GetInt64(const std::string& key) const { + std::optional result; + if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { + result = entry->second.i(); + } + + return result; +} + +std::optional> NodeAttrHelper::GetFloats(const std::string& key) const { + std::optional> result; + if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { + const auto& values = entry->second.floats(); + result = std::vector(values.begin(), values.end()); + } + + return result; +} + +std::optional> NodeAttrHelper::GetInt64s(const std::string& key) const { + std::optional> result; + if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { + const auto& values = entry->second.ints(); + result = std::vector(values.begin(), values.end()); + } + + return result; +} + +std::optional 
NodeAttrHelper::GetString(const std::string& key) const { + std::optional result; + if (auto entry = node_attributes_.find(key); entry != node_attributes_.end()) { + result = entry->second.s(); + } + + return result; } bool NodeAttrHelper::HasAttr(const std::string& key) const { diff --git a/onnxruntime/core/providers/shared/utils/utils.h b/onnxruntime/core/providers/shared/utils/utils.h index 31b1aba2e1a63..5813dcc48d72b 100644 --- a/onnxruntime/core/providers/shared/utils/utils.h +++ b/onnxruntime/core/providers/shared/utils/utils.h @@ -47,15 +47,17 @@ class NodeAttrHelper { // Get the attributes from the target node of the node_unit explicit NodeAttrHelper(const NodeUnit& node_unit); + /* + * Get with default + */ float Get(const std::string& key, float def_val) const; + std::vector Get(const std::string& key, const std::vector& def_val) const; int64_t Get(const std::string& key, int64_t def_val) const; + std::vector Get(const std::string& key, const std::vector& def_val) const; const std::string& Get(const std::string& key, const std::string& def_val) const; - std::vector Get(const std::string& key, const std::vector& def_val) const; - std::vector Get(const std::string& key, const std::vector& def_val) const; - // Convert the i() or ints() of the attribute from int64_t to int32_t int32_t Get(const std::string& key, int32_t def_val) const; std::vector Get(const std::string& key, const std::vector& def_val) const; @@ -64,7 +66,16 @@ class NodeAttrHelper { uint32_t Get(const std::string& key, uint32_t def_val) const; std::vector Get(const std::string& key, const std::vector& def_val) const; - std::optional GetInt(const std::string& key) const; + /* + * Get without default. + */ + std::optional GetFloat(const std::string& key) const; + std::optional> GetFloats(const std::string& key) const; + + std::optional GetInt64(const std::string& key) const; + std::optional> GetInt64s(const std::string& key) const; + + std::optional GetString(const std::string& key) const; bool HasAttr(const std::string& key) const; diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc index a94f7b5b707c7..40b40136af1af 100644 --- a/onnxruntime/test/util/default_providers.cc +++ b/onnxruntime/test/util/default_providers.cc @@ -208,12 +208,18 @@ std::unique_ptr DefaultRocmExecutionProvider(bool test_tunab } std::unique_ptr DefaultCoreMLExecutionProvider() { -// For any non - macOS system, CoreML will only be used for ort model converter -// Make it unavailable here, you can still manually append CoreML EP to session for model conversion + // To manually test CoreML model generation on a non-macOS platform, comment out the `&& defined(__APPLE__)` below. + // The test will create a model but execution of it will obviously fail. + // To test creating an ML Program, set the environment variable COREML_EP_TEST_MLPROGRAM to any value. #if defined(USE_COREML) && defined(__APPLE__) // We want to run UT on CPU only to get output value without losing precision uint32_t coreml_flags = 0; coreml_flags |= COREML_FLAG_USE_CPU_ONLY; + + if (!Env::Default().GetEnvironmentVar("COREML_EP_TEST_MLPROGRAM").empty()) { + coreml_flags |= COREML_FLAG_CREATE_MLPROGRAM; + } + return CoreMLProviderFactoryCreator::Create(coreml_flags)->CreateProvider(); #else return nullptr;
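
Editorial usage sketches (not part of the diff above).

First, how an application opts in to the new ML Program path. This is a sketch assuming the
existing C API factory function declared in
include/onnxruntime/core/providers/coreml/coreml_provider_factory.h; "model.onnx" is a
placeholder path. As the CoreMLExecutionProvider constructor above shows, the flag is cleared
with a warning and the EP falls back to NeuralNetwork when the OS or build cannot honor it.

    #include "onnxruntime_cxx_api.h"
    #include "core/providers/coreml/coreml_provider_factory.h"

    int main() {
      Ort::Env env;
      Ort::SessionOptions session_options;

      // Request an ML Program (Core ML 5+) instead of a NeuralNetwork model.
      uint32_t coreml_flags = COREML_FLAG_CREATE_MLPROGRAM;
      Ort::ThrowOnError(
          OrtSessionOptionsAppendExecutionProvider_CoreML(session_options, coreml_flags));

      Ort::Session session(env, "model.onnx", session_options);
      return 0;
    }

Second, a sketch of the renamed NodeAttrHelper getters, mirroring the Split op builder change
above. `node_unit` is assumed to be a NodeUnit wrapping an opset-18 Split node.

    NodeAttrHelper helper(node_unit);

    // The optional getters return std::nullopt when the attribute is absent, so a missing
    // attribute is distinguishable from one that happens to equal the default value.
    if (std::optional<int64_t> num_outputs = helper.GetInt64("num_outputs"); num_outputs.has_value()) {
      const int32_t n = SafeInt<int32_t>(*num_outputs);
      // ... validate n against the node's output count ...
    }

    // The Get-with-default overloads are unchanged.
    const int64_t axis = helper.Get("axis", int64_t{0});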