diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlCommittedResourceAllocator.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlCommittedResourceAllocator.cpp
index d9bfdc3473ca7..b696aefecf664 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlCommittedResourceAllocator.cpp
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlCommittedResourceAllocator.cpp
@@ -13,7 +13,7 @@ namespace Dml
         ComPtr<ID3D12Resource> resource;
         auto buffer = CD3DX12_RESOURCE_DESC::Buffer(size, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
         ORT_THROW_IF_FAILED(m_device->CreateCommittedResource(
-            &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT),
+            unmove_ptr(CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT)),
             D3D12_HEAP_FLAG_NONE,
             &buffer,
             D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlCommon.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlCommon.h
index 9bf8c58f7a3ec..4d0ab1f17c82b 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlCommon.h
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/DmlCommon.h
@@ -6,6 +6,11 @@
 #include <assert.h>
 #include "core/providers/dml/OperatorAuthorHelper/Common.h"
 
+template <class T>  // Yields the address of its argument, so a temporary can be handed to an API that takes a pointer.
+auto unmove_ptr(T&& t) {  // Forwarding reference: binds to lvalues and rvalues alike.
+  return &static_cast<T&>(t);  // Points at the caller's object; for a temporary it is only valid until the caller's full-expression ends.
+}  // NOTE(review): winml/lib/Common/inc/common.h adds an identical global template; confirm no translation unit includes both.
+
 namespace Dml
 {
     using namespace OperatorHelper;
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp
index f97b72aa2d385..5f6bd178aaa15 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/ExecutionProvider.cpp
@@ -636,7 +636,7 @@ namespace Dml
 
     bool IsCpuOnDmlOperator(const onnxruntime::Node& node)
     {
-        auto cpuOnDmlOperators = std::array<char*, 8>{
+        auto cpuOnDmlOperators = std::array<const char*, 8>{
             "SequenceAt",
             "SequenceConstruct",
             "SequenceEmpty",
@@ -659,7 +659,7 @@ namespace Dml
 
     bool IsDmlSequenceOperator(const onnxruntime::Node& node)
     {
-        auto sequence_ops = std::array<char*, 1>{
+        auto sequence_ops = std::array<const char*, 1>{
             "ConcatFromSequence"
         };
 
@@ -675,7 +675,7 @@ namespace Dml
 
     bool IsCustomOpShader(const onnxruntime::Node& node)
     {
-        auto custom_ops = std::array<char*, 3>{
+        auto custom_ops = std::array<const char*, 3>{
             "DFT",
             "STFT",
             "GridSample"
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorBatchNormalization.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorBatchNormalization.cpp
index 60b235880e23f..9f9cfad670919 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorBatchNormalization.cpp
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorBatchNormalization.cpp
@@ -143,7 +143,8 @@ class DmlOperatorBatchNormalization15 : public DmlOperator, BatchNormalizationHe
         );
 
         DML_EXECUTION_FLAGS executionFlags = GetExecutionFlags();
-        m_compiledOperator.Attach(graph.Compile(executionFlags, { batchNormalization }).Detach());
+        std::array<dml::Expression, 1> outputs = { batchNormalization };
+        m_compiledOperator.Attach(graph.Compile(executionFlags, outputs).Detach());
     }
 
     void Compute(const MLOperatorKernelContext& kernelContext) override
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorPooling.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorPooling.cpp
index 4f8b5a1bc7fac..e8d5b2746aa13 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorPooling.cpp
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorPooling.cpp
@@ -84,7 +84,7 @@ class DmlOperatorPooling : public DmlOperator, public PoolingHelperBase
             poolingDesc.EndPadding = m_kernel.endPadding;
 
             DML_OPERATOR_DESC opDesc = {};
-            opDesc.Type = ApiTraits::OperatorDescTraits<std::remove_reference<decltype(poolingDesc)>::type>::Type;
+            opDesc.Type = ApiTraits::OperatorDescTraits<typename std::remove_reference<decltype(poolingDesc)>::type>::Type;
             opDesc.Desc = &poolingDesc;
             SetDmlOperatorDesc(opDesc, kernelInfo);
         };
diff --git a/winml/lib/Api.Image/CpuDetensorizer.h b/winml/lib/Api.Image/CpuDetensorizer.h
index dbafeed72cda8..c5386cc03e45b 100644
--- a/winml/lib/Api.Image/CpuDetensorizer.h
+++ b/winml/lib/Api.Image/CpuDetensorizer.h
@@ -129,7 +129,7 @@ class CpuDetensorizer {
   }
 
   template <>
-  static float ReadTensor<DirectX::PackedVector::HALF>(
+  float ReadTensor<DirectX::PackedVector::HALF>(
     const DirectX::PackedVector::HALF* pCPUTensor, const NominalRangeConverter& nominalRangeConverter
   ) {
     return nominalRangeConverter.Denormalize(DirectX::PackedVector::XMConvertHalfToFloat(*pCPUTensor));
@@ -169,7 +169,7 @@ class CpuDetensorizer {
 
 #if defined(_M_AMD64) || defined(_M_IX86)
   template <>
-  static void InterleaveRowFloatToByte(
+  void InterleaveRowFloatToByte(
     const float* xChannel,
     const float* yChannel,
     const float* zChannel,
diff --git a/winml/lib/Api.Image/CpuTensorizer.h b/winml/lib/Api.Image/CpuTensorizer.h
index d4e26cde7a420..6be499d890948 100644
--- a/winml/lib/Api.Image/CpuTensorizer.h
+++ b/winml/lib/Api.Image/CpuTensorizer.h
@@ -113,11 +113,11 @@ class CpuTensorizer {
   static T ConvertByteToFloat(const BYTE& input, const NominalRangeConverter& nominalRangeConverter);
 
   template <>
-  static float ConvertByteToFloat(const BYTE& input, const NominalRangeConverter& nominalRangeConverter) {
+  float ConvertByteToFloat(const BYTE& input, const NominalRangeConverter& nominalRangeConverter) {  // float specialization; 'static' dropped because an explicit specialization may not carry a storage-class specifier
     return nominalRangeConverter.Normalize(static_cast<float>(input));
   }
   template <>
-  static DirectX::PackedVector::HALF ConvertByteToFloat(
+  DirectX::PackedVector::HALF ConvertByteToFloat(
     const BYTE& input, const NominalRangeConverter& nominalRangeConverter
   ) {
     return nominalRangeConverter.Normalize(DirectX::PackedVector::XMConvertFloatToHalf(input));
@@ -161,7 +161,7 @@ class CpuTensorizer {
 
 #if defined(_M_AMD64) || defined(_M_IX86)
   template <>
-  static void DeinterleaveRowByteToFloat(
+  void DeinterleaveRowByteToFloat(
     _In_ BYTE* pBuffer,
     _Inout_ float* xChannel,
     _Inout_ float* yChannel,
diff --git a/winml/lib/Api.Image/TensorToVideoFrameConverter.cpp b/winml/lib/Api.Image/TensorToVideoFrameConverter.cpp
index 2654885d6bee8..b90d35cbfc58a 100644
--- a/winml/lib/Api.Image/TensorToVideoFrameConverter.cpp
+++ b/winml/lib/Api.Image/TensorToVideoFrameConverter.cpp
@@ -501,7 +501,7 @@ void TensorToVideoFrameConverter::ConvertGPUTensorToDX12Texture(
   if (!UAV_resource_ || outputDesc.Format != UAV_resource_->GetDesc().Format ||
         outputDesc.Width != UAV_resource_->GetDesc().Width || outputDesc.Height != UAV_resource_->GetDesc().Height) {
     WINML_THROW_IF_FAILED(device_cache.GetD3D12Device()->CreateCommittedResource(
-      &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT),
+      unmove_ptr(CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT)),
       D3D12_HEAP_FLAG_NONE,
       &outputResourceDesc,
       D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
@@ -599,31 +599,31 @@ void TensorToVideoFrameConverter::ConvertGPUTensorToDX12Texture(
 
     command_list_->ResourceBarrier(
       1,
-      &CD3DX12_RESOURCE_BARRIER::Transition(
+      unmove_ptr(CD3DX12_RESOURCE_BARRIER::Transition(
         pInputResource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE
-      )
+      ))
     );
     command_list_->Dispatch(dispatchWidth, dispatchHeight, 1);
     command_list_->ResourceBarrier(
       1,
-      &CD3DX12_RESOURCE_BARRIER::Transition(
+      unmove_ptr(CD3DX12_RESOURCE_BARRIER::Transition(
         pInputResource, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS
-      )
+      ))
     );
 
     // Copy the UAV data to the output resource after detensorization
     command_list_->ResourceBarrier(
       1,
-      &CD3DX12_RESOURCE_BARRIER::Transition(
+      unmove_ptr(CD3DX12_RESOURCE_BARRIER::Transition(
         UAV_resource_.Get(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE
-      )
+      ))
     );
     command_list_->CopyResource(pOutputResource, UAV_resource_.Get());
     command_list_->ResourceBarrier(
       1,
-      &CD3DX12_RESOURCE_BARRIER::Transition(
+      unmove_ptr(CD3DX12_RESOURCE_BARRIER::Transition(
         UAV_resource_.Get(), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS
-      )
+      ))
     );
 
     WINML_THROW_IF_FAILED(command_list_->Close());
@@ -657,9 +657,9 @@ void TensorToVideoFrameConverter::ConvertGPUTensorToSoftwareBitmap(
   // TODO: Make an allocator for readback heaps
   if (!readback_heap_ || readback_heap_->GetDesc().Width < singleVideoFramebufferSize) {
     WINML_THROW_IF_FAILED(device_cache.GetD3D12Device()->CreateCommittedResource(
-      &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK),
+      unmove_ptr(CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK)),
       D3D12_HEAP_FLAG_NONE,
-      &CD3DX12_RESOURCE_DESC::Buffer(singleVideoFramebufferSize),
+      unmove_ptr(CD3DX12_RESOURCE_DESC::Buffer(singleVideoFramebufferSize)),
       D3D12_RESOURCE_STATE_COPY_DEST,
       nullptr,
       IID_PPV_ARGS(&readback_heap_)
@@ -689,12 +689,12 @@ void TensorToVideoFrameConverter::ConvertGPUTensorToSoftwareBitmap(
   device_cache.SyncD3D12ToCPU();
 
   void* pCPUTensorBuffer = nullptr;
-  WINML_THROW_IF_FAILED(readback_heap_->Map(0, &CD3DX12_RANGE(0, singleVideoFramebufferSize), &pCPUTensorBuffer));
+  WINML_THROW_IF_FAILED(readback_heap_->Map(0, unmove_ptr(CD3DX12_RANGE(0, singleVideoFramebufferSize)), &pCPUTensorBuffer));
 
   // We avoid the Video Frame pipeline by manually downloading the GPU data to the CPU and detensorize while we are filling the readback heap
   ConvertCPUTensorToSoftwareBitmap(pCPUTensorBuffer, tensorDesc, softwareBitmap);
 
-  readback_heap_->Unmap(0, &CD3DX12_RANGE(0, 0));
+  readback_heap_->Unmap(0, unmove_ptr(CD3DX12_RANGE(0, 0)));
 }
 
 void TensorToVideoFrameConverter::ConvertBatchedDX12TensorToBuffers(
@@ -708,9 +708,9 @@ void TensorToVideoFrameConverter::ConvertBatchedDX12TensorToBuffers(
   // TODO: Make an allocator for readback heaps
   if (!readback_heap_ || readback_heap_->GetDesc().Width < buffer_size_in_bytes) {
     WINML_THROW_IF_FAILED(device_cache.GetD3D12Device()->CreateCommittedResource(
-      &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK),
+      unmove_ptr(CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK)),
       D3D12_HEAP_FLAG_NONE,
-      &CD3DX12_RESOURCE_DESC::Buffer(buffer_size_in_bytes),
+      unmove_ptr(CD3DX12_RESOURCE_DESC::Buffer(buffer_size_in_bytes)),
       D3D12_RESOURCE_STATE_COPY_DEST,
       nullptr,
       IID_PPV_ARGS(&readback_heap_)
@@ -734,7 +734,7 @@ void TensorToVideoFrameConverter::ConvertBatchedDX12TensorToBuffers(
 
   byte* readback_buffer = nullptr;
   WINML_THROW_IF_FAILED(
-    readback_heap_->Map(0, &CD3DX12_RANGE(0, buffer_size_in_bytes), reinterpret_cast<void**>(&readback_buffer))
+    readback_heap_->Map(0, unmove_ptr(CD3DX12_RANGE(0, buffer_size_in_bytes)), reinterpret_cast<void**>(&readback_buffer))
   );
   auto readback_buffer_span = gsl::span<byte>(readback_buffer, buffer_size_in_bytes);
   _winml::StoreSpanIntoDisjointBuffers(
@@ -748,7 +748,7 @@ void TensorToVideoFrameConverter::ConvertBatchedDX12TensorToBuffers(
     readback_buffer_span
   );
 
-  readback_heap_->Unmap(0, &CD3DX12_RANGE(0, 0));
+  readback_heap_->Unmap(0, unmove_ptr(CD3DX12_RANGE(0, 0)));
 }
 
 D3D12_SHADER_RESOURCE_VIEW_DESC TensorToVideoFrameConverter::CreateSRVDescriptor(
diff --git a/winml/lib/Api.Image/VideoFrameToTensorConverter.cpp b/winml/lib/Api.Image/VideoFrameToTensorConverter.cpp
index b856c6bdbfeca..0a763c77c94f4 100644
--- a/winml/lib/Api.Image/VideoFrameToTensorConverter.cpp
+++ b/winml/lib/Api.Image/VideoFrameToTensorConverter.cpp
@@ -600,9 +600,9 @@ void VideoFrameToTensorConverter::ConvertSoftwareBitmapToGPUTensor(
   // TODO: Make an allocator for upload heaps
   if (!upload_heap_ || upload_heap_->GetDesc().Width < bufferSize) {
     WINML_THROW_IF_FAILED(device_cache.GetD3D12Device()->CreateCommittedResource(
-      &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD),
+      unmove_ptr(CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD)),
       D3D12_HEAP_FLAG_NONE,
-      &CD3DX12_RESOURCE_DESC::Buffer(bufferSize),
+      unmove_ptr(CD3DX12_RESOURCE_DESC::Buffer(bufferSize)),
       D3D12_RESOURCE_STATE_GENERIC_READ,
       nullptr,
       IID_PPV_ARGS(&upload_heap_)
@@ -610,14 +610,14 @@ void VideoFrameToTensorConverter::ConvertSoftwareBitmapToGPUTensor(
   }
 
   void* pCPUTensorBuffer = nullptr;
-  WINML_THROW_IF_FAILED(upload_heap_->Map(0, &CD3DX12_RANGE(0, 0), &pCPUTensorBuffer));
+  WINML_THROW_IF_FAILED(upload_heap_->Map(0, unmove_ptr(CD3DX12_RANGE(0, 0)), &pCPUTensorBuffer));  // empty read range (End <= Begin): the CPU will not read from this resource
 
   // We avoid the Video Frame pipeline by manually sending the CPU data to the GPU, and we tensorize while we are filling the
   // upload heap. The image may already have been cropped/scaled by the video frame pipeline, so we send the scaled bounds
   // instead of the initial input bounds
   ConvertSoftwareBitmapToCPUTensor(convertedSoftwareBitmap, tensorDesc, scaledBounds, pCPUTensorBuffer);
 
-  upload_heap_->Unmap(0, &CD3DX12_RANGE(0, bufferSize));
+  upload_heap_->Unmap(0, unmove_ptr(CD3DX12_RANGE(0, bufferSize)));
 
   ResetCommandList(device_cache);
 
@@ -642,9 +642,9 @@ void VideoFrameToTensorConverter::ConvertBuffersToBatchedGPUTensor(
   // Copy the cpu memory into the gpu resource
   if (!upload_heap_ || upload_heap_->GetDesc().Width < buffer_size_in_bytes) {
     WINML_THROW_IF_FAILED(device_cache.GetD3D12Device()->CreateCommittedResource(
-      &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD),
+      unmove_ptr(CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD)),
       D3D12_HEAP_FLAG_NONE,
-      &CD3DX12_RESOURCE_DESC::Buffer(buffer_size_in_bytes),
+      unmove_ptr(CD3DX12_RESOURCE_DESC::Buffer(buffer_size_in_bytes)),
       D3D12_RESOURCE_STATE_GENERIC_READ,
       nullptr,
       IID_PPV_ARGS(&upload_heap_)
@@ -652,7 +652,7 @@ void VideoFrameToTensorConverter::ConvertBuffersToBatchedGPUTensor(
   }
 
   byte* gpu_buffer = nullptr;
-  WINML_THROW_IF_FAILED(upload_heap_->Map(0, &CD3DX12_RANGE(0, 0), reinterpret_cast<void**>(&gpu_buffer)));
+  WINML_THROW_IF_FAILED(upload_heap_->Map(0, unmove_ptr(CD3DX12_RANGE(0, 0)), reinterpret_cast<void**>(&gpu_buffer)));
   auto gpu_buffer_span = gsl::span<byte>(gpu_buffer, buffer_size_in_bytes);
 
   _winml::LoadSpanFromDisjointBuffers(
@@ -666,7 +666,7 @@ void VideoFrameToTensorConverter::ConvertBuffersToBatchedGPUTensor(
     gpu_buffer_span
   );
 
-  upload_heap_->Unmap(0, &CD3DX12_RANGE(0, buffer_size_in_bytes));
+  upload_heap_->Unmap(0, unmove_ptr(CD3DX12_RANGE(0, buffer_size_in_bytes)));
 
   ResetCommandList(device_cache);
 
diff --git a/winml/lib/Api.Image/inc/D3DDeviceCache.h b/winml/lib/Api.Image/inc/D3DDeviceCache.h
index 13dd773ccd19d..f3478008095b3 100644
--- a/winml/lib/Api.Image/inc/D3DDeviceCache.h
+++ b/winml/lib/Api.Image/inc/D3DDeviceCache.h
@@ -35,6 +35,12 @@ enum class PipelineStateCacheOperation : unsigned char {
   kCount = 2
 };
 
+template <typename E>  // Converts an enumerator to the integer type underlying its enum.
+constexpr auto to_underlying_integer(E e) noexcept {  // constexpr so the result can be used as an array bound.
+  return static_cast<typename std::underlying_type<E>::type>(e);  // NOTE(review): needs <type_traits>; confirm an earlier include provides it.
+}
+
+
 class D3DDeviceCache {
  public:
   ~D3DDeviceCache();
@@ -105,8 +111,10 @@ class D3DDeviceCache {
   winrt::com_ptr<ID3D12RootSignature> detensorize_root_signature_;
 
   winrt::com_ptr<ID3D12PipelineState>
-    cached_pipeline_state[PipelineStateCacheType::kCount][PipelineStateCacheFormat::kCount]
-                         [PipelineStateCacheFormat::kCount][PipelineStateCacheOperation::kCount];
+    cached_pipeline_state[to_underlying_integer(PipelineStateCacheType::kCount)]  // each kCount enumerator is converted to an integer to serve as an array bound
+                         [to_underlying_integer(PipelineStateCacheFormat::kCount)]
+                         [to_underlying_integer(PipelineStateCacheFormat::kCount)]
+                         [to_underlying_integer(PipelineStateCacheOperation::kCount)];
 
   winrt::com_ptr<ID3D12Resource> detensorize_vertex_buffer_;
 
diff --git a/winml/lib/Api.Image/inc/TensorToVideoFrameConverter.h b/winml/lib/Api.Image/inc/TensorToVideoFrameConverter.h
index 12f676459293b..a3d01cdff42fb 100644
--- a/winml/lib/Api.Image/inc/TensorToVideoFrameConverter.h
+++ b/winml/lib/Api.Image/inc/TensorToVideoFrameConverter.h
@@ -80,7 +80,7 @@ class TensorToVideoFrameConverter : public ImageConverter {
     _Inout_ wm::VideoFrame& unsupported_video_frame
   );
 
-  static D3D12_SHADER_RESOURCE_VIEW_DESC TensorToVideoFrameConverter::CreateSRVDescriptor(
+  static D3D12_SHADER_RESOURCE_VIEW_DESC CreateSRVDescriptor(
     const UINT32 batch_index, const D3D12_RESOURCE_DESC& resource_description, const ImageTensorDescription& description
   );
 
diff --git a/winml/lib/Api.Image/inc/VideoFrameToTensorConverter.h b/winml/lib/Api.Image/inc/VideoFrameToTensorConverter.h
index e34030bbd6833..0cd184ca70b21 100644
--- a/winml/lib/Api.Image/inc/VideoFrameToTensorConverter.h
+++ b/winml/lib/Api.Image/inc/VideoFrameToTensorConverter.h
@@ -86,7 +86,7 @@ class VideoFrameToTensorConverter : public ImageConverter {
     const UINT32 batch_index, const D3D12_RESOURCE_DESC& resource_description, const ImageTensorDescription& description
   );
 
-  static void VideoFrameToTensorConverter::ConvertSoftwareBitmapToCPUTensor(
+  static void ConvertSoftwareBitmapToCPUTensor(
     _In_ const wgi::SoftwareBitmap& software_bitmap,
     _In_ const ImageTensorDescription& tensor_description,
     _In_ const wgi::BitmapBounds& input_bounds,
diff --git a/winml/lib/Api.Ort/OnnxruntimeEngine.cpp b/winml/lib/Api.Ort/OnnxruntimeEngine.cpp
index 78b1a109d2da0..5bb0ce424f66c 100644
--- a/winml/lib/Api.Ort/OnnxruntimeEngine.cpp
+++ b/winml/lib/Api.Ort/OnnxruntimeEngine.cpp
@@ -862,12 +862,12 @@ struct FillMapTensors {
   static HRESULT Run(
     const OrtApi* ort_api, IInspectable* map_insp, OrtValue* keys_ort_value, OrtValue* values_ort_value
   ) {
-    AbiTypeInfo<TAbiKey>::OrtType* keys_mutable_data;
+    typename AbiTypeInfo<TAbiKey>::OrtType* keys_mutable_data;
     RETURN_HR_IF_NOT_OK_MSG(
       ort_api->GetTensorMutableData(keys_ort_value, reinterpret_cast<void**>(&keys_mutable_data)), ort_api
     );
 
-    AbiTypeInfo<TAbiValue>::OrtType* values_mutable_data;
+    typename AbiTypeInfo<TAbiValue>::OrtType* values_mutable_data;
     RETURN_HR_IF_NOT_OK_MSG(
       ort_api->GetTensorMutableData(values_ort_value, reinterpret_cast<void**>(&values_mutable_data)), ort_api
     );
@@ -888,7 +888,7 @@ struct FillMapTensors<HSTRING, TAbiValue> {
   static HRESULT Run(
     const OrtApi* ort_api, IInspectable* map_insp, OrtValue* keys_ort_value, OrtValue* values_ort_value
   ) {
-    AbiTypeInfo<TAbiValue>::OrtType* values_mutable_data;
+    typename AbiTypeInfo<TAbiValue>::OrtType* values_mutable_data;
     RETURN_HR_IF_NOT_OK_MSG(
       ort_api->GetTensorMutableData(values_ort_value, reinterpret_cast<void**>(&values_mutable_data)), ort_api
     );
@@ -916,7 +916,7 @@ struct FillMapTensors<TAbiKey, HSTRING> {
   static HRESULT Run(
     const OrtApi* ort_api, IInspectable* map_insp, OrtValue* keys_ort_value, OrtValue* values_ort_value
   ) {
-    AbiTypeInfo<TAbiKey>::OrtType* keys_mutable_data;
+    typename AbiTypeInfo<TAbiKey>::OrtType* keys_mutable_data;
     RETURN_HR_IF_NOT_OK_MSG(
       ort_api->GetTensorMutableData(keys_ort_value, reinterpret_cast<void**>(&keys_mutable_data)), ort_api
     );
diff --git a/winml/lib/Api/ImageFeatureValue.h b/winml/lib/Api/ImageFeatureValue.h
index 83a21c8679cf3..9df4d7010ff66 100644
--- a/winml/lib/Api/ImageFeatureValue.h
+++ b/winml/lib/Api/ImageFeatureValue.h
@@ -23,7 +23,7 @@ struct ImageFeatureValue : ImageFeatureValueT<ImageFeatureValue, _winml::ILotusV
   wfc::IIterable<Windows::Media::VideoFrame> VideoFrames();
   winml::LearningModelFeatureKind Kind();
 
-  static winml::ImageFeatureValue ImageFeatureValue::Create(
+  static winml::ImageFeatureValue Create(
     uint32_t batchSize, Windows::Graphics::Imaging::BitmapPixelFormat format, uint32_t width, uint32_t height
   );
   static winml::ImageFeatureValue CreateFromVideoFrame(Windows::Media::VideoFrame const& image);
diff --git a/winml/lib/Api/impl/MapBase.h b/winml/lib/Api/impl/MapBase.h
index 4686d8a05ad9b..6424f85ef5631 100644
--- a/winml/lib/Api/impl/MapBase.h
+++ b/winml/lib/Api/impl/MapBase.h
@@ -7,6 +7,8 @@
 
 #include "MapFeatureDescriptor.h"
 #include "TensorFeatureDescriptor.h"
+#include "LearningModelSession.h"
+#include "IMapFeatureValue.h"
 
 namespace _winml {
 
diff --git a/winml/lib/Api/impl/SequenceBase.h b/winml/lib/Api/impl/SequenceBase.h
index 82ac6a439dae0..7c4df9783cf04 100644
--- a/winml/lib/Api/impl/SequenceBase.h
+++ b/winml/lib/Api/impl/SequenceBase.h
@@ -6,6 +6,10 @@
 #include "MapFeatureDescriptor.h"
 #include "SequenceFeatureDescriptor.h"
 #include "TensorFeatureDescriptor.h"
+#include "LearningModelSession.h"
+#include "ISequenceFeatureValue.h"
+
+#include "FeatureValues.h"
 
 namespace _winml {
 
@@ -169,55 +173,55 @@ struct SequenceBase : public winrt::implements<
   }
   template <>
   auto CreatePlaceholderTensor<winml::TensorBoolean>() {
-    return winmlp::TensorBoolean::Create();
+    return winml::TensorBoolean::Create();  // NOTE(review): winmlp:: -> winml:: here and in the specializations below; presumably both reach the same Create() — confirm
   }
   template <>
   auto CreatePlaceholderTensor<winml::TensorFloat>() {
-    return winmlp::TensorFloat::Create();
+    return winml::TensorFloat::Create();
   }
   template <>
   auto CreatePlaceholderTensor<winml::TensorDouble>() {
-    return winmlp::TensorDouble::Create();
+    return winml::TensorDouble::Create();
   }
   template <>
   auto CreatePlaceholderTensor<winml::TensorInt8Bit>() {
-    return winmlp::TensorInt8Bit::Create();
+    return winml::TensorInt8Bit::Create();
   }
   template <>
   auto CreatePlaceholderTensor<winml::TensorUInt8Bit>() {
-    return winmlp::TensorUInt8Bit::Create();
+    return winml::TensorUInt8Bit::Create();
   }
   template <>
   auto CreatePlaceholderTensor<winml::TensorUInt16Bit>() {
-    return winmlp::TensorUInt16Bit::Create();
+    return winml::TensorUInt16Bit::Create();
   }
   template <>
   auto CreatePlaceholderTensor<winml::TensorInt16Bit>() {
-    return winmlp::TensorInt16Bit::Create();
+    return winml::TensorInt16Bit::Create();
   }
   template <>
   auto CreatePlaceholderTensor<winml::TensorUInt32Bit>() {
-    return winmlp::TensorUInt32Bit::Create();
+    return winml::TensorUInt32Bit::Create();
   }
   template <>
   auto CreatePlaceholderTensor<winml::TensorInt32Bit>() {
-    return winmlp::TensorInt32Bit::Create();
+    return winml::TensorInt32Bit::Create();
   }
   template <>
   auto CreatePlaceholderTensor<winml::TensorUInt64Bit>() {
-    return winmlp::TensorUInt64Bit::Create();
+    return winml::TensorUInt64Bit::Create();
   }
   template <>
   auto CreatePlaceholderTensor<winml::TensorInt64Bit>() {
-    return winmlp::TensorInt64Bit::Create();
+    return winml::TensorInt64Bit::Create();
   }
   template <>
   auto CreatePlaceholderTensor<winml::TensorFloat16Bit>() {
-    return winmlp::TensorFloat16Bit::Create();
+    return winml::TensorFloat16Bit::Create();
   }
   template <>
   auto CreatePlaceholderTensor<winml::TensorString>() {
-    return winmlp::TensorString::Create();
+    return winml::TensorString::Create();
   }
 
   void AppendValue(_winml::BindingContext& context, wfc::IVector<T> data, winrt::com_ptr<_winml::IValue> value) {
diff --git a/winml/lib/Api/impl/TensorBase.h b/winml/lib/Api/impl/TensorBase.h
index c9299a00ddaa2..c8332e11234b5 100644
--- a/winml/lib/Api/impl/TensorBase.h
+++ b/winml/lib/Api/impl/TensorBase.h
@@ -875,7 +875,7 @@ struct TensorBase : TBase {
 
     WINML_THROW_HR_IF_TRUE_MSG(
       E_ILLEGAL_METHOD_CALL,
-      std::is_same<T, std::string>::value,
+      (std::is_same<T, std::string>::value),
       "TensorString objects cannot be created from IBuffers!"
     );
   }
diff --git a/winml/lib/Api/impl/TensorKindFrom.h b/winml/lib/Api/impl/TensorKindFrom.h
index f2b59ac9d70ec..0d3f185a64d19 100644
--- a/winml/lib/Api/impl/TensorKindFrom.h
+++ b/winml/lib/Api/impl/TensorKindFrom.h
@@ -3,6 +3,8 @@
 
 #pragma once
 
+#include "TensorFeatureDescriptor.h"
+
 namespace _winml {
 
 // We need to define our own type for Half since DirectX::PackedVector::Half resolves to uint16_t per its typedef declaration.
diff --git a/winml/lib/Api/pch/pch.h b/winml/lib/Api/pch/pch.h
index 8c09085a993b5..57591afd45547 100644
--- a/winml/lib/Api/pch/pch.h
+++ b/winml/lib/Api/pch/pch.h
@@ -12,5 +12,6 @@
 
 #include "cppwinrt_onnx.h"
 #include "dx.h"
+#include "FeatureValues.h"
 
 #pragma warning(pop)
diff --git a/winml/lib/Common/inc/common.h b/winml/lib/Common/inc/common.h
index 58549e6e52195..7d259d66e8a04 100644
--- a/winml/lib/Common/inc/common.h
+++ b/winml/lib/Common/inc/common.h
@@ -51,3 +51,8 @@ TRACELOGGING_DECLARE_PROVIDER(winml_trace_logging_provider);
 #include "NamespaceAliases.h"
 #include "StringHelpers.h"
 #include "WinML_Lock.h"
+
+template <class T>  // Yields the address of its argument, so a temporary can be handed to an API that takes a pointer.
+auto unmove_ptr(T&& t) {  // Forwarding reference: binds to lvalues and rvalues alike.
+  return &static_cast<T&>(t);  // Points at the caller's object; for a temporary it is only valid until the caller's full-expression ends.
+}  // NOTE(review): DmlExecutionProvider/src/DmlCommon.h adds an identical global template; confirm no translation unit includes both.