From 7f9e19120b344c40d57d719032edad8ee1952267 Mon Sep 17 00:00:00 2001
From: Ryan Lai <rylai@microsoft.com>
Date: Wed, 9 Sep 2020 15:59:26 -0700
Subject: [PATCH] Build MicrosoftMLRunner to use the WinML NuGet package and make
 WinMLRunner only load from System32 (#348)

* move some headers around

* add nuget

* Refactor BindingUtilities and OutputHelper into .cpp and header files

* remove dllload.cpp

* Remove windows.ai.machinelearning header

* Fix testing

* Don't change sln

* Fix x86 build

* Added different configurations

* Add preprocessor definitions to select between headers and namespaces

* Change naming of EXE for nuget builds

* Update OutputHelper.cpp

remove pragma in outputhelper.cpp

* remove pragma from cpp

* Switch between native header and update documentation

* use const cast

* rename nuget to NuGet and address PR comments

Co-authored-by: Ryan Lai <ryalai96@gamil.com>
---
 Testing/WinMLRunnerTest/WinMLRunnerTest.cpp   |  106 +-
 .../WinMLRunnerTest/WinMLRunnerTest.vcxproj   |  354 +++++
 Tools/WinMLRunner/README.md                   |    7 +-
 Tools/WinMLRunner/WinMLRunner.sln             |  124 +-
 Tools/WinMLRunner/WinMLRunner.vcxproj         |  159 ++-
 Tools/WinMLRunner/WinMLRunnerDLL.vcxproj      |  234 ----
 .../WinMLRunner/WinMLRunnerScenarios.vcxproj  |  174 ++-
 .../WinMLRunnerScenarios.vcxproj.filters      |    3 +
 .../WinMLRunner/WinMLRunnerStaticLib.vcxproj  |  152 +-
 .../WinMLRunnerStaticLib.vcxproj.filters      |   12 +-
 .../WinMLRunner/WinMLRunner_Link_DLL.vcxproj  |  267 ----
 Tools/WinMLRunner/packages.config             |    5 +
 Tools/WinMLRunner/src/BindingUtilities.cpp    | 1008 ++++++++++++++
 Tools/WinMLRunner/src/BindingUtilities.h      |  970 +------------
 Tools/WinMLRunner/src/CommandLineArgs.cpp     |   29 +-
 Tools/WinMLRunner/src/CommandLineArgs.h       |    3 +-
 Tools/WinMLRunner/src/Common.h                |    6 +-
 Tools/WinMLRunner/src/Concurrency.cpp         |    4 +
 .../src/LearningModelDeviceHelper.cpp         |    6 +-
 Tools/WinMLRunner/src/OutputHelper.cpp        | 1197 ++++++++++++++++
 Tools/WinMLRunner/src/OutputHelper.h          | 1218 +----------------
 Tools/WinMLRunner/src/Run.cpp                 |    1 +
 Tools/WinMLRunner/src/Run.h                   |    2 +-
 Tools/WinMLRunner/src/TimerHelper.h           |    2 -
 Tools/WinMLRunner/src/TypeHelper.h            |    4 +
 25 files changed, 3223 insertions(+), 2824 deletions(-)
 delete mode 100644 Tools/WinMLRunner/WinMLRunnerDLL.vcxproj
 delete mode 100644 Tools/WinMLRunner/WinMLRunner_Link_DLL.vcxproj
 create mode 100644 Tools/WinMLRunner/packages.config
 create mode 100644 Tools/WinMLRunner/src/BindingUtilities.cpp
 create mode 100644 Tools/WinMLRunner/src/OutputHelper.cpp
diff --git a/Testing/WinMLRunnerTest/WinMLRunnerTest.cpp b/Testing/WinMLRunnerTest/WinMLRunnerTest.cpp
index 7a625fe5..db0b4e89 100644
--- a/Testing/WinMLRunnerTest/WinMLRunnerTest.cpp
+++ b/Testing/WinMLRunnerTest/WinMLRunnerTest.cpp
@@ -15,7 +15,7 @@
 #include <cmath>
 
 using namespace Microsoft::VisualStudio::CppUnitTestFramework;
-static HRESULT RunProc(LPWSTR commandLine)
+static HRESULT RunProc(wchar_t* commandLine)
 {
     STARTUPINFO SI = { 0 };
     PROCESS_INFORMATION PI = { 0 };
@@ -273,7 +273,7 @@ namespace WinMLRunnerTest
             const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
             const std::wstring command =
                 BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
 
             // We need to expect one more line because of the header
             Assert::AreEqual(static_cast<size_t>(3), GetOutputCSVLineCount());
@@ -283,7 +283,7 @@ namespace WinMLRunnerTest
             const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
             const std::wstring command =
                 BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-CPU" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
 
             // We need to expect one more line because of the header
             Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
@@ -293,7 +293,7 @@ namespace WinMLRunnerTest
             const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
             const std::wstring command =
                 BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-GPU" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
 
             // We need to expect one more line because of the header
             Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
@@ -304,7 +304,7 @@ namespace WinMLRunnerTest
             const std::wstring command =
                 BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-CPU",
                                L"-CPUBoundInput", L"-RGB", L"-CreateDeviceInWinML" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
 
             // We need to expect one more line because of the header
             Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
@@ -316,7 +316,7 @@ namespace WinMLRunnerTest
             const std::wstring command =
                 BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-CPU",
                                L"-CPUBoundInput", L"-BGR", L"-CreateDeviceInWinML" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
 
             // We need to expect one more line because of the header
             Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
@@ -328,7 +328,7 @@ namespace WinMLRunnerTest
             const std::wstring command =
                 BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-CPU",
                                L"-CPUBoundInput", L"-tensor", L"-CreateDeviceInWinML" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
 
             // We need to expect one more line because of the header
             Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
@@ -339,7 +339,7 @@ namespace WinMLRunnerTest
             const std::wstring command =
                 BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-CPU",
                                L"-GPUBoundInput", L"-RGB", L"-CreateDeviceInWinML" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
 
             // We need to expect one more line because of the header
             Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
@@ -351,7 +351,7 @@ namespace WinMLRunnerTest
             const std::wstring command =
                 BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-CPU",
                                L"-GPUBoundInput", L"-BGR", L"-CreateDeviceInWinML" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
 
             // We need to expect one more line because of the header
             Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
@@ -363,7 +363,7 @@ namespace WinMLRunnerTest
                 BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-CPU",
                                L"-GPUBoundInput", L"-tensor", L"-CreateDeviceInWinML" });
             // Binding GPU Tensor with Session created with CPU device isn't supported.
-            Assert::AreEqual(E_INVALIDARG, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(E_INVALIDARG, RunProc(const_cast<wchar_t*>(command.c_str())));
         }
         TEST_METHOD(GarbageInputGpuClientDeviceCpuBoundRGBImage)
         {
@@ -371,7 +371,7 @@ namespace WinMLRunnerTest
             const std::wstring command =
                 BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-GPU",
                                L"-CPUBoundInput", L"-RGB", L"-CreateDeviceOnClient" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
 
             // We need to expect one more line because of the header
             Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
@@ -383,7 +383,7 @@ namespace WinMLRunnerTest
             const std::wstring command =
                 BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-GPU",
                                L"-CPUBoundInput", L"-RGB", L"-CreateDeviceInWinML" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
 
             // We need to expect one more line because of the header
             Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
@@ -395,7 +395,7 @@ namespace WinMLRunnerTest
             const std::wstring command =
                 BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-GPU",
                                L"-CPUBoundInput", L"-BGR", L"-CreateDeviceOnClient" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
 
             // We need to expect one more line because of the header
             Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
@@ -407,7 +407,7 @@ namespace WinMLRunnerTest
             const std::wstring command =
                 BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-GPU",
                                L"-CPUBoundInput", L"-BGR", L"-CreateDeviceInWinML" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
 
             // We need to expect one more line because of the header
             Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
@@ -419,7 +419,7 @@ namespace WinMLRunnerTest
             const std::wstring command =
                 BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-GPU",
                                L"-CPUBoundInput", L"-tensor", L"-CreateDeviceOnClient" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
 
             // We need to expect one more line because of the header
             Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
@@ -431,7 +431,7 @@ namespace WinMLRunnerTest
             const std::wstring command =
                 BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-GPU",
                                L"-CPUBoundInput", L"-tensor", L"-CreateDeviceInWinML" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
 
             // We need to expect one more line because of the header
             Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
@@ -443,7 +443,7 @@ namespace WinMLRunnerTest
             const std::wstring command =
                 BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-GPU",
                                L"-GPUBoundInput", L"-RGB", L"-CreateDeviceOnClient" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
 
             // We need to expect one more line because of the header
             Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
@@ -455,7 +455,7 @@ namespace WinMLRunnerTest
             const std::wstring command =
                 BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-GPU",
                                L"-GPUBoundInput", L"-RGB", L"-CreateDeviceInWinML" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
 
             // We need to expect one more line because of the header
             Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
@@ -467,7 +467,7 @@ namespace WinMLRunnerTest
             const std::wstring command =
                 BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-GPU",
                                L"-GPUBoundInput", L"-BGR", L"-CreateDeviceOnClient" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
 
             // We need to expect one more line because of the header
             Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
@@ -479,7 +479,7 @@ namespace WinMLRunnerTest
             const std::wstring command =
                 BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-GPU",
                                L"-GPUBoundInput", L"-BGR", L"-CreateDeviceInWinML" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
 
             // We need to expect one more line because of the header
             Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
@@ -491,7 +491,7 @@ namespace WinMLRunnerTest
             const std::wstring command =
                 BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-GPU",
                                L"-GPUBoundInput", L"-tensor", L"-CreateDeviceOnClient" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
 
             // We need to expect one more line because of the header
             Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
@@ -503,7 +503,7 @@ namespace WinMLRunnerTest
             const std::wstring command =
                 BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-GPU",
                                L"-GPUBoundInput", L"-tensor", L"-CreateDeviceInWinML" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
 
             // We need to expect one more line because of the header
             Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
@@ -512,7 +512,7 @@ namespace WinMLRunnerTest
         TEST_METHOD(RunAllModelsInFolderGarbageInput)
         {
             const std::wstring command = BuildCommand({ EXE_PATH, L"-folder", INPUT_FOLDER_PATH, L"-PerfOutput", OUTPUT_PATH, L"-perf" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t *)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
 
             // We need to expect one more line because of the header
             Assert::AreEqual(static_cast<size_t>(5), GetOutputCSVLineCount());
@@ -545,7 +545,7 @@ namespace WinMLRunnerTest
             const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
             const std::wstring inputPath = CURRENT_PATH + L"fish.png";
             const std::wstring command = BuildCommand({ EXE_PATH, L"-model ", modelPath, L"-input", inputPath, L"-CPU" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t *)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
         }
 
         TEST_METHOD(ProvidedImageInputOnlyGpu)
@@ -553,7 +553,7 @@ namespace WinMLRunnerTest
             const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
             const std::wstring inputPath = CURRENT_PATH + L"fish.png";
             const std::wstring command = BuildCommand({ EXE_PATH, L"-model ", modelPath, L"-input", inputPath, L"-GPU" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t *)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
         }
 
         TEST_METHOD(ProvidedImageInputFolder)
@@ -573,7 +573,7 @@ namespace WinMLRunnerTest
                 system(copyCommand.c_str());
             }
             const std::wstring command = BuildCommand({ EXE_PATH, L"-model", L"SqueezeNet.onnx", L"-InputImageFolder", INPUT_FOLDER_PATH });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
 
             std::string removeCommand = "rd /s /q ";
             removeCommand += std::string(INPUT_FOLDER_PATH.begin(), INPUT_FOLDER_PATH.end());
@@ -586,7 +586,7 @@ namespace WinMLRunnerTest
             const std::wstring inputPath = CURRENT_PATH + L"fish_112.png";
             const std::wstring command =
                 BuildCommand({ EXE_PATH, L"-model ", modelPath, L"-input", inputPath, L"-autoScale", L"Cubic" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
         }
 
         TEST_METHOD_WITH_NAME(ProvidedImageInputOnlyCpuPerIterationPerformance)
@@ -597,7 +597,7 @@ namespace WinMLRunnerTest
                 BuildCommand({ EXE_PATH, L"-model", modelPath, L"-input", inputPath, L"-PerfOutput", OUTPUT_PATH, L"-perf",
                                L"-SavePerIterationPerf", L"-BaseOutputPath", tensorDataPath,
                                L"-PerIterationPath PerIterationData", L"-CPU" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
 
             // We need to expect one more line because of the header
             Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount(tensorDataPath + L"\\PerIterationData\\Summary.csv"));
@@ -609,7 +609,7 @@ namespace WinMLRunnerTest
             const std::wstring tensorDataPath = TENSOR_DATA_PATH + L"\\" + METHOD_NAME;
             const std::wstring command = BuildCommand({ EXE_PATH, L"-model ", modelPath, L"-input", inputPath,
                                                         L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-CPU" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
             Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Squeezenet_fish_input_CPU.csv",
                                                   tensorDataPath + L"\\softmaxout_1CpuIteration1.csv"));
         }
@@ -620,7 +620,7 @@ namespace WinMLRunnerTest
             const std::wstring tensorDataPath = TENSOR_DATA_PATH + L"\\" + METHOD_NAME;
             const std::wstring command = BuildCommand({ EXE_PATH, L"-model ", modelPath, L"-input", inputPath,
                                                         L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-GPU" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
             Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Squeezenet_fish_input_GPU.csv",
                                                   tensorDataPath + L"\\softmaxout_1GpuIteration1.csv"));
         }
@@ -631,7 +631,7 @@ namespace WinMLRunnerTest
             const std::wstring tensorDataPath = TENSOR_DATA_PATH + L"\\" + METHOD_NAME;
             const std::wstring command = BuildCommand({ EXE_PATH, L"-model ", modelPath, L"-input", inputPath,
                                                         L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-CPU" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
             Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Mnist_8_input_CPU.csv",
                 tensorDataPath + L"\\Plus214_Output_0CpuIteration1.csv"));
         }
@@ -642,7 +642,7 @@ namespace WinMLRunnerTest
             const std::wstring tensorDataPath = TENSOR_DATA_PATH + L"\\" + METHOD_NAME;
             const std::wstring command = BuildCommand({ EXE_PATH, L"-model ", modelPath, L"-input", inputPath,
                                                         L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-GPU" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
             Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Mnist_8_input_GPU.csv",
                 tensorDataPath + L"\\Plus214_Output_0GpuIteration1.csv"));
         }
@@ -653,7 +653,7 @@ namespace WinMLRunnerTest
             const std::wstring tensorDataPath = TENSOR_DATA_PATH + L"\\" + METHOD_NAME;
             const std::wstring command = BuildCommand({ EXE_PATH, L"-model ", modelPath, L"-input", inputPath,
                                                         L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-CPU" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
             Assert::AreEqual(true, CompareTensorsFP16(L"OutputTensorData\\Squeezenet_fp16_fish_input_CPU.csv",
                                                       tensorDataPath + L"\\softmaxout_1CpuIteration1.csv"));
         }
@@ -664,7 +664,7 @@ namespace WinMLRunnerTest
             const std::wstring tensorDataPath = TENSOR_DATA_PATH + L"\\" + METHOD_NAME;
             const std::wstring command = BuildCommand({ EXE_PATH, L"-model ", modelPath, L"-input", inputPath,
                                                         L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-GPU" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
             Assert::AreEqual(true, CompareTensorsFP16(L"OutputTensorData\\Squeezenet_fp16_fish_input_GPU.csv",
                                                       tensorDataPath + L"\\softmaxout_1GpuIteration1.csv"));
         }
@@ -676,7 +676,7 @@ namespace WinMLRunnerTest
             const std::wstring command = BuildCommand({ EXE_PATH, L"-model ", modelPath, L"-input", inputPath,
                                                         L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-CPU",
                                                         L"-Tensor" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
             Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Squeezenet_fish_input_CPU.csv",
                                                   tensorDataPath + L"\\softmaxout_1CpuIteration1.csv"));
         }
@@ -688,7 +688,7 @@ namespace WinMLRunnerTest
             const std::wstring command = BuildCommand({ EXE_PATH, L"-model ", modelPath, L"-input", inputPath,
                                                         L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-GPU",
                                                         L"-Tensor Identity" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
             Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Squeezenet_fish_input_GPU.csv",
                                                   tensorDataPath + L"\\softmaxout_1GpuIteration1.csv"));
         }
@@ -700,7 +700,7 @@ namespace WinMLRunnerTest
             const std::wstring command = BuildCommand({ EXE_PATH, L"-model ", modelPath, L"-input", inputPath,
                                                         L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-CPU",
                                                         L"-Tensor Normalize 255 0.485,0.456,0.406 0.229,0.224,0.225" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
             Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\DenseNet121_fp32_kitten_224_input_CPU.csv",
                                                   tensorDataPath + L"\\fc6_1CpuIteration1.csv"));
         }
@@ -712,7 +712,7 @@ namespace WinMLRunnerTest
             const std::wstring command = BuildCommand({ EXE_PATH, L"-model ", modelPath, L"-input", inputPath,
                                                         L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-GPU",
                                                         L"-Tensor Normalize 255 0.485,0.456,0.406 0.229,0.224,0.225" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
             Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\DenseNet121_fp32_kitten_224_input_GPU.csv",
                                                   tensorDataPath + L"\\fc6_1GpuIteration1.csv"));
         }
@@ -724,7 +724,7 @@ namespace WinMLRunnerTest
             const std::wstring command = BuildCommand({ EXE_PATH, L"-model ", modelPath, L"-input", inputPath,
                                                         L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-CPU",
                                                         L"-Tensor Normalize 255 0.485,0.456,0.406 0.229,0.224,0.225" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
             Assert::AreEqual(true, CompareTensorsFP16(L"OutputTensorData\\DenseNet121_fp16_kitten_224_input_CPU.csv",
                                                       tensorDataPath + L"\\fc6_1CpuIteration1.csv"));
         }
@@ -736,7 +736,7 @@ namespace WinMLRunnerTest
             const std::wstring command = BuildCommand({ EXE_PATH, L"-model ", modelPath, L"-input", inputPath,
                                                         L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-GPU",
                                                         L"-Tensor Normalize 255 0.485,0.456,0.406 0.229,0.224,0.225" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
             Assert::AreEqual(true, CompareTensorsFP16(L"OutputTensorData\\DenseNet121_fp16_kitten_224_input_GPU.csv",
                                                       tensorDataPath + L"\\fc6_1GpuIteration1.csv"));
         }
@@ -762,7 +762,7 @@ namespace WinMLRunnerTest
             const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
             const std::wstring inputPath = CURRENT_PATH + L"kitten_224.csv";
             const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-input", inputPath });
-            Assert::AreEqual(S_OK, RunProc((wchar_t *)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
         }
 
         TEST_METHOD(ProvidedCSVBadBinding)
@@ -770,7 +770,7 @@ namespace WinMLRunnerTest
             const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
             const std::wstring inputPath = CURRENT_PATH + L"horizontal-crop.csv";
             const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-input", inputPath });
-            Assert::AreEqual(HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER), RunProc((wchar_t *)command.c_str()));
+            Assert::AreEqual(HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER), RunProc(const_cast<wchar_t*>(command.c_str())));
         }
 
         TEST_METHOD_WITH_NAME(ProvidedCSVInputGPUSaveCpuBoundTensor)
@@ -779,7 +779,7 @@ namespace WinMLRunnerTest
             const std::wstring tensorDataPath = TENSOR_DATA_PATH + L"\\" + METHOD_NAME;
             const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-input", inputPath,
                                                         L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-GPU" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
             Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Squeezenet_fish_input_GPU.csv",
                                                   tensorDataPath + L"\\softmaxout_1GpuIteration1.csv"));
         }
@@ -790,7 +790,7 @@ namespace WinMLRunnerTest
             const std::wstring tensorDataPath = TENSOR_DATA_PATH + L"\\" + METHOD_NAME;
             const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-input", inputPath,
                                                         L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-GPU", L"-GPUBoundInput" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
             Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Squeezenet_fish_input_GPU.csv",
                 tensorDataPath + L"\\softmaxout_1GpuIteration1.csv"));
         }
@@ -801,7 +801,7 @@ namespace WinMLRunnerTest
             const std::wstring tensorDataPath = TENSOR_DATA_PATH + L"\\" + METHOD_NAME;
             const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-input", inputPath,
                                                         L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-CPU" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
             Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Squeezenet_fish_input_CPU.csv",
                                                   tensorDataPath + L"\\softmaxout_1CpuIteration1.csv"));
         }
@@ -812,7 +812,7 @@ namespace WinMLRunnerTest
             const std::wstring tensorDataPath = TENSOR_DATA_PATH + L"\\" + METHOD_NAME;
             const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-input", inputPath,
                                                         L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-GPU" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
             Assert::AreEqual(true, CompareTensorsFP16(L"OutputTensorData\\Squeezenet_fp16_fish_input_GPU.csv",
                                                       tensorDataPath + L"\\softmaxout_1GpuIteration1.csv"));
         }
@@ -823,7 +823,7 @@ namespace WinMLRunnerTest
             const std::wstring tensorDataPath = TENSOR_DATA_PATH + L"\\" + METHOD_NAME;
             const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-input", inputPath,
                                                         L"-SaveTensorData", L"First", L"-PerIterationPath", tensorDataPath, L"-CPU" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
             Assert::AreEqual(true, CompareTensorsFP16(L"OutputTensorData\\Squeezenet_fp16_fish_input_CPU.csv",
                                                       tensorDataPath + L"\\softmaxout_1CpuIteration1.csv"));
         }
@@ -855,7 +855,7 @@ namespace WinMLRunnerTest
             const std::wstring command = BuildCommand({
                 EXE_PATH, L"-folder", INPUT_FOLDER_PATH, L"-ConcurrentLoad", L"-NumThreads", L"5"
             });
-            Assert::AreEqual(S_OK, RunProc((wchar_t *)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
         }
     };
 
@@ -865,19 +865,19 @@ namespace WinMLRunnerTest
         TEST_METHOD(LoadModelFailModelNotFound)
         {
             const std::wstring command = BuildCommand({ EXE_PATH, L"-model", L"invalid_model_name" });
-            Assert::AreEqual(HRESULT_FROM_WIN32(E_FAIL), RunProc((wchar_t *)command.c_str()));
+            Assert::AreEqual(HRESULT_FROM_WIN32(ERROR_FILE_NOT_FOUND), RunProc(const_cast<wchar_t*>(command.c_str())));
         }
 
         TEST_METHOD(TestPrintUsage)
         {
             const std::wstring command = BuildCommand({ EXE_PATH });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
         }
         
         TEST_METHOD(TestTopK)
         {
             const std::wstring command = BuildCommand({ EXE_PATH, L"-model", CURRENT_PATH + L"SqueezeNet.onnx", L"-TopK", L"5" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
         }
 
         /* Commenting out test until WinMLRunnerDLL.dll is properly written and ABI friendly
@@ -891,11 +891,11 @@ namespace WinMLRunnerTest
             const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
             const std::wstring dllPath = CURRENT_PATH + L"WinMLRunnerDLL.dll";
             const std::wstring command = BuildCommand({ L"WinMLRunner_Link_DLL.exe",  L"-model", modelPath });
-            Assert::AreEqual(S_OK, RunProc((wchar_t *)command.c_str()));
+            Assert::AreEqual(S_OK, RunProc(const_cast<wchar_t*>(command.c_str())));
 
             //Rename WinMLRunnerDLL and then run DLL Linked Executable and check if failed
             rename("WinMLRunnerDLL.dll", "WinMLRunnerDLL_renamed");
-            Assert::AreEqual(static_cast<HRESULT>(STATUS_DLL_NOT_FOUND), RunProc((wchar_t *)command.c_str()));
+            Assert::AreEqual(static_cast<HRESULT>(STATUS_DLL_NOT_FOUND), RunProc(const_cast<wchar_t*>(command.c_str())));
 
             //rename back to original naming
             rename("WinMLRunnerDLL_renamed", "WinMLRunnerDLL.dll");
diff --git a/Testing/WinMLRunnerTest/WinMLRunnerTest.vcxproj b/Testing/WinMLRunnerTest/WinMLRunnerTest.vcxproj
index 40de20af..0eb9ef4c 100644
--- a/Testing/WinMLRunnerTest/WinMLRunnerTest.vcxproj
+++ b/Testing/WinMLRunnerTest/WinMLRunnerTest.vcxproj
@@ -1,10 +1,26 @@
 <?xml version="1.0" encoding="utf-8"?>
 <Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
   <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug_NuGet|Win32">
+      <Configuration>Debug_NuGet</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug_NuGet|x64">
+      <Configuration>Debug_NuGet</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
     <ProjectConfiguration Include="Debug|Win32">
       <Configuration>Debug</Configuration>
       <Platform>Win32</Platform>
     </ProjectConfiguration>
+    <ProjectConfiguration Include="Release_NuGet|Win32">
+      <Configuration>Release_NuGet</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release_NuGet|x64">
+      <Configuration>Release_NuGet</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
     <ProjectConfiguration Include="Release|Win32">
       <Configuration>Release</Configuration>
       <Platform>Win32</Platform>
@@ -34,6 +50,13 @@
     <CharacterSet>Unicode</CharacterSet>
     <UseOfMfc>false</UseOfMfc>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'" Label="Configuration"><!-- Base build settings applied only to the Debug_NuGet|Win32 configuration. -->
+    <ConfigurationType>DynamicLibrary</ConfigurationType><!-- the test project is built as a DLL -->
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset><!-- Visual Studio 2017 toolset -->
+    <CharacterSet>Unicode</CharacterSet>
+    <UseOfMfc>false</UseOfMfc>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>DynamicLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
@@ -42,6 +65,14 @@
     <CharacterSet>Unicode</CharacterSet>
     <UseOfMfc>false</UseOfMfc>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'" Label="Configuration"><!-- Base build settings applied only to the Release_NuGet|Win32 configuration. -->
+    <ConfigurationType>DynamicLibrary</ConfigurationType><!-- the test project is built as a DLL -->
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset><!-- Visual Studio 2017 toolset -->
+    <WholeProgramOptimization>true</WholeProgramOptimization><!-- release-only: enables whole program optimization -->
+    <CharacterSet>Unicode</CharacterSet>
+    <UseOfMfc>false</UseOfMfc>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
     <ConfigurationType>DynamicLibrary</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
@@ -49,6 +80,13 @@
     <CharacterSet>Unicode</CharacterSet>
     <UseOfMfc>false</UseOfMfc>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'" Label="Configuration"><!-- Base build settings applied only to the Debug_NuGet|x64 configuration. -->
+    <ConfigurationType>DynamicLibrary</ConfigurationType><!-- the test project is built as a DLL -->
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset><!-- Visual Studio 2017 toolset -->
+    <CharacterSet>Unicode</CharacterSet>
+    <UseOfMfc>false</UseOfMfc>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>DynamicLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
@@ -57,6 +95,14 @@
     <CharacterSet>Unicode</CharacterSet>
     <UseOfMfc>false</UseOfMfc>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'" Label="Configuration"><!-- Base build settings applied only to the Release_NuGet|x64 configuration. -->
+    <ConfigurationType>DynamicLibrary</ConfigurationType><!-- the test project is built as a DLL -->
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset><!-- Visual Studio 2017 toolset -->
+    <WholeProgramOptimization>true</WholeProgramOptimization><!-- release-only: enables whole program optimization -->
+    <CharacterSet>Unicode</CharacterSet>
+    <UseOfMfc>false</UseOfMfc>
+  </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
   </ImportGroup>
@@ -65,30 +111,56 @@
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <LinkIncremental>true</LinkIncremental>
     <OutDir>$(SolutionDir)\x86\$(Configuration)\</OutDir>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)\x86\$(Configuration)\</OutDir>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <LinkIncremental>true</LinkIncremental>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <LinkIncremental>true</LinkIncremental>
     <OutDir>$(SolutionDir)\x86\$(Configuration)\</OutDir>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'"><!-- Link mode and output folder for the Release_NuGet|Win32 configuration. -->
+    <LinkIncremental>false</LinkIncremental><!-- this configuration sets OptimizeReferences and EnableCOMDATFolding, which force a full link; requesting incremental linking would only be ignored with a linker warning -->
+    <OutDir>$(SolutionDir)\x86\$(Configuration)\</OutDir><!-- Win32 output goes under the x86 folder of the solution directory -->
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <LinkIncremental>true</LinkIncremental>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'"><!-- Link mode for the Release_NuGet|x64 configuration. -->
+    <LinkIncremental>false</LinkIncremental><!-- this configuration sets OptimizeReferences and EnableCOMDATFolding, which force a full link; requesting incremental linking would only be ignored with a linker warning -->
+  </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <ClCompile>
       <PrecompiledHeader>NotUsing</PrecompiledHeader>
@@ -105,6 +177,22 @@
       <AdditionalDependencies>$(TargetDir)\WinMLRunnerStaticLib\Filehelper.obj;</AdditionalDependencies>
     </Link>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'"><!-- Compiler and linker defaults for the Debug_NuGet|Win32 configuration. -->
+    <ClCompile>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>$(VCInstallDir)UnitTest\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories><!-- UnitTest headers from the Visual C++ install, then any inherited include paths -->
+      <PreprocessorDefinitions>WIN32;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <UseFullPaths>true</UseFullPaths>
+      <LanguageStandard>stdcpp17</LanguageStandard><!-- compile as C++17 -->
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <AdditionalLibraryDirectories>$(VCInstallDir)UnitTest\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories><!-- UnitTest libraries from the Visual C++ install, then any inherited library paths -->
+      <AdditionalDependencies>$(TargetDir)\WinMLRunnerStaticLib\Filehelper.obj;</AdditionalDependencies><!-- links Filehelper.obj from the WinMLRunnerStaticLib folder under the target directory; NOTE(review): %(AdditionalDependencies) is not appended, so inherited dependencies are replaced; confirm this is intended -->
+    </Link>
+  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
       <PrecompiledHeader>NotUsing</PrecompiledHeader>
@@ -121,6 +209,22 @@
       <AdditionalDependencies>$(TargetDir)\WinMLRunnerStaticLib\Filehelper.obj;</AdditionalDependencies>
     </Link>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'"><!-- Compiler and linker defaults for the Debug_NuGet|x64 configuration. -->
+    <ClCompile>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>$(VCInstallDir)UnitTest\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories><!-- UnitTest headers from the Visual C++ install, then any inherited include paths -->
+      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <UseFullPaths>true</UseFullPaths>
+      <LanguageStandard>stdcpp17</LanguageStandard><!-- compile as C++17 -->
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <AdditionalLibraryDirectories>$(VCInstallDir)UnitTest\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories><!-- UnitTest libraries from the Visual C++ install, then any inherited library paths -->
+      <AdditionalDependencies>$(TargetDir)\WinMLRunnerStaticLib\Filehelper.obj;</AdditionalDependencies><!-- links Filehelper.obj from the WinMLRunnerStaticLib folder under the target directory; NOTE(review): %(AdditionalDependencies) is not appended, so inherited dependencies are replaced; confirm this is intended -->
+    </Link>
+  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
       <WarningLevel>Level3</WarningLevel>
@@ -142,6 +246,27 @@
       <AdditionalDependencies>$(TargetDir)\WinMLRunnerStaticLib\Filehelper.obj;</AdditionalDependencies>
     </Link>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'"><!-- Compiler and linker defaults for the Release_NuGet|Win32 configuration. -->
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>$(VCInstallDir)UnitTest\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories><!-- UnitTest headers from the Visual C++ install, then any inherited include paths -->
+      <PreprocessorDefinitions>WIN32;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <UseFullPaths>true</UseFullPaths>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary><!-- statically linked release C runtime -->
+      <LanguageStandard>stdcpp17</LanguageStandard><!-- compile as C++17 -->
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalLibraryDirectories>$(VCInstallDir)UnitTest\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories><!-- UnitTest libraries from the Visual C++ install, then any inherited library paths -->
+      <AdditionalDependencies>$(TargetDir)\WinMLRunnerStaticLib\Filehelper.obj;</AdditionalDependencies><!-- links Filehelper.obj from the WinMLRunnerStaticLib folder under the target directory; NOTE(review): %(AdditionalDependencies) is not appended, so inherited dependencies are replaced; confirm this is intended -->
+    </Link>
+  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <ClCompile>
       <WarningLevel>Level3</WarningLevel>
@@ -163,297 +288,526 @@
       <AdditionalDependencies>$(TargetDir)\WinMLRunnerStaticLib\Filehelper.obj;</AdditionalDependencies>
     </Link>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'"><!-- Compiler and linker defaults for the Release_NuGet|x64 configuration. -->
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>$(VCInstallDir)UnitTest\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories><!-- UnitTest headers from the Visual C++ install, then any inherited include paths -->
+      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <UseFullPaths>true</UseFullPaths>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary><!-- statically linked release C runtime -->
+      <LanguageStandard>stdcpp17</LanguageStandard><!-- compile as C++17 -->
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalLibraryDirectories>$(VCInstallDir)UnitTest\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories><!-- UnitTest libraries from the Visual C++ install, then any inherited library paths -->
+      <AdditionalDependencies>$(TargetDir)\WinMLRunnerStaticLib\Filehelper.obj;</AdditionalDependencies><!-- links Filehelper.obj from the WinMLRunnerStaticLib folder under the target directory; NOTE(review): %(AdditionalDependencies) is not appended, so inherited dependencies are replaced; confirm this is intended -->
+    </Link>
+  </ItemDefinitionGroup>
   <ItemGroup>
     <ClCompile Include="WinMLRunnerTest.cpp" />
   </ItemGroup>
   <ItemGroup>
     <Content Include="..\..\SharedContent\media\fish.csv">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">true</DeploymentContent>
       <FileType>Document</FileType>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">true</DeploymentContent>
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </Content>
     <Content Include="..\..\SharedContent\media\horizontal-crop.csv">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">true</DeploymentContent>
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </Content>
     <Content Include="..\..\SharedContent\media\kitten_224.csv">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">true</DeploymentContent>
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </Content>
     <Content Include="..\..\SharedContent\models\SqueezeNet.onnx">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">true</DeploymentContent>
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </Content>
     <Content Include="..\..\SharedContent\models\SqueezeNet_fp16.onnx">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">true</DeploymentContent>
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </Content>
     <Content Include="..\..\SharedContent\models\keras_Add_ImageNet_small.onnx">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">true</DeploymentContent>
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </Content>
   </ItemGroup>
   <ItemGroup>
     <Content Include="..\..\SharedContent\media\fish.png">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">true</DeploymentContent>
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </Content>
     <Content Include="..\..\SharedContent\media\fish_112.png">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">true</DeploymentContent>
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </Content>
     <Content Include="..\..\SharedContent\media\horizontal-crop.png">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">true</DeploymentContent>
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </Content>
     <Content Include="..\..\SharedContent\media\kitten_224.png">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">true</DeploymentContent>
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </Content>
     <Content Include="OutputTensorData\Squeezenet_fish_input_CPU.csv">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">true</DeploymentContent>
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </Content>
     <Content Include="OutputTensorData\Squeezenet_fish_input_GPU.csv">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">true</DeploymentContent>
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </Content>
     <Content Include="OutputTensorData\Squeezenet_garbage_input_GPU.csv">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">true</DeploymentContent>
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </Content>
     <Content Include="OutputTensorData\Squeezenet_garbage_input_CPU.csv">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">true</DeploymentContent>
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </Content>
     <Content Include="OutputTensorData\Squeezenet_fp16_fish_input_CPU.csv">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">true</DeploymentContent>
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </Content>
     <Content Include="OutputTensorData\Squeezenet_fp16_fish_input_GPU.csv">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">true</DeploymentContent>
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </Content>
     <Content Include="OutputTensorData\Squeezenet_fp16_garbage_input_CPU.csv">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">true</DeploymentContent>
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </Content>
     <Content Include="OutputTensorData\Squeezenet_fp16_garbage_input_GPU.csv">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">true</DeploymentContent>
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </Content>
     <Content Include="OutputTensorData\Mnist_8_input_CPU.csv">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">true</DeploymentContent>
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </Content>
     <Content Include="OutputTensorData\Mnist_8_input_GPU.csv">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">true</DeploymentContent>
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </Content>
     <Content Include="..\..\SharedContent\models\DenseNet121_fp32.onnx">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">true</DeploymentContent>
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </Content>
     <Content Include="..\..\SharedContent\models\DenseNet121_fp16.onnx">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">true</DeploymentContent>
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </Content>
     <Content Include="OutputTensorData\DenseNet121_fp32_kitten_224_input_CPU.csv">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">true</DeploymentContent>
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </Content>
     <Content Include="OutputTensorData\DenseNet121_fp32_kitten_224_input_GPU.csv">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">true</DeploymentContent>
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </Content>
     <Content Include="OutputTensorData\DenseNet121_fp16_kitten_224_input_CPU.csv">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">true</DeploymentContent>
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </Content>
     <Content Include="OutputTensorData\DenseNet121_fp16_kitten_224_input_GPU.csv">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">true</DeploymentContent>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">false</ExcludedFromBuild>
       <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
+      <DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">true</DeploymentContent>
       <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     </Content>
   </ItemGroup>
diff --git a/Tools/WinMLRunner/README.md b/Tools/WinMLRunner/README.md
index 5166a975..1bff24ae 100644
--- a/Tools/WinMLRunner/README.md
+++ b/Tools/WinMLRunner/README.md
@@ -83,6 +83,9 @@ Run a model on the CPU and GPU separately, and by binding the input to the CPU a
 Run a model on the CPU with the input bound to the GPU and loaded as an RGB image:
 > WinMLRunner.exe -model c:\\data\\SqueezeNet.onnx -CPU -GPUBoundInput -RGB
 
+## Using Microsoft.AI.MachineLearning NuGet
+WinMLRunner can be built to use WinML's NuGet package, Microsoft.AI.MachineLearning. Simply build with the target configuration "Debug_NuGet" or "Release_NuGet". MicrosoftMLRunner.exe will be created and will use ```Microsoft.AI.MachineLearning.dll``` from the same directory as the executable instead of loading ```Windows.AI.MachineLearning.dll``` from System32. MicrosoftMLRunner is useful for comparing performance with an older version or testing a newer version of WinML's NuGet package. For more information, please refer to the [Microsoft.AI.MachineLearning NuGet page](https://www.nuget.org/packages/Microsoft.AI.MachineLearning).
+
 ## Default output
 
 **Running a good model:**
@@ -286,10 +289,6 @@ tracerpt.exe winmllog.etl -o logdump.csv -of CSV
 2. Windows Performance Analyzer (from Visual Studio)
  * Launch Windows Performance Analyzer and open the winmllog.etl.
 
-## Dynamic DLL Loading
-
-If you want to run WinMLRunner with another version of WinML (e.g. comparing the performance with an older version or testing a newer version), simply place the `windows.ai.machinelearning.dll` and `directml.dll` files in the same folder as WinMLRunner.exe. WinMLRunner will look for for these DLLs first and fall back to `C:/Windows/System32` if it doesn't find them.
-
 ## Known issues
 
 - Sequence/Map inputs are not supported yet (the model is just skipped, so it doesn't block other models in a folder);
diff --git a/Tools/WinMLRunner/WinMLRunner.sln b/Tools/WinMLRunner/WinMLRunner.sln
index b3d17052..f3acc713 100644
--- a/Tools/WinMLRunner/WinMLRunner.sln
+++ b/Tools/WinMLRunner/WinMLRunner.sln
@@ -17,60 +17,80 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "WinMLRunnerScenarios", "Win
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
-		Debug|ARM64 = Debug|ARM64
-		Debug|x64 = Debug|x64
-		Debug|x86 = Debug|x86
-		Release|ARM64 = Release|ARM64
-		Release|x64 = Release|x64
-		Release|x86 = Release|x86
+		Debug_Inbox|x64 = Debug_Inbox|x64
+		Debug_Inbox|x86 = Debug_Inbox|x86
+		Debug_NuGet|x64 = Debug_NuGet|x64
+		Debug_NuGet|x86 = Debug_NuGet|x86
+		Release_Inbox|x64 = Release_Inbox|x64
+		Release_Inbox|x86 = Release_Inbox|x86
+		Release_NuGet|x64 = Release_NuGet|x64
+		Release_NuGet|x86 = Release_NuGet|x86
 	EndGlobalSection
 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
-		{E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Debug|ARM64.ActiveCfg = Debug|Win32
-		{E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Debug|x64.ActiveCfg = Debug|x64
-		{E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Debug|x64.Build.0 = Debug|x64
-		{E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Debug|x86.ActiveCfg = Debug|Win32
-		{E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Debug|x86.Build.0 = Debug|Win32
-		{E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Release|ARM64.ActiveCfg = Release|Win32
-		{E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Release|x64.ActiveCfg = Release|x64
-		{E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Release|x64.Build.0 = Release|x64
-		{E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Release|x86.ActiveCfg = Release|Win32
-		{E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Release|x86.Build.0 = Release|Win32
-		{31653A2F-02CC-4A95-9880-BF86965FB262}.Debug|ARM64.ActiveCfg = Debug|ARM64
-		{31653A2F-02CC-4A95-9880-BF86965FB262}.Debug|ARM64.Build.0 = Debug|ARM64
-		{31653A2F-02CC-4A95-9880-BF86965FB262}.Debug|x64.ActiveCfg = Debug|x64
-		{31653A2F-02CC-4A95-9880-BF86965FB262}.Debug|x64.Build.0 = Debug|x64
-		{31653A2F-02CC-4A95-9880-BF86965FB262}.Debug|x86.ActiveCfg = Debug|Win32
-		{31653A2F-02CC-4A95-9880-BF86965FB262}.Debug|x86.Build.0 = Debug|Win32
-		{31653A2F-02CC-4A95-9880-BF86965FB262}.Release|ARM64.ActiveCfg = Release|ARM64
-		{31653A2F-02CC-4A95-9880-BF86965FB262}.Release|ARM64.Build.0 = Release|ARM64
-		{31653A2F-02CC-4A95-9880-BF86965FB262}.Release|x64.ActiveCfg = Release|x64
-		{31653A2F-02CC-4A95-9880-BF86965FB262}.Release|x64.Build.0 = Release|x64
-		{31653A2F-02CC-4A95-9880-BF86965FB262}.Release|x86.ActiveCfg = Release|Win32
-		{31653A2F-02CC-4A95-9880-BF86965FB262}.Release|x86.Build.0 = Release|Win32
-		{C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Debug|ARM64.ActiveCfg = Debug|ARM64
-		{C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Debug|ARM64.Build.0 = Debug|ARM64
-		{C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Debug|x64.ActiveCfg = Debug|x64
-		{C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Debug|x64.Build.0 = Debug|x64
-		{C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Debug|x86.ActiveCfg = Debug|Win32
-		{C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Debug|x86.Build.0 = Debug|Win32
-		{C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Release|ARM64.ActiveCfg = Release|ARM64
-		{C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Release|ARM64.Build.0 = Release|ARM64
-		{C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Release|x64.ActiveCfg = Release|x64
-		{C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Release|x64.Build.0 = Release|x64
-		{C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Release|x86.ActiveCfg = Release|Win32
-		{C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Release|x86.Build.0 = Release|Win32
-		{C174D45D-C189-475B-B1A7-494939EE7491}.Debug|ARM64.ActiveCfg = Debug|ARM64
-		{C174D45D-C189-475B-B1A7-494939EE7491}.Debug|ARM64.Build.0 = Debug|ARM64
-		{C174D45D-C189-475B-B1A7-494939EE7491}.Debug|x64.ActiveCfg = Debug|x64
-		{C174D45D-C189-475B-B1A7-494939EE7491}.Debug|x64.Build.0 = Debug|x64
-		{C174D45D-C189-475B-B1A7-494939EE7491}.Debug|x86.ActiveCfg = Debug|Win32
-		{C174D45D-C189-475B-B1A7-494939EE7491}.Debug|x86.Build.0 = Debug|Win32
-		{C174D45D-C189-475B-B1A7-494939EE7491}.Release|ARM64.ActiveCfg = Release|ARM64
-		{C174D45D-C189-475B-B1A7-494939EE7491}.Release|ARM64.Build.0 = Release|ARM64
-		{C174D45D-C189-475B-B1A7-494939EE7491}.Release|x64.ActiveCfg = Release|x64
-		{C174D45D-C189-475B-B1A7-494939EE7491}.Release|x64.Build.0 = Release|x64
-		{C174D45D-C189-475B-B1A7-494939EE7491}.Release|x86.ActiveCfg = Release|Win32
-		{C174D45D-C189-475B-B1A7-494939EE7491}.Release|x86.Build.0 = Release|Win32
+		{E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Debug_Inbox|x64.ActiveCfg = Debug|x64
+		{E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Debug_Inbox|x64.Build.0 = Debug|x64
+		{E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Debug_Inbox|x86.ActiveCfg = Debug|Win32
+		{E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Debug_Inbox|x86.Build.0 = Debug|Win32
+		{E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Debug_NuGet|x64.ActiveCfg = Debug_NuGet|x64
+		{E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Debug_NuGet|x64.Build.0 = Debug_NuGet|x64
+		{E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Debug_NuGet|x86.ActiveCfg = Debug_NuGet|Win32
+		{E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Debug_NuGet|x86.Build.0 = Debug_NuGet|Win32
+		{E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Release_Inbox|x64.ActiveCfg = Release|x64
+		{E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Release_Inbox|x64.Build.0 = Release|x64
+		{E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Release_Inbox|x86.ActiveCfg = Release|Win32
+		{E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Release_Inbox|x86.Build.0 = Release|Win32
+		{E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Release_NuGet|x64.ActiveCfg = Release_NuGet|x64
+		{E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Release_NuGet|x64.Build.0 = Release_NuGet|x64
+		{E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Release_NuGet|x86.ActiveCfg = Release_NuGet|Win32
+		{E9D4AC92-8295-4FB4-BF7D-3FAF74B564E8}.Release_NuGet|x86.Build.0 = Release_NuGet|Win32
+		{31653A2F-02CC-4A95-9880-BF86965FB262}.Debug_Inbox|x64.ActiveCfg = Debug|x64
+		{31653A2F-02CC-4A95-9880-BF86965FB262}.Debug_Inbox|x64.Build.0 = Debug|x64
+		{31653A2F-02CC-4A95-9880-BF86965FB262}.Debug_Inbox|x86.ActiveCfg = Debug|Win32
+		{31653A2F-02CC-4A95-9880-BF86965FB262}.Debug_Inbox|x86.Build.0 = Debug|Win32
+		{31653A2F-02CC-4A95-9880-BF86965FB262}.Debug_NuGet|x64.ActiveCfg = Debug_NuGet|x64
+		{31653A2F-02CC-4A95-9880-BF86965FB262}.Debug_NuGet|x64.Build.0 = Debug_NuGet|x64
+		{31653A2F-02CC-4A95-9880-BF86965FB262}.Debug_NuGet|x86.ActiveCfg = Debug_NuGet|Win32
+		{31653A2F-02CC-4A95-9880-BF86965FB262}.Debug_NuGet|x86.Build.0 = Debug_NuGet|Win32
+		{31653A2F-02CC-4A95-9880-BF86965FB262}.Release_Inbox|x64.ActiveCfg = Release|x64
+		{31653A2F-02CC-4A95-9880-BF86965FB262}.Release_Inbox|x64.Build.0 = Release|x64
+		{31653A2F-02CC-4A95-9880-BF86965FB262}.Release_Inbox|x86.ActiveCfg = Release|Win32
+		{31653A2F-02CC-4A95-9880-BF86965FB262}.Release_Inbox|x86.Build.0 = Release|Win32
+		{31653A2F-02CC-4A95-9880-BF86965FB262}.Release_NuGet|x64.ActiveCfg = Release_NuGet|x64
+		{31653A2F-02CC-4A95-9880-BF86965FB262}.Release_NuGet|x64.Build.0 = Release_NuGet|x64
+		{31653A2F-02CC-4A95-9880-BF86965FB262}.Release_NuGet|x86.ActiveCfg = Release_NuGet|Win32
+		{31653A2F-02CC-4A95-9880-BF86965FB262}.Release_NuGet|x86.Build.0 = Release_NuGet|Win32
+		{C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Debug_Inbox|x64.ActiveCfg = Debug|x64
+		{C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Debug_Inbox|x64.Build.0 = Debug|x64
+		{C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Debug_Inbox|x86.ActiveCfg = Debug|Win32
+		{C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Debug_Inbox|x86.Build.0 = Debug|Win32
+		{C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Debug_NuGet|x64.ActiveCfg = Debug_NuGet|x64
+		{C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Debug_NuGet|x64.Build.0 = Debug_NuGet|x64
+		{C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Debug_NuGet|x86.ActiveCfg = Debug_NuGet|Win32
+		{C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Debug_NuGet|x86.Build.0 = Debug_NuGet|Win32
+		{C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Release_Inbox|x64.ActiveCfg = Release|x64
+		{C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Release_Inbox|x64.Build.0 = Release|x64
+		{C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Release_Inbox|x86.ActiveCfg = Release|Win32
+		{C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Release_Inbox|x86.Build.0 = Release|Win32
+		{C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Release_NuGet|x64.ActiveCfg = Release_NuGet|x64
+		{C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Release_NuGet|x64.Build.0 = Release_NuGet|x64
+		{C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Release_NuGet|x86.ActiveCfg = Release_NuGet|Win32
+		{C3BCBEA1-90E6-426F-88AC-64C274BCEF45}.Release_NuGet|x86.Build.0 = Release_NuGet|Win32
+		{C174D45D-C189-475B-B1A7-494939EE7491}.Debug_Inbox|x64.ActiveCfg = Debug|x64
+		{C174D45D-C189-475B-B1A7-494939EE7491}.Debug_Inbox|x64.Build.0 = Debug|x64
+		{C174D45D-C189-475B-B1A7-494939EE7491}.Debug_Inbox|x86.ActiveCfg = Debug|Win32
+		{C174D45D-C189-475B-B1A7-494939EE7491}.Debug_Inbox|x86.Build.0 = Debug|Win32
+		{C174D45D-C189-475B-B1A7-494939EE7491}.Debug_NuGet|x64.ActiveCfg = Debug_NuGet|x64
+		{C174D45D-C189-475B-B1A7-494939EE7491}.Debug_NuGet|x64.Build.0 = Debug_NuGet|x64
+		{C174D45D-C189-475B-B1A7-494939EE7491}.Debug_NuGet|x86.ActiveCfg = Debug_NuGet|Win32
+		{C174D45D-C189-475B-B1A7-494939EE7491}.Debug_NuGet|x86.Build.0 = Debug_NuGet|Win32
+		{C174D45D-C189-475B-B1A7-494939EE7491}.Release_Inbox|x64.ActiveCfg = Release|x64
+		{C174D45D-C189-475B-B1A7-494939EE7491}.Release_Inbox|x64.Build.0 = Release|x64
+		{C174D45D-C189-475B-B1A7-494939EE7491}.Release_Inbox|x86.ActiveCfg = Release|Win32
+		{C174D45D-C189-475B-B1A7-494939EE7491}.Release_Inbox|x86.Build.0 = Release|Win32
+		{C174D45D-C189-475B-B1A7-494939EE7491}.Release_NuGet|x64.ActiveCfg = Release_NuGet|x64
+		{C174D45D-C189-475B-B1A7-494939EE7491}.Release_NuGet|x64.Build.0 = Release_NuGet|x64
+		{C174D45D-C189-475B-B1A7-494939EE7491}.Release_NuGet|x86.ActiveCfg = Release_NuGet|Win32
+		{C174D45D-C189-475B-B1A7-494939EE7491}.Release_NuGet|x86.Build.0 = Release_NuGet|Win32
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
diff --git a/Tools/WinMLRunner/WinMLRunner.vcxproj b/Tools/WinMLRunner/WinMLRunner.vcxproj
index 773260ec..b1d43b71 100644
--- a/Tools/WinMLRunner/WinMLRunner.vcxproj
+++ b/Tools/WinMLRunner/WinMLRunner.vcxproj
@@ -1,17 +1,27 @@
 <?xml version="1.0" encoding="utf-8"?>
 <Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="packages\Microsoft.Windows.CppWinRT.2.0.200729.8\build\native\Microsoft.Windows.CppWinRT.props" Condition="Exists('packages\Microsoft.Windows.CppWinRT.2.0.200729.8\build\native\Microsoft.Windows.CppWinRT.props')" />
+  <Import Project="packages\Microsoft.AI.MachineLearning.1.4.0\build\native\Microsoft.AI.MachineLearning.props" Condition="Exists('packages\Microsoft.AI.MachineLearning.1.4.0\build\native\Microsoft.AI.MachineLearning.props')" />
   <ItemGroup Label="ProjectConfigurations">
-    <ProjectConfiguration Include="Debug|ARM64">
-      <Configuration>Debug</Configuration>
-      <Platform>ARM64</Platform>
+    <ProjectConfiguration Include="Debug_NuGet|Win32">
+      <Configuration>Debug_NuGet</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug_NuGet|x64">
+      <Configuration>Debug_NuGet</Configuration>
+      <Platform>x64</Platform>
     </ProjectConfiguration>
     <ProjectConfiguration Include="Debug|Win32">
       <Configuration>Debug</Configuration>
       <Platform>Win32</Platform>
     </ProjectConfiguration>
-    <ProjectConfiguration Include="Release|ARM64">
-      <Configuration>Release</Configuration>
-      <Platform>ARM64</Platform>
+    <ProjectConfiguration Include="Release_NuGet|Win32">
+      <Configuration>Release_NuGet</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release_NuGet|x64">
+      <Configuration>Release_NuGet</Configuration>
+      <Platform>x64</Platform>
     </ProjectConfiguration>
     <ProjectConfiguration Include="Release|Win32">
       <Configuration>Release</Configuration>
@@ -29,6 +39,9 @@
   <ItemGroup>
     <ClCompile Include="src/main.cpp" />
   </ItemGroup>
+  <ItemGroup>
+    <None Include="packages.config" />
+  </ItemGroup>
   <ItemGroup>
     <ProjectReference Include="WinMLRunnerScenarios.vcxproj">
       <Project>{c174d45d-c189-475b-b1a7-494939ee7491}</Project>
@@ -49,7 +62,6 @@
     <Keyword>Win32Proj</Keyword>
     <RootNamespace>WinMLRunner</RootNamespace>
     <WindowsTargetPlatformVersion>10.0.18362.0</WindowsTargetPlatformVersion>
-    <WindowsSDKDesktopARM64Support>true</WindowsSDKDesktopARM64Support>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
@@ -58,7 +70,7 @@
     <PlatformToolset>v141</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'" Label="Configuration">
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
     <PlatformToolset>v141</PlatformToolset>
@@ -71,7 +83,7 @@
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'" Label="Configuration">
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
     <PlatformToolset>v141</PlatformToolset>
@@ -84,6 +96,12 @@
     <PlatformToolset>v141</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>Application</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
@@ -91,6 +109,13 @@
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
   </ImportGroup>
@@ -99,48 +124,66 @@
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
-  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'" Label="PropertySheets">
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'" Label="PropertySheets">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
-  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'" Label="PropertySheets">
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'" Label="PropertySheets">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <LinkIncremental>true</LinkIncremental>
     <IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+    <TargetName>MicrosoftMLRunner</TargetName>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <LinkIncremental>true</LinkIncremental>
     <OutDir>$(SolutionDir)\x86\$(Configuration)\</OutDir>
     <IntDir>x86\$(Configuration)\$(ProjectName)\</IntDir>
   </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">
     <LinkIncremental>true</LinkIncremental>
-    <IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+    <OutDir>$(SolutionDir)\x86\$(Configuration)\</OutDir>
+    <IntDir>x86\$(Configuration)\$(ProjectName)\</IntDir>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <LinkIncremental>false</LinkIncremental>
     <OutDir>$(SolutionDir)\x86\$(Configuration)\</OutDir>
     <IntDir>x86\$(Configuration)\$(ProjectName)\</IntDir>
   </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">
     <LinkIncremental>false</LinkIncremental>
-    <IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+    <OutDir>$(SolutionDir)\x86\$(Configuration)\</OutDir>
+    <IntDir>x86\$(Configuration)\$(ProjectName)\</IntDir>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <LinkIncremental>false</LinkIncremental>
     <IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+    <TargetName>MicrosoftMLRunner</TargetName>
+  </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
       <PrecompiledHeader>NotUsing</PrecompiledHeader>
@@ -169,6 +212,35 @@
       <AdditionalIncludeDirectories>$(IntDir)</AdditionalIncludeDirectories>
     </ResourceCompile>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">
+    <ClCompile>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>USE_WINML_NUGET;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>true</ConformanceMode>
+      <PrecompiledHeaderFile>
+      </PrecompiledHeaderFile>
+      <AdditionalIncludeDirectories>
+      </AdditionalIncludeDirectories>
+      <PrecompiledHeaderOutputFile>
+      </PrecompiledHeaderOutputFile>
+      <LanguageStandard>stdcpp17</LanguageStandard>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalDependencies>WindowsApp.lib; mincore.lib; DXGI.lib</AdditionalDependencies>
+      <DelayLoadDLLs>"ext-ms-win-dxcore-l1-1-0.dll"; dxgi.dll; d3d11.dll</DelayLoadDLLs>
+    </Link>
+    <PreBuildEvent>
+      <Command>$(ProjectDir)src\GenerateVersionStrings.cmd $(ProjectDir) $(IntDir)</Command>
+    </PreBuildEvent>
+    <ResourceCompile>
+      <AdditionalIncludeDirectories>$(IntDir)</AdditionalIncludeDirectories>
+    </ResourceCompile>
+  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <ClCompile>
       <PrecompiledHeader>NotUsing</PrecompiledHeader>
@@ -195,13 +267,13 @@
       <Command>$(ProjectDir)\src\GenerateVersionStrings.cmd $(ProjectDir) $(IntDir)</Command>
     </PreBuildEvent>
   </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">
     <ClCompile>
       <PrecompiledHeader>NotUsing</PrecompiledHeader>
       <WarningLevel>Level3</WarningLevel>
       <Optimization>Disabled</Optimization>
       <SDLCheck>true</SDLCheck>
-      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>USE_WINML_NUGET;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <ConformanceMode>true</ConformanceMode>
       <PrecompiledHeaderFile>
       </PrecompiledHeaderFile>
@@ -213,7 +285,7 @@
       <SubSystem>Console</SubSystem>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <AdditionalDependencies>WindowsApp.lib; mincore.lib; DXGI.lib</AdditionalDependencies>
-      <DelayLoadDLLs>"ext-ms-win-dxcore-l1-1-0.dll"</DelayLoadDLLs>
+      <DelayLoadDLLs>"ext-ms-win-dxcore-l1-1-0.dll"; dxgi.dll; d3d11.dll</DelayLoadDLLs>
     </Link>
     <ResourceCompile>
       <AdditionalIncludeDirectories>$(IntDir)</AdditionalIncludeDirectories>
@@ -253,7 +325,7 @@
       <Command>$(ProjectDir)\src\GenerateVersionStrings.cmd $(ProjectDir) $(IntDir)</Command>
     </PreBuildEvent>
   </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">
     <ClCompile>
       <PrecompiledHeader>NotUsing</PrecompiledHeader>
       <WarningLevel>Level3</WarningLevel>
@@ -261,7 +333,7 @@
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <IntrinsicFunctions>true</IntrinsicFunctions>
       <SDLCheck>true</SDLCheck>
-      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>USE_WINML_NUGET;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <ConformanceMode>true</ConformanceMode>
       <PrecompiledHeaderFile>
       </PrecompiledHeaderFile>
@@ -276,7 +348,7 @@
       <OptimizeReferences>true</OptimizeReferences>
       <GenerateDebugInformation>true</GenerateDebugInformation>
       <AdditionalDependencies>WindowsApp.lib; mincore.lib; DXGI.lib</AdditionalDependencies>
-      <DelayLoadDLLs>"ext-ms-win-dxcore-l1-1-0.dll"</DelayLoadDLLs>
+      <DelayLoadDLLs>"ext-ms-win-dxcore-l1-1-0.dll"; dxgi.dll; d3d11.dll</DelayLoadDLLs>
     </Link>
     <ResourceCompile>
       <AdditionalIncludeDirectories>$(IntDir)</AdditionalIncludeDirectories>
@@ -318,7 +390,52 @@
       <AdditionalIncludeDirectories>$(IntDir)</AdditionalIncludeDirectories>
     </ResourceCompile>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">
+    <ClCompile>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>USE_WINML_NUGET;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>true</ConformanceMode>
+      <PrecompiledHeaderFile>
+      </PrecompiledHeaderFile>
+      <AdditionalIncludeDirectories>
+      </AdditionalIncludeDirectories>
+      <PrecompiledHeaderOutputFile>
+      </PrecompiledHeaderOutputFile>
+      <LanguageStandard>stdcpp17</LanguageStandard>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalDependencies>WindowsApp.lib; mincore.lib; DXGI.lib</AdditionalDependencies>
+      <DelayLoadDLLs>"ext-ms-win-dxcore-l1-1-0.dll"; dxgi.dll; d3d11.dll</DelayLoadDLLs>
+    </Link>
+    <PreBuildEvent>
+      <Command>$(ProjectDir)src\GenerateVersionStrings.cmd $(ProjectDir) $(IntDir)</Command>
+    </PreBuildEvent>
+    <ResourceCompile>
+      <AdditionalIncludeDirectories>$(IntDir)</AdditionalIncludeDirectories>
+    </ResourceCompile>
+  </ItemDefinitionGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
+    <Import Project="packages\Microsoft.AI.MachineLearning.1.4.0\build\native\Microsoft.AI.MachineLearning.targets" Condition="Exists('packages\Microsoft.AI.MachineLearning.1.4.0\build\native\Microsoft.AI.MachineLearning.targets')" />
+    <Import Project="packages\Microsoft.Windows.CppWinRT.2.0.200729.8\build\native\Microsoft.Windows.CppWinRT.targets" Condition="Exists('packages\Microsoft.Windows.CppWinRT.2.0.200729.8\build\native\Microsoft.Windows.CppWinRT.targets')" />
   </ImportGroup>
+  <Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
+    <PropertyGroup>
+      <ErrorText>This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them.  For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.</ErrorText>
+    </PropertyGroup>
+    <Error Condition="!Exists('packages\Microsoft.AI.MachineLearning.1.4.0\build\native\Microsoft.AI.MachineLearning.props')" Text="$([System.String]::Format('$(ErrorText)', 'packages\Microsoft.AI.MachineLearning.1.4.0\build\native\Microsoft.AI.MachineLearning.props'))" />
+    <Error Condition="!Exists('packages\Microsoft.AI.MachineLearning.1.4.0\build\native\Microsoft.AI.MachineLearning.targets')" Text="$([System.String]::Format('$(ErrorText)', 'packages\Microsoft.AI.MachineLearning.1.4.0\build\native\Microsoft.AI.MachineLearning.targets'))" />
+    <Error Condition="!Exists('packages\Microsoft.Windows.CppWinRT.2.0.200729.8\build\native\Microsoft.Windows.CppWinRT.props')" Text="$([System.String]::Format('$(ErrorText)', 'packages\Microsoft.Windows.CppWinRT.2.0.200729.8\build\native\Microsoft.Windows.CppWinRT.props'))" />
+    <Error Condition="!Exists('packages\Microsoft.Windows.CppWinRT.2.0.200729.8\build\native\Microsoft.Windows.CppWinRT.targets')" Text="$([System.String]::Format('$(ErrorText)', 'packages\Microsoft.Windows.CppWinRT.2.0.200729.8\build\native\Microsoft.Windows.CppWinRT.targets'))" />
+  </Target>
 </Project>
\ No newline at end of file
diff --git a/Tools/WinMLRunner/WinMLRunnerDLL.vcxproj b/Tools/WinMLRunner/WinMLRunnerDLL.vcxproj
deleted file mode 100644
index 413f12f3..00000000
--- a/Tools/WinMLRunner/WinMLRunnerDLL.vcxproj
+++ /dev/null
@@ -1,234 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup Label="ProjectConfigurations">
-    <ProjectConfiguration Include="Debug|ARM64">
-      <Configuration>Debug</Configuration>
-      <Platform>ARM64</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Debug|Win32">
-      <Configuration>Debug</Configuration>
-      <Platform>Win32</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Release|ARM64">
-      <Configuration>Release</Configuration>
-      <Platform>ARM64</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Release|Win32">
-      <Configuration>Release</Configuration>
-      <Platform>Win32</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Debug|x64">
-      <Configuration>Debug</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Release|x64">
-      <Configuration>Release</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
-  </ItemGroup>
-  <ItemGroup>
-    <ProjectReference Include="WinMLRunnerStaticLib.vcxproj">
-      <Project>{c3bcbea1-90e6-426f-88ac-64c274bcef45}</Project>
-      <LinkLibraryDependencies>true</LinkLibraryDependencies>
-      <UseLibraryDependencyInputs>true</UseLibraryDependencyInputs>
-    </ProjectReference>
-  </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="src/dllmain.cpp" />
-  </ItemGroup>
-  <PropertyGroup Label="Globals">
-    <CppWinRTEnabled>true</CppWinRTEnabled>
-    <VCProjectVersion>15.0</VCProjectVersion>
-    <ProjectGuid>{81EA9CC6-8A26-4583-B1A4-84740EF815C8}</ProjectGuid>
-    <RootNamespace>Benchmark</RootNamespace>
-    <WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
-    <ProjectName>WinMLRunnerDLL</ProjectName>
-    <WindowsSDKDesktopARM64Support>true</WindowsSDKDesktopARM64Support>
-  </PropertyGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
-    <ConfigurationType>DynamicLibrary</ConfigurationType>
-    <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
-    <CharacterSet>Unicode</CharacterSet>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
-    <ConfigurationType>DynamicLibrary</ConfigurationType>
-    <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
-    <CharacterSet>Unicode</CharacterSet>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
-    <ConfigurationType>DynamicLibrary</ConfigurationType>
-    <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
-    <CharacterSet>Unicode</CharacterSet>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'" Label="Configuration">
-    <ConfigurationType>DynamicLibrary</ConfigurationType>
-    <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
-    <CharacterSet>Unicode</CharacterSet>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
-    <ConfigurationType>DynamicLibrary</ConfigurationType>
-    <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
-    <CharacterSet>Unicode</CharacterSet>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'" Label="Configuration">
-    <ConfigurationType>DynamicLibrary</ConfigurationType>
-    <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
-    <CharacterSet>Unicode</CharacterSet>
-  </PropertyGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
-  <ImportGroup Label="ExtensionSettings">
-  </ImportGroup>
-  <ImportGroup Label="Shared">
-  </ImportGroup>
-  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'" Label="PropertySheets">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'" Label="PropertySheets">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <PropertyGroup Label="UserMacros" />
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
-    <TargetName>$(ProjectName)</TargetName>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">
-    <IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
-    <TargetName>$(ProjectName)</TargetName>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
-    <OutDir>$(SolutionDir)\x86\$(Configuration)\</OutDir>
-    <IntDir>x86\$(Configuration)\$(ProjectName)\</IntDir>
-    <TargetName>$(ProjectName)</TargetName>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
-    <OutDir>$(SolutionDir)\x86\$(Configuration)\</OutDir>
-    <IntDir>x86\$(Configuration)\$(ProjectName)\</IntDir>
-    <TargetName>$(ProjectName)</TargetName>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">
-    <TargetName>$(ProjectName)</TargetName>
-    <IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <TargetName>$(ProjectName)</TargetName>
-    <IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
-  </PropertyGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
-    <ClCompile>
-      <WarningLevel>Level3</WarningLevel>
-      <Optimization>Disabled</Optimization>
-      <SDLCheck>true</SDLCheck>
-      <ShowIncludes>true</ShowIncludes>
-      <LanguageStandard>stdcpp17</LanguageStandard>
-    </ClCompile>
-    <Link>
-      <SubSystem>Console</SubSystem>
-      <AdditionalDependencies>WindowsApp.lib;$(TargetDir)\WinMLRunnerStaticLib.lib;</AdditionalDependencies>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <ClCompile>
-      <WarningLevel>Level3</WarningLevel>
-      <Optimization>Disabled</Optimization>
-      <SDLCheck>true</SDLCheck>
-      <LanguageStandard>stdcpp17</LanguageStandard>
-      <ConformanceMode>true</ConformanceMode>
-    </ClCompile>
-    <Link>
-      <SubSystem>Console</SubSystem>
-      <AdditionalDependencies>WindowsApp.lib;$(TargetDir)\WinMLRunnerStaticLib.lib;</AdditionalDependencies>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">
-    <ClCompile>
-      <WarningLevel>Level3</WarningLevel>
-      <Optimization>Disabled</Optimization>
-      <SDLCheck>true</SDLCheck>
-      <LanguageStandard>stdcpp17</LanguageStandard>
-      <ConformanceMode>true</ConformanceMode>
-    </ClCompile>
-    <Link>
-      <SubSystem>Console</SubSystem>
-      <AdditionalDependencies>WindowsApp.lib;$(TargetDir)\WinMLRunnerStaticLib.lib;</AdditionalDependencies>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
-    <ClCompile>
-      <WarningLevel>Level3</WarningLevel>
-      <Optimization>MaxSpeed</Optimization>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <SDLCheck>true</SDLCheck>
-      <ShowIncludes>true</ShowIncludes>
-      <LanguageStandard>stdcpp17</LanguageStandard>
-      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
-    </ClCompile>
-    <Link>
-      <EnableCOMDATFolding>true</EnableCOMDATFolding>
-      <OptimizeReferences>true</OptimizeReferences>
-      <SubSystem>Console</SubSystem>
-      <AdditionalDependencies>WindowsApp.lib;$(TargetDir)\WinMLRunnerStaticLib.lib;</AdditionalDependencies>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <ClCompile>
-      <WarningLevel>Level3</WarningLevel>
-      <Optimization>MaxSpeed</Optimization>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <SDLCheck>true</SDLCheck>
-      <ShowIncludes>true</ShowIncludes>
-      <LanguageStandard>stdcpp17</LanguageStandard>
-      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
-    </ClCompile>
-    <Link>
-      <EnableCOMDATFolding>true</EnableCOMDATFolding>
-      <OptimizeReferences>true</OptimizeReferences>
-      <SubSystem>Console</SubSystem>
-      <AdditionalDependencies>WindowsApp.lib;$(TargetDir)\WinMLRunnerStaticLib.lib;</AdditionalDependencies>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">
-    <ClCompile>
-      <WarningLevel>Level3</WarningLevel>
-      <Optimization>MaxSpeed</Optimization>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <SDLCheck>true</SDLCheck>
-      <ShowIncludes>true</ShowIncludes>
-      <LanguageStandard>stdcpp17</LanguageStandard>
-      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
-    </ClCompile>
-    <Link>
-      <EnableCOMDATFolding>true</EnableCOMDATFolding>
-      <OptimizeReferences>true</OptimizeReferences>
-      <SubSystem>Console</SubSystem>
-      <AdditionalDependencies>WindowsApp.lib;$(TargetDir)\WinMLRunnerStaticLib.lib;</AdditionalDependencies>
-    </Link>
-  </ItemDefinitionGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
-  <ImportGroup Label="ExtensionTargets">
-  </ImportGroup>
-</Project>
\ No newline at end of file
diff --git a/Tools/WinMLRunner/WinMLRunnerScenarios.vcxproj b/Tools/WinMLRunner/WinMLRunnerScenarios.vcxproj
index e67b4674..c574b409 100644
--- a/Tools/WinMLRunner/WinMLRunnerScenarios.vcxproj
+++ b/Tools/WinMLRunner/WinMLRunnerScenarios.vcxproj
@@ -1,10 +1,28 @@
 <?xml version="1.0" encoding="utf-8"?>
 <Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="packages\Microsoft.Windows.CppWinRT.2.0.200729.8\build\native\Microsoft.Windows.CppWinRT.props" Condition="Exists('packages\Microsoft.Windows.CppWinRT.2.0.200729.8\build\native\Microsoft.Windows.CppWinRT.props')" />
+  <Import Project="packages\Microsoft.AI.MachineLearning.1.4.0\build\native\Microsoft.AI.MachineLearning.props" Condition="Exists('packages\Microsoft.AI.MachineLearning.1.4.0\build\native\Microsoft.AI.MachineLearning.props')" />
   <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug_NuGet|Win32">
+      <Configuration>Debug_NuGet</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug_NuGet|x64">
+      <Configuration>Debug_NuGet</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
     <ProjectConfiguration Include="Debug|Win32">
       <Configuration>Debug</Configuration>
       <Platform>Win32</Platform>
     </ProjectConfiguration>
+    <ProjectConfiguration Include="Release_NuGet|Win32">
+      <Configuration>Release_NuGet</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release_NuGet|x64">
+      <Configuration>Release_NuGet</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
     <ProjectConfiguration Include="Release|Win32">
       <Configuration>Release</Configuration>
       <Platform>Win32</Platform>
@@ -26,6 +44,9 @@
     <ClCompile Include="src/Concurrency.cpp" />
     <ClCompile Include="src\ThreadPool.cpp" />
   </ItemGroup>
+  <ItemGroup>
+    <None Include="packages.config" />
+  </ItemGroup>
   <PropertyGroup Label="Globals">
     <VCProjectVersion>15.0</VCProjectVersion>
     <ProjectGuid>{C174D45D-C189-475B-B1A7-494939EE7491}</ProjectGuid>
@@ -40,6 +61,12 @@
     <PlatformToolset>v141</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
@@ -47,12 +74,25 @@
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
     <PlatformToolset>v141</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
@@ -60,6 +100,13 @@
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
   </ImportGroup>
@@ -68,32 +115,64 @@
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <LinkIncremental>true</LinkIncremental>
     <IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
   </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">
     <LinkIncremental>true</LinkIncremental>
     <IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <IntDir>x86\$(Configuration)\$(ProjectName)\</IntDir>
+    <OutDir>$(SolutionDir)x86\$(Configuration)\</OutDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <IntDir>x86\$(Configuration)\$(ProjectName)\</IntDir>
+    <OutDir>$(SolutionDir)x86\$(Configuration)\</OutDir>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
-    <IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+    <IntDir>x86\$(Configuration)\$(ProjectName)\</IntDir>
     <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)x86\$(Configuration)\</OutDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">
+    <IntDir>x86\$(Configuration)\$(ProjectName)\</IntDir>
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)x86\$(Configuration)\</OutDir>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
     <LinkIncremental>false</LinkIncremental>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">
+    <IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
       <PrecompiledHeader>NotUsing</PrecompiledHeader>
@@ -109,6 +188,21 @@
       <GenerateDebugInformation>true</GenerateDebugInformation>
     </Link>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">
+    <ClCompile>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>USE_WINML_NUGET;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>true</ConformanceMode>
+      <LanguageStandard>stdcpp17</LanguageStandard>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <ClCompile>
       <PrecompiledHeader>NotUsing</PrecompiledHeader>
@@ -127,6 +221,26 @@
       <GenerateDebugInformation>true</GenerateDebugInformation>
     </Link>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>USE_WINML_NUGET;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalIncludeDirectories>$(ProjectDir)src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <LanguageStandard>stdcpp17</LanguageStandard>
+      <PrecompiledHeaderFile>
+      </PrecompiledHeaderFile>
+      <PrecompiledHeaderOutputFile>
+      </PrecompiledHeaderOutputFile>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
       <PrecompiledHeader>NotUsing</PrecompiledHeader>
@@ -150,6 +264,31 @@
       <GenerateDebugInformation>true</GenerateDebugInformation>
     </Link>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>USE_WINML_NUGET;WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalIncludeDirectories>$(ProjectDir)src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <LanguageStandard>stdcpp17</LanguageStandard>
+      <PrecompiledHeaderFile>
+      </PrecompiledHeaderFile>
+      <PrecompiledHeaderOutputFile>
+      </PrecompiledHeaderOutputFile>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <ClCompile>
       <PrecompiledHeader>NotUsing</PrecompiledHeader>
@@ -170,7 +309,38 @@
       <GenerateDebugInformation>true</GenerateDebugInformation>
     </Link>
   </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">
+    <ClCompile>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>USE_WINML_NUGET;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>true</ConformanceMode>
+      <LanguageStandard>stdcpp17</LanguageStandard>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
+    <Import Project="packages\Microsoft.AI.MachineLearning.1.4.0\build\native\Microsoft.AI.MachineLearning.targets" Condition="Exists('packages\Microsoft.AI.MachineLearning.1.4.0\build\native\Microsoft.AI.MachineLearning.targets')" />
+    <Import Project="packages\Microsoft.Windows.CppWinRT.2.0.200729.8\build\native\Microsoft.Windows.CppWinRT.targets" Condition="Exists('packages\Microsoft.Windows.CppWinRT.2.0.200729.8\build\native\Microsoft.Windows.CppWinRT.targets')" />
   </ImportGroup>
+  <Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
+    <PropertyGroup>
+      <ErrorText>This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them.  For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.</ErrorText>
+    </PropertyGroup>
+    <Error Condition="!Exists('packages\Microsoft.AI.MachineLearning.1.4.0\build\native\Microsoft.AI.MachineLearning.props')" Text="$([System.String]::Format('$(ErrorText)', 'packages\Microsoft.AI.MachineLearning.1.4.0\build\native\Microsoft.AI.MachineLearning.props'))" />
+    <Error Condition="!Exists('packages\Microsoft.AI.MachineLearning.1.4.0\build\native\Microsoft.AI.MachineLearning.targets')" Text="$([System.String]::Format('$(ErrorText)', 'packages\Microsoft.AI.MachineLearning.1.4.0\build\native\Microsoft.AI.MachineLearning.targets'))" />
+    <Error Condition="!Exists('packages\Microsoft.Windows.CppWinRT.2.0.200729.8\build\native\Microsoft.Windows.CppWinRT.props')" Text="$([System.String]::Format('$(ErrorText)', 'packages\Microsoft.Windows.CppWinRT.2.0.200729.8\build\native\Microsoft.Windows.CppWinRT.props'))" />
+    <Error Condition="!Exists('packages\Microsoft.Windows.CppWinRT.2.0.200729.8\build\native\Microsoft.Windows.CppWinRT.targets')" Text="$([System.String]::Format('$(ErrorText)', 'packages\Microsoft.Windows.CppWinRT.2.0.200729.8\build\native\Microsoft.Windows.CppWinRT.targets'))" />
+  </Target>
 </Project>
\ No newline at end of file
diff --git a/Tools/WinMLRunner/WinMLRunnerScenarios.vcxproj.filters b/Tools/WinMLRunner/WinMLRunnerScenarios.vcxproj.filters
index cfef2a87..027f252a 100644
--- a/Tools/WinMLRunner/WinMLRunnerScenarios.vcxproj.filters
+++ b/Tools/WinMLRunner/WinMLRunnerScenarios.vcxproj.filters
@@ -30,4 +30,7 @@
       <Filter>Source Files</Filter>
     </ClCompile>
   </ItemGroup>
+  <ItemGroup>
+    <None Include="packages.config" />
+  </ItemGroup>
 </Project>
\ No newline at end of file
diff --git a/Tools/WinMLRunner/WinMLRunnerStaticLib.vcxproj b/Tools/WinMLRunner/WinMLRunnerStaticLib.vcxproj
index 9fecd048..aa645a4d 100644
--- a/Tools/WinMLRunner/WinMLRunnerStaticLib.vcxproj
+++ b/Tools/WinMLRunner/WinMLRunnerStaticLib.vcxproj
@@ -1,17 +1,27 @@
 <?xml version="1.0" encoding="utf-8"?>
 <Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="packages\Microsoft.AI.MachineLearning.1.4.0\build\native\Microsoft.AI.MachineLearning.props" Condition="Exists('packages\Microsoft.AI.MachineLearning.1.4.0\build\native\Microsoft.AI.MachineLearning.props')" />
+  <Import Project="packages\Microsoft.Windows.CppWinRT.2.0.200729.8\build\native\Microsoft.Windows.CppWinRT.props" Condition="Exists('packages\Microsoft.Windows.CppWinRT.2.0.200729.8\build\native\Microsoft.Windows.CppWinRT.props')" />
   <ItemGroup Label="ProjectConfigurations">
-    <ProjectConfiguration Include="Debug|ARM64">
-      <Configuration>Debug</Configuration>
-      <Platform>ARM64</Platform>
+    <ProjectConfiguration Include="Debug_NuGet|Win32">
+      <Configuration>Debug_NuGet</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug_NuGet|x64">
+      <Configuration>Debug_NuGet</Configuration>
+      <Platform>x64</Platform>
     </ProjectConfiguration>
     <ProjectConfiguration Include="Debug|Win32">
       <Configuration>Debug</Configuration>
       <Platform>Win32</Platform>
     </ProjectConfiguration>
-    <ProjectConfiguration Include="Release|ARM64">
-      <Configuration>Release</Configuration>
-      <Platform>ARM64</Platform>
+    <ProjectConfiguration Include="Release_NuGet|Win32">
+      <Configuration>Release_NuGet</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release_NuGet|x64">
+      <Configuration>Release_NuGet</Configuration>
+      <Platform>x64</Platform>
     </ProjectConfiguration>
     <ProjectConfiguration Include="Release|Win32">
       <Configuration>Release</Configuration>
@@ -39,10 +49,14 @@
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="src/CommandLineArgs.cpp" />
-    <ClCompile Include="src/dllload.cpp" />
     <ClCompile Include="src/Filehelper.cpp" />
     <ClCompile Include="src/Run.cpp" />
+    <ClCompile Include="src\BindingUtilities.cpp" />
     <ClCompile Include="src\LearningModelDeviceHelper.cpp" />
+    <ClCompile Include="src\OutputHelper.cpp" />
+  </ItemGroup>
+  <ItemGroup>
+    <None Include="packages.config" />
   </ItemGroup>
   <PropertyGroup Label="Globals">
     <VCProjectVersion>15.0</VCProjectVersion>
@@ -50,7 +64,6 @@
     <Keyword>Win32Proj</Keyword>
     <RootNamespace>WinMLRunnerStaticLib</RootNamespace>
     <WindowsTargetPlatformVersion>10.0.18362.0</WindowsTargetPlatformVersion>
-    <WindowsSDKDesktopARM64Support>true</WindowsSDKDesktopARM64Support>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
@@ -59,7 +72,7 @@
     <PlatformToolset>v141</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'" Label="Configuration">
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>true</UseDebugLibraries>
     <PlatformToolset>v141</PlatformToolset>
@@ -72,7 +85,7 @@
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'" Label="Configuration">
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
     <PlatformToolset>v141</PlatformToolset>
@@ -85,6 +98,12 @@
     <PlatformToolset>v141</PlatformToolset>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
     <ConfigurationType>StaticLibrary</ConfigurationType>
     <UseDebugLibraries>false</UseDebugLibraries>
@@ -92,6 +111,13 @@
     <WholeProgramOptimization>true</WholeProgramOptimization>
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings">
   </ImportGroup>
@@ -100,48 +126,64 @@
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
-  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'" Label="PropertySheets">
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'" Label="PropertySheets">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
-  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'" Label="PropertySheets">
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'" Label="PropertySheets">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
   <PropertyGroup Label="UserMacros" />
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <LinkIncremental>false</LinkIncremental>
     <OutDir>$(SolutionDir)x86\$(Configuration)\</OutDir>
     <IntDir>x86\$(Configuration)\$(ProjectName)\</IntDir>
   </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">
     <LinkIncremental>false</LinkIncremental>
-    <IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+    <OutDir>$(SolutionDir)x86\$(Configuration)\</OutDir>
+    <IntDir>x86\$(Configuration)\$(ProjectName)\</IntDir>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <LinkIncremental>true</LinkIncremental>
     <OutDir>$(SolutionDir)x86\$(Configuration)\</OutDir>
     <IntDir>x86\$(Configuration)\$(ProjectName)\</IntDir>
   </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">
     <LinkIncremental>true</LinkIncremental>
-    <IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+    <OutDir>$(SolutionDir)x86\$(Configuration)\</OutDir>
+    <IntDir>x86\$(Configuration)\$(ProjectName)\</IntDir>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <LinkIncremental>true</LinkIncremental>
     <IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+  </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
     <LinkIncremental>false</LinkIncremental>
     <IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
   </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <IntDir>$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
+  </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
       <PrecompiledHeader>NotUsing</PrecompiledHeader>
@@ -156,8 +198,7 @@
       <PrecompiledHeaderOutputFile />
       <LanguageStandard>stdcpp17</LanguageStandard>
       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
-      <AdditionalIncludeDirectories>
-      </AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
@@ -166,7 +207,7 @@
       <GenerateDebugInformation>true</GenerateDebugInformation>
     </Link>
   </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|Win32'">
     <ClCompile>
       <PrecompiledHeader>NotUsing</PrecompiledHeader>
       <WarningLevel>Level3</WarningLevel>
@@ -174,7 +215,7 @@
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <IntrinsicFunctions>true</IntrinsicFunctions>
       <SDLCheck>true</SDLCheck>
-      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>USE_WINML_NUGET;WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <ConformanceMode>true</ConformanceMode>
       <PrecompiledHeaderFile>
       </PrecompiledHeaderFile>
@@ -182,6 +223,7 @@
       </PrecompiledHeaderOutputFile>
       <LanguageStandard>stdcpp17</LanguageStandard>
       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
@@ -201,27 +243,27 @@
       <PrecompiledHeaderFile />
       <PrecompiledHeaderOutputFile />
       <LanguageStandard>stdcpp17</LanguageStandard>
-      <AdditionalIncludeDirectories>
-      </AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
       <GenerateDebugInformation>true</GenerateDebugInformation>
     </Link>
   </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|Win32'">
     <ClCompile>
       <PrecompiledHeader>NotUsing</PrecompiledHeader>
       <WarningLevel>Level3</WarningLevel>
       <Optimization>Disabled</Optimization>
       <SDLCheck>true</SDLCheck>
-      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>USE_WINML_NUGET;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <ConformanceMode>true</ConformanceMode>
       <PrecompiledHeaderFile>
       </PrecompiledHeaderFile>
       <PrecompiledHeaderOutputFile>
       </PrecompiledHeaderOutputFile>
       <LanguageStandard>stdcpp17</LanguageStandard>
+      <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
@@ -239,8 +281,27 @@
       <LanguageStandard>stdcpp17</LanguageStandard>
       <PrecompiledHeaderFile />
       <PrecompiledHeaderOutputFile />
-      <AdditionalIncludeDirectories>
-      </AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug_NuGet|x64'">
+    <ClCompile>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>USE_WINML_NUGET;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>true</ConformanceMode>
+      <LanguageStandard>stdcpp17</LanguageStandard>
+      <PrecompiledHeaderFile>
+      </PrecompiledHeaderFile>
+      <PrecompiledHeaderOutputFile>
+      </PrecompiledHeaderOutputFile>
+      <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
@@ -261,8 +322,32 @@
       <PrecompiledHeaderFile />
       <PrecompiledHeaderOutputFile />
       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
-      <AdditionalIncludeDirectories>
-      </AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release_NuGet|x64'">
+    <ClCompile>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <SDLCheck>true</SDLCheck>
+      <PreprocessorDefinitions>USE_WINML_NUGET;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>true</ConformanceMode>
+      <LanguageStandard>stdcpp17</LanguageStandard>
+      <PrecompiledHeaderFile>
+      </PrecompiledHeaderFile>
+      <PrecompiledHeaderOutputFile>
+      </PrecompiledHeaderOutputFile>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
@@ -273,5 +358,16 @@
   </ItemDefinitionGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
+    <Import Project="packages\Microsoft.Windows.CppWinRT.2.0.200729.8\build\native\Microsoft.Windows.CppWinRT.targets" Condition="Exists('packages\Microsoft.Windows.CppWinRT.2.0.200729.8\build\native\Microsoft.Windows.CppWinRT.targets')" />
+    <Import Project="packages\Microsoft.AI.MachineLearning.1.4.0\build\native\Microsoft.AI.MachineLearning.targets" Condition="Exists('packages\Microsoft.AI.MachineLearning.1.4.0\build\native\Microsoft.AI.MachineLearning.targets')" />
   </ImportGroup>
+  <Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
+    <PropertyGroup>
+      <ErrorText>This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them.  For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.</ErrorText>
+    </PropertyGroup>
+    <Error Condition="!Exists('packages\Microsoft.Windows.CppWinRT.2.0.200729.8\build\native\Microsoft.Windows.CppWinRT.props')" Text="$([System.String]::Format('$(ErrorText)', 'packages\Microsoft.Windows.CppWinRT.2.0.200729.8\build\native\Microsoft.Windows.CppWinRT.props'))" />
+    <Error Condition="!Exists('packages\Microsoft.Windows.CppWinRT.2.0.200729.8\build\native\Microsoft.Windows.CppWinRT.targets')" Text="$([System.String]::Format('$(ErrorText)', 'packages\Microsoft.Windows.CppWinRT.2.0.200729.8\build\native\Microsoft.Windows.CppWinRT.targets'))" />
+    <Error Condition="!Exists('packages\Microsoft.AI.MachineLearning.1.4.0\build\native\Microsoft.AI.MachineLearning.props')" Text="$([System.String]::Format('$(ErrorText)', 'packages\Microsoft.AI.MachineLearning.1.4.0\build\native\Microsoft.AI.MachineLearning.props'))" />
+    <Error Condition="!Exists('packages\Microsoft.AI.MachineLearning.1.4.0\build\native\Microsoft.AI.MachineLearning.targets')" Text="$([System.String]::Format('$(ErrorText)', 'packages\Microsoft.AI.MachineLearning.1.4.0\build\native\Microsoft.AI.MachineLearning.targets'))" />
+  </Target>
 </Project>
\ No newline at end of file
diff --git a/Tools/WinMLRunner/WinMLRunnerStaticLib.vcxproj.filters b/Tools/WinMLRunner/WinMLRunnerStaticLib.vcxproj.filters
index ec0b0cce..dd01f055 100644
--- a/Tools/WinMLRunner/WinMLRunnerStaticLib.vcxproj.filters
+++ b/Tools/WinMLRunner/WinMLRunnerStaticLib.vcxproj.filters
@@ -4,9 +4,6 @@
     <ClCompile Include="src/CommandLineArgs.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
-    <ClCompile Include="src/dllload.cpp">
-      <Filter>Source Files</Filter>
-    </ClCompile>
     <ClCompile Include="src/Filehelper.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
@@ -16,6 +13,12 @@
     <ClCompile Include="src\LearningModelDeviceHelper.cpp">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="src\BindingUtilities.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="src\OutputHelper.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="src/BindingUtilities.h">
@@ -54,4 +57,7 @@
       <UniqueIdentifier>{50101483-1f70-4657-bdeb-edbb6b1d0aaf}</UniqueIdentifier>
     </Filter>
   </ItemGroup>
+  <ItemGroup>
+    <None Include="packages.config" />
+  </ItemGroup>
 </Project>
\ No newline at end of file
diff --git a/Tools/WinMLRunner/WinMLRunner_Link_DLL.vcxproj b/Tools/WinMLRunner/WinMLRunner_Link_DLL.vcxproj
deleted file mode 100644
index d25fa157..00000000
--- a/Tools/WinMLRunner/WinMLRunner_Link_DLL.vcxproj
+++ /dev/null
@@ -1,267 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
-  <ItemGroup Label="ProjectConfigurations">
-    <ProjectConfiguration Include="Debug|ARM64">
-      <Configuration>Debug</Configuration>
-      <Platform>ARM64</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Debug|Win32">
-      <Configuration>Debug</Configuration>
-      <Platform>Win32</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Release|ARM64">
-      <Configuration>Release</Configuration>
-      <Platform>ARM64</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Release|Win32">
-      <Configuration>Release</Configuration>
-      <Platform>Win32</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Debug|x64">
-      <Configuration>Debug</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
-    <ProjectConfiguration Include="Release|x64">
-      <Configuration>Release</Configuration>
-      <Platform>x64</Platform>
-    </ProjectConfiguration>
-  </ItemGroup>
-  <ItemGroup>
-    <ClCompile Include="src\main.cpp" />
-  </ItemGroup>
-  <ItemGroup>
-    <ProjectReference Include="WinMLRunnerDLL.vcxproj">
-      <Project>{81ea9cc6-8a26-4583-b1a4-84740ef815c8}</Project>
-    </ProjectReference>
-  </ItemGroup>
-  <PropertyGroup Label="Globals">
-    <VCProjectVersion>15.0</VCProjectVersion>
-    <ProjectGuid>{A1DFBB85-290F-4D1C-8699-0100DB6373F2}</ProjectGuid>
-    <Keyword>Win32Proj</Keyword>
-    <RootNamespace>WinMLRunnerLinkDLL</RootNamespace>
-    <WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
-  </PropertyGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
-    <CharacterSet>Unicode</CharacterSet>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
-    <CharacterSet>Unicode</CharacterSet>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
-    <CharacterSet>Unicode</CharacterSet>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
-    <CharacterSet>Unicode</CharacterSet>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <UseDebugLibraries>true</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
-    <CharacterSet>Unicode</CharacterSet>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
-    <ConfigurationType>Application</ConfigurationType>
-    <UseDebugLibraries>false</UseDebugLibraries>
-    <PlatformToolset>v141</PlatformToolset>
-    <WholeProgramOptimization>true</WholeProgramOptimization>
-    <CharacterSet>Unicode</CharacterSet>
-  </PropertyGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
-  <ImportGroup Label="ExtensionSettings">
-  </ImportGroup>
-  <ImportGroup Label="Shared">
-  </ImportGroup>
-  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'" Label="PropertySheets">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'" Label="PropertySheets">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
-  </ImportGroup>
-  <PropertyGroup Label="UserMacros" />
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <LinkIncremental>true</LinkIncremental>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
-    <LinkIncremental>true</LinkIncremental>
-    <OutDir>$(SolutionDir)\x86\$(Configuration)\</OutDir>
-    <IntDir>x86\$(Configuration)\</IntDir>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">
-    <LinkIncremental>true</LinkIncremental>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
-    <LinkIncremental>false</LinkIncremental>
-    <OutDir>$(SolutionDir)\x86\$(Configuration)\</OutDir>
-    <IntDir>x86\$(Configuration)\</IntDir>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">
-    <LinkIncremental>false</LinkIncremental>
-  </PropertyGroup>
-  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <LinkIncremental>false</LinkIncremental>
-  </PropertyGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
-    <ClCompile>
-      <PrecompiledHeader>NotUsing</PrecompiledHeader>
-      <WarningLevel>Level3</WarningLevel>
-      <Optimization>Disabled</Optimization>
-      <SDLCheck>true</SDLCheck>
-      <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <ConformanceMode>true</ConformanceMode>
-      <PrecompiledHeaderFile>
-      </PrecompiledHeaderFile>
-      <PrecompiledHeaderOutputFile />
-      <LanguageStandard>stdcpp17</LanguageStandard>
-    </ClCompile>
-    <Link>
-      <SubSystem>Console</SubSystem>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <AdditionalDependencies>
-      </AdditionalDependencies>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
-    <ClCompile>
-      <PrecompiledHeader>NotUsing</PrecompiledHeader>
-      <WarningLevel>Level3</WarningLevel>
-      <Optimization>Disabled</Optimization>
-      <SDLCheck>true</SDLCheck>
-      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <ConformanceMode>true</ConformanceMode>
-      <PrecompiledHeaderFile>
-      </PrecompiledHeaderFile>
-      <PrecompiledHeaderOutputFile />
-      <LanguageStandard>stdcpp17</LanguageStandard>
-    </ClCompile>
-    <Link>
-      <SubSystem>Console</SubSystem>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <AdditionalDependencies>
-      </AdditionalDependencies>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">
-    <ClCompile>
-      <PrecompiledHeader>NotUsing</PrecompiledHeader>
-      <WarningLevel>Level3</WarningLevel>
-      <Optimization>Disabled</Optimization>
-      <SDLCheck>true</SDLCheck>
-      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <ConformanceMode>true</ConformanceMode>
-      <PrecompiledHeaderFile>
-      </PrecompiledHeaderFile>
-      <PrecompiledHeaderOutputFile>
-      </PrecompiledHeaderOutputFile>
-      <LanguageStandard>stdcpp17</LanguageStandard>
-    </ClCompile>
-    <Link>
-      <SubSystem>Console</SubSystem>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <AdditionalDependencies>
-      </AdditionalDependencies>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
-    <ClCompile>
-      <PrecompiledHeader>NotUsing</PrecompiledHeader>
-      <WarningLevel>Level3</WarningLevel>
-      <Optimization>MaxSpeed</Optimization>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <SDLCheck>true</SDLCheck>
-      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <ConformanceMode>true</ConformanceMode>
-      <PrecompiledHeaderFile>
-      </PrecompiledHeaderFile>
-      <PrecompiledHeaderOutputFile />
-      <LanguageStandard>stdcpp17</LanguageStandard>
-    </ClCompile>
-    <Link>
-      <SubSystem>Console</SubSystem>
-      <EnableCOMDATFolding>true</EnableCOMDATFolding>
-      <OptimizeReferences>true</OptimizeReferences>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <AdditionalDependencies>
-      </AdditionalDependencies>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">
-    <ClCompile>
-      <PrecompiledHeader>NotUsing</PrecompiledHeader>
-      <WarningLevel>Level3</WarningLevel>
-      <Optimization>MaxSpeed</Optimization>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <SDLCheck>true</SDLCheck>
-      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <ConformanceMode>true</ConformanceMode>
-      <PrecompiledHeaderFile>
-      </PrecompiledHeaderFile>
-      <PrecompiledHeaderOutputFile>
-      </PrecompiledHeaderOutputFile>
-      <LanguageStandard>stdcpp17</LanguageStandard>
-    </ClCompile>
-    <Link>
-      <SubSystem>Console</SubSystem>
-      <EnableCOMDATFolding>true</EnableCOMDATFolding>
-      <OptimizeReferences>true</OptimizeReferences>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <AdditionalDependencies>
-      </AdditionalDependencies>
-    </Link>
-  </ItemDefinitionGroup>
-  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
-    <ClCompile>
-      <PrecompiledHeader>NotUsing</PrecompiledHeader>
-      <WarningLevel>Level3</WarningLevel>
-      <Optimization>MaxSpeed</Optimization>
-      <FunctionLevelLinking>true</FunctionLevelLinking>
-      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <SDLCheck>true</SDLCheck>
-      <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <ConformanceMode>true</ConformanceMode>
-      <PrecompiledHeaderFile>
-      </PrecompiledHeaderFile>
-      <PrecompiledHeaderOutputFile />
-      <LanguageStandard>stdcpp17</LanguageStandard>
-    </ClCompile>
-    <Link>
-      <SubSystem>Console</SubSystem>
-      <EnableCOMDATFolding>true</EnableCOMDATFolding>
-      <OptimizeReferences>true</OptimizeReferences>
-      <GenerateDebugInformation>true</GenerateDebugInformation>
-      <AdditionalDependencies>
-      </AdditionalDependencies>
-    </Link>
-  </ItemDefinitionGroup>
-  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
-  <ImportGroup Label="ExtensionTargets">
-  </ImportGroup>
-</Project>
\ No newline at end of file
diff --git a/Tools/WinMLRunner/packages.config b/Tools/WinMLRunner/packages.config
new file mode 100644
index 00000000..839ccacd
--- /dev/null
+++ b/Tools/WinMLRunner/packages.config
@@ -0,0 +1,5 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<packages>
+  <package id="Microsoft.AI.MachineLearning" version="1.4.0" targetFramework="native" />
+  <package id="Microsoft.Windows.CppWinRT" version="2.0.200729.8" targetFramework="native" />
+</packages>
\ No newline at end of file
diff --git a/Tools/WinMLRunner/src/BindingUtilities.cpp b/Tools/WinMLRunner/src/BindingUtilities.cpp
new file mode 100644
index 00000000..f1e7cdb2
--- /dev/null
+++ b/Tools/WinMLRunner/src/BindingUtilities.cpp
@@ -0,0 +1,1008 @@
+#include "d3dx12.h"
+#include <random>
+#include <time.h>
+#ifdef USE_WINML_NUGET
+#include "Microsoft.AI.Machinelearning.Native.h"
+#else
+#include "Windows.AI.Machinelearning.Native.h"
+#endif
+#include "MemoryBuffer.h"
+#include "TypeHelper.h"
+#include "CommandLineArgs.h"
+#include "OutputHelper.h"
+#include "BindingUtilities.h"
+using namespace winrt::Windows::Media;
+using namespace winrt::Windows::Storage;
+using namespace winrt::Windows::Storage::Streams;
+#ifdef USE_WINML_NUGET
+using namespace winrt::Microsoft::AI::MachineLearning;
+#else
+using namespace winrt::Windows::AI::MachineLearning;
+#endif
+using namespace winrt::Windows::Foundation::Collections;
+using namespace winrt::Windows::Graphics::DirectX;
+using namespace winrt::Windows::Graphics::Imaging;
+using namespace winrt::Windows::Graphics::DirectX::Direct3D11;
+using namespace DirectX::PackedVector;
+
+// FNV-1a hash of `bytes` bytes starting at `ptr`; the basis/prime pair is picked by the _WIN64 macro.
+inline size_t hash_data(void const* ptr, size_t const bytes) noexcept
+{
+#ifdef _WIN64
+    constexpr size_t kOffsetBasis = 14695981039346656037ULL;
+    constexpr size_t kPrime = 1099511628211ULL;
+#else
+    constexpr size_t kOffsetBasis = 2166136261U;
+    constexpr size_t kPrime = 16777619U;
+#endif
+    auto const* cursor = static_cast<uint8_t const*>(ptr);
+    auto const* const stop = cursor + bytes;
+    size_t digest = kOffsetBasis;
+    // Each byte is XOR-ed into the running value before the multiply.
+    for (; cursor != stop; ++cursor)
+    {
+        digest = (digest ^ *cursor) * kPrime;
+    }
+    return digest;
+}
+
+// Maps a TensorKind to the C++ type of one element in that tensor's buffer. Only the specializations
+// below are defined; the primary template is deliberately left incomplete so an unmapped TensorKind is
+// a compile error (the previous static_assert(true, ...) could never fire).
+template <TensorKind T> struct TensorKindToPointerType;
+template <> struct TensorKindToPointerType<TensorKind::UInt8>
+{
+    using Type = uint8_t;
+};
+template <> struct TensorKindToPointerType<TensorKind::Int8>
+{
+    using Type = int8_t;
+};
+template <> struct TensorKindToPointerType<TensorKind::UInt16>
+{
+    using Type = uint16_t;
+};
+template <> struct TensorKindToPointerType<TensorKind::Int16>
+{
+    using Type = int16_t;
+};
+template <> struct TensorKindToPointerType<TensorKind::UInt32>
+{
+    using Type = uint32_t;
+};
+template <> struct TensorKindToPointerType<TensorKind::Int32>
+{
+    using Type = int32_t;
+};
+template <> struct TensorKindToPointerType<TensorKind::UInt64>
+{
+    using Type = uint64_t;
+};
+template <> struct TensorKindToPointerType<TensorKind::Int64>
+{
+    using Type = int64_t;
+};
+template <> struct TensorKindToPointerType<TensorKind::Boolean>
+{
+    using Type = boolean;
+};
+template <> struct TensorKindToPointerType<TensorKind::Double>
+{
+    using Type = double;
+};
+template <> struct TensorKindToPointerType<TensorKind::Float>
+{
+    using Type = float;
+};
+template <> struct TensorKindToPointerType<TensorKind::Float16>
+{
+    using Type = HALF;
+};
+template <> struct TensorKindToPointerType<TensorKind::String>
+{
+    using Type = winrt::hstring;
+};
+
+// Maps a TensorKind to the WinML tensor class that CreateTensor uses to create a tensor of that kind.
+// Only the specializations below are defined; the primary template is deliberately left incomplete so
+// an unmapped TensorKind is a compile error (the previous static_assert(true, ...) could never fire).
+template <TensorKind T> struct TensorKindToValue;
+template <> struct TensorKindToValue<TensorKind::UInt8>
+{
+    using Type = TensorUInt8Bit;
+};
+template <> struct TensorKindToValue<TensorKind::Int8>
+{
+    using Type = TensorInt8Bit;
+};
+template <> struct TensorKindToValue<TensorKind::UInt16>
+{
+    using Type = TensorUInt16Bit;
+};
+template <> struct TensorKindToValue<TensorKind::Int16>
+{
+    using Type = TensorInt16Bit;
+};
+template <> struct TensorKindToValue<TensorKind::UInt32>
+{
+    using Type = TensorUInt32Bit;
+};
+template <> struct TensorKindToValue<TensorKind::Int32>
+{
+    using Type = TensorInt32Bit;
+};
+template <> struct TensorKindToValue<TensorKind::UInt64>
+{
+    using Type = TensorUInt64Bit;
+};
+template <> struct TensorKindToValue<TensorKind::Int64>
+{
+    using Type = TensorInt64Bit;
+};
+template <> struct TensorKindToValue<TensorKind::Boolean>
+{
+    using Type = TensorBoolean;
+};
+template <> struct TensorKindToValue<TensorKind::Double>
+{
+    using Type = TensorDouble;
+};
+template <> struct TensorKindToValue<TensorKind::Float>
+{
+    using Type = TensorFloat;
+};
+template <> struct TensorKindToValue<TensorKind::Float16>
+{
+    using Type = TensorFloat16Bit;
+};
+template <> struct TensorKindToValue<TensorKind::String>
+{
+    using Type = TensorString;
+};
+
+// Generic fallback, chosen when no explicit specialization matches <T, ToType, FromType>; it returns 0.
+template <TensorKind T, typename ToType, typename FromType> ToType ConvertToPointerType(FromType value)
+{
+    return 0;
+};
+template <> uint8_t ConvertToPointerType<TensorKind::UInt8>(float value) { return static_cast<uint8_t>(value); };
+template <> int8_t ConvertToPointerType<TensorKind::Int8>(float value) { return static_cast<int8_t>(value); };
+template <> uint16_t ConvertToPointerType<TensorKind::UInt16>(float value) { return static_cast<uint16_t>(value); };
+template <> int16_t ConvertToPointerType<TensorKind::Int16>(float value) { return static_cast<int16_t>(value); };
+template <> uint32_t ConvertToPointerType<TensorKind::UInt32>(float value) { return static_cast<uint32_t>(value); };
+template <> int32_t ConvertToPointerType<TensorKind::Int32>(float value) { return static_cast<int32_t>(value); };
+template <> uint64_t ConvertToPointerType<TensorKind::UInt64>(float value) { return static_cast<uint64_t>(value); };
+template <> int64_t ConvertToPointerType<TensorKind::Int64>(float value) { return static_cast<int64_t>(value); };
+template <> boolean ConvertToPointerType<TensorKind::Boolean>(float value) { return static_cast<boolean>(value); };
+// CopyTensorFromBuffer always passes a float expression, so Double needs a float overload as well; without
+// it the call fell through to the fallback above and every element of a double tensor was written as 0.
+template <> double ConvertToPointerType<TensorKind::Double>(float value) { return static_cast<double>(value); };
+template <> double ConvertToPointerType<TensorKind::Double>(double value) { return static_cast<double>(value); };
+template <> float ConvertToPointerType<TensorKind::Float>(float value) { return static_cast<float>(value); };
+template <> HALF ConvertToPointerType<TensorKind::Float16>(float value) { return XMConvertFloatToHalf(value); };
+template <> winrt::hstring ConvertToPointerType<TensorKind::String>(winrt::hstring value) { return value; };
+
+// Chooses the color management mode from the model's "Image.ColorSpaceGamma" metadata entry.
+ColorManagementMode GetColorManagementMode(const LearningModel& model)
+{
+    hstring colorGamma = L"";
+    try
+    {
+        colorGamma = model.Metadata().Lookup(L"Image.ColorSpaceGamma");
+    }
+    catch (...)
+    {
+        printf("    Model does not have color space gamma information. Will color manage to sRGB by default...\n");
+    }
+    // An empty value (e.g. after a failed lookup) and "SRGB" in any letter case both select sRGB management.
+    const bool treatAsSRgb = (colorGamma == L"") || (_wcsicmp(colorGamma.c_str(), L"SRGB") == 0);
+    if (treatAsSRgb)
+    {
+        return ColorManagementMode::ColorManageToSRgb;
+    }
+    // Callers must make sure the input image already uses the model's color space; several non-sRGB spaces exist.
+    printf("    Model metadata indicates that color gamma space is : %ws. Will not manage color space to sRGB...\n",
+           colorGamma.c_str());
+    return ColorManagementMode::DoNotColorManage;
+}
+
+// Reads width/height from a tensor (assumed NCHW, must be 4-D) or image descriptor; throws for anything else.
+void GetHeightAndWidthFromLearningModelFeatureDescriptor(const ILearningModelFeatureDescriptor& modelFeatureDescriptor,
+                                                         uint64_t& width, uint64_t& height)
+{
+    if (modelFeatureDescriptor.Kind() == LearningModelFeatureKind::Tensor)
+    {
+        auto tensorDescriptor = modelFeatureDescriptor.try_as<TensorFeatureDescriptor>();
+        if (tensorDescriptor.Shape().Size() != 4)
+        {
+            throw hresult_invalid_argument(L"Cannot generate arbitrary image for tensor input of dimensions: " +
+                                           std::to_wstring(tensorDescriptor.Shape().Size())); // was literal + int
+        }
+        height = tensorDescriptor.Shape().GetAt(2);
+        width = tensorDescriptor.Shape().GetAt(3);
+    }
+    else if (modelFeatureDescriptor.Kind() == LearningModelFeatureKind::Image)
+    {
+        auto imageDescriptor = modelFeatureDescriptor.try_as<IImageFeatureDescriptor>();
+        height = imageDescriptor.Height();
+        width = imageDescriptor.Width();
+    }
+    else
+    {
+        throw hresult_not_implemented(
+            L"Generating arbitrary image not supported for input types that aren't tensor or image.");
+    }
+}
+
+namespace BindingUtilities
+{
+    static unsigned int seed = 0;
+    static std::independent_bits_engine<std::default_random_engine, CHAR_BIT, unsigned int> randomBitsEngineChar;
+
+    // Builds a pseudo-random 4-channel SoftwareBitmap matching the descriptor's width and height.
+    SoftwareBitmap GenerateGarbageImage(const ILearningModelFeatureDescriptor& modelFeatureDescriptor,
+                                        InputDataType inputDataType)
+    {
+        assert(inputDataType != InputDataType::Tensor);
+        uint64_t imageWidth = 0;
+        uint64_t imageHeight = 0;
+        GetHeightAndWidthFromLearningModelFeatureDescriptor(modelFeatureDescriptor, imageWidth, imageHeight);
+
+        // RGBA8 and BGRA8 images are the only options, so every pixel takes 4 bytes.
+        const uint32_t pixelBytes = static_cast<uint32_t>(imageWidth) * static_cast<uint32_t>(imageHeight) * 4;
+
+        // Seed the shared engine from the running counter, then advance the counter.
+        std::vector<uint8_t> pixels(pixelBytes);
+        randomBitsEngineChar.seed(seed++);
+        std::generate(pixels.begin(), pixels.end(), randomBitsEngineChar);
+
+        // Push the bytes through an in-memory stream writer to obtain an IBuffer.
+        InMemoryRandomAccessStream memoryStream;
+        DataWriter writer(memoryStream);
+        writer.WriteBytes(winrt::array_view<const uint8_t>(pixels));
+        const IBuffer pixelBuffer = writer.DetachBuffer();
+
+        const auto pixelFormat = TypeHelper::GetBitmapPixelFormat(inputDataType);
+        return SoftwareBitmap::CreateCopyFromBuffer(pixelBuffer, pixelFormat, static_cast<int32_t>(imageWidth),
+                                                    static_cast<int32_t>(imageHeight));
+    }
+
+    // Decodes the image at filePath into a SoftwareBitmap for the given input feature.
+    // Tensor inputs keep the decoder's pixel format; other input types use the format mapped from
+    // inputDataType. With auto-scale enabled the image is resized to the model's width/height while
+    // decoding. Any WinRT failure is printed and terminates the process with the HRESULT as exit code.
+    SoftwareBitmap LoadImageFile(const ILearningModelFeatureDescriptor& modelFeatureDescriptor,
+                                 const InputDataType inputDataType, const hstring& filePath,
+                                 const CommandLineArgs& args, uint32_t iterationNum,
+                                 ColorManagementMode colorManagementMode)
+    {
+        // We assume NCHW and NCDHW
+        uint64_t width = 0;
+        uint64_t height = 0;
+        GetHeightAndWidthFromLearningModelFeatureDescriptor(modelFeatureDescriptor, width, height);
+
+        IRandomAccessStream stream;
+        BitmapDecoder decoder = nullptr;
+        try
+        {
+            // open the file
+            StorageFile file = StorageFile::GetFileFromPathAsync(filePath).get();
+            // get a stream on it
+            stream = file.OpenAsync(FileAccessMode::Read).get();
+            // Create the decoder from the stream
+            decoder = BitmapDecoder::CreateAsync(stream).get();
+        }
+        catch (const hresult_error& hr)
+        {
+            // Caught by const reference so the error object is neither copied nor sliced.
+            printf("    Failed to load the image file, make sure you are using fully qualified paths\r\n");
+            printf("    %ws\n", hr.message().c_str());
+            exit(hr.code());
+        }
+
+        // Tensor inputs keep the pixel format the file decodes to; other inputs request the mapped format.
+        BitmapPixelFormat format = inputDataType == InputDataType::Tensor
+                                       ? decoder.BitmapPixelFormat()
+                                       : TypeHelper::GetBitmapPixelFormat(inputDataType);
+        try
+        {
+            // Create a transform object with default parameters (no transform)
+            auto transform = BitmapTransform();
+            // If input dimensions are different from tensor input, then scale / crop while reading
+            if (args.IsAutoScale() && (decoder.PixelHeight() != height || decoder.PixelWidth() != width))
+            {
+                if (!args.TerseOutput() || iterationNum == 0)
+                    std::cout << std::endl
+                              << "Binding Utilities: AutoScaling input image to match model input dimensions...";
+
+                transform.ScaledHeight(static_cast<uint32_t>(height));
+                transform.ScaledWidth(static_cast<uint32_t>(width));
+                transform.InterpolationMode(args.AutoScaleInterpMode());
+            }
+
+            // get the bitmap
+            return decoder
+                .GetSoftwareBitmapAsync(format, decoder.BitmapAlphaMode(), transform,
+                                        ExifOrientationMode::RespectExifOrientation, colorManagementMode)
+                .get();
+        }
+        catch (const hresult_error& hr)
+        {
+            printf("    Failed to create SoftwareBitmap! Please make sure that input image is within the model's "
+                   "colorspace.\n");
+            printf("    %ws\n", hr.message().c_str());
+            exit(hr.code());
+        }
+    }
+
+    // Wraps the bitmap in a VideoFrame; for GPU binding the pixels are copied into a Direct3D11-surface frame.
+    VideoFrame CreateVideoFrame(const SoftwareBitmap& softwareBitmap, InputBindingType inputBindingType,
+                                InputDataType inputDataType, const IDirect3DDevice winrtDevice)
+    {
+        VideoFrame cpuFrame = VideoFrame::CreateWithSoftwareBitmap(softwareBitmap);
+        if (inputBindingType != InputBindingType::GPU)
+        {
+            return cpuFrame;
+        }
+
+        // Use the caller's device when one was supplied, otherwise the overload that takes no device.
+        const auto format = TypeHelper::GetDirectXPixelFormat(inputDataType);
+        const auto width = softwareBitmap.PixelWidth();
+        const auto height = softwareBitmap.PixelHeight();
+        VideoFrame gpuFrame =
+            winrtDevice ? VideoFrame::CreateAsDirect3D11SurfaceBacked(format, width, height, winrtDevice)
+                        : VideoFrame::CreateAsDirect3D11SurfaceBacked(format, width, height);
+
+        // Block until the pixels have been copied into the surface-backed frame.
+        cpuFrame.CopyToAsync(gpuFrame).get();
+
+        return gpuFrame;
+    }
+
+    // Describes the raw input buffer (CSV values or bitmap pixels) that is copied into a tensor.
+    struct InputBufferDesc
+    {
+        uint8_t* elements = nullptr;         // start of the raw input data
+        uint32_t totalSizeInBytes = 0;       // size of the data behind `elements`
+        uint32_t numChannelsPerElement = 0;  // channels stored for each element
+        uint32_t elementStrideInBytes = 0;   // byte distance from one element to the next
+        bool isPlanar = false;               // CreateBindableTensor sets true for CSV, false for bitmaps
+        TensorKind channelFormat = TensorKind::Undefined;             // data type of a single channel
+        BitmapPixelFormat elementFormat = BitmapPixelFormat::Unknown; // pixel format of the source
+
+        // Every member starts from its in-class initializer above.
+        InputBufferDesc()
+        {
+        }
+    };
+
+    // Parses comma-separated floats from csvFilePath into the buffer at inputBufferDesc.elements.
+    // Throws hresult_invalid_argument when the CSV holds more or fewer values than the buffer expects.
+    void ReadCSVIntoBuffer(const std::wstring& csvFilePath, InputBufferDesc& inputBufferDesc)
+    {
+        std::ifstream fileStream;
+        fileStream.open(csvFilePath);
+        if (!fileStream.is_open())
+        {
+            ThrowFailure(L"BindingUtilities: could not open data file.");
+        }
+
+        // Bound writes by the buffer's capacity in floats. The previous loop compared the value count with the
+        // size in bytes, so an oversized CSV was written past the end of the buffer before being rejected.
+        const uint32_t capacityInFloats = static_cast<uint32_t>(inputBufferDesc.totalSizeInBytes / sizeof(float_t));
+        uint32_t pos = 0;
+        std::string line;
+        float_t* pData = reinterpret_cast<float_t*>(inputBufferDesc.elements);
+        while (std::getline(fileStream, line, ','))
+        {
+            if (pos >= capacityInFloats)
+                throw hresult_invalid_argument(L"CSV input size/shape is different from what model expects!");
+            pData[pos++] = std::stof(line);
+        }
+        if (pos != (inputBufferDesc.totalSizeInBytes * inputBufferDesc.numChannelsPerElement) /
+                       inputBufferDesc.elementStrideInBytes)
+        {
+            throw hresult_invalid_argument(L"CSV input size/shape is different from what model expects!");
+        }
+    }
+
+    // Writes ((value / scale) - mean) / stddev for every channel of every input element into the tensor buffer.
+    template <TensorKind TKind, typename InputType>
+    void CopyTensorFromBuffer(void* actualData, uint32_t tensorHeight, uint32_t tensorWidth,
+                              const InputBufferDesc& inputBufferDesc, float scale, const std::vector<float>& means,
+                              const std::vector<float>& stddevs)
+    {
+        using WriteType = typename TensorKindToPointerType<TKind>::Type;
+        const uint32_t elementCount = tensorHeight * tensorWidth;
+        const uint32_t channelCount = inputBufferDesc.numChannelsPerElement;
+        const uint32_t elementStep = inputBufferDesc.isPlanar ? channelCount : 1;
+        const uint32_t channelStep = inputBufferDesc.isPlanar ? 1 : elementCount;
+        const size_t inputStride = inputBufferDesc.elementStrideInBytes / sizeof(InputType);
+        auto* const pOut = static_cast<WriteType*>(actualData);
+        const InputType* pIn = (InputType*)inputBufferDesc.elements;
+        for (uint32_t element = 0; element < elementCount; ++element, pIn += inputStride)
+        {
+            for (uint32_t channel = 0; channel < channelCount; ++channel)
+            {
+                const auto normalized = ((pIn[channel] / scale) - means[channel]) / stddevs[channel];
+                pOut[element * elementStep + channel * channelStep] = ConvertToPointerType<TKind, WriteType>(normalized);
+            }
+        }
+    }
+
+    // Fills the `sizeInBytes` bytes starting at `data` with pseudo-random values scaled by maxValue.
+    template <TensorKind TKind, typename WriteType>
+    static void GenerateRandomData(WriteType* data, uint32_t sizeInBytes, uint32_t maxValue)
+    {
+        static std::independent_bits_engine<std::default_random_engine, sizeof(uint32_t) * 8, uint32_t>
+            randomBitsEngine;
+        randomBitsEngine.seed(seed++);
+        // `end` is one past the last element, so the bound must be exclusive: the former `begin <= end`
+        // stored one extra element beyond the buffer.
+        WriteType* const end = reinterpret_cast<WriteType*>(reinterpret_cast<BYTE*>(data) + sizeInBytes);
+        for (WriteType* begin = data; begin < end; ++begin)
+        {
+            *begin = maxValue * static_cast<float>(randomBitsEngine()) / (randomBitsEngine.max)();
+        }
+    }
+
+    // Creates a TKind tensor of the given shape, fills its buffer, and returns it for CPU or GPU binding.
+    //  - CSV / image input: inputBufferDesc's data is normalized and copied in (shape is read as NCHW).
+    //  - Garbage data range: the buffer is filled by GenerateRandomData.
+    //  - GPU binding: a D3D12 buffer is created, filled from the CPU buffer unless the input is garbage,
+    //    and wrapped in a new tensor. Failed D3D12 calls throw instead of being silently ignored.
+    template <TensorKind TKind>
+    static ITensor CreateTensor(const CommandLineArgs& args, const std::vector<int64_t>& tensorShape,
+                                const InputBindingType inputBindingType, const InputBufferDesc& inputBufferDesc)
+    {
+        using TensorValue = typename TensorKindToValue<TKind>::Type;
+        using WriteType = typename TensorKindToPointerType<TKind>::Type;
+        // Map the incoming Tensor as a TensorNative to get the actual data buffer.
+        auto tensorValue = TensorValue::Create(tensorShape);
+        com_ptr<ITensorNative> spTensorValueNative;
+        tensorValue.as(spTensorValueNative);
+
+        WriteType* actualData;
+        uint32_t actualSizeInBytes;
+        THROW_IF_FAILED(spTensorValueNative->GetBuffer(reinterpret_cast<BYTE**>(&actualData), &actualSizeInBytes));
+
+        if (args.IsCSVInput() || args.IsImageInput())
+        {
+            // The indices below assume NCHW; reject shorter shapes instead of reading past the vector.
+            if (tensorShape.size() < 4)
+            {
+                throw hresult_invalid_argument(L"Input size / shape is different from what the model expects");
+            }
+            uint32_t channels = static_cast<uint32_t>(tensorShape[1]);
+            uint32_t tensorHeight = static_cast<uint32_t>(tensorShape[2]);
+            uint32_t tensorWidth = static_cast<uint32_t>(tensorShape[3]);
+            // Check to make sure the sizes are right
+            uint32_t inputElementCount = inputBufferDesc.totalSizeInBytes / inputBufferDesc.elementStrideInBytes;
+            uint32_t outputElementCount = actualSizeInBytes / (channels * sizeof(WriteType));
+            if (inputElementCount != outputElementCount)
+            {
+                throw hresult_invalid_argument(L"Input size / shape is different from what the model expects");
+            }
+            float scale;
+            std::vector<float> means = {};
+            std::vector<float> stddevs = {};
+            const auto& tensorizeArgs = args.TensorizeArgs();
+            const auto& normalizeParams = tensorizeArgs.Normalize;
+            switch (tensorizeArgs.Func)
+            {
+                case TensorizeFuncs::Identity:
+                    scale = 1.0f;
+                    means.resize(channels, 0.0f);
+                    stddevs.resize(channels, 1.0f);
+                    break;
+                case TensorizeFuncs::Normalize:
+                    switch (inputBufferDesc.elementFormat)
+                    {
+                        case BitmapPixelFormat::Gray8:
+                        case BitmapPixelFormat::Gray16:
+                        case BitmapPixelFormat::Rgba8:
+                        case BitmapPixelFormat::Rgba16:
+                            scale = normalizeParams.Scale;
+                            means.resize(channels);
+                            stddevs.resize(channels);
+                            for (uint32_t i = 0; i < channels; ++i)
+                            {
+                                means[i] = normalizeParams.Means[i];
+                                stddevs[i] = normalizeParams.StdDevs[i];
+                            }
+                            break;
+                        case BitmapPixelFormat::Bgra8:
+                            scale = normalizeParams.Scale;
+                            means.resize(channels);
+                            stddevs.resize(channels);
+                            for (uint32_t i = 0; i < channels; ++i)
+                            {
+                                means[channels - 1 - i] = normalizeParams.Means[i];
+                                stddevs[channels - 1 - i] = normalizeParams.StdDevs[i];
+                            }
+                            break;
+                        default:
+                            throw hresult_invalid_argument(
+                                L"CreateTensor<TKind>: Unhandled SoftwareBitmap pixel format");
+                    }
+                    break;
+                default:
+                    throw hresult_invalid_argument(L"CreateTensor<TKind>: Unknown Tensorize Function");
+            }
+
+            switch (inputBufferDesc.channelFormat)
+            {
+                case TensorKind::UInt8:
+                    CopyTensorFromBuffer<TKind, uint8_t>(actualData, tensorHeight, tensorWidth, inputBufferDesc, scale,
+                                                         means, stddevs);
+                    break;
+                case TensorKind::Float:
+                    CopyTensorFromBuffer<TKind, float>(actualData, tensorHeight, tensorWidth, inputBufferDesc, scale,
+                                                       means, stddevs);
+                    break;
+                default:
+                    throw hresult_not_implemented(L"Creating Tensors for Input Images with unhandled channel format!");
+            }
+        }
+        // Garbage Data
+        else if (args.IsGarbageDataRange())
+        {
+            GenerateRandomData<TKind>(actualData, actualSizeInBytes, args.GarbageDataMaxValue());
+        }
+
+        if (inputBindingType == InputBindingType::CPU)
+        {
+            return tensorValue;
+        }
+        else // GPU Tensor
+        {
+            com_ptr<ID3D12Resource> pGPUResource = nullptr;
+            try
+            {
+                // create the d3d device.
+                com_ptr<ID3D12Device> pD3D12Device = nullptr;
+                THROW_IF_FAILED(D3D12CreateDevice(nullptr, D3D_FEATURE_LEVEL::D3D_FEATURE_LEVEL_11_0,
+                                                  __uuidof(ID3D12Device), pD3D12Device.put_void()));
+                THROW_IF_FAILED(pD3D12Device->CreateCommittedResource(
+                    &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE,
+                    &CD3DX12_RESOURCE_DESC::Buffer(actualSizeInBytes, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS),
+                    D3D12_RESOURCE_STATE_COMMON, nullptr, __uuidof(ID3D12Resource), pGPUResource.put_void()));
+                if (!args.IsGarbageInput())
+                {
+                    com_ptr<ID3D12Resource> imageUploadHeap;
+                    // Create the GPU upload buffer.
+                    THROW_IF_FAILED(pD3D12Device->CreateCommittedResource(
+                        &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD), D3D12_HEAP_FLAG_NONE,
+                        &CD3DX12_RESOURCE_DESC::Buffer(actualSizeInBytes), D3D12_RESOURCE_STATE_GENERIC_READ, nullptr,
+                        __uuidof(ID3D12Resource), imageUploadHeap.put_void()));
+
+                    // create the command queue.
+                    com_ptr<ID3D12CommandQueue> dxQueue = nullptr;
+                    D3D12_COMMAND_QUEUE_DESC commandQueueDesc = {};
+                    commandQueueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
+                    THROW_IF_FAILED(pD3D12Device->CreateCommandQueue(&commandQueueDesc, __uuidof(ID3D12CommandQueue),
+                                                                     dxQueue.put_void()));
+                    com_ptr<ILearningModelDeviceFactoryNative> devicefactory =
+                        get_activation_factory<LearningModelDevice, ILearningModelDeviceFactoryNative>();
+                    com_ptr<::IUnknown> spUnk;
+                    devicefactory->CreateFromD3D12CommandQueue(dxQueue.get(), spUnk.put());
+
+                    // Create ID3D12GraphicsCommandList and Allocator
+                    D3D12_COMMAND_LIST_TYPE queuetype = dxQueue->GetDesc().Type;
+                    com_ptr<ID3D12CommandAllocator> alloctor;
+                    com_ptr<ID3D12GraphicsCommandList> cmdList;
+                    THROW_IF_FAILED(pD3D12Device->CreateCommandAllocator(
+                        queuetype, winrt::guid_of<ID3D12CommandAllocator>(), alloctor.put_void()));
+                    THROW_IF_FAILED(pD3D12Device->CreateCommandList(
+                        0, queuetype, alloctor.get(), nullptr, winrt::guid_of<ID3D12CommandList>(), cmdList.put_void()));
+
+                    // Copy from Cpu to GPU
+                    D3D12_SUBRESOURCE_DATA CPUData = {};
+                    CPUData.pData = actualData;
+                    CPUData.RowPitch = actualSizeInBytes;
+                    CPUData.SlicePitch = actualSizeInBytes;
+                    UpdateSubresources(cmdList.get(), pGPUResource.get(), imageUploadHeap.get(), 0, 0, 1, &CPUData);
+
+                    // Close the command list and execute it to begin the initial GPU setup.
+                    THROW_IF_FAILED(cmdList->Close());
+                    ID3D12CommandList* ppCommandLists[] = { cmdList.get() };
+                    dxQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists);
+                    // Create Event
+                    HANDLE directEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr);
+                    // Create Fence
+                    ::Microsoft::WRL::ComPtr<ID3D12Fence> spDirectFence = nullptr;
+                    THROW_IF_FAILED(pD3D12Device->CreateFence(0, D3D12_FENCE_FLAG_NONE,
+                                                              IID_PPV_ARGS(spDirectFence.ReleaseAndGetAddressOf())));
+                    // Adds fence to queue
+                    THROW_IF_FAILED(dxQueue->Signal(spDirectFence.Get(), 1));
+                    THROW_IF_FAILED(spDirectFence->SetEventOnCompletion(1, directEvent));
+                    // Wait for signal, then release the event handle (it used to be leaked on every call).
+                    DWORD retVal = WaitForSingleObject(directEvent, INFINITE);
+                    CloseHandle(directEvent);
+                    if (retVal != WAIT_OBJECT_0)
+                    {
+                        THROW_IF_FAILED(E_UNEXPECTED);
+                    }
+                }
+            }
+            catch (...)
+            {
+                std::cout << "Couldn't create and copy CPU tensor resource to GPU resource" << std::endl;
+                throw;
+            }
+            com_ptr<ITensorStaticsNative> tensorfactory = get_activation_factory<TensorValue, ITensorStaticsNative>();
+            com_ptr<::IUnknown> spUnkTensor;
+            THROW_IF_FAILED(tensorfactory->CreateFromD3D12Resource(
+                pGPUResource.get(), const_cast<int64_t*>(tensorShape.data()), static_cast<int>(tensorShape.size()),
+                spUnkTensor.put()));
+            TensorValue returnTensor(nullptr);
+            spUnkTensor.try_as(returnTensor);
+            return returnTensor;
+        }
+    }
+
+    // Process the descriptor to gather and normalize the shape.
+    // Image descriptors yield {1, channels, height, width} as TensorKind::Float and record the pixel format in
+    // inputBufferDesc; tensor descriptors copy their shape with every free dimension (-1) replaced by 1.
+    // Throws for unhandled pixel formats, dimensions that are neither positive nor -1, and other descriptors.
+    void ProcessDescriptor(const ILearningModelFeatureDescriptor& description, std::vector<int64_t>& shape,
+                           TensorKind& tensorKind, InputBufferDesc& inputBufferDesc)
+    {
+        // Try Image Feature Descriptor
+        auto imageFeatureDescriptor = description.try_as<ImageFeatureDescriptor>();
+        if (imageFeatureDescriptor)
+        {
+            int64_t channels;
+            inputBufferDesc.elementFormat = imageFeatureDescriptor.BitmapPixelFormat();
+            switch (inputBufferDesc.elementFormat)
+            {
+                case BitmapPixelFormat::Gray8:
+                case BitmapPixelFormat::Gray16:
+                    channels = 1;
+                    break;
+                case BitmapPixelFormat::Bgra8:
+                case BitmapPixelFormat::Rgba16:
+                case BitmapPixelFormat::Rgba8:
+                    channels = 3;
+                    break;
+                default:
+                    throw hresult_not_implemented(L"BitmapPixel format not yet handled by WinMLRunner.");
+            }
+
+            tensorKind = TensorKind::Float;
+            shape.push_back(1);
+            shape.push_back(channels);
+            shape.push_back(static_cast<int64_t>(imageFeatureDescriptor.Height()));
+            shape.push_back(static_cast<int64_t>(imageFeatureDescriptor.Width()));
+            return;
+        }
+
+        auto tensorDescriptor = description.try_as<TensorFeatureDescriptor>();
+        if (tensorDescriptor)
+        {
+            IVectorView<int64_t> tensorShape = tensorDescriptor.Shape();
+            for (uint32_t dim = 0; dim < tensorShape.Size(); dim++)
+            {
+                int64_t dimSize = tensorShape.GetAt(dim);
+                if (dimSize > 0) // If the dimension is greater than 0, then it is known.
+                {
+                    shape.push_back(dimSize);
+                }
+                else if (dimSize == -1) // -1 represents a free dimension; it is bound with size 1.
+                {
+                    shape.push_back(1);
+                }
+                else // any other value means it's an invalid model.
+                {
+                    // std::to_wstring is required: `literal + integer` only offsets the literal's pointer.
+                    throw hresult_invalid_argument(L"Failed to create a tensor with an unknown dimension of: " +
+                                                   std::to_wstring(dimSize));
+                }
+            }
+
+            tensorKind = tensorDescriptor.TensorKind();
+            return;
+        }
+
+        throw hresult_invalid_argument(L"ProcessDescriptor: Unknown desription type!");
+    }
+
+    // Creates the input tensor to bind for `description`, populated from the CSV file or image selected by `args`.
+    //
+    // ProcessDescriptor supplies the tensor shape and element kind. The source data is described by an
+    // InputBufferDesc (CSV values as planar floats, image pixels as non-planar channels) and passed to the
+    // CreateTensor<TensorKind> instantiation that matches the element kind. When neither CSV nor image input is
+    // selected, CreateTensor receives the descriptor as ProcessDescriptor left it.
+    //
+    // imagePath, inputDataType, iterationNum and colorManagementMode are only used to load the image;
+    // inputBindingType is forwarded to CreateTensor.
+    //
+    // Throws hresult_invalid_argument for an undefined tensor kind, an unknown bitmap pixel format, or CSV input
+    // whose shape has fewer than two dimensions, and hresult_not_implemented for unsupported tensor kinds.
+    ITensor CreateBindableTensor(const ILearningModelFeatureDescriptor& description, const std::wstring& imagePath,
+                                 const InputBindingType inputBindingType, const InputDataType inputDataType,
+                                 const CommandLineArgs& args, uint32_t iterationNum,
+                                 ColorManagementMode colorManagementMode)
+    {
+        InputBufferDesc inputBufferDesc = {};
+
+        std::vector<int64_t> shape = {};
+        TensorKind tensorKind = TensorKind::Undefined;
+        ProcessDescriptor(description, shape, tensorKind, inputBufferDesc);
+
+        // Everything inputBufferDesc.elements can point at is owned at function scope so the pointer stays valid
+        // until CreateTensor has returned: csvBuffer owns the CSV staging memory (released on every exit path,
+        // including exceptions), and the bitmap lock/reference keep the pixel buffer accessible.
+        std::vector<uint8_t> csvBuffer;
+        SoftwareBitmap softwareBitmap(nullptr);
+        BitmapBuffer sbBitmapBuffer(nullptr);
+        winrt::Windows::Foundation::IMemoryBufferReference sbReference(nullptr);
+
+        if (args.IsCSVInput())
+        {
+            // The channel count is read from shape[1], so the shape must have at least two dimensions.
+            if (shape.size() < 2)
+            {
+                throw hresult_invalid_argument(L"CSV input requires a tensor shape with at least two dimensions.");
+            }
+
+            inputBufferDesc.channelFormat = TensorKind::Float;
+            inputBufferDesc.isPlanar = true;
+
+            // Assumes shape is in the format of 'NCHW'
+            inputBufferDesc.numChannelsPerElement = static_cast<uint32_t>(shape[1]);
+
+            // Assumes no gaps in the input csv file
+            inputBufferDesc.elementStrideInBytes = inputBufferDesc.numChannelsPerElement * sizeof(float_t);
+
+            // Total size is sizeof(float_t) multiplied by every dimension of the shape.
+            inputBufferDesc.totalSizeInBytes = sizeof(float_t);
+            for (const int64_t dimension : shape)
+            {
+                inputBufferDesc.totalSizeInBytes *= static_cast<uint32_t>(dimension);
+            }
+
+            csvBuffer.resize(inputBufferDesc.totalSizeInBytes);
+            inputBufferDesc.elements = csvBuffer.data();
+
+            ReadCSVIntoBuffer(args.CsvPath(), inputBufferDesc);
+        }
+        else if (args.IsImageInput())
+        {
+            softwareBitmap =
+                LoadImageFile(description, inputDataType, imagePath.c_str(), args, iterationNum, colorManagementMode);
+
+            // Get Pointers to the SoftwareBitmap data buffers
+            sbBitmapBuffer = softwareBitmap.LockBuffer(BitmapBufferAccessMode::Read);
+            sbReference = sbBitmapBuffer.CreateReference();
+            auto sbByteAccess = sbReference.as<::Windows::Foundation::IMemoryBufferByteAccess>();
+            winrt::check_hresult(sbByteAccess->GetBuffer(&inputBufferDesc.elements, &inputBufferDesc.totalSizeInBytes));
+
+            inputBufferDesc.isPlanar = false;
+            inputBufferDesc.elementFormat = softwareBitmap.BitmapPixelFormat();
+            switch (inputBufferDesc.elementFormat)
+            {
+                case BitmapPixelFormat::Gray8:
+                    inputBufferDesc.channelFormat = TensorKind::UInt8;
+                    inputBufferDesc.numChannelsPerElement = 1;
+                    inputBufferDesc.elementStrideInBytes = sizeof(uint8_t);
+                    break;
+                case BitmapPixelFormat::Gray16:
+                    inputBufferDesc.channelFormat = TensorKind::UInt16;
+                    inputBufferDesc.numChannelsPerElement = 1;
+                    inputBufferDesc.elementStrideInBytes = sizeof(uint16_t);
+                    break;
+                case BitmapPixelFormat::Bgra8:
+                    inputBufferDesc.channelFormat = TensorKind::UInt8;
+                    inputBufferDesc.numChannelsPerElement = 3;
+                    inputBufferDesc.elementStrideInBytes = 4 * sizeof(uint8_t);
+                    break;
+                case BitmapPixelFormat::Rgba8:
+                    inputBufferDesc.channelFormat = TensorKind::UInt8;
+                    inputBufferDesc.numChannelsPerElement = 3;
+                    inputBufferDesc.elementStrideInBytes = 4 * sizeof(uint8_t);
+                    break;
+                case BitmapPixelFormat::Rgba16:
+                    inputBufferDesc.channelFormat = TensorKind::UInt16;
+                    inputBufferDesc.numChannelsPerElement = 3;
+                    inputBufferDesc.elementStrideInBytes = 4 * sizeof(uint16_t);
+                    break;
+                default:
+                    throw hresult_invalid_argument(L"Unknown BitmapPixelFormat in input image.");
+            }
+        }
+
+        // Dispatch to the CreateTensor instantiation for the descriptor's element kind. Every supported kind
+        // returns from inside the switch; anything else falls through to the not-implemented error below.
+        switch (tensorKind)
+        {
+            case TensorKind::Undefined:
+            {
+                std::cout << "BindingUtilities: TensorKind is undefined." << std::endl;
+                throw hresult_invalid_argument();
+            }
+            case TensorKind::Float:
+                return CreateTensor<TensorKind::Float>(args, shape, inputBindingType, inputBufferDesc);
+            case TensorKind::Float16:
+                return CreateTensor<TensorKind::Float16>(args, shape, inputBindingType, inputBufferDesc);
+            case TensorKind::Double:
+                return CreateTensor<TensorKind::Double>(args, shape, inputBindingType, inputBufferDesc);
+            case TensorKind::Int8:
+                return CreateTensor<TensorKind::Int8>(args, shape, inputBindingType, inputBufferDesc);
+            case TensorKind::UInt8:
+                return CreateTensor<TensorKind::UInt8>(args, shape, inputBindingType, inputBufferDesc);
+            case TensorKind::Int16:
+                return CreateTensor<TensorKind::Int16>(args, shape, inputBindingType, inputBufferDesc);
+            case TensorKind::UInt16:
+                return CreateTensor<TensorKind::UInt16>(args, shape, inputBindingType, inputBufferDesc);
+            case TensorKind::Int32:
+                return CreateTensor<TensorKind::Int32>(args, shape, inputBindingType, inputBufferDesc);
+            case TensorKind::UInt32:
+                return CreateTensor<TensorKind::UInt32>(args, shape, inputBindingType, inputBufferDesc);
+            case TensorKind::Int64:
+                return CreateTensor<TensorKind::Int64>(args, shape, inputBindingType, inputBufferDesc);
+            case TensorKind::UInt64:
+                return CreateTensor<TensorKind::UInt64>(args, shape, inputBindingType, inputBufferDesc);
+            default:
+                break;
+        }
+
+        std::cout << "BindingUtilities: TensorKind has not been implemented." << std::endl;
+        throw hresult_not_implemented();
+    }
+
+    // Creates the ImageFeatureValue to bind: a generated garbage image when imagePath is empty, else the loaded file.
+    ImageFeatureValue CreateBindableImage(const ILearningModelFeatureDescriptor& featureDescriptor,
+                                          const std::wstring& imagePath, InputBindingType inputBindingType,
+                                          InputDataType inputDataType, const IDirect3DDevice winrtDevice,
+                                          const CommandLineArgs& args, uint32_t iterationNum,
+                                          ColorManagementMode colorManagementMode)
+    {
+        auto bitmap = imagePath.empty() ? GenerateGarbageImage(featureDescriptor, inputDataType) :
+            LoadImageFile(featureDescriptor, inputDataType, imagePath.c_str(), args, iterationNum, colorManagementMode);
+        return ImageFeatureValue::CreateFromVideoFrame(
+            CreateVideoFrame(bitmap, inputBindingType, inputDataType, winrtDevice));
+    }
+
+    // Prints the key/value pair with the largest value found in the first map of the sequence output `name`.
+    // maxKey and maxVal keep their -1 sentinels when no entry has a value greater than -1.
+    template <typename K, typename V>
+    void OutputSequenceBinding(IMapView<hstring, winrt::Windows::Foundation::IInspectable> results, hstring name)
+    {
+        auto map = results.Lookup(name).as<IVectorView<IMap<K, V>>>().GetAt(0);
+
+        K maxKey = -1;
+        V maxVal = -1;
+        for (auto iter = map.First(); iter.HasCurrent(); iter.MoveNext())
+        {
+            auto pair = iter.Current();
+            // Compare against the best value seen so far (not the key) so the true maximum is selected.
+            if (pair.Value() > maxVal)
+            {
+                maxVal = pair.Value();
+                maxKey = pair.Key();
+            }
+        }
+        std::cout << " " << maxKey << " " << maxVal << std::endl;
+    }
+
+    // Prints (and, when args.IsSaveTensor(), saves) the evaluation results for every output feature of `model`.
+    //
+    // Tensor outputs: Float and Float16 buffers are passed to output.ProcessTensorResult together with the top-K
+    // list and the per-iteration CSV stream; String and Int64 outputs print their first element unless garbage
+    // input is used. Sequence outputs are printed through OutputSequenceBinding, selected by the map's key kind.
+    // NOTE: in "First" save-tensor mode the function returns at the first tensor output once iterationNum > 0.
+    void PrintOrSaveEvaluationResults(const LearningModel& model, const CommandLineArgs& args,
+                                      const IMapView<hstring, winrt::Windows::Foundation::IInspectable>& results,
+                                      OutputHelper& output, int iterationNum)
+    {
+        for (auto&& desc : model.OutputFeatures())
+        {
+            if (desc.Kind() == LearningModelFeatureKind::Tensor)
+            {
+                std::wstring name(desc.Name());
+                if (args.IsSaveTensor() && args.SaveTensorMode() == L"First" && iterationNum > 0)
+                {
+                    return;
+                }
+                if (args.IsSaveTensor())
+                {
+                    output.SetDefaultCSVIterationResult(iterationNum, args, name);
+                }
+
+                // Keep the GetBuffer result instead of discarding it. It is only enforced by the branches that read
+                // the raw buffer, so outputs handled through GetAsVectorView are unaffected by a GetBuffer failure.
+                void* tensor = nullptr;
+                uint32_t uCapacity = 0;
+                com_ptr<ITensorNative> itn = results.Lookup(desc.Name()).as<ITensorNative>();
+                const HRESULT bufferResult = itn->GetBuffer(reinterpret_cast<BYTE**>(&tensor), &uCapacity);
+
+                unsigned int topK = args.TopK();
+                std::vector<std::pair<float, int>> maxKValues;
+                std::ofstream fout;
+                if (args.IsSaveTensor())
+                {
+                    fout.open(output.GetCsvFileNamePerIterationResult(), std::ios_base::app);
+                    fout << "Index" << "," << "Value" << std::endl;
+                }
+
+                TensorFeatureDescriptor tensorDescriptor = desc.as<TensorFeatureDescriptor>();
+                TensorKind tensorKind = tensorDescriptor.TensorKind();
+                switch (tensorKind)
+                {
+                    case TensorKind::String:
+                    {
+                        if (!args.IsGarbageInput())
+                        {
+                            auto resultVector = results.Lookup(desc.Name()).as<TensorString>().GetAsVectorView();
+                            // Hold the hstring by value so the characters outlive the GetAt() temporary.
+                            const hstring firstResult = resultVector.GetAt(0);
+                            std::wcout << " Result: " << firstResult.c_str() << std::endl;
+                        }
+                    }
+                    break;
+                    case TensorKind::Float16:
+                        winrt::check_hresult(bufferResult);
+                        output.ProcessTensorResult<HALF>(args, tensor, uCapacity, maxKValues, fout, topK);
+                        break;
+                    case TensorKind::Float:
+                        winrt::check_hresult(bufferResult);
+                        output.ProcessTensorResult<float>(args, tensor, uCapacity, maxKValues, fout, topK);
+                        break;
+                    case TensorKind::Int64:
+                    {
+                        auto resultVector = results.Lookup(desc.Name()).as<TensorInt64Bit>().GetAsVectorView();
+                        if (!args.IsGarbageInput())
+                        {
+                            auto firstResult = resultVector.GetAt(0);
+                            std::wcout << " Result: " << firstResult << std::endl;
+                        }
+                    }
+                    break;
+                    default:
+                        std::cout << "BindingUtilities: output type not implemented.";
+                        break;
+                }
+                if (args.IsSaveTensor())
+                {
+                    fout.close();
+                    for (const auto& [maxValue, maxIndex] : maxKValues)
+                    {
+                        std::string iterationResult =
+                            "Index: " + std::to_string(maxIndex) + "; Value: " + std::to_string(maxValue);
+                        output.SaveResult(iterationNum, iterationResult,
+                                          static_cast<int>(hash_data(tensor, uCapacity)));
+                    }
+                }
+                if (!args.IsGarbageInput() && iterationNum == 0)
+                {
+                    std::wcout << L"Outputting top " << args.TopK() << L" values" << std::endl;
+                    std::wcout << L"Feature Name: " << name << std::endl;
+                    for (const auto& [maxValue, maxIndex] : maxKValues)
+                    {
+                        std::wcout << L" index: " << maxIndex << L", value: " << maxValue << std::endl;
+                    }
+                }
+            }
+            else if (desc.Kind() == LearningModelFeatureKind::Sequence)
+            {
+                auto seqDescriptor = desc.as<SequenceFeatureDescriptor>();
+                auto mapDescriptor = seqDescriptor.ElementDescriptor().as<MapFeatureDescriptor>();
+                auto keyKind = mapDescriptor.KeyKind();
+                // tensorKind is not consulted below: both supported key kinds read the map values as float.
+                auto valueKind = mapDescriptor.ValueDescriptor();
+                auto tensorKind = valueKind.as<TensorFeatureDescriptor>().TensorKind();
+                switch (keyKind)
+                {
+                    case TensorKind::Int64:
+                        OutputSequenceBinding<int64_t, float>(results, desc.Name());
+                        break;
+                    case TensorKind::Float:
+                        OutputSequenceBinding<float, float>(results, desc.Name());
+                        break;
+                    default: // other key kinds are not printed
+                        break;
+                }
+            }
+        }
+    }
+}; // namespace BindingUtilities
diff --git a/Tools/WinMLRunner/src/BindingUtilities.h b/Tools/WinMLRunner/src/BindingUtilities.h
index cb6fae76..934af937 100644
--- a/Tools/WinMLRunner/src/BindingUtilities.h
+++ b/Tools/WinMLRunner/src/BindingUtilities.h
@@ -1,975 +1,25 @@
 #pragma once
-#include <random>
-#include <time.h>
-#include "Common.h"
-#include "Windows.AI.Machinelearning.Native.h"
-#include "d3dx12.h"
-#include "MemoryBuffer.h"
-using namespace winrt::Windows::Media;
-using namespace winrt::Windows::Storage;
-using namespace winrt::Windows::Storage::Streams;
-using namespace winrt::Windows::AI::MachineLearning;
-using namespace winrt::Windows::Foundation::Collections;
-using namespace winrt::Windows::Graphics::DirectX;
-using namespace winrt::Windows::Graphics::Imaging;
-using namespace winrt::Windows::Graphics::DirectX::Direct3D11;
-using namespace DirectX::PackedVector;
-
-template <TensorKind T> struct TensorKindToPointerType
-{
-    static_assert(true, "No TensorKind mapped for given type!");
-};
-template <> struct TensorKindToPointerType<TensorKind::UInt8>
-{
-    typedef uint8_t Type;
-};
-template <> struct TensorKindToPointerType<TensorKind::Int8>
-{
-    typedef int8_t Type;
-};
-template <> struct TensorKindToPointerType<TensorKind::UInt16>
-{
-    typedef uint16_t Type;
-};
-template <> struct TensorKindToPointerType<TensorKind::Int16>
-{
-    typedef int16_t Type;
-};
-template <> struct TensorKindToPointerType<TensorKind::UInt32>
-{
-    typedef uint32_t Type;
-};
-template <> struct TensorKindToPointerType<TensorKind::Int32>
-{
-    typedef int32_t Type;
-};
-template <> struct TensorKindToPointerType<TensorKind::UInt64>
-{
-    typedef uint64_t Type;
-};
-template <> struct TensorKindToPointerType<TensorKind::Int64>
-{
-    typedef int64_t Type;
-};
-template <> struct TensorKindToPointerType<TensorKind::Boolean>
-{
-    typedef boolean Type;
-};
-template <> struct TensorKindToPointerType<TensorKind::Double>
-{
-    typedef double Type;
-};
-template <> struct TensorKindToPointerType<TensorKind::Float>
-{
-    typedef float Type;
-};
-template <> struct TensorKindToPointerType<TensorKind::Float16>
-{
-    typedef HALF Type;
-};
-template <> struct TensorKindToPointerType<TensorKind::String>
-{
-    typedef winrt::hstring Type;
-};
-
-template <TensorKind T> struct TensorKindToValue
-{
-    static_assert(true, "No TensorKind mapped for given type!");
-};
-template <> struct TensorKindToValue<TensorKind::UInt8>
-{
-    typedef TensorUInt8Bit Type;
-};
-template <> struct TensorKindToValue<TensorKind::Int8>
-{
-    typedef TensorInt8Bit Type;
-};
-template <> struct TensorKindToValue<TensorKind::UInt16>
-{
-    typedef TensorUInt16Bit Type;
-};
-template <> struct TensorKindToValue<TensorKind::Int16>
-{
-    typedef TensorInt16Bit Type;
-};
-template <> struct TensorKindToValue<TensorKind::UInt32>
-{
-    typedef TensorUInt32Bit Type;
-};
-template <> struct TensorKindToValue<TensorKind::Int32>
-{
-    typedef TensorInt32Bit Type;
-};
-template <> struct TensorKindToValue<TensorKind::UInt64>
-{
-    typedef TensorUInt64Bit Type;
-};
-template <> struct TensorKindToValue<TensorKind::Int64>
-{
-    typedef TensorInt64Bit Type;
-};
-template <> struct TensorKindToValue<TensorKind::Boolean>
-{
-    typedef TensorBoolean Type;
-};
-template <> struct TensorKindToValue<TensorKind::Double>
-{
-    typedef TensorDouble Type;
-};
-template <> struct TensorKindToValue<TensorKind::Float>
-{
-    typedef TensorFloat Type;
-};
-template <> struct TensorKindToValue<TensorKind::Float16>
-{
-    typedef TensorFloat16Bit Type;
-};
-template <> struct TensorKindToValue<TensorKind::String>
-{
-    typedef TensorString Type;
-};
-
-template <TensorKind T, typename ToType, typename FromType> ToType ConvertToPointerType(FromType value)
-{
-    static_assert(true, "No TensorKind mapped for given type!");
-    return 0;
-};
-template <> uint8_t ConvertToPointerType<TensorKind::UInt8>(float value) { return static_cast<uint8_t>(value); };
-template <> int8_t ConvertToPointerType<TensorKind::Int8>(float value) { return static_cast<int8_t>(value); };
-template <> uint16_t ConvertToPointerType<TensorKind::UInt16>(float value) { return static_cast<uint16_t>(value); };
-template <> int16_t ConvertToPointerType<TensorKind::Int16>(float value) { return static_cast<int16_t>(value); };
-template <> uint32_t ConvertToPointerType<TensorKind::UInt32>(float value) { return static_cast<uint32_t>(value); };
-template <> int32_t ConvertToPointerType<TensorKind::Int32>(float value) { return static_cast<int32_t>(value); };
-template <> uint64_t ConvertToPointerType<TensorKind::UInt64>(float value) { return static_cast<uint64_t>(value); };
-template <> int64_t ConvertToPointerType<TensorKind::Int64>(float value) { return static_cast<int64_t>(value); };
-template <> boolean ConvertToPointerType<TensorKind::Boolean>(float value) { return static_cast<boolean>(value); };
-template <> double ConvertToPointerType<TensorKind::Double>(double value) { return static_cast<double>(value); };
-template <> float ConvertToPointerType<TensorKind::Float>(float value) { return static_cast<float>(value); };
-template <> HALF ConvertToPointerType<TensorKind::Float16>(float value) { return XMConvertFloatToHalf(value); };
-template <> winrt::hstring ConvertToPointerType<TensorKind::String>(winrt::hstring value)
-{
-    return static_cast<winrt::hstring>(value);
-};
-
-static ColorManagementMode GetColorManagementMode(const LearningModel& model)
-
-{
-    // Get model color space gamma
-    hstring gammaSpace = L"";
-    try
-    {
-        gammaSpace = model.Metadata().Lookup(L"Image.ColorSpaceGamma");
-    }
-    catch (...)
-    {
-        printf("    Model does not have color space gamma information. Will color manage to sRGB by default...\n");
-    }
-    if (gammaSpace == L"" || _wcsicmp(gammaSpace.c_str(), L"SRGB") == 0)
-    {
-        return ColorManagementMode::ColorManageToSRgb;
-    }
-    // Due diligence should be done to make sure that the input image is within the model's colorspace. There are
-    // multiple non-sRGB color spaces.
-    printf("    Model metadata indicates that color gamma space is : %ws. Will not manage color space to sRGB...\n",
-           gammaSpace.c_str());
-    return ColorManagementMode::DoNotColorManage;
-}
+#include <Windows.Graphics.DirectX.Direct3D11.interop.h>
 
+ColorManagementMode GetColorManagementMode(const LearningModel& model);
 void GetHeightAndWidthFromLearningModelFeatureDescriptor(const ILearningModelFeatureDescriptor& modelFeatureDescriptor,
-                                                         uint64_t& width, uint64_t& height)
-{
-    if (modelFeatureDescriptor.Kind() == LearningModelFeatureKind::Tensor)
-    {
-        // We assume NCHW
-        auto tensorDescriptor = modelFeatureDescriptor.try_as<TensorFeatureDescriptor>();
-        if (tensorDescriptor.Shape().Size() != 4)
-        {
-            throw hresult_invalid_argument(L"Cannot generate arbitrary image for tensor input of dimensions: " +
-                                           tensorDescriptor.Shape().Size());
-        }
-        height = tensorDescriptor.Shape().GetAt(2);
-        width = tensorDescriptor.Shape().GetAt(3);
-    }
-    else if (modelFeatureDescriptor.Kind() == LearningModelFeatureKind::Image)
-    {
-        auto imageDescriptor = modelFeatureDescriptor.try_as<IImageFeatureDescriptor>();
-        height = imageDescriptor.Height();
-        width = imageDescriptor.Width();
-    }
-    else
-    {
-        throw hresult_not_implemented(
-            L"Generating arbitrary image not supported for input types that aren't tensor or image.");
-    }
-}
+                                                         uint64_t& width, uint64_t& height);
 
 namespace BindingUtilities
 {
-    static unsigned int seed = 0;
-    static std::independent_bits_engine<std::default_random_engine, CHAR_BIT, unsigned int> randomBitsEngineChar;
-
-    SoftwareBitmap GenerateGarbageImage(const ILearningModelFeatureDescriptor& modelFeatureDescriptor,
-                                        InputDataType inputDataType)
-    {
-        assert(inputDataType != InputDataType::Tensor);
-        uint64_t width = 0;
-        uint64_t height = 0;
-        GetHeightAndWidthFromLearningModelFeatureDescriptor(modelFeatureDescriptor, width, height);
-
-        // We have to create RGBA8 or BGRA8 images, so we need 4 channels
-        uint32_t totalByteSize = static_cast<uint32_t>(width) * static_cast<uint32_t>(height) * 4;
-
-        // Generate values for the image based on a seed
-        std::vector<uint8_t> data(totalByteSize);
-        randomBitsEngineChar.seed(seed++);
-        std::generate(data.begin(), data.end(), randomBitsEngineChar);
-
-        // Write the values to a buffer
-        winrt::array_view<const uint8_t> dataView(data);
-        InMemoryRandomAccessStream dataStream;
-        DataWriter dataWriter(dataStream);
-        dataWriter.WriteBytes(dataView);
-        IBuffer buffer = dataWriter.DetachBuffer();
-
-        // Create the software bitmap
-        return SoftwareBitmap::CreateCopyFromBuffer(buffer, TypeHelper::GetBitmapPixelFormat(inputDataType),
-                                                    static_cast<int32_t>(width), static_cast<int32_t>(height));
-    }
-
-    SoftwareBitmap LoadImageFile(const ILearningModelFeatureDescriptor& modelFeatureDescriptor,
-                                 const InputDataType inputDataType, const hstring& filePath,
-                                 const CommandLineArgs& args, uint32_t iterationNum,
-                                 ColorManagementMode colorManagementMode)
-    {
-        // We assume NCHW and NCDHW
-        uint64_t width = 0;
-        uint64_t height = 0;
-        GetHeightAndWidthFromLearningModelFeatureDescriptor(modelFeatureDescriptor, width, height);
-        IRandomAccessStream stream;
-        BitmapDecoder decoder = NULL;
-        try
-        {
-            // open the file
-            StorageFile file = StorageFile::GetFileFromPathAsync(filePath).get();
-            // get a stream on it
-            stream = file.OpenAsync(FileAccessMode::Read).get();
-            // Create the decoder from the stream
-            decoder = BitmapDecoder::CreateAsync(stream).get();
-        }
-        catch (hresult_error hr)
-        {
-            printf("    Failed to load the image file, make sure you are using fully qualified paths\r\n");
-            printf("    %ws\n", hr.message().c_str());
-            exit(hr.code());
-        }
-        BitmapPixelFormat format = inputDataType == InputDataType::Tensor
-                                       ? decoder.BitmapPixelFormat()
-                                       : TypeHelper::GetBitmapPixelFormat(inputDataType);
-        try
-        {
-            // If input dimensions are different from tensor input, then scale / crop while reading
-            if (args.IsAutoScale() && (decoder.PixelHeight() != height || decoder.PixelWidth() != width))
-            {
-                if (!args.TerseOutput() || iterationNum == 0)
-                    std::cout << std::endl
-                              << "Binding Utilities: AutoScaling input image to match model input dimensions...";
-
-                // Create a transform object with default parameters (no transform)
-                auto transform = BitmapTransform();
-                transform.ScaledHeight(static_cast<uint32_t>(height));
-                transform.ScaledWidth(static_cast<uint32_t>(width));
-                transform.InterpolationMode(args.AutoScaleInterpMode());
-
-                // get the bitmap
-                return decoder
-                    .GetSoftwareBitmapAsync(format, decoder.BitmapAlphaMode(), transform,
-                                            ExifOrientationMode::RespectExifOrientation, colorManagementMode).get();
-            }
-            else
-            {
-                // get the bitmap
-                return decoder
-                    .GetSoftwareBitmapAsync(format, decoder.BitmapAlphaMode(), BitmapTransform(),
-                                            ExifOrientationMode::RespectExifOrientation, colorManagementMode).get();
-            }
-        }
-        catch (hresult_error hr)
-        {
-            printf("    Failed to create SoftwareBitmap! Please make sure that input image is within the model's "
-                   "colorspace.\n");
-            printf("    %ws\n", hr.message().c_str());
-            exit(hr.code());
-        }
-    }
-
-    VideoFrame CreateVideoFrame(const SoftwareBitmap& softwareBitmap, InputBindingType inputBindingType,
-                                InputDataType inputDataType, const IDirect3DDevice winrtDevice)
-    {
-        VideoFrame inputImage = VideoFrame::CreateWithSoftwareBitmap(softwareBitmap);
-
-        if (inputBindingType == InputBindingType::GPU)
-        {
-            VideoFrame gpuImage =
-                winrtDevice
-                    ? VideoFrame::CreateAsDirect3D11SurfaceBacked(TypeHelper::GetDirectXPixelFormat(inputDataType),
-                                                                  softwareBitmap.PixelWidth(),
-                                                                  softwareBitmap.PixelHeight(), winrtDevice)
-                    : VideoFrame::CreateAsDirect3D11SurfaceBacked(TypeHelper::GetDirectXPixelFormat(inputDataType),
-                                                                  softwareBitmap.PixelWidth(),
-                                                                  softwareBitmap.PixelHeight());
-
-            inputImage.CopyToAsync(gpuImage).get();
-
-            return gpuImage;
-        }
-
-        return inputImage;
-    }
-
-    struct InputBufferDesc
-    {
-        uint8_t* elements;
-        uint32_t totalSizeInBytes;
-        uint32_t numChannelsPerElement;
-        uint32_t elementStrideInBytes;
-        bool isPlanar;
-        TensorKind channelFormat;
-        BitmapPixelFormat elementFormat;
-
-        InputBufferDesc()
-            : elements(nullptr), totalSizeInBytes(0), numChannelsPerElement(0), elementStrideInBytes(0), isPlanar(0),
-              channelFormat(TensorKind::Undefined), elementFormat(BitmapPixelFormat::Unknown)
-        {
-        }
-    };
-
-    void ReadCSVIntoBuffer(const std::wstring& csvFilePath, InputBufferDesc& inputBufferDesc)
-    {
-        std::ifstream fileStream;
-        fileStream.open(csvFilePath);
-        if (!fileStream.is_open())
-        {
-            ThrowFailure(L"BindingUtilities: could not open data file.");
-        }
-
-        uint32_t pos = 0;
-        std::string line;
-        float_t* pData = (float_t*)inputBufferDesc.elements;
-        while (std::getline(fileStream, line, ','))
-        {
-            *pData = std::stof(line);
-            ++pData;
-
-            ++pos;
-            if (pos >= inputBufferDesc.totalSizeInBytes)
-                break;
-        }
-
-        // Check to see if csv didn't fill in entire buffer and throw or fill with zeros?
-        if (pos != (inputBufferDesc.totalSizeInBytes * inputBufferDesc.numChannelsPerElement) /
-                       inputBufferDesc.elementStrideInBytes)
-        {
-            throw hresult_invalid_argument(L"CSV input size/shape is different from what model expects!");
-        }
-    }
-
-    // Roll the array correctly for the tensor
-    template <TensorKind TKind, typename InputType>
-    void CopyTensorFromBuffer(void* actualData, uint32_t tensorHeight, uint32_t tensorWidth,
-                              const InputBufferDesc& inputBufferDesc, float scale, const std::vector<float>& means,
-                              const std::vector<float>& stddevs)
-    {
-        using WriteType = typename TensorKindToPointerType<TKind>::Type;
-
-        WriteType* pDataOut = static_cast<WriteType*>(actualData);
-        InputType* pDataIn = (InputType*)inputBufferDesc.elements;
-        uint32_t elementOffsetMultiplier = inputBufferDesc.isPlanar ? inputBufferDesc.numChannelsPerElement : 1;
-        uint32_t channelOffsetMultiplier = inputBufferDesc.isPlanar ? 1 : tensorHeight * tensorWidth;
-        for (uint32_t element = 0; element < tensorHeight * tensorWidth; ++element)
-        {
-            for (uint32_t channel = 0; channel < inputBufferDesc.numChannelsPerElement; ++channel)
-            {
-                pDataOut[element * elementOffsetMultiplier + channel * channelOffsetMultiplier] =
-                    ConvertToPointerType<TKind, WriteType>(((pDataIn[channel] / scale) - means[channel]) /
-                                                           stddevs[channel]);
-            }
-            pDataIn += inputBufferDesc.elementStrideInBytes / sizeof(InputType);
-        }
-    }
-
-    template <TensorKind TKind, typename WriteType>
-    static void GenerateRandomData(WriteType* data, uint32_t sizeInBytes, uint32_t maxValue)
-    {
-        static std::independent_bits_engine<std::default_random_engine, sizeof(uint32_t) * 8, uint32_t>
-            randomBitsEngine;
-        randomBitsEngine.seed(seed++);
-
-        WriteType* begin = data;
-        WriteType* end = reinterpret_cast<WriteType*>(reinterpret_cast<BYTE*>(data) + sizeInBytes);
-        while (begin <= end)
-        {
-            *begin = maxValue * static_cast<float>(randomBitsEngine()) / (randomBitsEngine.max)();
-            ++begin;
-        }
-    }
-
-    template <TensorKind TKind>
-    static ITensor CreateTensor(const CommandLineArgs& args, const std::vector<int64_t>& tensorShape,
-                                const InputBindingType inputBindingType, const InputBufferDesc& inputBufferDesc)
-    {
-        using TensorValue = typename TensorKindToValue<TKind>::Type;
-        using WriteType = typename TensorKindToPointerType<TKind>::Type;
-
-        // Map the incoming Tensor as a TensorNative to get the actual data buffer.
-        auto tensorValue = TensorValue::Create(tensorShape);
-
-        com_ptr<ITensorNative> spTensorValueNative;
-        tensorValue.as(spTensorValueNative);
-
-        WriteType* actualData;
-        uint32_t actualSizeInBytes;
-        THROW_IF_FAILED(spTensorValueNative->GetBuffer(reinterpret_cast<BYTE**>(&actualData), &actualSizeInBytes));
-
-        if (args.IsCSVInput() || args.IsImageInput())
-        {
-            // Assumes NCHW
-            uint32_t channels = static_cast<uint32_t>(tensorShape[1]);
-            uint32_t tensorHeight = static_cast<uint32_t>(tensorShape[2]);
-            uint32_t tensorWidth = static_cast<uint32_t>(tensorShape[3]);
-
-            // Check to make sure the sizes are right
-            uint32_t inputElementCount = inputBufferDesc.totalSizeInBytes / inputBufferDesc.elementStrideInBytes;
-            uint32_t outputElementCount = actualSizeInBytes / (channels * sizeof(WriteType));
-            if (inputElementCount != outputElementCount)
-            {
-                throw hresult_invalid_argument(L"Input size / shape is different from what the model expects");
-            }
-
-            float scale;
-            std::vector<float> means = {};
-            std::vector<float> stddevs = {};
-
-            const auto& tensorizeArgs = args.TensorizeArgs();
-            const auto& normalizeParams = tensorizeArgs.Normalize;
-            switch (tensorizeArgs.Func)
-            {
-                case TensorizeFuncs::Identity:
-                    scale = 1.0f;
-                    means.resize(channels, 0.0f);
-                    stddevs.resize(channels, 1.0f);
-                    break;
-                case TensorizeFuncs::Normalize:
-                    switch (inputBufferDesc.elementFormat)
-                    {
-                        case BitmapPixelFormat::Gray8:
-                        case BitmapPixelFormat::Gray16:
-                        case BitmapPixelFormat::Rgba8:
-                        case BitmapPixelFormat::Rgba16:
-                            scale = normalizeParams.Scale;
-                            means.resize(channels);
-                            stddevs.resize(channels);
-                            for (uint32_t i = 0; i < channels; ++i)
-                            {
-                                means[i] = normalizeParams.Means[i];
-                                stddevs[i] = normalizeParams.StdDevs[i];
-                            }
-                            break;
-                        case BitmapPixelFormat::Bgra8:
-                            scale = normalizeParams.Scale;
-                            means.resize(channels);
-                            stddevs.resize(channels);
-                            for (uint32_t i = 0; i < channels; ++i)
-                            {
-                                means[channels - 1 - i] = normalizeParams.Means[i];
-                                stddevs[channels - 1 - i] = normalizeParams.StdDevs[i];
-                            }
-                            break;
-
-                        default:
-                            throw hresult_invalid_argument(
-                                L"CreateTensor<TKind>: Unhandled SoftwareBitmap pixel format");
-                    }
-                    break;
-                default:
-                    throw hresult_invalid_argument(L"CreateTensor<TKind>: Unknown Tensorize Function");
-            }
-
-            switch (inputBufferDesc.channelFormat)
-            {
-                case TensorKind::UInt8:
-                    CopyTensorFromBuffer<TKind, uint8_t>(actualData, tensorHeight, tensorWidth, inputBufferDesc, scale,
-                                                         means, stddevs);
-                    break;
-                case TensorKind::Float:
-                    CopyTensorFromBuffer<TKind, float>(actualData, tensorHeight, tensorWidth, inputBufferDesc, scale,
-                                                       means, stddevs);
-                    break;
-                default:
-                    throw hresult_not_implemented(L"Creating Tensors for Input Images with unhandled channel format!");
-            }
-        }
-        // Garbage Data
-        else if (args.IsGarbageDataRange())
-        {
-            GenerateRandomData<TKind>(actualData, actualSizeInBytes, args.GarbageDataMaxValue());
-        }
-
-        if (inputBindingType == InputBindingType::CPU)
-        {
-            return tensorValue;
-        }
-        else // GPU Tensor
-        {
-            com_ptr<ID3D12Resource> pGPUResource = nullptr;
-            try
-            {
-                // create the d3d device.
-                com_ptr<ID3D12Device> pD3D12Device = nullptr;
-                D3D12CreateDevice(nullptr, D3D_FEATURE_LEVEL::D3D_FEATURE_LEVEL_11_0, __uuidof(ID3D12Device),
-                                  reinterpret_cast<void**>(&pD3D12Device));
-
-                pD3D12Device->CreateCommittedResource(
-                    &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), D3D12_HEAP_FLAG_NONE,
-                    &CD3DX12_RESOURCE_DESC::Buffer(actualSizeInBytes, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS),
-                    D3D12_RESOURCE_STATE_COMMON, nullptr, __uuidof(ID3D12Resource), pGPUResource.put_void());
-                if (!args.IsGarbageInput())
-                {
-                    com_ptr<ID3D12Resource> imageUploadHeap;
-                    // Create the GPU upload buffer.
-                    pD3D12Device->CreateCommittedResource(
-                        &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD), D3D12_HEAP_FLAG_NONE,
-                        &CD3DX12_RESOURCE_DESC::Buffer(actualSizeInBytes), D3D12_RESOURCE_STATE_GENERIC_READ, nullptr,
-                        __uuidof(ID3D12Resource), imageUploadHeap.put_void());
-
-                    // create the command queue.
-                    com_ptr<ID3D12CommandQueue> dxQueue = nullptr;
-                    D3D12_COMMAND_QUEUE_DESC commandQueueDesc = {};
-                    commandQueueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
-                    pD3D12Device->CreateCommandQueue(&commandQueueDesc, __uuidof(ID3D12CommandQueue),
-                                                     reinterpret_cast<void**>(&dxQueue));
-                    com_ptr<ILearningModelDeviceFactoryNative> devicefactory =
-                        get_activation_factory<LearningModelDevice, ILearningModelDeviceFactoryNative>();
-                    com_ptr<::IUnknown> spUnk;
-                    devicefactory->CreateFromD3D12CommandQueue(dxQueue.get(), spUnk.put());
-
-                    // Create ID3D12GraphicsCommandList and Allocator
-                    D3D12_COMMAND_LIST_TYPE queuetype = dxQueue->GetDesc().Type;
-                    com_ptr<ID3D12CommandAllocator> alloctor;
-                    com_ptr<ID3D12GraphicsCommandList> cmdList;
-                    pD3D12Device->CreateCommandAllocator(queuetype, winrt::guid_of<ID3D12CommandAllocator>(),
-                                                         alloctor.put_void());
-                    pD3D12Device->CreateCommandList(0, queuetype, alloctor.get(), nullptr,
-                                                    winrt::guid_of<ID3D12CommandList>(), cmdList.put_void());
-
-                    // Copy from Cpu to GPU
-                    D3D12_SUBRESOURCE_DATA CPUData = {};
-                    CPUData.pData = actualData;
-                    CPUData.RowPitch = actualSizeInBytes;
-                    CPUData.SlicePitch = actualSizeInBytes;
-                    UpdateSubresources(cmdList.get(), pGPUResource.get(), imageUploadHeap.get(), 0, 0, 1, &CPUData);
-
-                    // Close the command list and execute it to begin the initial GPU setup.
-                    cmdList->Close();
-                    ID3D12CommandList* ppCommandLists[] = { cmdList.get() };
-                    dxQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists);
-
-                    // Create Event
-                    HANDLE directEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr);
-
-                    // Create Fence
-                    Microsoft::WRL::ComPtr<ID3D12Fence> spDirectFence = nullptr;
-                    THROW_IF_FAILED(pD3D12Device->CreateFence(0, D3D12_FENCE_FLAG_NONE,
-                                                              IID_PPV_ARGS(spDirectFence.ReleaseAndGetAddressOf())));
-                    // Adds fence to queue
-                    THROW_IF_FAILED(dxQueue->Signal(spDirectFence.Get(), 1));
-                    THROW_IF_FAILED(spDirectFence->SetEventOnCompletion(1, directEvent));
-
-                    // Wait for signal
-                    DWORD retVal = WaitForSingleObject(directEvent, INFINITE);
-                    if (retVal != WAIT_OBJECT_0)
-                    {
-                        THROW_IF_FAILED(E_UNEXPECTED);
-                    }
-                }
-            }
-            catch (...)
-            {
-                std::cout << "Couldn't create and copy CPU tensor resource to GPU resource" << std::endl;
-                throw;
-            }
-            com_ptr<ITensorStaticsNative> tensorfactory = get_activation_factory<TensorValue, ITensorStaticsNative>();
-            com_ptr<::IUnknown> spUnkTensor;
-            tensorfactory->CreateFromD3D12Resource(pGPUResource.get(), const_cast<int64_t*>(tensorShape.data()),
-                                                   static_cast<int>(tensorShape.size()), spUnkTensor.put());
-            TensorValue returnTensor(nullptr);
-            spUnkTensor.try_as(returnTensor);
-            return returnTensor;
-        }
-    }
-
-    // Process the descriptor to gather and normalize the shape
-    void ProcessDescriptor(const ILearningModelFeatureDescriptor& description, std::vector<int64_t>& shape,
-                           TensorKind& tensorKind, InputBufferDesc& inputBufferDesc)
-    {
-        // Try Image Feature Descriptor
-        auto imageFeatureDescriptor = description.try_as<ImageFeatureDescriptor>();
-        if (imageFeatureDescriptor)
-        {
-            int64_t channels;
-            inputBufferDesc.elementFormat = imageFeatureDescriptor.BitmapPixelFormat();
-            switch (inputBufferDesc.elementFormat)
-            {
-                case BitmapPixelFormat::Gray8:
-                case BitmapPixelFormat::Gray16:
-                    channels = 1;
-                    break;
-                case BitmapPixelFormat::Bgra8:
-                case BitmapPixelFormat::Rgba16:
-                case BitmapPixelFormat::Rgba8:
-                    channels = 3;
-                    break;
-                default:
-                    throw hresult_not_implemented(L"BitmapPixel format not yet handled by WinMLRunner.");
-            }
-
-            tensorKind = TensorKind::Float;
-            shape.push_back(1);
-            shape.push_back(channels);
-            shape.push_back(static_cast<int64_t>(imageFeatureDescriptor.Height()));
-            shape.push_back(static_cast<int64_t>(imageFeatureDescriptor.Width()));
-            return;
-        }
-
-        auto tensorDescriptor = description.try_as<TensorFeatureDescriptor>();
-        if (tensorDescriptor)
-        {
-            IVectorView<int64_t> tensorShape = tensorDescriptor.Shape();
-            for (uint32_t dim = 0; dim < tensorShape.Size(); dim++)
-            {
-                int64_t dimSize = tensorShape.GetAt(dim);
-                if (dimSize > 0) // If the dimension is greater than 0, then it is known.
-                {
-                    shape.push_back(dimSize);
-                }
-                else // otherwise, make sure that the dimension is -1, representing free dimension. If not, then it's an
-                     // invalid model.
-                {
-                    if (dimSize == -1)
-                    {
-                        shape.push_back(1);
-                    }
-                    else
-                    {
-                        throw hresult_invalid_argument(L"Failed to create a tensor with an unknown dimension of: " +
-                                                       dimSize);
-                    }
-                }
-            }
-
-            tensorKind = tensorDescriptor.TensorKind();
-            return;
-        }
-
-        throw hresult_invalid_argument(L"ProcessDescriptor: Unknown desription type!");
-    } // namespace BindingUtilities
-
-    // Binds tensor floats, ints, doubles from CSV data.
     ITensor CreateBindableTensor(const ILearningModelFeatureDescriptor& description, const std::wstring& imagePath,
                                  const InputBindingType inputBindingType, const InputDataType inputDataType,
                                  const CommandLineArgs& args, uint32_t iterationNum,
-                                 ColorManagementMode colorManagementMode)
-    {
-        InputBufferDesc inputBufferDesc = {};
-
-        std::vector<int64_t> shape = {};
-        TensorKind tensorKind = TensorKind::Undefined;
-        ProcessDescriptor(description, shape, tensorKind, inputBufferDesc);
-
-        SoftwareBitmap softwareBitmap(nullptr);
-        if (args.IsCSVInput())
-        {
-            inputBufferDesc.channelFormat = TensorKind::Float;
-            inputBufferDesc.isPlanar = true;
-
-            // Assumes shape is in the format of 'NCHW'
-            inputBufferDesc.numChannelsPerElement = static_cast<uint32_t>(shape[1]);
-
-            // Assumes no gaps in the input csv file
-            inputBufferDesc.elementStrideInBytes = inputBufferDesc.numChannelsPerElement * sizeof(float_t);
-
-            inputBufferDesc.totalSizeInBytes = sizeof(float_t);
-            for (uint32_t i = 0; i < shape.size(); ++i)
-                inputBufferDesc.totalSizeInBytes *= static_cast<uint32_t>(shape[i]);
-
-            inputBufferDesc.elements = new uint8_t[inputBufferDesc.totalSizeInBytes];
-
-            ReadCSVIntoBuffer(args.CsvPath(), inputBufferDesc);
-        }
-        else if (args.IsImageInput())
-        {
-            softwareBitmap =
-                LoadImageFile(description, inputDataType, imagePath.c_str(), args, iterationNum, colorManagementMode);
-
-            // Get Pointers to the SoftwareBitmap data buffers
-            const BitmapBuffer sbBitmapBuffer(softwareBitmap.LockBuffer(BitmapBufferAccessMode::Read));
-            winrt::Windows::Foundation::IMemoryBufferReference sbReference = sbBitmapBuffer.CreateReference();
-            auto sbByteAccess = sbReference.as<::Windows::Foundation::IMemoryBufferByteAccess>();
-            winrt::check_hresult(sbByteAccess->GetBuffer(&inputBufferDesc.elements, &inputBufferDesc.totalSizeInBytes));
-
-            inputBufferDesc.isPlanar = false;
-            inputBufferDesc.elementFormat = softwareBitmap.BitmapPixelFormat();
-            switch (inputBufferDesc.elementFormat)
-            {
-                case BitmapPixelFormat::Gray8:
-                    inputBufferDesc.channelFormat = TensorKind::UInt8;
-                    inputBufferDesc.numChannelsPerElement = 1;
-                    inputBufferDesc.elementStrideInBytes = sizeof(uint8_t);
-                    break;
-                case BitmapPixelFormat::Gray16:
-                    inputBufferDesc.channelFormat = TensorKind::UInt16;
-                    inputBufferDesc.numChannelsPerElement = 1;
-                    inputBufferDesc.elementStrideInBytes = sizeof(uint16_t);
-                    break;
-                case BitmapPixelFormat::Bgra8:
-                    inputBufferDesc.channelFormat = TensorKind::UInt8;
-                    inputBufferDesc.numChannelsPerElement = 3;
-                    inputBufferDesc.elementStrideInBytes = 4 * sizeof(uint8_t);
-                    break;
-                case BitmapPixelFormat::Rgba8:
-                    inputBufferDesc.channelFormat = TensorKind::UInt8;
-                    inputBufferDesc.numChannelsPerElement = 3;
-                    inputBufferDesc.elementStrideInBytes = 4 * sizeof(uint8_t);
-                    break;
-                case BitmapPixelFormat::Rgba16:
-                    inputBufferDesc.channelFormat = TensorKind::UInt16;
-                    inputBufferDesc.numChannelsPerElement = 3;
-                    inputBufferDesc.elementStrideInBytes = 4 * sizeof(uint16_t);
-                    break;
-                default:
-                    throw hresult_invalid_argument(L"Unknown BitmapPixelFormat in input image.");
-            }
-        }
-
-        switch (tensorKind)
-        {
-            case TensorKind::Undefined:
-            {
-                std::cout << "BindingUtilities: TensorKind is undefined." << std::endl;
-                throw hresult_invalid_argument();
-            }
-            case TensorKind::Float:
-            {
-                return CreateTensor<TensorKind::Float>(args, shape, inputBindingType, inputBufferDesc);
-            }
-            break;
-            case TensorKind::Float16:
-            {
-                return CreateTensor<TensorKind::Float16>(args, shape, inputBindingType, inputBufferDesc);
-            }
-            break;
-            case TensorKind::Double:
-            {
-                return CreateTensor<TensorKind::Double>(args, shape, inputBindingType, inputBufferDesc);
-            }
-            break;
-            case TensorKind::Int8:
-            {
-                return CreateTensor<TensorKind::Int8>(args, shape, inputBindingType, inputBufferDesc);
-            }
-            break;
-            case TensorKind::UInt8:
-            {
-                return CreateTensor<TensorKind::UInt8>(args, shape, inputBindingType, inputBufferDesc);
-            }
-            break;
-            case TensorKind::Int16:
-            {
-                return CreateTensor<TensorKind::Int16>(args, shape, inputBindingType, inputBufferDesc);
-            }
-            break;
-            case TensorKind::UInt16:
-            {
-                return CreateTensor<TensorKind::UInt16>(args, shape, inputBindingType, inputBufferDesc);
-            }
-            break;
-            case TensorKind::Int32:
-            {
-                return CreateTensor<TensorKind::Int32>(args, shape, inputBindingType, inputBufferDesc);
-            }
-            break;
-            case TensorKind::UInt32:
-            {
-                return CreateTensor<TensorKind::UInt32>(args, shape, inputBindingType, inputBufferDesc);
-            }
-            break;
-            case TensorKind::Int64:
-            {
-                return CreateTensor<TensorKind::Int64>(args, shape, inputBindingType, inputBufferDesc);
-            }
-            break;
-            case TensorKind::UInt64:
-            {
-                return CreateTensor<TensorKind::UInt64>(args, shape, inputBindingType, inputBufferDesc);
-            }
-            break;
-        }
-        std::cout << "BindingUtilities: TensorKind has not been implemented." << std::endl;
-        throw hresult_not_implemented();
-    }
+                                 ColorManagementMode colorManagementMode);
 
     ImageFeatureValue CreateBindableImage(const ILearningModelFeatureDescriptor& featureDescriptor,
                                           const std::wstring& imagePath, InputBindingType inputBindingType,
-                                          InputDataType inputDataType, const IDirect3DDevice winrtDevice,
+                                          InputDataType inputDataType, const winrt::Windows::Graphics::DirectX::Direct3D11::IDirect3DDevice winrtDevice,
                                           const CommandLineArgs& args, uint32_t iterationNum,
-                                          ColorManagementMode colorManagementMode)
-    {
-        auto softwareBitmap = imagePath.empty() ? GenerateGarbageImage(featureDescriptor, inputDataType)
-                                                : LoadImageFile(featureDescriptor, inputDataType, imagePath.c_str(),
-                                                                args, iterationNum, colorManagementMode);
-        auto videoFrame = CreateVideoFrame(softwareBitmap, inputBindingType, inputDataType, winrtDevice);
-        return ImageFeatureValue::CreateFromVideoFrame(videoFrame);
-    }
-
-    template <typename K, typename V>
-    void OutputSequenceBinding(IMapView<hstring, winrt::Windows::Foundation::IInspectable> results, hstring name)
-    {
-        auto map = results.Lookup(name).as<IVectorView<IMap<K, V>>>().GetAt(0);
-        auto iter = map.First();
-
-        K maxKey = -1;
-        V maxVal = -1;
-
-        while (iter.HasCurrent())
-        {
-            auto pair = iter.Current();
-            if (pair.Value() > maxKey)
-            {
-                maxVal = pair.Value();
-                maxKey = pair.Key();
-            }
-            iter.MoveNext();
-        }
-        std::cout << " " << maxKey << " " << maxVal << std::endl;
-    }
+                                          ColorManagementMode colorManagementMode);
 
     void PrintOrSaveEvaluationResults(const LearningModel& model, const CommandLineArgs& args,
-                                      const IMapView<hstring, winrt::Windows::Foundation::IInspectable>& results,
-                                      OutputHelper& output, int iterationNum)
-    {
-        for (auto&& desc : model.OutputFeatures())
-        {
-            if (desc.Kind() == LearningModelFeatureKind::Tensor)
-            {
-                std::wstring name(desc.Name());
-                if (args.IsSaveTensor() && args.SaveTensorMode() == L"First" && iterationNum > 0)
-                {
-                    return;
-                }
-                if (args.IsSaveTensor())
-                {
-                    output.SetDefaultCSVIterationResult(iterationNum, args, name);
-                }
-                void* tensor;
-                uint32_t uCapacity;
-                com_ptr<ITensorNative> itn = results.Lookup(desc.Name()).as<ITensorNative>();
-                HRESULT(itn->GetBuffer(reinterpret_cast<BYTE**>(&tensor), &uCapacity));
-                int size = 0;
-                unsigned int topK = args.TopK();
-                std::vector<std::pair<float, int>> maxKValues;
-                std::ofstream fout;
-                if (args.IsSaveTensor())
-                {
-                    fout.open(output.getCsvFileNamePerIterationResult(), std::ios_base::app);
-                    fout << "Index"
-                         << ","
-                         << "Value" << std::endl;
-                }
-                TensorFeatureDescriptor tensorDescriptor = desc.as<TensorFeatureDescriptor>();
-                TensorKind tensorKind = tensorDescriptor.TensorKind();
-                switch (tensorKind)
-                {
-                    case TensorKind::String:
-                    {
-                        if (!args.IsGarbageInput())
-                        {
-                            auto resultVector = results.Lookup(desc.Name()).as<TensorString>().GetAsVectorView();
-                            auto output = resultVector.GetAt(0).data();
-                            std::wcout << " Result: " << output << std::endl;
-                        }
-                    }
-                    break;
-                    case TensorKind::Float16:
-                    {
-                        output.ProcessTensorResult<HALF>(args, tensor, uCapacity, maxKValues, fout, topK);
-                    }
-                    break;
-                    case TensorKind::Float:
-                    {
-                        output.ProcessTensorResult<float>(args, tensor, uCapacity, maxKValues, fout, topK);
-                    }
-                    break;
-                    case TensorKind::Int64:
-                    {
-                        auto resultVector = results.Lookup(desc.Name()).as<TensorInt64Bit>().GetAsVectorView();
-                        if (!args.IsGarbageInput())
-                        {
-                            auto output = resultVector.GetAt(0);
-                            std::wcout << " Result: " << output << std::endl;
-                        }
-                    }
-                    break;
-                    default:
-                    {
-                        std::cout << "BindingUtilities: output type not implemented.";
-                    }
-                    break;
-                }
-                if (args.IsSaveTensor())
-                {
-                    fout.close();
-                    for (auto& pair : maxKValues)
-                    {
-                        auto maxValue = pair.first;
-                        auto maxIndex = pair.second;
-                        std::string iterationResult =
-                            "Index: " + std::to_string(maxIndex) + "; Value: " + std::to_string(maxValue);
-                        output.SaveResult(iterationNum, iterationResult,
-                                          static_cast<int>(hash_data(tensor, uCapacity)));
-                    }
-                }
-                if (!args.IsGarbageInput() && iterationNum == 0)
-                {
-                    std::wcout << L"Outputting top " << args.TopK() << L" values" << std::endl;
-                    std::wcout << L"Feature Name: " << name << std::endl;
-                    for (auto& pair : maxKValues)
-                    {
-                        auto maxValue = pair.first;
-                        auto maxIndex = pair.second;
-                        std::wcout << L" index: " << maxIndex << L", value: " << maxValue << std::endl;
-                    }
-                }
-            }
-            else if (desc.Kind() == LearningModelFeatureKind::Sequence)
-            {
-                auto seqDescriptor = desc.as<SequenceFeatureDescriptor>();
-                auto mapDescriptor = seqDescriptor.ElementDescriptor().as<MapFeatureDescriptor>();
-                auto keyKind = mapDescriptor.KeyKind();
-                auto valueKind = mapDescriptor.ValueDescriptor();
-                auto tensorKind = valueKind.as<TensorFeatureDescriptor>().TensorKind();
-                switch (keyKind)
-                {
-                    case TensorKind::Int64:
-                    {
-                        OutputSequenceBinding<int64_t, float>(results, desc.Name());
-                    }
-                    break;
-                    case TensorKind::Float:
-                    {
-                        OutputSequenceBinding<float, float>(results, desc.Name());
-                    }
-                    break;
-                }
-            }
-        }
-    }
-}; // namespace BindingUtilities
+                                      const winrt::Windows::Foundation::Collections::IMapView<hstring, winrt::Windows::Foundation::IInspectable>& results,
+                                      OutputHelper& output, int iterationNum);
+
+}
\ No newline at end of file
diff --git a/Tools/WinMLRunner/src/CommandLineArgs.cpp b/Tools/WinMLRunner/src/CommandLineArgs.cpp
index 5210976f..3710ed2a 100644
--- a/Tools/WinMLRunner/src/CommandLineArgs.cpp
+++ b/Tools/WinMLRunner/src/CommandLineArgs.cpp
@@ -7,17 +7,26 @@
 #include <filesystem>
 #include <codecvt>
 #include "Filehelper.h"
-
-using namespace Windows::AI::MachineLearning;
-
 void CommandLineArgs::PrintUsage()
 {
+#ifdef USE_WINML_NUGET
+    std::cout << "MicrosoftML Runner" << std::endl;
+#else
     std::cout << "WinML Runner" << std::endl;
+#endif
     std::cout << " ---------------------------------------------------------------" << std::endl;
-    std::cout << "WinmlRunner.exe <-model | -folder> <fully qualified path> [options]" << std::endl;
+#ifdef USE_WINML_NUGET
+    std::cout << "MicrosoftMLRunner.exe <-model | -folder> <fully qualified path> [options]" << std::endl;
+#else
+    std::cout << "WinMLRunner.exe <-model | -folder> <fully qualified path> [options]" << std::endl;
+#endif
     std::cout << std::endl;
     std::cout << "options: " << std::endl;
+#ifdef USE_WINML_NUGET
+    std::cout << "  -version: prints the version information for this build of MicrosoftMLRunner.exe" << std::endl;
+#else
     std::cout << "  -version: prints the version information for this build of WinMLRunner.exe" << std::endl;
+#endif
     std::cout << "  -CPU : run model on default CPU" << std::endl;
     std::cout << "  -GPU : run model on default GPU" << std::endl;
     std::cout << "  -GPUHighPerformance : run model on GPU with highest performance" << std::endl;
@@ -242,7 +251,12 @@ CommandLineArgs::CommandLineArgs(const std::vector<std::wstring>& args)
             if (!IsDebuggerPresent())
             {
                 throw hresult_invalid_argument(
-                    L"-DebugEvaluate flag should only be used when WinMLRunner is under a user-mode debugger!");
+#ifdef USE_WINML_NUGET
+                    L"-DebugEvaluate flag should only be used when MicrosoftMLRunner is under a user-mode debugger!"
+#else
+                    L"-DebugEvaluate flag should only be used when WinMLRunner is under a user-mode debugger!"
+#endif
+                );
             }
             ToggleEvaluationDebugOutput(true);
         }
@@ -471,8 +485,11 @@ void CommandLineArgs::SetupOutputDirectories(const std::wstring& sBaseOutputPath
         if (m_perfOutputPath.empty())
         {
             if (sPerfOutputPath.empty())
+#ifdef USE_WINML_NUGET
+                PerfOutputPath = L"MicrosoftMLRunner[" + oss.str() + L"].csv";
+#else
                 PerfOutputPath = L"WinMLRunner[" + oss.str() + L"].csv";
-
+#endif
             PerfOutputPath = BaseOutputPath / PerfOutputPath;
             m_perfOutputPath = PerfOutputPath.c_str();
         }
diff --git a/Tools/WinMLRunner/src/CommandLineArgs.h b/Tools/WinMLRunner/src/CommandLineArgs.h
index fd717af6..041d706a 100644
--- a/Tools/WinMLRunner/src/CommandLineArgs.h
+++ b/Tools/WinMLRunner/src/CommandLineArgs.h
@@ -1,6 +1,7 @@
 #pragma once
 #include "Common.h"
-
+#include <winrt/Windows.Graphics.Imaging.h>
+#include "TypeHelper.h"
 enum TensorizeFuncs
 {
     Identity = 0,
diff --git a/Tools/WinMLRunner/src/Common.h b/Tools/WinMLRunner/src/Common.h
index 544b0690..481fd069 100644
--- a/Tools/WinMLRunner/src/Common.h
+++ b/Tools/WinMLRunner/src/Common.h
@@ -4,7 +4,11 @@
 #endif
 // unknown.h needs to be inlcuded before any winrt headers
 #include <unknwn.h>
+#ifdef USE_WINML_NUGET
+#include <winrt/Microsoft.AI.MachineLearning.h>
+#else
 #include <winrt/Windows.AI.MachineLearning.h>
+#endif
 #include <winrt/Windows.Foundation.Collections.h>
 #include <winrt/Windows.Media.h>
 #include <winrt/Windows.Graphics.Imaging.h>
@@ -22,8 +26,6 @@
 #include <cassert>
 #include <fstream>
 #include <dxgi1_6.h>
-#include "TypeHelper.h"
-#include "TimerHelper.h"
 #include "DirectXPackedVector.h"
 
 #ifndef BLOCK_DXCORE
diff --git a/Tools/WinMLRunner/src/Concurrency.cpp b/Tools/WinMLRunner/src/Concurrency.cpp
index 55a9e2be..7ee42b00 100644
--- a/Tools/WinMLRunner/src/Concurrency.cpp
+++ b/Tools/WinMLRunner/src/Concurrency.cpp
@@ -7,7 +7,11 @@
 #include "ThreadPool.h"
 
 using namespace winrt;
+#ifdef USE_WINML_NUGET
+using namespace winrt::Microsoft::AI::MachineLearning;
+#else
 using namespace winrt::Windows::AI::MachineLearning;
+#endif
 
 void load_model(const std::wstring& path, bool print_info)
 {
diff --git a/Tools/WinMLRunner/src/LearningModelDeviceHelper.cpp b/Tools/WinMLRunner/src/LearningModelDeviceHelper.cpp
index 682646fb..007e92ec 100644
--- a/Tools/WinMLRunner/src/LearningModelDeviceHelper.cpp
+++ b/Tools/WinMLRunner/src/LearningModelDeviceHelper.cpp
@@ -1,12 +1,12 @@
-#include "LearningModelDeviceHelper.h"
-#include "TypeHelper.h"
-#include "Common.h"
 #include "d3d11.h"
 #include "d3dx12.h"
+#include "LearningModelDeviceHelper.h"
+#include "Common.h"
 #include <Windows.Graphics.DirectX.Direct3D11.interop.h>
 #include "Windows.AI.MachineLearning.Native.h"
 #include <codecvt>
 #include "OutputHelper.h"
+#include "TypeHelper.h"
 using namespace winrt::Windows::Graphics::DirectX::Direct3D11;
 
 #ifdef DXCORE_SUPPORTED_BUILD
diff --git a/Tools/WinMLRunner/src/OutputHelper.cpp b/Tools/WinMLRunner/src/OutputHelper.cpp
new file mode 100644
index 00000000..8ef34e65
--- /dev/null
+++ b/Tools/WinMLRunner/src/OutputHelper.cpp
@@ -0,0 +1,1197 @@
+#include "Common.h"
+#include "CommandLineArgs.h"
+#include <fstream>
+#include <ctime>
+#include <locale>
+#include <utility>
+#include <codecvt>
+#include <iomanip>
+#include <dxgi.h>
+#include <Windows.Graphics.DirectX.Direct3D11.interop.h>
+#include <filesystem>
+#include <queue>
+#include "TimerHelper.h"
+#include "LearningModelDeviceHelper.h"
+#include "OutputHelper.h"
+
+#ifdef USE_WINML_NUGET
+using namespace winrt::Microsoft::AI::MachineLearning;
+#else
+using namespace winrt::Windows::AI::MachineLearning;
+#endif
+using namespace winrt::Windows::Storage::Streams;
+using namespace ::Windows::Graphics::DirectX::Direct3D11;
+using namespace winrt::Windows::Graphics::DirectX::Direct3D11;
+using namespace DirectX::PackedVector;
+
+// Prints a one-line notice naming the model file that is about to be loaded.
+// modelPath is echoed verbatim inside the message.
+void OutputHelper::PrintLoadingInfo(const std::wstring& modelPath) const
+{ wprintf(L"Loading model (path = %s)...\n", modelPath.c_str()); }
+
+// Prints the progress line for binding in one iteration; `status` is appended right after the "...".
+void OutputHelper::PrintBindingInfo(uint32_t iteration, DeviceType deviceType, InputBindingType inputBindingType,
+                                    InputDataType inputDataType, DeviceCreationLocation deviceCreationLocation,
+                                    const std::string& status) const
+{
+    const char* const format = "Binding (device = %s, iteration = %d, inputBinding = %s, inputDataType = %s, "
+                               "deviceCreationLocation = %s)...%s\n";
+    printf(format, TypeHelper::Stringify(deviceType).c_str(), iteration, TypeHelper::Stringify(inputBindingType).c_str(),
+           TypeHelper::Stringify(inputDataType).c_str(), TypeHelper::Stringify(deviceCreationLocation).c_str(), status.c_str());
+}
+
+// Prints the progress line for evaluation in one iteration; `status` is appended right after the "...".
+void OutputHelper::PrintEvaluatingInfo(uint32_t iteration, DeviceType deviceType, InputBindingType inputBindingType,
+                                       InputDataType inputDataType, DeviceCreationLocation deviceCreationLocation,
+                                       const std::string& status) const
+{
+    const char* const format = "Evaluating (device = %s, iteration = %d, inputBinding = %s, inputDataType = %s, "
+                               "deviceCreationLocation = %s)...%s\n";
+    printf(format, TypeHelper::Stringify(deviceType).c_str(), iteration, TypeHelper::Stringify(inputBindingType).c_str(),
+           TypeHelper::Stringify(inputDataType).c_str(), TypeHelper::Stringify(deviceCreationLocation).c_str(), status.c_str());
+}
+
+// Prints the model's metadata (name, author, version, domain, description, path and whether it uses
+// FP16) followed by a description of every input and output feature, framed by two separator lines.
+void OutputHelper::PrintModelInfo(const std::wstring& modelPath, const LearningModel& model) const
+{
+    const char* const separator = "=================================================================";
+    std::cout << separator << std::endl;
+    std::wcout << "Name: " << model.Name().c_str() << std::endl;
+    std::wcout << "Author: " << model.Author().c_str() << std::endl;
+    std::wcout << "Version: " << model.Version() << std::endl;
+    std::wcout << "Domain: " << model.Domain().c_str() << std::endl;
+    std::wcout << "Description: " << model.Description().c_str() << std::endl;
+    std::wcout << "Path: " << modelPath << std::endl;
+    std::cout << "Support FP16: " << std::boolalpha << doesModelContainFP16(model) << std::endl;
+    // A blank line separates the metadata from the list of the model's inputs.
+    std::cout << std::endl << "Input Feature Info:" << std::endl;
+    for (const auto& feature : model.InputFeatures())
+    {
+        PrintFeatureDescriptorInfo(feature);
+    }
+    // The model's outputs are listed the same way.
+    std::cout << "Output Feature Info:" << std::endl;
+    for (const auto& feature : model.OutputFeatures())
+    {
+        PrintFeatureDescriptorInfo(feature);
+    }
+    std::cout << separator << std::endl << std::endl;
+}
+
+// Prints the name and the kind of one model feature, followed by a blank line.
+//
+// descriptor: the input or output feature to describe.
+//
+// The kind text is produced by FeatureDescriptorToString from the descriptor itself, so no
+// hand-maintained table of LearningModelFeatureKind names (which would have to track the enum,
+// and which this function never read) is needed here.
+void OutputHelper::PrintFeatureDescriptorInfo(const ILearningModelFeatureDescriptor& descriptor) const
+{
+    // Copy the name into a std::wstring so it can be streamed through std::wcout.
+    const std::wstring name(descriptor.Name());
+    std::wcout << "Name: " << name << std::endl;
+    std::wcout << "Feature Kind: " << FeatureDescriptorToString(descriptor) << std::endl;
+    std::cout << std::endl;
+}
+
+// Prints the tool banner and lists the hardware GPU adapters that DXGI enumerates. Software adapters and
+// adapters whose description cannot be read are skipped; the printed index counts only the listed adapters.
+void OutputHelper::PrintHardwareInfo() const
+{
+    std::cout << "WinML Runner" << std::endl << "Printing available GPUs with DXGI.." << std::endl;
+    com_ptr<IDXGIFactory6> factory;
+    // Without a factory there is nothing to enumerate; return instead of dereferencing a null com_ptr.
+    if (FAILED(CreateDXGIFactory1(__uuidof(IDXGIFactory6), factory.put_void())) || !factory)
+    {
+        std::cout << std::endl;
+        return;
+    }
+    int validAdapterCount = 0;
+    for (UINT i = 0;; ++i)
+    {
+        com_ptr<IDXGIAdapter1> spAdapter;
+        if (factory->EnumAdapters1(i, spAdapter.put()) != S_OK)
+        {
+            break;
+        }
+        DXGI_ADAPTER_DESC1 desc = {};
+        // Flags is a bitmask, so test the software bit instead of comparing the whole field for equality.
+        if (FAILED(spAdapter->GetDesc1(&desc)) || (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) != 0 ||
+            (desc.VendorId == 0x1414 && desc.DeviceId == 0x8c))
+        {
+            continue;
+        }
+        printf("Index: %d, Description: %ls\n", validAdapterCount++, desc.Description);
+    }
+    std::cout << std::endl;
+}
+
+// Prints which device a LearningModelDevice was created on: a fixed message for CPU, otherwise the
+// description of the DXGI adapter behind the device's Direct3D11 device.
+// Prints a failure message when the adapter cannot be reached through the DXGI interop interfaces.
+void OutputHelper::PrintLearningModelDevice(const LearningModelDeviceWithMetadata& device)
+{
+    if (device.DeviceType == DeviceType::CPU)
+    {
+        std::cout << "\nCreated LearningModelDevice with CPU device" << std::endl;
+        return;
+    }
+
+    IDirect3DDevice d3dDevice = device.LearningModelDevice.Direct3D11Device();
+    com_ptr<IDirect3DDxgiInterfaceAccess> dxgi = d3dDevice.try_as<IDirect3DDxgiInterfaceAccess>();
+    com_ptr<IDXGIDevice> dxgiDevice;
+    com_ptr<IDXGIAdapter> adapter;
+    DXGI_ADAPTER_DESC description = {};
+    // Each step needs the object produced by the previous one; && short-circuits, so a failed call
+    // never leaves a null com_ptr to be dereferenced by the next step.
+    if (dxgi && SUCCEEDED(dxgi->GetInterface(__uuidof(IDXGIDevice), dxgiDevice.put_void())) &&
+        SUCCEEDED(dxgiDevice->GetAdapter(adapter.put())) && SUCCEEDED(adapter->GetDesc(&description)))
+    {
+        std::wcout << L"\nCreated LearningModelDevice with GPU: " << description.Description << std::endl;
+    }
+    else
+    {
+        std::cout << "Failed to Print Learning Model Device Information" << std::endl;
+    }
+}
+
+// Prints the performance summary of one run configuration to stdout.
+// profiler: source of every timer, working-set and GPU memory figure that is reported. First-iteration figures
+//     come from LOAD_MODEL, CREATE_SESSION and the *_FIRST_RUN counters; averages from BIND_VALUE / EVAL_MODEL.
+// numIterations: echoed in the header; the "Average Performance" section is printed only when it is > 1.
+// deviceType, inputBindingType, inputDataType, deviceCreationLocation: echoed in the "Results (...)" header.
+// isPerformanceConsoleOutputVerbose: adds the per-stage first-iteration memory lines and the min / max /
+//     standard deviation lines.
+// Values are printed exactly as the profiler returns them, labelled "ms" for times and "MB" for memory.
+void OutputHelper::PrintResults(const Profiler<WINML_MODEL_TEST_PERF>& profiler, uint32_t numIterations, DeviceType deviceType,
+                    InputBindingType inputBindingType, InputDataType inputDataType,
+                    DeviceCreationLocation deviceCreationLocation, bool isPerformanceConsoleOutputVerbose) const
+{
+    double loadTime = profiler[LOAD_MODEL].GetAverage(CounterType::TIMER);
+    double createSessionTime = profiler[CREATE_SESSION].GetAverage(CounterType::TIMER);
+
+    double averageBindTime = profiler[BIND_VALUE].GetAverage(CounterType::TIMER);
+    double stdevBindTime = profiler[BIND_VALUE].GetStdev(CounterType::TIMER);
+    double minBindTime = profiler[BIND_VALUE].GetMin(CounterType::TIMER);
+    double maxBindTime = profiler[BIND_VALUE].GetMax(CounterType::TIMER);
+    double firstBindTime = profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::TIMER);
+
+    double averageEvalTime = profiler[EVAL_MODEL].GetAverage(CounterType::TIMER);
+    double stdevEvalTime = profiler[EVAL_MODEL].GetStdev(CounterType::TIMER);
+    double minEvalTime = profiler[EVAL_MODEL].GetMin(CounterType::TIMER);
+    double maxEvalTime = profiler[EVAL_MODEL].GetMax(CounterType::TIMER);
+    double firstEvalTime = profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::TIMER);
+
+    double firstLoadWorkingSetMemoryUsage = profiler[LOAD_MODEL].GetAverage(CounterType::WORKING_SET_USAGE);
+    double firstLoadSharedMemoryUsage = profiler[LOAD_MODEL].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
+    double firstLoadDedicatedMemoryUsage = profiler[LOAD_MODEL].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
+    double firstLoadPeakWorkingSetUsage = profiler[LOAD_MODEL].GetAverage(CounterType::PEAK_WORKING_SET_USAGE);
+
+    double firstSessionCreationWorkingSetMemoryUsage =
+        profiler[CREATE_SESSION].GetAverage(CounterType::WORKING_SET_USAGE);
+    double firstSessionCreationSharedMemoryUsage =
+        profiler[CREATE_SESSION].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
+    double firstSessionCreationDedicatedMemoryUsage =
+        profiler[CREATE_SESSION].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
+    double firstSessionPeakWorkingSetUsage =
+        profiler[CREATE_SESSION].GetAverage(CounterType::PEAK_WORKING_SET_USAGE);
+
+    double averageBindMemoryUsage = profiler[BIND_VALUE].GetAverage(CounterType::WORKING_SET_USAGE);
+    double stdevBindMemoryUsage = profiler[BIND_VALUE].GetStdev(CounterType::WORKING_SET_USAGE);
+    double minBindMemoryUsage = profiler[BIND_VALUE].GetMin(CounterType::WORKING_SET_USAGE);
+    double maxBindMemoryUsage = profiler[BIND_VALUE].GetMax(CounterType::WORKING_SET_USAGE);
+    double firstBindMemoryUsage = profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE);
+    double firstBindPeakMemoryUsage =
+        profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::PEAK_WORKING_SET_USAGE);
+
+    double averageEvalMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::WORKING_SET_USAGE);
+    double stdevEvalMemoryUsage = profiler[EVAL_MODEL].GetStdev(CounterType::WORKING_SET_USAGE);
+    double minEvalMemoryUsage = profiler[EVAL_MODEL].GetMin(CounterType::WORKING_SET_USAGE);
+    double maxEvalMemoryUsage = profiler[EVAL_MODEL].GetMax(CounterType::WORKING_SET_USAGE);
+    double firstEvalMemoryUsage = profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE);
+    double firstEvalPeakMemoryUsage =
+        profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::PEAK_WORKING_SET_USAGE);
+
+    double averageBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
+    double stdevBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetStdev(CounterType::GPU_DEDICATED_MEM_USAGE);
+    double minBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetMin(CounterType::GPU_DEDICATED_MEM_USAGE);
+    double maxBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetMax(CounterType::GPU_DEDICATED_MEM_USAGE);
+    double firstBindDedicatedMemoryUsage =
+        profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
+
+    double averageEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
+    double stdevEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetStdev(CounterType::GPU_DEDICATED_MEM_USAGE);
+    double minEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetMin(CounterType::GPU_DEDICATED_MEM_USAGE);
+    double maxEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetMax(CounterType::GPU_DEDICATED_MEM_USAGE);
+    double firstEvalDedicatedMemoryUsage =
+        profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
+
+    double averageBindSharedMemoryUsage = profiler[BIND_VALUE].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
+    double stdevBindSharedMemoryUsage = profiler[BIND_VALUE].GetStdev(CounterType::GPU_SHARED_MEM_USAGE);
+    double minBindSharedMemoryUsage = profiler[BIND_VALUE].GetMin(CounterType::GPU_SHARED_MEM_USAGE);
+    double maxBindSharedMemoryUsage = profiler[BIND_VALUE].GetMax(CounterType::GPU_SHARED_MEM_USAGE);
+    double firstBindSharedMemoryUsage =
+        profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
+
+    double averageEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
+    double stdevEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetStdev(CounterType::GPU_SHARED_MEM_USAGE);
+    double minEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetMin(CounterType::GPU_SHARED_MEM_USAGE);
+    double maxEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetMax(CounterType::GPU_SHARED_MEM_USAGE);
+    double firstEvalSharedMemoryUsage =
+        profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
+
+    // First-iteration totals: load + session creation + first bind + first evaluate, from the values read above.
+    double firstIterationWorkingSetMemoryUsage = firstLoadWorkingSetMemoryUsage +
+        firstSessionCreationWorkingSetMemoryUsage + firstBindMemoryUsage + firstEvalMemoryUsage;
+
+    double firstIterationSharedMemoryUsage = firstLoadSharedMemoryUsage +
+        firstSessionCreationSharedMemoryUsage + firstBindSharedMemoryUsage + firstEvalSharedMemoryUsage;
+
+    double firstIterationDedicatedMemoryUsage = firstLoadDedicatedMemoryUsage +
+        firstSessionCreationDedicatedMemoryUsage + firstBindDedicatedMemoryUsage + firstEvalDedicatedMemoryUsage;
+
+    double firstIterationPeakWorkingSet = firstLoadPeakWorkingSetUsage + firstSessionPeakWorkingSetUsage +
+                                            firstBindPeakMemoryUsage + firstEvalPeakMemoryUsage;
+
+    printf("\nResults (device = %s, numIterations = %d, inputBinding = %s, inputDataType = %s, "
+            "deviceCreationLocation = %s):\n",
+            TypeHelper::Stringify(deviceType).c_str(), numIterations,
+            TypeHelper::Stringify(inputBindingType).c_str(), TypeHelper::Stringify(inputDataType).c_str(),
+            TypeHelper::Stringify(deviceCreationLocation).c_str());
+
+    std::cout << "\nFirst Iteration Performance (load, bind, session creation, and evaluate): " << std::endl;
+    std::cout << "  Load: " << loadTime << " ms" << std::endl;
+    std::cout << "  Bind: " << firstBindTime << " ms" << std::endl;
+    std::cout << "  Session Creation: " << createSessionTime << " ms" << std::endl;
+    std::cout << "  Evaluate: " << firstEvalTime << " ms" << std::endl;
+
+    if (isPerformanceConsoleOutputVerbose)
+    {
+        std::cout << "\n  Working Set Memory usage (load): " << firstLoadWorkingSetMemoryUsage << " MB"
+                    << std::endl;
+        std::cout << "  Working Set Memory usage (session creation): " << firstSessionCreationWorkingSetMemoryUsage
+                    << " MB" << std::endl;
+        std::cout << "  Working Set Memory usage (bind): " << firstBindMemoryUsage << " MB" << std::endl;
+    }
+    else
+    {
+        std::cout << std::endl;
+    }
+    std::cout << "  Working Set Memory usage (evaluate): " << firstEvalMemoryUsage << " MB" << std::endl;
+    std::cout << "  Working Set Memory usage (load, bind, session creation, and evaluate): "
+                << firstIterationWorkingSetMemoryUsage << " MB" << std::endl;
+
+    if (isPerformanceConsoleOutputVerbose)
+    {
+        std::cout << std::endl;
+        std::cout << "  Peak Working Set Memory Difference (from start to load): " << firstLoadPeakWorkingSetUsage
+                    << " MB" << std::endl;
+        std::cout << "  Peak Working Set Memory Difference (from model load to session creation): "
+                    << firstSessionPeakWorkingSetUsage << " MB" << std::endl;
+        std::cout << "  Peak Working Set Memory Difference (from session to bind): " << firstBindPeakMemoryUsage
+                    << " MB" << std::endl;
+        std::cout << "  Peak Working Set Memory Difference (from bind to evaluate): " << firstEvalPeakMemoryUsage
+                    << " MB" << std::endl;
+    }
+
+    std::cout << "  Peak Working Set Memory Difference (load, bind, session creation, and evaluate): "
+                << firstIterationPeakWorkingSet << " MB" << std::endl;
+
+    if (isPerformanceConsoleOutputVerbose)
+    {
+        std::cout << "\n  Dedicated Memory usage (load): " << firstLoadDedicatedMemoryUsage << " MB" << std::endl;
+        std::cout << "  Dedicated Memory usage (session creation): " << firstSessionCreationDedicatedMemoryUsage
+                    << " MB" << std::endl;
+        std::cout << "  Dedicated Memory usage (bind): " << firstBindDedicatedMemoryUsage << " MB" << std::endl;
+    }
+    else
+    {
+        std::cout << std::endl;
+    }
+    std::cout << "  Dedicated Memory usage (evaluate): " << firstEvalDedicatedMemoryUsage << " MB" << std::endl;
+    std::cout << "  Dedicated Memory usage (load, bind, session creation, and evaluate): "
+                << firstIterationDedicatedMemoryUsage << " MB" << std::endl;
+
+    if (isPerformanceConsoleOutputVerbose)
+    {
+        std::cout << "\n  Shared Memory usage (load): " << firstLoadSharedMemoryUsage << " MB" << std::endl;
+        std::cout << "  Shared Memory usage (session creation): " << firstSessionCreationSharedMemoryUsage << " MB"
+                    << std::endl;
+        std::cout << "  Shared Memory usage (bind): " << firstBindSharedMemoryUsage << " MB" << std::endl;
+    }
+    else
+    {
+        std::cout << std::endl;
+    }
+    std::cout << "  Shared Memory usage (evaluate): " << firstEvalSharedMemoryUsage << " MB" << std::endl;
+    std::cout << "  Shared Memory usage (load, bind, session creation, and evaluate): "
+                << firstIterationSharedMemoryUsage << " MB" << std::endl;
+
+    if (numIterations > 1)
+    {
+        printf("\nAverage Performance excluding first iteration. Iterations %d to %d. (Iterations greater than 1 "
+                "only bind and evaluate)\n",
+                2, numIterations);
+        std::cout << "  Average Bind: " << averageBindTime << " ms" << std::endl;
+        if (isPerformanceConsoleOutputVerbose)
+        {
+            std::cout << "  Minimum Bind: " << minBindTime << " ms" << std::endl;
+            std::cout << "  Maximum Bind: " << maxBindTime << " ms" << std::endl;
+            std::cout << "  Standard Deviation Bind: " << stdevBindTime << " ms" << std::endl;
+        }
+        std::cout << "  Average Evaluate: " << averageEvalTime << " ms" << std::endl;
+        if (isPerformanceConsoleOutputVerbose)
+        {
+            std::cout << "  Minimum Evaluate: " << minEvalTime << " ms" << std::endl;
+            std::cout << "  Maximum Evaluate: " << maxEvalTime << " ms" << std::endl;
+            std::cout << "  Standard Deviation Evaluate: " << stdevEvalTime << " ms" << std::endl;
+        }
+
+        std::cout << "\n  Average Working Set Memory usage (bind): " << averageBindMemoryUsage << " MB"
+                    << std::endl;
+        if (isPerformanceConsoleOutputVerbose)
+        {
+            std::cout << "  Min Working Set Memory usage (bind): " << minBindMemoryUsage << " MB" << std::endl;
+            std::cout << "  Max Working Set Memory usage (bind): " << maxBindMemoryUsage << " MB" << std::endl;
+            std::cout << "  Standard Deviation Working Set Memory usage (bind): " << stdevBindMemoryUsage << " MB"
+                        << std::endl;
+        }
+        std::cout << "  Average Working Set Memory usage (evaluate): " << averageEvalMemoryUsage << " MB"
+                    << std::endl;
+        if (isPerformanceConsoleOutputVerbose)
+        {
+            std::cout << "  Min Working Set Memory usage (evaluate): " << minEvalMemoryUsage << " MB" << std::endl;
+            std::cout << "  Max Working Set Memory usage (evaluate): " << maxEvalMemoryUsage << " MB" << std::endl;
+            std::cout << "  Standard Deviation Working Set Memory usage (evaluate): " << stdevEvalMemoryUsage
+                        << " MB" << std::endl;
+        }
+
+        std::cout << "\n  Average Dedicated Memory usage (bind): " << averageBindDedicatedMemoryUsage << " MB"
+                    << std::endl;
+        if (isPerformanceConsoleOutputVerbose)
+        {
+            std::cout << "  Min Dedicated Memory usage (bind): " << minBindDedicatedMemoryUsage << " MB"
+                        << std::endl;
+            std::cout << "  Max Dedicated Memory usage (bind): " << maxBindDedicatedMemoryUsage << " MB"
+                        << std::endl;
+            std::cout << "  Standard Deviation Dedicated Memory usage (bind): "
+                        << stdevBindDedicatedMemoryUsage << " MB" << std::endl;
+        }
+        std::cout << "  Average Dedicated Memory usage (evaluate): " << averageEvalDedicatedMemoryUsage << " MB"
+                    << std::endl;
+        if (isPerformanceConsoleOutputVerbose)
+        {
+            std::cout << "  Min Dedicated Memory usage (evaluate): " << minEvalDedicatedMemoryUsage << " MB"
+                        << std::endl;
+            std::cout << "  Max Dedicated Memory usage (evaluate): " << maxEvalDedicatedMemoryUsage << " MB"
+                        << std::endl;
+            std::cout << "  Standard Deviation Dedicated Memory usage (evaluate): " << stdevEvalDedicatedMemoryUsage
+                        << " MB" << std::endl;
+        }
+
+        std::cout << "\n  Average Shared Memory usage (bind): " << averageBindSharedMemoryUsage << " MB"
+                    << std::endl;
+        if (isPerformanceConsoleOutputVerbose)
+        {
+            std::cout << "  Min Shared Memory usage (bind): " << minBindSharedMemoryUsage << " MB" << std::endl;
+            std::cout << "  Max Shared Memory usage (bind): " << maxBindSharedMemoryUsage << " MB" << std::endl;
+            std::cout << "  Standard Deviation Shared Memory usage (bind): " << stdevBindSharedMemoryUsage << " MB"
+                        << std::endl;
+        }
+        std::cout << "  Average Shared Memory usage (evaluate): " << averageEvalSharedMemoryUsage << " MB"
+                    << std::endl;
+        if (isPerformanceConsoleOutputVerbose)
+        {
+            std::cout << "  Min Shared Memory usage (evaluate): " << minEvalSharedMemoryUsage << " MB" << std::endl;
+            std::cout << "  Max Shared Memory usage (evaluate): " << maxEvalSharedMemoryUsage << " MB" << std::endl;
+            std::cout << "  Standard Deviation Shared Memory usage (evaluate): " << stdevEvalSharedMemoryUsage
+                        << " MB" << std::endl;
+        }
+    }
+    std::cout << std::endl << std::endl << std::endl;
+}
+
+// Builds a readable type description for a feature: the tensor kind for tensors, "Image (Height: H, Width:  W)"
+// for images, "Map<key,value>" / "List<element>" (recursing into the nested descriptors) for maps and
+// sequences, and "Invalid feature <name>." for any other kind.
+std::wstring OutputHelper::FeatureDescriptorToString(const ILearningModelFeatureDescriptor& descriptor)
+{
+    switch (descriptor.Kind())
+    {
+        case LearningModelFeatureKind::Tensor:
+        {
+            auto tensorDescriptor = descriptor.as<TensorFeatureDescriptor>();
+            return TypeHelper::Stringify(tensorDescriptor.TensorKind());
+        }
+        case LearningModelFeatureKind::Image:
+        {
+            auto imageDescriptor = descriptor.as<ImageFeatureDescriptor>();
+            return L"Image (Height: " + std::to_wstring(imageDescriptor.Height()) + L", Width:  " +
+                   std::to_wstring(imageDescriptor.Width()) + L")";
+        }
+        case LearningModelFeatureKind::Map:
+        {
+            auto mapDescriptor = descriptor.as<MapFeatureDescriptor>();
+            return L"Map<" + TypeHelper::Stringify(mapDescriptor.KeyKind()) + L"," +
+                   FeatureDescriptorToString(mapDescriptor.ValueDescriptor()) + L">";
+        }
+        case LearningModelFeatureKind::Sequence:
+        {
+            auto sequenceDescriptor = descriptor.as<SequenceFeatureDescriptor>();
+            return L"List<" + FeatureDescriptorToString(sequenceDescriptor.ElementDescriptor()) + L">";
+        }
+        default:
+            // Build the message explicitly: a parenthesised (format, argument) pair is a comma expression.
+            return std::wstring(L"Invalid feature ") + descriptor.Name().c_str() + L".";
+    }
+}
+
+// Returns true when the feature, or a feature nested inside it, uses TensorKind::Float16.
+// Tensors are checked directly, maps on their key kind and (recursively) their value descriptor,
+// sequences (recursively) on their element descriptor. Every other feature kind yields false.
+bool OutputHelper::doesDescriptorContainFP16(const ILearningModelFeatureDescriptor& descriptor)
+{
+    const auto featureKind = descriptor.Kind();
+
+    // Tensor: the tensor kind itself decides.
+    if (featureKind == LearningModelFeatureKind::Tensor)
+    {
+        const auto tensorDescriptor = descriptor.as<TensorFeatureDescriptor>();
+        return tensorDescriptor.TensorKind() == TensorKind::Float16;
+    }
+
+    // Map: a Float16 key is enough, otherwise the value descriptor decides.
+    if (featureKind == LearningModelFeatureKind::Map)
+    {
+        const auto mapDescriptor = descriptor.as<MapFeatureDescriptor>();
+        return mapDescriptor.KeyKind() == TensorKind::Float16 || doesDescriptorContainFP16(mapDescriptor.ValueDescriptor());
+    }
+
+    // Sequence: the element descriptor decides.
+    if (featureKind == LearningModelFeatureKind::Sequence)
+    {
+        const auto sequenceDescriptor = descriptor.as<SequenceFeatureDescriptor>();
+        return doesDescriptorContainFP16(sequenceDescriptor.ElementDescriptor());
+    }
+
+    return false;
+}
+
+// Returns true when at least one input feature of the model uses Float16 (see doesDescriptorContainFP16).
+// Only the model's input features are inspected.
+bool OutputHelper::doesModelContainFP16(const LearningModel& model)
+{
+    const auto inputFeatures = model.InputFeatures();
+    for (uint32_t index = 0; index < inputFeatures.Size(); ++index)
+    {
+        if (doesDescriptorContainFP16(inputFeatures.GetAt(index))) return true;
+    }
+    return false;
+}
+
+// Stores, in slot iterNum, the clock time reported by the profiler's LOAD_MODEL counter.
+// WritePerIterationPerformance later writes the stored value to the "Load (ms)" column.
+void OutputHelper::SaveLoadTimes(Profiler<WINML_MODEL_TEST_PERF>& profiler, uint32_t iterNum)
+{ m_clockLoadTimes[iterNum] = profiler[LOAD_MODEL].GetClockTime(); }
+
+// Stores the bind clock time for slot iterNum: iteration 0 reads BIND_VALUE_FIRST_RUN, later ones BIND_VALUE.
+void OutputHelper::SaveBindTimes(Profiler<WINML_MODEL_TEST_PERF>& profiler, uint32_t iterNum)
+{
+    m_clockBindTimes[iterNum] = profiler[(iterNum == 0) ? BIND_VALUE_FIRST_RUN : BIND_VALUE].GetClockTime();
+}
+
+void OutputHelper::SaveEvalPerformance(Profiler<WINML_MODEL_TEST_PERF>& profiler, uint32_t iterNum)
+{
+    const WINML_MODEL_TEST_PERF counterId = (iterNum != 0) ? EVAL_MODEL : EVAL_MODEL_FIRST_RUN; // run 0 has its own counter
+    m_clockEvalTimes[iterNum] = profiler[counterId].GetClockTime();
+    m_CPUWorkingStart[iterNum] = profiler[counterId].GetCpuWorkingStart();
+    m_CPUWorkingDiff[iterNum] = profiler[counterId].GetCpuWorkingDiff();
+    m_GPUSharedStart[iterNum] = profiler[counterId].GetGpuSharedStart();
+    m_GPUSharedDiff[iterNum] = profiler[counterId].GetGpuSharedDiff();
+    m_GPUDedicatedDiff[iterNum] = profiler[counterId].GetGpuDedicatedDiff();
+}
+
+void OutputHelper::SaveResult(uint32_t iterationNum, std::string result, int hashcode)
+{
+    m_outputTensorHash[iterationNum] = hashcode; // hash of the output tensor for this iteration's slot
+    m_outputResult[iterationNum] = result;       // result text for the same slot
+}
+
+void OutputHelper::SetDefaultPerIterationFolder(const std::wstring& folderName)
+{
+    m_folderNamePerIteration = folderName; // create_directories returns false, without error, for an existing folder
+    if (std::error_code error; !std::filesystem::create_directories(m_folderNamePerIteration, error) && error)
+        std::wcout << L"Folder [" + m_folderNamePerIteration + L"] cannot be created";
+}
+
+// Points the per-iteration summary CSV at "Summary.csv" inside the per-iteration folder.
+// Uses whatever folder name is currently stored in m_folderNamePerIteration.
+void OutputHelper::SetDefaultCSVFileNamePerIteration()
+{ m_csvFileNamePerIterationSummary = m_folderNamePerIteration + L"\\Summary.csv"; }
+
+std::wstring OutputHelper::GetDefaultCSVFileNamePerIteration() { return m_csvFileNamePerIterationSummary; } // summary CSV path
+
+std::wstring OutputHelper::GetCsvFileNamePerIterationResult() { return m_csvFileNamePerIterationResult; } // result CSV path
+
+// Chooses the CSV file name for the tensor result of one iteration:
+//   <per-iteration folder>\<featureName><Cpu|Gpu>Iteration<iterationNum + 1>.csv
+// When only one device type is requested the device tag follows it (GPU if requested, else CPU).
+// When both CPU and GPU are requested the tag is "Cpu" while m_flagGpuDevice is false and "Gpu" once it
+// is true. The flag is set by the call whose iterationNum equals NumIterations() - 1, or by the first
+// call at all when SaveTensorMode is "First"; that call itself is still tagged "Cpu".
+// The untagged prefix is kept in m_fileNameResultDevice.
+void OutputHelper::SetDefaultCSVIterationResult(uint32_t iterationNum, const CommandLineArgs& args,
+                                                std::wstring& featureName)
+{
+    const bool bothDevices = args.UseCPU() && args.UseGPU();
+    // Single-device runs follow the requested device; dual-device runs follow the flag.
+    const bool tagAsGpu = bothDevices ? m_flagGpuDevice : args.UseGPU();
+    const wchar_t* const deviceTag = tagAsGpu ? L"GpuIteration" : L"CpuIteration";
+    m_fileNameResultDevice = m_folderNamePerIteration + L"\\" + featureName + deviceTag;
+
+    // Switch later calls over to the "Gpu" tag once the condition described above is met.
+    if (bothDevices && !m_flagGpuDevice)
+    {
+        const bool isLastIteration = iterationNum == args.NumIterations() - 1;
+        if (isLastIteration || args.SaveTensorMode() == L"First")
+        {
+            m_flagGpuDevice = true;
+        }
+    }
+
+    m_csvFileNamePerIterationResult = m_fileNameResultDevice + std::to_wstring(iterationNum + 1) + L".csv";
+}
+
+void OutputHelper::SetCSVFileName(const std::wstring& fileName) { m_csvFileName = fileName; } // stores the CSV file name
+
+void OutputHelper::WritePerIterationPerformance(const CommandLineArgs& args, const std::wstring model,
+                                    const std::wstring imagePath)
+{ // Appends the saved per-iteration rows (and a header row for a new file) to the per-iteration summary CSV.
+    if (m_csvFileNamePerIterationSummary.length() > 0) // nothing is written unless a summary file name is set
+    {
+        bool bNewFile = false;
+        std::ifstream fin;
+        fin.open(m_csvFileNamePerIterationSummary);
+        std::filebuf* outbuf = fin.rdbuf();
+        if (EOF == outbuf->sbumpc()) // no first character: the file is missing or empty, so a header is needed
+        {
+            bNewFile = true;
+        }
+        fin.close();
+        // Rows are always appended, so earlier runs recorded in the same file are kept.
+        std::ofstream fout;
+        fout.open(m_csvFileNamePerIterationSummary, std::ios_base::app);
+        // The stream is narrow, so the wide strings are converted to UTF-8 bytes before being written.
+        std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
+        std::string modelName = converter.to_bytes(model);
+        std::string fileNameResultDevice = converter.to_bytes(m_fileNameResultDevice);
+        std::string inputName = args.IsCSVInput() ? converter.to_bytes(args.CsvPath())
+                                                    : args.IsImageInput() ? converter.to_bytes(imagePath) : "";
+        // Header row: its columns depend on whether per-iteration capture and/or tensor saving is enabled.
+        if (bNewFile)
+        {
+            if (args.IsPerIterationCapture())
+            {
+                fout << "Model Name"
+                        << ","
+                        << "Input Name"
+                        << ","
+                        << "Iterations"
+                        << ","
+                        << "Iteration Number "
+                        << ","
+                        << "CPU Working Set Diff (MB)"
+                        << ","
+                        << "CPU Working Set Start (MB)"
+                        << ","
+                        << "GPU Shared Memory Diff (MB)"
+                        << ","
+                        << "GPU Shared Memory Start (MB)"
+                        << ","
+                        << "GPU Dedicated Memory Diff (MB)"
+                        << ","
+                        << "Load (ms)"
+                        << ","
+                        << "Bind (ms)"
+                        << ","
+                        << "Evaluate (ms)"
+                        << ",";
+                // The tensor columns follow the performance columns when tensors are saved as well.
+                if (args.IsSaveTensor())
+                {
+                    fout << "Result"
+                            << ","
+                            << "OutputTensorHash"
+                            << ","
+                            << "FileName";
+                }
+            }
+            // Tensor saving without per-iteration capture: only the iteration number and the tensor columns.
+            else if (args.IsSaveTensor())
+            {
+                fout << "Iteration Number"
+                        << ","
+                        << "Result"
+                        << ","
+                        << "OutputTensorHash"
+                        << ","
+                        << "FileName";
+            }
+            fout << std::endl;
+        }
+        // Data rows: one per iteration, read from the values stored by the Save* methods.
+        if (args.IsPerIterationCapture())
+        {
+            for (uint32_t i = 0; i < args.NumIterations(); i++)
+            {
+                fout << modelName << "," << inputName << "," << args.NumIterations() << "," << i + 1 << ","
+                        << m_CPUWorkingDiff[i] << "," << m_CPUWorkingStart[i] << "," << m_GPUSharedDiff[i] << ","
+                        << m_GPUSharedStart[i] << "," << m_GPUDedicatedDiff[i] << "," << m_clockLoadTimes[i] << ","
+                        << m_clockBindTimes[i] << "," << m_clockEvalTimes[i] << ",";
+                // Tensor columns are filled for every row in "All" mode, only for the first row in "First" mode.
+                if (args.IsSaveTensor() &&
+                    (args.SaveTensorMode() == L"All" || (args.SaveTensorMode() == L"First" && i == 0)))
+                {
+                    fout << m_outputResult[i] << "," << m_outputTensorHash[i] << ","
+                            << fileNameResultDevice + std::to_string(i + 1) + ".csv"
+                            << ",";
+                }
+                fout << std::endl;
+            }
+        }
+        else if (args.IsSaveTensor())
+        {
+            for (uint32_t i = 0; i < args.NumIterations(); i++)
+            {
+                fout << i + 1 << "," << m_outputResult[i] << "," << m_outputTensorHash[i] << ","
+                        << fileNameResultDevice + std::to_string(i + 1) + ".csv" << std::endl;
+                if (args.SaveTensorMode() == L"First" && i == 0) // "First" mode writes a single row
+                {
+                    break;
+                }
+            }
+        }
+        fout.close();
+    }
+}
+
+// Walks a raw tensor buffer, optionally dumps every element to a CSV stream, and collects the k largest
+// values together with their element indices.
+//   T         - element type stored in the buffer; HALF elements are converted with XMConvertHalfToFloat.
+//   args      - when args.IsSaveTensor() is true, each element is written to fout as "index,value".
+//   buffer    - start of the tensor data; uCapacity is its size in bytes (uCapacity / sizeof(T) elements).
+//   maxValues - receives up to k (value, index) pairs, appended in order of highest value to lowest.
+//   fout      - stream for the per-element dump; only written to when tensors are being saved.
+//   k         - number of top values to keep; 0 collects nothing.
+template <typename T>
+void OutputHelper::ProcessTensorResult(const CommandLineArgs& args, const void* buffer, const uint32_t uCapacity,
+                         std::vector<std::pair<float, int>>& maxValues, std::ofstream& fout, unsigned int k)
+{
+    // Min-heap holding at most k entries: top() is the smallest value kept so far, so it is the
+    // entry that gets evicted whenever a larger value shows up.
+    auto cmp = [](const std::pair<float, int>& x, const std::pair<float, int>& y) { return x.first > y.first; };
+    std::priority_queue<std::pair<float, int>, std::vector<std::pair<float, int>>, decltype(cmp)> topKvalues(cmp);
+
+    const T* tensor = static_cast<const T*>(buffer);
+    const int size = static_cast<int>(uCapacity / sizeof(T));
+    for (int i = 0; i < size; i++)
+    {
+        // HALF needs an explicit half-to-float conversion; any other element type converts directly.
+        const float val = std::is_same<T, HALF>::value ? XMConvertHalfToFloat(static_cast<HALF>(tensor[i]))
+                                                       : static_cast<float>(tensor[i]);
+        if (args.IsSaveTensor())
+        {
+            fout << i << "," << val << "\n"; // "\n" instead of std::endl: no forced flush per element
+        }
+
+        if (topKvalues.size() < k)
+        {
+            topKvalues.push({ val, i });
+        }
+        else if (k > 0 && topKvalues.top().first < val)
+        {
+            topKvalues.pop();
+            topKvalues.push({ val, i });
+        }
+    }
+    // The heap pops lowest-first, so drain it into a scratch vector and append that back-to-front.
+    // Entries the caller already stored in maxValues keep their position and order.
+    std::vector<std::pair<float, int>> ascending;
+    while (!topKvalues.empty())
+    {
+        ascending.push_back(topKvalues.top());
+        topKvalues.pop();
+    }
+    maxValues.insert(maxValues.end(), ascending.rbegin(), ascending.rend());
+}
+
+// The template body lives in this .cpp file, so the element types in use are instantiated explicitly.
+template void OutputHelper::ProcessTensorResult<float>(const CommandLineArgs& args, const void* buffer, const uint32_t uCapacity,
+                                                       std::vector<std::pair<float, int>>& maxValues, std::ofstream& fout,
+                                                       unsigned int k);
+template void OutputHelper::ProcessTensorResult<HALF>(const CommandLineArgs& args, const void* buffer, const uint32_t uCapacity,
+                                                      std::vector<std::pair<float, int>>& maxValues, std::ofstream& fout,
+                                                      unsigned int k);
+
+// Appends one row of aggregated performance statistics to the CSV file named by m_csvFileName.
+// Does nothing when m_csvFileName is empty. If the file is missing or empty, a header row is written
+// first. Each row holds the run description (model, device, binding, ...), iteration counts, then
+// average / standard deviation / min / max of time, working set, GPU dedicated and GPU shared memory
+// for load, session creation, first bind, bind, first evaluate and evaluate, in the header's order.
+// The steady-state bind/evaluate columns are written as 0 when numIterations <= 1.
+// perfFileMetadata adds trailing columns: pair.first is the column header, pair.second the row value.
+void OutputHelper::WritePerformanceDataToCSV(const Profiler<WINML_MODEL_TEST_PERF>& profiler, int numIterations,
+                            std::wstring model, const std::string& deviceType, const std::string& inputBinding,
+                            const std::string& inputType, const std::string& deviceCreationLocation,
+                            const std::vector<std::pair<std::string, std::string>>& perfFileMetadata) const
+{
+    double averageLoadTime = profiler[LOAD_MODEL].GetAverage(CounterType::TIMER);
+    double stdevLoadTime = profiler[LOAD_MODEL].GetStdev(CounterType::TIMER);
+    double minLoadTime = profiler[LOAD_MODEL].GetMin(CounterType::TIMER);
+    double maxLoadTime = profiler[LOAD_MODEL].GetMax(CounterType::TIMER);
+    uint32_t numberLoadIterations = profiler[LOAD_MODEL].GetCount();
+
+    double averageCreateSessionTime = profiler[CREATE_SESSION].GetAverage(CounterType::TIMER);
+    double stdevCreateSessionTime = profiler[CREATE_SESSION].GetStdev(CounterType::TIMER);
+    double minCreateSessionTime = profiler[CREATE_SESSION].GetMin(CounterType::TIMER);
+    double maxCreateSessionTime = profiler[CREATE_SESSION].GetMax(CounterType::TIMER);
+    uint32_t numberCreateSessionIterations = profiler[CREATE_SESSION].GetCount();
+
+    double averageBindTime = profiler[BIND_VALUE].GetAverage(CounterType::TIMER);
+    double stdevBindTime = profiler[BIND_VALUE].GetStdev(CounterType::TIMER);
+    double minBindTime = profiler[BIND_VALUE].GetMin(CounterType::TIMER);
+    double maxBindTime = profiler[BIND_VALUE].GetMax(CounterType::TIMER);
+
+    double averageFirstBindTime = profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::TIMER);
+    double stdevFirstBindTime = profiler[BIND_VALUE_FIRST_RUN].GetStdev(CounterType::TIMER);
+    double minFirstBindTime = profiler[BIND_VALUE_FIRST_RUN].GetMin(CounterType::TIMER);
+    double maxFirstBindTime = profiler[BIND_VALUE_FIRST_RUN].GetMax(CounterType::TIMER);
+
+    double averageEvalTime = profiler[EVAL_MODEL].GetAverage(CounterType::TIMER);
+    double stdevEvalTime = profiler[EVAL_MODEL].GetStdev(CounterType::TIMER);
+    double minEvalTime = profiler[EVAL_MODEL].GetMin(CounterType::TIMER);
+    double maxEvalTime = profiler[EVAL_MODEL].GetMax(CounterType::TIMER);
+
+    double averageFirstEvalTime = profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::TIMER);
+    double stdevFirstEvalTime = profiler[EVAL_MODEL_FIRST_RUN].GetStdev(CounterType::TIMER);
+    double minFirstEvalTime = profiler[EVAL_MODEL_FIRST_RUN].GetMin(CounterType::TIMER);
+    double maxFirstEvalTime = profiler[EVAL_MODEL_FIRST_RUN].GetMax(CounterType::TIMER);
+
+    double averageLoadWorkingSetMemoryUsage = profiler[LOAD_MODEL].GetAverage(CounterType::WORKING_SET_USAGE);
+    double stdevLoadWorkingSetMemoryUsage = profiler[LOAD_MODEL].GetStdev(CounterType::WORKING_SET_USAGE);
+    double minLoadWorkingSetMemoryUsage = profiler[LOAD_MODEL].GetMin(CounterType::WORKING_SET_USAGE);
+    double maxLoadWorkingSetMemoryUsage = profiler[LOAD_MODEL].GetMax(CounterType::WORKING_SET_USAGE);
+
+    double averageCreateSessionWorkingSetMemoryUsage =
+        profiler[CREATE_SESSION].GetAverage(CounterType::WORKING_SET_USAGE);
+    double stdevCreateSessionWorkingSetMemoryUsage =
+        profiler[CREATE_SESSION].GetStdev(CounterType::WORKING_SET_USAGE);
+    double minCreateSessionWorkingSetMemoryUsage = profiler[CREATE_SESSION].GetMin(CounterType::WORKING_SET_USAGE);
+    double maxCreateSessionWorkingSetMemoryUsage = profiler[CREATE_SESSION].GetMax(CounterType::WORKING_SET_USAGE);
+
+    double averageBindWorkingSetMemoryUsage = profiler[BIND_VALUE].GetAverage(CounterType::WORKING_SET_USAGE);
+    double stdevBindWorkingSetMemoryUsage = profiler[BIND_VALUE].GetStdev(CounterType::WORKING_SET_USAGE);
+    double minBindWorkingSetMemoryUsage = profiler[BIND_VALUE].GetMin(CounterType::WORKING_SET_USAGE);
+    double maxBindWorkingSetMemoryUsage = profiler[BIND_VALUE].GetMax(CounterType::WORKING_SET_USAGE);
+
+    double averageFirstBindWorkingSetMemoryUsage =
+        profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE);
+    double stdevFirstBindWorkingSetMemoryUsage =
+        profiler[BIND_VALUE_FIRST_RUN].GetStdev(CounterType::WORKING_SET_USAGE);
+    double minFirstBindWorkingSetMemoryUsage =
+        profiler[BIND_VALUE_FIRST_RUN].GetMin(CounterType::WORKING_SET_USAGE);
+    double maxFirstBindWorkingSetMemoryUsage =
+        profiler[BIND_VALUE_FIRST_RUN].GetMax(CounterType::WORKING_SET_USAGE);
+
+    double averageEvalWorkingSetMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::WORKING_SET_USAGE);
+    double stdevEvalWorkingSetMemoryUsage = profiler[EVAL_MODEL].GetStdev(CounterType::WORKING_SET_USAGE);
+    double minEvalWorkingSetMemoryUsage = profiler[EVAL_MODEL].GetMin(CounterType::WORKING_SET_USAGE);
+    double maxEvalWorkingSetMemoryUsage = profiler[EVAL_MODEL].GetMax(CounterType::WORKING_SET_USAGE);
+
+    double averageFirstEvalWorkingSetMemoryUsage =
+        profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE);
+    double stdevFirstEvalWorkingSetMemoryUsage =
+        profiler[EVAL_MODEL_FIRST_RUN].GetStdev(CounterType::WORKING_SET_USAGE);
+    double minFirstEvalWorkingSetMemoryUsage =
+        profiler[EVAL_MODEL_FIRST_RUN].GetMin(CounterType::WORKING_SET_USAGE);
+    double maxFirstEvalWorkingSetMemoryUsage =
+        profiler[EVAL_MODEL_FIRST_RUN].GetMax(CounterType::WORKING_SET_USAGE);
+
+    double averageLoadDedicatedMemoryUsage = profiler[LOAD_MODEL].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
+    double stdevLoadDedicatedMemoryUsage = profiler[LOAD_MODEL].GetStdev(CounterType::GPU_DEDICATED_MEM_USAGE);
+    double minLoadDedicatedMemoryUsage = profiler[LOAD_MODEL].GetMin(CounterType::GPU_DEDICATED_MEM_USAGE);
+    double maxLoadDedicatedMemoryUsage = profiler[LOAD_MODEL].GetMax(CounterType::GPU_DEDICATED_MEM_USAGE);
+
+    double averageCreateSessionDedicatedMemoryUsage =
+        profiler[CREATE_SESSION].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
+    double stdevCreateSessionDedicatedMemoryUsage =
+        profiler[CREATE_SESSION].GetStdev(CounterType::GPU_DEDICATED_MEM_USAGE);
+    double minCreateSessionDedicatedMemoryUsage =
+        profiler[CREATE_SESSION].GetMin(CounterType::GPU_DEDICATED_MEM_USAGE);
+    double maxCreateSessionDedicatedMemoryUsage =
+        profiler[CREATE_SESSION].GetMax(CounterType::GPU_DEDICATED_MEM_USAGE);
+
+    double averageBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
+    double stdevBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetStdev(CounterType::GPU_DEDICATED_MEM_USAGE);
+    double minBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetMin(CounterType::GPU_DEDICATED_MEM_USAGE);
+    double maxBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetMax(CounterType::GPU_DEDICATED_MEM_USAGE);
+
+    double averageFirstBindDedicatedMemoryUsage =
+        profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
+    double stdevFirstBindDedicatedMemoryUsage =
+        profiler[BIND_VALUE_FIRST_RUN].GetStdev(CounterType::GPU_DEDICATED_MEM_USAGE);
+    double minFirstBindDedicatedMemoryUsage =
+        profiler[BIND_VALUE_FIRST_RUN].GetMin(CounterType::GPU_DEDICATED_MEM_USAGE);
+    double maxFirstBindDedicatedMemoryUsage =
+        profiler[BIND_VALUE_FIRST_RUN].GetMax(CounterType::GPU_DEDICATED_MEM_USAGE);
+
+    double averageEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
+    double stdevEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetStdev(CounterType::GPU_DEDICATED_MEM_USAGE);
+    double minEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetMin(CounterType::GPU_DEDICATED_MEM_USAGE);
+    double maxEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetMax(CounterType::GPU_DEDICATED_MEM_USAGE);
+
+    double averageFirstEvalDedicatedMemoryUsage =
+        profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
+    double stdevFirstEvalDedicatedMemoryUsage =
+        profiler[EVAL_MODEL_FIRST_RUN].GetStdev(CounterType::GPU_DEDICATED_MEM_USAGE);
+    double minFirstEvalDedicatedMemoryUsage =
+        profiler[EVAL_MODEL_FIRST_RUN].GetMin(CounterType::GPU_DEDICATED_MEM_USAGE);
+    double maxFirstEvalDedicatedMemoryUsage =
+        profiler[EVAL_MODEL_FIRST_RUN].GetMax(CounterType::GPU_DEDICATED_MEM_USAGE);
+
+    double averageLoadSharedMemoryUsage = profiler[LOAD_MODEL].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
+    double stdevLoadSharedMemoryUsage = profiler[LOAD_MODEL].GetStdev(CounterType::GPU_SHARED_MEM_USAGE);
+    double minLoadSharedMemoryUsage = profiler[LOAD_MODEL].GetMin(CounterType::GPU_SHARED_MEM_USAGE);
+    double maxLoadSharedMemoryUsage = profiler[LOAD_MODEL].GetMax(CounterType::GPU_SHARED_MEM_USAGE);
+
+    double averageCreateSessionSharedMemoryUsage =
+        profiler[CREATE_SESSION].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
+    double stdevCreateSessionSharedMemoryUsage =
+        profiler[CREATE_SESSION].GetStdev(CounterType::GPU_SHARED_MEM_USAGE);
+    double minCreateSessionSharedMemoryUsage = profiler[CREATE_SESSION].GetMin(CounterType::GPU_SHARED_MEM_USAGE);
+    double maxCreateSessionSharedMemoryUsage = profiler[CREATE_SESSION].GetMax(CounterType::GPU_SHARED_MEM_USAGE);
+
+    double averageBindSharedMemoryUsage = profiler[BIND_VALUE].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
+    double stdevBindSharedMemoryUsage = profiler[BIND_VALUE].GetStdev(CounterType::GPU_SHARED_MEM_USAGE);
+    double minBindSharedMemoryUsage = profiler[BIND_VALUE].GetMin(CounterType::GPU_SHARED_MEM_USAGE);
+    double maxBindSharedMemoryUsage = profiler[BIND_VALUE].GetMax(CounterType::GPU_SHARED_MEM_USAGE);
+
+    double averageFirstBindSharedMemoryUsage =
+        profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
+    double stdevFirstBindSharedMemoryUsage =
+        profiler[BIND_VALUE_FIRST_RUN].GetStdev(CounterType::GPU_SHARED_MEM_USAGE);
+    double minFirstBindSharedMemoryUsage = profiler[BIND_VALUE_FIRST_RUN].GetMin(CounterType::GPU_SHARED_MEM_USAGE);
+    double maxFirstBindSharedMemoryUsage = profiler[BIND_VALUE_FIRST_RUN].GetMax(CounterType::GPU_SHARED_MEM_USAGE);
+
+    double averageEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
+    double stdevEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetStdev(CounterType::GPU_SHARED_MEM_USAGE);
+    double minEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetMin(CounterType::GPU_SHARED_MEM_USAGE);
+    double maxEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetMax(CounterType::GPU_SHARED_MEM_USAGE);
+
+    double averageFirstEvalSharedMemoryUsage =
+        profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
+    double stdevFirstEvalSharedMemoryUsage =
+        profiler[EVAL_MODEL_FIRST_RUN].GetStdev(CounterType::GPU_SHARED_MEM_USAGE);
+    double minFirstEvalSharedMemoryUsage =
+        profiler[EVAL_MODEL_FIRST_RUN].GetMin(CounterType::GPU_SHARED_MEM_USAGE);
+    double maxFirstEvalSharedMemoryUsage =
+        profiler[EVAL_MODEL_FIRST_RUN].GetMax(CounterType::GPU_SHARED_MEM_USAGE);
+
+    if (!m_csvFileName.empty())
+    {
+        // A missing or empty file has no header row yet: the very first read already hits EOF.
+        std::ifstream fin(m_csvFileName);
+        const bool bNewFile = (EOF == fin.rdbuf()->sbumpc());
+        fin.close();
+
+        std::ofstream fout(m_csvFileName, std::ios_base::app);
+
+        std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
+        std::string modelName = converter.to_bytes(model);
+
+        if (bNewFile)
+        {
+            fout << "model name"
+                    << ","
+                    << "device type"
+                    << ","
+                    << "input binding"
+                    << ","
+                    << "input type"
+                    << ","
+                    << "device creation location"
+                    << ","
+                    << "iterations"
+                    << ","
+                    << "load iterations"
+                    << ","
+                    << "session creation iterations"
+                    << ","
+                    << "average load (ms)"
+                    << ","
+                    << "standard deviation load (ms)"
+                    << ","
+                    << "min load (ms)"
+                    << ","
+                    << "max load (ms)"
+                    << ","
+                    << "average session creation (ms)"
+                    << ","
+                    << "standard deviation session creation (ms)"
+                    << ","
+                    << "min session creation (ms)"
+                    << ","
+                    << "max session creation (ms)"
+                    << ","
+                    << "average first bind (ms)"
+                    << ","
+                    << "standard deviation first bind (ms)"
+                    << ","
+                    << "min first bind (ms)"
+                    << ","
+                    << "max first bind (ms)"
+                    << ","
+                    << "average bind (ms)"
+                    << ","
+                    << "standard deviation bind (ms)"
+                    << ","
+                    << "min bind (ms)"
+                    << ","
+                    << "max bind (ms)"
+                    << ","
+                    << "average first evaluate (ms)"
+                    << ","
+                    << "standard deviation first evaluate (ms)"
+                    << ","
+                    << "min first evaluate (ms)"
+                    << ","
+                    << "max first evaluate (ms)"
+                    << ","
+                    << "average evaluate (ms)"
+                    << ","
+                    << "standard deviation evaluate (ms)"
+                    << ","
+                    << "min evaluate (ms)"
+                    << ","
+                    << "max evaluate (ms)"
+                    << ","
+                    << "load average working set memory (MB)"
+                    << ","
+                    << "load standard deviation working set memory (MB)"
+                    << ","
+                    << "load min working set memory (MB)"
+                    << ","
+                    << "load max working set memory (MB)"
+                    << ","
+                    << "session creation average working set memory (MB)"
+                    << ","
+                    << "session creation standard deviation working set memory (MB)"
+                    << ","
+                    << "session creation min working set memory (MB)"
+                    << ","
+                    << "session creation max working set memory (MB)"
+                    << ","
+                    << "first bind average working set memory (MB)"
+                    << ","
+                    << "first bind standard deviation working set memory (MB)"
+                    << ","
+                    << "first bind min working set memory (MB)"
+                    << ","
+                    << "first bind max working set memory (MB)"
+                    << ","
+                    << "bind average working set memory (MB)"
+                    << ","
+                    << "bind standard deviation working set memory (MB)"
+                    << ","
+                    << "bind min working set memory (MB)"
+                    << ","
+                    << "bind max working set memory (MB)"
+                    << ","
+                    << "first evaluate average working set memory (MB)"
+                    << ","
+                    << "first evaluate standard deviation working set memory (MB)"
+                    << ","
+                    << "first evaluate min working set memory (MB)"
+                    << ","
+                    << "first evaluate max working set memory (MB)"
+                    << ","
+                    << "evaluate average working set memory (MB)"
+                    << ","
+                    << "evaluate standard deviation working set memory (MB)"
+                    << ","
+                    << "evaluate min working set memory (MB)"
+                    << ","
+                    << "evaluate max working set memory (MB)"
+                    << ","
+                    << "load average dedicated memory (MB)"
+                    << ","
+                    << "load standard deviation dedicated memory (MB)"
+                    << ","
+                    << "load min dedicated memory (MB)"
+                    << ","
+                    << "load max dedicated memory (MB)"
+                    << ","
+                    << "session creation average dedicated memory (MB)"
+                    << ","
+                    << "session creation standard deviation dedicated memory (MB)"
+                    << ","
+                    << "session creation min dedicated memory (MB)"
+                    << ","
+                    << "session creation max dedicated memory (MB)"
+                    << ","
+                    << "first bind average dedicated memory (MB)"
+                    << ","
+                    << "first bind standard deviation dedicated memory (MB)"
+                    << ","
+                    << "first bind min dedicated memory (MB)"
+                    << ","
+                    << "first bind max dedicated memory (MB)"
+                    << ","
+                    << "bind average dedicated memory (MB)"
+                    << ","
+                    << "bind standard deviation dedicated memory (MB)"
+                    << ","
+                    << "bind min dedicated memory (MB)"
+                    << ","
+                    << "bind max dedicated memory (MB)"
+                    << ","
+                    << "first evaluate average dedicated memory (MB)"
+                    << ","
+                    << "first evaluate standard deviation dedicated memory (MB)"
+                    << ","
+                    << "first evaluate min dedicated memory (MB)"
+                    << ","
+                    << "first evaluate max dedicated memory (MB)"
+                    << ","
+                    << "evaluate average dedicated memory (MB)"
+                    << ","
+                    << "evaluate standard deviation dedicated memory (MB)"
+                    << ","
+                    << "evaluate min dedicated memory (MB)"
+                    << ","
+                    << "evaluate max dedicated memory (MB)"
+                    << ","
+                    << "load average shared memory (MB)"
+                    << ","
+                    << "load standard deviation shared memory (MB)"
+                    << ","
+                    << "load min shared memory (MB)"
+                    << ","
+                    << "load max shared memory (MB)"
+                    << ","
+                    << "session creation average shared memory (MB)"
+                    << ","
+                    << "session creation standard deviation shared memory (MB)"
+                    << ","
+                    << "session creation min shared memory (MB)"
+                    << ","
+                    << "session creation max shared memory (MB)"
+                    << ","
+                    << "first bind average shared memory (MB)"
+                    << ","
+                    << "first bind standard deviation shared memory (MB)"
+                    << ","
+                    << "first bind min shared memory (MB)"
+                    << ","
+                    << "first bind max shared memory (MB)"
+                    << ","
+                    << "bind average shared memory (MB)"
+                    << ","
+                    << "bind standard deviation shared memory (MB)"
+                    << ","
+                    << "bind min shared memory (MB)"
+                    << ","
+                    << "bind max shared memory (MB)"
+                    << ","
+                    << "first evaluate average shared memory (MB)"
+                    << ","
+                    << "first evaluate standard deviation shared memory (MB)"
+                    << ","
+                    << "first evaluate min shared memory (MB)"
+                    << ","
+                    << "first evaluate max shared memory (MB)"
+                    << ","
+                    << "evaluate average shared memory (MB)"
+                    << ","
+                    << "evaluate standard deviation shared memory (MB)"
+                    << ","
+                    << "evaluate min shared memory (MB)"
+                    << ","
+                    << "evaluate max shared memory (MB)"
+                    << ",";
+            for (const auto& metaDataPair : perfFileMetadata)
+            {
+                fout << metaDataPair.first << ",";
+            }
+            fout << std::endl;
+        }
+        fout << modelName << "," << deviceType << "," << inputBinding << "," << inputType << ","
+                << deviceCreationLocation << "," << numIterations << "," << numberLoadIterations << ","
+                << numberCreateSessionIterations << "," << averageLoadTime << "," << stdevLoadTime << ","
+                << minLoadTime << "," << maxLoadTime << "," << averageCreateSessionTime << ","
+                << stdevCreateSessionTime << "," << minCreateSessionTime << "," << maxCreateSessionTime << ","
+                << averageFirstBindTime << "," << stdevFirstBindTime << "," << minFirstBindTime << ","
+                << maxFirstBindTime << "," << (numIterations <= 1 ? 0 : averageBindTime) << ","
+                << (numIterations <= 1 ? 0 : stdevBindTime) << "," << (numIterations <= 1 ? 0 : minBindTime) << ","
+                << (numIterations <= 1 ? 0 : maxBindTime) << "," << averageFirstEvalTime << "," << stdevFirstEvalTime
+                << "," << minFirstEvalTime << "," << maxFirstEvalTime << ","
+                << (numIterations <= 1 ? 0 : averageEvalTime) << "," << (numIterations <= 1 ? 0 : stdevEvalTime) << ","
+                << (numIterations <= 1 ? 0 : minEvalTime) << "," << (numIterations <= 1 ? 0 : maxEvalTime) << ","
+                // Working set memory columns; min is written before max to line up with the header row.
+                << averageLoadWorkingSetMemoryUsage << "," << stdevLoadWorkingSetMemoryUsage << ","
+                << minLoadWorkingSetMemoryUsage << "," << maxLoadWorkingSetMemoryUsage << ","
+                << averageCreateSessionWorkingSetMemoryUsage << "," << stdevCreateSessionWorkingSetMemoryUsage << ","
+                << minCreateSessionWorkingSetMemoryUsage << "," << maxCreateSessionWorkingSetMemoryUsage << ","
+                << averageFirstBindWorkingSetMemoryUsage << "," << stdevFirstBindWorkingSetMemoryUsage << ","
+                << minFirstBindWorkingSetMemoryUsage << "," << maxFirstBindWorkingSetMemoryUsage << ","
+                << (numIterations <= 1 ? 0 : averageBindWorkingSetMemoryUsage) << ","
+                << (numIterations <= 1 ? 0 : stdevBindWorkingSetMemoryUsage) << ","
+                << (numIterations <= 1 ? 0 : minBindWorkingSetMemoryUsage) << ","
+                << (numIterations <= 1 ? 0 : maxBindWorkingSetMemoryUsage) << ","
+                << averageFirstEvalWorkingSetMemoryUsage << "," << stdevFirstEvalWorkingSetMemoryUsage << ","
+                << minFirstEvalWorkingSetMemoryUsage << "," << maxFirstEvalWorkingSetMemoryUsage << ","
+                << (numIterations <= 1 ? 0 : averageEvalWorkingSetMemoryUsage) << ","
+                << (numIterations <= 1 ? 0 : stdevEvalWorkingSetMemoryUsage) << ","
+                << (numIterations <= 1 ? 0 : minEvalWorkingSetMemoryUsage) << ","
+                << (numIterations <= 1 ? 0 : maxEvalWorkingSetMemoryUsage) << ","
+                // GPU dedicated memory columns, same stage and min/max order as the header row.
+                << averageLoadDedicatedMemoryUsage << "," << stdevLoadDedicatedMemoryUsage << ","
+                << minLoadDedicatedMemoryUsage << "," << maxLoadDedicatedMemoryUsage << ","
+                << averageCreateSessionDedicatedMemoryUsage << "," << stdevCreateSessionDedicatedMemoryUsage << ","
+                << minCreateSessionDedicatedMemoryUsage << "," << maxCreateSessionDedicatedMemoryUsage << ","
+                << averageFirstBindDedicatedMemoryUsage << "," << stdevFirstBindDedicatedMemoryUsage << ","
+                << minFirstBindDedicatedMemoryUsage << "," << maxFirstBindDedicatedMemoryUsage << ","
+                << (numIterations <= 1 ? 0 : averageBindDedicatedMemoryUsage) << ","
+                << (numIterations <= 1 ? 0 : stdevBindDedicatedMemoryUsage) << ","
+                << (numIterations <= 1 ? 0 : minBindDedicatedMemoryUsage) << ","
+                << (numIterations <= 1 ? 0 : maxBindDedicatedMemoryUsage) << ","
+                << averageFirstEvalDedicatedMemoryUsage << "," << stdevFirstEvalDedicatedMemoryUsage << ","
+                << minFirstEvalDedicatedMemoryUsage << "," << maxFirstEvalDedicatedMemoryUsage << ","
+                << (numIterations <= 1 ? 0 : averageEvalDedicatedMemoryUsage) << ","
+                << (numIterations <= 1 ? 0 : stdevEvalDedicatedMemoryUsage) << ","
+                << (numIterations <= 1 ? 0 : minEvalDedicatedMemoryUsage) << ","
+                << (numIterations <= 1 ? 0 : maxEvalDedicatedMemoryUsage) << ","
+                // GPU shared memory columns, same stage and min/max order as the header row.
+                << averageLoadSharedMemoryUsage << "," << stdevLoadSharedMemoryUsage << "," << minLoadSharedMemoryUsage
+                << "," << maxLoadSharedMemoryUsage << "," << averageCreateSessionSharedMemoryUsage << ","
+                << stdevCreateSessionSharedMemoryUsage << "," << minCreateSessionSharedMemoryUsage << ","
+                << maxCreateSessionSharedMemoryUsage << "," << averageFirstBindSharedMemoryUsage << ","
+                << stdevFirstBindSharedMemoryUsage << "," << minFirstBindSharedMemoryUsage << ","
+                << maxFirstBindSharedMemoryUsage << "," << (numIterations <= 1 ? 0 : averageBindSharedMemoryUsage)
+                << "," << (numIterations <= 1 ? 0 : stdevBindSharedMemoryUsage) << ","
+                << (numIterations <= 1 ? 0 : minBindSharedMemoryUsage) << ","
+                << (numIterations <= 1 ? 0 : maxBindSharedMemoryUsage) << "," << averageFirstEvalSharedMemoryUsage
+                << "," << stdevFirstEvalSharedMemoryUsage << "," << minFirstEvalSharedMemoryUsage << ","
+                << maxFirstEvalSharedMemoryUsage << "," << (numIterations <= 1 ? 0 : averageEvalSharedMemoryUsage)
+                << "," << (numIterations <= 1 ? 0 : stdevEvalSharedMemoryUsage) << ","
+                << (numIterations <= 1 ? 0 : minEvalSharedMemoryUsage) << ","
+                << (numIterations <= 1 ? 0 : maxEvalSharedMemoryUsage) << ",";
+        for (const auto& metaDataPair : perfFileMetadata)
+        {
+            fout << metaDataPair.second << ",";
+        }
+        fout << std::endl;
+        fout.close();
+    }
+}
diff --git a/Tools/WinMLRunner/src/OutputHelper.h b/Tools/WinMLRunner/src/OutputHelper.h
index 13b666de..7489e5eb 100644
--- a/Tools/WinMLRunner/src/OutputHelper.h
+++ b/Tools/WinMLRunner/src/OutputHelper.h
@@ -1,49 +1,9 @@
-#pragma once
-#include "Common.h"
-#include "CommandLineArgs.h"
-#include <fstream>
-#include <ctime>
-#include <locale>
-#include <utility>
-#include <codecvt>
-#include <iomanip>
-#include <dxgi.h>
-#include <Windows.Graphics.DirectX.Direct3D11.interop.h>
-#include <filesystem>
-#include <queue>
-
 #if defined(_AMD64_)
 // PIX markers only work on amd64
 #include <DXProgrammableCapture.h>
 #endif
-
-using namespace winrt::Windows::AI::MachineLearning;
-using namespace winrt::Windows::Storage::Streams;
-using namespace ::Windows::Graphics::DirectX::Direct3D11;
-using namespace winrt::Windows::Graphics::DirectX::Direct3D11;
-using namespace DirectX::PackedVector;
-
-inline size_t hash_data(void const* ptr, size_t const bytes) noexcept
-{
-#ifdef _WIN64
-    constexpr size_t fnv_offset_basis = 14695981039346656037ULL;
-    constexpr size_t fnv_prime = 1099511628211ULL;
-#else
-    constexpr size_t fnv_offset_basis = 2166136261U;
-    constexpr size_t fnv_prime = 16777619U;
-#endif
-    size_t result = fnv_offset_basis;
-    uint8_t const* const buffer = static_cast<uint8_t const*>(ptr);
-
-    for (size_t next = 0; next < bytes; ++next)
-    {
-        result ^= buffer[next];
-        result *= fnv_prime;
-    }
-
-    return result;
-}
-
+#include "TimerHelper.h"
+#include "LearningModelDeviceHelper.h"
 // Stores performance information and handles output to the command line and CSV files.
 class OutputHelper
 {
@@ -62,1162 +22,50 @@ class OutputHelper
         m_outputTensorHash.resize(numIterations, 0);
     }
 
-    void PrintLoadingInfo(const std::wstring& modelPath) const
-    {
-        wprintf(L"Loading model (path = %s)...\n", modelPath.c_str());
-    }
-
+    void PrintLoadingInfo(const std::wstring& modelPath) const;
     void PrintBindingInfo(uint32_t iteration, DeviceType deviceType, InputBindingType inputBindingType,
                           InputDataType inputDataType, DeviceCreationLocation deviceCreationLocation,
-                          const std::string& status) const
-    {
-        printf("Binding (device = %s, iteration = %d, inputBinding = %s, inputDataType = %s, deviceCreationLocation = "
-               "%s)...%s\n",
-               TypeHelper::Stringify(deviceType).c_str(), iteration, TypeHelper::Stringify(inputBindingType).c_str(),
-               TypeHelper::Stringify(inputDataType).c_str(), TypeHelper::Stringify(deviceCreationLocation).c_str(),
-               status.c_str());
-    }
-
+                          const std::string& status) const;
     void PrintEvaluatingInfo(uint32_t iteration, DeviceType deviceType, InputBindingType inputBindingType,
                              InputDataType inputDataType, DeviceCreationLocation deviceCreationLocation,
-                             const std::string& status) const
-    {
-        printf("Evaluating (device = %s, iteration = %d, inputBinding = %s, inputDataType = %s, deviceCreationLocation "
-               "= %s)...%s\n",
-               TypeHelper::Stringify(deviceType).c_str(), iteration, TypeHelper::Stringify(inputBindingType).c_str(),
-               TypeHelper::Stringify(inputDataType).c_str(), TypeHelper::Stringify(deviceCreationLocation).c_str(),
-               status.c_str());
-    }
-
-    void PrintModelInfo(std::wstring modelPath, LearningModel model) const
-    {
-        std::cout << "=================================================================" << std::endl;
-        std::wcout << "Name: " << model.Name().c_str() << std::endl;
-        std::wcout << "Author: " << model.Author().c_str() << std::endl;
-        std::wcout << "Version: " << model.Version() << std::endl;
-        std::wcout << "Domain: " << model.Domain().c_str() << std::endl;
-        std::wcout << "Description: " << model.Description().c_str() << std::endl;
-        std::wcout << "Path: " << modelPath << std::endl;
-        std::cout << "Support FP16: " << std::boolalpha << doesModelContainFP16(model) << std::endl;
-
-        std::cout << std::endl;
-        // print out information about input of model
-        std::cout << "Input Feature Info:" << std::endl;
-        for (auto&& inputFeature : model.InputFeatures())
-        {
-            PrintFeatureDescriptorInfo(inputFeature);
-        }
-        // print out information about output of model
-        std::cout << "Output Feature Info:" << std::endl;
-        for (auto&& outputFeature : model.OutputFeatures())
-        {
-            PrintFeatureDescriptorInfo(outputFeature);
-        }
-        std::cout << "=================================================================" << std::endl;
-        std::cout << std::endl;
-    }
-
-    void PrintFeatureDescriptorInfo(const ILearningModelFeatureDescriptor& descriptor) const
-    {
-        // IMPORTANT: This learningModelFeatureKind array needs to match the "enum class
-        // LearningModelFeatureKind" idl in Windows.AI.MachineLearning.0.h
-        const std::string learningModelFeatureKind[] = {
-            "Tensor",
-            "Sequence",
-            "Map",
-            "Image",
-        };
-        std::wstring name(descriptor.Name());
-        std::wcout << "Name: " << name << std::endl;
-        std::wcout << "Feature Kind: " << FeatureDescriptorToString(descriptor) << std::endl;
-        std::cout << std::endl;
-    }
-
-    void PrintHardwareInfo() const
-    {
-        std::cout << "WinML Runner" << std::endl;
-        std::cout << "Printing available GPUs with DXGI.." << std::endl;
-        com_ptr<IDXGIFactory6> factory;
-        CreateDXGIFactory1(__uuidof(IDXGIFactory6), factory.put_void());
-        std::vector<com_ptr<IDXGIAdapter1>> validAdapters;
-        for (UINT i = 0;; ++i)
-        {
-            com_ptr<IDXGIAdapter1> spAdapter;
-            if (factory->EnumAdapters1(i, spAdapter.put()) != S_OK)
-            {
-                break;
-            }
-            DXGI_ADAPTER_DESC1 pDesc;
-            spAdapter->GetDesc1(&pDesc);
-
-            // is a software adapter
-            if (pDesc.Flags == DXGI_ADAPTER_FLAG_SOFTWARE || (pDesc.VendorId == 0x1414 && pDesc.DeviceId == 0x8c))
-            {
-                continue;
-            }
-            // valid GPU adapter
-            else
-            {
-                printf("Index: %d, Description: %ls\n", static_cast<int>(validAdapters.size()), pDesc.Description);
-                validAdapters.push_back(spAdapter);
-            }
-        }
-        std::cout << std::endl;
-    }
-
-    static void PrintLearningModelDevice(const LearningModelDeviceWithMetadata& device)
-    {
-        if (device.DeviceType == DeviceType::CPU)
-        {
-            std::cout << "\nCreated LearningModelDevice with CPU device" << std::endl;
-            return;
-        }
-
-        IDirect3DDevice d3dDevice = device.LearningModelDevice.Direct3D11Device();
-        com_ptr<IDirect3DDxgiInterfaceAccess> dxgi;
-        dxgi = d3dDevice.try_as<IDirect3DDxgiInterfaceAccess>();
-        if (dxgi)
-        {
-            com_ptr<IDXGIDevice> dxgiDevice;
-            dxgi->GetInterface(__uuidof(IDXGIDevice), dxgiDevice.put_void());
-            com_ptr<IDXGIAdapter> adapter;
-            dxgiDevice->GetAdapter(adapter.put());
-            DXGI_ADAPTER_DESC description;
-            if (SUCCEEDED(adapter->GetDesc(&description)))
-            {
-                std::wcout << L"\nCreated LearningModelDevice with GPU: " << description.Description << std::endl;
-            }
-        }
-        else
-        {
-            std::cout << "Failed to Print Learning Model Device Information" << std::endl;
-        }
-    }
-
+                             const std::string& status) const;
+    void PrintModelInfo(const std::wstring& modelPath, const LearningModel& model) const;
+    void PrintFeatureDescriptorInfo(const ILearningModelFeatureDescriptor& descriptor) const;
+    void PrintHardwareInfo() const;
     void PrintResults(const Profiler<WINML_MODEL_TEST_PERF>& profiler, uint32_t numIterations, DeviceType deviceType,
                       InputBindingType inputBindingType, InputDataType inputDataType,
-                      DeviceCreationLocation deviceCreationLocation, bool isPerformanceConsoleOutputVerbose) const
-    {
-        double loadTime = profiler[LOAD_MODEL].GetAverage(CounterType::TIMER);
-        double createSessionTime = profiler[CREATE_SESSION].GetAverage(CounterType::TIMER);
-
-        double averageBindTime = profiler[BIND_VALUE].GetAverage(CounterType::TIMER);
-        double stdevBindTime = profiler[BIND_VALUE].GetStdev(CounterType::TIMER);
-        double minBindTime = profiler[BIND_VALUE].GetMin(CounterType::TIMER);
-        double maxBindTime = profiler[BIND_VALUE].GetMax(CounterType::TIMER);
-        double firstBindTime = profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::TIMER);
-
-        double averageEvalTime = profiler[EVAL_MODEL].GetAverage(CounterType::TIMER);
-        double stdevEvalTime = profiler[EVAL_MODEL].GetStdev(CounterType::TIMER);
-        double minEvalTime = profiler[EVAL_MODEL].GetMin(CounterType::TIMER);
-        double maxEvalTime = profiler[EVAL_MODEL].GetMax(CounterType::TIMER);
-        double firstEvalTime = profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::TIMER);
-
-        double firstLoadWorkingSetMemoryUsage = profiler[LOAD_MODEL].GetAverage(CounterType::WORKING_SET_USAGE);
-        double firstLoadSharedMemoryUsage = profiler[LOAD_MODEL].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
-        double firstLoadDedicatedMemoryUsage = profiler[LOAD_MODEL].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
-        double firstLoadPeakWorkingSetUsage = profiler[LOAD_MODEL].GetAverage(CounterType::PEAK_WORKING_SET_USAGE);
-
-        double firstSessionCreationWorkingSetMemoryUsage =
-            profiler[CREATE_SESSION].GetAverage(CounterType::WORKING_SET_USAGE);
-        double firstSessionCreationSharedMemoryUsage =
-            profiler[CREATE_SESSION].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
-        double firstSessionCreationDedicatedMemoryUsage =
-            profiler[CREATE_SESSION].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
-        double firstSessionPeakWorkingSetUsage =
-            profiler[CREATE_SESSION].GetAverage(CounterType::PEAK_WORKING_SET_USAGE);
-
-        double averageBindMemoryUsage = profiler[BIND_VALUE].GetAverage(CounterType::WORKING_SET_USAGE);
-        double stdevBindMemoryUsage = profiler[BIND_VALUE].GetStdev(CounterType::WORKING_SET_USAGE);
-        double minBindMemoryUsage = profiler[BIND_VALUE].GetMin(CounterType::WORKING_SET_USAGE);
-        double maxBindMemoryUsage = profiler[BIND_VALUE].GetMax(CounterType::WORKING_SET_USAGE);
-        double firstBindMemoryUsage = profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE);
-        double firstBindPeakMemoryUsage =
-            profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::PEAK_WORKING_SET_USAGE);
-
-        double averageEvalMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::WORKING_SET_USAGE);
-        double stdevEvalMemoryUsage = profiler[EVAL_MODEL].GetStdev(CounterType::WORKING_SET_USAGE);
-        double minEvalMemoryUsage = profiler[EVAL_MODEL].GetMin(CounterType::WORKING_SET_USAGE);
-        double maxEvalMemoryUsage = profiler[EVAL_MODEL].GetMax(CounterType::WORKING_SET_USAGE);
-        double firstEvalMemoryUsage = profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE);
-        double firstEvalPeakMemoryUsage =
-            profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::PEAK_WORKING_SET_USAGE);
-
-        double averageBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
-        double stdevBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetStdev(CounterType::GPU_DEDICATED_MEM_USAGE);
-        double minBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetMin(CounterType::GPU_DEDICATED_MEM_USAGE);
-        double maxBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetMax(CounterType::GPU_DEDICATED_MEM_USAGE);
-        double firstBindDedicatedMemoryUsage =
-            profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
-
-        double averageEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
-        double stdevEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetStdev(CounterType::GPU_DEDICATED_MEM_USAGE);
-        double minEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetMin(CounterType::GPU_DEDICATED_MEM_USAGE);
-        double maxEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetMax(CounterType::GPU_DEDICATED_MEM_USAGE);
-        double firstEvalDedicatedMemoryUsage =
-            profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
-
-        double averageBindSharedMemoryUsage = profiler[BIND_VALUE].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
-        double stdevBindSharedMemoryUsage = profiler[BIND_VALUE].GetStdev(CounterType::GPU_SHARED_MEM_USAGE);
-        double minBindSharedMemoryUsage = profiler[BIND_VALUE].GetMin(CounterType::GPU_SHARED_MEM_USAGE);
-        double maxBindSharedMemoryUsage = profiler[BIND_VALUE].GetMax(CounterType::GPU_SHARED_MEM_USAGE);
-        double firstBindSharedMemoryUsage =
-            profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
-
-        double averageEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
-        double stdevEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetStdev(CounterType::GPU_SHARED_MEM_USAGE);
-        double minEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetMin(CounterType::GPU_SHARED_MEM_USAGE);
-        double maxEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetMax(CounterType::GPU_SHARED_MEM_USAGE);
-        double firstEvalSharedMemoryUsage =
-            profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
-
-        double firstIterationWorkingSetMemoryUsage =
-            profiler[LOAD_MODEL].GetAverage(CounterType::WORKING_SET_USAGE) +
-            profiler[CREATE_SESSION].GetAverage(CounterType::WORKING_SET_USAGE) +
-            profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE) +
-            profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE);
-
-        double firstIterationSharedMemoryUsage =
-            profiler[LOAD_MODEL].GetAverage(CounterType::GPU_SHARED_MEM_USAGE) +
-            profiler[CREATE_SESSION].GetAverage(CounterType::GPU_SHARED_MEM_USAGE) +
-            profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE) +
-            profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
-
-        double firstIterationDedicatedMemoryUsage =
-            profiler[LOAD_MODEL].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE) +
-            profiler[CREATE_SESSION].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE) +
-            profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE) +
-            profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
-
-        double firstIterationPeakWorkingSet = firstLoadPeakWorkingSetUsage + firstSessionPeakWorkingSetUsage +
-                                              firstBindPeakMemoryUsage + firstEvalPeakMemoryUsage;
-
-        printf("\nResults (device = %s, numIterations = %d, inputBinding = %s, inputDataType = %s, "
-               "deviceCreationLocation = %s):\n",
-               TypeHelper::Stringify(deviceType).c_str(), numIterations,
-               TypeHelper::Stringify(inputBindingType).c_str(), TypeHelper::Stringify(inputDataType).c_str(),
-               TypeHelper::Stringify(deviceCreationLocation).c_str());
-
-        std::cout << "\nFirst Iteration Performance (load, bind, session creation, and evaluate): " << std::endl;
-        std::cout << "  Load: " << loadTime << " ms" << std::endl;
-        std::cout << "  Bind: " << firstBindTime << " ms" << std::endl;
-        std::cout << "  Session Creation: " << createSessionTime << " ms" << std::endl;
-        std::cout << "  Evaluate: " << firstEvalTime << " ms" << std::endl;
-
-        if (isPerformanceConsoleOutputVerbose)
-        {
-            std::cout << "\n  Working Set Memory usage (load): " << firstLoadWorkingSetMemoryUsage << " MB"
-                      << std::endl;
-            std::cout << "  Working Set Memory usage (session creation): " << firstSessionCreationWorkingSetMemoryUsage
-                      << " MB" << std::endl;
-            std::cout << "  Working Set Memory usage (bind): " << firstBindMemoryUsage << " MB" << std::endl;
-        }
-        else
-        {
-            std::cout << std::endl;
-        }
-        std::cout << "  Working Set Memory usage (evaluate): " << firstEvalMemoryUsage << " MB" << std::endl;
-        std::cout << "  Working Set Memory usage (load, bind, session creation, and evaluate): "
-                  << firstIterationWorkingSetMemoryUsage << " MB" << std::endl;
-
-        if (isPerformanceConsoleOutputVerbose)
-        {
-            std::cout << std::endl;
-            std::cout << "  Peak Working Set Memory Difference (from start to load): " << firstLoadPeakWorkingSetUsage
-                      << " MB" << std::endl;
-            std::cout << "  Peak Working Set Memory Difference (from model load to session creation): "
-                      << firstSessionPeakWorkingSetUsage << " MB" << std::endl;
-            std::cout << "  Peak Working Set Memory Difference (from session to bind): " << firstBindPeakMemoryUsage
-                      << " MB" << std::endl;
-            std::cout << "  Peak Working Set Memory Difference (from bind to evaluate): " << firstEvalPeakMemoryUsage
-                      << " MB" << std::endl;
-        }
-
-        std::cout << "  Peak Working Set Memory Difference (load, bind, session creation, and evaluate): "
-                  << firstIterationPeakWorkingSet << " MB" << std::endl;
-
-        if (isPerformanceConsoleOutputVerbose)
-        {
-            std::cout << "\n  Dedicated Memory usage (load): " << firstLoadDedicatedMemoryUsage << " MB" << std::endl;
-            std::cout << "  Dedicated Memory usage (session creation): " << firstSessionCreationDedicatedMemoryUsage
-                      << " MB" << std::endl;
-            std::cout << "  Dedicated Memory usage (bind): " << firstBindDedicatedMemoryUsage << " MB" << std::endl;
-        }
-        else
-        {
-            std::cout << std::endl;
-        }
-        std::cout << "  Dedicated Memory usage (evaluate): " << firstEvalDedicatedMemoryUsage << " MB" << std::endl;
-        std::cout << "  Dedicated Memory usage (load, bind, session creation, and evaluate): "
-                  << firstIterationDedicatedMemoryUsage << " MB" << std::endl;
-
-        if (isPerformanceConsoleOutputVerbose)
-        {
-            std::cout << "\n  Shared Memory usage (load): " << firstLoadSharedMemoryUsage << " MB" << std::endl;
-            std::cout << "  Shared Memory usage (session creation): " << firstSessionCreationSharedMemoryUsage << " MB"
-                      << std::endl;
-            std::cout << "  Shared Memory usage (bind): " << firstBindSharedMemoryUsage << " MB" << std::endl;
-        }
-        else
-        {
-            std::cout << std::endl;
-        }
-        std::cout << "  Shared Memory usage (evaluate): " << firstEvalSharedMemoryUsage << " MB" << std::endl;
-        std::cout << "  Shared Memory usage (load, bind, session creation, and evaluate): "
-                  << firstIterationSharedMemoryUsage << " MB" << std::endl;
-
-        if (numIterations > 1)
-        {
-            printf("\nAverage Performance excluding first iteration. Iterations %d to %d. (Iterations greater than 1 "
-                   "only bind and evaluate)\n",
-                   2, numIterations);
-            std::cout << "  Average Bind: " << averageBindTime << " ms" << std::endl;
-            if (isPerformanceConsoleOutputVerbose)
-            {
-                std::cout << "  Minimum Bind: " << minBindTime << " ms" << std::endl;
-                std::cout << "  Maximum Bind: " << maxBindTime << " ms" << std::endl;
-                std::cout << "  Standard Deviation Bind: " << stdevBindTime << " ms" << std::endl;
-            }
-            std::cout << "  Average Evaluate: " << averageEvalTime << " ms" << std::endl;
-            if (isPerformanceConsoleOutputVerbose)
-            {
-                std::cout << "  Minimum Evaluate: " << minEvalTime << " ms" << std::endl;
-                std::cout << "  Maximum Evaluate: " << maxEvalTime << " ms" << std::endl;
-                std::cout << "  Standard Deviation Evaluate: " << stdevEvalTime << " ms" << std::endl;
-            }
-
-            std::cout << "\n  Average Working Set Memory usage (bind): " << averageBindMemoryUsage << " MB"
-                      << std::endl;
-            if (isPerformanceConsoleOutputVerbose)
-            {
-                std::cout << "  Min Working Set Memory usage (bind): " << minBindMemoryUsage << " MB" << std::endl;
-                std::cout << "  Max Working Set Memory usage (bind): " << maxBindMemoryUsage << " MB" << std::endl;
-                std::cout << "  Standard Deviation Working Set Memory usage (bind): " << stdevBindMemoryUsage << " MB"
-                          << std::endl;
-            }
-            std::cout << "  Average Working Set Memory usage (evaluate): " << averageEvalMemoryUsage << " MB"
-                      << std::endl;
-            if (isPerformanceConsoleOutputVerbose)
-            {
-                std::cout << "  Min Working Set Memory usage (evaluate): " << minEvalMemoryUsage << " MB" << std::endl;
-                std::cout << "  Max Working Set Memory usage (evaluate): " << maxEvalMemoryUsage << " MB" << std::endl;
-                std::cout << "  Standard Deviation Working Set Memory usage (evaluate): " << stdevEvalMemoryUsage
-                          << " MB" << std::endl;
-            }
-
-            std::cout << "\n  Average Dedicated Memory usage (bind): " << averageBindDedicatedMemoryUsage << " MB"
-                      << std::endl;
-            if (isPerformanceConsoleOutputVerbose)
-            {
-                std::cout << "  Min Dedicated Memory usage (bind): " << minBindDedicatedMemoryUsage << " MB"
-                          << std::endl;
-                std::cout << "  Max Dedicated Memory usage (bind): " << maxBindDedicatedMemoryUsage << " MB"
-                          << std::endl;
-                std::cout << "  Standard Deviation Working Set Memory usage (evaluate): "
-                          << stdevBindDedicatedMemoryUsage << " MB" << std::endl;
-            }
-            std::cout << "  Average Dedicated Memory usage (evaluate): " << averageEvalDedicatedMemoryUsage << " MB"
-                      << std::endl;
-            if (isPerformanceConsoleOutputVerbose)
-            {
-                std::cout << "  Min Dedicated Memory usage (evaluate): " << minEvalDedicatedMemoryUsage << " MB"
-                          << std::endl;
-                std::cout << "  Max Dedicated Memory usage (evaluate): " << maxEvalDedicatedMemoryUsage << " MB"
-                          << std::endl;
-                std::cout << "  Standard Deviation Dedicated Memory usage (evaluate): " << stdevEvalDedicatedMemoryUsage
-                          << " MB" << std::endl;
-            }
-
-            std::cout << "\n  Average Shared Memory usage (bind): " << averageBindSharedMemoryUsage << " MB"
-                      << std::endl;
-            if (isPerformanceConsoleOutputVerbose)
-            {
-                std::cout << "  Min Shared Memory usage (bind): " << minBindSharedMemoryUsage << " MB" << std::endl;
-                std::cout << "  Max Shared Memory usage (bind): " << maxBindSharedMemoryUsage << " MB" << std::endl;
-                std::cout << "  Standard Deviation Shared Memory usage (bind): " << stdevBindSharedMemoryUsage << " MB"
-                          << std::endl;
-            }
-            std::cout << "  Average Shared Memory usage (evaluate): " << averageEvalSharedMemoryUsage << " MB"
-                      << std::endl;
-            if (isPerformanceConsoleOutputVerbose)
-            {
-                std::cout << "  Min Shared Memory usage (evaluate): " << minEvalSharedMemoryUsage << " MB" << std::endl;
-                std::cout << "  Max Shared Memory usage (evaluate): " << maxEvalSharedMemoryUsage << " MB" << std::endl;
-                std::cout << "  Standard Deviation Shared Memory usage (evaluate): " << stdevEvalSharedMemoryUsage
-                          << " MB" << std::endl;
-            }
-        }
-        std::cout << std::endl << std::endl << std::endl;
-    }
-
-    static std::wstring FeatureDescriptorToString(const ILearningModelFeatureDescriptor& descriptor)
-    {
-        switch (descriptor.Kind())
-        {
-            case LearningModelFeatureKind::Tensor:
-            {
-                auto tensorDescriptor = descriptor.as<TensorFeatureDescriptor>();
-                return TypeHelper::Stringify(tensorDescriptor.TensorKind());
-            }
-            case LearningModelFeatureKind::Image:
-            {
-                auto imageDescriptor = descriptor.as<ImageFeatureDescriptor>();
-                std::wstring str = L"Image (Height: " + std::to_wstring(imageDescriptor.Height()) + L", Width:  " +
-                                   std::to_wstring(imageDescriptor.Width()) + L")";
-                return str;
-            }
-            case LearningModelFeatureKind::Map:
-            {
-                auto mapDescriptor = descriptor.as<MapFeatureDescriptor>();
-                std::wstring str = L"Map<" + TypeHelper::Stringify(mapDescriptor.KeyKind()) + L",";
-                str += FeatureDescriptorToString(mapDescriptor.ValueDescriptor());
-                str += L">";
-                return str;
-            }
-            case LearningModelFeatureKind::Sequence:
-            {
-                auto sequenceDescriptor = descriptor.as<SequenceFeatureDescriptor>();
-                std::wstring str = L"List<" + FeatureDescriptorToString(sequenceDescriptor.ElementDescriptor()) + L">";
-                return str;
-            }
-            default:
-                return (L"Invalid feature %s.", descriptor.Name().c_str());
-        }
-    }
-
-    static bool doesDescriptorContainFP16(const ILearningModelFeatureDescriptor& descriptor)
-    {
-        switch (descriptor.Kind())
-        {
-            case LearningModelFeatureKind::Tensor:
-            {
-                return descriptor.as<TensorFeatureDescriptor>().TensorKind() == TensorKind::Float16;
-            }
-            break;
-            case LearningModelFeatureKind::Map:
-            {
-                auto mapDescriptor = descriptor.as<MapFeatureDescriptor>();
-                if (mapDescriptor.KeyKind() == TensorKind::Float16)
-                {
-                    return true;
-                }
-                return doesDescriptorContainFP16(mapDescriptor.ValueDescriptor());
-            }
-            break;
-            case LearningModelFeatureKind::Sequence:
-            {
-                return doesDescriptorContainFP16(descriptor.as<SequenceFeatureDescriptor>().ElementDescriptor());
-            }
-            break;
-            default:
-            {
-                return false;
-            }
-        }
-    }
-
-    static bool doesModelContainFP16(const LearningModel model)
-    {
-        for (auto&& inputFeature : model.InputFeatures())
-        {
-            if (doesDescriptorContainFP16(inputFeature))
-            {
-                return true;
-            }
-        }
-        return false;
-    }
-
-    void SaveLoadTimes(Profiler<WINML_MODEL_TEST_PERF>& profiler, uint32_t iterNum)
-    {
-        m_clockLoadTimes[iterNum] = profiler[LOAD_MODEL].GetClockTime();
-    }
-    void SaveBindTimes(Profiler<WINML_MODEL_TEST_PERF>& profiler, uint32_t iterNum)
-    {
-        m_clockBindTimes[iterNum] =
-            (iterNum == 0) ? profiler[BIND_VALUE_FIRST_RUN].GetClockTime() : profiler[BIND_VALUE].GetClockTime();
-    }
-    void SaveEvalPerformance(Profiler<WINML_MODEL_TEST_PERF>& profiler, uint32_t iterNum)
-    {
-        enum WINML_MODEL_TEST_PERF eval = (iterNum == 0) ? EVAL_MODEL_FIRST_RUN : EVAL_MODEL;
-        m_clockEvalTimes[iterNum] = profiler[eval].GetClockTime();
-        m_CPUWorkingDiff[iterNum] = profiler[eval].GetCpuWorkingDiff();
-        m_CPUWorkingStart[iterNum] = profiler[eval].GetCpuWorkingStart();
-        m_GPUSharedDiff[iterNum] = profiler[eval].GetGpuSharedDiff();
-        m_GPUSharedStart[iterNum] = profiler[eval].GetGpuSharedStart();
-        m_GPUDedicatedDiff[iterNum] = profiler[eval].GetGpuDedicatedDiff();
-    }
-
-    void SaveResult(uint32_t iterationNum, std::string result, int hashcode)
-    {
-        m_outputResult[iterationNum] = result;
-        m_outputTensorHash[iterationNum] = hashcode;
-    }
-
-    void SetDefaultPerIterationFolder(const std::wstring& folderName)
-    {
-        m_folderNamePerIteration = folderName;
-        if (std::filesystem::create_directories(m_folderNamePerIteration.c_str()) != 0)
-            std::wcout << L"Folder [" + m_folderNamePerIteration + L"] cannot be created";
-    }
-
-    void SetDefaultCSVFileNamePerIteration()
-    {
-        m_csvFileNamePerIterationSummary = m_folderNamePerIteration + L"\\Summary.csv";
-    }
-
-    void SetDefaultCSVIterationResult(uint32_t iterationNum, const CommandLineArgs& args, std::wstring& featureName)
-    {
-        if (args.UseCPU() && args.UseGPU())
-        {
-            if (!m_flagGpuDevice)
-            {
-                m_fileNameResultDevice = m_folderNamePerIteration + L"\\" + featureName + L"CpuIteration";
-                if (iterationNum == args.NumIterations() - 1 || args.SaveTensorMode() == L"First")
-                {
-                    m_flagGpuDevice = true;
-                }
-            }
-            else
-            {
-                m_fileNameResultDevice = m_folderNamePerIteration + L"\\" + featureName + L"GpuIteration";
-            }
-        }
-        else if (args.UseGPU())
-        {
-            m_fileNameResultDevice = m_folderNamePerIteration + L"\\" + featureName + L"GpuIteration";
-        }
-        else
-        {
-            m_fileNameResultDevice = m_folderNamePerIteration + L"\\" + featureName + L"CpuIteration";
-        }
-        m_csvFileNamePerIterationResult = m_fileNameResultDevice + std::to_wstring(iterationNum + 1) + L".csv";
-    }
-
-    void SetCSVFileName(const std::wstring& fileName) { m_csvFileName = fileName; }
-
+                      DeviceCreationLocation deviceCreationLocation, bool isPerformanceConsoleOutputVerbose) const;
+    void SaveLoadTimes(Profiler<WINML_MODEL_TEST_PERF>& profiler, uint32_t iterNum);
+    void SaveBindTimes(Profiler<WINML_MODEL_TEST_PERF>& profiler, uint32_t iterNum);
+    void SaveEvalPerformance(Profiler<WINML_MODEL_TEST_PERF>& profiler, uint32_t iterNum);
+    void SaveResult(uint32_t iterationNum, std::string result, int hashcode);
+    void SetDefaultPerIterationFolder(const std::wstring& folderName);
+    void SetDefaultCSVFileNamePerIteration();
+    std::wstring GetDefaultCSVFileNamePerIteration();
+    std::wstring GetCsvFileNamePerIterationResult();
+    void SetDefaultCSVIterationResult(uint32_t iterationNum, const CommandLineArgs& args, std::wstring& featureName);
+    void SetCSVFileName(const std::wstring& fileName);
     void WritePerIterationPerformance(const CommandLineArgs& args, const std::wstring model,
-                                      const std::wstring imagePath)
-    {
-        if (m_csvFileNamePerIterationSummary.length() > 0)
-        {
-            bool bNewFile = false;
-            std::ifstream fin;
-            fin.open(m_csvFileNamePerIterationSummary);
-            std::filebuf* outbuf = fin.rdbuf();
-            if (EOF == outbuf->sbumpc())
-            {
-                bNewFile = true;
-            }
-            fin.close();
-
-            std::ofstream fout;
-            fout.open(m_csvFileNamePerIterationSummary, std::ios_base::app);
-
-            std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
-            std::string modelName = converter.to_bytes(model);
-            std::string fileNameResultDevice = converter.to_bytes(m_fileNameResultDevice);
-            std::string inputName = args.IsCSVInput() ? converter.to_bytes(args.CsvPath())
-                                                      : args.IsImageInput() ? converter.to_bytes(imagePath) : "";
-
-            if (bNewFile)
-            {
-                if (args.IsPerIterationCapture())
-                {
-                    fout << "Model Name"
-                         << ","
-                         << "Input Name"
-                         << ","
-                         << "Iterations"
-                         << ","
-                         << "Iteration Number "
-                         << ","
-                         << "CPU Working Set Diff (MB)"
-                         << ","
-                         << "CPU Working Set Start (MB)"
-                         << ","
-                         << "GPU Shared Memory Diff (MB)"
-                         << ","
-                         << "GPU Shared Memory Start (MB)"
-                         << ","
-                         << "GPU Dedicated Memory Diff (MB)"
-                         << ","
-                         << "Load (ms)"
-                         << ","
-                         << "Bind (ms)"
-                         << ","
-                         << "Evaluate (ms)"
-                         << ",";
-
-                    if (args.IsSaveTensor())
-                    {
-                        fout << "Result"
-                             << ","
-                             << "OutputTensorHash"
-                             << ","
-                             << "FileName";
-                    }
-                }
-
-                else if (args.IsSaveTensor())
-                {
-                    fout << "Iteration Number"
-                         << ","
-                         << "Result"
-                         << ","
-                         << "OutputTensorHash"
-                         << ","
-                         << "FileName";
-                }
-                fout << std::endl;
-            }
-
-            if (args.IsPerIterationCapture())
-            {
-                for (uint32_t i = 0; i < args.NumIterations(); i++)
-                {
-                    fout << modelName << "," << inputName << "," << args.NumIterations() << "," << i + 1 << ","
-                         << m_CPUWorkingDiff[i] << "," << m_CPUWorkingStart[i] << "," << m_GPUSharedDiff[i] << ","
-                         << m_GPUSharedStart[i] << "," << m_GPUDedicatedDiff[i] << "," << m_clockLoadTimes[i] << ","
-                         << m_clockBindTimes[i] << "," << m_clockEvalTimes[i] << ",";
-
-                    if (args.IsSaveTensor() &&
-                        (args.SaveTensorMode() == L"All" || (args.SaveTensorMode() == L"First" && i == 0)))
-                    {
-                        fout << m_outputResult[i] << "," << m_outputTensorHash[i] << ","
-                             << fileNameResultDevice + std::to_string(i + 1) + ".csv"
-                             << ",";
-                    }
-                    fout << std::endl;
-                }
-            }
-            else if (args.IsSaveTensor())
-            {
-                for (uint32_t i = 0; i < args.NumIterations(); i++)
-                {
-                    fout << i + 1 << "," << m_outputResult[i] << "," << m_outputTensorHash[i] << ","
-                         << fileNameResultDevice + std::to_string(i + 1) + ".csv" << std::endl;
-                    if (args.SaveTensorMode() == L"First" && i == 0)
-                    {
-                        break;
-                    }
-                }
-            }
-            fout.close();
-        }
-    }
-
-    template <typename T>
-    void ProcessTensorResult(const CommandLineArgs& args, const void* buffer, const uint32_t uCapacity,
-                             std::vector<std::pair<float, int>>& maxValues, std::ofstream& fout, unsigned int k)
-    {
-        // Create a priority queue of size k that pops the lowest value first
-        // We will remove lowest values as we iterate over all the values
-        auto cmp = [](std::pair<float, int> x, std::pair<float, int> y) { return x.first > y.first; };
-        std::priority_queue<std::pair<float, int>, std::vector<std::pair<float, int>>, decltype(cmp)> topKvalues(cmp);
-
-        T* tensor = (T*)buffer;
-        int size = uCapacity / sizeof(T);
-        for (int i = 0; i < size; i++)
-        {
-            float val = 0;
-            if (!std::is_same<T, HALF>::value)
-            {
-                val = *(tensor + i);
-            }
-            else
-            {
-                val = XMConvertHalfToFloat(static_cast<HALF>(*(tensor + i)));
-            }
-            if (args.IsSaveTensor())
-            {
-                fout << i << "," << val << std::endl;
-            }
-
-            if (topKvalues.size() < k)
-            {
-                topKvalues.push({ val, i });
-            }
-            else if (k > 0)
-            {
-                auto maxValue = topKvalues.top().first;
-                if (maxValue < val)
-                {
-                    topKvalues.pop();
-                    topKvalues.push({ val, i });
-                }
-            }
-        }
-        while (!topKvalues.empty())
-        {
-            auto pair = topKvalues.top();
-            maxValues.push_back(pair);
-            topKvalues.pop();
-        }
-        // Put vector in order of highest value to lowest
-        std::reverse(maxValues.begin(), maxValues.end());
-    }
-
+                                      const std::wstring imagePath);
     void WritePerformanceDataToCSV(const Profiler<WINML_MODEL_TEST_PERF>& profiler, int numIterations,
                                    std::wstring model, const std::string& deviceType, const std::string& inputBinding,
                                    const std::string& inputType, const std::string& deviceCreationLocation,
-                                   const std::vector<std::pair<std::string, std::string>>& perfFileMetadata) const
-    {
-        double averageLoadTime = profiler[LOAD_MODEL].GetAverage(CounterType::TIMER);
-        double stdevLoadTime = profiler[LOAD_MODEL].GetStdev(CounterType::TIMER);
-        double minLoadTime = profiler[LOAD_MODEL].GetMin(CounterType::TIMER);
-        double maxLoadTime = profiler[LOAD_MODEL].GetMax(CounterType::TIMER);
-        uint32_t numberLoadIterations = profiler[LOAD_MODEL].GetCount();
-
-        double averageCreateSessionTime = profiler[CREATE_SESSION].GetAverage(CounterType::TIMER);
-        double stdevCreateSessionTime = profiler[CREATE_SESSION].GetStdev(CounterType::TIMER);
-        double minCreateSessionTime = profiler[CREATE_SESSION].GetMin(CounterType::TIMER);
-        double maxCreateSessionTime = profiler[CREATE_SESSION].GetMax(CounterType::TIMER);
-        uint32_t numberCreateSessionIterations = profiler[CREATE_SESSION].GetCount();
-
-        double averageBindTime = profiler[BIND_VALUE].GetAverage(CounterType::TIMER);
-        double stdevBindTime = profiler[BIND_VALUE].GetStdev(CounterType::TIMER);
-        double minBindTime = profiler[BIND_VALUE].GetMin(CounterType::TIMER);
-        double maxBindTime = profiler[BIND_VALUE].GetMax(CounterType::TIMER);
-
-        double averageFirstBindTime = profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::TIMER);
-        double stdevFirstBindTime = profiler[BIND_VALUE_FIRST_RUN].GetStdev(CounterType::TIMER);
-        double minFirstBindTime = profiler[BIND_VALUE_FIRST_RUN].GetMin(CounterType::TIMER);
-        double maxFirstBindTime = profiler[BIND_VALUE_FIRST_RUN].GetMax(CounterType::TIMER);
-
-        double averageEvalTime = profiler[EVAL_MODEL].GetAverage(CounterType::TIMER);
-        double stdevEvalTime = profiler[EVAL_MODEL].GetStdev(CounterType::TIMER);
-        double minEvalTime = profiler[EVAL_MODEL].GetMin(CounterType::TIMER);
-        double maxEvalTime = profiler[EVAL_MODEL].GetMax(CounterType::TIMER);
-
-        double averageFirstEvalTime = profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::TIMER);
-        double stdevFirstEvalTime = profiler[EVAL_MODEL_FIRST_RUN].GetStdev(CounterType::TIMER);
-        double minFirstEvalTime = profiler[EVAL_MODEL_FIRST_RUN].GetMin(CounterType::TIMER);
-        double maxFirstEvalTime = profiler[EVAL_MODEL_FIRST_RUN].GetMax(CounterType::TIMER);
-
-        double averageLoadWorkingSetMemoryUsage = profiler[LOAD_MODEL].GetAverage(CounterType::WORKING_SET_USAGE);
-        double stdevLoadWorkingSetMemoryUsage = profiler[LOAD_MODEL].GetStdev(CounterType::WORKING_SET_USAGE);
-        double minLoadWorkingSetMemoryUsage = profiler[LOAD_MODEL].GetMin(CounterType::WORKING_SET_USAGE);
-        double maxLoadWorkingSetMemoryUsage = profiler[LOAD_MODEL].GetMax(CounterType::WORKING_SET_USAGE);
-
-        double averageCreateSessionWorkingSetMemoryUsage =
-            profiler[CREATE_SESSION].GetAverage(CounterType::WORKING_SET_USAGE);
-        double stdevCreateSessionWorkingSetMemoryUsage =
-            profiler[CREATE_SESSION].GetStdev(CounterType::WORKING_SET_USAGE);
-        double minCreateSessionWorkingSetMemoryUsage = profiler[CREATE_SESSION].GetMin(CounterType::WORKING_SET_USAGE);
-        double maxCreateSessionWorkingSetMemoryUsage = profiler[CREATE_SESSION].GetMax(CounterType::WORKING_SET_USAGE);
-
-        double averageBindWorkingSetMemoryUsage = profiler[BIND_VALUE].GetAverage(CounterType::WORKING_SET_USAGE);
-        double stdevBindWorkingSetMemoryUsage = profiler[BIND_VALUE].GetStdev(CounterType::WORKING_SET_USAGE);
-        double minBindWorkingSetMemoryUsage = profiler[BIND_VALUE].GetMin(CounterType::WORKING_SET_USAGE);
-        double maxBindWorkingSetMemoryUsage = profiler[BIND_VALUE].GetMax(CounterType::WORKING_SET_USAGE);
-
-        double averageFirstBindWorkingSetMemoryUsage =
-            profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE);
-        double stdevFirstBindWorkingSetMemoryUsage =
-            profiler[BIND_VALUE_FIRST_RUN].GetStdev(CounterType::WORKING_SET_USAGE);
-        double minFirstBindWorkingSetMemoryUsage =
-            profiler[BIND_VALUE_FIRST_RUN].GetMin(CounterType::WORKING_SET_USAGE);
-        double maxFirstBindWorkingSetMemoryUsage =
-            profiler[BIND_VALUE_FIRST_RUN].GetMax(CounterType::WORKING_SET_USAGE);
-
-        double averageEvalWorkingSetMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::WORKING_SET_USAGE);
-        double stdevEvalWorkingSetMemoryUsage = profiler[EVAL_MODEL].GetStdev(CounterType::WORKING_SET_USAGE);
-        double minEvalWorkingSetMemoryUsage = profiler[EVAL_MODEL].GetMin(CounterType::WORKING_SET_USAGE);
-        double maxEvalWorkingSetMemoryUsage = profiler[EVAL_MODEL].GetMax(CounterType::WORKING_SET_USAGE);
-
-        double averageFirstEvalWorkingSetMemoryUsage =
-            profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE);
-        double stdevFirstEvalWorkingSetMemoryUsage =
-            profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE);
-        double minFirstEvalWorkingSetMemoryUsage =
-            profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE);
-        double maxFirstEvalWorkingSetMemoryUsage =
-            profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::WORKING_SET_USAGE);
-
-        double averageLoadDedicatedMemoryUsage = profiler[LOAD_MODEL].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
-        double stdevLoadDedicatedMemoryUsage = profiler[LOAD_MODEL].GetStdev(CounterType::GPU_DEDICATED_MEM_USAGE);
-        double minLoadDedicatedMemoryUsage = profiler[LOAD_MODEL].GetMin(CounterType::GPU_DEDICATED_MEM_USAGE);
-        double maxLoadDedicatedMemoryUsage = profiler[LOAD_MODEL].GetMax(CounterType::GPU_DEDICATED_MEM_USAGE);
-
-        double averageCreateSessionDedicatedMemoryUsage =
-            profiler[CREATE_SESSION].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
-        double stdevCreateSessionDedicatedMemoryUsage =
-            profiler[CREATE_SESSION].GetStdev(CounterType::GPU_DEDICATED_MEM_USAGE);
-        double minCreateSessionDedicatedMemoryUsage =
-            profiler[CREATE_SESSION].GetMin(CounterType::GPU_DEDICATED_MEM_USAGE);
-        double maxCreateSessionDedicatedMemoryUsage =
-            profiler[CREATE_SESSION].GetMax(CounterType::GPU_DEDICATED_MEM_USAGE);
-
-        double averageBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
-        double stdevBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetStdev(CounterType::GPU_DEDICATED_MEM_USAGE);
-        double minBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetMin(CounterType::GPU_DEDICATED_MEM_USAGE);
-        double maxBindDedicatedMemoryUsage = profiler[BIND_VALUE].GetMax(CounterType::GPU_DEDICATED_MEM_USAGE);
-
-        double averageFirstBindDedicatedMemoryUsage =
-            profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
-        double stdevFirstBindDedicatedMemoryUsage =
-            profiler[BIND_VALUE_FIRST_RUN].GetStdev(CounterType::GPU_DEDICATED_MEM_USAGE);
-        double minFirstBindDedicatedMemoryUsage =
-            profiler[BIND_VALUE_FIRST_RUN].GetMin(CounterType::GPU_DEDICATED_MEM_USAGE);
-        double maxFirstBindDedicatedMemoryUsage =
-            profiler[BIND_VALUE_FIRST_RUN].GetMax(CounterType::GPU_DEDICATED_MEM_USAGE);
-
-        double averageEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
-        double stdevEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetStdev(CounterType::GPU_DEDICATED_MEM_USAGE);
-        double minEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetMin(CounterType::GPU_DEDICATED_MEM_USAGE);
-        double maxEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetMax(CounterType::GPU_DEDICATED_MEM_USAGE);
-
-        double averageFirstEvalDedicatedMemoryUsage =
-            profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
-        double stdevFirstEvalDedicatedMemoryUsage =
-            profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
-        double minFirstEvalDedicatedMemoryUsage =
-            profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
-        double maxFirstEvalDedicatedMemoryUsage =
-            profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
-
-        double averageLoadSharedMemoryUsage = profiler[LOAD_MODEL].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
-        double stdevLoadSharedMemoryUsage = profiler[LOAD_MODEL].GetStdev(CounterType::GPU_SHARED_MEM_USAGE);
-        double minLoadSharedMemoryUsage = profiler[LOAD_MODEL].GetMin(CounterType::GPU_SHARED_MEM_USAGE);
-        double maxLoadSharedMemoryUsage = profiler[LOAD_MODEL].GetMax(CounterType::GPU_SHARED_MEM_USAGE);
-
-        double averageCreateSessionSharedMemoryUsage =
-            profiler[CREATE_SESSION].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
-        double stdevCreateSessionSharedMemoryUsage =
-            profiler[CREATE_SESSION].GetStdev(CounterType::GPU_SHARED_MEM_USAGE);
-        double minCreateSessionSharedMemoryUsage = profiler[CREATE_SESSION].GetMin(CounterType::GPU_SHARED_MEM_USAGE);
-        double maxCreateSessionSharedMemoryUsage = profiler[CREATE_SESSION].GetMax(CounterType::GPU_SHARED_MEM_USAGE);
-
-        double averageBindSharedMemoryUsage = profiler[BIND_VALUE].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
-        double stdevBindSharedMemoryUsage = profiler[BIND_VALUE].GetStdev(CounterType::GPU_SHARED_MEM_USAGE);
-        double minBindSharedMemoryUsage = profiler[BIND_VALUE].GetMin(CounterType::GPU_SHARED_MEM_USAGE);
-        double maxBindSharedMemoryUsage = profiler[BIND_VALUE].GetMax(CounterType::GPU_SHARED_MEM_USAGE);
-
-        double averageFirstBindSharedMemoryUsage =
-            profiler[BIND_VALUE_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
-        double stdevFirstBindSharedMemoryUsage =
-            profiler[BIND_VALUE_FIRST_RUN].GetStdev(CounterType::GPU_SHARED_MEM_USAGE);
-        double minFirstBindSharedMemoryUsage = profiler[BIND_VALUE_FIRST_RUN].GetMin(CounterType::GPU_SHARED_MEM_USAGE);
-        double maxFirstBindSharedMemoryUsage = profiler[BIND_VALUE_FIRST_RUN].GetMax(CounterType::GPU_SHARED_MEM_USAGE);
-
-        double averageEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
-        double stdevEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetStdev(CounterType::GPU_SHARED_MEM_USAGE);
-        double minEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetMin(CounterType::GPU_SHARED_MEM_USAGE);
-        double maxEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetMax(CounterType::GPU_SHARED_MEM_USAGE);
-
-        double averageFirstEvalSharedMemoryUsage =
-            profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
-        double stdevFirstEvalSharedMemoryUsage =
-            profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
-        double minFirstEvalSharedMemoryUsage =
-            profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
-        double maxFirstEvalSharedMemoryUsage =
-            profiler[EVAL_MODEL_FIRST_RUN].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
-
-        if (!m_csvFileName.empty())
-        {
-            // Check if header exists
-            bool bNewFile = false;
-            std::ifstream fin;
-            fin.open(m_csvFileName);
-            std::filebuf* outbuf = fin.rdbuf();
-            if (EOF == outbuf->sbumpc())
-            {
-                bNewFile = true;
-            }
-            fin.close();
-
-            std::ofstream fout;
-            fout.open(m_csvFileName, std::ios_base::app);
-
-            std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
-            std::string modelName = converter.to_bytes(model);
-
-            if (bNewFile)
-            {
-                fout << "model name"
-                     << ","
-                     << "device type"
-                     << ","
-                     << "input binding"
-                     << ","
-                     << "input type"
-                     << ","
-                     << "device creation location"
-                     << ","
-                     << "iterations"
-                     << ","
-                     << "load iterations"
-                     << ","
-                     << "session creation iterations"
-                     << ","
-                     << "average load (ms)"
-                     << ","
-                     << "standard deviation load (ms)"
-                     << ","
-                     << "min load (ms)"
-                     << ","
-                     << "max load (ms)"
-                     << ","
-                     << "average session creation (ms)"
-                     << ","
-                     << "standard deviation session creation (ms)"
-                     << ","
-                     << "min session creation (ms)"
-                     << ","
-                     << "max session creation (ms)"
-                     << ","
-                     << "average first bind (ms)"
-                     << ","
-                     << "standard deviation first bind (ms)"
-                     << ","
-                     << "min first bind (ms)"
-                     << ","
-                     << "max first bind (ms)"
-                     << ","
-                     << "average bind (ms)"
-                     << ","
-                     << "standard deviation bind (ms)"
-                     << ","
-                     << "min bind (ms)"
-                     << ","
-                     << "max bind (ms)"
-                     << ","
-                     << "average first evaluate (ms)"
-                     << ","
-                     << "standard deviation first evaluate (ms)"
-                     << ","
-                     << "min first evaluate (ms)"
-                     << ","
-                     << "max first evaluate (ms)"
-                     << ","
-                     << "average evaluate (ms)"
-                     << ","
-                     << "standard deviation evaluate (ms)"
-                     << ","
-                     << "min evaluate (ms)"
-                     << ","
-                     << "max evaluate (ms)"
-                     << ","
-                     << "load average working set memory (MB)"
-                     << ","
-                     << "load standard deviation working set memory (MB)"
-                     << ","
-                     << "load min working set memory (MB)"
-                     << ","
-                     << "load max working set memory (MB)"
-                     << ","
-                     << "session creation average working set memory (MB)"
-                     << ","
-                     << "session creation standard deviation working set memory (MB)"
-                     << ","
-                     << "session creation min working set memory (MB)"
-                     << ","
-                     << "session creation max working set memory (MB)"
-                     << ","
-                     << "first bind average working set memory (MB)"
-                     << ","
-                     << "first bind standard deviation working set memory (MB)"
-                     << ","
-                     << "first bind min working set memory (MB)"
-                     << ","
-                     << "first bind max working set memory (MB)"
-                     << ","
-                     << "bind average working set memory (MB)"
-                     << ","
-                     << "bind standard deviation working set memory (MB)"
-                     << ","
-                     << "bind min working set memory (MB)"
-                     << ","
-                     << "bind max working set memory (MB)"
-                     << ","
-                     << "first evaluate average working set memory (MB)"
-                     << ","
-                     << "first evaluate standard deviation working set memory (MB)"
-                     << ","
-                     << "first evaluate min working set memory (MB)"
-                     << ","
-                     << "first evaluate max working set memory (MB)"
-                     << ","
-                     << "evaluate average working set memory (MB)"
-                     << ","
-                     << "evaluate standard deviation working set memory (MB)"
-                     << ","
-                     << "evaluate min working set memory (MB)"
-                     << ","
-                     << "evaluate max working set memory (MB)"
-                     << ","
-                     << "load average dedicated memory (MB)"
-                     << ","
-                     << "load standard deviation dedicated memory (MB)"
-                     << ","
-                     << "load min dedicated memory (MB)"
-                     << ","
-                     << "load max dedicated memory (MB)"
-                     << ","
-                     << "session creation average dedicated memory (MB)"
-                     << ","
-                     << "session creation standard deviation dedicated memory (MB)"
-                     << ","
-                     << "session creation min dedicated memory (MB)"
-                     << ","
-                     << "session creation max dedicated memory (MB)"
-                     << ","
-                     << "first bind average dedicated memory (MB)"
-                     << ","
-                     << "first bind standard deviation dedicated memory (MB)"
-                     << ","
-                     << "first bind min dedicated memory (MB)"
-                     << ","
-                     << "first bind max dedicated memory (MB)"
-                     << ","
-                     << "bind average dedicated memory (MB)"
-                     << ","
-                     << "bind standard deviation dedicated memory (MB)"
-                     << ","
-                     << "bind min dedicated memory (MB)"
-                     << ","
-                     << "bind max dedicated memory (MB)"
-                     << ","
-                     << "first evaluate average dedicated memory (MB)"
-                     << ","
-                     << "first evaluate standard deviation dedicated memory (MB)"
-                     << ","
-                     << "first evaluate min dedicated memory (MB)"
-                     << ","
-                     << "first evaluate max dedicated memory (MB)"
-                     << ","
-                     << "evaluate average dedicated memory (MB)"
-                     << ","
-                     << "evaluate standard deviation dedicated memory (MB)"
-                     << ","
-                     << "evaluate min dedicated memory (MB)"
-                     << ","
-                     << "evaluate max dedicated memory (MB)"
-                     << ","
-                     << "load average shared memory (MB)"
-                     << ","
-                     << "load standard deviation shared memory (MB)"
-                     << ","
-                     << "load min shared memory (MB)"
-                     << ","
-                     << "load max shared memory (MB)"
-                     << ","
-                     << "session creation average shared memory (MB)"
-                     << ","
-                     << "session creation standard deviation shared memory (MB)"
-                     << ","
-                     << "session creation min shared memory (MB)"
-                     << ","
-                     << "session creation max shared memory (MB)"
-                     << ","
-                     << "first bind average shared memory (MB)"
-                     << ","
-                     << "first bind standard deviation shared memory (MB)"
-                     << ","
-                     << "first bind min shared memory (MB)"
-                     << ","
-                     << "first bind max shared memory (MB)"
-                     << ","
-                     << "bind average shared memory (MB)"
-                     << ","
-                     << "bind standard deviation shared memory (MB)"
-                     << ","
-                     << "bind min shared memory (MB)"
-                     << ","
-                     << "bind max shared memory (MB)"
-                     << ","
-                     << "first evaluate average shared memory (MB)"
-                     << ","
-                     << "first evaluate standard deviation shared memory (MB)"
-                     << ","
-                     << "first evaluate min shared memory (MB)"
-                     << ","
-                     << "first evaluate max shared memory (MB)"
-                     << ","
-                     << "evaluate average shared memory (MB)"
-                     << ","
-                     << "evaluate standard deviation shared memory (MB)"
-                     << ","
-                     << "evaluate min shared memory (MB)"
-                     << ","
-                     << "evaluate max shared memory (MB)"
-                     << ",";
-                for (auto metaDataPair : perfFileMetadata)
-                {
-                    fout << metaDataPair.first << ",";
-                }
-                fout << std::endl;
-            }
-            fout << modelName << "," << deviceType << "," << inputBinding << "," << inputType << ","
-                 << deviceCreationLocation << "," << numIterations << "," << numberLoadIterations << "," << numberCreateSessionIterations << "," 
-                 << averageLoadTime << "," << stdevLoadTime << "," << minLoadTime << "," << maxLoadTime << ","
-                 << averageCreateSessionTime << "," << stdevCreateSessionTime << "," << minCreateSessionTime << "," << maxCreateSessionTime << ","
-                 << averageFirstBindTime << "," << stdevFirstBindTime << "," << minFirstBindTime << "," << maxFirstBindTime << ","
-                 << (numIterations <= 1 ? 0 : averageBindTime) << "," << (numIterations <= 1 ? 0 : stdevBindTime) << ","
-                 << (numIterations <= 1 ? 0 : minBindTime) << "," << (numIterations <= 1 ? 0 : maxBindTime) << ","
-                 << averageFirstEvalTime << "," << stdevFirstEvalTime << "," << minFirstEvalTime << "," << maxFirstEvalTime<< ","
-                 << (numIterations <= 1 ? 0 : averageEvalTime) << "," << (numIterations <= 1 ? 0 : stdevEvalTime) << "," 
-                 << (numIterations <= 1 ? 0 : minEvalTime) << "," << (numIterations <= 1 ? 0 : maxEvalTime) << ","
-                 
-                 << averageLoadWorkingSetMemoryUsage << "," << stdevLoadWorkingSetMemoryUsage << "," << minLoadWorkingSetMemoryUsage << "," << maxLoadWorkingSetMemoryUsage << ","
-                 << averageCreateSessionWorkingSetMemoryUsage << "," << stdevCreateSessionWorkingSetMemoryUsage << "," << minCreateSessionWorkingSetMemoryUsage << "," << maxCreateSessionWorkingSetMemoryUsage << ","
-                 << averageFirstBindWorkingSetMemoryUsage << "," << stdevFirstBindWorkingSetMemoryUsage << "," << minFirstBindWorkingSetMemoryUsage << "," << maxFirstBindWorkingSetMemoryUsage << ","
-                 << (numIterations <= 1 ? 0 : averageBindWorkingSetMemoryUsage) << ","
-                 << (numIterations <= 1 ? 0 : stdevBindWorkingSetMemoryUsage) << ","
-                 << (numIterations <= 1 ? 0 : maxBindWorkingSetMemoryUsage) << ","
-                 << (numIterations <= 1 ? 0 : minBindWorkingSetMemoryUsage) << ","
-                 << averageFirstBindWorkingSetMemoryUsage << "," << stdevFirstBindWorkingSetMemoryUsage << "," << minFirstBindWorkingSetMemoryUsage << "," << maxFirstBindWorkingSetMemoryUsage << ","
-                 << (numIterations <= 1 ? 0 : averageEvalWorkingSetMemoryUsage) << ","
-                 << (numIterations <= 1 ? 0 : stdevEvalWorkingSetMemoryUsage) << ","
-                 << (numIterations <= 1 ? 0 : maxEvalWorkingSetMemoryUsage) << ","
-                 << (numIterations <= 1 ? 0 : minEvalWorkingSetMemoryUsage) << ","
-                 
-                 << averageLoadDedicatedMemoryUsage << "," << stdevLoadDedicatedMemoryUsage << "," << minLoadDedicatedMemoryUsage << "," << maxLoadDedicatedMemoryUsage << ","
-                 << averageCreateSessionDedicatedMemoryUsage << "," << stdevCreateSessionDedicatedMemoryUsage << "," << minCreateSessionDedicatedMemoryUsage << "," << maxCreateSessionDedicatedMemoryUsage << ","
-                 << averageFirstBindDedicatedMemoryUsage << "," << stdevFirstBindDedicatedMemoryUsage << "," << minFirstBindDedicatedMemoryUsage << "," << maxFirstBindDedicatedMemoryUsage << ","
-                 << (numIterations <= 1 ? 0 : averageBindDedicatedMemoryUsage) << ","
-                 << (numIterations <= 1 ? 0 : stdevBindDedicatedMemoryUsage) << ","
-                 << (numIterations <= 1 ? 0 : maxBindDedicatedMemoryUsage) << ","
-                 << (numIterations <= 1 ? 0 : minBindDedicatedMemoryUsage) << ","
-                 << averageFirstBindDedicatedMemoryUsage << "," << stdevFirstBindDedicatedMemoryUsage << "," << minFirstBindDedicatedMemoryUsage << "," << maxFirstBindDedicatedMemoryUsage << ","
-                 << (numIterations <= 1 ? 0 : averageEvalDedicatedMemoryUsage) << ","
-                 << (numIterations <= 1 ? 0 : stdevEvalDedicatedMemoryUsage) << ","
-                 << (numIterations <= 1 ? 0 : maxEvalDedicatedMemoryUsage) << ","
-                 << (numIterations <= 1 ? 0 : minEvalDedicatedMemoryUsage) << ","
-
-                 << averageLoadSharedMemoryUsage << "," << stdevLoadSharedMemoryUsage << "," << minLoadSharedMemoryUsage << "," << maxLoadSharedMemoryUsage << ","
-                 << averageCreateSessionSharedMemoryUsage << "," << stdevCreateSessionSharedMemoryUsage << "," << minCreateSessionSharedMemoryUsage << "," << maxCreateSessionSharedMemoryUsage << ","
-                 << averageFirstBindSharedMemoryUsage << "," << stdevFirstBindSharedMemoryUsage << "," << minFirstBindSharedMemoryUsage << "," << maxFirstBindSharedMemoryUsage << ","
-                 << (numIterations <= 1 ? 0 : averageBindSharedMemoryUsage) << ","
-                 << (numIterations <= 1 ? 0 : stdevBindSharedMemoryUsage) << ","
-                 << (numIterations <= 1 ? 0 : maxBindSharedMemoryUsage) << ","
-                 << (numIterations <= 1 ? 0 : minBindSharedMemoryUsage) << ","
-                 << averageFirstBindSharedMemoryUsage << "," << stdevFirstBindSharedMemoryUsage << "," << minFirstBindSharedMemoryUsage << "," << maxFirstBindSharedMemoryUsage << ","
-                 << (numIterations <= 1 ? 0 : averageEvalSharedMemoryUsage) << ","
-                 << (numIterations <= 1 ? 0 : stdevEvalSharedMemoryUsage) << ","
-                 << (numIterations <= 1 ? 0 : maxEvalSharedMemoryUsage) << ","
-                 << (numIterations <= 1 ? 0 : minEvalSharedMemoryUsage) << ",";
-            for (auto metaDataPair : perfFileMetadata)
-            {
-                fout << metaDataPair.second << ",";
-            }
-            fout << std::endl;
-            fout.close();
-        }
-    }
-
-    std::vector<double> m_clockLoadTimes;
-    std::vector<double> m_clockBindTimes;
-    std::vector<double> m_clockEvalTimes;
-
-    std::wstring getCsvFileNamePerIterationResult() { return m_csvFileNamePerIterationResult; }
-#if defined(_AMD64_)
+                                   const std::vector<std::pair<std::string, std::string>>& perfFileMetadata) const;
+    static void PrintLearningModelDevice(const LearningModelDeviceWithMetadata& device);
+    static std::wstring FeatureDescriptorToString(const ILearningModelFeatureDescriptor& descriptor);
+    static bool doesDescriptorContainFP16(const ILearningModelFeatureDescriptor& descriptor);
+    static bool doesModelContainFP16(const LearningModel& model);
+    template <typename T>
+    static void ProcessTensorResult(const CommandLineArgs& args, const void* buffer, const uint32_t uCapacity,
+                                    std::vector<std::pair<float, int>>& maxValues, std::ofstream& fout, unsigned int k);
     // PIX markers only work on amd64
+#if defined(_AMD64_)
     com_ptr<IDXGraphicsAnalysis>& GetGraphicsAnalysis() { return m_graphicsAnalysis; }
 #endif
 private:
+    std::vector<double> m_clockLoadTimes;
+    std::vector<double> m_clockBindTimes;
+    std::vector<double> m_clockEvalTimes;
     std::wstring m_csvFileName;
     std::wstring m_csvFileNamePerIterationSummary;
     std::wstring m_csvFileNamePerIterationResult;
@@ -1240,4 +88,4 @@ class OutputHelper
     // PIX markers only work on amd64
     com_ptr<IDXGraphicsAnalysis> m_graphicsAnalysis = nullptr;
 #endif
-};
+};
\ No newline at end of file
diff --git a/Tools/WinMLRunner/src/Run.cpp b/Tools/WinMLRunner/src/Run.cpp
index 55c0a912..5dcb33b7 100644
--- a/Tools/WinMLRunner/src/Run.cpp
+++ b/Tools/WinMLRunner/src/Run.cpp
@@ -7,6 +7,7 @@
 #include <Windows.Graphics.DirectX.Direct3D11.interop.h>
 #include "Scenarios.h"
 #include <winrt/Windows.Foundation.Metadata.h>
+
 using namespace winrt::Windows::Graphics::DirectX::Direct3D11;
 using namespace winrt::Windows::Foundation::Metadata;
 std::vector<ILearningModelFeatureValue> GenerateInputFeatures(const LearningModel& model, const CommandLineArgs& args,
diff --git a/Tools/WinMLRunner/src/Run.h b/Tools/WinMLRunner/src/Run.h
index 3796cd05..93c4aacc 100644
--- a/Tools/WinMLRunner/src/Run.h
+++ b/Tools/WinMLRunner/src/Run.h
@@ -1,6 +1,6 @@
 #include "CommandLineArgs.h"
 #include "LearningModelDeviceHelper.h"
-
+#include "TimerHelper.h"
 int run(CommandLineArgs& args,
     Profiler<WINML_MODEL_TEST_PERF>& profiler,
     const std::vector<LearningModelDeviceWithMetadata>& deviceList,
diff --git a/Tools/WinMLRunner/src/TimerHelper.h b/Tools/WinMLRunner/src/TimerHelper.h
index 7608ed63..2dff78f5 100644
--- a/Tools/WinMLRunner/src/TimerHelper.h
+++ b/Tools/WinMLRunner/src/TimerHelper.h
@@ -1,7 +1,5 @@
 #pragma once
 
-#include "Common.h"
-#include <windows.h>
 #include <cmath>
 #ifndef DISABLE_GPU_COUNTERS
 #include <Pdh.h>
diff --git a/Tools/WinMLRunner/src/TypeHelper.h b/Tools/WinMLRunner/src/TypeHelper.h
index db18a684..44ef5850 100644
--- a/Tools/WinMLRunner/src/TypeHelper.h
+++ b/Tools/WinMLRunner/src/TypeHelper.h
@@ -1,7 +1,11 @@
 #pragma once
 #include "Common.h"
 
+#ifdef USE_WINML_NUGET
+using namespace winrt::Microsoft::AI::MachineLearning;
+#else
 using namespace winrt::Windows::AI::MachineLearning;
+#endif
 using namespace winrt::Windows::Graphics::DirectX;
 using namespace winrt::Windows::Graphics::Imaging;